Total coverage: 216688 (12%) of 1851629
// SPDX-License-Identifier: GPL-2.0-or-later /* LRW: as defined by Cyril Guyot in * http://grouper.ieee.org/groups/1619/email/pdf00017.pdf * * Copyright (c) 2006 Rik Snel <rsnel@cube.dyndns.org> * * Based on ecb.c * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> */ /* This implementation is checked against the test vectors in the above * document and by a test vector provided by Ken Buchanan at * https://www.mail-archive.com/stds-p1619@listserv.ieee.org/msg00173.html * * The test vectors are included in the testing module tcrypt.[ch] */ #include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/scatterlist.h> #include <linux/slab.h> #include <crypto/b128ops.h> #include <crypto/gf128mul.h> #define LRW_BLOCK_SIZE 16 struct lrw_tfm_ctx { struct crypto_skcipher *child; /* * optimizes multiplying a random (non incrementing, as at the * start of a new sector) value with key2, we could also have * used 4k optimization tables or no optimization at all.
In the * latter case we would have to store key2 here */ struct gf128mul_64k *table; /* * stores: * key2*{ 0,0,...0,0,0,0,1 }, key2*{ 0,0,...0,0,0,1,1 }, * key2*{ 0,0,...0,0,1,1,1 }, key2*{ 0,0,...0,1,1,1,1 } * key2*{ 0,0,...1,1,1,1,1 }, etc * needed for optimized multiplication of incrementing values * with key2 */ be128 mulinc[128]; }; struct lrw_request_ctx { be128 t; struct skcipher_request subreq; }; static inline void lrw_setbit128_bbe(void *b, int bit) { __set_bit(bit ^ (0x80 - #ifdef __BIG_ENDIAN BITS_PER_LONG #else BITS_PER_BYTE #endif ), b); } static int lrw_setkey(struct crypto_skcipher *parent, const u8 *key, unsigned int keylen) { struct lrw_tfm_ctx *ctx = crypto_skcipher_ctx(parent); struct crypto_skcipher *child = ctx->child; int err, bsize = LRW_BLOCK_SIZE; const u8 *tweak = key + keylen - bsize; be128 tmp = { 0 }; int i; crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) & CRYPTO_TFM_REQ_MASK); err = crypto_skcipher_setkey(child, key, keylen - bsize); if (err) return err; if (ctx->table) gf128mul_free_64k(ctx->table); /* initialize multiplication table for Key2 */ ctx->table = gf128mul_init_64k_bbe((be128 *)tweak); if (!ctx->table) return -ENOMEM; /* initialize optimization table */ for (i = 0; i < 128; i++) { lrw_setbit128_bbe(&tmp, i); ctx->mulinc[i] = tmp; gf128mul_64k_bbe(&ctx->mulinc[i], ctx->table); } return 0; } /* * Returns the number of trailing '1' bits in the words of the counter, which is * represented by 4 32-bit words, arranged from least to most significant. * At the same time, increments the counter by one. * * For example: * * u32 counter[4] = { 0xFFFFFFFF, 0x1, 0x0, 0x0 }; * int i = lrw_next_index(&counter); * // i == 33, counter == { 0x0, 0x2, 0x0, 0x0 } */ static int lrw_next_index(u32 *counter) { int i, res = 0; for (i = 0; i < 4; i++) { if (counter[i] + 1 != 0) return res + ffz(counter[i]++); counter[i] = 0; res += 32; } /* * If we get here, then x == 128 and we are incrementing the counter * from all ones to all zeros. This means we must return index 127, i.e. * the one corresponding to key2*{ 1,...,1 }. */ return 127; } /* * We compute the tweak masks twice (both before and after the ECB encryption or * decryption) to avoid having to allocate a temporary buffer and/or make * mutliple calls to the 'ecb(..)' instance, which usually would be slower than * just doing the lrw_next_index() calls again. 
*/ static int lrw_xor_tweak(struct skcipher_request *req, bool second_pass) { const int bs = LRW_BLOCK_SIZE; struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); const struct lrw_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); struct lrw_request_ctx *rctx = skcipher_request_ctx(req); be128 t = rctx->t; struct skcipher_walk w; __be32 *iv; u32 counter[4]; int err; if (second_pass) { req = &rctx->subreq; /* set to our TFM to enforce correct alignment: */ skcipher_request_set_tfm(req, tfm); } err = skcipher_walk_virt(&w, req, false); if (err) return err; iv = (__be32 *)w.iv; counter[0] = be32_to_cpu(iv[3]); counter[1] = be32_to_cpu(iv[2]); counter[2] = be32_to_cpu(iv[1]); counter[3] = be32_to_cpu(iv[0]); while (w.nbytes) { unsigned int avail = w.nbytes; const be128 *wsrc; be128 *wdst; wsrc = w.src.virt.addr; wdst = w.dst.virt.addr; do { be128_xor(wdst++, &t, wsrc++); /* T <- I*Key2, using the optimization * discussed in the specification */ be128_xor(&t, &t, &ctx->mulinc[lrw_next_index(counter)]); } while ((avail -= bs) >= bs); if (second_pass && w.nbytes == w.total) { iv[0] = cpu_to_be32(counter[3]); iv[1] = cpu_to_be32(counter[2]); iv[2] = cpu_to_be32(counter[1]); iv[3] = cpu_to_be32(counter[0]); } err = skcipher_walk_done(&w, avail); } return err; } static int lrw_xor_tweak_pre(struct skcipher_request *req) { return lrw_xor_tweak(req, false); } static int lrw_xor_tweak_post(struct skcipher_request *req) { return lrw_xor_tweak(req, true); } static void lrw_crypt_done(void *data, int err) { struct skcipher_request *req = data; if (!err) { struct lrw_request_ctx *rctx = skcipher_request_ctx(req); rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; err = lrw_xor_tweak_post(req); } skcipher_request_complete(req, err); } static void lrw_init_crypt(struct skcipher_request *req) { const struct lrw_tfm_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); struct lrw_request_ctx *rctx = skcipher_request_ctx(req); struct skcipher_request *subreq = &rctx->subreq; skcipher_request_set_tfm(subreq, ctx->child); skcipher_request_set_callback(subreq, req->base.flags, lrw_crypt_done, req); /* pass req->iv as IV (will be used by xor_tweak, ECB will ignore it) */ skcipher_request_set_crypt(subreq, req->dst, req->dst, req->cryptlen, req->iv); /* calculate first value of T */ memcpy(&rctx->t, req->iv, sizeof(rctx->t)); /* T <- I*Key2 */ gf128mul_64k_bbe(&rctx->t, ctx->table); } static int lrw_encrypt(struct skcipher_request *req) { struct lrw_request_ctx *rctx = skcipher_request_ctx(req); struct skcipher_request *subreq = &rctx->subreq; lrw_init_crypt(req); return lrw_xor_tweak_pre(req) ?: crypto_skcipher_encrypt(subreq) ?: lrw_xor_tweak_post(req); } static int lrw_decrypt(struct skcipher_request *req) { struct lrw_request_ctx *rctx = skcipher_request_ctx(req); struct skcipher_request *subreq = &rctx->subreq; lrw_init_crypt(req); return lrw_xor_tweak_pre(req) ?: crypto_skcipher_decrypt(subreq) ?: lrw_xor_tweak_post(req); } static int lrw_init_tfm(struct crypto_skcipher *tfm) { struct skcipher_instance *inst = skcipher_alg_instance(tfm); struct crypto_skcipher_spawn *spawn = skcipher_instance_ctx(inst); struct lrw_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); struct crypto_skcipher *cipher; cipher = crypto_spawn_skcipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); ctx->child = cipher; crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(cipher) + sizeof(struct lrw_request_ctx)); return 0; } static void lrw_exit_tfm(struct crypto_skcipher *tfm) { struct lrw_tfm_ctx *ctx = crypto_skcipher_ctx(tfm); 
if (ctx->table) gf128mul_free_64k(ctx->table); crypto_free_skcipher(ctx->child); } static void lrw_free_instance(struct skcipher_instance *inst) { crypto_drop_skcipher(skcipher_instance_ctx(inst)); kfree(inst); } static int lrw_create(struct crypto_template *tmpl, struct rtattr **tb) { struct crypto_skcipher_spawn *spawn; struct skcipher_alg_common *alg; struct skcipher_instance *inst; const char *cipher_name; char ecb_name[CRYPTO_MAX_ALG_NAME]; u32 mask; int err; err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER, &mask); if (err) return err; cipher_name = crypto_attr_alg_name(tb[1]); if (IS_ERR(cipher_name)) return PTR_ERR(cipher_name); inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) return -ENOMEM; spawn = skcipher_instance_ctx(inst); err = crypto_grab_skcipher(spawn, skcipher_crypto_instance(inst), cipher_name, 0, mask); if (err == -ENOENT) { err = -ENAMETOOLONG; if (snprintf(ecb_name, CRYPTO_MAX_ALG_NAME, "ecb(%s)", cipher_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; err = crypto_grab_skcipher(spawn, skcipher_crypto_instance(inst), ecb_name, 0, mask); } if (err) goto err_free_inst; alg = crypto_spawn_skcipher_alg_common(spawn); err = -EINVAL; if (alg->base.cra_blocksize != LRW_BLOCK_SIZE) goto err_free_inst; if (alg->ivsize) goto err_free_inst; err = crypto_inst_setname(skcipher_crypto_instance(inst), "lrw", &alg->base); if (err) goto err_free_inst; err = -EINVAL; cipher_name = alg->base.cra_name; /* Alas we screwed up the naming so we have to mangle the * cipher name. */ if (!strncmp(cipher_name, "ecb(", 4)) { int len; len = strscpy(ecb_name, cipher_name + 4, sizeof(ecb_name)); if (len < 2) goto err_free_inst; if (ecb_name[len - 1] != ')') goto err_free_inst; ecb_name[len - 1] = 0; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "lrw(%s)", ecb_name) >= CRYPTO_MAX_ALG_NAME) { err = -ENAMETOOLONG; goto err_free_inst; } } else goto err_free_inst; inst->alg.base.cra_priority = alg->base.cra_priority; inst->alg.base.cra_blocksize = LRW_BLOCK_SIZE; inst->alg.base.cra_alignmask = alg->base.cra_alignmask | (__alignof__(be128) - 1); inst->alg.ivsize = LRW_BLOCK_SIZE; inst->alg.min_keysize = alg->min_keysize + LRW_BLOCK_SIZE; inst->alg.max_keysize = alg->max_keysize + LRW_BLOCK_SIZE; inst->alg.base.cra_ctxsize = sizeof(struct lrw_tfm_ctx); inst->alg.init = lrw_init_tfm; inst->alg.exit = lrw_exit_tfm; inst->alg.setkey = lrw_setkey; inst->alg.encrypt = lrw_encrypt; inst->alg.decrypt = lrw_decrypt; inst->free = lrw_free_instance; err = skcipher_register_instance(tmpl, inst); if (err) { err_free_inst: lrw_free_instance(inst); } return err; } static struct crypto_template lrw_tmpl = { .name = "lrw", .create = lrw_create, .module = THIS_MODULE, }; static int __init lrw_module_init(void) { return crypto_register_template(&lrw_tmpl); } static void __exit lrw_module_exit(void) { crypto_unregister_template(&lrw_tmpl); } subsys_initcall(lrw_module_init); module_exit(lrw_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LRW block cipher mode"); MODULE_ALIAS_CRYPTO("lrw"); MODULE_SOFTDEP("pre: ecb");
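For reference, here is a minimal, hedged sketch of how the "lrw(aes)" instance built by the template above could be driven from another kernel module through the generic skcipher API. The 48-byte key layout (AES-256 key followed by the 16-byte tweak key that lrw_setkey() strips off), the 64-byte buffer and the block index placed in the IV are illustrative assumptions, not values taken from the file.

/*
 * Hedged usage sketch (not part of lrw.c): encrypt one 64-byte region with
 * "lrw(aes)" via the skcipher API, waiting synchronously for completion.
 */
#include <crypto/skcipher.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

static int lrw_demo_one_block_run(void)
{
	u8 key[48] = { 0 };		/* AES-256 key || 16-byte tweak key (key2), placeholder */
	u8 iv[16] = { [15] = 1 };	/* logical block index, big endian, placeholder */
	struct crypto_skcipher *tfm;
	struct skcipher_request *req = NULL;
	DECLARE_CRYPTO_WAIT(wait);
	struct scatterlist sg;
	u8 *buf = NULL;
	int err;

	tfm = crypto_alloc_skcipher("lrw(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, sizeof(key));
	if (err)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	buf = kzalloc(64, GFP_KERNEL);	/* data must be linear kernel memory, multiple of 16 bytes */
	if (!req || !buf) {
		err = -ENOMEM;
		goto out_free;
	}

	sg_init_one(&sg, buf, 64);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
				      CRYPTO_TFM_REQ_MAY_BACKLOG,
				      crypto_req_done, &wait);
	skcipher_request_set_crypt(req, &sg, &sg, 64, iv);
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

out_free:
	kfree(buf);
	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}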
// SPDX-License-Identifier: GPL-2.0-or-later /* * Asynchronous Compression operations * * Copyright (c) 2016, Intel Corporation * Authors: Weigang Li <weigang.li@intel.com> * Giovanni Cabiddu <giovanni.cabiddu@intel.com> */ #include <crypto/internal/acompress.h> #include <linux/cryptouser.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/page-flags.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/string.h> #include <net/netlink.h> #include "compress.h" struct crypto_scomp; static const struct crypto_type crypto_acomp_type; static void acomp_reqchain_done(void *data, int err); static inline struct acomp_alg *__crypto_acomp_alg(struct crypto_alg *alg) { return container_of(alg, struct acomp_alg, calg.base); } static inline struct acomp_alg *crypto_acomp_alg(struct crypto_acomp *tfm) { return __crypto_acomp_alg(crypto_acomp_tfm(tfm)->__crt_alg); } static int __maybe_unused crypto_acomp_report( struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_acomp racomp; memset(&racomp, 0, sizeof(racomp)); strscpy(racomp.type, "acomp", sizeof(racomp.type)); return nla_put(skb, CRYPTOCFGA_REPORT_ACOMP, sizeof(racomp), &racomp); } static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg) __maybe_unused; static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg) { seq_puts(m, "type : acomp\n"); } static void crypto_acomp_exit_tfm(struct crypto_tfm *tfm) { struct crypto_acomp *acomp = __crypto_acomp_tfm(tfm); struct acomp_alg *alg = crypto_acomp_alg(acomp); if (alg->exit) alg->exit(acomp); if (acomp_is_async(acomp)) crypto_free_acomp(acomp->fb); } static int crypto_acomp_init_tfm(struct crypto_tfm *tfm) { struct crypto_acomp *acomp = __crypto_acomp_tfm(tfm); struct acomp_alg *alg = crypto_acomp_alg(acomp); struct crypto_acomp *fb = NULL; int err; acomp->fb
= acomp; if (tfm->__crt_alg->cra_type != &crypto_acomp_type) return crypto_init_scomp_ops_async(tfm); if (acomp_is_async(acomp)) { fb = crypto_alloc_acomp(crypto_acomp_alg_name(acomp), 0, CRYPTO_ALG_ASYNC); if (IS_ERR(fb)) return PTR_ERR(fb); err = -EINVAL; if (crypto_acomp_reqsize(fb) > MAX_SYNC_COMP_REQSIZE) goto out_free_fb; acomp->fb = fb; } acomp->compress = alg->compress; acomp->decompress = alg->decompress; acomp->reqsize = alg->reqsize; acomp->base.exit = crypto_acomp_exit_tfm; if (!alg->init) return 0; err = alg->init(acomp); if (err) goto out_free_fb; return 0; out_free_fb: crypto_free_acomp(fb); return err; } static unsigned int crypto_acomp_extsize(struct crypto_alg *alg) { int extsize = crypto_alg_extsize(alg); if (alg->cra_type != &crypto_acomp_type) extsize += sizeof(struct crypto_scomp *); return extsize; } static const struct crypto_type crypto_acomp_type = { .extsize = crypto_acomp_extsize, .init_tfm = crypto_acomp_init_tfm, #ifdef CONFIG_PROC_FS .show = crypto_acomp_show, #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_acomp_report, #endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_ACOMPRESS_MASK, .type = CRYPTO_ALG_TYPE_ACOMPRESS, .tfmsize = offsetof(struct crypto_acomp, base), }; struct crypto_acomp *crypto_alloc_acomp(const char *alg_name, u32 type, u32 mask) { return crypto_alloc_tfm(alg_name, &crypto_acomp_type, type, mask); } EXPORT_SYMBOL_GPL(crypto_alloc_acomp); struct crypto_acomp *crypto_alloc_acomp_node(const char *alg_name, u32 type, u32 mask, int node) { return crypto_alloc_tfm_node(alg_name, &crypto_acomp_type, type, mask, node); } EXPORT_SYMBOL_GPL(crypto_alloc_acomp_node); static void acomp_save_req(struct acomp_req *req, crypto_completion_t cplt) { struct acomp_req_chain *state = &req->chain; state->compl = req->base.complete; state->data = req->base.data; req->base.complete = cplt; req->base.data = state; state->req0 = req; } static void acomp_restore_req(struct acomp_req *req) { struct acomp_req_chain *state = req->base.data; req->base.complete = state->compl; req->base.data = state->data; } static void acomp_reqchain_virt(struct acomp_req_chain *state, int err) { struct acomp_req *req = state->cur; unsigned int slen = req->slen; unsigned int dlen = req->dlen; req->base.err = err; state = &req->chain; if (state->flags & CRYPTO_ACOMP_REQ_SRC_VIRT) acomp_request_set_src_dma(req, state->src, slen); else if (state->flags & CRYPTO_ACOMP_REQ_SRC_FOLIO) acomp_request_set_src_folio(req, state->sfolio, state->soff, slen); if (state->flags & CRYPTO_ACOMP_REQ_DST_VIRT) acomp_request_set_dst_dma(req, state->dst, dlen); else if (state->flags & CRYPTO_ACOMP_REQ_DST_FOLIO) acomp_request_set_dst_folio(req, state->dfolio, state->doff, dlen); } static void acomp_virt_to_sg(struct acomp_req *req) { struct acomp_req_chain *state = &req->chain; state->flags = req->base.flags & (CRYPTO_ACOMP_REQ_SRC_VIRT | CRYPTO_ACOMP_REQ_DST_VIRT | CRYPTO_ACOMP_REQ_SRC_FOLIO | CRYPTO_ACOMP_REQ_DST_FOLIO); if (acomp_request_src_isvirt(req)) { unsigned int slen = req->slen; const u8 *svirt = req->svirt; state->src = svirt; sg_init_one(&state->ssg, svirt, slen); acomp_request_set_src_sg(req, &state->ssg, slen); } else if (acomp_request_src_isfolio(req)) { struct folio *folio = req->sfolio; unsigned int slen = req->slen; size_t off = req->soff; state->sfolio = folio; state->soff = off; sg_init_table(&state->ssg, 1); sg_set_page(&state->ssg, folio_page(folio, off / PAGE_SIZE), slen, off % PAGE_SIZE); acomp_request_set_src_sg(req, &state->ssg, slen); } if 
(acomp_request_dst_isvirt(req)) { unsigned int dlen = req->dlen; u8 *dvirt = req->dvirt; state->dst = dvirt; sg_init_one(&state->dsg, dvirt, dlen); acomp_request_set_dst_sg(req, &state->dsg, dlen); } else if (acomp_request_dst_isfolio(req)) { struct folio *folio = req->dfolio; unsigned int dlen = req->dlen; size_t off = req->doff; state->dfolio = folio; state->doff = off; sg_init_table(&state->dsg, 1); sg_set_page(&state->dsg, folio_page(folio, off / PAGE_SIZE), dlen, off % PAGE_SIZE); acomp_request_set_src_sg(req, &state->dsg, dlen); } } static int acomp_do_nondma(struct acomp_req_chain *state, struct acomp_req *req) { u32 keep = CRYPTO_ACOMP_REQ_SRC_VIRT | CRYPTO_ACOMP_REQ_SRC_NONDMA | CRYPTO_ACOMP_REQ_DST_VIRT | CRYPTO_ACOMP_REQ_DST_NONDMA; ACOMP_REQUEST_ON_STACK(fbreq, crypto_acomp_reqtfm(req)); int err; acomp_request_set_callback(fbreq, req->base.flags, NULL, NULL); fbreq->base.flags &= ~keep; fbreq->base.flags |= req->base.flags & keep; fbreq->src = req->src; fbreq->dst = req->dst; fbreq->slen = req->slen; fbreq->dlen = req->dlen; if (state->op == crypto_acomp_reqtfm(req)->compress) err = crypto_acomp_compress(fbreq); else err = crypto_acomp_decompress(fbreq); req->dlen = fbreq->dlen; return err; } static int acomp_do_one_req(struct acomp_req_chain *state, struct acomp_req *req) { state->cur = req; if (acomp_request_isnondma(req)) return acomp_do_nondma(state, req); acomp_virt_to_sg(req); return state->op(req); } static int acomp_reqchain_finish(struct acomp_req *req0, int err, u32 mask) { struct acomp_req_chain *state = req0->base.data; struct acomp_req *req = state->cur; struct acomp_req *n; acomp_reqchain_virt(state, err); if (req != req0) list_add_tail(&req->base.list, &req0->base.list); list_for_each_entry_safe(req, n, &state->head, base.list) { list_del_init(&req->base.list); req->base.flags &= mask; req->base.complete = acomp_reqchain_done; req->base.data = state; err = acomp_do_one_req(state, req); if (err == -EINPROGRESS) { if (!list_empty(&state->head)) err = -EBUSY; goto out; } if (err == -EBUSY) goto out; acomp_reqchain_virt(state, err); list_add_tail(&req->base.list, &req0->base.list); } acomp_restore_req(req0); out: return err; } static void acomp_reqchain_done(void *data, int err) { struct acomp_req_chain *state = data; crypto_completion_t compl = state->compl; data = state->data; if (err == -EINPROGRESS) { if (!list_empty(&state->head)) return; goto notify; } err = acomp_reqchain_finish(state->req0, err, CRYPTO_TFM_REQ_MAY_BACKLOG); if (err == -EBUSY) return; notify: compl(data, err); } static int acomp_do_req_chain(struct acomp_req *req, int (*op)(struct acomp_req *req)) { struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); struct acomp_req_chain *state; int err; if (crypto_acomp_req_chain(tfm) || (!acomp_request_chained(req) && acomp_request_issg(req))) return op(req); acomp_save_req(req, acomp_reqchain_done); state = req->base.data; state->op = op; state->src = NULL; INIT_LIST_HEAD(&state->head); list_splice_init(&req->base.list, &state->head); err = acomp_do_one_req(state, req); if (err == -EBUSY || err == -EINPROGRESS) return -EBUSY; return acomp_reqchain_finish(req, err, ~0); } int crypto_acomp_compress(struct acomp_req *req) { return acomp_do_req_chain(req, crypto_acomp_reqtfm(req)->compress); } EXPORT_SYMBOL_GPL(crypto_acomp_compress); int crypto_acomp_decompress(struct acomp_req *req) { return acomp_do_req_chain(req, crypto_acomp_reqtfm(req)->decompress); } EXPORT_SYMBOL_GPL(crypto_acomp_decompress); void comp_prepare_alg(struct comp_alg_common *alg) { 
struct crypto_alg *base = &alg->base; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; } int crypto_register_acomp(struct acomp_alg *alg) { struct crypto_alg *base = &alg->calg.base; comp_prepare_alg(&alg->calg); base->cra_type = &crypto_acomp_type; base->cra_flags |= CRYPTO_ALG_TYPE_ACOMPRESS; return crypto_register_alg(base); } EXPORT_SYMBOL_GPL(crypto_register_acomp); void crypto_unregister_acomp(struct acomp_alg *alg) { crypto_unregister_alg(&alg->base); } EXPORT_SYMBOL_GPL(crypto_unregister_acomp); int crypto_register_acomps(struct acomp_alg *algs, int count) { int i, ret; for (i = 0; i < count; i++) { ret = crypto_register_acomp(&algs[i]); if (ret) goto err; } return 0; err: for (--i; i >= 0; --i) crypto_unregister_acomp(&algs[i]); return ret; } EXPORT_SYMBOL_GPL(crypto_register_acomps); void crypto_unregister_acomps(struct acomp_alg *algs, int count) { int i; for (i = count - 1; i >= 0; --i) crypto_unregister_acomp(&algs[i]); } EXPORT_SYMBOL_GPL(crypto_unregister_acomps); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Asynchronous compression type");
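As a usage note, a hedged sketch of driving an acomp transform from kernel code follows. The algorithm name "deflate" and the buffer sizes are assumptions for illustration; the source and destination buffers must be linear kernel memory suitable for sg_init_one().

/*
 * Hedged usage sketch (not part of acompress.c): compress one buffer through
 * the acomp API and wait synchronously for the result.
 */
#include <crypto/acompress.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>

static int acomp_demo_compress(const void *src, unsigned int slen,
			       void *dst, unsigned int *dlen)
{
	struct scatterlist sg_src, sg_dst;
	struct crypto_acomp *tfm;
	struct acomp_req *req;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	tfm = crypto_alloc_acomp("deflate", 0, 0);	/* algorithm name is an assumption */
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	req = acomp_request_alloc(tfm);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	sg_init_one(&sg_src, src, slen);
	sg_init_one(&sg_dst, dst, *dlen);
	acomp_request_set_params(req, &sg_src, &sg_dst, slen, *dlen);
	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   crypto_req_done, &wait);

	err = crypto_wait_req(crypto_acomp_compress(req), &wait);
	if (!err)
		*dlen = req->dlen;	/* bytes actually produced */

	acomp_request_free(req);
out_free_tfm:
	crypto_free_acomp(tfm);
	return err;
}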
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2023 Thomas Weißschuh <linux@weissschuh.net> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/completion.h> #include <linux/device.h> #include <linux/hwmon.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/types.h> #include <linux/usb.h> #define DRIVER_NAME "powerz" #define POWERZ_EP_CMD_OUT 0x01 #define POWERZ_EP_DATA_IN 0x81 struct powerz_sensor_data { u8 _unknown_1[8]; __le32 V_bus; __le32 I_bus; __le32 V_bus_avg; __le32 I_bus_avg; u8 _unknown_2[8]; u8 temp[2]; __le16 V_cc1; __le16 V_cc2; __le16 V_dp; __le16 V_dm; __le16 V_dd; u8 _unknown_3[4]; } __packed; struct powerz_priv { char transfer_buffer[64]; /* first member to satisfy DMA alignment */ struct mutex mutex; struct completion completion; struct urb *urb; int status; }; static const struct hwmon_channel_info *const powerz_info[] = { HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL | HWMON_I_AVERAGE, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL), HWMON_CHANNEL_INFO(curr, HWMON_C_INPUT | HWMON_C_LABEL | HWMON_C_AVERAGE), HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_LABEL), NULL }; static int powerz_read_string(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, const char **str) { if (type == hwmon_curr && attr == hwmon_curr_label) { *str = "IBUS"; } else if (type == hwmon_in && attr == hwmon_in_label) { if (channel == 0) *str = "VBUS"; else if (channel == 1) *str = "VCC1"; else if (channel == 2) *str = "VCC2"; else if (channel == 3) *str = "VDP"; else if (channel == 4) *str = "VDM"; else if (channel == 5) *str = "VDD"; else return -EOPNOTSUPP; } else if (type == hwmon_temp && attr == hwmon_temp_label) { *str = "TEMP"; } else { return -EOPNOTSUPP; } return 0; } static void powerz_usb_data_complete(struct urb *urb) { struct powerz_priv *priv = urb->context; complete(&priv->completion); } static void powerz_usb_cmd_complete(struct urb *urb) { struct powerz_priv *priv = urb->context; usb_fill_bulk_urb(urb, urb->dev, usb_rcvbulkpipe(urb->dev, POWERZ_EP_DATA_IN), priv->transfer_buffer, sizeof(priv->transfer_buffer), powerz_usb_data_complete, priv); priv->status = usb_submit_urb(urb, GFP_ATOMIC); if (priv->status) complete(&priv->completion); } static int powerz_read_data(struct usb_device *udev, struct powerz_priv *priv) { int ret; priv->status = -ETIMEDOUT; reinit_completion(&priv->completion); priv->transfer_buffer[0] = 0x0c; priv->transfer_buffer[1] =
0x00; priv->transfer_buffer[2] = 0x02; priv->transfer_buffer[3] = 0x00; usb_fill_bulk_urb(priv->urb, udev, usb_sndbulkpipe(udev, POWERZ_EP_CMD_OUT), priv->transfer_buffer, 4, powerz_usb_cmd_complete, priv); ret = usb_submit_urb(priv->urb, GFP_KERNEL); if (ret) return ret; if (!wait_for_completion_interruptible_timeout (&priv->completion, msecs_to_jiffies(5))) { usb_kill_urb(priv->urb); return -EIO; } if (priv->urb->actual_length < sizeof(struct powerz_sensor_data)) return -EIO; return priv->status; } static int powerz_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { struct usb_interface *intf = to_usb_interface(dev->parent); struct usb_device *udev = interface_to_usbdev(intf); struct powerz_priv *priv = usb_get_intfdata(intf); struct powerz_sensor_data *data; int ret; if (!priv) return -EIO; /* disconnected */ mutex_lock(&priv->mutex); ret = powerz_read_data(udev, priv); if (ret) goto out; data = (struct powerz_sensor_data *)priv->transfer_buffer; if (type == hwmon_curr) { if (attr == hwmon_curr_input) *val = ((s32)le32_to_cpu(data->I_bus)) / 1000; else if (attr == hwmon_curr_average) *val = ((s32)le32_to_cpu(data->I_bus_avg)) / 1000; else ret = -EOPNOTSUPP; } else if (type == hwmon_in) { if (attr == hwmon_in_input) { if (channel == 0) *val = le32_to_cpu(data->V_bus) / 1000; else if (channel == 1) *val = le16_to_cpu(data->V_cc1) / 10; else if (channel == 2) *val = le16_to_cpu(data->V_cc2) / 10; else if (channel == 3) *val = le16_to_cpu(data->V_dp) / 10; else if (channel == 4) *val = le16_to_cpu(data->V_dm) / 10; else if (channel == 5) *val = le16_to_cpu(data->V_dd) / 10; else ret = -EOPNOTSUPP; } else if (attr == hwmon_in_average && channel == 0) { *val = le32_to_cpu(data->V_bus_avg) / 1000; } else { ret = -EOPNOTSUPP; } } else if (type == hwmon_temp && attr == hwmon_temp_input) { *val = data->temp[1] * 2000 + data->temp[0] * 1000 / 128; } else { ret = -EOPNOTSUPP; } out: mutex_unlock(&priv->mutex); return ret; } static const struct hwmon_ops powerz_hwmon_ops = { .visible = 0444, .read = powerz_read, .read_string = powerz_read_string, }; static const struct hwmon_chip_info powerz_chip_info = { .ops = &powerz_hwmon_ops, .info = powerz_info, }; static int powerz_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct powerz_priv *priv; struct device *hwmon_dev; struct device *parent; parent = &intf->dev; priv = devm_kzalloc(parent, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; priv->urb = usb_alloc_urb(0, GFP_KERNEL); if (!priv->urb) return -ENOMEM; mutex_init(&priv->mutex); init_completion(&priv->completion); hwmon_dev = devm_hwmon_device_register_with_info(parent, DRIVER_NAME, priv, &powerz_chip_info, NULL); if (IS_ERR(hwmon_dev)) { usb_free_urb(priv->urb); return PTR_ERR(hwmon_dev); } usb_set_intfdata(intf, priv); return 0; } static void powerz_disconnect(struct usb_interface *intf) { struct powerz_priv *priv = usb_get_intfdata(intf); mutex_lock(&priv->mutex); usb_kill_urb(priv->urb); usb_free_urb(priv->urb); mutex_unlock(&priv->mutex); } static const struct usb_device_id powerz_id_table[] = { { USB_DEVICE_INTERFACE_NUMBER(0x5FC9, 0x0061, 0x00) }, /* ChargerLAB POWER-Z KM002C */ { USB_DEVICE_INTERFACE_NUMBER(0x5FC9, 0x0063, 0x00) }, /* ChargerLAB POWER-Z KM003C */ { } }; MODULE_DEVICE_TABLE(usb, powerz_id_table); static struct usb_driver powerz_driver = { .name = DRIVER_NAME, .id_table = powerz_id_table, .probe = powerz_probe, .disconnect = powerz_disconnect, }; module_usb_driver(powerz_driver); MODULE_LICENSE("GPL"); 
MODULE_AUTHOR("Thomas Weißschuh <linux@weissschuh.net>"); MODULE_DESCRIPTION("ChargerLAB POWER-Z USB-C tester");
// SPDX-License-Identifier: GPL-2.0-only #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/workqueue.h> #include <linux/spinlock.h> #include <linux/netfilter/nf_conntrack_common.h> #include <linux/netfilter/nf_tables.h> #include <net/ip.h> #include <net/inet_dscp.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_flow_table.h> struct nft_flow_offload { struct nft_flowtable *flowtable; }; static enum flow_offload_xmit_type nft_xmit_type(struct dst_entry *dst) { if (dst_xfrm(dst)) return FLOW_OFFLOAD_XMIT_XFRM; return FLOW_OFFLOAD_XMIT_NEIGH; } static void nft_default_forward_path(struct nf_flow_route *route, struct dst_entry *dst_cache, enum ip_conntrack_dir dir) { route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex; route->tuple[dir].dst = dst_cache; route->tuple[dir].xmit_type = nft_xmit_type(dst_cache); } static bool nft_is_valid_ether_device(const struct net_device *dev) { if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr)) return false; return true; } static int nft_dev_fill_forward_path(const struct nf_flow_route *route, const struct dst_entry *dst_cache, const struct nf_conn *ct, enum ip_conntrack_dir dir, u8 *ha,
struct net_device_path_stack *stack) { const void *daddr = &ct->tuplehash[!dir].tuple.src.u3; struct net_device *dev = dst_cache->dev; struct neighbour *n; u8 nud_state; if (!nft_is_valid_ether_device(dev)) goto out; n = dst_neigh_lookup(dst_cache, daddr); if (!n) return -1; read_lock_bh(&n->lock); nud_state = n->nud_state; ether_addr_copy(ha, n->ha); read_unlock_bh(&n->lock); neigh_release(n); if (!(nud_state & NUD_VALID)) return -1; out: return dev_fill_forward_path(dev, ha, stack); } struct nft_forward_info { const struct net_device *indev; const struct net_device *outdev; const struct net_device *hw_outdev; struct id { __u16 id; __be16 proto; } encap[NF_FLOW_TABLE_ENCAP_MAX]; u8 num_encaps; u8 ingress_vlans; u8 h_source[ETH_ALEN]; u8 h_dest[ETH_ALEN]; enum flow_offload_xmit_type xmit_type; }; static void nft_dev_path_info(const struct net_device_path_stack *stack, struct nft_forward_info *info, unsigned char *ha, struct nf_flowtable *flowtable) { const struct net_device_path *path; int i; memcpy(info->h_dest, ha, ETH_ALEN); for (i = 0; i < stack->num_paths; i++) { path = &stack->path[i]; switch (path->type) { case DEV_PATH_ETHERNET: case DEV_PATH_DSA: case DEV_PATH_VLAN: case DEV_PATH_PPPOE: info->indev = path->dev; if (is_zero_ether_addr(info->h_source)) memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN); if (path->type == DEV_PATH_ETHERNET) break; if (path->type == DEV_PATH_DSA) { i = stack->num_paths; break; } /* DEV_PATH_VLAN and DEV_PATH_PPPOE */ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) { info->indev = NULL; break; } if (!info->outdev) info->outdev = path->dev; info->encap[info->num_encaps].id = path->encap.id; info->encap[info->num_encaps].proto = path->encap.proto; info->num_encaps++; if (path->type == DEV_PATH_PPPOE) memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN); break; case DEV_PATH_BRIDGE: if (is_zero_ether_addr(info->h_source)) memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN); switch (path->bridge.vlan_mode) { case DEV_PATH_BR_VLAN_UNTAG_HW: info->ingress_vlans |= BIT(info->num_encaps - 1); break; case DEV_PATH_BR_VLAN_TAG: info->encap[info->num_encaps].id = path->bridge.vlan_id; info->encap[info->num_encaps].proto = path->bridge.vlan_proto; info->num_encaps++; break; case DEV_PATH_BR_VLAN_UNTAG: info->num_encaps--; break; case DEV_PATH_BR_VLAN_KEEP: break; } info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT; break; default: info->indev = NULL; break; } } if (!info->outdev) info->outdev = info->indev; info->hw_outdev = info->indev; if (nf_flowtable_hw_offload(flowtable) && nft_is_valid_ether_device(info->indev)) info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT; } static bool nft_flowtable_find_dev(const struct net_device *dev, struct nft_flowtable *ft) { struct nft_hook *hook; bool found = false; list_for_each_entry_rcu(hook, &ft->hook_list, list) { if (hook->ops.dev != dev) continue; found = true; break; } return found; } static void nft_dev_forward_path(struct nf_flow_route *route, const struct nf_conn *ct, enum ip_conntrack_dir dir, struct nft_flowtable *ft) { const struct dst_entry *dst = route->tuple[dir].dst; struct net_device_path_stack stack; struct nft_forward_info info = {}; unsigned char ha[ETH_ALEN]; int i; if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0) nft_dev_path_info(&stack, &info, ha, &ft->data); if (!info.indev || !nft_flowtable_find_dev(info.indev, ft)) return; route->tuple[!dir].in.ifindex = info.indev->ifindex; for (i = 0; i < info.num_encaps; i++) { route->tuple[!dir].in.encap[i].id = info.encap[i].id; 
route->tuple[!dir].in.encap[i].proto = info.encap[i].proto; } route->tuple[!dir].in.num_encaps = info.num_encaps; route->tuple[!dir].in.ingress_vlans = info.ingress_vlans; if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) { memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN); memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN); route->tuple[dir].out.ifindex = info.outdev->ifindex; route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex; route->tuple[dir].xmit_type = info.xmit_type; } } static int nft_flow_route(const struct nft_pktinfo *pkt, const struct nf_conn *ct, struct nf_flow_route *route, enum ip_conntrack_dir dir, struct nft_flowtable *ft) { struct dst_entry *this_dst = skb_dst(pkt->skb); struct dst_entry *other_dst = NULL; struct flowi fl; memset(&fl, 0, sizeof(fl)); switch (nft_pf(pkt)) { case NFPROTO_IPV4: fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip; fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip; fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex; fl.u.ip4.flowi4_iif = this_dst->dev->ifindex; fl.u.ip4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip_hdr(pkt->skb))); fl.u.ip4.flowi4_mark = pkt->skb->mark; fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC; break; case NFPROTO_IPV6: fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6; fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.src.u3.in6; fl.u.ip6.flowi6_oif = nft_in(pkt)->ifindex; fl.u.ip6.flowi6_iif = this_dst->dev->ifindex; fl.u.ip6.flowlabel = ip6_flowinfo(ipv6_hdr(pkt->skb)); fl.u.ip6.flowi6_mark = pkt->skb->mark; fl.u.ip6.flowi6_flags = FLOWI_FLAG_ANYSRC; break; } if (!dst_hold_safe(this_dst)) return -ENOENT; nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt)); if (!other_dst) { dst_release(this_dst); return -ENOENT; } nft_default_forward_path(route, this_dst, dir); nft_default_forward_path(route, other_dst, !dir); if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH && route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) { nft_dev_forward_path(route, ct, dir, ft); nft_dev_forward_path(route, ct, !dir, ft); } return 0; } static bool nft_flow_offload_skip(struct sk_buff *skb, int family) { if (skb_sec_path(skb)) return true; if (family == NFPROTO_IPV4) { const struct ip_options *opt; opt = &(IPCB(skb)->opt); if (unlikely(opt->optlen)) return true; } return false; } static void flow_offload_ct_tcp(struct nf_conn *ct) { /* conntrack will not see all packets, disable tcp window validation. 
*/ spin_lock_bh(&ct->lock); ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; spin_unlock_bh(&ct->lock); } static void nft_flow_offload_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_flow_offload *priv = nft_expr_priv(expr); struct nf_flowtable *flowtable = &priv->flowtable->data; struct tcphdr _tcph, *tcph = NULL; struct nf_flow_route route = {}; enum ip_conntrack_info ctinfo; struct flow_offload *flow; enum ip_conntrack_dir dir; struct nf_conn *ct; int ret; if (nft_flow_offload_skip(pkt->skb, nft_pf(pkt))) goto out; ct = nf_ct_get(pkt->skb, &ctinfo); if (!ct) goto out; switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { case IPPROTO_TCP: tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(_tcph), &_tcph); if (unlikely(!tcph || tcph->fin || tcph->rst || !nf_conntrack_tcp_established(ct))) goto out; break; case IPPROTO_UDP: break; #ifdef CONFIG_NF_CT_PROTO_GRE case IPPROTO_GRE: { struct nf_conntrack_tuple *tuple; if (ct->status & IPS_NAT_MASK) goto out; tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; /* No support for GRE v1 */ if (tuple->src.u.gre.key || tuple->dst.u.gre.key) goto out; break; } #endif default: goto out; } if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) || ct->status & (IPS_SEQ_ADJUST | IPS_NAT_CLASH)) goto out; if (!nf_ct_is_confirmed(ct)) goto out; if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status)) goto out; dir = CTINFO2DIR(ctinfo); if (nft_flow_route(pkt, ct, &route, dir, priv->flowtable) < 0) goto err_flow_route; flow = flow_offload_alloc(ct); if (!flow) goto err_flow_alloc; flow_offload_route_init(flow, &route); if (tcph) flow_offload_ct_tcp(ct); __set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags); ret = flow_offload_add(flowtable, flow); if (ret < 0) goto err_flow_add; return; err_flow_add: flow_offload_free(flow); err_flow_alloc: dst_release(route.tuple[dir].dst); dst_release(route.tuple[!dir].dst); err_flow_route: clear_bit(IPS_OFFLOAD_BIT, &ct->status); out: regs->verdict.code = NFT_BREAK; } static int nft_flow_offload_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { unsigned int hook_mask = (1 << NF_INET_FORWARD); if (ctx->family != NFPROTO_IPV4 && ctx->family != NFPROTO_IPV6 && ctx->family != NFPROTO_INET) return -EOPNOTSUPP; return nft_chain_validate_hooks(ctx->chain, hook_mask); } static const struct nla_policy nft_flow_offload_policy[NFTA_FLOW_MAX + 1] = { [NFTA_FLOW_TABLE_NAME] = { .type = NLA_STRING, .len = NFT_NAME_MAXLEN - 1 }, }; static int nft_flow_offload_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_flow_offload *priv = nft_expr_priv(expr); u8 genmask = nft_genmask_next(ctx->net); struct nft_flowtable *flowtable; if (!tb[NFTA_FLOW_TABLE_NAME]) return -EINVAL; flowtable = nft_flowtable_lookup(ctx->net, ctx->table, tb[NFTA_FLOW_TABLE_NAME], genmask); if (IS_ERR(flowtable)) return PTR_ERR(flowtable); if (!nft_use_inc(&flowtable->use)) return -EMFILE; priv->flowtable = flowtable; return nf_ct_netns_get(ctx->net, ctx->family); } static void nft_flow_offload_deactivate(const struct nft_ctx *ctx, const struct nft_expr *expr, enum nft_trans_phase phase) { struct nft_flow_offload *priv = nft_expr_priv(expr); nf_tables_deactivate_flowtable(ctx, priv->flowtable, phase); } static void nft_flow_offload_activate(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct nft_flow_offload *priv = nft_expr_priv(expr); 
nft_use_inc_restore(&priv->flowtable->use); } static void nft_flow_offload_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { nf_ct_netns_put(ctx->net, ctx->family); } static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { struct nft_flow_offload *priv = nft_expr_priv(expr); if (nla_put_string(skb, NFTA_FLOW_TABLE_NAME, priv->flowtable->name)) goto nla_put_failure; return 0; nla_put_failure: return -1; } static struct nft_expr_type nft_flow_offload_type; static const struct nft_expr_ops nft_flow_offload_ops = { .type = &nft_flow_offload_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)), .eval = nft_flow_offload_eval, .init = nft_flow_offload_init, .activate = nft_flow_offload_activate, .deactivate = nft_flow_offload_deactivate, .destroy = nft_flow_offload_destroy, .validate = nft_flow_offload_validate, .dump = nft_flow_offload_dump, .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_flow_offload_type __read_mostly = { .name = "flow_offload", .ops = &nft_flow_offload_ops, .policy = nft_flow_offload_policy, .maxattr = NFTA_FLOW_MAX, .owner = THIS_MODULE, }; static int flow_offload_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (event != NETDEV_DOWN) return NOTIFY_DONE; nf_flow_table_cleanup(dev); return NOTIFY_DONE; } static struct notifier_block flow_offload_netdev_notifier = { .notifier_call = flow_offload_netdev_event, }; static int __init nft_flow_offload_module_init(void) { int err; err = register_netdevice_notifier(&flow_offload_netdev_notifier); if (err) goto err; err = nft_register_expr(&nft_flow_offload_type); if (err < 0) goto register_expr; return 0; register_expr: unregister_netdevice_notifier(&flow_offload_netdev_notifier); err: return err; } static void __exit nft_flow_offload_module_exit(void) { nft_unregister_expr(&nft_flow_offload_type); unregister_netdevice_notifier(&flow_offload_netdev_notifier); } module_init(nft_flow_offload_module_init); module_exit(nft_flow_offload_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_ALIAS_NFT_EXPR("flow_offload"); MODULE_DESCRIPTION("nftables hardware flow offload module");
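The tuple[dir]/tuple[!dir] indexing used by nft_default_forward_path() and nft_dev_forward_path() above is easy to misread. The small user-space toy below (not kernel code; the struct is a simplified stand-in for struct nf_flow_route, and IP_CT_DIR_ORIGINAL/IP_CT_DIR_REPLY are 0 and 1 in the kernel headers) illustrates the convention: the cached route is stored under the direction it transmits, while its device index is recorded as the ingress interface of the opposite direction.

/* Hedged illustration only: dir/!dir indexing as in nft_default_forward_path(). */
#include <stdio.h>

enum { DIR_ORIGINAL = 0, DIR_REPLY = 1 };	/* same values as enum ip_conntrack_dir */

struct toy_route {
	struct {
		int in_ifindex;		/* device packets of this direction arrive on */
		int route_ifindex;	/* device the cached route transmits through */
	} tuple[2];
};

static void toy_default_forward_path(struct toy_route *route, int route_ifindex, int dir)
{
	route->tuple[!dir].in_ifindex = route_ifindex;	/* egress of dir == ingress of !dir */
	route->tuple[dir].route_ifindex = route_ifindex;
}

int main(void)
{
	struct toy_route route;

	toy_default_forward_path(&route, 2, DIR_ORIGINAL);	/* original direction routed via ifindex 2 */
	toy_default_forward_path(&route, 3, DIR_REPLY);		/* reply direction routed via ifindex 3 */

	printf("original: in=%d route=%d\n",
	       route.tuple[DIR_ORIGINAL].in_ifindex, route.tuple[DIR_ORIGINAL].route_ifindex);
	printf("reply:    in=%d route=%d\n",
	       route.tuple[DIR_REPLY].in_ifindex, route.tuple[DIR_REPLY].route_ifindex);
	return 0;
}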
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM oom #if !defined(_TRACE_OOM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_OOM_H #include <linux/tracepoint.h> #include <trace/events/mmflags.h> #define PG_COUNT_TO_KB(x) ((x) << (PAGE_SHIFT - 10)) TRACE_EVENT(oom_score_adj_update, TP_PROTO(struct task_struct *task), TP_ARGS(task), TP_STRUCT__entry( __field( pid_t, pid) __array( char, comm, TASK_COMM_LEN ) __field( short, oom_score_adj) ), TP_fast_assign( __entry->pid = task->pid; memcpy(__entry->comm, task->comm, TASK_COMM_LEN); __entry->oom_score_adj = task->signal->oom_score_adj; ), TP_printk("pid=%d comm=%s oom_score_adj=%hd", __entry->pid, __entry->comm, __entry->oom_score_adj) ); TRACE_EVENT(reclaim_retry_zone, TP_PROTO(struct zoneref *zoneref, int order, unsigned long reclaimable, unsigned long available, unsigned long min_wmark, int no_progress_loops, bool wmark_check), TP_ARGS(zoneref, order, reclaimable, available, min_wmark, no_progress_loops, wmark_check), TP_STRUCT__entry( __field( int, node) __field( int, zone_idx) __field( int, order) __field( unsigned long, reclaimable) __field( unsigned long, available) __field( unsigned long, min_wmark) __field( int, no_progress_loops) __field( bool, wmark_check) ), TP_fast_assign( __entry->node = zonelist_node_idx(zoneref); __entry->zone_idx = zonelist_zone_idx(zoneref); __entry->order = order; __entry->reclaimable = reclaimable; __entry->available = available; __entry->min_wmark = min_wmark; __entry->no_progress_loops = no_progress_loops; __entry->wmark_check = wmark_check; ), TP_printk("node=%d zone=%-8s order=%d reclaimable=%lu available=%lu min_wmark=%lu no_progress_loops=%d wmark_check=%d", __entry->node, __print_symbolic(__entry->zone_idx, ZONE_TYPE), __entry->order, __entry->reclaimable, __entry->available, __entry->min_wmark, __entry->no_progress_loops, __entry->wmark_check) ); TRACE_EVENT(mark_victim, TP_PROTO(struct task_struct *task, uid_t uid), TP_ARGS(task, uid), TP_STRUCT__entry( __field(int, pid) __string(comm, task->comm) __field(unsigned long, total_vm) __field(unsigned long, anon_rss) __field(unsigned long, file_rss) __field(unsigned long, shmem_rss) __field(uid_t, uid) __field(unsigned long, pgtables) __field(short, oom_score_adj) ), TP_fast_assign( __entry->pid = task->pid; __assign_str(comm); __entry->total_vm = PG_COUNT_TO_KB(task->mm->total_vm); __entry->anon_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_ANONPAGES)); __entry->file_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_FILEPAGES)); __entry->shmem_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_SHMEMPAGES)); __entry->uid = uid; __entry->pgtables = mm_pgtables_bytes(task->mm) >> 10; __entry->oom_score_adj = task->signal->oom_score_adj; ),
TP_printk("pid=%d comm=%s total-vm=%lukB anon-rss=%lukB file-rss:%lukB shmem-rss:%lukB uid=%u pgtables=%lukB oom_score_adj=%hd", __entry->pid, __get_str(comm), __entry->total_vm, __entry->anon_rss, __entry->file_rss, __entry->shmem_rss, __entry->uid, __entry->pgtables, __entry->oom_score_adj ) ); TRACE_EVENT(wake_reaper, TP_PROTO(int pid), TP_ARGS(pid), TP_STRUCT__entry( __field(int, pid) ), TP_fast_assign( __entry->pid = pid; ), TP_printk("pid=%d", __entry->pid) ); TRACE_EVENT(start_task_reaping, TP_PROTO(int pid), TP_ARGS(pid), TP_STRUCT__entry( __field(int, pid) ), TP_fast_assign( __entry->pid = pid; ), TP_printk("pid=%d", __entry->pid) ); TRACE_EVENT(finish_task_reaping, TP_PROTO(int pid), TP_ARGS(pid), TP_STRUCT__entry( __field(int, pid) ), TP_fast_assign( __entry->pid = pid; ), TP_printk("pid=%d", __entry->pid) ); TRACE_EVENT(skip_task_reaping, TP_PROTO(int pid), TP_ARGS(pid), TP_STRUCT__entry( __field(int, pid) ), TP_fast_assign( __entry->pid = pid; ), TP_printk("pid=%d", __entry->pid) ); #ifdef CONFIG_COMPACTION TRACE_EVENT(compact_retry, TP_PROTO(int order, enum compact_priority priority, enum compact_result result, int retries, int max_retries, bool ret), TP_ARGS(order, priority, result, retries, max_retries, ret), TP_STRUCT__entry( __field( int, order) __field( int, priority) __field( int, result) __field( int, retries) __field( int, max_retries) __field( bool, ret) ), TP_fast_assign( __entry->order = order; __entry->priority = priority; __entry->result = compact_result_to_feedback(result); __entry->retries = retries; __entry->max_retries = max_retries; __entry->ret = ret; ), TP_printk("order=%d priority=%s compaction_result=%s retries=%d max_retries=%d should_retry=%d", __entry->order, __print_symbolic(__entry->priority, COMPACTION_PRIORITY), __print_symbolic(__entry->result, COMPACTION_FEEDBACK), __entry->retries, __entry->max_retries, __entry->ret) ); #endif /* CONFIG_COMPACTION */ #endif /* This part must be outside protection */ #include <trace/define_trace.h>
914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 // SPDX-License-Identifier: GPL-2.0-only /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/skbuff.h> #include <linux/if_arp.h> #include <linux/ip.h> #include <net/ipv6.h> #include <net/icmp.h> #include <net/udp.h> #include <net/tcp.h> #include <net/route.h> #include <linux/netfilter.h> #include <linux/netfilter_bridge.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter/xt_LOG.h> #include <net/netfilter/nf_log.h> static const struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { .level = LOGLEVEL_NOTICE, .logflags = NF_LOG_DEFAULT_MASK, }, }, }; struct arppayload { unsigned char mac_src[ETH_ALEN]; unsigned char ip_src[4]; unsigned char mac_dst[ETH_ALEN]; unsigned char ip_dst[4]; }; /* Guard against containers flooding syslog. */ static bool nf_log_allowed(const struct net *net) { return net_eq(net, &init_net) || sysctl_nf_log_all_netns; } static void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb) { u16 vid; if (!skb_vlan_tag_present(skb)) return; vid = skb_vlan_tag_get(skb); nf_log_buf_add(m, "VPROTO=%04x VID=%u ", ntohs(skb->vlan_proto), vid); } static void noinline_for_stack dump_arp_packet(struct nf_log_buf *m, const struct nf_loginfo *info, const struct sk_buff *skb, unsigned int nhoff) { const struct arppayload *ap; struct arppayload _arpp; const struct arphdr *ah; unsigned int logflags; struct arphdr _arph; ah = skb_header_pointer(skb, nhoff, sizeof(_arph), &_arph); if (!ah) { nf_log_buf_add(m, "TRUNCATED"); return; } if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; else logflags = NF_LOG_DEFAULT_MASK; if (logflags & NF_LOG_MACDECODE) { nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ", eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest); nf_log_dump_vlan(m, skb); nf_log_buf_add(m, "MACPROTO=%04x ", ntohs(eth_hdr(skb)->h_proto)); } nf_log_buf_add(m, "ARP HTYPE=%d PTYPE=0x%04x OPCODE=%d", ntohs(ah->ar_hrd), ntohs(ah->ar_pro), ntohs(ah->ar_op)); /* If it's for Ethernet and the lengths are OK, then log the ARP * payload. 
*/ if (ah->ar_hrd != htons(ARPHRD_ETHER) || ah->ar_hln != ETH_ALEN || ah->ar_pln != sizeof(__be32)) return; ap = skb_header_pointer(skb, nhoff + sizeof(_arph), sizeof(_arpp), &_arpp); if (!ap) { nf_log_buf_add(m, " INCOMPLETE [%zu bytes]", skb->len - sizeof(_arph)); return; } nf_log_buf_add(m, " MACSRC=%pM IPSRC=%pI4 MACDST=%pM IPDST=%pI4", ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst); } static void nf_log_dump_packet_common(struct nf_log_buf *m, u8 pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix, struct net *net) { const struct net_device *physoutdev __maybe_unused; const struct net_device *physindev __maybe_unused; nf_log_buf_add(m, KERN_SOH "%c%sIN=%s OUT=%s ", '0' + loginfo->u.log.level, prefix, in ? in->name : "", out ? out->name : ""); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) physindev = nf_bridge_get_physindev(skb, net); if (physindev && in != physindev) nf_log_buf_add(m, "PHYSIN=%s ", physindev->name); physoutdev = nf_bridge_get_physoutdev(skb); if (physoutdev && out != physoutdev) nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name); #endif } static void nf_log_arp_packet(struct net *net, u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix) { struct nf_log_buf *m; if (!nf_log_allowed(net)) return; m = nf_log_buf_open(); if (!loginfo) loginfo = &default_loginfo; nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix, net); dump_arp_packet(m, loginfo, skb, skb_network_offset(skb)); nf_log_buf_close(m); } static struct nf_logger nf_arp_logger __read_mostly = { .name = "nf_log_arp", .type = NF_LOG_TYPE_LOG, .logfn = nf_log_arp_packet, .me = THIS_MODULE, }; static void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m, struct sock *sk) { if (!sk || !sk_fullsock(sk) || !net_eq(net, sock_net(sk))) return; read_lock_bh(&sk->sk_callback_lock); if (sk->sk_socket && sk->sk_socket->file) { const struct cred *cred = sk->sk_socket->file->f_cred; nf_log_buf_add(m, "UID=%u GID=%u ", from_kuid_munged(&init_user_ns, cred->fsuid), from_kgid_munged(&init_user_ns, cred->fsgid)); } read_unlock_bh(&sk->sk_callback_lock); } static noinline_for_stack int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb, u8 proto, int fragment, unsigned int offset, unsigned int logflags) { struct tcphdr _tcph; const struct tcphdr *th; /* Max length: 10 "PROTO=TCP " */ nf_log_buf_add(m, "PROTO=TCP "); if (fragment) return 0; /* Max length: 25 "INCOMPLETE [65535 bytes] " */ th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); if (!th) { nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset); return 1; } /* Max length: 20 "SPT=65535 DPT=65535 " */ nf_log_buf_add(m, "SPT=%u DPT=%u ", ntohs(th->source), ntohs(th->dest)); /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ if (logflags & NF_LOG_TCPSEQ) { nf_log_buf_add(m, "SEQ=%u ACK=%u ", ntohl(th->seq), ntohl(th->ack_seq)); } /* Max length: 13 "WINDOW=65535 " */ nf_log_buf_add(m, "WINDOW=%u ", ntohs(th->window)); /* Max length: 9 "RES=0x3C " */ nf_log_buf_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22)); /* Max length: 35 "AE CWR ECE URG ACK PSH RST SYN FIN " */ if (th->ae) nf_log_buf_add(m, "AE "); if (th->cwr) nf_log_buf_add(m, "CWR "); if (th->ece) nf_log_buf_add(m, "ECE "); if (th->urg) nf_log_buf_add(m, "URG "); if (th->ack) 
nf_log_buf_add(m, "ACK "); if (th->psh) nf_log_buf_add(m, "PSH "); if (th->rst) nf_log_buf_add(m, "RST "); if (th->syn) nf_log_buf_add(m, "SYN "); if (th->fin) nf_log_buf_add(m, "FIN "); /* Max length: 11 "URGP=65535 " */ nf_log_buf_add(m, "URGP=%u ", ntohs(th->urg_ptr)); if ((logflags & NF_LOG_TCPOPT) && th->doff * 4 > sizeof(struct tcphdr)) { unsigned int optsize = th->doff * 4 - sizeof(struct tcphdr); u8 _opt[60 - sizeof(struct tcphdr)]; unsigned int i; const u8 *op; op = skb_header_pointer(skb, offset + sizeof(struct tcphdr), optsize, _opt); if (!op) { nf_log_buf_add(m, "OPT (TRUNCATED)"); return 1; } /* Max length: 127 "OPT (" 15*4*2chars ") " */ nf_log_buf_add(m, "OPT ("); for (i = 0; i < optsize; i++) nf_log_buf_add(m, "%02X", op[i]); nf_log_buf_add(m, ") "); } return 0; } static noinline_for_stack int nf_log_dump_udp_header(struct nf_log_buf *m, const struct sk_buff *skb, u8 proto, int fragment, unsigned int offset) { struct udphdr _udph; const struct udphdr *uh; if (proto == IPPROTO_UDP) /* Max length: 10 "PROTO=UDP " */ nf_log_buf_add(m, "PROTO=UDP "); else /* Max length: 14 "PROTO=UDPLITE " */ nf_log_buf_add(m, "PROTO=UDPLITE "); if (fragment) goto out; /* Max length: 25 "INCOMPLETE [65535 bytes] " */ uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); if (!uh) { nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset); return 1; } /* Max length: 20 "SPT=65535 DPT=65535 " */ nf_log_buf_add(m, "SPT=%u DPT=%u LEN=%u ", ntohs(uh->source), ntohs(uh->dest), ntohs(uh->len)); out: return 0; } /* One level of recursion won't kill us */ static noinline_for_stack void dump_ipv4_packet(struct net *net, struct nf_log_buf *m, const struct nf_loginfo *info, const struct sk_buff *skb, unsigned int iphoff) { const struct iphdr *ih; unsigned int logflags; struct iphdr _iph; if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; else logflags = NF_LOG_DEFAULT_MASK; ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); if (!ih) { nf_log_buf_add(m, "TRUNCATED"); return; } /* Important fields: * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. 
* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ nf_log_buf_add(m, "SRC=%pI4 DST=%pI4 ", &ih->saddr, &ih->daddr); /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ nf_log_buf_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", iph_totlen(skb, ih), ih->tos & IPTOS_TOS_MASK, ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id)); /* Max length: 6 "CE DF MF " */ if (ntohs(ih->frag_off) & IP_CE) nf_log_buf_add(m, "CE "); if (ntohs(ih->frag_off) & IP_DF) nf_log_buf_add(m, "DF "); if (ntohs(ih->frag_off) & IP_MF) nf_log_buf_add(m, "MF "); /* Max length: 11 "FRAG:65535 " */ if (ntohs(ih->frag_off) & IP_OFFSET) nf_log_buf_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); if ((logflags & NF_LOG_IPOPT) && ih->ihl * 4 > sizeof(struct iphdr)) { unsigned char _opt[4 * 15 - sizeof(struct iphdr)]; const unsigned char *op; unsigned int i, optsize; optsize = ih->ihl * 4 - sizeof(struct iphdr); op = skb_header_pointer(skb, iphoff + sizeof(_iph), optsize, _opt); if (!op) { nf_log_buf_add(m, "TRUNCATED"); return; } /* Max length: 127 "OPT (" 15*4*2chars ") " */ nf_log_buf_add(m, "OPT ("); for (i = 0; i < optsize; i++) nf_log_buf_add(m, "%02X", op[i]); nf_log_buf_add(m, ") "); } switch (ih->protocol) { case IPPROTO_TCP: if (nf_log_dump_tcp_header(m, skb, ih->protocol, ntohs(ih->frag_off) & IP_OFFSET, iphoff + ih->ihl * 4, logflags)) return; break; case IPPROTO_UDP: case IPPROTO_UDPLITE: if (nf_log_dump_udp_header(m, skb, ih->protocol, ntohs(ih->frag_off) & IP_OFFSET, iphoff + ih->ihl * 4)) return; break; case IPPROTO_ICMP: { static const size_t required_len[NR_ICMP_TYPES + 1] = { [ICMP_ECHOREPLY] = 4, [ICMP_DEST_UNREACH] = 8 + sizeof(struct iphdr), [ICMP_SOURCE_QUENCH] = 8 + sizeof(struct iphdr), [ICMP_REDIRECT] = 8 + sizeof(struct iphdr), [ICMP_ECHO] = 4, [ICMP_TIME_EXCEEDED] = 8 + sizeof(struct iphdr), [ICMP_PARAMETERPROB] = 8 + sizeof(struct iphdr), [ICMP_TIMESTAMP] = 20, [ICMP_TIMESTAMPREPLY] = 20, [ICMP_ADDRESS] = 12, [ICMP_ADDRESSREPLY] = 12 }; const struct icmphdr *ich; struct icmphdr _icmph; /* Max length: 11 "PROTO=ICMP " */ nf_log_buf_add(m, "PROTO=ICMP "); if (ntohs(ih->frag_off) & IP_OFFSET) break; /* Max length: 25 "INCOMPLETE [65535 bytes] " */ ich = skb_header_pointer(skb, iphoff + ih->ihl * 4, sizeof(_icmph), &_icmph); if (!ich) { nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - iphoff - ih->ihl * 4); break; } /* Max length: 18 "TYPE=255 CODE=255 " */ nf_log_buf_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code); /* Max length: 25 "INCOMPLETE [65535 bytes] " */ if (ich->type <= NR_ICMP_TYPES && required_len[ich->type] && skb->len - iphoff - ih->ihl * 4 < required_len[ich->type]) { nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - iphoff - ih->ihl * 4); break; } switch (ich->type) { case ICMP_ECHOREPLY: case ICMP_ECHO: /* Max length: 19 "ID=65535 SEQ=65535 " */ nf_log_buf_add(m, "ID=%u SEQ=%u ", ntohs(ich->un.echo.id), ntohs(ich->un.echo.sequence)); break; case ICMP_PARAMETERPROB: /* Max length: 14 "PARAMETER=255 " */ nf_log_buf_add(m, "PARAMETER=%u ", ntohl(ich->un.gateway) >> 24); break; case ICMP_REDIRECT: /* Max length: 24 "GATEWAY=255.255.255.255 " */ nf_log_buf_add(m, "GATEWAY=%pI4 ", &ich->un.gateway); fallthrough; case ICMP_DEST_UNREACH: case ICMP_SOURCE_QUENCH: case ICMP_TIME_EXCEEDED: /* Max length: 3+maxlen */ if (!iphoff) { /* Only recurse once. 
*/ nf_log_buf_add(m, "["); dump_ipv4_packet(net, m, info, skb, iphoff + ih->ihl * 4 + sizeof(_icmph)); nf_log_buf_add(m, "] "); } /* Max length: 10 "MTU=65535 " */ if (ich->type == ICMP_DEST_UNREACH && ich->code == ICMP_FRAG_NEEDED) { nf_log_buf_add(m, "MTU=%u ", ntohs(ich->un.frag.mtu)); } } break; } /* Max Length */ case IPPROTO_AH: { const struct ip_auth_hdr *ah; struct ip_auth_hdr _ahdr; if (ntohs(ih->frag_off) & IP_OFFSET) break; /* Max length: 9 "PROTO=AH " */ nf_log_buf_add(m, "PROTO=AH "); /* Max length: 25 "INCOMPLETE [65535 bytes] " */ ah = skb_header_pointer(skb, iphoff + ih->ihl * 4, sizeof(_ahdr), &_ahdr); if (!ah) { nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - iphoff - ih->ihl * 4); break; } /* Length: 15 "SPI=0xF1234567 " */ nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi)); break; } case IPPROTO_ESP: { const struct ip_esp_hdr *eh; struct ip_esp_hdr _esph; /* Max length: 10 "PROTO=ESP " */ nf_log_buf_add(m, "PROTO=ESP "); if (ntohs(ih->frag_off) & IP_OFFSET) break; /* Max length: 25 "INCOMPLETE [65535 bytes] " */ eh = skb_header_pointer(skb, iphoff + ih->ihl * 4, sizeof(_esph), &_esph); if (!eh) { nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - iphoff - ih->ihl * 4); break; } /* Length: 15 "SPI=0xF1234567 " */ nf_log_buf_add(m, "SPI=0x%x ", ntohl(eh->spi)); break; } /* Max length: 10 "PROTO 255 " */ default: nf_log_buf_add(m, "PROTO=%u ", ih->protocol); } /* Max length: 15 "UID=4294967295 " */ if ((logflags & NF_LOG_UID) && !iphoff) nf_log_dump_sk_uid_gid(net, m, skb->sk); /* Max length: 16 "MARK=0xFFFFFFFF " */ if (!iphoff && skb->mark) nf_log_buf_add(m, "MARK=0x%x ", skb->mark); /* Proto Max log string length */ /* IP: 40+46+6+11+127 = 230 */ /* TCP: 10+max(25,20+30+13+9+35+11+127) = 255 */ /* UDP: 10+max(25,20) = 35 */ /* UDPLITE: 14+max(25,20) = 39 */ /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */ /* ESP: 10+max(25)+15 = 50 */ /* AH: 9+max(25)+15 = 49 */ /* unknown: 10 */ /* (ICMP allows recursion one level deep) */ /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */ /* maxlen = 230+ 91 + 230 + 255 = 806 */ } static noinline_for_stack void dump_ipv6_packet(struct net *net, struct nf_log_buf *m, const struct nf_loginfo *info, const struct sk_buff *skb, unsigned int ip6hoff, int recurse) { const struct ipv6hdr *ih; unsigned int hdrlen = 0; unsigned int logflags; struct ipv6hdr _ip6h; unsigned int ptr; u8 currenthdr; int fragment; if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; else logflags = NF_LOG_DEFAULT_MASK; ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); if (!ih) { nf_log_buf_add(m, "TRUNCATED"); return; } /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ nf_log_buf_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr); /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ nf_log_buf_add(m, "LEN=%zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", ntohs(ih->payload_len) + sizeof(struct ipv6hdr), (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20, ih->hop_limit, (ntohl(*(__be32 *)ih) & 0x000fffff)); fragment = 0; ptr = ip6hoff + sizeof(struct ipv6hdr); currenthdr = ih->nexthdr; while (currenthdr != NEXTHDR_NONE && nf_ip6_ext_hdr(currenthdr)) { struct ipv6_opt_hdr _hdr; const struct ipv6_opt_hdr *hp; hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); if (!hp) { nf_log_buf_add(m, "TRUNCATED"); return; } /* Max length: 48 "OPT (...) 
" */ if (logflags & NF_LOG_IPOPT) nf_log_buf_add(m, "OPT ( "); switch (currenthdr) { case IPPROTO_FRAGMENT: { struct frag_hdr _fhdr; const struct frag_hdr *fh; nf_log_buf_add(m, "FRAG:"); fh = skb_header_pointer(skb, ptr, sizeof(_fhdr), &_fhdr); if (!fh) { nf_log_buf_add(m, "TRUNCATED "); return; } /* Max length: 6 "65535 " */ nf_log_buf_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8); /* Max length: 11 "INCOMPLETE " */ if (fh->frag_off & htons(0x0001)) nf_log_buf_add(m, "INCOMPLETE "); nf_log_buf_add(m, "ID:%08x ", ntohl(fh->identification)); if (ntohs(fh->frag_off) & 0xFFF8) fragment = 1; hdrlen = 8; break; } case IPPROTO_DSTOPTS: case IPPROTO_ROUTING: case IPPROTO_HOPOPTS: if (fragment) { if (logflags & NF_LOG_IPOPT) nf_log_buf_add(m, ")"); return; } hdrlen = ipv6_optlen(hp); break; /* Max Length */ case IPPROTO_AH: if (logflags & NF_LOG_IPOPT) { struct ip_auth_hdr _ahdr; const struct ip_auth_hdr *ah; /* Max length: 3 "AH " */ nf_log_buf_add(m, "AH "); if (fragment) { nf_log_buf_add(m, ")"); return; } ah = skb_header_pointer(skb, ptr, sizeof(_ahdr), &_ahdr); if (!ah) { /* Max length: 26 "INCOMPLETE [65535 bytes] )" */ nf_log_buf_add(m, "INCOMPLETE [%u bytes] )", skb->len - ptr); return; } /* Length: 15 "SPI=0xF1234567 */ nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi)); } hdrlen = ipv6_authlen(hp); break; case IPPROTO_ESP: if (logflags & NF_LOG_IPOPT) { struct ip_esp_hdr _esph; const struct ip_esp_hdr *eh; /* Max length: 4 "ESP " */ nf_log_buf_add(m, "ESP "); if (fragment) { nf_log_buf_add(m, ")"); return; } /* Max length: 26 "INCOMPLETE [65535 bytes] )" */ eh = skb_header_pointer(skb, ptr, sizeof(_esph), &_esph); if (!eh) { nf_log_buf_add(m, "INCOMPLETE [%u bytes] )", skb->len - ptr); return; } /* Length: 16 "SPI=0xF1234567 )" */ nf_log_buf_add(m, "SPI=0x%x )", ntohl(eh->spi)); } return; default: /* Max length: 20 "Unknown Ext Hdr 255" */ nf_log_buf_add(m, "Unknown Ext Hdr %u", currenthdr); return; } if (logflags & NF_LOG_IPOPT) nf_log_buf_add(m, ") "); currenthdr = hp->nexthdr; ptr += hdrlen; } switch (currenthdr) { case IPPROTO_TCP: if (nf_log_dump_tcp_header(m, skb, currenthdr, fragment, ptr, logflags)) return; break; case IPPROTO_UDP: case IPPROTO_UDPLITE: if (nf_log_dump_udp_header(m, skb, currenthdr, fragment, ptr)) return; break; case IPPROTO_ICMPV6: { struct icmp6hdr _icmp6h; const struct icmp6hdr *ic; /* Max length: 13 "PROTO=ICMPv6 " */ nf_log_buf_add(m, "PROTO=ICMPv6 "); if (fragment) break; /* Max length: 25 "INCOMPLETE [65535 bytes] " */ ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h); if (!ic) { nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr); return; } /* Max length: 18 "TYPE=255 CODE=255 " */ nf_log_buf_add(m, "TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code); switch (ic->icmp6_type) { case ICMPV6_ECHO_REQUEST: case ICMPV6_ECHO_REPLY: /* Max length: 19 "ID=65535 SEQ=65535 " */ nf_log_buf_add(m, "ID=%u SEQ=%u ", ntohs(ic->icmp6_identifier), ntohs(ic->icmp6_sequence)); break; case ICMPV6_MGM_QUERY: case ICMPV6_MGM_REPORT: case ICMPV6_MGM_REDUCTION: break; case ICMPV6_PARAMPROB: /* Max length: 17 "POINTER=ffffffff " */ nf_log_buf_add(m, "POINTER=%08x ", ntohl(ic->icmp6_pointer)); fallthrough; case ICMPV6_DEST_UNREACH: case ICMPV6_PKT_TOOBIG: case ICMPV6_TIME_EXCEED: /* Max length: 3+maxlen */ if (recurse) { nf_log_buf_add(m, "["); dump_ipv6_packet(net, m, info, skb, ptr + sizeof(_icmp6h), 0); nf_log_buf_add(m, "] "); } /* Max length: 10 "MTU=65535 " */ if (ic->icmp6_type == ICMPV6_PKT_TOOBIG) { nf_log_buf_add(m, "MTU=%u ", ntohl(ic->icmp6_mtu)); } } 
break; } /* Max length: 10 "PROTO=255 " */ default: nf_log_buf_add(m, "PROTO=%u ", currenthdr); } /* Max length: 15 "UID=4294967295 " */ if ((logflags & NF_LOG_UID) && recurse) nf_log_dump_sk_uid_gid(net, m, skb->sk); /* Max length: 16 "MARK=0xFFFFFFFF " */ if (recurse && skb->mark) nf_log_buf_add(m, "MARK=0x%x ", skb->mark); } static void dump_mac_header(struct nf_log_buf *m, const struct nf_loginfo *info, const struct sk_buff *skb) { struct net_device *dev = skb->dev; unsigned int logflags = 0; if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; if (!(logflags & NF_LOG_MACDECODE)) goto fallback; switch (dev->type) { case ARPHRD_ETHER: nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ", eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest); nf_log_dump_vlan(m, skb); nf_log_buf_add(m, "MACPROTO=%04x ", ntohs(eth_hdr(skb)->h_proto)); return; default: break; } fallback: nf_log_buf_add(m, "MAC="); if (dev->hard_header_len && skb->mac_header != skb->network_header) { const unsigned char *p = skb_mac_header(skb); unsigned int i; if (dev->type == ARPHRD_SIT) { p -= ETH_HLEN; if (p < skb->head) p = NULL; } if (p) { nf_log_buf_add(m, "%02x", *p++); for (i = 1; i < dev->hard_header_len; i++) nf_log_buf_add(m, ":%02x", *p++); } if (dev->type == ARPHRD_SIT) { const struct iphdr *iph = (struct iphdr *)skb_mac_header(skb); nf_log_buf_add(m, " TUNNEL=%pI4->%pI4", &iph->saddr, &iph->daddr); } } nf_log_buf_add(m, " "); } static void nf_log_ip_packet(struct net *net, u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix) { struct nf_log_buf *m; if (!nf_log_allowed(net)) return; m = nf_log_buf_open(); if (!loginfo) loginfo = &default_loginfo; nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix, net); if (in) dump_mac_header(m, loginfo, skb); dump_ipv4_packet(net, m, loginfo, skb, skb_network_offset(skb)); nf_log_buf_close(m); } static struct nf_logger nf_ip_logger __read_mostly = { .name = "nf_log_ipv4", .type = NF_LOG_TYPE_LOG, .logfn = nf_log_ip_packet, .me = THIS_MODULE, }; static void nf_log_ip6_packet(struct net *net, u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix) { struct nf_log_buf *m; if (!nf_log_allowed(net)) return; m = nf_log_buf_open(); if (!loginfo) loginfo = &default_loginfo; nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix, net); if (in) dump_mac_header(m, loginfo, skb); dump_ipv6_packet(net, m, loginfo, skb, skb_network_offset(skb), 1); nf_log_buf_close(m); } static struct nf_logger nf_ip6_logger __read_mostly = { .name = "nf_log_ipv6", .type = NF_LOG_TYPE_LOG, .logfn = nf_log_ip6_packet, .me = THIS_MODULE, }; static void nf_log_unknown_packet(struct net *net, u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix) { struct nf_log_buf *m; if (!nf_log_allowed(net)) return; m = nf_log_buf_open(); if (!loginfo) loginfo = &default_loginfo; nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix, net); dump_mac_header(m, loginfo, skb); nf_log_buf_close(m); } static void nf_log_netdev_packet(struct net *net, u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix) 
{ switch (skb->protocol) { case htons(ETH_P_IP): nf_log_ip_packet(net, pf, hooknum, skb, in, out, loginfo, prefix); break; case htons(ETH_P_IPV6): nf_log_ip6_packet(net, pf, hooknum, skb, in, out, loginfo, prefix); break; case htons(ETH_P_ARP): case htons(ETH_P_RARP): nf_log_arp_packet(net, pf, hooknum, skb, in, out, loginfo, prefix); break; default: nf_log_unknown_packet(net, pf, hooknum, skb, in, out, loginfo, prefix); break; } } static struct nf_logger nf_netdev_logger __read_mostly = { .name = "nf_log_netdev", .type = NF_LOG_TYPE_LOG, .logfn = nf_log_netdev_packet, .me = THIS_MODULE, }; static struct nf_logger nf_bridge_logger __read_mostly = { .name = "nf_log_bridge", .type = NF_LOG_TYPE_LOG, .logfn = nf_log_netdev_packet, .me = THIS_MODULE, }; static int __net_init nf_log_syslog_net_init(struct net *net) { int ret = nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger); if (ret) return ret; ret = nf_log_set(net, NFPROTO_ARP, &nf_arp_logger); if (ret) goto err1; ret = nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger); if (ret) goto err2; ret = nf_log_set(net, NFPROTO_NETDEV, &nf_netdev_logger); if (ret) goto err3; ret = nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger); if (ret) goto err4; return 0; err4: nf_log_unset(net, &nf_netdev_logger); err3: nf_log_unset(net, &nf_ip6_logger); err2: nf_log_unset(net, &nf_arp_logger); err1: nf_log_unset(net, &nf_ip_logger); return ret; } static void __net_exit nf_log_syslog_net_exit(struct net *net) { nf_log_unset(net, &nf_ip_logger); nf_log_unset(net, &nf_arp_logger); nf_log_unset(net, &nf_ip6_logger); nf_log_unset(net, &nf_netdev_logger); nf_log_unset(net, &nf_bridge_logger); } static struct pernet_operations nf_log_syslog_net_ops = { .init = nf_log_syslog_net_init, .exit = nf_log_syslog_net_exit, }; static int __init nf_log_syslog_init(void) { int ret; ret = register_pernet_subsys(&nf_log_syslog_net_ops); if (ret < 0) return ret; ret = nf_log_register(NFPROTO_IPV4, &nf_ip_logger); if (ret < 0) goto err1; ret = nf_log_register(NFPROTO_ARP, &nf_arp_logger); if (ret < 0) goto err2; ret = nf_log_register(NFPROTO_IPV6, &nf_ip6_logger); if (ret < 0) goto err3; ret = nf_log_register(NFPROTO_NETDEV, &nf_netdev_logger); if (ret < 0) goto err4; ret = nf_log_register(NFPROTO_BRIDGE, &nf_bridge_logger); if (ret < 0) goto err5; return 0; err5: nf_log_unregister(&nf_netdev_logger); err4: nf_log_unregister(&nf_ip6_logger); err3: nf_log_unregister(&nf_arp_logger); err2: nf_log_unregister(&nf_ip_logger); err1: pr_err("failed to register logger\n"); unregister_pernet_subsys(&nf_log_syslog_net_ops); return ret; } static void __exit nf_log_syslog_exit(void) { unregister_pernet_subsys(&nf_log_syslog_net_ops); nf_log_unregister(&nf_ip_logger); nf_log_unregister(&nf_arp_logger); nf_log_unregister(&nf_ip6_logger); nf_log_unregister(&nf_netdev_logger); nf_log_unregister(&nf_bridge_logger); } module_init(nf_log_syslog_init); module_exit(nf_log_syslog_exit); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("Netfilter syslog packet logging"); MODULE_LICENSE("GPL"); MODULE_ALIAS("nf_log_arp"); MODULE_ALIAS("nf_log_bridge"); MODULE_ALIAS("nf_log_ipv4"); MODULE_ALIAS("nf_log_ipv6"); MODULE_ALIAS("nf_log_netdev"); MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 0); MODULE_ALIAS_NF_LOGGER(AF_INET, 0); MODULE_ALIAS_NF_LOGGER(3, 0); MODULE_ALIAS_NF_LOGGER(5, 0); /* NFPROTO_NETDEV */ MODULE_ALIAS_NF_LOGGER(AF_INET6, 0);
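The TCP branch of this logger re-reads the transport header with skb_header_pointer() and prints every fixed field by hand, deriving RES from the flag word as ntohl(word & TCP_RESERVED_BITS) >> 22 (the reserved nibble shifted left by two, which is where the "RES=0x3C" maximum in its comments comes from). The following is a hypothetical userspace sketch, not part of the module, that walks the same 20-byte fixed TCP header at its RFC 793 offsets and prints it in the module's "SPT=... DPT=..." style; it assumes nothing beyond the standard header layout and prints the raw reserved nibble rather than the shifted form.

/*
 * Hypothetical userspace demo (not part of nf_log_syslog): decode the fixed
 * 20-byte TCP header from a raw buffer and print it in the same style the
 * module uses.  Offsets follow RFC 793.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t rd16(const uint8_t *p) { return (uint16_t)(p[0] << 8 | p[1]); }
static uint32_t rd32(const uint8_t *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8) | p[3];
}

static void dump_tcp(const uint8_t *th)
{
	/* Flag bits of byte 13, least significant first. */
	static const char * const names[] = {
		"FIN", "SYN", "RST", "PSH", "ACK", "URG", "ECE", "CWR"
	};
	uint8_t flags = th[13];
	int i;

	printf("PROTO=TCP SPT=%u DPT=%u ", rd16(th), rd16(th + 2));
	printf("SEQ=%u ACK=%u ", (unsigned)rd32(th + 4), (unsigned)rd32(th + 8));
	printf("WINDOW=%u ", rd16(th + 14));
	/* Upper nibble of byte 12 is the data offset, lower nibble is reserved. */
	printf("RES=0x%02x ", th[12] & 0x0f);
	for (i = 7; i >= 0; i--)
		if (flags & (1 << i))
			printf("%s ", names[i]);
	printf("URGP=%u\n", rd16(th + 18));
}

int main(void)
{
	/* A minimal SYN segment: port 40000 -> 22, seq 1, window 65535. */
	const uint8_t syn[20] = {
		0x9c, 0x40, 0x00, 0x16,	/* source and destination ports */
		0x00, 0x00, 0x00, 0x01,	/* sequence number              */
		0x00, 0x00, 0x00, 0x00,	/* acknowledgment number        */
		0x50, 0x02, 0xff, 0xff,	/* doff=5, SYN, window          */
		0x00, 0x00, 0x00, 0x00	/* checksum, urgent pointer     */
	};

	dump_tcp(syn);
	return 0;
}

Compiled with any C compiler, the sample segment prints: PROTO=TCP SPT=40000 DPT=22 SEQ=1 ACK=0 WINDOW=65535 RES=0x00 SYN URGP=0.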
// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/truncate.c - code for taking down pages from address_spaces
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 10Sep2002	Andrew Morton
 *		Initial version.
 */

#include <linux/kernel.h>
#include <linux/backing-dev.h>
#include <linux/dax.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/export.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/pagevec.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/shmem_fs.h>
#include <linux/rmap.h>
#include "internal.h"

static void clear_shadow_entries(struct address_space *mapping,
				 unsigned long start, unsigned long max)
{
	XA_STATE(xas, &mapping->i_pages, start);
	struct folio *folio;

	/* Handled by shmem itself, or for DAX we do nothing. */
	if (shmem_mapping(mapping) || dax_mapping(mapping))
		return;

	xas_set_update(&xas, workingset_update_node);

	spin_lock(&mapping->host->i_lock);
	xas_lock_irq(&xas);

	/* Clear all shadow entries from start to max */
	xas_for_each(&xas, folio, max) {
		if (xa_is_value(folio))
			xas_store(&xas, NULL);
	}

	xas_unlock_irq(&xas);
	if (mapping_shrinkable(mapping))
		inode_add_lru(mapping->host);
	spin_unlock(&mapping->host->i_lock);
}

/*
 * Unconditionally remove exceptional entries. Usually called from truncate
 * path. Note that the folio_batch may be altered by this function by removing
 * exceptional entries similar to what folio_batch_remove_exceptionals() does.
 * Please note that indices[] has entries in ascending order as guaranteed by
 * either find_get_entries() or find_lock_entries().
 */
static void truncate_folio_batch_exceptionals(struct address_space *mapping,
				struct folio_batch *fbatch, pgoff_t *indices)
{
	XA_STATE(xas, &mapping->i_pages, indices[0]);
	int nr = folio_batch_count(fbatch);
	struct folio *folio;
	int i, j;

	/* Handled by shmem itself */
	if (shmem_mapping(mapping))
		return;

	for (j = 0; j < nr; j++)
		if (xa_is_value(fbatch->folios[j]))
			break;

	if (j == nr)
		return;

	if (dax_mapping(mapping)) {
		for (i = j; i < nr; i++) {
			if (xa_is_value(fbatch->folios[i])) {
				/*
				 * File systems should already have called
				 * dax_break_layout_entry() to remove all DAX
				 * entries while holding a lock to prevent
				 * establishing new entries. Therefore we
				 * shouldn't find any here.
				 */
				WARN_ON_ONCE(1);

				/*
				 * Delete the mapping so truncate_pagecache()
				 * doesn't loop forever.
				 */
				dax_delete_mapping_entry(mapping, indices[i]);
			}
		}
		goto out;
	}

	xas_set(&xas, indices[j]);
	xas_set_update(&xas, workingset_update_node);

	spin_lock(&mapping->host->i_lock);
	xas_lock_irq(&xas);

	xas_for_each(&xas, folio, indices[nr-1]) {
		if (xa_is_value(folio))
			xas_store(&xas, NULL);
	}

	xas_unlock_irq(&xas);
	if (mapping_shrinkable(mapping))
		inode_add_lru(mapping->host);
	spin_unlock(&mapping->host->i_lock);
out:
	folio_batch_remove_exceptionals(fbatch);
}

/**
 * folio_invalidate - Invalidate part or all of a folio.
 * @folio: The folio which is affected.
 * @offset: start of the range to invalidate
 * @length: length of the range to invalidate
 *
 * folio_invalidate() is called when all or part of the folio has become
 * invalidated by a truncate operation.
* * folio_invalidate() does not have to release all buffers, but it must * ensure that no dirty buffer is left outside @offset and that no I/O * is underway against any of the blocks which are outside the truncation * point. Because the caller is about to free (and possibly reuse) those * blocks on-disk. */ void folio_invalidate(struct folio *folio, size_t offset, size_t length) { const struct address_space_operations *aops = folio->mapping->a_ops; if (aops->invalidate_folio) aops->invalidate_folio(folio, offset, length); } EXPORT_SYMBOL_GPL(folio_invalidate); /* * If truncate cannot remove the fs-private metadata from the page, the page * becomes orphaned. It will be left on the LRU and may even be mapped into * user pagetables if we're racing with filemap_fault(). * * We need to bail out if page->mapping is no longer equal to the original * mapping. This happens a) when the VM reclaimed the page while we waited on * its lock, b) when a concurrent invalidate_mapping_pages got there first and * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. */ static void truncate_cleanup_folio(struct folio *folio) { if (folio_mapped(folio)) unmap_mapping_folio(folio); if (folio_needs_release(folio)) folio_invalidate(folio, 0, folio_size(folio)); /* * Some filesystems seem to re-dirty the page even after * the VM has canceled the dirty bit (eg ext3 journaling). * Hence dirty accounting check is placed after invalidation. */ folio_cancel_dirty(folio); } int truncate_inode_folio(struct address_space *mapping, struct folio *folio) { if (folio->mapping != mapping) return -EIO; truncate_cleanup_folio(folio); filemap_remove_folio(folio); return 0; } /* * Handle partial folios. The folio may be entirely within the * range if a split has raced with us. If not, we zero the part of the * folio that's within the [start, end] range, and then split the folio if * it's large. split_page_range() will discard pages which now lie beyond * i_size, and we rely on the caller to discard pages which lie within a * newly created hole. * * Returns false if splitting failed so the caller can avoid * discarding the entire folio which is stubbornly unsplit. */ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) { loff_t pos = folio_pos(folio); unsigned int offset, length; struct page *split_at, *split_at2; if (pos < start) offset = start - pos; else offset = 0; length = folio_size(folio); if (pos + length <= (u64)end) length = length - offset; else length = end + 1 - pos - offset; folio_wait_writeback(folio); if (length == folio_size(folio)) { truncate_inode_folio(folio->mapping, folio); return true; } /* * We may be zeroing pages we're about to discard, but it avoids * doing a complex calculation here, and then doing the zeroing * anyway if the page split fails. 
*/ if (!mapping_inaccessible(folio->mapping)) folio_zero_range(folio, offset, length); if (folio_needs_release(folio)) folio_invalidate(folio, offset, length); if (!folio_test_large(folio)) return true; split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE); split_at2 = folio_page(folio, PAGE_ALIGN_DOWN(offset + length) / PAGE_SIZE); if (!try_folio_split(folio, split_at, NULL)) { /* * try to split at offset + length to make sure folios within * the range can be dropped, especially to avoid memory waste * for shmem truncate */ struct folio *folio2 = page_folio(split_at2); if (!folio_try_get(folio2)) goto no_split; if (!folio_test_large(folio2)) goto out; if (!folio_trylock(folio2)) goto out; /* * make sure folio2 is large and does not change its mapping. * Its split result does not matter here. */ if (folio_test_large(folio2) && folio2->mapping == folio->mapping) try_folio_split(folio2, split_at2, NULL); folio_unlock(folio2); out: folio_put(folio2); no_split: return true; } if (folio_test_dirty(folio)) return false; truncate_inode_folio(folio->mapping, folio); return true; } /* * Used to get rid of pages on hardware memory corruption. */ int generic_error_remove_folio(struct address_space *mapping, struct folio *folio) { if (!mapping) return -EINVAL; /* * Only punch for normal data pages for now. * Handling other types like directories would need more auditing. */ if (!S_ISREG(mapping->host->i_mode)) return -EIO; return truncate_inode_folio(mapping, folio); } EXPORT_SYMBOL(generic_error_remove_folio); /** * mapping_evict_folio() - Remove an unused folio from the page-cache. * @mapping: The mapping this folio belongs to. * @folio: The folio to remove. * * Safely remove one folio from the page cache. * It only drops clean, unused folios. * * Context: Folio must be locked. * Return: The number of pages successfully removed. */ long mapping_evict_folio(struct address_space *mapping, struct folio *folio) { /* The page may have been truncated before it was locked */ if (!mapping) return 0; if (folio_test_dirty(folio) || folio_test_writeback(folio)) return 0; /* The refcount will be elevated if any page in the folio is mapped */ if (folio_ref_count(folio) > folio_nr_pages(folio) + folio_has_private(folio) + 1) return 0; if (!filemap_release_folio(folio, 0)) return 0; return remove_mapping(mapping, folio); } /** * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets * @mapping: mapping to truncate * @lstart: offset from which to truncate * @lend: offset to which to truncate (inclusive) * * Truncate the page cache, removing the pages that are between * specified offsets (and zeroing out partial pages * if lstart or lend + 1 is not page aligned). * * Truncate takes two passes - the first pass is nonblocking. It will not * block on page locks and it will not block on writeback. The second pass * will wait. This is to prevent as much IO as possible in the affected region. * The first pass will remove most pages, so the search cost of the second pass * is low. * * We pass down the cache-hot hint to the page freeing code. Even if the * mapping is large, it is probably the case that the final pages are the most * recently touched, and freeing happens in ascending file offset order. * * Note that since ->invalidate_folio() accepts range to invalidate * truncate_inode_pages_range is able to handle cases where lend + 1 is not * page aligned properly. 
*/ void truncate_inode_pages_range(struct address_space *mapping, loff_t lstart, loff_t lend) { pgoff_t start; /* inclusive */ pgoff_t end; /* exclusive */ struct folio_batch fbatch; pgoff_t indices[PAGEVEC_SIZE]; pgoff_t index; int i; struct folio *folio; bool same_folio; if (mapping_empty(mapping)) return; /* * 'start' and 'end' always covers the range of pages to be fully * truncated. Partial pages are covered with 'partial_start' at the * start of the range and 'partial_end' at the end of the range. * Note that 'end' is exclusive while 'lend' is inclusive. */ start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT; if (lend == -1) /* * lend == -1 indicates end-of-file so we have to set 'end' * to the highest possible pgoff_t and since the type is * unsigned we're using -1. */ end = -1; else end = (lend + 1) >> PAGE_SHIFT; folio_batch_init(&fbatch); index = start; while (index < end && find_lock_entries(mapping, &index, end - 1, &fbatch, indices)) { truncate_folio_batch_exceptionals(mapping, &fbatch, indices); for (i = 0; i < folio_batch_count(&fbatch); i++) truncate_cleanup_folio(fbatch.folios[i]); delete_from_page_cache_batch(mapping, &fbatch); for (i = 0; i < folio_batch_count(&fbatch); i++) folio_unlock(fbatch.folios[i]); folio_batch_release(&fbatch); cond_resched(); } same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT); folio = __filemap_get_folio(mapping, lstart >> PAGE_SHIFT, FGP_LOCK, 0); if (!IS_ERR(folio)) { same_folio = lend < folio_pos(folio) + folio_size(folio); if (!truncate_inode_partial_folio(folio, lstart, lend)) { start = folio_next_index(folio); if (same_folio) end = folio->index; } folio_unlock(folio); folio_put(folio); folio = NULL; } if (!same_folio) { folio = __filemap_get_folio(mapping, lend >> PAGE_SHIFT, FGP_LOCK, 0); if (!IS_ERR(folio)) { if (!truncate_inode_partial_folio(folio, lstart, lend)) end = folio->index; folio_unlock(folio); folio_put(folio); } } index = start; while (index < end) { cond_resched(); if (!find_get_entries(mapping, &index, end - 1, &fbatch, indices)) { /* If all gone from start onwards, we're done */ if (index == start) break; /* Otherwise restart to make sure all gone */ index = start; continue; } for (i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i]; /* We rely upon deletion not changing page->index */ if (xa_is_value(folio)) continue; folio_lock(folio); VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio); folio_wait_writeback(folio); truncate_inode_folio(mapping, folio); folio_unlock(folio); } truncate_folio_batch_exceptionals(mapping, &fbatch, indices); folio_batch_release(&fbatch); } } EXPORT_SYMBOL(truncate_inode_pages_range); /** * truncate_inode_pages - truncate *all* the pages from an offset * @mapping: mapping to truncate * @lstart: offset from which to truncate * * Called under (and serialised by) inode->i_rwsem and * mapping->invalidate_lock. * * Note: When this function returns, there can be a page in the process of * deletion (inside __filemap_remove_folio()) in the specified range. Thus * mapping->nrpages can be non-zero when this function returns even after * truncation of the whole mapping. */ void truncate_inode_pages(struct address_space *mapping, loff_t lstart) { truncate_inode_pages_range(mapping, lstart, (loff_t)-1); } EXPORT_SYMBOL(truncate_inode_pages); /** * truncate_inode_pages_final - truncate *all* pages before inode dies * @mapping: mapping to truncate * * Called under (and serialized by) inode->i_rwsem. 
* * Filesystems have to use this in the .evict_inode path to inform the * VM that this is the final truncate and the inode is going away. */ void truncate_inode_pages_final(struct address_space *mapping) { /* * Page reclaim can not participate in regular inode lifetime * management (can't call iput()) and thus can race with the * inode teardown. Tell it when the address space is exiting, * so that it does not install eviction information after the * final truncate has begun. */ mapping_set_exiting(mapping); if (!mapping_empty(mapping)) { /* * As truncation uses a lockless tree lookup, cycle * the tree lock to make sure any ongoing tree * modification that does not see AS_EXITING is * completed before starting the final truncate. */ xa_lock_irq(&mapping->i_pages); xa_unlock_irq(&mapping->i_pages); } truncate_inode_pages(mapping, 0); } EXPORT_SYMBOL(truncate_inode_pages_final); /** * mapping_try_invalidate - Invalidate all the evictable folios of one inode * @mapping: the address_space which holds the folios to invalidate * @start: the offset 'from' which to invalidate * @end: the offset 'to' which to invalidate (inclusive) * @nr_failed: How many folio invalidations failed * * This function is similar to invalidate_mapping_pages(), except that it * returns the number of folios which could not be evicted in @nr_failed. */ unsigned long mapping_try_invalidate(struct address_space *mapping, pgoff_t start, pgoff_t end, unsigned long *nr_failed) { pgoff_t indices[PAGEVEC_SIZE]; struct folio_batch fbatch; pgoff_t index = start; unsigned long ret; unsigned long count = 0; int i; folio_batch_init(&fbatch); while (find_lock_entries(mapping, &index, end, &fbatch, indices)) { bool xa_has_values = false; int nr = folio_batch_count(&fbatch); for (i = 0; i < nr; i++) { struct folio *folio = fbatch.folios[i]; /* We rely upon deletion not changing folio->index */ if (xa_is_value(folio)) { xa_has_values = true; count++; continue; } ret = mapping_evict_folio(mapping, folio); folio_unlock(folio); /* * Invalidation is a hint that the folio is no longer * of interest and try to speed up its reclaim. */ if (!ret) { deactivate_file_folio(folio); /* Likely in the lru cache of a remote CPU */ if (nr_failed) (*nr_failed)++; } count += ret; } if (xa_has_values) clear_shadow_entries(mapping, indices[0], indices[nr-1]); folio_batch_remove_exceptionals(&fbatch); folio_batch_release(&fbatch); cond_resched(); } return count; } /** * invalidate_mapping_pages - Invalidate all clean, unlocked cache of one inode * @mapping: the address_space which holds the cache to invalidate * @start: the offset 'from' which to invalidate * @end: the offset 'to' which to invalidate (inclusive) * * This function removes pages that are clean, unmapped and unlocked, * as well as shadow entries. It will not block on IO activity. * * If you want to remove all the pages of one inode, regardless of * their use and writeback state, use truncate_inode_pages(). 
* * Return: The number of indices that had their contents invalidated */ unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end) { return mapping_try_invalidate(mapping, start, end, NULL); } EXPORT_SYMBOL(invalidate_mapping_pages); static int folio_launder(struct address_space *mapping, struct folio *folio) { if (!folio_test_dirty(folio)) return 0; if (folio->mapping != mapping || mapping->a_ops->launder_folio == NULL) return 0; return mapping->a_ops->launder_folio(folio); } /* * This is like mapping_evict_folio(), except it ignores the folio's * refcount. We do this because invalidate_inode_pages2() needs stronger * invalidation guarantees, and cannot afford to leave folios behind because * shrink_folio_list() has a temp ref on them, or because they're transiently * sitting in the folio_add_lru() caches. */ int folio_unmap_invalidate(struct address_space *mapping, struct folio *folio, gfp_t gfp) { int ret; VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); if (folio_mapped(folio)) unmap_mapping_folio(folio); BUG_ON(folio_mapped(folio)); ret = folio_launder(mapping, folio); if (ret) return ret; if (folio->mapping != mapping) return -EBUSY; if (!filemap_release_folio(folio, gfp)) return -EBUSY; spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); if (folio_test_dirty(folio)) goto failed; BUG_ON(folio_has_private(folio)); __filemap_remove_folio(folio, NULL); xa_unlock_irq(&mapping->i_pages); if (mapping_shrinkable(mapping)) inode_add_lru(mapping->host); spin_unlock(&mapping->host->i_lock); filemap_free_folio(mapping, folio); return 1; failed: xa_unlock_irq(&mapping->i_pages); spin_unlock(&mapping->host->i_lock); return -EBUSY; } /** * invalidate_inode_pages2_range - remove range of pages from an address_space * @mapping: the address_space * @start: the page offset 'from' which to invalidate * @end: the page offset 'to' which to invalidate (inclusive) * * Any pages which are found to be mapped into pagetables are unmapped prior to * invalidation. * * Return: -EBUSY if any pages could not be invalidated. */ int invalidate_inode_pages2_range(struct address_space *mapping, pgoff_t start, pgoff_t end) { pgoff_t indices[PAGEVEC_SIZE]; struct folio_batch fbatch; pgoff_t index; int i; int ret = 0; int ret2 = 0; int did_range_unmap = 0; if (mapping_empty(mapping)) return 0; folio_batch_init(&fbatch); index = start; while (find_get_entries(mapping, &index, end, &fbatch, indices)) { bool xa_has_values = false; int nr = folio_batch_count(&fbatch); for (i = 0; i < nr; i++) { struct folio *folio = fbatch.folios[i]; /* We rely upon deletion not changing folio->index */ if (xa_is_value(folio)) { xa_has_values = true; if (dax_mapping(mapping) && !dax_invalidate_mapping_entry_sync(mapping, indices[i])) ret = -EBUSY; continue; } if (!did_range_unmap && folio_mapped(folio)) { /* * If folio is mapped, before taking its lock, * zap the rest of the file in one hit. 
*/ unmap_mapping_pages(mapping, indices[i], (1 + end - indices[i]), false); did_range_unmap = 1; } folio_lock(folio); if (unlikely(folio->mapping != mapping)) { folio_unlock(folio); continue; } VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio); folio_wait_writeback(folio); ret2 = folio_unmap_invalidate(mapping, folio, GFP_KERNEL); if (ret2 < 0) ret = ret2; folio_unlock(folio); } if (xa_has_values) clear_shadow_entries(mapping, indices[0], indices[nr-1]); folio_batch_remove_exceptionals(&fbatch); folio_batch_release(&fbatch); cond_resched(); } /* * For DAX we invalidate page tables after invalidating page cache. We * could invalidate page tables while invalidating each entry however * that would be expensive. And doing range unmapping before doesn't * work as we have no cheap way to find whether page cache entry didn't * get remapped later. */ if (dax_mapping(mapping)) { unmap_mapping_pages(mapping, start, end - start + 1, false); } return ret; } EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); /** * invalidate_inode_pages2 - remove all pages from an address_space * @mapping: the address_space * * Any pages which are found to be mapped into pagetables are unmapped prior to * invalidation. * * Return: -EBUSY if any pages could not be invalidated. */ int invalidate_inode_pages2(struct address_space *mapping) { return invalidate_inode_pages2_range(mapping, 0, -1); } EXPORT_SYMBOL_GPL(invalidate_inode_pages2); /** * truncate_pagecache - unmap and remove pagecache that has been truncated * @inode: inode * @newsize: new file size * * inode's new i_size must already be written before truncate_pagecache * is called. * * This function should typically be called before the filesystem * releases resources associated with the freed range (eg. deallocates * blocks). This way, pagecache will always stay logically coherent * with on-disk format, and the filesystem would not have to deal with * situations such as writepage being called for a page that has already * had its underlying blocks deallocated. */ void truncate_pagecache(struct inode *inode, loff_t newsize) { struct address_space *mapping = inode->i_mapping; loff_t holebegin = round_up(newsize, PAGE_SIZE); /* * unmap_mapping_range is called twice, first simply for * efficiency so that truncate_inode_pages does fewer * single-page unmaps. However after this first call, and * before truncate_inode_pages finishes, it is possible for * private pages to be COWed, which remain after * truncate_inode_pages finishes, hence the second * unmap_mapping_range call must be made for correctness. */ unmap_mapping_range(mapping, holebegin, 0, 1); truncate_inode_pages(mapping, newsize); unmap_mapping_range(mapping, holebegin, 0, 1); } EXPORT_SYMBOL(truncate_pagecache); /** * truncate_setsize - update inode and pagecache for a new file size * @inode: inode * @newsize: new file size * * truncate_setsize updates i_size and performs pagecache truncation (if * necessary) to @newsize. It will be typically be called from the filesystem's * setattr function when ATTR_SIZE is passed in. * * Must be called with a lock serializing truncates and writes (generally * i_rwsem but e.g. xfs uses a different lock) and before all filesystem * specific block truncation has been performed. 
*/ void truncate_setsize(struct inode *inode, loff_t newsize) { loff_t oldsize = inode->i_size; i_size_write(inode, newsize); if (newsize > oldsize) pagecache_isize_extended(inode, oldsize, newsize); truncate_pagecache(inode, newsize); } EXPORT_SYMBOL(truncate_setsize); /** * pagecache_isize_extended - update pagecache after extension of i_size * @inode: inode for which i_size was extended * @from: original inode size * @to: new inode size * * Handle extension of inode size either caused by extending truncate or * by write starting after current i_size. We mark the page straddling * current i_size RO so that page_mkwrite() is called on the first * write access to the page. The filesystem will update its per-block * information before user writes to the page via mmap after the i_size * has been changed. * * The function must be called after i_size is updated so that page fault * coming after we unlock the folio will already see the new i_size. * The function must be called while we still hold i_rwsem - this not only * makes sure i_size is stable but also that userspace cannot observe new * i_size value before we are prepared to store mmap writes at new inode size. */ void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to) { int bsize = i_blocksize(inode); loff_t rounded_from; struct folio *folio; WARN_ON(to > inode->i_size); if (from >= to || bsize >= PAGE_SIZE) return; /* Page straddling @from will not have any hole block created? */ rounded_from = round_up(from, bsize); if (to <= rounded_from || !(rounded_from & (PAGE_SIZE - 1))) return; folio = filemap_lock_folio(inode->i_mapping, from / PAGE_SIZE); /* Folio not cached? Nothing to do */ if (IS_ERR(folio)) return; /* * See folio_clear_dirty_for_io() for details why folio_mark_dirty() * is needed. */ if (folio_mkclean(folio)) folio_mark_dirty(folio); /* * The post-eof range of the folio must be zeroed before it is exposed * to the file. Writeback normally does this, but since i_size has been * increased we handle it here. */ if (folio_test_dirty(folio)) { unsigned int offset, end; offset = from - folio_pos(folio); end = min_t(unsigned int, to - folio_pos(folio), folio_size(folio)); folio_zero_segment(folio, offset, end); } folio_unlock(folio); folio_put(folio); } EXPORT_SYMBOL(pagecache_isize_extended); /** * truncate_pagecache_range - unmap and remove pagecache that is hole-punched * @inode: inode * @lstart: offset of beginning of hole * @lend: offset of last byte of hole * * This function should typically be called before the filesystem * releases resources associated with the freed range (eg. deallocates * blocks). This way, pagecache will always stay logically coherent * with on-disk format, and the filesystem would not have to deal with * situations such as writepage being called for a page that has already * had its underlying blocks deallocated. */ void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend) { struct address_space *mapping = inode->i_mapping; loff_t unmap_start = round_up(lstart, PAGE_SIZE); loff_t unmap_end = round_down(1 + lend, PAGE_SIZE) - 1; /* * This rounding is currently just for example: unmap_mapping_range * expands its hole outwards, whereas we want it to contract the hole * inwards. However, existing callers of truncate_pagecache_range are * doing their own page rounding first. Note that unmap_mapping_range * allows holelen 0 for all, and we allow lend -1 for end of file. 
*/ /* * Unlike in truncate_pagecache, unmap_mapping_range is called only * once (before truncating pagecache), and without "even_cows" flag: * hole-punching should not remove private COWed pages from the hole. */ if ((u64)unmap_end > (u64)unmap_start) unmap_mapping_range(mapping, unmap_start, 1 + unmap_end - unmap_start, 0); truncate_inode_pages_range(mapping, lstart, lend); } EXPORT_SYMBOL(truncate_pagecache_range);
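truncate_setsize() above is documented as the helper a filesystem calls from its ->setattr handler when ATTR_SIZE is set, under i_rwsem and before it frees the underlying blocks. As a reminder of that calling pattern, here is a minimal sketch loosely modelled on the kernel's simple_setattr() helper; the prototypes shown (for example the struct mnt_idmap argument) change between kernel versions, so treat the names and signatures as illustrative assumptions rather than a drop-in implementation.

/*
 * Illustrative ->setattr handler showing the typical truncate_setsize()
 * call site (sketch modelled on simple_setattr(); signatures vary by
 * kernel version).
 */
#include <linux/fs.h>
#include <linux/mm.h>

static int example_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
			   struct iattr *attr)
{
	struct inode *inode = d_inode(dentry);
	int error;

	/* Check permissions, size limits, etc. before touching anything. */
	error = setattr_prepare(idmap, dentry, attr);
	if (error)
		return error;

	if (attr->ia_valid & ATTR_SIZE) {
		/*
		 * Updates i_size and then truncates/unmaps the page cache
		 * beyond the new size via truncate_pagecache().  Runs under
		 * i_rwsem and before the filesystem truncates its on-disk
		 * blocks.
		 */
		truncate_setsize(inode, attr->ia_size);
	}

	/* Copy the remaining attributes into the inode and mark it dirty. */
	setattr_copy(idmap, inode, attr);
	mark_inode_dirty(inode);
	return 0;
}

The ordering mirrors truncate_setsize() itself: i_size is published first so that concurrent faults past the new end of file already see the shrunk size, and only then is the stale page cache unmapped and dropped.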
// SPDX-License-Identifier: GPL-2.0+
/*
 * NILFS module and super block management.
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * Written by Ryusuke Konishi.
 */
/*
 *  linux/fs/ext2/super.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/inode.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S.
Miller (davem@caip.rutgers.edu), 1995 */ #include <linux/module.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/blkdev.h> #include <linux/crc32.h> #include <linux/vfs.h> #include <linux/writeback.h> #include <linux/seq_file.h> #include <linux/mount.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include "nilfs.h" #include "export.h" #include "mdt.h" #include "alloc.h" #include "btree.h" #include "btnode.h" #include "page.h" #include "cpfile.h" #include "sufile.h" /* nilfs_sufile_resize(), nilfs_sufile_set_alloc_range() */ #include "ifile.h" #include "dat.h" #include "segment.h" #include "segbuf.h" MODULE_AUTHOR("NTT Corp."); MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " "(NILFS)"); MODULE_LICENSE("GPL"); static struct kmem_cache *nilfs_inode_cachep; struct kmem_cache *nilfs_transaction_cachep; struct kmem_cache *nilfs_segbuf_cachep; struct kmem_cache *nilfs_btree_path_cache; static int nilfs_setup_super(struct super_block *sb, int is_mount); void __nilfs_msg(struct super_block *sb, const char *fmt, ...) { struct va_format vaf; va_list args; int level; va_start(args, fmt); level = printk_get_level(fmt); vaf.fmt = printk_skip_level(fmt); vaf.va = &args; if (sb) printk("%c%cNILFS (%s): %pV\n", KERN_SOH_ASCII, level, sb->s_id, &vaf); else printk("%c%cNILFS: %pV\n", KERN_SOH_ASCII, level, &vaf); va_end(args); } static void nilfs_set_error(struct super_block *sb) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp; down_write(&nilfs->ns_sem); if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) { nilfs->ns_mount_state |= NILFS_ERROR_FS; sbp = nilfs_prepare_super(sb, 0); if (likely(sbp)) { sbp[0]->s_state |= cpu_to_le16(NILFS_ERROR_FS); if (sbp[1]) sbp[1]->s_state |= cpu_to_le16(NILFS_ERROR_FS); nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); } } up_write(&nilfs->ns_sem); } /** * __nilfs_error() - report failure condition on a filesystem * @sb: super block instance * @function: name of calling function * @fmt: format string for message to be output * @...: optional arguments to @fmt * * __nilfs_error() sets an ERROR_FS flag on the superblock as well as * reporting an error message. This function should be called when * NILFS detects incoherences or defects of meta data on disk. * * This implements the body of nilfs_error() macro. Normally, * nilfs_error() should be used. As for sustainable errors such as a * single-shot I/O error, nilfs_err() should be used instead. * * Callers should not add a trailing newline since this will do it. */ void __nilfs_error(struct super_block *sb, const char *function, const char *fmt, ...) 
{ struct the_nilfs *nilfs = sb->s_fs_info; struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; printk(KERN_CRIT "NILFS error (device %s): %s: %pV\n", sb->s_id, function, &vaf); va_end(args); if (!sb_rdonly(sb)) { nilfs_set_error(sb); if (nilfs_test_opt(nilfs, ERRORS_RO)) { printk(KERN_CRIT "Remounting filesystem read-only\n"); sb->s_flags |= SB_RDONLY; } } if (nilfs_test_opt(nilfs, ERRORS_PANIC)) panic("NILFS (device %s): panic forced after error\n", sb->s_id); } struct inode *nilfs_alloc_inode(struct super_block *sb) { struct nilfs_inode_info *ii; ii = alloc_inode_sb(sb, nilfs_inode_cachep, GFP_NOFS); if (!ii) return NULL; ii->i_bh = NULL; ii->i_state = 0; ii->i_type = 0; ii->i_cno = 0; ii->i_assoc_inode = NULL; ii->i_bmap = &ii->i_bmap_data; return &ii->vfs_inode; } static void nilfs_free_inode(struct inode *inode) { if (nilfs_is_metadata_file_inode(inode)) nilfs_mdt_destroy(inode); kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); } static int nilfs_sync_super(struct super_block *sb, int flag) { struct the_nilfs *nilfs = sb->s_fs_info; int err; retry: set_buffer_dirty(nilfs->ns_sbh[0]); if (nilfs_test_opt(nilfs, BARRIER)) { err = __sync_dirty_buffer(nilfs->ns_sbh[0], REQ_SYNC | REQ_PREFLUSH | REQ_FUA); } else { err = sync_dirty_buffer(nilfs->ns_sbh[0]); } if (unlikely(err)) { nilfs_err(sb, "unable to write superblock: err=%d", err); if (err == -EIO && nilfs->ns_sbh[1]) { /* * sbp[0] points to newer log than sbp[1], * so copy sbp[0] to sbp[1] to take over sbp[0]. */ memcpy(nilfs->ns_sbp[1], nilfs->ns_sbp[0], nilfs->ns_sbsize); nilfs_fall_back_super_block(nilfs); goto retry; } } else { struct nilfs_super_block *sbp = nilfs->ns_sbp[0]; nilfs->ns_sbwcount++; /* * The latest segment becomes trailable from the position * written in superblock. */ clear_nilfs_discontinued(nilfs); /* update GC protection for recent segments */ if (nilfs->ns_sbh[1]) { if (flag == NILFS_SB_COMMIT_ALL) { set_buffer_dirty(nilfs->ns_sbh[1]); if (sync_dirty_buffer(nilfs->ns_sbh[1]) < 0) goto out; } if (le64_to_cpu(nilfs->ns_sbp[1]->s_last_cno) < le64_to_cpu(nilfs->ns_sbp[0]->s_last_cno)) sbp = nilfs->ns_sbp[1]; } spin_lock(&nilfs->ns_last_segment_lock); nilfs->ns_prot_seq = le64_to_cpu(sbp->s_last_seq); spin_unlock(&nilfs->ns_last_segment_lock); } out: return err; } void nilfs_set_log_cursor(struct nilfs_super_block *sbp, struct the_nilfs *nilfs) { sector_t nfreeblocks; /* nilfs->ns_sem must be locked by the caller. */ nilfs_count_free_blocks(nilfs, &nfreeblocks); sbp->s_free_blocks_count = cpu_to_le64(nfreeblocks); spin_lock(&nilfs->ns_last_segment_lock); sbp->s_last_seq = cpu_to_le64(nilfs->ns_last_seq); sbp->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg); sbp->s_last_cno = cpu_to_le64(nilfs->ns_last_cno); spin_unlock(&nilfs->ns_last_segment_lock); } struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb, int flip) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp = nilfs->ns_sbp; /* nilfs->ns_sem must be locked by the caller. 
*/ if (sbp[0]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) { if (sbp[1] && sbp[1]->s_magic == cpu_to_le16(NILFS_SUPER_MAGIC)) { memcpy(sbp[0], sbp[1], nilfs->ns_sbsize); } else { nilfs_crit(sb, "superblock broke"); return NULL; } } else if (sbp[1] && sbp[1]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) { memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); } if (flip && sbp[1]) nilfs_swap_super_block(nilfs); return sbp; } int nilfs_commit_super(struct super_block *sb, int flag) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp = nilfs->ns_sbp; time64_t t; /* nilfs->ns_sem must be locked by the caller. */ t = ktime_get_real_seconds(); nilfs->ns_sbwtime = t; sbp[0]->s_wtime = cpu_to_le64(t); sbp[0]->s_sum = 0; sbp[0]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed, (unsigned char *)sbp[0], nilfs->ns_sbsize)); if (flag == NILFS_SB_COMMIT_ALL && sbp[1]) { sbp[1]->s_wtime = sbp[0]->s_wtime; sbp[1]->s_sum = 0; sbp[1]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed, (unsigned char *)sbp[1], nilfs->ns_sbsize)); } clear_nilfs_sb_dirty(nilfs); nilfs->ns_flushed_device = 1; /* make sure store to ns_flushed_device cannot be reordered */ smp_wmb(); return nilfs_sync_super(sb, flag); } /** * nilfs_cleanup_super() - write filesystem state for cleanup * @sb: super block instance to be unmounted or degraded to read-only * * This function restores state flags in the on-disk super block. * This will set "clean" flag (i.e. NILFS_VALID_FS) unless the * filesystem was not clean previously. * * Return: 0 on success, %-EIO if I/O error or superblock is corrupted. */ int nilfs_cleanup_super(struct super_block *sb) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp; int flag = NILFS_SB_COMMIT; int ret = -EIO; sbp = nilfs_prepare_super(sb, 0); if (sbp) { sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state); nilfs_set_log_cursor(sbp[0], nilfs); if (sbp[1] && sbp[0]->s_last_cno == sbp[1]->s_last_cno) { /* * make the "clean" flag also to the opposite * super block if both super blocks point to * the same checkpoint. */ sbp[1]->s_state = sbp[0]->s_state; flag = NILFS_SB_COMMIT_ALL; } ret = nilfs_commit_super(sb, flag); } return ret; } /** * nilfs_move_2nd_super - relocate secondary super block * @sb: super block instance * @sb2off: new offset of the secondary super block (in bytes) * * Return: 0 on success, or a negative error code on failure. */ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off) { struct the_nilfs *nilfs = sb->s_fs_info; struct buffer_head *nsbh; struct nilfs_super_block *nsbp; sector_t blocknr, newblocknr; unsigned long offset; int sb2i; /* array index of the secondary superblock */ int ret = 0; /* nilfs->ns_sem must be locked by the caller. 
*/ if (nilfs->ns_sbh[1] && nilfs->ns_sbh[1]->b_blocknr > nilfs->ns_first_data_block) { sb2i = 1; blocknr = nilfs->ns_sbh[1]->b_blocknr; } else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) { sb2i = 0; blocknr = nilfs->ns_sbh[0]->b_blocknr; } else { sb2i = -1; blocknr = 0; } if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off) goto out; /* super block location is unchanged */ /* Get new super block buffer */ newblocknr = sb2off >> nilfs->ns_blocksize_bits; offset = sb2off & (nilfs->ns_blocksize - 1); nsbh = sb_getblk(sb, newblocknr); if (!nsbh) { nilfs_warn(sb, "unable to move secondary superblock to block %llu", (unsigned long long)newblocknr); ret = -EIO; goto out; } nsbp = (void *)nsbh->b_data + offset; lock_buffer(nsbh); if (sb2i >= 0) { /* * The position of the second superblock only changes by 4KiB, * which is larger than the maximum superblock data size * (= 1KiB), so there is no need to use memmove() to allow * overlap between source and destination. */ memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize); /* * Zero fill after copy to avoid overwriting in case of move * within the same block. */ memset(nsbh->b_data, 0, offset); memset((void *)nsbp + nilfs->ns_sbsize, 0, nsbh->b_size - offset - nilfs->ns_sbsize); } else { memset(nsbh->b_data, 0, nsbh->b_size); } set_buffer_uptodate(nsbh); unlock_buffer(nsbh); if (sb2i >= 0) { brelse(nilfs->ns_sbh[sb2i]); nilfs->ns_sbh[sb2i] = nsbh; nilfs->ns_sbp[sb2i] = nsbp; } else if (nilfs->ns_sbh[0]->b_blocknr < nilfs->ns_first_data_block) { /* secondary super block will be restored to index 1 */ nilfs->ns_sbh[1] = nsbh; nilfs->ns_sbp[1] = nsbp; } else { brelse(nsbh); } out: return ret; } /** * nilfs_resize_fs - resize the filesystem * @sb: super block instance * @newsize: new size of the filesystem (in bytes) * * Return: 0 on success, or a negative error code on failure. */ int nilfs_resize_fs(struct super_block *sb, __u64 newsize) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp; __u64 devsize, newnsegs; loff_t sb2off; int ret; ret = -ERANGE; devsize = bdev_nr_bytes(sb->s_bdev); if (newsize > devsize) goto out; /* * Prevent underflow in second superblock position calculation. * The exact minimum size check is done in nilfs_sufile_resize(). */ if (newsize < 4096) { ret = -ENOSPC; goto out; } /* * Write lock is required to protect some functions depending * on the number of segments, the number of reserved segments, * and so forth. */ down_write(&nilfs->ns_segctor_sem); sb2off = NILFS_SB2_OFFSET_BYTES(newsize); newnsegs = sb2off >> nilfs->ns_blocksize_bits; newnsegs = div64_ul(newnsegs, nilfs->ns_blocks_per_segment); ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs); up_write(&nilfs->ns_segctor_sem); if (ret < 0) goto out; ret = nilfs_construct_segment(sb); if (ret < 0) goto out; down_write(&nilfs->ns_sem); nilfs_move_2nd_super(sb, sb2off); ret = -EIO; sbp = nilfs_prepare_super(sb, 0); if (likely(sbp)) { nilfs_set_log_cursor(sbp[0], nilfs); /* * Drop NILFS_RESIZE_FS flag for compatibility with * mount-time resize which may be implemented in a * future release. */ sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_RESIZE_FS); sbp[0]->s_dev_size = cpu_to_le64(newsize); sbp[0]->s_nsegments = cpu_to_le64(nilfs->ns_nsegments); if (sbp[1]) memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); } up_write(&nilfs->ns_sem); /* * Reset the range of allocatable segments last. 
This order * is important in the case of expansion because the secondary * superblock must be protected from log write until migration * completes. */ if (!ret) nilfs_sufile_set_alloc_range(nilfs->ns_sufile, 0, newnsegs - 1); out: return ret; } static void nilfs_put_super(struct super_block *sb) { struct the_nilfs *nilfs = sb->s_fs_info; nilfs_detach_log_writer(sb); if (!sb_rdonly(sb)) { down_write(&nilfs->ns_sem); nilfs_cleanup_super(sb); up_write(&nilfs->ns_sem); } nilfs_sysfs_delete_device_group(nilfs); iput(nilfs->ns_sufile); iput(nilfs->ns_cpfile); iput(nilfs->ns_dat); destroy_nilfs(nilfs); sb->s_fs_info = NULL; } static int nilfs_sync_fs(struct super_block *sb, int wait) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp; int err = 0; /* This function is called when super block should be written back */ if (wait) err = nilfs_construct_segment(sb); down_write(&nilfs->ns_sem); if (nilfs_sb_dirty(nilfs)) { sbp = nilfs_prepare_super(sb, nilfs_sb_will_flip(nilfs)); if (likely(sbp)) { nilfs_set_log_cursor(sbp[0], nilfs); nilfs_commit_super(sb, NILFS_SB_COMMIT); } } up_write(&nilfs->ns_sem); if (!err) err = nilfs_flush_device(nilfs); return err; } int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, struct nilfs_root **rootp) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_root *root; int err = -ENOMEM; root = nilfs_find_or_create_root( nilfs, curr_mnt ? NILFS_CPTREE_CURRENT_CNO : cno); if (!root) return err; if (root->ifile) goto reuse; /* already attached checkpoint */ down_read(&nilfs->ns_segctor_sem); err = nilfs_ifile_read(sb, root, cno, nilfs->ns_inode_size); up_read(&nilfs->ns_segctor_sem); if (unlikely(err)) goto failed; reuse: *rootp = root; return 0; failed: if (err == -EINVAL) nilfs_err(sb, "Invalid checkpoint (checkpoint number=%llu)", (unsigned long long)cno); nilfs_put_root(root); return err; } static int nilfs_freeze(struct super_block *sb) { struct the_nilfs *nilfs = sb->s_fs_info; int err; if (sb_rdonly(sb)) return 0; /* Mark super block clean */ down_write(&nilfs->ns_sem); err = nilfs_cleanup_super(sb); up_write(&nilfs->ns_sem); return err; } static int nilfs_unfreeze(struct super_block *sb) { struct the_nilfs *nilfs = sb->s_fs_info; if (sb_rdonly(sb)) return 0; down_write(&nilfs->ns_sem); nilfs_setup_super(sb, false); up_write(&nilfs->ns_sem); return 0; } static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct nilfs_root *root = NILFS_I(d_inode(dentry))->i_root; struct the_nilfs *nilfs = root->nilfs; u64 id = huge_encode_dev(sb->s_bdev->bd_dev); unsigned long long blocks; unsigned long overhead; unsigned long nrsvblocks; sector_t nfreeblocks; u64 nmaxinodes, nfreeinodes; int err; /* * Compute all of the segment blocks * * The blocks before first segment and after last segment * are excluded. */ blocks = nilfs->ns_blocks_per_segment * nilfs->ns_nsegments - nilfs->ns_first_data_block; nrsvblocks = nilfs->ns_nrsvsegs * nilfs->ns_blocks_per_segment; /* * Compute the overhead * * When distributing meta data blocks outside segment structure, * We must count them as the overhead. 
*/ overhead = 0; err = nilfs_count_free_blocks(nilfs, &nfreeblocks); if (unlikely(err)) return err; err = nilfs_ifile_count_free_inodes(root->ifile, &nmaxinodes, &nfreeinodes); if (unlikely(err)) { nilfs_warn(sb, "failed to count free inodes: err=%d", err); if (err == -ERANGE) { /* * If nilfs_palloc_count_max_entries() returns * -ERANGE error code then we simply treat * curent inodes count as maximum possible and * zero as free inodes value. */ nmaxinodes = atomic64_read(&root->inodes_count); nfreeinodes = 0; err = 0; } else return err; } buf->f_type = NILFS_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = blocks - overhead; buf->f_bfree = nfreeblocks; buf->f_bavail = (buf->f_bfree >= nrsvblocks) ? (buf->f_bfree - nrsvblocks) : 0; buf->f_files = nmaxinodes; buf->f_ffree = nfreeinodes; buf->f_namelen = NILFS_NAME_LEN; buf->f_fsid = u64_to_fsid(id); return 0; } static int nilfs_show_options(struct seq_file *seq, struct dentry *dentry) { struct super_block *sb = dentry->d_sb; struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_root *root = NILFS_I(d_inode(dentry))->i_root; if (!nilfs_test_opt(nilfs, BARRIER)) seq_puts(seq, ",nobarrier"); if (root->cno != NILFS_CPTREE_CURRENT_CNO) seq_printf(seq, ",cp=%llu", (unsigned long long)root->cno); if (nilfs_test_opt(nilfs, ERRORS_PANIC)) seq_puts(seq, ",errors=panic"); if (nilfs_test_opt(nilfs, ERRORS_CONT)) seq_puts(seq, ",errors=continue"); if (nilfs_test_opt(nilfs, STRICT_ORDER)) seq_puts(seq, ",order=strict"); if (nilfs_test_opt(nilfs, NORECOVERY)) seq_puts(seq, ",norecovery"); if (nilfs_test_opt(nilfs, DISCARD)) seq_puts(seq, ",discard"); return 0; } static const struct super_operations nilfs_sops = { .alloc_inode = nilfs_alloc_inode, .free_inode = nilfs_free_inode, .dirty_inode = nilfs_dirty_inode, .evict_inode = nilfs_evict_inode, .put_super = nilfs_put_super, .sync_fs = nilfs_sync_fs, .freeze_fs = nilfs_freeze, .unfreeze_fs = nilfs_unfreeze, .statfs = nilfs_statfs, .show_options = nilfs_show_options }; enum { Opt_err, Opt_barrier, Opt_snapshot, Opt_order, Opt_norecovery, Opt_discard, }; static const struct constant_table nilfs_param_err[] = { {"continue", NILFS_MOUNT_ERRORS_CONT}, {"panic", NILFS_MOUNT_ERRORS_PANIC}, {"remount-ro", NILFS_MOUNT_ERRORS_RO}, {} }; static const struct fs_parameter_spec nilfs_param_spec[] = { fsparam_enum ("errors", Opt_err, nilfs_param_err), fsparam_flag_no ("barrier", Opt_barrier), fsparam_u64 ("cp", Opt_snapshot), fsparam_string ("order", Opt_order), fsparam_flag ("norecovery", Opt_norecovery), fsparam_flag_no ("discard", Opt_discard), {} }; struct nilfs_fs_context { unsigned long ns_mount_opt; __u64 cno; }; static int nilfs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct nilfs_fs_context *nilfs = fc->fs_private; int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE; struct fs_parse_result result; int opt; opt = fs_parse(fc, nilfs_param_spec, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_barrier: if (result.negated) nilfs_clear_opt(nilfs, BARRIER); else nilfs_set_opt(nilfs, BARRIER); break; case Opt_order: if (strcmp(param->string, "relaxed") == 0) /* Ordered data semantics */ nilfs_clear_opt(nilfs, STRICT_ORDER); else if (strcmp(param->string, "strict") == 0) /* Strict in-order semantics */ nilfs_set_opt(nilfs, STRICT_ORDER); else return -EINVAL; break; case Opt_err: nilfs->ns_mount_opt &= ~NILFS_MOUNT_ERROR_MODE; nilfs->ns_mount_opt |= result.uint_32; break; case Opt_snapshot: if (is_remount) { struct super_block *sb = fc->root->d_sb; nilfs_err(sb, 
"\"%s\" option is invalid for remount", param->key); return -EINVAL; } if (result.uint_64 == 0) { nilfs_err(NULL, "invalid option \"cp=0\": invalid checkpoint number 0"); return -EINVAL; } nilfs->cno = result.uint_64; break; case Opt_norecovery: nilfs_set_opt(nilfs, NORECOVERY); break; case Opt_discard: if (result.negated) nilfs_clear_opt(nilfs, DISCARD); else nilfs_set_opt(nilfs, DISCARD); break; default: return -EINVAL; } return 0; } static int nilfs_setup_super(struct super_block *sb, int is_mount) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_super_block **sbp; int max_mnt_count; int mnt_count; /* nilfs->ns_sem must be locked by the caller. */ sbp = nilfs_prepare_super(sb, 0); if (!sbp) return -EIO; if (!is_mount) goto skip_mount_setup; max_mnt_count = le16_to_cpu(sbp[0]->s_max_mnt_count); mnt_count = le16_to_cpu(sbp[0]->s_mnt_count); if (nilfs->ns_mount_state & NILFS_ERROR_FS) { nilfs_warn(sb, "mounting fs with errors"); #if 0 } else if (max_mnt_count >= 0 && mnt_count >= max_mnt_count) { nilfs_warn(sb, "maximal mount count reached"); #endif } if (!max_mnt_count) sbp[0]->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT); sbp[0]->s_mnt_count = cpu_to_le16(mnt_count + 1); sbp[0]->s_mtime = cpu_to_le64(ktime_get_real_seconds()); skip_mount_setup: sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS); /* synchronize sbp[1] with sbp[0] */ if (sbp[1]) memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); return nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); } struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb, u64 pos, int blocksize, struct buffer_head **pbh) { unsigned long long sb_index = pos; unsigned long offset; offset = do_div(sb_index, blocksize); *pbh = sb_bread(sb, sb_index); if (!*pbh) return NULL; return (struct nilfs_super_block *)((char *)(*pbh)->b_data + offset); } int nilfs_store_magic(struct super_block *sb, struct nilfs_super_block *sbp) { struct the_nilfs *nilfs = sb->s_fs_info; sb->s_magic = le16_to_cpu(sbp->s_magic); /* FS independent flags */ #ifdef NILFS_ATIME_DISABLE sb->s_flags |= SB_NOATIME; #endif nilfs->ns_resuid = le16_to_cpu(sbp->s_def_resuid); nilfs->ns_resgid = le16_to_cpu(sbp->s_def_resgid); nilfs->ns_interval = le32_to_cpu(sbp->s_c_interval); nilfs->ns_watermark = le32_to_cpu(sbp->s_c_block_max); return 0; } int nilfs_check_feature_compatibility(struct super_block *sb, struct nilfs_super_block *sbp) { __u64 features; features = le64_to_cpu(sbp->s_feature_incompat) & ~NILFS_FEATURE_INCOMPAT_SUPP; if (features) { nilfs_err(sb, "couldn't mount because of unsupported optional features (%llx)", (unsigned long long)features); return -EINVAL; } features = le64_to_cpu(sbp->s_feature_compat_ro) & ~NILFS_FEATURE_COMPAT_RO_SUPP; if (!sb_rdonly(sb) && features) { nilfs_err(sb, "couldn't mount RDWR because of unsupported optional features (%llx)", (unsigned long long)features); return -EINVAL; } return 0; } static int nilfs_get_root_dentry(struct super_block *sb, struct nilfs_root *root, struct dentry **root_dentry) { struct inode *inode; struct dentry *dentry; int ret = 0; inode = nilfs_iget(sb, root, NILFS_ROOT_INO); if (IS_ERR(inode)) { ret = PTR_ERR(inode); nilfs_err(sb, "error %d getting root inode", ret); goto out; } if (!S_ISDIR(inode->i_mode) || !inode->i_blocks || !inode->i_size) { iput(inode); nilfs_err(sb, "corrupt root inode"); ret = -EINVAL; goto out; } if (root->cno == NILFS_CPTREE_CURRENT_CNO) { dentry = d_find_alias(inode); if (!dentry) { dentry = d_make_root(inode); if (!dentry) { ret = -ENOMEM; goto 
failed_dentry; } } else { iput(inode); } } else { dentry = d_obtain_root(inode); if (IS_ERR(dentry)) { ret = PTR_ERR(dentry); goto failed_dentry; } } *root_dentry = dentry; out: return ret; failed_dentry: nilfs_err(sb, "error %d getting root dentry", ret); goto out; } static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, struct dentry **root_dentry) { struct the_nilfs *nilfs = s->s_fs_info; struct nilfs_root *root; int ret; mutex_lock(&nilfs->ns_snapshot_mount_mutex); down_read(&nilfs->ns_segctor_sem); ret = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, cno); up_read(&nilfs->ns_segctor_sem); if (ret < 0) { ret = (ret == -ENOENT) ? -EINVAL : ret; goto out; } else if (!ret) { nilfs_err(s, "The specified checkpoint is not a snapshot (checkpoint number=%llu)", (unsigned long long)cno); ret = -EINVAL; goto out; } ret = nilfs_attach_checkpoint(s, cno, false, &root); if (ret) { nilfs_err(s, "error %d while loading snapshot (checkpoint number=%llu)", ret, (unsigned long long)cno); goto out; } ret = nilfs_get_root_dentry(s, root, root_dentry); nilfs_put_root(root); out: mutex_unlock(&nilfs->ns_snapshot_mount_mutex); return ret; } /** * nilfs_tree_is_busy() - try to shrink dentries of a checkpoint * @root_dentry: root dentry of the tree to be shrunk * * Return: true if the tree was in-use, false otherwise. */ static bool nilfs_tree_is_busy(struct dentry *root_dentry) { shrink_dcache_parent(root_dentry); return d_count(root_dentry) > 1; } int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno) { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_root *root; struct inode *inode; struct dentry *dentry; int ret; if (cno > nilfs->ns_cno) return false; if (cno >= nilfs_last_cno(nilfs)) return true; /* protect recent checkpoints */ ret = false; root = nilfs_lookup_root(nilfs, cno); if (root) { inode = nilfs_ilookup(sb, root, NILFS_ROOT_INO); if (inode) { dentry = d_find_alias(inode); if (dentry) { ret = nilfs_tree_is_busy(dentry); dput(dentry); } iput(inode); } nilfs_put_root(root); } return ret; } /** * nilfs_fill_super() - initialize a super block instance * @sb: super_block * @fc: filesystem context * * This function is called exclusively by nilfs->ns_mount_mutex. * So, the recovery process is protected from other simultaneous mounts. * * Return: 0 on success, or a negative error code on failure. 
*/ static int nilfs_fill_super(struct super_block *sb, struct fs_context *fc) { struct the_nilfs *nilfs; struct nilfs_root *fsroot; struct nilfs_fs_context *ctx = fc->fs_private; __u64 cno; int err; nilfs = alloc_nilfs(sb); if (!nilfs) return -ENOMEM; sb->s_fs_info = nilfs; err = init_nilfs(nilfs, sb); if (err) goto failed_nilfs; /* Copy in parsed mount options */ nilfs->ns_mount_opt = ctx->ns_mount_opt; sb->s_op = &nilfs_sops; sb->s_export_op = &nilfs_export_ops; sb->s_root = NULL; sb->s_time_gran = 1; sb->s_max_links = NILFS_LINK_MAX; sb->s_bdi = bdi_get(sb->s_bdev->bd_disk->bdi); err = load_nilfs(nilfs, sb); if (err) goto failed_nilfs; super_set_uuid(sb, nilfs->ns_sbp[0]->s_uuid, sizeof(nilfs->ns_sbp[0]->s_uuid)); super_set_sysfs_name_bdev(sb); cno = nilfs_last_cno(nilfs); err = nilfs_attach_checkpoint(sb, cno, true, &fsroot); if (err) { nilfs_err(sb, "error %d while loading last checkpoint (checkpoint number=%llu)", err, (unsigned long long)cno); goto failed_unload; } if (!sb_rdonly(sb)) { err = nilfs_attach_log_writer(sb, fsroot); if (err) goto failed_checkpoint; } err = nilfs_get_root_dentry(sb, fsroot, &sb->s_root); if (err) goto failed_segctor; nilfs_put_root(fsroot); if (!sb_rdonly(sb)) { down_write(&nilfs->ns_sem); nilfs_setup_super(sb, true); up_write(&nilfs->ns_sem); } return 0; failed_segctor: nilfs_detach_log_writer(sb); failed_checkpoint: nilfs_put_root(fsroot); failed_unload: nilfs_sysfs_delete_device_group(nilfs); iput(nilfs->ns_sufile); iput(nilfs->ns_cpfile); iput(nilfs->ns_dat); failed_nilfs: destroy_nilfs(nilfs); return err; } static int nilfs_reconfigure(struct fs_context *fc) { struct nilfs_fs_context *ctx = fc->fs_private; struct super_block *sb = fc->root->d_sb; struct the_nilfs *nilfs = sb->s_fs_info; int err; sync_filesystem(sb); err = -EINVAL; if (!nilfs_valid_fs(nilfs)) { nilfs_warn(sb, "couldn't remount because the filesystem is in an incomplete recovery state"); goto ignore_opts; } if ((bool)(fc->sb_flags & SB_RDONLY) == sb_rdonly(sb)) goto out; if (fc->sb_flags & SB_RDONLY) { sb->s_flags |= SB_RDONLY; /* * Remounting a valid RW partition RDONLY, so set * the RDONLY flag and then mark the partition as valid again. */ down_write(&nilfs->ns_sem); nilfs_cleanup_super(sb); up_write(&nilfs->ns_sem); } else { __u64 features; struct nilfs_root *root; /* * Mounting a RDONLY partition read-write, so reread and * store the current valid flag. (It may have been changed * by fsck since we originally mounted the partition.) 
*/ down_read(&nilfs->ns_sem); features = le64_to_cpu(nilfs->ns_sbp[0]->s_feature_compat_ro) & ~NILFS_FEATURE_COMPAT_RO_SUPP; up_read(&nilfs->ns_sem); if (features) { nilfs_warn(sb, "couldn't remount RDWR because of unsupported optional features (%llx)", (unsigned long long)features); err = -EROFS; goto ignore_opts; } sb->s_flags &= ~SB_RDONLY; root = NILFS_I(d_inode(sb->s_root))->i_root; err = nilfs_attach_log_writer(sb, root); if (err) { sb->s_flags |= SB_RDONLY; goto ignore_opts; } down_write(&nilfs->ns_sem); nilfs_setup_super(sb, true); up_write(&nilfs->ns_sem); } out: sb->s_flags = (sb->s_flags & ~SB_POSIXACL); /* Copy over parsed remount options */ nilfs->ns_mount_opt = ctx->ns_mount_opt; return 0; ignore_opts: return err; } static int nilfs_get_tree(struct fs_context *fc) { struct nilfs_fs_context *ctx = fc->fs_private; struct super_block *s; dev_t dev; int err; if (ctx->cno && !(fc->sb_flags & SB_RDONLY)) { nilfs_err(NULL, "invalid option \"cp=%llu\": read-only option is not specified", ctx->cno); return -EINVAL; } err = lookup_bdev(fc->source, &dev); if (err) return err; s = sget_dev(fc, dev); if (IS_ERR(s)) return PTR_ERR(s); if (!s->s_root) { err = setup_bdev_super(s, fc->sb_flags, fc); if (!err) err = nilfs_fill_super(s, fc); if (err) goto failed_super; s->s_flags |= SB_ACTIVE; } else if (!ctx->cno) { if (nilfs_tree_is_busy(s->s_root)) { if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) { nilfs_err(s, "the device already has a %s mount.", sb_rdonly(s) ? "read-only" : "read/write"); err = -EBUSY; goto failed_super; } } else { /* * Try reconfigure to setup mount states if the current * tree is not mounted and only snapshots use this sb. * * Since nilfs_reconfigure() requires fc->root to be * set, set it first and release it on failure. */ fc->root = dget(s->s_root); err = nilfs_reconfigure(fc); if (err) { dput(fc->root); fc->root = NULL; /* prevent double release */ goto failed_super; } return 0; } } if (ctx->cno) { struct dentry *root_dentry; err = nilfs_attach_snapshot(s, ctx->cno, &root_dentry); if (err) goto failed_super; fc->root = root_dentry; return 0; } fc->root = dget(s->s_root); return 0; failed_super: deactivate_locked_super(s); return err; } static void nilfs_free_fc(struct fs_context *fc) { kfree(fc->fs_private); } static const struct fs_context_operations nilfs_context_ops = { .parse_param = nilfs_parse_param, .get_tree = nilfs_get_tree, .reconfigure = nilfs_reconfigure, .free = nilfs_free_fc, }; static int nilfs_init_fs_context(struct fs_context *fc) { struct nilfs_fs_context *ctx; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->ns_mount_opt = NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER; fc->fs_private = ctx; fc->ops = &nilfs_context_ops; return 0; } struct file_system_type nilfs_fs_type = { .owner = THIS_MODULE, .name = "nilfs2", .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, .init_fs_context = nilfs_init_fs_context, .parameters = nilfs_param_spec, }; MODULE_ALIAS_FS("nilfs2"); static void nilfs_inode_init_once(void *obj) { struct nilfs_inode_info *ii = obj; INIT_LIST_HEAD(&ii->i_dirty); #ifdef CONFIG_NILFS_XATTR init_rwsem(&ii->xattr_sem); #endif inode_init_once(&ii->vfs_inode); } static void nilfs_segbuf_init_once(void *obj) { memset(obj, 0, sizeof(struct nilfs_segment_buffer)); } static void nilfs_destroy_cachep(void) { /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. 
*/ rcu_barrier(); kmem_cache_destroy(nilfs_inode_cachep); kmem_cache_destroy(nilfs_transaction_cachep); kmem_cache_destroy(nilfs_segbuf_cachep); kmem_cache_destroy(nilfs_btree_path_cache); } static int __init nilfs_init_cachep(void) { nilfs_inode_cachep = kmem_cache_create("nilfs2_inode_cache", sizeof(struct nilfs_inode_info), 0, SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT, nilfs_inode_init_once); if (!nilfs_inode_cachep) goto fail; nilfs_transaction_cachep = kmem_cache_create("nilfs2_transaction_cache", sizeof(struct nilfs_transaction_info), 0, SLAB_RECLAIM_ACCOUNT, NULL); if (!nilfs_transaction_cachep) goto fail; nilfs_segbuf_cachep = kmem_cache_create("nilfs2_segbuf_cache", sizeof(struct nilfs_segment_buffer), 0, SLAB_RECLAIM_ACCOUNT, nilfs_segbuf_init_once); if (!nilfs_segbuf_cachep) goto fail; nilfs_btree_path_cache = kmem_cache_create("nilfs2_btree_path_cache", sizeof(struct nilfs_btree_path) * NILFS_BTREE_LEVEL_MAX, 0, 0, NULL); if (!nilfs_btree_path_cache) goto fail; return 0; fail: nilfs_destroy_cachep(); return -ENOMEM; } static int __init init_nilfs_fs(void) { int err; err = nilfs_init_cachep(); if (err) goto fail; err = nilfs_sysfs_init(); if (err) goto free_cachep; err = register_filesystem(&nilfs_fs_type); if (err) goto deinit_sysfs_entry; printk(KERN_INFO "NILFS version 2 loaded\n"); return 0; deinit_sysfs_entry: nilfs_sysfs_exit(); free_cachep: nilfs_destroy_cachep(); fail: return err; } static void __exit exit_nilfs_fs(void) { nilfs_destroy_cachep(); nilfs_sysfs_exit(); unregister_filesystem(&nilfs_fs_type); } module_init(init_nilfs_fs) module_exit(exit_nilfs_fs)
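/*
 * Editorial sketch, not part of the nilfs2 sources: a minimal, hypothetical
 * "examplefs" illustrating the fs_context registration pattern that super.c
 * above follows with nilfs_init_fs_context(), nilfs_context_ops and
 * nilfs_fs_type. All examplefs_* names are invented, and get_tree_nodev() is
 * used instead of a block device so the sketch stays self-contained.
 */
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/fs_context.h>

static int examplefs_fill_super(struct super_block *sb, struct fs_context *fc)
{
	/* a real filesystem would set sb->s_op, create the root inode, etc. */
	return -ENOSYS;
}

static int examplefs_get_tree(struct fs_context *fc)
{
	return get_tree_nodev(fc, examplefs_fill_super);
}

static const struct fs_context_operations examplefs_context_ops = {
	.get_tree	= examplefs_get_tree,
};

static int examplefs_init_fs_context(struct fs_context *fc)
{
	fc->ops = &examplefs_context_ops;
	return 0;
}

static struct file_system_type examplefs_fs_type = {
	.owner			= THIS_MODULE,
	.name			= "examplefs",
	.kill_sb		= kill_anon_super,
	.init_fs_context	= examplefs_init_fs_context,
};

static int __init examplefs_init(void)
{
	return register_filesystem(&examplefs_fs_type);
}

static void __exit examplefs_exit(void)
{
	unregister_filesystem(&examplefs_fs_type);
}

module_init(examplefs_init);
module_exit(examplefs_exit);
MODULE_LICENSE("GPL");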
// SPDX-License-Identifier: GPL-2.0-or-later /* * Create default crypto algorithm instances. * * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> */ #include <crypto/internal/aead.h> #include <linux/completion.h> #include <linux/ctype.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kthread.h> #include <linux/module.h> #include <linux/notifier.h> #include <linux/rtnetlink.h> #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/string.h> #include "internal.h" struct cryptomgr_param { struct rtattr *tb[CRYPTO_MAX_ATTRS + 2]; struct { struct rtattr attr; struct crypto_attr_type data; } type; struct { struct rtattr attr; struct crypto_attr_alg data; } attrs[CRYPTO_MAX_ATTRS]; char template[CRYPTO_MAX_ALG_NAME]; struct crypto_larval *larval; u32 otype; u32 omask; }; struct crypto_test_param { char driver[CRYPTO_MAX_ALG_NAME]; char alg[CRYPTO_MAX_ALG_NAME]; u32 type; }; static int cryptomgr_probe(void *data) { struct cryptomgr_param *param = data; struct crypto_template *tmpl; int err = -ENOENT; tmpl = crypto_lookup_template(param->template); if (!tmpl) goto out; do { err = tmpl->create(tmpl, param->tb); } while (err == -EAGAIN && !signal_pending(current)); crypto_tmpl_put(tmpl); out: param->larval->adult = ERR_PTR(err); param->larval->alg.cra_flags |= CRYPTO_ALG_DEAD; complete_all(&param->larval->completion); crypto_alg_put(&param->larval->alg); kfree(param); module_put_and_kthread_exit(0); } static int cryptomgr_schedule_probe(struct crypto_larval *larval) { struct task_struct *thread; struct cryptomgr_param *param; const char *name = larval->alg.cra_name; const char *p; unsigned int len; int i; if (!try_module_get(THIS_MODULE)) goto err; param = kzalloc(sizeof(*param), GFP_KERNEL); if (!param) goto err_put_module; for (p = name; isalnum(*p) || *p == '-' || *p == '_'; p++) ; len = p - name; if (!len || *p != '(') goto err_free_param; memcpy(param->template, name, len); i = 0; for (;;) { name = ++p; for (; isalnum(*p) || *p == '-' || *p == '_'; p++) ; if (*p == '(') { int recursion = 0; for (;;) { if (!*++p) goto err_free_param; if (*p == '(') recursion++; else if (*p == ')' && !recursion--) break; } p++; } len = p - name; if (!len) goto err_free_param; param->attrs[i].attr.rta_len = sizeof(param->attrs[i]); param->attrs[i].attr.rta_type = CRYPTOA_ALG; memcpy(param->attrs[i].data.name, name, len); param->tb[i + 1] = &param->attrs[i].attr; i++; if (i >= CRYPTO_MAX_ATTRS) goto err_free_param; if (*p == ')') break; if (*p != ',') goto err_free_param; } param->tb[i + 1] = NULL;
param->type.attr.rta_len = sizeof(param->type); param->type.attr.rta_type = CRYPTOA_TYPE; param->type.data.type = larval->alg.cra_flags & ~CRYPTO_ALG_TESTED; param->type.data.mask = larval->mask & ~CRYPTO_ALG_TESTED; param->tb[0] = &param->type.attr; param->otype = larval->alg.cra_flags; param->omask = larval->mask; crypto_alg_get(&larval->alg); param->larval = larval; thread = kthread_run(cryptomgr_probe, param, "cryptomgr_probe"); if (IS_ERR(thread)) goto err_put_larval; return NOTIFY_STOP; err_put_larval: crypto_alg_put(&larval->alg); err_free_param: kfree(param); err_put_module: module_put(THIS_MODULE); err: return NOTIFY_OK; } static int cryptomgr_test(void *data) { struct crypto_test_param *param = data; u32 type = param->type; int err; err = alg_test(param->driver, param->alg, type, CRYPTO_ALG_TESTED); crypto_alg_tested(param->driver, err); kfree(param); module_put_and_kthread_exit(0); } static int cryptomgr_schedule_test(struct crypto_alg *alg) { struct task_struct *thread; struct crypto_test_param *param; if (IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS)) return NOTIFY_DONE; if (!try_module_get(THIS_MODULE)) goto err; param = kzalloc(sizeof(*param), GFP_KERNEL); if (!param) goto err_put_module; memcpy(param->driver, alg->cra_driver_name, sizeof(param->driver)); memcpy(param->alg, alg->cra_name, sizeof(param->alg)); param->type = alg->cra_flags; thread = kthread_run(cryptomgr_test, param, "cryptomgr_test"); if (IS_ERR(thread)) goto err_free_param; return NOTIFY_STOP; err_free_param: kfree(param); err_put_module: module_put(THIS_MODULE); err: return NOTIFY_OK; } static int cryptomgr_notify(struct notifier_block *this, unsigned long msg, void *data) { switch (msg) { case CRYPTO_MSG_ALG_REQUEST: return cryptomgr_schedule_probe(data); case CRYPTO_MSG_ALG_REGISTER: return cryptomgr_schedule_test(data); case CRYPTO_MSG_ALG_LOADED: break; } return NOTIFY_DONE; } static struct notifier_block cryptomgr_notifier = { .notifier_call = cryptomgr_notify, }; static int __init cryptomgr_init(void) { return crypto_register_notifier(&cryptomgr_notifier); } static void __exit cryptomgr_exit(void) { int err = crypto_unregister_notifier(&cryptomgr_notifier); BUG_ON(err); } /* * This is arch_initcall() so that the crypto self-tests are run on algorithms * registered early by subsys_initcall(). subsys_initcall() is needed for * generic implementations so that they're available for comparison tests when * other implementations are registered later by module_init(). */ arch_initcall(cryptomgr_init); module_exit(cryptomgr_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Crypto Algorithm Manager");
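/*
 * Editorial sketch, hypothetical caller, not part of algboss.c: requesting a
 * templated algorithm by name is what ultimately triggers the
 * CRYPTO_MSG_ALG_REQUEST notification handled by cryptomgr_schedule_probe()
 * above, which parses the "template(child)" string and instantiates it.
 * The function name example_request_cbc_aes is invented for illustration.
 */
#include <crypto/skcipher.h>
#include <linux/err.h>

static int example_request_cbc_aes(void)
{
	struct crypto_skcipher *tfm;

	/* "cbc(aes)" = the "cbc" template applied to the "aes" cipher */
	tfm = crypto_alloc_skcipher("cbc(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* ... set a key and issue requests here ... */

	crypto_free_skcipher(tfm);
	return 0;
}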
#undef TRACE_SYSTEM #define TRACE_SYSTEM rtc #if !defined(_TRACE_RTC_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_RTC_H #include <linux/rtc.h> #include <linux/tracepoint.h> DECLARE_EVENT_CLASS(rtc_time_alarm_class, TP_PROTO(time64_t secs, int err), TP_ARGS(secs, err), TP_STRUCT__entry( __field(time64_t, secs) __field(int, err) ), TP_fast_assign( __entry->secs = secs; __entry->err = err; ), TP_printk("UTC (%lld) (%d)", __entry->secs, __entry->err ) ); DEFINE_EVENT(rtc_time_alarm_class, rtc_set_time, TP_PROTO(time64_t secs, int err), TP_ARGS(secs, err) ); DEFINE_EVENT(rtc_time_alarm_class, rtc_read_time, TP_PROTO(time64_t secs, int err), TP_ARGS(secs, err) ); DEFINE_EVENT(rtc_time_alarm_class, rtc_set_alarm, TP_PROTO(time64_t secs, int err), TP_ARGS(secs, err) ); DEFINE_EVENT(rtc_time_alarm_class, rtc_read_alarm, TP_PROTO(time64_t secs, int err), TP_ARGS(secs, err) ); TRACE_EVENT(rtc_irq_set_freq, TP_PROTO(int freq, int err), TP_ARGS(freq, err), TP_STRUCT__entry( __field(int, freq) __field(int, err) ), TP_fast_assign( __entry->freq = freq; __entry->err = err; ), TP_printk("set RTC periodic IRQ frequency:%u (%d)", __entry->freq, __entry->err ) ); TRACE_EVENT(rtc_irq_set_state, TP_PROTO(int enabled, int err), TP_ARGS(enabled, err), TP_STRUCT__entry( __field(int, enabled) __field(int, err) ), TP_fast_assign( __entry->enabled = enabled; __entry->err = err; ), TP_printk("%s RTC 2^N Hz periodic IRQs (%d)", __entry->enabled ? "enable" : "disable", __entry->err ) ); TRACE_EVENT(rtc_alarm_irq_enable, TP_PROTO(unsigned int enabled, int err), TP_ARGS(enabled, err), TP_STRUCT__entry( __field(unsigned int, enabled) __field(int, err) ), TP_fast_assign( __entry->enabled = enabled; __entry->err = err; ), TP_printk("%s RTC alarm IRQ (%d)", __entry->enabled ?
"enable" : "disable", __entry->err ) ); DECLARE_EVENT_CLASS(rtc_offset_class, TP_PROTO(long offset, int err), TP_ARGS(offset, err), TP_STRUCT__entry( __field(long, offset) __field(int, err) ), TP_fast_assign( __entry->offset = offset; __entry->err = err; ), TP_printk("RTC offset: %ld (%d)", __entry->offset, __entry->err ) ); DEFINE_EVENT(rtc_offset_class, rtc_set_offset, TP_PROTO(long offset, int err), TP_ARGS(offset, err) ); DEFINE_EVENT(rtc_offset_class, rtc_read_offset, TP_PROTO(long offset, int err), TP_ARGS(offset, err) ); DECLARE_EVENT_CLASS(rtc_timer_class, TP_PROTO(struct rtc_timer *timer), TP_ARGS(timer), TP_STRUCT__entry( __field(struct rtc_timer *, timer) __field(ktime_t, expires) __field(ktime_t, period) ), TP_fast_assign( __entry->timer = timer; __entry->expires = timer->node.expires; __entry->period = timer->period; ), TP_printk("RTC timer:(%p) expires:%lld period:%lld", __entry->timer, __entry->expires, __entry->period ) ); DEFINE_EVENT(rtc_timer_class, rtc_timer_enqueue, TP_PROTO(struct rtc_timer *timer), TP_ARGS(timer) ); DEFINE_EVENT(rtc_timer_class, rtc_timer_dequeue, TP_PROTO(struct rtc_timer *timer), TP_ARGS(timer) ); DEFINE_EVENT(rtc_timer_class, rtc_timer_fired, TP_PROTO(struct rtc_timer *timer), TP_ARGS(timer) ); #endif /* _TRACE_RTC_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
/* SPDX-License-Identifier: GPL-2.0 */ /* * workqueue.h --- work queue handling for Linux.
*/ #ifndef _LINUX_WORKQUEUE_H #define _LINUX_WORKQUEUE_H #include <linux/timer.h> #include <linux/linkage.h> #include <linux/bitops.h> #include <linux/lockdep.h> #include <linux/threads.h> #include <linux/atomic.h> #include <linux/cpumask_types.h> #include <linux/rcupdate.h> #include <linux/workqueue_types.h> /* * The first word is the work queue pointer and the flags rolled into * one */ #define work_data_bits(work) ((unsigned long *)(&(work)->data)) enum work_bits { WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ WORK_STRUCT_INACTIVE_BIT, /* work item is inactive */ WORK_STRUCT_PWQ_BIT, /* data points to pwq */ WORK_STRUCT_LINKED_BIT, /* next work is linked to this one */ #ifdef CONFIG_DEBUG_OBJECTS_WORK WORK_STRUCT_STATIC_BIT, /* static initializer (debugobjects) */ #endif WORK_STRUCT_FLAG_BITS, /* color for workqueue flushing */ WORK_STRUCT_COLOR_SHIFT = WORK_STRUCT_FLAG_BITS, WORK_STRUCT_COLOR_BITS = 4, /* * When WORK_STRUCT_PWQ is set, reserve 8 bits off of pwq pointer w/ * debugobjects turned off. This makes pwqs aligned to 256 bytes (512 * bytes w/ DEBUG_OBJECTS_WORK) and allows 16 workqueue flush colors. * * MSB * [ pwq pointer ] [ flush color ] [ STRUCT flags ] * 4 bits 4 or 5 bits */ WORK_STRUCT_PWQ_SHIFT = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS, /* * data contains off-queue information when !WORK_STRUCT_PWQ. * * MSB * [ pool ID ] [ disable depth ] [ OFFQ flags ] [ STRUCT flags ] * 16 bits 1 bit 4 or 5 bits */ WORK_OFFQ_FLAG_SHIFT = WORK_STRUCT_FLAG_BITS, WORK_OFFQ_BH_BIT = WORK_OFFQ_FLAG_SHIFT, WORK_OFFQ_FLAG_END, WORK_OFFQ_FLAG_BITS = WORK_OFFQ_FLAG_END - WORK_OFFQ_FLAG_SHIFT, WORK_OFFQ_DISABLE_SHIFT = WORK_OFFQ_FLAG_SHIFT + WORK_OFFQ_FLAG_BITS, WORK_OFFQ_DISABLE_BITS = 16, /* * When a work item is off queue, the high bits encode off-queue flags * and the last pool it was on. Cap pool ID to 31 bits and use the * highest number to indicate that no pool is associated. */ WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_DISABLE_SHIFT + WORK_OFFQ_DISABLE_BITS, WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT, WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31, }; enum work_flags { WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, WORK_STRUCT_INACTIVE = 1 << WORK_STRUCT_INACTIVE_BIT, WORK_STRUCT_PWQ = 1 << WORK_STRUCT_PWQ_BIT, WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT, #ifdef CONFIG_DEBUG_OBJECTS_WORK WORK_STRUCT_STATIC = 1 << WORK_STRUCT_STATIC_BIT, #else WORK_STRUCT_STATIC = 0, #endif }; enum wq_misc_consts { WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS), /* not bound to any CPU, prefer the local CPU */ WORK_CPU_UNBOUND = NR_CPUS, /* bit mask for work_busy() return values */ WORK_BUSY_PENDING = 1 << 0, WORK_BUSY_RUNNING = 1 << 1, /* maximum string length for set_worker_desc() */ WORKER_DESC_LEN = 32, }; /* Convenience constants - of type 'unsigned long', not 'enum'! 
*/ #define WORK_OFFQ_BH (1ul << WORK_OFFQ_BH_BIT) #define WORK_OFFQ_FLAG_MASK (((1ul << WORK_OFFQ_FLAG_BITS) - 1) << WORK_OFFQ_FLAG_SHIFT) #define WORK_OFFQ_DISABLE_MASK (((1ul << WORK_OFFQ_DISABLE_BITS) - 1) << WORK_OFFQ_DISABLE_SHIFT) #define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1) #define WORK_STRUCT_NO_POOL (WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT) #define WORK_STRUCT_PWQ_MASK (~((1ul << WORK_STRUCT_PWQ_SHIFT) - 1)) #define WORK_DATA_INIT() ATOMIC_LONG_INIT((unsigned long)WORK_STRUCT_NO_POOL) #define WORK_DATA_STATIC_INIT() \ ATOMIC_LONG_INIT((unsigned long)(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC)) struct delayed_work { struct work_struct work; struct timer_list timer; /* target workqueue and CPU ->timer uses to queue ->work */ struct workqueue_struct *wq; int cpu; }; struct rcu_work { struct work_struct work; struct rcu_head rcu; /* target workqueue ->rcu uses to queue ->work */ struct workqueue_struct *wq; }; enum wq_affn_scope { WQ_AFFN_DFL, /* use system default */ WQ_AFFN_CPU, /* one pod per CPU */ WQ_AFFN_SMT, /* one pod poer SMT */ WQ_AFFN_CACHE, /* one pod per LLC */ WQ_AFFN_NUMA, /* one pod per NUMA node */ WQ_AFFN_SYSTEM, /* one pod across the whole system */ WQ_AFFN_NR_TYPES, }; /** * struct workqueue_attrs - A struct for workqueue attributes. * * This can be used to change attributes of an unbound workqueue. */ struct workqueue_attrs { /** * @nice: nice level */ int nice; /** * @cpumask: allowed CPUs * * Work items in this workqueue are affine to these CPUs and not allowed * to execute on other CPUs. A pool serving a workqueue must have the * same @cpumask. */ cpumask_var_t cpumask; /** * @__pod_cpumask: internal attribute used to create per-pod pools * * Internal use only. * * Per-pod unbound worker pools are used to improve locality. Always a * subset of ->cpumask. A workqueue can be associated with multiple * worker pools with disjoint @__pod_cpumask's. Whether the enforcement * of a pool's @__pod_cpumask is strict depends on @affn_strict. */ cpumask_var_t __pod_cpumask; /** * @affn_strict: affinity scope is strict * * If clear, workqueue will make a best-effort attempt at starting the * worker inside @__pod_cpumask but the scheduler is free to migrate it * outside. * * If set, workers are only allowed to run inside @__pod_cpumask. */ bool affn_strict; /* * Below fields aren't properties of a worker_pool. They only modify how * :c:func:`apply_workqueue_attrs` select pools and thus don't * participate in pool hash calculations or equality comparisons. * * If @affn_strict is set, @cpumask isn't a property of a worker_pool * either. */ /** * @affn_scope: unbound CPU affinity scope * * CPU pods are used to improve execution locality of unbound work * items. There are multiple pod types, one for each wq_affn_scope, and * every CPU in the system belongs to one pod in every pod type. CPUs * that belong to the same pod share the worker pool. For example, * selecting %WQ_AFFN_NUMA makes the workqueue use a separate worker * pool for each NUMA node. 
*/ enum wq_affn_scope affn_scope; /** * @ordered: work items must be executed one by one in queueing order */ bool ordered; }; static inline struct delayed_work *to_delayed_work(struct work_struct *work) { return container_of(work, struct delayed_work, work); } static inline struct rcu_work *to_rcu_work(struct work_struct *work) { return container_of(work, struct rcu_work, work); } struct execute_work { struct work_struct work; }; #ifdef CONFIG_LOCKDEP /* * NB: because we have to copy the lockdep_map, setting _key * here is required, otherwise it could get initialised to the * copy of the lockdep_map! */ #define __WORK_INIT_LOCKDEP_MAP(n, k) \ .lockdep_map = STATIC_LOCKDEP_MAP_INIT(n, k), #else #define __WORK_INIT_LOCKDEP_MAP(n, k) #endif #define __WORK_INITIALIZER(n, f) { \ .data = WORK_DATA_STATIC_INIT(), \ .entry = { &(n).entry, &(n).entry }, \ .func = (f), \ __WORK_INIT_LOCKDEP_MAP(#n, &(n)) \ } #define __DELAYED_WORK_INITIALIZER(n, f, tflags) { \ .work = __WORK_INITIALIZER((n).work, (f)), \ .timer = __TIMER_INITIALIZER(delayed_work_timer_fn,\ (tflags) | TIMER_IRQSAFE), \ } #define DECLARE_WORK(n, f) \ struct work_struct n = __WORK_INITIALIZER(n, f) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, 0) #define DECLARE_DEFERRABLE_WORK(n, f) \ struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, TIMER_DEFERRABLE) #ifdef CONFIG_DEBUG_OBJECTS_WORK extern void __init_work(struct work_struct *work, int onstack); extern void destroy_work_on_stack(struct work_struct *work); extern void destroy_delayed_work_on_stack(struct delayed_work *work); static inline unsigned int work_static(struct work_struct *work) { return *work_data_bits(work) & WORK_STRUCT_STATIC; } #else static inline void __init_work(struct work_struct *work, int onstack) { } static inline void destroy_work_on_stack(struct work_struct *work) { } static inline void destroy_delayed_work_on_stack(struct delayed_work *work) { } static inline unsigned int work_static(struct work_struct *work) { return 0; } #endif /* * initialize all of a work item in one go * * NOTE! No point in using "atomic_long_set()": using a direct * assignment of the work data initializer allows the compiler * to generate better code. 
*/ #ifdef CONFIG_LOCKDEP #define __INIT_WORK_KEY(_work, _func, _onstack, _key) \ do { \ __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ lockdep_init_map(&(_work)->lockdep_map, "(work_completion)"#_work, (_key), 0); \ INIT_LIST_HEAD(&(_work)->entry); \ (_work)->func = (_func); \ } while (0) #else #define __INIT_WORK_KEY(_work, _func, _onstack, _key) \ do { \ __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ INIT_LIST_HEAD(&(_work)->entry); \ (_work)->func = (_func); \ } while (0) #endif #define __INIT_WORK(_work, _func, _onstack) \ do { \ static __maybe_unused struct lock_class_key __key; \ \ __INIT_WORK_KEY(_work, _func, _onstack, &__key); \ } while (0) #define INIT_WORK(_work, _func) \ __INIT_WORK((_work), (_func), 0) #define INIT_WORK_ONSTACK(_work, _func) \ __INIT_WORK((_work), (_func), 1) #define INIT_WORK_ONSTACK_KEY(_work, _func, _key) \ __INIT_WORK_KEY((_work), (_func), 1, _key) #define __INIT_DELAYED_WORK(_work, _func, _tflags) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ __init_timer(&(_work)->timer, \ delayed_work_timer_fn, \ (_tflags) | TIMER_IRQSAFE); \ } while (0) #define __INIT_DELAYED_WORK_ONSTACK(_work, _func, _tflags) \ do { \ INIT_WORK_ONSTACK(&(_work)->work, (_func)); \ __init_timer_on_stack(&(_work)->timer, \ delayed_work_timer_fn, \ (_tflags) | TIMER_IRQSAFE); \ } while (0) #define INIT_DELAYED_WORK(_work, _func) \ __INIT_DELAYED_WORK(_work, _func, 0) #define INIT_DELAYED_WORK_ONSTACK(_work, _func) \ __INIT_DELAYED_WORK_ONSTACK(_work, _func, 0) #define INIT_DEFERRABLE_WORK(_work, _func) \ __INIT_DELAYED_WORK(_work, _func, TIMER_DEFERRABLE) #define INIT_DEFERRABLE_WORK_ONSTACK(_work, _func) \ __INIT_DELAYED_WORK_ONSTACK(_work, _func, TIMER_DEFERRABLE) #define INIT_RCU_WORK(_work, _func) \ INIT_WORK(&(_work)->work, (_func)) #define INIT_RCU_WORK_ONSTACK(_work, _func) \ INIT_WORK_ONSTACK(&(_work)->work, (_func)) /** * work_pending - Find out whether a work item is currently pending * @work: The work item in question */ #define work_pending(work) \ test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) /** * delayed_work_pending - Find out whether a delayable work item is currently * pending * @w: The work item in question */ #define delayed_work_pending(w) \ work_pending(&(w)->work) /* * Workqueue flags and constants. For details, please refer to * Documentation/core-api/workqueue.rst. */ enum wq_flags { WQ_BH = 1 << 0, /* execute in bottom half (softirq) context */ WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ WQ_FREEZABLE = 1 << 2, /* freeze during suspend */ WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu intensive workqueue */ WQ_SYSFS = 1 << 6, /* visible in sysfs, see workqueue_sysfs_register() */ /* * Per-cpu workqueues are generally preferred because they tend to * show better performance thanks to cache locality. Per-cpu * workqueues exclude the scheduler from choosing the CPU to * execute the worker threads, which has an unfortunate side effect * of increasing power consumption. * * The scheduler considers a CPU idle if it doesn't have any task * to execute and tries to keep idle cores idle to conserve power; * however, for example, a per-cpu work item scheduled from an * interrupt handler on an idle CPU will force the scheduler to * execute the work item on that CPU breaking the idleness, which in * turn may lead to more scheduling choices which are sub-optimal * in terms of power consumption. 
* * Workqueues marked with WQ_POWER_EFFICIENT are per-cpu by default * but become unbound if workqueue.power_efficient kernel param is * specified. Per-cpu workqueues which are identified to * contribute significantly to power consumption are marked with * this flag; enabling the power_efficient mode leads to noticeable * power saving at the cost of a small performance disadvantage. * * http://thread.gmane.org/gmane.linux.kernel/1480396 */ WQ_POWER_EFFICIENT = 1 << 7, __WQ_DESTROYING = 1 << 15, /* internal: workqueue is destroying */ __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */ /* BH wq only allows the following flags */ __WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI, }; enum wq_consts { WQ_MAX_ACTIVE = 2048, /* I like 2048, better ideas? */ WQ_UNBOUND_MAX_ACTIVE = WQ_MAX_ACTIVE, WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, /* * Per-node default cap on min_active. Unless explicitly set, min_active * is set to min(max_active, WQ_DFL_MIN_ACTIVE). For more details, see * workqueue_struct->min_active definition. */ WQ_DFL_MIN_ACTIVE = 8, }; /* * System-wide workqueues which are always present. * * system_wq is the one used by schedule[_delayed]_work[_on](). * Multi-CPU multi-threaded. There are users which expect relatively * short queue flush time. Don't queue works which can run for too * long. * * system_highpri_wq is similar to system_wq but for work items which * require WQ_HIGHPRI. * * system_long_wq is similar to system_wq but may host long-running * works. Queue flushing might take relatively long. * * system_unbound_wq is an unbound workqueue. Workers are not bound to * any specific CPU, not concurrency managed, and all queued works are * executed immediately as long as the max_active limit is not reached and * resources are available. * * system_freezable_wq is equivalent to system_wq except that it's * freezable. * * *_power_efficient_wq are inclined towards saving power and converted * into WQ_UNBOUND variants if 'wq_power_efficient' is enabled; otherwise, * they are the same as their non-power-efficient counterparts - e.g. * system_power_efficient_wq is identical to system_wq if * 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info. * * system_bh[_highpri]_wq are convenience interfaces to softirq. BH work items * are executed in the queueing CPU's BH context in the queueing order. */ extern struct workqueue_struct *system_wq; extern struct workqueue_struct *system_highpri_wq; extern struct workqueue_struct *system_long_wq; extern struct workqueue_struct *system_unbound_wq; extern struct workqueue_struct *system_freezable_wq; extern struct workqueue_struct *system_power_efficient_wq; extern struct workqueue_struct *system_freezable_power_efficient_wq; extern struct workqueue_struct *system_bh_wq; extern struct workqueue_struct *system_bh_highpri_wq; void workqueue_softirq_action(bool highpri); void workqueue_softirq_dead(unsigned int cpu); /** * alloc_workqueue - allocate a workqueue * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags * @max_active: max in-flight work items, 0 for default * @...: args for @fmt * * For a per-cpu workqueue, @max_active limits the number of in-flight work * items for each CPU. e.g. @max_active of 1 indicates that each CPU can be * executing at most one work item for the workqueue. * * For unbound workqueues, @max_active limits the number of in-flight work items * for the whole system. e.g.
@max_active of 16 indicates that there can be * at most 16 work items executing for the workqueue in the whole system. * * As sharing the same active counter for an unbound workqueue across multiple * NUMA nodes can be expensive, @max_active is distributed to each NUMA node * according to the proportion of the number of online CPUs and enforced * independently. * * Depending on online CPU distribution, a node may end up with per-node * max_active which is significantly lower than @max_active, which can lead to * deadlocks if the per-node concurrency limit is lower than the maximum number * of interdependent work items for the workqueue. * * To guarantee forward progress regardless of online CPU distribution, the * concurrency limit on every node is guaranteed to be equal to or greater than * min_active which is set to min(@max_active, %WQ_DFL_MIN_ACTIVE). This means * that the sum of per-node max_active's may be larger than @max_active. * * For detailed information on %WQ_* flags, please refer to * Documentation/core-api/workqueue.rst. * * RETURNS: * Pointer to the allocated workqueue on success, %NULL on failure. */ __printf(1, 4) struct workqueue_struct * alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...); #ifdef CONFIG_LOCKDEP /** * alloc_workqueue_lockdep_map - allocate a workqueue with user-defined lockdep_map * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags * @max_active: max in-flight work items, 0 for default * @lockdep_map: user-defined lockdep_map * @...: args for @fmt * * Same as alloc_workqueue but with a user-defined lockdep_map. Useful for * workqueues created with the same purpose and to avoid leaking a lockdep_map * on each workqueue creation. * * RETURNS: * Pointer to the allocated workqueue on success, %NULL on failure. */ __printf(1, 5) struct workqueue_struct * alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, struct lockdep_map *lockdep_map, ...); /** * alloc_ordered_workqueue_lockdep_map - allocate an ordered workqueue with * user-defined lockdep_map * * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful) * @lockdep_map: user-defined lockdep_map * @args: args for @fmt * * Same as alloc_ordered_workqueue but with a user-defined lockdep_map. * Useful for workqueues created with the same purpose and to avoid leaking a * lockdep_map on each workqueue creation. * * RETURNS: * Pointer to the allocated workqueue on success, %NULL on failure. */ #define alloc_ordered_workqueue_lockdep_map(fmt, flags, lockdep_map, args...) \ alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), \ 1, lockdep_map, ##args) #endif /** * alloc_ordered_workqueue - allocate an ordered workqueue * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful) * @args: args for @fmt * * Allocate an ordered workqueue. An ordered workqueue executes at * most one work item at any given time in the queued order. They are * implemented as unbound workqueues with @max_active of one. * * RETURNS: * Pointer to the allocated workqueue on success, %NULL on failure. */ #define alloc_ordered_workqueue(fmt, flags, args...)
\ alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) #define create_workqueue(name) \ alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name)) #define create_freezable_workqueue(name) \ alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND | \ WQ_MEM_RECLAIM, 1, (name)) #define create_singlethread_workqueue(name) \ alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name) #define from_work(var, callback_work, work_fieldname) \ container_of(callback_work, typeof(*var), work_fieldname) extern void destroy_workqueue(struct workqueue_struct *wq); struct workqueue_attrs *alloc_workqueue_attrs(void); void free_workqueue_attrs(struct workqueue_attrs *attrs); int apply_workqueue_attrs(struct workqueue_struct *wq, const struct workqueue_attrs *attrs); extern int workqueue_unbound_exclude_cpumask(cpumask_var_t cpumask); extern bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work); extern bool queue_work_node(int node, struct workqueue_struct *wq, struct work_struct *work); extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay); extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay); extern bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork); extern void __flush_workqueue(struct workqueue_struct *wq); extern void drain_workqueue(struct workqueue_struct *wq); extern int schedule_on_each_cpu(work_func_t func); int execute_in_process_context(work_func_t fn, struct execute_work *); extern bool flush_work(struct work_struct *work); extern bool cancel_work(struct work_struct *work); extern bool cancel_work_sync(struct work_struct *work); extern bool flush_delayed_work(struct delayed_work *dwork); extern bool cancel_delayed_work(struct delayed_work *dwork); extern bool cancel_delayed_work_sync(struct delayed_work *dwork); extern bool disable_work(struct work_struct *work); extern bool disable_work_sync(struct work_struct *work); extern bool enable_work(struct work_struct *work); extern bool disable_delayed_work(struct delayed_work *dwork); extern bool disable_delayed_work_sync(struct delayed_work *dwork); extern bool enable_delayed_work(struct delayed_work *dwork); extern bool flush_rcu_work(struct rcu_work *rwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, int max_active); extern void workqueue_set_min_active(struct workqueue_struct *wq, int min_active); extern struct work_struct *current_work(void); extern bool current_is_workqueue_rescuer(void); extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); extern unsigned int work_busy(struct work_struct *work); extern __printf(1, 2) void set_worker_desc(const char *fmt, ...); extern void print_worker_info(const char *log_lvl, struct task_struct *task); extern void show_all_workqueues(void); extern void show_freezable_workqueues(void); extern void show_one_workqueue(struct workqueue_struct *wq); extern void wq_worker_comm(char *buf, size_t size, struct task_struct *task); /** * queue_work - queue work on a workqueue * @wq: workqueue to use * @work: work to queue * * Returns %false if @work was already on a queue, %true otherwise. * * We queue the work to the CPU on which it was submitted, but if the CPU dies * it can be processed by another CPU. 
* * Memory-ordering properties: If it returns %true, guarantees that all stores * preceding the call to queue_work() in the program order will be visible from * the CPU which will execute @work by the time such work executes, e.g., * * { x is initially 0 } * * CPU0 CPU1 * * WRITE_ONCE(x, 1); [ @work is being executed ] * r0 = queue_work(wq, work); r1 = READ_ONCE(x); * * Forbids: r0 == true && r1 == 0 */ static inline bool queue_work(struct workqueue_struct *wq, struct work_struct *work) { return queue_work_on(WORK_CPU_UNBOUND, wq, work); } /** * queue_delayed_work - queue work on a workqueue after delay * @wq: workqueue to use * @dwork: delayable work to queue * @delay: number of jiffies to wait before queueing * * Equivalent to queue_delayed_work_on() but tries to use the local CPU. */ static inline bool queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay) { return queue_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay); } /** * mod_delayed_work - modify delay of or queue a delayed work * @wq: workqueue to use * @dwork: work to queue * @delay: number of jiffies to wait before queueing * * mod_delayed_work_on() on local CPU. */ static inline bool mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay) { return mod_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay); } /** * schedule_work_on - put work task on a specific cpu * @cpu: cpu to put the work task on * @work: job to be done * * This puts a job on a specific cpu */ static inline bool schedule_work_on(int cpu, struct work_struct *work) { return queue_work_on(cpu, system_wq, work); } /** * schedule_work - put work task in global workqueue * @work: job to be done * * Returns %false if @work was already on the kernel-global workqueue and * %true otherwise. * * This puts a job in the kernel-global workqueue if it was not already * queued and leaves it in the same position on the kernel-global * workqueue otherwise. * * Shares the same memory-ordering properties of queue_work(), cf. the * DocBook header of queue_work(). */ static inline bool schedule_work(struct work_struct *work) { return queue_work(system_wq, work); } /** * enable_and_queue_work - Enable and queue a work item on a specific workqueue * @wq: The target workqueue * @work: The work item to be enabled and queued * * This function combines the operations of enable_work() and queue_work(), * providing a convenient way to enable and queue a work item in a single call. * It invokes enable_work() on @work and then queues it if the disable depth * reached 0. Returns %true if the disable depth reached 0 and @work is queued, * and %false otherwise. * * Note that @work is always queued when disable depth reaches zero. If the * desired behavior is queueing only if certain events took place while @work is * disabled, the user should implement the necessary state tracking and perform * explicit conditional queueing after enable_work(). */ static inline bool enable_and_queue_work(struct workqueue_struct *wq, struct work_struct *work) { if (enable_work(work)) { queue_work(wq, work); return true; } return false; } /* * Detect attempt to flush system-wide workqueues at compile time when possible. * Warn attempt to flush system-wide workqueues at runtime. * * See https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp * for reasons and steps for converting system-wide workqueues into local workqueues. 
*/ extern void __warn_flushing_systemwide_wq(void) __compiletime_warning("Please avoid flushing system-wide workqueues."); /* Please stop using this function, for this function will be removed in near future. */ #define flush_scheduled_work() \ ({ \ __warn_flushing_systemwide_wq(); \ __flush_workqueue(system_wq); \ }) #define flush_workqueue(wq) \ ({ \ struct workqueue_struct *_wq = (wq); \ \ if ((__builtin_constant_p(_wq == system_wq) && \ _wq == system_wq) || \ (__builtin_constant_p(_wq == system_highpri_wq) && \ _wq == system_highpri_wq) || \ (__builtin_constant_p(_wq == system_long_wq) && \ _wq == system_long_wq) || \ (__builtin_constant_p(_wq == system_unbound_wq) && \ _wq == system_unbound_wq) || \ (__builtin_constant_p(_wq == system_freezable_wq) && \ _wq == system_freezable_wq) || \ (__builtin_constant_p(_wq == system_power_efficient_wq) && \ _wq == system_power_efficient_wq) || \ (__builtin_constant_p(_wq == system_freezable_power_efficient_wq) && \ _wq == system_freezable_power_efficient_wq)) \ __warn_flushing_systemwide_wq(); \ __flush_workqueue(_wq); \ }) /** * schedule_delayed_work_on - queue work in global workqueue on CPU after delay * @cpu: cpu to use * @dwork: job to be done * @delay: number of jiffies to wait * * After waiting for a given time this puts a job in the kernel-global * workqueue on the specified CPU. */ static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay) { return queue_delayed_work_on(cpu, system_wq, dwork, delay); } /** * schedule_delayed_work - put work task in global workqueue after delay * @dwork: job to be done * @delay: number of jiffies to wait or 0 for immediate execution * * After waiting for a given time this puts a job in the kernel-global * workqueue. */ static inline bool schedule_delayed_work(struct delayed_work *dwork, unsigned long delay) { return queue_delayed_work(system_wq, dwork, delay); } #ifndef CONFIG_SMP static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg) { return fn(arg); } static inline long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg) { return fn(arg); } #else long work_on_cpu_key(int cpu, long (*fn)(void *), void *arg, struct lock_class_key *key); /* * A new key is defined for each caller to make sure the work * associated with the function doesn't share its locking class. */ #define work_on_cpu(_cpu, _fn, _arg) \ ({ \ static struct lock_class_key __key; \ \ work_on_cpu_key(_cpu, _fn, _arg, &__key); \ }) long work_on_cpu_safe_key(int cpu, long (*fn)(void *), void *arg, struct lock_class_key *key); /* * A new key is defined for each caller to make sure the work * associated with the function doesn't share its locking class. 
*/ #define work_on_cpu_safe(_cpu, _fn, _arg) \ ({ \ static struct lock_class_key __key; \ \ work_on_cpu_safe_key(_cpu, _fn, _arg, &__key); \ }) #endif /* CONFIG_SMP */ #ifdef CONFIG_FREEZER extern void freeze_workqueues_begin(void); extern bool freeze_workqueues_busy(void); extern void thaw_workqueues(void); #endif /* CONFIG_FREEZER */ #ifdef CONFIG_SYSFS int workqueue_sysfs_register(struct workqueue_struct *wq); #else /* CONFIG_SYSFS */ static inline int workqueue_sysfs_register(struct workqueue_struct *wq) { return 0; } #endif /* CONFIG_SYSFS */ #ifdef CONFIG_WQ_WATCHDOG void wq_watchdog_touch(int cpu); #else /* CONFIG_WQ_WATCHDOG */ static inline void wq_watchdog_touch(int cpu) { } #endif /* CONFIG_WQ_WATCHDOG */ #ifdef CONFIG_SMP int workqueue_prepare_cpu(unsigned int cpu); int workqueue_online_cpu(unsigned int cpu); int workqueue_offline_cpu(unsigned int cpu); #endif void __init workqueue_init_early(void); void __init workqueue_init(void); void __init workqueue_init_topology(void); #endif
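/*
 * Editor's usage sketch for the workqueue API declared above (not part of
 * the header): "struct example_dev", its fields and the example_* function
 * names are hypothetical. It shows alloc_workqueue(), INIT_WORK(),
 * INIT_DELAYED_WORK(), queue_work()/queue_delayed_work() and the matching
 * cancel/destroy calls on teardown.
 */
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>

struct example_dev {
	struct workqueue_struct *wq;
	struct work_struct refresh_work;
	struct delayed_work poll_work;
};

static void example_refresh_fn(struct work_struct *work)
{
	struct example_dev *edev =
		container_of(work, struct example_dev, refresh_work);

	/* deferred, sleepable context; do the actual refresh here */
	(void)edev;
}

static void example_poll_fn(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct example_dev *edev =
		container_of(dwork, struct example_dev, poll_work);

	/* periodic work: re-arm itself one second from now */
	queue_delayed_work(edev->wq, &edev->poll_work, HZ);
}

static int example_init(struct example_dev *edev)
{
	/* max_active == 0 selects the default (WQ_DFL_ACTIVE) */
	edev->wq = alloc_workqueue("example_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
	if (!edev->wq)
		return -ENOMEM;

	INIT_WORK(&edev->refresh_work, example_refresh_fn);
	INIT_DELAYED_WORK(&edev->poll_work, example_poll_fn);

	queue_work(edev->wq, &edev->refresh_work);
	queue_delayed_work(edev->wq, &edev->poll_work, HZ);
	return 0;
}

static void example_exit(struct example_dev *edev)
{
	/* cancel before destroying so no work item is left pending */
	cancel_delayed_work_sync(&edev->poll_work);
	cancel_work_sync(&edev->refresh_work);
	destroy_workqueue(edev->wq);
}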
// SPDX-License-Identifier: GPL-2.0+ /* * Fast-charge control for Apple "MFi" devices * * Copyright (C) 2019 Bastien Nocera <hadess@hadess.net> */ /* Standard include files */ #include <linux/module.h> #include <linux/power_supply.h> #include <linux/slab.h> #include <linux/usb.h> MODULE_AUTHOR("Bastien Nocera <hadess@hadess.net>"); MODULE_DESCRIPTION("Fast-charge control for Apple \"MFi\" devices"); MODULE_LICENSE("GPL"); #define TRICKLE_CURRENT_MA 0 #define FAST_CURRENT_MA 2500 #define APPLE_VENDOR_ID 0x05ac /* Apple */ /* The product ID is defined as starting with 0x12nn, as per the * "Choosing an Apple Device USB Configuration" section in * release R9 (2012) of the "MFi Accessory Hardware Specification" * * To distinguish an Apple device, a USB host can check the device * descriptor of attached USB devices for the following fields: * ■ Vendor ID: 0x05AC * ■ Product ID: 0x12nn * * Those checks will be done in .match() and .probe().
*/ static const struct usb_device_id mfi_fc_id_table[] = { { .idVendor = APPLE_VENDOR_ID, .match_flags = USB_DEVICE_ID_MATCH_VENDOR }, {}, }; MODULE_DEVICE_TABLE(usb, mfi_fc_id_table); /* Driver-local specific stuff */ struct mfi_device { struct usb_device *udev; struct power_supply *battery; int charge_type; }; static int apple_mfi_fc_set_charge_type(struct mfi_device *mfi, const union power_supply_propval *val) { int current_ma; int retval; __u8 request_type; if (mfi->charge_type == val->intval) { dev_dbg(&mfi->udev->dev, "charge type %d already set\n", mfi->charge_type); return 0; } switch (val->intval) { case POWER_SUPPLY_CHARGE_TYPE_TRICKLE: current_ma = TRICKLE_CURRENT_MA; break; case POWER_SUPPLY_CHARGE_TYPE_FAST: current_ma = FAST_CURRENT_MA; break; default: return -EINVAL; } request_type = USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE; retval = usb_control_msg(mfi->udev, usb_sndctrlpipe(mfi->udev, 0), 0x40, /* Vendor‐defined power request */ request_type, current_ma, /* wValue, current offset */ current_ma, /* wIndex, current offset */ NULL, 0, USB_CTRL_GET_TIMEOUT); if (retval) { dev_dbg(&mfi->udev->dev, "retval = %d\n", retval); return retval; } mfi->charge_type = val->intval; return 0; } static int apple_mfi_fc_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val) { struct mfi_device *mfi = power_supply_get_drvdata(psy); dev_dbg(&mfi->udev->dev, "prop: %d\n", psp); switch (psp) { case POWER_SUPPLY_PROP_CHARGE_TYPE: val->intval = mfi->charge_type; break; case POWER_SUPPLY_PROP_SCOPE: val->intval = POWER_SUPPLY_SCOPE_DEVICE; break; default: return -ENODATA; } return 0; } static int apple_mfi_fc_set_property(struct power_supply *psy, enum power_supply_property psp, const union power_supply_propval *val) { struct mfi_device *mfi = power_supply_get_drvdata(psy); int ret; dev_dbg(&mfi->udev->dev, "prop: %d\n", psp); ret = pm_runtime_get_sync(&mfi->udev->dev); if (ret < 0) { pm_runtime_put_noidle(&mfi->udev->dev); return ret; } switch (psp) { case POWER_SUPPLY_PROP_CHARGE_TYPE: ret = apple_mfi_fc_set_charge_type(mfi, val); break; default: ret = -EINVAL; } pm_runtime_mark_last_busy(&mfi->udev->dev); pm_runtime_put_autosuspend(&mfi->udev->dev); return ret; } static int apple_mfi_fc_property_is_writeable(struct power_supply *psy, enum power_supply_property psp) { switch (psp) { case POWER_SUPPLY_PROP_CHARGE_TYPE: return 1; default: return 0; } } static enum power_supply_property apple_mfi_fc_properties[] = { POWER_SUPPLY_PROP_CHARGE_TYPE, POWER_SUPPLY_PROP_SCOPE }; static const struct power_supply_desc apple_mfi_fc_desc = { .name = "apple_mfi_fastcharge", .type = POWER_SUPPLY_TYPE_BATTERY, .properties = apple_mfi_fc_properties, .num_properties = ARRAY_SIZE(apple_mfi_fc_properties), .get_property = apple_mfi_fc_get_property, .set_property = apple_mfi_fc_set_property, .property_is_writeable = apple_mfi_fc_property_is_writeable }; static bool mfi_fc_match(struct usb_device *udev) { int idProduct; idProduct = le16_to_cpu(udev->descriptor.idProduct); /* See comment above mfi_fc_id_table[] */ return (idProduct >= 0x1200 && idProduct <= 0x12ff); } static int mfi_fc_probe(struct usb_device *udev) { struct power_supply_config battery_cfg = {}; struct mfi_device *mfi = NULL; int err; if (!mfi_fc_match(udev)) return -ENODEV; mfi = kzalloc(sizeof(struct mfi_device), GFP_KERNEL); if (!mfi) return -ENOMEM; battery_cfg.drv_data = mfi; mfi->charge_type = POWER_SUPPLY_CHARGE_TYPE_TRICKLE; mfi->battery = power_supply_register(&udev->dev, 
&apple_mfi_fc_desc, &battery_cfg); if (IS_ERR(mfi->battery)) { dev_err(&udev->dev, "Can't register battery\n"); err = PTR_ERR(mfi->battery); kfree(mfi); return err; } mfi->udev = usb_get_dev(udev); dev_set_drvdata(&udev->dev, mfi); return 0; } static void mfi_fc_disconnect(struct usb_device *udev) { struct mfi_device *mfi; mfi = dev_get_drvdata(&udev->dev); if (mfi->battery) power_supply_unregister(mfi->battery); dev_set_drvdata(&udev->dev, NULL); usb_put_dev(mfi->udev); kfree(mfi); } static struct usb_device_driver mfi_fc_driver = { .name = "apple-mfi-fastcharge", .probe = mfi_fc_probe, .disconnect = mfi_fc_disconnect, .id_table = mfi_fc_id_table, .match = mfi_fc_match, .generic_subclass = 1, }; static int __init mfi_fc_driver_init(void) { return usb_register_device_driver(&mfi_fc_driver, THIS_MODULE); } static void __exit mfi_fc_driver_exit(void) { usb_deregister_device_driver(&mfi_fc_driver); } module_init(mfi_fc_driver_init); module_exit(mfi_fc_driver_exit);
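/*
 * Editor's usage sketch (not part of the driver): the power-supply core
 * exposes the writable POWER_SUPPLY_PROP_CHARGE_TYPE property registered
 * above as a "charge_type" sysfs attribute. Assuming the standard
 * /sys/class/power_supply layout and the "apple_mfi_fastcharge" name from
 * apple_mfi_fc_desc, a userspace program could request fast charging as
 * below; the exact path and accepted strings may differ per system.
 */
#include <stdio.h>

int main(void)
{
	const char *path =
		"/sys/class/power_supply/apple_mfi_fastcharge/charge_type";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* "Fast" corresponds to POWER_SUPPLY_CHARGE_TYPE_FAST (FAST_CURRENT_MA
	 * above); "Trickle" switches back to POWER_SUPPLY_CHARGE_TYPE_TRICKLE. */
	if (fputs("Fast", f) < 0)
		perror("fputs");
	if (fclose(f) != 0)
		perror("fclose");
	return 0;
}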
// SPDX-License-Identifier: GPL-2.0-only /* * linux/mm/swapfile.c * * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds * Swap reorganised 29.12.95, Stephen Tweedie */ #include <linux/blkdev.h> #include <linux/mm.h> #include <linux/sched/mm.h> #include <linux/sched/task.h> #include <linux/hugetlb.h> #include <linux/mman.h> #include <linux/slab.h> #include <linux/kernel_stat.h> #include <linux/swap.h> #include <linux/vmalloc.h> #include <linux/pagemap.h> #include <linux/namei.h> #include <linux/shmem_fs.h> #include <linux/blk-cgroup.h> #include <linux/random.h> #include <linux/writeback.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/init.h> #include <linux/ksm.h> #include <linux/rmap.h> #include <linux/security.h> #include <linux/backing-dev.h> #include <linux/mutex.h> #include <linux/capability.h> #include <linux/syscalls.h> #include <linux/memcontrol.h> #include <linux/poll.h> #include <linux/oom.h> #include <linux/swapfile.h> #include <linux/export.h> #include <linux/sort.h> #include <linux/completion.h> #include <linux/suspend.h> #include <linux/zswap.h> #include <linux/plist.h> #include <asm/tlbflush.h> #include <linux/swapops.h> #include <linux/swap_cgroup.h> #include "internal.h" #include "swap.h" static bool swap_count_continued(struct swap_info_struct *, pgoff_t, unsigned char); static void free_swap_count_continuations(struct swap_info_struct *); static void swap_entry_range_free(struct swap_info_struct *si, struct swap_cluster_info *ci, swp_entry_t entry, unsigned int nr_pages); static void swap_range_alloc(struct swap_info_struct *si, unsigned int nr_entries); static bool folio_swapcache_freeable(struct folio *folio); static struct swap_cluster_info *lock_cluster(struct swap_info_struct *si, unsigned long offset); static inline void unlock_cluster(struct swap_cluster_info *ci); static DEFINE_SPINLOCK(swap_lock); static unsigned int nr_swapfiles; atomic_long_t nr_swap_pages; /* * Some modules use swappable objects and may try to swap them out under * memory pressure (via the shrinker).
Before doing so, they may wish to * check to see if any swap space is available. */ EXPORT_SYMBOL_GPL(nr_swap_pages); /* protected with swap_lock. reading in vm_swap_full() doesn't need lock */ long total_swap_pages; static int least_priority = -1; unsigned long swapfile_maximum_size; #ifdef CONFIG_MIGRATION bool swap_migration_ad_supported; #endif /* CONFIG_MIGRATION */ static const char Bad_file[] = "Bad swap file entry "; static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; static const char Unused_offset[] = "Unused swap offset entry "; /* * all active swap_info_structs * protected with swap_lock, and ordered by priority. */ static PLIST_HEAD(swap_active_head); /* * all available (active, not full) swap_info_structs * protected with swap_avail_lock, ordered by priority. * This is used by folio_alloc_swap() instead of swap_active_head * because swap_active_head includes all swap_info_structs, * but folio_alloc_swap() doesn't need to look at full ones. * This uses its own lock instead of swap_lock because when a * swap_info_struct changes between not-full/full, it needs to * add/remove itself to/from this list, but the swap_info_struct->lock * is held and the locking order requires swap_lock to be taken * before any swap_info_struct->lock. */ static struct plist_head *swap_avail_heads; static DEFINE_SPINLOCK(swap_avail_lock); static struct swap_info_struct *swap_info[MAX_SWAPFILES]; static DEFINE_MUTEX(swapon_mutex); static DECLARE_WAIT_QUEUE_HEAD(proc_poll_wait); /* Activity counter to indicate that a swapon or swapoff has occurred */ static atomic_t proc_poll_event = ATOMIC_INIT(0); atomic_t nr_rotate_swap = ATOMIC_INIT(0); struct percpu_swap_cluster { struct swap_info_struct *si[SWAP_NR_ORDERS]; unsigned long offset[SWAP_NR_ORDERS]; local_lock_t lock; }; static DEFINE_PER_CPU(struct percpu_swap_cluster, percpu_swap_cluster) = { .si = { NULL }, .offset = { SWAP_ENTRY_INVALID }, .lock = INIT_LOCAL_LOCK(), }; static struct swap_info_struct *swap_type_to_swap_info(int type) { if (type >= MAX_SWAPFILES) return NULL; return READ_ONCE(swap_info[type]); /* rcu_dereference() */ } static inline unsigned char swap_count(unsigned char ent) { return ent & ~SWAP_HAS_CACHE; /* may include COUNT_CONTINUED flag */ } /* * Use the second highest bit of inuse_pages counter as the indicator * if one swap device is on the available plist, so the atomic can * still be updated arithmetically while having special data embedded. * * inuse_pages counter is the only thing indicating if a device should * be on avail_lists or not (except swapon / swapoff). By embedding the * off-list bit in the atomic counter, updates no longer need any lock * to check the list status. * * This bit will be set if the device is not on the plist and not * usable, will be cleared if the device is on the plist. 
*/ #define SWAP_USAGE_OFFLIST_BIT (1UL << (BITS_PER_TYPE(atomic_t) - 2)) #define SWAP_USAGE_COUNTER_MASK (~SWAP_USAGE_OFFLIST_BIT) static long swap_usage_in_pages(struct swap_info_struct *si) { return atomic_long_read(&si->inuse_pages) & SWAP_USAGE_COUNTER_MASK; } /* Reclaim the swap entry anyway if possible */ #define TTRS_ANYWAY 0x1 /* * Reclaim the swap entry if there are no more mappings of the * corresponding page */ #define TTRS_UNMAPPED 0x2 /* Reclaim the swap entry if swap is getting full */ #define TTRS_FULL 0x4 static bool swap_only_has_cache(struct swap_info_struct *si, unsigned long offset, int nr_pages) { unsigned char *map = si->swap_map + offset; unsigned char *map_end = map + nr_pages; do { VM_BUG_ON(!(*map & SWAP_HAS_CACHE)); if (*map != SWAP_HAS_CACHE) return false; } while (++map < map_end); return true; } static bool swap_is_last_map(struct swap_info_struct *si, unsigned long offset, int nr_pages, bool *has_cache) { unsigned char *map = si->swap_map + offset; unsigned char *map_end = map + nr_pages; unsigned char count = *map; if (swap_count(count) != 1) return false; while (++map < map_end) { if (*map != count) return false; } *has_cache = !!(count & SWAP_HAS_CACHE); return true; } /* * Returns the number of pages in the folio that backs the swap entry. If positive, * the folio was reclaimed. If negative, the folio was not reclaimed. If 0, no * folio was associated with the swap entry. */ static int __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset, unsigned long flags) { swp_entry_t entry = swp_entry(si->type, offset); struct address_space *address_space = swap_address_space(entry); struct swap_cluster_info *ci; struct folio *folio; int ret, nr_pages; bool need_reclaim; again: folio = filemap_get_folio(address_space, swap_cache_index(entry)); if (IS_ERR(folio)) return 0; nr_pages = folio_nr_pages(folio); ret = -nr_pages; /* * This function is called from scan_swap_map_slots() and by vmscan.c * when reclaiming folios, so a folio lock may already be held here. We * have to use trylock to avoid deadlock. This is a special case; in * usual operations, use folio_free_swap() with an explicit folio_lock(). */ if (!folio_trylock(folio)) goto out; /* * Offset could point to the middle of a large folio, or folio * may no longer point to the expected offset before it's locked. */ entry = folio->swap; if (offset < swp_offset(entry) || offset >= swp_offset(entry) + nr_pages) { folio_unlock(folio); folio_put(folio); goto again; } offset = swp_offset(entry); need_reclaim = ((flags & TTRS_ANYWAY) || ((flags & TTRS_UNMAPPED) && !folio_mapped(folio)) || ((flags & TTRS_FULL) && mem_cgroup_swap_full(folio))); if (!need_reclaim || !folio_swapcache_freeable(folio)) goto out_unlock; /* * It's safe to delete the folio from swap cache only if the folio's * swap_map is HAS_CACHE only, which means the slots have no page table * reference or pending writeback, and can't be allocated to others.
*/ ci = lock_cluster(si, offset); need_reclaim = swap_only_has_cache(si, offset, nr_pages); unlock_cluster(ci); if (!need_reclaim) goto out_unlock; delete_from_swap_cache(folio); folio_set_dirty(folio); ret = nr_pages; out_unlock: folio_unlock(folio); out: folio_put(folio); return ret; } static inline struct swap_extent *first_se(struct swap_info_struct *sis) { struct rb_node *rb = rb_first(&sis->swap_extent_root); return rb_entry(rb, struct swap_extent, rb_node); } static inline struct swap_extent *next_se(struct swap_extent *se) { struct rb_node *rb = rb_next(&se->rb_node); return rb ? rb_entry(rb, struct swap_extent, rb_node) : NULL; } /* * swapon tell device that all the old swap contents can be discarded, * to allow the swap device to optimize its wear-levelling. */ static int discard_swap(struct swap_info_struct *si) { struct swap_extent *se; sector_t start_block; sector_t nr_blocks; int err = 0; /* Do not discard the swap header page! */ se = first_se(si); start_block = (se->start_block + 1) << (PAGE_SHIFT - 9); nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9); if (nr_blocks) { err = blkdev_issue_discard(si->bdev, start_block, nr_blocks, GFP_KERNEL); if (err) return err; cond_resched(); } for (se = next_se(se); se; se = next_se(se)) { start_block = se->start_block << (PAGE_SHIFT - 9); nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9); err = blkdev_issue_discard(si->bdev, start_block, nr_blocks, GFP_KERNEL); if (err) break; cond_resched(); } return err; /* That will often be -EOPNOTSUPP */ } static struct swap_extent * offset_to_swap_extent(struct swap_info_struct *sis, unsigned long offset) { struct swap_extent *se; struct rb_node *rb; rb = sis->swap_extent_root.rb_node; while (rb) { se = rb_entry(rb, struct swap_extent, rb_node); if (offset < se->start_page) rb = rb->rb_left; else if (offset >= se->start_page + se->nr_pages) rb = rb->rb_right; else return se; } /* It *must* be present */ BUG(); } sector_t swap_folio_sector(struct folio *folio) { struct swap_info_struct *sis = swp_swap_info(folio->swap); struct swap_extent *se; sector_t sector; pgoff_t offset; offset = swp_offset(folio->swap); se = offset_to_swap_extent(sis, offset); sector = se->start_block + (offset - se->start_page); return sector << (PAGE_SHIFT - 9); } /* * swap allocation tell device that a cluster of swap can now be discarded, * to allow the swap device to optimize its wear-levelling. 
*/ static void discard_swap_cluster(struct swap_info_struct *si, pgoff_t start_page, pgoff_t nr_pages) { struct swap_extent *se = offset_to_swap_extent(si, start_page); while (nr_pages) { pgoff_t offset = start_page - se->start_page; sector_t start_block = se->start_block + offset; sector_t nr_blocks = se->nr_pages - offset; if (nr_blocks > nr_pages) nr_blocks = nr_pages; start_page += nr_blocks; nr_pages -= nr_blocks; start_block <<= PAGE_SHIFT - 9; nr_blocks <<= PAGE_SHIFT - 9; if (blkdev_issue_discard(si->bdev, start_block, nr_blocks, GFP_NOIO)) break; se = next_se(se); } } #ifdef CONFIG_THP_SWAP #define SWAPFILE_CLUSTER HPAGE_PMD_NR #define swap_entry_order(order) (order) #else #define SWAPFILE_CLUSTER 256 /* * Define swap_entry_order() as constant to let compiler to optimize * out some code if !CONFIG_THP_SWAP */ #define swap_entry_order(order) 0 #endif #define LATENCY_LIMIT 256 static inline bool cluster_is_empty(struct swap_cluster_info *info) { return info->count == 0; } static inline bool cluster_is_discard(struct swap_cluster_info *info) { return info->flags == CLUSTER_FLAG_DISCARD; } static inline bool cluster_is_usable(struct swap_cluster_info *ci, int order) { if (unlikely(ci->flags > CLUSTER_FLAG_USABLE)) return false; if (!order) return true; return cluster_is_empty(ci) || order == ci->order; } static inline unsigned int cluster_index(struct swap_info_struct *si, struct swap_cluster_info *ci) { return ci - si->cluster_info; } static inline struct swap_cluster_info *offset_to_cluster(struct swap_info_struct *si, unsigned long offset) { return &si->cluster_info[offset / SWAPFILE_CLUSTER]; } static inline unsigned int cluster_offset(struct swap_info_struct *si, struct swap_cluster_info *ci) { return cluster_index(si, ci) * SWAPFILE_CLUSTER; } static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si, unsigned long offset) { struct swap_cluster_info *ci; ci = offset_to_cluster(si, offset); spin_lock(&ci->lock); return ci; } static inline void unlock_cluster(struct swap_cluster_info *ci) { spin_unlock(&ci->lock); } static void move_cluster(struct swap_info_struct *si, struct swap_cluster_info *ci, struct list_head *list, enum swap_cluster_flags new_flags) { VM_WARN_ON(ci->flags == new_flags); BUILD_BUG_ON(1 << sizeof(ci->flags) * BITS_PER_BYTE < CLUSTER_FLAG_MAX); lockdep_assert_held(&ci->lock); spin_lock(&si->lock); if (ci->flags == CLUSTER_FLAG_NONE) list_add_tail(&ci->list, list); else list_move_tail(&ci->list, list); spin_unlock(&si->lock); if (ci->flags == CLUSTER_FLAG_FRAG) atomic_long_dec(&si->frag_cluster_nr[ci->order]); else if (new_flags == CLUSTER_FLAG_FRAG) atomic_long_inc(&si->frag_cluster_nr[ci->order]); ci->flags = new_flags; } /* Add a cluster to discard list and schedule it to do discard */ static void swap_cluster_schedule_discard(struct swap_info_struct *si, struct swap_cluster_info *ci) { VM_BUG_ON(ci->flags == CLUSTER_FLAG_FREE); move_cluster(si, ci, &si->discard_clusters, CLUSTER_FLAG_DISCARD); schedule_work(&si->discard_work); } static void __free_cluster(struct swap_info_struct *si, struct swap_cluster_info *ci) { lockdep_assert_held(&ci->lock); move_cluster(si, ci, &si->free_clusters, CLUSTER_FLAG_FREE); ci->order = 0; } /* * Isolate and lock the first cluster that is not contented on a list, * clean its flag before taken off-list. Cluster flag must be in sync * with list status, so cluster updaters can always know the cluster * list status without touching si lock. 
* * Note it's possible that all clusters on a list are contented so * this returns NULL for an non-empty list. */ static struct swap_cluster_info *isolate_lock_cluster( struct swap_info_struct *si, struct list_head *list) { struct swap_cluster_info *ci, *ret = NULL; spin_lock(&si->lock); if (unlikely(!(si->flags & SWP_WRITEOK))) goto out; list_for_each_entry(ci, list, list) { if (!spin_trylock(&ci->lock)) continue; /* We may only isolate and clear flags of following lists */ VM_BUG_ON(!ci->flags); VM_BUG_ON(ci->flags > CLUSTER_FLAG_USABLE && ci->flags != CLUSTER_FLAG_FULL); list_del(&ci->list); ci->flags = CLUSTER_FLAG_NONE; ret = ci; break; } out: spin_unlock(&si->lock); return ret; } /* * Doing discard actually. After a cluster discard is finished, the cluster * will be added to free cluster list. Discard cluster is a bit special as * they don't participate in allocation or reclaim, so clusters marked as * CLUSTER_FLAG_DISCARD must remain off-list or on discard list. */ static bool swap_do_scheduled_discard(struct swap_info_struct *si) { struct swap_cluster_info *ci; bool ret = false; unsigned int idx; spin_lock(&si->lock); while (!list_empty(&si->discard_clusters)) { ci = list_first_entry(&si->discard_clusters, struct swap_cluster_info, list); /* * Delete the cluster from list to prepare for discard, but keep * the CLUSTER_FLAG_DISCARD flag, percpu_swap_cluster could be * pointing to it, or ran into by relocate_cluster. */ list_del(&ci->list); idx = cluster_index(si, ci); spin_unlock(&si->lock); discard_swap_cluster(si, idx * SWAPFILE_CLUSTER, SWAPFILE_CLUSTER); spin_lock(&ci->lock); /* * Discard is done, clear its flags as it's off-list, then * return the cluster to allocation list. */ ci->flags = CLUSTER_FLAG_NONE; __free_cluster(si, ci); spin_unlock(&ci->lock); ret = true; spin_lock(&si->lock); } spin_unlock(&si->lock); return ret; } static void swap_discard_work(struct work_struct *work) { struct swap_info_struct *si; si = container_of(work, struct swap_info_struct, discard_work); swap_do_scheduled_discard(si); } static void swap_users_ref_free(struct percpu_ref *ref) { struct swap_info_struct *si; si = container_of(ref, struct swap_info_struct, users); complete(&si->comp); } /* * Must be called after freeing if ci->count == 0, moves the cluster to free * or discard list. */ static void free_cluster(struct swap_info_struct *si, struct swap_cluster_info *ci) { VM_BUG_ON(ci->count != 0); VM_BUG_ON(ci->flags == CLUSTER_FLAG_FREE); lockdep_assert_held(&ci->lock); /* * If the swap is discardable, prepare discard the cluster * instead of free it immediately. The cluster will be freed * after discard. */ if ((si->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) == (SWP_WRITEOK | SWP_PAGE_DISCARD)) { swap_cluster_schedule_discard(si, ci); return; } __free_cluster(si, ci); } /* * Must be called after freeing if ci->count != 0, moves the cluster to * nonfull list. */ static void partial_free_cluster(struct swap_info_struct *si, struct swap_cluster_info *ci) { VM_BUG_ON(!ci->count || ci->count == SWAPFILE_CLUSTER); lockdep_assert_held(&ci->lock); if (ci->flags != CLUSTER_FLAG_NONFULL) move_cluster(si, ci, &si->nonfull_clusters[ci->order], CLUSTER_FLAG_NONFULL); } /* * Must be called after allocation, moves the cluster to full or frag list. * Note: allocation doesn't acquire si lock, and may drop the ci lock for * reclaim, so the cluster could be any where when called. 
*/ static void relocate_cluster(struct swap_info_struct *si, struct swap_cluster_info *ci) { lockdep_assert_held(&ci->lock); /* Discard cluster must remain off-list or on discard list */ if (cluster_is_discard(ci)) return; if (!ci->count) { if (ci->flags != CLUSTER_FLAG_FREE) free_cluster(si, ci); } else if (ci->count != SWAPFILE_CLUSTER) { if (ci->flags != CLUSTER_FLAG_FRAG) move_cluster(si, ci, &si->frag_clusters[ci->order], CLUSTER_FLAG_FRAG); } else { if (ci->flags != CLUSTER_FLAG_FULL) move_cluster(si, ci, &si->full_clusters, CLUSTER_FLAG_FULL); } } /* * The cluster corresponding to page_nr will be used. The cluster will not be * added to free cluster list and its usage counter will be increased by 1. * Only used for initialization. */ static void inc_cluster_info_page(struct swap_info_struct *si, struct swap_cluster_info *cluster_info, unsigned long page_nr) { unsigned long idx = page_nr / SWAPFILE_CLUSTER; struct swap_cluster_info *ci; ci = cluster_info + idx; ci->count++; VM_BUG_ON(ci->count > SWAPFILE_CLUSTER); VM_BUG_ON(ci->flags); } static bool cluster_reclaim_range(struct swap_info_struct *si, struct swap_cluster_info *ci, unsigned long start, unsigned long end) { unsigned char *map = si->swap_map; unsigned long offset = start; int nr_reclaim; spin_unlock(&ci->lock); do { switch (READ_ONCE(map[offset])) { case 0: offset++; break; case SWAP_HAS_CACHE: nr_reclaim = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY); if (nr_reclaim > 0) offset += nr_reclaim; else goto out; break; default: goto out; } } while (offset < end); out: spin_lock(&ci->lock); /* * Recheck the range no matter reclaim succeeded or not, the slot * could have been be freed while we are not holding the lock. */ for (offset = start; offset < end; offset++) if (READ_ONCE(map[offset])) return false; return true; } static bool cluster_scan_range(struct swap_info_struct *si, struct swap_cluster_info *ci, unsigned long start, unsigned int nr_pages, bool *need_reclaim) { unsigned long offset, end = start + nr_pages; unsigned char *map = si->swap_map; if (cluster_is_empty(ci)) return true; for (offset = start; offset < end; offset++) { switch (READ_ONCE(map[offset])) { case 0: continue; case SWAP_HAS_CACHE: if (!vm_swap_full()) return false; *need_reclaim = true; continue; default: return false; } } return true; } static bool cluster_alloc_range(struct swap_info_struct *si, struct swap_cluster_info *ci, unsigned int start, unsigned char usage, unsigned int order) { unsigned int nr_pages = 1 << order; lockdep_assert_held(&ci->lock); if (!(si->flags & SWP_WRITEOK)) return false; /* * The first allocation in a cluster makes the * cluster exclusive to this order */ if (cluster_is_empty(ci)) ci->order = order; memset(si->swap_map + start, usage, nr_pages); swap_range_alloc(si, nr_pages); ci->count += nr_pages; return true; } /* Try use a new cluster for current CPU and allocate from it. 
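/*
 * Sketch of the decision made by relocate_cluster() above, with the three
 * destinations reduced to an enum: an empty cluster is freed (or queued for
 * discard), a partially used one goes to the frag list for its order, and a
 * completely used one goes to the full list. Userspace model only.
 */
enum model_dest { MODEL_DEST_FREE, MODEL_DEST_FRAG, MODEL_DEST_FULL };

static enum model_dest model_relocate(unsigned int count,
                                      unsigned int cluster_size)
{
        if (count == 0)
                return MODEL_DEST_FREE;
        if (count != cluster_size)
                return MODEL_DEST_FRAG;
        return MODEL_DEST_FULL;
}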
*/ static unsigned int alloc_swap_scan_cluster(struct swap_info_struct *si, struct swap_cluster_info *ci, unsigned long offset, unsigned int order, unsigned char usage) { unsigned int next = SWAP_ENTRY_INVALID, found = SWAP_ENTRY_INVALID; unsigned long start = ALIGN_DOWN(offset, SWAPFILE_CLUSTER); unsigned long end = min(start + SWAPFILE_CLUSTER, si->max); unsigned int nr_pages = 1 << order; bool need_reclaim, ret; lockdep_assert_held(&ci->lock); if (end < nr_pages || ci->count + nr_pages > SWAPFILE_CLUSTER) goto out; for (end -= nr_pages; offset <= end; offset += nr_pages) { need_reclaim = false; if (!cluster_scan_range(si, ci, offset, nr_pages, &need_reclaim)) continue; if (need_reclaim) { ret = cluster_reclaim_range(si, ci, offset, offset + nr_pages); /* * Reclaim drops ci->lock and cluster could be used * by another order. Not checking flag as off-list * cluster has no flag set, and change of list * won't cause fragmentation. */ if (!cluster_is_usable(ci, order)) goto out; if (cluster_is_empty(ci)) offset = start; /* Reclaim failed but cluster is usable, try next */ if (!ret) continue; } if (!cluster_alloc_range(si, ci, offset, usage, order)) break; found = offset; offset += nr_pages; if (ci->count < SWAPFILE_CLUSTER && offset <= end) next = offset; break; } out: relocate_cluster(si, ci); unlock_cluster(ci); if (si->flags & SWP_SOLIDSTATE) { this_cpu_write(percpu_swap_cluster.offset[order], next); this_cpu_write(percpu_swap_cluster.si[order], si); } else { si->global_cluster->next[order] = next; } return found; } static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force) { long to_scan = 1; unsigned long offset, end; struct swap_cluster_info *ci; unsigned char *map = si->swap_map; int nr_reclaim; if (force) to_scan = swap_usage_in_pages(si) / SWAPFILE_CLUSTER; while ((ci = isolate_lock_cluster(si, &si->full_clusters))) { offset = cluster_offset(si, ci); end = min(si->max, offset + SWAPFILE_CLUSTER); to_scan--; while (offset < end) { if (READ_ONCE(map[offset]) == SWAP_HAS_CACHE) { spin_unlock(&ci->lock); nr_reclaim = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY); spin_lock(&ci->lock); if (nr_reclaim) { offset += abs(nr_reclaim); continue; } } offset++; } /* in case no swap cache is reclaimed */ if (ci->flags == CLUSTER_FLAG_NONE) relocate_cluster(si, ci); unlock_cluster(ci); if (to_scan <= 0) break; } } static void swap_reclaim_work(struct work_struct *work) { struct swap_info_struct *si; si = container_of(work, struct swap_info_struct, reclaim_work); swap_reclaim_full_clusters(si, true); } /* * Try to allocate swap entries with specified order and try set a new * cluster for current CPU too. */ static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int order, unsigned char usage) { struct swap_cluster_info *ci; unsigned int offset = SWAP_ENTRY_INVALID, found = SWAP_ENTRY_INVALID; /* * Swapfile is not block device so unable * to allocate large entries. */ if (order && !(si->flags & SWP_BLKDEV)) return 0; if (!(si->flags & SWP_SOLIDSTATE)) { /* Serialize HDD SWAP allocation for each device. 
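/*
 * Standalone model of the scan in alloc_swap_scan_cluster() above: look for
 * a naturally aligned run of 1 << order free slots inside one cluster,
 * stepping by the run length. map[] is 0 for a free slot; reclaim, locking
 * and the per-CPU "next" caching are deliberately left out, so this is only
 * the scanning arithmetic, and it assumes cluster_start is order-aligned.
 */
static long model_scan_cluster(const unsigned char *map,
                               unsigned long cluster_start,
                               unsigned long cluster_size,
                               unsigned int order)
{
        unsigned long nr = 1UL << order;

        for (unsigned long off = cluster_start;
             off + nr <= cluster_start + cluster_size; off += nr) {
                unsigned long i;

                for (i = 0; i < nr; i++)
                        if (map[off + i])
                                break;
                if (i == nr)
                        return (long)off;       /* aligned free run found */
        }
        return -1;
}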
*/ spin_lock(&si->global_cluster_lock); offset = si->global_cluster->next[order]; if (offset == SWAP_ENTRY_INVALID) goto new_cluster; ci = lock_cluster(si, offset); /* Cluster could have been used by another order */ if (cluster_is_usable(ci, order)) { if (cluster_is_empty(ci)) offset = cluster_offset(si, ci); found = alloc_swap_scan_cluster(si, ci, offset, order, usage); } else { unlock_cluster(ci); } if (found) goto done; } new_cluster: ci = isolate_lock_cluster(si, &si->free_clusters); if (ci) { found = alloc_swap_scan_cluster(si, ci, cluster_offset(si, ci), order, usage); if (found) goto done; } /* Try reclaim from full clusters if free clusters list is drained */ if (vm_swap_full()) swap_reclaim_full_clusters(si, false); if (order < PMD_ORDER) { unsigned int frags = 0, frags_existing; while ((ci = isolate_lock_cluster(si, &si->nonfull_clusters[order]))) { found = alloc_swap_scan_cluster(si, ci, cluster_offset(si, ci), order, usage); if (found) goto done; /* Clusters failed to allocate are moved to frag_clusters */ frags++; } frags_existing = atomic_long_read(&si->frag_cluster_nr[order]); while (frags < frags_existing && (ci = isolate_lock_cluster(si, &si->frag_clusters[order]))) { atomic_long_dec(&si->frag_cluster_nr[order]); /* * Rotate the frag list to iterate, they were all * failing high order allocation or moved here due to * per-CPU usage, but they could contain newly released * reclaimable (eg. lazy-freed swap cache) slots. */ found = alloc_swap_scan_cluster(si, ci, cluster_offset(si, ci), order, usage); if (found) goto done; frags++; } } /* * We don't have free cluster but have some clusters in * discarding, do discard now and reclaim them, then * reread cluster_next_cpu since we dropped si->lock */ if ((si->flags & SWP_PAGE_DISCARD) && swap_do_scheduled_discard(si)) goto new_cluster; if (order) goto done; /* Order 0 stealing from higher order */ for (int o = 1; o < SWAP_NR_ORDERS; o++) { /* * Clusters here have at least one usable slots and can't fail order 0 * allocation, but reclaim may drop si->lock and race with another user. */ while ((ci = isolate_lock_cluster(si, &si->frag_clusters[o]))) { atomic_long_dec(&si->frag_cluster_nr[o]); found = alloc_swap_scan_cluster(si, ci, cluster_offset(si, ci), 0, usage); if (found) goto done; } while ((ci = isolate_lock_cluster(si, &si->nonfull_clusters[o]))) { found = alloc_swap_scan_cluster(si, ci, cluster_offset(si, ci), 0, usage); if (found) goto done; } } done: if (!(si->flags & SWP_SOLIDSTATE)) spin_unlock(&si->global_cluster_lock); return found; } /* SWAP_USAGE_OFFLIST_BIT can only be set by this helper. */ static void del_from_avail_list(struct swap_info_struct *si, bool swapoff) { int nid; unsigned long pages; spin_lock(&swap_avail_lock); if (swapoff) { /* * Forcefully remove it. Clear the SWP_WRITEOK flags for * swapoff here so it's synchronized by both si->lock and * swap_avail_lock, to ensure the result can be seen by * add_to_avail_list. */ lockdep_assert_held(&si->lock); si->flags &= ~SWP_WRITEOK; atomic_long_or(SWAP_USAGE_OFFLIST_BIT, &si->inuse_pages); } else { /* * If not called by swapoff, take it off-list only if it's * full and SWAP_USAGE_OFFLIST_BIT is not set (strictly * si->inuse_pages == pages), any concurrent slot freeing, * or device already removed from plist by someone else * will make this return false. 
*/ pages = si->pages; if (!atomic_long_try_cmpxchg(&si->inuse_pages, &pages, pages | SWAP_USAGE_OFFLIST_BIT)) goto skip; } for_each_node(nid) plist_del(&si->avail_lists[nid], &swap_avail_heads[nid]); skip: spin_unlock(&swap_avail_lock); } /* SWAP_USAGE_OFFLIST_BIT can only be cleared by this helper. */ static void add_to_avail_list(struct swap_info_struct *si, bool swapon) { int nid; long val; unsigned long pages; spin_lock(&swap_avail_lock); /* Corresponding to SWP_WRITEOK clearing in del_from_avail_list */ if (swapon) { lockdep_assert_held(&si->lock); si->flags |= SWP_WRITEOK; } else { if (!(READ_ONCE(si->flags) & SWP_WRITEOK)) goto skip; } if (!(atomic_long_read(&si->inuse_pages) & SWAP_USAGE_OFFLIST_BIT)) goto skip; val = atomic_long_fetch_and_relaxed(~SWAP_USAGE_OFFLIST_BIT, &si->inuse_pages); /* * When device is full and device is on the plist, only one updater will * see (inuse_pages == si->pages) and will call del_from_avail_list. If * that updater happen to be here, just skip adding. */ pages = si->pages; if (val == pages) { /* Just like the cmpxchg in del_from_avail_list */ if (atomic_long_try_cmpxchg(&si->inuse_pages, &pages, pages | SWAP_USAGE_OFFLIST_BIT)) goto skip; } for_each_node(nid) plist_add(&si->avail_lists[nid], &swap_avail_heads[nid]); skip: spin_unlock(&swap_avail_lock); } /* * swap_usage_add / swap_usage_sub of each slot are serialized by ci->lock * within each cluster, so the total contribution to the global counter should * always be positive and cannot exceed the total number of usable slots. */ static bool swap_usage_add(struct swap_info_struct *si, unsigned int nr_entries) { long val = atomic_long_add_return_relaxed(nr_entries, &si->inuse_pages); /* * If device is full, and SWAP_USAGE_OFFLIST_BIT is not set, * remove it from the plist. */ if (unlikely(val == si->pages)) { del_from_avail_list(si, false); return true; } return false; } static void swap_usage_sub(struct swap_info_struct *si, unsigned int nr_entries) { long val = atomic_long_sub_return_relaxed(nr_entries, &si->inuse_pages); /* * If device is not full, and SWAP_USAGE_OFFLIST_BIT is set, * add it to the plist. */ if (unlikely(val & SWAP_USAGE_OFFLIST_BIT)) add_to_avail_list(si, false); } static void swap_range_alloc(struct swap_info_struct *si, unsigned int nr_entries) { if (swap_usage_add(si, nr_entries)) { if (vm_swap_full()) schedule_work(&si->reclaim_work); } } static void swap_range_free(struct swap_info_struct *si, unsigned long offset, unsigned int nr_entries) { unsigned long begin = offset; unsigned long end = offset + nr_entries - 1; void (*swap_slot_free_notify)(struct block_device *, unsigned long); unsigned int i; /* * Use atomic clear_bit operations only on zeromap instead of non-atomic * bitmap_clear to prevent adjacent bits corruption due to simultaneous writes. */ for (i = 0; i < nr_entries; i++) { clear_bit(offset + i, si->zeromap); zswap_invalidate(swp_entry(si->type, offset + i)); } if (si->flags & SWP_BLKDEV) swap_slot_free_notify = si->bdev->bd_disk->fops->swap_slot_free_notify; else swap_slot_free_notify = NULL; while (offset <= end) { arch_swap_invalidate_page(si->type, offset); if (swap_slot_free_notify) swap_slot_free_notify(si->bdev, offset); offset++; } clear_shadow_from_swap_cache(si->type, begin, end); /* * Make sure that try_to_unuse() observes si->inuse_pages reaching 0 * only after the above cleanups are done. 
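/*
 * Userspace model (C11 atomics; the bit position is an assumption, not the
 * kernel's definition) of how an "off the avail list" flag can ride along
 * in the inuse-pages counter as done above: only the caller whose cmpxchg
 * observes "exactly full and flag clear" delists the device, and a later
 * free that sees the flag set is the one that must put it back.
 */
#include <stdatomic.h>
#include <stdbool.h>

#define MODEL_OFFLIST_BIT       (1UL << (8 * sizeof(unsigned long) - 2))

struct model_dev {
        atomic_ulong    inuse;
        unsigned long   pages;          /* total usable slots */
};

/* returns true if this caller won the race and must delist the device */
static bool model_try_delist(struct model_dev *d)
{
        unsigned long expected = d->pages;

        return atomic_compare_exchange_strong(&d->inuse, &expected,
                                              d->pages | MODEL_OFFLIST_BIT);
}

/* returns true if this caller must re-add the device to the avail list */
static bool model_free_slots(struct model_dev *d, unsigned long nr)
{
        unsigned long new_val = atomic_fetch_sub(&d->inuse, nr) - nr;

        return new_val & MODEL_OFFLIST_BIT;
}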
*/ smp_wmb(); atomic_long_add(nr_entries, &nr_swap_pages); swap_usage_sub(si, nr_entries); } static bool get_swap_device_info(struct swap_info_struct *si) { if (!percpu_ref_tryget_live(&si->users)) return false; /* * Guarantee the si->users are checked before accessing other * fields of swap_info_struct, and si->flags (SWP_WRITEOK) is * up to dated. * * Paired with the spin_unlock() after setup_swap_info() in * enable_swap_info(), and smp_wmb() in swapoff. */ smp_rmb(); return true; } /* * Fast path try to get swap entries with specified order from current * CPU's swap entry pool (a cluster). */ static bool swap_alloc_fast(swp_entry_t *entry, int order) { struct swap_cluster_info *ci; struct swap_info_struct *si; unsigned int offset, found = SWAP_ENTRY_INVALID; /* * Once allocated, swap_info_struct will never be completely freed, * so checking it's liveness by get_swap_device_info is enough. */ si = this_cpu_read(percpu_swap_cluster.si[order]); offset = this_cpu_read(percpu_swap_cluster.offset[order]); if (!si || !offset || !get_swap_device_info(si)) return false; ci = lock_cluster(si, offset); if (cluster_is_usable(ci, order)) { if (cluster_is_empty(ci)) offset = cluster_offset(si, ci); found = alloc_swap_scan_cluster(si, ci, offset, order, SWAP_HAS_CACHE); if (found) *entry = swp_entry(si->type, found); } else { unlock_cluster(ci); } put_swap_device(si); return !!found; } /* Rotate the device and switch to a new cluster */ static bool swap_alloc_slow(swp_entry_t *entry, int order) { int node; unsigned long offset; struct swap_info_struct *si, *next; node = numa_node_id(); spin_lock(&swap_avail_lock); start_over: plist_for_each_entry_safe(si, next, &swap_avail_heads[node], avail_lists[node]) { /* Rotate the device and switch to a new cluster */ plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]); spin_unlock(&swap_avail_lock); if (get_swap_device_info(si)) { offset = cluster_alloc_swap_entry(si, order, SWAP_HAS_CACHE); put_swap_device(si); if (offset) { *entry = swp_entry(si->type, offset); return true; } if (order) return false; } spin_lock(&swap_avail_lock); /* * if we got here, it's likely that si was almost full before, * and since scan_swap_map_slots() can drop the si->lock, * multiple callers probably all tried to get a page from the * same si and it filled up before we could get one; or, the si * filled up between us dropping swap_avail_lock and taking * si->lock. Since we dropped the swap_avail_lock, the * swap_avail_head list may have been modified; so if next is * still in the swap_avail_head list then try it, otherwise * start over if we have not gotten any slots. */ if (plist_node_empty(&next->avail_lists[node])) goto start_over; } spin_unlock(&swap_avail_lock); return false; } /** * folio_alloc_swap - allocate swap space for a folio * @folio: folio we want to move to swap * @gfp: gfp mask for shadow nodes * * Allocate swap space for the folio and add the folio to the * swap cache. * * Context: Caller needs to hold the folio lock. * Return: Whether the folio was added to the swap cache. */ int folio_alloc_swap(struct folio *folio, gfp_t gfp) { unsigned int order = folio_order(folio); unsigned int size = 1 << order; swp_entry_t entry = {}; VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); VM_BUG_ON_FOLIO(!folio_test_uptodate(folio), folio); if (order) { /* * Reject large allocation when THP_SWAP is disabled, * the caller should split the folio and try again. 
*/ if (!IS_ENABLED(CONFIG_THP_SWAP)) return -EAGAIN; /* * Allocation size should never exceed cluster size * (HPAGE_PMD_SIZE). */ if (size > SWAPFILE_CLUSTER) { VM_WARN_ON_ONCE(1); return -EINVAL; } } local_lock(&percpu_swap_cluster.lock); if (!swap_alloc_fast(&entry, order)) swap_alloc_slow(&entry, order); local_unlock(&percpu_swap_cluster.lock); /* Need to call this even if allocation failed, for MEMCG_SWAP_FAIL. */ if (mem_cgroup_try_charge_swap(folio, entry)) goto out_free; if (!entry.val) return -ENOMEM; /* * XArray node allocations from PF_MEMALLOC contexts could * completely exhaust the page allocator. __GFP_NOMEMALLOC * stops emergency reserves from being allocated. * * TODO: this could cause a theoretical memory reclaim * deadlock in the swap out path. */ if (add_to_swap_cache(folio, entry, gfp | __GFP_NOMEMALLOC, NULL)) goto out_free; atomic_long_sub(size, &nr_swap_pages); return 0; out_free: put_swap_folio(folio, entry); return -ENOMEM; } static struct swap_info_struct *_swap_info_get(swp_entry_t entry) { struct swap_info_struct *si; unsigned long offset; if (!entry.val) goto out; si = swp_swap_info(entry); if (!si) goto bad_nofile; if (data_race(!(si->flags & SWP_USED))) goto bad_device; offset = swp_offset(entry); if (offset >= si->max) goto bad_offset; if (data_race(!si->swap_map[swp_offset(entry)])) goto bad_free; return si; bad_free: pr_err("%s: %s%08lx\n", __func__, Unused_offset, entry.val); goto out; bad_offset: pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val); goto out; bad_device: pr_err("%s: %s%08lx\n", __func__, Unused_file, entry.val); goto out; bad_nofile: pr_err("%s: %s%08lx\n", __func__, Bad_file, entry.val); out: return NULL; } static unsigned char __swap_entry_free_locked(struct swap_info_struct *si, unsigned long offset, unsigned char usage) { unsigned char count; unsigned char has_cache; count = si->swap_map[offset]; has_cache = count & SWAP_HAS_CACHE; count &= ~SWAP_HAS_CACHE; if (usage == SWAP_HAS_CACHE) { VM_BUG_ON(!has_cache); has_cache = 0; } else if (count == SWAP_MAP_SHMEM) { /* * Or we could insist on shmem.c using a special * swap_shmem_free() and free_shmem_swap_and_cache()... */ count = 0; } else if ((count & ~COUNT_CONTINUED) <= SWAP_MAP_MAX) { if (count == COUNT_CONTINUED) { if (swap_count_continued(si, offset, count)) count = SWAP_MAP_MAX | COUNT_CONTINUED; else count = SWAP_MAP_MAX; } else count--; } usage = count | has_cache; if (usage) WRITE_ONCE(si->swap_map[offset], usage); else WRITE_ONCE(si->swap_map[offset], SWAP_HAS_CACHE); return usage; } /* * When we get a swap entry, if there aren't some other ways to * prevent swapoff, such as the folio in swap cache is locked, RCU * reader side is locked, etc., the swap entry may become invalid * because of swapoff. Then, we need to enclose all swap related * functions with get_swap_device() and put_swap_device(), unless the * swap functions call get/put_swap_device() by themselves. * * RCU reader side lock (including any spinlock) is sufficient to * prevent swapoff, because synchronize_rcu() is called in swapoff() * before freeing data structures. * * Check whether swap entry is valid in the swap device. If so, * return pointer to swap_info_struct, and keep the swap entry valid * via preventing the swap device from being swapoff, until * put_swap_device() is called. Otherwise return NULL. 
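/*
 * Model of one swap_map byte as manipulated by __swap_entry_free_locked()
 * above: a small reference count plus a SWAP_HAS_CACHE bit. The bit values
 * here are illustrative and the SHMEM/continuation special cases are
 * ignored, so this is only a sketch of the packing, not the kernel's exact
 * encoding.
 */
#define MODEL_HAS_CACHE         0x40
#define MODEL_COUNT_MASK        0x3f

/* drop one map reference; the slot is only fully free once this returns 0 */
static unsigned char model_put_map_ref(unsigned char map_byte)
{
        unsigned char has_cache = map_byte & MODEL_HAS_CACHE;
        unsigned char count = map_byte & MODEL_COUNT_MASK;

        if (count)
                count--;
        return count | has_cache;
}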
* * Notice that swapoff or swapoff+swapon can still happen before the * percpu_ref_tryget_live() in get_swap_device() or after the * percpu_ref_put() in put_swap_device() if there isn't any other way * to prevent swapoff. The caller must be prepared for that. For * example, the following situation is possible. * * CPU1 CPU2 * do_swap_page() * ... swapoff+swapon * __read_swap_cache_async() * swapcache_prepare() * __swap_duplicate() * // check swap_map * // verify PTE not changed * * In __swap_duplicate(), the swap_map need to be checked before * changing partly because the specified swap entry may be for another * swap device which has been swapoff. And in do_swap_page(), after * the page is read from the swap device, the PTE is verified not * changed with the page table locked to check whether the swap device * has been swapoff or swapoff+swapon. */ struct swap_info_struct *get_swap_device(swp_entry_t entry) { struct swap_info_struct *si; unsigned long offset; if (!entry.val) goto out; si = swp_swap_info(entry); if (!si) goto bad_nofile; if (!get_swap_device_info(si)) goto out; offset = swp_offset(entry); if (offset >= si->max) goto put_out; return si; bad_nofile: pr_err("%s: %s%08lx\n", __func__, Bad_file, entry.val); out: return NULL; put_out: pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val); percpu_ref_put(&si->users); return NULL; } static unsigned char __swap_entry_free(struct swap_info_struct *si, swp_entry_t entry) { struct swap_cluster_info *ci; unsigned long offset = swp_offset(entry); unsigned char usage; ci = lock_cluster(si, offset); usage = __swap_entry_free_locked(si, offset, 1); if (!usage) swap_entry_range_free(si, ci, swp_entry(si->type, offset), 1); unlock_cluster(ci); return usage; } static bool __swap_entries_free(struct swap_info_struct *si, swp_entry_t entry, int nr) { unsigned long offset = swp_offset(entry); unsigned int type = swp_type(entry); struct swap_cluster_info *ci; bool has_cache = false; unsigned char count; int i; if (nr <= 1 || swap_count(data_race(si->swap_map[offset])) != 1) goto fallback; /* cross into another cluster */ if (nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER) goto fallback; ci = lock_cluster(si, offset); if (!swap_is_last_map(si, offset, nr, &has_cache)) { unlock_cluster(ci); goto fallback; } for (i = 0; i < nr; i++) WRITE_ONCE(si->swap_map[offset + i], SWAP_HAS_CACHE); if (!has_cache) swap_entry_range_free(si, ci, entry, nr); unlock_cluster(ci); return has_cache; fallback: for (i = 0; i < nr; i++) { if (data_race(si->swap_map[offset + i])) { count = __swap_entry_free(si, swp_entry(type, offset + i)); if (count == SWAP_HAS_CACHE) has_cache = true; } else { WARN_ON_ONCE(1); } } return has_cache; } /* * Drop the last HAS_CACHE flag of swap entries, caller have to * ensure all entries belong to the same cgroup. 
*/ static void swap_entry_range_free(struct swap_info_struct *si, struct swap_cluster_info *ci, swp_entry_t entry, unsigned int nr_pages) { unsigned long offset = swp_offset(entry); unsigned char *map = si->swap_map + offset; unsigned char *map_end = map + nr_pages; /* It should never free entries across different clusters */ VM_BUG_ON(ci != offset_to_cluster(si, offset + nr_pages - 1)); VM_BUG_ON(cluster_is_empty(ci)); VM_BUG_ON(ci->count < nr_pages); ci->count -= nr_pages; do { VM_BUG_ON(*map != SWAP_HAS_CACHE); *map = 0; } while (++map < map_end); mem_cgroup_uncharge_swap(entry, nr_pages); swap_range_free(si, offset, nr_pages); if (!ci->count) free_cluster(si, ci); else partial_free_cluster(si, ci); } static void cluster_swap_free_nr(struct swap_info_struct *si, unsigned long offset, int nr_pages, unsigned char usage) { struct swap_cluster_info *ci; unsigned long end = offset + nr_pages; ci = lock_cluster(si, offset); do { if (!__swap_entry_free_locked(si, offset, usage)) swap_entry_range_free(si, ci, swp_entry(si->type, offset), 1); } while (++offset < end); unlock_cluster(ci); } /* * Caller has made sure that the swap device corresponding to entry * is still around or has not been recycled. */ void swap_free_nr(swp_entry_t entry, int nr_pages) { int nr; struct swap_info_struct *sis; unsigned long offset = swp_offset(entry); sis = _swap_info_get(entry); if (!sis) return; while (nr_pages) { nr = min_t(int, nr_pages, SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER); cluster_swap_free_nr(sis, offset, nr, 1); offset += nr; nr_pages -= nr; } } /* * Called after dropping swapcache to decrease refcnt to swap entries. */ void put_swap_folio(struct folio *folio, swp_entry_t entry) { unsigned long offset = swp_offset(entry); struct swap_cluster_info *ci; struct swap_info_struct *si; int size = 1 << swap_entry_order(folio_order(folio)); si = _swap_info_get(entry); if (!si) return; ci = lock_cluster(si, offset); if (swap_only_has_cache(si, offset, size)) swap_entry_range_free(si, ci, entry, size); else { for (int i = 0; i < size; i++, entry.val++) { if (!__swap_entry_free_locked(si, offset + i, SWAP_HAS_CACHE)) swap_entry_range_free(si, ci, entry, 1); } } unlock_cluster(ci); } int __swap_count(swp_entry_t entry) { struct swap_info_struct *si = swp_swap_info(entry); pgoff_t offset = swp_offset(entry); return swap_count(si->swap_map[offset]); } /* * How many references to @entry are currently swapped out? * This does not give an exact answer when swap count is continued, * but does include the high COUNT_CONTINUED flag to allow for that. */ bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry) { pgoff_t offset = swp_offset(entry); struct swap_cluster_info *ci; int count; ci = lock_cluster(si, offset); count = swap_count(si->swap_map[offset]); unlock_cluster(ci); return !!count; } /* * How many references to @entry are currently swapped out? * This considers COUNT_CONTINUED so it returns exact answer. 
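/*
 * The freeing paths above split a contiguous run of entries into chunks
 * that never cross a cluster boundary, so each chunk is handled under a
 * single cluster lock. Standalone model of just that chunking arithmetic;
 * fn() is a hypothetical per-chunk callback.
 */
static void model_for_each_cluster_chunk(unsigned long offset, int nr_pages,
                                         unsigned int cluster_size,
                                         void (*fn)(unsigned long off, int nr))
{
        while (nr_pages) {
                int nr = cluster_size - offset % cluster_size;

                if (nr > nr_pages)
                        nr = nr_pages;
                fn(offset, nr);
                offset += nr;
                nr_pages -= nr;
        }
}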
*/ int swp_swapcount(swp_entry_t entry) { int count, tmp_count, n; struct swap_info_struct *si; struct swap_cluster_info *ci; struct page *page; pgoff_t offset; unsigned char *map; si = _swap_info_get(entry); if (!si) return 0; offset = swp_offset(entry); ci = lock_cluster(si, offset); count = swap_count(si->swap_map[offset]); if (!(count & COUNT_CONTINUED)) goto out; count &= ~COUNT_CONTINUED; n = SWAP_MAP_MAX + 1; page = vmalloc_to_page(si->swap_map + offset); offset &= ~PAGE_MASK; VM_BUG_ON(page_private(page) != SWP_CONTINUED); do { page = list_next_entry(page, lru); map = kmap_local_page(page); tmp_count = map[offset]; kunmap_local(map); count += (tmp_count & ~COUNT_CONTINUED) * n; n *= (SWAP_CONT_MAX + 1); } while (tmp_count & COUNT_CONTINUED); out: unlock_cluster(ci); return count; } static bool swap_page_trans_huge_swapped(struct swap_info_struct *si, swp_entry_t entry, int order) { struct swap_cluster_info *ci; unsigned char *map = si->swap_map; unsigned int nr_pages = 1 << order; unsigned long roffset = swp_offset(entry); unsigned long offset = round_down(roffset, nr_pages); int i; bool ret = false; ci = lock_cluster(si, offset); if (nr_pages == 1) { if (swap_count(map[roffset])) ret = true; goto unlock_out; } for (i = 0; i < nr_pages; i++) { if (swap_count(map[offset + i])) { ret = true; break; } } unlock_out: unlock_cluster(ci); return ret; } static bool folio_swapped(struct folio *folio) { swp_entry_t entry = folio->swap; struct swap_info_struct *si = _swap_info_get(entry); if (!si) return false; if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!folio_test_large(folio))) return swap_entry_swapped(si, entry); return swap_page_trans_huge_swapped(si, entry, folio_order(folio)); } static bool folio_swapcache_freeable(struct folio *folio) { VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); if (!folio_test_swapcache(folio)) return false; if (folio_test_writeback(folio)) return false; /* * Once hibernation has begun to create its image of memory, * there's a danger that one of the calls to folio_free_swap() * - most probably a call from __try_to_reclaim_swap() while * hibernation is allocating its own swap pages for the image, * but conceivably even a call from memory reclaim - will free * the swap from a folio which has already been recorded in the * image as a clean swapcache folio, and then reuse its swap for * another page of the image. On waking from hibernation, the * original folio might be freed under memory pressure, then * later read back in from swap, now with the wrong data. * * Hibernation suspends storage while it is writing the image * to disk so check that here. */ if (pm_suspended_storage()) return false; return true; } /** * folio_free_swap() - Free the swap space used for this folio. * @folio: The folio to remove. * * If swap is getting full, or if there are no more mappings of this folio, * then call folio_free_swap to free its swap space. * * Return: true if we were able to release the swap space. */ bool folio_free_swap(struct folio *folio) { if (!folio_swapcache_freeable(folio)) return false; if (folio_swapped(folio)) return false; delete_from_swap_cache(folio); folio_set_dirty(folio); return true; } /** * free_swap_and_cache_nr() - Release reference on range of swap entries and * reclaim their cache if no more references remain. * @entry: First entry of range. * @nr: Number of entries in range. * * For each swap entry in the contiguous range, release a reference. If any swap * entries become free, try to reclaim their underlying folios, if present. 
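/*
 * The continuation walk in swp_swapcount() above effectively reads a
 * multi-digit number: the in-place count is the lowest digit (base
 * SWAP_MAP_MAX + 1) and each continuation page holds the next digit (base
 * SWAP_CONT_MAX + 1). Standalone sketch assuming the usual 0x3e/0x7f
 * limits; cont[] stands in for the per-offset bytes of the continuation
 * pages.
 */
#define MODEL_MAP_MAX   0x3e
#define MODEL_CONT_MAX  0x7f

static long model_total_swap_count(unsigned char base_count,
                                   const unsigned char *cont, int nr_cont)
{
        long count = base_count;
        long n = MODEL_MAP_MAX + 1;

        for (int i = 0; i < nr_cont; i++) {
                count += (long)cont[i] * n;
                n *= MODEL_CONT_MAX + 1;
        }
        return count;
}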
The * offset range is defined by [entry.offset, entry.offset + nr). */ void free_swap_and_cache_nr(swp_entry_t entry, int nr) { const unsigned long start_offset = swp_offset(entry); const unsigned long end_offset = start_offset + nr; struct swap_info_struct *si; bool any_only_cache = false; unsigned long offset; si = get_swap_device(entry); if (!si) return; if (WARN_ON(end_offset > si->max)) goto out; /* * First free all entries in the range. */ any_only_cache = __swap_entries_free(si, entry, nr); /* * Short-circuit the below loop if none of the entries had their * reference drop to zero. */ if (!any_only_cache) goto out; /* * Now go back over the range trying to reclaim the swap cache. This is * more efficient for large folios because we will only try to reclaim * the swap once per folio in the common case. If we do * __swap_entry_free() and __try_to_reclaim_swap() in the same loop, the * latter will get a reference and lock the folio for every individual * page but will only succeed once the swap slot for every subpage is * zero. */ for (offset = start_offset; offset < end_offset; offset += nr) { nr = 1; if (READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { /* * Folios are always naturally aligned in swap so * advance forward to the next boundary. Zero means no * folio was found for the swap entry, so advance by 1 * in this case. Negative value means folio was found * but could not be reclaimed. Here we can still advance * to the next boundary. */ nr = __try_to_reclaim_swap(si, offset, TTRS_UNMAPPED | TTRS_FULL); if (nr == 0) nr = 1; else if (nr < 0) nr = -nr; nr = ALIGN(offset + 1, nr) - offset; } } out: put_swap_device(si); } #ifdef CONFIG_HIBERNATION swp_entry_t get_swap_page_of_type(int type) { struct swap_info_struct *si = swap_type_to_swap_info(type); unsigned long offset; swp_entry_t entry = {0}; if (!si) goto fail; /* This is called for allocating swap entry, not cache */ if (get_swap_device_info(si)) { if (si->flags & SWP_WRITEOK) { offset = cluster_alloc_swap_entry(si, 0, 1); if (offset) { entry = swp_entry(si->type, offset); atomic_long_dec(&nr_swap_pages); } } put_swap_device(si); } fail: return entry; } /* * Find the swap type that corresponds to given device (if any). * * @offset - number of the PAGE_SIZE-sized block of the device, starting * from 0, in which the swap header is expected to be located. * * This is needed for the suspend to disk (aka swsusp). */ int swap_type_of(dev_t device, sector_t offset) { int type; if (!device) return -1; spin_lock(&swap_lock); for (type = 0; type < nr_swapfiles; type++) { struct swap_info_struct *sis = swap_info[type]; if (!(sis->flags & SWP_WRITEOK)) continue; if (device == sis->bdev->bd_dev) { struct swap_extent *se = first_se(sis); if (se->start_block == offset) { spin_unlock(&swap_lock); return type; } } } spin_unlock(&swap_lock); return -ENODEV; } int find_first_swap(dev_t *device) { int type; spin_lock(&swap_lock); for (type = 0; type < nr_swapfiles; type++) { struct swap_info_struct *sis = swap_info[type]; if (!(sis->flags & SWP_WRITEOK)) continue; *device = sis->bdev->bd_dev; spin_unlock(&swap_lock); return type; } spin_unlock(&swap_lock); return -ENODEV; } /* * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev * corresponding to given index in swap_info (swap type). 
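/*
 * Stride logic from the reclaim loop above, as a standalone helper: folios
 * are naturally aligned in swap, so after probing one slot the scan can
 * jump to the next folio boundary. reclaimed == 0 means no folio was found
 * (advance by one); a negative value means a folio was found but could not
 * be reclaimed (still advance by its size). Returns the number of slots to
 * skip; userspace sketch only.
 */
static unsigned long model_reclaim_advance(unsigned long offset, long reclaimed)
{
        unsigned long nr;

        if (reclaimed == 0)
                nr = 1;
        else
                nr = (unsigned long)(reclaimed < 0 ? -reclaimed : reclaimed);

        /* round offset + 1 up to a multiple of nr, then measure the step */
        return ((offset + nr) / nr) * nr - offset;
}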
*/ sector_t swapdev_block(int type, pgoff_t offset) { struct swap_info_struct *si = swap_type_to_swap_info(type); struct swap_extent *se; if (!si || !(si->flags & SWP_WRITEOK)) return 0; se = offset_to_swap_extent(si, offset); return se->start_block + (offset - se->start_page); } /* * Return either the total number of swap pages of given type, or the number * of free pages of that type (depending on @free) * * This is needed for software suspend */ unsigned int count_swap_pages(int type, int free) { unsigned int n = 0; spin_lock(&swap_lock); if ((unsigned int)type < nr_swapfiles) { struct swap_info_struct *sis = swap_info[type]; spin_lock(&sis->lock); if (sis->flags & SWP_WRITEOK) { n = sis->pages; if (free) n -= swap_usage_in_pages(sis); } spin_unlock(&sis->lock); } spin_unlock(&swap_lock); return n; } #endif /* CONFIG_HIBERNATION */ static inline int pte_same_as_swp(pte_t pte, pte_t swp_pte) { return pte_same(pte_swp_clear_flags(pte), swp_pte); } /* * No need to decide whether this PTE shares the swap entry with others, * just let do_wp_page work it out if a write is requested later - to * force COW, vm_page_prot omits write permission from any private vma. */ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, swp_entry_t entry, struct folio *folio) { struct page *page; struct folio *swapcache; spinlock_t *ptl; pte_t *pte, new_pte, old_pte; bool hwpoisoned = false; int ret = 1; swapcache = folio; folio = ksm_might_need_to_copy(folio, vma, addr); if (unlikely(!folio)) return -ENOMEM; else if (unlikely(folio == ERR_PTR(-EHWPOISON))) { hwpoisoned = true; folio = swapcache; } page = folio_file_page(folio, swp_offset(entry)); if (PageHWPoison(page)) hwpoisoned = true; pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); if (unlikely(!pte || !pte_same_as_swp(ptep_get(pte), swp_entry_to_pte(entry)))) { ret = 0; goto out; } old_pte = ptep_get(pte); if (unlikely(hwpoisoned || !folio_test_uptodate(folio))) { swp_entry_t swp_entry; dec_mm_counter(vma->vm_mm, MM_SWAPENTS); if (hwpoisoned) { swp_entry = make_hwpoison_entry(page); } else { swp_entry = make_poisoned_swp_entry(); } new_pte = swp_entry_to_pte(swp_entry); ret = 0; goto setpte; } /* * Some architectures may have to restore extra metadata to the page * when reading from swap. This metadata may be indexed by swap entry * so this must be called before swap_free(). */ arch_swap_restore(folio_swap(entry, folio), folio); dec_mm_counter(vma->vm_mm, MM_SWAPENTS); inc_mm_counter(vma->vm_mm, MM_ANONPAGES); folio_get(folio); if (folio == swapcache) { rmap_t rmap_flags = RMAP_NONE; /* * See do_swap_page(): writeback would be problematic. * However, we do a folio_wait_writeback() just before this * call and have the folio locked. */ VM_BUG_ON_FOLIO(folio_test_writeback(folio), folio); if (pte_swp_exclusive(old_pte)) rmap_flags |= RMAP_EXCLUSIVE; /* * We currently only expect small !anon folios, which are either * fully exclusive or fully shared. If we ever get large folios * here, we have to be careful. 
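/*
 * Model of the lookup behind swapdev_block() above: a swap extent maps a
 * contiguous page range onto a contiguous block range, so once the right
 * extent is known the answer is base + delta. A linear search over an
 * array stands in for the rbtree walk; returns -1 when no extent covers
 * the offset.
 */
struct model_extent {
        unsigned long           start_page;
        unsigned long           nr_pages;
        unsigned long long      start_block;
};

static long long model_offset_to_block(const struct model_extent *ext,
                                       unsigned long nr_ext,
                                       unsigned long offset)
{
        for (unsigned long i = 0; i < nr_ext; i++) {
                const struct model_extent *se = &ext[i];

                if (offset >= se->start_page &&
                    offset < se->start_page + se->nr_pages)
                        return (long long)(se->start_block +
                                           (offset - se->start_page));
        }
        return -1;
}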
*/ if (!folio_test_anon(folio)) { VM_WARN_ON_ONCE(folio_test_large(folio)); VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio); folio_add_new_anon_rmap(folio, vma, addr, rmap_flags); } else { folio_add_anon_rmap_pte(folio, page, vma, addr, rmap_flags); } } else { /* ksm created a completely new copy */ folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE); folio_add_lru_vma(folio, vma); } new_pte = pte_mkold(mk_pte(page, vma->vm_page_prot)); if (pte_swp_soft_dirty(old_pte)) new_pte = pte_mksoft_dirty(new_pte); if (pte_swp_uffd_wp(old_pte)) new_pte = pte_mkuffd_wp(new_pte); setpte: set_pte_at(vma->vm_mm, addr, pte, new_pte); swap_free(entry); out: if (pte) pte_unmap_unlock(pte, ptl); if (folio != swapcache) { folio_unlock(folio); folio_put(folio); } return ret; } static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned int type) { pte_t *pte = NULL; struct swap_info_struct *si; si = swap_info[type]; do { struct folio *folio; unsigned long offset; unsigned char swp_count; swp_entry_t entry; int ret; pte_t ptent; if (!pte++) { pte = pte_offset_map(pmd, addr); if (!pte) break; } ptent = ptep_get_lockless(pte); if (!is_swap_pte(ptent)) continue; entry = pte_to_swp_entry(ptent); if (swp_type(entry) != type) continue; offset = swp_offset(entry); pte_unmap(pte); pte = NULL; folio = swap_cache_get_folio(entry, vma, addr); if (!folio) { struct vm_fault vmf = { .vma = vma, .address = addr, .real_address = addr, .pmd = pmd, }; folio = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, &vmf); } if (!folio) { swp_count = READ_ONCE(si->swap_map[offset]); if (swp_count == 0 || swp_count == SWAP_MAP_BAD) continue; return -ENOMEM; } folio_lock(folio); folio_wait_writeback(folio); ret = unuse_pte(vma, pmd, addr, entry, folio); if (ret < 0) { folio_unlock(folio); folio_put(folio); return ret; } folio_free_swap(folio); folio_unlock(folio); folio_put(folio); } while (addr += PAGE_SIZE, addr != end); if (pte) pte_unmap(pte); return 0; } static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, unsigned int type) { pmd_t *pmd; unsigned long next; int ret; pmd = pmd_offset(pud, addr); do { cond_resched(); next = pmd_addr_end(addr, end); ret = unuse_pte_range(vma, pmd, addr, next, type); if (ret) return ret; } while (pmd++, addr = next, addr != end); return 0; } static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned int type) { pud_t *pud; unsigned long next; int ret; pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) continue; ret = unuse_pmd_range(vma, pud, addr, next, type); if (ret) return ret; } while (pud++, addr = next, addr != end); return 0; } static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr, unsigned long end, unsigned int type) { p4d_t *p4d; unsigned long next; int ret; p4d = p4d_offset(pgd, addr); do { next = p4d_addr_end(addr, end); if (p4d_none_or_clear_bad(p4d)) continue; ret = unuse_pud_range(vma, p4d, addr, next, type); if (ret) return ret; } while (p4d++, addr = next, addr != end); return 0; } static int unuse_vma(struct vm_area_struct *vma, unsigned int type) { pgd_t *pgd; unsigned long addr, end, next; int ret; addr = vma->vm_start; end = vma->vm_end; pgd = pgd_offset(vma->vm_mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; ret = unuse_p4d_range(vma, pgd, addr, next, type); if (ret) return 
ret; } while (pgd++, addr = next, addr != end); return 0; } static int unuse_mm(struct mm_struct *mm, unsigned int type) { struct vm_area_struct *vma; int ret = 0; VMA_ITERATOR(vmi, mm, 0); mmap_read_lock(mm); for_each_vma(vmi, vma) { if (vma->anon_vma && !is_vm_hugetlb_page(vma)) { ret = unuse_vma(vma, type); if (ret) break; } cond_resched(); } mmap_read_unlock(mm); return ret; } /* * Scan swap_map from current position to next entry still in use. * Return 0 if there are no inuse entries after prev till end of * the map. */ static unsigned int find_next_to_unuse(struct swap_info_struct *si, unsigned int prev) { unsigned int i; unsigned char count; /* * No need for swap_lock here: we're just looking * for whether an entry is in use, not modifying it; false * hits are okay, and sys_swapoff() has already prevented new * allocations from this area (while holding swap_lock). */ for (i = prev + 1; i < si->max; i++) { count = READ_ONCE(si->swap_map[i]); if (count && swap_count(count) != SWAP_MAP_BAD) break; if ((i % LATENCY_LIMIT) == 0) cond_resched(); } if (i == si->max) i = 0; return i; } static int try_to_unuse(unsigned int type) { struct mm_struct *prev_mm; struct mm_struct *mm; struct list_head *p; int retval = 0; struct swap_info_struct *si = swap_info[type]; struct folio *folio; swp_entry_t entry; unsigned int i; if (!swap_usage_in_pages(si)) goto success; retry: retval = shmem_unuse(type); if (retval) return retval; prev_mm = &init_mm; mmget(prev_mm); spin_lock(&mmlist_lock); p = &init_mm.mmlist; while (swap_usage_in_pages(si) && !signal_pending(current) && (p = p->next) != &init_mm.mmlist) { mm = list_entry(p, struct mm_struct, mmlist); if (!mmget_not_zero(mm)) continue; spin_unlock(&mmlist_lock); mmput(prev_mm); prev_mm = mm; retval = unuse_mm(mm, type); if (retval) { mmput(prev_mm); return retval; } /* * Make sure that we aren't completely killing * interactive performance. */ cond_resched(); spin_lock(&mmlist_lock); } spin_unlock(&mmlist_lock); mmput(prev_mm); i = 0; while (swap_usage_in_pages(si) && !signal_pending(current) && (i = find_next_to_unuse(si, i)) != 0) { entry = swp_entry(type, i); folio = filemap_get_folio(swap_address_space(entry), swap_cache_index(entry)); if (IS_ERR(folio)) continue; /* * It is conceivable that a racing task removed this folio from * swap cache just before we acquired the page lock. The folio * might even be back in swap cache on another swap area. But * that is okay, folio_free_swap() only removes stale folios. */ folio_lock(folio); folio_wait_writeback(folio); folio_free_swap(folio); folio_unlock(folio); folio_put(folio); } /* * Lets check again to see if there are still swap entries in the map. * If yes, we would need to do retry the unuse logic again. * Under global memory pressure, swap entries can be reinserted back * into process space after the mmlist loop above passes over them. * * Limit the number of retries? No: when mmget_not_zero() * above fails, that mm is likely to be freeing swap from * exit_mmap(), which proceeds at its own independent pace; * and even shmem_writepage() could have been preempted after * folio_alloc_swap(), temporarily hiding that swap. It's easy * and robust (though cpu-intensive) just to keep retrying. */ if (swap_usage_in_pages(si)) { if (!signal_pending(current)) goto retry; return -EINTR; } success: /* * Make sure that further cleanups after try_to_unuse() returns happen * after swap_range_free() reduces si->inuse_pages to 0. 
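/*
 * Sketch of find_next_to_unuse() above with the cond_resched() and locking
 * considerations stripped out: scan forward for the next slot that is in
 * use and not a bad slot, wrapping to 0 at the end (0 doubles as "nothing
 * left"). The bad-slot value is a stand-in, not the kernel's exact
 * SWAP_MAP_BAD encoding.
 */
#define MODEL_MAP_BAD   0x3f

static unsigned int model_next_in_use(const unsigned char *map,
                                      unsigned int max, unsigned int prev)
{
        unsigned int i;

        for (i = prev + 1; i < max; i++)
                if (map[i] && map[i] != MODEL_MAP_BAD)
                        break;
        return i == max ? 0 : i;
}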
*/ smp_mb(); return 0; } /* * After a successful try_to_unuse, if no swap is now in use, we know * we can empty the mmlist. swap_lock must be held on entry and exit. * Note that mmlist_lock nests inside swap_lock, and an mm must be * added to the mmlist just after page_duplicate - before would be racy. */ static void drain_mmlist(void) { struct list_head *p, *next; unsigned int type; for (type = 0; type < nr_swapfiles; type++) if (swap_usage_in_pages(swap_info[type])) return; spin_lock(&mmlist_lock); list_for_each_safe(p, next, &init_mm.mmlist) list_del_init(p); spin_unlock(&mmlist_lock); } /* * Free all of a swapdev's extent information */ static void destroy_swap_extents(struct swap_info_struct *sis) { while (!RB_EMPTY_ROOT(&sis->swap_extent_root)) { struct rb_node *rb = sis->swap_extent_root.rb_node; struct swap_extent *se = rb_entry(rb, struct swap_extent, rb_node); rb_erase(rb, &sis->swap_extent_root); kfree(se); } if (sis->flags & SWP_ACTIVATED) { struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping; sis->flags &= ~SWP_ACTIVATED; if (mapping->a_ops->swap_deactivate) mapping->a_ops->swap_deactivate(swap_file); } } /* * Add a block range (and the corresponding page range) into this swapdev's * extent tree. * * This function rather assumes that it is called in ascending page order. */ int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, unsigned long nr_pages, sector_t start_block) { struct rb_node **link = &sis->swap_extent_root.rb_node, *parent = NULL; struct swap_extent *se; struct swap_extent *new_se; /* * place the new node at the right most since the * function is called in ascending page order. */ while (*link) { parent = *link; link = &parent->rb_right; } if (parent) { se = rb_entry(parent, struct swap_extent, rb_node); BUG_ON(se->start_page + se->nr_pages != start_page); if (se->start_block + se->nr_pages == start_block) { /* Merge it */ se->nr_pages += nr_pages; return 0; } } /* No merge, insert a new extent. */ new_se = kmalloc(sizeof(*se), GFP_KERNEL); if (new_se == NULL) return -ENOMEM; new_se->start_page = start_page; new_se->nr_pages = nr_pages; new_se->start_block = start_block; rb_link_node(&new_se->rb_node, parent, link); rb_insert_color(&new_se->rb_node, &sis->swap_extent_root); return 1; } EXPORT_SYMBOL_GPL(add_swap_extent); /* * A `swap extent' is a simple thing which maps a contiguous range of pages * onto a contiguous range of disk blocks. A rbtree of swap extents is * built at swapon time and is then used at swap_writepage/swap_read_folio * time for locating where on disk a page belongs. * * If the swapfile is an S_ISBLK block device, a single extent is installed. * This is done so that the main operating code can treat S_ISBLK and S_ISREG * swap files identically. * * Whether the swapdev is an S_ISREG file or an S_ISBLK blockdev, the swap * extent rbtree operates in PAGE_SIZE disk blocks. Both S_ISREG and S_ISBLK * swapfiles are handled *identically* after swapon time. * * For S_ISREG swapfiles, setup_swap_extents() will walk all the file's blocks * and will parse them into a rbtree, in PAGE_SIZE chunks. If some stray * blocks are found which do not fall within the PAGE_SIZE alignment * requirements, they are simply tossed out - we will never use those blocks * for swapping. * * For all swap devices we set S_SWAPFILE across the life of the swapon. This * prevents users from writing to the swap device, which will corrupt memory. 
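/*
 * The merge rule from add_swap_extent() above, shown on a plain array tail
 * instead of the rbtree: because the function is called in ascending page
 * order, a new range can only merge with the rightmost extent, and only
 * when it is also contiguous in block space. Userspace sketch; the struct
 * and return convention are illustrative.
 */
struct model_ext {
        unsigned long           start_page;
        unsigned long           nr_pages;
        unsigned long long      start_block;
};

/* returns 0 if merged into *last, 1 if a new extent was written to *out */
static int model_add_extent(struct model_ext *last, struct model_ext *out,
                            unsigned long start_page, unsigned long nr_pages,
                            unsigned long long start_block)
{
        if (last && last->start_block + last->nr_pages == start_block) {
                last->nr_pages += nr_pages;
                return 0;
        }
        out->start_page = start_page;
        out->nr_pages = nr_pages;
        out->start_block = start_block;
        return 1;
}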
* * The amount of disk space which a single swap extent represents varies. * Typically it is in the 1-4 megabyte range. So we can have hundreds of * extents in the rbtree. - akpm. */ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span) { struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping; struct inode *inode = mapping->host; int ret; if (S_ISBLK(inode->i_mode)) { ret = add_swap_extent(sis, 0, sis->max, 0); *span = sis->pages; return ret; } if (mapping->a_ops->swap_activate) { ret = mapping->a_ops->swap_activate(sis, swap_file, span); if (ret < 0) return ret; sis->flags |= SWP_ACTIVATED; if ((sis->flags & SWP_FS_OPS) && sio_pool_init() != 0) { destroy_swap_extents(sis); return -ENOMEM; } return ret; } return generic_swapfile_activate(sis, swap_file, span); } static int swap_node(struct swap_info_struct *si) { struct block_device *bdev; if (si->bdev) bdev = si->bdev; else bdev = si->swap_file->f_inode->i_sb->s_bdev; return bdev ? bdev->bd_disk->node_id : NUMA_NO_NODE; } static void setup_swap_info(struct swap_info_struct *si, int prio, unsigned char *swap_map, struct swap_cluster_info *cluster_info, unsigned long *zeromap) { int i; if (prio >= 0) si->prio = prio; else si->prio = --least_priority; /* * the plist prio is negated because plist ordering is * low-to-high, while swap ordering is high-to-low */ si->list.prio = -si->prio; for_each_node(i) { if (si->prio >= 0) si->avail_lists[i].prio = -si->prio; else { if (swap_node(si) == i) si->avail_lists[i].prio = 1; else si->avail_lists[i].prio = -si->prio; } } si->swap_map = swap_map; si->cluster_info = cluster_info; si->zeromap = zeromap; } static void _enable_swap_info(struct swap_info_struct *si) { atomic_long_add(si->pages, &nr_swap_pages); total_swap_pages += si->pages; assert_spin_locked(&swap_lock); /* * both lists are plists, and thus priority ordered. * swap_active_head needs to be priority ordered for swapoff(), * which on removal of any swap_info_struct with an auto-assigned * (i.e. negative) priority increments the auto-assigned priority * of any lower-priority swap_info_structs. * swap_avail_head needs to be priority ordered for folio_alloc_swap(), * which allocates swap pages from the highest available priority * swap_info_struct. */ plist_add(&si->list, &swap_active_head); /* Add back to available list */ add_to_avail_list(si, true); } static void enable_swap_info(struct swap_info_struct *si, int prio, unsigned char *swap_map, struct swap_cluster_info *cluster_info, unsigned long *zeromap) { spin_lock(&swap_lock); spin_lock(&si->lock); setup_swap_info(si, prio, swap_map, cluster_info, zeromap); spin_unlock(&si->lock); spin_unlock(&swap_lock); /* * Finished initializing swap device, now it's safe to reference it. */ percpu_ref_resurrect(&si->users); spin_lock(&swap_lock); spin_lock(&si->lock); _enable_swap_info(si); spin_unlock(&si->lock); spin_unlock(&swap_lock); } static void reinsert_swap_info(struct swap_info_struct *si) { spin_lock(&swap_lock); spin_lock(&si->lock); setup_swap_info(si, si->prio, si->swap_map, si->cluster_info, si->zeromap); _enable_swap_info(si); spin_unlock(&si->lock); spin_unlock(&swap_lock); } /* * Called after clearing SWP_WRITEOK, ensures cluster_alloc_range * see the updated flags, so there will be no more allocations. 
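/*
 * Priority mapping from setup_swap_info() above, as standalone helpers:
 * plists sort low-to-high while swap priorities are high-to-low, so list
 * priorities are negated, and a device with an auto-assigned (negative)
 * priority gets a small boost (priority 1) on its own NUMA node so local
 * devices are preferred. Sketch only; argument names are made up.
 */
static int model_list_prio(int swap_prio)
{
        return -swap_prio;
}

static int model_node_prio(int swap_prio, int node, int device_node)
{
        if (swap_prio >= 0)
                return -swap_prio;
        return node == device_node ? 1 : -swap_prio;
}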
*/ static void wait_for_allocation(struct swap_info_struct *si) { unsigned long offset; unsigned long end = ALIGN(si->max, SWAPFILE_CLUSTER); struct swap_cluster_info *ci; BUG_ON(si->flags & SWP_WRITEOK); for (offset = 0; offset < end; offset += SWAPFILE_CLUSTER) { ci = lock_cluster(si, offset); unlock_cluster(ci); } } /* * Called after swap device's reference count is dead, so * neither scan nor allocation will use it. */ static void flush_percpu_swap_cluster(struct swap_info_struct *si) { int cpu, i; struct swap_info_struct **pcp_si; for_each_possible_cpu(cpu) { pcp_si = per_cpu_ptr(percpu_swap_cluster.si, cpu); /* * Invalidate the percpu swap cluster cache, si->users * is dead, so no new user will point to it, just flush * any existing user. */ for (i = 0; i < SWAP_NR_ORDERS; i++) cmpxchg(&pcp_si[i], si, NULL); } } SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) { struct swap_info_struct *p = NULL; unsigned char *swap_map; unsigned long *zeromap; struct swap_cluster_info *cluster_info; struct file *swap_file, *victim; struct address_space *mapping; struct inode *inode; struct filename *pathname; int err, found = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; BUG_ON(!current->mm); pathname = getname(specialfile); if (IS_ERR(pathname)) return PTR_ERR(pathname); victim = file_open_name(pathname, O_RDWR|O_LARGEFILE, 0); err = PTR_ERR(victim); if (IS_ERR(victim)) goto out; mapping = victim->f_mapping; spin_lock(&swap_lock); plist_for_each_entry(p, &swap_active_head, list) { if (p->flags & SWP_WRITEOK) { if (p->swap_file->f_mapping == mapping) { found = 1; break; } } } if (!found) { err = -EINVAL; spin_unlock(&swap_lock); goto out_dput; } if (!security_vm_enough_memory_mm(current->mm, p->pages)) vm_unacct_memory(p->pages); else { err = -ENOMEM; spin_unlock(&swap_lock); goto out_dput; } spin_lock(&p->lock); del_from_avail_list(p, true); if (p->prio < 0) { struct swap_info_struct *si = p; int nid; plist_for_each_entry_continue(si, &swap_active_head, list) { si->prio++; si->list.prio--; for_each_node(nid) { if (si->avail_lists[nid].prio != 1) si->avail_lists[nid].prio--; } } least_priority++; } plist_del(&p->list, &swap_active_head); atomic_long_sub(p->pages, &nr_swap_pages); total_swap_pages -= p->pages; spin_unlock(&p->lock); spin_unlock(&swap_lock); wait_for_allocation(p); set_current_oom_origin(); err = try_to_unuse(p->type); clear_current_oom_origin(); if (err) { /* re-insert swap space back into swap_list */ reinsert_swap_info(p); goto out_dput; } /* * Wait for swap operations protected by get/put_swap_device() * to complete. Because of synchronize_rcu() here, all swap * operations protected by RCU reader side lock (including any * spinlock) will be waited too. This makes it easy to * prevent folio_test_swapcache() and the following swap cache * operations from racing with swapoff. 
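/*
 * Array model of the auto-priority fixup performed by swapoff above: when a
 * device with an auto-assigned (negative) priority is removed, every
 * remaining auto-priority device behind it is bumped up by one so the
 * negative priorities stay dense. Illustrative only; the kernel walks a
 * plist and also adjusts the per-node avail lists and least_priority.
 */
static void model_remove_auto_prio(int *prio, int nr, int removed)
{
        if (prio[removed] >= 0)
                return;
        for (int i = removed + 1; i < nr; i++)
                if (prio[i] < 0)
                        prio[i]++;
}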
*/ percpu_ref_kill(&p->users); synchronize_rcu(); wait_for_completion(&p->comp); flush_work(&p->discard_work); flush_work(&p->reclaim_work); flush_percpu_swap_cluster(p); destroy_swap_extents(p); if (p->flags & SWP_CONTINUED) free_swap_count_continuations(p); if (!p->bdev || !bdev_nonrot(p->bdev)) atomic_dec(&nr_rotate_swap); mutex_lock(&swapon_mutex); spin_lock(&swap_lock); spin_lock(&p->lock); drain_mmlist(); swap_file = p->swap_file; p->swap_file = NULL; p->max = 0; swap_map = p->swap_map; p->swap_map = NULL; zeromap = p->zeromap; p->zeromap = NULL; cluster_info = p->cluster_info; p->cluster_info = NULL; spin_unlock(&p->lock); spin_unlock(&swap_lock); arch_swap_invalidate_area(p->type); zswap_swapoff(p->type); mutex_unlock(&swapon_mutex); kfree(p->global_cluster); p->global_cluster = NULL; vfree(swap_map); kvfree(zeromap); kvfree(cluster_info); /* Destroy swap account information */ swap_cgroup_swapoff(p->type); exit_swap_address_space(p->type); inode = mapping->host; inode_lock(inode); inode->i_flags &= ~S_SWAPFILE; inode_unlock(inode); filp_close(swap_file, NULL); /* * Clear the SWP_USED flag after all resources are freed so that swapon * can reuse this swap_info in alloc_swap_info() safely. It is ok to * not hold p->lock after we cleared its SWP_WRITEOK. */ spin_lock(&swap_lock); p->flags = 0; spin_unlock(&swap_lock); err = 0; atomic_inc(&proc_poll_event); wake_up_interruptible(&proc_poll_wait); out_dput: filp_close(victim, NULL); out: putname(pathname); return err; } #ifdef CONFIG_PROC_FS static __poll_t swaps_poll(struct file *file, poll_table *wait) { struct seq_file *seq = file->private_data; poll_wait(file, &proc_poll_wait, wait); if (seq->poll_event != atomic_read(&proc_poll_event)) { seq->poll_event = atomic_read(&proc_poll_event); return EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLPRI; } return EPOLLIN | EPOLLRDNORM; } /* iterator */ static void *swap_start(struct seq_file *swap, loff_t *pos) { struct swap_info_struct *si; int type; loff_t l = *pos; mutex_lock(&swapon_mutex); if (!l) return SEQ_START_TOKEN; for (type = 0; (si = swap_type_to_swap_info(type)); type++) { if (!(si->flags & SWP_USED) || !si->swap_map) continue; if (!--l) return si; } return NULL; } static void *swap_next(struct seq_file *swap, void *v, loff_t *pos) { struct swap_info_struct *si = v; int type; if (v == SEQ_START_TOKEN) type = 0; else type = si->type + 1; ++(*pos); for (; (si = swap_type_to_swap_info(type)); type++) { if (!(si->flags & SWP_USED) || !si->swap_map) continue; return si; } return NULL; } static void swap_stop(struct seq_file *swap, void *v) { mutex_unlock(&swapon_mutex); } static int swap_show(struct seq_file *swap, void *v) { struct swap_info_struct *si = v; struct file *file; int len; unsigned long bytes, inuse; if (si == SEQ_START_TOKEN) { seq_puts(swap, "Filename\t\t\t\tType\t\tSize\t\tUsed\t\tPriority\n"); return 0; } bytes = K(si->pages); inuse = K(swap_usage_in_pages(si)); file = si->swap_file; len = seq_file_path(swap, file, " \t\n\\"); seq_printf(swap, "%*s%s\t%lu\t%s%lu\t%s%d\n", len < 40 ? 40 - len : 1, " ", S_ISBLK(file_inode(file)->i_mode) ? "partition" : "file\t", bytes, bytes < 10000000 ? "\t" : "", inuse, inuse < 10000000 ? 
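/*
 * The swap_info table relies on publish-after-init: alloc_swap_info() below
 * fully initializes a new entry before making it visible with
 * smp_store_release(), so lockless readers pairing with acquire/RCU
 * semantics never see a half-initialized entry. Minimal userspace sketch of
 * that pattern using C11 atomics; the table size and struct are
 * hypothetical.
 */
#include <stdatomic.h>

struct model_info {
        int     type;
        int     flags;
};

static _Atomic(struct model_info *) model_table[32];

static void model_publish(int type, struct model_info *p)
{
        p->type = type;         /* initialize every field first ... */
        p->flags = 0;
        atomic_store_explicit(&model_table[type], p,
                              memory_order_release);    /* ... then publish */
}

static struct model_info *model_lookup(int type)
{
        return atomic_load_explicit(&model_table[type], memory_order_acquire);
}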
"\t" : "", si->prio); return 0; } static const struct seq_operations swaps_op = { .start = swap_start, .next = swap_next, .stop = swap_stop, .show = swap_show }; static int swaps_open(struct inode *inode, struct file *file) { struct seq_file *seq; int ret; ret = seq_open(file, &swaps_op); if (ret) return ret; seq = file->private_data; seq->poll_event = atomic_read(&proc_poll_event); return 0; } static const struct proc_ops swaps_proc_ops = { .proc_flags = PROC_ENTRY_PERMANENT, .proc_open = swaps_open, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_release = seq_release, .proc_poll = swaps_poll, }; static int __init procswaps_init(void) { proc_create("swaps", 0, NULL, &swaps_proc_ops); return 0; } __initcall(procswaps_init); #endif /* CONFIG_PROC_FS */ #ifdef MAX_SWAPFILES_CHECK static int __init max_swapfiles_check(void) { MAX_SWAPFILES_CHECK(); return 0; } late_initcall(max_swapfiles_check); #endif static struct swap_info_struct *alloc_swap_info(void) { struct swap_info_struct *p; struct swap_info_struct *defer = NULL; unsigned int type; int i; p = kvzalloc(struct_size(p, avail_lists, nr_node_ids), GFP_KERNEL); if (!p) return ERR_PTR(-ENOMEM); if (percpu_ref_init(&p->users, swap_users_ref_free, PERCPU_REF_INIT_DEAD, GFP_KERNEL)) { kvfree(p); return ERR_PTR(-ENOMEM); } spin_lock(&swap_lock); for (type = 0; type < nr_swapfiles; type++) { if (!(swap_info[type]->flags & SWP_USED)) break; } if (type >= MAX_SWAPFILES) { spin_unlock(&swap_lock); percpu_ref_exit(&p->users); kvfree(p); return ERR_PTR(-EPERM); } if (type >= nr_swapfiles) { p->type = type; /* * Publish the swap_info_struct after initializing it. * Note that kvzalloc() above zeroes all its fields. */ smp_store_release(&swap_info[type], p); /* rcu_assign_pointer() */ nr_swapfiles++; } else { defer = p; p = swap_info[type]; /* * Do not memset this entry: a racing procfs swap_next() * would be relying on p->type to remain valid. */ } p->swap_extent_root = RB_ROOT; plist_node_init(&p->list, 0); for_each_node(i) plist_node_init(&p->avail_lists[i], 0); p->flags = SWP_USED; spin_unlock(&swap_lock); if (defer) { percpu_ref_exit(&defer->users); kvfree(defer); } spin_lock_init(&p->lock); spin_lock_init(&p->cont_lock); atomic_long_set(&p->inuse_pages, SWAP_USAGE_OFFLIST_BIT); init_completion(&p->comp); return p; } static int claim_swapfile(struct swap_info_struct *si, struct inode *inode) { if (S_ISBLK(inode->i_mode)) { si->bdev = I_BDEV(inode); /* * Zoned block devices contain zones that have a sequential * write only restriction. Hence zoned block devices are not * suitable for swapping. Disallow them here. */ if (bdev_is_zoned(si->bdev)) return -EINVAL; si->flags |= SWP_BLKDEV; } else if (S_ISREG(inode->i_mode)) { si->bdev = inode->i_sb->s_bdev; } return 0; } /* * Find out how many pages are allowed for a single swap device. There * are two limiting factors: * 1) the number of bits for the swap offset in the swp_entry_t type, and * 2) the number of bits in the swap pte, as defined by the different * architectures. * * In order to find the largest possible bit mask, a swap entry with * swap type 0 and swap offset ~0UL is created, encoded to a swap pte, * decoded to a swp_entry_t again, and finally the swap offset is * extracted. * * This will mask all the bits from the initial ~0UL mask that can't * be encoded in either the swp_entry_t or the architecture definition * of a swap pte. 
*/ unsigned long generic_max_swapfile_size(void) { return swp_offset(pte_to_swp_entry( swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; } /* Can be overridden by an architecture for additional checks. */ __weak unsigned long arch_max_swapfile_size(void) { return generic_max_swapfile_size(); } static unsigned long read_swap_header(struct swap_info_struct *si, union swap_header *swap_header, struct inode *inode) { int i; unsigned long maxpages; unsigned long swapfilepages; unsigned long last_page; if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) { pr_err("Unable to find swap-space signature\n"); return 0; } /* swap partition endianness hack... */ if (swab32(swap_header->info.version) == 1) { swab32s(&swap_header->info.version); swab32s(&swap_header->info.last_page); swab32s(&swap_header->info.nr_badpages); if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) return 0; for (i = 0; i < swap_header->info.nr_badpages; i++) swab32s(&swap_header->info.badpages[i]); } /* Check the swap header's sub-version */ if (swap_header->info.version != 1) { pr_warn("Unable to handle swap header version %d\n", swap_header->info.version); return 0; } maxpages = swapfile_maximum_size; last_page = swap_header->info.last_page; if (!last_page) { pr_warn("Empty swap-file\n"); return 0; } if (last_page > maxpages) { pr_warn("Truncating oversized swap area, only using %luk out of %luk\n", K(maxpages), K(last_page)); } if (maxpages > last_page) { maxpages = last_page + 1; /* p->max is an unsigned int: don't overflow it */ if ((unsigned int)maxpages == 0) maxpages = UINT_MAX; } if (!maxpages) return 0; swapfilepages = i_size_read(inode) >> PAGE_SHIFT; if (swapfilepages && maxpages > swapfilepages) { pr_warn("Swap area shorter than signature indicates\n"); return 0; } if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode)) return 0; if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) return 0; return maxpages; } static int setup_swap_map_and_extents(struct swap_info_struct *si, union swap_header *swap_header, unsigned char *swap_map, unsigned long maxpages, sector_t *span) { unsigned int nr_good_pages; unsigned long i; int nr_extents; nr_good_pages = maxpages - 1; /* omit header page */ for (i = 0; i < swap_header->info.nr_badpages; i++) { unsigned int page_nr = swap_header->info.badpages[i]; if (page_nr == 0 || page_nr > swap_header->info.last_page) return -EINVAL; if (page_nr < maxpages) { swap_map[page_nr] = SWAP_MAP_BAD; nr_good_pages--; } } if (nr_good_pages) { swap_map[0] = SWAP_MAP_BAD; si->max = maxpages; si->pages = nr_good_pages; nr_extents = setup_swap_extents(si, span); if (nr_extents < 0) return nr_extents; nr_good_pages = si->pages; } if (!nr_good_pages) { pr_warn("Empty swap-file\n"); return -EINVAL; } return nr_extents; } #define SWAP_CLUSTER_INFO_COLS \ DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info)) #define SWAP_CLUSTER_SPACE_COLS \ DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER) #define SWAP_CLUSTER_COLS \ max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS) static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si, union swap_header *swap_header, unsigned long maxpages) { unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER); struct swap_cluster_info *cluster_info; unsigned long i, j, idx; int err = -ENOMEM; cluster_info = kvcalloc(nr_clusters, sizeof(*cluster_info), GFP_KERNEL); if (!cluster_info) goto err; for (i = 0; i < nr_clusters; i++) spin_lock_init(&cluster_info[i].lock); if (!(si->flags & SWP_SOLIDSTATE)) { 
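	/*
	 * Rotational device: use a single device-wide allocation hint,
	 * one next[] slot per allocation order, serialized by
	 * global_cluster_lock (descriptive note on the block below).
	 */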
si->global_cluster = kmalloc(sizeof(*si->global_cluster), GFP_KERNEL); if (!si->global_cluster) goto err_free; for (i = 0; i < SWAP_NR_ORDERS; i++) si->global_cluster->next[i] = SWAP_ENTRY_INVALID; spin_lock_init(&si->global_cluster_lock); } /* * Mark unusable pages as unavailable. The clusters aren't * marked free yet, so no list operations are involved yet. * * See setup_swap_map_and_extents(): header page, bad pages, * and the EOF part of the last cluster. */ inc_cluster_info_page(si, cluster_info, 0); for (i = 0; i < swap_header->info.nr_badpages; i++) inc_cluster_info_page(si, cluster_info, swap_header->info.badpages[i]); for (i = maxpages; i < round_up(maxpages, SWAPFILE_CLUSTER); i++) inc_cluster_info_page(si, cluster_info, i); INIT_LIST_HEAD(&si->free_clusters); INIT_LIST_HEAD(&si->full_clusters); INIT_LIST_HEAD(&si->discard_clusters); for (i = 0; i < SWAP_NR_ORDERS; i++) { INIT_LIST_HEAD(&si->nonfull_clusters[i]); INIT_LIST_HEAD(&si->frag_clusters[i]); atomic_long_set(&si->frag_cluster_nr[i], 0); } /* * Reduce false cache line sharing between cluster_info and * sharing same address space. */ for (j = 0; j < SWAP_CLUSTER_COLS; j++) { for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) { struct swap_cluster_info *ci; idx = i * SWAP_CLUSTER_COLS + j; ci = cluster_info + idx; if (idx >= nr_clusters) continue; if (ci->count) { ci->flags = CLUSTER_FLAG_NONFULL; list_add_tail(&ci->list, &si->nonfull_clusters[0]); continue; } ci->flags = CLUSTER_FLAG_FREE; list_add_tail(&ci->list, &si->free_clusters); } } return cluster_info; err_free: kvfree(cluster_info); err: return ERR_PTR(err); } SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) { struct swap_info_struct *si; struct filename *name; struct file *swap_file = NULL; struct address_space *mapping; struct dentry *dentry; int prio; int error; union swap_header *swap_header; int nr_extents; sector_t span; unsigned long maxpages; unsigned char *swap_map = NULL; unsigned long *zeromap = NULL; struct swap_cluster_info *cluster_info = NULL; struct folio *folio = NULL; struct inode *inode = NULL; bool inced_nr_rotate_swap = false; if (swap_flags & ~SWAP_FLAGS_VALID) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (!swap_avail_heads) return -ENOMEM; si = alloc_swap_info(); if (IS_ERR(si)) return PTR_ERR(si); INIT_WORK(&si->discard_work, swap_discard_work); INIT_WORK(&si->reclaim_work, swap_reclaim_work); name = getname(specialfile); if (IS_ERR(name)) { error = PTR_ERR(name); name = NULL; goto bad_swap; } swap_file = file_open_name(name, O_RDWR | O_LARGEFILE | O_EXCL, 0); if (IS_ERR(swap_file)) { error = PTR_ERR(swap_file); swap_file = NULL; goto bad_swap; } si->swap_file = swap_file; mapping = swap_file->f_mapping; dentry = swap_file->f_path.dentry; inode = mapping->host; error = claim_swapfile(si, inode); if (unlikely(error)) goto bad_swap; inode_lock(inode); if (d_unlinked(dentry) || cant_mount(dentry)) { error = -ENOENT; goto bad_swap_unlock_inode; } if (IS_SWAPFILE(inode)) { error = -EBUSY; goto bad_swap_unlock_inode; } /* * Read the swap header. 
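 * The header occupies page 0 of the swap file or partition; it is read
 * through the mapping's ->read_folio op and then parsed by
 * read_swap_header().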
*/ if (!mapping->a_ops->read_folio) { error = -EINVAL; goto bad_swap_unlock_inode; } folio = read_mapping_folio(mapping, 0, swap_file); if (IS_ERR(folio)) { error = PTR_ERR(folio); goto bad_swap_unlock_inode; } swap_header = kmap_local_folio(folio, 0); maxpages = read_swap_header(si, swap_header, inode); if (unlikely(!maxpages)) { error = -EINVAL; goto bad_swap_unlock_inode; } /* OK, set up the swap map and apply the bad block list */ swap_map = vzalloc(maxpages); if (!swap_map) { error = -ENOMEM; goto bad_swap_unlock_inode; } error = swap_cgroup_swapon(si->type, maxpages); if (error) goto bad_swap_unlock_inode; nr_extents = setup_swap_map_and_extents(si, swap_header, swap_map, maxpages, &span); if (unlikely(nr_extents < 0)) { error = nr_extents; goto bad_swap_unlock_inode; } /* * Use kvmalloc_array instead of bitmap_zalloc as the allocation order might * be above MAX_PAGE_ORDER incase of a large swap file. */ zeromap = kvmalloc_array(BITS_TO_LONGS(maxpages), sizeof(long), GFP_KERNEL | __GFP_ZERO); if (!zeromap) { error = -ENOMEM; goto bad_swap_unlock_inode; } if (si->bdev && bdev_stable_writes(si->bdev)) si->flags |= SWP_STABLE_WRITES; if (si->bdev && bdev_synchronous(si->bdev)) si->flags |= SWP_SYNCHRONOUS_IO; if (si->bdev && bdev_nonrot(si->bdev)) { si->flags |= SWP_SOLIDSTATE; } else { atomic_inc(&nr_rotate_swap); inced_nr_rotate_swap = true; } cluster_info = setup_clusters(si, swap_header, maxpages); if (IS_ERR(cluster_info)) { error = PTR_ERR(cluster_info); cluster_info = NULL; goto bad_swap_unlock_inode; } if ((swap_flags & SWAP_FLAG_DISCARD) && si->bdev && bdev_max_discard_sectors(si->bdev)) { /* * When discard is enabled for swap with no particular * policy flagged, we set all swap discard flags here in * order to sustain backward compatibility with older * swapon(8) releases. */ si->flags |= (SWP_DISCARDABLE | SWP_AREA_DISCARD | SWP_PAGE_DISCARD); /* * By flagging sys_swapon, a sysadmin can tell us to * either do single-time area discards only, or to just * perform discards for released swap page-clusters. * Now it's time to adjust the p->flags accordingly. */ if (swap_flags & SWAP_FLAG_DISCARD_ONCE) si->flags &= ~SWP_PAGE_DISCARD; else if (swap_flags & SWAP_FLAG_DISCARD_PAGES) si->flags &= ~SWP_AREA_DISCARD; /* issue a swapon-time discard if it's still required */ if (si->flags & SWP_AREA_DISCARD) { int err = discard_swap(si); if (unlikely(err)) pr_err("swapon: discard_swap(%p): %d\n", si, err); } } error = init_swap_address_space(si->type, maxpages); if (error) goto bad_swap_unlock_inode; error = zswap_swapon(si->type, maxpages); if (error) goto free_swap_address_space; /* * Flush any pending IO and dirty mappings before we start using this * swap device. */ inode->i_flags |= S_SWAPFILE; error = inode_drain_writes(inode); if (error) { inode->i_flags &= ~S_SWAPFILE; goto free_swap_zswap; } mutex_lock(&swapon_mutex); prio = -1; if (swap_flags & SWAP_FLAG_PREFER) prio = swap_flags & SWAP_FLAG_PRIO_MASK; enable_swap_info(si, prio, swap_map, cluster_info, zeromap); pr_info("Adding %uk swap on %s. Priority:%d extents:%d across:%lluk %s%s%s%s\n", K(si->pages), name->name, si->prio, nr_extents, K((unsigned long long)span), (si->flags & SWP_SOLIDSTATE) ? "SS" : "", (si->flags & SWP_DISCARDABLE) ? "D" : "", (si->flags & SWP_AREA_DISCARD) ? "s" : "", (si->flags & SWP_PAGE_DISCARD) ? 
"c" : ""); mutex_unlock(&swapon_mutex); atomic_inc(&proc_poll_event); wake_up_interruptible(&proc_poll_wait); error = 0; goto out; free_swap_zswap: zswap_swapoff(si->type); free_swap_address_space: exit_swap_address_space(si->type); bad_swap_unlock_inode: inode_unlock(inode); bad_swap: kfree(si->global_cluster); si->global_cluster = NULL; inode = NULL; destroy_swap_extents(si); swap_cgroup_swapoff(si->type); spin_lock(&swap_lock); si->swap_file = NULL; si->flags = 0; spin_unlock(&swap_lock); vfree(swap_map); kvfree(zeromap); kvfree(cluster_info); if (inced_nr_rotate_swap) atomic_dec(&nr_rotate_swap); if (swap_file) filp_close(swap_file, NULL); out: if (!IS_ERR_OR_NULL(folio)) folio_release_kmap(folio, swap_header); if (name) putname(name); if (inode) inode_unlock(inode); return error; } void si_swapinfo(struct sysinfo *val) { unsigned int type; unsigned long nr_to_be_unused = 0; spin_lock(&swap_lock); for (type = 0; type < nr_swapfiles; type++) { struct swap_info_struct *si = swap_info[type]; if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK)) nr_to_be_unused += swap_usage_in_pages(si); } val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused; val->totalswap = total_swap_pages + nr_to_be_unused; spin_unlock(&swap_lock); } /* * Verify that nr swap entries are valid and increment their swap map counts. * * Returns error code in following case. * - success -> 0 * - swp_entry is invalid -> EINVAL * - swap-cache reference is requested but there is already one. -> EEXIST * - swap-cache reference is requested but the entry is not used. -> ENOENT * - swap-mapped reference requested but needs continued swap count. -> ENOMEM */ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr) { struct swap_info_struct *si; struct swap_cluster_info *ci; unsigned long offset; unsigned char count; unsigned char has_cache; int err, i; si = swp_swap_info(entry); if (WARN_ON_ONCE(!si)) { pr_err("%s%08lx\n", Bad_file, entry.val); return -EINVAL; } offset = swp_offset(entry); VM_WARN_ON(nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER); VM_WARN_ON(usage == 1 && nr > 1); ci = lock_cluster(si, offset); err = 0; for (i = 0; i < nr; i++) { count = si->swap_map[offset + i]; /* * swapin_readahead() doesn't check if a swap entry is valid, so the * swap entry could be SWAP_MAP_BAD. Check here with lock held. */ if (unlikely(swap_count(count) == SWAP_MAP_BAD)) { err = -ENOENT; goto unlock_out; } has_cache = count & SWAP_HAS_CACHE; count &= ~SWAP_HAS_CACHE; if (!count && !has_cache) { err = -ENOENT; } else if (usage == SWAP_HAS_CACHE) { if (has_cache) err = -EEXIST; } else if ((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX) { err = -EINVAL; } if (err) goto unlock_out; } for (i = 0; i < nr; i++) { count = si->swap_map[offset + i]; has_cache = count & SWAP_HAS_CACHE; count &= ~SWAP_HAS_CACHE; if (usage == SWAP_HAS_CACHE) has_cache = SWAP_HAS_CACHE; else if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX) count += usage; else if (swap_count_continued(si, offset + i, count)) count = COUNT_CONTINUED; else { /* * Don't need to rollback changes, because if * usage == 1, there must be nr == 1. */ err = -ENOMEM; goto unlock_out; } WRITE_ONCE(si->swap_map[offset + i], count | has_cache); } unlock_out: unlock_cluster(ci); return err; } /* * Help swapoff by noting that swap entry belongs to shmem/tmpfs * (in which case its reference count is never incremented). */ void swap_shmem_alloc(swp_entry_t entry, int nr) { __swap_duplicate(entry, SWAP_MAP_SHMEM, nr); } /* * Increase reference count of swap entry by 1. 
* Returns 0 for success, or -ENOMEM if a swap_count_continuation is required * but could not be atomically allocated. Returns 0, just as if it succeeded, * if __swap_duplicate() fails for another reason (-EINVAL or -ENOENT), which * might occur if a page table entry has got corrupted. */ int swap_duplicate(swp_entry_t entry) { int err = 0; while (!err && __swap_duplicate(entry, 1, 1) == -ENOMEM) err = add_swap_count_continuation(entry, GFP_ATOMIC); return err; } /* * @entry: first swap entry from which we allocate nr swap cache. * * Called when allocating swap cache for existing swap entries, * This can return error codes. Returns 0 at success. * -EEXIST means there is a swap cache. * Note: return code is different from swap_duplicate(). */ int swapcache_prepare(swp_entry_t entry, int nr) { return __swap_duplicate(entry, SWAP_HAS_CACHE, nr); } void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr) { unsigned long offset = swp_offset(entry); cluster_swap_free_nr(si, offset, nr, SWAP_HAS_CACHE); } struct swap_info_struct *swp_swap_info(swp_entry_t entry) { return swap_type_to_swap_info(swp_type(entry)); } /* * out-of-line methods to avoid include hell. */ struct address_space *swapcache_mapping(struct folio *folio) { return swp_swap_info(folio->swap)->swap_file->f_mapping; } EXPORT_SYMBOL_GPL(swapcache_mapping); pgoff_t __folio_swap_cache_index(struct folio *folio) { return swap_cache_index(folio->swap); } EXPORT_SYMBOL_GPL(__folio_swap_cache_index); /* * add_swap_count_continuation - called when a swap count is duplicated * beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's * page of the original vmalloc'ed swap_map, to hold the continuation count * (for that entry and for its neighbouring PAGE_SIZE swap entries). Called * again when count is duplicated beyond SWAP_MAP_MAX * SWAP_CONT_MAX, etc. * * These continuation pages are seldom referenced: the common paths all work * on the original swap_map, only referring to a continuation page when the * low "digit" of a count is incremented or decremented through SWAP_MAP_MAX. * * add_swap_count_continuation(, GFP_ATOMIC) can be called while holding * page table locks; if it fails, add_swap_count_continuation(, GFP_KERNEL) * can be called after dropping locks. */ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) { struct swap_info_struct *si; struct swap_cluster_info *ci; struct page *head; struct page *page; struct page *list_page; pgoff_t offset; unsigned char count; int ret = 0; /* * When debugging, it's easier to use __GFP_ZERO here; but it's better * for latency not to zero a page while GFP_ATOMIC and holding locks. */ page = alloc_page(gfp_mask | __GFP_HIGHMEM); si = get_swap_device(entry); if (!si) { /* * An acceptable race has occurred since the failing * __swap_duplicate(): the swap device may be swapoff */ goto outer; } offset = swp_offset(entry); ci = lock_cluster(si, offset); count = swap_count(si->swap_map[offset]); if ((count & ~COUNT_CONTINUED) != SWAP_MAP_MAX) { /* * The higher the swap count, the more likely it is that tasks * will race to add swap count continuation: we need to avoid * over-provisioning. */ goto out; } if (!page) { ret = -ENOMEM; goto out; } head = vmalloc_to_page(si->swap_map + offset); offset &= ~PAGE_MASK; spin_lock(&si->cont_lock); /* * Page allocation does not initialize the page's lru field, * but it does always reset its private field. 
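 * So a zero page_private(head) reliably means that no continuation list
 * has been started yet for this swap_map page.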
*/ if (!page_private(head)) { BUG_ON(count & COUNT_CONTINUED); INIT_LIST_HEAD(&head->lru); set_page_private(head, SWP_CONTINUED); si->flags |= SWP_CONTINUED; } list_for_each_entry(list_page, &head->lru, lru) { unsigned char *map; /* * If the previous map said no continuation, but we've found * a continuation page, free our allocation and use this one. */ if (!(count & COUNT_CONTINUED)) goto out_unlock_cont; map = kmap_local_page(list_page) + offset; count = *map; kunmap_local(map); /* * If this continuation count now has some space in it, * free our allocation and use this one. */ if ((count & ~COUNT_CONTINUED) != SWAP_CONT_MAX) goto out_unlock_cont; } list_add_tail(&page->lru, &head->lru); page = NULL; /* now it's attached, don't free it */ out_unlock_cont: spin_unlock(&si->cont_lock); out: unlock_cluster(ci); put_swap_device(si); outer: if (page) __free_page(page); return ret; } /* * swap_count_continued - when the original swap_map count is incremented * from SWAP_MAP_MAX, check if there is already a continuation page to carry * into, carry if so, or else fail until a new continuation page is allocated; * when the original swap_map count is decremented from 0 with continuation, * borrow from the continuation and report whether it still holds more. * Called while __swap_duplicate() or caller of __swap_entry_free_locked() * holds cluster lock. */ static bool swap_count_continued(struct swap_info_struct *si, pgoff_t offset, unsigned char count) { struct page *head; struct page *page; unsigned char *map; bool ret; head = vmalloc_to_page(si->swap_map + offset); if (page_private(head) != SWP_CONTINUED) { BUG_ON(count & COUNT_CONTINUED); return false; /* need to add count continuation */ } spin_lock(&si->cont_lock); offset &= ~PAGE_MASK; page = list_next_entry(head, lru); map = kmap_local_page(page) + offset; if (count == SWAP_MAP_MAX) /* initial increment from swap_map */ goto init_map; /* jump over SWAP_CONT_MAX checks */ if (count == (SWAP_MAP_MAX | COUNT_CONTINUED)) { /* incrementing */ /* * Think of how you add 1 to 999 */ while (*map == (SWAP_CONT_MAX | COUNT_CONTINUED)) { kunmap_local(map); page = list_next_entry(page, lru); BUG_ON(page == head); map = kmap_local_page(page) + offset; } if (*map == SWAP_CONT_MAX) { kunmap_local(map); page = list_next_entry(page, lru); if (page == head) { ret = false; /* add count continuation */ goto out; } map = kmap_local_page(page) + offset; init_map: *map = 0; /* we didn't zero the page */ } *map += 1; kunmap_local(map); while ((page = list_prev_entry(page, lru)) != head) { map = kmap_local_page(page) + offset; *map = COUNT_CONTINUED; kunmap_local(map); } ret = true; /* incremented */ } else { /* decrementing */ /* * Think of how you subtract 1 from 1000 */ BUG_ON(count != COUNT_CONTINUED); while (*map == COUNT_CONTINUED) { kunmap_local(map); page = list_next_entry(page, lru); BUG_ON(page == head); map = kmap_local_page(page) + offset; } BUG_ON(*map == 0); *map -= 1; if (*map == 0) count = 0; kunmap_local(map); while ((page = list_prev_entry(page, lru)) != head) { map = kmap_local_page(page) + offset; *map = SWAP_CONT_MAX | count; count = COUNT_CONTINUED; kunmap_local(map); } ret = count == COUNT_CONTINUED; } out: spin_unlock(&si->cont_lock); return ret; } /* * free_swap_count_continuations - swapoff free all the continuation pages * appended to the swap_map, after swap_map is quiesced, before vfree'ing it. 
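 * Each vmalloc'ed swap_map page holds PAGE_SIZE one-byte counts, hence the
 * PAGE_SIZE stride of the loop below.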
*/ static void free_swap_count_continuations(struct swap_info_struct *si) { pgoff_t offset; for (offset = 0; offset < si->max; offset += PAGE_SIZE) { struct page *head; head = vmalloc_to_page(si->swap_map + offset); if (page_private(head)) { struct page *page, *next; list_for_each_entry_safe(page, next, &head->lru, lru) { list_del(&page->lru); __free_page(page); } } } } #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) static bool __has_usable_swap(void) { return !plist_head_empty(&swap_active_head); } void __folio_throttle_swaprate(struct folio *folio, gfp_t gfp) { struct swap_info_struct *si, *next; int nid = folio_nid(folio); if (!(gfp & __GFP_IO)) return; if (!__has_usable_swap()) return; if (!blk_cgroup_congested()) return; /* * We've already scheduled a throttle, avoid taking the global swap * lock. */ if (current->throttle_disk) return; spin_lock(&swap_avail_lock); plist_for_each_entry_safe(si, next, &swap_avail_heads[nid], avail_lists[nid]) { if (si->bdev) { blkcg_schedule_throttle(si->bdev->bd_disk, true); break; } } spin_unlock(&swap_avail_lock); } #endif static int __init swapfile_init(void) { int nid; swap_avail_heads = kmalloc_array(nr_node_ids, sizeof(struct plist_head), GFP_KERNEL); if (!swap_avail_heads) { pr_emerg("Not enough memory for swap heads, swap is disabled\n"); return -ENOMEM; } for_each_node(nid) plist_head_init(&swap_avail_heads[nid]); swapfile_maximum_size = arch_max_swapfile_size(); #ifdef CONFIG_MIGRATION if (swapfile_maximum_size >= (1UL << SWP_MIG_TOTAL_BITS)) swap_migration_ad_supported = true; #endif /* CONFIG_MIGRATION */ return 0; } subsys_initcall(swapfile_init);
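/*
 * Illustrative caller-side pattern for the swap count continuation
 * machinery documented above (a sketch only, not code from this file; the
 * 'pte'/'ptl' locals and the surrounding locking are assumptions about a
 * typical page-table walker):
 *
 *	if (swap_duplicate(entry) == -ENOMEM) {
 *		// the GFP_ATOMIC continuation attempt failed under the PTL
 *		pte_unmap_unlock(pte, ptl);
 *		if (add_swap_count_continuation(entry, GFP_KERNEL))
 *			return -ENOMEM;
 *		// retake the lock, revalidate the entry and retry
 *	}
 */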
// SPDX-License-Identifier: GPL-2.0-only /* * vivid-vid-cap.c - video capture support functions. * * Copyright 2014 Cisco Systems, Inc. and/or its affiliates. All rights reserved. */ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/vmalloc.h> #include <linux/videodev2.h> #include <linux/prandom.h> #include <linux/v4l2-dv-timings.h> #include <media/v4l2-common.h> #include <media/v4l2-event.h> #include <media/v4l2-dv-timings.h> #include <media/v4l2-rect.h> #include "vivid-core.h" #include "vivid-vid-common.h" #include "vivid-kthread-cap.h" #include "vivid-vid-cap.h" /* Sizes must be in increasing order */ static const struct v4l2_frmsize_discrete webcam_sizes[] = { { 320, 180 }, { 640, 360 }, { 640, 480 }, { 1280, 720 }, { 1920, 1080 }, { 3840, 2160 }, }; /* * Frame intervals must be listed from lowest to highest frame rate; * webcam_ival_count() below limits how many of these entries are valid * for each entry in webcam_sizes.
*/ static const struct v4l2_fract webcam_intervals[] = { { 1, 1 }, { 1, 2 }, { 1, 4 }, { 1, 5 }, { 1, 10 }, { 2, 25 }, { 1, 15 }, /* 7 - maximum for 2160p */ { 1, 25 }, { 1, 30 }, /* 9 - maximum for 1080p */ { 1, 40 }, { 1, 50 }, { 1, 60 }, /* 12 - maximum for 720p */ { 1, 120 }, }; /* Limit maximum FPS rates for high resolutions */ #define IVAL_COUNT_720P 12 /* 720p and up is limited to 60 fps */ #define IVAL_COUNT_1080P 9 /* 1080p and up is limited to 30 fps */ #define IVAL_COUNT_2160P 7 /* 2160p and up is limited to 15 fps */ static inline unsigned int webcam_ival_count(const struct vivid_dev *dev, unsigned int frmsize_idx) { if (webcam_sizes[frmsize_idx].height >= 2160) return IVAL_COUNT_2160P; if (webcam_sizes[frmsize_idx].height >= 1080) return IVAL_COUNT_1080P; if (webcam_sizes[frmsize_idx].height >= 720) return IVAL_COUNT_720P; /* For low resolutions, allow all FPS rates */ return ARRAY_SIZE(webcam_intervals); } static int vid_cap_queue_setup(struct vb2_queue *vq, unsigned *nbuffers, unsigned *nplanes, unsigned sizes[], struct device *alloc_devs[]) { struct vivid_dev *dev = vb2_get_drv_priv(vq); unsigned buffers = tpg_g_buffers(&dev->tpg); unsigned h = dev->fmt_cap_rect.height; unsigned p; if (dev->field_cap == V4L2_FIELD_ALTERNATE) { /* * You cannot use read() with FIELD_ALTERNATE since the field * information (TOP/BOTTOM) cannot be passed back to the user. */ if (vb2_fileio_is_active(vq)) return -EINVAL; } if (dev->queue_setup_error) { /* * Error injection: test what happens if queue_setup() returns * an error. */ dev->queue_setup_error = false; return -EINVAL; } if (*nplanes) { /* * Check if the number of requested planes match * the number of buffers in the current format. You can't mix that. */ if (*nplanes != buffers) return -EINVAL; for (p = 0; p < buffers; p++) { if (sizes[p] < tpg_g_line_width(&dev->tpg, p) * h / dev->fmt_cap->vdownsampling[p] + dev->fmt_cap->data_offset[p]) return -EINVAL; } } else { for (p = 0; p < buffers; p++) sizes[p] = (tpg_g_line_width(&dev->tpg, p) * h) / dev->fmt_cap->vdownsampling[p] + dev->fmt_cap->data_offset[p]; } *nplanes = buffers; dprintk(dev, 1, "%s: count=%d\n", __func__, *nbuffers); for (p = 0; p < buffers; p++) dprintk(dev, 1, "%s: size[%u]=%u\n", __func__, p, sizes[p]); return 0; } static int vid_cap_buf_prepare(struct vb2_buffer *vb) { struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); unsigned long size; unsigned buffers = tpg_g_buffers(&dev->tpg); unsigned p; dprintk(dev, 1, "%s\n", __func__); if (WARN_ON(NULL == dev->fmt_cap)) return -EINVAL; if (dev->buf_prepare_error) { /* * Error injection: test what happens if buf_prepare() returns * an error. */ dev->buf_prepare_error = false; return -EINVAL; } for (p = 0; p < buffers; p++) { size = (tpg_g_line_width(&dev->tpg, p) * dev->fmt_cap_rect.height) / dev->fmt_cap->vdownsampling[p] + dev->fmt_cap->data_offset[p]; if (vb2_plane_size(vb, p) < size) { dprintk(dev, 1, "%s data will not fit into plane %u (%lu < %lu)\n", __func__, p, vb2_plane_size(vb, p), size); return -EINVAL; } vb2_set_plane_payload(vb, p, size); vb->planes[p].data_offset = dev->fmt_cap->data_offset[p]; } return 0; } static void vid_cap_buf_finish(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); struct v4l2_timecode *tc = &vbuf->timecode; unsigned fps = 25; unsigned seq = vbuf->sequence; if (!vivid_is_sdtv_cap(dev)) return; /* * Set the timecode. Rarely used, so it is interesting to * test this. 
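	 * As a purely illustrative example: with a 60 Hz standard
	 * (fps == 30 below), buffer sequence number 3723 comes out as
	 * timecode 00:02:04 with a frame count of 3.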
*/ vbuf->flags |= V4L2_BUF_FLAG_TIMECODE; if (dev->std_cap[dev->input] & V4L2_STD_525_60) fps = 30; tc->type = (fps == 30) ? V4L2_TC_TYPE_30FPS : V4L2_TC_TYPE_25FPS; tc->flags = 0; tc->frames = seq % fps; tc->seconds = (seq / fps) % 60; tc->minutes = (seq / (60 * fps)) % 60; tc->hours = (seq / (60 * 60 * fps)) % 24; } static void vid_cap_buf_queue(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); struct vivid_buffer *buf = container_of(vbuf, struct vivid_buffer, vb); dprintk(dev, 1, "%s\n", __func__); spin_lock(&dev->slock); list_add_tail(&buf->list, &dev->vid_cap_active); spin_unlock(&dev->slock); } static int vid_cap_start_streaming(struct vb2_queue *vq, unsigned count) { struct vivid_dev *dev = vb2_get_drv_priv(vq); unsigned i; int err; dev->vid_cap_seq_count = 0; dprintk(dev, 1, "%s\n", __func__); for (i = 0; i < MAX_VID_CAP_BUFFERS; i++) dev->must_blank[i] = tpg_g_perc_fill(&dev->tpg) < 100; if (dev->start_streaming_error) { dev->start_streaming_error = false; err = -EINVAL; } else { err = vivid_start_generating_vid_cap(dev, &dev->vid_cap_streaming); } if (err) { struct vivid_buffer *buf, *tmp; list_for_each_entry_safe(buf, tmp, &dev->vid_cap_active, list) { list_del(&buf->list); vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_QUEUED); } } return err; } /* abort streaming and wait for last buffer */ static void vid_cap_stop_streaming(struct vb2_queue *vq) { struct vivid_dev *dev = vb2_get_drv_priv(vq); dprintk(dev, 1, "%s\n", __func__); vivid_stop_generating_vid_cap(dev, &dev->vid_cap_streaming); } static void vid_cap_buf_request_complete(struct vb2_buffer *vb) { struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); v4l2_ctrl_request_complete(vb->req_obj.req, &dev->ctrl_hdl_vid_cap); } const struct vb2_ops vivid_vid_cap_qops = { .queue_setup = vid_cap_queue_setup, .buf_prepare = vid_cap_buf_prepare, .buf_finish = vid_cap_buf_finish, .buf_queue = vid_cap_buf_queue, .start_streaming = vid_cap_start_streaming, .stop_streaming = vid_cap_stop_streaming, .buf_request_complete = vid_cap_buf_request_complete, }; /* * Determine the 'picture' quality based on the current TV frequency: either * COLOR for a good 'signal', GRAY (grayscale picture) for a slightly off * signal or NOISE for no signal. */ void vivid_update_quality(struct vivid_dev *dev) { unsigned freq_modulus; if (dev->input_is_connected_to_output[dev->input]) { /* * The 'noise' will only be replaced by the actual video * if the output video matches the input video settings. */ tpg_s_quality(&dev->tpg, TPG_QUAL_NOISE, 0); return; } if (vivid_is_hdmi_cap(dev) && VIVID_INVALID_SIGNAL(dev->dv_timings_signal_mode[dev->input])) { tpg_s_quality(&dev->tpg, TPG_QUAL_NOISE, 0); return; } if (vivid_is_sdtv_cap(dev) && VIVID_INVALID_SIGNAL(dev->std_signal_mode[dev->input])) { tpg_s_quality(&dev->tpg, TPG_QUAL_NOISE, 0); return; } if (!vivid_is_tv_cap(dev)) { tpg_s_quality(&dev->tpg, TPG_QUAL_COLOR, 0); return; } /* * There is a fake channel every 6 MHz at 49.25, 55.25, etc. * From +/- 0.25 MHz around the channel there is color, and from * +/- 1 MHz there is grayscale (chroma is lost). * Everywhere else it is just noise. 
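	 * Worked example (values chosen for illustration): tv_freq is in
	 * units of 1/16 MHz, so tuning to 49.25 MHz gives tv_freq == 788 and
	 * freq_modulus == (788 - 676) % 96 == 16, i.e. spot on a channel,
	 * hence COLOR; 49.75 MHz gives freq_modulus == 24, which falls in the
	 * grayscale band; any freq_modulus above 32 is treated as noise.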
*/ freq_modulus = (dev->tv_freq - 676 /* (43.25-1) * 16 */) % (6 * 16); if (freq_modulus > 2 * 16) { tpg_s_quality(&dev->tpg, TPG_QUAL_NOISE, next_pseudo_random32(dev->tv_freq ^ 0x55) & 0x3f); return; } if (freq_modulus < 12 /*0.75 * 16*/ || freq_modulus > 20 /*1.25 * 16*/) tpg_s_quality(&dev->tpg, TPG_QUAL_GRAY, 0); else tpg_s_quality(&dev->tpg, TPG_QUAL_COLOR, 0); } /* * Get the current picture quality and the associated afc value. */ static enum tpg_quality vivid_get_quality(struct vivid_dev *dev, s32 *afc) { unsigned freq_modulus; if (afc) *afc = 0; if (tpg_g_quality(&dev->tpg) == TPG_QUAL_COLOR || tpg_g_quality(&dev->tpg) == TPG_QUAL_NOISE) return tpg_g_quality(&dev->tpg); /* * There is a fake channel every 6 MHz at 49.25, 55.25, etc. * From +/- 0.25 MHz around the channel there is color, and from * +/- 1 MHz there is grayscale (chroma is lost). * Everywhere else it is just gray. */ freq_modulus = (dev->tv_freq - 676 /* (43.25-1) * 16 */) % (6 * 16); if (afc) *afc = freq_modulus - 1 * 16; return TPG_QUAL_GRAY; } enum tpg_video_aspect vivid_get_video_aspect(const struct vivid_dev *dev) { if (vivid_is_sdtv_cap(dev)) return dev->std_aspect_ratio[dev->input]; if (vivid_is_hdmi_cap(dev)) return dev->dv_timings_aspect_ratio[dev->input]; return TPG_VIDEO_ASPECT_IMAGE; } static enum tpg_pixel_aspect vivid_get_pixel_aspect(const struct vivid_dev *dev) { if (vivid_is_sdtv_cap(dev)) return (dev->std_cap[dev->input] & V4L2_STD_525_60) ? TPG_PIXEL_ASPECT_NTSC : TPG_PIXEL_ASPECT_PAL; if (vivid_is_hdmi_cap(dev) && dev->src_rect.width == 720 && dev->src_rect.height <= 576) return dev->src_rect.height == 480 ? TPG_PIXEL_ASPECT_NTSC : TPG_PIXEL_ASPECT_PAL; return TPG_PIXEL_ASPECT_SQUARE; } /* * Called whenever the format has to be reset which can occur when * changing inputs, standard, timings, etc. */ void vivid_update_format_cap(struct vivid_dev *dev, bool keep_controls) { struct v4l2_bt_timings *bt = &dev->dv_timings_cap[dev->input].bt; u32 dims[V4L2_CTRL_MAX_DIMS] = {}; unsigned size; u64 pixelclock; switch (dev->input_type[dev->input]) { case WEBCAM: default: dev->src_rect.width = webcam_sizes[dev->webcam_size_idx].width; dev->src_rect.height = webcam_sizes[dev->webcam_size_idx].height; dev->timeperframe_vid_cap = webcam_intervals[dev->webcam_ival_idx]; dev->field_cap = V4L2_FIELD_NONE; tpg_s_rgb_range(&dev->tpg, V4L2_DV_RGB_RANGE_AUTO); break; case TV: case SVID: dev->field_cap = dev->tv_field_cap; dev->src_rect.width = 720; if (dev->std_cap[dev->input] & V4L2_STD_525_60) { dev->src_rect.height = 480; dev->timeperframe_vid_cap = (struct v4l2_fract) { 1001, 30000 }; dev->service_set_cap = V4L2_SLICED_CAPTION_525; } else { dev->src_rect.height = 576; dev->timeperframe_vid_cap = (struct v4l2_fract) { 1000, 25000 }; dev->service_set_cap = V4L2_SLICED_WSS_625 | V4L2_SLICED_TELETEXT_B; } tpg_s_rgb_range(&dev->tpg, V4L2_DV_RGB_RANGE_AUTO); break; case HDMI: dev->src_rect.width = bt->width; dev->src_rect.height = bt->height; size = V4L2_DV_BT_FRAME_WIDTH(bt) * V4L2_DV_BT_FRAME_HEIGHT(bt); if (dev->reduced_fps && can_reduce_fps(bt)) { pixelclock = div_u64(bt->pixelclock * 1000, 1001); bt->flags |= V4L2_DV_FL_REDUCED_FPS; } else { pixelclock = bt->pixelclock; bt->flags &= ~V4L2_DV_FL_REDUCED_FPS; } dev->timeperframe_vid_cap = (struct v4l2_fract) { size / 100, (u32)pixelclock / 100 }; if (bt->interlaced) dev->field_cap = V4L2_FIELD_ALTERNATE; else dev->field_cap = V4L2_FIELD_NONE; /* * We can be called from within s_ctrl, in that case we can't * set/get controls. Luckily we don't need to in that case. 
*/ if (keep_controls || !dev->colorspace) break; if (bt->flags & V4L2_DV_FL_IS_CE_VIDEO) { if (bt->width == 720 && bt->height <= 576) v4l2_ctrl_s_ctrl(dev->colorspace, VIVID_CS_170M); else v4l2_ctrl_s_ctrl(dev->colorspace, VIVID_CS_709); v4l2_ctrl_s_ctrl(dev->real_rgb_range_cap, 1); } else { v4l2_ctrl_s_ctrl(dev->colorspace, VIVID_CS_SRGB); v4l2_ctrl_s_ctrl(dev->real_rgb_range_cap, 0); } tpg_s_rgb_range(&dev->tpg, v4l2_ctrl_g_ctrl(dev->rgb_range_cap)); break; } vivid_update_quality(dev); tpg_reset_source(&dev->tpg, dev->src_rect.width, dev->src_rect.height, dev->field_cap); dev->crop_cap = dev->src_rect; dev->crop_bounds_cap = dev->src_rect; dev->compose_cap = dev->crop_cap; if (V4L2_FIELD_HAS_T_OR_B(dev->field_cap)) dev->compose_cap.height /= 2; dev->fmt_cap_rect = dev->compose_cap; tpg_s_video_aspect(&dev->tpg, vivid_get_video_aspect(dev)); tpg_s_pixel_aspect(&dev->tpg, vivid_get_pixel_aspect(dev)); tpg_update_mv_step(&dev->tpg); /* * We can be called from within s_ctrl, in that case we can't * modify controls. Luckily we don't need to in that case. */ if (keep_controls) return; dims[0] = roundup(dev->src_rect.width, PIXEL_ARRAY_DIV); dims[1] = roundup(dev->src_rect.height, PIXEL_ARRAY_DIV); v4l2_ctrl_modify_dimensions(dev->pixel_array, dims); } /* Map the field to something that is valid for the current input */ static enum v4l2_field vivid_field_cap(struct vivid_dev *dev, enum v4l2_field field) { if (vivid_is_sdtv_cap(dev)) { switch (field) { case V4L2_FIELD_INTERLACED_TB: case V4L2_FIELD_INTERLACED_BT: case V4L2_FIELD_SEQ_TB: case V4L2_FIELD_SEQ_BT: case V4L2_FIELD_TOP: case V4L2_FIELD_BOTTOM: case V4L2_FIELD_ALTERNATE: return field; case V4L2_FIELD_INTERLACED: default: return V4L2_FIELD_INTERLACED; } } if (vivid_is_hdmi_cap(dev)) return dev->dv_timings_cap[dev->input].bt.interlaced ? 
V4L2_FIELD_ALTERNATE : V4L2_FIELD_NONE; return V4L2_FIELD_NONE; } static unsigned vivid_colorspace_cap(struct vivid_dev *dev) { if (!vivid_input_is_connected_to(dev)) return tpg_g_colorspace(&dev->tpg); return dev->colorspace_out; } static unsigned vivid_xfer_func_cap(struct vivid_dev *dev) { if (!vivid_input_is_connected_to(dev)) return tpg_g_xfer_func(&dev->tpg); return dev->xfer_func_out; } static unsigned vivid_ycbcr_enc_cap(struct vivid_dev *dev) { if (!vivid_input_is_connected_to(dev)) return tpg_g_ycbcr_enc(&dev->tpg); return dev->ycbcr_enc_out; } static unsigned int vivid_hsv_enc_cap(struct vivid_dev *dev) { if (!vivid_input_is_connected_to(dev)) return tpg_g_hsv_enc(&dev->tpg); return dev->hsv_enc_out; } static unsigned vivid_quantization_cap(struct vivid_dev *dev) { if (!vivid_input_is_connected_to(dev)) return tpg_g_quantization(&dev->tpg); return dev->quantization_out; } int vivid_g_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_pix_format_mplane *mp = &f->fmt.pix_mp; unsigned p; mp->width = dev->fmt_cap_rect.width; mp->height = dev->fmt_cap_rect.height; mp->field = dev->field_cap; mp->pixelformat = dev->fmt_cap->fourcc; mp->colorspace = vivid_colorspace_cap(dev); mp->xfer_func = vivid_xfer_func_cap(dev); if (dev->fmt_cap->color_enc == TGP_COLOR_ENC_HSV) mp->hsv_enc = vivid_hsv_enc_cap(dev); else mp->ycbcr_enc = vivid_ycbcr_enc_cap(dev); mp->quantization = vivid_quantization_cap(dev); mp->num_planes = dev->fmt_cap->buffers; for (p = 0; p < mp->num_planes; p++) { mp->plane_fmt[p].bytesperline = tpg_g_bytesperline(&dev->tpg, p); mp->plane_fmt[p].sizeimage = (tpg_g_line_width(&dev->tpg, p) * mp->height) / dev->fmt_cap->vdownsampling[p] + dev->fmt_cap->data_offset[p]; } return 0; } int vivid_try_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct v4l2_pix_format_mplane *mp = &f->fmt.pix_mp; struct v4l2_plane_pix_format *pfmt = mp->plane_fmt; struct vivid_dev *dev = video_drvdata(file); const struct vivid_fmt *fmt; unsigned bytesperline, max_bpl; unsigned factor = 1; unsigned w, h; unsigned p; bool user_set_csc = !!(mp->flags & V4L2_PIX_FMT_FLAG_SET_CSC); fmt = vivid_get_format(dev, mp->pixelformat); if (!fmt) { dprintk(dev, 1, "Fourcc format (0x%08x) unknown.\n", mp->pixelformat); mp->pixelformat = V4L2_PIX_FMT_YUYV; fmt = vivid_get_format(dev, mp->pixelformat); } mp->field = vivid_field_cap(dev, mp->field); if (vivid_is_webcam(dev)) { const struct v4l2_frmsize_discrete *sz = v4l2_find_nearest_size(webcam_sizes, ARRAY_SIZE(webcam_sizes), width, height, mp->width, mp->height); w = sz->width; h = sz->height; } else if (vivid_is_sdtv_cap(dev)) { w = 720; h = (dev->std_cap[dev->input] & V4L2_STD_525_60) ? 
480 : 576; } else { w = dev->src_rect.width; h = dev->src_rect.height; } if (V4L2_FIELD_HAS_T_OR_B(mp->field)) factor = 2; if (vivid_is_webcam(dev) || (!dev->has_scaler_cap && !dev->has_crop_cap && !dev->has_compose_cap)) { mp->width = w; mp->height = h / factor; } else { struct v4l2_rect r = { 0, 0, mp->width, mp->height * factor }; v4l2_rect_set_min_size(&r, &vivid_min_rect); v4l2_rect_set_max_size(&r, &vivid_max_rect); if (dev->has_scaler_cap && !dev->has_compose_cap) { struct v4l2_rect max_r = { 0, 0, MAX_ZOOM * w, MAX_ZOOM * h }; v4l2_rect_set_max_size(&r, &max_r); } else if (!dev->has_scaler_cap && dev->has_crop_cap && !dev->has_compose_cap) { v4l2_rect_set_max_size(&r, &dev->src_rect); } else if (!dev->has_scaler_cap && !dev->has_crop_cap) { v4l2_rect_set_min_size(&r, &dev->src_rect); } mp->width = r.width; mp->height = r.height / factor; } /* This driver supports custom bytesperline values */ mp->num_planes = fmt->buffers; for (p = 0; p < fmt->buffers; p++) { /* Calculate the minimum supported bytesperline value */ bytesperline = (mp->width * fmt->bit_depth[p]) >> 3; /* Calculate the maximum supported bytesperline value */ max_bpl = (MAX_ZOOM * MAX_WIDTH * fmt->bit_depth[p]) >> 3; if (pfmt[p].bytesperline > max_bpl) pfmt[p].bytesperline = max_bpl; if (pfmt[p].bytesperline < bytesperline) pfmt[p].bytesperline = bytesperline; pfmt[p].sizeimage = (pfmt[p].bytesperline * mp->height) / fmt->vdownsampling[p] + fmt->data_offset[p]; memset(pfmt[p].reserved, 0, sizeof(pfmt[p].reserved)); } for (p = fmt->buffers; p < fmt->planes; p++) pfmt[0].sizeimage += (pfmt[0].bytesperline * mp->height * (fmt->bit_depth[p] / fmt->vdownsampling[p])) / (fmt->bit_depth[0] / fmt->vdownsampling[0]); if (!user_set_csc || !v4l2_is_colorspace_valid(mp->colorspace)) mp->colorspace = vivid_colorspace_cap(dev); if (!user_set_csc || !v4l2_is_xfer_func_valid(mp->xfer_func)) mp->xfer_func = vivid_xfer_func_cap(dev); if (fmt->color_enc == TGP_COLOR_ENC_HSV) { if (!user_set_csc || !v4l2_is_hsv_enc_valid(mp->hsv_enc)) mp->hsv_enc = vivid_hsv_enc_cap(dev); } else if (fmt->color_enc == TGP_COLOR_ENC_YCBCR) { if (!user_set_csc || !v4l2_is_ycbcr_enc_valid(mp->ycbcr_enc)) mp->ycbcr_enc = vivid_ycbcr_enc_cap(dev); } else { mp->ycbcr_enc = vivid_ycbcr_enc_cap(dev); } if (fmt->color_enc == TGP_COLOR_ENC_YCBCR || fmt->color_enc == TGP_COLOR_ENC_RGB) { if (!user_set_csc || !v4l2_is_quant_valid(mp->quantization)) mp->quantization = vivid_quantization_cap(dev); } else { mp->quantization = vivid_quantization_cap(dev); } memset(mp->reserved, 0, sizeof(mp->reserved)); return 0; } int vivid_s_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct v4l2_pix_format_mplane *mp = &f->fmt.pix_mp; struct vivid_dev *dev = video_drvdata(file); struct v4l2_rect *crop = &dev->crop_cap; struct v4l2_rect *compose = &dev->compose_cap; struct vb2_queue *q = &dev->vb_vid_cap_q; int ret = vivid_try_fmt_vid_cap(file, priv, f); unsigned factor = 1; unsigned p; unsigned i; if (ret < 0) return ret; if (vb2_is_busy(q)) { dprintk(dev, 1, "%s device busy\n", __func__); return -EBUSY; } dev->fmt_cap = vivid_get_format(dev, mp->pixelformat); if (V4L2_FIELD_HAS_T_OR_B(mp->field)) factor = 2; /* Note: the webcam input doesn't support scaling, cropping or composing */ if (!vivid_is_webcam(dev) && (dev->has_scaler_cap || dev->has_crop_cap || dev->has_compose_cap)) { struct v4l2_rect r = { 0, 0, mp->width, mp->height }; if (dev->has_scaler_cap) { if (dev->has_compose_cap) v4l2_rect_map_inside(compose, &r); else *compose = r; if (dev->has_crop_cap 
&& !dev->has_compose_cap) { struct v4l2_rect min_r = { 0, 0, r.width / MAX_ZOOM, factor * r.height / MAX_ZOOM }; struct v4l2_rect max_r = { 0, 0, r.width * MAX_ZOOM, factor * r.height * MAX_ZOOM }; v4l2_rect_set_min_size(crop, &min_r); v4l2_rect_set_max_size(crop, &max_r); v4l2_rect_map_inside(crop, &dev->crop_bounds_cap); } else if (dev->has_crop_cap) { struct v4l2_rect min_r = { 0, 0, compose->width / MAX_ZOOM, factor * compose->height / MAX_ZOOM }; struct v4l2_rect max_r = { 0, 0, compose->width * MAX_ZOOM, factor * compose->height * MAX_ZOOM }; v4l2_rect_set_min_size(crop, &min_r); v4l2_rect_set_max_size(crop, &max_r); v4l2_rect_map_inside(crop, &dev->crop_bounds_cap); } } else if (dev->has_crop_cap && !dev->has_compose_cap) { r.height *= factor; v4l2_rect_set_size_to(crop, &r); v4l2_rect_map_inside(crop, &dev->crop_bounds_cap); r = *crop; r.height /= factor; v4l2_rect_set_size_to(compose, &r); } else if (!dev->has_crop_cap) { v4l2_rect_map_inside(compose, &r); } else { r.height *= factor; v4l2_rect_set_max_size(crop, &r); v4l2_rect_map_inside(crop, &dev->crop_bounds_cap); compose->top *= factor; compose->height *= factor; v4l2_rect_set_size_to(compose, crop); v4l2_rect_map_inside(compose, &r); compose->top /= factor; compose->height /= factor; } } else if (vivid_is_webcam(dev)) { unsigned int ival_sz = webcam_ival_count(dev, dev->webcam_size_idx); /* Guaranteed to be a match */ for (i = 0; i < ARRAY_SIZE(webcam_sizes); i++) if (webcam_sizes[i].width == mp->width && webcam_sizes[i].height == mp->height) break; dev->webcam_size_idx = i; if (dev->webcam_ival_idx >= ival_sz) dev->webcam_ival_idx = ival_sz - 1; vivid_update_format_cap(dev, false); } else { struct v4l2_rect r = { 0, 0, mp->width, mp->height }; v4l2_rect_set_size_to(compose, &r); r.height *= factor; v4l2_rect_set_size_to(crop, &r); } dev->fmt_cap_rect.width = mp->width; dev->fmt_cap_rect.height = mp->height; tpg_s_buf_height(&dev->tpg, mp->height); tpg_s_fourcc(&dev->tpg, dev->fmt_cap->fourcc); for (p = 0; p < tpg_g_buffers(&dev->tpg); p++) tpg_s_bytesperline(&dev->tpg, p, mp->plane_fmt[p].bytesperline); dev->field_cap = mp->field; if (dev->field_cap == V4L2_FIELD_ALTERNATE) tpg_s_field(&dev->tpg, V4L2_FIELD_TOP, true); else tpg_s_field(&dev->tpg, dev->field_cap, false); tpg_s_crop_compose(&dev->tpg, &dev->crop_cap, &dev->compose_cap); if (vivid_is_sdtv_cap(dev)) dev->tv_field_cap = mp->field; tpg_update_mv_step(&dev->tpg); dev->tpg.colorspace = mp->colorspace; dev->tpg.xfer_func = mp->xfer_func; if (dev->fmt_cap->color_enc == TGP_COLOR_ENC_YCBCR) dev->tpg.ycbcr_enc = mp->ycbcr_enc; else dev->tpg.hsv_enc = mp->hsv_enc; dev->tpg.quantization = mp->quantization; return 0; } int vidioc_g_fmt_vid_cap_mplane(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); if (!dev->multiplanar) return -ENOTTY; return vivid_g_fmt_vid_cap(file, priv, f); } int vidioc_try_fmt_vid_cap_mplane(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); if (!dev->multiplanar) return -ENOTTY; return vivid_try_fmt_vid_cap(file, priv, f); } int vidioc_s_fmt_vid_cap_mplane(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); if (!dev->multiplanar) return -ENOTTY; return vivid_s_fmt_vid_cap(file, priv, f); } int vidioc_g_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); if (dev->multiplanar) return -ENOTTY; return fmt_sp2mp_func(file, priv, f, 
vivid_g_fmt_vid_cap); } int vidioc_try_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); if (dev->multiplanar) return -ENOTTY; return fmt_sp2mp_func(file, priv, f, vivid_try_fmt_vid_cap); } int vidioc_s_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); if (dev->multiplanar) return -ENOTTY; return fmt_sp2mp_func(file, priv, f, vivid_s_fmt_vid_cap); } int vivid_vid_cap_g_selection(struct file *file, void *priv, struct v4l2_selection *sel) { struct vivid_dev *dev = video_drvdata(file); if (!dev->has_crop_cap && !dev->has_compose_cap) return -ENOTTY; if (sel->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) return -EINVAL; if (vivid_is_webcam(dev)) return -ENODATA; sel->r.left = sel->r.top = 0; switch (sel->target) { case V4L2_SEL_TGT_CROP: if (!dev->has_crop_cap) return -EINVAL; sel->r = dev->crop_cap; break; case V4L2_SEL_TGT_CROP_DEFAULT: case V4L2_SEL_TGT_CROP_BOUNDS: if (!dev->has_crop_cap) return -EINVAL; sel->r = dev->src_rect; break; case V4L2_SEL_TGT_COMPOSE_BOUNDS: if (!dev->has_compose_cap) return -EINVAL; sel->r = vivid_max_rect; break; case V4L2_SEL_TGT_COMPOSE: if (!dev->has_compose_cap) return -EINVAL; sel->r = dev->compose_cap; break; case V4L2_SEL_TGT_COMPOSE_DEFAULT: if (!dev->has_compose_cap) return -EINVAL; sel->r = dev->fmt_cap_rect; break; default: return -EINVAL; } return 0; } int vivid_vid_cap_s_selection(struct file *file, void *fh, struct v4l2_selection *s) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_rect *crop = &dev->crop_cap; struct v4l2_rect *compose = &dev->compose_cap; unsigned factor = V4L2_FIELD_HAS_T_OR_B(dev->field_cap) ? 2 : 1; int ret; if (!dev->has_crop_cap && !dev->has_compose_cap) return -ENOTTY; if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) return -EINVAL; if (vivid_is_webcam(dev)) return -ENODATA; switch (s->target) { case V4L2_SEL_TGT_CROP: if (!dev->has_crop_cap) return -EINVAL; ret = vivid_vid_adjust_sel(s->flags, &s->r); if (ret) return ret; v4l2_rect_set_min_size(&s->r, &vivid_min_rect); v4l2_rect_set_max_size(&s->r, &dev->src_rect); v4l2_rect_map_inside(&s->r, &dev->crop_bounds_cap); s->r.top /= factor; s->r.height /= factor; if (dev->has_scaler_cap) { struct v4l2_rect fmt = dev->fmt_cap_rect; struct v4l2_rect max_rect = { 0, 0, s->r.width * MAX_ZOOM, s->r.height * MAX_ZOOM }; struct v4l2_rect min_rect = { 0, 0, s->r.width / MAX_ZOOM, s->r.height / MAX_ZOOM }; v4l2_rect_set_min_size(&fmt, &min_rect); if (!dev->has_compose_cap) v4l2_rect_set_max_size(&fmt, &max_rect); if (!v4l2_rect_same_size(&dev->fmt_cap_rect, &fmt) && vb2_is_busy(&dev->vb_vid_cap_q)) return -EBUSY; if (dev->has_compose_cap) { v4l2_rect_set_min_size(compose, &min_rect); v4l2_rect_set_max_size(compose, &max_rect); v4l2_rect_map_inside(compose, &fmt); } dev->fmt_cap_rect = fmt; tpg_s_buf_height(&dev->tpg, fmt.height); } else if (dev->has_compose_cap) { struct v4l2_rect fmt = dev->fmt_cap_rect; v4l2_rect_set_min_size(&fmt, &s->r); if (!v4l2_rect_same_size(&dev->fmt_cap_rect, &fmt) && vb2_is_busy(&dev->vb_vid_cap_q)) return -EBUSY; dev->fmt_cap_rect = fmt; tpg_s_buf_height(&dev->tpg, fmt.height); v4l2_rect_set_size_to(compose, &s->r); v4l2_rect_map_inside(compose, &dev->fmt_cap_rect); } else { if (!v4l2_rect_same_size(&s->r, &dev->fmt_cap_rect) && vb2_is_busy(&dev->vb_vid_cap_q)) return -EBUSY; v4l2_rect_set_size_to(&dev->fmt_cap_rect, &s->r); v4l2_rect_set_size_to(compose, &s->r); v4l2_rect_map_inside(compose, &dev->fmt_cap_rect); 
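			/*
			 * Neither scaler nor compose support: the capture
			 * format simply follows the new crop rectangle, so
			 * keep the TPG buffer height in sync with it.
			 */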
tpg_s_buf_height(&dev->tpg, dev->fmt_cap_rect.height); } s->r.top *= factor; s->r.height *= factor; *crop = s->r; break; case V4L2_SEL_TGT_COMPOSE: if (!dev->has_compose_cap) return -EINVAL; ret = vivid_vid_adjust_sel(s->flags, &s->r); if (ret) return ret; v4l2_rect_set_min_size(&s->r, &vivid_min_rect); v4l2_rect_set_max_size(&s->r, &dev->fmt_cap_rect); if (dev->has_scaler_cap) { struct v4l2_rect max_rect = { 0, 0, dev->src_rect.width * MAX_ZOOM, (dev->src_rect.height / factor) * MAX_ZOOM }; v4l2_rect_set_max_size(&s->r, &max_rect); if (dev->has_crop_cap) { struct v4l2_rect min_rect = { 0, 0, s->r.width / MAX_ZOOM, (s->r.height * factor) / MAX_ZOOM }; struct v4l2_rect max_rect = { 0, 0, s->r.width * MAX_ZOOM, (s->r.height * factor) * MAX_ZOOM }; v4l2_rect_set_min_size(crop, &min_rect); v4l2_rect_set_max_size(crop, &max_rect); v4l2_rect_map_inside(crop, &dev->crop_bounds_cap); } } else if (dev->has_crop_cap) { s->r.top *= factor; s->r.height *= factor; v4l2_rect_set_max_size(&s->r, &dev->src_rect); v4l2_rect_set_size_to(crop, &s->r); v4l2_rect_map_inside(crop, &dev->crop_bounds_cap); s->r.top /= factor; s->r.height /= factor; } else { v4l2_rect_set_size_to(&s->r, &dev->src_rect); s->r.height /= factor; } v4l2_rect_map_inside(&s->r, &dev->fmt_cap_rect); *compose = s->r; break; default: return -EINVAL; } tpg_s_crop_compose(&dev->tpg, crop, compose); return 0; } int vivid_vid_cap_g_pixelaspect(struct file *file, void *priv, int type, struct v4l2_fract *f) { struct vivid_dev *dev = video_drvdata(file); if (type != V4L2_BUF_TYPE_VIDEO_CAPTURE) return -EINVAL; switch (vivid_get_pixel_aspect(dev)) { case TPG_PIXEL_ASPECT_NTSC: f->numerator = 11; f->denominator = 10; break; case TPG_PIXEL_ASPECT_PAL: f->numerator = 54; f->denominator = 59; break; default: break; } return 0; } static const struct v4l2_audio vivid_audio_inputs[] = { { 0, "TV", V4L2_AUDCAP_STEREO }, { 1, "Line-In", V4L2_AUDCAP_STEREO }, }; int vidioc_enum_input(struct file *file, void *priv, struct v4l2_input *inp) { struct vivid_dev *dev = video_drvdata(file); if (inp->index >= dev->num_inputs) return -EINVAL; inp->type = V4L2_INPUT_TYPE_CAMERA; switch (dev->input_type[inp->index]) { case WEBCAM: snprintf(inp->name, sizeof(inp->name), "Webcam %03u-%u", dev->inst, dev->input_name_counter[inp->index]); inp->capabilities = 0; break; case TV: snprintf(inp->name, sizeof(inp->name), "TV %03u-%u", dev->inst, dev->input_name_counter[inp->index]); inp->type = V4L2_INPUT_TYPE_TUNER; inp->std = V4L2_STD_ALL; if (dev->has_audio_inputs) inp->audioset = (1 << ARRAY_SIZE(vivid_audio_inputs)) - 1; inp->capabilities = V4L2_IN_CAP_STD; break; case SVID: snprintf(inp->name, sizeof(inp->name), "S-Video %03u-%u", dev->inst, dev->input_name_counter[inp->index]); inp->std = V4L2_STD_ALL; if (dev->has_audio_inputs) inp->audioset = (1 << ARRAY_SIZE(vivid_audio_inputs)) - 1; inp->capabilities = V4L2_IN_CAP_STD; break; case HDMI: snprintf(inp->name, sizeof(inp->name), "HDMI %03u-%u", dev->inst, dev->input_name_counter[inp->index]); inp->capabilities = V4L2_IN_CAP_DV_TIMINGS; if (dev->edid_blocks == 0 || dev->dv_timings_signal_mode[dev->input] == NO_SIGNAL) inp->status |= V4L2_IN_ST_NO_SIGNAL; else if (dev->dv_timings_signal_mode[dev->input] == NO_LOCK || dev->dv_timings_signal_mode[dev->input] == OUT_OF_RANGE) inp->status |= V4L2_IN_ST_NO_H_LOCK; break; } if (dev->sensor_hflip) inp->status |= V4L2_IN_ST_HFLIP; if (dev->sensor_vflip) inp->status |= V4L2_IN_ST_VFLIP; if (dev->input == inp->index && vivid_is_sdtv_cap(dev)) { if (dev->std_signal_mode[dev->input] 
== NO_SIGNAL) { inp->status |= V4L2_IN_ST_NO_SIGNAL; } else if (dev->std_signal_mode[dev->input] == NO_LOCK) { inp->status |= V4L2_IN_ST_NO_H_LOCK; } else if (vivid_is_tv_cap(dev)) { switch (tpg_g_quality(&dev->tpg)) { case TPG_QUAL_GRAY: inp->status |= V4L2_IN_ST_COLOR_KILL; break; case TPG_QUAL_NOISE: inp->status |= V4L2_IN_ST_NO_H_LOCK; break; default: break; } } } return 0; } int vidioc_g_input(struct file *file, void *priv, unsigned *i) { struct vivid_dev *dev = video_drvdata(file); *i = dev->input; return 0; } int vidioc_s_input(struct file *file, void *priv, unsigned i) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_bt_timings *bt = &dev->dv_timings_cap[dev->input].bt; unsigned brightness; if (i >= dev->num_inputs) return -EINVAL; if (i == dev->input) return 0; if (vb2_is_busy(&dev->vb_vid_cap_q) || vb2_is_busy(&dev->vb_vbi_cap_q) || vb2_is_busy(&dev->vb_meta_cap_q)) return -EBUSY; dev->input = i; dev->vid_cap_dev.tvnorms = 0; if (dev->input_type[i] == TV || dev->input_type[i] == SVID) { dev->tv_audio_input = (dev->input_type[i] == TV) ? 0 : 1; dev->vid_cap_dev.tvnorms = V4L2_STD_ALL; } dev->vbi_cap_dev.tvnorms = dev->vid_cap_dev.tvnorms; dev->meta_cap_dev.tvnorms = dev->vid_cap_dev.tvnorms; vivid_update_format_cap(dev, false); if (dev->colorspace) { switch (dev->input_type[i]) { case WEBCAM: v4l2_ctrl_s_ctrl(dev->colorspace, VIVID_CS_SRGB); break; case TV: case SVID: v4l2_ctrl_s_ctrl(dev->colorspace, VIVID_CS_170M); break; case HDMI: if (bt->flags & V4L2_DV_FL_IS_CE_VIDEO) { if (dev->src_rect.width == 720 && dev->src_rect.height <= 576) v4l2_ctrl_s_ctrl(dev->colorspace, VIVID_CS_170M); else v4l2_ctrl_s_ctrl(dev->colorspace, VIVID_CS_709); } else { v4l2_ctrl_s_ctrl(dev->colorspace, VIVID_CS_SRGB); } break; } } /* * Modify the brightness range depending on the input. * This makes it easy to use vivid to test if applications can * handle control range modifications and is also how this is * typically used in practice as different inputs may be hooked * up to different receivers with different control ranges. */ brightness = 128 * i + dev->input_brightness[i]; v4l2_ctrl_modify_range(dev->brightness, 128 * i, 255 + 128 * i, 1, 128 + 128 * i); v4l2_ctrl_s_ctrl(dev->brightness, brightness); /* Restore per-input states. 
*/ v4l2_ctrl_activate(dev->ctrl_dv_timings_signal_mode, vivid_is_hdmi_cap(dev)); v4l2_ctrl_activate(dev->ctrl_dv_timings, vivid_is_hdmi_cap(dev) && dev->dv_timings_signal_mode[dev->input] == SELECTED_DV_TIMINGS); v4l2_ctrl_activate(dev->ctrl_std_signal_mode, vivid_is_sdtv_cap(dev)); v4l2_ctrl_activate(dev->ctrl_standard, vivid_is_sdtv_cap(dev) && dev->std_signal_mode[dev->input]); if (vivid_is_hdmi_cap(dev)) { v4l2_ctrl_s_ctrl(dev->ctrl_dv_timings_signal_mode, dev->dv_timings_signal_mode[dev->input]); v4l2_ctrl_s_ctrl(dev->ctrl_dv_timings, dev->query_dv_timings[dev->input]); } else if (vivid_is_sdtv_cap(dev)) { v4l2_ctrl_s_ctrl(dev->ctrl_std_signal_mode, dev->std_signal_mode[dev->input]); v4l2_ctrl_s_ctrl(dev->ctrl_standard, dev->std_signal_mode[dev->input]); } return 0; } int vidioc_enumaudio(struct file *file, void *fh, struct v4l2_audio *vin) { if (vin->index >= ARRAY_SIZE(vivid_audio_inputs)) return -EINVAL; *vin = vivid_audio_inputs[vin->index]; return 0; } int vidioc_g_audio(struct file *file, void *fh, struct v4l2_audio *vin) { struct vivid_dev *dev = video_drvdata(file); if (!vivid_is_sdtv_cap(dev)) return -EINVAL; *vin = vivid_audio_inputs[dev->tv_audio_input]; return 0; } int vidioc_s_audio(struct file *file, void *fh, const struct v4l2_audio *vin) { struct vivid_dev *dev = video_drvdata(file); if (!vivid_is_sdtv_cap(dev)) return -EINVAL; if (vin->index >= ARRAY_SIZE(vivid_audio_inputs)) return -EINVAL; dev->tv_audio_input = vin->index; return 0; } int vivid_video_g_frequency(struct file *file, void *fh, struct v4l2_frequency *vf) { struct vivid_dev *dev = video_drvdata(file); if (vf->tuner != 0) return -EINVAL; vf->frequency = dev->tv_freq; return 0; } int vivid_video_s_frequency(struct file *file, void *fh, const struct v4l2_frequency *vf) { struct vivid_dev *dev = video_drvdata(file); if (vf->tuner != 0) return -EINVAL; dev->tv_freq = clamp_t(unsigned, vf->frequency, MIN_TV_FREQ, MAX_TV_FREQ); if (vivid_is_tv_cap(dev)) vivid_update_quality(dev); return 0; } int vivid_video_s_tuner(struct file *file, void *fh, const struct v4l2_tuner *vt) { struct vivid_dev *dev = video_drvdata(file); if (vt->index != 0) return -EINVAL; if (vt->audmode > V4L2_TUNER_MODE_LANG1_LANG2) return -EINVAL; dev->tv_audmode = vt->audmode; return 0; } int vivid_video_g_tuner(struct file *file, void *fh, struct v4l2_tuner *vt) { struct vivid_dev *dev = video_drvdata(file); enum tpg_quality qual; if (vt->index != 0) return -EINVAL; vt->capability = V4L2_TUNER_CAP_NORM | V4L2_TUNER_CAP_STEREO | V4L2_TUNER_CAP_LANG1 | V4L2_TUNER_CAP_LANG2; vt->audmode = dev->tv_audmode; vt->rangelow = MIN_TV_FREQ; vt->rangehigh = MAX_TV_FREQ; qual = vivid_get_quality(dev, &vt->afc); if (qual == TPG_QUAL_COLOR) vt->signal = 0xffff; else if (qual == TPG_QUAL_GRAY) vt->signal = 0x8000; else vt->signal = 0; if (qual == TPG_QUAL_NOISE) { vt->rxsubchans = 0; } else if (qual == TPG_QUAL_GRAY) { vt->rxsubchans = V4L2_TUNER_SUB_MONO; } else { unsigned int channel_nr = dev->tv_freq / (6 * 16); unsigned int options = (dev->std_cap[dev->input] & V4L2_STD_NTSC_M) ? 
4 : 3; switch (channel_nr % options) { case 0: vt->rxsubchans = V4L2_TUNER_SUB_MONO; break; case 1: vt->rxsubchans = V4L2_TUNER_SUB_STEREO; break; case 2: if (dev->std_cap[dev->input] & V4L2_STD_NTSC_M) vt->rxsubchans = V4L2_TUNER_SUB_MONO | V4L2_TUNER_SUB_SAP; else vt->rxsubchans = V4L2_TUNER_SUB_LANG1 | V4L2_TUNER_SUB_LANG2; break; case 3: vt->rxsubchans = V4L2_TUNER_SUB_STEREO | V4L2_TUNER_SUB_SAP; break; } } strscpy(vt->name, "TV Tuner", sizeof(vt->name)); return 0; } /* Must remain in sync with the vivid_ctrl_standard_strings array */ const v4l2_std_id vivid_standard[] = { V4L2_STD_NTSC_M, V4L2_STD_NTSC_M_JP, V4L2_STD_NTSC_M_KR, V4L2_STD_NTSC_443, V4L2_STD_PAL_BG | V4L2_STD_PAL_H, V4L2_STD_PAL_I, V4L2_STD_PAL_DK, V4L2_STD_PAL_M, V4L2_STD_PAL_N, V4L2_STD_PAL_Nc, V4L2_STD_PAL_60, V4L2_STD_SECAM_B | V4L2_STD_SECAM_G | V4L2_STD_SECAM_H, V4L2_STD_SECAM_DK, V4L2_STD_SECAM_L, V4L2_STD_SECAM_LC, V4L2_STD_UNKNOWN }; /* Must remain in sync with the vivid_standard array */ const char * const vivid_ctrl_standard_strings[] = { "NTSC-M", "NTSC-M-JP", "NTSC-M-KR", "NTSC-443", "PAL-BGH", "PAL-I", "PAL-DK", "PAL-M", "PAL-N", "PAL-Nc", "PAL-60", "SECAM-BGH", "SECAM-DK", "SECAM-L", "SECAM-Lc", NULL, }; int vidioc_querystd(struct file *file, void *priv, v4l2_std_id *id) { struct vivid_dev *dev = video_drvdata(file); unsigned int last = dev->query_std_last[dev->input]; if (!vivid_is_sdtv_cap(dev)) return -ENODATA; if (dev->std_signal_mode[dev->input] == NO_SIGNAL || dev->std_signal_mode[dev->input] == NO_LOCK) { *id = V4L2_STD_UNKNOWN; return 0; } if (vivid_is_tv_cap(dev) && tpg_g_quality(&dev->tpg) == TPG_QUAL_NOISE) { *id = V4L2_STD_UNKNOWN; } else if (dev->std_signal_mode[dev->input] == CURRENT_STD) { *id = dev->std_cap[dev->input]; } else if (dev->std_signal_mode[dev->input] == SELECTED_STD) { *id = dev->query_std[dev->input]; } else { *id = vivid_standard[last]; dev->query_std_last[dev->input] = (last + 1) % ARRAY_SIZE(vivid_standard); } return 0; } int vivid_vid_cap_s_std(struct file *file, void *priv, v4l2_std_id id) { struct vivid_dev *dev = video_drvdata(file); if (!vivid_is_sdtv_cap(dev)) return -ENODATA; if (dev->std_cap[dev->input] == id) return 0; if (vb2_is_busy(&dev->vb_vid_cap_q) || vb2_is_busy(&dev->vb_vbi_cap_q)) return -EBUSY; dev->std_cap[dev->input] = id; vivid_update_format_cap(dev, false); return 0; } static void find_aspect_ratio(u32 width, u32 height, u32 *num, u32 *denom) { if (!(height % 3) && ((height * 4 / 3) == width)) { *num = 4; *denom = 3; } else if (!(height % 9) && ((height * 16 / 9) == width)) { *num = 16; *denom = 9; } else if (!(height % 10) && ((height * 16 / 10) == width)) { *num = 16; *denom = 10; } else if (!(height % 4) && ((height * 5 / 4) == width)) { *num = 5; *denom = 4; } else if (!(height % 9) && ((height * 15 / 9) == width)) { *num = 15; *denom = 9; } else { /* default to 16:9 */ *num = 16; *denom = 9; } } static bool valid_cvt_gtf_timings(struct v4l2_dv_timings *timings) { struct v4l2_bt_timings *bt = &timings->bt; u32 total_h_pixel; u32 total_v_lines; u32 h_freq; if (!v4l2_valid_dv_timings(timings, &vivid_dv_timings_cap, NULL, NULL)) return false; total_h_pixel = V4L2_DV_BT_FRAME_WIDTH(bt); total_v_lines = V4L2_DV_BT_FRAME_HEIGHT(bt); h_freq = (u32)bt->pixelclock / total_h_pixel; if (bt->standards == 0 || (bt->standards & V4L2_DV_BT_STD_CVT)) { struct v4l2_dv_timings cvt = {}; if (v4l2_detect_cvt(total_v_lines, h_freq, bt->vsync, bt->width, bt->polarities, bt->interlaced, &vivid_dv_timings_cap, &cvt) && cvt.bt.width == bt->width && cvt.bt.height == 
bt->height) { *timings = cvt; return true; } } if (bt->standards == 0 || (bt->standards & V4L2_DV_BT_STD_GTF)) { struct v4l2_dv_timings gtf = {}; struct v4l2_fract aspect_ratio; find_aspect_ratio(bt->width, bt->height, &aspect_ratio.numerator, &aspect_ratio.denominator); if (v4l2_detect_gtf(total_v_lines, h_freq, bt->vsync, bt->polarities, bt->interlaced, aspect_ratio, &vivid_dv_timings_cap, &gtf) && gtf.bt.width == bt->width && gtf.bt.height == bt->height) { *timings = gtf; return true; } } return false; } int vivid_vid_cap_s_dv_timings(struct file *file, void *_fh, struct v4l2_dv_timings *timings) { struct vivid_dev *dev = video_drvdata(file); if (!vivid_is_hdmi_cap(dev)) return -ENODATA; if (!v4l2_find_dv_timings_cap(timings, &vivid_dv_timings_cap, 0, NULL, NULL) && !valid_cvt_gtf_timings(timings)) return -EINVAL; if (v4l2_match_dv_timings(timings, &dev->dv_timings_cap[dev->input], 0, false)) return 0; if (vb2_is_busy(&dev->vb_vid_cap_q)) return -EBUSY; dev->dv_timings_cap[dev->input] = *timings; vivid_update_format_cap(dev, false); return 0; } int vidioc_query_dv_timings(struct file *file, void *_fh, struct v4l2_dv_timings *timings) { struct vivid_dev *dev = video_drvdata(file); unsigned int input = dev->input; unsigned int last = dev->query_dv_timings_last[input]; if (!vivid_is_hdmi_cap(dev)) return -ENODATA; if (dev->dv_timings_signal_mode[input] == NO_SIGNAL || dev->edid_blocks == 0) return -ENOLINK; if (dev->dv_timings_signal_mode[input] == NO_LOCK) return -ENOLCK; if (dev->dv_timings_signal_mode[input] == OUT_OF_RANGE) { timings->bt.pixelclock = vivid_dv_timings_cap.bt.max_pixelclock * 2; return -ERANGE; } if (dev->dv_timings_signal_mode[input] == CURRENT_DV_TIMINGS) { *timings = dev->dv_timings_cap[input]; } else if (dev->dv_timings_signal_mode[input] == SELECTED_DV_TIMINGS) { *timings = v4l2_dv_timings_presets[dev->query_dv_timings[input]]; } else { *timings = v4l2_dv_timings_presets[last]; dev->query_dv_timings_last[input] = (last + 1) % dev->query_dv_timings_size; } return 0; } void vivid_update_outputs(struct vivid_dev *dev) { u32 edid_present = 0; if (!dev || !dev->num_outputs) return; for (unsigned int i = 0, j = 0; i < dev->num_outputs; i++) { if (dev->output_type[i] != HDMI) continue; struct vivid_dev *dev_rx = dev->output_to_input_instance[i]; if (dev_rx && dev_rx->edid_blocks) edid_present |= 1 << j; j++; } v4l2_ctrl_s_ctrl(dev->ctrl_tx_edid_present, edid_present); v4l2_ctrl_s_ctrl(dev->ctrl_tx_hotplug, edid_present); v4l2_ctrl_s_ctrl(dev->ctrl_tx_rxsense, edid_present); } void vivid_update_connected_outputs(struct vivid_dev *dev) { u16 phys_addr = cec_get_edid_phys_addr(dev->edid, dev->edid_blocks * 128, NULL); for (unsigned int i = 0, j = 0; i < dev->num_inputs; i++) { unsigned int menu_idx = dev->input_is_connected_to_output[i]; if (dev->input_type[i] != HDMI) continue; j++; if (menu_idx < FIXED_MENU_ITEMS) continue; struct vivid_dev *dev_tx = vivid_ctrl_hdmi_to_output_instance[menu_idx]; unsigned int output = vivid_ctrl_hdmi_to_output_index[menu_idx]; if (!dev_tx) continue; unsigned int hdmi_output = dev_tx->output_to_iface_index[output]; vivid_update_outputs(dev_tx); if (dev->edid_blocks) { cec_s_phys_addr(dev_tx->cec_tx_adap[hdmi_output], v4l2_phys_addr_for_input(phys_addr, j), false); } else { cec_phys_addr_invalidate(dev_tx->cec_tx_adap[hdmi_output]); } } } int vidioc_s_edid(struct file *file, void *_fh, struct v4l2_edid *edid) { struct vivid_dev *dev = video_drvdata(file); u16 phys_addr; int ret; memset(edid->reserved, 0, sizeof(edid->reserved)); if (edid->pad 
>= dev->num_inputs) return -EINVAL; if (dev->input_type[edid->pad] != HDMI || edid->start_block) return -EINVAL; if (edid->blocks == 0) { if (vb2_is_busy(&dev->vb_vid_cap_q)) return -EBUSY; dev->edid_blocks = 0; vivid_update_connected_outputs(dev); return 0; } if (edid->blocks > dev->edid_max_blocks) { edid->blocks = dev->edid_max_blocks; return -E2BIG; } phys_addr = cec_get_edid_phys_addr(edid->edid, edid->blocks * 128, NULL); ret = v4l2_phys_addr_validate(phys_addr, &phys_addr, NULL); if (ret) return ret; if (vb2_is_busy(&dev->vb_vid_cap_q)) return -EBUSY; dev->edid_blocks = edid->blocks; memcpy(dev->edid, edid->edid, edid->blocks * 128); vivid_update_connected_outputs(dev); return 0; } int vidioc_enum_framesizes(struct file *file, void *fh, struct v4l2_frmsizeenum *fsize) { struct vivid_dev *dev = video_drvdata(file); if (!vivid_is_webcam(dev) && !dev->has_scaler_cap) return -EINVAL; if (vivid_get_format(dev, fsize->pixel_format) == NULL) return -EINVAL; if (vivid_is_webcam(dev)) { if (fsize->index >= ARRAY_SIZE(webcam_sizes)) return -EINVAL; fsize->type = V4L2_FRMSIZE_TYPE_DISCRETE; fsize->discrete = webcam_sizes[fsize->index]; return 0; } if (fsize->index) return -EINVAL; fsize->type = V4L2_FRMSIZE_TYPE_STEPWISE; fsize->stepwise.min_width = MIN_WIDTH; fsize->stepwise.max_width = MAX_WIDTH * MAX_ZOOM; fsize->stepwise.step_width = 2; fsize->stepwise.min_height = MIN_HEIGHT; fsize->stepwise.max_height = MAX_HEIGHT * MAX_ZOOM; fsize->stepwise.step_height = 2; return 0; } /* timeperframe is arbitrary and continuous */ int vidioc_enum_frameintervals(struct file *file, void *priv, struct v4l2_frmivalenum *fival) { struct vivid_dev *dev = video_drvdata(file); const struct vivid_fmt *fmt; int i; fmt = vivid_get_format(dev, fival->pixel_format); if (!fmt) return -EINVAL; if (!vivid_is_webcam(dev)) { if (fival->index) return -EINVAL; if (fival->width < MIN_WIDTH || fival->width > MAX_WIDTH * MAX_ZOOM) return -EINVAL; if (fival->height < MIN_HEIGHT || fival->height > MAX_HEIGHT * MAX_ZOOM) return -EINVAL; fival->type = V4L2_FRMIVAL_TYPE_DISCRETE; fival->discrete = dev->timeperframe_vid_cap; return 0; } for (i = 0; i < ARRAY_SIZE(webcam_sizes); i++) if (fival->width == webcam_sizes[i].width && fival->height == webcam_sizes[i].height) break; if (i == ARRAY_SIZE(webcam_sizes)) return -EINVAL; if (fival->index >= webcam_ival_count(dev, i)) return -EINVAL; fival->type = V4L2_FRMIVAL_TYPE_DISCRETE; fival->discrete = webcam_intervals[fival->index]; return 0; } int vivid_vid_cap_g_parm(struct file *file, void *priv, struct v4l2_streamparm *parm) { struct vivid_dev *dev = video_drvdata(file); if (parm->type != (dev->multiplanar ? V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE : V4L2_BUF_TYPE_VIDEO_CAPTURE)) return -EINVAL; parm->parm.capture.capability = V4L2_CAP_TIMEPERFRAME; parm->parm.capture.timeperframe = dev->timeperframe_vid_cap; parm->parm.capture.readbuffers = 1; return 0; } int vivid_vid_cap_s_parm(struct file *file, void *priv, struct v4l2_streamparm *parm) { struct vivid_dev *dev = video_drvdata(file); unsigned int ival_sz = webcam_ival_count(dev, dev->webcam_size_idx); struct v4l2_fract tpf; unsigned i; if (parm->type != (dev->multiplanar ? 
V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE : V4L2_BUF_TYPE_VIDEO_CAPTURE)) return -EINVAL; if (!vivid_is_webcam(dev)) return vivid_vid_cap_g_parm(file, priv, parm); tpf = parm->parm.capture.timeperframe; if (tpf.denominator == 0) tpf = webcam_intervals[ival_sz - 1]; for (i = 0; i < ival_sz; i++) if (V4L2_FRACT_COMPARE(tpf, >=, webcam_intervals[i])) break; if (i == ival_sz) i = ival_sz - 1; dev->webcam_ival_idx = i; tpf = webcam_intervals[dev->webcam_ival_idx]; /* resync the thread's timings */ dev->cap_seq_resync = true; dev->timeperframe_vid_cap = tpf; parm->parm.capture.capability = V4L2_CAP_TIMEPERFRAME; parm->parm.capture.timeperframe = tpf; parm->parm.capture.readbuffers = 1; return 0; }
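/*
 * Illustrative userspace sketch, not part of the driver: it shows how the
 * vidioc_enum_frameintervals() and vivid_vid_cap_s_parm() handlers above are
 * reached through the standard V4L2 ioctls. The device path, pixel format and
 * frame size below are assumptions for the example; they must match what the
 * (single-planar) vivid capture node actually exposes.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/videodev2.h>

static int set_frame_interval(int fd)
{
	struct v4l2_frmivalenum fival = {
		.pixel_format = V4L2_PIX_FMT_YUYV,	/* assumed supported */
		.width = 640,				/* assumed webcam size */
		.height = 360,
	};
	struct v4l2_streamparm parm = {
		.type = V4L2_BUF_TYPE_VIDEO_CAPTURE,
	};

	/* keep the last discrete interval reported by the driver */
	while (ioctl(fd, VIDIOC_ENUM_FRAMEINTERVALS, &fival) == 0) {
		if (fival.type == V4L2_FRMIVAL_TYPE_DISCRETE)
			parm.parm.capture.timeperframe = fival.discrete;
		fival.index++;
	}
	if (!parm.parm.capture.timeperframe.denominator)
		return -1;

	/* vivid_vid_cap_s_parm() snaps this to a supported webcam interval */
	if (ioctl(fd, VIDIOC_S_PARM, &parm))
		return -1;

	printf("timeperframe: %u/%u\n",
	       parm.parm.capture.timeperframe.numerator,
	       parm.parm.capture.timeperframe.denominator);
	return 0;
}

int main(void)
{
	int fd = open("/dev/video0", O_RDWR);	/* assumed vivid capture node */
	int ret;

	if (fd < 0)
		return 1;
	ret = set_frame_interval(fd);
	close(fd);
	return ret ? 1 : 0;
}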
// SPDX-License-Identifier: GPL-2.0+ /* * IPv6 IOAM implementation * * Author: * Justin Iurman <justin.iurman@uliege.be> */ #include <linux/errno.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/net.h> #include <linux/ioam6.h> #include <linux/ioam6_genl.h> #include <linux/rhashtable.h> #include <linux/netdevice.h> #include <net/addrconf.h> #include <net/genetlink.h> #include <net/ioam6.h> #include <net/sch_generic.h> static void ioam6_ns_release(struct ioam6_namespace *ns) { kfree_rcu(ns, rcu); } static void ioam6_sc_release(struct ioam6_schema *sc) { kfree_rcu(sc, rcu); } static void ioam6_free_ns(void *ptr, void *arg) { struct ioam6_namespace *ns = (struct ioam6_namespace *)ptr; if (ns) ioam6_ns_release(ns); } static void ioam6_free_sc(void *ptr, void *arg) { struct ioam6_schema *sc = (struct ioam6_schema *)ptr; if (sc) ioam6_sc_release(sc); } static int ioam6_ns_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) { const struct ioam6_namespace *ns = obj; return (ns->id != *(__be16 *)arg->key); } static int ioam6_sc_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) { const struct ioam6_schema *sc = obj; return (sc->id != *(u32 *)arg->key); } static const struct rhashtable_params rht_ns_params = { .key_len = sizeof(__be16), .key_offset = offsetof(struct ioam6_namespace, id), .head_offset = offsetof(struct ioam6_namespace, head), .automatic_shrinking = true, .obj_cmpfn = ioam6_ns_cmpfn, }; static const struct rhashtable_params rht_sc_params = { .key_len = sizeof(u32), .key_offset = offsetof(struct ioam6_schema, id), .head_offset = offsetof(struct ioam6_schema, head), .automatic_shrinking = true, .obj_cmpfn = ioam6_sc_cmpfn, }; static struct genl_family ioam6_genl_family; static const struct nla_policy ioam6_genl_policy_addns[] = { [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 }, [IOAM6_ATTR_NS_DATA] = { .type = NLA_U32 }, [IOAM6_ATTR_NS_DATA_WIDE] = { .type = NLA_U64 }, }; static const struct nla_policy ioam6_genl_policy_delns[] = { [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 }, }; static const struct nla_policy ioam6_genl_policy_addsc[] = { [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 }, [IOAM6_ATTR_SC_DATA] = { .type = NLA_BINARY, .len = IOAM6_MAX_SCHEMA_DATA_LEN }, }; static const struct nla_policy ioam6_genl_policy_delsc[] = { [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 }, }; static const struct nla_policy ioam6_genl_policy_ns_sc[] = { [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 }, [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 }, [IOAM6_ATTR_SC_NONE] = { .type = NLA_FLAG }, }; static int ioam6_genl_addns(struct sk_buff *skb, struct genl_info *info) { struct ioam6_pernet_data *nsdata; struct ioam6_namespace *ns; u64 data64; u32 data32; __be16 id; int err; if (!info->attrs[IOAM6_ATTR_NS_ID]) return -EINVAL; id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID])); nsdata = ioam6_pernet(genl_info_net(info)); mutex_lock(&nsdata->lock); ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params); if (ns) { err = -EEXIST; goto out_unlock; } ns
= kzalloc(sizeof(*ns), GFP_KERNEL); if (!ns) { err = -ENOMEM; goto out_unlock; } ns->id = id; data32 = nla_get_u32_default(info->attrs[IOAM6_ATTR_NS_DATA], IOAM6_U32_UNAVAILABLE); data64 = nla_get_u64_default(info->attrs[IOAM6_ATTR_NS_DATA_WIDE], IOAM6_U64_UNAVAILABLE); ns->data = cpu_to_be32(data32); ns->data_wide = cpu_to_be64(data64); err = rhashtable_lookup_insert_fast(&nsdata->namespaces, &ns->head, rht_ns_params); if (err) kfree(ns); out_unlock: mutex_unlock(&nsdata->lock); return err; } static int ioam6_genl_delns(struct sk_buff *skb, struct genl_info *info) { struct ioam6_pernet_data *nsdata; struct ioam6_namespace *ns; struct ioam6_schema *sc; __be16 id; int err; if (!info->attrs[IOAM6_ATTR_NS_ID]) return -EINVAL; id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID])); nsdata = ioam6_pernet(genl_info_net(info)); mutex_lock(&nsdata->lock); ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params); if (!ns) { err = -ENOENT; goto out_unlock; } sc = rcu_dereference_protected(ns->schema, lockdep_is_held(&nsdata->lock)); err = rhashtable_remove_fast(&nsdata->namespaces, &ns->head, rht_ns_params); if (err) goto out_unlock; if (sc) rcu_assign_pointer(sc->ns, NULL); ioam6_ns_release(ns); out_unlock: mutex_unlock(&nsdata->lock); return err; } static int __ioam6_genl_dumpns_element(struct ioam6_namespace *ns, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { struct ioam6_schema *sc; u64 data64; u32 data32; void *hdr; hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd); if (!hdr) return -ENOMEM; data32 = be32_to_cpu(ns->data); data64 = be64_to_cpu(ns->data_wide); if (nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id)) || (data32 != IOAM6_U32_UNAVAILABLE && nla_put_u32(skb, IOAM6_ATTR_NS_DATA, data32)) || (data64 != IOAM6_U64_UNAVAILABLE && nla_put_u64_64bit(skb, IOAM6_ATTR_NS_DATA_WIDE, data64, IOAM6_ATTR_PAD))) goto nla_put_failure; rcu_read_lock(); sc = rcu_dereference(ns->schema); if (sc && nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id)) { rcu_read_unlock(); goto nla_put_failure; } rcu_read_unlock(); genlmsg_end(skb, hdr); return 0; nla_put_failure: genlmsg_cancel(skb, hdr); return -EMSGSIZE; } static int ioam6_genl_dumpns_start(struct netlink_callback *cb) { struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk)); struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; if (!iter) { iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return -ENOMEM; cb->args[0] = (long)iter; } rhashtable_walk_enter(&nsdata->namespaces, iter); return 0; } static int ioam6_genl_dumpns_done(struct netlink_callback *cb) { struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; rhashtable_walk_exit(iter); kfree(iter); return 0; } static int ioam6_genl_dumpns(struct sk_buff *skb, struct netlink_callback *cb) { struct rhashtable_iter *iter; struct ioam6_namespace *ns; int err; iter = (struct rhashtable_iter *)cb->args[0]; rhashtable_walk_start(iter); for (;;) { ns = rhashtable_walk_next(iter); if (IS_ERR(ns)) { if (PTR_ERR(ns) == -EAGAIN) continue; err = PTR_ERR(ns); goto done; } else if (!ns) { break; } err = __ioam6_genl_dumpns_element(ns, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, skb, IOAM6_CMD_DUMP_NAMESPACES); if (err) goto done; } err = skb->len; done: rhashtable_walk_stop(iter); return err; } static int ioam6_genl_addsc(struct sk_buff *skb, struct genl_info *info) { struct ioam6_pernet_data *nsdata; int len, len_aligned, err; struct ioam6_schema *sc; u32 id; if (!info->attrs[IOAM6_ATTR_SC_ID] || 
!info->attrs[IOAM6_ATTR_SC_DATA]) return -EINVAL; id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]); nsdata = ioam6_pernet(genl_info_net(info)); mutex_lock(&nsdata->lock); sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params); if (sc) { err = -EEXIST; goto out_unlock; } len = nla_len(info->attrs[IOAM6_ATTR_SC_DATA]); len_aligned = ALIGN(len, 4); sc = kzalloc(sizeof(*sc) + len_aligned, GFP_KERNEL); if (!sc) { err = -ENOMEM; goto out_unlock; } sc->id = id; sc->len = len_aligned; sc->hdr = cpu_to_be32(sc->id | ((u8)(sc->len / 4) << 24)); nla_memcpy(sc->data, info->attrs[IOAM6_ATTR_SC_DATA], len); err = rhashtable_lookup_insert_fast(&nsdata->schemas, &sc->head, rht_sc_params); if (err) goto free_sc; out_unlock: mutex_unlock(&nsdata->lock); return err; free_sc: kfree(sc); goto out_unlock; } static int ioam6_genl_delsc(struct sk_buff *skb, struct genl_info *info) { struct ioam6_pernet_data *nsdata; struct ioam6_namespace *ns; struct ioam6_schema *sc; int err; u32 id; if (!info->attrs[IOAM6_ATTR_SC_ID]) return -EINVAL; id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]); nsdata = ioam6_pernet(genl_info_net(info)); mutex_lock(&nsdata->lock); sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params); if (!sc) { err = -ENOENT; goto out_unlock; } ns = rcu_dereference_protected(sc->ns, lockdep_is_held(&nsdata->lock)); err = rhashtable_remove_fast(&nsdata->schemas, &sc->head, rht_sc_params); if (err) goto out_unlock; if (ns) rcu_assign_pointer(ns->schema, NULL); ioam6_sc_release(sc); out_unlock: mutex_unlock(&nsdata->lock); return err; } static int __ioam6_genl_dumpsc_element(struct ioam6_schema *sc, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { struct ioam6_namespace *ns; void *hdr; hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd); if (!hdr) return -ENOMEM; if (nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id) || nla_put(skb, IOAM6_ATTR_SC_DATA, sc->len, sc->data)) goto nla_put_failure; rcu_read_lock(); ns = rcu_dereference(sc->ns); if (ns && nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id))) { rcu_read_unlock(); goto nla_put_failure; } rcu_read_unlock(); genlmsg_end(skb, hdr); return 0; nla_put_failure: genlmsg_cancel(skb, hdr); return -EMSGSIZE; } static int ioam6_genl_dumpsc_start(struct netlink_callback *cb) { struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk)); struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; if (!iter) { iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return -ENOMEM; cb->args[0] = (long)iter; } rhashtable_walk_enter(&nsdata->schemas, iter); return 0; } static int ioam6_genl_dumpsc_done(struct netlink_callback *cb) { struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; rhashtable_walk_exit(iter); kfree(iter); return 0; } static int ioam6_genl_dumpsc(struct sk_buff *skb, struct netlink_callback *cb) { struct rhashtable_iter *iter; struct ioam6_schema *sc; int err; iter = (struct rhashtable_iter *)cb->args[0]; rhashtable_walk_start(iter); for (;;) { sc = rhashtable_walk_next(iter); if (IS_ERR(sc)) { if (PTR_ERR(sc) == -EAGAIN) continue; err = PTR_ERR(sc); goto done; } else if (!sc) { break; } err = __ioam6_genl_dumpsc_element(sc, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, skb, IOAM6_CMD_DUMP_SCHEMAS); if (err) goto done; } err = skb->len; done: rhashtable_walk_stop(iter); return err; } static int ioam6_genl_ns_set_schema(struct sk_buff *skb, struct genl_info *info) { struct ioam6_namespace *ns, *ns_ref; struct ioam6_schema *sc, *sc_ref; struct 
ioam6_pernet_data *nsdata; __be16 ns_id; u32 sc_id; int err; if (!info->attrs[IOAM6_ATTR_NS_ID] || (!info->attrs[IOAM6_ATTR_SC_ID] && !info->attrs[IOAM6_ATTR_SC_NONE])) return -EINVAL; ns_id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID])); nsdata = ioam6_pernet(genl_info_net(info)); mutex_lock(&nsdata->lock); ns = rhashtable_lookup_fast(&nsdata->namespaces, &ns_id, rht_ns_params); if (!ns) { err = -ENOENT; goto out_unlock; } if (info->attrs[IOAM6_ATTR_SC_NONE]) { sc = NULL; } else { sc_id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]); sc = rhashtable_lookup_fast(&nsdata->schemas, &sc_id, rht_sc_params); if (!sc) { err = -ENOENT; goto out_unlock; } } sc_ref = rcu_dereference_protected(ns->schema, lockdep_is_held(&nsdata->lock)); if (sc_ref) rcu_assign_pointer(sc_ref->ns, NULL); rcu_assign_pointer(ns->schema, sc); if (sc) { ns_ref = rcu_dereference_protected(sc->ns, lockdep_is_held(&nsdata->lock)); if (ns_ref) rcu_assign_pointer(ns_ref->schema, NULL); rcu_assign_pointer(sc->ns, ns); } err = 0; out_unlock: mutex_unlock(&nsdata->lock); return err; } static const struct genl_ops ioam6_genl_ops[] = { { .cmd = IOAM6_CMD_ADD_NAMESPACE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ioam6_genl_addns, .flags = GENL_ADMIN_PERM, .policy = ioam6_genl_policy_addns, .maxattr = ARRAY_SIZE(ioam6_genl_policy_addns) - 1, }, { .cmd = IOAM6_CMD_DEL_NAMESPACE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ioam6_genl_delns, .flags = GENL_ADMIN_PERM, .policy = ioam6_genl_policy_delns, .maxattr = ARRAY_SIZE(ioam6_genl_policy_delns) - 1, }, { .cmd = IOAM6_CMD_DUMP_NAMESPACES, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .start = ioam6_genl_dumpns_start, .dumpit = ioam6_genl_dumpns, .done = ioam6_genl_dumpns_done, .flags = GENL_ADMIN_PERM, }, { .cmd = IOAM6_CMD_ADD_SCHEMA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ioam6_genl_addsc, .flags = GENL_ADMIN_PERM, .policy = ioam6_genl_policy_addsc, .maxattr = ARRAY_SIZE(ioam6_genl_policy_addsc) - 1, }, { .cmd = IOAM6_CMD_DEL_SCHEMA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ioam6_genl_delsc, .flags = GENL_ADMIN_PERM, .policy = ioam6_genl_policy_delsc, .maxattr = ARRAY_SIZE(ioam6_genl_policy_delsc) - 1, }, { .cmd = IOAM6_CMD_DUMP_SCHEMAS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .start = ioam6_genl_dumpsc_start, .dumpit = ioam6_genl_dumpsc, .done = ioam6_genl_dumpsc_done, .flags = GENL_ADMIN_PERM, }, { .cmd = IOAM6_CMD_NS_SET_SCHEMA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ioam6_genl_ns_set_schema, .flags = GENL_ADMIN_PERM, .policy = ioam6_genl_policy_ns_sc, .maxattr = ARRAY_SIZE(ioam6_genl_policy_ns_sc) - 1, }, }; #define IOAM6_GENL_EV_GRP_OFFSET 0 static const struct genl_multicast_group ioam6_mcgrps[] = { [IOAM6_GENL_EV_GRP_OFFSET] = { .name = IOAM6_GENL_EV_GRP_NAME, .flags = GENL_MCAST_CAP_NET_ADMIN }, }; static int ioam6_event_put_trace(struct sk_buff *skb, struct ioam6_trace_hdr *trace, unsigned int len) { if (nla_put_u16(skb, IOAM6_EVENT_ATTR_TRACE_NAMESPACE, be16_to_cpu(trace->namespace_id)) || nla_put_u8(skb, IOAM6_EVENT_ATTR_TRACE_NODELEN, trace->nodelen) || nla_put_u32(skb, IOAM6_EVENT_ATTR_TRACE_TYPE, be32_to_cpu(trace->type_be32)) || nla_put(skb, IOAM6_EVENT_ATTR_TRACE_DATA, len - sizeof(struct ioam6_trace_hdr) - trace->remlen * 4, trace->data + trace->remlen * 4)) return 1; return 0; } void ioam6_event(enum ioam6_event_type type, struct net *net, gfp_t gfp, void 
*opt, unsigned int opt_len) { struct nlmsghdr *nlh; struct sk_buff *skb; if (!genl_has_listeners(&ioam6_genl_family, net, IOAM6_GENL_EV_GRP_OFFSET)) return; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!skb) return; nlh = genlmsg_put(skb, 0, 0, &ioam6_genl_family, 0, type); if (!nlh) goto nla_put_failure; switch (type) { case IOAM6_EVENT_UNSPEC: WARN_ON_ONCE(1); break; case IOAM6_EVENT_TRACE: if (ioam6_event_put_trace(skb, (struct ioam6_trace_hdr *)opt, opt_len)) goto nla_put_failure; break; } genlmsg_end(skb, nlh); genlmsg_multicast_netns(&ioam6_genl_family, net, skb, 0, IOAM6_GENL_EV_GRP_OFFSET, gfp); return; nla_put_failure: nlmsg_free(skb); } static struct genl_family ioam6_genl_family __ro_after_init = { .name = IOAM6_GENL_NAME, .version = IOAM6_GENL_VERSION, .netnsok = true, .parallel_ops = true, .ops = ioam6_genl_ops, .n_ops = ARRAY_SIZE(ioam6_genl_ops), .resv_start_op = IOAM6_CMD_NS_SET_SCHEMA + 1, .mcgrps = ioam6_mcgrps, .n_mcgrps = ARRAY_SIZE(ioam6_mcgrps), .module = THIS_MODULE, }; struct ioam6_namespace *ioam6_namespace(struct net *net, __be16 id) { struct ioam6_pernet_data *nsdata = ioam6_pernet(net); return rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params); } static void __ioam6_fill_trace_data(struct sk_buff *skb, struct ioam6_namespace *ns, struct ioam6_trace_hdr *trace, struct ioam6_schema *sc, u8 sclen, bool is_input) { struct timespec64 ts; ktime_t tstamp; u64 raw64; u32 raw32; u16 raw16; u8 *data; u8 byte; data = trace->data + trace->remlen * 4 - trace->nodelen * 4 - sclen * 4; /* hop_lim and node_id */ if (trace->type.bit0) { byte = ipv6_hdr(skb)->hop_limit; if (is_input) byte--; raw32 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id; *(__be32 *)data = cpu_to_be32((byte << 24) | raw32); data += sizeof(__be32); } /* ingress_if_id and egress_if_id */ if (trace->type.bit1) { if (!skb->dev) raw16 = IOAM6_U16_UNAVAILABLE; else raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id); *(__be16 *)data = cpu_to_be16(raw16); data += sizeof(__be16); if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) raw16 = IOAM6_U16_UNAVAILABLE; else raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id); *(__be16 *)data = cpu_to_be16(raw16); data += sizeof(__be16); } /* timestamp seconds */ if (trace->type.bit2) { if (!skb->dev) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); } else { tstamp = skb_tstamp_cond(skb, true); ts = ktime_to_timespec64(tstamp); *(__be32 *)data = cpu_to_be32((u32)ts.tv_sec); } data += sizeof(__be32); } /* timestamp subseconds */ if (trace->type.bit3) { if (!skb->dev) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); } else { if (!trace->type.bit2) { tstamp = skb_tstamp_cond(skb, true); ts = ktime_to_timespec64(tstamp); } *(__be32 *)data = cpu_to_be32((u32)(ts.tv_nsec / NSEC_PER_USEC)); } data += sizeof(__be32); } /* transit delay */ if (trace->type.bit4) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* namespace data */ if (trace->type.bit5) { *(__be32 *)data = ns->data; data += sizeof(__be32); } /* queue depth */ if (trace->type.bit6) { struct netdev_queue *queue; struct Qdisc *qdisc; __u32 qlen, backlog; if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); } else { queue = skb_get_tx_queue(skb_dst(skb)->dev, skb); qdisc = rcu_dereference(queue->qdisc); qdisc_qstats_qlen_backlog(qdisc, &qlen, &backlog); *(__be32 *)data = cpu_to_be32(backlog); } data += sizeof(__be32); } /* checksum complement */ if (trace->type.bit7) { 
*(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* hop_lim and node_id (wide) */ if (trace->type.bit8) { byte = ipv6_hdr(skb)->hop_limit; if (is_input) byte--; raw64 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id_wide; *(__be64 *)data = cpu_to_be64(((u64)byte << 56) | raw64); data += sizeof(__be64); } /* ingress_if_id and egress_if_id (wide) */ if (trace->type.bit9) { if (!skb->dev) raw32 = IOAM6_U32_UNAVAILABLE; else raw32 = READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id_wide); *(__be32 *)data = cpu_to_be32(raw32); data += sizeof(__be32); if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) raw32 = IOAM6_U32_UNAVAILABLE; else raw32 = READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide); *(__be32 *)data = cpu_to_be32(raw32); data += sizeof(__be32); } /* namespace data (wide) */ if (trace->type.bit10) { *(__be64 *)data = ns->data_wide; data += sizeof(__be64); } /* buffer occupancy */ if (trace->type.bit11) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* bit12 undefined: filled with empty value */ if (trace->type.bit12) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* bit13 undefined: filled with empty value */ if (trace->type.bit13) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* bit14 undefined: filled with empty value */ if (trace->type.bit14) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* bit15 undefined: filled with empty value */ if (trace->type.bit15) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* bit16 undefined: filled with empty value */ if (trace->type.bit16) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* bit17 undefined: filled with empty value */ if (trace->type.bit17) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* bit18 undefined: filled with empty value */ if (trace->type.bit18) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* bit19 undefined: filled with empty value */ if (trace->type.bit19) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* bit20 undefined: filled with empty value */ if (trace->type.bit20) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* bit21 undefined: filled with empty value */ if (trace->type.bit21) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); data += sizeof(__be32); } /* opaque state snapshot */ if (trace->type.bit22) { if (!sc) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE >> 8); } else { *(__be32 *)data = sc->hdr; data += sizeof(__be32); memcpy(data, sc->data, sc->len); } } } /* called with rcu_read_lock() */ void ioam6_fill_trace_data(struct sk_buff *skb, struct ioam6_namespace *ns, struct ioam6_trace_hdr *trace, bool is_input) { struct ioam6_schema *sc; u8 sclen = 0; /* Skip if Overflow flag is set */ if (trace->overflow) return; /* NodeLen does not include Opaque State Snapshot length. 
We need to * take it into account if the corresponding bit is set (bit 22) and * if the current IOAM namespace has an active schema attached to it */ sc = rcu_dereference(ns->schema); if (trace->type.bit22) { sclen = sizeof_field(struct ioam6_schema, hdr) / 4; if (sc) sclen += sc->len / 4; } /* If there is no space remaining, we set the Overflow flag and we * skip without filling the trace */ if (!trace->remlen || trace->remlen < trace->nodelen + sclen) { trace->overflow = 1; return; } __ioam6_fill_trace_data(skb, ns, trace, sc, sclen, is_input); trace->remlen -= trace->nodelen + sclen; } static int __net_init ioam6_net_init(struct net *net) { struct ioam6_pernet_data *nsdata; int err = -ENOMEM; nsdata = kzalloc(sizeof(*nsdata), GFP_KERNEL); if (!nsdata) goto out; mutex_init(&nsdata->lock); net->ipv6.ioam6_data = nsdata; err = rhashtable_init(&nsdata->namespaces, &rht_ns_params); if (err) goto free_nsdata; err = rhashtable_init(&nsdata->schemas, &rht_sc_params); if (err) goto free_rht_ns; out: return err; free_rht_ns: rhashtable_destroy(&nsdata->namespaces); free_nsdata: kfree(nsdata); net->ipv6.ioam6_data = NULL; goto out; } static void __net_exit ioam6_net_exit(struct net *net) { struct ioam6_pernet_data *nsdata = ioam6_pernet(net); rhashtable_free_and_destroy(&nsdata->namespaces, ioam6_free_ns, NULL); rhashtable_free_and_destroy(&nsdata->schemas, ioam6_free_sc, NULL); kfree(nsdata); } static struct pernet_operations ioam6_net_ops = { .init = ioam6_net_init, .exit = ioam6_net_exit, }; int __init ioam6_init(void) { int err = register_pernet_subsys(&ioam6_net_ops); if (err) goto out; err = genl_register_family(&ioam6_genl_family); if (err) goto out_unregister_pernet_subsys; #ifdef CONFIG_IPV6_IOAM6_LWTUNNEL err = ioam6_iptunnel_init(); if (err) goto out_unregister_genl; #endif pr_info("In-situ OAM (IOAM) with IPv6\n"); out: return err; #ifdef CONFIG_IPV6_IOAM6_LWTUNNEL out_unregister_genl: genl_unregister_family(&ioam6_genl_family); #endif out_unregister_pernet_subsys: unregister_pernet_subsys(&ioam6_net_ops); goto out; } void ioam6_exit(void) { #ifdef CONFIG_IPV6_IOAM6_LWTUNNEL ioam6_iptunnel_exit(); #endif genl_unregister_family(&ioam6_genl_family); unregister_pernet_subsys(&ioam6_net_ops); }
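/*
 * Illustrative sketch, not part of this file: how a caller (e.g. the IOAM
 * hop-by-hop option handling) is expected to combine the two helpers defined
 * above. Only ioam6_namespace() and ioam6_fill_trace_data() are real; the
 * wrapper name and its arguments are made up for the example.
 */
#include <linux/skbuff.h>
#include <linux/rcupdate.h>
#include <net/ioam6.h>

static void example_handle_trace(struct net *net, struct sk_buff *skb,
				 struct ioam6_trace_hdr *trace, bool is_input)
{
	struct ioam6_namespace *ns;

	/* both the rhashtable lookup and the schema pointer are RCU protected,
	 * and ioam6_fill_trace_data() is documented as running under
	 * rcu_read_lock()
	 */
	rcu_read_lock();

	ns = ioam6_namespace(net, trace->namespace_id);
	if (ns)
		/* appends this node's data and updates trace->remlen, or sets
		 * the Overflow flag when not enough space is left
		 */
		ioam6_fill_trace_data(skb, ns, trace, is_input);

	rcu_read_unlock();
}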
#ifndef __DRM_GEM_H__ #define __DRM_GEM_H__ /* * GEM Graphics Execution Manager Driver Interfaces * * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. * Copyright (c) 2009-2010, Code Aurora Forum. * All rights reserved. * Copyright © 2014 Intel Corporation * Daniel Vetter <daniel.vetter@ffwll.ch> * * Author: Rickard E.
(Rik) Faith <faith@valinux.com> * Author: Gareth Hughes <gareth@valinux.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include <linux/kref.h> #include <linux/dma-buf.h> #include <linux/dma-resv.h> #include <linux/list.h> #include <linux/mutex.h> #include <drm/drm_vma_manager.h> struct iosys_map; struct drm_gem_object; /** * enum drm_gem_object_status - bitmask of object state for fdinfo reporting * @DRM_GEM_OBJECT_RESIDENT: object is resident in memory (ie. not unpinned) * @DRM_GEM_OBJECT_PURGEABLE: object marked as purgeable by userspace * @DRM_GEM_OBJECT_ACTIVE: object is currently used by an active submission * * Bitmask of status used for fdinfo memory stats, see &drm_gem_object_funcs.status * and drm_show_fdinfo(). Note that an object can report DRM_GEM_OBJECT_PURGEABLE * and be active or not resident, in which case drm_show_fdinfo() will not * account for it as purgeable. So drivers do not need to check if the buffer * is idle and resident to return this bit, i.e. userspace can mark a buffer as * purgeable even while it is still busy on the GPU. It will not get reported in * the puregeable stats until it becomes idle. The status gem object func does * not need to consider this. */ enum drm_gem_object_status { DRM_GEM_OBJECT_RESIDENT = BIT(0), DRM_GEM_OBJECT_PURGEABLE = BIT(1), DRM_GEM_OBJECT_ACTIVE = BIT(2), }; /** * struct drm_gem_object_funcs - GEM object functions */ struct drm_gem_object_funcs { /** * @free: * * Deconstructor for drm_gem_objects. * * This callback is mandatory. */ void (*free)(struct drm_gem_object *obj); /** * @open: * * Called upon GEM handle creation. * * This callback is optional. */ int (*open)(struct drm_gem_object *obj, struct drm_file *file); /** * @close: * * Called upon GEM handle release. * * This callback is optional. */ void (*close)(struct drm_gem_object *obj, struct drm_file *file); /** * @print_info: * * If driver subclasses struct &drm_gem_object, it can implement this * optional hook for printing additional driver specific info. * * drm_printf_indent() should be used in the callback passing it the * indent argument. * * This callback is called from drm_gem_print_info(). * * This callback is optional. */ void (*print_info)(struct drm_printer *p, unsigned int indent, const struct drm_gem_object *obj); /** * @export: * * Export backing buffer as a &dma_buf. * If this is not set drm_gem_prime_export() is used. * * This callback is optional. 
*/ struct dma_buf *(*export)(struct drm_gem_object *obj, int flags); /** * @pin: * * Pin backing buffer in memory. Used by the drm_gem_map_attach() helper. * * This callback is optional. */ int (*pin)(struct drm_gem_object *obj); /** * @unpin: * * Unpin backing buffer. Used by the drm_gem_map_detach() helper. * * This callback is optional. */ void (*unpin)(struct drm_gem_object *obj); /** * @get_sg_table: * * Returns a Scatter-Gather table representation of the buffer. * Used when exporting a buffer by the drm_gem_map_dma_buf() helper. * Releasing is done by calling dma_unmap_sg_attrs() and sg_free_table() * in drm_gem_unmap_buf(), therefore these helpers and this callback * here cannot be used for sg tables pointing at driver private memory * ranges. * * See also drm_prime_pages_to_sg(). */ struct sg_table *(*get_sg_table)(struct drm_gem_object *obj); /** * @vmap: * * Returns a virtual address for the buffer. Used by the * drm_gem_dmabuf_vmap() helper. * * This callback is optional. */ int (*vmap)(struct drm_gem_object *obj, struct iosys_map *map); /** * @vunmap: * * Releases the address previously returned by @vmap. Used by the * drm_gem_dmabuf_vunmap() helper. * * This callback is optional. */ void (*vunmap)(struct drm_gem_object *obj, struct iosys_map *map); /** * @mmap: * * Handle mmap() of the gem object, setup vma accordingly. * * This callback is optional. * * The callback is used by both drm_gem_mmap_obj() and * drm_gem_prime_mmap(). When @mmap is present @vm_ops is not * used, the @mmap callback must set vma->vm_ops instead. */ int (*mmap)(struct drm_gem_object *obj, struct vm_area_struct *vma); /** * @evict: * * Evicts gem object out from memory. Used by the drm_gem_object_evict() * helper. Returns 0 on success, -errno otherwise. * * This callback is optional. */ int (*evict)(struct drm_gem_object *obj); /** * @status: * * The optional status callback can return additional object state * which determines which stats the object is counted against. The * callback is called under table_lock. Racing against object status * change is "harmless", and the callback can expect to not race * against object destruction. * * Called by drm_show_memory_stats(). */ enum drm_gem_object_status (*status)(struct drm_gem_object *obj); /** * @rss: * * Return resident size of the object in physical memory. * * Called by drm_show_memory_stats(). */ size_t (*rss)(struct drm_gem_object *obj); /** * @vm_ops: * * Virtual memory operations used with mmap. * * This is optional but necessary for mmap support. */ const struct vm_operations_struct *vm_ops; }; /** * struct drm_gem_lru - A simple LRU helper * * A helper for tracking GEM objects in a given state, to aid in * driver's shrinker implementation. Tracks the count of pages * for lockless &shrinker.count_objects, and provides * &drm_gem_lru_scan for driver's &shrinker.scan_objects * implementation. */ struct drm_gem_lru { /** * @lock: * * Lock protecting movement of GEM objects between LRUs. All * LRUs that the object can move between should be protected * by the same lock. */ struct mutex *lock; /** * @count: * * The total number of backing pages of the GEM objects in * this LRU. */ long count; /** * @list: * * The LRU list. */ struct list_head list; }; /** * struct drm_gem_object - GEM buffer object * * This structure defines the generic parts for GEM buffer objects, which are * mostly around handling mmap and userspace handles. * * Buffer objects are often abbreviated to BO. 
*/ struct drm_gem_object { /** * @refcount: * * Reference count of this object * * Please use drm_gem_object_get() to acquire and drm_gem_object_put_locked() * or drm_gem_object_put() to release a reference to a GEM * buffer object. */ struct kref refcount; /** * @handle_count: * * This is the GEM file_priv handle count of this object. * * Each handle also holds a reference. Note that when the handle_count * drops to 0 any global names (e.g. the id in the flink namespace) will * be cleared. * * Protected by &drm_device.object_name_lock. */ unsigned handle_count; /** * @dev: DRM dev this object belongs to. */ struct drm_device *dev; /** * @filp: * * SHMEM file node used as backing storage for swappable buffer objects. * GEM also supports driver private objects with driver-specific backing * storage (contiguous DMA memory, special reserved blocks). In this * case @filp is NULL. */ struct file *filp; /** * @vma_node: * * Mapping info for this object to support mmap. Drivers are supposed to * allocate the mmap offset using drm_gem_create_mmap_offset(). The * offset itself can be retrieved using drm_vma_node_offset_addr(). * * Memory mapping itself is handled by drm_gem_mmap(), which also checks * that userspace is allowed to access the object. */ struct drm_vma_offset_node vma_node; /** * @size: * * Size of the object, in bytes. Immutable over the object's * lifetime. */ size_t size; /** * @name: * * Global name for this object, starts at 1. 0 means unnamed. * Access is covered by &drm_device.object_name_lock. This is used by * the GEM_FLINK and GEM_OPEN ioctls. */ int name; /** * @dma_buf: * * dma-buf associated with this GEM object. * * Pointer to the dma-buf associated with this gem object (either * through importing or exporting). We break the resulting reference * loop when the last gem handle for this object is released. * * Protected by &drm_device.object_name_lock. */ struct dma_buf *dma_buf; /** * @import_attach: * * dma-buf attachment backing this object. * * Any foreign dma_buf imported as a gem object has this set to the * attachment point for the device. This is invariant over the lifetime * of a gem object. * * The &drm_gem_object_funcs.free callback is responsible for * cleaning up the dma_buf attachment and references acquired at import * time. * * Note that the drm gem/prime core does not depend upon drivers setting * this field any more. So for drivers where this doesn't make sense * (e.g. virtual devices or a displaylink behind an usb bus) they can * simply leave it as NULL. */ struct dma_buf_attachment *import_attach; /** * @resv: * * Pointer to reservation object associated with the this GEM object. * * Normally (@resv == &@_resv) except for imported GEM objects. */ struct dma_resv *resv; /** * @_resv: * * A reservation object for this GEM object. * * This is unused for imported GEM objects. */ struct dma_resv _resv; /** * @gpuva: * * Provides the list of GPU VAs attached to this GEM object. * * Drivers should lock list accesses with the GEMs &dma_resv lock * (&drm_gem_object.resv) or a custom lock if one is provided. */ struct { struct list_head list; #ifdef CONFIG_LOCKDEP struct lockdep_map *lock_dep_map; #endif } gpuva; /** * @funcs: * * Optional GEM object functions. If this is set, it will be used instead of the * corresponding &drm_driver GEM callbacks. * * New drivers should use this. * */ const struct drm_gem_object_funcs *funcs; /** * @lru_node: * * List node in a &drm_gem_lru. 
*/ struct list_head lru_node; /** * @lru: * * The current LRU list that the GEM object is on. */ struct drm_gem_lru *lru; }; /** * DRM_GEM_FOPS - Default drm GEM file operations * * This macro provides a shorthand for setting the GEM file ops in the * &file_operations structure. If all you need are the default ops, use * DEFINE_DRM_GEM_FOPS instead. */ #define DRM_GEM_FOPS \ .open = drm_open,\ .release = drm_release,\ .unlocked_ioctl = drm_ioctl,\ .compat_ioctl = drm_compat_ioctl,\ .poll = drm_poll,\ .read = drm_read,\ .llseek = noop_llseek,\ .mmap = drm_gem_mmap, \ .fop_flags = FOP_UNSIGNED_OFFSET /** * DEFINE_DRM_GEM_FOPS() - macro to generate file operations for GEM drivers * @name: name for the generated structure * * This macro autogenerates a suitable &struct file_operations for GEM based * drivers, which can be assigned to &drm_driver.fops. Note that this structure * cannot be shared between drivers, because it contains a reference to the * current module using THIS_MODULE. * * Note that the declaration is already marked as static - if you need a * non-static version of this you're probably doing it wrong and will break the * THIS_MODULE reference by accident. */ #define DEFINE_DRM_GEM_FOPS(name) \ static const struct file_operations name = {\ .owner = THIS_MODULE,\ DRM_GEM_FOPS,\ } void drm_gem_object_release(struct drm_gem_object *obj); void drm_gem_object_free(struct kref *kref); int drm_gem_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size); int drm_gem_object_init_with_mnt(struct drm_device *dev, struct drm_gem_object *obj, size_t size, struct vfsmount *gemfs); void drm_gem_private_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size); void drm_gem_private_object_fini(struct drm_gem_object *obj); void drm_gem_vm_open(struct vm_area_struct *vma); void drm_gem_vm_close(struct vm_area_struct *vma); int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size, struct vm_area_struct *vma); int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); /** * drm_gem_object_get - acquire a GEM buffer object reference * @obj: GEM buffer object * * This function acquires an additional reference to @obj. It is illegal to * call this without already holding a reference. No locks required. */ static inline void drm_gem_object_get(struct drm_gem_object *obj) { kref_get(&obj->refcount); } __attribute__((nonnull)) static inline void __drm_gem_object_put(struct drm_gem_object *obj) { kref_put(&obj->refcount, drm_gem_object_free); } /** * drm_gem_object_put - drop a GEM buffer object reference * @obj: GEM buffer object * * This releases a reference to @obj. 
*/ static inline void drm_gem_object_put(struct drm_gem_object *obj) { if (obj) __drm_gem_object_put(obj); } int drm_gem_handle_create(struct drm_file *file_priv, struct drm_gem_object *obj, u32 *handlep); int drm_gem_handle_delete(struct drm_file *filp, u32 handle); void drm_gem_free_mmap_offset(struct drm_gem_object *obj); int drm_gem_create_mmap_offset(struct drm_gem_object *obj); int drm_gem_create_mmap_offset_size(struct drm_gem_object *obj, size_t size); struct page **drm_gem_get_pages(struct drm_gem_object *obj); void drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages, bool dirty, bool accessed); void drm_gem_lock(struct drm_gem_object *obj); void drm_gem_unlock(struct drm_gem_object *obj); int drm_gem_vmap_unlocked(struct drm_gem_object *obj, struct iosys_map *map); void drm_gem_vunmap_unlocked(struct drm_gem_object *obj, struct iosys_map *map); int drm_gem_objects_lookup(struct drm_file *filp, void __user *bo_handles, int count, struct drm_gem_object ***objs_out); struct drm_gem_object *drm_gem_object_lookup(struct drm_file *filp, u32 handle); long drm_gem_dma_resv_wait(struct drm_file *filep, u32 handle, bool wait_all, unsigned long timeout); int drm_gem_lock_reservations(struct drm_gem_object **objs, int count, struct ww_acquire_ctx *acquire_ctx); void drm_gem_unlock_reservations(struct drm_gem_object **objs, int count, struct ww_acquire_ctx *acquire_ctx); int drm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev, u32 handle, u64 *offset); void drm_gem_lru_init(struct drm_gem_lru *lru, struct mutex *lock); void drm_gem_lru_remove(struct drm_gem_object *obj); void drm_gem_lru_move_tail_locked(struct drm_gem_lru *lru, struct drm_gem_object *obj); void drm_gem_lru_move_tail(struct drm_gem_lru *lru, struct drm_gem_object *obj); unsigned long drm_gem_lru_scan(struct drm_gem_lru *lru, unsigned int nr_to_scan, unsigned long *remaining, bool (*shrink)(struct drm_gem_object *obj)); int drm_gem_evict(struct drm_gem_object *obj); /** * drm_gem_object_is_shared_for_memory_stats - helper for shared memory stats * * This helper should only be used for fdinfo shared memory stats to determine * if a GEM object is shared. * * @obj: obj in question */ static inline bool drm_gem_object_is_shared_for_memory_stats(struct drm_gem_object *obj) { return (obj->handle_count > 1) || obj->dma_buf; } /** * drm_gem_is_imported() - Tests if GEM object's buffer has been imported * @obj: the GEM object * * Returns: * True if the GEM object's buffer has been imported, false otherwise */ static inline bool drm_gem_is_imported(const struct drm_gem_object *obj) { return !!obj->import_attach; } #ifdef CONFIG_LOCKDEP /** * drm_gem_gpuva_set_lock() - Set the lock protecting accesses to the gpuva list. * @obj: the &drm_gem_object * @lock: the lock used to protect the gpuva list. The locking primitive * must contain a dep_map field. * * Call this if you're not proctecting access to the gpuva list with the * dma-resv lock, but with a custom lock. */ #define drm_gem_gpuva_set_lock(obj, lock) \ if (!WARN((obj)->gpuva.lock_dep_map, \ "GEM GPUVA lock should be set only once.")) \ (obj)->gpuva.lock_dep_map = &(lock)->dep_map #define drm_gem_gpuva_assert_lock_held(obj) \ lockdep_assert((obj)->gpuva.lock_dep_map ? 
\ lock_is_held((obj)->gpuva.lock_dep_map) : \ dma_resv_held((obj)->resv)) #else #define drm_gem_gpuva_set_lock(obj, lock) do {} while (0) #define drm_gem_gpuva_assert_lock_held(obj) do {} while (0) #endif /** * drm_gem_gpuva_init() - initialize the gpuva list of a GEM object * @obj: the &drm_gem_object * * This initializes the &drm_gem_object's &drm_gpuvm_bo list. * * Calling this function is only necessary for drivers intending to support the * &drm_driver_feature DRIVER_GEM_GPUVA. * * See also drm_gem_gpuva_set_lock(). */ static inline void drm_gem_gpuva_init(struct drm_gem_object *obj) { INIT_LIST_HEAD(&obj->gpuva.list); } /** * drm_gem_for_each_gpuvm_bo() - iterator to walk over a list of &drm_gpuvm_bo * @entry__: &drm_gpuvm_bo structure to assign to in each iteration step * @obj__: the &drm_gem_object the &drm_gpuvm_bo to walk are associated with * * This iterator walks over all &drm_gpuvm_bo structures associated with the * &drm_gem_object. */ #define drm_gem_for_each_gpuvm_bo(entry__, obj__) \ list_for_each_entry(entry__, &(obj__)->gpuva.list, list.entry.gem) /** * drm_gem_for_each_gpuvm_bo_safe() - iterator to safely walk over a list of * &drm_gpuvm_bo * @entry__: &drm_gpuvm_bo structure to assign to in each iteration step * @next__: the next &drm_gpuvm_bo to store the next step * @obj__: the &drm_gem_object the &drm_gpuvm_bo to walk are associated with * * This iterator walks over all &drm_gpuvm_bo structures associated with the * &drm_gem_object. It is implemented with list_for_each_entry_safe(), hence * it is safe against removal of elements. */ #define drm_gem_for_each_gpuvm_bo_safe(entry__, next__, obj__) \ list_for_each_entry_safe(entry__, next__, &(obj__)->gpuva.list, list.entry.gem) #endif /* __DRM_GEM_H__ */
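The callbacks and helpers declared above are usually wired together by a driver-private GEM object. The fragment below is a minimal, illustrative sketch only: the "mydrv" names are hypothetical and not part of drm_gem.h, error handling is reduced to the essentials, and only the &drm_gem_object_funcs fields a simple driver needs are filled in. It shows a driver embedding struct drm_gem_object, supplying a funcs table (used instead of the &drm_driver callbacks, as documented above) and generating its file_operations with DEFINE_DRM_GEM_FOPS().

#include <drm/drm_gem.h>
#include <linux/err.h>
#include <linux/slab.h>

/* Hypothetical driver-private buffer object embedding the generic GEM object */
struct mydrv_gem_object {
        struct drm_gem_object base;
        void *vaddr;                    /* hypothetical backing-storage handle */
};

static void mydrv_gem_free(struct drm_gem_object *obj)
{
        struct mydrv_gem_object *bo =
                container_of(obj, struct mydrv_gem_object, base);

        drm_gem_object_release(obj);    /* drops mmap offset, shmem filp, ... */
        kfree(bo);
}

static const struct drm_gem_object_funcs mydrv_gem_funcs = {
        .free = mydrv_gem_free,
        /* .pin/.unpin/.get_sg_table/.vmap/.vunmap/.mmap as needed for PRIME */
};

/* Generates the &file_operations to assign to &drm_driver.fops */
DEFINE_DRM_GEM_FOPS(mydrv_fops);

static struct mydrv_gem_object *mydrv_gem_create(struct drm_device *dev,
                                                 size_t size)
{
        struct mydrv_gem_object *bo;
        int ret;

        bo = kzalloc(sizeof(*bo), GFP_KERNEL);
        if (!bo)
                return ERR_PTR(-ENOMEM);

        /* Per-object funcs take precedence over the legacy driver callbacks */
        bo->base.funcs = &mydrv_gem_funcs;

        ret = drm_gem_object_init(dev, &bo->base, size);
        if (ret) {
                kfree(bo);
                return ERR_PTR(ret);
        }

        return bo;
}

A userspace handle for such an object would then typically be created with drm_gem_handle_create() and released through drm_gem_object_put(), exactly as described in the declarations above.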
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */

/* Kernel module implementing an IP set type: the hash:ip,port,ip type */

#include <linux/jhash.h>
#include <linux/module.h>
#include <linux/ip.h>
#include <linux/skbuff.h>
#include <linux/errno.h>
#include <linux/random.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/netlink.h>
#include <net/tcp.h>
#include <linux/netfilter.h>
#include <linux/netfilter/ipset/pfxlen.h>
#include <linux/netfilter/ipset/ip_set.h>
#include <linux/netfilter/ipset/ip_set_getport.h>
#include <linux/netfilter/ipset/ip_set_hash.h>

#define IPSET_TYPE_REV_MIN 0
/* 1 SCTP and UDPLITE support added */
/* 2 Counters support added */
/* 3 Comments support added */
/* 4 Forceadd support added */
/* 5 skbinfo support added */
#define IPSET_TYPE_REV_MAX 6 /* bucketsize, initval support added */

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
IP_SET_MODULE_DESC("hash:ip,port,ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:ip,port,ip");

/* Type specific function prefix */
#define HTYPE hash_ipportip

/* IPv4 variant */

/* Member elements */
struct hash_ipportip4_elem {
        __be32 ip;
        __be32 ip2;
        __be16 port;
        u8 proto;
        u8 padding;
};

static bool
hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
                          const struct hash_ipportip4_elem *ip2,
                          u32 *multi)
{
        return ip1->ip == ip2->ip &&
               ip1->ip2 == ip2->ip2 &&
               ip1->port == ip2->port &&
               ip1->proto == ip2->proto;
}

static bool
hash_ipportip4_data_list(struct sk_buff *skb,
                         const struct hash_ipportip4_elem *data)
{
        if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
            nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip2) ||
            nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
            nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto))
                goto nla_put_failure;
        return false;

nla_put_failure:
        return true;
}

static void
hash_ipportip4_data_next(struct hash_ipportip4_elem *next,
                         const struct hash_ipportip4_elem *d)
{ next->ip = d->ip; next->port = d->port; } /* Common functions */ #define MTYPE hash_ipportip4 #define HOST_MASK 32 #include "ip_set_hash_gen.h" static int hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) return -EINVAL; ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2); return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { struct hash_ipportip4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip, ip_to = 0, p = 0, port, port_to, i = 0; bool with_ports = false; int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO))) return -IPSET_ERR_PROTOCOL; ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &e.ip2); if (ret) return ret; e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); if (tb[IPSET_ATTR_PROTO]) { e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); with_ports = ip_set_proto_with_ports(e.proto); if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else { return -IPSET_ERR_MISSING_PROTO; } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_PORT_TO])) { ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } ip_to = ip = ntohl(e.ip); if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; if (ip > ip_to) swap(ip, ip_to); } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } port_to = port = ntohs(e.port); if (with_ports && tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); } if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { p = retried && ip == ntohl(h->next.ip) ? 
ntohs(h->next.port) : port; for (; p <= port_to; p++, i++) { e.ip = htonl(ip); e.port = htons(p); if (i > IPSET_MAX_RANGE) { hash_ipportip4_data_next(&h->next, &e); return -ERANGE; } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; } } return ret; } /* IPv6 variant */ struct hash_ipportip6_elem { union nf_inet_addr ip; union nf_inet_addr ip2; __be16 port; u8 proto; u8 padding; }; /* Common functions */ static bool hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1, const struct hash_ipportip6_elem *ip2, u32 *multi) { return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ipv6_addr_equal(&ip1->ip2.in6, &ip2->ip2.in6) && ip1->port == ip2->port && ip1->proto == ip2->proto; } static bool hash_ipportip6_data_list(struct sk_buff *skb, const struct hash_ipportip6_elem *data) { if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) || nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip2.in6) || nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto)) goto nla_put_failure; return false; nla_put_failure: return true; } static void hash_ipportip6_data_next(struct hash_ipportip6_elem *next, const struct hash_ipportip6_elem *d) { next->port = d->port; } #undef MTYPE #undef HOST_MASK #define MTYPE hash_ipportip6 #define HOST_MASK 128 #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" static int hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip6_elem e = { .ip = { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) return -EINVAL; ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2.in6); return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct hash_ipportip6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip6_elem e = { .ip = { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; if (unlikely(tb[IPSET_ATTR_CIDR])) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (cidr != HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip2); if (ret) return ret; e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); if (tb[IPSET_ATTR_PROTO]) { e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); with_ports = ip_set_proto_with_ports(e.proto); if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else { return -IPSET_ERR_MISSING_PROTO; } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_eexist(ret, flags) ? 
0 : ret; } port = ntohs(e.port); port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); if (retried) port = ntohs(h->next.port); for (; port <= port_to; port++) { e.port = htons(port); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; } return ret; } static struct ip_set_type hash_ipportip_type __read_mostly = { .name = "hash:ip,port,ip", .protocol = IPSET_PROTOCOL, .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, .dimension = IPSET_DIM_THREE, .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_ipportip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_IP2] = { .type = NLA_NESTED }, [IPSET_ATTR_PORT] = { .type = NLA_U16 }, [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; static int __init hash_ipportip_init(void) { return ip_set_type_register(&hash_ipportip_type); } static void __exit hash_ipportip_fini(void) { rcu_barrier(); ip_set_type_unregister(&hash_ipportip_type); } module_init(hash_ipportip_init); module_exit(hash_ipportip_fini);
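For a range request, hash_ipportip4_uadt() above expands [ip, ip_to] x [port, port_to] into individual elements, recording the next element in h->next and returning -ERANGE once IPSET_MAX_RANGE elements have been added so that a retried request can resume where it stopped. The stand-alone sketch below (hypothetical names, plain userspace C, no kernel or ipset headers) only illustrates that expansion order; it is not part of the module.

#include <stdint.h>
#include <stdio.h>

/* Mirrors the nested ip/port loop of hash_ipportip4_uadt(): every (ip, port)
 * pair in the requested ranges becomes one hash:ip,port,ip element, while ip2
 * and proto stay fixed for the whole request. The counters are 32-bit on
 * purpose, as in the kernel code, so port_to == 65535 cannot wrap a 16-bit
 * counter into an endless loop.
 */
static void expand_range(uint32_t ip, uint32_t ip_to,
                         uint32_t port, uint32_t port_to,
                         uint32_t ip2, uint8_t proto)
{
        uint32_t a, p;

        for (a = ip; a <= ip_to; a++)
                for (p = port; p <= port_to; p++)
                        printf("add %u.%u.%u.%u,%u -> ip2 %u.%u.%u.%u proto %u\n",
                               a >> 24, (a >> 16) & 0xff, (a >> 8) & 0xff, a & 0xff, p,
                               ip2 >> 24, (ip2 >> 16) & 0xff, (ip2 >> 8) & 0xff,
                               ip2 & 0xff, proto);
}

int main(void)
{
        /* 192.168.0.1-192.168.0.2, ports 80-81: expands to four elements */
        expand_range(0xc0a80001, 0xc0a80002, 80, 81, 0x0a000001, 6);
        return 0;
}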
// SPDX-License-Identifier: GPL-2.0-or-later

#include <net/genetlink.h>

#include <uapi/linux/mrp_bridge.h>
#include "br_private.h"
#include "br_private_mrp.h"

static const struct nla_policy br_mrp_policy[IFLA_BRIDGE_MRP_MAX + 1] = {
        [IFLA_BRIDGE_MRP_UNSPEC] = { .type = NLA_REJECT },
        [IFLA_BRIDGE_MRP_INSTANCE] = { .type = NLA_NESTED },
        [IFLA_BRIDGE_MRP_PORT_STATE] = { .type = NLA_NESTED },
        [IFLA_BRIDGE_MRP_PORT_ROLE] = { .type = NLA_NESTED },
        [IFLA_BRIDGE_MRP_RING_STATE] = { .type = NLA_NESTED },
        [IFLA_BRIDGE_MRP_RING_ROLE] = { .type = NLA_NESTED },
        [IFLA_BRIDGE_MRP_START_TEST] = { .type = NLA_NESTED },
        [IFLA_BRIDGE_MRP_IN_ROLE] = { .type = NLA_NESTED },
        [IFLA_BRIDGE_MRP_IN_STATE] = { .type = NLA_NESTED },
        [IFLA_BRIDGE_MRP_START_IN_TEST] = { .type = NLA_NESTED },
};

static const struct nla_policy
br_mrp_instance_policy[IFLA_BRIDGE_MRP_INSTANCE_MAX + 1] = {
        [IFLA_BRIDGE_MRP_INSTANCE_UNSPEC] = { .type = NLA_REJECT },
        [IFLA_BRIDGE_MRP_INSTANCE_RING_ID] = { .type = NLA_U32 },
        [IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX] = { .type = NLA_U32 },
        [IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX] = { .type = NLA_U32 },
        [IFLA_BRIDGE_MRP_INSTANCE_PRIO] = { .type = NLA_U16 },
};

static int br_mrp_instance_parse(struct net_bridge *br, struct nlattr *attr,
                                 int cmd, struct netlink_ext_ack *extack)
{
        struct nlattr
*tb[IFLA_BRIDGE_MRP_INSTANCE_MAX + 1]; struct br_mrp_instance inst; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_INSTANCE_MAX, attr, br_mrp_instance_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_MRP_INSTANCE_RING_ID] || !tb[IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX] || !tb[IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX]) { NL_SET_ERR_MSG_MOD(extack, "Missing attribute: RING_ID or P_IFINDEX or S_IFINDEX"); return -EINVAL; } memset(&inst, 0, sizeof(inst)); inst.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_INSTANCE_RING_ID]); inst.p_ifindex = nla_get_u32(tb[IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX]); inst.s_ifindex = nla_get_u32(tb[IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX]); inst.prio = MRP_DEFAULT_PRIO; if (tb[IFLA_BRIDGE_MRP_INSTANCE_PRIO]) inst.prio = nla_get_u16(tb[IFLA_BRIDGE_MRP_INSTANCE_PRIO]); if (cmd == RTM_SETLINK) return br_mrp_add(br, &inst); else return br_mrp_del(br, &inst); return 0; } static const struct nla_policy br_mrp_port_state_policy[IFLA_BRIDGE_MRP_PORT_STATE_MAX + 1] = { [IFLA_BRIDGE_MRP_PORT_STATE_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_MRP_PORT_STATE_STATE] = { .type = NLA_U32 }, }; static int br_mrp_port_state_parse(struct net_bridge_port *p, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_MRP_PORT_STATE_MAX + 1]; enum br_mrp_port_state_type state; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_PORT_STATE_MAX, attr, br_mrp_port_state_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_MRP_PORT_STATE_STATE]) { NL_SET_ERR_MSG_MOD(extack, "Missing attribute: STATE"); return -EINVAL; } state = nla_get_u32(tb[IFLA_BRIDGE_MRP_PORT_STATE_STATE]); return br_mrp_set_port_state(p, state); } static const struct nla_policy br_mrp_port_role_policy[IFLA_BRIDGE_MRP_PORT_ROLE_MAX + 1] = { [IFLA_BRIDGE_MRP_PORT_ROLE_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_MRP_PORT_ROLE_ROLE] = { .type = NLA_U32 }, }; static int br_mrp_port_role_parse(struct net_bridge_port *p, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_MRP_PORT_ROLE_MAX + 1]; enum br_mrp_port_role_type role; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_PORT_ROLE_MAX, attr, br_mrp_port_role_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_MRP_PORT_ROLE_ROLE]) { NL_SET_ERR_MSG_MOD(extack, "Missing attribute: ROLE"); return -EINVAL; } role = nla_get_u32(tb[IFLA_BRIDGE_MRP_PORT_ROLE_ROLE]); return br_mrp_set_port_role(p, role); } static const struct nla_policy br_mrp_ring_state_policy[IFLA_BRIDGE_MRP_RING_STATE_MAX + 1] = { [IFLA_BRIDGE_MRP_RING_STATE_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_MRP_RING_STATE_RING_ID] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_RING_STATE_STATE] = { .type = NLA_U32 }, }; static int br_mrp_ring_state_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_MRP_RING_STATE_MAX + 1]; struct br_mrp_ring_state state; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_RING_STATE_MAX, attr, br_mrp_ring_state_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_MRP_RING_STATE_RING_ID] || !tb[IFLA_BRIDGE_MRP_RING_STATE_STATE]) { NL_SET_ERR_MSG_MOD(extack, "Missing attribute: RING_ID or STATE"); return -EINVAL; } memset(&state, 0x0, sizeof(state)); state.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_RING_STATE_RING_ID]); state.ring_state = nla_get_u32(tb[IFLA_BRIDGE_MRP_RING_STATE_STATE]); return br_mrp_set_ring_state(br, &state); } static const struct nla_policy br_mrp_ring_role_policy[IFLA_BRIDGE_MRP_RING_ROLE_MAX + 1] = { 
[IFLA_BRIDGE_MRP_RING_ROLE_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_MRP_RING_ROLE_RING_ID] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_RING_ROLE_ROLE] = { .type = NLA_U32 }, }; static int br_mrp_ring_role_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_MRP_RING_ROLE_MAX + 1]; struct br_mrp_ring_role role; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_RING_ROLE_MAX, attr, br_mrp_ring_role_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_MRP_RING_ROLE_RING_ID] || !tb[IFLA_BRIDGE_MRP_RING_ROLE_ROLE]) { NL_SET_ERR_MSG_MOD(extack, "Missing attribute: RING_ID or ROLE"); return -EINVAL; } memset(&role, 0x0, sizeof(role)); role.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_RING_ROLE_RING_ID]); role.ring_role = nla_get_u32(tb[IFLA_BRIDGE_MRP_RING_ROLE_ROLE]); return br_mrp_set_ring_role(br, &role); } static const struct nla_policy br_mrp_start_test_policy[IFLA_BRIDGE_MRP_START_TEST_MAX + 1] = { [IFLA_BRIDGE_MRP_START_TEST_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_MRP_START_TEST_RING_ID] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_START_TEST_INTERVAL] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_START_TEST_MAX_MISS] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_START_TEST_PERIOD] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_START_TEST_MONITOR] = { .type = NLA_U32 }, }; static int br_mrp_start_test_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_MRP_START_TEST_MAX + 1]; struct br_mrp_start_test test; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_START_TEST_MAX, attr, br_mrp_start_test_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_MRP_START_TEST_RING_ID] || !tb[IFLA_BRIDGE_MRP_START_TEST_INTERVAL] || !tb[IFLA_BRIDGE_MRP_START_TEST_MAX_MISS] || !tb[IFLA_BRIDGE_MRP_START_TEST_PERIOD]) { NL_SET_ERR_MSG_MOD(extack, "Missing attribute: RING_ID or INTERVAL or MAX_MISS or PERIOD"); return -EINVAL; } memset(&test, 0x0, sizeof(test)); test.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_RING_ID]); test.interval = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_INTERVAL]); test.max_miss = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_MAX_MISS]); test.period = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_PERIOD]); test.monitor = false; if (tb[IFLA_BRIDGE_MRP_START_TEST_MONITOR]) test.monitor = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_MONITOR]); return br_mrp_start_test(br, &test); } static const struct nla_policy br_mrp_in_state_policy[IFLA_BRIDGE_MRP_IN_STATE_MAX + 1] = { [IFLA_BRIDGE_MRP_IN_STATE_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_MRP_IN_STATE_IN_ID] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_IN_STATE_STATE] = { .type = NLA_U32 }, }; static int br_mrp_in_state_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_MRP_IN_STATE_MAX + 1]; struct br_mrp_in_state state; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_IN_STATE_MAX, attr, br_mrp_in_state_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_MRP_IN_STATE_IN_ID] || !tb[IFLA_BRIDGE_MRP_IN_STATE_STATE]) { NL_SET_ERR_MSG_MOD(extack, "Missing attribute: IN_ID or STATE"); return -EINVAL; } memset(&state, 0x0, sizeof(state)); state.in_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_STATE_IN_ID]); state.in_state = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_STATE_STATE]); return br_mrp_set_in_state(br, &state); } static const struct nla_policy br_mrp_in_role_policy[IFLA_BRIDGE_MRP_IN_ROLE_MAX + 1] = { [IFLA_BRIDGE_MRP_IN_ROLE_UNSPEC] = { .type = NLA_REJECT }, 
[IFLA_BRIDGE_MRP_IN_ROLE_RING_ID] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_IN_ROLE_IN_ID] = { .type = NLA_U16 }, [IFLA_BRIDGE_MRP_IN_ROLE_ROLE] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX] = { .type = NLA_U32 }, }; static int br_mrp_in_role_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_MRP_IN_ROLE_MAX + 1]; struct br_mrp_in_role role; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_IN_ROLE_MAX, attr, br_mrp_in_role_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_MRP_IN_ROLE_RING_ID] || !tb[IFLA_BRIDGE_MRP_IN_ROLE_IN_ID] || !tb[IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX] || !tb[IFLA_BRIDGE_MRP_IN_ROLE_ROLE]) { NL_SET_ERR_MSG_MOD(extack, "Missing attribute: RING_ID or ROLE or IN_ID or I_IFINDEX"); return -EINVAL; } memset(&role, 0x0, sizeof(role)); role.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_ROLE_RING_ID]); role.in_id = nla_get_u16(tb[IFLA_BRIDGE_MRP_IN_ROLE_IN_ID]); role.i_ifindex = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX]); role.in_role = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_ROLE_ROLE]); return br_mrp_set_in_role(br, &role); } static const struct nla_policy br_mrp_start_in_test_policy[IFLA_BRIDGE_MRP_START_IN_TEST_MAX + 1] = { [IFLA_BRIDGE_MRP_START_IN_TEST_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS] = { .type = NLA_U32 }, [IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD] = { .type = NLA_U32 }, }; static int br_mrp_start_in_test_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_MRP_START_IN_TEST_MAX + 1]; struct br_mrp_start_in_test test; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_START_IN_TEST_MAX, attr, br_mrp_start_in_test_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID] || !tb[IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL] || !tb[IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS] || !tb[IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD]) { NL_SET_ERR_MSG_MOD(extack, "Missing attribute: RING_ID or INTERVAL or MAX_MISS or PERIOD"); return -EINVAL; } memset(&test, 0x0, sizeof(test)); test.in_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID]); test.interval = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL]); test.max_miss = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS]); test.period = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD]); return br_mrp_start_in_test(br, &test); } int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p, struct nlattr *attr, int cmd, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_MRP_MAX + 1]; int err; /* When this function is called for a port then the br pointer is * invalid, therefor set the br to point correctly */ if (p) br = p->br; if (br->stp_enabled != BR_NO_STP) { NL_SET_ERR_MSG_MOD(extack, "MRP can't be enabled if STP is already enabled"); return -EINVAL; } err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_MAX, attr, br_mrp_policy, extack); if (err) return err; if (tb[IFLA_BRIDGE_MRP_INSTANCE]) { err = br_mrp_instance_parse(br, tb[IFLA_BRIDGE_MRP_INSTANCE], cmd, extack); if (err) return err; } if (tb[IFLA_BRIDGE_MRP_PORT_STATE]) { err = br_mrp_port_state_parse(p, tb[IFLA_BRIDGE_MRP_PORT_STATE], extack); if (err) return err; } if (tb[IFLA_BRIDGE_MRP_PORT_ROLE]) { err = br_mrp_port_role_parse(p, tb[IFLA_BRIDGE_MRP_PORT_ROLE], extack); if (err) return err; } if (tb[IFLA_BRIDGE_MRP_RING_STATE]) { 
err = br_mrp_ring_state_parse(br, tb[IFLA_BRIDGE_MRP_RING_STATE], extack); if (err) return err; } if (tb[IFLA_BRIDGE_MRP_RING_ROLE]) { err = br_mrp_ring_role_parse(br, tb[IFLA_BRIDGE_MRP_RING_ROLE], extack); if (err) return err; } if (tb[IFLA_BRIDGE_MRP_START_TEST]) { err = br_mrp_start_test_parse(br, tb[IFLA_BRIDGE_MRP_START_TEST], extack); if (err) return err; } if (tb[IFLA_BRIDGE_MRP_IN_STATE]) { err = br_mrp_in_state_parse(br, tb[IFLA_BRIDGE_MRP_IN_STATE], extack); if (err) return err; } if (tb[IFLA_BRIDGE_MRP_IN_ROLE]) { err = br_mrp_in_role_parse(br, tb[IFLA_BRIDGE_MRP_IN_ROLE], extack); if (err) return err; } if (tb[IFLA_BRIDGE_MRP_START_IN_TEST]) { err = br_mrp_start_in_test_parse(br, tb[IFLA_BRIDGE_MRP_START_IN_TEST], extack); if (err) return err; } return 0; } int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br) { struct nlattr *tb, *mrp_tb; struct br_mrp *mrp; mrp_tb = nla_nest_start_noflag(skb, IFLA_BRIDGE_MRP); if (!mrp_tb) return -EMSGSIZE; hlist_for_each_entry_rcu(mrp, &br->mrp_list, list) { struct net_bridge_port *p; tb = nla_nest_start_noflag(skb, IFLA_BRIDGE_MRP_INFO); if (!tb) goto nla_info_failure; if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_RING_ID, mrp->ring_id)) goto nla_put_failure; p = rcu_dereference(mrp->p_port); if (p && nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_P_IFINDEX, p->dev->ifindex)) goto nla_put_failure; p = rcu_dereference(mrp->s_port); if (p && nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_S_IFINDEX, p->dev->ifindex)) goto nla_put_failure; p = rcu_dereference(mrp->i_port); if (p && nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_I_IFINDEX, p->dev->ifindex)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BRIDGE_MRP_INFO_PRIO, mrp->prio)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_RING_STATE, mrp->ring_state)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_RING_ROLE, mrp->ring_role)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_TEST_INTERVAL, mrp->test_interval)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_TEST_MAX_MISS, mrp->test_max_miss)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_TEST_MONITOR, mrp->test_monitor)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_IN_STATE, mrp->in_state)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_IN_ROLE, mrp->in_role)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_IN_TEST_INTERVAL, mrp->in_test_interval)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_IN_TEST_MAX_MISS, mrp->in_test_max_miss)) goto nla_put_failure; nla_nest_end(skb, tb); } nla_nest_end(skb, mrp_tb); return 0; nla_put_failure: nla_nest_cancel(skb, tb); nla_info_failure: nla_nest_cancel(skb, mrp_tb); return -EMSGSIZE; } int br_mrp_ring_port_open(struct net_device *dev, u8 loc) { struct net_bridge_port *p; int err = 0; p = br_port_get_rcu(dev); if (!p) { err = -EINVAL; goto out; } if (loc) p->flags |= BR_MRP_LOST_CONT; else p->flags &= ~BR_MRP_LOST_CONT; br_ifinfo_notify(RTM_NEWLINK, NULL, p); out: return err; } int br_mrp_in_port_open(struct net_device *dev, u8 loc) { struct net_bridge_port *p; int err = 0; p = br_port_get_rcu(dev); if (!p) { err = -EINVAL; goto out; } if (loc) p->flags |= BR_MRP_LOST_IN_CONT; else p->flags &= ~BR_MRP_LOST_IN_CONT; br_ifinfo_notify(RTM_NEWLINK, NULL, p); out: return err; }
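Every br_mrp_*_parse() helper above follows the same nested-attribute pattern: nla_parse_nested() against a policy, an explicit check for the mandatory attributes with an extack error message, then nla_get_*() extraction. The fragment below restates that pattern for a hypothetical FOO attribute; the IFLA_BRIDGE_FOO_* names and foo_parse() are invented for illustration and do not exist in the bridge code.

#include <net/netlink.h>

/* Hypothetical nested attribute space, mirroring the IFLA_BRIDGE_MRP_* enums */
enum {
        IFLA_BRIDGE_FOO_UNSPEC,
        IFLA_BRIDGE_FOO_ID,
        __IFLA_BRIDGE_FOO_MAX,
};
#define IFLA_BRIDGE_FOO_MAX (__IFLA_BRIDGE_FOO_MAX - 1)

static const struct nla_policy foo_policy[IFLA_BRIDGE_FOO_MAX + 1] = {
        [IFLA_BRIDGE_FOO_UNSPEC] = { .type = NLA_REJECT },
        [IFLA_BRIDGE_FOO_ID] = { .type = NLA_U32 },
};

static int foo_parse(struct nlattr *attr, struct netlink_ext_ack *extack,
                     u32 *id)
{
        struct nlattr *tb[IFLA_BRIDGE_FOO_MAX + 1];
        int err;

        /* Validate the nested attributes against the policy */
        err = nla_parse_nested(tb, IFLA_BRIDGE_FOO_MAX, attr, foo_policy,
                               extack);
        if (err)
                return err;

        /* Reject requests that omit the mandatory attribute */
        if (!tb[IFLA_BRIDGE_FOO_ID]) {
                NL_SET_ERR_MSG_MOD(extack, "Missing attribute: ID");
                return -EINVAL;
        }

        *id = nla_get_u32(tb[IFLA_BRIDGE_FOO_ID]);
        return 0;
}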
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Native support for the Aiptek HyperPen USB Tablets
 *  (4000U/5000U/6000U/8000U/12000U)
 *
 *  Copyright (c) 2001 Chris Atenasio <chris@crud.net>
 *  Copyright (c) 2002-2004 Bryan W. Headley <bwheadley@earthlink.net>
 *
 *  based on wacom.c by
 *  Vojtech Pavlik <vojtech@suse.cz>
 *  Andreas Bach Aaen <abach@stofanet.dk>
 *  Clifford Wolf <clifford@clifford.at>
 *  Sam Mosel <sam.mosel@computer.org>
 *  James E. Blair <corvus@gnu.org>
 *  Daniel Egger <egger@suse.de>
 *
 *  Many thanks to Oliver Kuechemann for his support.
 *
 *  ChangeLog:
 *      v0.1 - Initial release
 *      v0.2 - Hack to get around fake event 28's. (Bryan W. Headley)
 *      v0.3 - Make URB dynamic (Bryan W. Headley, Jun-8-2002)
 *             Released to Linux 2.4.19 and 2.5.x
 *      v0.4 - Rewrote substantial portions of the code to deal with
 *             corrected control sequences, timing, dynamic configuration,
 *             support of 6000U - 12000U, procfs, and macro key support
 *             (Jan-1-2003 - Feb-5-2003, Bryan W. Headley)
 *      v1.0 - Added support for diagnostic messages, count of messages
 *             received from URB - Mar-8-2003, Bryan W. Headley
 *      v1.1 - added support for tablet resolution, changed DV and proximity,
 *             some corrections - Jun-22-2003, martin schneebacher
 *           - Added support for the sysfs interface, deprecating the
 *             procfs interface for 2.5.x kernel. Also added support for
 *             Wheel command. Bryan W. Headley July-15-2003.
 *      v1.2 - Reworked jitter timer as a kernel thread.
 *             Bryan W. Headley November-28-2003/Jan-10-2004.
 *      v1.3 - Repaired issue of kernel thread going nuts on single-processor
 *             machines, introduced programmableDelay as a command line
 *             parameter. Feb 7 2004, Bryan W. Headley.
 *      v1.4 - Re-wire jitter so it does not require a thread. Courtesy of
 *             Rene van Paassen. Added reporting of physical pointer device
 *             (e.g., stylus, mouse in reports 2, 3, 4, 5. We don't know
 *             what physical device reports for reports 1, 6.) Also enabled
 *             MOUSE and LENS tool button modes. Renamed "rubber" to "eraser".
 *             Feb 20, 2004, Bryan W. Headley.
 *      v1.5 - Added previousJitterable, so we don't do jitter delay when the
 *             user is holding a button down for periods of time.
* * NOTE: * This kernel driver is augmented by the "Aiptek" XFree86 input * driver for your X server, as well as the Gaiptek GUI Front-end * "Tablet Manager". * These three products are highly interactive with one another, * so therefore it's easier to document them all as one subsystem. * Please visit the project's "home page", located at, * http://aiptektablet.sourceforge.net. */ #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/usb/input.h> #include <linux/uaccess.h> #include <linux/unaligned.h> /* * Aiptek status packet: * * (returned as Report 1 - relative coordinates from mouse and stylus) * * bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 * byte0 0 0 0 0 0 0 0 1 * byte1 0 0 0 0 0 BS2 BS Tip * byte2 X7 X6 X5 X4 X3 X2 X1 X0 * byte3 Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 * * (returned as Report 2 - absolute coordinates from the stylus) * * bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 * byte0 0 0 0 0 0 0 1 0 * byte1 X7 X6 X5 X4 X3 X2 X1 X0 * byte2 X15 X14 X13 X12 X11 X10 X9 X8 * byte3 Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 * byte4 Y15 Y14 Y13 Y12 Y11 Y10 Y9 Y8 * byte5 * * * BS2 BS1 Tip IR DV * byte6 P7 P6 P5 P4 P3 P2 P1 P0 * byte7 P15 P14 P13 P12 P11 P10 P9 P8 * * (returned as Report 3 - absolute coordinates from the mouse) * * bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 * byte0 0 0 0 0 0 0 1 1 * byte1 X7 X6 X5 X4 X3 X2 X1 X0 * byte2 X15 X14 X13 X12 X11 X10 X9 X8 * byte3 Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 * byte4 Y15 Y14 Y13 Y12 Y11 Y10 Y9 Y8 * byte5 * * * BS2 BS1 Tip IR DV * byte6 P7 P6 P5 P4 P3 P2 P1 P0 * byte7 P15 P14 P13 P12 P11 P10 P9 P8 * * (returned as Report 4 - macrokeys from the stylus) * * bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 * byte0 0 0 0 0 0 1 0 0 * byte1 0 0 0 BS2 BS Tip IR DV * byte2 0 0 0 0 0 0 1 0 * byte3 0 0 0 K4 K3 K2 K1 K0 * byte4 P7 P6 P5 P4 P3 P2 P1 P0 * byte5 P15 P14 P13 P12 P11 P10 P9 P8 * * (returned as Report 5 - macrokeys from the mouse) * * bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 * byte0 0 0 0 0 0 1 0 1 * byte1 0 0 0 BS2 BS Tip IR DV * byte2 0 0 0 0 0 0 1 0 * byte3 0 0 0 K4 K3 K2 K1 K0 * byte4 P7 P6 P5 P4 P3 P2 P1 P0 * byte5 P15 P14 P13 P12 P11 P10 P9 P8 * * IR: In Range = Proximity on * DV = Data Valid * BS = Barrel Switch (as in, macro keys) * BS2 also referred to as Tablet Pick * * Command Summary: * * Use report_type CONTROL (3) * Use report_id 2 * * Command/Data Description Return Bytes Return Value * 0x10/0x00 SwitchToMouse 0 * 0x10/0x01 SwitchToTablet 0 * 0x18/0x04 SetResolution 0 * 0x12/0xFF AutoGainOn 0 * 0x17/0x00 FilterOn 0 * 0x01/0x00 GetXExtension 2 MaxX * 0x01/0x01 GetYExtension 2 MaxY * 0x02/0x00 GetModelCode 2 ModelCode = LOBYTE * 0x03/0x00 GetODMCode 2 ODMCode * 0x08/0x00 GetPressureLevels 2 =512 * 0x04/0x00 GetFirmwareVersion 2 Firmware Version * 0x11/0x02 EnableMacroKeys 0 * * To initialize the tablet: * * (1) Send Resolution500LPI (Command) * (2) Query for Model code (Option Report) * (3) Query for ODM code (Option Report) * (4) Query for firmware (Option Report) * (5) Query for GetXExtension (Option Report) * (6) Query for GetYExtension (Option Report) * (7) Query for GetPressureLevels (Option Report) * (8) SwitchToTablet for Absolute coordinates, or * SwitchToMouse for Relative coordinates (Command) * (9) EnableMacroKeys (Command) * (10) FilterOn (Command) * (11) AutoGainOn (Command) * * (Step 9 can be omitted, but you'll then have no function keys.) 
*/ #define USB_VENDOR_ID_AIPTEK 0x08ca #define USB_VENDOR_ID_KYE 0x0458 #define USB_REQ_GET_REPORT 0x01 #define USB_REQ_SET_REPORT 0x09 /* PointerMode codes */ #define AIPTEK_POINTER_ONLY_MOUSE_MODE 0 #define AIPTEK_POINTER_ONLY_STYLUS_MODE 1 #define AIPTEK_POINTER_EITHER_MODE 2 #define AIPTEK_POINTER_ALLOW_MOUSE_MODE(a) \ (a == AIPTEK_POINTER_ONLY_MOUSE_MODE || \ a == AIPTEK_POINTER_EITHER_MODE) #define AIPTEK_POINTER_ALLOW_STYLUS_MODE(a) \ (a == AIPTEK_POINTER_ONLY_STYLUS_MODE || \ a == AIPTEK_POINTER_EITHER_MODE) /* CoordinateMode code */ #define AIPTEK_COORDINATE_RELATIVE_MODE 0 #define AIPTEK_COORDINATE_ABSOLUTE_MODE 1 /* XTilt and YTilt values */ #define AIPTEK_TILT_MIN (-128) #define AIPTEK_TILT_MAX 127 #define AIPTEK_TILT_DISABLE (-10101) /* Wheel values */ #define AIPTEK_WHEEL_MIN 0 #define AIPTEK_WHEEL_MAX 1024 #define AIPTEK_WHEEL_DISABLE (-10101) /* ToolCode values, which BTW are 0x140 .. 0x14f * We have things set up such that if the tool button has changed, * the tools get reset. */ /* toolMode codes */ #define AIPTEK_TOOL_BUTTON_PEN_MODE BTN_TOOL_PEN #define AIPTEK_TOOL_BUTTON_PENCIL_MODE BTN_TOOL_PENCIL #define AIPTEK_TOOL_BUTTON_BRUSH_MODE BTN_TOOL_BRUSH #define AIPTEK_TOOL_BUTTON_AIRBRUSH_MODE BTN_TOOL_AIRBRUSH #define AIPTEK_TOOL_BUTTON_ERASER_MODE BTN_TOOL_RUBBER #define AIPTEK_TOOL_BUTTON_MOUSE_MODE BTN_TOOL_MOUSE #define AIPTEK_TOOL_BUTTON_LENS_MODE BTN_TOOL_LENS /* Diagnostic message codes */ #define AIPTEK_DIAGNOSTIC_NA 0 #define AIPTEK_DIAGNOSTIC_SENDING_RELATIVE_IN_ABSOLUTE 1 #define AIPTEK_DIAGNOSTIC_SENDING_ABSOLUTE_IN_RELATIVE 2 #define AIPTEK_DIAGNOSTIC_TOOL_DISALLOWED 3 /* Time to wait (in ms) to help mask hand jittering * when pressing the stylus buttons. */ #define AIPTEK_JITTER_DELAY_DEFAULT 50 /* Time to wait (in ms) in-between sending the tablet * a command and beginning the process of reading the return * sequence from the tablet. */ #define AIPTEK_PROGRAMMABLE_DELAY_25 25 #define AIPTEK_PROGRAMMABLE_DELAY_50 50 #define AIPTEK_PROGRAMMABLE_DELAY_100 100 #define AIPTEK_PROGRAMMABLE_DELAY_200 200 #define AIPTEK_PROGRAMMABLE_DELAY_300 300 #define AIPTEK_PROGRAMMABLE_DELAY_400 400 #define AIPTEK_PROGRAMMABLE_DELAY_DEFAULT AIPTEK_PROGRAMMABLE_DELAY_400 /* Mouse button programming */ #define AIPTEK_MOUSE_LEFT_BUTTON 0x04 #define AIPTEK_MOUSE_RIGHT_BUTTON 0x08 #define AIPTEK_MOUSE_MIDDLE_BUTTON 0x10 /* Stylus button programming */ #define AIPTEK_STYLUS_LOWER_BUTTON 0x08 #define AIPTEK_STYLUS_UPPER_BUTTON 0x10 /* Length of incoming packet from the tablet */ #define AIPTEK_PACKET_LENGTH 8 /* We report in EV_MISC both the proximity and * whether the report came from the stylus, tablet mouse * or "unknown" -- Unknown when the tablet is in relative * mode, because we only get report 1's. */ #define AIPTEK_REPORT_TOOL_UNKNOWN 0x10 #define AIPTEK_REPORT_TOOL_STYLUS 0x20 #define AIPTEK_REPORT_TOOL_MOUSE 0x40 static int programmableDelay = AIPTEK_PROGRAMMABLE_DELAY_DEFAULT; static int jitterDelay = AIPTEK_JITTER_DELAY_DEFAULT; struct aiptek_features { int odmCode; /* Tablet manufacturer code */ int modelCode; /* Tablet model code (not unique) */ int firmwareCode; /* prom/eeprom version */ char usbPath[64 + 1]; /* device's physical usb path */ }; struct aiptek_settings { int pointerMode; /* stylus-, mouse-only or either */ int coordinateMode; /* absolute/relative coords */ int toolMode; /* pen, pencil, brush, etc. 
tool */ int xTilt; /* synthetic xTilt amount */ int yTilt; /* synthetic yTilt amount */ int wheel; /* synthetic wheel amount */ int stylusButtonUpper; /* stylus upper btn delivers... */ int stylusButtonLower; /* stylus lower btn delivers... */ int mouseButtonLeft; /* mouse left btn delivers... */ int mouseButtonMiddle; /* mouse middle btn delivers... */ int mouseButtonRight; /* mouse right btn delivers... */ int programmableDelay; /* delay for tablet programming */ int jitterDelay; /* delay for hand jittering */ }; struct aiptek { struct input_dev *inputdev; /* input device struct */ struct usb_interface *intf; /* usb interface struct */ struct urb *urb; /* urb for incoming reports */ dma_addr_t data_dma; /* our dma stuffage */ struct aiptek_features features; /* tablet's array of features */ struct aiptek_settings curSetting; /* tablet's current programmable */ struct aiptek_settings newSetting; /* ... and new param settings */ unsigned int ifnum; /* interface number for IO */ int diagnostic; /* tablet diagnostic codes */ unsigned long eventCount; /* event count */ int inDelay; /* jitter: in jitter delay? */ unsigned long endDelay; /* jitter: time when delay ends */ int previousJitterable; /* jitterable prev value */ int lastMacro; /* macro key to reset */ int previousToolMode; /* pen, pencil, brush, etc. tool */ unsigned char *data; /* incoming packet data */ }; static const int eventTypes[] = { EV_KEY, EV_ABS, EV_REL, EV_MSC, }; static const int absEvents[] = { ABS_X, ABS_Y, ABS_PRESSURE, ABS_TILT_X, ABS_TILT_Y, ABS_WHEEL, ABS_MISC, }; static const int relEvents[] = { REL_X, REL_Y, REL_WHEEL, }; static const int buttonEvents[] = { BTN_LEFT, BTN_RIGHT, BTN_MIDDLE, BTN_TOOL_PEN, BTN_TOOL_RUBBER, BTN_TOOL_PENCIL, BTN_TOOL_AIRBRUSH, BTN_TOOL_BRUSH, BTN_TOOL_MOUSE, BTN_TOOL_LENS, BTN_TOUCH, BTN_STYLUS, BTN_STYLUS2, }; /* * Permit easy lookup of keyboard events to send, versus * the bitmap which comes from the tablet. This hides the * issue that the F_keys are not sequentially numbered. */ static const int macroKeyEvents[] = { KEY_ESC, KEY_F1, KEY_F2, KEY_F3, KEY_F4, KEY_F5, KEY_F6, KEY_F7, KEY_F8, KEY_F9, KEY_F10, KEY_F11, KEY_F12, KEY_F13, KEY_F14, KEY_F15, KEY_F16, KEY_F17, KEY_F18, KEY_F19, KEY_F20, KEY_F21, KEY_F22, KEY_F23, KEY_F24, KEY_STOP, KEY_AGAIN, KEY_PROPS, KEY_UNDO, KEY_FRONT, KEY_COPY, KEY_OPEN, KEY_PASTE, 0 }; /*********************************************************************** * Map values to strings and back. Every map should have the following * as its last element: { NULL, AIPTEK_INVALID_VALUE }. */ #define AIPTEK_INVALID_VALUE -1 struct aiptek_map { const char *string; int value; }; static int map_str_to_val(const struct aiptek_map *map, const char *str, size_t count) { const struct aiptek_map *p; if (str[count - 1] == '\n') count--; for (p = map; p->string; p++) if (!strncmp(str, p->string, count)) return p->value; return AIPTEK_INVALID_VALUE; } static const char *map_val_to_str(const struct aiptek_map *map, int val) { const struct aiptek_map *p; for (p = map; p->value != AIPTEK_INVALID_VALUE; p++) if (val == p->value) return p->string; return "unknown"; } /*********************************************************************** * aiptek_irq can receive one of six potential reports. * The documentation for each is in the body of the function. * * The tablet reports on several attributes per invocation of * aiptek_irq. 
Because the Linux Input Event system allows the * transmission of ONE attribute per input_report_xxx() call, * collation has to be done on the other end to reconstitute * a complete tablet report. Further, the number of Input Event reports * submitted varies, depending on the USB report type and circumstance. * To deal with this, EV_MSC is used to indicate an 'end-of-report' * message. This has been an undocumented convention understood by the kernel * tablet driver and clients such as gpm and XFree86's tablet drivers. * * Of the information received from the tablet, the one piece I * cannot transmit is the proximity bit (without resorting to an EV_MSC * convention above.) I therefore have taken over REL_MISC and ABS_MISC * (for relative and absolute reports, respectively) for communicating * Proximity. Why two events? I thought it interesting to know if the * Proximity event occurred while the tablet was in absolute or relative * mode. * Update: REL_MISC proved not to be such a good idea. With REL_MISC you * get an event transmitted each time. ABS_MISC works better, since it * can be set and re-set. Thus, only using ABS_MISC from now on. * * Other tablets use the notion of a certain minimum stylus pressure * to infer proximity. While that could have been done, that is yet * another 'by convention' behavior, the documentation for which * would be spread between two (or more) pieces of software. * * EV_MSC usage was terminated for this purpose in Linux 2.5.x, and * replaced with the input_sync() method (which emits EV_SYN.) */ static void aiptek_irq(struct urb *urb) { struct aiptek *aiptek = urb->context; unsigned char *data = aiptek->data; struct input_dev *inputdev = aiptek->inputdev; struct usb_interface *intf = aiptek->intf; int jitterable = 0; int retval, macro, x, y, z, left, right, middle, p, dv, tip, bs, pck; switch (urb->status) { case 0: /* Success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* This urb is terminated, clean up */ dev_dbg(&intf->dev, "%s - urb shutting down with status: %d\n", __func__, urb->status); return; default: dev_dbg(&intf->dev, "%s - nonzero urb status received: %d\n", __func__, urb->status); goto exit; } /* See if we are in a delay loop -- throw out report if true. */ if (aiptek->inDelay == 1 && time_after(aiptek->endDelay, jiffies)) { goto exit; } aiptek->inDelay = 0; aiptek->eventCount++; /* Report 1 delivers relative coordinates with either a stylus * or the mouse. You do not know, however, which input * tool generated the event. */ if (data[0] == 1) { if (aiptek->curSetting.coordinateMode == AIPTEK_COORDINATE_ABSOLUTE_MODE) { aiptek->diagnostic = AIPTEK_DIAGNOSTIC_SENDING_RELATIVE_IN_ABSOLUTE; } else { x = (signed char) data[2]; y = (signed char) data[3]; /* jitterable keeps track of whether any button has been pressed. * We're also using it to remap the physical mouse button mask * to pseudo-settings. (We don't specifically care about its * value after moving/transposing mouse button bitmasks, except * that a non-zero value indicates that one or more * mouse buttons were pressed.) */ jitterable = data[1] & 0x07; left = (data[1] & aiptek->curSetting.mouseButtonLeft >> 2) != 0 ? 1 : 0; right = (data[1] & aiptek->curSetting.mouseButtonRight >> 2) != 0 ? 1 : 0; middle = (data[1] & aiptek->curSetting.mouseButtonMiddle >> 2) != 0 ?
1 : 0; input_report_key(inputdev, BTN_LEFT, left); input_report_key(inputdev, BTN_MIDDLE, middle); input_report_key(inputdev, BTN_RIGHT, right); input_report_abs(inputdev, ABS_MISC, 1 | AIPTEK_REPORT_TOOL_UNKNOWN); input_report_rel(inputdev, REL_X, x); input_report_rel(inputdev, REL_Y, y); /* Wheel support is in the form of a single-event * firing. */ if (aiptek->curSetting.wheel != AIPTEK_WHEEL_DISABLE) { input_report_rel(inputdev, REL_WHEEL, aiptek->curSetting.wheel); aiptek->curSetting.wheel = AIPTEK_WHEEL_DISABLE; } if (aiptek->lastMacro != -1) { input_report_key(inputdev, macroKeyEvents[aiptek->lastMacro], 0); aiptek->lastMacro = -1; } input_sync(inputdev); } } /* Report 2 is delivered only by the stylus, and delivers * absolute coordinates. */ else if (data[0] == 2) { if (aiptek->curSetting.coordinateMode == AIPTEK_COORDINATE_RELATIVE_MODE) { aiptek->diagnostic = AIPTEK_DIAGNOSTIC_SENDING_ABSOLUTE_IN_RELATIVE; } else if (!AIPTEK_POINTER_ALLOW_STYLUS_MODE (aiptek->curSetting.pointerMode)) { aiptek->diagnostic = AIPTEK_DIAGNOSTIC_TOOL_DISALLOWED; } else { x = get_unaligned_le16(data + 1); y = get_unaligned_le16(data + 3); z = get_unaligned_le16(data + 6); dv = (data[5] & 0x01) != 0 ? 1 : 0; p = (data[5] & 0x02) != 0 ? 1 : 0; tip = (data[5] & 0x04) != 0 ? 1 : 0; /* Use jitterable to re-arrange button masks */ jitterable = data[5] & 0x18; bs = (data[5] & aiptek->curSetting.stylusButtonLower) != 0 ? 1 : 0; pck = (data[5] & aiptek->curSetting.stylusButtonUpper) != 0 ? 1 : 0; /* dv indicates 'data valid' (e.g., the tablet is in sync * and has delivered a "correct" report) We will ignore * all 'bad' reports... */ if (dv != 0) { /* If the selected tool changed, reset the old * tool key, and set the new one. */ if (aiptek->previousToolMode != aiptek->curSetting.toolMode) { input_report_key(inputdev, aiptek->previousToolMode, 0); input_report_key(inputdev, aiptek->curSetting.toolMode, 1); aiptek->previousToolMode = aiptek->curSetting.toolMode; } if (p != 0) { input_report_abs(inputdev, ABS_X, x); input_report_abs(inputdev, ABS_Y, y); input_report_abs(inputdev, ABS_PRESSURE, z); input_report_key(inputdev, BTN_TOUCH, tip); input_report_key(inputdev, BTN_STYLUS, bs); input_report_key(inputdev, BTN_STYLUS2, pck); if (aiptek->curSetting.xTilt != AIPTEK_TILT_DISABLE) { input_report_abs(inputdev, ABS_TILT_X, aiptek->curSetting.xTilt); } if (aiptek->curSetting.yTilt != AIPTEK_TILT_DISABLE) { input_report_abs(inputdev, ABS_TILT_Y, aiptek->curSetting.yTilt); } /* Wheel support is in the form of a single-event * firing. */ if (aiptek->curSetting.wheel != AIPTEK_WHEEL_DISABLE) { input_report_abs(inputdev, ABS_WHEEL, aiptek->curSetting.wheel); aiptek->curSetting.wheel = AIPTEK_WHEEL_DISABLE; } } input_report_abs(inputdev, ABS_MISC, p | AIPTEK_REPORT_TOOL_STYLUS); if (aiptek->lastMacro != -1) { input_report_key(inputdev, macroKeyEvents[aiptek->lastMacro], 0); aiptek->lastMacro = -1; } input_sync(inputdev); } } } /* Report 3's come from the mouse in absolute mode. */ else if (data[0] == 3) { if (aiptek->curSetting.coordinateMode == AIPTEK_COORDINATE_RELATIVE_MODE) { aiptek->diagnostic = AIPTEK_DIAGNOSTIC_SENDING_ABSOLUTE_IN_RELATIVE; } else if (!AIPTEK_POINTER_ALLOW_MOUSE_MODE (aiptek->curSetting.pointerMode)) { aiptek->diagnostic = AIPTEK_DIAGNOSTIC_TOOL_DISALLOWED; } else { x = get_unaligned_le16(data + 1); y = get_unaligned_le16(data + 3); jitterable = data[5] & 0x1c; dv = (data[5] & 0x01) != 0 ? 1 : 0; p = (data[5] & 0x02) != 0 ? 1 : 0; left = (data[5] & aiptek->curSetting.mouseButtonLeft) != 0 ? 
1 : 0; right = (data[5] & aiptek->curSetting.mouseButtonRight) != 0 ? 1 : 0; middle = (data[5] & aiptek->curSetting.mouseButtonMiddle) != 0 ? 1 : 0; if (dv != 0) { /* If the selected tool changed, reset the old * tool key, and set the new one. */ if (aiptek->previousToolMode != aiptek->curSetting.toolMode) { input_report_key(inputdev, aiptek->previousToolMode, 0); input_report_key(inputdev, aiptek->curSetting.toolMode, 1); aiptek->previousToolMode = aiptek->curSetting.toolMode; } if (p != 0) { input_report_abs(inputdev, ABS_X, x); input_report_abs(inputdev, ABS_Y, y); input_report_key(inputdev, BTN_LEFT, left); input_report_key(inputdev, BTN_MIDDLE, middle); input_report_key(inputdev, BTN_RIGHT, right); /* Wheel support is in the form of a single-event * firing. */ if (aiptek->curSetting.wheel != AIPTEK_WHEEL_DISABLE) { input_report_abs(inputdev, ABS_WHEEL, aiptek->curSetting.wheel); aiptek->curSetting.wheel = AIPTEK_WHEEL_DISABLE; } } input_report_abs(inputdev, ABS_MISC, p | AIPTEK_REPORT_TOOL_MOUSE); if (aiptek->lastMacro != -1) { input_report_key(inputdev, macroKeyEvents[aiptek->lastMacro], 0); aiptek->lastMacro = -1; } input_sync(inputdev); } } } /* Report 4s come from the macro keys when pressed by stylus */ else if (data[0] == 4) { jitterable = data[1] & 0x18; dv = (data[1] & 0x01) != 0 ? 1 : 0; p = (data[1] & 0x02) != 0 ? 1 : 0; tip = (data[1] & 0x04) != 0 ? 1 : 0; bs = (data[1] & aiptek->curSetting.stylusButtonLower) != 0 ? 1 : 0; pck = (data[1] & aiptek->curSetting.stylusButtonUpper) != 0 ? 1 : 0; macro = dv && p && tip && !(data[3] & 1) ? (data[3] >> 1) : -1; z = get_unaligned_le16(data + 4); if (dv) { /* If the selected tool changed, reset the old * tool key, and set the new one. */ if (aiptek->previousToolMode != aiptek->curSetting.toolMode) { input_report_key(inputdev, aiptek->previousToolMode, 0); input_report_key(inputdev, aiptek->curSetting.toolMode, 1); aiptek->previousToolMode = aiptek->curSetting.toolMode; } } if (aiptek->lastMacro != -1 && aiptek->lastMacro != macro) { input_report_key(inputdev, macroKeyEvents[aiptek->lastMacro], 0); aiptek->lastMacro = -1; } if (macro != -1 && macro != aiptek->lastMacro) { input_report_key(inputdev, macroKeyEvents[macro], 1); aiptek->lastMacro = macro; } input_report_abs(inputdev, ABS_MISC, p | AIPTEK_REPORT_TOOL_STYLUS); input_sync(inputdev); } /* Report 5s come from the macro keys when pressed by mouse */ else if (data[0] == 5) { jitterable = data[1] & 0x1c; dv = (data[1] & 0x01) != 0 ? 1 : 0; p = (data[1] & 0x02) != 0 ? 1 : 0; left = (data[1]& aiptek->curSetting.mouseButtonLeft) != 0 ? 1 : 0; right = (data[1] & aiptek->curSetting.mouseButtonRight) != 0 ? 1 : 0; middle = (data[1] & aiptek->curSetting.mouseButtonMiddle) != 0 ? 1 : 0; macro = dv && p && left && !(data[3] & 1) ? (data[3] >> 1) : 0; if (dv) { /* If the selected tool changed, reset the old * tool key, and set the new one. 
*/ if (aiptek->previousToolMode != aiptek->curSetting.toolMode) { input_report_key(inputdev, aiptek->previousToolMode, 0); input_report_key(inputdev, aiptek->curSetting.toolMode, 1); aiptek->previousToolMode = aiptek->curSetting.toolMode; } } if (aiptek->lastMacro != -1 && aiptek->lastMacro != macro) { input_report_key(inputdev, macroKeyEvents[aiptek->lastMacro], 0); aiptek->lastMacro = -1; } if (macro != -1 && macro != aiptek->lastMacro) { input_report_key(inputdev, macroKeyEvents[macro], 1); aiptek->lastMacro = macro; } input_report_abs(inputdev, ABS_MISC, p | AIPTEK_REPORT_TOOL_MOUSE); input_sync(inputdev); } /* We have no idea which tool can generate a report 6. Theoretically, * neither needs to, having been given reports 4 & 5 for such use. * However, report 6 is the 'official-looking' report for macroKeys; * reports 4 & 5 supposedly are used to support unnamed, unknown * hat switches (which just so happen to be the macroKeys.) */ else if (data[0] == 6) { macro = get_unaligned_le16(data + 1); if (macro > 0) { input_report_key(inputdev, macroKeyEvents[macro - 1], 0); } if (macro < 25) { input_report_key(inputdev, macroKeyEvents[macro + 1], 0); } /* If the selected tool changed, reset the old tool key, and set the new one. */ if (aiptek->previousToolMode != aiptek->curSetting.toolMode) { input_report_key(inputdev, aiptek->previousToolMode, 0); input_report_key(inputdev, aiptek->curSetting.toolMode, 1); aiptek->previousToolMode = aiptek->curSetting.toolMode; } input_report_key(inputdev, macroKeyEvents[macro], 1); input_report_abs(inputdev, ABS_MISC, 1 | AIPTEK_REPORT_TOOL_UNKNOWN); input_sync(inputdev); } else { dev_dbg(&intf->dev, "Unknown report %d\n", data[0]); } /* Jitter may occur when the user presses a button on the stylus * or the mouse. What we do to prevent that is wait 'x' milliseconds * following a 'jitterable' event, which should give the hand some time to * stabilize itself. * * We just introduced aiptek->previousJitterable to carry forth the * notion that jitter occurs when the button state changes from on to off: * a person drawing, holding a button down is not subject to jittering. * With that in mind, changing from upper button depressed to lower button * WILL transition through a jitter delay. */ if (aiptek->previousJitterable != jitterable && aiptek->curSetting.jitterDelay != 0 && aiptek->inDelay != 1) { aiptek->endDelay = jiffies + ((aiptek->curSetting.jitterDelay * HZ) / 1000); aiptek->inDelay = 1; } aiptek->previousJitterable = jitterable; exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval != 0) { dev_err(&intf->dev, "%s - usb_submit_urb failed with result %d\n", __func__, retval); } } /*********************************************************************** * These are the USB IDs known so far. We do not identify them to * specific Aiptek model numbers, because there have been overlaps, * use, and reuse of IDs in existing models. Certain models have * been known to use more than one ID, indicative perhaps of * manufacturing revisions. In any event, we consider these * IDs to not be model-specific nor unique.
*/ static const struct usb_device_id aiptek_ids[] = { {USB_DEVICE(USB_VENDOR_ID_AIPTEK, 0x01)}, {USB_DEVICE(USB_VENDOR_ID_AIPTEK, 0x10)}, {USB_DEVICE(USB_VENDOR_ID_AIPTEK, 0x20)}, {USB_DEVICE(USB_VENDOR_ID_AIPTEK, 0x21)}, {USB_DEVICE(USB_VENDOR_ID_AIPTEK, 0x22)}, {USB_DEVICE(USB_VENDOR_ID_AIPTEK, 0x23)}, {USB_DEVICE(USB_VENDOR_ID_AIPTEK, 0x24)}, {USB_DEVICE(USB_VENDOR_ID_KYE, 0x5003)}, {} }; MODULE_DEVICE_TABLE(usb, aiptek_ids); /*********************************************************************** * Open an instance of the tablet driver. */ static int aiptek_open(struct input_dev *inputdev) { struct aiptek *aiptek = input_get_drvdata(inputdev); aiptek->urb->dev = interface_to_usbdev(aiptek->intf); if (usb_submit_urb(aiptek->urb, GFP_KERNEL) != 0) return -EIO; return 0; } /*********************************************************************** * Close an instance of the tablet driver. */ static void aiptek_close(struct input_dev *inputdev) { struct aiptek *aiptek = input_get_drvdata(inputdev); usb_kill_urb(aiptek->urb); } /*********************************************************************** * aiptek_set_report and aiptek_get_report() are borrowed from Linux 2.4.x, * where they were known as usb_set_report and usb_get_report. */ static int aiptek_set_report(struct aiptek *aiptek, unsigned char report_type, unsigned char report_id, void *buffer, int size) { struct usb_device *udev = interface_to_usbdev(aiptek->intf); return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_REPORT, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT, (report_type << 8) + report_id, aiptek->ifnum, buffer, size, 5000); } static int aiptek_get_report(struct aiptek *aiptek, unsigned char report_type, unsigned char report_id, void *buffer, int size) { struct usb_device *udev = interface_to_usbdev(aiptek->intf); return usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), USB_REQ_GET_REPORT, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN, (report_type << 8) + report_id, aiptek->ifnum, buffer, size, 5000); } /*********************************************************************** * Send a command to the tablet. */ static int aiptek_command(struct aiptek *aiptek, unsigned char command, unsigned char data) { const int sizeof_buf = 3 * sizeof(u8); int ret; u8 *buf; buf = kmalloc(sizeof_buf, GFP_KERNEL); if (!buf) return -ENOMEM; buf[0] = 2; buf[1] = command; buf[2] = data; if ((ret = aiptek_set_report(aiptek, 3, 2, buf, sizeof_buf)) != sizeof_buf) { dev_dbg(&aiptek->intf->dev, "aiptek_program: failed, tried to send: 0x%02x 0x%02x\n", command, data); } kfree(buf); return ret < 0 ? ret : 0; } /*********************************************************************** * Retrieve information from the tablet. Querying info is defined as first * sending the {command,data} sequence as a command, followed by a wait * (aka, "programmaticDelay") and then a "read" request. 
*/ static int aiptek_query(struct aiptek *aiptek, unsigned char command, unsigned char data) { const int sizeof_buf = 3 * sizeof(u8); int ret; u8 *buf; buf = kmalloc(sizeof_buf, GFP_KERNEL); if (!buf) return -ENOMEM; buf[0] = 2; buf[1] = command; buf[2] = data; if (aiptek_command(aiptek, command, data) != 0) { kfree(buf); return -EIO; } msleep(aiptek->curSetting.programmableDelay); if (aiptek_get_report(aiptek, 3, 2, buf, sizeof_buf) != sizeof_buf) { dev_dbg(&aiptek->intf->dev, "aiptek_query failed: returned 0x%02x 0x%02x 0x%02x\n", buf[0], buf[1], buf[2]); ret = -EIO; } else { ret = get_unaligned_le16(buf + 1); } kfree(buf); return ret; } /*********************************************************************** * Program the tablet into either absolute or relative mode. * We also get information about the tablet's size. */ static int aiptek_program_tablet(struct aiptek *aiptek) { int ret; /* Execute Resolution500LPI */ if ((ret = aiptek_command(aiptek, 0x18, 0x04)) < 0) return ret; /* Query getModelCode */ if ((ret = aiptek_query(aiptek, 0x02, 0x00)) < 0) return ret; aiptek->features.modelCode = ret & 0xff; /* Query getODMCode */ if ((ret = aiptek_query(aiptek, 0x03, 0x00)) < 0) return ret; aiptek->features.odmCode = ret; /* Query getFirmwareCode */ if ((ret = aiptek_query(aiptek, 0x04, 0x00)) < 0) return ret; aiptek->features.firmwareCode = ret; /* Query getXextension */ if ((ret = aiptek_query(aiptek, 0x01, 0x00)) < 0) return ret; input_set_abs_params(aiptek->inputdev, ABS_X, 0, ret - 1, 0, 0); /* Query getYextension */ if ((ret = aiptek_query(aiptek, 0x01, 0x01)) < 0) return ret; input_set_abs_params(aiptek->inputdev, ABS_Y, 0, ret - 1, 0, 0); /* Query getPressureLevels */ if ((ret = aiptek_query(aiptek, 0x08, 0x00)) < 0) return ret; input_set_abs_params(aiptek->inputdev, ABS_PRESSURE, 0, ret - 1, 0, 0); /* Depending on whether we are in absolute or relative mode, we will * do a switchToTablet(absolute) or switchToMouse(relative) command. */ if (aiptek->curSetting.coordinateMode == AIPTEK_COORDINATE_ABSOLUTE_MODE) { /* Execute switchToTablet */ if ((ret = aiptek_command(aiptek, 0x10, 0x01)) < 0) { return ret; } } else { /* Execute switchToMouse */ if ((ret = aiptek_command(aiptek, 0x10, 0x00)) < 0) { return ret; } } /* Enable the macro keys */ if ((ret = aiptek_command(aiptek, 0x11, 0x02)) < 0) return ret; #if 0 /* Execute FilterOn */ if ((ret = aiptek_command(aiptek, 0x17, 0x00)) < 0) return ret; #endif /* Execute AutoGainOn */ if ((ret = aiptek_command(aiptek, 0x12, 0xff)) < 0) return ret; /* Reset the eventCount, so we track events from last (re)programming */ aiptek->diagnostic = AIPTEK_DIAGNOSTIC_NA; aiptek->eventCount = 0; return 0; } /*********************************************************************** * Sysfs functions. Sysfs prefers that individually-tunable parameters * exist in their separate pseudo-files. Summary data that is immutable * may exist in a singular file so long as you don't define a writeable * interface. 
*/ /*********************************************************************** * support the 'size' file -- display support */ static ssize_t show_tabletSize(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%dx%d\n", input_abs_get_max(aiptek->inputdev, ABS_X) + 1, input_abs_get_max(aiptek->inputdev, ABS_Y) + 1); } /* These structs define the sysfs files, param #1 is the name of the * file, param 2 is the file permissions, param 3 & 4 are to the * output generator and input parser routines. Absence of a routine is * permitted -- it only means can't either 'cat' the file, or send data * to it. */ static DEVICE_ATTR(size, S_IRUGO, show_tabletSize, NULL); /*********************************************************************** * support routines for the 'pointer_mode' file. Note that this file * both displays current setting and allows reprogramming. */ static struct aiptek_map pointer_mode_map[] = { { "stylus", AIPTEK_POINTER_ONLY_STYLUS_MODE }, { "mouse", AIPTEK_POINTER_ONLY_MOUSE_MODE }, { "either", AIPTEK_POINTER_EITHER_MODE }, { NULL, AIPTEK_INVALID_VALUE } }; static ssize_t show_tabletPointerMode(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%s\n", map_val_to_str(pointer_mode_map, aiptek->curSetting.pointerMode)); } static ssize_t store_tabletPointerMode(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int new_mode = map_str_to_val(pointer_mode_map, buf, count); if (new_mode == AIPTEK_INVALID_VALUE) return -EINVAL; aiptek->newSetting.pointerMode = new_mode; return count; } static DEVICE_ATTR(pointer_mode, S_IRUGO | S_IWUSR, show_tabletPointerMode, store_tabletPointerMode); /*********************************************************************** * support routines for the 'coordinate_mode' file. Note that this file * both displays current setting and allows reprogramming. */ static struct aiptek_map coordinate_mode_map[] = { { "absolute", AIPTEK_COORDINATE_ABSOLUTE_MODE }, { "relative", AIPTEK_COORDINATE_RELATIVE_MODE }, { NULL, AIPTEK_INVALID_VALUE } }; static ssize_t show_tabletCoordinateMode(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%s\n", map_val_to_str(coordinate_mode_map, aiptek->curSetting.coordinateMode)); } static ssize_t store_tabletCoordinateMode(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int new_mode = map_str_to_val(coordinate_mode_map, buf, count); if (new_mode == AIPTEK_INVALID_VALUE) return -EINVAL; aiptek->newSetting.coordinateMode = new_mode; return count; } static DEVICE_ATTR(coordinate_mode, S_IRUGO | S_IWUSR, show_tabletCoordinateMode, store_tabletCoordinateMode); /*********************************************************************** * support routines for the 'tool_mode' file. Note that this file * both displays current setting and allows reprogramming. 
*/ static struct aiptek_map tool_mode_map[] = { { "mouse", AIPTEK_TOOL_BUTTON_MOUSE_MODE }, { "eraser", AIPTEK_TOOL_BUTTON_ERASER_MODE }, { "pencil", AIPTEK_TOOL_BUTTON_PENCIL_MODE }, { "pen", AIPTEK_TOOL_BUTTON_PEN_MODE }, { "brush", AIPTEK_TOOL_BUTTON_BRUSH_MODE }, { "airbrush", AIPTEK_TOOL_BUTTON_AIRBRUSH_MODE }, { "lens", AIPTEK_TOOL_BUTTON_LENS_MODE }, { NULL, AIPTEK_INVALID_VALUE } }; static ssize_t show_tabletToolMode(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%s\n", map_val_to_str(tool_mode_map, aiptek->curSetting.toolMode)); } static ssize_t store_tabletToolMode(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int new_mode = map_str_to_val(tool_mode_map, buf, count); if (new_mode == AIPTEK_INVALID_VALUE) return -EINVAL; aiptek->newSetting.toolMode = new_mode; return count; } static DEVICE_ATTR(tool_mode, S_IRUGO | S_IWUSR, show_tabletToolMode, store_tabletToolMode); /*********************************************************************** * support routines for the 'xtilt' file. Note that this file * both displays current setting and allows reprogramming. */ static ssize_t show_tabletXtilt(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); if (aiptek->curSetting.xTilt == AIPTEK_TILT_DISABLE) { return sysfs_emit(buf, "disable\n"); } else { return sysfs_emit(buf, "%d\n", aiptek->curSetting.xTilt); } } static ssize_t store_tabletXtilt(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int x; if (kstrtoint(buf, 10, &x)) { size_t len = buf[count - 1] == '\n' ? count - 1 : count; if (strncmp(buf, "disable", len)) return -EINVAL; aiptek->newSetting.xTilt = AIPTEK_TILT_DISABLE; } else { if (x < AIPTEK_TILT_MIN || x > AIPTEK_TILT_MAX) return -EINVAL; aiptek->newSetting.xTilt = x; } return count; } static DEVICE_ATTR(xtilt, S_IRUGO | S_IWUSR, show_tabletXtilt, store_tabletXtilt); /*********************************************************************** * support routines for the 'ytilt' file. Note that this file * both displays current setting and allows reprogramming. */ static ssize_t show_tabletYtilt(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); if (aiptek->curSetting.yTilt == AIPTEK_TILT_DISABLE) { return sysfs_emit(buf, "disable\n"); } else { return sysfs_emit(buf, "%d\n", aiptek->curSetting.yTilt); } } static ssize_t store_tabletYtilt(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int y; if (kstrtoint(buf, 10, &y)) { size_t len = buf[count - 1] == '\n' ? count - 1 : count; if (strncmp(buf, "disable", len)) return -EINVAL; aiptek->newSetting.yTilt = AIPTEK_TILT_DISABLE; } else { if (y < AIPTEK_TILT_MIN || y > AIPTEK_TILT_MAX) return -EINVAL; aiptek->newSetting.yTilt = y; } return count; } static DEVICE_ATTR(ytilt, S_IRUGO | S_IWUSR, show_tabletYtilt, store_tabletYtilt); /*********************************************************************** * support routines for the 'jitter' file. Note that this file * both displays current setting and allows reprogramming. 
*/ static ssize_t show_tabletJitterDelay(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%d\n", aiptek->curSetting.jitterDelay); } static ssize_t store_tabletJitterDelay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int err, j; err = kstrtoint(buf, 10, &j); if (err) return err; aiptek->newSetting.jitterDelay = j; return count; } static DEVICE_ATTR(jitter, S_IRUGO | S_IWUSR, show_tabletJitterDelay, store_tabletJitterDelay); /*********************************************************************** * support routines for the 'delay' file. Note that this file * both displays current setting and allows reprogramming. */ static ssize_t show_tabletProgrammableDelay(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%d\n", aiptek->curSetting.programmableDelay); } static ssize_t store_tabletProgrammableDelay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int err, d; err = kstrtoint(buf, 10, &d); if (err) return err; aiptek->newSetting.programmableDelay = d; return count; } static DEVICE_ATTR(delay, S_IRUGO | S_IWUSR, show_tabletProgrammableDelay, store_tabletProgrammableDelay); /*********************************************************************** * support routines for the 'event_count' file. Note that this file * only displays current setting. */ static ssize_t show_tabletEventsReceived(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%ld\n", aiptek->eventCount); } static DEVICE_ATTR(event_count, S_IRUGO, show_tabletEventsReceived, NULL); /*********************************************************************** * support routines for the 'diagnostic' file. Note that this file * only displays current setting. */ static ssize_t show_tabletDiagnosticMessage(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); char *retMsg; switch (aiptek->diagnostic) { case AIPTEK_DIAGNOSTIC_NA: retMsg = "no errors\n"; break; case AIPTEK_DIAGNOSTIC_SENDING_RELATIVE_IN_ABSOLUTE: retMsg = "Error: receiving relative reports\n"; break; case AIPTEK_DIAGNOSTIC_SENDING_ABSOLUTE_IN_RELATIVE: retMsg = "Error: receiving absolute reports\n"; break; case AIPTEK_DIAGNOSTIC_TOOL_DISALLOWED: if (aiptek->curSetting.pointerMode == AIPTEK_POINTER_ONLY_MOUSE_MODE) { retMsg = "Error: receiving stylus reports\n"; } else { retMsg = "Error: receiving mouse reports\n"; } break; default: return 0; } return sysfs_emit(buf, retMsg); } static DEVICE_ATTR(diagnostic, S_IRUGO, show_tabletDiagnosticMessage, NULL); /*********************************************************************** * support routines for the 'stylus_upper' file. Note that this file * both displays current setting and allows for setting changing. 
*/ static struct aiptek_map stylus_button_map[] = { { "upper", AIPTEK_STYLUS_UPPER_BUTTON }, { "lower", AIPTEK_STYLUS_LOWER_BUTTON }, { NULL, AIPTEK_INVALID_VALUE } }; static ssize_t show_tabletStylusUpper(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%s\n", map_val_to_str(stylus_button_map, aiptek->curSetting.stylusButtonUpper)); } static ssize_t store_tabletStylusUpper(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int new_button = map_str_to_val(stylus_button_map, buf, count); if (new_button == AIPTEK_INVALID_VALUE) return -EINVAL; aiptek->newSetting.stylusButtonUpper = new_button; return count; } static DEVICE_ATTR(stylus_upper, S_IRUGO | S_IWUSR, show_tabletStylusUpper, store_tabletStylusUpper); /*********************************************************************** * support routines for the 'stylus_lower' file. Note that this file * both displays current setting and allows for setting changing. */ static ssize_t show_tabletStylusLower(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%s\n", map_val_to_str(stylus_button_map, aiptek->curSetting.stylusButtonLower)); } static ssize_t store_tabletStylusLower(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int new_button = map_str_to_val(stylus_button_map, buf, count); if (new_button == AIPTEK_INVALID_VALUE) return -EINVAL; aiptek->newSetting.stylusButtonLower = new_button; return count; } static DEVICE_ATTR(stylus_lower, S_IRUGO | S_IWUSR, show_tabletStylusLower, store_tabletStylusLower); /*********************************************************************** * support routines for the 'mouse_left' file. Note that this file * both displays current setting and allows for setting changing. */ static struct aiptek_map mouse_button_map[] = { { "left", AIPTEK_MOUSE_LEFT_BUTTON }, { "middle", AIPTEK_MOUSE_MIDDLE_BUTTON }, { "right", AIPTEK_MOUSE_RIGHT_BUTTON }, { NULL, AIPTEK_INVALID_VALUE } }; static ssize_t show_tabletMouseLeft(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%s\n", map_val_to_str(mouse_button_map, aiptek->curSetting.mouseButtonLeft)); } static ssize_t store_tabletMouseLeft(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int new_button = map_str_to_val(mouse_button_map, buf, count); if (new_button == AIPTEK_INVALID_VALUE) return -EINVAL; aiptek->newSetting.mouseButtonLeft = new_button; return count; } static DEVICE_ATTR(mouse_left, S_IRUGO | S_IWUSR, show_tabletMouseLeft, store_tabletMouseLeft); /*********************************************************************** * support routines for the 'mouse_middle' file. Note that this file * both displays current setting and allows for setting changing. 
*/ static ssize_t show_tabletMouseMiddle(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%s\n", map_val_to_str(mouse_button_map, aiptek->curSetting.mouseButtonMiddle)); } static ssize_t store_tabletMouseMiddle(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int new_button = map_str_to_val(mouse_button_map, buf, count); if (new_button == AIPTEK_INVALID_VALUE) return -EINVAL; aiptek->newSetting.mouseButtonMiddle = new_button; return count; } static DEVICE_ATTR(mouse_middle, S_IRUGO | S_IWUSR, show_tabletMouseMiddle, store_tabletMouseMiddle); /*********************************************************************** * support routines for the 'mouse_right' file. Note that this file * both displays current setting and allows for setting changing. */ static ssize_t show_tabletMouseRight(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%s\n", map_val_to_str(mouse_button_map, aiptek->curSetting.mouseButtonRight)); } static ssize_t store_tabletMouseRight(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int new_button = map_str_to_val(mouse_button_map, buf, count); if (new_button == AIPTEK_INVALID_VALUE) return -EINVAL; aiptek->newSetting.mouseButtonRight = new_button; return count; } static DEVICE_ATTR(mouse_right, S_IRUGO | S_IWUSR, show_tabletMouseRight, store_tabletMouseRight); /*********************************************************************** * support routines for the 'wheel' file. Note that this file * both displays current setting and allows for setting changing. */ static ssize_t show_tabletWheel(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); if (aiptek->curSetting.wheel == AIPTEK_WHEEL_DISABLE) { return sysfs_emit(buf, "disable\n"); } else { return sysfs_emit(buf, "%d\n", aiptek->curSetting.wheel); } } static ssize_t store_tabletWheel(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); int err, w; err = kstrtoint(buf, 10, &w); if (err) return err; aiptek->newSetting.wheel = w; return count; } static DEVICE_ATTR(wheel, S_IRUGO | S_IWUSR, show_tabletWheel, store_tabletWheel); /*********************************************************************** * support routines for the 'execute' file. Note that this file * both displays current setting and allows for setting changing. */ static ssize_t show_tabletExecute(struct device *dev, struct device_attribute *attr, char *buf) { /* There is nothing useful to display, so a one-line manual * is in order... */ return sysfs_emit(buf, "Write anything to this file to program your tablet.\n"); } static ssize_t store_tabletExecute(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct aiptek *aiptek = dev_get_drvdata(dev); /* We do not care what you write to this file. Merely the action * of writing to this file triggers a tablet reprogramming. 
*/ memcpy(&aiptek->curSetting, &aiptek->newSetting, sizeof(struct aiptek_settings)); if (aiptek_program_tablet(aiptek) < 0) return -EIO; return count; } static DEVICE_ATTR(execute, S_IRUGO | S_IWUSR, show_tabletExecute, store_tabletExecute); /*********************************************************************** * support routines for the 'odm_code' file. Note that this file * only displays current setting. */ static ssize_t show_tabletODMCode(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "0x%04x\n", aiptek->features.odmCode); } static DEVICE_ATTR(odm_code, S_IRUGO, show_tabletODMCode, NULL); /*********************************************************************** * support routines for the 'model_code' file. Note that this file * only displays current setting. */ static ssize_t show_tabletModelCode(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "0x%04x\n", aiptek->features.modelCode); } static DEVICE_ATTR(model_code, S_IRUGO, show_tabletModelCode, NULL); /*********************************************************************** * support routines for the 'firmware_code' file. Note that this file * only displays current setting. */ static ssize_t show_firmwareCode(struct device *dev, struct device_attribute *attr, char *buf) { struct aiptek *aiptek = dev_get_drvdata(dev); return sysfs_emit(buf, "%04x\n", aiptek->features.firmwareCode); } static DEVICE_ATTR(firmware_code, S_IRUGO, show_firmwareCode, NULL); static struct attribute *aiptek_dev_attrs[] = { &dev_attr_size.attr, &dev_attr_pointer_mode.attr, &dev_attr_coordinate_mode.attr, &dev_attr_tool_mode.attr, &dev_attr_xtilt.attr, &dev_attr_ytilt.attr, &dev_attr_jitter.attr, &dev_attr_delay.attr, &dev_attr_event_count.attr, &dev_attr_diagnostic.attr, &dev_attr_odm_code.attr, &dev_attr_model_code.attr, &dev_attr_firmware_code.attr, &dev_attr_stylus_lower.attr, &dev_attr_stylus_upper.attr, &dev_attr_mouse_left.attr, &dev_attr_mouse_middle.attr, &dev_attr_mouse_right.attr, &dev_attr_wheel.attr, &dev_attr_execute.attr, NULL }; ATTRIBUTE_GROUPS(aiptek_dev); /*********************************************************************** * This routine is called when a tablet has been identified. It basically * sets up the tablet and the driver's internal structures. */ static int aiptek_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *usbdev = interface_to_usbdev(intf); struct usb_endpoint_descriptor *endpoint; struct aiptek *aiptek; struct input_dev *inputdev; int i; int speeds[] = { 0, AIPTEK_PROGRAMMABLE_DELAY_50, AIPTEK_PROGRAMMABLE_DELAY_400, AIPTEK_PROGRAMMABLE_DELAY_25, AIPTEK_PROGRAMMABLE_DELAY_100, AIPTEK_PROGRAMMABLE_DELAY_200, AIPTEK_PROGRAMMABLE_DELAY_300 }; int err = -ENOMEM; /* programmableDelay is where the command-line specified * delay is kept. We make it the first element of speeds[], * so therefore, your override speed is tried first, then the * remainder. Note that the default value of 400ms will be tried * if you do not specify any command line parameter. 
*/ speeds[0] = programmableDelay; aiptek = kzalloc(sizeof(*aiptek), GFP_KERNEL); inputdev = input_allocate_device(); if (!aiptek || !inputdev) { dev_warn(&intf->dev, "cannot allocate memory or input device\n"); goto fail1; } aiptek->data = usb_alloc_coherent(usbdev, AIPTEK_PACKET_LENGTH, GFP_KERNEL, &aiptek->data_dma); if (!aiptek->data) { dev_warn(&intf->dev, "cannot allocate usb buffer\n"); goto fail1; } aiptek->urb = usb_alloc_urb(0, GFP_KERNEL); if (!aiptek->urb) { dev_warn(&intf->dev, "cannot allocate urb\n"); goto fail2; } aiptek->inputdev = inputdev; aiptek->intf = intf; aiptek->ifnum = intf->cur_altsetting->desc.bInterfaceNumber; aiptek->inDelay = 0; aiptek->endDelay = 0; aiptek->previousJitterable = 0; aiptek->lastMacro = -1; /* Set up the curSettings struct. Said struct contains the current * programmable parameters. The newSetting struct contains changes * the user makes to the settings via the sysfs interface. Those * changes are not "committed" to curSettings until the user * writes to the sysfs/.../execute file. */ aiptek->curSetting.pointerMode = AIPTEK_POINTER_EITHER_MODE; aiptek->curSetting.coordinateMode = AIPTEK_COORDINATE_ABSOLUTE_MODE; aiptek->curSetting.toolMode = AIPTEK_TOOL_BUTTON_PEN_MODE; aiptek->curSetting.xTilt = AIPTEK_TILT_DISABLE; aiptek->curSetting.yTilt = AIPTEK_TILT_DISABLE; aiptek->curSetting.mouseButtonLeft = AIPTEK_MOUSE_LEFT_BUTTON; aiptek->curSetting.mouseButtonMiddle = AIPTEK_MOUSE_MIDDLE_BUTTON; aiptek->curSetting.mouseButtonRight = AIPTEK_MOUSE_RIGHT_BUTTON; aiptek->curSetting.stylusButtonUpper = AIPTEK_STYLUS_UPPER_BUTTON; aiptek->curSetting.stylusButtonLower = AIPTEK_STYLUS_LOWER_BUTTON; aiptek->curSetting.jitterDelay = jitterDelay; aiptek->curSetting.programmableDelay = programmableDelay; /* Both structs should have equivalent settings */ aiptek->newSetting = aiptek->curSetting; /* Determine the usb devices' physical path. * Asketh not why we always pretend we're using "../input0", * but I suspect this will have to be refactored one * day if a single USB device can be a keyboard & a mouse * & a tablet, and the inputX number actually will tell * us something... */ usb_make_path(usbdev, aiptek->features.usbPath, sizeof(aiptek->features.usbPath)); strlcat(aiptek->features.usbPath, "/input0", sizeof(aiptek->features.usbPath)); /* Set up client data, pointers to open and close routines * for the input device. */ inputdev->name = "Aiptek"; inputdev->phys = aiptek->features.usbPath; usb_to_input_id(usbdev, &inputdev->id); inputdev->dev.parent = &intf->dev; input_set_drvdata(inputdev, aiptek); inputdev->open = aiptek_open; inputdev->close = aiptek_close; /* Now program the capacities of the tablet, in terms of being * an input device. */ for (i = 0; i < ARRAY_SIZE(eventTypes); ++i) __set_bit(eventTypes[i], inputdev->evbit); for (i = 0; i < ARRAY_SIZE(absEvents); ++i) __set_bit(absEvents[i], inputdev->absbit); for (i = 0; i < ARRAY_SIZE(relEvents); ++i) __set_bit(relEvents[i], inputdev->relbit); __set_bit(MSC_SERIAL, inputdev->mscbit); /* Set up key and button codes */ for (i = 0; i < ARRAY_SIZE(buttonEvents); ++i) __set_bit(buttonEvents[i], inputdev->keybit); for (i = 0; i < ARRAY_SIZE(macroKeyEvents); ++i) __set_bit(macroKeyEvents[i], inputdev->keybit); /* * Program the input device coordinate capacities. We do not yet * know what maximum X, Y, and Z values are, so we're putting fake * values in. Later, we'll ask the tablet to put in the correct * values. 
*/ input_set_abs_params(inputdev, ABS_X, 0, 2999, 0, 0); input_set_abs_params(inputdev, ABS_Y, 0, 2249, 0, 0); input_set_abs_params(inputdev, ABS_PRESSURE, 0, 511, 0, 0); input_set_abs_params(inputdev, ABS_TILT_X, AIPTEK_TILT_MIN, AIPTEK_TILT_MAX, 0, 0); input_set_abs_params(inputdev, ABS_TILT_Y, AIPTEK_TILT_MIN, AIPTEK_TILT_MAX, 0, 0); input_set_abs_params(inputdev, ABS_WHEEL, AIPTEK_WHEEL_MIN, AIPTEK_WHEEL_MAX - 1, 0, 0); err = usb_find_common_endpoints(intf->cur_altsetting, NULL, NULL, &endpoint, NULL); if (err) { dev_err(&intf->dev, "interface has no int in endpoints, but must have minimum 1\n"); goto fail3; } /* Go set up our URB, which is called when the tablet receives * input. */ usb_fill_int_urb(aiptek->urb, usbdev, usb_rcvintpipe(usbdev, endpoint->bEndpointAddress), aiptek->data, 8, aiptek_irq, aiptek, endpoint->bInterval); aiptek->urb->transfer_dma = aiptek->data_dma; aiptek->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; /* Program the tablet. This sets the tablet up in the mode * specified in newSetting, and also queries the tablet's * physical capacities. * * Sanity check: if a tablet doesn't like the slow programmatic * delay, we often get sizes of 0x0. Let's use that as an indicator * to try faster delays, up to 25 ms. If that logic fails, well, you'll * have to explain to us how your tablet thinks it's 0x0, and yet that's * not an error :-) */ for (i = 0; i < ARRAY_SIZE(speeds); ++i) { aiptek->curSetting.programmableDelay = speeds[i]; (void)aiptek_program_tablet(aiptek); if (input_abs_get_max(aiptek->inputdev, ABS_X) > 0) { dev_info(&intf->dev, "Aiptek using %d ms programming speed\n", aiptek->curSetting.programmableDelay); break; } } /* Murphy says that some day someone will have a tablet that fails the above test. That's you, Frederic Rodrigo */ if (i == ARRAY_SIZE(speeds)) { dev_info(&intf->dev, "Aiptek tried all speeds, no sane response\n"); err = -EINVAL; goto fail3; } /* Associate this driver's struct with the usb interface. */ usb_set_intfdata(intf, aiptek); /* Register the tablet as an Input Device */ err = input_register_device(aiptek->inputdev); if (err) { dev_warn(&intf->dev, "input_register_device returned err: %d\n", err); goto fail3; } return 0; fail3: usb_free_urb(aiptek->urb); fail2: usb_free_coherent(usbdev, AIPTEK_PACKET_LENGTH, aiptek->data, aiptek->data_dma); fail1: usb_set_intfdata(intf, NULL); input_free_device(inputdev); kfree(aiptek); return err; } /*********************************************************************** * Deal with tablet disconnecting from the system. */ static void aiptek_disconnect(struct usb_interface *intf) { struct aiptek *aiptek = usb_get_intfdata(intf); /* Disassociate driver's struct with usb interface */ usb_set_intfdata(intf, NULL); if (aiptek != NULL) { /* Free & unhook everything from the system. */ usb_kill_urb(aiptek->urb); input_unregister_device(aiptek->inputdev); usb_free_urb(aiptek->urb); usb_free_coherent(interface_to_usbdev(intf), AIPTEK_PACKET_LENGTH, aiptek->data, aiptek->data_dma); kfree(aiptek); } } static struct usb_driver aiptek_driver = { .name = "aiptek", .probe = aiptek_probe, .disconnect = aiptek_disconnect, .id_table = aiptek_ids, .dev_groups = aiptek_dev_groups, }; module_usb_driver(aiptek_driver); MODULE_AUTHOR("Bryan W. 
Headley/Chris Atenasio/Cedric Brun/Rene van Paassen"); MODULE_DESCRIPTION("Aiptek HyperPen USB Tablet Driver"); MODULE_LICENSE("GPL"); module_param(programmableDelay, int, 0); MODULE_PARM_DESC(programmableDelay, "delay used during tablet programming"); module_param(jitterDelay, int, 0); MODULE_PARM_DESC(jitterDelay, "stylus/mouse settlement delay");
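/* Illustrative sketch (not part of the driver): decoding an absolute
 * stylus packet (report ID 2) the same way aiptek_irq() does above.
 * The 8-byte field layout and flag bits are inferred from the driver
 * code; the struct and function names here are hypothetical and exist
 * only for illustration, and the button bits assume the default
 * stylus button mapping (lower = 0x08, upper = 0x10).
 */
#include <stdint.h>

struct aiptek_stylus_sample {
	uint16_t x, y, pressure;
	int data_valid;   /* data[5] bit 0: report is in sync          */
	int in_proximity; /* data[5] bit 1: stylus is near the tablet  */
	int tip_down;     /* data[5] bit 2: tip switch pressed         */
	int button_lower; /* data[5] bit 3: lower stylus button        */
	int button_upper; /* data[5] bit 4: upper stylus button        */
};

static inline uint16_t le16(const uint8_t *p)
{
	/* little-endian 16-bit field, as get_unaligned_le16() reads it */
	return (uint16_t)p[0] | ((uint16_t)p[1] << 8);
}

/* Returns 0 on success, -1 if the packet is not a report 2. */
static int decode_report2(const uint8_t data[8], struct aiptek_stylus_sample *s)
{
	if (data[0] != 2)
		return -1;

	s->x = le16(data + 1);        /* bytes 1-2: absolute X        */
	s->y = le16(data + 3);        /* bytes 3-4: absolute Y        */
	s->pressure = le16(data + 6); /* bytes 6-7: pen pressure      */
	s->data_valid   = !!(data[5] & 0x01);
	s->in_proximity = !!(data[5] & 0x02);
	s->tip_down     = !!(data[5] & 0x04);
	s->button_lower = !!(data[5] & 0x08);
	s->button_upper = !!(data[5] & 0x10);
	return 0;
}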
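/* Illustrative sketch (not part of the driver): committing new settings
 * through the sysfs interface described above. Values written to the
 * attribute files are only staged in newSetting; writing anything to the
 * 'execute' attribute copies newSetting into curSetting and reprograms
 * the tablet. The sysfs directory below is an assumption for the sake of
 * the example -- the actual path depends on where the USB interface is
 * bound on your system.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_attr(const char *dir, const char *attr, const char *val)
{
	char path[256];
	int fd, ret;

	snprintf(path, sizeof(path), "%s/%s", dir, attr);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;
	ret = write(fd, val, strlen(val)) < 0 ? -1 : 0;
	close(fd);
	return ret;
}

int main(void)
{
	/* Hypothetical sysfs directory for the bound interface. */
	const char *dir = "/sys/bus/usb/drivers/aiptek/2-1:1.0";

	write_attr(dir, "pointer_mode", "stylus\n"); /* staged in newSetting */
	write_attr(dir, "jitter", "50\n");           /* jitter delay in ms   */
	return write_attr(dir, "execute", "1\n");    /* commit and reprogram */
}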
// SPDX-License-Identifier: GPL-2.0-or-later /* * PF_INET6 socket protocol family * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * * Adapted from linux/net/ipv4/af_inet.c * * Fixes: * piggy, Karl Knutson : Socket protocol table * Hideaki YOSHIFUJI : sin6_scope_id support * Arnaldo Melo : check proc_net_create return, cleanups */ #define pr_fmt(fmt) "IPv6: " fmt #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/proc_fs.h> #include <linux/stat.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/icmpv6.h> #include <linux/netfilter_ipv6.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/udp.h> #include <net/udplite.h> #include <net/tcp.h> #include <net/ping.h> #include <net/protocol.h> #include <net/inet_common.h> #include <net/route.h> #include <net/transp_v6.h>
#include <net/ip6_route.h> #include <net/addrconf.h> #include <net/ipv6_stubs.h> #include <net/ndisc.h> #ifdef CONFIG_IPV6_TUNNEL #include <net/ip6_tunnel.h> #endif #include <net/calipso.h> #include <net/seg6.h> #include <net/rpl.h> #include <net/compat.h> #include <net/xfrm.h> #include <net/ioam6.h> #include <net/rawv6.h> #include <net/rps.h> #include <linux/uaccess.h> #include <linux/mroute6.h> #include "ip6_offload.h" MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); MODULE_LICENSE("GPL"); /* The inetsw6 table contains everything that inet6_create needs to * build a new socket. */ static struct list_head inetsw6[SOCK_MAX]; static DEFINE_SPINLOCK(inetsw6_lock); struct ipv6_params ipv6_defaults = { .disable_ipv6 = 0, .autoconf = 1, }; static int disable_ipv6_mod; module_param_named(disable, disable_ipv6_mod, int, 0444); MODULE_PARM_DESC(disable, "Disable IPv6 module such that it is non-functional"); module_param_named(disable_ipv6, ipv6_defaults.disable_ipv6, int, 0444); MODULE_PARM_DESC(disable_ipv6, "Disable IPv6 on all interfaces"); module_param_named(autoconf, ipv6_defaults.autoconf, int, 0444); MODULE_PARM_DESC(autoconf, "Enable IPv6 address autoconfiguration on all interfaces"); bool ipv6_mod_enabled(void) { return disable_ipv6_mod == 0; } EXPORT_SYMBOL_GPL(ipv6_mod_enabled); static struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) { const int offset = sk->sk_prot->ipv6_pinfo_offset; return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } void inet6_sock_destruct(struct sock *sk) { inet6_cleanup_sock(sk); inet_sock_destruct(sk); } EXPORT_SYMBOL_GPL(inet6_sock_destruct); static int inet6_create(struct net *net, struct socket *sock, int protocol, int kern) { struct inet_sock *inet; struct ipv6_pinfo *np; struct sock *sk; struct inet_protosw *answer; struct proto *answer_prot; unsigned char answer_flags; int try_loading_module = 0; int err; if (protocol < 0 || protocol >= IPPROTO_MAX) return -EINVAL; /* Look for the requested type/protocol pair. */ lookup_protocol: err = -ESOCKTNOSUPPORT; rcu_read_lock(); list_for_each_entry_rcu(answer, &inetsw6[sock->type], list) { err = 0; /* Check the non-wild match. */ if (protocol == answer->protocol) { if (protocol != IPPROTO_IP) break; } else { /* Check for the two wild cases. */ if (IPPROTO_IP == protocol) { protocol = answer->protocol; break; } if (IPPROTO_IP == answer->protocol) break; } err = -EPROTONOSUPPORT; } if (err) { if (try_loading_module < 2) { rcu_read_unlock(); /* * Be more specific, e.g. net-pf-10-proto-132-type-1 * (net-pf-PF_INET6-proto-IPPROTO_SCTP-type-SOCK_STREAM) */ if (++try_loading_module == 1) request_module("net-pf-%d-proto-%d-type-%d", PF_INET6, protocol, sock->type); /* * Fall back to generic, e.g. 
net-pf-10-proto-132 * (net-pf-PF_INET6-proto-IPPROTO_SCTP) */ else request_module("net-pf-%d-proto-%d", PF_INET6, protocol); goto lookup_protocol; } else goto out_rcu_unlock; } err = -EPERM; if (sock->type == SOCK_RAW && !kern && !ns_capable(net->user_ns, CAP_NET_RAW)) goto out_rcu_unlock; sock->ops = answer->ops; answer_prot = answer->prot; answer_flags = answer->flags; rcu_read_unlock(); WARN_ON(!answer_prot->slab); err = -ENOBUFS; sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, kern); if (!sk) goto out; sock_init_data(sock, sk); err = 0; if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = SK_CAN_REUSE; if (INET_PROTOSW_ICSK & answer_flags) inet_init_csk_locks(sk); inet = inet_sk(sk); inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags); if (SOCK_RAW == sock->type) { inet->inet_num = protocol; if (IPPROTO_RAW == protocol) inet_set_bit(HDRINCL, sk); } sk->sk_destruct = inet6_sock_destruct; sk->sk_family = PF_INET6; sk->sk_protocol = protocol; sk->sk_backlog_rcv = answer->prot->backlog_rcv; inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk); np->hop_limit = -1; np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; inet6_set_bit(MC6_LOOP, sk); inet6_set_bit(MC6_ALL, sk); np->pmtudisc = IPV6_PMTUDISC_WANT; inet6_assign_bit(REPFLOW, sk, net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED); sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash); /* Init the ipv4 part of the socket since we can have sockets * using v6 API for ipv4. */ inet->uc_ttl = -1; inet_set_bit(MC_LOOP, sk); inet->mc_ttl = 1; inet->mc_index = 0; RCU_INIT_POINTER(inet->mc_list, NULL); inet->rcv_tos = 0; if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; if (inet->inet_num) { /* It assumes that any protocol which allows * the user to assign a number at socket * creation time automatically shares. */ inet->inet_sport = htons(inet->inet_num); err = sk->sk_prot->hash(sk); if (err) goto out_sk_release; } if (sk->sk_prot->init) { err = sk->sk_prot->init(sk); if (err) goto out_sk_release; } if (!kern) { err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk); if (err) goto out_sk_release; } out: return err; out_rcu_unlock: rcu_read_unlock(); goto out; out_sk_release: sk_common_release(sk); sock->sk = NULL; goto out; } static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, u32 flags) { struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); __be32 v4addr = 0; unsigned short snum; bool saved_ipv6only; int addr_type = 0; int err = 0; if (addr->sin6_family != AF_INET6) return -EAFNOSUPPORT; addr_type = ipv6_addr_type(&addr->sin6_addr); if ((addr_type & IPV6_ADDR_MULTICAST) && sk->sk_type == SOCK_STREAM) return -EINVAL; snum = ntohs(addr->sin6_port); if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) && snum && inet_port_requires_bind_service(net, snum) && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) return -EACCES; if (flags & BIND_WITH_LOCK) lock_sock(sk); /* Check these errors (active socket, double bind). */ if (sk->sk_state != TCP_CLOSE || inet->inet_num) { err = -EINVAL; goto out; } /* Check if the address belongs to the host. 
*/ if (addr_type == IPV6_ADDR_MAPPED) { struct net_device *dev = NULL; int chk_addr_ret; /* Binding to v4-mapped address on a v6-only socket * makes no sense */ if (ipv6_only_sock(sk)) { err = -EINVAL; goto out; } rcu_read_lock(); if (sk->sk_bound_dev_if) { dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out_unlock; } } /* Reproduce AF_INET checks to make the bindings consistent */ v4addr = addr->sin6_addr.s6_addr32[3]; chk_addr_ret = inet_addr_type_dev_table(net, dev, v4addr); rcu_read_unlock(); if (!inet_addr_valid_or_nonlocal(net, inet, v4addr, chk_addr_ret)) { err = -EADDRNOTAVAIL; goto out; } } else { if (addr_type != IPV6_ADDR_ANY) { struct net_device *dev = NULL; rcu_read_lock(); if (__ipv6_addr_needs_scope_id(addr_type)) { if (addr_len >= sizeof(struct sockaddr_in6) && addr->sin6_scope_id) { /* Override any existing binding, if another one * is supplied by user. */ sk->sk_bound_dev_if = addr->sin6_scope_id; } /* Binding to link-local address requires an interface */ if (!sk->sk_bound_dev_if) { err = -EINVAL; goto out_unlock; } } if (sk->sk_bound_dev_if) { dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out_unlock; } } /* ipv4 addr of the socket is invalid. Only the * unspecified and mapped address have a v4 equivalent. */ v4addr = LOOPBACK4_IPV6; if (!(addr_type & IPV6_ADDR_MULTICAST)) { if (!ipv6_can_nonlocal_bind(net, inet) && !ipv6_chk_addr(net, &addr->sin6_addr, dev, 0)) { err = -EADDRNOTAVAIL; goto out_unlock; } } rcu_read_unlock(); } } inet->inet_rcv_saddr = v4addr; inet->inet_saddr = v4addr; sk->sk_v6_rcv_saddr = addr->sin6_addr; if (!(addr_type & IPV6_ADDR_MULTICAST)) np->saddr = addr->sin6_addr; saved_ipv6only = sk->sk_ipv6only; if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED) sk->sk_ipv6only = 1; /* Make sure we are allowed to bind here. */ if (snum || !(inet_test_bit(BIND_ADDRESS_NO_PORT, sk) || (flags & BIND_FORCE_ADDRESS_NO_PORT))) { err = sk->sk_prot->get_port(sk, snum); if (err) { sk->sk_ipv6only = saved_ipv6only; inet_reset_saddr(sk); goto out; } if (!(flags & BIND_FROM_BPF)) { err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk); if (err) { sk->sk_ipv6only = saved_ipv6only; inet_reset_saddr(sk); if (sk->sk_prot->put_port) sk->sk_prot->put_port(sk); goto out; } } } if (addr_type != IPV6_ADDR_ANY) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; inet->inet_sport = htons(inet->inet_num); inet->inet_dport = 0; inet->inet_daddr = 0; out: if (flags & BIND_WITH_LOCK) release_sock(sk); return err; out_unlock: rcu_read_unlock(); goto out; } int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) { u32 flags = BIND_WITH_LOCK; const struct proto *prot; int err = 0; /* IPV6_ADDRFORM can change sk->sk_prot under us. */ prot = READ_ONCE(sk->sk_prot); /* If the socket has its own bind function then use it. */ if (prot->bind) return prot->bind(sk, uaddr, addr_len); if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. 
*/ err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len, CGROUP_INET6_BIND, &flags); if (err) return err; return __inet6_bind(sk, uaddr, addr_len, flags); } /* bind for INET6 API */ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { return inet6_bind_sk(sock->sk, uaddr, addr_len); } EXPORT_SYMBOL(inet6_bind); int inet6_release(struct socket *sock) { struct sock *sk = sock->sk; if (!sk) return -EINVAL; /* Free mc lists */ ipv6_sock_mc_close(sk); /* Free ac lists */ ipv6_sock_ac_close(sk); return inet_release(sock); } EXPORT_SYMBOL(inet6_release); void inet6_cleanup_sock(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; struct ipv6_txoptions *opt; /* Release rx options */ skb = xchg(&np->pktoptions, NULL); kfree_skb(skb); skb = xchg(&np->rxpmtu, NULL); kfree_skb(skb); /* Free flowlabels */ fl6_free_socklist(sk); /* Free tx options */ opt = unrcu_pointer(xchg(&np->opt, NULL)); if (opt) { atomic_sub(opt->tot_len, &sk->sk_omem_alloc); txopt_put(opt); } } EXPORT_SYMBOL_GPL(inet6_cleanup_sock); /* * This does both peername and sockname. */ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr; int sin_addr_len = sizeof(*sin); struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; sin->sin6_scope_id = 0; lock_sock(sk); if (peer) { if (!inet->inet_dport || (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) && peer == 1)) { release_sock(sk); return -ENOTCONN; } sin->sin6_port = inet->inet_dport; sin->sin6_addr = sk->sk_v6_daddr; if (inet6_test_bit(SNDFLOW, sk)) sin->sin6_flowinfo = np->flow_label; BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET6_GETPEERNAME); } else { if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) sin->sin6_addr = np->saddr; else sin->sin6_addr = sk->sk_v6_rcv_saddr; sin->sin6_port = inet->inet_sport; BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET6_GETSOCKNAME); } sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, sk->sk_bound_dev_if); release_sock(sk); return sin_addr_len; } EXPORT_SYMBOL(inet6_getname); int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; struct sock *sk = sock->sk; struct net *net = sock_net(sk); const struct proto *prot; switch (cmd) { case SIOCADDRT: case SIOCDELRT: { struct in6_rtmsg rtmsg; if (copy_from_user(&rtmsg, argp, sizeof(rtmsg))) return -EFAULT; return ipv6_route_ioctl(net, cmd, &rtmsg); } case SIOCSIFADDR: return addrconf_add_ifaddr(net, argp); case SIOCDIFADDR: return addrconf_del_ifaddr(net, argp); case SIOCSIFDSTADDR: return addrconf_set_dstaddr(net, argp); default: /* IPV6_ADDRFORM can change sk->sk_prot under us. 
*/ prot = READ_ONCE(sk->sk_prot); if (!prot->ioctl) return -ENOIOCTLCMD; return sk_ioctl(sk, cmd, (void __user *)arg); } /*NOTREACHED*/ return 0; } EXPORT_SYMBOL(inet6_ioctl); #ifdef CONFIG_COMPAT struct compat_in6_rtmsg { struct in6_addr rtmsg_dst; struct in6_addr rtmsg_src; struct in6_addr rtmsg_gateway; u32 rtmsg_type; u16 rtmsg_dst_len; u16 rtmsg_src_len; u32 rtmsg_metric; u32 rtmsg_info; u32 rtmsg_flags; s32 rtmsg_ifindex; }; static int inet6_compat_routing_ioctl(struct sock *sk, unsigned int cmd, struct compat_in6_rtmsg __user *ur) { struct in6_rtmsg rt; if (copy_from_user(&rt.rtmsg_dst, &ur->rtmsg_dst, 3 * sizeof(struct in6_addr)) || get_user(rt.rtmsg_type, &ur->rtmsg_type) || get_user(rt.rtmsg_dst_len, &ur->rtmsg_dst_len) || get_user(rt.rtmsg_src_len, &ur->rtmsg_src_len) || get_user(rt.rtmsg_metric, &ur->rtmsg_metric) || get_user(rt.rtmsg_info, &ur->rtmsg_info) || get_user(rt.rtmsg_flags, &ur->rtmsg_flags) || get_user(rt.rtmsg_ifindex, &ur->rtmsg_ifindex)) return -EFAULT; return ipv6_route_ioctl(sock_net(sk), cmd, &rt); } int inet6_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { void __user *argp = compat_ptr(arg); struct sock *sk = sock->sk; switch (cmd) { case SIOCADDRT: case SIOCDELRT: return inet6_compat_routing_ioctl(sk, cmd, argp); default: return -ENOIOCTLCMD; } } EXPORT_SYMBOL_GPL(inet6_compat_ioctl); #endif /* CONFIG_COMPAT */ INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *, size_t)); int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; const struct proto *prot; if (unlikely(inet_send_prepare(sk))) return -EAGAIN; /* IPV6_ADDRFORM can change sk->sk_prot under us. */ prot = READ_ONCE(sk->sk_prot); return INDIRECT_CALL_2(prot->sendmsg, tcp_sendmsg, udpv6_sendmsg, sk, msg, size); } INDIRECT_CALLABLE_DECLARE(int udpv6_recvmsg(struct sock *, struct msghdr *, size_t, int, int *)); int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; const struct proto *prot; int addr_len = 0; int err; if (likely(!(flags & MSG_ERRQUEUE))) sock_rps_record_flow(sk); /* IPV6_ADDRFORM can change sk->sk_prot under us. 
*/ prot = READ_ONCE(sk->sk_prot); err = INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udpv6_recvmsg, sk, msg, size, flags, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; return err; } const struct proto_ops inet6_stream_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, .bind = inet6_bind, .connect = inet_stream_connect, /* ok */ .socketpair = sock_no_socketpair, /* a do nothing */ .accept = inet_accept, /* ok */ .getname = inet6_getname, .poll = tcp_poll, /* ok */ .ioctl = inet6_ioctl, /* must change */ .gettstamp = sock_gettstamp, .listen = inet_listen, /* ok */ .shutdown = inet_shutdown, /* ok */ .setsockopt = sock_common_setsockopt, /* ok */ .getsockopt = sock_common_getsockopt, /* ok */ .sendmsg = inet6_sendmsg, /* retpoline's sake */ .recvmsg = inet6_recvmsg, /* retpoline's sake */ #ifdef CONFIG_MMU .mmap = tcp_mmap, #endif .splice_eof = inet_splice_eof, .sendmsg_locked = tcp_sendmsg_locked, .splice_read = tcp_splice_read, .set_peek_off = sk_set_peek_off, .read_sock = tcp_read_sock, .read_skb = tcp_read_skb, .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif .set_rcvlowat = tcp_set_rcvlowat, }; const struct proto_ops inet6_dgram_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, .bind = inet6_bind, .connect = inet_dgram_connect, /* ok */ .socketpair = sock_no_socketpair, /* a do nothing */ .accept = sock_no_accept, /* a do nothing */ .getname = inet6_getname, .poll = udp_poll, /* ok */ .ioctl = inet6_ioctl, /* must change */ .gettstamp = sock_gettstamp, .listen = sock_no_listen, /* ok */ .shutdown = inet_shutdown, /* ok */ .setsockopt = sock_common_setsockopt, /* ok */ .getsockopt = sock_common_getsockopt, /* ok */ .sendmsg = inet6_sendmsg, /* retpoline's sake */ .recvmsg = inet6_recvmsg, /* retpoline's sake */ .read_skb = udp_read_skb, .mmap = sock_no_mmap, .set_peek_off = udp_set_peek_off, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif }; static const struct net_proto_family inet6_family_ops = { .family = PF_INET6, .create = inet6_create, .owner = THIS_MODULE, }; int inet6_register_protosw(struct inet_protosw *p) { struct list_head *lh; struct inet_protosw *answer; struct list_head *last_perm; int protocol = p->protocol; int ret; spin_lock_bh(&inetsw6_lock); ret = -EINVAL; if (p->type >= SOCK_MAX) goto out_illegal; /* If we are trying to override a permanent protocol, bail. */ answer = NULL; ret = -EPERM; last_perm = &inetsw6[p->type]; list_for_each(lh, &inetsw6[p->type]) { answer = list_entry(lh, struct inet_protosw, list); /* Check only the non-wild match. */ if (INET_PROTOSW_PERMANENT & answer->flags) { if (protocol == answer->protocol) break; last_perm = lh; } answer = NULL; } if (answer) goto out_permanent; /* Add the new entry after the last permanent entry if any, so that * the new entry does not override a permanent entry when matched with * a wild-card protocol. But it is allowed to override any existing * non-permanent entry. This means that when we remove this entry, the * system automatically returns to the old behavior. 
*/ list_add_rcu(&p->list, last_perm); ret = 0; out: spin_unlock_bh(&inetsw6_lock); return ret; out_permanent: pr_err("Attempt to override permanent protocol %d\n", protocol); goto out; out_illegal: pr_err("Ignoring attempt to register invalid socket type %d\n", p->type); goto out; } EXPORT_SYMBOL(inet6_register_protosw); void inet6_unregister_protosw(struct inet_protosw *p) { if (INET_PROTOSW_PERMANENT & p->flags) { pr_err("Attempt to unregister permanent protocol %d\n", p->protocol); } else { spin_lock_bh(&inetsw6_lock); list_del_rcu(&p->list); spin_unlock_bh(&inetsw6_lock); synchronize_net(); } } EXPORT_SYMBOL(inet6_unregister_protosw); int inet6_sk_rebuild_header(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct dst_entry *dst; dst = __sk_dst_check(sk, np->dst_cookie); if (!dst) { struct inet_sock *inet = inet_sk(sk); struct in6_addr *final_p, final; struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = sk->sk_protocol; fl6.daddr = sk->sk_v6_daddr; fl6.saddr = np->saddr; fl6.flowlabel = np->flow_label; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; fl6.flowi6_uid = sk->sk_uid; security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); rcu_read_lock(); final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); rcu_read_unlock(); dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); if (IS_ERR(dst)) { sk->sk_route_caps = 0; WRITE_ONCE(sk->sk_err_soft, -PTR_ERR(dst)); return PTR_ERR(dst); } ip6_dst_store(sk, dst, NULL, NULL); } return 0; } EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header); bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, const struct inet6_skb_parm *opt) { const struct ipv6_pinfo *np = inet6_sk(sk); if (np->rxopt.all) { if (((opt->flags & IP6SKB_HOPBYHOP) && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) || (ip6_flowinfo((struct ipv6hdr *) skb_network_header(skb)) && np->rxopt.bits.rxflow) || (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) || ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts))) return true; } return false; } EXPORT_SYMBOL_GPL(ipv6_opt_accepted); static struct packet_type ipv6_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_IPV6), .func = ipv6_rcv, .list_func = ipv6_list_rcv, }; static int __init ipv6_packet_init(void) { dev_add_pack(&ipv6_packet_type); return 0; } static void ipv6_packet_cleanup(void) { dev_remove_pack(&ipv6_packet_type); } static int __net_init ipv6_init_mibs(struct net *net) { int i; net->mib.udp_stats_in6 = alloc_percpu(struct udp_mib); if (!net->mib.udp_stats_in6) return -ENOMEM; net->mib.udplite_stats_in6 = alloc_percpu(struct udp_mib); if (!net->mib.udplite_stats_in6) goto err_udplite_mib; net->mib.ipv6_statistics = alloc_percpu(struct ipstats_mib); if (!net->mib.ipv6_statistics) goto err_ip_mib; for_each_possible_cpu(i) { struct ipstats_mib *af_inet6_stats; af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics, i); u64_stats_init(&af_inet6_stats->syncp); } net->mib.icmpv6_statistics = alloc_percpu(struct icmpv6_mib); if (!net->mib.icmpv6_statistics) goto err_icmp_mib; net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib), GFP_KERNEL); if (!net->mib.icmpv6msg_statistics) goto err_icmpmsg_mib; return 0; err_icmpmsg_mib: free_percpu(net->mib.icmpv6_statistics); err_icmp_mib: free_percpu(net->mib.ipv6_statistics); err_ip_mib: free_percpu(net->mib.udplite_stats_in6); err_udplite_mib: 
free_percpu(net->mib.udp_stats_in6); return -ENOMEM; } static void ipv6_cleanup_mibs(struct net *net) { free_percpu(net->mib.udp_stats_in6); free_percpu(net->mib.udplite_stats_in6); free_percpu(net->mib.ipv6_statistics); free_percpu(net->mib.icmpv6_statistics); kfree(net->mib.icmpv6msg_statistics); } static int __net_init inet6_net_init(struct net *net) { int err = 0; net->ipv6.sysctl.bindv6only = 0; net->ipv6.sysctl.icmpv6_time = 1*HZ; net->ipv6.sysctl.icmpv6_echo_ignore_all = 0; net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0; net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0; net->ipv6.sysctl.icmpv6_error_anycast_as_unicast = 0; /* By default, rate limit error messages. * Except for pmtu discovery, it would break it. * proc_do_large_bitmap needs pointer to the bitmap. */ bitmap_set(net->ipv6.sysctl.icmpv6_ratemask, 0, ICMPV6_ERRMSG_MAX + 1); bitmap_clear(net->ipv6.sysctl.icmpv6_ratemask, ICMPV6_PKT_TOOBIG, 1); net->ipv6.sysctl.icmpv6_ratemask_ptr = net->ipv6.sysctl.icmpv6_ratemask; net->ipv6.sysctl.flowlabel_consistency = 1; net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS; net->ipv6.sysctl.idgen_retries = 3; net->ipv6.sysctl.idgen_delay = 1 * HZ; net->ipv6.sysctl.flowlabel_state_ranges = 0; net->ipv6.sysctl.max_dst_opts_cnt = IP6_DEFAULT_MAX_DST_OPTS_CNT; net->ipv6.sysctl.max_hbh_opts_cnt = IP6_DEFAULT_MAX_HBH_OPTS_CNT; net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN; net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN; net->ipv6.sysctl.fib_notify_on_flag_change = 0; atomic_set(&net->ipv6.fib6_sernum, 1); net->ipv6.sysctl.ioam6_id = IOAM6_DEFAULT_ID; net->ipv6.sysctl.ioam6_id_wide = IOAM6_DEFAULT_ID_WIDE; err = ipv6_init_mibs(net); if (err) return err; #ifdef CONFIG_PROC_FS err = udp6_proc_init(net); if (err) goto out; err = tcp6_proc_init(net); if (err) goto proc_tcp6_fail; err = ac6_proc_init(net); if (err) goto proc_ac6_fail; #endif return err; #ifdef CONFIG_PROC_FS proc_ac6_fail: tcp6_proc_exit(net); proc_tcp6_fail: udp6_proc_exit(net); out: ipv6_cleanup_mibs(net); return err; #endif } static void __net_exit inet6_net_exit(struct net *net) { #ifdef CONFIG_PROC_FS udp6_proc_exit(net); tcp6_proc_exit(net); ac6_proc_exit(net); #endif ipv6_cleanup_mibs(net); } static struct pernet_operations inet6_net_ops = { .init = inet6_net_init, .exit = inet6_net_exit, }; static int ipv6_route_input(struct sk_buff *skb) { ip6_route_input(skb); return skb_dst(skb)->error; } static const struct ipv6_stub ipv6_stub_impl = { .ipv6_sock_mc_join = ipv6_sock_mc_join, .ipv6_sock_mc_drop = ipv6_sock_mc_drop, .ipv6_dst_lookup_flow = ip6_dst_lookup_flow, .ipv6_route_input = ipv6_route_input, .fib6_get_table = fib6_get_table, .fib6_table_lookup = fib6_table_lookup, .fib6_lookup = fib6_lookup, .fib6_select_path = fib6_select_path, .ip6_mtu_from_fib6 = ip6_mtu_from_fib6, .fib6_nh_init = fib6_nh_init, .fib6_nh_release = fib6_nh_release, .fib6_nh_release_dsts = fib6_nh_release_dsts, .fib6_update_sernum = fib6_update_sernum_stub, .fib6_rt_update = fib6_rt_update, .ip6_del_rt = ip6_del_rt, .udpv6_encap_enable = udpv6_encap_enable, .ndisc_send_na = ndisc_send_na, #if IS_ENABLED(CONFIG_XFRM) .xfrm6_local_rxpmtu = xfrm6_local_rxpmtu, .xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv, .xfrm6_gro_udp_encap_rcv = xfrm6_gro_udp_encap_rcv, .xfrm6_rcv_encap = xfrm6_rcv_encap, #endif .nd_tbl = &nd_tbl, .ipv6_fragment = ip6_fragment, .ipv6_dev_find = ipv6_dev_find, .ip6_xmit = ip6_xmit, }; static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { .inet6_bind = __inet6_bind, .udp6_lib_lookup 
= __udp6_lib_lookup, .ipv6_setsockopt = do_ipv6_setsockopt, .ipv6_getsockopt = do_ipv6_getsockopt, .ipv6_dev_get_saddr = ipv6_dev_get_saddr, }; static int __init inet6_init(void) { struct list_head *r; int err = 0; sock_skb_cb_check_size(sizeof(struct inet6_skb_parm)); /* Register the socket-side information for inet6_create. */ for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) INIT_LIST_HEAD(r); raw_hashinfo_init(&raw_v6_hashinfo); if (disable_ipv6_mod) { pr_info("Loaded, but administratively disabled, reboot required to enable\n"); goto out; } err = proto_register(&tcpv6_prot, 1); if (err) goto out; err = proto_register(&udpv6_prot, 1); if (err) goto out_unregister_tcp_proto; err = proto_register(&udplitev6_prot, 1); if (err) goto out_unregister_udp_proto; err = proto_register(&rawv6_prot, 1); if (err) goto out_unregister_udplite_proto; err = proto_register(&pingv6_prot, 1); if (err) goto out_unregister_raw_proto; /* We MUST register RAW sockets before we create the ICMP6, * IGMP6, or NDISC control sockets. */ err = rawv6_init(); if (err) goto out_unregister_ping_proto; /* Register the family here so that the init calls below will * be able to create sockets. (?? is this dangerous ??) */ err = sock_register(&inet6_family_ops); if (err) goto out_sock_register_fail; /* * ipngwg API draft makes clear that the correct semantics * for TCP and UDP is to consider one TCP and UDP instance * in a host available by both INET and INET6 APIs and * able to communicate via both network protocols. */ err = register_pernet_subsys(&inet6_net_ops); if (err) goto register_pernet_fail; err = ip6_mr_init(); if (err) goto ipmr_fail; err = icmpv6_init(); if (err) goto icmp_fail; err = ndisc_init(); if (err) goto ndisc_fail; err = igmp6_init(); if (err) goto igmp_fail; err = ipv6_netfilter_init(); if (err) goto netfilter_fail; /* Create /proc/foo6 entries. */ #ifdef CONFIG_PROC_FS err = -ENOMEM; if (raw6_proc_init()) goto proc_raw6_fail; if (udplite6_proc_init()) goto proc_udplite6_fail; if (ipv6_misc_proc_init()) goto proc_misc6_fail; if (if6_proc_init()) goto proc_if6_fail; #endif err = ip6_route_init(); if (err) goto ip6_route_fail; err = ndisc_late_init(); if (err) goto ndisc_late_fail; err = ip6_flowlabel_init(); if (err) goto ip6_flowlabel_fail; err = ipv6_anycast_init(); if (err) goto ipv6_anycast_fail; err = addrconf_init(); if (err) goto addrconf_fail; /* Init v6 extension headers. */ err = ipv6_exthdrs_init(); if (err) goto ipv6_exthdrs_fail; err = ipv6_frag_init(); if (err) goto ipv6_frag_fail; /* Init v6 transport protocols. 
*/ err = udpv6_init(); if (err) goto udpv6_fail; err = udplitev6_init(); if (err) goto udplitev6_fail; err = udpv6_offload_init(); if (err) goto udpv6_offload_fail; err = tcpv6_init(); if (err) goto tcpv6_fail; err = ipv6_packet_init(); if (err) goto ipv6_packet_fail; err = pingv6_init(); if (err) goto pingv6_fail; err = calipso_init(); if (err) goto calipso_fail; err = seg6_init(); if (err) goto seg6_fail; err = rpl_init(); if (err) goto rpl_fail; err = ioam6_init(); if (err) goto ioam6_fail; err = igmp6_late_init(); if (err) goto igmp6_late_err; #ifdef CONFIG_SYSCTL err = ipv6_sysctl_register(); if (err) goto sysctl_fail; #endif /* ensure that ipv6 stubs are visible only after ipv6 is ready */ wmb(); ipv6_stub = &ipv6_stub_impl; ipv6_bpf_stub = &ipv6_bpf_stub_impl; out: return err; #ifdef CONFIG_SYSCTL sysctl_fail: igmp6_late_cleanup(); #endif igmp6_late_err: ioam6_exit(); ioam6_fail: rpl_exit(); rpl_fail: seg6_exit(); seg6_fail: calipso_exit(); calipso_fail: pingv6_exit(); pingv6_fail: ipv6_packet_cleanup(); ipv6_packet_fail: tcpv6_exit(); tcpv6_fail: udpv6_offload_exit(); udpv6_offload_fail: udplitev6_exit(); udplitev6_fail: udpv6_exit(); udpv6_fail: ipv6_frag_exit(); ipv6_frag_fail: ipv6_exthdrs_exit(); ipv6_exthdrs_fail: addrconf_cleanup(); addrconf_fail: ipv6_anycast_cleanup(); ipv6_anycast_fail: ip6_flowlabel_cleanup(); ip6_flowlabel_fail: ndisc_late_cleanup(); ndisc_late_fail: ip6_route_cleanup(); ip6_route_fail: #ifdef CONFIG_PROC_FS if6_proc_exit(); proc_if6_fail: ipv6_misc_proc_exit(); proc_misc6_fail: udplite6_proc_exit(); proc_udplite6_fail: raw6_proc_exit(); proc_raw6_fail: #endif ipv6_netfilter_fini(); netfilter_fail: igmp6_cleanup(); igmp_fail: ndisc_cleanup(); ndisc_fail: icmpv6_cleanup(); icmp_fail: ip6_mr_cleanup(); ipmr_fail: unregister_pernet_subsys(&inet6_net_ops); register_pernet_fail: sock_unregister(PF_INET6); rtnl_unregister_all(PF_INET6); out_sock_register_fail: rawv6_exit(); out_unregister_ping_proto: proto_unregister(&pingv6_prot); out_unregister_raw_proto: proto_unregister(&rawv6_prot); out_unregister_udplite_proto: proto_unregister(&udplitev6_prot); out_unregister_udp_proto: proto_unregister(&udpv6_prot); out_unregister_tcp_proto: proto_unregister(&tcpv6_prot); goto out; } module_init(inet6_init); MODULE_ALIAS_NETPROTO(PF_INET6);
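/*
 * Example (not from the kernel sources above): a minimal user-space sketch
 * of the path the code above implements. socket(AF_INET6, SOCK_STREAM, 0)
 * reaches inet6_create() through the inet6_family_ops registered in
 * inet6_init(), and bind() on a sockaddr_in6 lands in __inet6_bind() via
 * inet6_bind(). The port number is hypothetical and error handling is
 * trimmed to the essentials.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>

int main(void)
{
	struct sockaddr_in6 addr;
	int one = 1;
	int fd;

	fd = socket(AF_INET6, SOCK_STREAM, 0);		/* -> inet6_create() */
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/* Mirrors the sk->sk_ipv6only handling seen in __inet6_bind() */
	setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one));

	memset(&addr, 0, sizeof(addr));
	addr.sin6_family = AF_INET6;
	addr.sin6_addr = in6addr_loopback;
	addr.sin6_port = htons(8080);			/* hypothetical port */

	/* -> inet6_bind() -> __inet6_bind() */
	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("bind");
		close(fd);
		return 1;
	}

	close(fd);
	return 0;
}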
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * fscrypt_private.h
 *
 * Copyright (C) 2015, Google, Inc.
 *
 * Originally written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar.
 * Heavily modified since then.
 */

#ifndef _FSCRYPT_PRIVATE_H
#define _FSCRYPT_PRIVATE_H

#include <linux/fscrypt.h>
#include <linux/siphash.h>
#include <crypto/hash.h>
#include <linux/blk-crypto.h>

#define CONST_STRLEN(str)	(sizeof(str) - 1)

#define FSCRYPT_FILE_NONCE_SIZE	16

/*
 * Minimum size of an fscrypt master key.  Note: a longer key will be required
 * if ciphers with a 256-bit security strength are used.  This is just the
 * absolute minimum, which applies when only 128-bit encryption is used.
*/ #define FSCRYPT_MIN_KEY_SIZE 16 #define FSCRYPT_CONTEXT_V1 1 #define FSCRYPT_CONTEXT_V2 2 /* Keep this in sync with include/uapi/linux/fscrypt.h */ #define FSCRYPT_MODE_MAX FSCRYPT_MODE_AES_256_HCTR2 struct fscrypt_context_v1 { u8 version; /* FSCRYPT_CONTEXT_V1 */ u8 contents_encryption_mode; u8 filenames_encryption_mode; u8 flags; u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE]; u8 nonce[FSCRYPT_FILE_NONCE_SIZE]; }; struct fscrypt_context_v2 { u8 version; /* FSCRYPT_CONTEXT_V2 */ u8 contents_encryption_mode; u8 filenames_encryption_mode; u8 flags; u8 log2_data_unit_size; u8 __reserved[3]; u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]; u8 nonce[FSCRYPT_FILE_NONCE_SIZE]; }; /* * fscrypt_context - the encryption context of an inode * * This is the on-disk equivalent of an fscrypt_policy, stored alongside each * encrypted file usually in a hidden extended attribute. It contains the * fields from the fscrypt_policy, in order to identify the encryption algorithm * and key with which the file is encrypted. It also contains a nonce that was * randomly generated by fscrypt itself; this is used as KDF input or as a tweak * to cause different files to be encrypted differently. */ union fscrypt_context { u8 version; struct fscrypt_context_v1 v1; struct fscrypt_context_v2 v2; }; /* * Return the size expected for the given fscrypt_context based on its version * number, or 0 if the context version is unrecognized. */ static inline int fscrypt_context_size(const union fscrypt_context *ctx) { switch (ctx->version) { case FSCRYPT_CONTEXT_V1: BUILD_BUG_ON(sizeof(ctx->v1) != 28); return sizeof(ctx->v1); case FSCRYPT_CONTEXT_V2: BUILD_BUG_ON(sizeof(ctx->v2) != 40); return sizeof(ctx->v2); } return 0; } /* Check whether an fscrypt_context has a recognized version number and size */ static inline bool fscrypt_context_is_valid(const union fscrypt_context *ctx, int ctx_size) { return ctx_size >= 1 && ctx_size == fscrypt_context_size(ctx); } /* Retrieve the context's nonce, assuming the context was already validated */ static inline const u8 *fscrypt_context_nonce(const union fscrypt_context *ctx) { switch (ctx->version) { case FSCRYPT_CONTEXT_V1: return ctx->v1.nonce; case FSCRYPT_CONTEXT_V2: return ctx->v2.nonce; } WARN_ON_ONCE(1); return NULL; } union fscrypt_policy { u8 version; struct fscrypt_policy_v1 v1; struct fscrypt_policy_v2 v2; }; /* * Return the size expected for the given fscrypt_policy based on its version * number, or 0 if the policy version is unrecognized. 
*/ static inline int fscrypt_policy_size(const union fscrypt_policy *policy) { switch (policy->version) { case FSCRYPT_POLICY_V1: return sizeof(policy->v1); case FSCRYPT_POLICY_V2: return sizeof(policy->v2); } return 0; } /* Return the contents encryption mode of a valid encryption policy */ static inline u8 fscrypt_policy_contents_mode(const union fscrypt_policy *policy) { switch (policy->version) { case FSCRYPT_POLICY_V1: return policy->v1.contents_encryption_mode; case FSCRYPT_POLICY_V2: return policy->v2.contents_encryption_mode; } BUG(); } /* Return the filenames encryption mode of a valid encryption policy */ static inline u8 fscrypt_policy_fnames_mode(const union fscrypt_policy *policy) { switch (policy->version) { case FSCRYPT_POLICY_V1: return policy->v1.filenames_encryption_mode; case FSCRYPT_POLICY_V2: return policy->v2.filenames_encryption_mode; } BUG(); } /* Return the flags (FSCRYPT_POLICY_FLAG*) of a valid encryption policy */ static inline u8 fscrypt_policy_flags(const union fscrypt_policy *policy) { switch (policy->version) { case FSCRYPT_POLICY_V1: return policy->v1.flags; case FSCRYPT_POLICY_V2: return policy->v2.flags; } BUG(); } static inline int fscrypt_policy_v2_du_bits(const struct fscrypt_policy_v2 *policy, const struct inode *inode) { return policy->log2_data_unit_size ?: inode->i_blkbits; } static inline int fscrypt_policy_du_bits(const union fscrypt_policy *policy, const struct inode *inode) { switch (policy->version) { case FSCRYPT_POLICY_V1: return inode->i_blkbits; case FSCRYPT_POLICY_V2: return fscrypt_policy_v2_du_bits(&policy->v2, inode); } BUG(); } /* * For encrypted symlinks, the ciphertext length is stored at the beginning * of the string in little-endian format. */ struct fscrypt_symlink_data { __le16 len; char encrypted_path[]; } __packed; /** * struct fscrypt_prepared_key - a key prepared for actual encryption/decryption * @tfm: crypto API transform object * @blk_key: key for blk-crypto * * Normally only one of the fields will be non-NULL. */ struct fscrypt_prepared_key { struct crypto_skcipher *tfm; #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT struct blk_crypto_key *blk_key; #endif }; /* * fscrypt_inode_info - the "encryption key" for an inode * * When an encrypted file's key is made available, an instance of this struct is * allocated and stored in ->i_crypt_info. Once created, it remains until the * inode is evicted. */ struct fscrypt_inode_info { /* The key in a form prepared for actual encryption/decryption */ struct fscrypt_prepared_key ci_enc_key; /* True if ci_enc_key should be freed when this struct is freed */ u8 ci_owns_key : 1; #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT /* * True if this inode will use inline encryption (blk-crypto) instead of * the traditional filesystem-layer encryption. */ u8 ci_inlinecrypt : 1; #endif /* True if ci_dirhash_key is initialized */ u8 ci_dirhash_key_initialized : 1; /* * log2 of the data unit size (granularity of contents encryption) of * this file. This is computable from ci_policy and ci_inode but is * cached here for efficiency. Only used for regular files. */ u8 ci_data_unit_bits; /* Cached value: log2 of number of data units per FS block */ u8 ci_data_units_per_block_bits; /* Hashed inode number. Only set for IV_INO_LBLK_32 */ u32 ci_hashed_ino; /* * Encryption mode used for this inode. It corresponds to either the * contents or filenames encryption mode, depending on the inode type. 
*/ struct fscrypt_mode *ci_mode; /* Back-pointer to the inode */ struct inode *ci_inode; /* * The master key with which this inode was unlocked (decrypted). This * will be NULL if the master key was found in a process-subscribed * keyring rather than in the filesystem-level keyring. */ struct fscrypt_master_key *ci_master_key; /* * Link in list of inodes that were unlocked with the master key. * Only used when ->ci_master_key is set. */ struct list_head ci_master_key_link; /* * If non-NULL, then encryption is done using the master key directly * and ci_enc_key will equal ci_direct_key->dk_key. */ struct fscrypt_direct_key *ci_direct_key; /* * This inode's hash key for filenames. This is a 128-bit SipHash-2-4 * key. This is only set for directories that use a keyed dirhash over * the plaintext filenames -- currently just casefolded directories. */ siphash_key_t ci_dirhash_key; /* The encryption policy used by this inode */ union fscrypt_policy ci_policy; /* This inode's nonce, copied from the fscrypt_context */ u8 ci_nonce[FSCRYPT_FILE_NONCE_SIZE]; }; typedef enum { FS_DECRYPT = 0, FS_ENCRYPT, } fscrypt_direction_t; /* crypto.c */ extern struct kmem_cache *fscrypt_inode_info_cachep; int fscrypt_initialize(struct super_block *sb); int fscrypt_crypt_data_unit(const struct fscrypt_inode_info *ci, fscrypt_direction_t rw, u64 index, struct page *src_page, struct page *dest_page, unsigned int len, unsigned int offs, gfp_t gfp_flags); struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags); void __printf(3, 4) __cold fscrypt_msg(const struct inode *inode, const char *level, const char *fmt, ...); #define fscrypt_warn(inode, fmt, ...) \ fscrypt_msg((inode), KERN_WARNING, fmt, ##__VA_ARGS__) #define fscrypt_err(inode, fmt, ...) \ fscrypt_msg((inode), KERN_ERR, fmt, ##__VA_ARGS__) #define FSCRYPT_MAX_IV_SIZE 32 union fscrypt_iv { struct { /* zero-based index of data unit within the file */ __le64 index; /* per-file nonce; only set in DIRECT_KEY mode */ u8 nonce[FSCRYPT_FILE_NONCE_SIZE]; }; u8 raw[FSCRYPT_MAX_IV_SIZE]; __le64 dun[FSCRYPT_MAX_IV_SIZE / sizeof(__le64)]; }; void fscrypt_generate_iv(union fscrypt_iv *iv, u64 index, const struct fscrypt_inode_info *ci); /* * Return the number of bits used by the maximum file data unit index that is * possible on the given filesystem, using the given log2 data unit size. */ static inline int fscrypt_max_file_dun_bits(const struct super_block *sb, int du_bits) { return fls64(sb->s_maxbytes - 1) - du_bits; } /* fname.c */ bool __fscrypt_fname_encrypted_size(const union fscrypt_policy *policy, u32 orig_len, u32 max_len, u32 *encrypted_len_ret); /* hkdf.c */ struct fscrypt_hkdf { struct crypto_shash *hmac_tfm; }; int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key, unsigned int master_key_size); /* * The list of contexts in which fscrypt uses HKDF. These values are used as * the first byte of the HKDF application-specific info string to guarantee that * info strings are never repeated between contexts. This ensures that all HKDF * outputs are unique and cryptographically isolated, i.e. knowledge of one * output doesn't reveal another. 
*/ #define HKDF_CONTEXT_KEY_IDENTIFIER 1 /* info=<empty> */ #define HKDF_CONTEXT_PER_FILE_ENC_KEY 2 /* info=file_nonce */ #define HKDF_CONTEXT_DIRECT_KEY 3 /* info=mode_num */ #define HKDF_CONTEXT_IV_INO_LBLK_64_KEY 4 /* info=mode_num||fs_uuid */ #define HKDF_CONTEXT_DIRHASH_KEY 5 /* info=file_nonce */ #define HKDF_CONTEXT_IV_INO_LBLK_32_KEY 6 /* info=mode_num||fs_uuid */ #define HKDF_CONTEXT_INODE_HASH_KEY 7 /* info=<empty> */ int fscrypt_hkdf_expand(const struct fscrypt_hkdf *hkdf, u8 context, const u8 *info, unsigned int infolen, u8 *okm, unsigned int okmlen); void fscrypt_destroy_hkdf(struct fscrypt_hkdf *hkdf); /* inline_crypt.c */ #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT int fscrypt_select_encryption_impl(struct fscrypt_inode_info *ci); static inline bool fscrypt_using_inline_encryption(const struct fscrypt_inode_info *ci) { return ci->ci_inlinecrypt; } int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, const struct fscrypt_inode_info *ci); void fscrypt_destroy_inline_crypt_key(struct super_block *sb, struct fscrypt_prepared_key *prep_key); /* * Check whether the crypto transform or blk-crypto key has been allocated in * @prep_key, depending on which encryption implementation the file will use. */ static inline bool fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key, const struct fscrypt_inode_info *ci) { /* * The two smp_load_acquire()'s here pair with the smp_store_release()'s * in fscrypt_prepare_inline_crypt_key() and fscrypt_prepare_key(). * I.e., in some cases (namely, if this prep_key is a per-mode * encryption key) another task can publish blk_key or tfm concurrently, * executing a RELEASE barrier. We need to use smp_load_acquire() here * to safely ACQUIRE the memory the other task published. */ if (fscrypt_using_inline_encryption(ci)) return smp_load_acquire(&prep_key->blk_key) != NULL; return smp_load_acquire(&prep_key->tfm) != NULL; } #else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ static inline int fscrypt_select_encryption_impl(struct fscrypt_inode_info *ci) { return 0; } static inline bool fscrypt_using_inline_encryption(const struct fscrypt_inode_info *ci) { return false; } static inline int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, const struct fscrypt_inode_info *ci) { WARN_ON_ONCE(1); return -EOPNOTSUPP; } static inline void fscrypt_destroy_inline_crypt_key(struct super_block *sb, struct fscrypt_prepared_key *prep_key) { } static inline bool fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key, const struct fscrypt_inode_info *ci) { return smp_load_acquire(&prep_key->tfm) != NULL; } #endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ /* keyring.c */ /* * fscrypt_master_key_secret - secret key material of an in-use master key */ struct fscrypt_master_key_secret { /* * For v2 policy keys: HKDF context keyed by this master key. * For v1 policy keys: not set (hkdf.hmac_tfm == NULL). */ struct fscrypt_hkdf hkdf; /* * Size of the raw key in bytes. This remains set even if ->raw was * zeroized due to no longer being needed. I.e. we still remember the * size of the key even if we don't need to remember the key itself. */ u32 size; /* For v1 policy keys: the raw key. Wiped for v2 policy keys. */ u8 raw[FSCRYPT_MAX_KEY_SIZE]; } __randomize_layout; /* * fscrypt_master_key - an in-use master key * * This represents a master encryption key which has been added to the * filesystem. 
There are three high-level states that a key can be in: * * FSCRYPT_KEY_STATUS_PRESENT * Key is fully usable; it can be used to unlock inodes that are encrypted * with it (this includes being able to create new inodes). ->mk_present * indicates whether the key is in this state. ->mk_secret exists, the key * is in the keyring, and ->mk_active_refs > 0 due to ->mk_present. * * FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED * Removal of this key has been initiated, but some inodes that were * unlocked with it are still in-use. Like ABSENT, ->mk_secret is wiped, * and the key can no longer be used to unlock inodes. Unlike ABSENT, the * key is still in the keyring; ->mk_decrypted_inodes is nonempty; and * ->mk_active_refs > 0, being equal to the size of ->mk_decrypted_inodes. * * This state transitions to ABSENT if ->mk_decrypted_inodes becomes empty, * or to PRESENT if FS_IOC_ADD_ENCRYPTION_KEY is called again for this key. * * FSCRYPT_KEY_STATUS_ABSENT * Key is fully removed. The key is no longer in the keyring, * ->mk_decrypted_inodes is empty, ->mk_active_refs == 0, ->mk_secret is * wiped, and the key can no longer be used to unlock inodes. */ struct fscrypt_master_key { /* * Link in ->s_master_keys->key_hashtable. * Only valid if ->mk_active_refs > 0. */ struct hlist_node mk_node; /* Semaphore that protects ->mk_secret, ->mk_users, and ->mk_present */ struct rw_semaphore mk_sem; /* * Active and structural reference counts. An active ref guarantees * that the struct continues to exist, continues to be in the keyring * ->s_master_keys, and that any embedded subkeys (e.g. * ->mk_direct_keys) that have been prepared continue to exist. * A structural ref only guarantees that the struct continues to exist. * * There is one active ref associated with ->mk_present being true, and * one active ref for each inode in ->mk_decrypted_inodes. * * There is one structural ref associated with the active refcount being * nonzero. Finding a key in the keyring also takes a structural ref, * which is then held temporarily while the key is operated on. */ refcount_t mk_active_refs; refcount_t mk_struct_refs; struct rcu_head mk_rcu_head; /* * The secret key material. Wiped as soon as it is no longer needed; * for details, see the fscrypt_master_key struct comment. * * Locking: protected by ->mk_sem. */ struct fscrypt_master_key_secret mk_secret; /* * For v1 policy keys: an arbitrary key descriptor which was assigned by * userspace (->descriptor). * * For v2 policy keys: a cryptographic hash of this key (->identifier). */ struct fscrypt_key_specifier mk_spec; /* * Keyring which contains a key of type 'key_type_fscrypt_user' for each * user who has added this key. Normally each key will be added by just * one user, but it's possible that multiple users share a key, and in * that case we need to keep track of those users so that one user can't * remove the key before the others want it removed too. * * This is NULL for v1 policy keys; those can only be added by root. * * Locking: protected by ->mk_sem. (We don't just rely on the keyrings * subsystem semaphore ->mk_users->sem, as we need support for atomic * search+insert along with proper synchronization with other fields.) */ struct key *mk_users; /* * List of inodes that were unlocked using this key. This allows the * inodes to be evicted efficiently if the key is removed. */ struct list_head mk_decrypted_inodes; spinlock_t mk_decrypted_inodes_lock; /* * Per-mode encryption keys for the various types of encryption policies * that use them. 
Allocated and derived on-demand. */ struct fscrypt_prepared_key mk_direct_keys[FSCRYPT_MODE_MAX + 1]; struct fscrypt_prepared_key mk_iv_ino_lblk_64_keys[FSCRYPT_MODE_MAX + 1]; struct fscrypt_prepared_key mk_iv_ino_lblk_32_keys[FSCRYPT_MODE_MAX + 1]; /* Hash key for inode numbers. Initialized only when needed. */ siphash_key_t mk_ino_hash_key; bool mk_ino_hash_key_initialized; /* * Whether this key is in the "present" state, i.e. fully usable. For * details, see the fscrypt_master_key struct comment. * * Locking: protected by ->mk_sem, but can be read locklessly using * READ_ONCE(). Writers must use WRITE_ONCE() when concurrent readers * are possible. */ bool mk_present; } __randomize_layout; static inline const char *master_key_spec_type( const struct fscrypt_key_specifier *spec) { switch (spec->type) { case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR: return "descriptor"; case FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER: return "identifier"; } return "[unknown]"; } static inline int master_key_spec_len(const struct fscrypt_key_specifier *spec) { switch (spec->type) { case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR: return FSCRYPT_KEY_DESCRIPTOR_SIZE; case FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER: return FSCRYPT_KEY_IDENTIFIER_SIZE; } return 0; } void fscrypt_put_master_key(struct fscrypt_master_key *mk); void fscrypt_put_master_key_activeref(struct super_block *sb, struct fscrypt_master_key *mk); struct fscrypt_master_key * fscrypt_find_master_key(struct super_block *sb, const struct fscrypt_key_specifier *mk_spec); int fscrypt_get_test_dummy_key_identifier( u8 key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]); int fscrypt_add_test_dummy_key(struct super_block *sb, struct fscrypt_key_specifier *key_spec); int fscrypt_verify_key_added(struct super_block *sb, const u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]); int __init fscrypt_init_keyring(void); /* keysetup.c */ struct fscrypt_mode { const char *friendly_name; const char *cipher_str; int keysize; /* key size in bytes */ int security_strength; /* security strength in bytes */ int ivsize; /* IV size in bytes */ int logged_cryptoapi_impl; int logged_blk_crypto_native; int logged_blk_crypto_fallback; enum blk_crypto_mode_num blk_crypto_mode; }; extern struct fscrypt_mode fscrypt_modes[]; int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, const struct fscrypt_inode_info *ci); void fscrypt_destroy_prepared_key(struct super_block *sb, struct fscrypt_prepared_key *prep_key); int fscrypt_set_per_file_enc_key(struct fscrypt_inode_info *ci, const u8 *raw_key); int fscrypt_derive_dirhash_key(struct fscrypt_inode_info *ci, const struct fscrypt_master_key *mk); void fscrypt_hash_inode_number(struct fscrypt_inode_info *ci, const struct fscrypt_master_key *mk); int fscrypt_get_encryption_info(struct inode *inode, bool allow_unsupported); /** * fscrypt_require_key() - require an inode's encryption key * @inode: the inode we need the key for * * If the inode is encrypted, set up its encryption key if not already done. * Then require that the key be present and return -ENOKEY otherwise. * * No locks are needed, and the key will live as long as the struct inode --- so * it won't go away from under you. * * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code * if a problem occurred while setting up the encryption key. 
*/ static inline int fscrypt_require_key(struct inode *inode) { if (IS_ENCRYPTED(inode)) { int err = fscrypt_get_encryption_info(inode, false); if (err) return err; if (!fscrypt_has_encryption_key(inode)) return -ENOKEY; } return 0; } /* keysetup_v1.c */ void fscrypt_put_direct_key(struct fscrypt_direct_key *dk); int fscrypt_setup_v1_file_key(struct fscrypt_inode_info *ci, const u8 *raw_master_key); int fscrypt_setup_v1_file_key_via_subscribed_keyrings( struct fscrypt_inode_info *ci); /* policy.c */ bool fscrypt_policies_equal(const union fscrypt_policy *policy1, const union fscrypt_policy *policy2); int fscrypt_policy_to_key_spec(const union fscrypt_policy *policy, struct fscrypt_key_specifier *key_spec); const union fscrypt_policy *fscrypt_get_dummy_policy(struct super_block *sb); bool fscrypt_supported_policy(const union fscrypt_policy *policy_u, const struct inode *inode); int fscrypt_policy_from_context(union fscrypt_policy *policy_u, const union fscrypt_context *ctx_u, int ctx_size); const union fscrypt_policy *fscrypt_policy_to_inherit(struct inode *dir); #endif /* _FSCRYPT_PRIVATE_H */
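/*
 * Worked example (standalone user space, not part of this header): the
 * arithmetic behind fscrypt_max_file_dun_bits() above. Assuming a
 * hypothetical s_maxbytes of 2^44 - 1 and 4096-byte data units
 * (du_bits == 12), the largest possible data unit index needs
 * fls64(s_maxbytes - 1) - du_bits = 44 - 12 = 32 bits.
 */
#include <stdio.h>
#include <stdint.h>

/* Stand-in for the kernel's fls64(): 1-based index of the highest set bit */
static int fls64_demo(uint64_t x)
{
	return x ? 64 - __builtin_clzll(x) : 0;
}

int main(void)
{
	uint64_t s_maxbytes = (1ULL << 44) - 1;	/* hypothetical filesystem limit */
	int du_bits = 12;			/* log2 of a 4096-byte data unit */

	printf("max file DUN bits: %d\n", fls64_demo(s_maxbytes - 1) - du_bits);
	return 0;
}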
// SPDX-License-Identifier: GPL-2.0+
/*
 * Support for the Maxtor OneTouch USB hard drive's button
 *
 * Current development and maintenance by:
 *	Copyright (c) 2005 Nick Sillik <n.sillik@temple.edu>
 *
 * Initial work by:
 *	Copyright (c) 2003 Erik Thyren <erth7411@student.uu.se>
 *
 * Based on usbmouse.c (Vojtech Pavlik) and xpad.c (Marko Friedemann)
 *
 */

#include <linux/kernel.h>
#include <linux/input.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/usb/input.h>

#include "usb.h"
#include "debug.h"
#include "scsiglue.h"

#define DRV_NAME "ums-onetouch"

MODULE_DESCRIPTION("Maxtor USB OneTouch hard drive button driver");
MODULE_AUTHOR("Nick Sillik <n.sillik@temple.edu>");
MODULE_LICENSE("GPL");
MODULE_IMPORT_NS("USB_STORAGE");

#define ONETOUCH_PKT_LEN	0x02
#define ONETOUCH_BUTTON		KEY_PROG1

static int onetouch_connect_input(struct us_data *ss);
static void onetouch_release_input(void *onetouch_);

struct usb_onetouch {
	char name[128];
	char phys[64];
	struct input_dev *dev;		/* input device interface */
	struct usb_device *udev;	/* usb device */

	struct urb *irq;		/* urb for interrupt in report */
	unsigned char *data;		/* input data */
	dma_addr_t data_dma;
	unsigned int is_open:1;
};

/*
 * The table of devices
 */
#define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \
		    vendorName, productName, useProtocol, useTransport, \
		    initFunction, flags) \
{ USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax), \
  .driver_info = (flags) }

static const struct usb_device_id onetouch_usb_ids[] = {
#	include "unusual_onetouch.h"
	{ }		/* Terminating entry */
};
MODULE_DEVICE_TABLE(usb, onetouch_usb_ids);

#undef UNUSUAL_DEV

/*
 * The flags table
 */
#define UNUSUAL_DEV(idVendor, idProduct, bcdDeviceMin, bcdDeviceMax, \
		    vendor_name, product_name, use_protocol, use_transport, \
		    init_function, Flags) \
{ \
	.vendorName = vendor_name,	\
	.productName = product_name,	\
	.useProtocol = use_protocol,	\
	.useTransport = use_transport,	\
	.initFunction = init_function,	\
}

static const struct us_unusual_dev onetouch_unusual_dev_list[] = {
#	include "unusual_onetouch.h"
	{ }		/* Terminating entry */
};

#undef UNUSUAL_DEV

static void usb_onetouch_irq(struct urb *urb)
{
	struct usb_onetouch *onetouch = urb->context;
	signed char *data = onetouch->data;
	struct input_dev *dev = onetouch->dev;
	int status = urb->status;
	int retval;

	switch (status) {
	case 0:			/* success */
		break;
	case -ECONNRESET:	/*
unlink */ case -ENOENT: case -ESHUTDOWN: return; /* -EPIPE: should clear the halt */ default: /* error */ goto resubmit; } input_report_key(dev, ONETOUCH_BUTTON, data[0] & 0x02); input_sync(dev); resubmit: retval = usb_submit_urb (urb, GFP_ATOMIC); if (retval) dev_err(&dev->dev, "can't resubmit intr, %s-%s/input0, " "retval %d\n", onetouch->udev->bus->bus_name, onetouch->udev->devpath, retval); } static int usb_onetouch_open(struct input_dev *dev) { struct usb_onetouch *onetouch = input_get_drvdata(dev); onetouch->is_open = 1; onetouch->irq->dev = onetouch->udev; if (usb_submit_urb(onetouch->irq, GFP_KERNEL)) { dev_err(&dev->dev, "usb_submit_urb failed\n"); return -EIO; } return 0; } static void usb_onetouch_close(struct input_dev *dev) { struct usb_onetouch *onetouch = input_get_drvdata(dev); usb_kill_urb(onetouch->irq); onetouch->is_open = 0; } #ifdef CONFIG_PM static void usb_onetouch_pm_hook(struct us_data *us, int action) { struct usb_onetouch *onetouch = (struct usb_onetouch *) us->extra; if (onetouch->is_open) { switch (action) { case US_SUSPEND: usb_kill_urb(onetouch->irq); break; case US_RESUME: if (usb_submit_urb(onetouch->irq, GFP_NOIO) != 0) dev_err(&onetouch->irq->dev->dev, "usb_submit_urb failed\n"); break; default: break; } } } #endif /* CONFIG_PM */ static int onetouch_connect_input(struct us_data *ss) { struct usb_device *udev = ss->pusb_dev; struct usb_host_interface *interface; struct usb_endpoint_descriptor *endpoint; struct usb_onetouch *onetouch; struct input_dev *input_dev; int pipe, maxp; int error = -ENOMEM; interface = ss->pusb_intf->cur_altsetting; if (interface->desc.bNumEndpoints != 3) return -ENODEV; endpoint = &interface->endpoint[2].desc; if (!usb_endpoint_is_int_in(endpoint)) return -ENODEV; pipe = usb_rcvintpipe(udev, endpoint->bEndpointAddress); maxp = usb_maxpacket(udev, pipe); maxp = min(maxp, ONETOUCH_PKT_LEN); onetouch = kzalloc(sizeof(struct usb_onetouch), GFP_KERNEL); input_dev = input_allocate_device(); if (!onetouch || !input_dev) goto fail1; onetouch->data = usb_alloc_coherent(udev, ONETOUCH_PKT_LEN, GFP_KERNEL, &onetouch->data_dma); if (!onetouch->data) goto fail1; onetouch->irq = usb_alloc_urb(0, GFP_KERNEL); if (!onetouch->irq) goto fail2; onetouch->udev = udev; onetouch->dev = input_dev; if (udev->manufacturer) strscpy(onetouch->name, udev->manufacturer, sizeof(onetouch->name)); if (udev->product) { if (udev->manufacturer) strlcat(onetouch->name, " ", sizeof(onetouch->name)); strlcat(onetouch->name, udev->product, sizeof(onetouch->name)); } if (!strlen(onetouch->name)) snprintf(onetouch->name, sizeof(onetouch->name), "Maxtor Onetouch %04x:%04x", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct)); usb_make_path(udev, onetouch->phys, sizeof(onetouch->phys)); strlcat(onetouch->phys, "/input0", sizeof(onetouch->phys)); input_dev->name = onetouch->name; input_dev->phys = onetouch->phys; usb_to_input_id(udev, &input_dev->id); input_dev->dev.parent = &udev->dev; set_bit(EV_KEY, input_dev->evbit); set_bit(ONETOUCH_BUTTON, input_dev->keybit); clear_bit(0, input_dev->keybit); input_set_drvdata(input_dev, onetouch); input_dev->open = usb_onetouch_open; input_dev->close = usb_onetouch_close; usb_fill_int_urb(onetouch->irq, udev, pipe, onetouch->data, maxp, usb_onetouch_irq, onetouch, endpoint->bInterval); onetouch->irq->transfer_dma = onetouch->data_dma; onetouch->irq->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; ss->extra_destructor = onetouch_release_input; ss->extra = onetouch; #ifdef CONFIG_PM ss->suspend_resume_hook = 
usb_onetouch_pm_hook; #endif error = input_register_device(onetouch->dev); if (error) goto fail3; return 0; fail3: usb_free_urb(onetouch->irq); fail2: usb_free_coherent(udev, ONETOUCH_PKT_LEN, onetouch->data, onetouch->data_dma); fail1: kfree(onetouch); input_free_device(input_dev); return error; } static void onetouch_release_input(void *onetouch_) { struct usb_onetouch *onetouch = (struct usb_onetouch *) onetouch_; if (onetouch) { usb_kill_urb(onetouch->irq); input_unregister_device(onetouch->dev); usb_free_urb(onetouch->irq); usb_free_coherent(onetouch->udev, ONETOUCH_PKT_LEN, onetouch->data, onetouch->data_dma); } } static struct scsi_host_template onetouch_host_template; static int onetouch_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct us_data *us; int result; result = usb_stor_probe1(&us, intf, id, (id - onetouch_usb_ids) + onetouch_unusual_dev_list, &onetouch_host_template); if (result) return result; /* Use default transport and protocol */ result = usb_stor_probe2(us); return result; } static struct usb_driver onetouch_driver = { .name = DRV_NAME, .probe = onetouch_probe, .disconnect = usb_stor_disconnect, .suspend = usb_stor_suspend, .resume = usb_stor_resume, .reset_resume = usb_stor_reset_resume, .pre_reset = usb_stor_pre_reset, .post_reset = usb_stor_post_reset, .id_table = onetouch_usb_ids, .soft_unbind = 1, .no_dynamic_id = 1, }; module_usb_stor_driver(onetouch_driver, onetouch_host_template, DRV_NAME);
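/*
 * Example (not part of the driver above): a minimal user-space sketch that
 * consumes the KEY_PROG1 presses usb_onetouch_irq() reports through the
 * input device registered in onetouch_connect_input(). The event node path
 * is hypothetical; the correct /dev/input/eventN has to be looked up, e.g.
 * from /proc/bus/input/devices.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/input.h>

int main(void)
{
	struct input_event ev;
	int fd = open("/dev/input/event5", O_RDONLY);	/* hypothetical node */

	if (fd < 0) {
		perror("open");
		return 1;
	}

	while (read(fd, &ev, sizeof(ev)) == sizeof(ev)) {
		if (ev.type == EV_KEY && ev.code == KEY_PROG1 && ev.value == 1)
			printf("OneTouch button pressed\n");
	}

	close(fd);
	return 0;
}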
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Sunplus spca504(abc) spca533 spca536 library
 * Copyright (C) 2005 Michel Xhaard mxhaard@magic.fr
 *
 * V4L2 by Jean-Francois Moine <http://moinejf.free.fr>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#define MODULE_NAME "sunplus"

#include "gspca.h"
#include "jpeg.h"

MODULE_AUTHOR("Michel Xhaard <mxhaard@users.sourceforge.net>");
MODULE_DESCRIPTION("GSPCA/SPCA5xx USB Camera Driver");
MODULE_LICENSE("GPL");

#define QUALITY 85

/* specific webcam descriptor */
struct sd {
        struct gspca_dev gspca_dev;     /* !! must be the first item */

        bool autogain;

        u8 bridge;
#define BRIDGE_SPCA504 0
#define BRIDGE_SPCA504B 1
#define BRIDGE_SPCA504C 2
#define BRIDGE_SPCA533 3
#define BRIDGE_SPCA536 4
        u8 subtype;
#define AiptekMiniPenCam13 1
#define LogitechClickSmart420 2
#define LogitechClickSmart820 3
#define MegapixV4 4
#define MegaImageVI 5

        u8 jpeg_hdr[JPEG_HDR_SZ];
};

static const struct v4l2_pix_format vga_mode[] = {
        {320, 240, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
                .bytesperline = 320,
                .sizeimage = 320 * 240 * 3 / 8 + 590,
                .colorspace = V4L2_COLORSPACE_JPEG,
                .priv = 2},
        {640, 480, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
                .bytesperline = 640,
                .sizeimage = 640 * 480 * 3 / 8 + 590,
                .colorspace = V4L2_COLORSPACE_JPEG,
                .priv = 1},
};

static const struct v4l2_pix_format custom_mode[] = {
        {320, 240, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
                .bytesperline = 320,
                .sizeimage = 320 * 240 * 3 / 8 + 590,
                .colorspace = V4L2_COLORSPACE_JPEG,
                .priv = 2},
        {464, 480, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
                .bytesperline = 464,
                .sizeimage = 464 * 480 * 3 / 8 + 590,
                .colorspace = V4L2_COLORSPACE_JPEG,
                .priv = 1},
};

static const struct v4l2_pix_format vga_mode2[] = {
        {176, 144, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
                .bytesperline = 176,
                .sizeimage = 176 * 144 * 3 / 8 + 590,
                .colorspace = V4L2_COLORSPACE_JPEG,
                .priv = 4},
        {320, 240, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
                .bytesperline = 320,
                .sizeimage = 320 * 240 * 3 / 8 + 590,
                .colorspace = V4L2_COLORSPACE_JPEG,
                .priv = 3},
        {352, 288, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
                .bytesperline = 352,
                .sizeimage = 352 * 288 * 3 / 8 + 590,
                .colorspace = V4L2_COLORSPACE_JPEG,
                .priv = 2},
        {640, 480, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
                .bytesperline = 640,
                .sizeimage = 640 * 480 * 3 / 8 + 590,
                .colorspace = V4L2_COLORSPACE_JPEG,
                .priv = 1},
};

#define SPCA50X_OFFSET_DATA 10
#define SPCA504_PCCAM600_OFFSET_SNAPSHOT 3
#define SPCA504_PCCAM600_OFFSET_COMPRESS 4
#define SPCA504_PCCAM600_OFFSET_MODE 5
#define SPCA504_PCCAM600_OFFSET_DATA 14

/* Frame packet header offsets for the spca533 */
#define SPCA533_OFFSET_DATA 16
#define SPCA533_OFFSET_FRAMSEQ 15

/* Frame packet header offsets for the spca536 */
#define SPCA536_OFFSET_DATA 4
#define SPCA536_OFFSET_FRAMSEQ 1 struct cmd { u8 req; u16 val; u16 idx; }; /* Initialisation data for the Creative PC-CAM 600 */ static const struct cmd spca504_pccam600_init_data[] = { /* {0xa0, 0x0000, 0x0503}, * capture mode */ {0x00, 0x0000, 0x2000}, {0x00, 0x0013, 0x2301}, {0x00, 0x0003, 0x2000}, {0x00, 0x0001, 0x21ac}, {0x00, 0x0001, 0x21a6}, {0x00, 0x0000, 0x21a7}, /* brightness */ {0x00, 0x0020, 0x21a8}, /* contrast */ {0x00, 0x0001, 0x21ac}, /* sat/hue */ {0x00, 0x0000, 0x21ad}, /* hue */ {0x00, 0x001a, 0x21ae}, /* saturation */ {0x00, 0x0002, 0x21a3}, /* gamma */ {0x30, 0x0154, 0x0008}, {0x30, 0x0004, 0x0006}, {0x30, 0x0258, 0x0009}, {0x30, 0x0004, 0x0000}, {0x30, 0x0093, 0x0004}, {0x30, 0x0066, 0x0005}, {0x00, 0x0000, 0x2000}, {0x00, 0x0013, 0x2301}, {0x00, 0x0003, 0x2000}, {0x00, 0x0013, 0x2301}, {0x00, 0x0003, 0x2000}, }; /* Creative PC-CAM 600 specific open data, sent before using the * generic initialisation data from spca504_open_data. */ static const struct cmd spca504_pccam600_open_data[] = { {0x00, 0x0001, 0x2501}, {0x20, 0x0500, 0x0001}, /* snapshot mode */ {0x00, 0x0003, 0x2880}, {0x00, 0x0001, 0x2881}, }; /* Initialisation data for the logitech clicksmart 420 */ static const struct cmd spca504A_clicksmart420_init_data[] = { /* {0xa0, 0x0000, 0x0503}, * capture mode */ {0x00, 0x0000, 0x2000}, {0x00, 0x0013, 0x2301}, {0x00, 0x0003, 0x2000}, {0x00, 0x0001, 0x21ac}, {0x00, 0x0001, 0x21a6}, {0x00, 0x0000, 0x21a7}, /* brightness */ {0x00, 0x0020, 0x21a8}, /* contrast */ {0x00, 0x0001, 0x21ac}, /* sat/hue */ {0x00, 0x0000, 0x21ad}, /* hue */ {0x00, 0x001a, 0x21ae}, /* saturation */ {0x00, 0x0002, 0x21a3}, /* gamma */ {0x30, 0x0004, 0x000a}, {0xb0, 0x0001, 0x0000}, {0xa1, 0x0080, 0x0001}, {0x30, 0x0049, 0x0000}, {0x30, 0x0060, 0x0005}, {0x0c, 0x0004, 0x0000}, {0x00, 0x0000, 0x0000}, {0x00, 0x0000, 0x2000}, {0x00, 0x0013, 0x2301}, {0x00, 0x0003, 0x2000}, }; /* clicksmart 420 open data ? */ static const struct cmd spca504A_clicksmart420_open_data[] = { {0x00, 0x0001, 0x2501}, {0x20, 0x0502, 0x0000}, {0x06, 0x0000, 0x0000}, {0x00, 0x0004, 0x2880}, {0x00, 0x0001, 0x2881}, {0xa0, 0x0000, 0x0503}, }; static const u8 qtable_creative_pccam[2][64] = { { /* Q-table Y-components */ 0x05, 0x03, 0x03, 0x05, 0x07, 0x0c, 0x0f, 0x12, 0x04, 0x04, 0x04, 0x06, 0x08, 0x11, 0x12, 0x11, 0x04, 0x04, 0x05, 0x07, 0x0c, 0x11, 0x15, 0x11, 0x04, 0x05, 0x07, 0x09, 0x0f, 0x1a, 0x18, 0x13, 0x05, 0x07, 0x0b, 0x11, 0x14, 0x21, 0x1f, 0x17, 0x07, 0x0b, 0x11, 0x13, 0x18, 0x1f, 0x22, 0x1c, 0x0f, 0x13, 0x17, 0x1a, 0x1f, 0x24, 0x24, 0x1e, 0x16, 0x1c, 0x1d, 0x1d, 0x22, 0x1e, 0x1f, 0x1e}, { /* Q-table C-components */ 0x05, 0x05, 0x07, 0x0e, 0x1e, 0x1e, 0x1e, 0x1e, 0x05, 0x06, 0x08, 0x14, 0x1e, 0x1e, 0x1e, 0x1e, 0x07, 0x08, 0x11, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x0e, 0x14, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e} }; /* FIXME: This Q-table is identical to the Creative PC-CAM one, * except for one byte. Possibly a typo? * NWG: 18/05/2003. 
*/ static const u8 qtable_spca504_default[2][64] = { { /* Q-table Y-components */ 0x05, 0x03, 0x03, 0x05, 0x07, 0x0c, 0x0f, 0x12, 0x04, 0x04, 0x04, 0x06, 0x08, 0x11, 0x12, 0x11, 0x04, 0x04, 0x05, 0x07, 0x0c, 0x11, 0x15, 0x11, 0x04, 0x05, 0x07, 0x09, 0x0f, 0x1a, 0x18, 0x13, 0x05, 0x07, 0x0b, 0x11, 0x14, 0x21, 0x1f, 0x17, 0x07, 0x0b, 0x11, 0x13, 0x18, 0x1f, 0x22, 0x1c, 0x0f, 0x13, 0x17, 0x1a, 0x1f, 0x24, 0x24, 0x1e, 0x16, 0x1c, 0x1d, 0x1d, 0x1d /* 0x22 */ , 0x1e, 0x1f, 0x1e, }, { /* Q-table C-components */ 0x05, 0x05, 0x07, 0x0e, 0x1e, 0x1e, 0x1e, 0x1e, 0x05, 0x06, 0x08, 0x14, 0x1e, 0x1e, 0x1e, 0x1e, 0x07, 0x08, 0x11, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x0e, 0x14, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e} }; /* read <len> bytes to gspca_dev->usb_buf */ static void reg_r(struct gspca_dev *gspca_dev, u8 req, u16 index, u16 len) { int ret; if (len > USB_BUF_SZ) { gspca_err(gspca_dev, "reg_r: buffer overflow\n"); return; } if (len == 0) { gspca_err(gspca_dev, "reg_r: zero-length read\n"); return; } if (gspca_dev->usb_err < 0) return; ret = usb_control_msg(gspca_dev->dev, usb_rcvctrlpipe(gspca_dev->dev, 0), req, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, /* value */ index, gspca_dev->usb_buf, len, 500); if (ret < 0) { pr_err("reg_r err %d\n", ret); gspca_dev->usb_err = ret; /* * Make sure the buffer is zeroed to avoid uninitialized * values. */ memset(gspca_dev->usb_buf, 0, USB_BUF_SZ); } } /* write one byte */ static void reg_w_1(struct gspca_dev *gspca_dev, u8 req, u16 value, u16 index, u16 byte) { int ret; if (gspca_dev->usb_err < 0) return; gspca_dev->usb_buf[0] = byte; ret = usb_control_msg(gspca_dev->dev, usb_sndctrlpipe(gspca_dev->dev, 0), req, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, gspca_dev->usb_buf, 1, 500); if (ret < 0) { pr_err("reg_w_1 err %d\n", ret); gspca_dev->usb_err = ret; } } /* write req / index / value */ static void reg_w_riv(struct gspca_dev *gspca_dev, u8 req, u16 index, u16 value) { struct usb_device *dev = gspca_dev->dev; int ret; if (gspca_dev->usb_err < 0) return; ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), req, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, NULL, 0, 500); if (ret < 0) { pr_err("reg_w_riv err %d\n", ret); gspca_dev->usb_err = ret; return; } gspca_dbg(gspca_dev, D_USBO, "reg_w_riv: 0x%02x,0x%04x:0x%04x\n", req, index, value); } static void write_vector(struct gspca_dev *gspca_dev, const struct cmd *data, int ncmds) { while (--ncmds >= 0) { reg_w_riv(gspca_dev, data->req, data->idx, data->val); data++; } } static void setup_qtable(struct gspca_dev *gspca_dev, const u8 qtable[2][64]) { int i; /* loop over y components */ for (i = 0; i < 64; i++) reg_w_riv(gspca_dev, 0x00, 0x2800 + i, qtable[0][i]); /* loop over c components */ for (i = 0; i < 64; i++) reg_w_riv(gspca_dev, 0x00, 0x2840 + i, qtable[1][i]); } static void spca504_acknowledged_command(struct gspca_dev *gspca_dev, u8 req, u16 idx, u16 val) { reg_w_riv(gspca_dev, req, idx, val); reg_r(gspca_dev, 0x01, 0x0001, 1); gspca_dbg(gspca_dev, D_FRAM, "before wait 0x%04x\n", gspca_dev->usb_buf[0]); reg_w_riv(gspca_dev, req, idx, val); msleep(200); reg_r(gspca_dev, 0x01, 0x0001, 1); gspca_dbg(gspca_dev, D_FRAM, "after wait 0x%04x\n", gspca_dev->usb_buf[0]); } static void spca504_read_info(struct gspca_dev *gspca_dev) { int i; u8 info[6]; if (gspca_debug < D_STREAM) 
return; for (i = 0; i < 6; i++) { reg_r(gspca_dev, 0, i, 1); info[i] = gspca_dev->usb_buf[0]; } gspca_dbg(gspca_dev, D_STREAM, "Read info: %d %d %d %d %d %d. Should be 1,0,2,2,0,0\n", info[0], info[1], info[2], info[3], info[4], info[5]); } static void spca504A_acknowledged_command(struct gspca_dev *gspca_dev, u8 req, u16 idx, u16 val, u8 endcode, u8 count) { u16 status; reg_w_riv(gspca_dev, req, idx, val); reg_r(gspca_dev, 0x01, 0x0001, 1); if (gspca_dev->usb_err < 0) return; gspca_dbg(gspca_dev, D_FRAM, "Status 0x%02x Need 0x%02x\n", gspca_dev->usb_buf[0], endcode); if (!count) return; count = 200; while (--count > 0) { msleep(10); /* gsmart mini2 write a each wait setting 1 ms is enough */ /* reg_w_riv(gspca_dev, req, idx, val); */ reg_r(gspca_dev, 0x01, 0x0001, 1); status = gspca_dev->usb_buf[0]; if (status == endcode) { gspca_dbg(gspca_dev, D_FRAM, "status 0x%04x after wait %d\n", status, 200 - count); break; } } } static void spca504B_PollingDataReady(struct gspca_dev *gspca_dev) { int count = 10; while (--count > 0) { reg_r(gspca_dev, 0x21, 0, 1); if ((gspca_dev->usb_buf[0] & 0x01) == 0) break; msleep(10); } } static void spca504B_WaitCmdStatus(struct gspca_dev *gspca_dev) { int count = 50; while (--count > 0) { reg_r(gspca_dev, 0x21, 1, 1); if (gspca_dev->usb_buf[0] != 0) { reg_w_1(gspca_dev, 0x21, 0, 1, 0); reg_r(gspca_dev, 0x21, 1, 1); spca504B_PollingDataReady(gspca_dev); break; } msleep(10); } } static void spca50x_GetFirmware(struct gspca_dev *gspca_dev) { u8 *data; if (gspca_debug < D_STREAM) return; data = gspca_dev->usb_buf; reg_r(gspca_dev, 0x20, 0, 5); gspca_dbg(gspca_dev, D_STREAM, "FirmWare: %d %d %d %d %d\n", data[0], data[1], data[2], data[3], data[4]); reg_r(gspca_dev, 0x23, 0, 64); reg_r(gspca_dev, 0x23, 1, 64); } static void spca504B_SetSizeType(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; u8 Size; Size = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv; switch (sd->bridge) { case BRIDGE_SPCA533: reg_w_riv(gspca_dev, 0x31, 0, 0); spca504B_WaitCmdStatus(gspca_dev); spca504B_PollingDataReady(gspca_dev); spca50x_GetFirmware(gspca_dev); reg_w_1(gspca_dev, 0x24, 0, 8, 2); /* type */ reg_r(gspca_dev, 0x24, 8, 1); reg_w_1(gspca_dev, 0x25, 0, 4, Size); reg_r(gspca_dev, 0x25, 4, 1); /* size */ spca504B_PollingDataReady(gspca_dev); /* Init the cam width height with some values get on init ? 
*/ reg_w_riv(gspca_dev, 0x31, 0x0004, 0x00); spca504B_WaitCmdStatus(gspca_dev); spca504B_PollingDataReady(gspca_dev); break; default: /* case BRIDGE_SPCA504B: */ /* case BRIDGE_SPCA536: */ reg_w_1(gspca_dev, 0x25, 0, 4, Size); reg_r(gspca_dev, 0x25, 4, 1); /* size */ reg_w_1(gspca_dev, 0x27, 0, 0, 6); reg_r(gspca_dev, 0x27, 0, 1); /* type */ spca504B_PollingDataReady(gspca_dev); break; case BRIDGE_SPCA504: Size += 3; if (sd->subtype == AiptekMiniPenCam13) { /* spca504a aiptek */ spca504A_acknowledged_command(gspca_dev, 0x08, Size, 0, 0x80 | (Size & 0x0f), 1); spca504A_acknowledged_command(gspca_dev, 1, 3, 0, 0x9f, 0); } else { spca504_acknowledged_command(gspca_dev, 0x08, Size, 0); } break; case BRIDGE_SPCA504C: /* capture mode */ reg_w_riv(gspca_dev, 0xa0, (0x0500 | (Size & 0x0f)), 0x00); reg_w_riv(gspca_dev, 0x20, 0x01, 0x0500 | (Size & 0x0f)); break; } } static void spca504_wait_status(struct gspca_dev *gspca_dev) { int cnt; cnt = 256; while (--cnt > 0) { /* With this we get the status, when return 0 it's all ok */ reg_r(gspca_dev, 0x06, 0x00, 1); if (gspca_dev->usb_buf[0] == 0) return; msleep(10); } } static void spca504B_setQtable(struct gspca_dev *gspca_dev) { reg_w_1(gspca_dev, 0x26, 0, 0, 3); reg_r(gspca_dev, 0x26, 0, 1); spca504B_PollingDataReady(gspca_dev); } static void setbrightness(struct gspca_dev *gspca_dev, s32 val) { struct sd *sd = (struct sd *) gspca_dev; u16 reg; reg = sd->bridge == BRIDGE_SPCA536 ? 0x20f0 : 0x21a7; reg_w_riv(gspca_dev, 0x00, reg, val); } static void setcontrast(struct gspca_dev *gspca_dev, s32 val) { struct sd *sd = (struct sd *) gspca_dev; u16 reg; reg = sd->bridge == BRIDGE_SPCA536 ? 0x20f1 : 0x21a8; reg_w_riv(gspca_dev, 0x00, reg, val); } static void setcolors(struct gspca_dev *gspca_dev, s32 val) { struct sd *sd = (struct sd *) gspca_dev; u16 reg; reg = sd->bridge == BRIDGE_SPCA536 ? 
0x20f6 : 0x21ae; reg_w_riv(gspca_dev, 0x00, reg, val); } static void init_ctl_reg(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int pollreg = 1; switch (sd->bridge) { case BRIDGE_SPCA504: case BRIDGE_SPCA504C: pollreg = 0; fallthrough; default: /* case BRIDGE_SPCA533: */ /* case BRIDGE_SPCA504B: */ reg_w_riv(gspca_dev, 0, 0x21ad, 0x00); /* hue */ reg_w_riv(gspca_dev, 0, 0x21ac, 0x01); /* sat/hue */ reg_w_riv(gspca_dev, 0, 0x21a3, 0x00); /* gamma */ break; case BRIDGE_SPCA536: reg_w_riv(gspca_dev, 0, 0x20f5, 0x40); reg_w_riv(gspca_dev, 0, 0x20f4, 0x01); reg_w_riv(gspca_dev, 0, 0x2089, 0x00); break; } if (pollreg) spca504B_PollingDataReady(gspca_dev); } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam; cam = &gspca_dev->cam; sd->bridge = id->driver_info >> 8; sd->subtype = id->driver_info; if (sd->subtype == AiptekMiniPenCam13) { /* try to get the firmware as some cam answer 2.0.1.2.2 * and should be a spca504b then overwrite that setting */ reg_r(gspca_dev, 0x20, 0, 1); switch (gspca_dev->usb_buf[0]) { case 1: break; /* (right bridge/subtype) */ case 2: sd->bridge = BRIDGE_SPCA504B; sd->subtype = 0; break; default: return -ENODEV; } } switch (sd->bridge) { default: /* case BRIDGE_SPCA504B: */ /* case BRIDGE_SPCA504: */ /* case BRIDGE_SPCA536: */ cam->cam_mode = vga_mode; cam->nmodes = ARRAY_SIZE(vga_mode); break; case BRIDGE_SPCA533: cam->cam_mode = custom_mode; if (sd->subtype == MegaImageVI) /* 320x240 only */ cam->nmodes = ARRAY_SIZE(custom_mode) - 1; else cam->nmodes = ARRAY_SIZE(custom_mode); break; case BRIDGE_SPCA504C: cam->cam_mode = vga_mode2; cam->nmodes = ARRAY_SIZE(vga_mode2); break; } return 0; } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; switch (sd->bridge) { case BRIDGE_SPCA504B: reg_w_riv(gspca_dev, 0x1d, 0x00, 0); reg_w_riv(gspca_dev, 0x00, 0x2306, 0x01); reg_w_riv(gspca_dev, 0x00, 0x0d04, 0x00); reg_w_riv(gspca_dev, 0x00, 0x2000, 0x00); reg_w_riv(gspca_dev, 0x00, 0x2301, 0x13); reg_w_riv(gspca_dev, 0x00, 0x2306, 0x00); fallthrough; case BRIDGE_SPCA533: spca504B_PollingDataReady(gspca_dev); spca50x_GetFirmware(gspca_dev); break; case BRIDGE_SPCA536: spca50x_GetFirmware(gspca_dev); reg_r(gspca_dev, 0x00, 0x5002, 1); reg_w_1(gspca_dev, 0x24, 0, 0, 0); reg_r(gspca_dev, 0x24, 0, 1); spca504B_PollingDataReady(gspca_dev); reg_w_riv(gspca_dev, 0x34, 0, 0); spca504B_WaitCmdStatus(gspca_dev); break; case BRIDGE_SPCA504C: /* pccam600 */ gspca_dbg(gspca_dev, D_STREAM, "Opening SPCA504 (PC-CAM 600)\n"); reg_w_riv(gspca_dev, 0xe0, 0x0000, 0x0000); reg_w_riv(gspca_dev, 0xe0, 0x0000, 0x0001); /* reset */ spca504_wait_status(gspca_dev); if (sd->subtype == LogitechClickSmart420) write_vector(gspca_dev, spca504A_clicksmart420_open_data, ARRAY_SIZE(spca504A_clicksmart420_open_data)); else write_vector(gspca_dev, spca504_pccam600_open_data, ARRAY_SIZE(spca504_pccam600_open_data)); setup_qtable(gspca_dev, qtable_creative_pccam); break; default: /* case BRIDGE_SPCA504: */ gspca_dbg(gspca_dev, D_STREAM, "Opening SPCA504\n"); if (sd->subtype == AiptekMiniPenCam13) { spca504_read_info(gspca_dev); /* Set AE AWB Banding Type 3-> 50Hz 2-> 60Hz */ spca504A_acknowledged_command(gspca_dev, 0x24, 8, 3, 0x9e, 1); /* Twice sequential need status 0xff->0x9e->0x9d */ spca504A_acknowledged_command(gspca_dev, 0x24, 8, 3, 0x9e, 0); 
spca504A_acknowledged_command(gspca_dev, 0x24, 0, 0, 0x9d, 1); /******************************/ /* spca504a aiptek */ spca504A_acknowledged_command(gspca_dev, 0x08, 6, 0, 0x86, 1); /* reg_write (dev, 0, 0x2000, 0); */ /* reg_write (dev, 0, 0x2883, 1); */ /* spca504A_acknowledged_command (gspca_dev, 0x08, 6, 0, 0x86, 1); */ /* spca504A_acknowledged_command (gspca_dev, 0x24, 0, 0, 0x9D, 1); */ reg_w_riv(gspca_dev, 0x00, 0x270c, 0x05); /* L92 sno1t.txt */ reg_w_riv(gspca_dev, 0x00, 0x2310, 0x05); spca504A_acknowledged_command(gspca_dev, 0x01, 0x0f, 0, 0xff, 0); } /* setup qtable */ reg_w_riv(gspca_dev, 0, 0x2000, 0); reg_w_riv(gspca_dev, 0, 0x2883, 1); setup_qtable(gspca_dev, qtable_spca504_default); break; } return gspca_dev->usb_err; } static int sd_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int enable; /* create the JPEG header */ jpeg_define(sd->jpeg_hdr, gspca_dev->pixfmt.height, gspca_dev->pixfmt.width, 0x22); /* JPEG 411 */ jpeg_set_qual(sd->jpeg_hdr, QUALITY); if (sd->bridge == BRIDGE_SPCA504B) spca504B_setQtable(gspca_dev); spca504B_SetSizeType(gspca_dev); switch (sd->bridge) { default: /* case BRIDGE_SPCA504B: */ /* case BRIDGE_SPCA533: */ /* case BRIDGE_SPCA536: */ switch (sd->subtype) { case MegapixV4: case LogitechClickSmart820: case MegaImageVI: reg_w_riv(gspca_dev, 0xf0, 0, 0); spca504B_WaitCmdStatus(gspca_dev); reg_w_riv(gspca_dev, 0xf0, 4, 0); spca504B_WaitCmdStatus(gspca_dev); break; default: reg_w_riv(gspca_dev, 0x31, 0x0004, 0x00); spca504B_WaitCmdStatus(gspca_dev); spca504B_PollingDataReady(gspca_dev); break; } break; case BRIDGE_SPCA504: if (sd->subtype == AiptekMiniPenCam13) { spca504_read_info(gspca_dev); /* Set AE AWB Banding Type 3-> 50Hz 2-> 60Hz */ spca504A_acknowledged_command(gspca_dev, 0x24, 8, 3, 0x9e, 1); /* Twice sequential need status 0xff->0x9e->0x9d */ spca504A_acknowledged_command(gspca_dev, 0x24, 8, 3, 0x9e, 0); spca504A_acknowledged_command(gspca_dev, 0x24, 0, 0, 0x9d, 1); } else { spca504_acknowledged_command(gspca_dev, 0x24, 8, 3); spca504_read_info(gspca_dev); spca504_acknowledged_command(gspca_dev, 0x24, 8, 3); spca504_acknowledged_command(gspca_dev, 0x24, 0, 0); } spca504B_SetSizeType(gspca_dev); reg_w_riv(gspca_dev, 0x00, 0x270c, 0x05); /* L92 sno1t.txt */ reg_w_riv(gspca_dev, 0x00, 0x2310, 0x05); break; case BRIDGE_SPCA504C: if (sd->subtype == LogitechClickSmart420) { write_vector(gspca_dev, spca504A_clicksmart420_init_data, ARRAY_SIZE(spca504A_clicksmart420_init_data)); } else { write_vector(gspca_dev, spca504_pccam600_init_data, ARRAY_SIZE(spca504_pccam600_init_data)); } enable = (sd->autogain ? 
0x04 : 0x01); reg_w_riv(gspca_dev, 0x0c, 0x0000, enable); /* auto exposure */ reg_w_riv(gspca_dev, 0xb0, 0x0000, enable); /* auto whiteness */ /* set default exposure compensation and whiteness balance */ reg_w_riv(gspca_dev, 0x30, 0x0001, 800); /* ~ 20 fps */ reg_w_riv(gspca_dev, 0x30, 0x0002, 1600); spca504B_SetSizeType(gspca_dev); break; } init_ctl_reg(gspca_dev); return gspca_dev->usb_err; } static void sd_stopN(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; switch (sd->bridge) { default: /* case BRIDGE_SPCA533: */ /* case BRIDGE_SPCA536: */ /* case BRIDGE_SPCA504B: */ reg_w_riv(gspca_dev, 0x31, 0, 0); spca504B_WaitCmdStatus(gspca_dev); spca504B_PollingDataReady(gspca_dev); break; case BRIDGE_SPCA504: case BRIDGE_SPCA504C: reg_w_riv(gspca_dev, 0x00, 0x2000, 0x0000); if (sd->subtype == AiptekMiniPenCam13) { /* spca504a aiptek */ /* spca504A_acknowledged_command(gspca_dev, 0x08, 6, 0, 0x86, 1); */ spca504A_acknowledged_command(gspca_dev, 0x24, 0x00, 0x00, 0x9d, 1); spca504A_acknowledged_command(gspca_dev, 0x01, 0x0f, 0x00, 0xff, 1); } else { spca504_acknowledged_command(gspca_dev, 0x24, 0, 0); reg_w_riv(gspca_dev, 0x01, 0x000f, 0x0000); } break; } } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { struct sd *sd = (struct sd *) gspca_dev; int i, sof = 0; static u8 ffd9[] = {0xff, 0xd9}; /* frames are jpeg 4.1.1 without 0xff escape */ switch (sd->bridge) { case BRIDGE_SPCA533: if (data[0] == 0xff) { if (data[1] != 0x01) { /* drop packet */ /* gspca_dev->last_packet_type = DISCARD_PACKET; */ return; } sof = 1; data += SPCA533_OFFSET_DATA; len -= SPCA533_OFFSET_DATA; } else { data += 1; len -= 1; } break; case BRIDGE_SPCA536: if (data[0] == 0xff) { sof = 1; data += SPCA536_OFFSET_DATA; len -= SPCA536_OFFSET_DATA; } else { data += 2; len -= 2; } break; default: /* case BRIDGE_SPCA504: */ /* case BRIDGE_SPCA504B: */ switch (data[0]) { case 0xfe: /* start of frame */ sof = 1; data += SPCA50X_OFFSET_DATA; len -= SPCA50X_OFFSET_DATA; break; case 0xff: /* drop packet */ /* gspca_dev->last_packet_type = DISCARD_PACKET; */ return; default: data += 1; len -= 1; break; } break; case BRIDGE_SPCA504C: switch (data[0]) { case 0xfe: /* start of frame */ sof = 1; data += SPCA504_PCCAM600_OFFSET_DATA; len -= SPCA504_PCCAM600_OFFSET_DATA; break; case 0xff: /* drop packet */ /* gspca_dev->last_packet_type = DISCARD_PACKET; */ return; default: data += 1; len -= 1; break; } break; } if (sof) { /* start of frame */ gspca_frame_add(gspca_dev, LAST_PACKET, ffd9, 2); /* put the JPEG header in the new frame */ gspca_frame_add(gspca_dev, FIRST_PACKET, sd->jpeg_hdr, JPEG_HDR_SZ); } /* add 0x00 after 0xff */ i = 0; do { if (data[i] == 0xff) { gspca_frame_add(gspca_dev, INTER_PACKET, data, i + 1); len -= i; data += i; *data = 0x00; i = 0; } i++; } while (i < len); gspca_frame_add(gspca_dev, INTER_PACKET, data, len); } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); struct sd *sd = (struct sd *)gspca_dev; gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: setbrightness(gspca_dev, ctrl->val); break; case V4L2_CID_CONTRAST: setcontrast(gspca_dev, ctrl->val); break; case V4L2_CID_SATURATION: setcolors(gspca_dev, ctrl->val); break; case V4L2_CID_AUTOGAIN: sd->autogain = ctrl->val; break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; 
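The trailing loop in sd_pkt_scan() implements JPEG byte stuffing without a temporary buffer: each chunk is emitted up to and including a 0xff, then the slot that held that 0xff is rewritten to 0x00 and re-emitted at the head of the next chunk, which effectively inserts a 0x00 after every 0xff. A self-contained sketch of the same idea, operating on a plain buffer with a hypothetical emit() callback standing in for gspca_frame_add(), might look like this; it is illustrative only, not part of the driver.

/* Illustrative only: mirrors the 0xff -> 0xff 0x00 stuffing loop in
 * sd_pkt_scan(), with emit() standing in for
 * gspca_frame_add(..., INTER_PACKET, ...). */
static void jpeg_stuff_ff(u8 *data, int len,
                          void (*emit)(const u8 *buf, int n))
{
        int i = 0;

        do {
                if (data[i] == 0xff) {
                        emit(data, i + 1);      /* chunk ending with 0xff */
                        len -= i;
                        data += i;
                        *data = 0x00;           /* re-sent as the stuffed 0x00 */
                        i = 0;
                }
                i++;
        } while (i < len);
        emit(data, len);                        /* remainder of the packet */
}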
static int sd_init_controls(struct gspca_dev *gspca_dev) { struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 4); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BRIGHTNESS, -128, 127, 1, 0); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_CONTRAST, 0, 255, 1, 0x20); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_SATURATION, 0, 255, 1, 0x1a); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_AUTOGAIN, 0, 1, 1, 1); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } return 0; } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .start = sd_start, .stopN = sd_stopN, .pkt_scan = sd_pkt_scan, }; /* -- module initialisation -- */ #define BS(bridge, subtype) \ .driver_info = (BRIDGE_ ## bridge << 8) \ | (subtype) static const struct usb_device_id device_table[] = { {USB_DEVICE(0x041e, 0x400b), BS(SPCA504C, 0)}, {USB_DEVICE(0x041e, 0x4012), BS(SPCA504C, 0)}, {USB_DEVICE(0x041e, 0x4013), BS(SPCA504C, 0)}, {USB_DEVICE(0x0458, 0x7006), BS(SPCA504B, 0)}, {USB_DEVICE(0x0461, 0x0821), BS(SPCA533, 0)}, {USB_DEVICE(0x046d, 0x0905), BS(SPCA533, LogitechClickSmart820)}, {USB_DEVICE(0x046d, 0x0960), BS(SPCA504C, LogitechClickSmart420)}, {USB_DEVICE(0x0471, 0x0322), BS(SPCA504B, 0)}, {USB_DEVICE(0x04a5, 0x3003), BS(SPCA504B, 0)}, {USB_DEVICE(0x04a5, 0x3008), BS(SPCA533, 0)}, {USB_DEVICE(0x04a5, 0x300a), BS(SPCA533, 0)}, {USB_DEVICE(0x04f1, 0x1001), BS(SPCA504B, 0)}, {USB_DEVICE(0x04fc, 0x500c), BS(SPCA504B, 0)}, {USB_DEVICE(0x04fc, 0x504a), BS(SPCA504, AiptekMiniPenCam13)}, {USB_DEVICE(0x04fc, 0x504b), BS(SPCA504B, 0)}, {USB_DEVICE(0x04fc, 0x5330), BS(SPCA533, 0)}, {USB_DEVICE(0x04fc, 0x5360), BS(SPCA536, 0)}, {USB_DEVICE(0x04fc, 0xffff), BS(SPCA504B, 0)}, {USB_DEVICE(0x052b, 0x1507), BS(SPCA533, MegapixV4)}, {USB_DEVICE(0x052b, 0x1513), BS(SPCA533, MegapixV4)}, {USB_DEVICE(0x052b, 0x1803), BS(SPCA533, MegaImageVI)}, {USB_DEVICE(0x0546, 0x3155), BS(SPCA533, 0)}, {USB_DEVICE(0x0546, 0x3191), BS(SPCA504B, 0)}, {USB_DEVICE(0x0546, 0x3273), BS(SPCA504B, 0)}, {USB_DEVICE(0x055f, 0xc211), BS(SPCA536, 0)}, {USB_DEVICE(0x055f, 0xc230), BS(SPCA533, 0)}, {USB_DEVICE(0x055f, 0xc232), BS(SPCA533, 0)}, {USB_DEVICE(0x055f, 0xc360), BS(SPCA536, 0)}, {USB_DEVICE(0x055f, 0xc420), BS(SPCA504, 0)}, {USB_DEVICE(0x055f, 0xc430), BS(SPCA533, 0)}, {USB_DEVICE(0x055f, 0xc440), BS(SPCA533, 0)}, {USB_DEVICE(0x055f, 0xc520), BS(SPCA504, 0)}, {USB_DEVICE(0x055f, 0xc530), BS(SPCA533, 0)}, {USB_DEVICE(0x055f, 0xc540), BS(SPCA533, 0)}, {USB_DEVICE(0x055f, 0xc630), BS(SPCA533, 0)}, {USB_DEVICE(0x055f, 0xc650), BS(SPCA533, 0)}, {USB_DEVICE(0x05da, 0x1018), BS(SPCA504B, 0)}, {USB_DEVICE(0x06d6, 0x0031), BS(SPCA533, 0)}, {USB_DEVICE(0x06d6, 0x0041), BS(SPCA504B, 0)}, {USB_DEVICE(0x0733, 0x1311), BS(SPCA533, 0)}, {USB_DEVICE(0x0733, 0x1314), BS(SPCA533, 0)}, {USB_DEVICE(0x0733, 0x2211), BS(SPCA533, 0)}, {USB_DEVICE(0x0733, 0x2221), BS(SPCA533, 0)}, {USB_DEVICE(0x0733, 0x3261), BS(SPCA536, 0)}, {USB_DEVICE(0x0733, 0x3281), BS(SPCA536, 0)}, {USB_DEVICE(0x08ca, 0x0104), BS(SPCA533, 0)}, {USB_DEVICE(0x08ca, 0x0106), BS(SPCA533, 0)}, {USB_DEVICE(0x08ca, 0x2008), BS(SPCA504B, 0)}, {USB_DEVICE(0x08ca, 0x2010), BS(SPCA533, 0)}, {USB_DEVICE(0x08ca, 0x2016), BS(SPCA504B, 0)}, {USB_DEVICE(0x08ca, 0x2018), BS(SPCA504B, 0)}, {USB_DEVICE(0x08ca, 0x2020), BS(SPCA533, 0)}, {USB_DEVICE(0x08ca, 0x2022), BS(SPCA533, 0)}, {USB_DEVICE(0x08ca, 0x2024), BS(SPCA536, 
0)}, {USB_DEVICE(0x08ca, 0x2028), BS(SPCA533, 0)}, {USB_DEVICE(0x08ca, 0x2040), BS(SPCA536, 0)}, {USB_DEVICE(0x08ca, 0x2042), BS(SPCA536, 0)}, {USB_DEVICE(0x08ca, 0x2050), BS(SPCA536, 0)}, {USB_DEVICE(0x08ca, 0x2060), BS(SPCA536, 0)}, {USB_DEVICE(0x0d64, 0x0303), BS(SPCA536, 0)}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
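For reference, the BS() macro used in device_table packs the bridge constant into bits 15..8 of driver_info and the subtype into bits 7..0; sd_config() unpacks them with "id->driver_info >> 8" and a truncating assignment. A small sketch of equivalent decode helpers follows; the helper names are illustrative and not part of the driver.

/* Illustrative decode of the BS(bridge, subtype) encoding above. */
static inline u8 bs_bridge(unsigned long driver_info)
{
        return driver_info >> 8;        /* BRIDGE_SPCA5xx value */
}

static inline u8 bs_subtype(unsigned long driver_info)
{
        return driver_info & 0xff;      /* e.g. AiptekMiniPenCam13 */
}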
/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
#ifndef _LINUX_RSEQ_H
#define _LINUX_RSEQ_H

#ifdef CONFIG_RSEQ

#include <linux/preempt.h>
#include <linux/sched.h>

/*
 * Map the event mask on the user-space ABI enum rseq_cs_flags
 * for direct mask checks.
 */
enum rseq_event_mask_bits {
        RSEQ_EVENT_PREEMPT_BIT  = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT,
        RSEQ_EVENT_SIGNAL_BIT   = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT,
        RSEQ_EVENT_MIGRATE_BIT  = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT,
};

enum rseq_event_mask {
        RSEQ_EVENT_PREEMPT      = (1U << RSEQ_EVENT_PREEMPT_BIT),
        RSEQ_EVENT_SIGNAL       = (1U << RSEQ_EVENT_SIGNAL_BIT),
        RSEQ_EVENT_MIGRATE      = (1U << RSEQ_EVENT_MIGRATE_BIT),
};

static inline void rseq_set_notify_resume(struct task_struct *t)
{
        if (t->rseq)
                set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
}

void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs);

static inline void rseq_handle_notify_resume(struct ksignal *ksig,
                                             struct pt_regs *regs)
{
        if (current->rseq)
                __rseq_handle_notify_resume(ksig, regs);
}

static inline void rseq_signal_deliver(struct ksignal *ksig,
                                       struct pt_regs *regs)
{
        preempt_disable();
        __set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
        preempt_enable();
        rseq_handle_notify_resume(ksig, regs);
}

/* rseq_preempt() requires preemption to be disabled. */
static inline void rseq_preempt(struct task_struct *t)
{
        __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask);
        rseq_set_notify_resume(t);
}

/* rseq_migrate() requires preemption to be disabled. */
static inline void rseq_migrate(struct task_struct *t)
{
        __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask);
        rseq_set_notify_resume(t);
}

/*
 * If parent process has a registered restartable sequences area, the
 * child inherits. Unregister rseq for a clone with CLONE_VM set.
 */
static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
{
        if (clone_flags & CLONE_VM) {
                t->rseq = NULL;
                t->rseq_len = 0;
                t->rseq_sig = 0;
                t->rseq_event_mask = 0;
        } else {
                t->rseq = current->rseq;
                t->rseq_len = current->rseq_len;
                t->rseq_sig = current->rseq_sig;
                t->rseq_event_mask = current->rseq_event_mask;
        }
}

static inline void rseq_execve(struct task_struct *t)
{
        t->rseq = NULL;
        t->rseq_len = 0;
        t->rseq_sig = 0;
        t->rseq_event_mask = 0;
}

#else

static inline void rseq_set_notify_resume(struct task_struct *t)
{
}
static inline void rseq_handle_notify_resume(struct ksignal *ksig,
                                             struct pt_regs *regs)
{
}
static inline void rseq_signal_deliver(struct ksignal *ksig,
                                       struct pt_regs *regs)
{
}
static inline void rseq_preempt(struct task_struct *t)
{
}
static inline void rseq_migrate(struct task_struct *t)
{
}
static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
{
}
static inline void rseq_execve(struct task_struct *t)
{
}

#endif

#ifdef CONFIG_DEBUG_RSEQ

void rseq_syscall(struct pt_regs *regs);

#else

static inline void rseq_syscall(struct pt_regs *regs)
{
}

#endif

#endif /* _LINUX_RSEQ_H */
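Because rseq_event_mask_bits reuses the user-space RSEQ_CS_FLAG_NO_RESTART_ON_* bit positions, a kernel-side event mask can be compared directly against the flags a task registered for its critical section, as the header comment notes. A minimal sketch of such a direct mask check is given below; the helper name and exact semantics are illustrative assumptions, not part of this header.

/* Hypothetical helper: returns true if any recorded event is not covered
 * by a matching NO_RESTART flag and therefore forces a restart, relying on
 * the shared bit layout established by enum rseq_event_mask_bits. */
static inline bool rseq_event_needs_restart(u32 cs_flags, u32 event_mask)
{
        return !!(event_mask & ~cs_flags);
}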
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * RAW sockets for IPv6
 * Linux INET6 implementation
 *
 * Authors:
 * Pedro Roque <roque@di.fc.ul.pt>
 *
 * Adapted from linux/net/ipv4/raw.c
 *
 * Fixes:
 * Hideaki YOSHIFUJI : sin6_scope_id support
 * YOSHIFUJI,H.@USAGI : raw checksum (RFC2292(bis) compliance)
 * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data
 */

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/slab.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/icmpv6.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
#include <linux/skbuff.h>
#include <linux/compat.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>

#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include
<net/addrconf.h> #include <net/transp_v6.h> #include <net/udp.h> #include <net/inet_common.h> #include <net/tcp_states.h> #if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/mip6.h> #endif #include <linux/mroute6.h> #include <net/raw.h> #include <net/rawv6.h> #include <net/xfrm.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/export.h> #define ICMPV6_HDRLEN 4 /* ICMPv6 header, RFC 4443 Section 2.1 */ struct raw_hashinfo raw_v6_hashinfo; EXPORT_SYMBOL_GPL(raw_v6_hashinfo); bool raw_v6_match(struct net *net, const struct sock *sk, unsigned short num, const struct in6_addr *loc_addr, const struct in6_addr *rmt_addr, int dif, int sdif) { if (inet_sk(sk)->inet_num != num || !net_eq(sock_net(sk), net) || (!ipv6_addr_any(&sk->sk_v6_daddr) && !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) || !raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) return false; if (ipv6_addr_any(&sk->sk_v6_rcv_saddr) || ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr) || (ipv6_addr_is_multicast(loc_addr) && inet6_mc_check(sk, loc_addr, rmt_addr))) return true; return false; } EXPORT_SYMBOL_GPL(raw_v6_match); /* * 0 - deliver * 1 - block */ static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb) { struct icmp6hdr _hdr; const struct icmp6hdr *hdr; /* We require only the four bytes of the ICMPv6 header, not any * additional bytes of message body in "struct icmp6hdr". */ hdr = skb_header_pointer(skb, skb_transport_offset(skb), ICMPV6_HDRLEN, &_hdr); if (hdr) { const __u32 *data = &raw6_sk(sk)->filter.data[0]; unsigned int type = hdr->icmp6_type; return (data[type >> 5] & (1U << (type & 31))) != 0; } return 1; } #if IS_ENABLED(CONFIG_IPV6_MIP6) typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb); static mh_filter_t __rcu *mh_filter __read_mostly; int rawv6_mh_filter_register(mh_filter_t filter) { rcu_assign_pointer(mh_filter, filter); return 0; } EXPORT_SYMBOL(rawv6_mh_filter_register); int rawv6_mh_filter_unregister(mh_filter_t filter) { RCU_INIT_POINTER(mh_filter, NULL); synchronize_rcu(); return 0; } EXPORT_SYMBOL(rawv6_mh_filter_unregister); #endif /* * demultiplex raw sockets. * (should consider queueing the skb in the sock receive_queue * without calling rawv6.c) * * Caller owns SKB so we must make clones. */ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) { struct net *net = dev_net(skb->dev); const struct in6_addr *saddr; const struct in6_addr *daddr; struct hlist_head *hlist; struct sock *sk; bool delivered = false; __u8 hash; saddr = &ipv6_hdr(skb)->saddr; daddr = saddr + 1; hash = raw_hashfunc(net, nexthdr); hlist = &raw_v6_hashinfo.ht[hash]; rcu_read_lock(); sk_for_each_rcu(sk, hlist) { int filtered; if (!raw_v6_match(net, sk, nexthdr, daddr, saddr, inet6_iif(skb), inet6_sdif(skb))) continue; if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) { atomic_inc(&sk->sk_drops); continue; } delivered = true; switch (nexthdr) { case IPPROTO_ICMPV6: filtered = icmpv6_filter(sk, skb); break; #if IS_ENABLED(CONFIG_IPV6_MIP6) case IPPROTO_MH: { /* XXX: To validate MH only once for each packet, * this is placed here. It should be after checking * xfrm policy, however it doesn't. The checking xfrm * policy is placed in rawv6_rcv() because it is * required for each socket. */ mh_filter_t *filter; filter = rcu_dereference(mh_filter); filtered = filter ? 
(*filter)(sk, skb) : 0; break; } #endif default: filtered = 0; break; } if (filtered < 0) break; if (filtered == 0) { struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); /* Not releasing hash table! */ if (clone) rawv6_rcv(sk, clone); } } rcu_read_unlock(); return delivered; } bool raw6_local_deliver(struct sk_buff *skb, int nexthdr) { return ipv6_raw_deliver(skb, nexthdr); } /* This cleans up af_inet6 a bit. -DaveM */ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; __be32 v4addr = 0; int addr_type; int err; if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; if (addr->sin6_family != AF_INET6) return -EINVAL; addr_type = ipv6_addr_type(&addr->sin6_addr); /* Raw sockets are IPv6 only */ if (addr_type == IPV6_ADDR_MAPPED) return -EADDRNOTAVAIL; lock_sock(sk); err = -EINVAL; if (sk->sk_state != TCP_CLOSE) goto out; rcu_read_lock(); /* Check if the address belongs to the host. */ if (addr_type != IPV6_ADDR_ANY) { struct net_device *dev = NULL; if (__ipv6_addr_needs_scope_id(addr_type)) { if (addr_len >= sizeof(struct sockaddr_in6) && addr->sin6_scope_id) { /* Override any existing binding, if another * one is supplied by user. */ sk->sk_bound_dev_if = addr->sin6_scope_id; } /* Binding to link-local address requires an interface */ if (!sk->sk_bound_dev_if) goto out_unlock; } if (sk->sk_bound_dev_if) { err = -ENODEV; dev = dev_get_by_index_rcu(sock_net(sk), sk->sk_bound_dev_if); if (!dev) goto out_unlock; } /* ipv4 addr of the socket is invalid. Only the * unspecified and mapped address have a v4 equivalent. */ v4addr = LOOPBACK4_IPV6; if (!(addr_type & IPV6_ADDR_MULTICAST) && !ipv6_can_nonlocal_bind(sock_net(sk), inet)) { err = -EADDRNOTAVAIL; if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr, dev, 0)) { goto out_unlock; } } } inet->inet_rcv_saddr = inet->inet_saddr = v4addr; sk->sk_v6_rcv_saddr = addr->sin6_addr; if (!(addr_type & IPV6_ADDR_MULTICAST)) np->saddr = addr->sin6_addr; err = 0; out_unlock: rcu_read_unlock(); out: release_sock(sk); return err; } static void rawv6_err(struct sock *sk, struct sk_buff *skb, u8 type, u8 code, int offset, __be32 info) { bool recverr = inet6_test_bit(RECVERR6, sk); struct ipv6_pinfo *np = inet6_sk(sk); int err; int harderr; /* Report error on raw socket, if: 1. User requested recverr. 2. Socket is connected (otherwise the error indication is useless without recverr and error is hard. 
*/ if (!recverr && sk->sk_state != TCP_ESTABLISHED) return; harderr = icmpv6_err_convert(type, code, &err); if (type == ICMPV6_PKT_TOOBIG) { ip6_sk_update_pmtu(skb, sk, info); harderr = (READ_ONCE(np->pmtudisc) == IPV6_PMTUDISC_DO); } if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); return; } if (recverr) { u8 *payload = skb->data; if (!inet_test_bit(HDRINCL, sk)) payload += offset; ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload); } if (recverr || harderr) { sk->sk_err = err; sk_error_report(sk); } } void raw6_icmp_error(struct sk_buff *skb, int nexthdr, u8 type, u8 code, int inner_offset, __be32 info) { struct net *net = dev_net(skb->dev); struct hlist_head *hlist; struct sock *sk; int hash; hash = raw_hashfunc(net, nexthdr); hlist = &raw_v6_hashinfo.ht[hash]; rcu_read_lock(); sk_for_each_rcu(sk, hlist) { /* Note: ipv6_hdr(skb) != skb->data */ const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data; if (!raw_v6_match(net, sk, nexthdr, &ip6h->saddr, &ip6h->daddr, inet6_iif(skb), inet6_iif(skb))) continue; rawv6_err(sk, skb, type, code, inner_offset, info); } rcu_read_unlock(); } static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb) { enum skb_drop_reason reason; if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) && skb_checksum_complete(skb)) { atomic_inc(&sk->sk_drops); sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_SKB_CSUM); return NET_RX_DROP; } /* Charge it to the socket. */ skb_dst_drop(skb); if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { sk_skb_reason_drop(sk, skb, reason); return NET_RX_DROP; } return 0; } /* * This is next to useless... * if we demultiplex in network layer we don't need the extra call * just to queue the skb... * maybe we could have the network decide upon a hint if it * should call raw_rcv for demultiplexing */ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) { struct inet_sock *inet = inet_sk(sk); struct raw6_sock *rp = raw6_sk(sk); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { atomic_inc(&sk->sk_drops); sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_XFRM_POLICY); return NET_RX_DROP; } nf_reset_ct(skb); if (!rp->checksum) skb->ip_summed = CHECKSUM_UNNECESSARY; if (skb->ip_summed == CHECKSUM_COMPLETE) { skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); if (!csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->len, inet->inet_num, skb->csum)) skb->ip_summed = CHECKSUM_UNNECESSARY; } if (!skb_csum_unnecessary(skb)) skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->len, inet->inet_num, 0)); if (inet_test_bit(HDRINCL, sk)) { if (skb_checksum_complete(skb)) { atomic_inc(&sk->sk_drops); sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_SKB_CSUM); return NET_RX_DROP; } } rawv6_rcv_skb(sk, skb); return 0; } /* * This should be easy, if there is something there * we return it, otherwise we block. 
*/ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct sk_buff *skb; size_t copied; int err; if (flags & MSG_OOB) return -EOPNOTSUPP; if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) return ipv6_recv_rxpmtu(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; copied = skb->len; if (copied > len) { copied = len; msg->msg_flags |= MSG_TRUNC; } if (skb_csum_unnecessary(skb)) { err = skb_copy_datagram_msg(skb, 0, msg, copied); } else if (msg->msg_flags&MSG_TRUNC) { if (__skb_checksum_complete(skb)) goto csum_copy_err; err = skb_copy_datagram_msg(skb, 0, msg, copied); } else { err = skb_copy_and_csum_datagram_msg(skb, 0, msg); if (err == -EINVAL) goto csum_copy_err; } if (err) goto out_free; /* Copy the address. */ if (sin6) { sin6->sin6_family = AF_INET6; sin6->sin6_port = 0; sin6->sin6_addr = ipv6_hdr(skb)->saddr; sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, inet6_iif(skb)); *addr_len = sizeof(*sin6); } sock_recv_cmsgs(msg, sk, skb); if (np->rxopt.all) ip6_datagram_recv_ctl(sk, msg, skb); err = copied; if (flags & MSG_TRUNC) err = skb->len; out_free: skb_free_datagram(sk, skb); out: return err; csum_copy_err: skb_kill_datagram(sk, skb, flags); /* Error for blocking case is chosen to masquerade as some normal condition. */ err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH; goto out; } static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct raw6_sock *rp) { struct ipv6_txoptions *opt; struct sk_buff *skb; int err = 0; int offset; int len; int total_len; __wsum tmp_csum; __sum16 csum; if (!rp->checksum) goto send; skb = skb_peek(&sk->sk_write_queue); if (!skb) goto out; offset = rp->offset; total_len = inet_sk(sk)->cork.base.length; opt = inet6_sk(sk)->cork.opt; total_len -= opt ? opt->opt_flen : 0; if (offset >= total_len - 1) { err = -EINVAL; ip6_flush_pending_frames(sk); goto out; } /* should be check HW csum miyazawa */ if (skb_queue_len(&sk->sk_write_queue) == 1) { /* * Only one fragment on the socket. 
*/ tmp_csum = skb->csum; } else { struct sk_buff *csum_skb = NULL; tmp_csum = 0; skb_queue_walk(&sk->sk_write_queue, skb) { tmp_csum = csum_add(tmp_csum, skb->csum); if (csum_skb) continue; len = skb->len - skb_transport_offset(skb); if (offset >= len) { offset -= len; continue; } csum_skb = skb; } skb = csum_skb; } offset += skb_transport_offset(skb); err = skb_copy_bits(skb, offset, &csum, 2); if (err < 0) { ip6_flush_pending_frames(sk); goto out; } /* in case cksum was not initialized */ if (unlikely(csum)) tmp_csum = csum_sub(tmp_csum, csum_unfold(csum)); csum = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, total_len, fl6->flowi6_proto, tmp_csum); if (csum == 0 && fl6->flowi6_proto == IPPROTO_UDP) csum = CSUM_MANGLED_0; BUG_ON(skb_store_bits(skb, offset, &csum, 2)); send: err = ip6_push_pending_frames(sk); out: return err; } static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, struct flowi6 *fl6, struct dst_entry **dstp, unsigned int flags, const struct sockcm_cookie *sockc) { struct net *net = sock_net(sk); struct ipv6hdr *iph; struct sk_buff *skb; int err; struct rt6_info *rt = dst_rt6_info(*dstp); int hlen = LL_RESERVED_SPACE(rt->dst.dev); int tlen = rt->dst.dev->needed_tailroom; if (length > rt->dst.dev->mtu) { ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu); return -EMSGSIZE; } if (length < sizeof(struct ipv6hdr)) return -EINVAL; if (flags&MSG_PROBE) goto out; skb = sock_alloc_send_skb(sk, length + hlen + tlen + 15, flags & MSG_DONTWAIT, &err); if (!skb) goto error; skb_reserve(skb, hlen); skb->protocol = htons(ETH_P_IPV6); skb->priority = sockc->priority; skb->mark = sockc->mark; skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid); skb_put(skb, length); skb_reset_network_header(skb); iph = ipv6_hdr(skb); skb->ip_summed = CHECKSUM_NONE; skb_setup_tx_timestamp(skb, sockc); if (flags & MSG_CONFIRM) skb_set_dst_pending_confirm(skb, 1); skb->transport_header = skb->network_header; err = memcpy_from_msg(iph, msg, length); if (err) { err = -EFAULT; kfree_skb(skb); goto error; } skb_dst_set(skb, &rt->dst); *dstp = NULL; /* if egress device is enslaved to an L3 master device pass the * skb to its handler for processing */ skb = l3mdev_ip6_out(sk, skb); if (unlikely(!skb)) return 0; /* Acquire rcu_read_lock() in case we need to use rt->rt6i_idev * in the error path. Since skb has been freed, the dst could * have been queued for deletion. 
*/ rcu_read_lock(); IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, rt->dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); if (err) { IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); rcu_read_unlock(); goto error_check; } rcu_read_unlock(); out: return 0; error: IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); error_check: if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) err = 0; return err; } struct raw6_frag_vec { struct msghdr *msg; int hlen; char c[4]; }; static int rawv6_probe_proto_opt(struct raw6_frag_vec *rfv, struct flowi6 *fl6) { int err = 0; switch (fl6->flowi6_proto) { case IPPROTO_ICMPV6: rfv->hlen = 2; err = memcpy_from_msg(rfv->c, rfv->msg, rfv->hlen); if (!err) { fl6->fl6_icmp_type = rfv->c[0]; fl6->fl6_icmp_code = rfv->c[1]; } break; case IPPROTO_MH: rfv->hlen = 4; err = memcpy_from_msg(rfv->c, rfv->msg, rfv->hlen); if (!err) fl6->fl6_mh_type = rfv->c[2]; } return err; } static int raw6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) { struct raw6_frag_vec *rfv = from; if (offset < rfv->hlen) { int copy = min(rfv->hlen - offset, len); if (skb->ip_summed == CHECKSUM_PARTIAL) memcpy(to, rfv->c + offset, copy); else skb->csum = csum_block_add( skb->csum, csum_partial_copy_nocheck(rfv->c + offset, to, copy), odd); odd = 0; offset += copy; to += copy; len -= copy; if (!len) return 0; } offset -= rfv->hlen; return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb); } static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions *opt_to_free = NULL; struct ipv6_txoptions opt_space; DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct in6_addr *daddr, *final_p, final; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct raw6_sock *rp = raw6_sk(sk); struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; struct dst_entry *dst = NULL; struct raw6_frag_vec rfv; struct flowi6 fl6; struct ipcm6_cookie ipc6; int addr_len = msg->msg_namelen; int hdrincl; u16 proto; int err; /* Rough check on arithmetic overflow, better check is made in ip6_append_data(). */ if (len > INT_MAX) return -EMSGSIZE; /* Mirror BSD error message compatibility */ if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; hdrincl = inet_test_bit(HDRINCL, sk); ipcm6_init_sk(&ipc6, sk); /* * Get and verify the address. */ memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = ipc6.sockc.mark; fl6.flowi6_uid = sk->sk_uid; if (sin6) { if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; if (sin6->sin6_family && sin6->sin6_family != AF_INET6) return -EAFNOSUPPORT; /* port is the proto value [0..255] carried in nexthdr */ proto = ntohs(sin6->sin6_port); if (!proto) proto = inet->inet_num; else if (proto != inet->inet_num && inet->inet_num != IPPROTO_RAW) return -EINVAL; if (proto > 255) return -EINVAL; daddr = &sin6->sin6_addr; if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; } } /* * Otherwise it will be difficult to maintain * sk->sk_dst_cache. 
*/ if (sk->sk_state == TCP_ESTABLISHED && ipv6_addr_equal(daddr, &sk->sk_v6_daddr)) daddr = &sk->sk_v6_daddr; if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr))) fl6.flowi6_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; proto = inet->inet_num; daddr = &sk->sk_v6_daddr; fl6.flowlabel = np->flow_label; } if (fl6.flowi6_oif == 0) fl6.flowi6_oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); ipc6.opt = opt; err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6); if (err < 0) { fl6_sock_release(flowlabel); return err; } if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; } if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; } if (!opt) { opt = txopt_get(np); opt_to_free = opt; } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); fl6.flowi6_proto = proto; fl6.flowi6_mark = ipc6.sockc.mark; if (!hdrincl) { rfv.msg = msg; rfv.hlen = 0; err = rawv6_probe_proto_opt(&rfv, &fl6); if (err) goto out; } if (!ipv6_addr_any(daddr)) fl6.daddr = *daddr; else fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) fl6.saddr = np->saddr; final_p = fl6_update_dst(&fl6, opt, &final); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if (!fl6.flowi6_oif) fl6.flowi6_oif = READ_ONCE(np->ucast_oif); security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); if (hdrincl) fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH; fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto out; } if (ipc6.hlimit < 0) ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; back_from_confirm: if (hdrincl) err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, msg->msg_flags, &ipc6.sockc); else { ipc6.opt = opt; lock_sock(sk); err = ip6_append_data(sk, raw6_getfrag, &rfv, len, 0, &ipc6, &fl6, dst_rt6_info(dst), msg->msg_flags); if (err) ip6_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) err = rawv6_push_pending_frames(sk, &fl6, rp); release_sock(sk); } done: dst_release(dst); out: fl6_sock_release(flowlabel); txopt_put(opt_to_free); return err < 0 ? 
err : len; do_confirm: if (msg->msg_flags & MSG_PROBE) dst_confirm_neigh(dst, &fl6.daddr); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; goto done; } static int rawv6_seticmpfilter(struct sock *sk, int optname, sockptr_t optval, int optlen) { switch (optname) { case ICMPV6_FILTER: if (optlen > sizeof(struct icmp6_filter)) optlen = sizeof(struct icmp6_filter); if (copy_from_sockptr(&raw6_sk(sk)->filter, optval, optlen)) return -EFAULT; return 0; default: return -ENOPROTOOPT; } return 0; } static int rawv6_geticmpfilter(struct sock *sk, int optname, char __user *optval, int __user *optlen) { int len; switch (optname) { case ICMPV6_FILTER: if (get_user(len, optlen)) return -EFAULT; if (len < 0) return -EINVAL; if (len > sizeof(struct icmp6_filter)) len = sizeof(struct icmp6_filter); if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &raw6_sk(sk)->filter, len)) return -EFAULT; return 0; default: return -ENOPROTOOPT; } return 0; } static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { struct raw6_sock *rp = raw6_sk(sk); int val; if (optlen < sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; switch (optname) { case IPV6_HDRINCL: if (sk->sk_type != SOCK_RAW) return -EINVAL; inet_assign_bit(HDRINCL, sk, val); return 0; case IPV6_CHECKSUM: if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 && level == IPPROTO_IPV6) { /* * RFC3542 tells that IPV6_CHECKSUM socket * option in the IPPROTO_IPV6 level is not * allowed on ICMPv6 sockets. * If you want to set it, use IPPROTO_RAW * level IPV6_CHECKSUM socket option * (Linux extension). */ return -EINVAL; } /* You may get strange result with a positive odd offset; RFC2292bis agrees with me. */ if (val > 0 && (val&1)) return -EINVAL; if (val < 0) { rp->checksum = 0; } else { rp->checksum = 1; rp->offset = val; } return 0; default: return -ENOPROTOOPT; } } static int rawv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { switch (level) { case SOL_RAW: break; case SOL_ICMPV6: if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; return rawv6_seticmpfilter(sk, optname, optval, optlen); case SOL_IPV6: if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) break; fallthrough; default: return ipv6_setsockopt(sk, level, optname, optval, optlen); } return do_rawv6_setsockopt(sk, level, optname, optval, optlen); } static int do_rawv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { struct raw6_sock *rp = raw6_sk(sk); int val, len; if (get_user(len, optlen)) return -EFAULT; switch (optname) { case IPV6_HDRINCL: val = inet_test_bit(HDRINCL, sk); break; case IPV6_CHECKSUM: /* * We allow getsockopt() for IPPROTO_IPV6-level * IPV6_CHECKSUM socket option on ICMPv6 sockets * since RFC3542 is silent about it. 
*/ if (rp->checksum == 0) val = -1; else val = rp->offset; break; default: return -ENOPROTOOPT; } len = min_t(unsigned int, sizeof(int), len); if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &val, len)) return -EFAULT; return 0; } static int rawv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { switch (level) { case SOL_RAW: break; case SOL_ICMPV6: if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; return rawv6_geticmpfilter(sk, optname, optval, optlen); case SOL_IPV6: if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) break; fallthrough; default: return ipv6_getsockopt(sk, level, optname, optval, optlen); } return do_rawv6_getsockopt(sk, level, optname, optval, optlen); } static int rawv6_ioctl(struct sock *sk, int cmd, int *karg) { switch (cmd) { case SIOCOUTQ: { *karg = sk_wmem_alloc_get(sk); return 0; } case SIOCINQ: { struct sk_buff *skb; spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb) *karg = skb->len; else *karg = 0; spin_unlock_bh(&sk->sk_receive_queue.lock); return 0; } default: #ifdef CONFIG_IPV6_MROUTE return ip6mr_ioctl(sk, cmd, karg); #else return -ENOIOCTLCMD; #endif } } #ifdef CONFIG_COMPAT static int compat_rawv6_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) { switch (cmd) { case SIOCOUTQ: case SIOCINQ: return -ENOIOCTLCMD; default: #ifdef CONFIG_IPV6_MROUTE return ip6mr_compat_ioctl(sk, cmd, compat_ptr(arg)); #else return -ENOIOCTLCMD; #endif } } #endif static void rawv6_close(struct sock *sk, long timeout) { if (inet_sk(sk)->inet_num == IPPROTO_RAW) ip6_ra_control(sk, -1); ip6mr_sk_done(sk); sk_common_release(sk); } static void raw6_destroy(struct sock *sk) { lock_sock(sk); ip6_flush_pending_frames(sk); release_sock(sk); } static int rawv6_init_sk(struct sock *sk) { struct raw6_sock *rp = raw6_sk(sk); switch (inet_sk(sk)->inet_num) { case IPPROTO_ICMPV6: rp->checksum = 1; rp->offset = 2; break; case IPPROTO_MH: rp->checksum = 1; rp->offset = 4; break; default: break; } return 0; } struct proto rawv6_prot = { .name = "RAWv6", .owner = THIS_MODULE, .close = rawv6_close, .destroy = raw6_destroy, .connect = ip6_datagram_connect_v6_only, .disconnect = __udp_disconnect, .ioctl = rawv6_ioctl, .init = rawv6_init_sk, .setsockopt = rawv6_setsockopt, .getsockopt = rawv6_getsockopt, .sendmsg = rawv6_sendmsg, .recvmsg = rawv6_recvmsg, .bind = rawv6_bind, .backlog_rcv = rawv6_rcv_skb, .hash = raw_hash_sk, .unhash = raw_unhash_sk, .obj_size = sizeof(struct raw6_sock), .ipv6_pinfo_offset = offsetof(struct raw6_sock, inet6), .useroffset = offsetof(struct raw6_sock, filter), .usersize = sizeof_field(struct raw6_sock, filter), .h.raw_hash = &raw_v6_hashinfo, #ifdef CONFIG_COMPAT .compat_ioctl = compat_rawv6_ioctl, #endif .diag_destroy = raw_abort, }; #ifdef CONFIG_PROC_FS static int raw6_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) { seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); } else { struct sock *sp = v; __u16 srcp = inet_sk(sp)->inet_num; ip6_dgram_sock_seq_show(seq, v, srcp, 0, raw_seq_private(seq)->bucket); } return 0; } static const struct seq_operations raw6_seq_ops = { .start = raw_seq_start, .next = raw_seq_next, .stop = raw_seq_stop, .show = raw6_seq_show, }; static int __net_init raw6_init_net(struct net *net) { if (!proc_create_net_data("raw6", 0444, net->proc_net, &raw6_seq_ops, sizeof(struct raw_iter_state), &raw_v6_hashinfo)) return -ENOMEM; return 0; } static void __net_exit raw6_exit_net(struct net *net) { 
remove_proc_entry("raw6", net->proc_net); } static struct pernet_operations raw6_net_ops = { .init = raw6_init_net, .exit = raw6_exit_net, }; int __init raw6_proc_init(void) { return register_pernet_subsys(&raw6_net_ops); } void raw6_proc_exit(void) { unregister_pernet_subsys(&raw6_net_ops); } #endif /* CONFIG_PROC_FS */ /* Same as inet6_dgram_ops, sans udp_poll. */ const struct proto_ops inet6_sockraw_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, .bind = inet6_bind, .connect = inet_dgram_connect, /* ok */ .socketpair = sock_no_socketpair, /* a do nothing */ .accept = sock_no_accept, /* a do nothing */ .getname = inet6_getname, .poll = datagram_poll, /* ok */ .ioctl = inet6_ioctl, /* must change */ .gettstamp = sock_gettstamp, .listen = sock_no_listen, /* ok */ .shutdown = inet_shutdown, /* ok */ .setsockopt = sock_common_setsockopt, /* ok */ .getsockopt = sock_common_getsockopt, /* ok */ .sendmsg = inet_sendmsg, /* ok */ .recvmsg = sock_common_recvmsg, /* ok */ .mmap = sock_no_mmap, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif }; static struct inet_protosw rawv6_protosw = { .type = SOCK_RAW, .protocol = IPPROTO_IP, /* wild card */ .prot = &rawv6_prot, .ops = &inet6_sockraw_ops, .flags = INET_PROTOSW_REUSE, }; int __init rawv6_init(void) { return inet6_register_protosw(&rawv6_protosw); } void rawv6_exit(void) { inet6_unregister_protosw(&rawv6_protosw); }
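For the transmit side, a hedged sketch of driving rawv6_sendmsg() follows: one ICMPv6 Echo Request is handed to sendto() with its checksum left at zero. Because the socket is not in HDRINCL mode, rawv6_probe_proto_opt() peeks at the type/code bytes for the flow, and the kernel fills in the checksum at the offset enabled by rawv6_init_sk() before the packet is sent. The identifier, sequence number and loopback destination are illustrative only.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <arpa/inet.h>

int main(void)
{
        int fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
        if (fd < 0) {
                perror("socket");
                return 1;
        }

        /* Echo Request header; checksum stays zero because the kernel
         * recomputes it (checksum offset 2 on ICMPv6 sockets). */
        struct icmp6_hdr hdr;
        memset(&hdr, 0, sizeof(hdr));
        hdr.icmp6_type = ICMP6_ECHO_REQUEST;
        hdr.icmp6_code = 0;
        hdr.icmp6_id = htons(0x1234);   /* arbitrary example values */
        hdr.icmp6_seq = htons(1);

        struct sockaddr_in6 dst;
        memset(&dst, 0, sizeof(dst));
        dst.sin6_family = AF_INET6;
        dst.sin6_addr = in6addr_loopback;       /* ::1, illustration only */

        if (sendto(fd, &hdr, sizeof(hdr), 0,
                   (struct sockaddr *)&dst, sizeof(dst)) < 0)
                perror("sendto");

        close(fd);
        return 0;
}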
// SPDX-License-Identifier: GPL-2.0-only /* * linux/kernel/signal.c * * Copyright (C) 1991, 1992 Linus Torvalds * * 1997-11-02 Modified for POSIX.1b signals by Richard Henderson * * 2003-06-02 Jim Houston - Concurrent Computer Corp. * Changes to use preallocated sigqueue structures * to allow signals to be sent reliably. */ #include <linux/slab.h> #include <linux/export.h> #include <linux/init.h> #include <linux/sched/mm.h> #include <linux/sched/user.h> #include <linux/sched/debug.h> #include <linux/sched/task.h> #include <linux/sched/task_stack.h> #include <linux/sched/cputime.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/proc_fs.h> #include <linux/tty.h> #include <linux/binfmts.h> #include <linux/coredump.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/ptrace.h> #include <linux/signal.h> #include <linux/signalfd.h> #include <linux/ratelimit.h> #include <linux/task_work.h> #include <linux/capability.h> #include <linux/freezer.h> #include <linux/pid_namespace.h> #include <linux/nsproxy.h> #include <linux/user_namespace.h> #include <linux/uprobes.h> #include <linux/compat.h> #include <linux/cn_proc.h> #include <linux/compiler.h> #include <linux/posix-timers.h> #include <linux/cgroup.h> #include <linux/audit.h> #include <linux/sysctl.h> #include <uapi/linux/pidfd.h> #define CREATE_TRACE_POINTS #include <trace/events/signal.h> #include <asm/param.h> #include <linux/uaccess.h> #include <asm/unistd.h> #include <asm/siginfo.h> #include <asm/cacheflush.h> #include <asm/syscall.h> /* for syscall_get_* */ #include "time/posix-timers.h" /* * SLAB caches for signal bits. */ static struct kmem_cache *sigqueue_cachep; int print_fatal_signals __read_mostly; static void __user *sig_handler(struct task_struct *t, int sig) { return t->sighand->action[sig - 1].sa.sa_handler; } static inline bool sig_handler_ignored(void __user *handler, int sig) { /* Is it explicitly or implicitly ignored? */ return handler == SIG_IGN || (handler == SIG_DFL && sig_kernel_ignore(sig)); } static bool sig_task_ignored(struct task_struct *t, int sig, bool force) { void __user *handler; handler = sig_handler(t, sig); /* SIGKILL and SIGSTOP may not be sent to the global init */ if (unlikely(is_global_init(t) && sig_kernel_only(sig))) return true; if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) && handler == SIG_DFL && !(force && sig_kernel_only(sig))) return true; /* Only allow kernel generated signals to this kthread */ if (unlikely((t->flags & PF_KTHREAD) && (handler == SIG_KTHREAD_KERNEL) && !force)) return true; return sig_handler_ignored(handler, sig); } static bool sig_ignored(struct task_struct *t, int sig, bool force) { /* * Blocked signals are never ignored, since the * signal handler may change by the time it is * unblocked.
*/ if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) return false; /* * Tracers may want to know about even ignored signal unless it * is SIGKILL which can't be reported anyway but can be ignored * by SIGNAL_UNKILLABLE task. */ if (t->ptrace && sig != SIGKILL) return false; return sig_task_ignored(t, sig, force); } /* * Re-calculate pending state from the set of locally pending * signals, globally pending signals, and blocked signals. */ static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked) { unsigned long ready; long i; switch (_NSIG_WORDS) { default: for (i = _NSIG_WORDS, ready = 0; --i >= 0 ;) ready |= signal->sig[i] &~ blocked->sig[i]; break; case 4: ready = signal->sig[3] &~ blocked->sig[3]; ready |= signal->sig[2] &~ blocked->sig[2]; ready |= signal->sig[1] &~ blocked->sig[1]; ready |= signal->sig[0] &~ blocked->sig[0]; break; case 2: ready = signal->sig[1] &~ blocked->sig[1]; ready |= signal->sig[0] &~ blocked->sig[0]; break; case 1: ready = signal->sig[0] &~ blocked->sig[0]; } return ready != 0; } #define PENDING(p,b) has_pending_signals(&(p)->signal, (b)) static bool recalc_sigpending_tsk(struct task_struct *t) { if ((t->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) || PENDING(&t->pending, &t->blocked) || PENDING(&t->signal->shared_pending, &t->blocked) || cgroup_task_frozen(t)) { set_tsk_thread_flag(t, TIF_SIGPENDING); return true; } /* * We must never clear the flag in another thread, or in current * when it's possible the current syscall is returning -ERESTART*. * So we don't clear it here, and only callers who know they should do. */ return false; } void recalc_sigpending(void) { if (!recalc_sigpending_tsk(current) && !freezing(current)) { if (unlikely(test_thread_flag(TIF_SIGPENDING))) clear_thread_flag(TIF_SIGPENDING); } } EXPORT_SYMBOL(recalc_sigpending); void calculate_sigpending(void) { /* Have any signals or users of TIF_SIGPENDING been delayed * until after fork? */ spin_lock_irq(&current->sighand->siglock); set_tsk_thread_flag(current, TIF_SIGPENDING); recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); } /* Given the mask, find the first available signal that should be serviced. */ #define SYNCHRONOUS_MASK \ (sigmask(SIGSEGV) | sigmask(SIGBUS) | sigmask(SIGILL) | \ sigmask(SIGTRAP) | sigmask(SIGFPE) | sigmask(SIGSYS)) int next_signal(struct sigpending *pending, sigset_t *mask) { unsigned long i, *s, *m, x; int sig = 0; s = pending->signal.sig; m = mask->sig; /* * Handle the first word specially: it contains the * synchronous signals that need to be dequeued first. */ x = *s &~ *m; if (x) { if (x & SYNCHRONOUS_MASK) x &= SYNCHRONOUS_MASK; sig = ffz(~x) + 1; return sig; } switch (_NSIG_WORDS) { default: for (i = 1; i < _NSIG_WORDS; ++i) { x = *++s &~ *++m; if (!x) continue; sig = ffz(~x) + i*_NSIG_BPW + 1; break; } break; case 2: x = s[1] &~ m[1]; if (!x) break; sig = ffz(~x) + _NSIG_BPW + 1; break; case 1: /* Nothing to do */ break; } return sig; } static inline void print_dropped_signal(int sig) { static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); if (!print_fatal_signals) return; if (!__ratelimit(&ratelimit_state)) return; pr_info("%s/%d: reached RLIMIT_SIGPENDING, dropped signal %d\n", current->comm, current->pid, sig); } /** * task_set_jobctl_pending - set jobctl pending bits * @task: target task * @mask: pending bits to set * * Clear @mask from @task->jobctl. @mask must be subset of * %JOBCTL_PENDING_MASK | %JOBCTL_STOP_CONSUME | %JOBCTL_STOP_SIGMASK | * %JOBCTL_TRAPPING. 
If stop signo is being set, the existing signo is * cleared. If @task is already being killed or exiting, this function * becomes noop. * * CONTEXT: * Must be called with @task->sighand->siglock held. * * RETURNS: * %true if @mask is set, %false if made noop because @task was dying. */ bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask) { BUG_ON(mask & ~(JOBCTL_PENDING_MASK | JOBCTL_STOP_CONSUME | JOBCTL_STOP_SIGMASK | JOBCTL_TRAPPING)); BUG_ON((mask & JOBCTL_TRAPPING) && !(mask & JOBCTL_PENDING_MASK)); if (unlikely(fatal_signal_pending(task) || (task->flags & PF_EXITING))) return false; if (mask & JOBCTL_STOP_SIGMASK) task->jobctl &= ~JOBCTL_STOP_SIGMASK; task->jobctl |= mask; return true; } /** * task_clear_jobctl_trapping - clear jobctl trapping bit * @task: target task * * If JOBCTL_TRAPPING is set, a ptracer is waiting for us to enter TRACED. * Clear it and wake up the ptracer. Note that we don't need any further * locking. @task->siglock guarantees that @task->parent points to the * ptracer. * * CONTEXT: * Must be called with @task->sighand->siglock held. */ void task_clear_jobctl_trapping(struct task_struct *task) { if (unlikely(task->jobctl & JOBCTL_TRAPPING)) { task->jobctl &= ~JOBCTL_TRAPPING; smp_mb(); /* advised by wake_up_bit() */ wake_up_bit(&task->jobctl, JOBCTL_TRAPPING_BIT); } } /** * task_clear_jobctl_pending - clear jobctl pending bits * @task: target task * @mask: pending bits to clear * * Clear @mask from @task->jobctl. @mask must be subset of * %JOBCTL_PENDING_MASK. If %JOBCTL_STOP_PENDING is being cleared, other * STOP bits are cleared together. * * If clearing of @mask leaves no stop or trap pending, this function calls * task_clear_jobctl_trapping(). * * CONTEXT: * Must be called with @task->sighand->siglock held. */ void task_clear_jobctl_pending(struct task_struct *task, unsigned long mask) { BUG_ON(mask & ~JOBCTL_PENDING_MASK); if (mask & JOBCTL_STOP_PENDING) mask |= JOBCTL_STOP_CONSUME | JOBCTL_STOP_DEQUEUED; task->jobctl &= ~mask; if (!(task->jobctl & JOBCTL_PENDING_MASK)) task_clear_jobctl_trapping(task); } /** * task_participate_group_stop - participate in a group stop * @task: task participating in a group stop * * @task has %JOBCTL_STOP_PENDING set and is participating in a group stop. * Group stop states are cleared and the group stop count is consumed if * %JOBCTL_STOP_CONSUME was set. If the consumption completes the group * stop, the appropriate `SIGNAL_*` flags are set. * * CONTEXT: * Must be called with @task->sighand->siglock held. * * RETURNS: * %true if group stop completion should be notified to the parent, %false * otherwise. */ static bool task_participate_group_stop(struct task_struct *task) { struct signal_struct *sig = task->signal; bool consume = task->jobctl & JOBCTL_STOP_CONSUME; WARN_ON_ONCE(!(task->jobctl & JOBCTL_STOP_PENDING)); task_clear_jobctl_pending(task, JOBCTL_STOP_PENDING); if (!consume) return false; if (!WARN_ON_ONCE(sig->group_stop_count == 0)) sig->group_stop_count--; /* * Tell the caller to notify completion iff we are entering into a * fresh group stop. Read comment in do_signal_stop() for details. 
*/ if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) { signal_set_stop_flags(sig, SIGNAL_STOP_STOPPED); return true; } return false; } void task_join_group_stop(struct task_struct *task) { unsigned long mask = current->jobctl & JOBCTL_STOP_SIGMASK; struct signal_struct *sig = current->signal; if (sig->group_stop_count) { sig->group_stop_count++; mask |= JOBCTL_STOP_CONSUME; } else if (!(sig->flags & SIGNAL_STOP_STOPPED)) return; /* Have the new thread join an on-going signal group stop */ task_set_jobctl_pending(task, mask | JOBCTL_STOP_PENDING); } static struct ucounts *sig_get_ucounts(struct task_struct *t, int sig, int override_rlimit) { struct ucounts *ucounts; long sigpending; /* * Protect access to @t credentials. This can go away when all * callers hold rcu read lock. * * NOTE! A pending signal will hold on to the user refcount, * and we get/put the refcount only when the sigpending count * changes from/to zero. */ rcu_read_lock(); ucounts = task_ucounts(t); sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, override_rlimit); rcu_read_unlock(); if (!sigpending) return NULL; if (unlikely(!override_rlimit && sigpending > task_rlimit(t, RLIMIT_SIGPENDING))) { dec_rlimit_put_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING); print_dropped_signal(sig); return NULL; } return ucounts; } static void __sigqueue_init(struct sigqueue *q, struct ucounts *ucounts, const unsigned int sigqueue_flags) { INIT_LIST_HEAD(&q->list); q->flags = sigqueue_flags; q->ucounts = ucounts; } /* * allocate a new signal queue record * - this may be called without locks if and only if t == current, otherwise an * appropriate lock must be held to stop the target task from exiting */ static struct sigqueue *sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags, int override_rlimit) { struct ucounts *ucounts = sig_get_ucounts(t, sig, override_rlimit); struct sigqueue *q; if (!ucounts) return NULL; q = kmem_cache_alloc(sigqueue_cachep, gfp_flags); if (!q) { dec_rlimit_put_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING); return NULL; } __sigqueue_init(q, ucounts, 0); return q; } static void __sigqueue_free(struct sigqueue *q) { if (q->flags & SIGQUEUE_PREALLOC) { posixtimer_sigqueue_putref(q); return; } if (q->ucounts) { dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING); q->ucounts = NULL; } kmem_cache_free(sigqueue_cachep, q); } void flush_sigqueue(struct sigpending *queue) { struct sigqueue *q; sigemptyset(&queue->signal); while (!list_empty(&queue->list)) { q = list_entry(queue->list.next, struct sigqueue , list); list_del_init(&q->list); __sigqueue_free(q); } } /* * Flush all pending signals for this kthread. */ void flush_signals(struct task_struct *t) { unsigned long flags; spin_lock_irqsave(&t->sighand->siglock, flags); clear_tsk_thread_flag(t, TIF_SIGPENDING); flush_sigqueue(&t->pending); flush_sigqueue(&t->signal->shared_pending); spin_unlock_irqrestore(&t->sighand->siglock, flags); } EXPORT_SYMBOL(flush_signals); void ignore_signals(struct task_struct *t) { int i; for (i = 0; i < _NSIG; ++i) t->sighand->action[i].sa.sa_handler = SIG_IGN; flush_signals(t); } /* * Flush all handlers for a task. 
*/ void flush_signal_handlers(struct task_struct *t, int force_default) { int i; struct k_sigaction *ka = &t->sighand->action[0]; for (i = _NSIG ; i != 0 ; i--) { if (force_default || ka->sa.sa_handler != SIG_IGN) ka->sa.sa_handler = SIG_DFL; ka->sa.sa_flags = 0; #ifdef __ARCH_HAS_SA_RESTORER ka->sa.sa_restorer = NULL; #endif sigemptyset(&ka->sa.sa_mask); ka++; } } bool unhandled_signal(struct task_struct *tsk, int sig) { void __user *handler = tsk->sighand->action[sig-1].sa.sa_handler; if (is_global_init(tsk)) return true; if (handler != SIG_IGN && handler != SIG_DFL) return false; /* If dying, we handle all new signals by ignoring them */ if (fatal_signal_pending(tsk)) return false; /* if ptraced, let the tracer determine */ return !tsk->ptrace; } static void collect_signal(int sig, struct sigpending *list, kernel_siginfo_t *info, struct sigqueue **timer_sigq) { struct sigqueue *q, *first = NULL; /* * Collect the siginfo appropriate to this signal. Check if * there is another siginfo for the same signal. */ list_for_each_entry(q, &list->list, list) { if (q->info.si_signo == sig) { if (first) goto still_pending; first = q; } } sigdelset(&list->signal, sig); if (first) { still_pending: list_del_init(&first->list); copy_siginfo(info, &first->info); /* * posix-timer signals are preallocated and freed when the last * reference count is dropped in posixtimer_deliver_signal() or * immediately on timer deletion when the signal is not pending. * Spare the extra round through __sigqueue_free() which is * ignoring preallocated signals. */ if (unlikely((first->flags & SIGQUEUE_PREALLOC) && (info->si_code == SI_TIMER))) *timer_sigq = first; else __sigqueue_free(first); } else { /* * Ok, it wasn't in the queue. This must be * a fast-pathed signal or we must have been * out of queue space. So zero out the info. */ clear_siginfo(info); info->si_signo = sig; info->si_errno = 0; info->si_code = SI_USER; info->si_pid = 0; info->si_uid = 0; } } static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, kernel_siginfo_t *info, struct sigqueue **timer_sigq) { int sig = next_signal(pending, mask); if (sig) collect_signal(sig, pending, info, timer_sigq); return sig; } /* * Try to dequeue a signal. If a deliverable signal is found fill in the * caller provided siginfo and return the signal number. Otherwise return * 0. */ int dequeue_signal(sigset_t *mask, kernel_siginfo_t *info, enum pid_type *type) { struct task_struct *tsk = current; struct sigqueue *timer_sigq; int signr; lockdep_assert_held(&tsk->sighand->siglock); again: *type = PIDTYPE_PID; timer_sigq = NULL; signr = __dequeue_signal(&tsk->pending, mask, info, &timer_sigq); if (!signr) { *type = PIDTYPE_TGID; signr = __dequeue_signal(&tsk->signal->shared_pending, mask, info, &timer_sigq); if (unlikely(signr == SIGALRM)) posixtimer_rearm_itimer(tsk); } recalc_sigpending(); if (!signr) return 0; if (unlikely(sig_kernel_stop(signr))) { /* * Set a marker that we have dequeued a stop signal. Our * caller might release the siglock and then the pending * stop signal it is about to process is no longer in the * pending bitmasks, but must still be cleared by a SIGCONT * (and overruled by a SIGKILL). So those cases clear this * shared flag after we've set it. Note that this flag may * remain set after the signal we return is ignored or * handled. That doesn't matter because its only purpose * is to alert stop-signal processing code when another * processor has come along and cleared the flag. 
*/ current->jobctl |= JOBCTL_STOP_DEQUEUED; } if (IS_ENABLED(CONFIG_POSIX_TIMERS) && unlikely(timer_sigq)) { if (!posixtimer_deliver_signal(info, timer_sigq)) goto again; } return signr; } EXPORT_SYMBOL_GPL(dequeue_signal); static int dequeue_synchronous_signal(kernel_siginfo_t *info) { struct task_struct *tsk = current; struct sigpending *pending = &tsk->pending; struct sigqueue *q, *sync = NULL; /* * Might a synchronous signal be in the queue? */ if (!((pending->signal.sig[0] & ~tsk->blocked.sig[0]) & SYNCHRONOUS_MASK)) return 0; /* * Return the first synchronous signal in the queue. */ list_for_each_entry(q, &pending->list, list) { /* Synchronous signals have a positive si_code */ if ((q->info.si_code > SI_USER) && (sigmask(q->info.si_signo) & SYNCHRONOUS_MASK)) { sync = q; goto next; } } return 0; next: /* * Check if there is another siginfo for the same signal. */ list_for_each_entry_continue(q, &pending->list, list) { if (q->info.si_signo == sync->info.si_signo) goto still_pending; } sigdelset(&pending->signal, sync->info.si_signo); recalc_sigpending(); still_pending: list_del_init(&sync->list); copy_siginfo(info, &sync->info); __sigqueue_free(sync); return info->si_signo; } /* * Tell a process that it has a new active signal.. * * NOTE! we rely on the previous spin_lock to * lock interrupts for us! We can only be called with * "siglock" held, and the local interrupt must * have been disabled when that got acquired! * * No need to set need_resched since signal event passing * goes through ->blocked */ void signal_wake_up_state(struct task_struct *t, unsigned int state) { lockdep_assert_held(&t->sighand->siglock); set_tsk_thread_flag(t, TIF_SIGPENDING); /* * TASK_WAKEKILL also means wake it up in the stopped/traced/killable * case. We don't check t->state here because there is a race with it * executing another processor and just now entering stopped state. * By using wake_up_state, we ensure the process will wake up and * handle its death signal. */ if (!wake_up_state(t, state | TASK_INTERRUPTIBLE)) kick_process(t); } static inline void posixtimer_sig_ignore(struct task_struct *tsk, struct sigqueue *q); static void sigqueue_free_ignored(struct task_struct *tsk, struct sigqueue *q) { if (likely(!(q->flags & SIGQUEUE_PREALLOC) || q->info.si_code != SI_TIMER)) __sigqueue_free(q); else posixtimer_sig_ignore(tsk, q); } /* Remove signals in mask from the pending set and queue. 
*/ static void flush_sigqueue_mask(struct task_struct *p, sigset_t *mask, struct sigpending *s) { struct sigqueue *q, *n; sigset_t m; lockdep_assert_held(&p->sighand->siglock); sigandsets(&m, mask, &s->signal); if (sigisemptyset(&m)) return; sigandnsets(&s->signal, &s->signal, mask); list_for_each_entry_safe(q, n, &s->list, list) { if (sigismember(mask, q->info.si_signo)) { list_del_init(&q->list); sigqueue_free_ignored(p, q); } } } static inline int is_si_special(const struct kernel_siginfo *info) { return info <= SEND_SIG_PRIV; } static inline bool si_fromuser(const struct kernel_siginfo *info) { return info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info)); } /* * called with RCU read lock from check_kill_permission() */ static bool kill_ok_by_cred(struct task_struct *t) { const struct cred *cred = current_cred(); const struct cred *tcred = __task_cred(t); return uid_eq(cred->euid, tcred->suid) || uid_eq(cred->euid, tcred->uid) || uid_eq(cred->uid, tcred->suid) || uid_eq(cred->uid, tcred->uid) || ns_capable(tcred->user_ns, CAP_KILL); } /* * Bad permissions for sending the signal * - the caller must hold the RCU read lock */ static int check_kill_permission(int sig, struct kernel_siginfo *info, struct task_struct *t) { struct pid *sid; int error; if (!valid_signal(sig)) return -EINVAL; if (!si_fromuser(info)) return 0; error = audit_signal_info(sig, t); /* Let audit system see the signal */ if (error) return error; if (!same_thread_group(current, t) && !kill_ok_by_cred(t)) { switch (sig) { case SIGCONT: sid = task_session(t); /* * We don't return the error if sid == NULL. The * task was unhashed, the caller must notice this. */ if (!sid || sid == task_session(current)) break; fallthrough; default: return -EPERM; } } return security_task_kill(t, info, sig, NULL); } /** * ptrace_trap_notify - schedule trap to notify ptracer * @t: tracee wanting to notify tracer * * This function schedules sticky ptrace trap which is cleared on the next * TRAP_STOP to notify ptracer of an event. @t must have been seized by * ptracer. * * If @t is running, STOP trap will be taken. If trapped for STOP and * ptracer is listening for events, tracee is woken up so that it can * re-trap for the new event. If trapped otherwise, STOP trap will be * eventually taken without returning to userland after the existing traps * are finished by PTRACE_CONT. * * CONTEXT: * Must be called with @task->sighand->siglock held. */ static void ptrace_trap_notify(struct task_struct *t) { WARN_ON_ONCE(!(t->ptrace & PT_SEIZED)); lockdep_assert_held(&t->sighand->siglock); task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY); ptrace_signal_wake_up(t, t->jobctl & JOBCTL_LISTENING); } /* * Handle magic process-wide effects of stop/continue signals. Unlike * the signal actions, these happen immediately at signal-generation * time regardless of blocking, ignoring, or handling. This does the * actual continuing for SIGCONT, but not the actual stopping for stop * signals. The process stop is done as a signal action for SIG_DFL. * * Returns true if the signal should be actually delivered, otherwise * it should be dropped. */ static bool prepare_signal(int sig, struct task_struct *p, bool force) { struct signal_struct *signal = p->signal; struct task_struct *t; sigset_t flush; if (signal->flags & SIGNAL_GROUP_EXIT) { if (signal->core_state) return sig == SIGKILL; /* * The process is in the middle of dying, drop the signal. */ return false; } else if (sig_kernel_stop(sig)) { /* * This is a stop signal. Remove SIGCONT from all queues. 
*/ siginitset(&flush, sigmask(SIGCONT)); flush_sigqueue_mask(p, &flush, &signal->shared_pending); for_each_thread(p, t) flush_sigqueue_mask(p, &flush, &t->pending); } else if (sig == SIGCONT) { unsigned int why; /* * Remove all stop signals from all queues, wake all threads. */ siginitset(&flush, SIG_KERNEL_STOP_MASK); flush_sigqueue_mask(p, &flush, &signal->shared_pending); for_each_thread(p, t) { flush_sigqueue_mask(p, &flush, &t->pending); task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING); if (likely(!(t->ptrace & PT_SEIZED))) { t->jobctl &= ~JOBCTL_STOPPED; wake_up_state(t, __TASK_STOPPED); } else ptrace_trap_notify(t); } /* * Notify the parent with CLD_CONTINUED if we were stopped. * * If we were in the middle of a group stop, we pretend it * was already finished, and then continued. Since SIGCHLD * doesn't queue we report only CLD_STOPPED, as if the next * CLD_CONTINUED was dropped. */ why = 0; if (signal->flags & SIGNAL_STOP_STOPPED) why |= SIGNAL_CLD_CONTINUED; else if (signal->group_stop_count) why |= SIGNAL_CLD_STOPPED; if (why) { /* * The first thread which returns from do_signal_stop() * will take ->siglock, notice SIGNAL_CLD_MASK, and * notify its parent. See get_signal(). */ signal_set_stop_flags(signal, why | SIGNAL_STOP_CONTINUED); signal->group_stop_count = 0; signal->group_exit_code = 0; } } return !sig_ignored(p, sig, force); } /* * Test if P wants to take SIG. After we've checked all threads with this, * it's equivalent to finding no threads not blocking SIG. Any threads not * blocking SIG were ruled out because they are not running and already * have pending signals. Such threads will dequeue from the shared queue * as soon as they're available, so putting the signal on the shared queue * will be equivalent to sending it to one such thread. */ static inline bool wants_signal(int sig, struct task_struct *p) { if (sigismember(&p->blocked, sig)) return false; if (p->flags & PF_EXITING) return false; if (sig == SIGKILL) return true; if (task_is_stopped_or_traced(p)) return false; return task_curr(p) || !task_sigpending(p); } static void complete_signal(int sig, struct task_struct *p, enum pid_type type) { struct signal_struct *signal = p->signal; struct task_struct *t; /* * Now find a thread we can wake up to take the signal off the queue. * * Try the suggested task first (may or may not be the main thread). */ if (wants_signal(sig, p)) t = p; else if ((type == PIDTYPE_PID) || thread_group_empty(p)) /* * There is just one thread and it does not need to be woken. * It will dequeue unblocked signals before it runs again. */ return; else { /* * Otherwise try to find a suitable thread. */ t = signal->curr_target; while (!wants_signal(sig, t)) { t = next_thread(t); if (t == signal->curr_target) /* * No thread needs to be woken. * Any eligible threads will see * the signal in the queue soon. */ return; } signal->curr_target = t; } /* * Found a killable thread. If the signal will be fatal, * then start taking the whole group down immediately. */ if (sig_fatal(p, sig) && (signal->core_state || !(signal->flags & SIGNAL_GROUP_EXIT)) && !sigismember(&t->real_blocked, sig) && (sig == SIGKILL || !p->ptrace)) { /* * This signal will be fatal to the whole group. */ if (!sig_kernel_coredump(sig)) { /* * Start a group exit and wake everybody up. * This way we don't have other threads * running and doing things after a slower * thread has the fatal signal pending. 
*/ signal->flags = SIGNAL_GROUP_EXIT; signal->group_exit_code = sig; signal->group_stop_count = 0; __for_each_thread(signal, t) { task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); } return; } } /* * The signal is already in the shared-pending queue. * Tell the chosen thread to wake up and dequeue it. */ signal_wake_up(t, sig == SIGKILL); return; } static inline bool legacy_queue(struct sigpending *signals, int sig) { return (sig < SIGRTMIN) && sigismember(&signals->signal, sig); } static int __send_signal_locked(int sig, struct kernel_siginfo *info, struct task_struct *t, enum pid_type type, bool force) { struct sigpending *pending; struct sigqueue *q; int override_rlimit; int ret = 0, result; lockdep_assert_held(&t->sighand->siglock); result = TRACE_SIGNAL_IGNORED; if (!prepare_signal(sig, t, force)) goto ret; pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending; /* * Short-circuit ignored signals and support queuing * exactly one non-rt signal, so that we can get more * detailed information about the cause of the signal. */ result = TRACE_SIGNAL_ALREADY_PENDING; if (legacy_queue(pending, sig)) goto ret; result = TRACE_SIGNAL_DELIVERED; /* * Skip useless siginfo allocation for SIGKILL and kernel threads. */ if ((sig == SIGKILL) || (t->flags & PF_KTHREAD)) goto out_set; /* * Real-time signals must be queued if sent by sigqueue, or * some other real-time mechanism. It is implementation * defined whether kill() does so. We attempt to do so, on * the principle of least surprise, but since kill is not * allowed to fail with EAGAIN when low on memory we just * make sure at least one signal gets delivered and don't * pass on the info struct. */ if (sig < SIGRTMIN) override_rlimit = (is_si_special(info) || info->si_code >= 0); else override_rlimit = 0; q = sigqueue_alloc(sig, t, GFP_ATOMIC, override_rlimit); if (q) { list_add_tail(&q->list, &pending->list); switch ((unsigned long) info) { case (unsigned long) SEND_SIG_NOINFO: clear_siginfo(&q->info); q->info.si_signo = sig; q->info.si_errno = 0; q->info.si_code = SI_USER; q->info.si_pid = task_tgid_nr_ns(current, task_active_pid_ns(t)); rcu_read_lock(); q->info.si_uid = from_kuid_munged(task_cred_xxx(t, user_ns), current_uid()); rcu_read_unlock(); break; case (unsigned long) SEND_SIG_PRIV: clear_siginfo(&q->info); q->info.si_signo = sig; q->info.si_errno = 0; q->info.si_code = SI_KERNEL; q->info.si_pid = 0; q->info.si_uid = 0; break; default: copy_siginfo(&q->info, info); break; } } else if (!is_si_special(info) && sig >= SIGRTMIN && info->si_code != SI_USER) { /* * Queue overflow, abort. We may abort if the * signal was rt and sent by user using something * other than kill(). */ result = TRACE_SIGNAL_OVERFLOW_FAIL; ret = -EAGAIN; goto ret; } else { /* * This is a silent loss of information. We still * send the signal, but the *info bits are lost. 
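 *
 * The userspace-visible contrast: a classic (< SIGRTMIN) signal raised
 * while already pending is coalesced into the single pending instance,
 * while sigqueue()d real-time signals accumulate until the
 * RLIMIT_SIGPENDING limit is exhausted and sigqueue() fails with
 * EAGAIN.  A minimal sketch, not part of this file:
 *
 *        #include <signal.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                union sigval v = { .sival_int = 0 };
 *                sigset_t set;
 *                int i;
 *
 *                sigemptyset(&set);
 *                sigaddset(&set, SIGUSR1);
 *                sigaddset(&set, SIGRTMIN);
 *                sigprocmask(SIG_BLOCK, &set, NULL);
 *
 *                for (i = 0; i < 3; i++) {
 *                        raise(SIGUSR1);                  // one pending entry
 *                        sigqueue(getpid(), SIGRTMIN, v); // three entries
 *                }
 *                return 0;
 *        }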
*/ result = TRACE_SIGNAL_LOSE_INFO; } out_set: signalfd_notify(t, sig); sigaddset(&pending->signal, sig); /* Let multiprocess signals appear after on-going forks */ if (type > PIDTYPE_TGID) { struct multiprocess_signals *delayed; hlist_for_each_entry(delayed, &t->signal->multiprocess, node) { sigset_t *signal = &delayed->signal; /* Can't queue both a stop and a continue signal */ if (sig == SIGCONT) sigdelsetmask(signal, SIG_KERNEL_STOP_MASK); else if (sig_kernel_stop(sig)) sigdelset(signal, SIGCONT); sigaddset(signal, sig); } } complete_signal(sig, t, type); ret: trace_signal_generate(sig, info, t, type != PIDTYPE_PID, result); return ret; } static inline bool has_si_pid_and_uid(struct kernel_siginfo *info) { bool ret = false; switch (siginfo_layout(info->si_signo, info->si_code)) { case SIL_KILL: case SIL_CHLD: case SIL_RT: ret = true; break; case SIL_TIMER: case SIL_POLL: case SIL_FAULT: case SIL_FAULT_TRAPNO: case SIL_FAULT_MCEERR: case SIL_FAULT_BNDERR: case SIL_FAULT_PKUERR: case SIL_FAULT_PERF_EVENT: case SIL_SYS: ret = false; break; } return ret; } int send_signal_locked(int sig, struct kernel_siginfo *info, struct task_struct *t, enum pid_type type) { /* Should SIGKILL or SIGSTOP be received by a pid namespace init? */ bool force = false; if (info == SEND_SIG_NOINFO) { /* Force if sent from an ancestor pid namespace */ force = !task_pid_nr_ns(current, task_active_pid_ns(t)); } else if (info == SEND_SIG_PRIV) { /* Don't ignore kernel generated signals */ force = true; } else if (has_si_pid_and_uid(info)) { /* SIGKILL and SIGSTOP is special or has ids */ struct user_namespace *t_user_ns; rcu_read_lock(); t_user_ns = task_cred_xxx(t, user_ns); if (current_user_ns() != t_user_ns) { kuid_t uid = make_kuid(current_user_ns(), info->si_uid); info->si_uid = from_kuid_munged(t_user_ns, uid); } rcu_read_unlock(); /* A kernel generated signal? */ force = (info->si_code == SI_KERNEL); /* From an ancestor pid namespace? */ if (!task_pid_nr_ns(current, task_active_pid_ns(t))) { info->si_pid = 0; force = true; } } return __send_signal_locked(sig, info, t, type, force); } static void print_fatal_signal(int signr) { struct pt_regs *regs = task_pt_regs(current); struct file *exe_file; exe_file = get_task_exe_file(current); if (exe_file) { pr_info("%pD: %s: potentially unexpected fatal signal %d.\n", exe_file, current->comm, signr); fput(exe_file); } else { pr_info("%s: potentially unexpected fatal signal %d.\n", current->comm, signr); } #if defined(__i386__) && !defined(__arch_um__) pr_info("code at %08lx: ", regs->ip); { int i; for (i = 0; i < 16; i++) { unsigned char insn; if (get_user(insn, (unsigned char *)(regs->ip + i))) break; pr_cont("%02x ", insn); } } pr_cont("\n"); #endif preempt_disable(); show_regs(regs); preempt_enable(); } static int __init setup_print_fatal_signals(char *str) { get_option (&str, &print_fatal_signals); return 1; } __setup("print-fatal-signals=", setup_print_fatal_signals); int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type) { unsigned long flags; int ret = -ESRCH; if (lock_task_sighand(p, &flags)) { ret = send_signal_locked(sig, info, p, type); unlock_task_sighand(p, &flags); } return ret; } enum sig_handler { HANDLER_CURRENT, /* If reachable use the current handler */ HANDLER_SIG_DFL, /* Always use SIG_DFL handler semantics */ HANDLER_EXIT, /* Only visible as the process exit code */ }; /* * Force a signal that the process can't ignore: if necessary * we unblock the signal and change any SIG_IGN to SIG_DFL. 
* * Note: If we unblock the signal, we always reset it to SIG_DFL, * since we do not want to have a signal handler that was blocked * be invoked when user space had explicitly blocked it. * * We don't want to have recursive SIGSEGV's etc, for example, * that is why we also clear SIGNAL_UNKILLABLE. */ static int force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, enum sig_handler handler) { unsigned long int flags; int ret, blocked, ignored; struct k_sigaction *action; int sig = info->si_signo; spin_lock_irqsave(&t->sighand->siglock, flags); action = &t->sighand->action[sig-1]; ignored = action->sa.sa_handler == SIG_IGN; blocked = sigismember(&t->blocked, sig); if (blocked || ignored || (handler != HANDLER_CURRENT)) { action->sa.sa_handler = SIG_DFL; if (handler == HANDLER_EXIT) action->sa.sa_flags |= SA_IMMUTABLE; if (blocked) sigdelset(&t->blocked, sig); } /* * Don't clear SIGNAL_UNKILLABLE for traced tasks, users won't expect * debugging to leave init killable. But HANDLER_EXIT is always fatal. */ if (action->sa.sa_handler == SIG_DFL && (!t->ptrace || (handler == HANDLER_EXIT))) t->signal->flags &= ~SIGNAL_UNKILLABLE; ret = send_signal_locked(sig, info, t, PIDTYPE_PID); /* This can happen if the signal was already pending and blocked */ if (!task_sigpending(t)) signal_wake_up(t, 0); spin_unlock_irqrestore(&t->sighand->siglock, flags); return ret; } int force_sig_info(struct kernel_siginfo *info) { return force_sig_info_to_task(info, current, HANDLER_CURRENT); } /* * Nuke all other threads in the group. */ int zap_other_threads(struct task_struct *p) { struct task_struct *t; int count = 0; p->signal->group_stop_count = 0; for_other_threads(p, t) { task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); count++; /* Don't bother with already dead threads */ if (t->exit_state) continue; sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); } return count; } struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, unsigned long *flags) { struct sighand_struct *sighand; rcu_read_lock(); for (;;) { sighand = rcu_dereference(tsk->sighand); if (unlikely(sighand == NULL)) break; /* * This sighand can be already freed and even reused, but * we rely on SLAB_TYPESAFE_BY_RCU and sighand_ctor() which * initializes ->siglock: this slab can't go away, it has * the same object type, ->siglock can't be reinitialized. * * We need to ensure that tsk->sighand is still the same * after we take the lock, we can race with de_thread() or * __exit_signal(). In the latter case the next iteration * must see ->sighand == NULL. */ spin_lock_irqsave(&sighand->siglock, *flags); if (likely(sighand == rcu_access_pointer(tsk->sighand))) break; spin_unlock_irqrestore(&sighand->siglock, *flags); } rcu_read_unlock(); return sighand; } #ifdef CONFIG_LOCKDEP void lockdep_assert_task_sighand_held(struct task_struct *task) { struct sighand_struct *sighand; rcu_read_lock(); sighand = rcu_dereference(task->sighand); if (sighand) lockdep_assert_held(&sighand->siglock); else WARN_ON_ONCE(1); rcu_read_unlock(); } #endif /* * send signal info to all the members of a thread group or to the * individual thread if type == PIDTYPE_PID. 
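 *
 * This is the split userspace sees between kill() (process directed:
 * any thread with the signal unblocked may dequeue it) and
 * tgkill()/pthread_kill() (thread directed).  An illustrative sketch,
 * not part of this file; build with -pthread:
 *
 *        #include <pthread.h>
 *        #include <signal.h>
 *        #include <unistd.h>
 *
 *        static void handler(int sig) { }
 *
 *        static void *worker(void *arg)
 *        {
 *                pause();        // a thread-directed SIGUSR1 is
 *                return NULL;    // guaranteed to interrupt this pause()
 *        }
 *
 *        int main(void)
 *        {
 *                pthread_t t;
 *
 *                signal(SIGUSR1, handler);
 *                pthread_create(&t, NULL, worker, NULL);
 *                sleep(1);
 *                pthread_kill(t, SIGUSR1);   // thread directed
 *                // kill(getpid(), SIGUSR1) would be process directed
 *                // and either thread could handle it
 *                pthread_join(t, NULL);
 *                return 0;
 *        }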
*/ int group_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type) { int ret; rcu_read_lock(); ret = check_kill_permission(sig, info, p); rcu_read_unlock(); if (!ret && sig) ret = do_send_sig_info(sig, info, p, type); return ret; } /* * __kill_pgrp_info() sends a signal to a process group: this is what the tty * control characters do (^C, ^Z etc) * - the caller must hold at least a readlock on tasklist_lock */ int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp) { struct task_struct *p = NULL; int ret = -ESRCH; do_each_pid_task(pgrp, PIDTYPE_PGID, p) { int err = group_send_sig_info(sig, info, p, PIDTYPE_PGID); /* * If group_send_sig_info() succeeds at least once ret * becomes 0 and after that the code below has no effect. * Otherwise we return the last err or -ESRCH if this * process group is empty. */ if (ret) ret = err; } while_each_pid_task(pgrp, PIDTYPE_PGID, p); return ret; } static int kill_pid_info_type(int sig, struct kernel_siginfo *info, struct pid *pid, enum pid_type type) { int error = -ESRCH; struct task_struct *p; for (;;) { rcu_read_lock(); p = pid_task(pid, PIDTYPE_PID); if (p) error = group_send_sig_info(sig, info, p, type); rcu_read_unlock(); if (likely(!p || error != -ESRCH)) return error; /* * The task was unhashed in between, try again. If it * is dead, pid_task() will return NULL, if we race with * de_thread() it will find the new leader. */ } } int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid) { return kill_pid_info_type(sig, info, pid, PIDTYPE_TGID); } static int kill_proc_info(int sig, struct kernel_siginfo *info, pid_t pid) { int error; rcu_read_lock(); error = kill_pid_info(sig, info, find_vpid(pid)); rcu_read_unlock(); return error; } static inline bool kill_as_cred_perm(const struct cred *cred, struct task_struct *target) { const struct cred *pcred = __task_cred(target); return uid_eq(cred->euid, pcred->suid) || uid_eq(cred->euid, pcred->uid) || uid_eq(cred->uid, pcred->suid) || uid_eq(cred->uid, pcred->uid); } /* * The usb asyncio usage of siginfo is wrong. The glibc support * for asyncio which uses SI_ASYNCIO assumes the layout is SIL_RT. * AKA after the generic fields: * kernel_pid_t si_pid; * kernel_uid32_t si_uid; * sigval_t si_value; * * Unfortunately when usb generates SI_ASYNCIO it assumes the layout * after the generic fields is: * void __user *si_addr; * * This is a practical problem when there is a 64bit big endian kernel * and a 32bit userspace. As the 32bit address will encoded in the low * 32bits of the pointer. Those low 32bits will be stored at higher * address than appear in a 32 bit pointer. So userspace will not * see the address it was expecting for it's completions. * * There is nothing in the encoding that can allow * copy_siginfo_to_user32 to detect this confusion of formats, so * handle this by requiring the caller of kill_pid_usb_asyncio to * notice when this situration takes place and to store the 32bit * pointer in sival_int, instead of sival_addr of the sigval_t addr * parameter. 
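 *
 * A sketch of what such a caller is expected to do (hypothetical code,
 * not from this file; 'consumer_is_32bit' stands for however the caller
 * tracks the word size of its userspace consumer):
 *
 *        sigval_t addr;
 *
 *        if (consumer_is_32bit)
 *                addr.sival_int = (int)(unsigned long)uaddr; // low 32 bits
 *        else
 *                addr.sival_ptr = (void __user *)uaddr;
 *        kill_pid_usb_asyncio(signr, errno, addr, pid, cred);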
*/ int kill_pid_usb_asyncio(int sig, int errno, sigval_t addr, struct pid *pid, const struct cred *cred) { struct kernel_siginfo info; struct task_struct *p; unsigned long flags; int ret = -EINVAL; if (!valid_signal(sig)) return ret; clear_siginfo(&info); info.si_signo = sig; info.si_errno = errno; info.si_code = SI_ASYNCIO; *((sigval_t *)&info.si_pid) = addr; rcu_read_lock(); p = pid_task(pid, PIDTYPE_PID); if (!p) { ret = -ESRCH; goto out_unlock; } if (!kill_as_cred_perm(cred, p)) { ret = -EPERM; goto out_unlock; } ret = security_task_kill(p, &info, sig, cred); if (ret) goto out_unlock; if (sig) { if (lock_task_sighand(p, &flags)) { ret = __send_signal_locked(sig, &info, p, PIDTYPE_TGID, false); unlock_task_sighand(p, &flags); } else ret = -ESRCH; } out_unlock: rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(kill_pid_usb_asyncio); /* * kill_something_info() interprets pid in interesting ways just like kill(2). * * POSIX specifies that kill(-1,sig) is unspecified, but what we have * is probably wrong. Should make it like BSD or SYSV. */ static int kill_something_info(int sig, struct kernel_siginfo *info, pid_t pid) { int ret; if (pid > 0) return kill_proc_info(sig, info, pid); /* -INT_MIN is undefined. Exclude this case to avoid a UBSAN warning */ if (pid == INT_MIN) return -ESRCH; read_lock(&tasklist_lock); if (pid != -1) { ret = __kill_pgrp_info(sig, info, pid ? find_vpid(-pid) : task_pgrp(current)); } else { int retval = 0, count = 0; struct task_struct * p; for_each_process(p) { if (task_pid_vnr(p) > 1 && !same_thread_group(p, current)) { int err = group_send_sig_info(sig, info, p, PIDTYPE_MAX); ++count; if (err != -EPERM) retval = err; } } ret = count ? retval : -ESRCH; } read_unlock(&tasklist_lock); return ret; } /* * These are for backward compatibility with the rest of the kernel source. */ int send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p) { /* * Make sure legacy kernel users don't send in bad values * (normal paths check this in check_kill_permission). */ if (!valid_signal(sig)) return -EINVAL; return do_send_sig_info(sig, info, p, PIDTYPE_PID); } EXPORT_SYMBOL(send_sig_info); #define __si_special(priv) \ ((priv) ? SEND_SIG_PRIV : SEND_SIG_NOINFO) int send_sig(int sig, struct task_struct *p, int priv) { return send_sig_info(sig, __si_special(priv), p); } EXPORT_SYMBOL(send_sig); void force_sig(int sig) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = sig; info.si_errno = 0; info.si_code = SI_KERNEL; info.si_pid = 0; info.si_uid = 0; force_sig_info(&info); } EXPORT_SYMBOL(force_sig); void force_fatal_sig(int sig) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = sig; info.si_errno = 0; info.si_code = SI_KERNEL; info.si_pid = 0; info.si_uid = 0; force_sig_info_to_task(&info, current, HANDLER_SIG_DFL); } void force_exit_sig(int sig) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = sig; info.si_errno = 0; info.si_code = SI_KERNEL; info.si_pid = 0; info.si_uid = 0; force_sig_info_to_task(&info, current, HANDLER_EXIT); } /* * When things go south during signal handling, we * will force a SIGSEGV. And if the signal that caused * the problem was already a SIGSEGV, we'll want to * make sure we don't even try to deliver the signal.. 
*/ void force_sigsegv(int sig) { if (sig == SIGSEGV) force_fatal_sig(SIGSEGV); else force_sig(SIGSEGV); } int force_sig_fault_to_task(int sig, int code, void __user *addr, struct task_struct *t) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = sig; info.si_errno = 0; info.si_code = code; info.si_addr = addr; return force_sig_info_to_task(&info, t, HANDLER_CURRENT); } int force_sig_fault(int sig, int code, void __user *addr) { return force_sig_fault_to_task(sig, code, addr, current); } int send_sig_fault(int sig, int code, void __user *addr, struct task_struct *t) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = sig; info.si_errno = 0; info.si_code = code; info.si_addr = addr; return send_sig_info(info.si_signo, &info, t); } int force_sig_mceerr(int code, void __user *addr, short lsb) { struct kernel_siginfo info; WARN_ON((code != BUS_MCEERR_AO) && (code != BUS_MCEERR_AR)); clear_siginfo(&info); info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = code; info.si_addr = addr; info.si_addr_lsb = lsb; return force_sig_info(&info); } int send_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct *t) { struct kernel_siginfo info; WARN_ON((code != BUS_MCEERR_AO) && (code != BUS_MCEERR_AR)); clear_siginfo(&info); info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = code; info.si_addr = addr; info.si_addr_lsb = lsb; return send_sig_info(info.si_signo, &info, t); } EXPORT_SYMBOL(send_sig_mceerr); int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = SIGSEGV; info.si_errno = 0; info.si_code = SEGV_BNDERR; info.si_addr = addr; info.si_lower = lower; info.si_upper = upper; return force_sig_info(&info); } #ifdef SEGV_PKUERR int force_sig_pkuerr(void __user *addr, u32 pkey) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = SIGSEGV; info.si_errno = 0; info.si_code = SEGV_PKUERR; info.si_addr = addr; info.si_pkey = pkey; return force_sig_info(&info); } #endif int send_sig_perf(void __user *addr, u32 type, u64 sig_data) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = SIGTRAP; info.si_errno = 0; info.si_code = TRAP_PERF; info.si_addr = addr; info.si_perf_data = sig_data; info.si_perf_type = type; /* * Signals generated by perf events should not terminate the whole * process if SIGTRAP is blocked, however, delivering the signal * asynchronously is better than not delivering at all. But tell user * space if the signal was asynchronous, so it can clearly be * distinguished from normal synchronous ones. */ info.si_perf_flags = sigismember(&current->blocked, info.si_signo) ? TRAP_PERF_FLAG_ASYNC : 0; return send_sig_info(info.si_signo, &info, current); } /** * force_sig_seccomp - signals the task to allow in-process syscall emulation * @syscall: syscall number to send to userland * @reason: filter-supplied reason code to send to userland (via si_errno) * @force_coredump: true to trigger a coredump * * Forces a SIGSYS with a code of SYS_SECCOMP and related sigsys info. */ int force_sig_seccomp(int syscall, int reason, bool force_coredump) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = SIGSYS; info.si_code = SYS_SECCOMP; info.si_call_addr = (void __user *)KSTK_EIP(current); info.si_errno = reason; info.si_arch = syscall_get_arch(current); info.si_syscall = syscall; return force_sig_info_to_task(&info, current, force_coredump ? 
HANDLER_EXIT : HANDLER_CURRENT); } /* For the crazy architectures that include trap information in * the errno field, instead of an actual errno value. */ int force_sig_ptrace_errno_trap(int errno, void __user *addr) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = SIGTRAP; info.si_errno = errno; info.si_code = TRAP_HWBKPT; info.si_addr = addr; return force_sig_info(&info); } /* For the rare architectures that include trap information using * si_trapno. */ int force_sig_fault_trapno(int sig, int code, void __user *addr, int trapno) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = sig; info.si_errno = 0; info.si_code = code; info.si_addr = addr; info.si_trapno = trapno; return force_sig_info(&info); } /* For the rare architectures that include trap information using * si_trapno. */ int send_sig_fault_trapno(int sig, int code, void __user *addr, int trapno, struct task_struct *t) { struct kernel_siginfo info; clear_siginfo(&info); info.si_signo = sig; info.si_errno = 0; info.si_code = code; info.si_addr = addr; info.si_trapno = trapno; return send_sig_info(info.si_signo, &info, t); } static int kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp) { int ret; read_lock(&tasklist_lock); ret = __kill_pgrp_info(sig, info, pgrp); read_unlock(&tasklist_lock); return ret; } int kill_pgrp(struct pid *pid, int sig, int priv) { return kill_pgrp_info(sig, __si_special(priv), pid); } EXPORT_SYMBOL(kill_pgrp); int kill_pid(struct pid *pid, int sig, int priv) { return kill_pid_info(sig, __si_special(priv), pid); } EXPORT_SYMBOL(kill_pid); #ifdef CONFIG_POSIX_TIMERS /* * These functions handle POSIX timer signals. POSIX timers use * preallocated sigqueue structs for sending signals. */ static void __flush_itimer_signals(struct sigpending *pending) { sigset_t signal, retain; struct sigqueue *q, *n; signal = pending->signal; sigemptyset(&retain); list_for_each_entry_safe(q, n, &pending->list, list) { int sig = q->info.si_signo; if (likely(q->info.si_code != SI_TIMER)) { sigaddset(&retain, sig); } else { sigdelset(&signal, sig); list_del_init(&q->list); __sigqueue_free(q); } } sigorsets(&pending->signal, &signal, &retain); } void flush_itimer_signals(void) { struct task_struct *tsk = current; guard(spinlock_irqsave)(&tsk->sighand->siglock); __flush_itimer_signals(&tsk->pending); __flush_itimer_signals(&tsk->signal->shared_pending); } bool posixtimer_init_sigqueue(struct sigqueue *q) { struct ucounts *ucounts = sig_get_ucounts(current, -1, 0); if (!ucounts) return false; clear_siginfo(&q->info); __sigqueue_init(q, ucounts, SIGQUEUE_PREALLOC); return true; } static void posixtimer_queue_sigqueue(struct sigqueue *q, struct task_struct *t, enum pid_type type) { struct sigpending *pending; int sig = q->info.si_signo; signalfd_notify(t, sig); pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending; list_add_tail(&q->list, &pending->list); sigaddset(&pending->signal, sig); complete_signal(sig, t, type); } /* * This function is used by POSIX timers to deliver a timer signal. * Where type is PIDTYPE_PID (such as for timers with SIGEV_THREAD_ID * set), the signal must be delivered to the specific thread (queues * into t->pending). * * Where type is not PIDTYPE_PID, signals must be delivered to the * process. In this case, prefer to deliver to current if it is in * the same thread group as the target process and its sighand is * stable, which avoids unnecessarily waking up a potentially idle task. 
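 *
 * For reference, the userspace setup whose expiry lands here (an
 * illustrative sketch, not part of this file; link with -lrt on older
 * glibc):
 *
 *        #include <signal.h>
 *        #include <string.h>
 *        #include <time.h>
 *        #include <unistd.h>
 *
 *        static void handler(int sig, siginfo_t *si, void *uc)
 *        {
 *                // si->si_code is SI_TIMER on this delivery path
 *        }
 *
 *        int main(void)
 *        {
 *                struct itimerspec its = { .it_value.tv_sec = 1 };
 *                struct sigaction sa;
 *                struct sigevent sev;
 *                timer_t id;
 *
 *                memset(&sa, 0, sizeof(sa));
 *                sa.sa_sigaction = handler;
 *                sa.sa_flags = SA_SIGINFO;
 *                sigaction(SIGRTMIN, &sa, NULL);
 *
 *                memset(&sev, 0, sizeof(sev));
 *                sev.sigev_notify = SIGEV_SIGNAL;  // process directed; with
 *                sev.sigev_signo = SIGRTMIN;       // SIGEV_THREAD_ID it is
 *                                                  // queued to one thread
 *                timer_create(CLOCK_MONOTONIC, &sev, &id);
 *                timer_settime(id, 0, &its, NULL);
 *                pause();
 *                return 0;
 *        }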
*/ static inline struct task_struct *posixtimer_get_target(struct k_itimer *tmr) { struct task_struct *t = pid_task(tmr->it_pid, tmr->it_pid_type); if (t && tmr->it_pid_type != PIDTYPE_PID && same_thread_group(t, current) && !current->exit_state) t = current; return t; } void posixtimer_send_sigqueue(struct k_itimer *tmr) { struct sigqueue *q = &tmr->sigq; int sig = q->info.si_signo; struct task_struct *t; unsigned long flags; int result; guard(rcu)(); t = posixtimer_get_target(tmr); if (!t) return; if (!likely(lock_task_sighand(t, &flags))) return; /* * Update @tmr::sigqueue_seq for posix timer signals with sighand * locked to prevent a race against dequeue_signal(). */ tmr->it_sigqueue_seq = tmr->it_signal_seq; /* * Set the signal delivery status under sighand lock, so that the * ignored signal handling can distinguish between a periodic and a * non-periodic timer. */ tmr->it_sig_periodic = tmr->it_status == POSIX_TIMER_REQUEUE_PENDING; if (!prepare_signal(sig, t, false)) { result = TRACE_SIGNAL_IGNORED; if (!list_empty(&q->list)) { /* * The signal was ignored and blocked. The timer * expiry queued it because blocked signals are * queued independent of the ignored state. * * The unblocking set SIGPENDING, but the signal * was not yet dequeued from the pending list. * So prepare_signal() sees unblocked and ignored, * which ends up here. Leave it queued like a * regular signal. * * The same happens when the task group is exiting * and the signal is already queued. * prepare_signal() treats SIGNAL_GROUP_EXIT as * ignored independent of its queued state. This * gets cleaned up in __exit_signal(). */ goto out; } /* Periodic timers with SIG_IGN are queued on the ignored list */ if (tmr->it_sig_periodic) { /* * Already queued means the timer was rearmed after * the previous expiry got it on the ignore list. * Nothing to do for that case. */ if (hlist_unhashed(&tmr->ignored_list)) { /* * Take a signal reference and queue it on * the ignored list. */ posixtimer_sigqueue_getref(q); posixtimer_sig_ignore(t, q); } } else if (!hlist_unhashed(&tmr->ignored_list)) { /* * Covers the case where a timer was periodic and * then the signal was ignored. Later it was rearmed * as oneshot timer. The previous signal is invalid * now, and this oneshot signal has to be dropped. * Remove it from the ignored list and drop the * reference count as the signal is not longer * queued. */ hlist_del_init(&tmr->ignored_list); posixtimer_putref(tmr); } goto out; } if (unlikely(!list_empty(&q->list))) { /* This holds a reference count already */ result = TRACE_SIGNAL_ALREADY_PENDING; goto out; } /* * If the signal is on the ignore list, it got blocked after it was * ignored earlier. But nothing lifted the ignore. Move it back to * the pending list to be consistent with the regular signal * handling. This already holds a reference count. * * If it's not on the ignore list acquire a reference count. */ if (likely(hlist_unhashed(&tmr->ignored_list))) posixtimer_sigqueue_getref(q); else hlist_del_init(&tmr->ignored_list); posixtimer_queue_sigqueue(q, t, tmr->it_pid_type); result = TRACE_SIGNAL_DELIVERED; out: trace_signal_generate(sig, &q->info, t, tmr->it_pid_type != PIDTYPE_PID, result); unlock_task_sighand(t, &flags); } static inline void posixtimer_sig_ignore(struct task_struct *tsk, struct sigqueue *q) { struct k_itimer *tmr = container_of(q, struct k_itimer, sigq); /* * If the timer is marked deleted already or the signal originates * from a non-periodic timer, then just drop the reference * count. 
Otherwise queue it on the ignored list. */ if (posixtimer_valid(tmr) && tmr->it_sig_periodic) hlist_add_head(&tmr->ignored_list, &tsk->signal->ignored_posix_timers); else posixtimer_putref(tmr); } static void posixtimer_sig_unignore(struct task_struct *tsk, int sig) { struct hlist_head *head = &tsk->signal->ignored_posix_timers; struct hlist_node *tmp; struct k_itimer *tmr; if (likely(hlist_empty(head))) return; /* * Rearming a timer with sighand lock held is not possible due to * lock ordering vs. tmr::it_lock. Just stick the sigqueue back and * let the signal delivery path deal with it whether it needs to be * rearmed or not. This cannot be decided here w/o dropping sighand * lock and creating a loop retry horror show. */ hlist_for_each_entry_safe(tmr, tmp , head, ignored_list) { struct task_struct *target; /* * tmr::sigq.info.si_signo is immutable, so accessing it * without holding tmr::it_lock is safe. */ if (tmr->sigq.info.si_signo != sig) continue; hlist_del_init(&tmr->ignored_list); /* This should never happen and leaks a reference count */ if (WARN_ON_ONCE(!list_empty(&tmr->sigq.list))) continue; /* * Get the target for the signal. If target is a thread and * has exited by now, drop the reference count. */ guard(rcu)(); target = posixtimer_get_target(tmr); if (target) posixtimer_queue_sigqueue(&tmr->sigq, target, tmr->it_pid_type); else posixtimer_putref(tmr); } } #else /* CONFIG_POSIX_TIMERS */ static inline void posixtimer_sig_ignore(struct task_struct *tsk, struct sigqueue *q) { } static inline void posixtimer_sig_unignore(struct task_struct *tsk, int sig) { } #endif /* !CONFIG_POSIX_TIMERS */ void do_notify_pidfd(struct task_struct *task) { struct pid *pid = task_pid(task); WARN_ON(task->exit_state == 0); __wake_up(&pid->wait_pidfd, TASK_NORMAL, 0, poll_to_key(EPOLLIN | EPOLLRDNORM)); } /* * Let a parent know about the death of a child. * For a stopped/continued status change, use do_notify_parent_cldstop instead. * * Returns true if our parent ignored us and so we've switched to * self-reaping. */ bool do_notify_parent(struct task_struct *tsk, int sig) { struct kernel_siginfo info; unsigned long flags; struct sighand_struct *psig; bool autoreap = false; u64 utime, stime; WARN_ON_ONCE(sig == -1); /* do_notify_parent_cldstop should have been called instead. */ WARN_ON_ONCE(task_is_stopped_or_traced(tsk)); WARN_ON_ONCE(!tsk->ptrace && (tsk->group_leader != tsk || !thread_group_empty(tsk))); /* ptraced, or group-leader without sub-threads */ do_notify_pidfd(tsk); if (sig != SIGCHLD) { /* * This is only possible if parent == real_parent. * Check if it has changed security domain. */ if (tsk->parent_exec_id != READ_ONCE(tsk->parent->self_exec_id)) sig = SIGCHLD; } clear_siginfo(&info); info.si_signo = sig; info.si_errno = 0; /* * We are under tasklist_lock here so our parent is tied to * us and cannot change. * * task_active_pid_ns will always return the same pid namespace * until a task passes through release_task. 
* * write_lock() currently calls preempt_disable() which is the * same as rcu_read_lock(), but according to Oleg, this is not * correct to rely on this */ rcu_read_lock(); info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(tsk->parent)); info.si_uid = from_kuid_munged(task_cred_xxx(tsk->parent, user_ns), task_uid(tsk)); rcu_read_unlock(); task_cputime(tsk, &utime, &stime); info.si_utime = nsec_to_clock_t(utime + tsk->signal->utime); info.si_stime = nsec_to_clock_t(stime + tsk->signal->stime); info.si_status = tsk->exit_code & 0x7f; if (tsk->exit_code & 0x80) info.si_code = CLD_DUMPED; else if (tsk->exit_code & 0x7f) info.si_code = CLD_KILLED; else { info.si_code = CLD_EXITED; info.si_status = tsk->exit_code >> 8; } psig = tsk->parent->sighand; spin_lock_irqsave(&psig->siglock, flags); if (!tsk->ptrace && sig == SIGCHLD && (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN || (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) { /* * We are exiting and our parent doesn't care. POSIX.1 * defines special semantics for setting SIGCHLD to SIG_IGN * or setting the SA_NOCLDWAIT flag: we should be reaped * automatically and not left for our parent's wait4 call. * Rather than having the parent do it as a magic kind of * signal handler, we just set this to tell do_exit that we * can be cleaned up without becoming a zombie. Note that * we still call __wake_up_parent in this case, because a * blocked sys_wait4 might now return -ECHILD. * * Whether we send SIGCHLD or not for SA_NOCLDWAIT * is implementation-defined: we do (if you don't want * it, just use SIG_IGN instead). */ autoreap = true; if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN) sig = 0; } /* * Send with __send_signal as si_pid and si_uid are in the * parent's namespaces. */ if (valid_signal(sig) && sig) __send_signal_locked(sig, &info, tsk->parent, PIDTYPE_TGID, false); __wake_up_parent(tsk, tsk->parent); spin_unlock_irqrestore(&psig->siglock, flags); return autoreap; } /** * do_notify_parent_cldstop - notify parent of stopped/continued state change * @tsk: task reporting the state change * @for_ptracer: the notification is for ptracer * @why: CLD_{CONTINUED|STOPPED|TRAPPED} to report * * Notify @tsk's parent that the stopped/continued state has changed. If * @for_ptracer is %false, @tsk's group leader notifies to its real parent. * If %true, @tsk reports to @tsk->parent which should be the ptracer. * * CONTEXT: * Must be called with tasklist_lock at least read locked. 
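 *
 * The parent-side counterpart of this notification (an illustrative
 * sketch, not part of this file) observes it with waitpid():
 *
 *        #include <signal.h>
 *        #include <stdio.h>
 *        #include <sys/wait.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                int status;
 *                pid_t pid = fork();
 *
 *                if (pid == 0) {
 *                        for (;;)
 *                                pause();
 *                }
 *                kill(pid, SIGSTOP);
 *                waitpid(pid, &status, WUNTRACED);      // CLD_STOPPED
 *                printf("stopped by %d\n", WSTOPSIG(status));
 *
 *                kill(pid, SIGCONT);
 *                waitpid(pid, &status, WCONTINUED);     // CLD_CONTINUED
 *                printf("continued: %d\n", WIFCONTINUED(status));
 *
 *                kill(pid, SIGKILL);
 *                waitpid(pid, &status, 0);
 *                return 0;
 *        }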
*/ static void do_notify_parent_cldstop(struct task_struct *tsk, bool for_ptracer, int why) { struct kernel_siginfo info; unsigned long flags; struct task_struct *parent; struct sighand_struct *sighand; u64 utime, stime; if (for_ptracer) { parent = tsk->parent; } else { tsk = tsk->group_leader; parent = tsk->real_parent; } clear_siginfo(&info); info.si_signo = SIGCHLD; info.si_errno = 0; /* * see comment in do_notify_parent() about the following 4 lines */ rcu_read_lock(); info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(parent)); info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk)); rcu_read_unlock(); task_cputime(tsk, &utime, &stime); info.si_utime = nsec_to_clock_t(utime); info.si_stime = nsec_to_clock_t(stime); info.si_code = why; switch (why) { case CLD_CONTINUED: info.si_status = SIGCONT; break; case CLD_STOPPED: info.si_status = tsk->signal->group_exit_code & 0x7f; break; case CLD_TRAPPED: info.si_status = tsk->exit_code & 0x7f; break; default: BUG(); } sighand = parent->sighand; spin_lock_irqsave(&sighand->siglock, flags); if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN && !(sighand->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) send_signal_locked(SIGCHLD, &info, parent, PIDTYPE_TGID); /* * Even if SIGCHLD is not generated, we must wake up wait4 calls. */ __wake_up_parent(tsk, parent); spin_unlock_irqrestore(&sighand->siglock, flags); } /* * This must be called with current->sighand->siglock held. * * This should be the path for all ptrace stops. * We always set current->last_siginfo while stopped here. * That makes it a way to test a stopped process for * being ptrace-stopped vs being job-control-stopped. * * Returns the signal the ptracer requested the code resume * with. If the code did not stop because the tracer is gone, * the stop signal remains unchanged unless clear_code. */ static int ptrace_stop(int exit_code, int why, unsigned long message, kernel_siginfo_t *info) __releases(&current->sighand->siglock) __acquires(&current->sighand->siglock) { bool gstop_done = false; if (arch_ptrace_stop_needed()) { /* * The arch code has something special to do before a * ptrace stop. This is allowed to block, e.g. for faults * on user stack pages. We can't keep the siglock while * calling arch_ptrace_stop, so we must release it now. * To preserve proper semantics, we must do this before * any signal bookkeeping like checking group_stop_count. */ spin_unlock_irq(&current->sighand->siglock); arch_ptrace_stop(); spin_lock_irq(&current->sighand->siglock); } /* * After this point ptrace_signal_wake_up or signal_wake_up * will clear TASK_TRACED if ptrace_unlink happens or a fatal * signal comes in. Handle previous ptrace_unlinks and fatal * signals here to prevent ptrace_stop sleeping in schedule. */ if (!current->ptrace || __fatal_signal_pending(current)) return exit_code; set_special_state(TASK_TRACED); current->jobctl |= JOBCTL_TRACED; /* * We're committing to trapping. TRACED should be visible before * TRAPPING is cleared; otherwise, the tracer might fail do_wait(). * Also, transition to TRACED and updates to ->jobctl should be * atomic with respect to siglock and should be done after the arch * hook as siglock is released and regrabbed across it. 
* * TRACER TRACEE * * ptrace_attach() * [L] wait_on_bit(JOBCTL_TRAPPING) [S] set_special_state(TRACED) * do_wait() * set_current_state() smp_wmb(); * ptrace_do_wait() * wait_task_stopped() * task_stopped_code() * [L] task_is_traced() [S] task_clear_jobctl_trapping(); */ smp_wmb(); current->ptrace_message = message; current->last_siginfo = info; current->exit_code = exit_code; /* * If @why is CLD_STOPPED, we're trapping to participate in a group * stop. Do the bookkeeping. Note that if SIGCONT was delievered * across siglock relocks since INTERRUPT was scheduled, PENDING * could be clear now. We act as if SIGCONT is received after * TASK_TRACED is entered - ignore it. */ if (why == CLD_STOPPED && (current->jobctl & JOBCTL_STOP_PENDING)) gstop_done = task_participate_group_stop(current); /* any trap clears pending STOP trap, STOP trap clears NOTIFY */ task_clear_jobctl_pending(current, JOBCTL_TRAP_STOP); if (info && info->si_code >> 8 == PTRACE_EVENT_STOP) task_clear_jobctl_pending(current, JOBCTL_TRAP_NOTIFY); /* entering a trap, clear TRAPPING */ task_clear_jobctl_trapping(current); spin_unlock_irq(&current->sighand->siglock); read_lock(&tasklist_lock); /* * Notify parents of the stop. * * While ptraced, there are two parents - the ptracer and * the real_parent of the group_leader. The ptracer should * know about every stop while the real parent is only * interested in the completion of group stop. The states * for the two don't interact with each other. Notify * separately unless they're gonna be duplicates. */ if (current->ptrace) do_notify_parent_cldstop(current, true, why); if (gstop_done && (!current->ptrace || ptrace_reparented(current))) do_notify_parent_cldstop(current, false, why); /* * The previous do_notify_parent_cldstop() invocation woke ptracer. * One a PREEMPTION kernel this can result in preemption requirement * which will be fulfilled after read_unlock() and the ptracer will be * put on the CPU. * The ptracer is in wait_task_inactive(, __TASK_TRACED) waiting for * this task wait in schedule(). If this task gets preempted then it * remains enqueued on the runqueue. The ptracer will observe this and * then sleep for a delay of one HZ tick. In the meantime this task * gets scheduled, enters schedule() and will wait for the ptracer. * * This preemption point is not bad from a correctness point of * view but extends the runtime by one HZ tick time due to the * ptracer's sleep. The preempt-disable section ensures that there * will be no preemption between unlock and schedule() and so * improving the performance since the ptracer will observe that * the tracee is scheduled out once it gets on the CPU. * * On PREEMPT_RT locking tasklist_lock does not disable preemption. * Therefore the task can be preempted after do_notify_parent_cldstop() * before unlocking tasklist_lock so there is no benefit in doing this. * * In fact disabling preemption is harmful on PREEMPT_RT because * the spinlock_t in cgroup_enter_frozen() must not be acquired * with preemption disabled due to the 'sleeping' spinlock * substitution of RT. */ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) preempt_disable(); read_unlock(&tasklist_lock); cgroup_enter_frozen(); if (!IS_ENABLED(CONFIG_PREEMPT_RT)) preempt_enable_no_resched(); schedule(); cgroup_leave_frozen(true); /* * We are back. Now reacquire the siglock before touching * last_siginfo, so that we are sure to have synchronized with * any signal-sending on another CPU that wants to examine it. 
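 *
 * From the tracer's side this is the stop reported by waitpid(); the
 * exit_code re-read below is whatever signal number (possibly 0) the
 * tracer passed when resuming.  An illustrative sketch, not part of
 * this file:
 *
 *        #include <signal.h>
 *        #include <stdio.h>
 *        #include <sys/ptrace.h>
 *        #include <sys/wait.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                int status;
 *                pid_t pid = fork();
 *
 *                if (pid == 0) {
 *                        ptrace(PTRACE_TRACEME, 0, NULL, NULL);
 *                        raise(SIGUSR1);   // traps in ptrace_stop()
 *                        _exit(0);
 *                }
 *                waitpid(pid, &status, 0);
 *                printf("stopped with %d\n", WSTOPSIG(status)); // SIGUSR1
 *
 *                // data == 0 cancels the signal; a different number here
 *                // would be delivered to the tracee instead of SIGUSR1.
 *                ptrace(PTRACE_CONT, pid, NULL, NULL);
 *                waitpid(pid, &status, 0); // normal exit
 *                return 0;
 *        }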
*/ spin_lock_irq(&current->sighand->siglock); exit_code = current->exit_code; current->last_siginfo = NULL; current->ptrace_message = 0; current->exit_code = 0; /* LISTENING can be set only during STOP traps, clear it */ current->jobctl &= ~(JOBCTL_LISTENING | JOBCTL_PTRACE_FROZEN); /* * Queued signals ignored us while we were stopped for tracing. * So check for any that we should take before resuming user mode. * This sets TIF_SIGPENDING, but never clears it. */ recalc_sigpending_tsk(current); return exit_code; } static int ptrace_do_notify(int signr, int exit_code, int why, unsigned long message) { kernel_siginfo_t info; clear_siginfo(&info); info.si_signo = signr; info.si_code = exit_code; info.si_pid = task_pid_vnr(current); info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); /* Let the debugger run. */ return ptrace_stop(exit_code, why, message, &info); } int ptrace_notify(int exit_code, unsigned long message) { int signr; BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP); if (unlikely(task_work_pending(current))) task_work_run(); spin_lock_irq(&current->sighand->siglock); signr = ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED, message); spin_unlock_irq(&current->sighand->siglock); return signr; } /** * do_signal_stop - handle group stop for SIGSTOP and other stop signals * @signr: signr causing group stop if initiating * * If %JOBCTL_STOP_PENDING is not set yet, initiate group stop with @signr * and participate in it. If already set, participate in the existing * group stop. If participated in a group stop (and thus slept), %true is * returned with siglock released. * * If ptraced, this function doesn't handle stop itself. Instead, * %JOBCTL_TRAP_STOP is scheduled and %false is returned with siglock * untouched. The caller must ensure that INTERRUPT trap handling takes * places afterwards. * * CONTEXT: * Must be called with @current->sighand->siglock held, which is released * on %true return. * * RETURNS: * %false if group stop is already cancelled or ptrace trap is scheduled. * %true if participated in group stop. */ static bool do_signal_stop(int signr) __releases(&current->sighand->siglock) { struct signal_struct *sig = current->signal; if (!(current->jobctl & JOBCTL_STOP_PENDING)) { unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME; struct task_struct *t; /* signr will be recorded in task->jobctl for retries */ WARN_ON_ONCE(signr & ~JOBCTL_STOP_SIGMASK); if (!likely(current->jobctl & JOBCTL_STOP_DEQUEUED) || unlikely(sig->flags & SIGNAL_GROUP_EXIT) || unlikely(sig->group_exec_task)) return false; /* * There is no group stop already in progress. We must * initiate one now. * * While ptraced, a task may be resumed while group stop is * still in effect and then receive a stop signal and * initiate another group stop. This deviates from the * usual behavior as two consecutive stop signals can't * cause two group stops when !ptraced. That is why we * also check !task_is_stopped(t) below. * * The condition can be distinguished by testing whether * SIGNAL_STOP_STOPPED is already set. Don't generate * group_exit_code in such case. * * This is not necessary for SIGNAL_STOP_CONTINUED because * an intervening stop signal is required to cause two * continued events regardless of ptrace. 
*/ if (!(sig->flags & SIGNAL_STOP_STOPPED)) sig->group_exit_code = signr; sig->group_stop_count = 0; if (task_set_jobctl_pending(current, signr | gstop)) sig->group_stop_count++; for_other_threads(current, t) { /* * Setting state to TASK_STOPPED for a group * stop is always done with the siglock held, * so this check has no races. */ if (!task_is_stopped(t) && task_set_jobctl_pending(t, signr | gstop)) { sig->group_stop_count++; if (likely(!(t->ptrace & PT_SEIZED))) signal_wake_up(t, 0); else ptrace_trap_notify(t); } } } if (likely(!current->ptrace)) { int notify = 0; /* * If there are no other threads in the group, or if there * is a group stop in progress and we are the last to stop, * report to the parent. */ if (task_participate_group_stop(current)) notify = CLD_STOPPED; current->jobctl |= JOBCTL_STOPPED; set_special_state(TASK_STOPPED); spin_unlock_irq(&current->sighand->siglock); /* * Notify the parent of the group stop completion. Because * we're not holding either the siglock or tasklist_lock * here, ptracer may attach inbetween; however, this is for * group stop and should always be delivered to the real * parent of the group leader. The new ptracer will get * its notification when this task transitions into * TASK_TRACED. */ if (notify) { read_lock(&tasklist_lock); do_notify_parent_cldstop(current, false, notify); read_unlock(&tasklist_lock); } /* Now we don't run again until woken by SIGCONT or SIGKILL */ cgroup_enter_frozen(); schedule(); return true; } else { /* * While ptraced, group stop is handled by STOP trap. * Schedule it and let the caller deal with it. */ task_set_jobctl_pending(current, JOBCTL_TRAP_STOP); return false; } } /** * do_jobctl_trap - take care of ptrace jobctl traps * * When PT_SEIZED, it's used for both group stop and explicit * SEIZE/INTERRUPT traps. Both generate PTRACE_EVENT_STOP trap with * accompanying siginfo. If stopped, lower eight bits of exit_code contain * the stop signal; otherwise, %SIGTRAP. * * When !PT_SEIZED, it's used only for group stop trap with stop signal * number as exit_code and no siginfo. * * CONTEXT: * Must be called with @current->sighand->siglock held, which may be * released and re-acquired before returning with intervening sleep. */ static void do_jobctl_trap(void) { struct signal_struct *signal = current->signal; int signr = current->jobctl & JOBCTL_STOP_SIGMASK; if (current->ptrace & PT_SEIZED) { if (!signal->group_stop_count && !(signal->flags & SIGNAL_STOP_STOPPED)) signr = SIGTRAP; WARN_ON_ONCE(!signr); ptrace_do_notify(signr, signr | (PTRACE_EVENT_STOP << 8), CLD_STOPPED, 0); } else { WARN_ON_ONCE(!signr); ptrace_stop(signr, CLD_STOPPED, 0, NULL); } } /** * do_freezer_trap - handle the freezer jobctl trap * * Puts the task into frozen state, if only the task is not about to quit. * In this case it drops JOBCTL_TRAP_FREEZE. * * CONTEXT: * Must be called with @current->sighand->siglock held, * which is always released before returning. */ static void do_freezer_trap(void) __releases(&current->sighand->siglock) { /* * If there are other trap bits pending except JOBCTL_TRAP_FREEZE, * let's make another loop to give it a chance to be handled. * In any case, we'll return back. */ if ((current->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) != JOBCTL_TRAP_FREEZE) { spin_unlock_irq(&current->sighand->siglock); return; } /* * Now we're sure that there is no pending fatal signal and no * pending traps. 
Clear TIF_SIGPENDING to not get out of schedule() * immediately (if there is a non-fatal signal pending), and * put the task into sleep. */ __set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); clear_thread_flag(TIF_SIGPENDING); spin_unlock_irq(&current->sighand->siglock); cgroup_enter_frozen(); schedule(); /* * We could've been woken by task_work, run it to clear * TIF_NOTIFY_SIGNAL. The caller will retry if necessary. */ clear_notify_signal(); if (unlikely(task_work_pending(current))) task_work_run(); } static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type) { /* * We do not check sig_kernel_stop(signr) but set this marker * unconditionally because we do not know whether debugger will * change signr. This flag has no meaning unless we are going * to stop after return from ptrace_stop(). In this case it will * be checked in do_signal_stop(), we should only stop if it was * not cleared by SIGCONT while we were sleeping. See also the * comment in dequeue_signal(). */ current->jobctl |= JOBCTL_STOP_DEQUEUED; signr = ptrace_stop(signr, CLD_TRAPPED, 0, info); /* We're back. Did the debugger cancel the sig? */ if (signr == 0) return signr; /* * Update the siginfo structure if the signal has * changed. If the debugger wanted something * specific in the siginfo structure then it should * have updated *info via PTRACE_SETSIGINFO. */ if (signr != info->si_signo) { clear_siginfo(info); info->si_signo = signr; info->si_errno = 0; info->si_code = SI_USER; rcu_read_lock(); info->si_pid = task_pid_vnr(current->parent); info->si_uid = from_kuid_munged(current_user_ns(), task_uid(current->parent)); rcu_read_unlock(); } /* If the (new) signal is now blocked, requeue it. */ if (sigismember(&current->blocked, signr) || fatal_signal_pending(current)) { send_signal_locked(signr, info, current, type); signr = 0; } return signr; } static void hide_si_addr_tag_bits(struct ksignal *ksig) { switch (siginfo_layout(ksig->sig, ksig->info.si_code)) { case SIL_FAULT: case SIL_FAULT_TRAPNO: case SIL_FAULT_MCEERR: case SIL_FAULT_BNDERR: case SIL_FAULT_PKUERR: case SIL_FAULT_PERF_EVENT: ksig->info.si_addr = arch_untagged_si_addr( ksig->info.si_addr, ksig->sig, ksig->info.si_code); break; case SIL_KILL: case SIL_TIMER: case SIL_POLL: case SIL_CHLD: case SIL_RT: case SIL_SYS: break; } } bool get_signal(struct ksignal *ksig) { struct sighand_struct *sighand = current->sighand; struct signal_struct *signal = current->signal; int signr; clear_notify_signal(); if (unlikely(task_work_pending(current))) task_work_run(); if (!task_sigpending(current)) return false; if (unlikely(uprobe_deny_signal())) return false; /* * Do this once, we can't return to user-mode if freezing() == T. * do_signal_stop() and ptrace_stop() do freezable_schedule() and * thus do not need another check after return. */ try_to_freeze(); relock: spin_lock_irq(&sighand->siglock); /* * Every stopped thread goes here after wakeup. Check to see if * we should notify the parent, prepare_signal(SIGCONT) encodes * the CLD_ si_code into SIGNAL_CLD_MASK bits. */ if (unlikely(signal->flags & SIGNAL_CLD_MASK)) { int why; if (signal->flags & SIGNAL_CLD_CONTINUED) why = CLD_CONTINUED; else why = CLD_STOPPED; signal->flags &= ~SIGNAL_CLD_MASK; spin_unlock_irq(&sighand->siglock); /* * Notify the parent that we're continuing. 
This event is * always per-process and doesn't make whole lot of sense * for ptracers, who shouldn't consume the state via * wait(2) either, but, for backward compatibility, notify * the ptracer of the group leader too unless it's gonna be * a duplicate. */ read_lock(&tasklist_lock); do_notify_parent_cldstop(current, false, why); if (ptrace_reparented(current->group_leader)) do_notify_parent_cldstop(current->group_leader, true, why); read_unlock(&tasklist_lock); goto relock; } for (;;) { struct k_sigaction *ka; enum pid_type type; /* Has this task already been marked for death? */ if ((signal->flags & SIGNAL_GROUP_EXIT) || signal->group_exec_task) { signr = SIGKILL; sigdelset(&current->pending.signal, SIGKILL); trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO, &sighand->action[SIGKILL-1]); recalc_sigpending(); /* * implies do_group_exit() or return to PF_USER_WORKER, * no need to initialize ksig->info/etc. */ goto fatal; } if (unlikely(current->jobctl & JOBCTL_STOP_PENDING) && do_signal_stop(0)) goto relock; if (unlikely(current->jobctl & (JOBCTL_TRAP_MASK | JOBCTL_TRAP_FREEZE))) { if (current->jobctl & JOBCTL_TRAP_MASK) { do_jobctl_trap(); spin_unlock_irq(&sighand->siglock); } else if (current->jobctl & JOBCTL_TRAP_FREEZE) do_freezer_trap(); goto relock; } /* * If the task is leaving the frozen state, let's update * cgroup counters and reset the frozen bit. */ if (unlikely(cgroup_task_frozen(current))) { spin_unlock_irq(&sighand->siglock); cgroup_leave_frozen(false); goto relock; } /* * Signals generated by the execution of an instruction * need to be delivered before any other pending signals * so that the instruction pointer in the signal stack * frame points to the faulting instruction. */ type = PIDTYPE_PID; signr = dequeue_synchronous_signal(&ksig->info); if (!signr) signr = dequeue_signal(&current->blocked, &ksig->info, &type); if (!signr) break; /* will return 0 */ if (unlikely(current->ptrace) && (signr != SIGKILL) && !(sighand->action[signr -1].sa.sa_flags & SA_IMMUTABLE)) { signr = ptrace_signal(signr, &ksig->info, type); if (!signr) continue; } ka = &sighand->action[signr-1]; /* Trace actually delivered signals. */ trace_signal_deliver(signr, &ksig->info, ka); if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */ continue; if (ka->sa.sa_handler != SIG_DFL) { /* Run the handler. */ ksig->ka = *ka; if (ka->sa.sa_flags & SA_ONESHOT) ka->sa.sa_handler = SIG_DFL; break; /* will return non-zero "signr" value */ } /* * Now we are doing the default action for this signal. */ if (sig_kernel_ignore(signr)) /* Default is nothing. */ continue; /* * Global init gets no signals it doesn't want. * Container-init gets no signals it doesn't want from same * container. * * Note that if global/container-init sees a sig_kernel_only() * signal here, the signal must have been generated internally * or must have come from an ancestor namespace. In either * case, the signal cannot be dropped. */ if (unlikely(signal->flags & SIGNAL_UNKILLABLE) && !sig_kernel_only(signr)) continue; if (sig_kernel_stop(signr)) { /* * The default action is to stop all threads in * the thread group. The job control signals * do nothing in an orphaned pgrp, but SIGSTOP * always works. Note that siglock needs to be * dropped during the call to is_orphaned_pgrp() * because of lock ordering with tasklist_lock. * This allows an intervening SIGCONT to be posted. * We need to check for that and bail out if necessary. 
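 *
 * "SIGSTOP always works" because it cannot be caught, blocked or
 * ignored; userspace attempts either fail or are silently stripped
 * (see sys_rt_sigprocmask() further down).  An illustrative sketch,
 * not part of this file:
 *
 *        #include <errno.h>
 *        #include <signal.h>
 *        #include <stdio.h>
 *        #include <string.h>
 *
 *        int main(void)
 *        {
 *                struct sigaction sa;
 *                sigset_t set;
 *
 *                memset(&sa, 0, sizeof(sa));
 *                sa.sa_handler = SIG_IGN;
 *                if (sigaction(SIGSTOP, &sa, NULL) < 0)
 *                        printf("sigaction: errno %d (EINVAL)\n", errno);
 *
 *                sigfillset(&set);
 *                sigprocmask(SIG_SETMASK, &set, NULL);
 *                sigprocmask(SIG_SETMASK, NULL, &set);
 *                printf("SIGSTOP blocked: %d\n",
 *                       sigismember(&set, SIGSTOP));   // prints 0
 *                return 0;
 *        }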
*/ if (signr != SIGSTOP) { spin_unlock_irq(&sighand->siglock); /* signals can be posted during this window */ if (is_current_pgrp_orphaned()) goto relock; spin_lock_irq(&sighand->siglock); } if (likely(do_signal_stop(signr))) { /* It released the siglock. */ goto relock; } /* * We didn't actually stop, due to a race * with SIGCONT or something like that. */ continue; } fatal: spin_unlock_irq(&sighand->siglock); if (unlikely(cgroup_task_frozen(current))) cgroup_leave_frozen(true); /* * Anything else is fatal, maybe with a core dump. */ current->flags |= PF_SIGNALED; if (sig_kernel_coredump(signr)) { if (print_fatal_signals) print_fatal_signal(signr); proc_coredump_connector(current); /* * If it was able to dump core, this kills all * other threads in the group and synchronizes with * their demise. If we lost the race with another * thread getting here, it set group_exit_code * first and our do_group_exit call below will use * that value and ignore the one we pass it. */ do_coredump(&ksig->info); } /* * PF_USER_WORKER threads will catch and exit on fatal signals * themselves. They have cleanup that must be performed, so we * cannot call do_exit() on their behalf. Note that ksig won't * be properly initialized, PF_USER_WORKER's shouldn't use it. */ if (current->flags & PF_USER_WORKER) goto out; /* * Death signals, no core dump. */ do_group_exit(signr); /* NOTREACHED */ } spin_unlock_irq(&sighand->siglock); ksig->sig = signr; if (signr && !(ksig->ka.sa.sa_flags & SA_EXPOSE_TAGBITS)) hide_si_addr_tag_bits(ksig); out: return signr > 0; } /** * signal_delivered - called after signal delivery to update blocked signals * @ksig: kernel signal struct * @stepping: nonzero if debugger single-step or block-step in use * * This function should be called when a signal has successfully been * delivered. It updates the blocked signals accordingly (@ksig->ka.sa.sa_mask * is always blocked), and the signal itself is blocked unless %SA_NODEFER * is set in @ksig->ka.sa.sa_flags. Tracing is notified. */ static void signal_delivered(struct ksignal *ksig, int stepping) { sigset_t blocked; /* A signal was successfully delivered, and the saved sigmask was stored on the signal frame, and will be restored by sigreturn. So we can simply clear the restore sigmask flag. */ clear_restore_sigmask(); sigorsets(&blocked, &current->blocked, &ksig->ka.sa.sa_mask); if (!(ksig->ka.sa.sa_flags & SA_NODEFER)) sigaddset(&blocked, ksig->sig); set_current_blocked(&blocked); if (current->sas_ss_flags & SS_AUTODISARM) sas_ss_reset(current); if (stepping) ptrace_notify(SIGTRAP, 0); } void signal_setup_done(int failed, struct ksignal *ksig, int stepping) { if (failed) force_sigsegv(ksig->sig); else signal_delivered(ksig, stepping); } /* * It could be that complete_signal() picked us to notify about the * group-wide signal. Other threads should be notified now to take * the shared signals in @which since we will not. */ static void retarget_shared_pending(struct task_struct *tsk, sigset_t *which) { sigset_t retarget; struct task_struct *t; sigandsets(&retarget, &tsk->signal->shared_pending.signal, which); if (sigisemptyset(&retarget)) return; for_other_threads(tsk, t) { if (t->flags & PF_EXITING) continue; if (!has_pending_signals(&retarget, &t->blocked)) continue; /* Remove the signals this thread can handle. 
*/ sigandsets(&retarget, &retarget, &t->blocked); if (!task_sigpending(t)) signal_wake_up(t, 0); if (sigisemptyset(&retarget)) break; } } void exit_signals(struct task_struct *tsk) { int group_stop = 0; sigset_t unblocked; /* * @tsk is about to have PF_EXITING set - lock out users which * expect stable threadgroup. */ cgroup_threadgroup_change_begin(tsk); if (thread_group_empty(tsk) || (tsk->signal->flags & SIGNAL_GROUP_EXIT)) { sched_mm_cid_exit_signals(tsk); tsk->flags |= PF_EXITING; cgroup_threadgroup_change_end(tsk); return; } spin_lock_irq(&tsk->sighand->siglock); /* * From now this task is not visible for group-wide signals, * see wants_signal(), do_signal_stop(). */ sched_mm_cid_exit_signals(tsk); tsk->flags |= PF_EXITING; cgroup_threadgroup_change_end(tsk); if (!task_sigpending(tsk)) goto out; unblocked = tsk->blocked; signotset(&unblocked); retarget_shared_pending(tsk, &unblocked); if (unlikely(tsk->jobctl & JOBCTL_STOP_PENDING) && task_participate_group_stop(tsk)) group_stop = CLD_STOPPED; out: spin_unlock_irq(&tsk->sighand->siglock); /* * If group stop has completed, deliver the notification. This * should always go to the real parent of the group leader. */ if (unlikely(group_stop)) { read_lock(&tasklist_lock); do_notify_parent_cldstop(tsk, false, group_stop); read_unlock(&tasklist_lock); } } /* * System call entry points. */ /** * sys_restart_syscall - restart a system call */ SYSCALL_DEFINE0(restart_syscall) { struct restart_block *restart = &current->restart_block; return restart->fn(restart); } long do_no_restart_syscall(struct restart_block *param) { return -EINTR; } static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset) { if (task_sigpending(tsk) && !thread_group_empty(tsk)) { sigset_t newblocked; /* A set of now blocked but previously unblocked signals. */ sigandnsets(&newblocked, newset, &current->blocked); retarget_shared_pending(tsk, &newblocked); } tsk->blocked = *newset; recalc_sigpending(); } /** * set_current_blocked - change current->blocked mask * @newset: new mask * * It is wrong to change ->blocked directly, this helper should be used * to ensure the process can't miss a shared signal we are going to block. */ void set_current_blocked(sigset_t *newset) { sigdelsetmask(newset, sigmask(SIGKILL) | sigmask(SIGSTOP)); __set_current_blocked(newset); } void __set_current_blocked(const sigset_t *newset) { struct task_struct *tsk = current; /* * In case the signal mask hasn't changed, there is nothing we need * to do. The current->blocked shouldn't be modified by other task. */ if (sigequalsets(&tsk->blocked, newset)) return; spin_lock_irq(&tsk->sighand->siglock); __set_task_blocked(tsk, newset); spin_unlock_irq(&tsk->sighand->siglock); } /* * This is also useful for kernel threads that want to temporarily * (or permanently) block certain signals. * * NOTE! Unlike the user-mode sys_sigprocmask(), the kernel * interface happily blocks "unblockable" signals like SIGKILL * and friends. */ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) { struct task_struct *tsk = current; sigset_t newset; /* Lockless, only current can change ->blocked, never from irq */ if (oldset) *oldset = tsk->blocked; switch (how) { case SIG_BLOCK: sigorsets(&newset, &tsk->blocked, set); break; case SIG_UNBLOCK: sigandnsets(&newset, &tsk->blocked, set); break; case SIG_SETMASK: newset = *set; break; default: return -EINVAL; } __set_current_blocked(&newset); return 0; } EXPORT_SYMBOL(sigprocmask); /* * The api helps set app-provided sigmasks. 
* * This is useful for syscalls such as ppoll, pselect, io_pgetevents and * epoll_pwait where a new sigmask is passed from userland for the syscalls. * * Note that it does set_restore_sigmask() in advance, so it must be always * paired with restore_saved_sigmask_unless() before return from syscall. */ int set_user_sigmask(const sigset_t __user *umask, size_t sigsetsize) { sigset_t kmask; if (!umask) return 0; if (sigsetsize != sizeof(sigset_t)) return -EINVAL; if (copy_from_user(&kmask, umask, sizeof(sigset_t))) return -EFAULT; set_restore_sigmask(); current->saved_sigmask = current->blocked; set_current_blocked(&kmask); return 0; } #ifdef CONFIG_COMPAT int set_compat_user_sigmask(const compat_sigset_t __user *umask, size_t sigsetsize) { sigset_t kmask; if (!umask) return 0; if (sigsetsize != sizeof(compat_sigset_t)) return -EINVAL; if (get_compat_sigset(&kmask, umask)) return -EFAULT; set_restore_sigmask(); current->saved_sigmask = current->blocked; set_current_blocked(&kmask); return 0; } #endif /** * sys_rt_sigprocmask - change the list of currently blocked signals * @how: whether to add, remove, or set signals * @nset: stores pending signals * @oset: previous value of signal mask if non-null * @sigsetsize: size of sigset_t type */ SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, nset, sigset_t __user *, oset, size_t, sigsetsize) { sigset_t old_set, new_set; int error; /* XXX: Don't preclude handling different sized sigset_t's. */ if (sigsetsize != sizeof(sigset_t)) return -EINVAL; old_set = current->blocked; if (nset) { if (copy_from_user(&new_set, nset, sizeof(sigset_t))) return -EFAULT; sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP)); error = sigprocmask(how, &new_set, NULL); if (error) return error; } if (oset) { if (copy_to_user(oset, &old_set, sizeof(sigset_t))) return -EFAULT; } return 0; } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, int, how, compat_sigset_t __user *, nset, compat_sigset_t __user *, oset, compat_size_t, sigsetsize) { sigset_t old_set = current->blocked; /* XXX: Don't preclude handling different sized sigset_t's. */ if (sigsetsize != sizeof(sigset_t)) return -EINVAL; if (nset) { sigset_t new_set; int error; if (get_compat_sigset(&new_set, nset)) return -EFAULT; sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP)); error = sigprocmask(how, &new_set, NULL); if (error) return error; } return oset ? put_compat_sigset(oset, &old_set, sizeof(*oset)) : 0; } #endif static void do_sigpending(sigset_t *set) { spin_lock_irq(&current->sighand->siglock); sigorsets(set, &current->pending.signal, &current->signal->shared_pending.signal); spin_unlock_irq(&current->sighand->siglock); /* Outside the lock because only this thread touches it. 
 */
	sigandsets(set, &current->blocked, set);
}

/**
 *  sys_rt_sigpending - examine a pending signal that has been raised
 *			while blocked
 *  @uset: stores pending signals
 *  @sigsetsize: size of sigset_t type or larger
 */
SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, uset, size_t, sigsetsize)
{
	sigset_t set;

	if (sigsetsize > sizeof(*uset))
		return -EINVAL;

	do_sigpending(&set);

	if (copy_to_user(uset, &set, sigsetsize))
		return -EFAULT;

	return 0;
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset,
		compat_size_t, sigsetsize)
{
	sigset_t set;

	if (sigsetsize > sizeof(*uset))
		return -EINVAL;

	do_sigpending(&set);

	return put_compat_sigset(uset, &set, sigsetsize);
}
#endif

static const struct {
	unsigned char limit, layout;
} sig_sicodes[] = {
	[SIGILL]  = { NSIGILL,  SIL_FAULT },
	[SIGFPE]  = { NSIGFPE,  SIL_FAULT },
	[SIGSEGV] = { NSIGSEGV, SIL_FAULT },
	[SIGBUS]  = { NSIGBUS,  SIL_FAULT },
	[SIGTRAP] = { NSIGTRAP, SIL_FAULT },
#if defined(SIGEMT)
	[SIGEMT]  = { NSIGEMT,  SIL_FAULT },
#endif
	[SIGCHLD] = { NSIGCHLD, SIL_CHLD },
	[SIGPOLL] = { NSIGPOLL, SIL_POLL },
	[SIGSYS]  = { NSIGSYS,  SIL_SYS },
};

static bool known_siginfo_layout(unsigned sig, int si_code)
{
	if (si_code == SI_KERNEL)
		return true;
	else if ((si_code > SI_USER) && (si_code < SI_KERNEL)) {
		if (sig_specific_sicodes(sig)) {
			if (si_code <= sig_sicodes[sig].limit)
				return true;
		} else if (si_code <= NSIGPOLL)
			return true;
	} else if (si_code >= SI_DETHREAD)
		return true;
	else if (si_code == SI_ASYNCNL)
		return true;
	return false;
}

enum siginfo_layout siginfo_layout(unsigned sig, int si_code)
{
	enum siginfo_layout layout = SIL_KILL;

	if ((si_code > SI_USER) && (si_code < SI_KERNEL)) {
		if ((sig < ARRAY_SIZE(sig_sicodes)) &&
		    (si_code <= sig_sicodes[sig].limit)) {
			layout = sig_sicodes[sig].layout;
			/* Handle the exceptions */
			if ((sig == SIGBUS) &&
			    (si_code >= BUS_MCEERR_AR) && (si_code <= BUS_MCEERR_AO))
				layout = SIL_FAULT_MCEERR;
			else if ((sig == SIGSEGV) && (si_code == SEGV_BNDERR))
				layout = SIL_FAULT_BNDERR;
#ifdef SEGV_PKUERR
			else if ((sig == SIGSEGV) && (si_code == SEGV_PKUERR))
				layout = SIL_FAULT_PKUERR;
#endif
			else if ((sig == SIGTRAP) && (si_code == TRAP_PERF))
				layout = SIL_FAULT_PERF_EVENT;
			else if (IS_ENABLED(CONFIG_SPARC) &&
				 (sig == SIGILL) && (si_code == ILL_ILLTRP))
				layout = SIL_FAULT_TRAPNO;
			else if (IS_ENABLED(CONFIG_ALPHA) &&
				 ((sig == SIGFPE) ||
				  ((sig == SIGTRAP) && (si_code == TRAP_UNK))))
				layout = SIL_FAULT_TRAPNO;
		} else if (si_code <= NSIGPOLL)
			layout = SIL_POLL;
	} else {
		if (si_code == SI_TIMER)
			layout = SIL_TIMER;
		else if (si_code == SI_SIGIO)
			layout = SIL_POLL;
		else if (si_code < 0)
			layout = SIL_RT;
	}
	return layout;
}

static inline char __user *si_expansion(const siginfo_t __user *info)
{
	return ((char __user *)info) + sizeof(struct kernel_siginfo);
}

int copy_siginfo_to_user(siginfo_t __user *to, const kernel_siginfo_t *from)
{
	char __user *expansion = si_expansion(to);

	if (copy_to_user(to, from, sizeof(struct kernel_siginfo)))
		return -EFAULT;
	if (clear_user(expansion, SI_EXPANSION_SIZE))
		return -EFAULT;
	return 0;
}

static int post_copy_siginfo_from_user(kernel_siginfo_t *info,
				       const siginfo_t __user *from)
{
	if (unlikely(!known_siginfo_layout(info->si_signo, info->si_code))) {
		char __user *expansion = si_expansion(from);
		char buf[SI_EXPANSION_SIZE];
		int i;
		/*
		 * An unknown si_code might need more than
		 * sizeof(struct kernel_siginfo) bytes. Verify all of the
		 * extra bytes are 0. This guarantees copy_siginfo_to_user
		 * will return this data to userspace exactly.
*/ if (copy_from_user(&buf, expansion, SI_EXPANSION_SIZE)) return -EFAULT; for (i = 0; i < SI_EXPANSION_SIZE; i++) { if (buf[i] != 0) return -E2BIG; } } return 0; } static int __copy_siginfo_from_user(int signo, kernel_siginfo_t *to, const siginfo_t __user *from) { if (copy_from_user(to, from, sizeof(struct kernel_siginfo))) return -EFAULT; to->si_signo = signo; return post_copy_siginfo_from_user(to, from); } int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from) { if (copy_from_user(to, from, sizeof(struct kernel_siginfo))) return -EFAULT; return post_copy_siginfo_from_user(to, from); } #ifdef CONFIG_COMPAT /** * copy_siginfo_to_external32 - copy a kernel siginfo into a compat user siginfo * @to: compat siginfo destination * @from: kernel siginfo source * * Note: This function does not work properly for the SIGCHLD on x32, but * fortunately it doesn't have to. The only valid callers for this function are * copy_siginfo_to_user32, which is overriden for x32 and the coredump code. * The latter does not care because SIGCHLD will never cause a coredump. */ void copy_siginfo_to_external32(struct compat_siginfo *to, const struct kernel_siginfo *from) { memset(to, 0, sizeof(*to)); to->si_signo = from->si_signo; to->si_errno = from->si_errno; to->si_code = from->si_code; switch(siginfo_layout(from->si_signo, from->si_code)) { case SIL_KILL: to->si_pid = from->si_pid; to->si_uid = from->si_uid; break; case SIL_TIMER: to->si_tid = from->si_tid; to->si_overrun = from->si_overrun; to->si_int = from->si_int; break; case SIL_POLL: to->si_band = from->si_band; to->si_fd = from->si_fd; break; case SIL_FAULT: to->si_addr = ptr_to_compat(from->si_addr); break; case SIL_FAULT_TRAPNO: to->si_addr = ptr_to_compat(from->si_addr); to->si_trapno = from->si_trapno; break; case SIL_FAULT_MCEERR: to->si_addr = ptr_to_compat(from->si_addr); to->si_addr_lsb = from->si_addr_lsb; break; case SIL_FAULT_BNDERR: to->si_addr = ptr_to_compat(from->si_addr); to->si_lower = ptr_to_compat(from->si_lower); to->si_upper = ptr_to_compat(from->si_upper); break; case SIL_FAULT_PKUERR: to->si_addr = ptr_to_compat(from->si_addr); to->si_pkey = from->si_pkey; break; case SIL_FAULT_PERF_EVENT: to->si_addr = ptr_to_compat(from->si_addr); to->si_perf_data = from->si_perf_data; to->si_perf_type = from->si_perf_type; to->si_perf_flags = from->si_perf_flags; break; case SIL_CHLD: to->si_pid = from->si_pid; to->si_uid = from->si_uid; to->si_status = from->si_status; to->si_utime = from->si_utime; to->si_stime = from->si_stime; break; case SIL_RT: to->si_pid = from->si_pid; to->si_uid = from->si_uid; to->si_int = from->si_int; break; case SIL_SYS: to->si_call_addr = ptr_to_compat(from->si_call_addr); to->si_syscall = from->si_syscall; to->si_arch = from->si_arch; break; } } int __copy_siginfo_to_user32(struct compat_siginfo __user *to, const struct kernel_siginfo *from) { struct compat_siginfo new; copy_siginfo_to_external32(&new, from); if (copy_to_user(to, &new, sizeof(struct compat_siginfo))) return -EFAULT; return 0; } static int post_copy_siginfo_from_user32(kernel_siginfo_t *to, const struct compat_siginfo *from) { clear_siginfo(to); to->si_signo = from->si_signo; to->si_errno = from->si_errno; to->si_code = from->si_code; switch(siginfo_layout(from->si_signo, from->si_code)) { case SIL_KILL: to->si_pid = from->si_pid; to->si_uid = from->si_uid; break; case SIL_TIMER: to->si_tid = from->si_tid; to->si_overrun = from->si_overrun; to->si_int = from->si_int; break; case SIL_POLL: to->si_band = from->si_band; 
to->si_fd = from->si_fd; break; case SIL_FAULT: to->si_addr = compat_ptr(from->si_addr); break; case SIL_FAULT_TRAPNO: to->si_addr = compat_ptr(from->si_addr); to->si_trapno = from->si_trapno; break; case SIL_FAULT_MCEERR: to->si_addr = compat_ptr(from->si_addr); to->si_addr_lsb = from->si_addr_lsb; break; case SIL_FAULT_BNDERR: to->si_addr = compat_ptr(from->si_addr); to->si_lower = compat_ptr(from->si_lower); to->si_upper = compat_ptr(from->si_upper); break; case SIL_FAULT_PKUERR: to->si_addr = compat_ptr(from->si_addr); to->si_pkey = from->si_pkey; break; case SIL_FAULT_PERF_EVENT: to->si_addr = compat_ptr(from->si_addr); to->si_perf_data = from->si_perf_data; to->si_perf_type = from->si_perf_type; to->si_perf_flags = from->si_perf_flags; break; case SIL_CHLD: to->si_pid = from->si_pid; to->si_uid = from->si_uid; to->si_status = from->si_status; #ifdef CONFIG_X86_X32_ABI if (in_x32_syscall()) { to->si_utime = from->_sifields._sigchld_x32._utime; to->si_stime = from->_sifields._sigchld_x32._stime; } else #endif { to->si_utime = from->si_utime; to->si_stime = from->si_stime; } break; case SIL_RT: to->si_pid = from->si_pid; to->si_uid = from->si_uid; to->si_int = from->si_int; break; case SIL_SYS: to->si_call_addr = compat_ptr(from->si_call_addr); to->si_syscall = from->si_syscall; to->si_arch = from->si_arch; break; } return 0; } static int __copy_siginfo_from_user32(int signo, struct kernel_siginfo *to, const struct compat_siginfo __user *ufrom) { struct compat_siginfo from; if (copy_from_user(&from, ufrom, sizeof(struct compat_siginfo))) return -EFAULT; from.si_signo = signo; return post_copy_siginfo_from_user32(to, &from); } int copy_siginfo_from_user32(struct kernel_siginfo *to, const struct compat_siginfo __user *ufrom) { struct compat_siginfo from; if (copy_from_user(&from, ufrom, sizeof(struct compat_siginfo))) return -EFAULT; return post_copy_siginfo_from_user32(to, &from); } #endif /* CONFIG_COMPAT */ /** * do_sigtimedwait - wait for queued signals specified in @which * @which: queued signals to wait for * @info: if non-null, the signal's siginfo is returned here * @ts: upper bound on process time suspension */ static int do_sigtimedwait(const sigset_t *which, kernel_siginfo_t *info, const struct timespec64 *ts) { ktime_t *to = NULL, timeout = KTIME_MAX; struct task_struct *tsk = current; sigset_t mask = *which; enum pid_type type; int sig, ret = 0; if (ts) { if (!timespec64_valid(ts)) return -EINVAL; timeout = timespec64_to_ktime(*ts); to = &timeout; } /* * Invert the set of allowed signals to get those we want to block. */ sigdelsetmask(&mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); signotset(&mask); spin_lock_irq(&tsk->sighand->siglock); sig = dequeue_signal(&mask, info, &type); if (!sig && timeout) { /* * None ready, temporarily unblock those we're interested * while we are sleeping in so that we'll be awakened when * they arrive. Unblocking is always fine, we can avoid * set_current_blocked(). */ tsk->real_blocked = tsk->blocked; sigandsets(&tsk->blocked, &tsk->blocked, &mask); recalc_sigpending(); spin_unlock_irq(&tsk->sighand->siglock); __set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); ret = schedule_hrtimeout_range(to, tsk->timer_slack_ns, HRTIMER_MODE_REL); spin_lock_irq(&tsk->sighand->siglock); __set_task_blocked(tsk, &tsk->real_blocked); sigemptyset(&tsk->real_blocked); sig = dequeue_signal(&mask, info, &type); } spin_unlock_irq(&tsk->sighand->siglock); if (sig) return sig; return ret ? 
-EINTR : -EAGAIN; } /** * sys_rt_sigtimedwait - synchronously wait for queued signals specified * in @uthese * @uthese: queued signals to wait for * @uinfo: if non-null, the signal's siginfo is returned here * @uts: upper bound on process time suspension * @sigsetsize: size of sigset_t type */ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, siginfo_t __user *, uinfo, const struct __kernel_timespec __user *, uts, size_t, sigsetsize) { sigset_t these; struct timespec64 ts; kernel_siginfo_t info; int ret; /* XXX: Don't preclude handling different sized sigset_t's. */ if (sigsetsize != sizeof(sigset_t)) return -EINVAL; if (copy_from_user(&these, uthese, sizeof(these))) return -EFAULT; if (uts) { if (get_timespec64(&ts, uts)) return -EFAULT; } ret = do_sigtimedwait(&these, &info, uts ? &ts : NULL); if (ret > 0 && uinfo) { if (copy_siginfo_to_user(uinfo, &info)) ret = -EFAULT; } return ret; } #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE4(rt_sigtimedwait_time32, const sigset_t __user *, uthese, siginfo_t __user *, uinfo, const struct old_timespec32 __user *, uts, size_t, sigsetsize) { sigset_t these; struct timespec64 ts; kernel_siginfo_t info; int ret; if (sigsetsize != sizeof(sigset_t)) return -EINVAL; if (copy_from_user(&these, uthese, sizeof(these))) return -EFAULT; if (uts) { if (get_old_timespec32(&ts, uts)) return -EFAULT; } ret = do_sigtimedwait(&these, &info, uts ? &ts : NULL); if (ret > 0 && uinfo) { if (copy_siginfo_to_user(uinfo, &info)) ret = -EFAULT; } return ret; } #endif #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait_time64, compat_sigset_t __user *, uthese, struct compat_siginfo __user *, uinfo, struct __kernel_timespec __user *, uts, compat_size_t, sigsetsize) { sigset_t s; struct timespec64 t; kernel_siginfo_t info; long ret; if (sigsetsize != sizeof(sigset_t)) return -EINVAL; if (get_compat_sigset(&s, uthese)) return -EFAULT; if (uts) { if (get_timespec64(&t, uts)) return -EFAULT; } ret = do_sigtimedwait(&s, &info, uts ? &t : NULL); if (ret > 0 && uinfo) { if (copy_siginfo_to_user32(uinfo, &info)) ret = -EFAULT; } return ret; } #ifdef CONFIG_COMPAT_32BIT_TIME COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait_time32, compat_sigset_t __user *, uthese, struct compat_siginfo __user *, uinfo, struct old_timespec32 __user *, uts, compat_size_t, sigsetsize) { sigset_t s; struct timespec64 t; kernel_siginfo_t info; long ret; if (sigsetsize != sizeof(sigset_t)) return -EINVAL; if (get_compat_sigset(&s, uthese)) return -EFAULT; if (uts) { if (get_old_timespec32(&t, uts)) return -EFAULT; } ret = do_sigtimedwait(&s, &info, uts ? &t : NULL); if (ret > 0 && uinfo) { if (copy_siginfo_to_user32(uinfo, &info)) ret = -EFAULT; } return ret; } #endif #endif static void prepare_kill_siginfo(int sig, struct kernel_siginfo *info, enum pid_type type) { clear_siginfo(info); info->si_signo = sig; info->si_errno = 0; info->si_code = (type == PIDTYPE_PID) ? SI_TKILL : SI_USER; info->si_pid = task_tgid_vnr(current); info->si_uid = from_kuid_munged(current_user_ns(), current_uid()); } /** * sys_kill - send a signal to a process * @pid: the PID of the process * @sig: signal to be sent */ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) { struct kernel_siginfo info; prepare_kill_siginfo(sig, &info, PIDTYPE_TGID); return kill_something_info(sig, &info, pid); } /* * Verify that the signaler and signalee either are in the same pid namespace * or that the signaler's pid namespace is an ancestor of the signalee's pid * namespace. 
*/ static bool access_pidfd_pidns(struct pid *pid) { struct pid_namespace *active = task_active_pid_ns(current); struct pid_namespace *p = ns_of_pid(pid); for (;;) { if (!p) return false; if (p == active) break; p = p->parent; } return true; } static int copy_siginfo_from_user_any(kernel_siginfo_t *kinfo, siginfo_t __user *info) { #ifdef CONFIG_COMPAT /* * Avoid hooking up compat syscalls and instead handle necessary * conversions here. Note, this is a stop-gap measure and should not be * considered a generic solution. */ if (in_compat_syscall()) return copy_siginfo_from_user32( kinfo, (struct compat_siginfo __user *)info); #endif return copy_siginfo_from_user(kinfo, info); } static struct pid *pidfd_to_pid(const struct file *file) { struct pid *pid; pid = pidfd_pid(file); if (!IS_ERR(pid)) return pid; return tgid_pidfd_to_pid(file); } #define PIDFD_SEND_SIGNAL_FLAGS \ (PIDFD_SIGNAL_THREAD | PIDFD_SIGNAL_THREAD_GROUP | \ PIDFD_SIGNAL_PROCESS_GROUP) static int do_pidfd_send_signal(struct pid *pid, int sig, enum pid_type type, siginfo_t __user *info, unsigned int flags) { kernel_siginfo_t kinfo; switch (flags) { case PIDFD_SIGNAL_THREAD: type = PIDTYPE_PID; break; case PIDFD_SIGNAL_THREAD_GROUP: type = PIDTYPE_TGID; break; case PIDFD_SIGNAL_PROCESS_GROUP: type = PIDTYPE_PGID; break; } if (info) { int ret; ret = copy_siginfo_from_user_any(&kinfo, info); if (unlikely(ret)) return ret; if (unlikely(sig != kinfo.si_signo)) return -EINVAL; /* Only allow sending arbitrary signals to yourself. */ if ((task_pid(current) != pid || type > PIDTYPE_TGID) && (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)) return -EPERM; } else { prepare_kill_siginfo(sig, &kinfo, type); } if (type == PIDTYPE_PGID) return kill_pgrp_info(sig, &kinfo, pid); return kill_pid_info_type(sig, &kinfo, pid, type); } /** * sys_pidfd_send_signal - Signal a process through a pidfd * @pidfd: file descriptor of the process * @sig: signal to send * @info: signal info * @flags: future flags * * Send the signal to the thread group or to the individual thread depending * on PIDFD_THREAD. * In the future extension to @flags may be used to override the default scope * of @pidfd. * * Return: 0 on success, negative errno on failure */ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, siginfo_t __user *, info, unsigned int, flags) { struct pid *pid; enum pid_type type; /* Enforce flags be set to 0 until we add an extension. */ if (flags & ~PIDFD_SEND_SIGNAL_FLAGS) return -EINVAL; /* Ensure that only a single signal scope determining flag is set. */ if (hweight32(flags & PIDFD_SEND_SIGNAL_FLAGS) > 1) return -EINVAL; switch (pidfd) { case PIDFD_SELF_THREAD: pid = get_task_pid(current, PIDTYPE_PID); type = PIDTYPE_PID; break; case PIDFD_SELF_THREAD_GROUP: pid = get_task_pid(current, PIDTYPE_TGID); type = PIDTYPE_TGID; break; default: { CLASS(fd, f)(pidfd); if (fd_empty(f)) return -EBADF; /* Is this a pidfd? */ pid = pidfd_to_pid(fd_file(f)); if (IS_ERR(pid)) return PTR_ERR(pid); if (!access_pidfd_pidns(pid)) return -EINVAL; /* Infer scope from the type of pidfd. 
*/ if (fd_file(f)->f_flags & PIDFD_THREAD) type = PIDTYPE_PID; else type = PIDTYPE_TGID; return do_pidfd_send_signal(pid, sig, type, info, flags); } } return do_pidfd_send_signal(pid, sig, type, info, flags); } static int do_send_specific(pid_t tgid, pid_t pid, int sig, struct kernel_siginfo *info) { struct task_struct *p; int error = -ESRCH; rcu_read_lock(); p = find_task_by_vpid(pid); if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) { error = check_kill_permission(sig, info, p); /* * The null signal is a permissions and process existence * probe. No signal is actually delivered. */ if (!error && sig) { error = do_send_sig_info(sig, info, p, PIDTYPE_PID); /* * If lock_task_sighand() failed we pretend the task * dies after receiving the signal. The window is tiny, * and the signal is private anyway. */ if (unlikely(error == -ESRCH)) error = 0; } } rcu_read_unlock(); return error; } static int do_tkill(pid_t tgid, pid_t pid, int sig) { struct kernel_siginfo info; prepare_kill_siginfo(sig, &info, PIDTYPE_PID); return do_send_specific(tgid, pid, sig, &info); } /** * sys_tgkill - send signal to one specific thread * @tgid: the thread group ID of the thread * @pid: the PID of the thread * @sig: signal to be sent * * This syscall also checks the @tgid and returns -ESRCH even if the PID * exists but it's not belonging to the target process anymore. This * method solves the problem of threads exiting and PIDs getting reused. */ SYSCALL_DEFINE3(tgkill, pid_t, tgid, pid_t, pid, int, sig) { /* This is only valid for single tasks */ if (pid <= 0 || tgid <= 0) return -EINVAL; return do_tkill(tgid, pid, sig); } /** * sys_tkill - send signal to one specific task * @pid: the PID of the task * @sig: signal to be sent * * Send a signal to only one task, even if it's a CLONE_THREAD task. */ SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig) { /* This is only valid for single tasks */ if (pid <= 0) return -EINVAL; return do_tkill(0, pid, sig); } static int do_rt_sigqueueinfo(pid_t pid, int sig, kernel_siginfo_t *info) { /* Not even root can pretend to send signals from the kernel. * Nor can they impersonate a kill()/tgkill(), which adds source info. */ if ((info->si_code >= 0 || info->si_code == SI_TKILL) && (task_pid_vnr(current) != pid)) return -EPERM; /* POSIX.1b doesn't mention process groups. */ return kill_proc_info(sig, info, pid); } /** * sys_rt_sigqueueinfo - send signal information to a signal * @pid: the PID of the thread * @sig: signal to be sent * @uinfo: signal info to be sent */ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, siginfo_t __user *, uinfo) { kernel_siginfo_t info; int ret = __copy_siginfo_from_user(sig, &info, uinfo); if (unlikely(ret)) return ret; return do_rt_sigqueueinfo(pid, sig, &info); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo, compat_pid_t, pid, int, sig, struct compat_siginfo __user *, uinfo) { kernel_siginfo_t info; int ret = __copy_siginfo_from_user32(sig, &info, uinfo); if (unlikely(ret)) return ret; return do_rt_sigqueueinfo(pid, sig, &info); } #endif static int do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, kernel_siginfo_t *info) { /* This is only valid for single tasks */ if (pid <= 0 || tgid <= 0) return -EINVAL; /* Not even root can pretend to send signals from the kernel. * Nor can they impersonate a kill()/tgkill(), which adds source info. 
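 * (Background: userspace queueing interfaces such as sigqueue() use negative
 * si_code values like SI_QUEUE, while si_code >= 0 and SI_TKILL are reserved
 * for siginfo generated by the kernel itself, so forging them is only
 * permitted when a task signals itself; anything else fails the -EPERM
 * check below.)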
*/ if ((info->si_code >= 0 || info->si_code == SI_TKILL) && (task_pid_vnr(current) != pid)) return -EPERM; return do_send_specific(tgid, pid, sig, info); } SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig, siginfo_t __user *, uinfo) { kernel_siginfo_t info; int ret = __copy_siginfo_from_user(sig, &info, uinfo); if (unlikely(ret)) return ret; return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo, compat_pid_t, tgid, compat_pid_t, pid, int, sig, struct compat_siginfo __user *, uinfo) { kernel_siginfo_t info; int ret = __copy_siginfo_from_user32(sig, &info, uinfo); if (unlikely(ret)) return ret; return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); } #endif /* * For kthreads only, must not be used if cloned with CLONE_SIGHAND */ void kernel_sigaction(int sig, __sighandler_t action) { spin_lock_irq(&current->sighand->siglock); current->sighand->action[sig - 1].sa.sa_handler = action; if (action == SIG_IGN) { sigset_t mask; sigemptyset(&mask); sigaddset(&mask, sig); flush_sigqueue_mask(current, &mask, &current->signal->shared_pending); flush_sigqueue_mask(current, &mask, &current->pending); recalc_sigpending(); } spin_unlock_irq(&current->sighand->siglock); } EXPORT_SYMBOL(kernel_sigaction); void __weak sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact) { } int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) { struct task_struct *p = current, *t; struct k_sigaction *k; sigset_t mask; if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig))) return -EINVAL; k = &p->sighand->action[sig-1]; spin_lock_irq(&p->sighand->siglock); if (k->sa.sa_flags & SA_IMMUTABLE) { spin_unlock_irq(&p->sighand->siglock); return -EINVAL; } if (oact) *oact = *k; /* * Make sure that we never accidentally claim to support SA_UNSUPPORTED, * e.g. by having an architecture use the bit in their uapi. */ BUILD_BUG_ON(UAPI_SA_FLAGS & SA_UNSUPPORTED); /* * Clear unknown flag bits in order to allow userspace to detect missing * support for flag bits and to allow the kernel to use non-uapi bits * internally. */ if (act) act->sa.sa_flags &= UAPI_SA_FLAGS; if (oact) oact->sa.sa_flags &= UAPI_SA_FLAGS; sigaction_compat_abi(act, oact); if (act) { bool was_ignored = k->sa.sa_handler == SIG_IGN; sigdelsetmask(&act->sa.sa_mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); *k = *act; /* * POSIX 3.3.1.3: * "Setting a signal action to SIG_IGN for a signal that is * pending shall cause the pending signal to be discarded, * whether or not it is blocked." 
* * "Setting a signal action to SIG_DFL for a signal that is * pending and whose default action is to ignore the signal * (for example, SIGCHLD), shall cause the pending signal to * be discarded, whether or not it is blocked" */ if (sig_handler_ignored(sig_handler(p, sig), sig)) { sigemptyset(&mask); sigaddset(&mask, sig); flush_sigqueue_mask(p, &mask, &p->signal->shared_pending); for_each_thread(p, t) flush_sigqueue_mask(p, &mask, &t->pending); } else if (was_ignored) { posixtimer_sig_unignore(p, sig); } } spin_unlock_irq(&p->sighand->siglock); return 0; } #ifdef CONFIG_DYNAMIC_SIGFRAME static inline void sigaltstack_lock(void) __acquires(&current->sighand->siglock) { spin_lock_irq(&current->sighand->siglock); } static inline void sigaltstack_unlock(void) __releases(&current->sighand->siglock) { spin_unlock_irq(&current->sighand->siglock); } #else static inline void sigaltstack_lock(void) { } static inline void sigaltstack_unlock(void) { } #endif static int do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp, size_t min_ss_size) { struct task_struct *t = current; int ret = 0; if (oss) { memset(oss, 0, sizeof(stack_t)); oss->ss_sp = (void __user *) t->sas_ss_sp; oss->ss_size = t->sas_ss_size; oss->ss_flags = sas_ss_flags(sp) | (current->sas_ss_flags & SS_FLAG_BITS); } if (ss) { void __user *ss_sp = ss->ss_sp; size_t ss_size = ss->ss_size; unsigned ss_flags = ss->ss_flags; int ss_mode; if (unlikely(on_sig_stack(sp))) return -EPERM; ss_mode = ss_flags & ~SS_FLAG_BITS; if (unlikely(ss_mode != SS_DISABLE && ss_mode != SS_ONSTACK && ss_mode != 0)) return -EINVAL; /* * Return before taking any locks if no actual * sigaltstack changes were requested. */ if (t->sas_ss_sp == (unsigned long)ss_sp && t->sas_ss_size == ss_size && t->sas_ss_flags == ss_flags) return 0; sigaltstack_lock(); if (ss_mode == SS_DISABLE) { ss_size = 0; ss_sp = NULL; } else { if (unlikely(ss_size < min_ss_size)) ret = -ENOMEM; if (!sigaltstack_size_valid(ss_size)) ret = -ENOMEM; } if (!ret) { t->sas_ss_sp = (unsigned long) ss_sp; t->sas_ss_size = ss_size; t->sas_ss_flags = ss_flags; } sigaltstack_unlock(); } return ret; } SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss) { stack_t new, old; int err; if (uss && copy_from_user(&new, uss, sizeof(stack_t))) return -EFAULT; err = do_sigaltstack(uss ? &new : NULL, uoss ? &old : NULL, current_user_stack_pointer(), MINSIGSTKSZ); if (!err && uoss && copy_to_user(uoss, &old, sizeof(stack_t))) err = -EFAULT; return err; } int restore_altstack(const stack_t __user *uss) { stack_t new; if (copy_from_user(&new, uss, sizeof(stack_t))) return -EFAULT; (void)do_sigaltstack(&new, NULL, current_user_stack_pointer(), MINSIGSTKSZ); /* squash all but EFAULT for now */ return 0; } int __save_altstack(stack_t __user *uss, unsigned long sp) { struct task_struct *t = current; int err = __put_user((void __user *)t->sas_ss_sp, &uss->ss_sp) | __put_user(t->sas_ss_flags, &uss->ss_flags) | __put_user(t->sas_ss_size, &uss->ss_size); return err; } #ifdef CONFIG_COMPAT static int do_compat_sigaltstack(const compat_stack_t __user *uss_ptr, compat_stack_t __user *uoss_ptr) { stack_t uss, uoss; int ret; if (uss_ptr) { compat_stack_t uss32; if (copy_from_user(&uss32, uss_ptr, sizeof(compat_stack_t))) return -EFAULT; uss.ss_sp = compat_ptr(uss32.ss_sp); uss.ss_flags = uss32.ss_flags; uss.ss_size = uss32.ss_size; } ret = do_sigaltstack(uss_ptr ? 
&uss : NULL, &uoss, compat_user_stack_pointer(), COMPAT_MINSIGSTKSZ); if (ret >= 0 && uoss_ptr) { compat_stack_t old; memset(&old, 0, sizeof(old)); old.ss_sp = ptr_to_compat(uoss.ss_sp); old.ss_flags = uoss.ss_flags; old.ss_size = uoss.ss_size; if (copy_to_user(uoss_ptr, &old, sizeof(compat_stack_t))) ret = -EFAULT; } return ret; } COMPAT_SYSCALL_DEFINE2(sigaltstack, const compat_stack_t __user *, uss_ptr, compat_stack_t __user *, uoss_ptr) { return do_compat_sigaltstack(uss_ptr, uoss_ptr); } int compat_restore_altstack(const compat_stack_t __user *uss) { int err = do_compat_sigaltstack(uss, NULL); /* squash all but -EFAULT for now */ return err == -EFAULT ? err : 0; } int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp) { int err; struct task_struct *t = current; err = __put_user(ptr_to_compat((void __user *)t->sas_ss_sp), &uss->ss_sp) | __put_user(t->sas_ss_flags, &uss->ss_flags) | __put_user(t->sas_ss_size, &uss->ss_size); return err; } #endif #ifdef __ARCH_WANT_SYS_SIGPENDING /** * sys_sigpending - examine pending signals * @uset: where mask of pending signal is returned */ SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, uset) { sigset_t set; if (sizeof(old_sigset_t) > sizeof(*uset)) return -EINVAL; do_sigpending(&set); if (copy_to_user(uset, &set, sizeof(old_sigset_t))) return -EFAULT; return 0; } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE1(sigpending, compat_old_sigset_t __user *, set32) { sigset_t set; do_sigpending(&set); return put_user(set.sig[0], set32); } #endif #endif #ifdef __ARCH_WANT_SYS_SIGPROCMASK /** * sys_sigprocmask - examine and change blocked signals * @how: whether to add, remove, or set signals * @nset: signals to add or remove (if non-null) * @oset: previous value of signal mask if non-null * * Some platforms have their own version with special arguments; * others support only sys_rt_sigprocmask. */ SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset, old_sigset_t __user *, oset) { old_sigset_t old_set, new_set; sigset_t new_blocked; old_set = current->blocked.sig[0]; if (nset) { if (copy_from_user(&new_set, nset, sizeof(*nset))) return -EFAULT; new_blocked = current->blocked; switch (how) { case SIG_BLOCK: sigaddsetmask(&new_blocked, new_set); break; case SIG_UNBLOCK: sigdelsetmask(&new_blocked, new_set); break; case SIG_SETMASK: new_blocked.sig[0] = new_set; break; default: return -EINVAL; } set_current_blocked(&new_blocked); } if (oset) { if (copy_to_user(oset, &old_set, sizeof(*oset))) return -EFAULT; } return 0; } #endif /* __ARCH_WANT_SYS_SIGPROCMASK */ #ifndef CONFIG_ODD_RT_SIGACTION /** * sys_rt_sigaction - alter an action taken by a process * @sig: signal to be sent * @act: new sigaction * @oact: used to save the previous sigaction * @sigsetsize: size of sigset_t type */ SYSCALL_DEFINE4(rt_sigaction, int, sig, const struct sigaction __user *, act, struct sigaction __user *, oact, size_t, sigsetsize) { struct k_sigaction new_sa, old_sa; int ret; /* XXX: Don't preclude handling different sized sigset_t's. */ if (sigsetsize != sizeof(sigset_t)) return -EINVAL; if (act && copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa))) return -EFAULT; ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? 
&old_sa : NULL); if (ret) return ret; if (oact && copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa))) return -EFAULT; return 0; } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig, const struct compat_sigaction __user *, act, struct compat_sigaction __user *, oact, compat_size_t, sigsetsize) { struct k_sigaction new_ka, old_ka; #ifdef __ARCH_HAS_SA_RESTORER compat_uptr_t restorer; #endif int ret; /* XXX: Don't preclude handling different sized sigset_t's. */ if (sigsetsize != sizeof(compat_sigset_t)) return -EINVAL; if (act) { compat_uptr_t handler; ret = get_user(handler, &act->sa_handler); new_ka.sa.sa_handler = compat_ptr(handler); #ifdef __ARCH_HAS_SA_RESTORER ret |= get_user(restorer, &act->sa_restorer); new_ka.sa.sa_restorer = compat_ptr(restorer); #endif ret |= get_compat_sigset(&new_ka.sa.sa_mask, &act->sa_mask); ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags); if (ret) return -EFAULT; } ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler); ret |= put_compat_sigset(&oact->sa_mask, &old_ka.sa.sa_mask, sizeof(oact->sa_mask)); ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags); #ifdef __ARCH_HAS_SA_RESTORER ret |= put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer); #endif } return ret; } #endif #endif /* !CONFIG_ODD_RT_SIGACTION */ #ifdef CONFIG_OLD_SIGACTION SYSCALL_DEFINE3(sigaction, int, sig, const struct old_sigaction __user *, act, struct old_sigaction __user *, oact) { struct k_sigaction new_ka, old_ka; int ret; if (act) { old_sigset_t mask; if (!access_ok(act, sizeof(*act)) || __get_user(new_ka.sa.sa_handler, &act->sa_handler) || __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) || __get_user(new_ka.sa.sa_flags, &act->sa_flags) || __get_user(mask, &act->sa_mask)) return -EFAULT; #ifdef __ARCH_HAS_KA_RESTORER new_ka.ka_restorer = NULL; #endif siginitset(&new_ka.sa.sa_mask, mask); } ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { if (!access_ok(oact, sizeof(*oact)) || __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) || __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) return -EFAULT; } return ret; } #endif #ifdef CONFIG_COMPAT_OLD_SIGACTION COMPAT_SYSCALL_DEFINE3(sigaction, int, sig, const struct compat_old_sigaction __user *, act, struct compat_old_sigaction __user *, oact) { struct k_sigaction new_ka, old_ka; int ret; compat_old_sigset_t mask; compat_uptr_t handler, restorer; if (act) { if (!access_ok(act, sizeof(*act)) || __get_user(handler, &act->sa_handler) || __get_user(restorer, &act->sa_restorer) || __get_user(new_ka.sa.sa_flags, &act->sa_flags) || __get_user(mask, &act->sa_mask)) return -EFAULT; #ifdef __ARCH_HAS_KA_RESTORER new_ka.ka_restorer = NULL; #endif new_ka.sa.sa_handler = compat_ptr(handler); new_ka.sa.sa_restorer = compat_ptr(restorer); siginitset(&new_ka.sa.sa_mask, mask); } ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { if (!access_ok(oact, sizeof(*oact)) || __put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler) || __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer) || __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) return -EFAULT; } return ret; } #endif #ifdef CONFIG_SGETMASK_SYSCALL /* * For backwards compatibility. 
Functionality superseded by sigprocmask. */ SYSCALL_DEFINE0(sgetmask) { /* SMP safe */ return current->blocked.sig[0]; } SYSCALL_DEFINE1(ssetmask, int, newmask) { int old = current->blocked.sig[0]; sigset_t newset; siginitset(&newset, newmask); set_current_blocked(&newset); return old; } #endif /* CONFIG_SGETMASK_SYSCALL */ #ifdef __ARCH_WANT_SYS_SIGNAL /* * For backwards compatibility. Functionality superseded by sigaction. */ SYSCALL_DEFINE2(signal, int, sig, __sighandler_t, handler) { struct k_sigaction new_sa, old_sa; int ret; new_sa.sa.sa_handler = handler; new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK; sigemptyset(&new_sa.sa.sa_mask); ret = do_sigaction(sig, &new_sa, &old_sa); return ret ? ret : (unsigned long)old_sa.sa.sa_handler; } #endif /* __ARCH_WANT_SYS_SIGNAL */ #ifdef __ARCH_WANT_SYS_PAUSE SYSCALL_DEFINE0(pause) { while (!signal_pending(current)) { __set_current_state(TASK_INTERRUPTIBLE); schedule(); } return -ERESTARTNOHAND; } #endif static int sigsuspend(sigset_t *set) { current->saved_sigmask = current->blocked; set_current_blocked(set); while (!signal_pending(current)) { __set_current_state(TASK_INTERRUPTIBLE); schedule(); } set_restore_sigmask(); return -ERESTARTNOHAND; } /** * sys_rt_sigsuspend - replace the signal mask for a value with the * @unewset value until a signal is received * @unewset: new signal mask value * @sigsetsize: size of sigset_t type */ SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize) { sigset_t newset; /* XXX: Don't preclude handling different sized sigset_t's. */ if (sigsetsize != sizeof(sigset_t)) return -EINVAL; if (copy_from_user(&newset, unewset, sizeof(newset))) return -EFAULT; return sigsuspend(&newset); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(rt_sigsuspend, compat_sigset_t __user *, unewset, compat_size_t, sigsetsize) { sigset_t newset; /* XXX: Don't preclude handling different sized sigset_t's. 
*/ if (sigsetsize != sizeof(sigset_t)) return -EINVAL; if (get_compat_sigset(&newset, unewset)) return -EFAULT; return sigsuspend(&newset); } #endif #ifdef CONFIG_OLD_SIGSUSPEND SYSCALL_DEFINE1(sigsuspend, old_sigset_t, mask) { sigset_t blocked; siginitset(&blocked, mask); return sigsuspend(&blocked); } #endif #ifdef CONFIG_OLD_SIGSUSPEND3 SYSCALL_DEFINE3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask) { sigset_t blocked; siginitset(&blocked, mask); return sigsuspend(&blocked); } #endif __weak const char *arch_vma_name(struct vm_area_struct *vma) { return NULL; } static inline void siginfo_buildtime_checks(void) { BUILD_BUG_ON(sizeof(struct siginfo) != SI_MAX_SIZE); /* Verify the offsets in the two siginfos match */ #define CHECK_OFFSET(field) \ BUILD_BUG_ON(offsetof(siginfo_t, field) != offsetof(kernel_siginfo_t, field)) /* kill */ CHECK_OFFSET(si_pid); CHECK_OFFSET(si_uid); /* timer */ CHECK_OFFSET(si_tid); CHECK_OFFSET(si_overrun); CHECK_OFFSET(si_value); /* rt */ CHECK_OFFSET(si_pid); CHECK_OFFSET(si_uid); CHECK_OFFSET(si_value); /* sigchld */ CHECK_OFFSET(si_pid); CHECK_OFFSET(si_uid); CHECK_OFFSET(si_status); CHECK_OFFSET(si_utime); CHECK_OFFSET(si_stime); /* sigfault */ CHECK_OFFSET(si_addr); CHECK_OFFSET(si_trapno); CHECK_OFFSET(si_addr_lsb); CHECK_OFFSET(si_lower); CHECK_OFFSET(si_upper); CHECK_OFFSET(si_pkey); CHECK_OFFSET(si_perf_data); CHECK_OFFSET(si_perf_type); CHECK_OFFSET(si_perf_flags); /* sigpoll */ CHECK_OFFSET(si_band); CHECK_OFFSET(si_fd); /* sigsys */ CHECK_OFFSET(si_call_addr); CHECK_OFFSET(si_syscall); CHECK_OFFSET(si_arch); #undef CHECK_OFFSET /* usb asyncio */ BUILD_BUG_ON(offsetof(struct siginfo, si_pid) != offsetof(struct siginfo, si_addr)); if (sizeof(int) == sizeof(void __user *)) { BUILD_BUG_ON(sizeof_field(struct siginfo, si_pid) != sizeof(void __user *)); } else { BUILD_BUG_ON((sizeof_field(struct siginfo, si_pid) + sizeof_field(struct siginfo, si_uid)) != sizeof(void __user *)); BUILD_BUG_ON(offsetofend(struct siginfo, si_pid) != offsetof(struct siginfo, si_uid)); } #ifdef CONFIG_COMPAT BUILD_BUG_ON(offsetof(struct compat_siginfo, si_pid) != offsetof(struct compat_siginfo, si_addr)); BUILD_BUG_ON(sizeof_field(struct compat_siginfo, si_pid) != sizeof(compat_uptr_t)); BUILD_BUG_ON(sizeof_field(struct compat_siginfo, si_pid) != sizeof_field(struct siginfo, si_pid)); #endif } #if defined(CONFIG_SYSCTL) static const struct ctl_table signal_debug_table[] = { #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE { .procname = "exception-trace", .data = &show_unhandled_signals, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, #endif }; static int __init init_signal_sysctls(void) { register_sysctl_init("debug", signal_debug_table); return 0; } early_initcall(init_signal_sysctls); #endif /* CONFIG_SYSCTL */ void __init signals_init(void) { siginfo_buildtime_checks(); sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC | SLAB_ACCOUNT); } #ifdef CONFIG_KGDB_KDB #include <linux/kdb.h> /* * kdb_send_sig - Allows kdb to send signals without exposing * signal internals. This function checks if the required locks are * available before calling the main signal code, to avoid kdb * deadlocks. 
*/ void kdb_send_sig(struct task_struct *t, int sig) { static struct task_struct *kdb_prev_t; int new_t, ret; if (!spin_trylock(&t->sighand->siglock)) { kdb_printf("Can't do kill command now.\n" "The sigmask lock is held somewhere else in " "kernel, try again later\n"); return; } new_t = kdb_prev_t != t; kdb_prev_t = t; if (!task_is_running(t) && new_t) { spin_unlock(&t->sighand->siglock); kdb_printf("Process is not RUNNING, sending a signal from " "kdb risks deadlock\n" "on the run queue locks. " "The signal has _not_ been sent.\n" "Reissue the kill command if you want to risk " "the deadlock.\n"); return; } ret = send_signal_locked(sig, SEND_SIG_PRIV, t, PIDTYPE_PID); spin_unlock(&t->sighand->siglock); if (ret) kdb_printf("Fail to deliver Signal %d to process %d.\n", sig, t->pid); else kdb_printf("Signal %d is sent to process %d.\n", sig, t->pid); } #endif /* CONFIG_KGDB_KDB */
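/*
 * Illustrative userspace sketch (not part of this file): a minimal program
 * that exercises the rt_sigprocmask and rt_sigtimedwait paths implemented
 * above by blocking SIGTERM and then collecting it synchronously. It only
 * uses standard POSIX wrappers; the 5-second timeout is arbitrary and error
 * handling is kept to a minimum. Send the process SIGTERM from another shell
 * (kill <pid>) to observe the synchronous delivery.
 */
#define _POSIX_C_SOURCE 200809L
#include <signal.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

int main(void)
{
	sigset_t set;
	siginfo_t info;
	struct timespec timeout = { .tv_sec = 5, .tv_nsec = 0 };
	int sig;

	/* Block SIGTERM so it stays pending instead of killing the process. */
	sigemptyset(&set);
	sigaddset(&set, SIGTERM);
	if (sigprocmask(SIG_BLOCK, &set, NULL) < 0) {
		perror("sigprocmask");
		return 1;
	}

	printf("pid %d waiting up to 5s for SIGTERM...\n", (int)getpid());

	/* The kernel side of this call is do_sigtimedwait() above. */
	sig = sigtimedwait(&set, &info, &timeout);
	if (sig < 0)
		perror("sigtimedwait");	/* EAGAIN on timeout, EINTR otherwise */
	else
		printf("got signal %d from pid %d\n", sig, (int)info.si_pid);

	return 0;
}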
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * ip_vs_app.c: Application module support for IPVS
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *
 * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
 * is that ip_vs_app module handles the reverse direction (incoming requests
 * and outgoing responses).
* * IP_MASQ_APP application masquerading module * * Author: Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar> */ #define KMSG_COMPONENT "IPVS" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/module.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/netfilter.h> #include <linux/slab.h> #include <net/net_namespace.h> #include <net/protocol.h> #include <net/tcp.h> #include <linux/stat.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/mutex.h> #include <net/ip_vs.h> EXPORT_SYMBOL(register_ip_vs_app); EXPORT_SYMBOL(unregister_ip_vs_app); EXPORT_SYMBOL(register_ip_vs_app_inc); static DEFINE_MUTEX(__ip_vs_app_mutex); /* * Get an ip_vs_app object */ static inline int ip_vs_app_get(struct ip_vs_app *app) { return try_module_get(app->module); } static inline void ip_vs_app_put(struct ip_vs_app *app) { module_put(app->module); } static void ip_vs_app_inc_destroy(struct ip_vs_app *inc) { kfree(inc->timeout_table); kfree(inc); } static void ip_vs_app_inc_rcu_free(struct rcu_head *head) { struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head); ip_vs_app_inc_destroy(inc); } /* * Allocate/initialize app incarnation and register it in proto apps. */ static int ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto, __u16 port) { struct ip_vs_protocol *pp; struct ip_vs_app *inc; int ret; if (!(pp = ip_vs_proto_get(proto))) return -EPROTONOSUPPORT; if (!pp->unregister_app) return -EOPNOTSUPP; inc = kmemdup(app, sizeof(*inc), GFP_KERNEL); if (!inc) return -ENOMEM; INIT_LIST_HEAD(&inc->p_list); INIT_LIST_HEAD(&inc->incs_list); inc->app = app; inc->port = htons(port); atomic_set(&inc->usecnt, 0); if (app->timeouts) { inc->timeout_table = ip_vs_create_timeout_table(app->timeouts, app->timeouts_size); if (!inc->timeout_table) { ret = -ENOMEM; goto out; } } ret = pp->register_app(ipvs, inc); if (ret) goto out; list_add(&inc->a_list, &app->incs_list); IP_VS_DBG(9, "%s App %s:%u registered\n", pp->name, inc->name, ntohs(inc->port)); return 0; out: ip_vs_app_inc_destroy(inc); return ret; } /* * Release app incarnation */ static void ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { struct ip_vs_protocol *pp; if (!(pp = ip_vs_proto_get(inc->protocol))) return; if (pp->unregister_app) pp->unregister_app(ipvs, inc); IP_VS_DBG(9, "%s App %s:%u unregistered\n", pp->name, inc->name, ntohs(inc->port)); list_del(&inc->a_list); call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free); } /* * Get reference to app inc (only called from softirq) * */ int ip_vs_app_inc_get(struct ip_vs_app *inc) { int result; result = ip_vs_app_get(inc->app); if (result) atomic_inc(&inc->usecnt); return result; } /* * Put the app inc (only called from timer or net softirq) */ void ip_vs_app_inc_put(struct ip_vs_app *inc) { atomic_dec(&inc->usecnt); ip_vs_app_put(inc->app); } /* * Register an application incarnation in protocol applications */ int register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto, __u16 port) { int result; mutex_lock(&__ip_vs_app_mutex); result = ip_vs_app_inc_new(ipvs, app, proto, port); mutex_unlock(&__ip_vs_app_mutex); return result; } /* Register application for netns */ struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app) { struct ip_vs_app *a; int err = 0; mutex_lock(&__ip_vs_app_mutex); /* increase the module use count */ if (!ip_vs_use_count_inc()) { err = -ENOENT; goto out_unlock; } list_for_each_entry(a, 
&ipvs->app_list, a_list) { if (!strcmp(app->name, a->name)) { err = -EEXIST; /* decrease the module use count */ ip_vs_use_count_dec(); goto out_unlock; } } a = kmemdup(app, sizeof(*app), GFP_KERNEL); if (!a) { err = -ENOMEM; /* decrease the module use count */ ip_vs_use_count_dec(); goto out_unlock; } INIT_LIST_HEAD(&a->incs_list); list_add(&a->a_list, &ipvs->app_list); out_unlock: mutex_unlock(&__ip_vs_app_mutex); return err ? ERR_PTR(err) : a; } /* * ip_vs_app unregistration routine * We are sure there are no app incarnations attached to services * Caller should use synchronize_rcu() or rcu_barrier() */ void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app) { struct ip_vs_app *a, *anxt, *inc, *nxt; mutex_lock(&__ip_vs_app_mutex); list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) { if (app && strcmp(app->name, a->name)) continue; list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) { ip_vs_app_inc_release(ipvs, inc); } list_del(&a->a_list); kfree(a); /* decrease the module use count */ ip_vs_use_count_dec(); } mutex_unlock(&__ip_vs_app_mutex); } /* * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) */ int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { return pp->app_conn_bind(cp); } /* * Unbind cp from application incarnation (called by cp destructor) */ void ip_vs_unbind_app(struct ip_vs_conn *cp) { struct ip_vs_app *inc = cp->app; if (!inc) return; if (inc->unbind_conn) inc->unbind_conn(inc, cp); if (inc->done_conn) inc->done_conn(inc, cp); ip_vs_app_inc_put(inc); cp->app = NULL; } /* * Fixes th->seq based on ip_vs_seq info. */ static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th) { __u32 seq = ntohl(th->seq); /* * Adjust seq with delta-offset for all packets after * the most recent resized pkt seq and with previous_delta offset * for all packets before most recent resized pkt seq. */ if (vseq->delta || vseq->previous_delta) { if(after(seq, vseq->init_seq)) { th->seq = htonl(seq + vseq->delta); IP_VS_DBG(9, "%s(): added delta (%d) to seq\n", __func__, vseq->delta); } else { th->seq = htonl(seq + vseq->previous_delta); IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n", __func__, vseq->previous_delta); } } } /* * Fixes th->ack_seq based on ip_vs_seq info. */ static inline void vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th) { __u32 ack_seq = ntohl(th->ack_seq); /* * Adjust ack_seq with delta-offset for * the packets AFTER most recent resized pkt has caused a shift * for packets before most recent resized pkt, use previous_delta */ if (vseq->delta || vseq->previous_delta) { /* since ack_seq is the number of octet that is expected to receive next, so compare it with init_seq+delta */ if(after(ack_seq, vseq->init_seq+vseq->delta)) { th->ack_seq = htonl(ack_seq - vseq->delta); IP_VS_DBG(9, "%s(): subtracted delta " "(%d) from ack_seq\n", __func__, vseq->delta); } else { th->ack_seq = htonl(ack_seq - vseq->previous_delta); IP_VS_DBG(9, "%s(): subtracted " "previous_delta (%d) from ack_seq\n", __func__, vseq->previous_delta); } } } /* * Updates ip_vs_seq if pkt has been resized * Assumes already checked proto==IPPROTO_TCP and diff!=0. 
*/ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq, unsigned int flag, __u32 seq, int diff) { /* spinlock is to keep updating cp->flags atomic */ spin_lock_bh(&cp->lock); if (!(cp->flags & flag) || after(seq, vseq->init_seq)) { vseq->previous_delta = vseq->delta; vseq->delta += diff; vseq->init_seq = seq; cp->flags |= flag; } spin_unlock_bh(&cp->lock); } static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, struct ip_vs_app *app, struct ip_vs_iphdr *ipvsh) { int diff; const unsigned int tcp_offset = ip_hdrlen(skb); struct tcphdr *th; __u32 seq; if (skb_ensure_writable(skb, tcp_offset + sizeof(*th))) return 0; th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); /* * Remember seq number in case this pkt gets resized */ seq = ntohl(th->seq); /* * Fix seq stuff if flagged as so. */ if (cp->flags & IP_VS_CONN_F_OUT_SEQ) vs_fix_seq(&cp->out_seq, th); if (cp->flags & IP_VS_CONN_F_IN_SEQ) vs_fix_ack_seq(&cp->in_seq, th); /* * Call private output hook function */ if (app->pkt_out == NULL) return 1; if (!app->pkt_out(app, cp, skb, &diff, ipvsh)) return 0; /* * Update ip_vs seq stuff if len has changed. */ if (diff != 0) vs_seq_update(cp, &cp->out_seq, IP_VS_CONN_F_OUT_SEQ, seq, diff); return 1; } /* * Output pkt hook. Will call bound ip_vs_app specific function * called by ipvs packet handler, assumes previously checked cp!=NULL * returns false if it can't handle packet (oom) */ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, struct ip_vs_iphdr *ipvsh) { struct ip_vs_app *app; /* * check if application module is bound to * this ip_vs_conn. */ if ((app = cp->app) == NULL) return 1; /* TCP is complicated */ if (cp->protocol == IPPROTO_TCP) return app_tcp_pkt_out(cp, skb, app, ipvsh); /* * Call private output hook function */ if (app->pkt_out == NULL) return 1; return app->pkt_out(app, cp, skb, NULL, ipvsh); } static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, struct ip_vs_app *app, struct ip_vs_iphdr *ipvsh) { int diff; const unsigned int tcp_offset = ip_hdrlen(skb); struct tcphdr *th; __u32 seq; if (skb_ensure_writable(skb, tcp_offset + sizeof(*th))) return 0; th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); /* * Remember seq number in case this pkt gets resized */ seq = ntohl(th->seq); /* * Fix seq stuff if flagged as so. */ if (cp->flags & IP_VS_CONN_F_IN_SEQ) vs_fix_seq(&cp->in_seq, th); if (cp->flags & IP_VS_CONN_F_OUT_SEQ) vs_fix_ack_seq(&cp->out_seq, th); /* * Call private input hook function */ if (app->pkt_in == NULL) return 1; if (!app->pkt_in(app, cp, skb, &diff, ipvsh)) return 0; /* * Update ip_vs seq stuff if len has changed. */ if (diff != 0) vs_seq_update(cp, &cp->in_seq, IP_VS_CONN_F_IN_SEQ, seq, diff); return 1; } /* * Input pkt hook. Will call bound ip_vs_app specific function * called by ipvs packet handler, assumes previously checked cp!=NULL. * returns false if can't handle packet (oom). */ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, struct ip_vs_iphdr *ipvsh) { struct ip_vs_app *app; /* * check if application module is bound to * this ip_vs_conn. 
*/ if ((app = cp->app) == NULL) return 1; /* TCP is complicated */ if (cp->protocol == IPPROTO_TCP) return app_tcp_pkt_in(cp, skb, app, ipvsh); /* * Call private input hook function */ if (app->pkt_in == NULL) return 1; return app->pkt_in(app, cp, skb, NULL, ipvsh); } #ifdef CONFIG_PROC_FS /* * /proc/net/ip_vs_app entry function */ static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos) { struct ip_vs_app *app, *inc; list_for_each_entry(app, &ipvs->app_list, a_list) { list_for_each_entry(inc, &app->incs_list, a_list) { if (pos-- == 0) return inc; } } return NULL; } static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos) { struct net *net = seq_file_net(seq); struct netns_ipvs *ipvs = net_ipvs(net); mutex_lock(&__ip_vs_app_mutex); return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN; } static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip_vs_app *inc, *app; struct list_head *e; struct net *net = seq_file_net(seq); struct netns_ipvs *ipvs = net_ipvs(net); ++*pos; if (v == SEQ_START_TOKEN) return ip_vs_app_idx(ipvs, 0); inc = v; app = inc->app; if ((e = inc->a_list.next) != &app->incs_list) return list_entry(e, struct ip_vs_app, a_list); /* go on to next application */ for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) { app = list_entry(e, struct ip_vs_app, a_list); list_for_each_entry(inc, &app->incs_list, a_list) { return inc; } } return NULL; } static void ip_vs_app_seq_stop(struct seq_file *seq, void *v) { mutex_unlock(&__ip_vs_app_mutex); } static int ip_vs_app_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_puts(seq, "prot port usecnt name\n"); else { const struct ip_vs_app *inc = v; seq_printf(seq, "%-3s %-7u %-6d %-17s\n", ip_vs_proto_name(inc->protocol), ntohs(inc->port), atomic_read(&inc->usecnt), inc->name); } return 0; } static const struct seq_operations ip_vs_app_seq_ops = { .start = ip_vs_app_seq_start, .next = ip_vs_app_seq_next, .stop = ip_vs_app_seq_stop, .show = ip_vs_app_seq_show, }; #endif int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs) { INIT_LIST_HEAD(&ipvs->app_list); #ifdef CONFIG_PROC_FS if (!proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_seq_ops, sizeof(struct seq_net_private))) return -ENOMEM; #endif return 0; } void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs) { unregister_ip_vs_app(ipvs, NULL /* all */); #ifdef CONFIG_PROC_FS remove_proc_entry("ip_vs_app", ipvs->net->proc_net); #endif }
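/*
 * Minimal sketch (not part of this file) of how a protocol helper is expected
 * to use the registration API above, modeled on ip_vs_ftp: register one
 * application template per netns, then attach an incarnation to a TCP port.
 * The "demo" name and port 2121 are made up for illustration, the
 * pkt_in/pkt_out hooks are deliberately left NULL (ip_vs_app_pkt_in/out then
 * pass packets through unchanged), and the pernet wiring that would call
 * these helpers is omitted.
 */
static struct ip_vs_app demo_app = {
	.name		= "demo",
	.protocol	= IPPROTO_TCP,
	.module		= THIS_MODULE,
};

static int demo_app_net_init(struct netns_ipvs *ipvs)
{
	struct ip_vs_app *app;
	int ret;

	app = register_ip_vs_app(ipvs, &demo_app);
	if (IS_ERR(app))
		return PTR_ERR(app);

	/* Bind the helper to TCP port 2121 via an app incarnation. */
	ret = register_ip_vs_app_inc(ipvs, app, IPPROTO_TCP, 2121);
	if (ret)
		unregister_ip_vs_app(ipvs, &demo_app);
	return ret;
}

static void demo_app_net_exit(struct netns_ipvs *ipvs)
{
	/* Releases all incarnations registered for this application. */
	unregister_ip_vs_app(ipvs, &demo_app);
}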
/* * linux/drivers/video/fbcmap.c -- Colormap handling for frame buffer devices * * Created 15 Jun 1997 by Geert Uytterhoeven * * 2001 - Documented with DocBook * - Brad Douglas <brad@neruo.com> * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of this archive for * more details. */ #include <linux/string.h> #include <linux/module.h> #include <linux/fb.h> #include <linux/slab.h> #include <linux/uaccess.h> static u16 red2[] __read_mostly = { 0x0000, 0xaaaa }; static u16 green2[] __read_mostly = { 0x0000, 0xaaaa }; static u16 blue2[] __read_mostly = { 0x0000, 0xaaaa }; static u16 red4[] __read_mostly = { 0x0000, 0xaaaa, 0x5555, 0xffff }; static u16 green4[] __read_mostly = { 0x0000, 0xaaaa, 0x5555, 0xffff }; static u16 blue4[] __read_mostly = { 0x0000, 0xaaaa, 0x5555, 0xffff }; static u16 red8[] __read_mostly = { 0x0000, 0x0000, 0x0000, 0x0000, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; static u16 green8[] __read_mostly = { 0x0000, 0x0000, 0xaaaa, 0xaaaa, 0x0000, 0x0000, 0x5555, 0xaaaa }; static u16 blue8[] __read_mostly = { 0x0000, 0xaaaa, 0x0000, 0xaaaa, 0x0000, 0xaaaa, 0x0000, 0xaaaa }; static u16 red16[] __read_mostly = { 0x0000, 0x0000, 0x0000, 0x0000, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0x5555, 0x5555, 0x5555, 0x5555, 0xffff, 0xffff, 0xffff, 0xffff }; static u16 green16[] __read_mostly = { 0x0000, 0x0000, 0xaaaa, 0xaaaa, 0x0000, 0x0000, 0x5555, 0xaaaa, 0x5555, 0x5555, 0xffff, 0xffff, 0x5555, 0x5555, 0xffff, 0xffff }; static u16 blue16[] __read_mostly = { 0x0000, 0xaaaa, 0x0000, 0xaaaa, 0x0000, 0xaaaa, 0x0000, 0xaaaa, 0x5555, 0xffff, 0x5555, 0xffff, 0x5555, 0xffff, 0x5555, 0xffff }; static const struct fb_cmap default_2_colors = { .len=2, .red=red2, .green=green2, .blue=blue2 }; static const struct fb_cmap default_8_colors = { .len=8, .red=red8, .green=green8, .blue=blue8 }; static const struct fb_cmap default_4_colors = { .len=4, .red=red4, .green=green4, .blue=blue4 }; static const struct fb_cmap default_16_colors = { .len=16, .red=red16, .green=green16, .blue=blue16 }; /** * fb_alloc_cmap_gfp - allocate a colormap * @cmap: frame buffer

colormap structure * @len: length of @cmap * @transp: boolean, 1 if there is transparency, 0 otherwise * @flags: flags for kmalloc memory allocation * * Allocates memory for a colormap @cmap. @len is the * number of entries in the palette. * * Returns negative errno on error, or zero on success. * */ int fb_alloc_cmap_gfp(struct fb_cmap *cmap, int len, int transp, gfp_t flags) { int size = len * sizeof(u16); int ret = -ENOMEM; flags |= __GFP_NOWARN; if (cmap->len != len) { fb_dealloc_cmap(cmap); if (!len) return 0; cmap->red = kzalloc(size, flags); if (!cmap->red) goto fail; cmap->green = kzalloc(size, flags); if (!cmap->green) goto fail; cmap->blue = kzalloc(size, flags); if (!cmap->blue) goto fail; if (transp) { cmap->transp = kzalloc(size, flags); if (!cmap->transp) goto fail; } else { cmap->transp = NULL; } } cmap->start = 0; cmap->len = len; ret = fb_copy_cmap(fb_default_cmap(len), cmap); if (ret) goto fail; return 0; fail: fb_dealloc_cmap(cmap); return ret; } int fb_alloc_cmap(struct fb_cmap *cmap, int len, int transp) { return fb_alloc_cmap_gfp(cmap, len, transp, GFP_ATOMIC); } /** * fb_dealloc_cmap - deallocate a colormap * @cmap: frame buffer colormap structure * * Deallocates a colormap that was previously allocated with * fb_alloc_cmap(). * */ void fb_dealloc_cmap(struct fb_cmap *cmap) { kfree(cmap->red); kfree(cmap->green); kfree(cmap->blue); kfree(cmap->transp); cmap->red = cmap->green = cmap->blue = cmap->transp = NULL; cmap->len = 0; } /** * fb_copy_cmap - copy a colormap * @from: frame buffer colormap structure * @to: frame buffer colormap structure * * Copy contents of colormap from @from to @to. */ int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to) { unsigned int tooff = 0, fromoff = 0; size_t size; if (to->start > from->start) fromoff = to->start - from->start; else tooff = from->start - to->start; if (fromoff >= from->len || tooff >= to->len) return -EINVAL; size = min_t(size_t, to->len - tooff, from->len - fromoff); if (size == 0) return -EINVAL; size *= sizeof(u16); memcpy(to->red+tooff, from->red+fromoff, size); memcpy(to->green+tooff, from->green+fromoff, size); memcpy(to->blue+tooff, from->blue+fromoff, size); if (from->transp && to->transp) memcpy(to->transp+tooff, from->transp+fromoff, size); return 0; } int fb_cmap_to_user(const struct fb_cmap *from, struct fb_cmap_user *to) { unsigned int tooff = 0, fromoff = 0; size_t size; if (to->start > from->start) fromoff = to->start - from->start; else tooff = from->start - to->start; if (fromoff >= from->len || tooff >= to->len) return -EINVAL; size = min_t(size_t, to->len - tooff, from->len - fromoff); if (size == 0) return -EINVAL; size *= sizeof(u16); if (copy_to_user(to->red+tooff, from->red+fromoff, size)) return -EFAULT; if (copy_to_user(to->green+tooff, from->green+fromoff, size)) return -EFAULT; if (copy_to_user(to->blue+tooff, from->blue+fromoff, size)) return -EFAULT; if (from->transp && to->transp) if (copy_to_user(to->transp+tooff, from->transp+fromoff, size)) return -EFAULT; return 0; } /** * fb_set_cmap - set the colormap * @cmap: frame buffer colormap structure * @info: frame buffer info structure * * Sets the colormap @cmap for a screen of device @info. * * Returns negative errno on error, or zero on success. 
* */ int fb_set_cmap(struct fb_cmap *cmap, struct fb_info *info) { int i, start, rc = 0; u16 *red, *green, *blue, *transp; u_int hred, hgreen, hblue, htransp = 0xffff; red = cmap->red; green = cmap->green; blue = cmap->blue; transp = cmap->transp; start = cmap->start; if (start < 0 || (!info->fbops->fb_setcolreg && !info->fbops->fb_setcmap)) return -EINVAL; if (info->fbops->fb_setcmap) { rc = info->fbops->fb_setcmap(cmap, info); } else { for (i = 0; i < cmap->len; i++) { hred = *red++; hgreen = *green++; hblue = *blue++; if (transp) htransp = *transp++; if (info->fbops->fb_setcolreg(start++, hred, hgreen, hblue, htransp, info)) break; } } if (rc == 0) fb_copy_cmap(cmap, &info->cmap); return rc; } int fb_set_user_cmap(struct fb_cmap_user *cmap, struct fb_info *info) { int rc, size = cmap->len * sizeof(u16); struct fb_cmap umap; if (size < 0 || size < cmap->len) return -E2BIG; memset(&umap, 0, sizeof(struct fb_cmap)); rc = fb_alloc_cmap_gfp(&umap, cmap->len, cmap->transp != NULL, GFP_KERNEL); if (rc) return rc; if (copy_from_user(umap.red, cmap->red, size) || copy_from_user(umap.green, cmap->green, size) || copy_from_user(umap.blue, cmap->blue, size) || (cmap->transp && copy_from_user(umap.transp, cmap->transp, size))) { rc = -EFAULT; goto out; } umap.start = cmap->start; lock_fb_info(info); rc = fb_set_cmap(&umap, info); unlock_fb_info(info); out: fb_dealloc_cmap(&umap); return rc; } /** * fb_default_cmap - get default colormap * @len: size of palette for a depth * * Gets the default colormap for a specific screen depth. @len * is the size of the palette for a particular screen depth. * * Returns pointer to a frame buffer colormap structure. * */ const struct fb_cmap *fb_default_cmap(int len) { if (len <= 2) return &default_2_colors; if (len <= 4) return &default_4_colors; if (len <= 8) return &default_8_colors; return &default_16_colors; } /** * fb_invert_cmaps - invert all defaults colormaps * * Invert all default colormaps. * */ void fb_invert_cmaps(void) { u_int i; for (i = 0; i < ARRAY_SIZE(red2); i++) { red2[i] = ~red2[i]; green2[i] = ~green2[i]; blue2[i] = ~blue2[i]; } for (i = 0; i < ARRAY_SIZE(red4); i++) { red4[i] = ~red4[i]; green4[i] = ~green4[i]; blue4[i] = ~blue4[i]; } for (i = 0; i < ARRAY_SIZE(red8); i++) { red8[i] = ~red8[i]; green8[i] = ~green8[i]; blue8[i] = ~blue8[i]; } for (i = 0; i < ARRAY_SIZE(red16); i++) { red16[i] = ~red16[i]; green16[i] = ~green16[i]; blue16[i] = ~blue16[i]; } } /* * Visible symbols for modules */ EXPORT_SYMBOL(fb_alloc_cmap); EXPORT_SYMBOL(fb_dealloc_cmap); EXPORT_SYMBOL(fb_copy_cmap); EXPORT_SYMBOL(fb_set_cmap); EXPORT_SYMBOL(fb_default_cmap); EXPORT_SYMBOL(fb_invert_cmaps);
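fb_copy_cmap() and fb_cmap_to_user() both have to cope with colormaps whose start indices differ: whichever map starts later supplies an offset into the other, and only the overlapping range is copied. The following standalone sketch, not part of fbcmap.c, repeats that offset and size computation with a reduced one-channel cmap struct invented for the example.

/* Userspace sketch of the fb_copy_cmap() overlap computation. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct cmap {
        unsigned int start;  /* first palette index covered by this map */
        unsigned int len;    /* number of entries */
        uint16_t *red;       /* only one channel shown for brevity */
};

/* Copy the overlapping entries of 'from' into 'to'; returns 0 or -1. */
static int cmap_copy(const struct cmap *from, struct cmap *to)
{
        unsigned int tooff = 0, fromoff = 0;
        size_t size;

        if (to->start > from->start)
                fromoff = to->start - from->start;
        else
                tooff = from->start - to->start;

        if (fromoff >= from->len || tooff >= to->len)
                return -1;

        size = to->len - tooff < from->len - fromoff ?
               to->len - tooff : from->len - fromoff;
        if (size == 0)
                return -1;

        memcpy(to->red + tooff, from->red + fromoff, size * sizeof(uint16_t));
        return 0;
}

int main(void)
{
        uint16_t src[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
        uint16_t dst[4] = { 0 };
        struct cmap from = { .start = 0, .len = 8, .red = src };
        struct cmap to   = { .start = 4, .len = 4, .red = dst };

        /* 'to' starts at index 4, so entries 4..7 of 'from' are copied. */
        if (cmap_copy(&from, &to) == 0)
                printf("dst[0..3] = %u %u %u %u\n", dst[0], dst[1], dst[2], dst[3]);
        return 0;
}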
// SPDX-License-Identifier: GPL-2.0-only /* * PS/2 driver library * * Copyright (c) 1999-2002 Vojtech Pavlik * Copyright (c) 2004 Dmitry Torokhov */ #include <linux/delay.h> #include <linux/module.h> #include <linux/sched.h> #include <linux/interrupt.h> #include <linux/input.h> #include <linux/kmsan-checks.h> #include <linux/serio.h> #include <linux/i8042.h> #include <linux/libps2.h> #define DRIVER_DESC "PS/2 driver library" #define PS2_CMD_SETSCALE11 0x00e6 #define PS2_CMD_SETRES 0x10e8 #define PS2_CMD_EX_SETLEDS 0x20eb #define PS2_CMD_SETLEDS 0x10ed #define PS2_CMD_GETID 0x02f2 #define PS2_CMD_SETREP 0x10f3 /* Set repeat rate/set report rate */ #define PS2_CMD_RESET_BAT 0x02ff #define PS2_RET_BAT 0xaa #define PS2_RET_ID 0x00 #define PS2_RET_ACK 0xfa #define PS2_RET_NAK 0xfe #define PS2_RET_ERR 0xfc #define PS2_FLAG_ACK BIT(0) /* Waiting for ACK/NAK */ #define PS2_FLAG_CMD BIT(1) /* Waiting for a command to finish */ #define PS2_FLAG_CMD1 BIT(2) /* Waiting for the first byte of command response */ #define PS2_FLAG_WAITID BIT(3) /* Command executing is GET ID */ #define PS2_FLAG_NAK BIT(4) /* Last transmission
was NAKed */ #define PS2_FLAG_PASS_NOACK BIT(5) /* Pass non-ACK byte to receive handler */ static int ps2_do_sendbyte(struct ps2dev *ps2dev, u8 byte, unsigned int timeout, unsigned int max_attempts) __releases(&ps2dev->serio->lock) __acquires(&ps2dev->serio->lock) { int attempt = 0; int error; lockdep_assert_held(&ps2dev->serio->lock); do { ps2dev->nak = 1; ps2dev->flags |= PS2_FLAG_ACK; serio_continue_rx(ps2dev->serio); error = serio_write(ps2dev->serio, byte); if (error) dev_dbg(&ps2dev->serio->dev, "failed to write %#02x: %d\n", byte, error); else wait_event_timeout(ps2dev->wait, !(ps2dev->flags & PS2_FLAG_ACK), msecs_to_jiffies(timeout)); serio_pause_rx(ps2dev->serio); } while (ps2dev->nak == PS2_RET_NAK && ++attempt < max_attempts); ps2dev->flags &= ~PS2_FLAG_ACK; if (!error) { switch (ps2dev->nak) { case 0: break; case PS2_RET_NAK: error = -EAGAIN; break; case PS2_RET_ERR: error = -EPROTO; break; default: error = -EIO; break; } } if (error || attempt > 1) dev_dbg(&ps2dev->serio->dev, "%02x - %d (%x), attempt %d\n", byte, error, ps2dev->nak, attempt); return error; } /** * ps2_sendbyte - sends a byte to the device and wait for acknowledgement * @ps2dev: a PS/2 device to send the data to * @byte: data to be sent to the device * @timeout: timeout for sending the data and receiving an acknowledge * * The function doesn't handle retransmission, the caller is expected to handle * it when needed. * * ps2_sendbyte() can only be called from a process context. */ int ps2_sendbyte(struct ps2dev *ps2dev, u8 byte, unsigned int timeout) { int retval; guard(serio_pause_rx)(ps2dev->serio); retval = ps2_do_sendbyte(ps2dev, byte, timeout, 1); dev_dbg(&ps2dev->serio->dev, "%02x - %x\n", byte, ps2dev->nak); return retval; } EXPORT_SYMBOL(ps2_sendbyte); /** * ps2_begin_command - mark beginning of execution of a complex command * @ps2dev: a PS/2 device executing the command * * Serializes a complex/compound command. Once command is finished * ps2_end_command() should be called. 
*/ void ps2_begin_command(struct ps2dev *ps2dev) { struct mutex *m = ps2dev->serio->ps2_cmd_mutex ?: &ps2dev->cmd_mutex; mutex_lock(m); } EXPORT_SYMBOL(ps2_begin_command); /** * ps2_end_command - mark end of execution of a complex command * @ps2dev: a PS/2 device executing the command */ void ps2_end_command(struct ps2dev *ps2dev) { struct mutex *m = ps2dev->serio->ps2_cmd_mutex ?: &ps2dev->cmd_mutex; mutex_unlock(m); } EXPORT_SYMBOL(ps2_end_command); /** * ps2_drain - waits for device to transmit requested number of bytes * and discards them * @ps2dev: the PS/2 device that should be drained * @maxbytes: maximum number of bytes to be drained * @timeout: time to drain the device */ void ps2_drain(struct ps2dev *ps2dev, size_t maxbytes, unsigned int timeout) { if (maxbytes > sizeof(ps2dev->cmdbuf)) { WARN_ON(1); maxbytes = sizeof(ps2dev->cmdbuf); } ps2_begin_command(ps2dev); scoped_guard(serio_pause_rx, ps2dev->serio) { ps2dev->flags = PS2_FLAG_CMD; ps2dev->cmdcnt = maxbytes; } wait_event_timeout(ps2dev->wait, !(ps2dev->flags & PS2_FLAG_CMD), msecs_to_jiffies(timeout)); ps2_end_command(ps2dev); } EXPORT_SYMBOL(ps2_drain); /** * ps2_is_keyboard_id - checks received ID byte against the list of * known keyboard IDs * @id_byte: data byte that should be checked */ bool ps2_is_keyboard_id(u8 id_byte) { static const u8 keyboard_ids[] = { 0xab, /* Regular keyboards */ 0xac, /* NCD Sun keyboard */ 0x2b, /* Trust keyboard, translated */ 0x5d, /* Trust keyboard */ 0x60, /* NMB SGI keyboard, translated */ 0x47, /* NMB SGI keyboard */ }; return memchr(keyboard_ids, id_byte, sizeof(keyboard_ids)) != NULL; } EXPORT_SYMBOL(ps2_is_keyboard_id); /* * ps2_adjust_timeout() is called after receiving 1st byte of command * response and tries to reduce remaining timeout to speed up command * completion. */ static int ps2_adjust_timeout(struct ps2dev *ps2dev, unsigned int command, unsigned int timeout) { switch (command) { case PS2_CMD_RESET_BAT: /* * Device has sent the first response byte after * reset command, reset is thus done, so we can * shorten the timeout. * The next byte will come soon (keyboard) or not * at all (mouse). */ if (timeout > msecs_to_jiffies(100)) timeout = msecs_to_jiffies(100); break; case PS2_CMD_GETID: /* * Microsoft Natural Elite keyboard responds to * the GET ID command as it were a mouse, with * a single byte. Fail the command so atkbd will * use alternative probe to detect it. */ if (ps2dev->cmdbuf[1] == 0xaa) { scoped_guard(serio_pause_rx, ps2dev->serio) ps2dev->flags = 0; timeout = 0; } /* * If device behind the port is not a keyboard there * won't be 2nd byte of ID response. */ if (!ps2_is_keyboard_id(ps2dev->cmdbuf[1])) { scoped_guard(serio_pause_rx, ps2dev->serio) ps2dev->flags = ps2dev->cmdcnt = 0; timeout = 0; } break; default: break; } return timeout; } /** * __ps2_command - send a command to PS/2 device * @ps2dev: the PS/2 device that should execute the command * @param: a buffer containing parameters to be sent along with the command, * or place where the results of the command execution will be deposited, * or both * @command: command word that encodes the command itself, as well as number of * additional parameter bytes that should be sent to the device and expected * length of the command response * * Not serialized. Callers should use ps2_begin_command() and ps2_end_command() * to ensure proper serialization for complex commands. 
*/ int __ps2_command(struct ps2dev *ps2dev, u8 *param, unsigned int command) { unsigned int timeout; unsigned int send = (command >> 12) & 0xf; unsigned int receive = (command >> 8) & 0xf; int rc; int i; u8 send_param[16]; if (receive > sizeof(ps2dev->cmdbuf)) { WARN_ON(1); return -EINVAL; } if (send && !param) { WARN_ON(1); return -EINVAL; } memcpy(send_param, param, send); /* * Not using guard notation because we need to break critical * section below while waiting for the response. */ serio_pause_rx(ps2dev->serio); ps2dev->cmdcnt = receive; switch (command) { case PS2_CMD_GETID: /* * Some mice do not ACK the "get ID" command, prepare to * handle this. */ ps2dev->flags = PS2_FLAG_WAITID; break; case PS2_CMD_SETLEDS: case PS2_CMD_EX_SETLEDS: case PS2_CMD_SETREP: ps2dev->flags = PS2_FLAG_PASS_NOACK; break; default: ps2dev->flags = 0; break; } if (receive) { /* Indicate that we expect response to the command. */ ps2dev->flags |= PS2_FLAG_CMD | PS2_FLAG_CMD1; if (param) for (i = 0; i < receive; i++) ps2dev->cmdbuf[(receive - 1) - i] = param[i]; } /* * Some devices (Synaptics) perform the reset before * ACKing the reset command, and so it can take a long * time before the ACK arrives. */ timeout = command == PS2_CMD_RESET_BAT ? 1000 : 200; rc = ps2_do_sendbyte(ps2dev, command & 0xff, timeout, 2); if (rc) goto out_reset_flags; /* Send command parameters, if any. */ for (i = 0; i < send; i++) { rc = ps2_do_sendbyte(ps2dev, param[i], 200, 2); if (rc) goto out_reset_flags; } serio_continue_rx(ps2dev->serio); /* * The reset command takes a long time to execute. */ timeout = msecs_to_jiffies(command == PS2_CMD_RESET_BAT ? 4000 : 500); timeout = wait_event_timeout(ps2dev->wait, !(ps2dev->flags & PS2_FLAG_CMD1), timeout); if (ps2dev->cmdcnt && !(ps2dev->flags & PS2_FLAG_CMD1)) { timeout = ps2_adjust_timeout(ps2dev, command, timeout); wait_event_timeout(ps2dev->wait, !(ps2dev->flags & PS2_FLAG_CMD), timeout); } serio_pause_rx(ps2dev->serio); if (param) { for (i = 0; i < receive; i++) param[i] = ps2dev->cmdbuf[(receive - 1) - i]; kmsan_unpoison_memory(param, receive); } if (ps2dev->cmdcnt && (command != PS2_CMD_RESET_BAT || ps2dev->cmdcnt != 1)) { rc = -EPROTO; goto out_reset_flags; } rc = 0; out_reset_flags: ps2dev->flags = 0; serio_continue_rx(ps2dev->serio); dev_dbg(&ps2dev->serio->dev, "%02x [%*ph] - %x/%08lx [%*ph]\n", command & 0xff, send, send_param, ps2dev->nak, ps2dev->flags, receive, param ?: send_param); /* * ps_command() handles resends itself, so do not leak -EAGAIN * to the callers. */ return rc != -EAGAIN ? rc : -EPROTO; } EXPORT_SYMBOL(__ps2_command); /** * ps2_command - send a command to PS/2 device * @ps2dev: the PS/2 device that should execute the command * @param: a buffer containing parameters to be sent along with the command, * or place where the results of the command execution will be deposited, * or both * @command: command word that encodes the command itself, as well as number of * additional parameter bytes that should be sent to the device and expected * length of the command response * * Note: ps2_command() serializes the command execution so that only one * command can be executed at a time for either individual port or the entire * 8042 controller. 
*/ int ps2_command(struct ps2dev *ps2dev, u8 *param, unsigned int command) { int rc; ps2_begin_command(ps2dev); rc = __ps2_command(ps2dev, param, command); ps2_end_command(ps2dev); return rc; } EXPORT_SYMBOL(ps2_command); /** * ps2_sliced_command - sends an extended PS/2 command to a mouse * @ps2dev: the PS/2 device that should execute the command * @command: command byte * * The command is sent using "sliced" syntax understood by advanced devices, * such as Logitech or Synaptics touchpads. The command is encoded as: * 0xE6 0xE8 rr 0xE8 ss 0xE8 tt 0xE8 uu where (rr*64)+(ss*16)+(tt*4)+uu * is the command. */ int ps2_sliced_command(struct ps2dev *ps2dev, u8 command) { int i; int retval; ps2_begin_command(ps2dev); retval = __ps2_command(ps2dev, NULL, PS2_CMD_SETSCALE11); if (retval) goto out; for (i = 6; i >= 0; i -= 2) { u8 d = (command >> i) & 3; retval = __ps2_command(ps2dev, &d, PS2_CMD_SETRES); if (retval) break; } out: dev_dbg(&ps2dev->serio->dev, "%02x - %d\n", command, retval); ps2_end_command(ps2dev); return retval; } EXPORT_SYMBOL(ps2_sliced_command); /** * ps2_init - initializes ps2dev structure * @ps2dev: structure to be initialized * @serio: serio port associated with the PS/2 device * @pre_receive_handler: validation handler to check basic communication state * @receive_handler: main protocol handler * * Prepares ps2dev structure for use in drivers for PS/2 devices. */ void ps2_init(struct ps2dev *ps2dev, struct serio *serio, ps2_pre_receive_handler_t pre_receive_handler, ps2_receive_handler_t receive_handler) { ps2dev->pre_receive_handler = pre_receive_handler; ps2dev->receive_handler = receive_handler; mutex_init(&ps2dev->cmd_mutex); lockdep_set_subclass(&ps2dev->cmd_mutex, serio->depth); init_waitqueue_head(&ps2dev->wait); ps2dev->serio = serio; serio_set_drvdata(serio, ps2dev); } EXPORT_SYMBOL(ps2_init); /* * ps2_handle_response() stores device's response to a command and notifies * the process waiting for completion of the command. Note that there is a * distinction between waiting for the first byte of the response, and * waiting for subsequent bytes. It is done so that callers could shorten * timeouts once first byte of response is received. */ static void ps2_handle_response(struct ps2dev *ps2dev, u8 data) { if (ps2dev->cmdcnt) ps2dev->cmdbuf[--ps2dev->cmdcnt] = data; if (ps2dev->flags & PS2_FLAG_CMD1) { ps2dev->flags &= ~PS2_FLAG_CMD1; if (ps2dev->cmdcnt) wake_up(&ps2dev->wait); } if (!ps2dev->cmdcnt) { ps2dev->flags &= ~PS2_FLAG_CMD; wake_up(&ps2dev->wait); } } /* * ps2_handle_ack() processes ACK/NAK of a command from a PS/2 device, * possibly applying workarounds for mice not acknowledging the "get ID" * command. */ static void ps2_handle_ack(struct ps2dev *ps2dev, u8 data) { switch (data) { case PS2_RET_ACK: ps2dev->nak = 0; break; case PS2_RET_NAK: ps2dev->flags |= PS2_FLAG_NAK; ps2dev->nak = PS2_RET_NAK; break; case PS2_RET_ERR: if (ps2dev->flags & PS2_FLAG_NAK) { ps2dev->flags &= ~PS2_FLAG_NAK; ps2dev->nak = PS2_RET_ERR; break; } fallthrough; /* * Workaround for mice which don't ACK the Get ID command. * These are valid mouse IDs that we recognize. */ case 0x00: case 0x03: case 0x04: if (ps2dev->flags & PS2_FLAG_WAITID) { ps2dev->nak = 0; break; } fallthrough; default: /* * Do not signal errors if we get unexpected reply while * waiting for an ACK to the initial (first) command byte: * the device might not be quiesced yet and continue * delivering data. 
For certain commands (such as set leds and * set repeat rate) that can be used during normal device * operation, we even pass this data byte to the normal receive * handler. * Note that we reset PS2_FLAG_WAITID flag, so the workaround * for mice not acknowledging the Get ID command only triggers * on the 1st byte; if device spews data we really want to see * a real ACK from it. */ dev_dbg(&ps2dev->serio->dev, "unexpected %#02x\n", data); if (ps2dev->flags & PS2_FLAG_PASS_NOACK) ps2dev->receive_handler(ps2dev, data); ps2dev->flags &= ~(PS2_FLAG_WAITID | PS2_FLAG_PASS_NOACK); return; } if (!ps2dev->nak) ps2dev->flags &= ~PS2_FLAG_NAK; ps2dev->flags &= ~PS2_FLAG_ACK; if (!ps2dev->nak && data != PS2_RET_ACK) ps2_handle_response(ps2dev, data); else wake_up(&ps2dev->wait); } /* * Clears state of PS/2 device after communication error by resetting majority * of flags and waking up waiters, if any. */ static void ps2_cleanup(struct ps2dev *ps2dev) { unsigned long old_flags = ps2dev->flags; /* reset all flags except last nak */ ps2dev->flags &= PS2_FLAG_NAK; if (old_flags & PS2_FLAG_ACK) ps2dev->nak = 1; if (old_flags & (PS2_FLAG_ACK | PS2_FLAG_CMD)) wake_up(&ps2dev->wait); } /** * ps2_interrupt - common interrupt handler for PS/2 devices * @serio: serio port for the device * @data: a data byte received from the device * @flags: flags such as %SERIO_PARITY or %SERIO_TIMEOUT indicating state of * the data transfer * * ps2_interrupt() invokes pre-receive handler, optionally handles command * acknowledgement and response from the device, and finally passes the data * to the main protocol handler for future processing. */ irqreturn_t ps2_interrupt(struct serio *serio, u8 data, unsigned int flags) { struct ps2dev *ps2dev = serio_get_drvdata(serio); enum ps2_disposition rc; rc = ps2dev->pre_receive_handler(ps2dev, data, flags); switch (rc) { case PS2_ERROR: ps2_cleanup(ps2dev); break; case PS2_IGNORE: break; case PS2_PROCESS: if (ps2dev->flags & PS2_FLAG_ACK) ps2_handle_ack(ps2dev, data); else if (ps2dev->flags & PS2_FLAG_CMD) ps2_handle_response(ps2dev, data); else ps2dev->receive_handler(ps2dev, data); break; } return IRQ_HANDLED; } EXPORT_SYMBOL(ps2_interrupt); MODULE_AUTHOR("Dmitry Torokhov <dtor@mail.ru>"); MODULE_DESCRIPTION("PS/2 driver library"); MODULE_LICENSE("GPL");
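The PS2_CMD_* values above are more than opcodes: as __ps2_command() shows, bits 12-15 of the command word give the number of parameter bytes to send, bits 8-11 the number of response bytes to expect, and the low byte is what actually goes to the device (PS2_CMD_GETID = 0x02f2 sends nothing, expects two bytes, opcode 0xf2). The short standalone decoder below is only an illustration of that packing, not part of libps2.

/* Decode PS/2 library command words: 0xSRcc = send S parameter bytes,
 * receive R response bytes, command byte cc. Mirrors the
 * (command >> 12) & 0xf / (command >> 8) & 0xf extraction in __ps2_command(). */
#include <stdio.h>

static void decode(const char *name, unsigned int command)
{
        unsigned int send = (command >> 12) & 0xf;     /* parameter bytes to send */
        unsigned int receive = (command >> 8) & 0xf;   /* response bytes expected */
        unsigned int opcode = command & 0xff;          /* byte written to the device */

        printf("%-20s opcode 0x%02x, send %u, receive %u\n",
               name, opcode, send, receive);
}

int main(void)
{
        decode("PS2_CMD_SETSCALE11", 0x00e6);
        decode("PS2_CMD_SETRES",     0x10e8);
        decode("PS2_CMD_GETID",      0x02f2);
        decode("PS2_CMD_RESET_BAT",  0x02ff);
        return 0;
}

ps2_sliced_command() builds on the same primitives: the 8-bit argument is split into four 2-bit chunks, each sent as the parameter of a PS2_CMD_SETRES after an initial PS2_CMD_SETSCALE11, which is exactly the 0xE6 0xE8 rr 0xE8 ss 0xE8 tt 0xE8 uu sequence described in its kernel-doc.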
// SPDX-License-Identifier: GPL-2.0-only /* * vivid-vbi-cap.c - vbi capture support functions. * * Copyright 2014 Cisco Systems, Inc. and/or its affiliates. All rights reserved. */ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/videodev2.h> #include <media/v4l2-common.h> #include "vivid-core.h" #include "vivid-kthread-cap.h" #include "vivid-vbi-cap.h" #include "vivid-vbi-gen.h" #include "vivid-vid-common.h" static void vivid_sliced_vbi_cap_fill(struct vivid_dev *dev, unsigned seqnr) { struct vivid_vbi_gen_data *vbi_gen = &dev->vbi_gen; bool is_60hz = dev->std_cap[dev->input] & V4L2_STD_525_60; vivid_vbi_gen_sliced(vbi_gen, is_60hz, seqnr); if (!is_60hz) { if (vivid_vid_can_loop(dev)) { if (dev->vbi_out_have_wss) { vbi_gen->data[12].data[0] = dev->vbi_out_wss[0]; vbi_gen->data[12].data[1] = dev->vbi_out_wss[1]; } else { vbi_gen->data[12].id = 0; } } else { switch (tpg_g_video_aspect(&dev->tpg)) { case TPG_VIDEO_ASPECT_14X9_CENTRE: vbi_gen->data[12].data[0] = 0x01; break; case TPG_VIDEO_ASPECT_16X9_CENTRE: vbi_gen->data[12].data[0] = 0x0b; break; case TPG_VIDEO_ASPECT_16X9_ANAMORPHIC: vbi_gen->data[12].data[0] = 0x07; break; case TPG_VIDEO_ASPECT_4X3: default: vbi_gen->data[12].data[0] = 0x08; break; } } } else if (vivid_vid_can_loop(dev) && is_60hz) { if (dev->vbi_out_have_cc[0]) { vbi_gen->data[0].data[0] = dev->vbi_out_cc[0][0]; vbi_gen->data[0].data[1] = dev->vbi_out_cc[0][1]; } else { vbi_gen->data[0].id = 0; } if (dev->vbi_out_have_cc[1]) { vbi_gen->data[1].data[0] = dev->vbi_out_cc[1][0]; vbi_gen->data[1].data[1] = dev->vbi_out_cc[1][1]; } else { vbi_gen->data[1].id = 0; } } } static void vivid_g_fmt_vbi_cap(struct vivid_dev *dev, struct v4l2_vbi_format *vbi) { bool is_60hz = dev->std_cap[dev->input] & V4L2_STD_525_60; vbi->sampling_rate = 27000000; vbi->offset = 24; vbi->samples_per_line = 1440; vbi->sample_format = V4L2_PIX_FMT_GREY; vbi->start[0] = is_60hz ? V4L2_VBI_ITU_525_F1_START + 9 : V4L2_VBI_ITU_625_F1_START + 5; vbi->start[1] = is_60hz ? V4L2_VBI_ITU_525_F2_START + 9 : V4L2_VBI_ITU_625_F2_START + 5; vbi->count[0] = vbi->count[1] = is_60hz ?
12 : 18; vbi->flags = dev->vbi_cap_interlaced ? V4L2_VBI_INTERLACED : 0; vbi->reserved[0] = 0; vbi->reserved[1] = 0; } void vivid_raw_vbi_cap_process(struct vivid_dev *dev, struct vivid_buffer *buf) { struct v4l2_vbi_format vbi; u8 *vbuf = vb2_plane_vaddr(&buf->vb.vb2_buf, 0); vivid_g_fmt_vbi_cap(dev, &vbi); buf->vb.sequence = dev->vbi_cap_seq_count; if (dev->field_cap == V4L2_FIELD_ALTERNATE) buf->vb.sequence /= 2; vivid_sliced_vbi_cap_fill(dev, buf->vb.sequence); memset(vbuf, 0x10, vb2_plane_size(&buf->vb.vb2_buf, 0)); if (!VIVID_INVALID_SIGNAL(dev->std_signal_mode[dev->input])) vivid_vbi_gen_raw(&dev->vbi_gen, &vbi, vbuf); } void vivid_sliced_vbi_cap_process(struct vivid_dev *dev, struct vivid_buffer *buf) { struct v4l2_sliced_vbi_data *vbuf = vb2_plane_vaddr(&buf->vb.vb2_buf, 0); buf->vb.sequence = dev->vbi_cap_seq_count; if (dev->field_cap == V4L2_FIELD_ALTERNATE) buf->vb.sequence /= 2; vivid_sliced_vbi_cap_fill(dev, buf->vb.sequence); memset(vbuf, 0, vb2_plane_size(&buf->vb.vb2_buf, 0)); if (!VIVID_INVALID_SIGNAL(dev->std_signal_mode[dev->input])) { unsigned i; for (i = 0; i < 25; i++) vbuf[i] = dev->vbi_gen.data[i]; } } static int vbi_cap_queue_setup(struct vb2_queue *vq, unsigned *nbuffers, unsigned *nplanes, unsigned sizes[], struct device *alloc_devs[]) { struct vivid_dev *dev = vb2_get_drv_priv(vq); bool is_60hz = dev->std_cap[dev->input] & V4L2_STD_525_60; unsigned size = vq->type == V4L2_BUF_TYPE_SLICED_VBI_CAPTURE ? 36 * sizeof(struct v4l2_sliced_vbi_data) : 1440 * 2 * (is_60hz ? 12 : 18); if (!vivid_is_sdtv_cap(dev)) return -EINVAL; if (*nplanes) return sizes[0] < size ? -EINVAL : 0; sizes[0] = size; *nplanes = 1; return 0; } static int vbi_cap_buf_prepare(struct vb2_buffer *vb) { struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); bool is_60hz = dev->std_cap[dev->input] & V4L2_STD_525_60; unsigned size = vb->vb2_queue->type == V4L2_BUF_TYPE_SLICED_VBI_CAPTURE ? 36 * sizeof(struct v4l2_sliced_vbi_data) : 1440 * 2 * (is_60hz ? 12 : 18); dprintk(dev, 1, "%s\n", __func__); if (dev->buf_prepare_error) { /* * Error injection: test what happens if buf_prepare() returns * an error. 
*/ dev->buf_prepare_error = false; return -EINVAL; } if (vb2_plane_size(vb, 0) < size) { dprintk(dev, 1, "%s data will not fit into plane (%lu < %u)\n", __func__, vb2_plane_size(vb, 0), size); return -EINVAL; } vb2_set_plane_payload(vb, 0, size); return 0; } static void vbi_cap_buf_queue(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); struct vivid_buffer *buf = container_of(vbuf, struct vivid_buffer, vb); dprintk(dev, 1, "%s\n", __func__); spin_lock(&dev->slock); list_add_tail(&buf->list, &dev->vbi_cap_active); spin_unlock(&dev->slock); } static int vbi_cap_start_streaming(struct vb2_queue *vq, unsigned count) { struct vivid_dev *dev = vb2_get_drv_priv(vq); int err; dprintk(dev, 1, "%s\n", __func__); dev->vbi_cap_seq_count = 0; if (dev->start_streaming_error) { dev->start_streaming_error = false; err = -EINVAL; } else { err = vivid_start_generating_vid_cap(dev, &dev->vbi_cap_streaming); } if (err) { struct vivid_buffer *buf, *tmp; list_for_each_entry_safe(buf, tmp, &dev->vbi_cap_active, list) { list_del(&buf->list); vb2_buffer_done(&buf->vb.vb2_buf, VB2_BUF_STATE_QUEUED); } } return err; } /* abort streaming and wait for last buffer */ static void vbi_cap_stop_streaming(struct vb2_queue *vq) { struct vivid_dev *dev = vb2_get_drv_priv(vq); dprintk(dev, 1, "%s\n", __func__); vivid_stop_generating_vid_cap(dev, &dev->vbi_cap_streaming); } static void vbi_cap_buf_request_complete(struct vb2_buffer *vb) { struct vivid_dev *dev = vb2_get_drv_priv(vb->vb2_queue); v4l2_ctrl_request_complete(vb->req_obj.req, &dev->ctrl_hdl_vbi_cap); } const struct vb2_ops vivid_vbi_cap_qops = { .queue_setup = vbi_cap_queue_setup, .buf_prepare = vbi_cap_buf_prepare, .buf_queue = vbi_cap_buf_queue, .start_streaming = vbi_cap_start_streaming, .stop_streaming = vbi_cap_stop_streaming, .buf_request_complete = vbi_cap_buf_request_complete, }; int vidioc_g_fmt_vbi_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_vbi_format *vbi = &f->fmt.vbi; if (!vivid_is_sdtv_cap(dev) || !dev->has_raw_vbi_cap) return -EINVAL; vivid_g_fmt_vbi_cap(dev, vbi); return 0; } int vidioc_s_fmt_vbi_cap(struct file *file, void *priv, struct v4l2_format *f) { struct vivid_dev *dev = video_drvdata(file); int ret = vidioc_g_fmt_vbi_cap(file, priv, f); if (ret) return ret; if (f->type != V4L2_BUF_TYPE_VBI_CAPTURE && vb2_is_busy(&dev->vb_vbi_cap_q)) return -EBUSY; return 0; } void vivid_fill_service_lines(struct v4l2_sliced_vbi_format *vbi, u32 service_set) { vbi->io_size = sizeof(struct v4l2_sliced_vbi_data) * 36; vbi->service_set = service_set; memset(vbi->service_lines, 0, sizeof(vbi->service_lines)); memset(vbi->reserved, 0, sizeof(vbi->reserved)); if (vbi->service_set == 0) return; if (vbi->service_set & V4L2_SLICED_CAPTION_525) { vbi->service_lines[0][21] = V4L2_SLICED_CAPTION_525; vbi->service_lines[1][21] = V4L2_SLICED_CAPTION_525; } if (vbi->service_set & V4L2_SLICED_WSS_625) { unsigned i; for (i = 7; i <= 18; i++) vbi->service_lines[0][i] = vbi->service_lines[1][i] = V4L2_SLICED_TELETEXT_B; vbi->service_lines[0][23] = V4L2_SLICED_WSS_625; } } int vidioc_g_fmt_sliced_vbi_cap(struct file *file, void *fh, struct v4l2_format *fmt) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_sliced_vbi_format *vbi = &fmt->fmt.sliced; if (!vivid_is_sdtv_cap(dev) || !dev->has_sliced_vbi_cap) return -EINVAL; vivid_fill_service_lines(vbi, dev->service_set_cap); return 0; } int 
vidioc_try_fmt_sliced_vbi_cap(struct file *file, void *fh, struct v4l2_format *fmt) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_sliced_vbi_format *vbi = &fmt->fmt.sliced; bool is_60hz = dev->std_cap[dev->input] & V4L2_STD_525_60; u32 service_set = vbi->service_set; if (!vivid_is_sdtv_cap(dev) || !dev->has_sliced_vbi_cap) return -EINVAL; service_set &= is_60hz ? V4L2_SLICED_CAPTION_525 : V4L2_SLICED_WSS_625 | V4L2_SLICED_TELETEXT_B; vivid_fill_service_lines(vbi, service_set); return 0; } int vidioc_s_fmt_sliced_vbi_cap(struct file *file, void *fh, struct v4l2_format *fmt) { struct vivid_dev *dev = video_drvdata(file); struct v4l2_sliced_vbi_format *vbi = &fmt->fmt.sliced; int ret = vidioc_try_fmt_sliced_vbi_cap(file, fh, fmt); if (ret) return ret; if (fmt->type != V4L2_BUF_TYPE_SLICED_VBI_CAPTURE && vb2_is_busy(&dev->vb_vbi_cap_q)) return -EBUSY; dev->service_set_cap = vbi->service_set; return 0; } int vidioc_g_sliced_vbi_cap(struct file *file, void *fh, struct v4l2_sliced_vbi_cap *cap) { struct vivid_dev *dev = video_drvdata(file); struct video_device *vdev = video_devdata(file); bool is_60hz; if (vdev->vfl_dir == VFL_DIR_RX) { is_60hz = dev->std_cap[dev->input] & V4L2_STD_525_60; if (!vivid_is_sdtv_cap(dev) || !dev->has_sliced_vbi_cap || cap->type != V4L2_BUF_TYPE_SLICED_VBI_CAPTURE) return -EINVAL; } else { is_60hz = dev->std_out & V4L2_STD_525_60; if (!vivid_is_svid_out(dev) || !dev->has_sliced_vbi_out || cap->type != V4L2_BUF_TYPE_SLICED_VBI_OUTPUT) return -EINVAL; } cap->service_set = is_60hz ? V4L2_SLICED_CAPTION_525 : V4L2_SLICED_WSS_625 | V4L2_SLICED_TELETEXT_B; if (is_60hz) { cap->service_lines[0][21] = V4L2_SLICED_CAPTION_525; cap->service_lines[1][21] = V4L2_SLICED_CAPTION_525; } else { unsigned i; for (i = 7; i <= 18; i++) cap->service_lines[0][i] = cap->service_lines[1][i] = V4L2_SLICED_TELETEXT_B; cap->service_lines[0][23] = V4L2_SLICED_WSS_625; } return 0; }
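vbi_cap_queue_setup() and vbi_cap_buf_prepare() derive the required plane size from the same formula: a sliced buffer holds 36 struct v4l2_sliced_vbi_data elements, while a raw buffer holds 1440 greyscale samples per line, two fields, and 12 lines per field for 60 Hz standards or 18 for 50 Hz. The standalone sketch below repeats that calculation; the struct is only a stand-in modelled on the v4l2_sliced_vbi_data layout from linux/videodev2.h so the example is self-contained.

/* Sketch of the vivid VBI capture buffer sizing rules. The struct below is a
 * stand-in for v4l2_sliced_vbi_data; only its size matters here. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct sliced_vbi_data_standin {
        uint32_t id;
        uint32_t field;
        uint32_t line;
        uint32_t reserved;
        uint8_t  data[48];
};

/* Plane size for a sliced VBI capture buffer: 36 sliced elements. */
static size_t sliced_vbi_bufsize(size_t elem_size)
{
        return 36 * elem_size;
}

/* Plane size for a raw VBI capture buffer: 1440 samples/line, 2 fields,
 * 12 lines per field for 60 Hz standards, 18 for 50 Hz. */
static size_t raw_vbi_bufsize(bool is_60hz)
{
        return 1440 * 2 * (is_60hz ? 12 : 18);
}

int main(void)
{
        printf("sliced: %zu bytes\n",
               sliced_vbi_bufsize(sizeof(struct sliced_vbi_data_standin)));
        printf("raw 60Hz: %zu bytes\n", raw_vbi_bufsize(true));
        printf("raw 50Hz: %zu bytes\n", raw_vbi_bufsize(false));
        return 0;
}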
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright 1997-1998 Transmeta Corporation - All Rights Reserved * Copyright 2005-2006 Ian Kent <raven@themaw.net> */ /* Internal header file for autofs */ #include <linux/auto_fs.h> #include <linux/auto_dev-ioctl.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/string.h> #include <linux/wait.h> #include <linux/sched.h> #include <linux/sched/signal.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/uaccess.h> #include <linux/mutex.h> #include <linux/spinlock.h> #include <linux/list.h> #include <linux/completion.h> #include <linux/file.h> #include <linux/magic.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> /* This is the range of ioctl() numbers we claim as ours */ #define AUTOFS_IOC_FIRST AUTOFS_IOC_READY #define AUTOFS_IOC_COUNT 32 #define AUTOFS_DEV_IOCTL_IOC_FIRST (AUTOFS_DEV_IOCTL_VERSION) #define AUTOFS_DEV_IOCTL_IOC_COUNT \ (AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD - AUTOFS_DEV_IOCTL_VERSION_CMD) #ifdef pr_fmt #undef pr_fmt #endif #define pr_fmt(fmt) KBUILD_MODNAME ":pid:%d:%s: " fmt, current->pid, __func__ extern struct file_system_type autofs_fs_type; /* * Unified info structure. This is pointed to by both the dentry and * inode structures. Each file in the filesystem has an instance of this * structure. It holds a reference to the dentry, so dentries are never * flushed while the file exists. All name lookups are dealt with at the * dentry level, although the filesystem can interfere in the validation * process. Readdir is implemented by traversing the dentry lists. */ struct autofs_info { struct dentry *dentry; int flags; struct completion expire_complete; struct list_head active; struct list_head expiring; struct autofs_sb_info *sbi; unsigned long exp_timeout; unsigned long last_used; int count; kuid_t uid; kgid_t gid; struct rcu_head rcu; }; #define AUTOFS_INF_EXPIRING (1<<0) /* dentry in the process of expiring */ #define AUTOFS_INF_WANT_EXPIRE (1<<1) /* the dentry is being considered * for expiry, so RCU_walk is * not permitted. If it progresses to * actual expiry attempt, the flag is * not cleared when EXPIRING is set - * in that case it gets cleared only * when it comes to clearing EXPIRING. */ #define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */ #define AUTOFS_INF_EXPIRE_SET (1<<3) /* per-dentry expire timeout set for this mount point.
*/ struct autofs_wait_queue { wait_queue_head_t queue; struct autofs_wait_queue *next; autofs_wqt_t wait_queue_token; /* We use the following to see what we are waiting for */ struct qstr name; u32 offset; u32 dev; u64 ino; kuid_t uid; kgid_t gid; pid_t pid; pid_t tgid; /* This is for status reporting upon return */ int status; unsigned int wait_ctr; }; #define AUTOFS_SBI_MAGIC 0x6d4a556d #define AUTOFS_SBI_CATATONIC 0x0001 #define AUTOFS_SBI_STRICTEXPIRE 0x0002 #define AUTOFS_SBI_IGNORE 0x0004 struct autofs_sb_info { u32 magic; int pipefd; struct file *pipe; struct pid *oz_pgrp; int version; int sub_version; int min_proto; int max_proto; unsigned int flags; unsigned long exp_timeout; unsigned int type; struct super_block *sb; struct mutex wq_mutex; struct mutex pipe_mutex; spinlock_t fs_lock; struct autofs_wait_queue *queues; /* Wait queue pointer */ spinlock_t lookup_lock; struct list_head active_list; struct list_head expiring_list; struct rcu_head rcu; }; static inline struct autofs_sb_info *autofs_sbi(struct super_block *sb) { return (struct autofs_sb_info *)(sb->s_fs_info); } static inline struct autofs_info *autofs_dentry_ino(struct dentry *dentry) { return (struct autofs_info *)(dentry->d_fsdata); } /* autofs_oz_mode(): do we see the man behind the curtain? (The * processes which do manipulations for us in user space sees the raw * filesystem without "magic".) */ static inline int autofs_oz_mode(struct autofs_sb_info *sbi) { return ((sbi->flags & AUTOFS_SBI_CATATONIC) || task_pgrp(current) == sbi->oz_pgrp); } static inline bool autofs_empty(struct autofs_info *ino) { return ino->count < 2; } struct inode *autofs_get_inode(struct super_block *, umode_t); void autofs_free_ino(struct autofs_info *); /* Expiration */ int is_autofs_dentry(struct dentry *); int autofs_expire_wait(const struct path *path, int rcu_walk); int autofs_expire_run(struct super_block *, struct vfsmount *, struct autofs_sb_info *, struct autofs_packet_expire __user *); int autofs_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, struct autofs_sb_info *sbi, unsigned int how); int autofs_expire_multi(struct super_block *, struct vfsmount *, struct autofs_sb_info *, int __user *); /* Device node initialization */ int autofs_dev_ioctl_init(void); void autofs_dev_ioctl_exit(void); /* Operations structures */ extern const struct inode_operations autofs_symlink_inode_operations; extern const struct inode_operations autofs_dir_inode_operations; extern const struct file_operations autofs_dir_operations; extern const struct file_operations autofs_root_operations; extern const struct dentry_operations autofs_dentry_operations; /* VFS automount flags management functions */ static inline void __managed_dentry_set_managed(struct dentry *dentry) { dentry->d_flags |= (DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT); } static inline void managed_dentry_set_managed(struct dentry *dentry) { spin_lock(&dentry->d_lock); __managed_dentry_set_managed(dentry); spin_unlock(&dentry->d_lock); } static inline void __managed_dentry_clear_managed(struct dentry *dentry) { dentry->d_flags &= ~(DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT); } static inline void managed_dentry_clear_managed(struct dentry *dentry) { spin_lock(&dentry->d_lock); __managed_dentry_clear_managed(dentry); spin_unlock(&dentry->d_lock); } /* Initializing function */ extern const struct fs_parameter_spec autofs_param_specs[]; int autofs_init_fs_context(struct fs_context *fc); struct autofs_info *autofs_new_ino(struct autofs_sb_info *); void autofs_clean_ino(struct 
autofs_info *); static inline int autofs_check_pipe(struct file *pipe) { if (pipe->f_mode & FMODE_PATH) return -EINVAL; if (!(pipe->f_mode & FMODE_CAN_WRITE)) return -EINVAL; if (!S_ISFIFO(file_inode(pipe)->i_mode)) return -EINVAL; return 0; } static inline void autofs_set_packet_pipe_flags(struct file *pipe) { /* We want a packet pipe */ pipe->f_flags |= O_DIRECT; /* We don't expect -EAGAIN */ pipe->f_flags &= ~O_NONBLOCK; } static inline int autofs_prepare_pipe(struct file *pipe) { int ret = autofs_check_pipe(pipe); if (ret < 0) return ret; autofs_set_packet_pipe_flags(pipe); return 0; } /* Queue management functions */ int autofs_wait(struct autofs_sb_info *, const struct path *, enum autofs_notify); int autofs_wait_release(struct autofs_sb_info *, autofs_wqt_t, int); void autofs_catatonic_mode(struct autofs_sb_info *); static inline u32 autofs_get_dev(struct autofs_sb_info *sbi) { return new_encode_dev(sbi->sb->s_dev); } static inline u64 autofs_get_ino(struct autofs_sb_info *sbi) { return d_inode(sbi->sb->s_root)->i_ino; } static inline void __autofs_add_expiring(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb); struct autofs_info *ino = autofs_dentry_ino(dentry); if (ino) { if (list_empty(&ino->expiring)) list_add(&ino->expiring, &sbi->expiring_list); } } static inline void autofs_add_expiring(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb); struct autofs_info *ino = autofs_dentry_ino(dentry); if (ino) { spin_lock(&sbi->lookup_lock); if (list_empty(&ino->expiring)) list_add(&ino->expiring, &sbi->expiring_list); spin_unlock(&sbi->lookup_lock); } } static inline void autofs_del_expiring(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb); struct autofs_info *ino = autofs_dentry_ino(dentry); if (ino) { spin_lock(&sbi->lookup_lock); if (!list_empty(&ino->expiring)) list_del_init(&ino->expiring); spin_unlock(&sbi->lookup_lock); } } void autofs_kill_sb(struct super_block *);
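autofs_prepare_pipe() captures the contract for the daemon's notification pipe: it must be a real FIFO opened for writing (an O_PATH-style handle is rejected), it is put into packet mode with O_DIRECT so each notification is delivered as one unit, and O_NONBLOCK is cleared because the kernel does not expect -EAGAIN. The userspace sketch below performs analogous checks and flag changes on an ordinary pipe descriptor; it illustrates the same idea and is not code from autofs (the kernel inspects f_mode bits such as FMODE_CAN_WRITE rather than the open access mode used here).

/* Userspace analogue of autofs_prepare_pipe(): verify that fd is a writable
 * FIFO, then switch it to packet mode (O_DIRECT) and blocking mode. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

static int prepare_notify_pipe(int fd)
{
        struct stat st;
        int flags;

        if (fstat(fd, &st) < 0 || !S_ISFIFO(st.st_mode))
                return -1;                       /* not a FIFO */

        flags = fcntl(fd, F_GETFL);
        if (flags < 0)
                return -1;
        if ((flags & O_ACCMODE) == O_RDONLY)
                return -1;                       /* not opened for writing */

        /* Packet mode, blocking writes. */
        flags |= O_DIRECT;
        flags &= ~O_NONBLOCK;
        return fcntl(fd, F_SETFL, flags);
}

int main(void)
{
        int fds[2];

        if (pipe(fds) < 0)
                return 1;
        printf("prepare write end: %d\n", prepare_notify_pipe(fds[1]));
        close(fds[0]);
        close(fds[1]);
        return 0;
}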
/* FUSE: Filesystem in Userspace Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> This program can be distributed under the terms of the GNU GPL. See the file COPYING. */ #include "fuse_i.h" #include <linux/init.h> #include <linux/module.h> #include <linux/fs_context.h> #define FUSE_CTL_SUPER_MAGIC 0x65735543 /* * This is non-NULL when the single instance of the control filesystem * exists.
Protected by fuse_mutex */ static struct super_block *fuse_control_sb; static struct fuse_conn *fuse_ctl_file_conn_get(struct file *file) { struct fuse_conn *fc; mutex_lock(&fuse_mutex); fc = file_inode(file)->i_private; if (fc) fc = fuse_conn_get(fc); mutex_unlock(&fuse_mutex); return fc; } static ssize_t fuse_conn_abort_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct fuse_conn *fc = fuse_ctl_file_conn_get(file); if (fc) { if (fc->abort_err) fc->aborted = true; fuse_abort_conn(fc); fuse_conn_put(fc); } return count; } static ssize_t fuse_conn_waiting_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { char tmp[32]; size_t size; if (!*ppos) { long value; struct fuse_conn *fc = fuse_ctl_file_conn_get(file); if (!fc) return 0; value = atomic_read(&fc->num_waiting); file->private_data = (void *)value; fuse_conn_put(fc); } size = sprintf(tmp, "%ld\n", (long)file->private_data); return simple_read_from_buffer(buf, len, ppos, tmp, size); } static ssize_t fuse_conn_limit_read(struct file *file, char __user *buf, size_t len, loff_t *ppos, unsigned val) { char tmp[32]; size_t size = sprintf(tmp, "%u\n", val); return simple_read_from_buffer(buf, len, ppos, tmp, size); } static ssize_t fuse_conn_limit_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos, unsigned *val, unsigned global_limit) { unsigned long t; unsigned limit = (1 << 16) - 1; int err; if (*ppos) return -EINVAL; err = kstrtoul_from_user(buf, count, 0, &t); if (err) return err; if (!capable(CAP_SYS_ADMIN)) limit = min(limit, global_limit); if (t > limit) return -EINVAL; *val = t; return count; } static ssize_t fuse_conn_max_background_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { struct fuse_conn *fc; unsigned val; fc = fuse_ctl_file_conn_get(file); if (!fc) return 0; val = READ_ONCE(fc->max_background); fuse_conn_put(fc); return fuse_conn_limit_read(file, buf, len, ppos, val); } static ssize_t fuse_conn_max_background_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { unsigned val; ssize_t ret; ret = fuse_conn_limit_write(file, buf, count, ppos, &val, max_user_bgreq); if (ret > 0) { struct fuse_conn *fc = fuse_ctl_file_conn_get(file); if (fc) { spin_lock(&fc->bg_lock); fc->max_background = val; fc->blocked = fc->num_background >= fc->max_background; if (!fc->blocked) wake_up(&fc->blocked_waitq); spin_unlock(&fc->bg_lock); fuse_conn_put(fc); } } return ret; } static ssize_t fuse_conn_congestion_threshold_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { struct fuse_conn *fc; unsigned val; fc = fuse_ctl_file_conn_get(file); if (!fc) return 0; val = READ_ONCE(fc->congestion_threshold); fuse_conn_put(fc); return fuse_conn_limit_read(file, buf, len, ppos, val); } static ssize_t fuse_conn_congestion_threshold_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { unsigned val; struct fuse_conn *fc; ssize_t ret; ret = fuse_conn_limit_write(file, buf, count, ppos, &val, max_user_congthresh); if (ret <= 0) goto out; fc = fuse_ctl_file_conn_get(file); if (!fc) goto out; WRITE_ONCE(fc->congestion_threshold, val); fuse_conn_put(fc); out: return ret; } static const struct file_operations fuse_ctl_abort_ops = { .open = nonseekable_open, .write = fuse_conn_abort_write, }; static const struct file_operations fuse_ctl_waiting_ops = { .open = nonseekable_open, .read = fuse_conn_waiting_read, }; static const struct file_operations fuse_conn_max_background_ops = { .open = 
nonseekable_open, .read = fuse_conn_max_background_read, .write = fuse_conn_max_background_write, }; static const struct file_operations fuse_conn_congestion_threshold_ops = { .open = nonseekable_open, .read = fuse_conn_congestion_threshold_read, .write = fuse_conn_congestion_threshold_write, }; static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, struct fuse_conn *fc, const char *name, int mode, int nlink, const struct inode_operations *iop, const struct file_operations *fop) { struct dentry *dentry; struct inode *inode; BUG_ON(fc->ctl_ndents >= FUSE_CTL_NUM_DENTRIES); dentry = d_alloc_name(parent, name); if (!dentry) return NULL; inode = new_inode(fuse_control_sb); if (!inode) { dput(dentry); return NULL; } inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = fc->user_id; inode->i_gid = fc->group_id; simple_inode_init_ts(inode); /* setting ->i_op to NULL is not allowed */ if (iop) inode->i_op = iop; inode->i_fop = fop; set_nlink(inode, nlink); inode->i_private = fc; d_add(dentry, inode); fc->ctl_dentry[fc->ctl_ndents++] = dentry; return dentry; } /* * Add a connection to the control filesystem (if it exists). Caller * must hold fuse_mutex */ int fuse_ctl_add_conn(struct fuse_conn *fc) { struct dentry *parent; char name[32]; if (!fuse_control_sb || fc->no_control) return 0; parent = fuse_control_sb->s_root; inc_nlink(d_inode(parent)); sprintf(name, "%u", fc->dev); parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2, &simple_dir_inode_operations, &simple_dir_operations); if (!parent) goto err; if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, 1, NULL, &fuse_ctl_waiting_ops) || !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, 1, NULL, &fuse_ctl_abort_ops) || !fuse_ctl_add_dentry(parent, fc, "max_background", S_IFREG | 0600, 1, NULL, &fuse_conn_max_background_ops) || !fuse_ctl_add_dentry(parent, fc, "congestion_threshold", S_IFREG | 0600, 1, NULL, &fuse_conn_congestion_threshold_ops)) goto err; return 0; err: fuse_ctl_remove_conn(fc); return -ENOMEM; } /* * Remove a connection from the control filesystem (if it exists). 
* Caller must hold fuse_mutex */ void fuse_ctl_remove_conn(struct fuse_conn *fc) { int i; if (!fuse_control_sb || fc->no_control) return; for (i = fc->ctl_ndents - 1; i >= 0; i--) { struct dentry *dentry = fc->ctl_dentry[i]; d_inode(dentry)->i_private = NULL; if (!i) { /* Get rid of submounts: */ d_invalidate(dentry); } dput(dentry); } drop_nlink(d_inode(fuse_control_sb->s_root)); } static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fsc) { static const struct tree_descr empty_descr = {""}; struct fuse_conn *fc; int err; err = simple_fill_super(sb, FUSE_CTL_SUPER_MAGIC, &empty_descr); if (err) return err; mutex_lock(&fuse_mutex); BUG_ON(fuse_control_sb); fuse_control_sb = sb; list_for_each_entry(fc, &fuse_conn_list, entry) { err = fuse_ctl_add_conn(fc); if (err) { fuse_control_sb = NULL; mutex_unlock(&fuse_mutex); return err; } } mutex_unlock(&fuse_mutex); return 0; } static int fuse_ctl_get_tree(struct fs_context *fsc) { return get_tree_single(fsc, fuse_ctl_fill_super); } static const struct fs_context_operations fuse_ctl_context_ops = { .get_tree = fuse_ctl_get_tree, }; static int fuse_ctl_init_fs_context(struct fs_context *fsc) { fsc->ops = &fuse_ctl_context_ops; return 0; } static void fuse_ctl_kill_sb(struct super_block *sb) { struct fuse_conn *fc; mutex_lock(&fuse_mutex); fuse_control_sb = NULL; list_for_each_entry(fc, &fuse_conn_list, entry) fc->ctl_ndents = 0; mutex_unlock(&fuse_mutex); kill_litter_super(sb); } static struct file_system_type fuse_ctl_fs_type = { .owner = THIS_MODULE, .name = "fusectl", .init_fs_context = fuse_ctl_init_fs_context, .kill_sb = fuse_ctl_kill_sb, }; MODULE_ALIAS_FS("fusectl"); int __init fuse_ctl_init(void) { return register_filesystem(&fuse_ctl_fs_type); } void __exit fuse_ctl_cleanup(void) { unregister_filesystem(&fuse_ctl_fs_type); }
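fuse_conn_limit_write() is the policy behind the writable files created above (max_background and congestion_threshold, which show up in each numbered connection directory of the fusectl filesystem, conventionally mounted at /sys/fs/fuse/connections): the value is parsed as an unsigned integer, always bounded by 2^16 - 1, and additionally clamped to the global limit unless the writer has CAP_SYS_ADMIN. The sketch below reproduces only that clamping logic in standalone form; the privilege check is passed in as a plain boolean instead of calling capable().

/* Sketch of the fusectl limit-write policy: bound by 65535, and by the
 * global limit for unprivileged writers. Returns 0 on success, -1 on
 * rejection (the kernel returns -EINVAL there). */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static int limit_write(const char *buf, unsigned int *val,
                       unsigned int global_limit, bool privileged)
{
        unsigned long t;
        unsigned int limit = (1u << 16) - 1;
        char *end;

        errno = 0;
        t = strtoul(buf, &end, 0);
        if (errno || end == buf)
                return -1;

        if (!privileged && global_limit < limit)
                limit = global_limit;
        if (t > limit)
                return -1;

        *val = (unsigned int)t;
        return 0;
}

int main(void)
{
        unsigned int val = 0;

        /* Unprivileged writer is clamped to the global limit (here 1000). */
        printf("%d (val=%u)\n", limit_write("800", &val, 1000, false), val);
        printf("%d\n", limit_write("5000", &val, 1000, false));   /* rejected */
        printf("%d (val=%u)\n", limit_write("5000", &val, 1000, true), val);
        return 0;
}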
// SPDX-License-Identifier: GPL-2.0
/* dvb-usb-urb.c is part of the DVB USB library.
 *
 * Copyright (C) 2004-6 Patrick Boettcher (patrick.boettcher@posteo.de)
 * see dvb-usb-init.c for copyright information.
 *
 * This file keeps functions for initializing and handling the
 * USB and URB stuff.
 */
#include "dvb-usb-common.h"

int dvb_usb_generic_rw(struct dvb_usb_device *d, u8 *wbuf, u16 wlen, u8 *rbuf,
		       u16 rlen, int delay_ms)
{
	int actlen = 0, ret = -ENOMEM;

	if (!d || wbuf == NULL || wlen == 0)
		return -EINVAL;

	if (d->props.generic_bulk_ctrl_endpoint == 0) {
		err("endpoint for generic control not specified.");
		return -EINVAL;
	}

	if ((ret = mutex_lock_interruptible(&d->usb_mutex)))
		return ret;

	deb_xfer(">>> ");
	debug_dump(wbuf, wlen, deb_xfer);

	ret = usb_bulk_msg(d->udev, usb_sndbulkpipe(d->udev,
			d->props.generic_bulk_ctrl_endpoint),
			wbuf, wlen, &actlen, 2000);

	if (ret)
		err("bulk message failed: %d (%d/%d)", ret, wlen, actlen);
	else
		ret = actlen != wlen ? -1 : 0;

	/* an answer is expected, and no error before */
	if (!ret && rbuf && rlen) {
		if (delay_ms)
			msleep(delay_ms);

		ret = usb_bulk_msg(d->udev, usb_rcvbulkpipe(d->udev,
				d->props.generic_bulk_ctrl_endpoint_response ?
				d->props.generic_bulk_ctrl_endpoint_response :
				d->props.generic_bulk_ctrl_endpoint),
				rbuf, rlen, &actlen, 2000);

		if (ret)
			err("recv bulk message failed: %d", ret);
		else {
			deb_xfer("<<< ");
			debug_dump(rbuf, actlen, deb_xfer);
		}
	}

	mutex_unlock(&d->usb_mutex);
	return ret;
}
EXPORT_SYMBOL(dvb_usb_generic_rw);

int dvb_usb_generic_write(struct dvb_usb_device *d, u8 *buf, u16 len)
{
	return dvb_usb_generic_rw(d, buf, len, NULL, 0, 0);
}
EXPORT_SYMBOL(dvb_usb_generic_write);

static void dvb_usb_data_complete(struct usb_data_stream *stream, u8 *buffer,
				  size_t length)
{
	struct dvb_usb_adapter *adap = stream->user_priv;

	if (adap->feedcount > 0 && adap->state & DVB_USB_ADAP_STATE_DVB)
		dvb_dmx_swfilter(&adap->demux, buffer, length);
}

static void dvb_usb_data_complete_204(struct usb_data_stream *stream,
				      u8 *buffer, size_t length)
{
	struct dvb_usb_adapter *adap = stream->user_priv;

	if (adap->feedcount > 0 && adap->state & DVB_USB_ADAP_STATE_DVB)
		dvb_dmx_swfilter_204(&adap->demux, buffer, length);
}

static void dvb_usb_data_complete_raw(struct usb_data_stream *stream,
				      u8 *buffer, size_t length)
{
	struct dvb_usb_adapter *adap = stream->user_priv;

	if (adap->feedcount > 0 && adap->state & DVB_USB_ADAP_STATE_DVB)
		dvb_dmx_swfilter_raw(&adap->demux, buffer, length);
}

int dvb_usb_adapter_stream_init(struct dvb_usb_adapter *adap)
{
	int i, ret = 0;

	for (i = 0; i < adap->props.num_frontends; i++) {
		adap->fe_adap[i].stream.udev = adap->dev->udev;
		if (adap->props.fe[i].caps & DVB_USB_ADAP_RECEIVES_204_BYTE_TS)
			adap->fe_adap[i].stream.complete =
				dvb_usb_data_complete_204;
		else if (adap->props.fe[i].caps & DVB_USB_ADAP_RECEIVES_RAW_PAYLOAD)
			adap->fe_adap[i].stream.complete =
				dvb_usb_data_complete_raw;
		else
			adap->fe_adap[i].stream.complete = dvb_usb_data_complete;
		adap->fe_adap[i].stream.user_priv = adap;

		ret = usb_urb_init(&adap->fe_adap[i].stream,
				   &adap->props.fe[i].stream);
		if (ret < 0)
			break;
	}
	return ret;
}

int dvb_usb_adapter_stream_exit(struct dvb_usb_adapter *adap)
{
	int i;

	for (i = 0; i < adap->props.num_frontends; i++)
		usb_urb_exit(&adap->fe_adap[i].stream);
	return 0;
}
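dvb_usb_generic_rw() above implements the library's generic command/response exchange over the device's bulk control endpoint: it writes wbuf, optionally sleeps delay_ms, then reads the answer into rbuf. The sketch below shows how a driver might wrap it for a simple register read; it is illustrative only, since demo_read_reg() and CMD_READ_REG are invented names rather than part of any real device protocol, and it assumes it is compiled inside a dvb-usb driver that already includes the dvb-usb headers.

/*
 * Illustrative sketch: CMD_READ_REG is a hypothetical vendor opcode and
 * demo_read_reg() is not part of the dvb-usb library.
 */
#define CMD_READ_REG 0x01

static int demo_read_reg(struct dvb_usb_device *d, u8 reg, u8 *val)
{
	u8 wbuf[2] = { CMD_READ_REG, reg };	/* command byte + register address */
	u8 rbuf[1];
	int ret;

	/* two bytes out, one byte back, no delay between write and read */
	ret = dvb_usb_generic_rw(d, wbuf, sizeof(wbuf), rbuf, sizeof(rbuf), 0);
	if (ret < 0)
		return ret;

	*val = rbuf[0];
	return 0;
}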
/*
 * net/tipc/node.c: TIPC node management routines
 *
 * Copyright (c) 2000-2006, 2012-2016, Ericsson AB
 * Copyright (c) 2005-2006, 2010-2014, Wind River Systems
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
*/ #include "core.h" #include "link.h" #include "node.h" #include "name_distr.h" #include "socket.h" #include "bcast.h" #include "monitor.h" #include "discover.h" #include "netlink.h" #include "trace.h" #include "crypto.h" #define INVALID_NODE_SIG 0x10000 #define NODE_CLEANUP_AFTER 300000 /* Flags used to take different actions according to flag type * TIPC_NOTIFY_NODE_DOWN: notify node is down * TIPC_NOTIFY_NODE_UP: notify node is up * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type */ enum { TIPC_NOTIFY_NODE_DOWN = (1 << 3), TIPC_NOTIFY_NODE_UP = (1 << 4), TIPC_NOTIFY_LINK_UP = (1 << 6), TIPC_NOTIFY_LINK_DOWN = (1 << 7) }; struct tipc_link_entry { struct tipc_link *link; spinlock_t lock; /* per link */ u32 mtu; struct sk_buff_head inputq; struct tipc_media_addr maddr; }; struct tipc_bclink_entry { struct tipc_link *link; struct sk_buff_head inputq1; struct sk_buff_head arrvq; struct sk_buff_head inputq2; struct sk_buff_head namedq; u16 named_rcv_nxt; bool named_open; }; /** * struct tipc_node - TIPC node structure * @addr: network address of node * @kref: reference counter to node object * @lock: rwlock governing access to structure * @net: the applicable net namespace * @hash: links to adjacent nodes in unsorted hash chain * @active_links: bearer ids of active links, used as index into links[] array * @links: array containing references to all links to node * @bc_entry: broadcast link entry * @action_flags: bit mask of different types of node actions * @state: connectivity state vs peer node * @preliminary: a preliminary node or not * @failover_sent: failover sent or not * @sync_point: sequence number where synch/failover is finished * @list: links to adjacent nodes in sorted list of cluster's nodes * @working_links: number of working links to node (both active and standby) * @link_cnt: number of links to node * @capabilities: bitmap, indicating peer node's functional capabilities * @signature: node instance identifier * @link_id: local and remote bearer ids of changing link, if any * @peer_id: 128-bit ID of peer * @peer_id_string: ID string of peer * @publ_list: list of publications * @conn_sks: list of connections (FIXME) * @timer: node's keepalive timer * @keepalive_intv: keepalive interval in milliseconds * @rcu: rcu struct for tipc_node * @delete_at: indicates the time for deleting a down node * @peer_net: peer's net namespace * @peer_hash_mix: hash for this peer (FIXME) * @crypto_rx: RX crypto handler */ struct tipc_node { u32 addr; struct kref kref; rwlock_t lock; struct net *net; struct hlist_node hash; int active_links[2]; struct tipc_link_entry links[MAX_BEARERS]; struct tipc_bclink_entry bc_entry; int action_flags; struct list_head list; int state; bool preliminary; bool failover_sent; u16 sync_point; int link_cnt; u16 working_links; u16 capabilities; u32 signature; u32 link_id; u8 peer_id[16]; char peer_id_string[NODE_ID_STR_LEN]; struct list_head publ_list; struct list_head conn_sks; unsigned long keepalive_intv; struct timer_list timer; struct rcu_head rcu; unsigned long delete_at; struct net *peer_net; u32 peer_hash_mix; #ifdef CONFIG_TIPC_CRYPTO struct tipc_crypto *crypto_rx; #endif }; /* Node FSM states and events: */ enum { SELF_DOWN_PEER_DOWN = 0xdd, SELF_UP_PEER_UP = 0xaa, SELF_DOWN_PEER_LEAVING = 0xd1, SELF_UP_PEER_COMING = 0xac, SELF_COMING_PEER_UP = 0xca, SELF_LEAVING_PEER_DOWN = 0x1d, NODE_FAILINGOVER = 0xf0, NODE_SYNCHING = 0xcc }; enum { SELF_ESTABL_CONTACT_EVT = 0xece, SELF_LOST_CONTACT_EVT = 0x1ce, PEER_ESTABL_CONTACT_EVT = 0x9ece, 
PEER_LOST_CONTACT_EVT = 0x91ce, NODE_FAILOVER_BEGIN_EVT = 0xfbe, NODE_FAILOVER_END_EVT = 0xfee, NODE_SYNCH_BEGIN_EVT = 0xcbe, NODE_SYNCH_END_EVT = 0xcee }; static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, struct sk_buff_head *xmitq, struct tipc_media_addr **maddr); static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete); static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq); static void tipc_node_delete(struct tipc_node *node); static void tipc_node_timeout(struct timer_list *t); static void tipc_node_fsm_evt(struct tipc_node *n, int evt); static struct tipc_node *tipc_node_find(struct net *net, u32 addr); static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id); static bool node_is_up(struct tipc_node *n); static void tipc_node_delete_from_list(struct tipc_node *node); struct tipc_sock_conn { u32 port; u32 peer_port; u32 peer_node; struct list_head list; }; static struct tipc_link *node_active_link(struct tipc_node *n, int sel) { int bearer_id = n->active_links[sel & 1]; if (unlikely(bearer_id == INVALID_BEARER_ID)) return NULL; return n->links[bearer_id].link; } int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected) { struct tipc_node *n; int bearer_id; unsigned int mtu = MAX_MSG_SIZE; n = tipc_node_find(net, addr); if (unlikely(!n)) return mtu; /* Allow MAX_MSG_SIZE when building connection oriented message * if they are in the same core network */ if (n->peer_net && connected) { tipc_node_put(n); return mtu; } bearer_id = n->active_links[sel & 1]; if (likely(bearer_id != INVALID_BEARER_ID)) mtu = n->links[bearer_id].mtu; tipc_node_put(n); return mtu; } bool tipc_node_get_id(struct net *net, u32 addr, u8 *id) { u8 *own_id = tipc_own_id(net); struct tipc_node *n; if (!own_id) return true; if (addr == tipc_own_addr(net)) { memcpy(id, own_id, TIPC_NODEID_LEN); return true; } n = tipc_node_find(net, addr); if (!n) return false; memcpy(id, &n->peer_id, TIPC_NODEID_LEN); tipc_node_put(n); return true; } u16 tipc_node_get_capabilities(struct net *net, u32 addr) { struct tipc_node *n; u16 caps; n = tipc_node_find(net, addr); if (unlikely(!n)) return TIPC_NODE_CAPABILITIES; caps = n->capabilities; tipc_node_put(n); return caps; } u32 tipc_node_get_addr(struct tipc_node *node) { return (node) ? node->addr : 0; } char *tipc_node_get_id_str(struct tipc_node *node) { return node->peer_id_string; } #ifdef CONFIG_TIPC_CRYPTO /** * tipc_node_crypto_rx - Retrieve crypto RX handle from node * @__n: target tipc_node * Note: node ref counter must be held first! */ struct tipc_crypto *tipc_node_crypto_rx(struct tipc_node *__n) { return (__n) ? __n->crypto_rx : NULL; } struct tipc_crypto *tipc_node_crypto_rx_by_list(struct list_head *pos) { return container_of(pos, struct tipc_node, list)->crypto_rx; } struct tipc_crypto *tipc_node_crypto_rx_by_addr(struct net *net, u32 addr) { struct tipc_node *n; n = tipc_node_find(net, addr); return (n) ? 
n->crypto_rx : NULL; } #endif static void tipc_node_free(struct rcu_head *rp) { struct tipc_node *n = container_of(rp, struct tipc_node, rcu); #ifdef CONFIG_TIPC_CRYPTO tipc_crypto_stop(&n->crypto_rx); #endif kfree(n); } static void tipc_node_kref_release(struct kref *kref) { struct tipc_node *n = container_of(kref, struct tipc_node, kref); kfree(n->bc_entry.link); call_rcu(&n->rcu, tipc_node_free); } void tipc_node_put(struct tipc_node *node) { kref_put(&node->kref, tipc_node_kref_release); } void tipc_node_get(struct tipc_node *node) { kref_get(&node->kref); } /* * tipc_node_find - locate specified node object, if it exists */ static struct tipc_node *tipc_node_find(struct net *net, u32 addr) { struct tipc_net *tn = tipc_net(net); struct tipc_node *node; unsigned int thash = tipc_hashfn(addr); rcu_read_lock(); hlist_for_each_entry_rcu(node, &tn->node_htable[thash], hash) { if (node->addr != addr || node->preliminary) continue; if (!kref_get_unless_zero(&node->kref)) node = NULL; break; } rcu_read_unlock(); return node; } /* tipc_node_find_by_id - locate specified node object by its 128-bit id * Note: this function is called only when a discovery request failed * to find the node by its 32-bit id, and is not time critical */ static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id) { struct tipc_net *tn = tipc_net(net); struct tipc_node *n; bool found = false; rcu_read_lock(); list_for_each_entry_rcu(n, &tn->node_list, list) { read_lock_bh(&n->lock); if (!memcmp(id, n->peer_id, 16) && kref_get_unless_zero(&n->kref)) found = true; read_unlock_bh(&n->lock); if (found) break; } rcu_read_unlock(); return found ? n : NULL; } static void tipc_node_read_lock(struct tipc_node *n) __acquires(n->lock) { read_lock_bh(&n->lock); } static void tipc_node_read_unlock(struct tipc_node *n) __releases(n->lock) { read_unlock_bh(&n->lock); } static void tipc_node_write_lock(struct tipc_node *n) __acquires(n->lock) { write_lock_bh(&n->lock); } static void tipc_node_write_unlock_fast(struct tipc_node *n) __releases(n->lock) { write_unlock_bh(&n->lock); } static void tipc_node_write_unlock(struct tipc_node *n) __releases(n->lock) { struct tipc_socket_addr sk; struct net *net = n->net; u32 flags = n->action_flags; struct list_head *publ_list; struct tipc_uaddr ua; u32 bearer_id, node; if (likely(!flags)) { write_unlock_bh(&n->lock); return; } tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, TIPC_LINK_STATE, n->addr, n->addr); sk.ref = n->link_id; sk.node = tipc_own_addr(net); node = n->addr; bearer_id = n->link_id & 0xffff; publ_list = &n->publ_list; n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP); write_unlock_bh(&n->lock); if (flags & TIPC_NOTIFY_NODE_DOWN) tipc_publ_notify(net, publ_list, node, n->capabilities); if (flags & TIPC_NOTIFY_NODE_UP) tipc_named_node_up(net, node, n->capabilities); if (flags & TIPC_NOTIFY_LINK_UP) { tipc_mon_peer_up(net, node, bearer_id); tipc_nametbl_publish(net, &ua, &sk, sk.ref); } if (flags & TIPC_NOTIFY_LINK_DOWN) { tipc_mon_peer_down(net, node, bearer_id); tipc_nametbl_withdraw(net, &ua, &sk, sk.ref); } } static void tipc_node_assign_peer_net(struct tipc_node *n, u32 hash_mixes) { int net_id = tipc_netid(n->net); struct tipc_net *tn_peer; struct net *tmp; u32 hash_chk; if (n->peer_net) return; for_each_net_rcu(tmp) { tn_peer = tipc_net(tmp); if (!tn_peer) continue; /* Integrity checking whether node exists in namespace or not */ if (tn_peer->net_id != net_id) continue; if (memcmp(n->peer_id, 
tn_peer->node_id, NODE_ID_LEN)) continue; hash_chk = tipc_net_hash_mixes(tmp, tn_peer->random); if (hash_mixes ^ hash_chk) continue; n->peer_net = tmp; n->peer_hash_mix = hash_mixes; break; } } struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, u16 capabilities, u32 hash_mixes, bool preliminary) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_link *l, *snd_l = tipc_bc_sndlink(net); struct tipc_node *n, *temp_node; unsigned long intv; int bearer_id; int i; spin_lock_bh(&tn->node_list_lock); n = tipc_node_find(net, addr) ?: tipc_node_find_by_id(net, peer_id); if (n) { if (!n->preliminary) goto update; if (preliminary) goto exit; /* A preliminary node becomes "real" now, refresh its data */ tipc_node_write_lock(n); if (!tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX, tipc_link_min_win(snd_l), tipc_link_max_win(snd_l), n->capabilities, &n->bc_entry.inputq1, &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) { pr_warn("Broadcast rcv link refresh failed, no memory\n"); tipc_node_write_unlock_fast(n); tipc_node_put(n); n = NULL; goto exit; } n->preliminary = false; n->addr = addr; hlist_del_rcu(&n->hash); hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]); list_del_rcu(&n->list); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { if (n->addr < temp_node->addr) break; } list_add_tail_rcu(&n->list, &temp_node->list); tipc_node_write_unlock_fast(n); update: if (n->peer_hash_mix ^ hash_mixes) tipc_node_assign_peer_net(n, hash_mixes); if (n->capabilities == capabilities) goto exit; /* Same node may come back with new capabilities */ tipc_node_write_lock(n); n->capabilities = capabilities; for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { l = n->links[bearer_id].link; if (l) tipc_link_update_caps(l, capabilities); } tipc_node_write_unlock_fast(n); /* Calculate cluster capabilities */ tn->capabilities = TIPC_NODE_CAPABILITIES; list_for_each_entry_rcu(temp_node, &tn->node_list, list) { tn->capabilities &= temp_node->capabilities; } tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST)); goto exit; } n = kzalloc(sizeof(*n), GFP_ATOMIC); if (!n) { pr_warn("Node creation failed, no memory\n"); goto exit; } tipc_nodeid2string(n->peer_id_string, peer_id); #ifdef CONFIG_TIPC_CRYPTO if (unlikely(tipc_crypto_start(&n->crypto_rx, net, n))) { pr_warn("Failed to start crypto RX(%s)!\n", n->peer_id_string); kfree(n); n = NULL; goto exit; } #endif n->addr = addr; n->preliminary = preliminary; memcpy(&n->peer_id, peer_id, 16); n->net = net; n->peer_net = NULL; n->peer_hash_mix = 0; /* Assign kernel local namespace if exists */ tipc_node_assign_peer_net(n, hash_mixes); n->capabilities = capabilities; kref_init(&n->kref); rwlock_init(&n->lock); INIT_HLIST_NODE(&n->hash); INIT_LIST_HEAD(&n->list); INIT_LIST_HEAD(&n->publ_list); INIT_LIST_HEAD(&n->conn_sks); skb_queue_head_init(&n->bc_entry.namedq); skb_queue_head_init(&n->bc_entry.inputq1); __skb_queue_head_init(&n->bc_entry.arrvq); skb_queue_head_init(&n->bc_entry.inputq2); for (i = 0; i < MAX_BEARERS; i++) spin_lock_init(&n->links[i].lock); n->state = SELF_DOWN_PEER_LEAVING; n->delete_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER); n->signature = INVALID_NODE_SIG; n->active_links[0] = INVALID_BEARER_ID; n->active_links[1] = INVALID_BEARER_ID; if (!preliminary && !tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX, tipc_link_min_win(snd_l), tipc_link_max_win(snd_l), n->capabilities, &n->bc_entry.inputq1, &n->bc_entry.namedq, snd_l, 
&n->bc_entry.link)) { pr_warn("Broadcast rcv link creation failed, no memory\n"); tipc_node_put(n); n = NULL; goto exit; } tipc_node_get(n); timer_setup(&n->timer, tipc_node_timeout, 0); /* Start a slow timer anyway, crypto needs it */ n->keepalive_intv = 10000; intv = jiffies + msecs_to_jiffies(n->keepalive_intv); if (!mod_timer(&n->timer, intv)) tipc_node_get(n); hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { if (n->addr < temp_node->addr) break; } list_add_tail_rcu(&n->list, &temp_node->list); /* Calculate cluster capabilities */ tn->capabilities = TIPC_NODE_CAPABILITIES; list_for_each_entry_rcu(temp_node, &tn->node_list, list) { tn->capabilities &= temp_node->capabilities; } tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST)); trace_tipc_node_create(n, true, " "); exit: spin_unlock_bh(&tn->node_list_lock); return n; } static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l) { unsigned long tol = tipc_link_tolerance(l); unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4; /* Link with lowest tolerance determines timer interval */ if (intv < n->keepalive_intv) n->keepalive_intv = intv; /* Ensure link's abort limit corresponds to current tolerance */ tipc_link_set_abort_limit(l, tol / n->keepalive_intv); } static void tipc_node_delete_from_list(struct tipc_node *node) { #ifdef CONFIG_TIPC_CRYPTO tipc_crypto_key_flush(node->crypto_rx); #endif list_del_rcu(&node->list); hlist_del_rcu(&node->hash); tipc_node_put(node); } static void tipc_node_delete(struct tipc_node *node) { trace_tipc_node_delete(node, true, " "); tipc_node_delete_from_list(node); timer_delete_sync(&node->timer); tipc_node_put(node); } void tipc_node_stop(struct net *net) { struct tipc_net *tn = tipc_net(net); struct tipc_node *node, *t_node; spin_lock_bh(&tn->node_list_lock); list_for_each_entry_safe(node, t_node, &tn->node_list, list) tipc_node_delete(node); spin_unlock_bh(&tn->node_list_lock); } void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr) { struct tipc_node *n; if (in_own_node(net, addr)) return; n = tipc_node_find(net, addr); if (!n) { pr_warn("Node subscribe rejected, unknown node 0x%x\n", addr); return; } tipc_node_write_lock(n); list_add_tail(subscr, &n->publ_list); tipc_node_write_unlock_fast(n); tipc_node_put(n); } void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr) { struct tipc_node *n; if (in_own_node(net, addr)) return; n = tipc_node_find(net, addr); if (!n) { pr_warn("Node unsubscribe rejected, unknown node 0x%x\n", addr); return; } tipc_node_write_lock(n); list_del_init(subscr); tipc_node_write_unlock_fast(n); tipc_node_put(n); } int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) { struct tipc_node *node; struct tipc_sock_conn *conn; int err = 0; if (in_own_node(net, dnode)) return 0; node = tipc_node_find(net, dnode); if (!node) { pr_warn("Connecting sock to node 0x%x failed\n", dnode); return -EHOSTUNREACH; } conn = kmalloc(sizeof(*conn), GFP_ATOMIC); if (!conn) { err = -EHOSTUNREACH; goto exit; } conn->peer_node = dnode; conn->port = port; conn->peer_port = peer_port; tipc_node_write_lock(node); list_add_tail(&conn->list, &node->conn_sks); tipc_node_write_unlock(node); exit: tipc_node_put(node); return err; } void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) { struct tipc_node *node; struct tipc_sock_conn *conn, *safe; if (in_own_node(net, dnode)) return; node = tipc_node_find(net, 
dnode); if (!node) return; tipc_node_write_lock(node); list_for_each_entry_safe(conn, safe, &node->conn_sks, list) { if (port != conn->port) continue; list_del(&conn->list); kfree(conn); } tipc_node_write_unlock(node); tipc_node_put(node); } static void tipc_node_clear_links(struct tipc_node *node) { int i; for (i = 0; i < MAX_BEARERS; i++) { struct tipc_link_entry *le = &node->links[i]; if (le->link) { kfree(le->link); le->link = NULL; node->link_cnt--; } } } /* tipc_node_cleanup - delete nodes that does not * have active links for NODE_CLEANUP_AFTER time */ static bool tipc_node_cleanup(struct tipc_node *peer) { struct tipc_node *temp_node; struct tipc_net *tn = tipc_net(peer->net); bool deleted = false; /* If lock held by tipc_node_stop() the node will be deleted anyway */ if (!spin_trylock_bh(&tn->node_list_lock)) return false; tipc_node_write_lock(peer); if (!node_is_up(peer) && time_after(jiffies, peer->delete_at)) { tipc_node_clear_links(peer); tipc_node_delete_from_list(peer); deleted = true; } tipc_node_write_unlock(peer); if (!deleted) { spin_unlock_bh(&tn->node_list_lock); return deleted; } /* Calculate cluster capabilities */ tn->capabilities = TIPC_NODE_CAPABILITIES; list_for_each_entry_rcu(temp_node, &tn->node_list, list) { tn->capabilities &= temp_node->capabilities; } tipc_bcast_toggle_rcast(peer->net, (tn->capabilities & TIPC_BCAST_RCAST)); spin_unlock_bh(&tn->node_list_lock); return deleted; } /* tipc_node_timeout - handle expiration of node timer */ static void tipc_node_timeout(struct timer_list *t) { struct tipc_node *n = from_timer(n, t, timer); struct tipc_link_entry *le; struct sk_buff_head xmitq; int remains = n->link_cnt; int bearer_id; int rc = 0; trace_tipc_node_timeout(n, false, " "); if (!node_is_up(n) && tipc_node_cleanup(n)) { /*Removing the reference of Timer*/ tipc_node_put(n); return; } #ifdef CONFIG_TIPC_CRYPTO /* Take any crypto key related actions first */ tipc_crypto_timeout(n->crypto_rx); #endif __skb_queue_head_init(&xmitq); /* Initial node interval to value larger (10 seconds), then it will be * recalculated with link lowest tolerance */ tipc_node_read_lock(n); n->keepalive_intv = 10000; tipc_node_read_unlock(n); for (bearer_id = 0; remains && (bearer_id < MAX_BEARERS); bearer_id++) { tipc_node_read_lock(n); le = &n->links[bearer_id]; if (le->link) { spin_lock_bh(&le->lock); /* Link tolerance may change asynchronously: */ tipc_node_calculate_timer(n, le->link); rc = tipc_link_timeout(le->link, &xmitq); spin_unlock_bh(&le->lock); remains--; } tipc_node_read_unlock(n); tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr, n); if (rc & TIPC_LINK_DOWN_EVT) tipc_node_link_down(n, bearer_id, false); } mod_timer(&n->timer, jiffies + msecs_to_jiffies(n->keepalive_intv)); } /** * __tipc_node_link_up - handle addition of link * @n: target tipc_node * @bearer_id: id of the bearer * @xmitq: queue for messages to be xmited on * Node lock must be held by caller * Link becomes active (alone or shared) or standby, depending on its priority. 
*/ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, struct sk_buff_head *xmitq) { int *slot0 = &n->active_links[0]; int *slot1 = &n->active_links[1]; struct tipc_link *ol = node_active_link(n, 0); struct tipc_link *nl = n->links[bearer_id].link; if (!nl || tipc_link_is_up(nl)) return; tipc_link_fsm_evt(nl, LINK_ESTABLISH_EVT); if (!tipc_link_is_up(nl)) return; n->working_links++; n->action_flags |= TIPC_NOTIFY_LINK_UP; n->link_id = tipc_link_id(nl); /* Leave room for tunnel header when returning 'mtu' to users: */ n->links[bearer_id].mtu = tipc_link_mss(nl); tipc_bearer_add_dest(n->net, bearer_id, n->addr); tipc_bcast_inc_bearer_dst_cnt(n->net, bearer_id); pr_debug("Established link <%s> on network plane %c\n", tipc_link_name(nl), tipc_link_plane(nl)); trace_tipc_node_link_up(n, true, " "); /* Ensure that a STATE message goes first */ tipc_link_build_state_msg(nl, xmitq); /* First link? => give it both slots */ if (!ol) { *slot0 = bearer_id; *slot1 = bearer_id; tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT); n->action_flags |= TIPC_NOTIFY_NODE_UP; tipc_link_set_active(nl, true); tipc_bcast_add_peer(n->net, nl, xmitq); return; } /* Second link => redistribute slots */ if (tipc_link_prio(nl) > tipc_link_prio(ol)) { pr_debug("Old link <%s> becomes standby\n", tipc_link_name(ol)); *slot0 = bearer_id; *slot1 = bearer_id; tipc_link_set_active(nl, true); tipc_link_set_active(ol, false); } else if (tipc_link_prio(nl) == tipc_link_prio(ol)) { tipc_link_set_active(nl, true); *slot1 = bearer_id; } else { pr_debug("New link <%s> is standby\n", tipc_link_name(nl)); } /* Prepare synchronization with first link */ tipc_link_tnl_prepare(ol, nl, SYNCH_MSG, xmitq); } /** * tipc_node_link_up - handle addition of link * @n: target tipc_node * @bearer_id: id of the bearer * @xmitq: queue for messages to be xmited on * * Link becomes active (alone or shared) or standby, depending on its priority. */ static void tipc_node_link_up(struct tipc_node *n, int bearer_id, struct sk_buff_head *xmitq) { struct tipc_media_addr *maddr; tipc_node_write_lock(n); __tipc_node_link_up(n, bearer_id, xmitq); maddr = &n->links[bearer_id].maddr; tipc_bearer_xmit(n->net, bearer_id, xmitq, maddr, n); tipc_node_write_unlock(n); } /** * tipc_node_link_failover() - start failover in case "half-failover" * * This function is only called in a very special situation where link * failover can be already started on peer node but not on this node. * This can happen when e.g.:: * * 1. Both links <1A-2A>, <1B-2B> down * 2. Link endpoint 2A up, but 1A still down (e.g. due to network * disturbance, wrong session, etc.) * 3. Link <1B-2B> up * 4. Link endpoint 2A down (e.g. due to link tolerance timeout) * 5. Node 2 starts failover onto link <1B-2B> * * ==> Node 1 does never start link/node failover! 
* * @n: tipc node structure * @l: link peer endpoint failingover (- can be NULL) * @tnl: tunnel link * @xmitq: queue for messages to be xmited on tnl link later */ static void tipc_node_link_failover(struct tipc_node *n, struct tipc_link *l, struct tipc_link *tnl, struct sk_buff_head *xmitq) { /* Avoid to be "self-failover" that can never end */ if (!tipc_link_is_up(tnl)) return; /* Don't rush, failure link may be in the process of resetting */ if (l && !tipc_link_is_reset(l)) return; tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); n->sync_point = tipc_link_rcv_nxt(tnl) + (U16_MAX / 2 - 1); tipc_link_failover_prepare(l, tnl, xmitq); if (l) tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT); } /** * __tipc_node_link_down - handle loss of link * @n: target tipc_node * @bearer_id: id of the bearer * @xmitq: queue for messages to be xmited on * @maddr: output media address of the bearer */ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, struct sk_buff_head *xmitq, struct tipc_media_addr **maddr) { struct tipc_link_entry *le = &n->links[*bearer_id]; int *slot0 = &n->active_links[0]; int *slot1 = &n->active_links[1]; int i, highest = 0, prio; struct tipc_link *l, *_l, *tnl; l = n->links[*bearer_id].link; if (!l || tipc_link_is_reset(l)) return; n->working_links--; n->action_flags |= TIPC_NOTIFY_LINK_DOWN; n->link_id = tipc_link_id(l); tipc_bearer_remove_dest(n->net, *bearer_id, n->addr); pr_debug("Lost link <%s> on network plane %c\n", tipc_link_name(l), tipc_link_plane(l)); /* Select new active link if any available */ *slot0 = INVALID_BEARER_ID; *slot1 = INVALID_BEARER_ID; for (i = 0; i < MAX_BEARERS; i++) { _l = n->links[i].link; if (!_l || !tipc_link_is_up(_l)) continue; if (_l == l) continue; prio = tipc_link_prio(_l); if (prio < highest) continue; if (prio > highest) { highest = prio; *slot0 = i; *slot1 = i; continue; } *slot1 = i; } if (!node_is_up(n)) { if (tipc_link_peer_is_down(l)) tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT); tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT); trace_tipc_link_reset(l, TIPC_DUMP_ALL, "link down!"); tipc_link_fsm_evt(l, LINK_RESET_EVT); tipc_link_reset(l); tipc_link_build_reset_msg(l, xmitq); *maddr = &n->links[*bearer_id].maddr; node_lost_contact(n, &le->inputq); tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id); return; } tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id); /* There is still a working link => initiate failover */ *bearer_id = n->active_links[0]; tnl = n->links[*bearer_id].link; tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); n->sync_point = tipc_link_rcv_nxt(tnl) + (U16_MAX / 2 - 1); tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq); trace_tipc_link_reset(l, TIPC_DUMP_ALL, "link down -> failover!"); tipc_link_reset(l); tipc_link_fsm_evt(l, LINK_RESET_EVT); tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT); *maddr = &n->links[*bearer_id].maddr; } static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) { struct tipc_link_entry *le = &n->links[bearer_id]; struct tipc_media_addr *maddr = NULL; struct tipc_link *l = le->link; int old_bearer_id = bearer_id; struct sk_buff_head xmitq; if (!l) return; __skb_queue_head_init(&xmitq); tipc_node_write_lock(n); if (!tipc_link_is_establishing(l)) { __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr); } else { /* Defuse pending tipc_node_link_up() */ tipc_link_reset(l); tipc_link_fsm_evt(l, 
LINK_RESET_EVT); } if (delete) { kfree(l); le->link = NULL; n->link_cnt--; } trace_tipc_node_link_down(n, true, "node link down or deleted!"); tipc_node_write_unlock(n); if (delete) tipc_mon_remove_peer(n->net, n->addr, old_bearer_id); if (!skb_queue_empty(&xmitq)) tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr, n); tipc_sk_rcv(n->net, &le->inputq); } static bool node_is_up(struct tipc_node *n) { return n->active_links[0] != INVALID_BEARER_ID; } bool tipc_node_is_up(struct net *net, u32 addr) { struct tipc_node *n; bool retval = false; if (in_own_node(net, addr)) return true; n = tipc_node_find(net, addr); if (!n) return false; retval = node_is_up(n); tipc_node_put(n); return retval; } static u32 tipc_node_suggest_addr(struct net *net, u32 addr) { struct tipc_node *n; addr ^= tipc_net(net)->random; while ((n = tipc_node_find(net, addr))) { tipc_node_put(n); addr++; } return addr; } /* tipc_node_try_addr(): Check if addr can be used by peer, suggest other if not * Returns suggested address if any, otherwise 0 */ u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr) { struct tipc_net *tn = tipc_net(net); struct tipc_node *n; bool preliminary; u32 sugg_addr; /* Suggest new address if some other peer is using this one */ n = tipc_node_find(net, addr); if (n) { if (!memcmp(n->peer_id, id, NODE_ID_LEN)) addr = 0; tipc_node_put(n); if (!addr) return 0; return tipc_node_suggest_addr(net, addr); } /* Suggest previously used address if peer is known */ n = tipc_node_find_by_id(net, id); if (n) { sugg_addr = n->addr; preliminary = n->preliminary; tipc_node_put(n); if (!preliminary) return sugg_addr; } /* Even this node may be in conflict */ if (tn->trial_addr == addr) return tipc_node_suggest_addr(net, addr); return 0; } void tipc_node_check_dest(struct net *net, u32 addr, u8 *peer_id, struct tipc_bearer *b, u16 capabilities, u32 signature, u32 hash_mixes, struct tipc_media_addr *maddr, bool *respond, bool *dupl_addr) { struct tipc_node *n; struct tipc_link *l; struct tipc_link_entry *le; bool addr_match = false; bool sign_match = false; bool link_up = false; bool link_is_reset = false; bool accept_addr = false; bool reset = false; char *if_name; unsigned long intv; u16 session; *dupl_addr = false; *respond = false; n = tipc_node_create(net, addr, peer_id, capabilities, hash_mixes, false); if (!n) return; tipc_node_write_lock(n); le = &n->links[b->identity]; /* Prepare to validate requesting node's signature and media address */ l = le->link; link_up = l && tipc_link_is_up(l); link_is_reset = l && tipc_link_is_reset(l); addr_match = l && !memcmp(&le->maddr, maddr, sizeof(*maddr)); sign_match = (signature == n->signature); /* These three flags give us eight permutations: */ if (sign_match && addr_match && link_up) { /* All is fine. Ignore requests. */ /* Peer node is not a container/local namespace */ if (!n->peer_hash_mix) n->peer_hash_mix = hash_mixes; } else if (sign_match && addr_match && !link_up) { /* Respond. The link will come up in due time */ *respond = true; } else if (sign_match && !addr_match && link_up) { /* Peer has changed i/f address without rebooting. * If so, the link will reset soon, and the next * discovery will be accepted. So we can ignore it. * It may also be a cloned or malicious peer having * chosen the same node address and signature as an * existing one. * Ignore requests until the link goes down, if ever. */ *dupl_addr = true; } else if (sign_match && !addr_match && !link_up) { /* Peer link has changed i/f address without rebooting. 
* It may also be a cloned or malicious peer; we can't * distinguish between the two. * The signature is correct, so we must accept. */ accept_addr = true; *respond = true; reset = true; } else if (!sign_match && addr_match && link_up) { /* Peer node rebooted. Two possibilities: * - Delayed re-discovery; this link endpoint has already * reset and re-established contact with the peer, before * receiving a discovery message from that node. * (The peer happened to receive one from this node first). * - The peer came back so fast that our side has not * discovered it yet. Probing from this side will soon * reset the link, since there can be no working link * endpoint at the peer end, and the link will re-establish. * Accept the signature, since it comes from a known peer. */ n->signature = signature; } else if (!sign_match && addr_match && !link_up) { /* The peer node has rebooted. * Accept signature, since it is a known peer. */ n->signature = signature; *respond = true; } else if (!sign_match && !addr_match && link_up) { /* Peer rebooted with new address, or a new/duplicate peer. * Ignore until the link goes down, if ever. */ *dupl_addr = true; } else if (!sign_match && !addr_match && !link_up) { /* Peer rebooted with new address, or it is a new peer. * Accept signature and address. */ n->signature = signature; accept_addr = true; *respond = true; reset = true; } if (!accept_addr) goto exit; /* Now create new link if not already existing */ if (!l) { if (n->link_cnt == 2) goto exit; if_name = strchr(b->name, ':') + 1; get_random_bytes(&session, sizeof(u16)); if (!tipc_link_create(net, if_name, b->identity, b->tolerance, b->net_plane, b->mtu, b->priority, b->min_win, b->max_win, session, tipc_own_addr(net), addr, peer_id, n->capabilities, tipc_bc_sndlink(n->net), n->bc_entry.link, &le->inputq, &n->bc_entry.namedq, &l)) { *respond = false; goto exit; } trace_tipc_link_reset(l, TIPC_DUMP_ALL, "link created!"); tipc_link_reset(l); tipc_link_fsm_evt(l, LINK_RESET_EVT); if (n->state == NODE_FAILINGOVER) tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); link_is_reset = tipc_link_is_reset(l); le->link = l; n->link_cnt++; tipc_node_calculate_timer(n, l); if (n->link_cnt == 1) { intv = jiffies + msecs_to_jiffies(n->keepalive_intv); if (!mod_timer(&n->timer, intv)) tipc_node_get(n); } } memcpy(&le->maddr, maddr, sizeof(*maddr)); exit: tipc_node_write_unlock(n); if (reset && !link_is_reset) tipc_node_link_down(n, b->identity, false); tipc_node_put(n); } void tipc_node_delete_links(struct net *net, int bearer_id) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *n; rcu_read_lock(); list_for_each_entry_rcu(n, &tn->node_list, list) { tipc_node_link_down(n, bearer_id, true); } rcu_read_unlock(); } static void tipc_node_reset_links(struct tipc_node *n) { int i; pr_warn("Resetting all links to %x\n", n->addr); trace_tipc_node_reset_links(n, true, " "); for (i = 0; i < MAX_BEARERS; i++) { tipc_node_link_down(n, i, false); } } /* tipc_node_fsm_evt - node finite state machine * Determines when contact is allowed with peer node */ static void tipc_node_fsm_evt(struct tipc_node *n, int evt) { int state = n->state; switch (state) { case SELF_DOWN_PEER_DOWN: switch (evt) { case SELF_ESTABL_CONTACT_EVT: state = SELF_UP_PEER_COMING; break; case PEER_ESTABL_CONTACT_EVT: state = SELF_COMING_PEER_UP; break; case SELF_LOST_CONTACT_EVT: case PEER_LOST_CONTACT_EVT: break; case NODE_SYNCH_END_EVT: case NODE_SYNCH_BEGIN_EVT: case NODE_FAILOVER_BEGIN_EVT: case NODE_FAILOVER_END_EVT: default: goto 
illegal_evt; } break; case SELF_UP_PEER_UP: switch (evt) { case SELF_LOST_CONTACT_EVT: state = SELF_DOWN_PEER_LEAVING; break; case PEER_LOST_CONTACT_EVT: state = SELF_LEAVING_PEER_DOWN; break; case NODE_SYNCH_BEGIN_EVT: state = NODE_SYNCHING; break; case NODE_FAILOVER_BEGIN_EVT: state = NODE_FAILINGOVER; break; case SELF_ESTABL_CONTACT_EVT: case PEER_ESTABL_CONTACT_EVT: case NODE_SYNCH_END_EVT: case NODE_FAILOVER_END_EVT: break; default: goto illegal_evt; } break; case SELF_DOWN_PEER_LEAVING: switch (evt) { case PEER_LOST_CONTACT_EVT: state = SELF_DOWN_PEER_DOWN; break; case SELF_ESTABL_CONTACT_EVT: case PEER_ESTABL_CONTACT_EVT: case SELF_LOST_CONTACT_EVT: break; case NODE_SYNCH_END_EVT: case NODE_SYNCH_BEGIN_EVT: case NODE_FAILOVER_BEGIN_EVT: case NODE_FAILOVER_END_EVT: default: goto illegal_evt; } break; case SELF_UP_PEER_COMING: switch (evt) { case PEER_ESTABL_CONTACT_EVT: state = SELF_UP_PEER_UP; break; case SELF_LOST_CONTACT_EVT: state = SELF_DOWN_PEER_DOWN; break; case SELF_ESTABL_CONTACT_EVT: case PEER_LOST_CONTACT_EVT: case NODE_SYNCH_END_EVT: case NODE_FAILOVER_BEGIN_EVT: break; case NODE_SYNCH_BEGIN_EVT: case NODE_FAILOVER_END_EVT: default: goto illegal_evt; } break; case SELF_COMING_PEER_UP: switch (evt) { case SELF_ESTABL_CONTACT_EVT: state = SELF_UP_PEER_UP; break; case PEER_LOST_CONTACT_EVT: state = SELF_DOWN_PEER_DOWN; break; case SELF_LOST_CONTACT_EVT: case PEER_ESTABL_CONTACT_EVT: break; case NODE_SYNCH_END_EVT: case NODE_SYNCH_BEGIN_EVT: case NODE_FAILOVER_BEGIN_EVT: case NODE_FAILOVER_END_EVT: default: goto illegal_evt; } break; case SELF_LEAVING_PEER_DOWN: switch (evt) { case SELF_LOST_CONTACT_EVT: state = SELF_DOWN_PEER_DOWN; break; case SELF_ESTABL_CONTACT_EVT: case PEER_ESTABL_CONTACT_EVT: case PEER_LOST_CONTACT_EVT: break; case NODE_SYNCH_END_EVT: case NODE_SYNCH_BEGIN_EVT: case NODE_FAILOVER_BEGIN_EVT: case NODE_FAILOVER_END_EVT: default: goto illegal_evt; } break; case NODE_FAILINGOVER: switch (evt) { case SELF_LOST_CONTACT_EVT: state = SELF_DOWN_PEER_LEAVING; break; case PEER_LOST_CONTACT_EVT: state = SELF_LEAVING_PEER_DOWN; break; case NODE_FAILOVER_END_EVT: state = SELF_UP_PEER_UP; break; case NODE_FAILOVER_BEGIN_EVT: case SELF_ESTABL_CONTACT_EVT: case PEER_ESTABL_CONTACT_EVT: break; case NODE_SYNCH_BEGIN_EVT: case NODE_SYNCH_END_EVT: default: goto illegal_evt; } break; case NODE_SYNCHING: switch (evt) { case SELF_LOST_CONTACT_EVT: state = SELF_DOWN_PEER_LEAVING; break; case PEER_LOST_CONTACT_EVT: state = SELF_LEAVING_PEER_DOWN; break; case NODE_SYNCH_END_EVT: state = SELF_UP_PEER_UP; break; case NODE_FAILOVER_BEGIN_EVT: state = NODE_FAILINGOVER; break; case NODE_SYNCH_BEGIN_EVT: case SELF_ESTABL_CONTACT_EVT: case PEER_ESTABL_CONTACT_EVT: break; case NODE_FAILOVER_END_EVT: default: goto illegal_evt; } break; default: pr_err("Unknown node fsm state %x\n", state); break; } trace_tipc_node_fsm(n->peer_id, n->state, state, evt); n->state = state; return; illegal_evt: pr_err("Illegal node fsm evt %x in state %x\n", evt, state); trace_tipc_node_fsm(n->peer_id, n->state, state, evt); } static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq) { struct tipc_sock_conn *conn, *safe; struct tipc_link *l; struct list_head *conns = &n->conn_sks; struct sk_buff *skb; uint i; pr_debug("Lost contact with %x\n", n->addr); n->delete_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER); trace_tipc_node_lost_contact(n, true, " "); /* Clean up broadcast state */ tipc_bcast_remove_peer(n->net, n->bc_entry.link); skb_queue_purge(&n->bc_entry.namedq); /* Abort any 
ongoing link failover */ for (i = 0; i < MAX_BEARERS; i++) { l = n->links[i].link; if (l) tipc_link_fsm_evt(l, LINK_FAILOVER_END_EVT); } /* Notify publications from this node */ n->action_flags |= TIPC_NOTIFY_NODE_DOWN; n->peer_net = NULL; n->peer_hash_mix = 0; /* Notify sockets connected to node */ list_for_each_entry_safe(conn, safe, conns, list) { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, tipc_own_addr(n->net), conn->peer_node, conn->port, conn->peer_port, TIPC_ERR_NO_NODE); if (likely(skb)) skb_queue_tail(inputq, skb); list_del(&conn->list); kfree(conn); } } /** * tipc_node_get_linkname - get the name of a link * * @net: the applicable net namespace * @bearer_id: id of the bearer * @addr: peer node address * @linkname: link name output buffer * @len: size of @linkname output buffer * * Return: 0 on success */ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, char *linkname, size_t len) { struct tipc_link *link; int err = -EINVAL; struct tipc_node *node = tipc_node_find(net, addr); if (!node) return err; if (bearer_id >= MAX_BEARERS) goto exit; tipc_node_read_lock(node); link = node->links[bearer_id].link; if (link) { strncpy(linkname, tipc_link_name(link), len); err = 0; } tipc_node_read_unlock(node); exit: tipc_node_put(node); return err; } /* Caller should hold node lock for the passed node */ static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node) { void *hdr; struct nlattr *attrs; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_NODE_GET); if (!hdr) return -EMSGSIZE; attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_NODE); if (!attrs) goto msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_NODE_ADDR, node->addr)) goto attr_msg_full; if (node_is_up(node)) if (nla_put_flag(msg->skb, TIPC_NLA_NODE_UP)) goto attr_msg_full; nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); return 0; attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; } static void tipc_lxc_xmit(struct net *peer_net, struct sk_buff_head *list) { struct tipc_msg *hdr = buf_msg(skb_peek(list)); struct sk_buff_head inputq; switch (msg_user(hdr)) { case TIPC_LOW_IMPORTANCE: case TIPC_MEDIUM_IMPORTANCE: case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: if (msg_connected(hdr) || msg_named(hdr) || msg_direct(hdr)) { tipc_loopback_trace(peer_net, list); spin_lock_init(&list->lock); tipc_sk_rcv(peer_net, list); return; } if (msg_mcast(hdr)) { tipc_loopback_trace(peer_net, list); skb_queue_head_init(&inputq); tipc_sk_mcast_rcv(peer_net, list, &inputq); __skb_queue_purge(list); skb_queue_purge(&inputq); return; } return; case MSG_FRAGMENTER: if (tipc_msg_assemble(list)) { tipc_loopback_trace(peer_net, list); skb_queue_head_init(&inputq); tipc_sk_mcast_rcv(peer_net, list, &inputq); __skb_queue_purge(list); skb_queue_purge(&inputq); } return; case GROUP_PROTOCOL: case CONN_MANAGER: tipc_loopback_trace(peer_net, list); spin_lock_init(&list->lock); tipc_sk_rcv(peer_net, list); return; case LINK_PROTOCOL: case NAME_DISTRIBUTOR: case TUNNEL_PROTOCOL: case BCAST_PROTOCOL: return; default: return; } } /** * tipc_node_xmit() - general link level function for message sending * @net: the applicable net namespace * @list: chain of buffers containing message * @dnode: address of destination node * @selector: a number used for deterministic link selection * Consumes the buffer chain. 
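 *
 * Illustrative usage sketch (not part of the original source; net, skb,
 * dnode and selector are assumed to be supplied by the caller). It mirrors
 * the in-tree wrapper tipc_node_xmit_skb() defined further below::
 *
 *	struct sk_buff_head list;
 *	int rc;
 *
 *	__skb_queue_head_init(&list);
 *	__skb_queue_tail(&list, skb);
 *	rc = tipc_node_xmit(net, &list, dnode, selector);
 *
 * The low bit of @selector picks one of the two active links
 * (active_links[selector & 1]), so a given selector keeps mapping to the
 * same link as long as both links stay up.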
* Return: 0 if success, otherwise: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE,-ENOBUF */ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector) { struct tipc_link_entry *le = NULL; struct tipc_node *n; struct sk_buff_head xmitq; bool node_up = false; struct net *peer_net; int bearer_id; int rc; if (in_own_node(net, dnode)) { tipc_loopback_trace(net, list); spin_lock_init(&list->lock); tipc_sk_rcv(net, list); return 0; } n = tipc_node_find(net, dnode); if (unlikely(!n)) { __skb_queue_purge(list); return -EHOSTUNREACH; } rcu_read_lock(); tipc_node_read_lock(n); node_up = node_is_up(n); peer_net = n->peer_net; tipc_node_read_unlock(n); if (node_up && peer_net && check_net(peer_net)) { /* xmit inner linux container */ tipc_lxc_xmit(peer_net, list); if (likely(skb_queue_empty(list))) { rcu_read_unlock(); tipc_node_put(n); return 0; } } rcu_read_unlock(); tipc_node_read_lock(n); bearer_id = n->active_links[selector & 1]; if (unlikely(bearer_id == INVALID_BEARER_ID)) { tipc_node_read_unlock(n); tipc_node_put(n); __skb_queue_purge(list); return -EHOSTUNREACH; } __skb_queue_head_init(&xmitq); le = &n->links[bearer_id]; spin_lock_bh(&le->lock); rc = tipc_link_xmit(le->link, list, &xmitq); spin_unlock_bh(&le->lock); tipc_node_read_unlock(n); if (unlikely(rc == -ENOBUFS)) tipc_node_link_down(n, bearer_id, false); else tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n); tipc_node_put(n); return rc; } /* tipc_node_xmit_skb(): send single buffer to destination * Buffers sent via this function are generally TIPC_SYSTEM_IMPORTANCE * messages, which will not be rejected * The only exception is datagram messages rerouted after secondary * lookup, which are rare and safe to dispose of anyway. */ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, u32 selector) { struct sk_buff_head head; __skb_queue_head_init(&head); __skb_queue_tail(&head, skb); tipc_node_xmit(net, &head, dnode, selector); return 0; } /* tipc_node_distr_xmit(): send single buffer msgs to individual destinations * Note: this is only for SYSTEM_IMPORTANCE messages, which cannot be rejected */ int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *xmitq) { struct sk_buff *skb; u32 selector, dnode; while ((skb = __skb_dequeue(xmitq))) { selector = msg_origport(buf_msg(skb)); dnode = msg_destnode(buf_msg(skb)); tipc_node_xmit_skb(net, skb, dnode, selector); } return 0; } void tipc_node_broadcast(struct net *net, struct sk_buff *skb, int rc_dests) { struct sk_buff_head xmitq; struct sk_buff *txskb; struct tipc_node *n; u16 dummy; u32 dst; /* Use broadcast if all nodes support it */ if (!rc_dests && tipc_bcast_get_mode(net) != BCLINK_MODE_RCAST) { __skb_queue_head_init(&xmitq); __skb_queue_tail(&xmitq, skb); tipc_bcast_xmit(net, &xmitq, &dummy); return; } /* Otherwise use legacy replicast method */ rcu_read_lock(); list_for_each_entry_rcu(n, tipc_nodes(net), list) { dst = n->addr; if (in_own_node(net, dst)) continue; if (!node_is_up(n)) continue; txskb = pskb_copy(skb, GFP_ATOMIC); if (!txskb) break; msg_set_destnode(buf_msg(txskb), dst); tipc_node_xmit_skb(net, txskb, dst, 0); } rcu_read_unlock(); kfree_skb(skb); } static void tipc_node_mcast_rcv(struct tipc_node *n) { struct tipc_bclink_entry *be = &n->bc_entry; /* 'arrvq' is under inputq2's lock protection */ spin_lock_bh(&be->inputq2.lock); spin_lock_bh(&be->inputq1.lock); skb_queue_splice_tail_init(&be->inputq1, &be->arrvq); spin_unlock_bh(&be->inputq1.lock); spin_unlock_bh(&be->inputq2.lock); tipc_sk_mcast_rcv(n->net, &be->arrvq, 
&be->inputq2); } static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr, int bearer_id, struct sk_buff_head *xmitq) { struct tipc_link *ucl; int rc; rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr, xmitq); if (rc & TIPC_LINK_DOWN_EVT) { tipc_node_reset_links(n); return; } if (!(rc & TIPC_LINK_SND_STATE)) return; /* If probe message, a STATE response will be sent anyway */ if (msg_probe(hdr)) return; /* Produce a STATE message carrying broadcast NACK */ tipc_node_read_lock(n); ucl = n->links[bearer_id].link; if (ucl) tipc_link_build_state_msg(ucl, xmitq); tipc_node_read_unlock(n); } /** * tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node * @net: the applicable net namespace * @skb: TIPC packet * @bearer_id: id of bearer message arrived on * * Invoked with no locks held. */ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id) { int rc; struct sk_buff_head xmitq; struct tipc_bclink_entry *be; struct tipc_link_entry *le; struct tipc_msg *hdr = buf_msg(skb); int usr = msg_user(hdr); u32 dnode = msg_destnode(hdr); struct tipc_node *n; __skb_queue_head_init(&xmitq); /* If NACK for other node, let rcv link for that node peek into it */ if ((usr == BCAST_PROTOCOL) && (dnode != tipc_own_addr(net))) n = tipc_node_find(net, dnode); else n = tipc_node_find(net, msg_prevnode(hdr)); if (!n) { kfree_skb(skb); return; } be = &n->bc_entry; le = &n->links[bearer_id]; rc = tipc_bcast_rcv(net, be->link, skb); /* Broadcast ACKs are sent on a unicast link */ if (rc & TIPC_LINK_SND_STATE) { tipc_node_read_lock(n); tipc_link_build_state_msg(le->link, &xmitq); tipc_node_read_unlock(n); } if (!skb_queue_empty(&xmitq)) tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n); if (!skb_queue_empty(&be->inputq1)) tipc_node_mcast_rcv(n); /* Handle NAME_DISTRIBUTOR messages sent from 1.7 nodes */ if (!skb_queue_empty(&n->bc_entry.namedq)) tipc_named_rcv(net, &n->bc_entry.namedq, &n->bc_entry.named_rcv_nxt, &n->bc_entry.named_open); /* If reassembly or retransmission failure => reset all links to peer */ if (rc & TIPC_LINK_DOWN_EVT) tipc_node_reset_links(n); tipc_node_put(n); } /** * tipc_node_check_state - check and if necessary update node state * @n: target tipc_node * @skb: TIPC packet * @bearer_id: identity of bearer delivering the packet * @xmitq: queue for messages to be xmited on * Return: true if state and msg are ok, otherwise false */ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, int bearer_id, struct sk_buff_head *xmitq) { struct tipc_msg *hdr = buf_msg(skb); int usr = msg_user(hdr); int mtyp = msg_type(hdr); u16 oseqno = msg_seqno(hdr); u16 exp_pkts = msg_msgcnt(hdr); u16 rcv_nxt, syncpt, dlv_nxt, inputq_len; int state = n->state; struct tipc_link *l, *tnl, *pl = NULL; struct tipc_media_addr *maddr; int pb_id; if (trace_tipc_node_check_state_enabled()) { trace_tipc_skb_dump(skb, false, "skb for node state check"); trace_tipc_node_check_state(n, true, " "); } l = n->links[bearer_id].link; if (!l) return false; rcv_nxt = tipc_link_rcv_nxt(l); if (likely((state == SELF_UP_PEER_UP) && (usr != TUNNEL_PROTOCOL))) return true; /* Find parallel link, if any */ for (pb_id = 0; pb_id < MAX_BEARERS; pb_id++) { if ((pb_id != bearer_id) && n->links[pb_id].link) { pl = n->links[pb_id].link; break; } } if (!tipc_link_validate_msg(l, hdr)) { trace_tipc_skb_dump(skb, false, "PROTO invalid (2)!"); trace_tipc_link_dump(l, TIPC_DUMP_NONE, "PROTO invalid (2)!"); return false; } /* Check and update node accesibility if 
applicable */ if (state == SELF_UP_PEER_COMING) { if (!tipc_link_is_up(l)) return true; if (!msg_peer_link_is_up(hdr)) return true; tipc_node_fsm_evt(n, PEER_ESTABL_CONTACT_EVT); } if (state == SELF_DOWN_PEER_LEAVING) { if (msg_peer_node_is_up(hdr)) return false; tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT); return true; } if (state == SELF_LEAVING_PEER_DOWN) return false; /* Ignore duplicate packets */ if ((usr != LINK_PROTOCOL) && less(oseqno, rcv_nxt)) return true; /* Initiate or update failover mode if applicable */ if ((usr == TUNNEL_PROTOCOL) && (mtyp == FAILOVER_MSG)) { syncpt = oseqno + exp_pkts - 1; if (pl && !tipc_link_is_reset(pl)) { __tipc_node_link_down(n, &pb_id, xmitq, &maddr); trace_tipc_node_link_down(n, true, "node link down <- failover!"); tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl), tipc_link_inputq(l)); } /* If parallel link was already down, and this happened before * the tunnel link came up, node failover was never started. * Ensure that a FAILOVER_MSG is sent to get peer out of * NODE_FAILINGOVER state, also this node must accept * TUNNEL_MSGs from peer. */ if (n->state != NODE_FAILINGOVER) tipc_node_link_failover(n, pl, l, xmitq); /* If pkts arrive out of order, use lowest calculated syncpt */ if (less(syncpt, n->sync_point)) n->sync_point = syncpt; } /* Open parallel link when tunnel link reaches synch point */ if ((n->state == NODE_FAILINGOVER) && tipc_link_is_up(l)) { if (!more(rcv_nxt, n->sync_point)) return true; tipc_node_fsm_evt(n, NODE_FAILOVER_END_EVT); if (pl) tipc_link_fsm_evt(pl, LINK_FAILOVER_END_EVT); return true; } /* No syncing needed if only one link */ if (!pl || !tipc_link_is_up(pl)) return true; /* Initiate synch mode if applicable */ if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG) && (oseqno == 1)) { if (n->capabilities & TIPC_TUNNEL_ENHANCED) syncpt = msg_syncpt(hdr); else syncpt = msg_seqno(msg_inner_hdr(hdr)) + exp_pkts - 1; if (!tipc_link_is_up(l)) __tipc_node_link_up(n, bearer_id, xmitq); if (n->state == SELF_UP_PEER_UP) { n->sync_point = syncpt; tipc_link_fsm_evt(l, LINK_SYNCH_BEGIN_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_BEGIN_EVT); } } /* Open tunnel link when parallel link reaches synch point */ if (n->state == NODE_SYNCHING) { if (tipc_link_is_synching(l)) { tnl = l; } else { tnl = pl; pl = l; } inputq_len = skb_queue_len(tipc_link_inputq(pl)); dlv_nxt = tipc_link_rcv_nxt(pl) - inputq_len; if (more(dlv_nxt, n->sync_point)) { tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); return true; } if (l == pl) return true; if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG)) return true; if (usr == LINK_PROTOCOL) return true; return false; } return true; } /** * tipc_rcv - process TIPC packets/messages arriving from off-node * @net: the applicable net namespace * @skb: TIPC packet * @b: pointer to bearer message arrived on * * Invoked with no locks held. Bearer pointer must point to a valid bearer * structure (i.e. cannot be NULL), but bearer can be inactive. 
*/ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) { struct sk_buff_head xmitq; struct tipc_link_entry *le; struct tipc_msg *hdr; struct tipc_node *n; int bearer_id = b->identity; u32 self = tipc_own_addr(net); int usr, rc = 0; u16 bc_ack; #ifdef CONFIG_TIPC_CRYPTO struct tipc_ehdr *ehdr; /* Check if message must be decrypted first */ if (TIPC_SKB_CB(skb)->decrypted || !tipc_ehdr_validate(skb)) goto rcv; ehdr = (struct tipc_ehdr *)skb->data; if (likely(ehdr->user != LINK_CONFIG)) { n = tipc_node_find(net, ntohl(ehdr->addr)); if (unlikely(!n)) goto discard; } else { n = tipc_node_find_by_id(net, ehdr->id); } skb_dst_force(skb); tipc_crypto_rcv(net, (n) ? n->crypto_rx : NULL, &skb, b); if (!skb) return; rcv: #endif /* Ensure message is well-formed before touching the header */ if (unlikely(!tipc_msg_validate(&skb))) goto discard; __skb_queue_head_init(&xmitq); hdr = buf_msg(skb); usr = msg_user(hdr); bc_ack = msg_bcast_ack(hdr); /* Handle arrival of discovery or broadcast packet */ if (unlikely(msg_non_seq(hdr))) { if (unlikely(usr == LINK_CONFIG)) return tipc_disc_rcv(net, skb, b); else return tipc_node_bc_rcv(net, skb, bearer_id); } /* Discard unicast link messages destined for another node */ if (unlikely(!msg_short(hdr) && (msg_destnode(hdr) != self))) goto discard; /* Locate neighboring node that sent packet */ n = tipc_node_find(net, msg_prevnode(hdr)); if (unlikely(!n)) goto discard; le = &n->links[bearer_id]; /* Ensure broadcast reception is in synch with peer's send state */ if (unlikely(usr == LINK_PROTOCOL)) { if (unlikely(skb_linearize(skb))) { tipc_node_put(n); goto discard; } hdr = buf_msg(skb); tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq); } else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) { tipc_bcast_ack_rcv(net, n->bc_entry.link, hdr); } /* Receive packet directly if conditions permit */ tipc_node_read_lock(n); if (likely((n->state == SELF_UP_PEER_UP) && (usr != TUNNEL_PROTOCOL))) { spin_lock_bh(&le->lock); if (le->link) { rc = tipc_link_rcv(le->link, skb, &xmitq); skb = NULL; } spin_unlock_bh(&le->lock); } tipc_node_read_unlock(n); /* Check/update node state before receiving */ if (unlikely(skb)) { if (unlikely(skb_linearize(skb))) goto out_node_put; tipc_node_write_lock(n); if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) { if (le->link) { rc = tipc_link_rcv(le->link, skb, &xmitq); skb = NULL; } } tipc_node_write_unlock(n); } if (unlikely(rc & TIPC_LINK_UP_EVT)) tipc_node_link_up(n, bearer_id, &xmitq); if (unlikely(rc & TIPC_LINK_DOWN_EVT)) tipc_node_link_down(n, bearer_id, false); if (unlikely(!skb_queue_empty(&n->bc_entry.namedq))) tipc_named_rcv(net, &n->bc_entry.namedq, &n->bc_entry.named_rcv_nxt, &n->bc_entry.named_open); if (unlikely(!skb_queue_empty(&n->bc_entry.inputq1))) tipc_node_mcast_rcv(n); if (!skb_queue_empty(&le->inputq)) tipc_sk_rcv(net, &le->inputq); if (!skb_queue_empty(&xmitq)) tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr, n); out_node_put: tipc_node_put(n); discard: kfree_skb(skb); } void tipc_node_apply_property(struct net *net, struct tipc_bearer *b, int prop) { struct tipc_net *tn = tipc_net(net); int bearer_id = b->identity; struct sk_buff_head xmitq; struct tipc_link_entry *e; struct tipc_node *n; __skb_queue_head_init(&xmitq); rcu_read_lock(); list_for_each_entry_rcu(n, &tn->node_list, list) { tipc_node_write_lock(n); e = &n->links[bearer_id]; if (e->link) { if (prop == TIPC_NLA_PROP_TOL) tipc_link_set_tolerance(e->link, b->tolerance, &xmitq); else if (prop == TIPC_NLA_PROP_MTU) 
tipc_link_set_mtu(e->link, b->mtu); /* Update MTU for node link entry */ e->mtu = tipc_link_mss(e->link); } tipc_node_write_unlock(n); tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr, NULL); } rcu_read_unlock(); } int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) { struct net *net = sock_net(skb->sk); struct tipc_net *tn = net_generic(net, tipc_net_id); struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; struct tipc_node *peer, *temp_node; u8 node_id[NODE_ID_LEN]; u64 *w0 = (u64 *)&node_id[0]; u64 *w1 = (u64 *)&node_id[8]; u32 addr; int err; /* We identify the peer by its net */ if (!info->attrs[TIPC_NLA_NET]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_NET_MAX, info->attrs[TIPC_NLA_NET], tipc_nl_net_policy, info->extack); if (err) return err; /* attrs[TIPC_NLA_NET_NODEID] and attrs[TIPC_NLA_NET_ADDR] are * mutually exclusive cases */ if (attrs[TIPC_NLA_NET_ADDR]) { addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]); if (!addr) return -EINVAL; } if (attrs[TIPC_NLA_NET_NODEID]) { if (!attrs[TIPC_NLA_NET_NODEID_W1]) return -EINVAL; *w0 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID]); *w1 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID_W1]); addr = hash128to32(node_id); } if (in_own_node(net, addr)) return -ENOTSUPP; spin_lock_bh(&tn->node_list_lock); peer = tipc_node_find(net, addr); if (!peer) { spin_unlock_bh(&tn->node_list_lock); return -ENXIO; } tipc_node_write_lock(peer); if (peer->state != SELF_DOWN_PEER_DOWN && peer->state != SELF_DOWN_PEER_LEAVING) { tipc_node_write_unlock(peer); err = -EBUSY; goto err_out; } tipc_node_clear_links(peer); tipc_node_write_unlock(peer); tipc_node_delete(peer); /* Calculate cluster capabilities */ tn->capabilities = TIPC_NODE_CAPABILITIES; list_for_each_entry_rcu(temp_node, &tn->node_list, list) { tn->capabilities &= temp_node->capabilities; } tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST)); err = 0; err_out: tipc_node_put(peer); spin_unlock_bh(&tn->node_list_lock); return err; } int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; struct net *net = sock_net(skb->sk); struct tipc_net *tn = net_generic(net, tipc_net_id); int done = cb->args[0]; int last_addr = cb->args[1]; struct tipc_node *node; struct tipc_nl_msg msg; if (done) return 0; msg.skb = skb; msg.portid = NETLINK_CB(cb->skb).portid; msg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); if (last_addr) { node = tipc_node_find(net, last_addr); if (!node) { rcu_read_unlock(); /* We never set seq or call nl_dump_check_consistent() * this means that setting prev_seq here will cause the * consistence check to fail in the netlink callback * handler. Resulting in the NLMSG_DONE message having * the NLM_F_DUMP_INTR flag set if the node state * changed while we released the lock. */ cb->prev_seq = 1; return -EPIPE; } tipc_node_put(node); } list_for_each_entry_rcu(node, &tn->node_list, list) { if (node->preliminary) continue; if (last_addr) { if (node->addr == last_addr) last_addr = 0; else continue; } tipc_node_read_lock(node); err = __tipc_nl_add_node(&msg, node); if (err) { last_addr = node->addr; tipc_node_read_unlock(node); goto out; } tipc_node_read_unlock(node); } done = 1; out: cb->args[0] = done; cb->args[1] = last_addr; rcu_read_unlock(); return skb->len; } /* tipc_node_find_by_name - locate owner node of link by link's name * @net: the applicable net namespace * @name: pointer to link name string * @bearer_id: pointer to index in 'node->links' array where the link was found. 
* * Returns pointer to node owning the link, or 0 if no matching link is found. */ static struct tipc_node *tipc_node_find_by_name(struct net *net, const char *link_name, unsigned int *bearer_id) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_link *l; struct tipc_node *n; struct tipc_node *found_node = NULL; int i; *bearer_id = 0; rcu_read_lock(); list_for_each_entry_rcu(n, &tn->node_list, list) { tipc_node_read_lock(n); for (i = 0; i < MAX_BEARERS; i++) { l = n->links[i].link; if (l && !strcmp(tipc_link_name(l), link_name)) { *bearer_id = i; found_node = n; break; } } tipc_node_read_unlock(n); if (found_node) break; } rcu_read_unlock(); return found_node; } int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) { int err; int res = 0; int bearer_id; char *name; struct tipc_link *link; struct tipc_node *node; struct sk_buff_head xmitq; struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; struct net *net = sock_net(skb->sk); __skb_queue_head_init(&xmitq); if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_LINK_MAX, info->attrs[TIPC_NLA_LINK], tipc_nl_link_policy, info->extack); if (err) return err; if (!attrs[TIPC_NLA_LINK_NAME]) return -EINVAL; name = nla_data(attrs[TIPC_NLA_LINK_NAME]); if (strcmp(name, tipc_bclink_name) == 0) return tipc_nl_bc_link_set(net, attrs); node = tipc_node_find_by_name(net, name, &bearer_id); if (!node) return -EINVAL; tipc_node_read_lock(node); link = node->links[bearer_id].link; if (!link) { res = -EINVAL; goto out; } if (attrs[TIPC_NLA_LINK_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], props); if (err) { res = err; goto out; } if (props[TIPC_NLA_PROP_TOL]) { u32 tol; tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); tipc_link_set_tolerance(link, tol, &xmitq); } if (props[TIPC_NLA_PROP_PRIO]) { u32 prio; prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); tipc_link_set_prio(link, prio, &xmitq); } if (props[TIPC_NLA_PROP_WIN]) { u32 max_win; max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); tipc_link_set_queue_limits(link, tipc_link_min_win(link), max_win); } } out: tipc_node_read_unlock(node); tipc_bearer_xmit(net, bearer_id, &xmitq, &node->links[bearer_id].maddr, NULL); return res; } int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; struct tipc_nl_msg msg; char *name; int err; msg.portid = info->snd_portid; msg.seq = info->snd_seq; if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_LINK_MAX, info->attrs[TIPC_NLA_LINK], tipc_nl_link_policy, info->extack); if (err) return err; if (!attrs[TIPC_NLA_LINK_NAME]) return -EINVAL; name = nla_data(attrs[TIPC_NLA_LINK_NAME]); msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!msg.skb) return -ENOMEM; if (strcmp(name, tipc_bclink_name) == 0) { err = tipc_nl_add_bc_link(net, &msg, tipc_net(net)->bcl); if (err) goto err_free; } else { int bearer_id; struct tipc_node *node; struct tipc_link *link; node = tipc_node_find_by_name(net, name, &bearer_id); if (!node) { err = -EINVAL; goto err_free; } tipc_node_read_lock(node); link = node->links[bearer_id].link; if (!link) { tipc_node_read_unlock(node); err = -EINVAL; goto err_free; } err = __tipc_nl_add_link(net, &msg, link, 0); tipc_node_read_unlock(node); if (err) goto err_free; } return genlmsg_reply(msg.skb, info); err_free: nlmsg_free(msg.skb); return err; } int 
tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) { int err; char *link_name; unsigned int bearer_id; struct tipc_link *link; struct tipc_node *node; struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; struct net *net = sock_net(skb->sk); struct tipc_net *tn = tipc_net(net); struct tipc_link_entry *le; if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_LINK_MAX, info->attrs[TIPC_NLA_LINK], tipc_nl_link_policy, info->extack); if (err) return err; if (!attrs[TIPC_NLA_LINK_NAME]) return -EINVAL; link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]); err = -EINVAL; if (!strcmp(link_name, tipc_bclink_name)) { err = tipc_bclink_reset_stats(net, tipc_bc_sndlink(net)); if (err) return err; return 0; } else if (strstr(link_name, tipc_bclink_name)) { rcu_read_lock(); list_for_each_entry_rcu(node, &tn->node_list, list) { tipc_node_read_lock(node); link = node->bc_entry.link; if (link && !strcmp(link_name, tipc_link_name(link))) { err = tipc_bclink_reset_stats(net, link); tipc_node_read_unlock(node); break; } tipc_node_read_unlock(node); } rcu_read_unlock(); return err; } node = tipc_node_find_by_name(net, link_name, &bearer_id); if (!node) return -EINVAL; le = &node->links[bearer_id]; tipc_node_read_lock(node); spin_lock_bh(&le->lock); link = node->links[bearer_id].link; if (!link) { spin_unlock_bh(&le->lock); tipc_node_read_unlock(node); return -EINVAL; } tipc_link_reset_stats(link); spin_unlock_bh(&le->lock); tipc_node_read_unlock(node); return 0; } /* Caller should hold node lock */ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, struct tipc_node *node, u32 *prev_link, bool bc_link) { u32 i; int err; for (i = *prev_link; i < MAX_BEARERS; i++) { *prev_link = i; if (!node->links[i].link) continue; err = __tipc_nl_add_link(net, msg, node->links[i].link, NLM_F_MULTI); if (err) return err; } if (bc_link) { *prev_link = i; err = tipc_nl_add_bc_link(net, msg, node->bc_entry.link); if (err) return err; } *prev_link = 0; return 0; } int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs; struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node; struct tipc_nl_msg msg; u32 prev_node = cb->args[0]; u32 prev_link = cb->args[1]; int done = cb->args[2]; bool bc_link = cb->args[3]; int err; if (done) return 0; if (!prev_node) { /* Check if broadcast-receiver links dumping is needed */ if (attrs && attrs[TIPC_NLA_LINK]) { err = nla_parse_nested_deprecated(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], tipc_nl_link_policy, NULL); if (unlikely(err)) return err; if (unlikely(!link[TIPC_NLA_LINK_BROADCAST])) return -EINVAL; bc_link = true; } } msg.skb = skb; msg.portid = NETLINK_CB(cb->skb).portid; msg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); if (prev_node) { node = tipc_node_find(net, prev_node); if (!node) { /* We never set seq or call nl_dump_check_consistent() * this means that setting prev_seq here will cause the * consistence check to fail in the netlink callback * handler. Resulting in the last NLMSG_DONE message * having the NLM_F_DUMP_INTR flag set. 
*/ cb->prev_seq = 1; goto out; } tipc_node_put(node); list_for_each_entry_continue_rcu(node, &tn->node_list, list) { tipc_node_read_lock(node); err = __tipc_nl_add_node_links(net, &msg, node, &prev_link, bc_link); tipc_node_read_unlock(node); if (err) goto out; prev_node = node->addr; } } else { err = tipc_nl_add_bc_link(net, &msg, tn->bcl); if (err) goto out; list_for_each_entry_rcu(node, &tn->node_list, list) { tipc_node_read_lock(node); err = __tipc_nl_add_node_links(net, &msg, node, &prev_link, bc_link); tipc_node_read_unlock(node); if (err) goto out; prev_node = node->addr; } } done = 1; out: rcu_read_unlock(); cb->args[0] = prev_node; cb->args[1] = prev_link; cb->args[2] = done; cb->args[3] = bc_link; return skb->len; } int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attrs[TIPC_NLA_MON_MAX + 1]; struct net *net = sock_net(skb->sk); int err; if (!info->attrs[TIPC_NLA_MON]) return -EINVAL; err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MON_MAX, info->attrs[TIPC_NLA_MON], tipc_nl_monitor_policy, info->extack); if (err) return err; if (attrs[TIPC_NLA_MON_ACTIVATION_THRESHOLD]) { u32 val; val = nla_get_u32(attrs[TIPC_NLA_MON_ACTIVATION_THRESHOLD]); err = tipc_nl_monitor_set_threshold(net, val); if (err) return err; } return 0; } static int __tipc_nl_add_monitor_prop(struct net *net, struct tipc_nl_msg *msg) { struct nlattr *attrs; void *hdr; u32 val; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, 0, TIPC_NL_MON_GET); if (!hdr) return -EMSGSIZE; attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON); if (!attrs) goto msg_full; val = tipc_nl_monitor_get_threshold(net); if (nla_put_u32(msg->skb, TIPC_NLA_MON_ACTIVATION_THRESHOLD, val)) goto attr_msg_full; nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); return 0; attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; } int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info) { struct net *net = sock_net(skb->sk); struct tipc_nl_msg msg; int err; msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!msg.skb) return -ENOMEM; msg.portid = info->snd_portid; msg.seq = info->snd_seq; err = __tipc_nl_add_monitor_prop(net, &msg); if (err) { nlmsg_free(msg.skb); return err; } return genlmsg_reply(msg.skb, info); } int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); u32 prev_bearer = cb->args[0]; struct tipc_nl_msg msg; int bearer_id; int err; if (prev_bearer == MAX_BEARERS) return 0; msg.skb = skb; msg.portid = NETLINK_CB(cb->skb).portid; msg.seq = cb->nlh->nlmsg_seq; rtnl_lock(); for (bearer_id = prev_bearer; bearer_id < MAX_BEARERS; bearer_id++) { err = __tipc_nl_add_monitor(net, &msg, bearer_id); if (err) break; } rtnl_unlock(); cb->args[0] = bearer_id; return skb->len; } int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); u32 prev_node = cb->args[1]; u32 bearer_id = cb->args[2]; int done = cb->args[0]; struct tipc_nl_msg msg; int err; if (!prev_node) { struct nlattr **attrs = genl_dumpit_info(cb)->info.attrs; struct nlattr *mon[TIPC_NLA_MON_MAX + 1]; if (!attrs[TIPC_NLA_MON]) return -EINVAL; err = nla_parse_nested_deprecated(mon, TIPC_NLA_MON_MAX, attrs[TIPC_NLA_MON], tipc_nl_monitor_policy, NULL); if (err) return err; if (!mon[TIPC_NLA_MON_REF]) return -EINVAL; bearer_id = nla_get_u32(mon[TIPC_NLA_MON_REF]); if (bearer_id >= MAX_BEARERS) return -EINVAL; } if 
(done) return 0; msg.skb = skb; msg.portid = NETLINK_CB(cb->skb).portid; msg.seq = cb->nlh->nlmsg_seq; rtnl_lock(); err = tipc_nl_add_monitor_peer(net, &msg, bearer_id, &prev_node); if (!err) done = 1; rtnl_unlock(); cb->args[0] = done; cb->args[1] = prev_node; cb->args[2] = bearer_id; return skb->len; } #ifdef CONFIG_TIPC_CRYPTO static int tipc_nl_retrieve_key(struct nlattr **attrs, struct tipc_aead_key **pkey) { struct nlattr *attr = attrs[TIPC_NLA_NODE_KEY]; struct tipc_aead_key *key; if (!attr) return -ENODATA; if (nla_len(attr) < sizeof(*key)) return -EINVAL; key = (struct tipc_aead_key *)nla_data(attr); if (key->keylen > TIPC_AEAD_KEYLEN_MAX || nla_len(attr) < tipc_aead_key_size(key)) return -EINVAL; *pkey = key; return 0; } static int tipc_nl_retrieve_nodeid(struct nlattr **attrs, u8 **node_id) { struct nlattr *attr = attrs[TIPC_NLA_NODE_ID]; if (!attr) return -ENODATA; if (nla_len(attr) < TIPC_NODEID_LEN) return -EINVAL; *node_id = (u8 *)nla_data(attr); return 0; } static int tipc_nl_retrieve_rekeying(struct nlattr **attrs, u32 *intv) { struct nlattr *attr = attrs[TIPC_NLA_NODE_REKEYING]; if (!attr) return -ENODATA; *intv = nla_get_u32(attr); return 0; } static int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attrs[TIPC_NLA_NODE_MAX + 1]; struct net *net = sock_net(skb->sk); struct tipc_crypto *tx = tipc_net(net)->crypto_tx, *c = tx; struct tipc_node *n = NULL; struct tipc_aead_key *ukey; bool rekeying = true, master_key = false; u8 *id, *own_id, mode; u32 intv = 0; int rc = 0; if (!info->attrs[TIPC_NLA_NODE]) return -EINVAL; rc = nla_parse_nested(attrs, TIPC_NLA_NODE_MAX, info->attrs[TIPC_NLA_NODE], tipc_nl_node_policy, info->extack); if (rc) return rc; own_id = tipc_own_id(net); if (!own_id) { GENL_SET_ERR_MSG(info, "not found own node identity (set id?)"); return -EPERM; } rc = tipc_nl_retrieve_rekeying(attrs, &intv); if (rc == -ENODATA) rekeying = false; rc = tipc_nl_retrieve_key(attrs, &ukey); if (rc == -ENODATA && rekeying) goto rekeying; else if (rc) return rc; rc = tipc_aead_key_validate(ukey, info); if (rc) return rc; rc = tipc_nl_retrieve_nodeid(attrs, &id); switch (rc) { case -ENODATA: mode = CLUSTER_KEY; master_key = !!(attrs[TIPC_NLA_NODE_KEY_MASTER]); break; case 0: mode = PER_NODE_KEY; if (memcmp(id, own_id, NODE_ID_LEN)) { n = tipc_node_find_by_id(net, id) ?: tipc_node_create(net, 0, id, 0xffffu, 0, true); if (unlikely(!n)) return -ENOMEM; c = n->crypto_rx; } break; default: return rc; } /* Initiate the TX/RX key */ rc = tipc_crypto_key_init(c, ukey, mode, master_key); if (n) tipc_node_put(n); if (unlikely(rc < 0)) { GENL_SET_ERR_MSG(info, "unable to initiate or attach new key"); return rc; } else if (c == tx) { /* Distribute TX key but not master one */ if (!master_key && tipc_crypto_key_distr(tx, rc, NULL)) GENL_SET_ERR_MSG(info, "failed to replicate new key"); rekeying: /* Schedule TX rekeying if needed */ tipc_crypto_rekeying_sched(tx, rekeying, intv); } return 0; } int tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info) { int err; rtnl_lock(); err = __tipc_nl_node_set_key(skb, info); rtnl_unlock(); return err; } static int __tipc_nl_node_flush_key(struct sk_buff *skb, struct genl_info *info) { struct net *net = sock_net(skb->sk); struct tipc_net *tn = tipc_net(net); struct tipc_node *n; tipc_crypto_key_flush(tn->crypto_tx); rcu_read_lock(); list_for_each_entry_rcu(n, &tn->node_list, list) tipc_crypto_key_flush(n->crypto_rx); rcu_read_unlock(); return 0; } int tipc_nl_node_flush_key(struct sk_buff *skb, 
struct genl_info *info) { int err; rtnl_lock(); err = __tipc_nl_node_flush_key(skb, info); rtnl_unlock(); return err; } #endif /** * tipc_node_dump - dump TIPC node data * @n: tipc node to be dumped * @more: dump more? * - false: dump only tipc node data * - true: dump node link data as well * @buf: returned buffer of dump data in format */ int tipc_node_dump(struct tipc_node *n, bool more, char *buf) { int i = 0; size_t sz = (more) ? NODE_LMAX : NODE_LMIN; if (!n) { i += scnprintf(buf, sz, "node data: (null)\n"); return i; } i += scnprintf(buf, sz, "node data: %x", n->addr); i += scnprintf(buf + i, sz - i, " %x", n->state); i += scnprintf(buf + i, sz - i, " %d", n->active_links[0]); i += scnprintf(buf + i, sz - i, " %d", n->active_links[1]); i += scnprintf(buf + i, sz - i, " %x", n->action_flags); i += scnprintf(buf + i, sz - i, " %u", n->failover_sent); i += scnprintf(buf + i, sz - i, " %u", n->sync_point); i += scnprintf(buf + i, sz - i, " %d", n->link_cnt); i += scnprintf(buf + i, sz - i, " %u", n->working_links); i += scnprintf(buf + i, sz - i, " %x", n->capabilities); i += scnprintf(buf + i, sz - i, " %lu\n", n->keepalive_intv); if (!more) return i; i += scnprintf(buf + i, sz - i, "link_entry[0]:\n"); i += scnprintf(buf + i, sz - i, " mtu: %u\n", n->links[0].mtu); i += scnprintf(buf + i, sz - i, " media: "); i += tipc_media_addr_printf(buf + i, sz - i, &n->links[0].maddr); i += scnprintf(buf + i, sz - i, "\n"); i += tipc_link_dump(n->links[0].link, TIPC_DUMP_NONE, buf + i); i += scnprintf(buf + i, sz - i, " inputq: "); i += tipc_list_dump(&n->links[0].inputq, false, buf + i); i += scnprintf(buf + i, sz - i, "link_entry[1]:\n"); i += scnprintf(buf + i, sz - i, " mtu: %u\n", n->links[1].mtu); i += scnprintf(buf + i, sz - i, " media: "); i += tipc_media_addr_printf(buf + i, sz - i, &n->links[1].maddr); i += scnprintf(buf + i, sz - i, "\n"); i += tipc_link_dump(n->links[1].link, TIPC_DUMP_NONE, buf + i); i += scnprintf(buf + i, sz - i, " inputq: "); i += tipc_list_dump(&n->links[1].inputq, false, buf + i); i += scnprintf(buf + i, sz - i, "bclink:\n "); i += tipc_link_dump(n->bc_entry.link, TIPC_DUMP_NONE, buf + i); return i; } void tipc_node_pre_cleanup_net(struct net *exit_net) { struct tipc_node *n; struct tipc_net *tn; struct net *tmp; rcu_read_lock(); for_each_net_rcu(tmp) { if (tmp == exit_net) continue; tn = tipc_net(tmp); if (!tn) continue; spin_lock_bh(&tn->node_list_lock); list_for_each_entry_rcu(n, &tn->node_list, list) { if (!n->peer_net) continue; if (n->peer_net != exit_net) continue; tipc_node_write_lock(n); n->peer_net = NULL; n->peer_hash_mix = 0; tipc_node_write_unlock_fast(n); break; } spin_unlock_bh(&tn->node_list_lock); } rcu_read_unlock(); }
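/*
 * Illustrative sketch, not part of net/tipc/node.c (the names below are
 * made up). tipc_node_dump() above builds its output with the
 * bounded-append idiom "i += scnprintf(buf + i, sz - i, ...)": scnprintf()
 * returns the number of characters actually stored (excluding the
 * terminating NUL) and never writes more than the size it is given, so the
 * running offset cannot step past the end of the buffer even once it
 * fills up. A minimal self-contained version of the same idiom:
 */
#include <linux/kernel.h>

static int demo_dump(char *buf, size_t sz, int state, int link_cnt)
{
	int i = 0;

	i += scnprintf(buf + i, sz - i, "state: %x", state);
	i += scnprintf(buf + i, sz - i, " links: %d", link_cnt);
	i += scnprintf(buf + i, sz - i, "\n");

	return i;	/* characters placed in buf, NUL not counted */
}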
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_SEQLOCK_H #define __LINUX_SEQLOCK_H /* * seqcount_t / seqlock_t - a reader-writer consistency mechanism with * lockless readers (read-only retry loops), and no writer starvation. * * See Documentation/locking/seqlock.rst * * Copyrights: * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli * - Sequence counters with associated locks, (C) 2020 Linutronix GmbH */ #include <linux/compiler.h> #include <linux/kcsan-checks.h> #include <linux/lockdep.h> #include <linux/mutex.h> #include <linux/preempt.h> #include <linux/seqlock_types.h> #include <linux/spinlock.h> #include <asm/processor.h> /* * The seqlock seqcount_t interface does not prescribe a precise sequence of * read begin/retry/end. For readers, typically there is a call to * read_seqcount_begin() and read_seqcount_retry(), however, there are more * esoteric cases which do not follow this pattern. * * As a consequence, we take the following best-effort approach for raw usage * via seqcount_t under KCSAN: upon beginning a seq-reader critical section, * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as * atomics; if there is a matching read_seqcount_retry() call, no following * memory operations are considered atomic. Usage of the seqlock_t interface * is not affected.
*/ #define KCSAN_SEQLOCK_REGION_MAX 1000 static inline void __seqcount_init(seqcount_t *s, const char *name, struct lock_class_key *key) { /* * Make sure we are not reinitializing a held lock: */ lockdep_init_map(&s->dep_map, name, key, 0); s->sequence = 0; } #ifdef CONFIG_DEBUG_LOCK_ALLOC # define SEQCOUNT_DEP_MAP_INIT(lockname) \ .dep_map = { .name = #lockname } /** * seqcount_init() - runtime initializer for seqcount_t * @s: Pointer to the seqcount_t instance */ # define seqcount_init(s) \ do { \ static struct lock_class_key __key; \ __seqcount_init((s), #s, &__key); \ } while (0) static inline void seqcount_lockdep_reader_access(const seqcount_t *s) { seqcount_t *l = (seqcount_t *)s; unsigned long flags; local_irq_save(flags); seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_); seqcount_release(&l->dep_map, _RET_IP_); local_irq_restore(flags); } #else # define SEQCOUNT_DEP_MAP_INIT(lockname) # define seqcount_init(s) __seqcount_init(s, NULL, NULL) # define seqcount_lockdep_reader_access(x) #endif /** * SEQCNT_ZERO() - static initializer for seqcount_t * @name: Name of the seqcount_t instance */ #define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) } /* * Sequence counters with associated locks (seqcount_LOCKNAME_t) * * A sequence counter which associates the lock used for writer * serialization at initialization time. This enables lockdep to validate * that the write side critical section is properly serialized. * * For associated locks which do not implicitly disable preemption, * preemption protection is enforced in the write side function. * * Lockdep is never used in any for the raw write variants. * * See Documentation/locking/seqlock.rst */ /* * typedef seqcount_LOCKNAME_t - sequence counter with LOCKNAME associated * @seqcount: The real sequence counter * @lock: Pointer to the associated lock * * A plain sequence counter with external writer synchronization by * LOCKNAME @lock. The lock is associated to the sequence counter in the * static initializer or init function. This enables lockdep to validate * that the write side critical section is properly serialized. 
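 *
 * Illustrative declaration and write side for the spinlock flavour (added
 * sketch; the struct and function names are hypothetical)::
 *
 *	struct demo {
 *		spinlock_t		lock;
 *		seqcount_spinlock_t	seq;
 *		u64			a, b;
 *	};
 *
 *	static void demo_init(struct demo *d)
 *	{
 *		spin_lock_init(&d->lock);
 *		seqcount_spinlock_init(&d->seq, &d->lock);
 *	}
 *
 *	static void demo_write(struct demo *d, u64 a, u64 b)
 *	{
 *		spin_lock(&d->lock);		// writer serialization
 *		write_seqcount_begin(&d->seq);	// lockdep: d->lock must be held
 *		d->a = a;
 *		d->b = b;
 *		write_seqcount_end(&d->seq);
 *		spin_unlock(&d->lock);
 *	}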
* * LOCKNAME: raw_spinlock, spinlock, rwlock or mutex */ /* * seqcount_LOCKNAME_init() - runtime initializer for seqcount_LOCKNAME_t * @s: Pointer to the seqcount_LOCKNAME_t instance * @lock: Pointer to the associated lock */ #define seqcount_LOCKNAME_init(s, _lock, lockname) \ do { \ seqcount_##lockname##_t *____s = (s); \ seqcount_init(&____s->seqcount); \ __SEQ_LOCK(____s->lock = (_lock)); \ } while (0) #define seqcount_raw_spinlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, raw_spinlock) #define seqcount_spinlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, spinlock) #define seqcount_rwlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, rwlock) #define seqcount_mutex_init(s, lock) seqcount_LOCKNAME_init(s, lock, mutex) /* * SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers * seqprop_LOCKNAME_*() - Property accessors for seqcount_LOCKNAME_t * * @lockname: "LOCKNAME" part of seqcount_LOCKNAME_t * @locktype: LOCKNAME canonical C data type * @preemptible: preemptibility of above locktype * @lockbase: prefix for associated lock/unlock */ #define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase) \ static __always_inline seqcount_t * \ __seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \ { \ return &s->seqcount; \ } \ \ static __always_inline const seqcount_t * \ __seqprop_##lockname##_const_ptr(const seqcount_##lockname##_t *s) \ { \ return &s->seqcount; \ } \ \ static __always_inline unsigned \ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ { \ unsigned seq = smp_load_acquire(&s->seqcount.sequence); \ \ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ return seq; \ \ if (preemptible && unlikely(seq & 1)) { \ __SEQ_LOCK(lockbase##_lock(s->lock)); \ __SEQ_LOCK(lockbase##_unlock(s->lock)); \ \ /* \ * Re-read the sequence counter since the (possibly \ * preempted) writer made progress. 
\ */ \ seq = smp_load_acquire(&s->seqcount.sequence); \ } \ \ return seq; \ } \ \ static __always_inline bool \ __seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s) \ { \ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ return preemptible; \ \ /* PREEMPT_RT relies on the above LOCK+UNLOCK */ \ return false; \ } \ \ static __always_inline void \ __seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \ { \ __SEQ_LOCK(lockdep_assert_held(s->lock)); \ } /* * __seqprop() for seqcount_t */ static inline seqcount_t *__seqprop_ptr(seqcount_t *s) { return s; } static inline const seqcount_t *__seqprop_const_ptr(const seqcount_t *s) { return s; } static inline unsigned __seqprop_sequence(const seqcount_t *s) { return smp_load_acquire(&s->sequence); } static inline bool __seqprop_preemptible(const seqcount_t *s) { return false; } static inline void __seqprop_assert(const seqcount_t *s) { lockdep_assert_preemption_disabled(); } #define __SEQ_RT IS_ENABLED(CONFIG_PREEMPT_RT) SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, raw_spin) SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, spin) SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, read) SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) #undef SEQCOUNT_LOCKNAME /* * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t * @name: Name of the seqcount_LOCKNAME_t instance * @lock: Pointer to the associated LOCKNAME */ #define SEQCOUNT_LOCKNAME_ZERO(seq_name, assoc_lock) { \ .seqcount = SEQCNT_ZERO(seq_name.seqcount), \ __SEQ_LOCK(.lock = (assoc_lock)) \ } #define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define SEQCNT_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define SEQCNT_RWLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define SEQCNT_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define __seqprop_case(s, lockname, prop) \ seqcount_##lockname##_t: __seqprop_##lockname##_##prop #define __seqprop(s, prop) _Generic(*(s), \ seqcount_t: __seqprop_##prop, \ __seqprop_case((s), raw_spinlock, prop), \ __seqprop_case((s), spinlock, prop), \ __seqprop_case((s), rwlock, prop), \ __seqprop_case((s), mutex, prop)) #define seqprop_ptr(s) __seqprop(s, ptr)(s) #define seqprop_const_ptr(s) __seqprop(s, const_ptr)(s) #define seqprop_sequence(s) __seqprop(s, sequence)(s) #define seqprop_preemptible(s) __seqprop(s, preemptible)(s) #define seqprop_assert(s) __seqprop(s, assert)(s) /** * __read_seqcount_begin() - begin a seqcount_t read section * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * Return: count to be passed to read_seqcount_retry() */ #define __read_seqcount_begin(s) \ ({ \ unsigned __seq; \ \ while (unlikely((__seq = seqprop_sequence(s)) & 1)) \ cpu_relax(); \ \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ __seq; \ }) /** * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * Return: count to be passed to read_seqcount_retry() */ #define raw_read_seqcount_begin(s) __read_seqcount_begin(s) /** * read_seqcount_begin() - begin a seqcount_t read critical section * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * Return: count to be passed to read_seqcount_retry() */ #define read_seqcount_begin(s) \ ({ \ seqcount_lockdep_reader_access(seqprop_const_ptr(s)); \ raw_read_seqcount_begin(s); \ }) /** * raw_read_seqcount() - read the raw seqcount_t 
counter value * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * raw_read_seqcount opens a read critical section of the given * seqcount_t, without any lockdep checking, and without checking or * masking the sequence counter LSB. Calling code is responsible for * handling that. * * Return: count to be passed to read_seqcount_retry() */ #define raw_read_seqcount(s) \ ({ \ unsigned __seq = seqprop_sequence(s); \ \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ __seq; \ }) /** * raw_seqcount_try_begin() - begin a seqcount_t read critical section * w/o lockdep and w/o counter stabilization * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * @start: count to be passed to read_seqcount_retry() * * Similar to raw_seqcount_begin(), except it enables eliding the critical * section entirely if odd, instead of doing the speculation knowing it will * fail. * * Useful when counter stabilization is more or less equivalent to taking * the lock and there is a slowpath that does that. * * If true, start will be set to the (even) sequence count read. * * Return: true when a read critical section is started. */ #define raw_seqcount_try_begin(s, start) \ ({ \ start = raw_read_seqcount(s); \ !(start & 1); \ }) /** * raw_seqcount_begin() - begin a seqcount_t read critical section w/o * lockdep and w/o counter stabilization * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * raw_seqcount_begin opens a read critical section of the given * seqcount_t. Unlike read_seqcount_begin(), this function will not wait * for the count to stabilize. If a writer is active when it begins, it * will fail the read_seqcount_retry() at the end of the read critical * section instead of stabilizing at the beginning of it. * * Use this only in special kernel hot paths where the read section is * small and has a high probability of success through other external * means. It will save a single branching instruction. * * Return: count to be passed to read_seqcount_retry() */ #define raw_seqcount_begin(s) \ ({ \ /* \ * If the counter is odd, let read_seqcount_retry() fail \ * by decrementing the counter. \ */ \ raw_read_seqcount(s) & ~1; \ }) /** * __read_seqcount_retry() - end a seqcount_t read section w/o barrier * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * @start: count, from read_seqcount_begin() * * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb() * barrier. Callers should ensure that smp_rmb() or equivalent ordering is * provided before actually loading any of the variables that are to be * protected in this critical section. * * Use carefully, only in critical code, and comment how the barrier is * provided. * * Return: true if a read section retry is required, else false */ #define __read_seqcount_retry(s, start) \ do___read_seqcount_retry(seqprop_const_ptr(s), start) static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start) { kcsan_atomic_next(0); return unlikely(READ_ONCE(s->sequence) != start); } /** * read_seqcount_retry() - end a seqcount_t read critical section * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * @start: count, from read_seqcount_begin() * * read_seqcount_retry closes the read critical section of given * seqcount_t. If the critical section was invalid, it must be ignored * (and typically retried). 
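 *
 * A typical read-side loop, shown here purely for illustration (assumes a
 * seqcount_t "seq" protecting the data being copied)::
 *
 *	unsigned int start;
 *
 *	do {
 *		start = read_seqcount_begin(&seq);
 *		// copy or sample the protected data
 *	} while (read_seqcount_retry(&seq, start));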
* * Return: true if a read section retry is required, else false */ #define read_seqcount_retry(s, start) \ do_read_seqcount_retry(seqprop_const_ptr(s), start) static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start) { smp_rmb(); return do___read_seqcount_retry(s, start); } /** * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * Context: check write_seqcount_begin() */ #define raw_write_seqcount_begin(s) \ do { \ if (seqprop_preemptible(s)) \ preempt_disable(); \ \ do_raw_write_seqcount_begin(seqprop_ptr(s)); \ } while (0) static inline void do_raw_write_seqcount_begin(seqcount_t *s) { kcsan_nestable_atomic_begin(); s->sequence++; smp_wmb(); } /** * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * Context: check write_seqcount_end() */ #define raw_write_seqcount_end(s) \ do { \ do_raw_write_seqcount_end(seqprop_ptr(s)); \ \ if (seqprop_preemptible(s)) \ preempt_enable(); \ } while (0) static inline void do_raw_write_seqcount_end(seqcount_t *s) { smp_wmb(); s->sequence++; kcsan_nestable_atomic_end(); } /** * write_seqcount_begin_nested() - start a seqcount_t write section with * custom lockdep nesting level * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * @subclass: lockdep nesting level * * See Documentation/locking/lockdep-design.rst * Context: check write_seqcount_begin() */ #define write_seqcount_begin_nested(s, subclass) \ do { \ seqprop_assert(s); \ \ if (seqprop_preemptible(s)) \ preempt_disable(); \ \ do_write_seqcount_begin_nested(seqprop_ptr(s), subclass); \ } while (0) static inline void do_write_seqcount_begin_nested(seqcount_t *s, int subclass) { seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); do_raw_write_seqcount_begin(s); } /** * write_seqcount_begin() - start a seqcount_t write side critical section * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * Context: sequence counter write side sections must be serialized and * non-preemptible. Preemption will be automatically disabled if and * only if the seqcount write serialization lock is associated, and * preemptible. If readers can be invoked from hardirq or softirq * context, interrupts or bottom halves must be respectively disabled. */ #define write_seqcount_begin(s) \ do { \ seqprop_assert(s); \ \ if (seqprop_preemptible(s)) \ preempt_disable(); \ \ do_write_seqcount_begin(seqprop_ptr(s)); \ } while (0) static inline void do_write_seqcount_begin(seqcount_t *s) { do_write_seqcount_begin_nested(s, 0); } /** * write_seqcount_end() - end a seqcount_t write side critical section * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * Context: Preemption will be automatically re-enabled if and only if * the seqcount write serialization lock is associated, and preemptible. */ #define write_seqcount_end(s) \ do { \ do_write_seqcount_end(seqprop_ptr(s)); \ \ if (seqprop_preemptible(s)) \ preempt_enable(); \ } while (0) static inline void do_write_seqcount_end(seqcount_t *s) { seqcount_release(&s->dep_map, _RET_IP_); do_raw_write_seqcount_end(s); } /** * raw_write_seqcount_barrier() - do a seqcount_t write barrier * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * This can be used to provide an ordering guarantee instead of the usual * consistency guarantee. 
It is one wmb cheaper, because it can collapse * the two back-to-back wmb()s. * * Note that writes surrounding the barrier should be declared atomic (e.g. * via WRITE_ONCE): a) to ensure the writes become visible to other threads * atomically, avoiding compiler optimizations; b) to document which writes are * meant to propagate to the reader critical section. This is necessary because * neither writes before nor after the barrier are enclosed in a seq-writer * critical section that would ensure readers are aware of ongoing writes:: * * seqcount_t seq; * bool X = true, Y = false; * * void read(void) * { * bool x, y; * * do { * int s = read_seqcount_begin(&seq); * * x = X; y = Y; * * } while (read_seqcount_retry(&seq, s)); * * BUG_ON(!x && !y); * } * * void write(void) * { * WRITE_ONCE(Y, true); * * raw_write_seqcount_barrier(seq); * * WRITE_ONCE(X, false); * } */ #define raw_write_seqcount_barrier(s) \ do_raw_write_seqcount_barrier(seqprop_ptr(s)) static inline void do_raw_write_seqcount_barrier(seqcount_t *s) { kcsan_nestable_atomic_begin(); s->sequence++; smp_wmb(); s->sequence++; kcsan_nestable_atomic_end(); } /** * write_seqcount_invalidate() - invalidate in-progress seqcount_t read * side operations * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * * After write_seqcount_invalidate, no seqcount_t read side operations * will complete successfully and see data older than this. */ #define write_seqcount_invalidate(s) \ do_write_seqcount_invalidate(seqprop_ptr(s)) static inline void do_write_seqcount_invalidate(seqcount_t *s) { smp_wmb(); kcsan_nestable_atomic_begin(); s->sequence+=2; kcsan_nestable_atomic_end(); } /* * Latch sequence counters (seqcount_latch_t) * * A sequence counter variant where the counter even/odd value is used to * switch between two copies of protected data. This allows the read path, * typically NMIs, to safely interrupt the write side critical section. * * As the write sections are fully preemptible, no special handling for * PREEMPT_RT is needed. */ typedef struct { seqcount_t seqcount; } seqcount_latch_t; /** * SEQCNT_LATCH_ZERO() - static initializer for seqcount_latch_t * @seq_name: Name of the seqcount_latch_t instance */ #define SEQCNT_LATCH_ZERO(seq_name) { \ .seqcount = SEQCNT_ZERO(seq_name.seqcount), \ } /** * seqcount_latch_init() - runtime initializer for seqcount_latch_t * @s: Pointer to the seqcount_latch_t instance */ #define seqcount_latch_init(s) seqcount_init(&(s)->seqcount) /** * raw_read_seqcount_latch() - pick even/odd latch data copy * @s: Pointer to seqcount_latch_t * * See raw_write_seqcount_latch() for details and a full reader/writer * usage example. * * Return: sequence counter raw value. Use the lowest bit as an index for * picking which data copy to read. The full counter must then be checked * with raw_read_seqcount_latch_retry(). */ static __always_inline unsigned raw_read_seqcount_latch(const seqcount_latch_t *s) { /* * Pairs with the first smp_wmb() in raw_write_seqcount_latch(). * Due to the dependent load, a full smp_rmb() is not needed. */ return READ_ONCE(s->seqcount.sequence); } /** * read_seqcount_latch() - pick even/odd latch data copy * @s: Pointer to seqcount_latch_t * * See write_seqcount_latch() for details and a full reader/writer usage * example. * * Return: sequence counter raw value. Use the lowest bit as an index for * picking which data copy to read. The full counter must then be checked * with read_seqcount_latch_retry(). 
*/ static __always_inline unsigned read_seqcount_latch(const seqcount_latch_t *s) { kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); return raw_read_seqcount_latch(s); } /** * raw_read_seqcount_latch_retry() - end a seqcount_latch_t read section * @s: Pointer to seqcount_latch_t * @start: count, from raw_read_seqcount_latch() * * Return: true if a read section retry is required, else false */ static __always_inline int raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start) { smp_rmb(); return unlikely(READ_ONCE(s->seqcount.sequence) != start); } /** * read_seqcount_latch_retry() - end a seqcount_latch_t read section * @s: Pointer to seqcount_latch_t * @start: count, from read_seqcount_latch() * * Return: true if a read section retry is required, else false */ static __always_inline int read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start) { kcsan_atomic_next(0); return raw_read_seqcount_latch_retry(s, start); } /** * raw_write_seqcount_latch() - redirect latch readers to even/odd copy * @s: Pointer to seqcount_latch_t */ static __always_inline void raw_write_seqcount_latch(seqcount_latch_t *s) { smp_wmb(); /* prior stores before incrementing "sequence" */ s->seqcount.sequence++; smp_wmb(); /* increment "sequence" before following stores */ } /** * write_seqcount_latch_begin() - redirect latch readers to odd copy * @s: Pointer to seqcount_latch_t * * The latch technique is a multiversion concurrency control method that allows * queries during non-atomic modifications. If you can guarantee queries never * interrupt the modification -- e.g. the concurrency is strictly between CPUs * -- you most likely do not need this. * * Where the traditional RCU/lockless data structures rely on atomic * modifications to ensure queries observe either the old or the new state the * latch allows the same for non-atomic updates. The trade-off is doubling the * cost of storage; we have to maintain two copies of the entire data * structure. * * Very simply put: we first modify one copy and then the other. This ensures * there is always one copy in a stable state, ready to give us an answer. * * The basic form is a data structure like:: * * struct latch_struct { * seqcount_latch_t seq; * struct data_struct data[2]; * }; * * Where a modification, which is assumed to be externally serialized, does the * following:: * * void latch_modify(struct latch_struct *latch, ...) * { * write_seqcount_latch_begin(&latch->seq); * modify(latch->data[0], ...); * write_seqcount_latch(&latch->seq); * modify(latch->data[1], ...); * write_seqcount_latch_end(&latch->seq); * } * * The query will have a form like:: * * struct entry *latch_query(struct latch_struct *latch, ...) * { * struct entry *entry; * unsigned seq, idx; * * do { * seq = read_seqcount_latch(&latch->seq); * * idx = seq & 0x01; * entry = data_query(latch->data[idx], ...); * * // This includes needed smp_rmb() * } while (read_seqcount_latch_retry(&latch->seq, seq)); * * return entry; * } * * So during the modification, queries are first redirected to data[1]. Then we * modify data[0]. When that is complete, we redirect queries back to data[0] * and we can modify data[1]. * * NOTE: * * The non-requirement for atomic modifications does _NOT_ include * the publishing of new entries in the case where data is a dynamic * data structure. * * An iteration might start in data[0] and get suspended long enough * to miss an entire modification sequence, once it resumes it might * observe the new entry. 
* * NOTE2: * * When data is a dynamic data structure; one should use regular RCU * patterns to manage the lifetimes of the objects within. */ static __always_inline void write_seqcount_latch_begin(seqcount_latch_t *s) { kcsan_nestable_atomic_begin(); raw_write_seqcount_latch(s); } /** * write_seqcount_latch() - redirect latch readers to even copy * @s: Pointer to seqcount_latch_t */ static __always_inline void write_seqcount_latch(seqcount_latch_t *s) { raw_write_seqcount_latch(s); } /** * write_seqcount_latch_end() - end a seqcount_latch_t write section * @s: Pointer to seqcount_latch_t * * Marks the end of a seqcount_latch_t writer section, after all copies of the * latch-protected data have been updated. */ static __always_inline void write_seqcount_latch_end(seqcount_latch_t *s) { kcsan_nestable_atomic_end(); } #define __SEQLOCK_UNLOCKED(lockname) \ { \ .seqcount = SEQCNT_SPINLOCK_ZERO(lockname, &(lockname).lock), \ .lock = __SPIN_LOCK_UNLOCKED(lockname) \ } /** * seqlock_init() - dynamic initializer for seqlock_t * @sl: Pointer to the seqlock_t instance */ #define seqlock_init(sl) \ do { \ spin_lock_init(&(sl)->lock); \ seqcount_spinlock_init(&(sl)->seqcount, &(sl)->lock); \ } while (0) /** * DEFINE_SEQLOCK(sl) - Define a statically allocated seqlock_t * @sl: Name of the seqlock_t instance */ #define DEFINE_SEQLOCK(sl) \ seqlock_t sl = __SEQLOCK_UNLOCKED(sl) /** * read_seqbegin() - start a seqlock_t read side critical section * @sl: Pointer to seqlock_t * * Return: count, to be passed to read_seqretry() */ static inline unsigned read_seqbegin(const seqlock_t *sl) { return read_seqcount_begin(&sl->seqcount); } /** * read_seqretry() - end a seqlock_t read side section * @sl: Pointer to seqlock_t * @start: count, from read_seqbegin() * * read_seqretry closes the read side critical section of given seqlock_t. * If the critical section was invalid, it must be ignored (and typically * retried). * * Return: true if a read section retry is required, else false */ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) { return read_seqcount_retry(&sl->seqcount, start); } /* * For all seqlock_t write side functions, use the internal * do_write_seqcount_begin() instead of generic write_seqcount_begin(). * This way, no redundant lockdep_assert_held() checks are added. */ /** * write_seqlock() - start a seqlock_t write side critical section * @sl: Pointer to seqlock_t * * write_seqlock opens a write side critical section for the given * seqlock_t. It also implicitly acquires the spinlock_t embedded inside * that sequential lock. All seqlock_t write side sections are thus * automatically serialized and non-preemptible. * * Context: if the seqlock_t read section, or other write side critical * sections, can be invoked from hardirq or softirq contexts, use the * _irqsave or _bh variants of this function instead. */ static inline void write_seqlock(seqlock_t *sl) { spin_lock(&sl->lock); do_write_seqcount_begin(&sl->seqcount.seqcount); } /** * write_sequnlock() - end a seqlock_t write side critical section * @sl: Pointer to seqlock_t * * write_sequnlock closes the (serialized and non-preemptible) write side * critical section of given seqlock_t. */ static inline void write_sequnlock(seqlock_t *sl) { do_write_seqcount_end(&sl->seqcount.seqcount); spin_unlock(&sl->lock); } /** * write_seqlock_bh() - start a softirqs-disabled seqlock_t write section * @sl: Pointer to seqlock_t * * _bh variant of write_seqlock(). 
Use only if the read side section, or * other write side sections, can be invoked from softirq contexts. */ static inline void write_seqlock_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); do_write_seqcount_begin(&sl->seqcount.seqcount); } /** * write_sequnlock_bh() - end a softirqs-disabled seqlock_t write section * @sl: Pointer to seqlock_t * * write_sequnlock_bh closes the serialized, non-preemptible, and * softirqs-disabled, seqlock_t write side critical section opened with * write_seqlock_bh(). */ static inline void write_sequnlock_bh(seqlock_t *sl) { do_write_seqcount_end(&sl->seqcount.seqcount); spin_unlock_bh(&sl->lock); } /** * write_seqlock_irq() - start a non-interruptible seqlock_t write section * @sl: Pointer to seqlock_t * * _irq variant of write_seqlock(). Use only if the read side section, or * other write sections, can be invoked from hardirq contexts. */ static inline void write_seqlock_irq(seqlock_t *sl) { spin_lock_irq(&sl->lock); do_write_seqcount_begin(&sl->seqcount.seqcount); } /** * write_sequnlock_irq() - end a non-interruptible seqlock_t write section * @sl: Pointer to seqlock_t * * write_sequnlock_irq closes the serialized and non-interruptible * seqlock_t write side section opened with write_seqlock_irq(). */ static inline void write_sequnlock_irq(seqlock_t *sl) { do_write_seqcount_end(&sl->seqcount.seqcount); spin_unlock_irq(&sl->lock); } static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) { unsigned long flags; spin_lock_irqsave(&sl->lock, flags); do_write_seqcount_begin(&sl->seqcount.seqcount); return flags; } /** * write_seqlock_irqsave() - start a non-interruptible seqlock_t write * section * @lock: Pointer to seqlock_t * @flags: Stack-allocated storage for saving caller's local interrupt * state, to be passed to write_sequnlock_irqrestore(). * * _irqsave variant of write_seqlock(). Use it only if the read side * section, or other write sections, can be invoked from hardirq context. */ #define write_seqlock_irqsave(lock, flags) \ do { flags = __write_seqlock_irqsave(lock); } while (0) /** * write_sequnlock_irqrestore() - end non-interruptible seqlock_t write * section * @sl: Pointer to seqlock_t * @flags: Caller's saved interrupt state, from write_seqlock_irqsave() * * write_sequnlock_irqrestore closes the serialized and non-interruptible * seqlock_t write section previously opened with write_seqlock_irqsave(). */ static inline void write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) { do_write_seqcount_end(&sl->seqcount.seqcount); spin_unlock_irqrestore(&sl->lock, flags); } /** * read_seqlock_excl() - begin a seqlock_t locking reader section * @sl: Pointer to seqlock_t * * read_seqlock_excl opens a seqlock_t locking reader critical section. A * locking reader exclusively locks out *both* other writers *and* other * locking readers, but it does not update the embedded sequence number. * * Locking readers act like a normal spin_lock()/spin_unlock(). * * Context: if the seqlock_t write section, *or other read sections*, can * be invoked from hardirq or softirq contexts, use the _irqsave or _bh * variant of this function instead. * * The opened read section must be closed with read_sequnlock_excl(). 
*/ static inline void read_seqlock_excl(seqlock_t *sl) { spin_lock(&sl->lock); } /** * read_sequnlock_excl() - end a seqlock_t locking reader critical section * @sl: Pointer to seqlock_t */ static inline void read_sequnlock_excl(seqlock_t *sl) { spin_unlock(&sl->lock); } /** * read_seqlock_excl_bh() - start a seqlock_t locking reader section with * softirqs disabled * @sl: Pointer to seqlock_t * * _bh variant of read_seqlock_excl(). Use this variant only if the * seqlock_t write side section, *or other read sections*, can be invoked * from softirq contexts. */ static inline void read_seqlock_excl_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); } /** * read_sequnlock_excl_bh() - stop a seqlock_t softirq-disabled locking * reader section * @sl: Pointer to seqlock_t */ static inline void read_sequnlock_excl_bh(seqlock_t *sl) { spin_unlock_bh(&sl->lock); } /** * read_seqlock_excl_irq() - start a non-interruptible seqlock_t locking * reader section * @sl: Pointer to seqlock_t * * _irq variant of read_seqlock_excl(). Use this only if the seqlock_t * write side section, *or other read sections*, can be invoked from a * hardirq context. */ static inline void read_seqlock_excl_irq(seqlock_t *sl) { spin_lock_irq(&sl->lock); } /** * read_sequnlock_excl_irq() - end an interrupts-disabled seqlock_t * locking reader section * @sl: Pointer to seqlock_t */ static inline void read_sequnlock_excl_irq(seqlock_t *sl) { spin_unlock_irq(&sl->lock); } static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl) { unsigned long flags; spin_lock_irqsave(&sl->lock, flags); return flags; } /** * read_seqlock_excl_irqsave() - start a non-interruptible seqlock_t * locking reader section * @lock: Pointer to seqlock_t * @flags: Stack-allocated storage for saving caller's local interrupt * state, to be passed to read_sequnlock_excl_irqrestore(). * * _irqsave variant of read_seqlock_excl(). Use this only if the seqlock_t * write side section, *or other read sections*, can be invoked from a * hardirq context. */ #define read_seqlock_excl_irqsave(lock, flags) \ do { flags = __read_seqlock_excl_irqsave(lock); } while (0) /** * read_sequnlock_excl_irqrestore() - end non-interruptible seqlock_t * locking reader section * @sl: Pointer to seqlock_t * @flags: Caller saved interrupt state, from read_seqlock_excl_irqsave() */ static inline void read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags) { spin_unlock_irqrestore(&sl->lock, flags); } /** * read_seqbegin_or_lock() - begin a seqlock_t lockless or locking reader * @lock: Pointer to seqlock_t * @seq : Marker and return parameter. If the passed value is even, the * reader will become a *lockless* seqlock_t reader as in read_seqbegin(). * If the passed value is odd, the reader will become a *locking* reader * as in read_seqlock_excl(). In the first call to this function, the * caller *must* initialize and pass an even value to @seq; this way, a * lockless read can be optimistically tried first. * * read_seqbegin_or_lock is an API designed to optimistically try a normal * lockless seqlock_t read section first. If an odd counter is found, the * lockless read trial has failed, and the next read iteration transforms * itself into a full seqlock_t locking reader. * * This is typically used to avoid seqlock_t lockless readers starvation * (too much retry loops) in the case of a sharp spike in write side * activity. 
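 *
 * Editorial sketch, not part of the original kernel-doc, of the usual
 * calling pattern; the seqlock_t "foo_lock" and the data being read are
 * hypothetical::
 *
 *	int seq = 0;	// even: first pass is a lockless reader
 *
 *	do {
 *		read_seqbegin_or_lock(&foo_lock, &seq);
 *
 *		// ... read the protected data ...
 *
 *		if (!need_seqretry(&foo_lock, seq))
 *			break;
 *		seq = 1;	// odd: retry as a locking reader
 *	} while (1);
 *	done_seqretry(&foo_lock, seq);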
* * Context: if the seqlock_t write section, *or other read sections*, can * be invoked from hardirq or softirq contexts, use the _irqsave or _bh * variant of this function instead. * * Check Documentation/locking/seqlock.rst for template example code. * * Return: the encountered sequence counter value, through the @seq * parameter, which is overloaded as a return parameter. This returned * value must be checked with need_seqretry(). If the read section need to * be retried, this returned value must also be passed as the @seq * parameter of the next read_seqbegin_or_lock() iteration. */ static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) { if (!(*seq & 1)) /* Even */ *seq = read_seqbegin(lock); else /* Odd */ read_seqlock_excl(lock); } /** * need_seqretry() - validate seqlock_t "locking or lockless" read section * @lock: Pointer to seqlock_t * @seq: sequence count, from read_seqbegin_or_lock() * * Return: true if a read section retry is required, false otherwise */ static inline int need_seqretry(seqlock_t *lock, int seq) { return !(seq & 1) && read_seqretry(lock, seq); } /** * done_seqretry() - end seqlock_t "locking or lockless" reader section * @lock: Pointer to seqlock_t * @seq: count, from read_seqbegin_or_lock() * * done_seqretry finishes the seqlock_t read side critical section started * with read_seqbegin_or_lock() and validated by need_seqretry(). */ static inline void done_seqretry(seqlock_t *lock, int seq) { if (seq & 1) read_sequnlock_excl(lock); } /** * read_seqbegin_or_lock_irqsave() - begin a seqlock_t lockless reader, or * a non-interruptible locking reader * @lock: Pointer to seqlock_t * @seq: Marker and return parameter. Check read_seqbegin_or_lock(). * * This is the _irqsave variant of read_seqbegin_or_lock(). Use it only if * the seqlock_t write section, *or other read sections*, can be invoked * from hardirq context. * * Note: Interrupts will be disabled only for "locking reader" mode. * * Return: * * 1. The saved local interrupts state in case of a locking reader, to * be passed to done_seqretry_irqrestore(). * * 2. The encountered sequence counter value, returned through @seq * overloaded as a return parameter. Check read_seqbegin_or_lock(). */ static inline unsigned long read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq) { unsigned long flags = 0; if (!(*seq & 1)) /* Even */ *seq = read_seqbegin(lock); else /* Odd */ read_seqlock_excl_irqsave(lock, flags); return flags; } /** * done_seqretry_irqrestore() - end a seqlock_t lockless reader, or a * non-interruptible locking reader section * @lock: Pointer to seqlock_t * @seq: Count, from read_seqbegin_or_lock_irqsave() * @flags: Caller's saved local interrupt state in case of a locking * reader, also from read_seqbegin_or_lock_irqsave() * * This is the _irqrestore variant of done_seqretry(). The read section * must've been opened with read_seqbegin_or_lock_irqsave(), and validated * by need_seqretry(). */ static inline void done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags) { if (seq & 1) read_sequnlock_excl_irqrestore(lock, flags); } #endif /* __LINUX_SEQLOCK_H */
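/*
 * Editorial illustration (not part of the kernel header above): a minimal,
 * self-contained sketch of pairing the seqlock_t writer and lockless-reader
 * APIs defined there. The "foo_stats" structure and all function names are
 * hypothetical.
 */
#include <linux/seqlock.h>
#include <linux/types.h>

struct foo_stats {
	seqlock_t lock;
	u64 packets;
	u64 bytes;
};

static void foo_stats_init(struct foo_stats *s)
{
	seqlock_init(&s->lock);
	s->packets = 0;
	s->bytes = 0;
}

/* Writer side: serialized and non-preemptible via the embedded spinlock. */
static void foo_stats_update(struct foo_stats *s, u32 len)
{
	write_seqlock(&s->lock);
	s->packets++;
	s->bytes += len;
	write_sequnlock(&s->lock);
}

/* Lockless reader: retries whenever a writer was active during the read. */
static void foo_stats_fetch(struct foo_stats *s, u64 *packets, u64 *bytes)
{
	unsigned int seq;

	do {
		seq = read_seqbegin(&s->lock);
		*packets = s->packets;
		*bytes = s->bytes;
	} while (read_seqretry(&s->lock, seq));
}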
// SPDX-License-Identifier: GPL-2.0-or-later /* * USB CDC EEM network interface driver * Copyright (C) 2009 Oberthur Technologies * by Omar Laazimani, Olivier Condemine */ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ctype.h> #include <linux/ethtool.h> #include <linux/workqueue.h> #include <linux/mii.h> #include <linux/usb.h> #include <linux/crc32.h> #include <linux/usb/cdc.h> #include <linux/usb/usbnet.h> #include <linux/gfp.h> #include <linux/if_vlan.h> /* * This driver is an implementation of the CDC "Ethernet Emulation * Model" (EEM) specification, which encapsulates Ethernet frames * for transport over USB using a simpler USB device model than the * previous CDC "Ethernet Control Model" (ECM, or "CDC Ethernet"). * * For details, see https://usb.org/sites/default/files/CDC_EEM10.pdf * * This version has been tested with GIGAntIC WuaoW SIM Smart Card on 2.6.24, * 2.6.27 and 2.6.30rc2 kernel. * It has also been validated on Openmoko Om 2008.12 (based on 2.6.24 kernel). * build on 23-April-2009 */ #define EEM_HEAD 2 /* 2 byte header */ /*-------------------------------------------------------------------------*/ static void eem_linkcmd_complete(struct urb *urb) { dev_kfree_skb(urb->context); usb_free_urb(urb); } static void eem_linkcmd(struct usbnet *dev, struct sk_buff *skb) { struct urb *urb; int status; urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) goto fail; usb_fill_bulk_urb(urb, dev->udev, dev->out, skb->data, skb->len, eem_linkcmd_complete, skb); status = usb_submit_urb(urb, GFP_ATOMIC); if (status) { usb_free_urb(urb); fail: dev_kfree_skb(skb); netdev_warn(dev->net, "link cmd failure\n"); return; } } static int eem_bind(struct usbnet *dev, struct usb_interface *intf) { int status = 0; status = usbnet_get_endpoints(dev, intf); if (status < 0) return status; /* no jumbogram (16K) support for now */ dev->net->hard_header_len += EEM_HEAD + ETH_FCS_LEN + VLAN_HLEN; dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len; return 0; } /* * EEM permits packing multiple Ethernet frames into USB transfers * (a "bundle"), but for TX we don't try to do that.
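 *
 * Editorial note, not in the original source: each transmitted skb is
 * therefore sent as a single EEM packet laid out as
 *
 *	2-byte EEM header | Ethernet frame | 4-byte CRC32 [| 2-byte zero-length EEM packet]
 *
 * where the trailing zero-length EEM packet is appended only when the
 * transfer would otherwise end exactly on a USB max-packet boundary.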
*/ static struct sk_buff *eem_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { struct sk_buff *skb2 = NULL; u16 len = skb->len; u32 crc = 0; int padlen = 0; /* When ((len + EEM_HEAD + ETH_FCS_LEN) % dev->maxpacket) is * zero, stick two bytes of zero length EEM packet on the end. * Else the framework would add invalid single byte padding, * since it can't know whether ZLPs will be handled right by * all the relevant hardware and software. */ if (!((len + EEM_HEAD + ETH_FCS_LEN) % dev->maxpacket)) padlen += 2; if (!skb_cloned(skb)) { int headroom = skb_headroom(skb); int tailroom = skb_tailroom(skb); if ((tailroom >= ETH_FCS_LEN + padlen) && (headroom >= EEM_HEAD)) goto done; if ((headroom + tailroom) > (EEM_HEAD + ETH_FCS_LEN + padlen)) { skb->data = memmove(skb->head + EEM_HEAD, skb->data, skb->len); skb_set_tail_pointer(skb, len); goto done; } } skb2 = skb_copy_expand(skb, EEM_HEAD, ETH_FCS_LEN + padlen, flags); dev_kfree_skb_any(skb); if (!skb2) return NULL; skb = skb2; done: /* we don't use the "no Ethernet CRC" option */ crc = crc32_le(~0, skb->data, skb->len); crc = ~crc; put_unaligned_le32(crc, skb_put(skb, 4)); /* EEM packet header format: * b0..13: length of ethernet frame * b14: bmCRC (1 == valid Ethernet CRC) * b15: bmType (0 == data) */ len = skb->len; put_unaligned_le16(BIT(14) | len, skb_push(skb, 2)); /* Bundle a zero length EEM packet if needed */ if (padlen) put_unaligned_le16(0, skb_put(skb, 2)); return skb; } static int eem_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { /* * Our task here is to strip off framing, leaving skb with one * data frame for the usbnet framework code to process. But we * may have received multiple EEM payloads, or command payloads. * So we must process _everything_ as if it's a header, except * maybe the last data payload * * REVISIT the framework needs updating so that when we consume * all payloads (the last or only message was a command, or a * zero length EEM packet) that is not accounted as an rx_error. */ do { struct sk_buff *skb2 = NULL; u16 header; u16 len = 0; /* incomplete EEM header? */ if (skb->len < EEM_HEAD) return 0; /* * EEM packet header format: * b0..14: EEM type dependent (Data or Command) * b15: bmType */ header = get_unaligned_le16(skb->data); skb_pull(skb, EEM_HEAD); /* * The bmType bit helps to denote when EEM * packet is data or command : * bmType = 0 : EEM data payload * bmType = 1 : EEM (link) command */ if (header & BIT(15)) { u16 bmEEMCmd; /* * EEM (link) command packet: * b0..10: bmEEMCmdParam * b11..13: bmEEMCmd * b14: bmReserved (must be 0) * b15: 1 (EEM command) */ if (header & BIT(14)) { netdev_dbg(dev->net, "reserved command %04x\n", header); continue; } bmEEMCmd = (header >> 11) & 0x7; switch (bmEEMCmd) { /* Responding to echo requests is mandatory. */ case 0: /* Echo command */ len = header & 0x7FF; /* bogus command? */ if (skb->len < len) return 0; skb2 = skb_clone(skb, GFP_ATOMIC); if (unlikely(!skb2)) goto next; skb_trim(skb2, len); put_unaligned_le16(BIT(15) | BIT(11) | len, skb_push(skb2, 2)); eem_linkcmd(dev, skb2); break; /* * Host may choose to ignore hints. * - suspend: peripheral ready to suspend * - response: suggest N millisec polling * - response complete: suggest N sec polling * * Suspend is reported and maybe heeded. 
*/ case 2: /* Suspend hint */ usbnet_device_suggests_idle(dev); continue; case 3: /* Response hint */ case 4: /* Response complete hint */ continue; /* * Hosts should never receive host-to-peripheral * or reserved command codes; or responses to an * echo command we didn't send. */ case 1: /* Echo response */ case 5: /* Tickle */ default: /* reserved */ netdev_warn(dev->net, "unexpected link command %d\n", bmEEMCmd); continue; } } else { u32 crc, crc2; int is_last; /* zero length EEM packet? */ if (header == 0) continue; /* * EEM data packet header : * b0..13: length of ethernet frame * b14: bmCRC * b15: 0 (EEM data) */ len = header & 0x3FFF; /* bogus EEM payload? */ if (skb->len < len) return 0; /* bogus ethernet frame? */ if (len < (ETH_HLEN + ETH_FCS_LEN)) goto next; /* * Treat the last payload differently: framework * code expects our "fixup" to have stripped off * headers, so "skb" is a data packet (or error). * Else if it's not the last payload, keep "skb" * for further processing. */ is_last = (len == skb->len); if (is_last) skb2 = skb; else { skb2 = skb_clone(skb, GFP_ATOMIC); if (unlikely(!skb2)) return 0; } /* * The bmCRC helps to denote when the CRC field in * the Ethernet frame contains a calculated CRC: * bmCRC = 1 : CRC is calculated * bmCRC = 0 : CRC = 0xDEADBEEF */ if (header & BIT(14)) { crc = get_unaligned_le32(skb2->data + len - ETH_FCS_LEN); crc2 = ~crc32_le(~0, skb2->data, skb2->len - ETH_FCS_LEN); } else { crc = get_unaligned_be32(skb2->data + len - ETH_FCS_LEN); crc2 = 0xdeadbeef; } skb_trim(skb2, len - ETH_FCS_LEN); if (is_last) return crc == crc2; if (unlikely(crc != crc2)) { dev->net->stats.rx_errors++; dev_kfree_skb_any(skb2); } else usbnet_skb_return(dev, skb2); } next: skb_pull(skb, len); } while (skb->len); return 1; } static const struct driver_info eem_info = { .description = "CDC EEM Device", .flags = FLAG_ETHER | FLAG_POINTTOPOINT, .bind = eem_bind, .rx_fixup = eem_rx_fixup, .tx_fixup = eem_tx_fixup, }; /*-------------------------------------------------------------------------*/ static const struct usb_device_id products[] = { { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_EEM, USB_CDC_PROTO_EEM), .driver_info = (unsigned long) &eem_info, }, { /* EMPTY == end of list */ }, }; MODULE_DEVICE_TABLE(usb, products); static struct usb_driver eem_driver = { .name = "cdc_eem", .id_table = products, .probe = usbnet_probe, .disconnect = usbnet_disconnect, .suspend = usbnet_suspend, .resume = usbnet_resume, .disable_hub_initiated_lpm = 1, }; module_usb_driver(eem_driver); MODULE_AUTHOR("Omar Laazimani <omar.oberthur@gmail.com>"); MODULE_DESCRIPTION("USB CDC EEM"); MODULE_LICENSE("GPL");
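/*
 * Editorial illustration (not part of cdc_eem.c above): a self-contained
 * sketch of the 16-bit EEM data-packet header layout that eem_tx_fixup()
 * builds and eem_rx_fixup() parses. The EEM_HDR_* macros and helper
 * function names are editorial, not from the driver.
 */
#include <linux/types.h>
#include <linux/bits.h>

#define EEM_HDR_TYPE	BIT(15)		/* 0 == data packet, 1 == command */
#define EEM_HDR_CRC	BIT(14)		/* 1 == frame carries a real CRC32 */
#define EEM_HDR_LEN	GENMASK(13, 0)	/* length of the Ethernet frame */

/* Build the header of a data packet whose CRC field holds a real CRC32. */
static u16 eem_example_build_data_header(u16 frame_len)
{
	return EEM_HDR_CRC | (frame_len & EEM_HDR_LEN);
}

/* Split a received header into its fields; returns true for data packets. */
static bool eem_example_parse_header(u16 header, u16 *frame_len, bool *has_crc)
{
	*frame_len = header & EEM_HDR_LEN;
	*has_crc = !!(header & EEM_HDR_CRC);
	return !(header & EEM_HDR_TYPE);
}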
// SPDX-License-Identifier: GPL-2.0+ /* * Edgeport USB Serial Converter driver * * Copyright (C) 2000 Inside Out Networks, All rights reserved. * Copyright (C) 2001-2002 Greg Kroah-Hartman <greg@kroah.com> * * Supports the following devices: * Edgeport/4 * Edgeport/4t * Edgeport/2 * Edgeport/4i * Edgeport/2i * Edgeport/421 * Edgeport/21 * Rapidport/4 * Edgeport/8 * Edgeport/2D8 * Edgeport/4D8 * Edgeport/8i * * For questions or problems with this driver, contact Inside Out * Networks technical support, or Peter Berger <pberger@brimson.com>, * or Al Borchers <alborchers@steinerpoint.com>. * */ #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/serial.h> #include <linux/ioctl.h> #include <linux/wait.h> #include <linux/firmware.h> #include <linux/ihex.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include "io_edgeport.h" #include "io_ionsp.h" /* info for the iosp messages */ #include "io_16654.h" /* 16654 UART defines */ #define DRIVER_AUTHOR "Greg Kroah-Hartman <greg@kroah.com> and David Iacovelli" #define DRIVER_DESC "Edgeport USB Serial Driver" #define MAX_NAME_LEN 64 #define OPEN_TIMEOUT (5*HZ) /* 5 seconds */ static const struct usb_device_id edgeport_2port_id_table[] = { { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_2) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_2I) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_421) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_21) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_2_DIN) }, { } }; static const struct usb_device_id edgeport_4port_id_table[] = { { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_4) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_RAPIDPORT_4) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_4T) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_MT4X56USB) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_4I) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_8_DUAL_CPU) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_4_DIN) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_22I) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_412_4) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_COMPATIBLE) }, { } }; static const struct usb_device_id edgeport_8port_id_table[] = { { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_8) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_16_DUAL_CPU) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_8I) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_8R) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_8RR) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_412_8) }, { } }; static const struct usb_device_id Epic_port_id_table[] = { { USB_DEVICE(USB_VENDOR_ID_NCR, NCR_DEVICE_ID_EPIC_0202) }, { USB_DEVICE(USB_VENDOR_ID_NCR, NCR_DEVICE_ID_EPIC_0203) }, {
USB_DEVICE(USB_VENDOR_ID_NCR, NCR_DEVICE_ID_EPIC_0310) }, { USB_DEVICE(USB_VENDOR_ID_NCR, NCR_DEVICE_ID_EPIC_0311) }, { USB_DEVICE(USB_VENDOR_ID_NCR, NCR_DEVICE_ID_EPIC_0312) }, { USB_DEVICE(USB_VENDOR_ID_AXIOHM, AXIOHM_DEVICE_ID_EPIC_A758) }, { USB_DEVICE(USB_VENDOR_ID_AXIOHM, AXIOHM_DEVICE_ID_EPIC_A794) }, { USB_DEVICE(USB_VENDOR_ID_AXIOHM, AXIOHM_DEVICE_ID_EPIC_A225) }, { } }; /* Devices that this driver supports */ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_4) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_RAPIDPORT_4) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_4T) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_MT4X56USB) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_2) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_4I) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_2I) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_421) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_21) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_8_DUAL_CPU) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_8) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_2_DIN) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_4_DIN) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_16_DUAL_CPU) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_22I) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_412_4) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_COMPATIBLE) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_8I) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_8R) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_8RR) }, { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_EDGEPORT_412_8) }, { USB_DEVICE(USB_VENDOR_ID_NCR, NCR_DEVICE_ID_EPIC_0202) }, { USB_DEVICE(USB_VENDOR_ID_NCR, NCR_DEVICE_ID_EPIC_0203) }, { USB_DEVICE(USB_VENDOR_ID_NCR, NCR_DEVICE_ID_EPIC_0310) }, { USB_DEVICE(USB_VENDOR_ID_NCR, NCR_DEVICE_ID_EPIC_0311) }, { USB_DEVICE(USB_VENDOR_ID_NCR, NCR_DEVICE_ID_EPIC_0312) }, { USB_DEVICE(USB_VENDOR_ID_AXIOHM, AXIOHM_DEVICE_ID_EPIC_A758) }, { USB_DEVICE(USB_VENDOR_ID_AXIOHM, AXIOHM_DEVICE_ID_EPIC_A794) }, { USB_DEVICE(USB_VENDOR_ID_AXIOHM, AXIOHM_DEVICE_ID_EPIC_A225) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, id_table_combined); /* receive port state */ enum RXSTATE { EXPECT_HDR1 = 0, /* Expect header byte 1 */ EXPECT_HDR2 = 1, /* Expect header byte 2 */ EXPECT_DATA = 2, /* Expect 'RxBytesRemaining' data */ EXPECT_HDR3 = 3, /* Expect header byte 3 (for status hdrs only) */ }; /* Transmit Fifo * This Transmit queue is an extension of the edgeport Rx buffer. * The maximum amount of data buffered in both the edgeport * Rx buffer (maxTxCredits) and this buffer will never exceed maxTxCredits. 
*/ struct TxFifo { unsigned int head; /* index to head pointer (write) */ unsigned int tail; /* index to tail pointer (read) */ unsigned int count; /* Bytes in queue */ unsigned int size; /* Max size of queue (equal to Max number of TxCredits) */ unsigned char *fifo; /* allocated Buffer */ }; /* This structure holds all of the local port information */ struct edgeport_port { __u16 txCredits; /* our current credits for this port */ __u16 maxTxCredits; /* the max size of the port */ struct TxFifo txfifo; /* transmit fifo -- size will be maxTxCredits */ struct urb *write_urb; /* write URB for this port */ bool write_in_progress; /* 'true' while a write URB is outstanding */ spinlock_t ep_lock; __u8 shadowLCR; /* last LCR value received */ __u8 shadowMCR; /* last MCR value received */ __u8 shadowMSR; /* last MSR value received */ __u8 shadowLSR; /* last LSR value received */ __u8 shadowXonChar; /* last value set as XON char in Edgeport */ __u8 shadowXoffChar; /* last value set as XOFF char in Edgeport */ __u8 validDataMask; __u32 baudRate; bool open; bool openPending; bool commandPending; bool closePending; bool chaseResponsePending; wait_queue_head_t wait_chase; /* for handling sleeping while waiting for chase to finish */ wait_queue_head_t wait_open; /* for handling sleeping while waiting for open to finish */ wait_queue_head_t wait_command; /* for handling sleeping while waiting for command to finish */ struct usb_serial_port *port; /* loop back to the owner of this object */ }; /* This structure holds all of the individual device information */ struct edgeport_serial { char name[MAX_NAME_LEN+2]; /* string name of this device */ struct edge_manuf_descriptor manuf_descriptor; /* the manufacturer descriptor */ struct edge_boot_descriptor boot_descriptor; /* the boot firmware descriptor */ struct edgeport_product_info product_info; /* Product Info */ struct edge_compatibility_descriptor epic_descriptor; /* Edgeport compatible descriptor */ int is_epic; /* flag if EPiC device or not */ __u8 interrupt_in_endpoint; /* the interrupt endpoint handle */ unsigned char *interrupt_in_buffer; /* the buffer we use for the interrupt endpoint */ struct urb *interrupt_read_urb; /* our interrupt urb */ __u8 bulk_in_endpoint; /* the bulk in endpoint handle */ unsigned char *bulk_in_buffer; /* the buffer we use for the bulk in endpoint */ struct urb *read_urb; /* our bulk read urb */ bool read_in_progress; spinlock_t es_lock; __u8 bulk_out_endpoint; /* the bulk out endpoint handle */ __s16 rxBytesAvail; /* the number of bytes that we need to read from this device */ enum RXSTATE rxState; /* the current state of the bulk receive processor */ __u8 rxHeader1; /* receive header byte 1 */ __u8 rxHeader2; /* receive header byte 2 */ __u8 rxHeader3; /* receive header byte 3 */ __u8 rxPort; /* the port that we are currently receiving data for */ __u8 rxStatusCode; /* the receive status code */ __u8 rxStatusParam; /* the receive status parameter */ __s16 rxBytesRemaining; /* the number of port bytes left to read */ struct usb_serial *serial; /* loop back to the owner of this object */ }; /* baud rate information */ struct divisor_table_entry { __u32 BaudRate; __u16 Divisor; }; /* * Define table of divisors for Rev A EdgePort/4 hardware * These assume a 3.6864MHz crystal, the standard /16, and * MCR.7 = 0. 
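 *
 * Editorial note, not in the original source: under those assumptions the
 * divisor is 3686400 / (16 * baud) = 230400 / baud, rounded to the nearest
 * integer; e.g. 230400 / 9600 = 24 and 230400 / 115200 = 2, matching the
 * entries below.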
*/ static const struct divisor_table_entry divisor_table[] = { { 50, 4608}, { 75, 3072}, { 110, 2095}, /* 2094.545455 => 230450 => .0217 % over */ { 134, 1713}, /* 1713.011152 => 230398.5 => .00065% under */ { 150, 1536}, { 300, 768}, { 600, 384}, { 1200, 192}, { 1800, 128}, { 2400, 96}, { 4800, 48}, { 7200, 32}, { 9600, 24}, { 14400, 16}, { 19200, 12}, { 38400, 6}, { 57600, 4}, { 115200, 2}, { 230400, 1}, }; /* Number of outstanding Command Write Urbs */ static atomic_t CmdUrbs = ATOMIC_INIT(0); /* function prototypes */ static void edge_close(struct usb_serial_port *port); static void process_rcvd_data(struct edgeport_serial *edge_serial, unsigned char *buffer, __u16 bufferLength); static void process_rcvd_status(struct edgeport_serial *edge_serial, __u8 byte2, __u8 byte3); static void edge_tty_recv(struct usb_serial_port *port, unsigned char *data, int length); static void handle_new_msr(struct edgeport_port *edge_port, __u8 newMsr); static void handle_new_lsr(struct edgeport_port *edge_port, __u8 lsrData, __u8 lsr, __u8 data); static int send_iosp_ext_cmd(struct edgeport_port *edge_port, __u8 command, __u8 param); static int calc_baud_rate_divisor(struct device *dev, int baud_rate, int *divisor); static void change_port_settings(struct tty_struct *tty, struct edgeport_port *edge_port, const struct ktermios *old_termios); static int send_cmd_write_uart_register(struct edgeport_port *edge_port, __u8 regNum, __u8 regValue); static int write_cmd_usb(struct edgeport_port *edge_port, unsigned char *buffer, int writeLength); static void send_more_port_data(struct edgeport_serial *edge_serial, struct edgeport_port *edge_port); static int rom_write(struct usb_serial *serial, __u16 extAddr, __u16 addr, __u16 length, const __u8 *data); /* ************************************************************************ */ /* ************************************************************************ */ /* ************************************************************************ */ /* ************************************************************************ */ /************************************************************************ * * * update_edgeport_E2PROM() Compare current versions of * * Boot ROM and Manufacture * * Descriptors with versions * * embedded in this driver * * * ************************************************************************/ static void update_edgeport_E2PROM(struct edgeport_serial *edge_serial) { struct device *dev = &edge_serial->serial->dev->dev; __u32 BootCurVer; __u32 BootNewVer; __u8 BootMajorVersion; __u8 BootMinorVersion; __u16 BootBuildNumber; __u32 Bootaddr; const struct ihex_binrec *rec; const struct firmware *fw; const char *fw_name; int response; switch (edge_serial->product_info.iDownloadFile) { case EDGE_DOWNLOAD_FILE_I930: fw_name = "edgeport/boot.fw"; break; case EDGE_DOWNLOAD_FILE_80251: fw_name = "edgeport/boot2.fw"; break; default: return; } response = request_ihex_firmware(&fw, fw_name, &edge_serial->serial->dev->dev); if (response) { dev_err(dev, "Failed to load image \"%s\" err %d\n", fw_name, response); return; } rec = (const struct ihex_binrec *)fw->data; BootMajorVersion = rec->data[0]; BootMinorVersion = rec->data[1]; BootBuildNumber = (rec->data[2] << 8) | rec->data[3]; /* Check Boot Image Version */ BootCurVer = (edge_serial->boot_descriptor.MajorVersion << 24) + (edge_serial->boot_descriptor.MinorVersion << 16) + le16_to_cpu(edge_serial->boot_descriptor.BuildNumber); BootNewVer = (BootMajorVersion << 24) + (BootMinorVersion << 16) + BootBuildNumber; 
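	/*
	 * Editorial note: both boot-image versions are packed with the major
	 * version in the top byte, the minor version in the next byte and the
	 * 16-bit build number in the low word, so the plain integer
	 * comparison below orders them correctly.
	 */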
dev_dbg(dev, "Current Boot Image version %d.%d.%d\n", edge_serial->boot_descriptor.MajorVersion, edge_serial->boot_descriptor.MinorVersion, le16_to_cpu(edge_serial->boot_descriptor.BuildNumber)); if (BootNewVer > BootCurVer) { dev_dbg(dev, "**Update Boot Image from %d.%d.%d to %d.%d.%d\n", edge_serial->boot_descriptor.MajorVersion, edge_serial->boot_descriptor.MinorVersion, le16_to_cpu(edge_serial->boot_descriptor.BuildNumber), BootMajorVersion, BootMinorVersion, BootBuildNumber); dev_dbg(dev, "Downloading new Boot Image\n"); for (rec = ihex_next_binrec(rec); rec; rec = ihex_next_binrec(rec)) { Bootaddr = be32_to_cpu(rec->addr); response = rom_write(edge_serial->serial, Bootaddr >> 16, Bootaddr & 0xFFFF, be16_to_cpu(rec->len), &rec->data[0]); if (response < 0) { dev_err(&edge_serial->serial->dev->dev, "rom_write failed (%x, %x, %d)\n", Bootaddr >> 16, Bootaddr & 0xFFFF, be16_to_cpu(rec->len)); break; } } } else { dev_dbg(dev, "Boot Image -- already up to date\n"); } release_firmware(fw); } static void dump_product_info(struct edgeport_serial *edge_serial, struct edgeport_product_info *product_info) { struct device *dev = &edge_serial->serial->dev->dev; /* Dump Product Info structure */ dev_dbg(dev, "**Product Information:\n"); dev_dbg(dev, " ProductId %x\n", product_info->ProductId); dev_dbg(dev, " NumPorts %d\n", product_info->NumPorts); dev_dbg(dev, " ProdInfoVer %d\n", product_info->ProdInfoVer); dev_dbg(dev, " IsServer %d\n", product_info->IsServer); dev_dbg(dev, " IsRS232 %d\n", product_info->IsRS232); dev_dbg(dev, " IsRS422 %d\n", product_info->IsRS422); dev_dbg(dev, " IsRS485 %d\n", product_info->IsRS485); dev_dbg(dev, " RomSize %d\n", product_info->RomSize); dev_dbg(dev, " RamSize %d\n", product_info->RamSize); dev_dbg(dev, " CpuRev %x\n", product_info->CpuRev); dev_dbg(dev, " BoardRev %x\n", product_info->BoardRev); dev_dbg(dev, " BootMajorVersion %d.%d.%d\n", product_info->BootMajorVersion, product_info->BootMinorVersion, le16_to_cpu(product_info->BootBuildNumber)); dev_dbg(dev, " FirmwareMajorVersion %d.%d.%d\n", product_info->FirmwareMajorVersion, product_info->FirmwareMinorVersion, le16_to_cpu(product_info->FirmwareBuildNumber)); dev_dbg(dev, " ManufactureDescDate %d/%d/%d\n", product_info->ManufactureDescDate[0], product_info->ManufactureDescDate[1], product_info->ManufactureDescDate[2]+1900); dev_dbg(dev, " iDownloadFile 0x%x\n", product_info->iDownloadFile); dev_dbg(dev, " EpicVer %d\n", product_info->EpicVer); } static void get_product_info(struct edgeport_serial *edge_serial) { struct edgeport_product_info *product_info = &edge_serial->product_info; memset(product_info, 0, sizeof(struct edgeport_product_info)); product_info->ProductId = (__u16)(le16_to_cpu(edge_serial->serial->dev->descriptor.idProduct) & ~ION_DEVICE_ID_80251_NETCHIP); product_info->NumPorts = edge_serial->manuf_descriptor.NumPorts; product_info->ProdInfoVer = 0; product_info->RomSize = edge_serial->manuf_descriptor.RomSize; product_info->RamSize = edge_serial->manuf_descriptor.RamSize; product_info->CpuRev = edge_serial->manuf_descriptor.CpuRev; product_info->BoardRev = edge_serial->manuf_descriptor.BoardRev; product_info->BootMajorVersion = edge_serial->boot_descriptor.MajorVersion; product_info->BootMinorVersion = edge_serial->boot_descriptor.MinorVersion; product_info->BootBuildNumber = edge_serial->boot_descriptor.BuildNumber; memcpy(product_info->ManufactureDescDate, edge_serial->manuf_descriptor.DescDate, sizeof(edge_serial->manuf_descriptor.DescDate)); /* check if this is 2nd generation hardware 
*/ if (le16_to_cpu(edge_serial->serial->dev->descriptor.idProduct) & ION_DEVICE_ID_80251_NETCHIP) product_info->iDownloadFile = EDGE_DOWNLOAD_FILE_80251; else product_info->iDownloadFile = EDGE_DOWNLOAD_FILE_I930; /* Determine Product type and set appropriate flags */ switch (DEVICE_ID_FROM_USB_PRODUCT_ID(product_info->ProductId)) { case ION_DEVICE_ID_EDGEPORT_COMPATIBLE: case ION_DEVICE_ID_EDGEPORT_4T: case ION_DEVICE_ID_EDGEPORT_4: case ION_DEVICE_ID_EDGEPORT_2: case ION_DEVICE_ID_EDGEPORT_8_DUAL_CPU: case ION_DEVICE_ID_EDGEPORT_8: case ION_DEVICE_ID_EDGEPORT_421: case ION_DEVICE_ID_EDGEPORT_21: case ION_DEVICE_ID_EDGEPORT_2_DIN: case ION_DEVICE_ID_EDGEPORT_4_DIN: case ION_DEVICE_ID_EDGEPORT_16_DUAL_CPU: product_info->IsRS232 = 1; break; case ION_DEVICE_ID_EDGEPORT_2I: /* Edgeport/2 RS422/RS485 */ product_info->IsRS422 = 1; product_info->IsRS485 = 1; break; case ION_DEVICE_ID_EDGEPORT_8I: /* Edgeport/4 RS422 */ case ION_DEVICE_ID_EDGEPORT_4I: /* Edgeport/4 RS422 */ product_info->IsRS422 = 1; break; } dump_product_info(edge_serial, product_info); } static int get_epic_descriptor(struct edgeport_serial *ep) { int result; struct usb_serial *serial = ep->serial; struct edgeport_product_info *product_info = &ep->product_info; struct edge_compatibility_descriptor *epic; struct edge_compatibility_bits *bits; struct device *dev = &serial->dev->dev; ep->is_epic = 0; epic = kmalloc(sizeof(*epic), GFP_KERNEL); if (!epic) return -ENOMEM; result = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), USB_REQUEST_ION_GET_EPIC_DESC, 0xC0, 0x00, 0x00, epic, sizeof(*epic), 300); if (result == sizeof(*epic)) { ep->is_epic = 1; memcpy(&ep->epic_descriptor, epic, sizeof(*epic)); memset(product_info, 0, sizeof(struct edgeport_product_info)); product_info->NumPorts = epic->NumPorts; product_info->ProdInfoVer = 0; product_info->FirmwareMajorVersion = epic->MajorVersion; product_info->FirmwareMinorVersion = epic->MinorVersion; product_info->FirmwareBuildNumber = epic->BuildNumber; product_info->iDownloadFile = epic->iDownloadFile; product_info->EpicVer = epic->EpicVer; product_info->Epic = epic->Supports; product_info->ProductId = ION_DEVICE_ID_EDGEPORT_COMPATIBLE; dump_product_info(ep, product_info); bits = &ep->epic_descriptor.Supports; dev_dbg(dev, "**EPIC descriptor:\n"); dev_dbg(dev, " VendEnableSuspend: %s\n", bits->VendEnableSuspend ? "TRUE": "FALSE"); dev_dbg(dev, " IOSPOpen : %s\n", bits->IOSPOpen ? "TRUE": "FALSE"); dev_dbg(dev, " IOSPClose : %s\n", bits->IOSPClose ? "TRUE": "FALSE"); dev_dbg(dev, " IOSPChase : %s\n", bits->IOSPChase ? "TRUE": "FALSE"); dev_dbg(dev, " IOSPSetRxFlow : %s\n", bits->IOSPSetRxFlow ? "TRUE": "FALSE"); dev_dbg(dev, " IOSPSetTxFlow : %s\n", bits->IOSPSetTxFlow ? "TRUE": "FALSE"); dev_dbg(dev, " IOSPSetXChar : %s\n", bits->IOSPSetXChar ? "TRUE": "FALSE"); dev_dbg(dev, " IOSPRxCheck : %s\n", bits->IOSPRxCheck ? "TRUE": "FALSE"); dev_dbg(dev, " IOSPSetClrBreak : %s\n", bits->IOSPSetClrBreak ? "TRUE": "FALSE"); dev_dbg(dev, " IOSPWriteMCR : %s\n", bits->IOSPWriteMCR ? "TRUE": "FALSE"); dev_dbg(dev, " IOSPWriteLCR : %s\n", bits->IOSPWriteLCR ? "TRUE": "FALSE"); dev_dbg(dev, " IOSPSetBaudRate : %s\n", bits->IOSPSetBaudRate ? "TRUE": "FALSE"); dev_dbg(dev, " TrueEdgeport : %s\n", bits->TrueEdgeport ? 
"TRUE": "FALSE"); result = 0; } else if (result >= 0) { dev_warn(&serial->interface->dev, "short epic descriptor received: %d\n", result); result = -EIO; } kfree(epic); return result; } /************************************************************************/ /************************************************************************/ /* U S B C A L L B A C K F U N C T I O N S */ /* U S B C A L L B A C K F U N C T I O N S */ /************************************************************************/ /************************************************************************/ /***************************************************************************** * edge_interrupt_callback * this is the callback function for when we have received data on the * interrupt endpoint. *****************************************************************************/ static void edge_interrupt_callback(struct urb *urb) { struct edgeport_serial *edge_serial = urb->context; struct device *dev; struct edgeport_port *edge_port; struct usb_serial_port *port; unsigned char *data = urb->transfer_buffer; int length = urb->actual_length; unsigned long flags; int bytes_avail; int position; int txCredits; int portNumber; int result; int status = urb->status; switch (status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(&urb->dev->dev, "%s - urb shutting down with status: %d\n", __func__, status); return; default: dev_dbg(&urb->dev->dev, "%s - nonzero urb status received: %d\n", __func__, status); goto exit; } dev = &edge_serial->serial->dev->dev; /* process this interrupt-read even if there are no ports open */ if (length) { usb_serial_debug_data(dev, __func__, length, data); if (length > 1) { bytes_avail = data[0] | (data[1] << 8); if (bytes_avail) { spin_lock_irqsave(&edge_serial->es_lock, flags); edge_serial->rxBytesAvail += bytes_avail; dev_dbg(dev, "%s - bytes_avail=%d, rxBytesAvail=%d, read_in_progress=%d\n", __func__, bytes_avail, edge_serial->rxBytesAvail, edge_serial->read_in_progress); if (edge_serial->rxBytesAvail > 0 && !edge_serial->read_in_progress) { dev_dbg(dev, "%s - posting a read\n", __func__); edge_serial->read_in_progress = true; /* we have pending bytes on the bulk in pipe, send a request */ result = usb_submit_urb(edge_serial->read_urb, GFP_ATOMIC); if (result) { dev_err(dev, "%s - usb_submit_urb(read bulk) failed with result = %d\n", __func__, result); edge_serial->read_in_progress = false; } } spin_unlock_irqrestore(&edge_serial->es_lock, flags); } } /* grab the txcredits for the ports if available */ position = 2; portNumber = 0; while ((position < length - 1) && (portNumber < edge_serial->serial->num_ports)) { txCredits = data[position] | (data[position+1] << 8); if (txCredits) { port = edge_serial->serial->port[portNumber]; edge_port = usb_get_serial_port_data(port); if (edge_port && edge_port->open) { spin_lock_irqsave(&edge_port->ep_lock, flags); edge_port->txCredits += txCredits; spin_unlock_irqrestore(&edge_port->ep_lock, flags); dev_dbg(dev, "%s - txcredits for port%d = %d\n", __func__, portNumber, edge_port->txCredits); /* tell the tty driver that something has changed */ tty_port_tty_wakeup(&edge_port->port->port); /* Since we have more credit, check if more data can be sent */ send_more_port_data(edge_serial, edge_port); } } position += 2; ++portNumber; } } exit: result = usb_submit_urb(urb, GFP_ATOMIC); if (result) dev_err(&urb->dev->dev, "%s - Error %d submitting control urb\n", __func__, result); } 
/***************************************************************************** * edge_bulk_in_callback * this is the callback function for when we have received data on the * bulk in endpoint. *****************************************************************************/ static void edge_bulk_in_callback(struct urb *urb) { struct edgeport_serial *edge_serial = urb->context; struct device *dev; unsigned char *data = urb->transfer_buffer; int retval; __u16 raw_data_length; int status = urb->status; unsigned long flags; if (status) { dev_dbg(&urb->dev->dev, "%s - nonzero read bulk status received: %d\n", __func__, status); edge_serial->read_in_progress = false; return; } if (urb->actual_length == 0) { dev_dbg(&urb->dev->dev, "%s - read bulk callback with no data\n", __func__); edge_serial->read_in_progress = false; return; } dev = &edge_serial->serial->dev->dev; raw_data_length = urb->actual_length; usb_serial_debug_data(dev, __func__, raw_data_length, data); spin_lock_irqsave(&edge_serial->es_lock, flags); /* decrement our rxBytes available by the number that we just got */ edge_serial->rxBytesAvail -= raw_data_length; dev_dbg(dev, "%s - Received = %d, rxBytesAvail %d\n", __func__, raw_data_length, edge_serial->rxBytesAvail); process_rcvd_data(edge_serial, data, urb->actual_length); /* check to see if there's any more data for us to read */ if (edge_serial->rxBytesAvail > 0) { dev_dbg(dev, "%s - posting a read\n", __func__); retval = usb_submit_urb(edge_serial->read_urb, GFP_ATOMIC); if (retval) { dev_err(dev, "%s - usb_submit_urb(read bulk) failed, retval = %d\n", __func__, retval); edge_serial->read_in_progress = false; } } else { edge_serial->read_in_progress = false; } spin_unlock_irqrestore(&edge_serial->es_lock, flags); } /***************************************************************************** * edge_bulk_out_data_callback * this is the callback function for when we have finished sending * serial data on the bulk out endpoint. *****************************************************************************/ static void edge_bulk_out_data_callback(struct urb *urb) { struct edgeport_port *edge_port = urb->context; int status = urb->status; if (status) { dev_dbg(&urb->dev->dev, "%s - nonzero write bulk status received: %d\n", __func__, status); } if (edge_port->open) tty_port_tty_wakeup(&edge_port->port->port); /* Release the Write URB */ edge_port->write_in_progress = false; /* Check if more data needs to be sent */ send_more_port_data((struct edgeport_serial *) (usb_get_serial_data(edge_port->port->serial)), edge_port); } /***************************************************************************** * BulkOutCmdCallback * this is the callback function for when we have finished sending a * command on the bulk out endpoint. 
*****************************************************************************/ static void edge_bulk_out_cmd_callback(struct urb *urb) { struct edgeport_port *edge_port = urb->context; struct device *dev = &urb->dev->dev; int status = urb->status; atomic_dec(&CmdUrbs); dev_dbg(dev, "%s - FREE URB %p (outstanding %d)\n", __func__, urb, atomic_read(&CmdUrbs)); /* clean up the transfer buffer */ kfree(urb->transfer_buffer); /* Free the command urb */ usb_free_urb(urb); if (status) { dev_dbg(dev, "%s - nonzero write bulk status received: %d\n", __func__, status); return; } /* tell the tty driver that something has changed */ if (edge_port->open) tty_port_tty_wakeup(&edge_port->port->port); /* we have completed the command */ edge_port->commandPending = false; wake_up(&edge_port->wait_command); } /***************************************************************************** * Driver tty interface functions *****************************************************************************/ /***************************************************************************** * SerialOpen * this function is called by the tty driver when a port is opened * If successful, we return 0 * Otherwise we return a negative error number. *****************************************************************************/ static int edge_open(struct tty_struct *tty, struct usb_serial_port *port) { struct edgeport_port *edge_port = usb_get_serial_port_data(port); struct device *dev = &port->dev; struct usb_serial *serial; struct edgeport_serial *edge_serial; int response; if (edge_port == NULL) return -ENODEV; /* see if we've set up our endpoint info yet (can't set it up in edge_startup as the structures were not set up at that time.) */ serial = port->serial; edge_serial = usb_get_serial_data(serial); if (edge_serial == NULL) return -ENODEV; if (edge_serial->interrupt_in_buffer == NULL) { struct usb_serial_port *port0 = serial->port[0]; /* not set up yet, so do it now */ edge_serial->interrupt_in_buffer = port0->interrupt_in_buffer; edge_serial->interrupt_in_endpoint = port0->interrupt_in_endpointAddress; edge_serial->interrupt_read_urb = port0->interrupt_in_urb; edge_serial->bulk_in_buffer = port0->bulk_in_buffer; edge_serial->bulk_in_endpoint = port0->bulk_in_endpointAddress; edge_serial->read_urb = port0->read_urb; edge_serial->bulk_out_endpoint = port0->bulk_out_endpointAddress; /* set up our interrupt urb */ usb_fill_int_urb(edge_serial->interrupt_read_urb, serial->dev, usb_rcvintpipe(serial->dev, port0->interrupt_in_endpointAddress), port0->interrupt_in_buffer, edge_serial->interrupt_read_urb->transfer_buffer_length, edge_interrupt_callback, edge_serial, edge_serial->interrupt_read_urb->interval); /* set up our bulk in urb */ usb_fill_bulk_urb(edge_serial->read_urb, serial->dev, usb_rcvbulkpipe(serial->dev, port0->bulk_in_endpointAddress), port0->bulk_in_buffer, edge_serial->read_urb->transfer_buffer_length, edge_bulk_in_callback, edge_serial); edge_serial->read_in_progress = false; /* start interrupt read for this edgeport * this interrupt will continue as long * as the edgeport is connected */ response = usb_submit_urb(edge_serial->interrupt_read_urb, GFP_KERNEL); if (response) { dev_err(dev, "%s - Error %d submitting control urb\n", __func__, response); } } /* initialize our wait queues */ init_waitqueue_head(&edge_port->wait_open); init_waitqueue_head(&edge_port->wait_chase); init_waitqueue_head(&edge_port->wait_command); /* initialize our port settings */ edge_port->txCredits = 0; /* Can't send any data yet */ /* 
Must always set this bit to enable ints! */ edge_port->shadowMCR = MCR_MASTER_IE; edge_port->chaseResponsePending = false; /* send a open port command */ edge_port->openPending = true; edge_port->open = false; response = send_iosp_ext_cmd(edge_port, IOSP_CMD_OPEN_PORT, 0); if (response < 0) { dev_err(dev, "%s - error sending open port command\n", __func__); edge_port->openPending = false; return -ENODEV; } /* now wait for the port to be completely opened */ wait_event_timeout(edge_port->wait_open, !edge_port->openPending, OPEN_TIMEOUT); if (!edge_port->open) { /* open timed out */ dev_dbg(dev, "%s - open timeout\n", __func__); edge_port->openPending = false; return -ENODEV; } /* create the txfifo */ edge_port->txfifo.head = 0; edge_port->txfifo.tail = 0; edge_port->txfifo.count = 0; edge_port->txfifo.size = edge_port->maxTxCredits; edge_port->txfifo.fifo = kmalloc(edge_port->maxTxCredits, GFP_KERNEL); if (!edge_port->txfifo.fifo) { edge_close(port); return -ENOMEM; } /* Allocate a URB for the write */ edge_port->write_urb = usb_alloc_urb(0, GFP_KERNEL); edge_port->write_in_progress = false; if (!edge_port->write_urb) { edge_close(port); return -ENOMEM; } dev_dbg(dev, "%s - Initialize TX fifo to %d bytes\n", __func__, edge_port->maxTxCredits); return 0; } /************************************************************************ * * block_until_chase_response * * This function will block the close until one of the following: * 1. Response to our Chase comes from Edgeport * 2. A timeout of 10 seconds without activity has expired * (1K of Edgeport data @ 2400 baud ==> 4 sec to empty) * ************************************************************************/ static void block_until_chase_response(struct edgeport_port *edge_port) { struct device *dev = &edge_port->port->dev; DEFINE_WAIT(wait); __u16 lastCredits; int timeout = 1*HZ; int loop = 10; while (1) { /* Save Last credits */ lastCredits = edge_port->txCredits; /* Did we get our Chase response */ if (!edge_port->chaseResponsePending) { dev_dbg(dev, "%s - Got Chase Response\n", __func__); /* did we get all of our credit back? */ if (edge_port->txCredits == edge_port->maxTxCredits) { dev_dbg(dev, "%s - Got all credits\n", __func__); return; } } /* Block the thread for a while */ prepare_to_wait(&edge_port->wait_chase, &wait, TASK_UNINTERRUPTIBLE); schedule_timeout(timeout); finish_wait(&edge_port->wait_chase, &wait); if (lastCredits == edge_port->txCredits) { /* No activity.. count down. */ loop--; if (loop == 0) { edge_port->chaseResponsePending = false; dev_dbg(dev, "%s - Chase TIMEOUT\n", __func__); return; } } else { /* Reset timeout value back to 10 seconds */ dev_dbg(dev, "%s - Last %d, Current %d\n", __func__, lastCredits, edge_port->txCredits); loop = 10; } } } /************************************************************************ * * block_until_tx_empty * * This function will block the close until one of the following: * 1. TX count are 0 * 2. The edgeport has stopped * 3. A timeout of 3 seconds without activity has expired * ************************************************************************/ static void block_until_tx_empty(struct edgeport_port *edge_port) { struct device *dev = &edge_port->port->dev; DEFINE_WAIT(wait); struct TxFifo *fifo = &edge_port->txfifo; __u32 lastCount; int timeout = HZ/10; int loop = 30; while (1) { /* Save Last count */ lastCount = fifo->count; /* Is the Edgeport Buffer empty? 
*/ if (lastCount == 0) { dev_dbg(dev, "%s - TX Buffer Empty\n", __func__); return; } /* Block the thread for a while */ prepare_to_wait(&edge_port->wait_chase, &wait, TASK_UNINTERRUPTIBLE); schedule_timeout(timeout); finish_wait(&edge_port->wait_chase, &wait); dev_dbg(dev, "%s wait\n", __func__); if (lastCount == fifo->count) { /* No activity.. count down. */ loop--; if (loop == 0) { dev_dbg(dev, "%s - TIMEOUT\n", __func__); return; } } else { /* Reset timeout value back to seconds */ loop = 30; } } } /***************************************************************************** * edge_close * this function is called by the tty driver when a port is closed *****************************************************************************/ static void edge_close(struct usb_serial_port *port) { struct edgeport_serial *edge_serial; struct edgeport_port *edge_port; int status; edge_serial = usb_get_serial_data(port->serial); edge_port = usb_get_serial_port_data(port); if (edge_serial == NULL || edge_port == NULL) return; /* block until tx is empty */ block_until_tx_empty(edge_port); edge_port->closePending = true; if (!edge_serial->is_epic || edge_serial->epic_descriptor.Supports.IOSPChase) { /* flush and chase */ edge_port->chaseResponsePending = true; dev_dbg(&port->dev, "%s - Sending IOSP_CMD_CHASE_PORT\n", __func__); status = send_iosp_ext_cmd(edge_port, IOSP_CMD_CHASE_PORT, 0); if (status == 0) /* block until chase finished */ block_until_chase_response(edge_port); else edge_port->chaseResponsePending = false; } if (!edge_serial->is_epic || edge_serial->epic_descriptor.Supports.IOSPClose) { /* close the port */ dev_dbg(&port->dev, "%s - Sending IOSP_CMD_CLOSE_PORT\n", __func__); send_iosp_ext_cmd(edge_port, IOSP_CMD_CLOSE_PORT, 0); } /* port->close = true; */ edge_port->closePending = false; edge_port->open = false; edge_port->openPending = false; usb_kill_urb(edge_port->write_urb); if (edge_port->write_urb) { /* if this urb had a transfer buffer already (old transfer) free it */ kfree(edge_port->write_urb->transfer_buffer); usb_free_urb(edge_port->write_urb); edge_port->write_urb = NULL; } kfree(edge_port->txfifo.fifo); edge_port->txfifo.fifo = NULL; } /***************************************************************************** * SerialWrite * this function is called by the tty driver when data should be written * to the port. * If successful, we return the number of bytes written, otherwise we * return a negative error number. 
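 *
 *	Note that this routine only queues the data into the per-port
 *	circular TX fifo (bounded by the current txCredits); the actual USB
 *	transfer is issued later by send_more_port_data().  Because the fifo
 *	is circular the copy may wrap: e.g. with a 256-byte fifo, head at
 *	250 and a 10-byte write, the first 6 bytes land at offsets 250..255
 *	and the remaining 4 wrap around to offset 0.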
 *****************************************************************************/
static int edge_write(struct tty_struct *tty, struct usb_serial_port *port,
					const unsigned char *data, int count)
{
	struct edgeport_port *edge_port = usb_get_serial_port_data(port);
	struct TxFifo *fifo;
	int copySize;
	int bytesleft;
	int firsthalf;
	int secondhalf;
	unsigned long flags;

	if (edge_port == NULL)
		return -ENODEV;

	/* get a pointer to the Tx fifo */
	fifo = &edge_port->txfifo;

	spin_lock_irqsave(&edge_port->ep_lock, flags);

	/* calculate number of bytes to put in fifo */
	copySize = min_t(unsigned int, count,
			 (edge_port->txCredits - fifo->count));
	dev_dbg(&port->dev, "%s of %d byte(s) Fifo room %d -- will copy %d bytes\n",
		__func__, count, edge_port->txCredits - fifo->count, copySize);

	/* catch writes of 0 bytes which the tty driver likes to give us,
	   and the case where txCredits is empty */
	if (copySize == 0) {
		dev_dbg(&port->dev, "%s - copySize = Zero\n", __func__);
		goto finish_write;
	}

	/* queue the data
	 * since we can never overflow the buffer we do not have to check for a
	 * full condition
	 *
	 * the copy is done in two parts -- first fill to the end of the buffer
	 * then copy the rest from the start of the buffer
	 */
	bytesleft = fifo->size - fifo->head;
	firsthalf = min(bytesleft, copySize);
	dev_dbg(&port->dev, "%s - copy %d bytes of %d into fifo\n", __func__,
		firsthalf, bytesleft);

	/* now copy our data */
	memcpy(&fifo->fifo[fifo->head], data, firsthalf);
	usb_serial_debug_data(&port->dev, __func__, firsthalf, &fifo->fifo[fifo->head]);

	/* update the index and size */
	fifo->head += firsthalf;
	fifo->count += firsthalf;

	/* wrap the index */
	if (fifo->head == fifo->size)
		fifo->head = 0;

	secondhalf = copySize-firsthalf;

	if (secondhalf) {
		dev_dbg(&port->dev, "%s - copy rest of data %d\n", __func__, secondhalf);
		memcpy(&fifo->fifo[fifo->head], &data[firsthalf], secondhalf);
		usb_serial_debug_data(&port->dev, __func__, secondhalf, &fifo->fifo[fifo->head]);
		/* update the index and size */
		fifo->count += secondhalf;
		fifo->head += secondhalf;
		/* No need to check for wrap since we cannot get to the end of
		 * the fifo in this part
		 */
	}

finish_write:
	spin_unlock_irqrestore(&edge_port->ep_lock, flags);

	send_more_port_data((struct edgeport_serial *)
			usb_get_serial_data(port->serial), edge_port);

	dev_dbg(&port->dev, "%s wrote %d byte(s) TxCredits %d, Fifo %d\n",
		__func__, copySize, edge_port->txCredits, fifo->count);

	return copySize;
}

/************************************************************************
 *
 * send_more_port_data()
 *
 *	This routine attempts to write additional UART transmit data
 *	to a port over the USB bulk pipe. It is called (1) when new
 *	data has been written to a port's TxBuffer from higher layers
 *	(2) when the peripheral sends us additional TxCredits indicating
 *	that it can accept more Tx data for a given port; and (3) when
 *	a bulk write completes successfully and we want to see if we
 *	can transmit more.
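 *
 *	Each bulk-out transfer built here consists of the two IOSP data
 *	header bytes (IOSP_BUILD_DATA_HDR1/2 for this port and byte count)
 *	followed by up to fifo->count bytes drained from the circular fifo;
 *	txCredits is decremented by the number of payload bytes sent.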
* ************************************************************************/ static void send_more_port_data(struct edgeport_serial *edge_serial, struct edgeport_port *edge_port) { struct TxFifo *fifo = &edge_port->txfifo; struct device *dev = &edge_port->port->dev; struct urb *urb; unsigned char *buffer; int status; int count; int bytesleft; int firsthalf; int secondhalf; unsigned long flags; spin_lock_irqsave(&edge_port->ep_lock, flags); if (edge_port->write_in_progress || !edge_port->open || (fifo->count == 0)) { dev_dbg(dev, "%s EXIT - fifo %d, PendingWrite = %d\n", __func__, fifo->count, edge_port->write_in_progress); goto exit_send; } /* since the amount of data in the fifo will always fit into the * edgeport buffer we do not need to check the write length * * Do we have enough credits for this port to make it worthwhile * to bother queueing a write. If it's too small, say a few bytes, * it's better to wait for more credits so we can do a larger write. */ if (edge_port->txCredits < EDGE_FW_GET_TX_CREDITS_SEND_THRESHOLD(edge_port->maxTxCredits, EDGE_FW_BULK_MAX_PACKET_SIZE)) { dev_dbg(dev, "%s Not enough credit - fifo %d TxCredit %d\n", __func__, fifo->count, edge_port->txCredits); goto exit_send; } /* lock this write */ edge_port->write_in_progress = true; /* get a pointer to the write_urb */ urb = edge_port->write_urb; /* make sure transfer buffer is freed */ kfree(urb->transfer_buffer); urb->transfer_buffer = NULL; /* build the data header for the buffer and port that we are about to send out */ count = fifo->count; buffer = kmalloc(count+2, GFP_ATOMIC); if (!buffer) { edge_port->write_in_progress = false; goto exit_send; } buffer[0] = IOSP_BUILD_DATA_HDR1(edge_port->port->port_number, count); buffer[1] = IOSP_BUILD_DATA_HDR2(edge_port->port->port_number, count); /* now copy our data */ bytesleft = fifo->size - fifo->tail; firsthalf = min(bytesleft, count); memcpy(&buffer[2], &fifo->fifo[fifo->tail], firsthalf); fifo->tail += firsthalf; fifo->count -= firsthalf; if (fifo->tail == fifo->size) fifo->tail = 0; secondhalf = count-firsthalf; if (secondhalf) { memcpy(&buffer[2+firsthalf], &fifo->fifo[fifo->tail], secondhalf); fifo->tail += secondhalf; fifo->count -= secondhalf; } if (count) usb_serial_debug_data(&edge_port->port->dev, __func__, count, &buffer[2]); /* fill up the urb with all of our data and submit it */ usb_fill_bulk_urb(urb, edge_serial->serial->dev, usb_sndbulkpipe(edge_serial->serial->dev, edge_serial->bulk_out_endpoint), buffer, count+2, edge_bulk_out_data_callback, edge_port); /* decrement the number of credits we have by the number we just sent */ edge_port->txCredits -= count; edge_port->port->icount.tx += count; status = usb_submit_urb(urb, GFP_ATOMIC); if (status) { /* something went wrong */ dev_err_console(edge_port->port, "%s - usb_submit_urb(write bulk) failed, status = %d, data lost\n", __func__, status); edge_port->write_in_progress = false; /* revert the credits as something bad happened. */ edge_port->txCredits += count; edge_port->port->icount.tx -= count; } dev_dbg(dev, "%s wrote %d byte(s) TxCredit %d, Fifo %d\n", __func__, count, edge_port->txCredits, fifo->count); exit_send: spin_unlock_irqrestore(&edge_port->ep_lock, flags); } /***************************************************************************** * edge_write_room * this function is called by the tty driver when it wants to know how * many bytes of data we can accept for a specific port. 
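 *
 *	The value reported is txCredits - txfifo.count, i.e. the credits
 *	granted by the device that are not already consumed by data buffered
 *	in the fifo.  For example, with 200 credits outstanding and 50 bytes
 *	queued, write_room reports 150, while edge_chars_in_buffer() below
 *	reports maxTxCredits - txCredits + txfifo.count.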
*****************************************************************************/ static unsigned int edge_write_room(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct edgeport_port *edge_port = usb_get_serial_port_data(port); unsigned int room; unsigned long flags; /* total of both buffers is still txCredit */ spin_lock_irqsave(&edge_port->ep_lock, flags); room = edge_port->txCredits - edge_port->txfifo.count; spin_unlock_irqrestore(&edge_port->ep_lock, flags); dev_dbg(&port->dev, "%s - returns %u\n", __func__, room); return room; } /***************************************************************************** * edge_chars_in_buffer * this function is called by the tty driver when it wants to know how * many bytes of data we currently have outstanding in the port (data that * has been written, but hasn't made it out the port yet) *****************************************************************************/ static unsigned int edge_chars_in_buffer(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct edgeport_port *edge_port = usb_get_serial_port_data(port); unsigned int num_chars; unsigned long flags; spin_lock_irqsave(&edge_port->ep_lock, flags); num_chars = edge_port->maxTxCredits - edge_port->txCredits + edge_port->txfifo.count; spin_unlock_irqrestore(&edge_port->ep_lock, flags); if (num_chars) { dev_dbg(&port->dev, "%s - returns %u\n", __func__, num_chars); } return num_chars; } /***************************************************************************** * SerialThrottle * this function is called by the tty driver when it wants to stop the data * being read from the port. *****************************************************************************/ static void edge_throttle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct edgeport_port *edge_port = usb_get_serial_port_data(port); int status; if (edge_port == NULL) return; if (!edge_port->open) { dev_dbg(&port->dev, "%s - port not opened\n", __func__); return; } /* if we are implementing XON/XOFF, send the stop character */ if (I_IXOFF(tty)) { unsigned char stop_char = STOP_CHAR(tty); status = edge_write(tty, port, &stop_char, 1); if (status <= 0) return; } /* if we are implementing RTS/CTS, toggle that line */ if (C_CRTSCTS(tty)) { edge_port->shadowMCR &= ~MCR_RTS; status = send_cmd_write_uart_register(edge_port, MCR, edge_port->shadowMCR); if (status != 0) return; } } /***************************************************************************** * edge_unthrottle * this function is called by the tty driver when it wants to resume the * data being read from the port (called after SerialThrottle is called) *****************************************************************************/ static void edge_unthrottle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct edgeport_port *edge_port = usb_get_serial_port_data(port); int status; if (edge_port == NULL) return; if (!edge_port->open) { dev_dbg(&port->dev, "%s - port not opened\n", __func__); return; } /* if we are implementing XON/XOFF, send the start character */ if (I_IXOFF(tty)) { unsigned char start_char = START_CHAR(tty); status = edge_write(tty, port, &start_char, 1); if (status <= 0) return; } /* if we are implementing RTS/CTS, toggle that line */ if (C_CRTSCTS(tty)) { edge_port->shadowMCR |= MCR_RTS; send_cmd_write_uart_register(edge_port, MCR, edge_port->shadowMCR); } } /***************************************************************************** * 
SerialSetTermios * this function is called by the tty driver when it wants to change * the termios structure *****************************************************************************/ static void edge_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { struct edgeport_port *edge_port = usb_get_serial_port_data(port); if (edge_port == NULL) return; if (!edge_port->open) { dev_dbg(&port->dev, "%s - port not opened\n", __func__); return; } /* change the port settings to the new ones specified */ change_port_settings(tty, edge_port, old_termios); } /***************************************************************************** * get_lsr_info - get line status register info * * Purpose: Let user call ioctl() to get info when the UART physically * is emptied. On bus types like RS485, the transmitter must * release the bus after transmitting. This must be done when * the transmit shift register is empty, not be done when the * transmit holding register is empty. This functionality * allows an RS485 driver to be written in user space. *****************************************************************************/ static int get_lsr_info(struct edgeport_port *edge_port, unsigned int __user *value) { unsigned int result = 0; unsigned long flags; spin_lock_irqsave(&edge_port->ep_lock, flags); if (edge_port->maxTxCredits == edge_port->txCredits && edge_port->txfifo.count == 0) { dev_dbg(&edge_port->port->dev, "%s -- Empty\n", __func__); result = TIOCSER_TEMT; } spin_unlock_irqrestore(&edge_port->ep_lock, flags); if (copy_to_user(value, &result, sizeof(int))) return -EFAULT; return 0; } static int edge_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; struct edgeport_port *edge_port = usb_get_serial_port_data(port); unsigned int mcr; mcr = edge_port->shadowMCR; if (set & TIOCM_RTS) mcr |= MCR_RTS; if (set & TIOCM_DTR) mcr |= MCR_DTR; if (set & TIOCM_LOOP) mcr |= MCR_LOOPBACK; if (clear & TIOCM_RTS) mcr &= ~MCR_RTS; if (clear & TIOCM_DTR) mcr &= ~MCR_DTR; if (clear & TIOCM_LOOP) mcr &= ~MCR_LOOPBACK; edge_port->shadowMCR = mcr; send_cmd_write_uart_register(edge_port, MCR, edge_port->shadowMCR); return 0; } static int edge_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct edgeport_port *edge_port = usb_get_serial_port_data(port); unsigned int result = 0; unsigned int msr; unsigned int mcr; msr = edge_port->shadowMSR; mcr = edge_port->shadowMCR; result = ((mcr & MCR_DTR) ? TIOCM_DTR: 0) /* 0x002 */ | ((mcr & MCR_RTS) ? TIOCM_RTS: 0) /* 0x004 */ | ((msr & EDGEPORT_MSR_CTS) ? TIOCM_CTS: 0) /* 0x020 */ | ((msr & EDGEPORT_MSR_CD) ? TIOCM_CAR: 0) /* 0x040 */ | ((msr & EDGEPORT_MSR_RI) ? TIOCM_RI: 0) /* 0x080 */ | ((msr & EDGEPORT_MSR_DSR) ? 
TIOCM_DSR: 0); /* 0x100 */ return result; } /***************************************************************************** * SerialIoctl * this function handles any ioctl calls to the driver *****************************************************************************/ static int edge_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct usb_serial_port *port = tty->driver_data; struct edgeport_port *edge_port = usb_get_serial_port_data(port); switch (cmd) { case TIOCSERGETLSR: dev_dbg(&port->dev, "%s TIOCSERGETLSR\n", __func__); return get_lsr_info(edge_port, (unsigned int __user *) arg); } return -ENOIOCTLCMD; } /***************************************************************************** * SerialBreak * this function sends a break to the port *****************************************************************************/ static int edge_break(struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; struct edgeport_port *edge_port = usb_get_serial_port_data(port); struct edgeport_serial *edge_serial = usb_get_serial_data(port->serial); int status = 0; if (!edge_serial->is_epic || edge_serial->epic_descriptor.Supports.IOSPChase) { /* flush and chase */ edge_port->chaseResponsePending = true; dev_dbg(&port->dev, "%s - Sending IOSP_CMD_CHASE_PORT\n", __func__); status = send_iosp_ext_cmd(edge_port, IOSP_CMD_CHASE_PORT, 0); if (status == 0) { /* block until chase finished */ block_until_chase_response(edge_port); } else { edge_port->chaseResponsePending = false; } } if (!edge_serial->is_epic || edge_serial->epic_descriptor.Supports.IOSPSetClrBreak) { if (break_state == -1) { dev_dbg(&port->dev, "%s - Sending IOSP_CMD_SET_BREAK\n", __func__); status = send_iosp_ext_cmd(edge_port, IOSP_CMD_SET_BREAK, 0); } else { dev_dbg(&port->dev, "%s - Sending IOSP_CMD_CLEAR_BREAK\n", __func__); status = send_iosp_ext_cmd(edge_port, IOSP_CMD_CLEAR_BREAK, 0); } if (status) dev_dbg(&port->dev, "%s - error sending break set/clear command.\n", __func__); } return status; } /***************************************************************************** * process_rcvd_data * this function handles the data received on the bulk in pipe. *****************************************************************************/ static void process_rcvd_data(struct edgeport_serial *edge_serial, unsigned char *buffer, __u16 bufferLength) { struct usb_serial *serial = edge_serial->serial; struct device *dev = &serial->dev->dev; struct usb_serial_port *port; struct edgeport_port *edge_port; __u16 lastBufferLength; __u16 rxLen; lastBufferLength = bufferLength + 1; while (bufferLength > 0) { /* failsafe incase we get a message that we don't understand */ if (lastBufferLength == bufferLength) { dev_dbg(dev, "%s - stuck in loop, exiting it.\n", __func__); break; } lastBufferLength = bufferLength; switch (edge_serial->rxState) { case EXPECT_HDR1: edge_serial->rxHeader1 = *buffer; ++buffer; --bufferLength; if (bufferLength == 0) { edge_serial->rxState = EXPECT_HDR2; break; } fallthrough; case EXPECT_HDR2: edge_serial->rxHeader2 = *buffer; ++buffer; --bufferLength; dev_dbg(dev, "%s - Hdr1=%02X Hdr2=%02X\n", __func__, edge_serial->rxHeader1, edge_serial->rxHeader2); /* Process depending on whether this header is * data or status */ if (IS_CMD_STAT_HDR(edge_serial->rxHeader1)) { /* Decode this status header and go to * EXPECT_HDR1 (if we can process the status * with only 2 bytes), or go to EXPECT_HDR3 to * get the third byte. 
*/ edge_serial->rxPort = IOSP_GET_HDR_PORT(edge_serial->rxHeader1); edge_serial->rxStatusCode = IOSP_GET_STATUS_CODE( edge_serial->rxHeader1); if (!IOSP_STATUS_IS_2BYTE( edge_serial->rxStatusCode)) { /* This status needs additional bytes. * Save what we have and then wait for * more data. */ edge_serial->rxStatusParam = edge_serial->rxHeader2; edge_serial->rxState = EXPECT_HDR3; break; } /* We have all the header bytes, process the status now */ process_rcvd_status(edge_serial, edge_serial->rxHeader2, 0); edge_serial->rxState = EXPECT_HDR1; break; } edge_serial->rxPort = IOSP_GET_HDR_PORT(edge_serial->rxHeader1); edge_serial->rxBytesRemaining = IOSP_GET_HDR_DATA_LEN(edge_serial->rxHeader1, edge_serial->rxHeader2); dev_dbg(dev, "%s - Data for Port %u Len %u\n", __func__, edge_serial->rxPort, edge_serial->rxBytesRemaining); if (bufferLength == 0) { edge_serial->rxState = EXPECT_DATA; break; } fallthrough; case EXPECT_DATA: /* Expect data */ if (bufferLength < edge_serial->rxBytesRemaining) { rxLen = bufferLength; /* Expect data to start next buffer */ edge_serial->rxState = EXPECT_DATA; } else { /* BufLen >= RxBytesRemaining */ rxLen = edge_serial->rxBytesRemaining; /* Start another header next time */ edge_serial->rxState = EXPECT_HDR1; } bufferLength -= rxLen; edge_serial->rxBytesRemaining -= rxLen; /* spit this data back into the tty driver if this port is open */ if (rxLen && edge_serial->rxPort < serial->num_ports) { port = serial->port[edge_serial->rxPort]; edge_port = usb_get_serial_port_data(port); if (edge_port && edge_port->open) { dev_dbg(dev, "%s - Sending %d bytes to TTY for port %d\n", __func__, rxLen, edge_serial->rxPort); edge_tty_recv(edge_port->port, buffer, rxLen); edge_port->port->icount.rx += rxLen; } } buffer += rxLen; break; case EXPECT_HDR3: /* Expect 3rd byte of status header */ edge_serial->rxHeader3 = *buffer; ++buffer; --bufferLength; /* We have all the header bytes, process the status now */ process_rcvd_status(edge_serial, edge_serial->rxStatusParam, edge_serial->rxHeader3); edge_serial->rxState = EXPECT_HDR1; break; } } } /***************************************************************************** * process_rcvd_status * this function handles the any status messages received on the * bulk in pipe. *****************************************************************************/ static void process_rcvd_status(struct edgeport_serial *edge_serial, __u8 byte2, __u8 byte3) { struct usb_serial_port *port; struct edgeport_port *edge_port; struct tty_struct *tty; struct device *dev; __u8 code = edge_serial->rxStatusCode; /* switch the port pointer to the one being currently talked about */ if (edge_serial->rxPort >= edge_serial->serial->num_ports) return; port = edge_serial->serial->port[edge_serial->rxPort]; edge_port = usb_get_serial_port_data(port); if (edge_port == NULL) { dev_err(&edge_serial->serial->dev->dev, "%s - edge_port == NULL for port %d\n", __func__, edge_serial->rxPort); return; } dev = &port->dev; if (code == IOSP_EXT_STATUS) { switch (byte2) { case IOSP_EXT_STATUS_CHASE_RSP: /* we want to do EXT status regardless of port * open/closed */ dev_dbg(dev, "%s - Port %u EXT CHASE_RSP Data = %02x\n", __func__, edge_serial->rxPort, byte3); /* Currently, the only EXT_STATUS is Chase, so process * here instead of one more call to one more subroutine * If/when more EXT_STATUS, there'll be more work to do * Also, we currently clear flag and close the port * regardless of content of above's Byte3. 
* We could choose to do something else when Byte3 says * Timeout on Chase from Edgeport, like wait longer in * block_until_chase_response, but for now we don't. */ edge_port->chaseResponsePending = false; wake_up(&edge_port->wait_chase); return; case IOSP_EXT_STATUS_RX_CHECK_RSP: dev_dbg(dev, "%s ========== Port %u CHECK_RSP Sequence = %02x =============\n", __func__, edge_serial->rxPort, byte3); /* Port->RxCheckRsp = true; */ return; } } if (code == IOSP_STATUS_OPEN_RSP) { edge_port->txCredits = GET_TX_BUFFER_SIZE(byte3); edge_port->maxTxCredits = edge_port->txCredits; dev_dbg(dev, "%s - Port %u Open Response Initial MSR = %02x TxBufferSize = %d\n", __func__, edge_serial->rxPort, byte2, edge_port->txCredits); handle_new_msr(edge_port, byte2); /* send the current line settings to the port so we are in sync with any further termios calls */ tty = tty_port_tty_get(&edge_port->port->port); if (tty) { change_port_settings(tty, edge_port, &tty->termios); tty_kref_put(tty); } /* we have completed the open */ edge_port->openPending = false; edge_port->open = true; wake_up(&edge_port->wait_open); return; } /* If port is closed, silently discard all rcvd status. We can * have cases where buffered status is received AFTER the close * port command is sent to the Edgeport. */ if (!edge_port->open || edge_port->closePending) return; switch (code) { /* Not currently sent by Edgeport */ case IOSP_STATUS_LSR: dev_dbg(dev, "%s - Port %u LSR Status = %02x\n", __func__, edge_serial->rxPort, byte2); handle_new_lsr(edge_port, false, byte2, 0); break; case IOSP_STATUS_LSR_DATA: dev_dbg(dev, "%s - Port %u LSR Status = %02x, Data = %02x\n", __func__, edge_serial->rxPort, byte2, byte3); /* byte2 is LSR Register */ /* byte3 is broken data byte */ handle_new_lsr(edge_port, true, byte2, byte3); break; /* * case IOSP_EXT_4_STATUS: * dev_dbg(dev, "%s - Port %u LSR Status = %02x Data = %02x\n", * __func__, edge_serial->rxPort, byte2, byte3); * break; */ case IOSP_STATUS_MSR: dev_dbg(dev, "%s - Port %u MSR Status = %02x\n", __func__, edge_serial->rxPort, byte2); /* * Process this new modem status and generate appropriate * events, etc, based on the new status. This routine * also saves the MSR in Port->ShadowMsr. */ handle_new_msr(edge_port, byte2); break; default: dev_dbg(dev, "%s - Unrecognized IOSP status code %u\n", __func__, code); break; } } /***************************************************************************** * edge_tty_recv * this function passes data on to the tty flip buffer *****************************************************************************/ static void edge_tty_recv(struct usb_serial_port *port, unsigned char *data, int length) { int cnt; cnt = tty_insert_flip_string(&port->port, data, length); if (cnt < length) { dev_err(&port->dev, "%s - dropping data, %d bytes lost\n", __func__, length - cnt); } data += cnt; length -= cnt; tty_flip_buffer_push(&port->port); } /***************************************************************************** * handle_new_msr * this function handles any change to the msr register for a port. 
*****************************************************************************/ static void handle_new_msr(struct edgeport_port *edge_port, __u8 newMsr) { struct async_icount *icount; if (newMsr & (EDGEPORT_MSR_DELTA_CTS | EDGEPORT_MSR_DELTA_DSR | EDGEPORT_MSR_DELTA_RI | EDGEPORT_MSR_DELTA_CD)) { icount = &edge_port->port->icount; /* update input line counters */ if (newMsr & EDGEPORT_MSR_DELTA_CTS) icount->cts++; if (newMsr & EDGEPORT_MSR_DELTA_DSR) icount->dsr++; if (newMsr & EDGEPORT_MSR_DELTA_CD) icount->dcd++; if (newMsr & EDGEPORT_MSR_DELTA_RI) icount->rng++; wake_up_interruptible(&edge_port->port->port.delta_msr_wait); } /* Save the new modem status */ edge_port->shadowMSR = newMsr & 0xf0; } /***************************************************************************** * handle_new_lsr * this function handles any change to the lsr register for a port. *****************************************************************************/ static void handle_new_lsr(struct edgeport_port *edge_port, __u8 lsrData, __u8 lsr, __u8 data) { __u8 newLsr = (__u8) (lsr & (__u8) (LSR_OVER_ERR | LSR_PAR_ERR | LSR_FRM_ERR | LSR_BREAK)); struct async_icount *icount; edge_port->shadowLSR = lsr; if (newLsr & LSR_BREAK) { /* * Parity and Framing errors only count if they * occur exclusive of a break being * received. */ newLsr &= (__u8)(LSR_OVER_ERR | LSR_BREAK); } /* Place LSR data byte into Rx buffer */ if (lsrData) edge_tty_recv(edge_port->port, &data, 1); /* update input line counters */ icount = &edge_port->port->icount; if (newLsr & LSR_BREAK) icount->brk++; if (newLsr & LSR_OVER_ERR) icount->overrun++; if (newLsr & LSR_PAR_ERR) icount->parity++; if (newLsr & LSR_FRM_ERR) icount->frame++; } /**************************************************************************** * sram_write * writes a number of bytes to the Edgeport device's sram starting at the * given address. * If successful returns the number of bytes written, otherwise it returns * a negative error number of the problem. ****************************************************************************/ static int sram_write(struct usb_serial *serial, __u16 extAddr, __u16 addr, __u16 length, const __u8 *data) { int result; __u16 current_length; unsigned char *transfer_buffer; dev_dbg(&serial->dev->dev, "%s - %x, %x, %d\n", __func__, extAddr, addr, length); transfer_buffer = kmalloc(64, GFP_KERNEL); if (!transfer_buffer) return -ENOMEM; /* need to split these writes up into 64 byte chunks */ result = 0; while (length > 0) { if (length > 64) current_length = 64; else current_length = length; /* dev_dbg(&serial->dev->dev, "%s - writing %x, %x, %d\n", __func__, extAddr, addr, current_length); */ memcpy(transfer_buffer, data, current_length); result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), USB_REQUEST_ION_WRITE_RAM, 0x40, addr, extAddr, transfer_buffer, current_length, 300); if (result < 0) break; length -= current_length; addr += current_length; data += current_length; } kfree(transfer_buffer); return result; } /**************************************************************************** * rom_write * writes a number of bytes to the Edgeport device's ROM starting at the * given address. * If successful returns the number of bytes written, otherwise it returns * a negative error number of the problem. 
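 *
 *	Like sram_write() above, the transfer is split into 64-byte vendor
 *	control requests; a 150-byte image, for example, goes out as chunks
 *	of 64, 64 and 22 bytes.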
****************************************************************************/ static int rom_write(struct usb_serial *serial, __u16 extAddr, __u16 addr, __u16 length, const __u8 *data) { int result; __u16 current_length; unsigned char *transfer_buffer; transfer_buffer = kmalloc(64, GFP_KERNEL); if (!transfer_buffer) return -ENOMEM; /* need to split these writes up into 64 byte chunks */ result = 0; while (length > 0) { if (length > 64) current_length = 64; else current_length = length; memcpy(transfer_buffer, data, current_length); result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), USB_REQUEST_ION_WRITE_ROM, 0x40, addr, extAddr, transfer_buffer, current_length, 300); if (result < 0) break; length -= current_length; addr += current_length; data += current_length; } kfree(transfer_buffer); return result; } /**************************************************************************** * rom_read * reads a number of bytes from the Edgeport device starting at the given * address. * Returns zero on success or a negative error number. ****************************************************************************/ static int rom_read(struct usb_serial *serial, __u16 extAddr, __u16 addr, __u16 length, __u8 *data) { int result; __u16 current_length; unsigned char *transfer_buffer; transfer_buffer = kmalloc(64, GFP_KERNEL); if (!transfer_buffer) return -ENOMEM; /* need to split these reads up into 64 byte chunks */ result = 0; while (length > 0) { if (length > 64) current_length = 64; else current_length = length; result = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), USB_REQUEST_ION_READ_ROM, 0xC0, addr, extAddr, transfer_buffer, current_length, 300); if (result < current_length) { if (result >= 0) result = -EIO; break; } memcpy(data, transfer_buffer, current_length); length -= current_length; addr += current_length; data += current_length; result = 0; } kfree(transfer_buffer); return result; } /**************************************************************************** * send_iosp_ext_cmd * Is used to send a IOSP message to the Edgeport device ****************************************************************************/ static int send_iosp_ext_cmd(struct edgeport_port *edge_port, __u8 command, __u8 param) { unsigned char *buffer; unsigned char *currentCommand; int length = 0; int status = 0; buffer = kmalloc(10, GFP_ATOMIC); if (!buffer) return -ENOMEM; currentCommand = buffer; MAKE_CMD_EXT_CMD(&currentCommand, &length, edge_port->port->port_number, command, param); status = write_cmd_usb(edge_port, buffer, length); if (status) { /* something bad happened, let's free up the memory */ kfree(buffer); } return status; } /***************************************************************************** * write_cmd_usb * this function writes the given buffer out to the bulk write endpoint. 
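 *
 *	On a successful submit the urb completion handler,
 *	edge_bulk_out_cmd_callback(), takes ownership of 'buffer' and frees
 *	it; callers such as send_iosp_ext_cmd() therefore free the buffer
 *	themselves only when this function returns an error.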
*****************************************************************************/ static int write_cmd_usb(struct edgeport_port *edge_port, unsigned char *buffer, int length) { struct edgeport_serial *edge_serial = usb_get_serial_data(edge_port->port->serial); struct device *dev = &edge_port->port->dev; int status = 0; struct urb *urb; usb_serial_debug_data(dev, __func__, length, buffer); /* Allocate our next urb */ urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) return -ENOMEM; atomic_inc(&CmdUrbs); dev_dbg(dev, "%s - ALLOCATE URB %p (outstanding %d)\n", __func__, urb, atomic_read(&CmdUrbs)); usb_fill_bulk_urb(urb, edge_serial->serial->dev, usb_sndbulkpipe(edge_serial->serial->dev, edge_serial->bulk_out_endpoint), buffer, length, edge_bulk_out_cmd_callback, edge_port); edge_port->commandPending = true; status = usb_submit_urb(urb, GFP_ATOMIC); if (status) { /* something went wrong */ dev_err(dev, "%s - usb_submit_urb(write command) failed, status = %d\n", __func__, status); usb_free_urb(urb); atomic_dec(&CmdUrbs); return status; } #if 0 wait_event(&edge_port->wait_command, !edge_port->commandPending); if (edge_port->commandPending) { /* command timed out */ dev_dbg(dev, "%s - command timed out\n", __func__); status = -EINVAL; } #endif return status; } /***************************************************************************** * send_cmd_write_baud_rate * this function sends the proper command to change the baud rate of the * specified port. *****************************************************************************/ static int send_cmd_write_baud_rate(struct edgeport_port *edge_port, int baudRate) { struct edgeport_serial *edge_serial = usb_get_serial_data(edge_port->port->serial); struct device *dev = &edge_port->port->dev; unsigned char *cmdBuffer; unsigned char *currCmd; int cmdLen = 0; int divisor; int status; u32 number = edge_port->port->port_number; if (edge_serial->is_epic && !edge_serial->epic_descriptor.Supports.IOSPSetBaudRate) { dev_dbg(dev, "SendCmdWriteBaudRate - NOT Setting baud rate for port, baud = %d\n", baudRate); return 0; } dev_dbg(dev, "%s - baud = %d\n", __func__, baudRate); status = calc_baud_rate_divisor(dev, baudRate, &divisor); if (status) { dev_err(dev, "%s - bad baud rate\n", __func__); return status; } /* Alloc memory for the string of commands. */ cmdBuffer = kmalloc(0x100, GFP_ATOMIC); if (!cmdBuffer) return -ENOMEM; currCmd = cmdBuffer; /* Enable access to divisor latch */ MAKE_CMD_WRITE_REG(&currCmd, &cmdLen, number, LCR, LCR_DL_ENABLE); /* Write the divisor itself */ MAKE_CMD_WRITE_REG(&currCmd, &cmdLen, number, DLL, LOW8(divisor)); MAKE_CMD_WRITE_REG(&currCmd, &cmdLen, number, DLM, HIGH8(divisor)); /* Restore original value to disable access to divisor latch */ MAKE_CMD_WRITE_REG(&currCmd, &cmdLen, number, LCR, edge_port->shadowLCR); status = write_cmd_usb(edge_port, cmdBuffer, cmdLen); if (status) { /* something bad happened, let's free up the memory */ kfree(cmdBuffer); } return status; } /***************************************************************************** * calc_baud_rate_divisor * this function calculates the proper baud rate divisor for the specified * baud rate. 
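 *
 *	Non-standard rates fall back to the rounded formula
 *	(230400 + baudrate / 2) / baudrate; e.g. a requested 14000 baud
 *	gives divisor 16, the same divisor the table uses for 14400 baud.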
 *****************************************************************************/
static int calc_baud_rate_divisor(struct device *dev, int baudrate, int *divisor)
{
	int i;
	__u16 custom;

	for (i = 0; i < ARRAY_SIZE(divisor_table); i++) {
		if (divisor_table[i].BaudRate == baudrate) {
			*divisor = divisor_table[i].Divisor;
			return 0;
		}
	}

	/* We have tried all of the standard baud rates,
	 * let's try to calculate the divisor for this baud rate.
	 * Make sure the baud rate is reasonable.
	 */
	if (baudrate > 50 && baudrate < 230400) {
		/* get divisor */
		custom = (__u16)((230400L + baudrate/2) / baudrate);

		*divisor = custom;

		dev_dbg(dev, "%s - Baud %d = %d\n", __func__, baudrate, custom);
		return 0;
	}

	return -1;
}

/*****************************************************************************
 * send_cmd_write_uart_register
 *	this function builds up a uart register message and sends it to the
 *	device.
 *****************************************************************************/
static int send_cmd_write_uart_register(struct edgeport_port *edge_port,
						__u8 regNum, __u8 regValue)
{
	struct edgeport_serial *edge_serial =
				usb_get_serial_data(edge_port->port->serial);
	struct device *dev = &edge_port->port->dev;
	unsigned char *cmdBuffer;
	unsigned char *currCmd;
	unsigned long cmdLen = 0;
	int status;

	dev_dbg(dev, "%s - write to %s register 0x%02x\n",
		__func__, (regNum == MCR) ? "MCR" : "LCR", regValue);

	if (edge_serial->is_epic &&
	    !edge_serial->epic_descriptor.Supports.IOSPWriteMCR &&
	    regNum == MCR) {
		dev_dbg(dev, "SendCmdWriteUartReg - Not writing to MCR Register\n");
		return 0;
	}

	if (edge_serial->is_epic &&
	    !edge_serial->epic_descriptor.Supports.IOSPWriteLCR &&
	    regNum == LCR) {
		dev_dbg(dev, "SendCmdWriteUartReg - Not writing to LCR Register\n");
		return 0;
	}

	/* Alloc memory for the string of commands. */
	cmdBuffer = kmalloc(0x10, GFP_ATOMIC);
	if (cmdBuffer == NULL)
		return -ENOMEM;

	currCmd = cmdBuffer;

	/* Build a cmd in the buffer to write the given register */
	MAKE_CMD_WRITE_REG(&currCmd, &cmdLen, edge_port->port->port_number,
			   regNum, regValue);

	status = write_cmd_usb(edge_port, cmdBuffer, cmdLen);
	if (status) {
		/* something bad happened, let's free up the memory */
		kfree(cmdBuffer);
	}

	return status;
}

/*****************************************************************************
 * change_port_settings
 *	This routine is called to set the UART on the device to match the
 *	specified new settings.
*****************************************************************************/ static void change_port_settings(struct tty_struct *tty, struct edgeport_port *edge_port, const struct ktermios *old_termios) { struct device *dev = &edge_port->port->dev; struct edgeport_serial *edge_serial = usb_get_serial_data(edge_port->port->serial); int baud; unsigned cflag; __u8 mask = 0xff; __u8 lData; __u8 lParity; __u8 lStop; __u8 rxFlow; __u8 txFlow; int status; if (!edge_port->open && !edge_port->openPending) { dev_dbg(dev, "%s - port not opened\n", __func__); return; } cflag = tty->termios.c_cflag; switch (cflag & CSIZE) { case CS5: lData = LCR_BITS_5; mask = 0x1f; dev_dbg(dev, "%s - data bits = 5\n", __func__); break; case CS6: lData = LCR_BITS_6; mask = 0x3f; dev_dbg(dev, "%s - data bits = 6\n", __func__); break; case CS7: lData = LCR_BITS_7; mask = 0x7f; dev_dbg(dev, "%s - data bits = 7\n", __func__); break; default: case CS8: lData = LCR_BITS_8; dev_dbg(dev, "%s - data bits = 8\n", __func__); break; } lParity = LCR_PAR_NONE; if (cflag & PARENB) { if (cflag & CMSPAR) { if (cflag & PARODD) { lParity = LCR_PAR_MARK; dev_dbg(dev, "%s - parity = mark\n", __func__); } else { lParity = LCR_PAR_SPACE; dev_dbg(dev, "%s - parity = space\n", __func__); } } else if (cflag & PARODD) { lParity = LCR_PAR_ODD; dev_dbg(dev, "%s - parity = odd\n", __func__); } else { lParity = LCR_PAR_EVEN; dev_dbg(dev, "%s - parity = even\n", __func__); } } else { dev_dbg(dev, "%s - parity = none\n", __func__); } if (cflag & CSTOPB) { lStop = LCR_STOP_2; dev_dbg(dev, "%s - stop bits = 2\n", __func__); } else { lStop = LCR_STOP_1; dev_dbg(dev, "%s - stop bits = 1\n", __func__); } /* figure out the flow control settings */ rxFlow = txFlow = 0x00; if (cflag & CRTSCTS) { rxFlow |= IOSP_RX_FLOW_RTS; txFlow |= IOSP_TX_FLOW_CTS; dev_dbg(dev, "%s - RTS/CTS is enabled\n", __func__); } else { dev_dbg(dev, "%s - RTS/CTS is disabled\n", __func__); } /* if we are implementing XON/XOFF, set the start and stop character in the device */ if (I_IXOFF(tty) || I_IXON(tty)) { unsigned char stop_char = STOP_CHAR(tty); unsigned char start_char = START_CHAR(tty); if (!edge_serial->is_epic || edge_serial->epic_descriptor.Supports.IOSPSetXChar) { send_iosp_ext_cmd(edge_port, IOSP_CMD_SET_XON_CHAR, start_char); send_iosp_ext_cmd(edge_port, IOSP_CMD_SET_XOFF_CHAR, stop_char); } /* if we are implementing INBOUND XON/XOFF */ if (I_IXOFF(tty)) { rxFlow |= IOSP_RX_FLOW_XON_XOFF; dev_dbg(dev, "%s - INBOUND XON/XOFF is enabled, XON = %2x, XOFF = %2x\n", __func__, start_char, stop_char); } else { dev_dbg(dev, "%s - INBOUND XON/XOFF is disabled\n", __func__); } /* if we are implementing OUTBOUND XON/XOFF */ if (I_IXON(tty)) { txFlow |= IOSP_TX_FLOW_XON_XOFF; dev_dbg(dev, "%s - OUTBOUND XON/XOFF is enabled, XON = %2x, XOFF = %2x\n", __func__, start_char, stop_char); } else { dev_dbg(dev, "%s - OUTBOUND XON/XOFF is disabled\n", __func__); } } /* Set flow control to the configured value */ if (!edge_serial->is_epic || edge_serial->epic_descriptor.Supports.IOSPSetRxFlow) send_iosp_ext_cmd(edge_port, IOSP_CMD_SET_RX_FLOW, rxFlow); if (!edge_serial->is_epic || edge_serial->epic_descriptor.Supports.IOSPSetTxFlow) send_iosp_ext_cmd(edge_port, IOSP_CMD_SET_TX_FLOW, txFlow); edge_port->shadowLCR &= ~(LCR_BITS_MASK | LCR_STOP_MASK | LCR_PAR_MASK); edge_port->shadowLCR |= (lData | lParity | lStop); edge_port->validDataMask = mask; /* Send the updated LCR value to the EdgePort */ status = send_cmd_write_uart_register(edge_port, LCR, edge_port->shadowLCR); if (status != 0) 
return; /* set up the MCR register and send it to the EdgePort */ edge_port->shadowMCR = MCR_MASTER_IE; if (cflag & CBAUD) edge_port->shadowMCR |= (MCR_DTR | MCR_RTS); status = send_cmd_write_uart_register(edge_port, MCR, edge_port->shadowMCR); if (status != 0) return; /* Determine divisor based on baud rate */ baud = tty_get_baud_rate(tty); if (!baud) { /* pick a default, any default... */ baud = 9600; } dev_dbg(dev, "%s - baud rate = %d\n", __func__, baud); status = send_cmd_write_baud_rate(edge_port, baud); if (status == -1) { /* Speed change was not possible - put back the old speed */ baud = tty_termios_baud_rate(old_termios); tty_encode_baud_rate(tty, baud, baud); } } /**************************************************************************** * unicode_to_ascii * Turns a string from Unicode into ASCII. * Doesn't do a good job with any characters that are outside the normal * ASCII range, but it's only for debugging... * NOTE: expects the unicode in LE format ****************************************************************************/ static void unicode_to_ascii(char *string, int buflen, __le16 *unicode, int unicode_size) { int i; if (buflen <= 0) /* never happens, but... */ return; --buflen; /* space for nul */ for (i = 0; i < unicode_size; i++) { if (i >= buflen) break; string[i] = (char)(le16_to_cpu(unicode[i])); } string[i] = 0x00; } /**************************************************************************** * get_manufacturing_desc * reads in the manufacturing descriptor and stores it into the serial * structure. ****************************************************************************/ static void get_manufacturing_desc(struct edgeport_serial *edge_serial) { struct device *dev = &edge_serial->serial->dev->dev; int response; dev_dbg(dev, "getting manufacturer descriptor\n"); response = rom_read(edge_serial->serial, (EDGE_MANUF_DESC_ADDR & 0xffff0000) >> 16, (__u16)(EDGE_MANUF_DESC_ADDR & 0x0000ffff), EDGE_MANUF_DESC_LEN, (__u8 *)(&edge_serial->manuf_descriptor)); if (response < 0) { dev_err(dev, "error in getting manufacturer descriptor: %d\n", response); } else { char string[30]; dev_dbg(dev, "**Manufacturer Descriptor\n"); dev_dbg(dev, " RomSize: %dK\n", edge_serial->manuf_descriptor.RomSize); dev_dbg(dev, " RamSize: %dK\n", edge_serial->manuf_descriptor.RamSize); dev_dbg(dev, " CpuRev: %d\n", edge_serial->manuf_descriptor.CpuRev); dev_dbg(dev, " BoardRev: %d\n", edge_serial->manuf_descriptor.BoardRev); dev_dbg(dev, " NumPorts: %d\n", edge_serial->manuf_descriptor.NumPorts); dev_dbg(dev, " DescDate: %d/%d/%d\n", edge_serial->manuf_descriptor.DescDate[0], edge_serial->manuf_descriptor.DescDate[1], edge_serial->manuf_descriptor.DescDate[2]+1900); unicode_to_ascii(string, sizeof(string), edge_serial->manuf_descriptor.SerialNumber, edge_serial->manuf_descriptor.SerNumLength/2); dev_dbg(dev, " SerialNumber: %s\n", string); unicode_to_ascii(string, sizeof(string), edge_serial->manuf_descriptor.AssemblyNumber, edge_serial->manuf_descriptor.AssemblyNumLength/2); dev_dbg(dev, " AssemblyNumber: %s\n", string); unicode_to_ascii(string, sizeof(string), edge_serial->manuf_descriptor.OemAssyNumber, edge_serial->manuf_descriptor.OemAssyNumLength/2); dev_dbg(dev, " OemAssyNumber: %s\n", string); dev_dbg(dev, " UartType: %d\n", edge_serial->manuf_descriptor.UartType); dev_dbg(dev, " IonPid: %d\n", edge_serial->manuf_descriptor.IonPid); dev_dbg(dev, " IonConfig: %d\n", edge_serial->manuf_descriptor.IonConfig); } } 
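/* Note: get_manufacturing_desc() above and get_boot_desc() below both split
 * the 32-bit descriptor address into a 16-bit upper word and a 16-bit offset
 * before passing it to rom_read().
 */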
/**************************************************************************** * get_boot_desc * reads in the bootloader descriptor and stores it into the serial * structure. ****************************************************************************/ static void get_boot_desc(struct edgeport_serial *edge_serial) { struct device *dev = &edge_serial->serial->dev->dev; int response; dev_dbg(dev, "getting boot descriptor\n"); response = rom_read(edge_serial->serial, (EDGE_BOOT_DESC_ADDR & 0xffff0000) >> 16, (__u16)(EDGE_BOOT_DESC_ADDR & 0x0000ffff), EDGE_BOOT_DESC_LEN, (__u8 *)(&edge_serial->boot_descriptor)); if (response < 0) { dev_err(dev, "error in getting boot descriptor: %d\n", response); } else { dev_dbg(dev, "**Boot Descriptor:\n"); dev_dbg(dev, " BootCodeLength: %d\n", le16_to_cpu(edge_serial->boot_descriptor.BootCodeLength)); dev_dbg(dev, " MajorVersion: %d\n", edge_serial->boot_descriptor.MajorVersion); dev_dbg(dev, " MinorVersion: %d\n", edge_serial->boot_descriptor.MinorVersion); dev_dbg(dev, " BuildNumber: %d\n", le16_to_cpu(edge_serial->boot_descriptor.BuildNumber)); dev_dbg(dev, " Capabilities: 0x%x\n", le16_to_cpu(edge_serial->boot_descriptor.Capabilities)); dev_dbg(dev, " UConfig0: %d\n", edge_serial->boot_descriptor.UConfig0); dev_dbg(dev, " UConfig1: %d\n", edge_serial->boot_descriptor.UConfig1); } } /**************************************************************************** * load_application_firmware * This is called to load the application firmware to the device ****************************************************************************/ static void load_application_firmware(struct edgeport_serial *edge_serial) { struct device *dev = &edge_serial->serial->dev->dev; const struct ihex_binrec *rec; const struct firmware *fw; const char *fw_name; const char *fw_info; int response; __u32 Operaddr; __u16 build; switch (edge_serial->product_info.iDownloadFile) { case EDGE_DOWNLOAD_FILE_I930: fw_info = "downloading firmware version (930)"; fw_name = "edgeport/down.fw"; break; case EDGE_DOWNLOAD_FILE_80251: fw_info = "downloading firmware version (80251)"; fw_name = "edgeport/down2.fw"; break; case EDGE_DOWNLOAD_FILE_NONE: dev_dbg(dev, "No download file specified, skipping download\n"); return; default: return; } response = request_ihex_firmware(&fw, fw_name, &edge_serial->serial->dev->dev); if (response) { dev_err(dev, "Failed to load image \"%s\" err %d\n", fw_name, response); return; } rec = (const struct ihex_binrec *)fw->data; build = (rec->data[2] << 8) | rec->data[3]; dev_dbg(dev, "%s %d.%d.%d\n", fw_info, rec->data[0], rec->data[1], build); edge_serial->product_info.FirmwareMajorVersion = rec->data[0]; edge_serial->product_info.FirmwareMinorVersion = rec->data[1]; edge_serial->product_info.FirmwareBuildNumber = cpu_to_le16(build); for (rec = ihex_next_binrec(rec); rec; rec = ihex_next_binrec(rec)) { Operaddr = be32_to_cpu(rec->addr); response = sram_write(edge_serial->serial, Operaddr >> 16, Operaddr & 0xFFFF, be16_to_cpu(rec->len), &rec->data[0]); if (response < 0) { dev_err(&edge_serial->serial->dev->dev, "sram_write failed (%x, %x, %d)\n", Operaddr >> 16, Operaddr & 0xFFFF, be16_to_cpu(rec->len)); break; } } dev_dbg(dev, "sending exec_dl_code\n"); response = usb_control_msg (edge_serial->serial->dev, usb_sndctrlpipe(edge_serial->serial->dev, 0), USB_REQUEST_ION_EXEC_DL_CODE, 0x40, 0x4000, 0x0001, NULL, 0, 3000); release_firmware(fw); } /**************************************************************************** * edge_startup 
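 * Called as the usb-serial attach callback; allocates the edgeport_serial
 * structure, reads the EPiC or manufacturing/boot descriptors, downloads the
 * application firmware for non-EPiC models and sets up the interrupt and
 * bulk-in urbs for EPiC models.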
****************************************************************************/ static int edge_startup(struct usb_serial *serial) { struct edgeport_serial *edge_serial; struct usb_device *dev; struct device *ddev = &serial->dev->dev; int i; int response; bool interrupt_in_found; bool bulk_in_found; bool bulk_out_found; static const __u32 descriptor[3] = { EDGE_COMPATIBILITY_MASK0, EDGE_COMPATIBILITY_MASK1, EDGE_COMPATIBILITY_MASK2 }; dev = serial->dev; /* create our private serial structure */ edge_serial = kzalloc(sizeof(struct edgeport_serial), GFP_KERNEL); if (!edge_serial) return -ENOMEM; spin_lock_init(&edge_serial->es_lock); edge_serial->serial = serial; usb_set_serial_data(serial, edge_serial); /* get the name for the device from the device */ i = usb_string(dev, dev->descriptor.iManufacturer, &edge_serial->name[0], MAX_NAME_LEN+1); if (i < 0) i = 0; edge_serial->name[i++] = ' '; usb_string(dev, dev->descriptor.iProduct, &edge_serial->name[i], MAX_NAME_LEN+2 - i); dev_info(&serial->dev->dev, "%s detected\n", edge_serial->name); /* Read the epic descriptor */ if (get_epic_descriptor(edge_serial) < 0) { /* memcpy descriptor to Supports structures */ memcpy(&edge_serial->epic_descriptor.Supports, descriptor, sizeof(struct edge_compatibility_bits)); /* get the manufacturing descriptor for this device */ get_manufacturing_desc(edge_serial); /* get the boot descriptor */ get_boot_desc(edge_serial); get_product_info(edge_serial); } /* set the number of ports from the manufacturing description */ /* serial->num_ports = serial->product_info.NumPorts; */ if ((!edge_serial->is_epic) && (edge_serial->product_info.NumPorts != serial->num_ports)) { dev_warn(ddev, "Device Reported %d serial ports vs. core thinking we have %d ports, email greg@kroah.com this information.\n", edge_serial->product_info.NumPorts, serial->num_ports); } dev_dbg(ddev, "%s - time 1 %ld\n", __func__, jiffies); /* If not an EPiC device */ if (!edge_serial->is_epic) { /* now load the application firmware into this device */ load_application_firmware(edge_serial); dev_dbg(ddev, "%s - time 2 %ld\n", __func__, jiffies); /* Check current Edgeport EEPROM and update if necessary */ update_edgeport_E2PROM(edge_serial); dev_dbg(ddev, "%s - time 3 %ld\n", __func__, jiffies); /* set the configuration to use #1 */ /* dev_dbg(ddev, "set_configuration 1\n"); */ /* usb_set_configuration (dev, 1); */ } dev_dbg(ddev, " FirmwareMajorVersion %d.%d.%d\n", edge_serial->product_info.FirmwareMajorVersion, edge_serial->product_info.FirmwareMinorVersion, le16_to_cpu(edge_serial->product_info.FirmwareBuildNumber)); /* we set up the pointers to the endpoints in the edge_open function, * as the structures aren't created yet. */ response = 0; if (edge_serial->is_epic) { struct usb_host_interface *alt; alt = serial->interface->cur_altsetting; /* EPIC thing, set up our interrupt polling now and our read * urb, so that the device knows it really is connected. 
*/ interrupt_in_found = bulk_in_found = bulk_out_found = false; for (i = 0; i < alt->desc.bNumEndpoints; ++i) { struct usb_endpoint_descriptor *endpoint; int buffer_size; endpoint = &alt->endpoint[i].desc; buffer_size = usb_endpoint_maxp(endpoint); if (!interrupt_in_found && (usb_endpoint_is_int_in(endpoint))) { /* we found a interrupt in endpoint */ dev_dbg(ddev, "found interrupt in\n"); /* not set up yet, so do it now */ edge_serial->interrupt_read_urb = usb_alloc_urb(0, GFP_KERNEL); if (!edge_serial->interrupt_read_urb) { response = -ENOMEM; break; } edge_serial->interrupt_in_buffer = kmalloc(buffer_size, GFP_KERNEL); if (!edge_serial->interrupt_in_buffer) { response = -ENOMEM; break; } edge_serial->interrupt_in_endpoint = endpoint->bEndpointAddress; /* set up our interrupt urb */ usb_fill_int_urb( edge_serial->interrupt_read_urb, dev, usb_rcvintpipe(dev, endpoint->bEndpointAddress), edge_serial->interrupt_in_buffer, buffer_size, edge_interrupt_callback, edge_serial, endpoint->bInterval); interrupt_in_found = true; } if (!bulk_in_found && (usb_endpoint_is_bulk_in(endpoint))) { /* we found a bulk in endpoint */ dev_dbg(ddev, "found bulk in\n"); /* not set up yet, so do it now */ edge_serial->read_urb = usb_alloc_urb(0, GFP_KERNEL); if (!edge_serial->read_urb) { response = -ENOMEM; break; } edge_serial->bulk_in_buffer = kmalloc(buffer_size, GFP_KERNEL); if (!edge_serial->bulk_in_buffer) { response = -ENOMEM; break; } edge_serial->bulk_in_endpoint = endpoint->bEndpointAddress; /* set up our bulk in urb */ usb_fill_bulk_urb(edge_serial->read_urb, dev, usb_rcvbulkpipe(dev, endpoint->bEndpointAddress), edge_serial->bulk_in_buffer, usb_endpoint_maxp(endpoint), edge_bulk_in_callback, edge_serial); bulk_in_found = true; } if (!bulk_out_found && (usb_endpoint_is_bulk_out(endpoint))) { /* we found a bulk out endpoint */ dev_dbg(ddev, "found bulk out\n"); edge_serial->bulk_out_endpoint = endpoint->bEndpointAddress; bulk_out_found = true; } } if (response || !interrupt_in_found || !bulk_in_found || !bulk_out_found) { if (!response) { dev_err(ddev, "expected endpoints not found\n"); response = -ENODEV; } goto error; } /* start interrupt read for this edgeport this interrupt will * continue as long as the edgeport is connected */ response = usb_submit_urb(edge_serial->interrupt_read_urb, GFP_KERNEL); if (response) { dev_err(ddev, "%s - Error %d submitting control urb\n", __func__, response); goto error; } } return response; error: usb_free_urb(edge_serial->interrupt_read_urb); kfree(edge_serial->interrupt_in_buffer); usb_free_urb(edge_serial->read_urb); kfree(edge_serial->bulk_in_buffer); kfree(edge_serial); return response; } /**************************************************************************** * edge_disconnect * This function is called whenever the device is removed from the usb bus. ****************************************************************************/ static void edge_disconnect(struct usb_serial *serial) { struct edgeport_serial *edge_serial = usb_get_serial_data(serial); if (edge_serial->is_epic) { usb_kill_urb(edge_serial->interrupt_read_urb); usb_kill_urb(edge_serial->read_urb); } } /**************************************************************************** * edge_release * This function is called when the device structure is deallocated. 
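 * For EPiC devices it kills and frees the interrupt and bulk-in urbs and
 * their buffers set up in edge_startup, then frees the edgeport_serial
 * structure itself.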
****************************************************************************/ static void edge_release(struct usb_serial *serial) { struct edgeport_serial *edge_serial = usb_get_serial_data(serial); if (edge_serial->is_epic) { usb_kill_urb(edge_serial->interrupt_read_urb); usb_free_urb(edge_serial->interrupt_read_urb); kfree(edge_serial->interrupt_in_buffer); usb_kill_urb(edge_serial->read_urb); usb_free_urb(edge_serial->read_urb); kfree(edge_serial->bulk_in_buffer); } kfree(edge_serial); } static int edge_port_probe(struct usb_serial_port *port) { struct edgeport_port *edge_port; edge_port = kzalloc(sizeof(*edge_port), GFP_KERNEL); if (!edge_port) return -ENOMEM; spin_lock_init(&edge_port->ep_lock); edge_port->port = port; usb_set_serial_port_data(port, edge_port); return 0; } static void edge_port_remove(struct usb_serial_port *port) { struct edgeport_port *edge_port; edge_port = usb_get_serial_port_data(port); kfree(edge_port); } static struct usb_serial_driver edgeport_2port_device = { .driver = { .name = "edgeport_2", }, .description = "Edgeport 2 port adapter", .id_table = edgeport_2port_id_table, .num_ports = 2, .num_bulk_in = 1, .num_bulk_out = 1, .num_interrupt_in = 1, .open = edge_open, .close = edge_close, .throttle = edge_throttle, .unthrottle = edge_unthrottle, .attach = edge_startup, .disconnect = edge_disconnect, .release = edge_release, .port_probe = edge_port_probe, .port_remove = edge_port_remove, .ioctl = edge_ioctl, .set_termios = edge_set_termios, .tiocmget = edge_tiocmget, .tiocmset = edge_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .get_icount = usb_serial_generic_get_icount, .write = edge_write, .write_room = edge_write_room, .chars_in_buffer = edge_chars_in_buffer, .break_ctl = edge_break, .read_int_callback = edge_interrupt_callback, .read_bulk_callback = edge_bulk_in_callback, .write_bulk_callback = edge_bulk_out_data_callback, }; static struct usb_serial_driver edgeport_4port_device = { .driver = { .name = "edgeport_4", }, .description = "Edgeport 4 port adapter", .id_table = edgeport_4port_id_table, .num_ports = 4, .num_bulk_in = 1, .num_bulk_out = 1, .num_interrupt_in = 1, .open = edge_open, .close = edge_close, .throttle = edge_throttle, .unthrottle = edge_unthrottle, .attach = edge_startup, .disconnect = edge_disconnect, .release = edge_release, .port_probe = edge_port_probe, .port_remove = edge_port_remove, .ioctl = edge_ioctl, .set_termios = edge_set_termios, .tiocmget = edge_tiocmget, .tiocmset = edge_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .get_icount = usb_serial_generic_get_icount, .write = edge_write, .write_room = edge_write_room, .chars_in_buffer = edge_chars_in_buffer, .break_ctl = edge_break, .read_int_callback = edge_interrupt_callback, .read_bulk_callback = edge_bulk_in_callback, .write_bulk_callback = edge_bulk_out_data_callback, }; static struct usb_serial_driver edgeport_8port_device = { .driver = { .name = "edgeport_8", }, .description = "Edgeport 8 port adapter", .id_table = edgeport_8port_id_table, .num_ports = 8, .num_bulk_in = 1, .num_bulk_out = 1, .num_interrupt_in = 1, .open = edge_open, .close = edge_close, .throttle = edge_throttle, .unthrottle = edge_unthrottle, .attach = edge_startup, .disconnect = edge_disconnect, .release = edge_release, .port_probe = edge_port_probe, .port_remove = edge_port_remove, .ioctl = edge_ioctl, .set_termios = edge_set_termios, .tiocmget = edge_tiocmget, .tiocmset = edge_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .get_icount = usb_serial_generic_get_icount, .write = 
edge_write, .write_room = edge_write_room, .chars_in_buffer = edge_chars_in_buffer, .break_ctl = edge_break, .read_int_callback = edge_interrupt_callback, .read_bulk_callback = edge_bulk_in_callback, .write_bulk_callback = edge_bulk_out_data_callback, }; static struct usb_serial_driver epic_device = { .driver = { .name = "epic", }, .description = "EPiC device", .id_table = Epic_port_id_table, .num_ports = 1, .num_bulk_in = 1, .num_bulk_out = 1, .num_interrupt_in = 1, .open = edge_open, .close = edge_close, .throttle = edge_throttle, .unthrottle = edge_unthrottle, .attach = edge_startup, .disconnect = edge_disconnect, .release = edge_release, .port_probe = edge_port_probe, .port_remove = edge_port_remove, .ioctl = edge_ioctl, .set_termios = edge_set_termios, .tiocmget = edge_tiocmget, .tiocmset = edge_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .get_icount = usb_serial_generic_get_icount, .write = edge_write, .write_room = edge_write_room, .chars_in_buffer = edge_chars_in_buffer, .break_ctl = edge_break, .read_int_callback = edge_interrupt_callback, .read_bulk_callback = edge_bulk_in_callback, .write_bulk_callback = edge_bulk_out_data_callback, }; static struct usb_serial_driver * const serial_drivers[] = { &edgeport_2port_device, &edgeport_4port_device, &edgeport_8port_device, &epic_device, NULL }; module_usb_serial_driver(serial_drivers, id_table_combined); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); MODULE_FIRMWARE("edgeport/boot.fw"); MODULE_FIRMWARE("edgeport/boot2.fw"); MODULE_FIRMWARE("edgeport/down.fw"); MODULE_FIRMWARE("edgeport/down2.fw");
// SPDX-License-Identifier: GPL-2.0 /* XDP user-space ring structure * Copyright(c) 2018 Intel Corporation. */ #include <linux/log2.h> #include <linux/slab.h> #include <linux/overflow.h> #include <linux/vmalloc.h> #include <net/xdp_sock_drv.h> #include "xsk_queue.h" static size_t xskq_get_ring_size(struct xsk_queue *q, bool umem_queue) { struct xdp_umem_ring *umem_ring; struct xdp_rxtx_ring *rxtx_ring; if (umem_queue) return struct_size(umem_ring, desc, q->nentries); return struct_size(rxtx_ring, desc, q->nentries); } struct xsk_queue *xskq_create(u32 nentries, bool umem_queue) { struct xsk_queue *q; size_t size; q = kzalloc(sizeof(*q), GFP_KERNEL); if (!q) return NULL; q->nentries = nentries; q->ring_mask = nentries - 1; size = xskq_get_ring_size(q, umem_queue); /* size which is overflowing or close to SIZE_MAX will become 0 in * PAGE_ALIGN(), checking SIZE_MAX is enough due to the previous * is_power_of_2(), the rest will be handled by vmalloc_user() */ if (unlikely(size == SIZE_MAX)) { kfree(q); return NULL; } size = PAGE_ALIGN(size); q->ring = vmalloc_user(size); if (!q->ring) { kfree(q); return NULL; } q->ring_vmalloc_size = size; return q; } void xskq_destroy(struct xsk_queue *q) { if (!q) return; vfree(q->ring); kfree(q); }
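/*
 * Illustrative sketch only (not part of xsk_queue.c): a hypothetical caller
 * creating and tearing down an rx/tx descriptor ring.  As the SIZE_MAX
 * comment above notes, nentries is expected to already be a power of two
 * (q->ring_mask = nentries - 1 relies on it).
 *
 *	struct xsk_queue *q;
 *
 *	q = xskq_create(512, false);	// false: rx/tx ring, not a umem ring
 *	if (!q)
 *		return -ENOMEM;
 *	...
 *	xskq_destroy(q);
 */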
/* * crc32.h * See linux/lib/crc32.c for license and changes */ #ifndef _LINUX_CRC32_H #define _LINUX_CRC32_H #include <linux/types.h> #include <linux/bitrev.h> u32 crc32_le_arch(u32 crc, const u8 *p, size_t len); u32 crc32_le_base(u32 crc, const u8 *p, size_t len); u32 crc32_be_arch(u32 crc, const u8 *p, size_t len); u32 crc32_be_base(u32 crc, const u8 *p, size_t len); u32 crc32c_arch(u32 crc, const u8 *p, size_t len); u32 crc32c_base(u32 crc, const u8 *p, size_t len); static inline u32 crc32_le(u32 crc, const void *p, size_t len) { if (IS_ENABLED(CONFIG_CRC32_ARCH)) return crc32_le_arch(crc, p, len); return crc32_le_base(crc, p, len); } static inline u32 crc32_be(u32 crc, const void *p, size_t len) { if (IS_ENABLED(CONFIG_CRC32_ARCH)) return crc32_be_arch(crc, p, len); return crc32_be_base(crc, p, len); } static inline u32 crc32c(u32 crc, const void *p, size_t len) { if (IS_ENABLED(CONFIG_CRC32_ARCH)) return crc32c_arch(crc, p, len); return crc32c_base(crc, p, len); } /* * crc32_optimizations() returns flags that indicate which CRC32 library * functions are using architecture-specific optimizations. Unlike * IS_ENABLED(CONFIG_CRC32_ARCH) it takes into account the different CRC32 * variants and also whether any needed CPU features are available at runtime. */ #define CRC32_LE_OPTIMIZATION BIT(0) /* crc32_le() is optimized */ #define CRC32_BE_OPTIMIZATION BIT(1) /* crc32_be() is optimized */ #define CRC32C_OPTIMIZATION BIT(2) /* crc32c() is optimized */ #if IS_ENABLED(CONFIG_CRC32_ARCH) u32 crc32_optimizations(void); #else static inline u32 crc32_optimizations(void) { return 0; } #endif /** * crc32_le_combine - Combine two crc32 check values into one. For two * sequences of bytes, seq1 and seq2 with lengths len1 * and len2, crc32_le() check values were calculated * for each, crc1 and crc2. * * @crc1: crc32 of the first block * @crc2: crc32 of the second block * @len2: length of the second block * * Return: The crc32_le() check value of seq1 and seq2 concatenated, * requiring only crc1, crc2, and len2. Note: If seq_full denotes * the concatenated memory area of seq1 with seq2, and crc_full * the crc32_le() value of seq_full, then crc_full == * crc32_le_combine(crc1, crc2, len2) when crc_full was seeded * with the same initializer as crc1, and crc2 seed was 0. See * also crc32_combine_test(). */ u32 crc32_le_shift(u32 crc, size_t len); static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2) { return crc32_le_shift(crc1, len2) ^ crc2; } u32 crc32c_shift(u32 crc, size_t len); /** * crc32c_combine - Combine two crc32c check values into one. For two sequences * of bytes, seq1 and seq2 with lengths len1 and len2, crc32c() * check values were calculated for each, crc1 and crc2. * * @crc1: crc32c of the first block * @crc2: crc32c of the second block * @len2: length of the second block * * Return: The crc32c() check value of seq1 and seq2 concatenated, requiring * only crc1, crc2, and len2.
Note: If seq_full denotes the concatenated * memory area of seq1 with seq2, and crc_full the crc32c() value of * seq_full, then crc_full == crc32c_combine(crc1, crc2, len2) when * crc_full was seeded with the same initializer as crc1, and crc2 seed * was 0. See also crc_combine_test(). */ static inline u32 crc32c_combine(u32 crc1, u32 crc2, size_t len2) { return crc32c_shift(crc1, len2) ^ crc2; } #define crc32(seed, data, length) crc32_le(seed, (unsigned char const *)(data), length) /* * Helpers for hash table generation of ethernet nics: * * Ethernet sends the least significant bit of a byte first, thus crc32_le * is used. The output of crc32_le is bit reversed [most significant bit * is in bit nr 0], thus it must be reversed before use. Except for * nics that bit swap the result internally... */ #define ether_crc(length, data) bitrev32(crc32_le(~0, data, length)) #define ether_crc_le(length, data) crc32_le(~0, data, length) #endif /* _LINUX_CRC32_H */
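/*
 * Illustrative sketch only (not part of the original header): combining the
 * CRCs of two buffers as described in the crc32_le_combine() kernel-doc
 * above.  buf1/buf2/len1/len2 are hypothetical caller variables.
 *
 *	u32 crc1 = crc32_le(~0, buf1, len1);	// first part, seeded normally
 *	u32 crc2 = crc32_le(0, buf2, len2);	// second part must use seed 0
 *	u32 crc_full = crc32_le_combine(crc1, crc2, len2);
 *	// crc_full == crc32_le(~0, buf_concat, len1 + len2)
 */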
// SPDX-License-Identifier: GPL-2.0 #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/if_vlan.h> #include <linux/netpoll.h> #include <linux/export.h> #include <net/gro.h> #include "vlan.h" bool vlan_do_receive(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; __be16 vlan_proto = skb->vlan_proto; u16 vlan_id = skb_vlan_tag_get_id(skb); struct net_device *vlan_dev; struct vlan_pcpu_stats *rx_stats; vlan_dev = vlan_find_dev(skb->dev, vlan_proto, vlan_id); if (!vlan_dev) return false; skb = *skbp = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) return false; if (unlikely(!(vlan_dev->flags & IFF_UP))) { kfree_skb(skb); *skbp = NULL; return false; } skb->dev = vlan_dev; if (unlikely(skb->pkt_type == PACKET_OTHERHOST)) { /* Our lower layer thinks this is not local, let's make sure. * This allows the VLAN to have a different MAC than the * underlying device, and still route correctly.
*/ if (ether_addr_equal_64bits(eth_hdr(skb)->h_dest, vlan_dev->dev_addr)) skb->pkt_type = PACKET_HOST; } if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR) && !netif_is_macvlan_port(vlan_dev) && !netif_is_bridge_port(vlan_dev)) { unsigned int offset = skb->data - skb_mac_header(skb); /* * vlan_insert_tag expect skb->data pointing to mac header. * So change skb->data before calling it and change back to * original position later */ skb_push(skb, offset); skb = *skbp = vlan_insert_inner_tag(skb, skb->vlan_proto, skb->vlan_tci, skb->mac_len); if (!skb) return false; skb_pull(skb, offset + VLAN_HLEN); skb_reset_mac_len(skb); } skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci); __vlan_hwaccel_clear_tag(skb); rx_stats = this_cpu_ptr(vlan_dev_priv(vlan_dev)->vlan_pcpu_stats); u64_stats_update_begin(&rx_stats->syncp); u64_stats_inc(&rx_stats->rx_packets); u64_stats_add(&rx_stats->rx_bytes, skb->len); if (skb->pkt_type == PACKET_MULTICAST) u64_stats_inc(&rx_stats->rx_multicast); u64_stats_update_end(&rx_stats->syncp); return true; } /* Must be invoked with rcu_read_lock. */ struct net_device *__vlan_find_dev_deep_rcu(struct net_device *dev, __be16 vlan_proto, u16 vlan_id) { struct vlan_info *vlan_info = rcu_dereference(dev->vlan_info); if (vlan_info) { return vlan_group_get_device(&vlan_info->grp, vlan_proto, vlan_id); } else { /* * Lower devices of master uppers (bonding, team) do not have * grp assigned to themselves. Grp is assigned to upper device * instead. */ struct net_device *upper_dev; upper_dev = netdev_master_upper_dev_get_rcu(dev); if (upper_dev) return __vlan_find_dev_deep_rcu(upper_dev, vlan_proto, vlan_id); } return NULL; } EXPORT_SYMBOL(__vlan_find_dev_deep_rcu); struct net_device *vlan_dev_real_dev(const struct net_device *dev) { struct net_device *ret = vlan_dev_priv(dev)->real_dev; while (is_vlan_dev(ret)) ret = vlan_dev_priv(ret)->real_dev; return ret; } EXPORT_SYMBOL(vlan_dev_real_dev); u16 vlan_dev_vlan_id(const struct net_device *dev) { return vlan_dev_priv(dev)->vlan_id; } EXPORT_SYMBOL(vlan_dev_vlan_id); __be16 vlan_dev_vlan_proto(const struct net_device *dev) { return vlan_dev_priv(dev)->vlan_proto; } EXPORT_SYMBOL(vlan_dev_vlan_proto); /* * vlan info and vid list */ static void vlan_group_free(struct vlan_group *grp) { int i, j; for (i = 0; i < VLAN_PROTO_NUM; i++) for (j = 0; j < VLAN_GROUP_ARRAY_SPLIT_PARTS; j++) kfree(grp->vlan_devices_arrays[i][j]); } static void vlan_info_free(struct vlan_info *vlan_info) { vlan_group_free(&vlan_info->grp); kfree(vlan_info); } static void vlan_info_rcu_free(struct rcu_head *rcu) { vlan_info_free(container_of(rcu, struct vlan_info, rcu)); } static struct vlan_info *vlan_info_alloc(struct net_device *dev) { struct vlan_info *vlan_info; vlan_info = kzalloc(sizeof(struct vlan_info), GFP_KERNEL); if (!vlan_info) return NULL; vlan_info->real_dev = dev; INIT_LIST_HEAD(&vlan_info->vid_list); return vlan_info; } struct vlan_vid_info { struct list_head list; __be16 proto; u16 vid; int refcount; }; static bool vlan_hw_filter_capable(const struct net_device *dev, __be16 proto) { if (proto == htons(ETH_P_8021Q) && dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) return true; if (proto == htons(ETH_P_8021AD) && dev->features & NETIF_F_HW_VLAN_STAG_FILTER) return true; return false; } static struct vlan_vid_info *vlan_vid_info_get(struct vlan_info *vlan_info, __be16 proto, u16 vid) { struct vlan_vid_info *vid_info; list_for_each_entry(vid_info, &vlan_info->vid_list, list) { if (vid_info->proto == proto && vid_info->vid == 
vid) return vid_info; } return NULL; } static struct vlan_vid_info *vlan_vid_info_alloc(__be16 proto, u16 vid) { struct vlan_vid_info *vid_info; vid_info = kzalloc(sizeof(struct vlan_vid_info), GFP_KERNEL); if (!vid_info) return NULL; vid_info->proto = proto; vid_info->vid = vid; return vid_info; } static int vlan_add_rx_filter_info(struct net_device *dev, __be16 proto, u16 vid) { if (!vlan_hw_filter_capable(dev, proto)) return 0; if (netif_device_present(dev)) return dev->netdev_ops->ndo_vlan_rx_add_vid(dev, proto, vid); else return -ENODEV; } static int vlan_kill_rx_filter_info(struct net_device *dev, __be16 proto, u16 vid) { if (!vlan_hw_filter_capable(dev, proto)) return 0; if (netif_device_present(dev)) return dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, proto, vid); else return -ENODEV; } int vlan_for_each(struct net_device *dev, int (*action)(struct net_device *dev, int vid, void *arg), void *arg) { struct vlan_vid_info *vid_info; struct vlan_info *vlan_info; struct net_device *vdev; int ret; ASSERT_RTNL(); vlan_info = rtnl_dereference(dev->vlan_info); if (!vlan_info) return 0; list_for_each_entry(vid_info, &vlan_info->vid_list, list) { vdev = vlan_group_get_device(&vlan_info->grp, vid_info->proto, vid_info->vid); ret = action(vdev, vid_info->vid, arg); if (ret) return ret; } return 0; } EXPORT_SYMBOL(vlan_for_each); int vlan_filter_push_vids(struct vlan_info *vlan_info, __be16 proto) { struct net_device *real_dev = vlan_info->real_dev; struct vlan_vid_info *vlan_vid_info; int err; list_for_each_entry(vlan_vid_info, &vlan_info->vid_list, list) { if (vlan_vid_info->proto == proto) { err = vlan_add_rx_filter_info(real_dev, proto, vlan_vid_info->vid); if (err) goto unwind; } } return 0; unwind: list_for_each_entry_continue_reverse(vlan_vid_info, &vlan_info->vid_list, list) { if (vlan_vid_info->proto == proto) vlan_kill_rx_filter_info(real_dev, proto, vlan_vid_info->vid); } return err; } EXPORT_SYMBOL(vlan_filter_push_vids); void vlan_filter_drop_vids(struct vlan_info *vlan_info, __be16 proto) { struct vlan_vid_info *vlan_vid_info; list_for_each_entry(vlan_vid_info, &vlan_info->vid_list, list) if (vlan_vid_info->proto == proto) vlan_kill_rx_filter_info(vlan_info->real_dev, vlan_vid_info->proto, vlan_vid_info->vid); } EXPORT_SYMBOL(vlan_filter_drop_vids); static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid, struct vlan_vid_info **pvid_info) { struct net_device *dev = vlan_info->real_dev; struct vlan_vid_info *vid_info; int err; vid_info = vlan_vid_info_alloc(proto, vid); if (!vid_info) return -ENOMEM; err = vlan_add_rx_filter_info(dev, proto, vid); if (err) { kfree(vid_info); return err; } list_add(&vid_info->list, &vlan_info->vid_list); vlan_info->nr_vids++; *pvid_info = vid_info; return 0; } int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid) { struct vlan_info *vlan_info; struct vlan_vid_info *vid_info; bool vlan_info_created = false; int err; ASSERT_RTNL(); vlan_info = rtnl_dereference(dev->vlan_info); if (!vlan_info) { vlan_info = vlan_info_alloc(dev); if (!vlan_info) return -ENOMEM; vlan_info_created = true; } vid_info = vlan_vid_info_get(vlan_info, proto, vid); if (!vid_info) { err = __vlan_vid_add(vlan_info, proto, vid, &vid_info); if (err) goto out_free_vlan_info; } vid_info->refcount++; if (vlan_info_created) rcu_assign_pointer(dev->vlan_info, vlan_info); return 0; out_free_vlan_info: if (vlan_info_created) kfree(vlan_info); return err; } EXPORT_SYMBOL(vlan_vid_add); static void __vlan_vid_del(struct vlan_info *vlan_info, struct vlan_vid_info 
*vid_info) { struct net_device *dev = vlan_info->real_dev; __be16 proto = vid_info->proto; u16 vid = vid_info->vid; int err; err = vlan_kill_rx_filter_info(dev, proto, vid); if (err && dev->reg_state != NETREG_UNREGISTERING) netdev_warn(dev, "failed to kill vid %04x/%d\n", proto, vid); list_del(&vid_info->list); kfree(vid_info); vlan_info->nr_vids--; } void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid) { struct vlan_info *vlan_info; struct vlan_vid_info *vid_info; ASSERT_RTNL(); vlan_info = rtnl_dereference(dev->vlan_info); if (!vlan_info) return; vid_info = vlan_vid_info_get(vlan_info, proto, vid); if (!vid_info) return; vid_info->refcount--; if (vid_info->refcount == 0) { __vlan_vid_del(vlan_info, vid_info); if (vlan_info->nr_vids == 0) { RCU_INIT_POINTER(dev->vlan_info, NULL); call_rcu(&vlan_info->rcu, vlan_info_rcu_free); } } } EXPORT_SYMBOL(vlan_vid_del); int vlan_vids_add_by_dev(struct net_device *dev, const struct net_device *by_dev) { struct vlan_vid_info *vid_info; struct vlan_info *vlan_info; int err; ASSERT_RTNL(); vlan_info = rtnl_dereference(by_dev->vlan_info); if (!vlan_info) return 0; list_for_each_entry(vid_info, &vlan_info->vid_list, list) { if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) continue; err = vlan_vid_add(dev, vid_info->proto, vid_info->vid); if (err) goto unwind; } return 0; unwind: list_for_each_entry_continue_reverse(vid_info, &vlan_info->vid_list, list) { if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) continue; vlan_vid_del(dev, vid_info->proto, vid_info->vid); } return err; } EXPORT_SYMBOL(vlan_vids_add_by_dev); void vlan_vids_del_by_dev(struct net_device *dev, const struct net_device *by_dev) { struct vlan_vid_info *vid_info; struct vlan_info *vlan_info; ASSERT_RTNL(); vlan_info = rtnl_dereference(by_dev->vlan_info); if (!vlan_info) return; list_for_each_entry(vid_info, &vlan_info->vid_list, list) { if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) continue; vlan_vid_del(dev, vid_info->proto, vid_info->vid); } } EXPORT_SYMBOL(vlan_vids_del_by_dev); bool vlan_uses_dev(const struct net_device *dev) { struct vlan_info *vlan_info; ASSERT_RTNL(); vlan_info = rtnl_dereference(dev->vlan_info); if (!vlan_info) return false; return vlan_info->grp.nr_vlan_devs ? 
true : false; } EXPORT_SYMBOL(vlan_uses_dev); static struct sk_buff *vlan_gro_receive(struct list_head *head, struct sk_buff *skb) { const struct packet_offload *ptype; unsigned int hlen, off_vlan; struct sk_buff *pp = NULL; struct vlan_hdr *vhdr; struct sk_buff *p; __be16 type; int flush = 1; off_vlan = skb_gro_offset(skb); hlen = off_vlan + sizeof(*vhdr); vhdr = skb_gro_header(skb, hlen, off_vlan); if (unlikely(!vhdr)) goto out; NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = hlen; type = vhdr->h_vlan_encapsulated_proto; ptype = gro_find_receive_by_type(type); if (!ptype) goto out; flush = 0; list_for_each_entry(p, head, list) { struct vlan_hdr *vhdr2; if (!NAPI_GRO_CB(p)->same_flow) continue; vhdr2 = (struct vlan_hdr *)(p->data + off_vlan); if (compare_vlan_header(vhdr, vhdr2)) NAPI_GRO_CB(p)->same_flow = 0; } skb_gro_pull(skb, sizeof(*vhdr)); skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr)); pp = indirect_call_gro_receive_inet(ptype->callbacks.gro_receive, ipv6_gro_receive, inet_gro_receive, head, skb); out: skb_gro_flush_final(skb, pp, flush); return pp; } static int vlan_gro_complete(struct sk_buff *skb, int nhoff) { struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + nhoff); __be16 type = vhdr->h_vlan_encapsulated_proto; struct packet_offload *ptype; int err = -ENOENT; ptype = gro_find_complete_by_type(type); if (ptype) err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete, ipv6_gro_complete, inet_gro_complete, skb, nhoff + sizeof(*vhdr)); return err; } static struct packet_offload vlan_packet_offloads[] __read_mostly = { { .type = cpu_to_be16(ETH_P_8021Q), .priority = 10, .callbacks = { .gro_receive = vlan_gro_receive, .gro_complete = vlan_gro_complete, }, }, { .type = cpu_to_be16(ETH_P_8021AD), .priority = 10, .callbacks = { .gro_receive = vlan_gro_receive, .gro_complete = vlan_gro_complete, }, }, }; static int __init vlan_offload_init(void) { unsigned int i; for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++) dev_add_offload(&vlan_packet_offloads[i]); return 0; } fs_initcall(vlan_offload_init);
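/*
 * Illustrative sketch only (not part of vlan_core.c): a hypothetical helper
 * that counts the VLAN devices stacked on a real device using
 * vlan_for_each().  The caller must hold RTNL, as the ASSERT_RTNL() in
 * vlan_for_each() requires; returning non-zero from the action callback
 * would stop the iteration early.
 */
static int example_count_vid(struct net_device *vdev, int vid, void *arg)
{
	unsigned int *count = arg;

	(*count)++;
	return 0;
}

static unsigned int example_count_vlans(struct net_device *dev)
{
	unsigned int count = 0;

	vlan_for_each(dev, example_count_vid, &count);
	return count;
}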
/* * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
* * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include <linux/kernel.h> #include <linux/slab.h> #include <net/sock.h> #include <linux/in.h> #include <linux/export.h> #include <linux/sched/clock.h> #include <linux/time.h> #include <linux/rds.h> #include "rds.h" void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, struct in6_addr *saddr) { refcount_set(&inc->i_refcount, 1); INIT_LIST_HEAD(&inc->i_item); inc->i_conn = conn; inc->i_saddr = *saddr; inc->i_usercopy.rdma_cookie = 0; inc->i_usercopy.rx_tstamp = ktime_set(0, 0); memset(inc->i_rx_lat_trace, 0, sizeof(inc->i_rx_lat_trace)); } EXPORT_SYMBOL_GPL(rds_inc_init); void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *cp, struct in6_addr *saddr) { refcount_set(&inc->i_refcount, 1); INIT_LIST_HEAD(&inc->i_item); inc->i_conn = cp->cp_conn; inc->i_conn_path = cp; inc->i_saddr = *saddr; inc->i_usercopy.rdma_cookie = 0; inc->i_usercopy.rx_tstamp = ktime_set(0, 0); } EXPORT_SYMBOL_GPL(rds_inc_path_init); static void rds_inc_addref(struct rds_incoming *inc) { rdsdebug("addref inc %p ref %d\n", inc, refcount_read(&inc->i_refcount)); refcount_inc(&inc->i_refcount); } void rds_inc_put(struct rds_incoming *inc) { rdsdebug("put inc %p ref %d\n", inc, refcount_read(&inc->i_refcount)); if (refcount_dec_and_test(&inc->i_refcount)) { BUG_ON(!list_empty(&inc->i_item)); inc->i_conn->c_trans->inc_free(inc); } } EXPORT_SYMBOL_GPL(rds_inc_put); static void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk, struct rds_cong_map *map, int delta, __be16 port) { int now_congested; if (delta == 0) return; rs->rs_rcv_bytes += delta; if (delta > 0) rds_stats_add(s_recv_bytes_added_to_socket, delta); else rds_stats_add(s_recv_bytes_removed_from_socket, -delta); /* loop transport doesn't send/recv congestion updates */ if (rs->rs_transport->t_type == RDS_TRANS_LOOP) return; now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs); rdsdebug("rs %p (%pI6c:%u) recv bytes %d buf %d " "now_cong %d delta %d\n", rs, &rs->rs_bound_addr, ntohs(rs->rs_bound_port), rs->rs_rcv_bytes, rds_sk_rcvbuf(rs), now_congested, delta); /* wasn't -> am congested */ if (!rs->rs_congested && now_congested) { rs->rs_congested = 1; rds_cong_set_bit(map, port); rds_cong_queue_updates(map); } /* was -> aren't congested */ /* Require more free space 
before reporting uncongested to prevent bouncing cong/uncong state too often */ else if (rs->rs_congested && (rs->rs_rcv_bytes < (rds_sk_rcvbuf(rs)/2))) { rs->rs_congested = 0; rds_cong_clear_bit(map, port); rds_cong_queue_updates(map); } /* do nothing if no change in cong state */ } static void rds_conn_peer_gen_update(struct rds_connection *conn, u32 peer_gen_num) { int i; struct rds_message *rm, *tmp; unsigned long flags; WARN_ON(conn->c_trans->t_type != RDS_TRANS_TCP); if (peer_gen_num != 0) { if (conn->c_peer_gen_num != 0 && peer_gen_num != conn->c_peer_gen_num) { for (i = 0; i < RDS_MPATH_WORKERS; i++) { struct rds_conn_path *cp; cp = &conn->c_path[i]; spin_lock_irqsave(&cp->cp_lock, flags); cp->cp_next_tx_seq = 1; cp->cp_next_rx_seq = 0; list_for_each_entry_safe(rm, tmp, &cp->cp_retrans, m_conn_item) { set_bit(RDS_MSG_FLUSH, &rm->m_flags); } spin_unlock_irqrestore(&cp->cp_lock, flags); } } conn->c_peer_gen_num = peer_gen_num; } } /* * Process all extension headers that come with this message. */ static void rds_recv_incoming_exthdrs(struct rds_incoming *inc, struct rds_sock *rs) { struct rds_header *hdr = &inc->i_hdr; unsigned int pos = 0, type, len; union { struct rds_ext_header_version version; struct rds_ext_header_rdma rdma; struct rds_ext_header_rdma_dest rdma_dest; } buffer; while (1) { len = sizeof(buffer); type = rds_message_next_extension(hdr, &pos, &buffer, &len); if (type == RDS_EXTHDR_NONE) break; /* Process extension header here */ switch (type) { case RDS_EXTHDR_RDMA: rds_rdma_unuse(rs, be32_to_cpu(buffer.rdma.h_rdma_rkey), 0); break; case RDS_EXTHDR_RDMA_DEST: /* We ignore the size for now. We could stash it * somewhere and use it for error checking. */ inc->i_usercopy.rdma_cookie = rds_rdma_make_cookie( be32_to_cpu(buffer.rdma_dest.h_rdma_rkey), be32_to_cpu(buffer.rdma_dest.h_rdma_offset)); break; } } } static void rds_recv_hs_exthdrs(struct rds_header *hdr, struct rds_connection *conn) { unsigned int pos = 0, type, len; union { struct rds_ext_header_version version; u16 rds_npaths; u32 rds_gen_num; } buffer; u32 new_peer_gen_num = 0; while (1) { len = sizeof(buffer); type = rds_message_next_extension(hdr, &pos, &buffer, &len); if (type == RDS_EXTHDR_NONE) break; /* Process extension header here */ switch (type) { case RDS_EXTHDR_NPATHS: conn->c_npaths = min_t(int, RDS_MPATH_WORKERS, be16_to_cpu(buffer.rds_npaths)); break; case RDS_EXTHDR_GEN_NUM: new_peer_gen_num = be32_to_cpu(buffer.rds_gen_num); break; default: pr_warn_ratelimited("ignoring unknown exthdr type " "0x%x\n", type); } } /* if RDS_EXTHDR_NPATHS was not found, default to a single-path */ conn->c_npaths = max_t(int, conn->c_npaths, 1); conn->c_ping_triggered = 0; rds_conn_peer_gen_update(conn, new_peer_gen_num); } /* rds_start_mprds() will synchronously start multiple paths when appropriate. * The scheme is based on the following rules: * * 1. rds_sendmsg on first connect attempt sends the probe ping, with the * sender's npaths (s_npaths) * 2. rcvr of probe-ping knows the mprds_paths = min(s_npaths, r_npaths). It * sends back a probe-pong with r_npaths. After that, if rcvr is the * smaller ip addr, it starts rds_conn_path_connect_if_down on all * mprds_paths. * 3. sender gets woken up, and can move to rds_conn_path_connect_if_down. * If it is the smaller ipaddr, rds_conn_path_connect_if_down can be * called after reception of the probe-pong on all mprds_paths. * Otherwise (sender of probe-ping is not the smaller ip addr): just call * rds_conn_path_connect_if_down on the hashed path. (see rule 4) * 4. 
rds_connect_worker must only trigger a connection if laddr < faddr. * 5. sender may end up queuing the packet on the cp. will get sent out later. * when connection is completed. */ static void rds_start_mprds(struct rds_connection *conn) { int i; struct rds_conn_path *cp; if (conn->c_npaths > 1 && rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) < 0) { for (i = 0; i < conn->c_npaths; i++) { cp = &conn->c_path[i]; rds_conn_path_connect_if_down(cp); } } } /* * The transport must make sure that this is serialized against other * rx and conn reset on this specific conn. * * We currently assert that only one fragmented message will be sent * down a connection at a time. This lets us reassemble in the conn * instead of per-flow which means that we don't have to go digging through * flows to tear down partial reassembly progress on conn failure and * we save flow lookup and locking for each frag arrival. It does mean * that small messages will wait behind large ones. Fragmenting at all * is only to reduce the memory consumption of pre-posted buffers. * * The caller passes in saddr and daddr instead of us getting it from the * conn. This lets loopback, who only has one conn for both directions, * tell us which roles the addrs in the conn are playing for this message. */ void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr, struct in6_addr *daddr, struct rds_incoming *inc, gfp_t gfp) { struct rds_sock *rs = NULL; struct sock *sk; unsigned long flags; struct rds_conn_path *cp; inc->i_conn = conn; inc->i_rx_jiffies = jiffies; if (conn->c_trans->t_mp_capable) cp = inc->i_conn_path; else cp = &conn->c_path[0]; rdsdebug("conn %p next %llu inc %p seq %llu len %u sport %u dport %u " "flags 0x%x rx_jiffies %lu\n", conn, (unsigned long long)cp->cp_next_rx_seq, inc, (unsigned long long)be64_to_cpu(inc->i_hdr.h_sequence), be32_to_cpu(inc->i_hdr.h_len), be16_to_cpu(inc->i_hdr.h_sport), be16_to_cpu(inc->i_hdr.h_dport), inc->i_hdr.h_flags, inc->i_rx_jiffies); /* * Sequence numbers should only increase. Messages get their * sequence number as they're queued in a sending conn. They * can be dropped, though, if the sending socket is closed before * they hit the wire. So sequence numbers can skip forward * under normal operation. They can also drop back in the conn * failover case as previously sent messages are resent down the * new instance of a conn. We drop those, otherwise we have * to assume that the next valid seq does not come after a * hole in the fragment stream. * * The headers don't give us a way to realize if fragments of * a message have been dropped. We assume that frags that arrive * to a flow are part of the current message on the flow that is * being reassembled. This means that senders can't drop messages * from the sending conn until all their frags are sent. * * XXX we could spend more on the wire to get more robust failure * detection, arguably worth it to avoid data corruption. 
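 *
 * For example, if cp_next_rx_seq is 12 and a fragment arrives carrying
 * h_sequence 10 with RDS_FLAG_RETRANSMITTED set, it is counted in
 * s_recv_drop_old_seq and dropped rather than delivered out of order.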
*/ if (be64_to_cpu(inc->i_hdr.h_sequence) < cp->cp_next_rx_seq && (inc->i_hdr.h_flags & RDS_FLAG_RETRANSMITTED)) { rds_stats_inc(s_recv_drop_old_seq); goto out; } cp->cp_next_rx_seq = be64_to_cpu(inc->i_hdr.h_sequence) + 1; if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) { if (inc->i_hdr.h_sport == 0) { rdsdebug("ignore ping with 0 sport from %pI6c\n", saddr); goto out; } rds_stats_inc(s_recv_ping); rds_send_pong(cp, inc->i_hdr.h_sport); /* if this is a handshake ping, start multipath if necessary */ if (RDS_HS_PROBE(be16_to_cpu(inc->i_hdr.h_sport), be16_to_cpu(inc->i_hdr.h_dport))) { rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn); rds_start_mprds(cp->cp_conn); } goto out; } if (be16_to_cpu(inc->i_hdr.h_dport) == RDS_FLAG_PROBE_PORT && inc->i_hdr.h_sport == 0) { rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn); /* if this is a handshake pong, start multipath if necessary */ rds_start_mprds(cp->cp_conn); wake_up(&cp->cp_conn->c_hs_waitq); goto out; } rs = rds_find_bound(daddr, inc->i_hdr.h_dport, conn->c_bound_if); if (!rs) { rds_stats_inc(s_recv_drop_no_sock); goto out; } /* Process extension headers */ rds_recv_incoming_exthdrs(inc, rs); /* We can be racing with rds_release() which marks the socket dead. */ sk = rds_rs_to_sk(rs); /* serialize with rds_release -> sock_orphan */ write_lock_irqsave(&rs->rs_recv_lock, flags); if (!sock_flag(sk, SOCK_DEAD)) { rdsdebug("adding inc %p to rs %p's recv queue\n", inc, rs); rds_stats_inc(s_recv_queued); rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong, be32_to_cpu(inc->i_hdr.h_len), inc->i_hdr.h_dport); if (sock_flag(sk, SOCK_RCVTSTAMP)) inc->i_usercopy.rx_tstamp = ktime_get_real(); rds_inc_addref(inc); inc->i_rx_lat_trace[RDS_MSG_RX_END] = local_clock(); list_add_tail(&inc->i_item, &rs->rs_recv_queue); __rds_wake_sk_sleep(sk); } else { rds_stats_inc(s_recv_drop_dead_sock); } write_unlock_irqrestore(&rs->rs_recv_lock, flags); out: if (rs) rds_sock_put(rs); } EXPORT_SYMBOL_GPL(rds_recv_incoming); /* * be very careful here. This is being called as the condition in * wait_event_*() needs to cope with being called many times. */ static int rds_next_incoming(struct rds_sock *rs, struct rds_incoming **inc) { unsigned long flags; if (!*inc) { read_lock_irqsave(&rs->rs_recv_lock, flags); if (!list_empty(&rs->rs_recv_queue)) { *inc = list_entry(rs->rs_recv_queue.next, struct rds_incoming, i_item); rds_inc_addref(*inc); } read_unlock_irqrestore(&rs->rs_recv_lock, flags); } return *inc != NULL; } static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc, int drop) { struct sock *sk = rds_rs_to_sk(rs); int ret = 0; unsigned long flags; struct rds_incoming *to_drop = NULL; write_lock_irqsave(&rs->rs_recv_lock, flags); if (!list_empty(&inc->i_item)) { ret = 1; if (drop) { /* XXX make sure this i_conn is reliable */ rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong, -be32_to_cpu(inc->i_hdr.h_len), inc->i_hdr.h_dport); list_del_init(&inc->i_item); to_drop = inc; } } write_unlock_irqrestore(&rs->rs_recv_lock, flags); if (to_drop) rds_inc_put(to_drop); rdsdebug("inc %p rs %p still %d dropped %d\n", inc, rs, ret, drop); return ret; } /* * Pull errors off the error queue. * If msghdr is NULL, we will just purge the error queue. 
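 * The number of notifications copied in one call is bounded by how many
 * struct rds_rdma_notify control messages fit in msghdr->msg_controllen,
 * with a minimum of one.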
*/ int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr) { struct rds_notifier *notifier; struct rds_rdma_notify cmsg; unsigned int count = 0, max_messages = ~0U; unsigned long flags; LIST_HEAD(copy); int err = 0; memset(&cmsg, 0, sizeof(cmsg)); /* fill holes with zero */ /* put_cmsg copies to user space and thus may sleep. We can't do this * with rs_lock held, so first grab as many notifications as we can stuff * in the user provided cmsg buffer. We don't try to copy more, to avoid * losing notifications - except when the buffer is so small that it wouldn't * even hold a single notification. Then we give him as much of this single * msg as we can squeeze in, and set MSG_CTRUNC. */ if (msghdr) { max_messages = msghdr->msg_controllen / CMSG_SPACE(sizeof(cmsg)); if (!max_messages) max_messages = 1; } spin_lock_irqsave(&rs->rs_lock, flags); while (!list_empty(&rs->rs_notify_queue) && count < max_messages) { notifier = list_entry(rs->rs_notify_queue.next, struct rds_notifier, n_list); list_move(&notifier->n_list, &copy); count++; } spin_unlock_irqrestore(&rs->rs_lock, flags); if (!count) return 0; while (!list_empty(&copy)) { notifier = list_entry(copy.next, struct rds_notifier, n_list); if (msghdr) { cmsg.user_token = notifier->n_user_token; cmsg.status = notifier->n_status; err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_RDMA_STATUS, sizeof(cmsg), &cmsg); if (err) break; } list_del_init(&notifier->n_list); kfree(notifier); } /* If we bailed out because of an error in put_cmsg, * we may be left with one or more notifications that we * didn't process. Return them to the head of the list. */ if (!list_empty(&copy)) { spin_lock_irqsave(&rs->rs_lock, flags); list_splice(&copy, &rs->rs_notify_queue); spin_unlock_irqrestore(&rs->rs_lock, flags); } return err; } /* * Queue a congestion notification */ static int rds_notify_cong(struct rds_sock *rs, struct msghdr *msghdr) { uint64_t notify = rs->rs_cong_notify; unsigned long flags; int err; err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_CONG_UPDATE, sizeof(notify), &notify); if (err) return err; spin_lock_irqsave(&rs->rs_lock, flags); rs->rs_cong_notify &= ~notify; spin_unlock_irqrestore(&rs->rs_lock, flags); return 0; } /* * Receive any control messages. 
*/ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg, struct rds_sock *rs) { int ret = 0; if (inc->i_usercopy.rdma_cookie) { ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST, sizeof(inc->i_usercopy.rdma_cookie), &inc->i_usercopy.rdma_cookie); if (ret) goto out; } if ((inc->i_usercopy.rx_tstamp != 0) && sock_flag(rds_rs_to_sk(rs), SOCK_RCVTSTAMP)) { struct __kernel_old_timeval tv = ns_to_kernel_old_timeval(inc->i_usercopy.rx_tstamp); if (!sock_flag(rds_rs_to_sk(rs), SOCK_TSTAMP_NEW)) { ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, sizeof(tv), &tv); } else { struct __kernel_sock_timeval sk_tv; sk_tv.tv_sec = tv.tv_sec; sk_tv.tv_usec = tv.tv_usec; ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW, sizeof(sk_tv), &sk_tv); } if (ret) goto out; } if (rs->rs_rx_traces) { struct rds_cmsg_rx_trace t; int i, j; memset(&t, 0, sizeof(t)); inc->i_rx_lat_trace[RDS_MSG_RX_CMSG] = local_clock(); t.rx_traces = rs->rs_rx_traces; for (i = 0; i < rs->rs_rx_traces; i++) { j = rs->rs_rx_trace[i]; t.rx_trace_pos[i] = j; t.rx_trace[i] = inc->i_rx_lat_trace[j + 1] - inc->i_rx_lat_trace[j]; } ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RXPATH_LATENCY, sizeof(t), &t); if (ret) goto out; } out: return ret; } static bool rds_recvmsg_zcookie(struct rds_sock *rs, struct msghdr *msg) { struct rds_msg_zcopy_queue *q = &rs->rs_zcookie_queue; struct rds_msg_zcopy_info *info = NULL; struct rds_zcopy_cookies *done; unsigned long flags; if (!msg->msg_control) return false; if (!sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY) || msg->msg_controllen < CMSG_SPACE(sizeof(*done))) return false; spin_lock_irqsave(&q->lock, flags); if (!list_empty(&q->zcookie_head)) { info = list_entry(q->zcookie_head.next, struct rds_msg_zcopy_info, rs_zcookie_next); list_del(&info->rs_zcookie_next); } spin_unlock_irqrestore(&q->lock, flags); if (!info) return false; done = &info->zcookies; if (put_cmsg(msg, SOL_RDS, RDS_CMSG_ZCOPY_COMPLETION, sizeof(*done), done)) { spin_lock_irqsave(&q->lock, flags); list_add(&info->rs_zcookie_next, &q->zcookie_head); spin_unlock_irqrestore(&q->lock, flags); return false; } kfree(info); return true; } int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int msg_flags) { struct sock *sk = sock->sk; struct rds_sock *rs = rds_sk_to_rs(sk); long timeo; int ret = 0, nonblock = msg_flags & MSG_DONTWAIT; DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); struct rds_incoming *inc = NULL; /* udp_recvmsg()->sock_recvtimeo() gets away without locking too.. */ timeo = sock_rcvtimeo(sk, nonblock); rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo); if (msg_flags & MSG_OOB) goto out; if (msg_flags & MSG_ERRQUEUE) return sock_recv_errqueue(sk, msg, size, SOL_IP, IP_RECVERR); while (1) { /* If there are pending notifications, do those - and nothing else */ if (!list_empty(&rs->rs_notify_queue)) { ret = rds_notify_queue_get(rs, msg); break; } if (rs->rs_cong_notify) { ret = rds_notify_cong(rs, msg); break; } if (!rds_next_incoming(rs, &inc)) { if (nonblock) { bool reaped = rds_recvmsg_zcookie(rs, msg); ret = reaped ? 
0 : -EAGAIN; break; } timeo = wait_event_interruptible_timeout(*sk_sleep(sk), (!list_empty(&rs->rs_notify_queue) || rs->rs_cong_notify || rds_next_incoming(rs, &inc)), timeo); rdsdebug("recvmsg woke inc %p timeo %ld\n", inc, timeo); if (timeo > 0 || timeo == MAX_SCHEDULE_TIMEOUT) continue; ret = timeo; if (ret == 0) ret = -ETIMEDOUT; break; } rdsdebug("copying inc %p from %pI6c:%u to user\n", inc, &inc->i_conn->c_faddr, ntohs(inc->i_hdr.h_sport)); ret = inc->i_conn->c_trans->inc_copy_to_user(inc, &msg->msg_iter); if (ret < 0) break; /* * if the message we just copied isn't at the head of the * recv queue then someone else raced us to return it, try * to get the next message. */ if (!rds_still_queued(rs, inc, !(msg_flags & MSG_PEEK))) { rds_inc_put(inc); inc = NULL; rds_stats_inc(s_recv_deliver_raced); iov_iter_revert(&msg->msg_iter, ret); continue; } if (ret < be32_to_cpu(inc->i_hdr.h_len)) { if (msg_flags & MSG_TRUNC) ret = be32_to_cpu(inc->i_hdr.h_len); msg->msg_flags |= MSG_TRUNC; } if (rds_cmsg_recv(inc, msg, rs)) { ret = -EFAULT; break; } rds_recvmsg_zcookie(rs, msg); rds_stats_inc(s_recv_delivered); if (msg->msg_name) { if (ipv6_addr_v4mapped(&inc->i_saddr)) { sin->sin_family = AF_INET; sin->sin_port = inc->i_hdr.h_sport; sin->sin_addr.s_addr = inc->i_saddr.s6_addr32[3]; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); msg->msg_namelen = sizeof(*sin); } else { sin6->sin6_family = AF_INET6; sin6->sin6_port = inc->i_hdr.h_sport; sin6->sin6_addr = inc->i_saddr; sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = rs->rs_bound_scope_id; msg->msg_namelen = sizeof(*sin6); } } break; } if (inc) rds_inc_put(inc); out: return ret; } /* * The socket is being shut down and we're asked to drop messages that were * queued for recvmsg. The caller has unbound the socket so the receive path * won't queue any more incoming fragments or messages on the socket. */ void rds_clear_recv_queue(struct rds_sock *rs) { struct sock *sk = rds_rs_to_sk(rs); struct rds_incoming *inc, *tmp; unsigned long flags; LIST_HEAD(to_drop); write_lock_irqsave(&rs->rs_recv_lock, flags); list_for_each_entry_safe(inc, tmp, &rs->rs_recv_queue, i_item) { rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong, -be32_to_cpu(inc->i_hdr.h_len), inc->i_hdr.h_dport); list_move(&inc->i_item, &to_drop); } write_unlock_irqrestore(&rs->rs_recv_lock, flags); list_for_each_entry_safe(inc, tmp, &to_drop, i_item) { list_del_init(&inc->i_item); rds_inc_put(inc); } } /* * inc->i_saddr isn't used here because it is only set in the receive * path. 
*/ void rds_inc_info_copy(struct rds_incoming *inc, struct rds_info_iterator *iter, __be32 saddr, __be32 daddr, int flip) { struct rds_info_message minfo; minfo.seq = be64_to_cpu(inc->i_hdr.h_sequence); minfo.len = be32_to_cpu(inc->i_hdr.h_len); minfo.tos = inc->i_conn->c_tos; if (flip) { minfo.laddr = daddr; minfo.faddr = saddr; minfo.lport = inc->i_hdr.h_dport; minfo.fport = inc->i_hdr.h_sport; } else { minfo.laddr = saddr; minfo.faddr = daddr; minfo.lport = inc->i_hdr.h_sport; minfo.fport = inc->i_hdr.h_dport; } minfo.flags = 0; rds_info_copy(iter, &minfo, sizeof(minfo)); } #if IS_ENABLED(CONFIG_IPV6) void rds6_inc_info_copy(struct rds_incoming *inc, struct rds_info_iterator *iter, struct in6_addr *saddr, struct in6_addr *daddr, int flip) { struct rds6_info_message minfo6; minfo6.seq = be64_to_cpu(inc->i_hdr.h_sequence); minfo6.len = be32_to_cpu(inc->i_hdr.h_len); minfo6.tos = inc->i_conn->c_tos; if (flip) { minfo6.laddr = *daddr; minfo6.faddr = *saddr; minfo6.lport = inc->i_hdr.h_dport; minfo6.fport = inc->i_hdr.h_sport; } else { minfo6.laddr = *saddr; minfo6.faddr = *daddr; minfo6.lport = inc->i_hdr.h_sport; minfo6.fport = inc->i_hdr.h_dport; } minfo6.flags = 0; rds_info_copy(iter, &minfo6, sizeof(minfo6)); } #endif
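Illustrative only, not part of net/rds/recv.c: a minimal userspace sketch of the receive side these functions feed, assuming <linux/rds.h> provides SOL_RDS, RDS_CMSG_RDMA_STATUS and struct rds_rdma_notify as used by rds_cmsg_recv()/rds_notify_queue_get() above; the fd is an already-bound RDS socket and error handling is trimmed.

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <linux/rds.h>

/* Receive one RDS datagram and walk any control messages the kernel
 * attached, e.g. RDMA completion notifications delivered as
 * RDS_CMSG_RDMA_STATUS by rds_notify_queue_get(). */
static int rds_recv_once(int fd)
{
	char data[4096];
	char cbuf[CMSG_SPACE(sizeof(struct rds_rdma_notify))];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg;
	ssize_t len = recvmsg(fd, &msg, 0);

	if (len < 0)
		return -1;
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level == SOL_RDS &&
		    cmsg->cmsg_type == RDS_CMSG_RDMA_STATUS) {
			struct rds_rdma_notify note;

			memcpy(&note, CMSG_DATA(cmsg), sizeof(note));
			printf("rdma token %llu status %d\n",
			       (unsigned long long)note.user_token,
			       (int)note.status);
		}
	}
	return (int)len;
}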
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_KSM_H #define __LINUX_KSM_H /* * Memory merging support. * * This code enables dynamic sharing of identical pages found in different * memory areas, even if they are not shared by fork(). */ #include <linux/bitops.h> #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/rmap.h> #include <linux/sched.h> #ifdef CONFIG_KSM int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags); void ksm_add_vma(struct vm_area_struct *vma); int ksm_enable_merge_any(struct mm_struct *mm); int ksm_disable_merge_any(struct mm_struct *mm); int ksm_disable(struct mm_struct *mm); int __ksm_enter(struct mm_struct *mm); void __ksm_exit(struct mm_struct *mm); /* * To identify zeropages that were mapped by KSM, we reuse the dirty bit * in the PTE. If the PTE is dirty, the zeropage was mapped by KSM when * deduplicating memory. */ #define is_ksm_zero_pte(pte) (is_zero_pfn(pte_pfn(pte)) && pte_dirty(pte)) extern atomic_long_t ksm_zero_pages; static inline void ksm_map_zero_page(struct mm_struct *mm) { atomic_long_inc(&ksm_zero_pages); atomic_long_inc(&mm->ksm_zero_pages); } static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte) { if (is_ksm_zero_pte(pte)) { atomic_long_dec(&ksm_zero_pages); atomic_long_dec(&mm->ksm_zero_pages); } } static inline long mm_ksm_zero_pages(struct mm_struct *mm) { return atomic_long_read(&mm->ksm_zero_pages); } static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) { /* Adding mm to ksm is best effort on fork. */ if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) __ksm_enter(mm); } static inline int ksm_execve(struct mm_struct *mm) { if (test_bit(MMF_VM_MERGE_ANY, &mm->flags)) return __ksm_enter(mm); return 0; } static inline void ksm_exit(struct mm_struct *mm) { if (test_bit(MMF_VM_MERGEABLE, &mm->flags)) __ksm_exit(mm); } /* * When do_swap_page() first faults in from swap what used to be a KSM page, * no problem, it will be assigned to this vma's anon_vma; but thereafter, * it might be faulted into a different anon_vma (or perhaps to a different * offset in the same anon_vma). do_swap_page() cannot do all the locking * needed to reconstitute a cross-anon_vma KSM page: for now it has to make * a copy, and leave remerging the pages to a later pass of ksmd. * * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE, * but what if the vma was unmerged while the page was swapped out?
*/ struct folio *ksm_might_need_to_copy(struct folio *folio, struct vm_area_struct *vma, unsigned long addr); void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc); void folio_migrate_ksm(struct folio *newfolio, struct folio *folio); void collect_procs_ksm(const struct folio *folio, const struct page *page, struct list_head *to_kill, int force_early); long ksm_process_profit(struct mm_struct *); bool ksm_process_mergeable(struct mm_struct *mm); #else /* !CONFIG_KSM */ static inline void ksm_add_vma(struct vm_area_struct *vma) { } static inline int ksm_disable(struct mm_struct *mm) { return 0; } static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) { } static inline int ksm_execve(struct mm_struct *mm) { return 0; } static inline void ksm_exit(struct mm_struct *mm) { } static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte) { } static inline void collect_procs_ksm(const struct folio *folio, const struct page *page, struct list_head *to_kill, int force_early) { } #ifdef CONFIG_MMU static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags) { return 0; } static inline struct folio *ksm_might_need_to_copy(struct folio *folio, struct vm_area_struct *vma, unsigned long addr) { return folio; } static inline void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc) { } static inline void folio_migrate_ksm(struct folio *newfolio, struct folio *old) { } #endif /* CONFIG_MMU */ #endif /* !CONFIG_KSM */ #endif /* __LINUX_KSM_H */
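Illustrative only, not part of include/linux/ksm.h: a userspace sketch of the two opt-in paths the hooks above serve, madvise(MADV_MERGEABLE) per mapping (backing ksm_madvise()) and, where the kernel headers define it, prctl(PR_SET_MEMORY_MERGE, 1) for the whole process (backing ksm_enable_merge_any()).

#include <sys/mman.h>
#include <sys/prctl.h>
#include <stddef.h>

int main(void)
{
	size_t len = 64 * 4096;
	void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED)
		return 1;

	/* Per-VMA opt-in: ksmd may merge identical pages in this range. */
	if (madvise(buf, len, MADV_MERGEABLE))
		return 1;

#ifdef PR_SET_MEMORY_MERGE
	/* Whole-process opt-in (newer kernels): eligible VMAs, current and
	 * future, become KSM-mergeable without per-range madvise calls. */
	prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
#endif
	return 0;
}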
/* SPDX-License-Identifier: GPL-2.0-only */ /* * Landlock LSM - Object management * * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net> * Copyright © 2018-2020 ANSSI */ #ifndef _SECURITY_LANDLOCK_OBJECT_H #define _SECURITY_LANDLOCK_OBJECT_H #include <linux/compiler_types.h> #include <linux/refcount.h> #include <linux/spinlock.h> struct landlock_object; /** * struct landlock_object_underops - Operations on an underlying object */ struct landlock_object_underops { /** * @release: Releases the underlying object (e.g. iput() for an inode). */ void (*release)(struct landlock_object *const object) __releases(object->lock); }; /** * struct landlock_object - Security blob tied to a kernel object * * The goal of this structure is to enable to tie a set of ephemeral access * rights (pertaining to different domains) to a kernel object (e.g an inode) * in a safe way. This implies to handle concurrent use and modification. * * The lifetime of a &struct landlock_object depends on the rules referring to * it. */ struct landlock_object { /** * @usage: This counter is used to tie an object to the rules matching * it or to keep it alive while adding a new rule. If this counter * reaches zero, this struct must not be modified, but this counter can * still be read from within an RCU read-side critical section. When * adding a new rule to an object with a usage counter of zero, we must * wait until the pointer to this object is set to NULL (or recycled). */ refcount_t usage; /** * @lock: Protects against concurrent modifications. This lock must be * held from the time @usage drops to zero until any weak references * from @underobj to this object have been cleaned up. * * Lock ordering: inode->i_lock nests inside this. */ spinlock_t lock; /** * @underobj: Used when cleaning up an object and to mark an object as * tied to its underlying kernel structure. This pointer is protected * by @lock. Cf. landlock_release_inodes() and release_inode(). */ void *underobj; union { /** * @rcu_free: Enables lockless use of @usage, @lock and * @underobj from within an RCU read-side critical section. * @rcu_free and @underops are only used by * landlock_put_object(). */ struct rcu_head rcu_free; /** * @underops: Enables landlock_put_object() to release the * underlying object (e.g. inode). */ const struct landlock_object_underops *underops; }; }; struct landlock_object * landlock_create_object(const struct landlock_object_underops *const underops, void *const underobj); void landlock_put_object(struct landlock_object *const object); static inline void landlock_get_object(struct landlock_object *const object) { if (object) refcount_inc(&object->usage); } #endif /* _SECURITY_LANDLOCK_OBJECT_H */
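Illustrative only, and not the real landlock_put_object() from security/landlock/object.c: a minimal sketch of the get/put pattern the comments above describe, with a hypothetical my_obj type; the real code defers the free through @rcu_free rather than calling kfree() directly.

#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct my_obj {
	refcount_t usage;
	spinlock_t lock;
	void *underobj;
};

static void my_obj_get(struct my_obj *obj)
{
	if (obj)
		refcount_inc(&obj->usage);
}

static void my_obj_put(struct my_obj *obj)
{
	if (!obj)
		return;
	/* Take the lock only on the final reference, so whoever drops
	 * @usage to zero is also the one that detaches @underobj while
	 * holding @lock, as the struct comments require. */
	if (refcount_dec_and_lock(&obj->usage, &obj->lock)) {
		obj->underobj = NULL;
		spin_unlock(&obj->lock);
		kfree(obj);	/* the real code frees via RCU instead */
	}
}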
// SPDX-License-Identifier: GPL-2.0-or-later /* * gspca ViCam subdriver * * Copyright (C) 2011 Hans de Goede <hdegoede@redhat.com> * * Based on the usbvideo vicam driver, which is: * * Copyright (c) 2002 Joe Burks (jburks@wavicle.org), * Chris Cheney (chris.cheney@gmail.com), * Pavel Machek (pavel@ucw.cz), * John Tyner (jtyner@cs.ucr.edu), * Monroe Williams (monroe@pobox.com) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "vicam" #define HEADER_SIZE 64 #include <linux/workqueue.h> #include <linux/slab.h> #include <linux/firmware.h> #include <linux/ihex.h> #include "gspca.h" #define VICAM_FIRMWARE "vicam/firmware.fw" MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>"); MODULE_DESCRIPTION("GSPCA ViCam USB Camera Driver"); MODULE_LICENSE("GPL"); MODULE_FIRMWARE(VICAM_FIRMWARE); struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ struct work_struct work_struct; }; /* The vicam sensor has a resolution of 512 x 244, with I believe square pixels, but this is forced to a 4:3 ratio by optics.
So it has non square pixels :( */ static struct v4l2_pix_format vicam_mode[] = { { 256, 122, V4L2_PIX_FMT_SGRBG8, V4L2_FIELD_NONE, .bytesperline = 256, .sizeimage = 256 * 122, .colorspace = V4L2_COLORSPACE_SRGB,}, /* 2 modes with somewhat more square pixels */ { 256, 200, V4L2_PIX_FMT_SGRBG8, V4L2_FIELD_NONE, .bytesperline = 256, .sizeimage = 256 * 200, .colorspace = V4L2_COLORSPACE_SRGB,}, { 256, 240, V4L2_PIX_FMT_SGRBG8, V4L2_FIELD_NONE, .bytesperline = 256, .sizeimage = 256 * 240, .colorspace = V4L2_COLORSPACE_SRGB,}, #if 0 /* This mode has extremely non square pixels, testing use only */ { 512, 122, V4L2_PIX_FMT_SGRBG8, V4L2_FIELD_NONE, .bytesperline = 512, .sizeimage = 512 * 122, .colorspace = V4L2_COLORSPACE_SRGB,}, #endif { 512, 244, V4L2_PIX_FMT_SGRBG8, V4L2_FIELD_NONE, .bytesperline = 512, .sizeimage = 512 * 244, .colorspace = V4L2_COLORSPACE_SRGB,}, }; static int vicam_control_msg(struct gspca_dev *gspca_dev, u8 request, u16 value, u16 index, u8 *data, u16 len) { int ret; ret = usb_control_msg(gspca_dev->dev, usb_sndctrlpipe(gspca_dev->dev, 0), request, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, data, len, 1000); if (ret < 0) pr_err("control msg req %02X error %d\n", request, ret); return ret; } static int vicam_set_camera_power(struct gspca_dev *gspca_dev, int state) { int ret; ret = vicam_control_msg(gspca_dev, 0x50, state, 0, NULL, 0); if (ret < 0) return ret; if (state) ret = vicam_control_msg(gspca_dev, 0x55, 1, 0, NULL, 0); return ret; } /* * request and read a block of data */ static int vicam_read_frame(struct gspca_dev *gspca_dev, u8 *data, int size) { int ret, unscaled_height, act_len = 0; u8 *req_data = gspca_dev->usb_buf; s32 expo = v4l2_ctrl_g_ctrl(gspca_dev->exposure); s32 gain = v4l2_ctrl_g_ctrl(gspca_dev->gain); memset(req_data, 0, 16); req_data[0] = gain; if (gspca_dev->pixfmt.width == 256) req_data[1] |= 0x01; /* low nibble x-scale */ if (gspca_dev->pixfmt.height <= 122) { req_data[1] |= 0x10; /* high nibble y-scale */ unscaled_height = gspca_dev->pixfmt.height * 2; } else unscaled_height = gspca_dev->pixfmt.height; req_data[2] = 0x90; /* unknown, does not seem to do anything */ if (unscaled_height <= 200) req_data[3] = 0x06; /* vend? */ else if (unscaled_height <= 242) /* Yes 242 not 240 */ req_data[3] = 0x07; /* vend? */ else /* Up to 244 lines with req_data[3] == 0x08 */ req_data[3] = 0x08; /* vend? */ if (expo < 256) { /* Frame rate maxed out, use partial frame expo time */ req_data[4] = 255 - expo; req_data[5] = 0x00; req_data[6] = 0x00; req_data[7] = 0x01; } else { /* Modify frame rate */ req_data[4] = 0x00; req_data[5] = 0x00; req_data[6] = expo & 0xFF; req_data[7] = expo >> 8; } req_data[8] = ((244 - unscaled_height) / 2) & ~0x01; /* vstart */ /* bytes 9-15 do not seem to affect exposure or image quality */ mutex_lock(&gspca_dev->usb_lock); ret = vicam_control_msg(gspca_dev, 0x51, 0x80, 0, req_data, 16); mutex_unlock(&gspca_dev->usb_lock); if (ret < 0) return ret; ret = usb_bulk_msg(gspca_dev->dev, usb_rcvbulkpipe(gspca_dev->dev, 0x81), data, size, &act_len, 10000); /* successful, it returns 0, otherwise negative */ if (ret < 0 || act_len != size) { pr_err("bulk read fail (%d) len %d/%d\n", ret, act_len, size); return -EIO; } return 0; } /* * This function is called as a workqueue function and runs whenever the camera * is streaming data. Because it is a workqueue function it is allowed to sleep * so we can use synchronous USB calls. 
To avoid possible collisions with other * threads attempting to use gspca_dev->usb_buf we take the usb_lock when * performing USB operations using it. In practice we don't really need this * as the cameras controls are only written from the workqueue. */ static void vicam_dostream(struct work_struct *work) { struct sd *sd = container_of(work, struct sd, work_struct); struct gspca_dev *gspca_dev = &sd->gspca_dev; int ret, frame_sz; u8 *buffer; frame_sz = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].sizeimage + HEADER_SIZE; buffer = kmalloc(frame_sz, GFP_KERNEL); if (!buffer) { pr_err("Couldn't allocate USB buffer\n"); goto exit; } while (gspca_dev->present && gspca_dev->streaming) { #ifdef CONFIG_PM if (gspca_dev->frozen) break; #endif ret = vicam_read_frame(gspca_dev, buffer, frame_sz); if (ret < 0) break; /* Note the frame header contents seem to be completely constant, they do not change with either image, or settings. So we simply discard it. The frames have a very similar 64 byte footer, which we don't even bother reading from the cam */ gspca_frame_add(gspca_dev, FIRST_PACKET, buffer + HEADER_SIZE, frame_sz - HEADER_SIZE); gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0); } exit: kfree(buffer); } /* This function is called at probe time just before sd_init */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct cam *cam = &gspca_dev->cam; struct sd *sd = (struct sd *)gspca_dev; /* We don't use the buffer gspca allocates so make it small. */ cam->bulk = 1; cam->bulk_size = 64; cam->cam_mode = vicam_mode; cam->nmodes = ARRAY_SIZE(vicam_mode); INIT_WORK(&sd->work_struct, vicam_dostream); return 0; } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { int ret; const struct ihex_binrec *rec; const struct firmware *fw; u8 *firmware_buf; ret = request_ihex_firmware(&fw, VICAM_FIRMWARE, &gspca_dev->dev->dev); if (ret) { pr_err("Failed to load \"vicam/firmware.fw\": %d\n", ret); return ret; } firmware_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!firmware_buf) { ret = -ENOMEM; goto exit; } for (rec = (void *)fw->data; rec; rec = ihex_next_binrec(rec)) { memcpy(firmware_buf, rec->data, be16_to_cpu(rec->len)); ret = vicam_control_msg(gspca_dev, 0xff, 0, 0, firmware_buf, be16_to_cpu(rec->len)); if (ret < 0) break; } kfree(firmware_buf); exit: release_firmware(fw); return ret; } /* Set up for getting frames. 
*/ static int sd_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *)gspca_dev; int ret; ret = vicam_set_camera_power(gspca_dev, 1); if (ret < 0) return ret; schedule_work(&sd->work_struct); return 0; } /* called on streamoff with alt==0 and on disconnect */ /* the usb_lock is held at entry - restore on exit */ static void sd_stop0(struct gspca_dev *gspca_dev) { struct sd *dev = (struct sd *)gspca_dev; /* wait for the work queue to terminate */ mutex_unlock(&gspca_dev->usb_lock); /* This waits for vicam_dostream to finish */ flush_work(&dev->work_struct); mutex_lock(&gspca_dev->usb_lock); if (gspca_dev->present) vicam_set_camera_power(gspca_dev, 0); } static int sd_init_controls(struct gspca_dev *gspca_dev) { struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 2); gspca_dev->exposure = v4l2_ctrl_new_std(hdl, NULL, V4L2_CID_EXPOSURE, 0, 2047, 1, 256); gspca_dev->gain = v4l2_ctrl_new_std(hdl, NULL, V4L2_CID_GAIN, 0, 255, 1, 200); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } return 0; } /* Table of supported USB devices */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x04c1, 0x009d)}, {USB_DEVICE(0x0602, 0x1001)}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .start = sd_start, .stop0 = sd_stop0, }; /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
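Illustrative only, not part of the driver: a userspace sketch of driving the two controls registered in sd_init_controls() via the standard V4L2 control ioctl; the /dev/video0 path is an assumption.

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/videodev2.h>

/* Set a single V4L2 control, e.g. V4L2_CID_EXPOSURE (0..2047) or
 * V4L2_CID_GAIN (0..255) as exposed by the vicam subdriver. */
static int set_ctrl(int fd, unsigned int id, int value)
{
	struct v4l2_control ctrl = { .id = id, .value = value };

	return ioctl(fd, VIDIOC_S_CTRL, &ctrl);
}

int main(void)
{
	int fd = open("/dev/video0", O_RDWR);	/* hypothetical device node */

	if (fd < 0)
		return 1;
	set_ctrl(fd, V4L2_CID_EXPOSURE, 512);	/* >= 256: lowers the frame rate */
	set_ctrl(fd, V4L2_CID_GAIN, 200);
	return 0;
}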
// SPDX-License-Identifier: GPL-2.0-only /* * IPv6 packet mangling table, a port of the IPv4 mangle table to IPv6 * * Copyright (C) 2000-2001 by Harald Welte <laforge@gnumonks.org> * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org> */ #include <linux/module.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/slab.h> #include <net/ipv6.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("ip6tables mangle table"); #define MANGLE_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \ (1 << NF_INET_LOCAL_IN) | \ (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_MANGLE, }; static unsigned int ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct in6_addr saddr, daddr; unsigned int ret, verdict; u32 flowlabel, mark; u8 hop_limit; int err; /* save source/dest address, mark, hoplimit, flowlabel, priority */ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr)); mark = skb->mark; hop_limit = ipv6_hdr(skb)->hop_limit; /* flowlabel and prio (includes version, which shouldn't change either) */ flowlabel = *((u_int32_t *)ipv6_hdr(skb)); ret = ip6t_do_table(priv, skb, state); verdict = ret & NF_VERDICT_MASK; if (verdict != NF_DROP && verdict != NF_STOLEN && (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) || skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { err = ip6_route_me_harder(state->net, state->sk, skb); if (err < 0) ret = NF_DROP_ERR(err); } return ret; } /* The work comes in here from netfilter.c.
*/ static unsigned int ip6table_mangle_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { if (state->hook == NF_INET_LOCAL_OUT) return ip6t_mangle_out(priv, skb, state); return ip6t_do_table(priv, skb, state); } static struct nf_hook_ops *mangle_ops __read_mostly; static int ip6table_mangle_table_init(struct net *net) { struct ip6t_replace *repl; int ret; repl = ip6t_alloc_initial_table(&packet_mangler); if (repl == NULL) return -ENOMEM; ret = ip6t_register_table(net, &packet_mangler, repl, mangle_ops); kfree(repl); return ret; } static void __net_exit ip6table_mangle_net_pre_exit(struct net *net) { ip6t_unregister_table_pre_exit(net, "mangle"); } static void __net_exit ip6table_mangle_net_exit(struct net *net) { ip6t_unregister_table_exit(net, "mangle"); } static struct pernet_operations ip6table_mangle_net_ops = { .pre_exit = ip6table_mangle_net_pre_exit, .exit = ip6table_mangle_net_exit, }; static int __init ip6table_mangle_init(void) { int ret = xt_register_template(&packet_mangler, ip6table_mangle_table_init); if (ret < 0) return ret; mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook); if (IS_ERR(mangle_ops)) { xt_unregister_template(&packet_mangler); return PTR_ERR(mangle_ops); } ret = register_pernet_subsys(&ip6table_mangle_net_ops); if (ret < 0) { xt_unregister_template(&packet_mangler); kfree(mangle_ops); return ret; } return ret; } static void __exit ip6table_mangle_fini(void) { unregister_pernet_subsys(&ip6table_mangle_net_ops); xt_unregister_template(&packet_mangler); kfree(mangle_ops); } module_init(ip6table_mangle_init); module_exit(ip6table_mangle_fini);
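Illustrative only: the single u32 compare in ip6t_mangle_out() works because the first 32 bits of the IPv6 header hold version, traffic class and flow label together; a hypothetical helper spelling out that layout:

#include <linux/types.h>
#include <asm/byteorder.h>

/* Decompose the first 32-bit word of an IPv6 header (the value saved in
 * 'flowlabel' above) into its fields; any change to version, traffic
 * class or flow label changes this word, which is what triggers the
 * ip6_route_me_harder() reroute. */
static inline void ipv6_word0_fields(__be32 word0, u8 *version, u8 *tclass,
				     u32 *flowlabel)
{
	u32 v = be32_to_cpu(word0);

	*version = v >> 28;		/* top 4 bits */
	*tclass = (v >> 20) & 0xff;	/* next 8 bits */
	*flowlabel = v & 0xfffff;	/* low 20 bits */
}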
// SPDX-License-Identifier: GPL-2.0-only #include <linux/bitmap.h> #include <linux/bug.h> #include <linux/export.h> #include <linux/idr.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/xarray.h> /** * idr_alloc_u32() - Allocate an ID. * @idr: IDR handle. * @ptr: Pointer to be associated with the new ID. * @nextid: Pointer to an ID. * @max: The maximum ID to allocate (inclusive). * @gfp: Memory allocation flags. * * Allocates an unused ID in the range specified by @nextid and @max. * Note that @max is inclusive whereas the @end parameter to idr_alloc() * is exclusive. The new ID is assigned to @nextid before the pointer * is inserted into the IDR, so if @nextid points into the object pointed * to by @ptr, a concurrent lookup will not find an uninitialised ID.
* * The caller should provide their own locking to ensure that two * concurrent modifications to the IDR are not possible. Read-only * accesses to the IDR may be done under the RCU read lock or may * exclude simultaneous writers. * * Return: 0 if an ID was allocated, -ENOMEM if memory allocation failed, * or -ENOSPC if no free IDs could be found. If an error occurred, * @nextid is unchanged. */ int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid, unsigned long max, gfp_t gfp) { struct radix_tree_iter iter; void __rcu **slot; unsigned int base = idr->idr_base; unsigned int id = *nextid; if (WARN_ON_ONCE(!(idr->idr_rt.xa_flags & ROOT_IS_IDR))) idr->idr_rt.xa_flags |= IDR_RT_MARKER; id = (id < base) ? 0 : id - base; radix_tree_iter_init(&iter, id); slot = idr_get_free(&idr->idr_rt, &iter, gfp, max - base); if (IS_ERR(slot)) return PTR_ERR(slot); *nextid = iter.index + base; /* there is a memory barrier inside radix_tree_iter_replace() */ radix_tree_iter_replace(&idr->idr_rt, &iter, slot, ptr); radix_tree_iter_tag_clear(&idr->idr_rt, &iter, IDR_FREE); return 0; } EXPORT_SYMBOL_GPL(idr_alloc_u32); /** * idr_alloc() - Allocate an ID. * @idr: IDR handle. * @ptr: Pointer to be associated with the new ID. * @start: The minimum ID (inclusive). * @end: The maximum ID (exclusive). * @gfp: Memory allocation flags. * * Allocates an unused ID in the range specified by @start and @end. If * @end is <= 0, it is treated as one larger than %INT_MAX. This allows * callers to use @start + N as @end as long as N is within integer range. * * The caller should provide their own locking to ensure that two * concurrent modifications to the IDR are not possible. Read-only * accesses to the IDR may be done under the RCU read lock or may * exclude simultaneous writers. * * Return: The newly allocated ID, -ENOMEM if memory allocation failed, * or -ENOSPC if no free IDs could be found. */ int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) { u32 id = start; int ret; if (WARN_ON_ONCE(start < 0)) return -EINVAL; ret = idr_alloc_u32(idr, ptr, &id, end > 0 ? end - 1 : INT_MAX, gfp); if (ret) return ret; return id; } EXPORT_SYMBOL_GPL(idr_alloc); /** * idr_alloc_cyclic() - Allocate an ID cyclically. * @idr: IDR handle. * @ptr: Pointer to be associated with the new ID. * @start: The minimum ID (inclusive). * @end: The maximum ID (exclusive). * @gfp: Memory allocation flags. * * Allocates an unused ID in the range specified by @start and @end. If * @end is <= 0, it is treated as one larger than %INT_MAX. This allows * callers to use @start + N as @end as long as N is within integer range. * The search for an unused ID will start at the last ID allocated and will * wrap around to @start if no free IDs are found before reaching @end. * * The caller should provide their own locking to ensure that two * concurrent modifications to the IDR are not possible. Read-only * accesses to the IDR may be done under the RCU read lock or may * exclude simultaneous writers. * * Return: The newly allocated ID, -ENOMEM if memory allocation failed, * or -ENOSPC if no free IDs could be found. */ int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) { u32 id = idr->idr_next; int err, max = end > 0 ? 
end - 1 : INT_MAX; if ((int)id < start) id = start; err = idr_alloc_u32(idr, ptr, &id, max, gfp); if ((err == -ENOSPC) && (id > start)) { id = start; err = idr_alloc_u32(idr, ptr, &id, max, gfp); } if (err) return err; idr->idr_next = id + 1; return id; } EXPORT_SYMBOL(idr_alloc_cyclic); /** * idr_remove() - Remove an ID from the IDR. * @idr: IDR handle. * @id: Pointer ID. * * Removes this ID from the IDR. If the ID was not previously in the IDR, * this function returns %NULL. * * Since this function modifies the IDR, the caller should provide their * own locking to ensure that concurrent modification of the same IDR is * not possible. * * Return: The pointer formerly associated with this ID. */ void *idr_remove(struct idr *idr, unsigned long id) { return radix_tree_delete_item(&idr->idr_rt, id - idr->idr_base, NULL); } EXPORT_SYMBOL_GPL(idr_remove); /** * idr_find() - Return pointer for given ID. * @idr: IDR handle. * @id: Pointer ID. * * Looks up the pointer associated with this ID. A %NULL pointer may * indicate that @id is not allocated or that the %NULL pointer was * associated with this ID. * * This function can be called under rcu_read_lock(), given that the leaf * pointers lifetimes are correctly managed. * * Return: The pointer associated with this ID. */ void *idr_find(const struct idr *idr, unsigned long id) { return radix_tree_lookup(&idr->idr_rt, id - idr->idr_base); } EXPORT_SYMBOL_GPL(idr_find); /** * idr_for_each() - Iterate through all stored pointers. * @idr: IDR handle. * @fn: Function to be called for each pointer. * @data: Data passed to callback function. * * The callback function will be called for each entry in @idr, passing * the ID, the entry and @data. * * If @fn returns anything other than %0, the iteration stops and that * value is returned from this function. * * idr_for_each() can be called concurrently with idr_alloc() and * idr_remove() if protected by RCU. Newly added entries may not be * seen and deleted entries may be seen, but adding and removing entries * will not cause other entries to be skipped, nor spurious ones to be seen. */ int idr_for_each(const struct idr *idr, int (*fn)(int id, void *p, void *data), void *data) { struct radix_tree_iter iter; void __rcu **slot; int base = idr->idr_base; radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) { int ret; unsigned long id = iter.index + base; if (WARN_ON_ONCE(id > INT_MAX)) break; ret = fn(id, rcu_dereference_raw(*slot), data); if (ret) return ret; } return 0; } EXPORT_SYMBOL(idr_for_each); /** * idr_get_next_ul() - Find next populated entry. * @idr: IDR handle. * @nextid: Pointer to an ID. * * Returns the next populated entry in the tree with an ID greater than * or equal to the value pointed to by @nextid. On exit, @nextid is updated * to the ID of the found value. To use in a loop, the value pointed to by * nextid must be incremented by the user. */ void *idr_get_next_ul(struct idr *idr, unsigned long *nextid) { struct radix_tree_iter iter; void __rcu **slot; void *entry = NULL; unsigned long base = idr->idr_base; unsigned long id = *nextid; id = (id < base) ? 0 : id - base; radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, id) { entry = rcu_dereference_raw(*slot); if (!entry) continue; if (!xa_is_internal(entry)) break; if (slot != &idr->idr_rt.xa_head && !xa_is_retry(entry)) break; slot = radix_tree_iter_retry(&iter); } if (!slot) return NULL; *nextid = iter.index + base; return entry; } EXPORT_SYMBOL(idr_get_next_ul); /** * idr_get_next() - Find next populated entry. * @idr: IDR handle. 
* @nextid: Pointer to an ID. * * Returns the next populated entry in the tree with an ID greater than * or equal to the value pointed to by @nextid. On exit, @nextid is updated * to the ID of the found value. To use in a loop, the value pointed to by * nextid must be incremented by the user. */ void *idr_get_next(struct idr *idr, int *nextid) { unsigned long id = *nextid; void *entry = idr_get_next_ul(idr, &id); if (WARN_ON_ONCE(id > INT_MAX)) return NULL; *nextid = id; return entry; } EXPORT_SYMBOL(idr_get_next); /** * idr_replace() - replace pointer for given ID. * @idr: IDR handle. * @ptr: New pointer to associate with the ID. * @id: ID to change. * * Replace the pointer registered with an ID and return the old value. * This function can be called under the RCU read lock concurrently with * idr_alloc() and idr_remove() (as long as the ID being removed is not * the one being replaced!). * * Returns: the old value on success. %-ENOENT indicates that @id was not * found. %-EINVAL indicates that @ptr was not valid. */ void *idr_replace(struct idr *idr, void *ptr, unsigned long id) { struct radix_tree_node *node; void __rcu **slot = NULL; void *entry; id -= idr->idr_base; entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot); if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE)) return ERR_PTR(-ENOENT); __radix_tree_replace(&idr->idr_rt, node, slot, ptr); return entry; } EXPORT_SYMBOL(idr_replace); /** * DOC: IDA description * * The IDA is an ID allocator which does not provide the ability to * associate an ID with a pointer. As such, it only needs to store one * bit per ID, and so is more space efficient than an IDR. To use an IDA, * define it using DEFINE_IDA() (or embed a &struct ida in a data structure, * then initialise it using ida_init()). To allocate a new ID, call * ida_alloc(), ida_alloc_min(), ida_alloc_max() or ida_alloc_range(). * To free an ID, call ida_free(). * * ida_destroy() can be used to dispose of an IDA without needing to * free the individual IDs in it. You can use ida_is_empty() to find * out whether the IDA has any IDs currently allocated. * * The IDA handles its own locking. It is safe to call any of the IDA * functions without synchronisation in your code. * * IDs are currently limited to the range [0-INT_MAX]. If this is an awkward * limitation, it should be quite straightforward to raise the maximum. */ /* * Developer's notes: * * The IDA uses the functionality provided by the XArray to store bitmaps in * each entry. The XA_FREE_MARK is only cleared when all bits in the bitmap * have been set. * * I considered telling the XArray that each slot is an order-10 node * and indexing by bit number, but the XArray can't allow a single multi-index * entry in the head, which would significantly increase memory consumption * for the IDA. So instead we divide the index by the number of bits in the * leaf bitmap before doing a radix tree lookup. * * As an optimisation, if there are only a few low bits set in any given * leaf, instead of allocating a 128-byte bitmap, we store the bits * as a value entry. Value entries never have the XA_FREE_MARK cleared * because we can always convert them into a bitmap entry. * * It would be possible to optimise further; once we've run out of a * single 128-byte bitmap, we currently switch to a 576-byte node, put * the 128-byte bitmap in the first entry and then start allocating extra * 128-byte entries. We could instead use the 512 bytes of the node's * data as a bitmap before moving to that scheme. 
I do not believe this * is a worthwhile optimisation; Rasmus Villemoes surveyed the current * users of the IDA and almost none of them use more than 1024 entries. * Those that do use more than the 8192 IDs that the 512 bytes would * provide. * * The IDA always uses a lock to alloc/free. If we add a 'test_bit' * equivalent, it will still need locking. Going to RCU lookup would require * using RCU to free bitmaps, and that's not trivial without embedding an * RCU head in the bitmap, which adds a 2-pointer overhead to each 128-byte * bitmap, which is excessive. */ /** * ida_alloc_range() - Allocate an unused ID. * @ida: IDA handle. * @min: Lowest ID to allocate. * @max: Highest ID to allocate. * @gfp: Memory allocation flags. * * Allocate an ID between @min and @max, inclusive. The allocated ID will * not exceed %INT_MAX, even if @max is larger. * * Context: Any context. It is safe to call this function without * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max, gfp_t gfp) { XA_STATE(xas, &ida->xa, min / IDA_BITMAP_BITS); unsigned bit = min % IDA_BITMAP_BITS; unsigned long flags; struct ida_bitmap *bitmap, *alloc = NULL; if ((int)min < 0) return -ENOSPC; if ((int)max < 0) max = INT_MAX; retry: xas_lock_irqsave(&xas, flags); next: bitmap = xas_find_marked(&xas, max / IDA_BITMAP_BITS, XA_FREE_MARK); if (xas.xa_index > min / IDA_BITMAP_BITS) bit = 0; if (xas.xa_index * IDA_BITMAP_BITS + bit > max) goto nospc; if (xa_is_value(bitmap)) { unsigned long tmp = xa_to_value(bitmap); if (bit < BITS_PER_XA_VALUE) { bit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE, bit); if (xas.xa_index * IDA_BITMAP_BITS + bit > max) goto nospc; if (bit < BITS_PER_XA_VALUE) { tmp |= 1UL << bit; xas_store(&xas, xa_mk_value(tmp)); goto out; } } bitmap = alloc; if (!bitmap) bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT); if (!bitmap) goto alloc; bitmap->bitmap[0] = tmp; xas_store(&xas, bitmap); if (xas_error(&xas)) { bitmap->bitmap[0] = 0; goto out; } } if (bitmap) { bit = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, bit); if (xas.xa_index * IDA_BITMAP_BITS + bit > max) goto nospc; if (bit == IDA_BITMAP_BITS) goto next; __set_bit(bit, bitmap->bitmap); if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS)) xas_clear_mark(&xas, XA_FREE_MARK); } else { if (bit < BITS_PER_XA_VALUE) { bitmap = xa_mk_value(1UL << bit); } else { bitmap = alloc; if (!bitmap) bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT); if (!bitmap) goto alloc; __set_bit(bit, bitmap->bitmap); } xas_store(&xas, bitmap); } out: xas_unlock_irqrestore(&xas, flags); if (xas_nomem(&xas, gfp)) { xas.xa_index = min / IDA_BITMAP_BITS; bit = min % IDA_BITMAP_BITS; goto retry; } if (bitmap != alloc) kfree(alloc); if (xas_error(&xas)) return xas_error(&xas); return xas.xa_index * IDA_BITMAP_BITS + bit; alloc: xas_unlock_irqrestore(&xas, flags); alloc = kzalloc(sizeof(*bitmap), gfp); if (!alloc) return -ENOMEM; xas_set(&xas, min / IDA_BITMAP_BITS); bit = min % IDA_BITMAP_BITS; goto retry; nospc: xas_unlock_irqrestore(&xas, flags); kfree(alloc); return -ENOSPC; } EXPORT_SYMBOL(ida_alloc_range); /** * ida_find_first_range - Get the lowest used ID. * @ida: IDA handle. * @min: Lowest ID to get. * @max: Highest ID to get. * * Get the lowest used ID between @min and @max, inclusive. The returned * ID will not exceed %INT_MAX, even if @max is larger. * * Context: Any context. Takes and releases the xa_lock. 
* Return: The lowest used ID, or errno if no used ID is found. */ int ida_find_first_range(struct ida *ida, unsigned int min, unsigned int max) { unsigned long index = min / IDA_BITMAP_BITS; unsigned int offset = min % IDA_BITMAP_BITS; unsigned long *addr, size, bit; unsigned long tmp = 0; unsigned long flags; void *entry; int ret; if ((int)min < 0) return -EINVAL; if ((int)max < 0) max = INT_MAX; xa_lock_irqsave(&ida->xa, flags); entry = xa_find(&ida->xa, &index, max / IDA_BITMAP_BITS, XA_PRESENT); if (!entry) { ret = -ENOENT; goto err_unlock; } if (index > min / IDA_BITMAP_BITS) offset = 0; if (index * IDA_BITMAP_BITS + offset > max) { ret = -ENOENT; goto err_unlock; } if (xa_is_value(entry)) { tmp = xa_to_value(entry); addr = &tmp; size = BITS_PER_XA_VALUE; } else { addr = ((struct ida_bitmap *)entry)->bitmap; size = IDA_BITMAP_BITS; } bit = find_next_bit(addr, size, offset); xa_unlock_irqrestore(&ida->xa, flags); if (bit == size || index * IDA_BITMAP_BITS + bit > max) return -ENOENT; return index * IDA_BITMAP_BITS + bit; err_unlock: xa_unlock_irqrestore(&ida->xa, flags); return ret; } EXPORT_SYMBOL(ida_find_first_range); /** * ida_free() - Release an allocated ID. * @ida: IDA handle. * @id: Previously allocated ID. * * Context: Any context. It is safe to call this function without * locking in your code. */ void ida_free(struct ida *ida, unsigned int id) { XA_STATE(xas, &ida->xa, id / IDA_BITMAP_BITS); unsigned bit = id % IDA_BITMAP_BITS; struct ida_bitmap *bitmap; unsigned long flags; if ((int)id < 0) return; xas_lock_irqsave(&xas, flags); bitmap = xas_load(&xas); if (xa_is_value(bitmap)) { unsigned long v = xa_to_value(bitmap); if (bit >= BITS_PER_XA_VALUE) goto err; if (!(v & (1UL << bit))) goto err; v &= ~(1UL << bit); if (!v) goto delete; xas_store(&xas, xa_mk_value(v)); } else { if (!bitmap || !test_bit(bit, bitmap->bitmap)) goto err; __clear_bit(bit, bitmap->bitmap); xas_set_mark(&xas, XA_FREE_MARK); if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) { kfree(bitmap); delete: xas_store(&xas, NULL); } } xas_unlock_irqrestore(&xas, flags); return; err: xas_unlock_irqrestore(&xas, flags); WARN(1, "ida_free called for id=%d which is not allocated.\n", id); } EXPORT_SYMBOL(ida_free); /** * ida_destroy() - Free all IDs. * @ida: IDA handle. * * Calling this function frees all IDs and releases all resources used * by an IDA. When this call returns, the IDA is empty and can be reused * or freed. If the IDA is already empty, there is no need to call this * function. * * Context: Any context. It is safe to call this function without * locking in your code. 
*/ void ida_destroy(struct ida *ida) { XA_STATE(xas, &ida->xa, 0); struct ida_bitmap *bitmap; unsigned long flags; xas_lock_irqsave(&xas, flags); xas_for_each(&xas, bitmap, ULONG_MAX) { if (!xa_is_value(bitmap)) kfree(bitmap); xas_store(&xas, NULL); } xas_unlock_irqrestore(&xas, flags); } EXPORT_SYMBOL(ida_destroy); #ifndef __KERNEL__ extern void xa_dump_index(unsigned long index, unsigned int shift); #define IDA_CHUNK_SHIFT ilog2(IDA_BITMAP_BITS) static void ida_dump_entry(void *entry, unsigned long index) { unsigned long i; if (!entry) return; if (xa_is_node(entry)) { struct xa_node *node = xa_to_node(entry); unsigned int shift = node->shift + IDA_CHUNK_SHIFT + XA_CHUNK_SHIFT; xa_dump_index(index * IDA_BITMAP_BITS, shift); xa_dump_node(node); for (i = 0; i < XA_CHUNK_SIZE; i++) ida_dump_entry(node->slots[i], index | (i << node->shift)); } else if (xa_is_value(entry)) { xa_dump_index(index * IDA_BITMAP_BITS, ilog2(BITS_PER_LONG)); pr_cont("value: data %lx [%px]\n", xa_to_value(entry), entry); } else { struct ida_bitmap *bitmap = entry; xa_dump_index(index * IDA_BITMAP_BITS, IDA_CHUNK_SHIFT); pr_cont("bitmap: %p data", bitmap); for (i = 0; i < IDA_BITMAP_LONGS; i++) pr_cont(" %lx", bitmap->bitmap[i]); pr_cont("\n"); } } static void ida_dump(struct ida *ida) { struct xarray *xa = &ida->xa; pr_debug("ida: %p node %p free %d\n", ida, xa->xa_head, xa->xa_flags >> ROOT_TAG_SHIFT); ida_dump_entry(xa->xa_head, 0); } #endif
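Illustrative only, not part of lib/idr.c: a minimal sketch of typical callers of the allocators above; my_ida, my_idr and the spinlock are hypothetical.

#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

static DEFINE_IDA(my_ida);		/* the IDA locks internally */
static DEFINE_IDR(my_idr);
static DEFINE_SPINLOCK(my_idr_lock);	/* IDR callers provide their own locking */

static int example(void *object)
{
	int ida_id, idr_id;

	/* IDA: just hand out a free number in [0, 1023]. */
	ida_id = ida_alloc_range(&my_ida, 0, 1023, GFP_KERNEL);
	if (ida_id < 0)
		return ida_id;

	/* IDR: associate the new ID with a pointer; preload so the
	 * allocation under the spinlock can use GFP_NOWAIT. */
	idr_preload(GFP_KERNEL);
	spin_lock(&my_idr_lock);
	idr_id = idr_alloc(&my_idr, object, 1, 0, GFP_NOWAIT);
	spin_unlock(&my_idr_lock);
	idr_preload_end();
	if (idr_id < 0) {
		ida_free(&my_ida, ida_id);
		return idr_id;
	}

	/* ... later, tear both down ... */
	spin_lock(&my_idr_lock);
	idr_remove(&my_idr, idr_id);
	spin_unlock(&my_idr_lock);
	ida_free(&my_ida, ida_id);
	return 0;
}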
// SPDX-License-Identifier: GPL-2.0-only /* * Vxlan vni filter for collect metadata mode * * Authors: Roopa Prabhu <roopa@nvidia.com> * */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/etherdevice.h> #include <linux/rhashtable.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/vxlan.h> #include "vxlan_private.h" static inline int vxlan_vni_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { const struct vxlan_vni_node *vnode = ptr; __be32 vni = *(__be32 *)arg->key; return vnode->vni != vni; } const struct rhashtable_params vxlan_vni_rht_params = { .head_offset = offsetof(struct vxlan_vni_node, vnode), .key_offset = offsetof(struct vxlan_vni_node, vni), .key_len = sizeof(__be32), .nelem_hint = 3, .max_size = VXLAN_N_VID, .obj_cmpfn = vxlan_vni_cmp, .automatic_shrinking = true, }; static void vxlan_vs_add_del_vninode(struct vxlan_dev *vxlan, struct vxlan_vni_node *v, bool del) { struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); struct vxlan_dev_node *node; struct vxlan_sock *vs; spin_lock(&vn->sock_lock); if (del) { if (!hlist_unhashed(&v->hlist4.hlist)) hlist_del_init_rcu(&v->hlist4.hlist); #if IS_ENABLED(CONFIG_IPV6) if (!hlist_unhashed(&v->hlist6.hlist)) hlist_del_init_rcu(&v->hlist6.hlist); #endif goto out; } #if IS_ENABLED(CONFIG_IPV6) vs = rtnl_dereference(vxlan->vn6_sock); if (vs && v) { node = &v->hlist6; hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni)); } #endif vs = rtnl_dereference(vxlan->vn4_sock); if (vs && v) { node = &v->hlist4; hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni)); } out: spin_unlock(&vn->sock_lock); } void vxlan_vs_add_vnigrp(struct vxlan_dev *vxlan, struct vxlan_sock *vs, bool ipv6) { struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp); struct vxlan_vni_node *v, *tmp; struct vxlan_dev_node *node; if (!vg) return; spin_lock(&vn->sock_lock); list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { #if IS_ENABLED(CONFIG_IPV6) if (ipv6) node = &v->hlist6; else #endif node = &v->hlist4; node->vxlan = vxlan; hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni)); } spin_unlock(&vn->sock_lock); } void vxlan_vs_del_vnigrp(struct vxlan_dev *vxlan) { struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp); struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); struct vxlan_vni_node *v, *tmp; if (!vg) return; spin_lock(&vn->sock_lock); list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { hlist_del_init_rcu(&v->hlist4.hlist); #if IS_ENABLED(CONFIG_IPV6) hlist_del_init_rcu(&v->hlist6.hlist); #endif } spin_unlock(&vn->sock_lock); } static void vxlan_vnifilter_stats_get(const struct vxlan_vni_node *vninode, struct vxlan_vni_stats *dest) { int i; memset(dest, 0, sizeof(*dest)); for_each_possible_cpu(i) { struct vxlan_vni_stats_pcpu *pstats; struct vxlan_vni_stats temp; unsigned int start; pstats = per_cpu_ptr(vninode->stats, i); do { start = u64_stats_fetch_begin(&pstats->syncp); memcpy(&temp,
&pstats->stats, sizeof(temp)); } while (u64_stats_fetch_retry(&pstats->syncp, start)); dest->rx_packets += temp.rx_packets; dest->rx_bytes += temp.rx_bytes; dest->rx_drops += temp.rx_drops; dest->rx_errors += temp.rx_errors; dest->tx_packets += temp.tx_packets; dest->tx_bytes += temp.tx_bytes; dest->tx_drops += temp.tx_drops; dest->tx_errors += temp.tx_errors; } } static void vxlan_vnifilter_stats_add(struct vxlan_vni_node *vninode, int type, unsigned int len) { struct vxlan_vni_stats_pcpu *pstats = this_cpu_ptr(vninode->stats); u64_stats_update_begin(&pstats->syncp); switch (type) { case VXLAN_VNI_STATS_RX: pstats->stats.rx_bytes += len; pstats->stats.rx_packets++; break; case VXLAN_VNI_STATS_RX_DROPS: pstats->stats.rx_drops++; break; case VXLAN_VNI_STATS_RX_ERRORS: pstats->stats.rx_errors++; break; case VXLAN_VNI_STATS_TX: pstats->stats.tx_bytes += len; pstats->stats.tx_packets++; break; case VXLAN_VNI_STATS_TX_DROPS: pstats->stats.tx_drops++; break; case VXLAN_VNI_STATS_TX_ERRORS: pstats->stats.tx_errors++; break; } u64_stats_update_end(&pstats->syncp); } void vxlan_vnifilter_count(struct vxlan_dev *vxlan, __be32 vni, struct vxlan_vni_node *vninode, int type, unsigned int len) { struct vxlan_vni_node *vnode; if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER)) return; if (vninode) { vnode = vninode; } else { vnode = vxlan_vnifilter_lookup(vxlan, vni); if (!vnode) return; } vxlan_vnifilter_stats_add(vnode, type, len); } static u32 vnirange(struct vxlan_vni_node *vbegin, struct vxlan_vni_node *vend) { return (be32_to_cpu(vend->vni) - be32_to_cpu(vbegin->vni)); } static size_t vxlan_vnifilter_entry_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct tunnel_msg)) + nla_total_size(0) /* VXLAN_VNIFILTER_ENTRY */ + nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_START */ + nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_END */ + nla_total_size(sizeof(struct in6_addr));/* VXLAN_VNIFILTER_ENTRY_GROUP{6} */ } static int __vnifilter_entry_fill_stats(struct sk_buff *skb, const struct vxlan_vni_node *vbegin) { struct vxlan_vni_stats vstats; struct nlattr *vstats_attr; vstats_attr = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY_STATS); if (!vstats_attr) goto out_stats_err; vxlan_vnifilter_stats_get(vbegin, &vstats); if (nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_BYTES, vstats.rx_bytes, VNIFILTER_ENTRY_STATS_PAD) || nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_PKTS, vstats.rx_packets, VNIFILTER_ENTRY_STATS_PAD) || nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_DROPS, vstats.rx_drops, VNIFILTER_ENTRY_STATS_PAD) || nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_ERRORS, vstats.rx_errors, VNIFILTER_ENTRY_STATS_PAD) || nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_BYTES, vstats.tx_bytes, VNIFILTER_ENTRY_STATS_PAD) || nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_PKTS, vstats.tx_packets, VNIFILTER_ENTRY_STATS_PAD) || nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_DROPS, vstats.tx_drops, VNIFILTER_ENTRY_STATS_PAD) || nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_ERRORS, vstats.tx_errors, VNIFILTER_ENTRY_STATS_PAD)) goto out_stats_err; nla_nest_end(skb, vstats_attr); return 0; out_stats_err: nla_nest_cancel(skb, vstats_attr); return -EMSGSIZE; } static bool vxlan_fill_vni_filter_entry(struct sk_buff *skb, struct vxlan_vni_node *vbegin, struct vxlan_vni_node *vend, bool fill_stats) { struct nlattr *ventry; u32 vs = be32_to_cpu(vbegin->vni); u32 ve = 0; if (vbegin != vend) ve = be32_to_cpu(vend->vni); ventry = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY); if (!ventry) return false; if 
(nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_START, vs)) goto out_err; if (ve && nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_END, ve)) goto out_err; if (!vxlan_addr_any(&vbegin->remote_ip)) { if (vbegin->remote_ip.sa.sa_family == AF_INET) { if (nla_put_in_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP, vbegin->remote_ip.sin.sin_addr.s_addr)) goto out_err; #if IS_ENABLED(CONFIG_IPV6) } else { if (nla_put_in6_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP6, &vbegin->remote_ip.sin6.sin6_addr)) goto out_err; #endif } } if (fill_stats && __vnifilter_entry_fill_stats(skb, vbegin)) goto out_err; nla_nest_end(skb, ventry); return true; out_err: nla_nest_cancel(skb, ventry); return false; } static void vxlan_vnifilter_notify(const struct vxlan_dev *vxlan, struct vxlan_vni_node *vninode, int cmd) { struct tunnel_msg *tmsg; struct sk_buff *skb; struct nlmsghdr *nlh; struct net *net = dev_net(vxlan->dev); int err = -ENOBUFS; skb = nlmsg_new(vxlan_vnifilter_entry_nlmsg_size(), GFP_KERNEL); if (!skb) goto out_err; err = -EMSGSIZE; nlh = nlmsg_put(skb, 0, 0, cmd, sizeof(*tmsg), 0); if (!nlh) goto out_err; tmsg = nlmsg_data(nlh); memset(tmsg, 0, sizeof(*tmsg)); tmsg->family = AF_BRIDGE; tmsg->ifindex = vxlan->dev->ifindex; if (!vxlan_fill_vni_filter_entry(skb, vninode, vninode, false)) goto out_err; nlmsg_end(skb, nlh); rtnl_notify(skb, net, 0, RTNLGRP_TUNNEL, NULL, GFP_KERNEL); return; out_err: rtnl_set_sk_err(net, RTNLGRP_TUNNEL, err); kfree_skb(skb); } static int vxlan_vnifilter_dump_dev(const struct net_device *dev, struct sk_buff *skb, struct netlink_callback *cb) { struct vxlan_vni_node *tmp, *v, *vbegin = NULL, *vend = NULL; struct vxlan_dev *vxlan = netdev_priv(dev); struct tunnel_msg *new_tmsg, *tmsg; int idx = 0, s_idx = cb->args[1]; struct vxlan_vni_group *vg; struct nlmsghdr *nlh; bool dump_stats; int err = 0; if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER)) return -EINVAL; /* RCU needed because of the vni locking rules (rcu || rtnl) */ vg = rcu_dereference(vxlan->vnigrp); if (!vg || !vg->num_vnis) return 0; tmsg = nlmsg_data(cb->nlh); dump_stats = !!(tmsg->flags & TUNNEL_MSG_FLAG_STATS); nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWTUNNEL, sizeof(*new_tmsg), NLM_F_MULTI); if (!nlh) return -EMSGSIZE; new_tmsg = nlmsg_data(nlh); memset(new_tmsg, 0, sizeof(*new_tmsg)); new_tmsg->family = PF_BRIDGE; new_tmsg->ifindex = dev->ifindex; list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { if (idx < s_idx) { idx++; continue; } if (!vbegin) { vbegin = v; vend = v; continue; } if (!dump_stats && vnirange(vend, v) == 1 && vxlan_addr_equal(&v->remote_ip, &vend->remote_ip)) { goto update_end; } else { if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend, dump_stats)) { err = -EMSGSIZE; break; } idx += vnirange(vbegin, vend) + 1; vbegin = v; } update_end: vend = v; } if (!err && vbegin) { if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend, dump_stats)) err = -EMSGSIZE; } cb->args[1] = err ? 
idx : 0; nlmsg_end(skb, nlh); return err; } static int vxlan_vnifilter_dump(struct sk_buff *skb, struct netlink_callback *cb) { int idx = 0, err = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); struct tunnel_msg *tmsg; struct net_device *dev; if (cb->nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct tunnel_msg))) { NL_SET_ERR_MSG(cb->extack, "Invalid msg length"); return -EINVAL; } tmsg = nlmsg_data(cb->nlh); if (tmsg->flags & ~TUNNEL_MSG_VALID_USER_FLAGS) { NL_SET_ERR_MSG(cb->extack, "Invalid tunnelmsg flags in ancillary header"); return -EINVAL; } rcu_read_lock(); if (tmsg->ifindex) { dev = dev_get_by_index_rcu(net, tmsg->ifindex); if (!dev) { err = -ENODEV; goto out_err; } if (!netif_is_vxlan(dev)) { NL_SET_ERR_MSG(cb->extack, "The device is not a vxlan device"); err = -EINVAL; goto out_err; } err = vxlan_vnifilter_dump_dev(dev, skb, cb); /* if the dump completed without an error we return 0 here */ if (err != -EMSGSIZE) goto out_err; } else { for_each_netdev_rcu(net, dev) { if (!netif_is_vxlan(dev)) continue; if (idx < s_idx) goto skip; err = vxlan_vnifilter_dump_dev(dev, skb, cb); if (err == -EMSGSIZE) break; skip: idx++; } } cb->args[0] = idx; rcu_read_unlock(); return skb->len; out_err: rcu_read_unlock(); return err; } static const struct nla_policy vni_filter_entry_policy[VXLAN_VNIFILTER_ENTRY_MAX + 1] = { [VXLAN_VNIFILTER_ENTRY_START] = { .type = NLA_U32 }, [VXLAN_VNIFILTER_ENTRY_END] = { .type = NLA_U32 }, [VXLAN_VNIFILTER_ENTRY_GROUP] = { .type = NLA_BINARY, .len = sizeof_field(struct iphdr, daddr) }, [VXLAN_VNIFILTER_ENTRY_GROUP6] = { .type = NLA_BINARY, .len = sizeof(struct in6_addr) }, }; static const struct nla_policy vni_filter_policy[VXLAN_VNIFILTER_MAX + 1] = { [VXLAN_VNIFILTER_ENTRY] = { .type = NLA_NESTED }, }; static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni, union vxlan_addr *old_remote_ip, union vxlan_addr *remote_ip, struct netlink_ext_ack *extack) { struct vxlan_rdst *dst = &vxlan->default_dst; u32 hash_index; int err = 0; hash_index = fdb_head_index(vxlan, all_zeros_mac, vni); spin_lock_bh(&vxlan->hash_lock[hash_index]); if (remote_ip && !vxlan_addr_any(remote_ip)) { err = vxlan_fdb_update(vxlan, all_zeros_mac, remote_ip, NUD_REACHABLE | NUD_PERMANENT, NLM_F_APPEND | NLM_F_CREATE, vxlan->cfg.dst_port, vni, vni, dst->remote_ifindex, NTF_SELF, 0, true, extack); if (err) { spin_unlock_bh(&vxlan->hash_lock[hash_index]); return err; } } if (old_remote_ip && !vxlan_addr_any(old_remote_ip)) { __vxlan_fdb_delete(vxlan, all_zeros_mac, *old_remote_ip, vxlan->cfg.dst_port, vni, vni, dst->remote_ifindex, true); } spin_unlock_bh(&vxlan->hash_lock[hash_index]); return err; } static int vxlan_vni_update_group(struct vxlan_dev *vxlan, struct vxlan_vni_node *vninode, union vxlan_addr *group, bool create, bool *changed, struct netlink_ext_ack *extack) { struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); struct vxlan_rdst *dst = &vxlan->default_dst; union vxlan_addr *newrip = NULL, *oldrip = NULL; union vxlan_addr old_remote_ip; int ret = 0; memcpy(&old_remote_ip, &vninode->remote_ip, sizeof(old_remote_ip)); /* if per vni remote ip is not present use vxlan dev * default dst remote ip for fdb entry */ if (group && !vxlan_addr_any(group)) { newrip = group; } else { if (!vxlan_addr_any(&dst->remote_ip)) newrip = &dst->remote_ip; } /* if old rip exists, and no newrip, * explicitly delete old rip */ if (!newrip && !vxlan_addr_any(&old_remote_ip)) oldrip = &old_remote_ip; if (!newrip && !oldrip) return 0; if (!create && oldrip && newrip && 
vxlan_addr_equal(oldrip, newrip)) return 0; ret = vxlan_update_default_fdb_entry(vxlan, vninode->vni, oldrip, newrip, extack); if (ret) goto out; if (group) memcpy(&vninode->remote_ip, group, sizeof(vninode->remote_ip)); if (vxlan->dev->flags & IFF_UP) { if (vxlan_addr_multicast(&old_remote_ip) && !vxlan_group_used(vn, vxlan, vninode->vni, &old_remote_ip, vxlan->default_dst.remote_ifindex)) { ret = vxlan_igmp_leave(vxlan, &old_remote_ip, 0); if (ret) goto out; } if (vxlan_addr_multicast(&vninode->remote_ip)) { ret = vxlan_igmp_join(vxlan, &vninode->remote_ip, 0); if (ret == -EADDRINUSE) ret = 0; if (ret) goto out; } } *changed = true; return 0; out: return ret; } int vxlan_vnilist_update_group(struct vxlan_dev *vxlan, union vxlan_addr *old_remote_ip, union vxlan_addr *new_remote_ip, struct netlink_ext_ack *extack) { struct list_head *headp, *hpos; struct vxlan_vni_group *vg; struct vxlan_vni_node *vent; int ret; vg = rtnl_dereference(vxlan->vnigrp); headp = &vg->vni_list; list_for_each_prev(hpos, headp) { vent = list_entry(hpos, struct vxlan_vni_node, vlist); if (vxlan_addr_any(&vent->remote_ip)) { ret = vxlan_update_default_fdb_entry(vxlan, vent->vni, old_remote_ip, new_remote_ip, extack); if (ret) return ret; } } return 0; } static void vxlan_vni_delete_group(struct vxlan_dev *vxlan, struct vxlan_vni_node *vninode) { struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); struct vxlan_rdst *dst = &vxlan->default_dst; /* if per vni remote_ip not present, delete the * default dst remote_ip previously added for this vni */ if (!vxlan_addr_any(&vninode->remote_ip) || !vxlan_addr_any(&dst->remote_ip)) { u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, vninode->vni); spin_lock_bh(&vxlan->hash_lock[hash_index]); __vxlan_fdb_delete(vxlan, all_zeros_mac, (vxlan_addr_any(&vninode->remote_ip) ? 
dst->remote_ip : vninode->remote_ip), vxlan->cfg.dst_port, vninode->vni, vninode->vni, dst->remote_ifindex, true); spin_unlock_bh(&vxlan->hash_lock[hash_index]); } if (vxlan->dev->flags & IFF_UP) { if (vxlan_addr_multicast(&vninode->remote_ip) && !vxlan_group_used(vn, vxlan, vninode->vni, &vninode->remote_ip, dst->remote_ifindex)) { vxlan_igmp_leave(vxlan, &vninode->remote_ip, 0); } } } static int vxlan_vni_update(struct vxlan_dev *vxlan, struct vxlan_vni_group *vg, __be32 vni, union vxlan_addr *group, bool *changed, struct netlink_ext_ack *extack) { struct vxlan_vni_node *vninode; int ret; vninode = rhashtable_lookup_fast(&vg->vni_hash, &vni, vxlan_vni_rht_params); if (!vninode) return 0; ret = vxlan_vni_update_group(vxlan, vninode, group, false, changed, extack); if (ret) return ret; if (changed) vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL); return 0; } static void __vxlan_vni_add_list(struct vxlan_vni_group *vg, struct vxlan_vni_node *v) { struct list_head *headp, *hpos; struct vxlan_vni_node *vent; headp = &vg->vni_list; list_for_each_prev(hpos, headp) { vent = list_entry(hpos, struct vxlan_vni_node, vlist); if (be32_to_cpu(v->vni) < be32_to_cpu(vent->vni)) continue; else break; } list_add_rcu(&v->vlist, hpos); vg->num_vnis++; } static void __vxlan_vni_del_list(struct vxlan_vni_group *vg, struct vxlan_vni_node *v) { list_del_rcu(&v->vlist); vg->num_vnis--; } static struct vxlan_vni_node *vxlan_vni_alloc(struct vxlan_dev *vxlan, __be32 vni) { struct vxlan_vni_node *vninode; vninode = kzalloc(sizeof(*vninode), GFP_KERNEL); if (!vninode) return NULL; vninode->stats = netdev_alloc_pcpu_stats(struct vxlan_vni_stats_pcpu); if (!vninode->stats) { kfree(vninode); return NULL; } vninode->vni = vni; vninode->hlist4.vxlan = vxlan; #if IS_ENABLED(CONFIG_IPV6) vninode->hlist6.vxlan = vxlan; #endif return vninode; } static void vxlan_vni_free(struct vxlan_vni_node *vninode) { free_percpu(vninode->stats); kfree(vninode); } static int vxlan_vni_add(struct vxlan_dev *vxlan, struct vxlan_vni_group *vg, u32 vni, union vxlan_addr *group, struct netlink_ext_ack *extack) { struct vxlan_vni_node *vninode; __be32 v = cpu_to_be32(vni); bool changed = false; int err = 0; if (vxlan_vnifilter_lookup(vxlan, v)) return vxlan_vni_update(vxlan, vg, v, group, &changed, extack); err = vxlan_vni_in_use(vxlan->net, vxlan, &vxlan->cfg, v); if (err) { NL_SET_ERR_MSG(extack, "VNI in use"); return err; } vninode = vxlan_vni_alloc(vxlan, v); if (!vninode) return -ENOMEM; err = rhashtable_lookup_insert_fast(&vg->vni_hash, &vninode->vnode, vxlan_vni_rht_params); if (err) { vxlan_vni_free(vninode); return err; } __vxlan_vni_add_list(vg, vninode); if (vxlan->dev->flags & IFF_UP) vxlan_vs_add_del_vninode(vxlan, vninode, false); err = vxlan_vni_update_group(vxlan, vninode, group, true, &changed, extack); if (changed) vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL); return err; } static void vxlan_vni_node_rcu_free(struct rcu_head *rcu) { struct vxlan_vni_node *v; v = container_of(rcu, struct vxlan_vni_node, rcu); vxlan_vni_free(v); } static int vxlan_vni_del(struct vxlan_dev *vxlan, struct vxlan_vni_group *vg, u32 vni, struct netlink_ext_ack *extack) { struct vxlan_vni_node *vninode; __be32 v = cpu_to_be32(vni); int err = 0; vg = rtnl_dereference(vxlan->vnigrp); vninode = rhashtable_lookup_fast(&vg->vni_hash, &v, vxlan_vni_rht_params); if (!vninode) { err = -ENOENT; goto out; } vxlan_vni_delete_group(vxlan, vninode); err = rhashtable_remove_fast(&vg->vni_hash, &vninode->vnode, vxlan_vni_rht_params); if (err) goto 
out; __vxlan_vni_del_list(vg, vninode); vxlan_vnifilter_notify(vxlan, vninode, RTM_DELTUNNEL); if (vxlan->dev->flags & IFF_UP) vxlan_vs_add_del_vninode(vxlan, vninode, true); call_rcu(&vninode->rcu, vxlan_vni_node_rcu_free); return 0; out: return err; } static int vxlan_vni_add_del(struct vxlan_dev *vxlan, __u32 start_vni, __u32 end_vni, union vxlan_addr *group, int cmd, struct netlink_ext_ack *extack) { struct vxlan_vni_group *vg; int v, err = 0; vg = rtnl_dereference(vxlan->vnigrp); for (v = start_vni; v <= end_vni; v++) { switch (cmd) { case RTM_NEWTUNNEL: err = vxlan_vni_add(vxlan, vg, v, group, extack); break; case RTM_DELTUNNEL: err = vxlan_vni_del(vxlan, vg, v, extack); break; default: err = -EOPNOTSUPP; break; } if (err) goto out; } return 0; out: return err; } static int vxlan_process_vni_filter(struct vxlan_dev *vxlan, struct nlattr *nlvnifilter, int cmd, struct netlink_ext_ack *extack) { struct nlattr *vattrs[VXLAN_VNIFILTER_ENTRY_MAX + 1]; u32 vni_start = 0, vni_end = 0; union vxlan_addr group; int err; err = nla_parse_nested(vattrs, VXLAN_VNIFILTER_ENTRY_MAX, nlvnifilter, vni_filter_entry_policy, extack); if (err) return err; if (vattrs[VXLAN_VNIFILTER_ENTRY_START]) { vni_start = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_START]); vni_end = vni_start; } if (vattrs[VXLAN_VNIFILTER_ENTRY_END]) vni_end = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_END]); if (!vni_start && !vni_end) { NL_SET_ERR_MSG_ATTR(extack, nlvnifilter, "vni start nor end found in vni entry"); return -EINVAL; } if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]) { group.sin.sin_addr.s_addr = nla_get_in_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]); group.sa.sa_family = AF_INET; } else if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]) { group.sin6.sin6_addr = nla_get_in6_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]); group.sa.sa_family = AF_INET6; } else { memset(&group, 0, sizeof(group)); } if (vxlan_addr_multicast(&group) && !vxlan->default_dst.remote_ifindex) { NL_SET_ERR_MSG(extack, "Local interface required for multicast remote group"); return -EINVAL; } err = vxlan_vni_add_del(vxlan, vni_start, vni_end, &group, cmd, extack); if (err) return err; return 0; } void vxlan_vnigroup_uninit(struct vxlan_dev *vxlan) { struct vxlan_vni_node *v, *tmp; struct vxlan_vni_group *vg; vg = rtnl_dereference(vxlan->vnigrp); list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) { rhashtable_remove_fast(&vg->vni_hash, &v->vnode, vxlan_vni_rht_params); hlist_del_init_rcu(&v->hlist4.hlist); #if IS_ENABLED(CONFIG_IPV6) hlist_del_init_rcu(&v->hlist6.hlist); #endif __vxlan_vni_del_list(vg, v); vxlan_vnifilter_notify(vxlan, v, RTM_DELTUNNEL); call_rcu(&v->rcu, vxlan_vni_node_rcu_free); } rhashtable_destroy(&vg->vni_hash); kfree(vg); } int vxlan_vnigroup_init(struct vxlan_dev *vxlan) { struct vxlan_vni_group *vg; int ret; vg = kzalloc(sizeof(*vg), GFP_KERNEL); if (!vg) return -ENOMEM; ret = rhashtable_init(&vg->vni_hash, &vxlan_vni_rht_params); if (ret) { kfree(vg); return ret; } INIT_LIST_HEAD(&vg->vni_list); rcu_assign_pointer(vxlan->vnigrp, vg); return 0; } static int vxlan_vnifilter_process(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct tunnel_msg *tmsg; struct vxlan_dev *vxlan; struct net_device *dev; struct nlattr *attr; int err, vnis = 0; int rem; /* this should validate the header and check for remaining bytes */ err = nlmsg_parse(nlh, sizeof(*tmsg), NULL, VXLAN_VNIFILTER_MAX, vni_filter_policy, extack); if (err < 0) return err; tmsg = nlmsg_data(nlh); dev = 
__dev_get_by_index(net, tmsg->ifindex); if (!dev) return -ENODEV; if (!netif_is_vxlan(dev)) { NL_SET_ERR_MSG_MOD(extack, "The device is not a vxlan device"); return -EINVAL; } vxlan = netdev_priv(dev); if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER)) return -EOPNOTSUPP; nlmsg_for_each_attr(attr, nlh, sizeof(*tmsg), rem) { switch (nla_type(attr)) { case VXLAN_VNIFILTER_ENTRY: err = vxlan_process_vni_filter(vxlan, attr, nlh->nlmsg_type, extack); break; default: continue; } vnis++; if (err) break; } if (!vnis) { NL_SET_ERR_MSG_MOD(extack, "No vnis found to process"); err = -EINVAL; } return err; } static const struct rtnl_msg_handler vxlan_vnifilter_rtnl_msg_handlers[] = { {THIS_MODULE, PF_BRIDGE, RTM_GETTUNNEL, NULL, vxlan_vnifilter_dump, 0}, {THIS_MODULE, PF_BRIDGE, RTM_NEWTUNNEL, vxlan_vnifilter_process, NULL, 0}, {THIS_MODULE, PF_BRIDGE, RTM_DELTUNNEL, vxlan_vnifilter_process, NULL, 0}, }; int vxlan_vnifilter_init(void) { return rtnl_register_many(vxlan_vnifilter_rtnl_msg_handlers); } void vxlan_vnifilter_uninit(void) { rtnl_unregister_many(vxlan_vnifilter_rtnl_msg_handlers); }
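The dump path above (vxlan_vnifilter_dump_dev) reports contiguous VNIs that share a remote address as a single [start, end] netlink entry instead of one entry per VNI. Below is a minimal userspace sketch of just that coalescing step, under the assumption of a plain sorted uint32_t array and an illustrative coalesce_vnis() helper; the driver itself walks struct vxlan_vni_node entries kept sorted on insert and additionally requires matching remote_ip (and no stats dump) before merging.

/*
 * Illustrative userspace sketch (not kernel code) of the range coalescing
 * used when filling VXLAN_VNIFILTER_ENTRY_START/END: consecutive VNIs are
 * folded into one start/end pair, everything else starts a new entry.
 */
#include <stdint.h>
#include <stdio.h>

static void coalesce_vnis(const uint32_t *vni, size_t n)
{
	size_t i;
	uint32_t start, end;

	if (!n)
		return;

	start = end = vni[0];
	for (i = 1; i < n; i++) {
		if (vni[i] == end + 1) {	/* consecutive: extend range */
			end = vni[i];
			continue;
		}
		/* gap: emit the finished range, start a new one */
		printf("vni %u-%u\n", (unsigned)start, (unsigned)end);
		start = end = vni[i];
	}
	printf("vni %u-%u\n", (unsigned)start, (unsigned)end);
}

int main(void)
{
	/* sorted input, as __vxlan_vni_add_list() keeps vni_list sorted */
	uint32_t vnis[] = { 10, 11, 12, 20, 30, 31 };

	coalesce_vnis(vnis, sizeof(vnis) / sizeof(vnis[0]));
	return 0;
}

For this input the sketch prints "vni 10-12", "vni 20-20" and "vni 30-31", mirroring how a dump of those VNIs would be packed into three nested entries.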
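The per-VNI counters above are kept per CPU: vxlan_vnifilter_stats_add() only touches the local CPU's slot, and vxlan_vnifilter_stats_get() folds all slots into one snapshot at dump time. The following is a rough userspace analogue of that accumulate-then-sum pattern; the fixed NSLOTS array and the stats_add()/stats_get() names are assumptions for illustration, and the u64_stats_sync retry loop the kernel needs for tear-free 64-bit reads on 32-bit machines is deliberately omitted.

/*
 * Minimal userspace sketch of the per-CPU counter pattern: writers bump a
 * private slot, the reader sums every slot into one aggregate.
 */
#include <stdint.h>
#include <stdio.h>

#define NSLOTS 4			/* stand-in for the number of CPUs */

struct vni_stats { uint64_t rx_packets, rx_bytes; };

static struct vni_stats slots[NSLOTS];	/* zero-initialized */

static void stats_add(int slot, unsigned int len)
{
	slots[slot].rx_packets++;	/* hot path: touch only own slot */
	slots[slot].rx_bytes += len;
}

static struct vni_stats stats_get(void)
{
	struct vni_stats sum = { 0, 0 };
	int i;

	for (i = 0; i < NSLOTS; i++) {	/* slow path: fold all slots */
		sum.rx_packets += slots[i].rx_packets;
		sum.rx_bytes += slots[i].rx_bytes;
	}
	return sum;
}

int main(void)
{
	stats_add(0, 100);
	stats_add(1, 60);
	stats_add(1, 40);

	struct vni_stats s = stats_get();

	printf("packets=%llu bytes=%llu\n",
	       (unsigned long long)s.rx_packets,
	       (unsigned long long)s.rx_bytes);
	return 0;
}

The point of the split is that the update path never contends with other CPUs; only the comparatively rare reader pays the cost of walking every slot.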
3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 // SPDX-License-Identifier: GPL-2.0-or-later /* * TUN - Universal TUN/TAP device driver. * Copyright (C) 1999-2002 Maxim Krasnyansky <maxk@qualcomm.com> * * $Id: tun.c,v 1.15 2002/03/01 02:44:24 maxk Exp $ */ /* * Changes: * * Mike Kershaw <dragorn@kismetwireless.net> 2005/08/14 * Add TUNSETLINK ioctl to set the link encapsulation * * Mark Smith <markzzzsmith@yahoo.com.au> * Use eth_random_addr() for tap MAC address. * * Harald Roelle <harald.roelle@ifi.lmu.de> 2004/04/20 * Fixes in packet dropping, queue length setting and queue wakeup. * Increased default tx queue length. * Added ethtool API. * Minor cleanups * * Daniel Podlejski <underley@underley.eu.org> * Modifications for 2.3.99-pre5 kernel. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define DRV_NAME "tun" #define DRV_VERSION "1.6" #define DRV_DESCRIPTION "Universal TUN/TAP device driver" #define DRV_COPYRIGHT "(C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>" #include <linux/module.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/major.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/fcntl.h> #include <linux/init.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/miscdevice.h> #include <linux/ethtool.h> #include <linux/rtnetlink.h> #include <linux/compat.h> #include <linux/if.h> #include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/if_tun.h> #include <linux/if_vlan.h> #include <linux/crc32.h> #include <linux/math.h> #include <linux/nsproxy.h> #include <linux/virtio_net.h> #include <linux/rcupdate.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/rtnetlink.h> #include <net/sock.h> #include <net/xdp.h> #include <net/ip_tunnels.h> #include <linux/seq_file.h> #include <linux/uio.h> #include <linux/skb_array.h> #include <linux/bpf.h> #include <linux/bpf_trace.h> #include <linux/mutex.h> #include <linux/ieee802154.h> #include <uapi/linux/if_ltalk.h> #include <uapi/linux/if_fddi.h> #include <uapi/linux/if_hippi.h> #include <uapi/linux/if_fc.h> #include <net/ax25.h> #include <net/rose.h> #include <net/6lowpan.h> #include <net/rps.h> #include <linux/uaccess.h> #include <linux/proc_fs.h> #include "tun_vnet.h" static void tun_default_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd); #define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) /* TUN device flags */ /* IFF_ATTACH_QUEUE is never stored in device flags, * overload it to mean fasync when stored there. 
*/ #define TUN_FASYNC IFF_ATTACH_QUEUE #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \ IFF_MULTI_QUEUE | IFF_NAPI | IFF_NAPI_FRAGS) #define GOODCOPY_LEN 128 #define FLT_EXACT_COUNT 8 struct tap_filter { unsigned int count; /* Number of addrs. Zero means disabled */ u32 mask[2]; /* Mask of the hashed addrs */ unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN]; }; /* MAX_TAP_QUEUES 256 is chosen to allow rx/tx queues to be equal * to max number of VCPUs in guest. */ #define MAX_TAP_QUEUES 256 #define MAX_TAP_FLOWS 4096 #define TUN_FLOW_EXPIRE (3 * HZ) /* A tun_file connects an open character device to a tuntap netdevice. It * also contains all socket related structures (except sock_fprog and tap_filter) * to serve as one transmit queue for tuntap device. The sock_fprog and * tap_filter were kept in tun_struct since they were used for filtering for the * netdevice not for a specific queue (at least I didn't see the requirement for * this). * * RCU usage: * The tun_file and tun_struct are loosely coupled, the pointer from one to the * other can only be read while rcu_read_lock or rtnl_lock is held. */ struct tun_file { struct sock sk; struct socket socket; struct tun_struct __rcu *tun; struct fasync_struct *fasync; /* only used for fasnyc */ unsigned int flags; union { u16 queue_index; unsigned int ifindex; }; struct napi_struct napi; bool napi_enabled; bool napi_frags_enabled; struct mutex napi_mutex; /* Protects access to the above napi */ struct list_head next; struct tun_struct *detached; struct ptr_ring tx_ring; struct xdp_rxq_info xdp_rxq; }; struct tun_page { struct page *page; int count; }; struct tun_flow_entry { struct hlist_node hash_link; struct rcu_head rcu; struct tun_struct *tun; u32 rxhash; u32 rps_rxhash; int queue_index; unsigned long updated ____cacheline_aligned_in_smp; }; #define TUN_NUM_FLOW_ENTRIES 1024 #define TUN_MASK_FLOW_ENTRIES (TUN_NUM_FLOW_ENTRIES - 1) struct tun_prog { struct rcu_head rcu; struct bpf_prog *prog; }; /* Since the socket were moved to tun_file, to preserve the behavior of persist * device, socket filter, sndbuf and vnet header size were restore when the * file were attached to a persist device. 
*/ struct tun_struct { struct tun_file __rcu *tfiles[MAX_TAP_QUEUES]; unsigned int numqueues; unsigned int flags; kuid_t owner; kgid_t group; struct net_device *dev; netdev_features_t set_features; #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \ NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4) int align; int vnet_hdr_sz; int sndbuf; struct tap_filter txflt; struct sock_fprog fprog; /* protected by rtnl lock */ bool filter_attached; u32 msg_enable; spinlock_t lock; struct hlist_head flows[TUN_NUM_FLOW_ENTRIES]; struct timer_list flow_gc_timer; unsigned long ageing_time; unsigned int numdisabled; struct list_head disabled; void *security; u32 flow_count; u32 rx_batched; atomic_long_t rx_frame_errors; struct bpf_prog __rcu *xdp_prog; struct tun_prog __rcu *steering_prog; struct tun_prog __rcu *filter_prog; struct ethtool_link_ksettings link_ksettings; /* init args */ struct file *file; struct ifreq *ifr; }; struct veth { __be16 h_vlan_proto; __be16 h_vlan_TCI; }; static void tun_flow_init(struct tun_struct *tun); static void tun_flow_uninit(struct tun_struct *tun); static int tun_napi_receive(struct napi_struct *napi, int budget) { struct tun_file *tfile = container_of(napi, struct tun_file, napi); struct sk_buff_head *queue = &tfile->sk.sk_write_queue; struct sk_buff_head process_queue; struct sk_buff *skb; int received = 0; __skb_queue_head_init(&process_queue); spin_lock(&queue->lock); skb_queue_splice_tail_init(queue, &process_queue); spin_unlock(&queue->lock); while (received < budget && (skb = __skb_dequeue(&process_queue))) { napi_gro_receive(napi, skb); ++received; } if (!skb_queue_empty(&process_queue)) { spin_lock(&queue->lock); skb_queue_splice(&process_queue, queue); spin_unlock(&queue->lock); } return received; } static int tun_napi_poll(struct napi_struct *napi, int budget) { unsigned int received; received = tun_napi_receive(napi, budget); if (received < budget) napi_complete_done(napi, received); return received; } static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, bool napi_en, bool napi_frags) { tfile->napi_enabled = napi_en; tfile->napi_frags_enabled = napi_en && napi_frags; if (napi_en) { netif_napi_add_tx(tun->dev, &tfile->napi, tun_napi_poll); napi_enable(&tfile->napi); } } static void tun_napi_enable(struct tun_file *tfile) { if (tfile->napi_enabled) napi_enable(&tfile->napi); } static void tun_napi_disable(struct tun_file *tfile) { if (tfile->napi_enabled) napi_disable(&tfile->napi); } static void tun_napi_del(struct tun_file *tfile) { if (tfile->napi_enabled) netif_napi_del(&tfile->napi); } static bool tun_napi_frags_enabled(const struct tun_file *tfile) { return tfile->napi_frags_enabled; } static inline u32 tun_hashfn(u32 rxhash) { return rxhash & TUN_MASK_FLOW_ENTRIES; } static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash) { struct tun_flow_entry *e; hlist_for_each_entry_rcu(e, head, hash_link) { if (e->rxhash == rxhash) return e; } return NULL; } static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun, struct hlist_head *head, u32 rxhash, u16 queue_index) { struct tun_flow_entry *e = kmalloc(sizeof(*e), GFP_ATOMIC); if (e) { netif_info(tun, tx_queued, tun->dev, "create flow: hash %u index %u\n", rxhash, queue_index); e->updated = jiffies; e->rxhash = rxhash; e->rps_rxhash = 0; e->queue_index = queue_index; e->tun = tun; hlist_add_head_rcu(&e->hash_link, head); ++tun->flow_count; } return e; } static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e) { netif_info(tun, 
tx_queued, tun->dev, "delete flow: hash %u index %u\n", e->rxhash, e->queue_index); hlist_del_rcu(&e->hash_link); kfree_rcu(e, rcu); --tun->flow_count; } static void tun_flow_flush(struct tun_struct *tun) { int i; spin_lock_bh(&tun->lock); for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { struct tun_flow_entry *e; struct hlist_node *n; hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) tun_flow_delete(tun, e); } spin_unlock_bh(&tun->lock); } static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index) { int i; spin_lock_bh(&tun->lock); for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { struct tun_flow_entry *e; struct hlist_node *n; hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) { if (e->queue_index == queue_index) tun_flow_delete(tun, e); } } spin_unlock_bh(&tun->lock); } static void tun_flow_cleanup(struct timer_list *t) { struct tun_struct *tun = from_timer(tun, t, flow_gc_timer); unsigned long delay = tun->ageing_time; unsigned long next_timer = jiffies + delay; unsigned long count = 0; int i; spin_lock(&tun->lock); for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { struct tun_flow_entry *e; struct hlist_node *n; hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) { unsigned long this_timer; this_timer = e->updated + delay; if (time_before_eq(this_timer, jiffies)) { tun_flow_delete(tun, e); continue; } count++; if (time_before(this_timer, next_timer)) next_timer = this_timer; } } if (count) mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer)); spin_unlock(&tun->lock); } static void tun_flow_update(struct tun_struct *tun, u32 rxhash, struct tun_file *tfile) { struct hlist_head *head; struct tun_flow_entry *e; unsigned long delay = tun->ageing_time; u16 queue_index = tfile->queue_index; head = &tun->flows[tun_hashfn(rxhash)]; rcu_read_lock(); e = tun_flow_find(head, rxhash); if (likely(e)) { /* TODO: keep queueing to old queue until it's empty? */ if (READ_ONCE(e->queue_index) != queue_index) WRITE_ONCE(e->queue_index, queue_index); if (e->updated != jiffies) e->updated = jiffies; sock_rps_record_flow_hash(e->rps_rxhash); } else { spin_lock_bh(&tun->lock); if (!tun_flow_find(head, rxhash) && tun->flow_count < MAX_TAP_FLOWS) tun_flow_create(tun, head, rxhash, queue_index); if (!timer_pending(&tun->flow_gc_timer)) mod_timer(&tun->flow_gc_timer, round_jiffies_up(jiffies + delay)); spin_unlock_bh(&tun->lock); } rcu_read_unlock(); } /* Save the hash received in the stack receive path and update the * flow_hash table accordingly. */ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash) { if (unlikely(e->rps_rxhash != hash)) e->rps_rxhash = hash; } /* We try to identify a flow through its rxhash. The reason that * we do not check rxq no. is because some cards(e.g 82599), chooses * the rxq based on the txq where the last packet of the flow comes. As * the userspace application move between processors, we may get a * different rxq no. here. 
*/ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb) { struct tun_flow_entry *e; u32 txq, numqueues; numqueues = READ_ONCE(tun->numqueues); txq = __skb_get_hash_symmetric(skb); e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq); if (e) { tun_flow_save_rps_rxhash(e, txq); txq = e->queue_index; } else { txq = reciprocal_scale(txq, numqueues); } return txq; } static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb) { struct tun_prog *prog; u32 numqueues; u16 ret = 0; numqueues = READ_ONCE(tun->numqueues); if (!numqueues) return 0; prog = rcu_dereference(tun->steering_prog); if (prog) ret = bpf_prog_run_clear_cb(prog->prog, skb); return ret % numqueues; } static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { struct tun_struct *tun = netdev_priv(dev); u16 ret; rcu_read_lock(); if (rcu_dereference(tun->steering_prog)) ret = tun_ebpf_select_queue(tun, skb); else ret = tun_automq_select_queue(tun, skb); rcu_read_unlock(); return ret; } static inline bool tun_not_capable(struct tun_struct *tun) { const struct cred *cred = current_cred(); struct net *net = dev_net(tun->dev); return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) || (gid_valid(tun->group) && !in_egroup_p(tun->group))) && !ns_capable(net->user_ns, CAP_NET_ADMIN); } static void tun_set_real_num_queues(struct tun_struct *tun) { netif_set_real_num_tx_queues(tun->dev, tun->numqueues); netif_set_real_num_rx_queues(tun->dev, tun->numqueues); } static void tun_disable_queue(struct tun_struct *tun, struct tun_file *tfile) { tfile->detached = tun; list_add_tail(&tfile->next, &tun->disabled); ++tun->numdisabled; } static struct tun_struct *tun_enable_queue(struct tun_file *tfile) { struct tun_struct *tun = tfile->detached; tfile->detached = NULL; list_del_init(&tfile->next); --tun->numdisabled; return tun; } void tun_ptr_free(void *ptr) { if (!ptr) return; if (tun_is_xdp_frame(ptr)) { struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr); xdp_return_frame(xdpf); } else { __skb_array_destroy_skb(ptr); } } EXPORT_SYMBOL_GPL(tun_ptr_free); static void tun_queue_purge(struct tun_file *tfile) { void *ptr; while ((ptr = ptr_ring_consume(&tfile->tx_ring)) != NULL) tun_ptr_free(ptr); skb_queue_purge(&tfile->sk.sk_write_queue); skb_queue_purge(&tfile->sk.sk_error_queue); } static void __tun_detach(struct tun_file *tfile, bool clean) { struct tun_file *ntfile; struct tun_struct *tun; tun = rtnl_dereference(tfile->tun); if (tun && clean) { if (!tfile->detached) tun_napi_disable(tfile); tun_napi_del(tfile); } if (tun && !tfile->detached) { u16 index = tfile->queue_index; BUG_ON(index >= tun->numqueues); rcu_assign_pointer(tun->tfiles[index], tun->tfiles[tun->numqueues - 1]); ntfile = rtnl_dereference(tun->tfiles[index]); ntfile->queue_index = index; ntfile->xdp_rxq.queue_index = index; rcu_assign_pointer(tun->tfiles[tun->numqueues - 1], NULL); --tun->numqueues; if (clean) { RCU_INIT_POINTER(tfile->tun, NULL); sock_put(&tfile->sk); } else { tun_disable_queue(tun, tfile); tun_napi_disable(tfile); } synchronize_net(); tun_flow_delete_by_queue(tun, tun->numqueues + 1); /* Drop read queue */ tun_queue_purge(tfile); tun_set_real_num_queues(tun); } else if (tfile->detached && clean) { tun = tun_enable_queue(tfile); sock_put(&tfile->sk); } if (clean) { if (tun && tun->numqueues == 0 && tun->numdisabled == 0) { netif_carrier_off(tun->dev); if (!(tun->flags & IFF_PERSIST) && tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); } if (tun) 
xdp_rxq_info_unreg(&tfile->xdp_rxq); ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); } } static void tun_detach(struct tun_file *tfile, bool clean) { struct tun_struct *tun; struct net_device *dev; rtnl_lock(); tun = rtnl_dereference(tfile->tun); dev = tun ? tun->dev : NULL; __tun_detach(tfile, clean); if (dev) netdev_state_change(dev); rtnl_unlock(); if (clean) sock_put(&tfile->sk); } static void tun_detach_all(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); struct tun_file *tfile, *tmp; int i, n = tun->numqueues; for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); BUG_ON(!tfile); tun_napi_disable(tfile); tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN; tfile->socket.sk->sk_data_ready(tfile->socket.sk); RCU_INIT_POINTER(tfile->tun, NULL); --tun->numqueues; } list_for_each_entry(tfile, &tun->disabled, next) { tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN; tfile->socket.sk->sk_data_ready(tfile->socket.sk); RCU_INIT_POINTER(tfile->tun, NULL); } BUG_ON(tun->numqueues != 0); synchronize_net(); for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); tun_napi_del(tfile); /* Drop read queue */ tun_queue_purge(tfile); xdp_rxq_info_unreg(&tfile->xdp_rxq); sock_put(&tfile->sk); } list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { tun_napi_del(tfile); tun_enable_queue(tfile); tun_queue_purge(tfile); xdp_rxq_info_unreg(&tfile->xdp_rxq); sock_put(&tfile->sk); } BUG_ON(tun->numdisabled != 0); if (tun->flags & IFF_PERSIST) module_put(THIS_MODULE); } static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter, bool napi, bool napi_frags, bool publish_tun) { struct tun_file *tfile = file->private_data; struct net_device *dev = tun->dev; int err; err = security_tun_dev_attach(tfile->socket.sk, tun->security); if (err < 0) goto out; err = -EINVAL; if (rtnl_dereference(tfile->tun) && !tfile->detached) goto out; err = -EBUSY; if (!(tun->flags & IFF_MULTI_QUEUE) && tun->numqueues == 1) goto out; err = -E2BIG; if (!tfile->detached && tun->numqueues + tun->numdisabled == MAX_TAP_QUEUES) goto out; err = 0; /* Re-attach the filter to persist device */ if (!skip_filter && (tun->filter_attached == true)) { lock_sock(tfile->socket.sk); err = sk_attach_filter(&tun->fprog, tfile->socket.sk); release_sock(tfile->socket.sk); if (!err) goto out; } if (!tfile->detached && ptr_ring_resize(&tfile->tx_ring, dev->tx_queue_len, GFP_KERNEL, tun_ptr_free)) { err = -ENOMEM; goto out; } tfile->queue_index = tun->numqueues; tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN; if (tfile->detached) { /* Re-attach detached tfile, updating XDP queue_index */ WARN_ON(!xdp_rxq_info_is_reg(&tfile->xdp_rxq)); if (tfile->xdp_rxq.queue_index != tfile->queue_index) tfile->xdp_rxq.queue_index = tfile->queue_index; } else { /* Setup XDP RX-queue info, for new tfile getting attached */ err = xdp_rxq_info_reg(&tfile->xdp_rxq, tun->dev, tfile->queue_index, 0); if (err < 0) goto out; err = xdp_rxq_info_reg_mem_model(&tfile->xdp_rxq, MEM_TYPE_PAGE_SHARED, NULL); if (err < 0) { xdp_rxq_info_unreg(&tfile->xdp_rxq); goto out; } err = 0; } if (tfile->detached) { tun_enable_queue(tfile); tun_napi_enable(tfile); } else { sock_hold(&tfile->sk); tun_napi_init(tun, tfile, napi, napi_frags); } if (rtnl_dereference(tun->xdp_prog)) sock_set_flag(&tfile->sk, SOCK_XDP); /* device is allowed to go away first, so no need to hold extra * refcnt. */ /* Publish tfile->tun and tun->tfiles only after we've fully * initialized tfile; otherwise we risk using half-initialized * object. 
*/ if (publish_tun) rcu_assign_pointer(tfile->tun, tun); rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); tun->numqueues++; tun_set_real_num_queues(tun); out: return err; } static struct tun_struct *tun_get(struct tun_file *tfile) { struct tun_struct *tun; rcu_read_lock(); tun = rcu_dereference(tfile->tun); if (tun) dev_hold(tun->dev); rcu_read_unlock(); return tun; } static void tun_put(struct tun_struct *tun) { dev_put(tun->dev); } /* TAP filtering */ static void addr_hash_set(u32 *mask, const u8 *addr) { int n = ether_crc(ETH_ALEN, addr) >> 26; mask[n >> 5] |= (1 << (n & 31)); } static unsigned int addr_hash_test(const u32 *mask, const u8 *addr) { int n = ether_crc(ETH_ALEN, addr) >> 26; return mask[n >> 5] & (1 << (n & 31)); } static int update_filter(struct tap_filter *filter, void __user *arg) { struct { u8 u[ETH_ALEN]; } *addr; struct tun_filter uf; int err, alen, n, nexact; if (copy_from_user(&uf, arg, sizeof(uf))) return -EFAULT; if (!uf.count) { /* Disabled */ filter->count = 0; return 0; } alen = ETH_ALEN * uf.count; addr = memdup_user(arg + sizeof(uf), alen); if (IS_ERR(addr)) return PTR_ERR(addr); /* The filter is updated without holding any locks. Which is * perfectly safe. We disable it first and in the worst * case we'll accept a few undesired packets. */ filter->count = 0; wmb(); /* Use first set of addresses as an exact filter */ for (n = 0; n < uf.count && n < FLT_EXACT_COUNT; n++) memcpy(filter->addr[n], addr[n].u, ETH_ALEN); nexact = n; /* Remaining multicast addresses are hashed, * unicast will leave the filter disabled. */ memset(filter->mask, 0, sizeof(filter->mask)); for (; n < uf.count; n++) { if (!is_multicast_ether_addr(addr[n].u)) { err = 0; /* no filter */ goto free_addr; } addr_hash_set(filter->mask, addr[n].u); } /* For ALLMULTI just set the mask to all ones. * This overrides the mask populated above. */ if ((uf.flags & TUN_FLT_ALLMULTI)) memset(filter->mask, ~0, sizeof(filter->mask)); /* Now enable the filter */ wmb(); filter->count = nexact; /* Return the number of exact filters */ err = nexact; free_addr: kfree(addr); return err; } /* Returns: 0 - drop, !=0 - accept */ static int run_filter(struct tap_filter *filter, const struct sk_buff *skb) { /* Cannot use eth_hdr(skb) here because skb_mac_hdr() is incorrect * at this point. */ struct ethhdr *eh = (struct ethhdr *) skb->data; int i; /* Exact match */ for (i = 0; i < filter->count; i++) if (ether_addr_equal(eh->h_dest, filter->addr[i])) return 1; /* Inexact match (multicast only) */ if (is_multicast_ether_addr(eh->h_dest)) return addr_hash_test(filter->mask, eh->h_dest); return 0; } /* * Checks whether the packet is accepted or not. 
* Returns: 0 - drop, !=0 - accept */ static int check_filter(struct tap_filter *filter, const struct sk_buff *skb) { if (!filter->count) return 1; return run_filter(filter, skb); } /* Network device part of the driver */ static const struct ethtool_ops tun_ethtool_ops; static int tun_net_init(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); struct ifreq *ifr = tun->ifr; int err; spin_lock_init(&tun->lock); err = security_tun_dev_alloc_security(&tun->security); if (err < 0) return err; tun_flow_init(tun); dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; dev->features = dev->hw_features; dev->vlan_features = dev->features & ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); dev->lltx = true; tun->flags = (tun->flags & ~TUN_FEATURES) | (ifr->ifr_flags & TUN_FEATURES); INIT_LIST_HEAD(&tun->disabled); err = tun_attach(tun, tun->file, false, ifr->ifr_flags & IFF_NAPI, ifr->ifr_flags & IFF_NAPI_FRAGS, false); if (err < 0) { tun_flow_uninit(tun); security_tun_dev_free_security(tun->security); return err; } return 0; } /* Net device detach from fd. */ static void tun_net_uninit(struct net_device *dev) { tun_detach_all(dev); } /* Net device open. */ static int tun_net_open(struct net_device *dev) { netif_tx_start_all_queues(dev); return 0; } /* Net device close. */ static int tun_net_close(struct net_device *dev) { netif_tx_stop_all_queues(dev); return 0; } /* Net device start xmit */ static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb) { #ifdef CONFIG_RPS if (tun->numqueues == 1 && static_branch_unlikely(&rps_needed)) { /* Select queue was not called for the skbuff, so we extract the * RPS hash and save it into the flow_table here. */ struct tun_flow_entry *e; __u32 rxhash; rxhash = __skb_get_hash_symmetric(skb); e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)], rxhash); if (e) tun_flow_save_rps_rxhash(e, rxhash); } #endif } static unsigned int run_ebpf_filter(struct tun_struct *tun, struct sk_buff *skb, int len) { struct tun_prog *prog = rcu_dereference(tun->filter_prog); if (prog) len = bpf_prog_run_clear_cb(prog->prog, skb); return len; } /* Net device start xmit */ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); enum skb_drop_reason drop_reason; int txq = skb->queue_mapping; struct netdev_queue *queue; struct tun_file *tfile; int len = skb->len; rcu_read_lock(); tfile = rcu_dereference(tun->tfiles[txq]); /* Drop packet if interface is not attached */ if (!tfile) { drop_reason = SKB_DROP_REASON_DEV_READY; goto drop; } if (!rcu_dereference(tun->steering_prog)) tun_automq_xmit(tun, skb); netif_info(tun, tx_queued, tun->dev, "%s %d\n", __func__, skb->len); /* Drop if the filter does not like it. * This is a noop if the filter is disabled. * Filter can be enabled only for the TAP devices. 
*/ if (!check_filter(&tun->txflt, skb)) { drop_reason = SKB_DROP_REASON_TAP_TXFILTER; goto drop; } if (tfile->socket.sk->sk_filter && sk_filter(tfile->socket.sk, skb)) { drop_reason = SKB_DROP_REASON_SOCKET_FILTER; goto drop; } len = run_ebpf_filter(tun, skb, len); if (len == 0) { drop_reason = SKB_DROP_REASON_TAP_FILTER; goto drop; } if (pskb_trim(skb, len)) { drop_reason = SKB_DROP_REASON_NOMEM; goto drop; } if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) { drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT; goto drop; } skb_tx_timestamp(skb); /* Orphan the skb - required as we might hang on to it * for indefinite time. */ skb_orphan(skb); nf_reset_ct(skb); if (ptr_ring_produce(&tfile->tx_ring, skb)) { drop_reason = SKB_DROP_REASON_FULL_RING; goto drop; } /* dev->lltx requires to do our own update of trans_start */ queue = netdev_get_tx_queue(dev, txq); txq_trans_cond_update(queue); /* Notify and wake up reader process */ if (tfile->flags & TUN_FASYNC) kill_fasync(&tfile->fasync, SIGIO, POLL_IN); tfile->socket.sk->sk_data_ready(tfile->socket.sk); rcu_read_unlock(); return NETDEV_TX_OK; drop: dev_core_stats_tx_dropped_inc(dev); skb_tx_error(skb); kfree_skb_reason(skb, drop_reason); rcu_read_unlock(); return NET_XMIT_DROP; } static void tun_net_mclist(struct net_device *dev) { /* * This callback is supposed to deal with mc filter in * _rx_ path and has nothing to do with the _tx_ path. * In rx path we always accept everything userspace gives us. */ } static netdev_features_t tun_net_fix_features(struct net_device *dev, netdev_features_t features) { struct tun_struct *tun = netdev_priv(dev); return (features & tun->set_features) | (features & ~TUN_USER_FEATURES); } static void tun_set_headroom(struct net_device *dev, int new_hr) { struct tun_struct *tun = netdev_priv(dev); if (new_hr < NET_SKB_PAD) new_hr = NET_SKB_PAD; tun->align = new_hr; } static void tun_net_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct tun_struct *tun = netdev_priv(dev); dev_get_tstats64(dev, stats); stats->rx_frame_errors += (unsigned long)atomic_long_read(&tun->rx_frame_errors); } static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog, struct netlink_ext_ack *extack) { struct tun_struct *tun = netdev_priv(dev); struct tun_file *tfile; struct bpf_prog *old_prog; int i; old_prog = rtnl_dereference(tun->xdp_prog); rcu_assign_pointer(tun->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); for (i = 0; i < tun->numqueues; i++) { tfile = rtnl_dereference(tun->tfiles[i]); if (prog) sock_set_flag(&tfile->sk, SOCK_XDP); else sock_reset_flag(&tfile->sk, SOCK_XDP); } list_for_each_entry(tfile, &tun->disabled, next) { if (prog) sock_set_flag(&tfile->sk, SOCK_XDP); else sock_reset_flag(&tfile->sk, SOCK_XDP); } return 0; } static int tun_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: return tun_xdp_set(dev, xdp->prog, xdp->extack); default: return -EINVAL; } } static int tun_net_change_carrier(struct net_device *dev, bool new_carrier) { if (new_carrier) { struct tun_struct *tun = netdev_priv(dev); if (!tun->numqueues) return -EPERM; netif_carrier_on(dev); } else { netif_carrier_off(dev); } return 0; } static const struct net_device_ops tun_netdev_ops = { .ndo_init = tun_net_init, .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, .ndo_stop = tun_net_close, .ndo_start_xmit = tun_net_xmit, .ndo_fix_features = tun_net_fix_features, .ndo_select_queue = tun_select_queue, .ndo_set_rx_headroom = tun_set_headroom, .ndo_get_stats64 = 
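/*
 * Everything tun_net_xmit() above pushes into tfile->tx_ring is what a
 * read(2) on the tun fd later returns. A minimal reader sketch, assuming
 * the device was created without IFF_NO_PI so every frame is prefixed by
 * struct tun_pi (fd and buf are placeholders, the usual unistd.h and
 * linux/if_tun.h headers are assumed):
 *
 *	char buf[2048];
 *	ssize_t n = read(fd, buf, sizeof(buf));
 *
 *	if (n >= (ssize_t)sizeof(struct tun_pi)) {
 *		struct tun_pi *pi = (struct tun_pi *)buf;
 *		// pi->proto is the EtherType (e.g. ETH_P_IP); the raw
 *		// packet starts at buf + sizeof(*pi)
 *	}
 */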
tun_net_get_stats64, .ndo_change_carrier = tun_net_change_carrier, }; static void __tun_xdp_flush_tfile(struct tun_file *tfile) { /* Notify and wake up reader process */ if (tfile->flags & TUN_FASYNC) kill_fasync(&tfile->fasync, SIGIO, POLL_IN); tfile->socket.sk->sk_data_ready(tfile->socket.sk); } static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags) { struct tun_struct *tun = netdev_priv(dev); struct tun_file *tfile; u32 numqueues; int nxmit = 0; int i; if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL; rcu_read_lock(); resample: numqueues = READ_ONCE(tun->numqueues); if (!numqueues) { rcu_read_unlock(); return -ENXIO; /* Caller will free/return all frames */ } tfile = rcu_dereference(tun->tfiles[smp_processor_id() % numqueues]); if (unlikely(!tfile)) goto resample; spin_lock(&tfile->tx_ring.producer_lock); for (i = 0; i < n; i++) { struct xdp_frame *xdp = frames[i]; /* Encode the XDP flag into lowest bit for consumer to differ * XDP buffer from sk_buff. */ void *frame = tun_xdp_to_ptr(xdp); if (__ptr_ring_produce(&tfile->tx_ring, frame)) { dev_core_stats_tx_dropped_inc(dev); break; } nxmit++; } spin_unlock(&tfile->tx_ring.producer_lock); if (flags & XDP_XMIT_FLUSH) __tun_xdp_flush_tfile(tfile); rcu_read_unlock(); return nxmit; } static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) { struct xdp_frame *frame = xdp_convert_buff_to_frame(xdp); int nxmit; if (unlikely(!frame)) return -EOVERFLOW; nxmit = tun_xdp_xmit(dev, 1, &frame, XDP_XMIT_FLUSH); if (!nxmit) xdp_return_frame_rx_napi(frame); return nxmit; } static const struct net_device_ops tap_netdev_ops = { .ndo_init = tun_net_init, .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, .ndo_stop = tun_net_close, .ndo_start_xmit = tun_net_xmit, .ndo_fix_features = tun_net_fix_features, .ndo_set_rx_mode = tun_net_mclist, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_select_queue = tun_select_queue, .ndo_features_check = passthru_features_check, .ndo_set_rx_headroom = tun_set_headroom, .ndo_bpf = tun_xdp, .ndo_xdp_xmit = tun_xdp_xmit, .ndo_change_carrier = tun_net_change_carrier, }; static void tun_flow_init(struct tun_struct *tun) { int i; for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) INIT_HLIST_HEAD(&tun->flows[i]); tun->ageing_time = TUN_FLOW_EXPIRE; timer_setup(&tun->flow_gc_timer, tun_flow_cleanup, 0); mod_timer(&tun->flow_gc_timer, round_jiffies_up(jiffies + tun->ageing_time)); } static void tun_flow_uninit(struct tun_struct *tun) { timer_delete_sync(&tun->flow_gc_timer); tun_flow_flush(tun); } #define MIN_MTU 68 #define MAX_MTU 65535 /* Initialize net device. */ static void tun_net_initialize(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); switch (tun->flags & TUN_TYPE_MASK) { case IFF_TUN: dev->netdev_ops = &tun_netdev_ops; dev->header_ops = &ip_tunnel_header_ops; /* Point-to-Point TUN Device */ dev->hard_header_len = 0; dev->addr_len = 0; dev->mtu = 1500; /* Zero header length */ dev->type = ARPHRD_NONE; dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; break; case IFF_TAP: dev->netdev_ops = &tap_netdev_ops; /* Ethernet TAP Device */ ether_setup(dev); dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; eth_hw_addr_random(dev); /* Currently tun does not support XDP, only tap does. 
*/ dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | NETDEV_XDP_ACT_NDO_XMIT; break; } dev->min_mtu = MIN_MTU; dev->max_mtu = MAX_MTU - dev->hard_header_len; } static bool tun_sock_writeable(struct tun_struct *tun, struct tun_file *tfile) { struct sock *sk = tfile->socket.sk; return (tun->dev->flags & IFF_UP) && sock_writeable(sk); } /* Character device part */ /* Poll */ static __poll_t tun_chr_poll(struct file *file, poll_table *wait) { struct tun_file *tfile = file->private_data; struct tun_struct *tun = tun_get(tfile); struct sock *sk; __poll_t mask = 0; if (!tun) return EPOLLERR; sk = tfile->socket.sk; poll_wait(file, sk_sleep(sk), wait); if (!ptr_ring_empty(&tfile->tx_ring)) mask |= EPOLLIN | EPOLLRDNORM; /* Make sure SOCKWQ_ASYNC_NOSPACE is set if not writable to * guarantee EPOLLOUT to be raised by either here or * tun_sock_write_space(). Then process could get notification * after it writes to a down device and meets -EIO. */ if (tun_sock_writeable(tun, tfile) || (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && tun_sock_writeable(tun, tfile))) mask |= EPOLLOUT | EPOLLWRNORM; if (tun->dev->reg_state != NETREG_REGISTERED) mask = EPOLLERR; tun_put(tun); return mask; } static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile, size_t len, const struct iov_iter *it) { struct sk_buff *skb; size_t linear; int err; int i; if (it->nr_segs > MAX_SKB_FRAGS + 1 || len > (ETH_MAX_MTU - NET_SKB_PAD - NET_IP_ALIGN)) return ERR_PTR(-EMSGSIZE); local_bh_disable(); skb = napi_get_frags(&tfile->napi); local_bh_enable(); if (!skb) return ERR_PTR(-ENOMEM); linear = iov_iter_single_seg_count(it); err = __skb_grow(skb, linear); if (err) goto free; skb->len = len; skb->data_len = len - linear; skb->truesize += skb->data_len; for (i = 1; i < it->nr_segs; i++) { const struct iovec *iov = iter_iov(it) + i; size_t fragsz = iov->iov_len; struct page *page; void *frag; if (fragsz == 0 || fragsz > PAGE_SIZE) { err = -EINVAL; goto free; } frag = netdev_alloc_frag(fragsz); if (!frag) { err = -ENOMEM; goto free; } page = virt_to_head_page(frag); skb_fill_page_desc(skb, i - 1, page, frag - page_address(page), fragsz); } return skb; free: /* frees skb and all frags allocated with napi_alloc_frag() */ napi_free_frags(&tfile->napi); return ERR_PTR(err); } /* prepad is the amount to reserve at front. len is length after that. * linear is a hint as to how much to copy (usually headers). */ static struct sk_buff *tun_alloc_skb(struct tun_file *tfile, size_t prepad, size_t len, size_t linear, int noblock) { struct sock *sk = tfile->socket.sk; struct sk_buff *skb; int err; /* Under a page? Don't bother with paged skb. 
*/ if (prepad + len < PAGE_SIZE) linear = len; if (len - linear > MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) linear = len - MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER); skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, &err, PAGE_ALLOC_COSTLY_ORDER); if (!skb) return ERR_PTR(err); skb_reserve(skb, prepad); skb_put(skb, linear); skb->data_len = len - linear; skb->len += len - linear; return skb; } static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile, struct sk_buff *skb, int more) { struct sk_buff_head *queue = &tfile->sk.sk_write_queue; struct sk_buff_head process_queue; u32 rx_batched = tun->rx_batched; bool rcv = false; if (!rx_batched || (!more && skb_queue_empty(queue))) { local_bh_disable(); skb_record_rx_queue(skb, tfile->queue_index); netif_receive_skb(skb); local_bh_enable(); return; } spin_lock(&queue->lock); if (!more || skb_queue_len(queue) == rx_batched) { __skb_queue_head_init(&process_queue); skb_queue_splice_tail_init(queue, &process_queue); rcv = true; } else { __skb_queue_tail(queue, skb); } spin_unlock(&queue->lock); if (rcv) { struct sk_buff *nskb; local_bh_disable(); while ((nskb = __skb_dequeue(&process_queue))) { skb_record_rx_queue(nskb, tfile->queue_index); netif_receive_skb(nskb); } skb_record_rx_queue(skb, tfile->queue_index); netif_receive_skb(skb); local_bh_enable(); } } static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile, int len, int noblock, bool zerocopy) { if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) return false; if (tfile->socket.sk->sk_sndbuf != INT_MAX) return false; if (!noblock) return false; if (zerocopy) return false; if (SKB_DATA_ALIGN(len + TUN_RX_PAD + XDP_PACKET_HEADROOM) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE) return false; return true; } static struct sk_buff *__tun_build_skb(struct tun_file *tfile, struct page_frag *alloc_frag, char *buf, int buflen, int len, int pad, int metasize) { struct sk_buff *skb = build_skb(buf, buflen); if (!skb) return ERR_PTR(-ENOMEM); skb_reserve(skb, pad); skb_put(skb, len); if (metasize) skb_metadata_set(skb, metasize); skb_set_owner_w(skb, tfile->socket.sk); get_page(alloc_frag->page); alloc_frag->offset += buflen; return skb; } static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog, struct xdp_buff *xdp, u32 act) { int err; switch (act) { case XDP_REDIRECT: err = xdp_do_redirect(tun->dev, xdp, xdp_prog); if (err) { dev_core_stats_rx_dropped_inc(tun->dev); return err; } dev_sw_netstats_rx_add(tun->dev, xdp->data_end - xdp->data); break; case XDP_TX: err = tun_xdp_tx(tun->dev, xdp); if (err < 0) { dev_core_stats_rx_dropped_inc(tun->dev); return err; } dev_sw_netstats_rx_add(tun->dev, xdp->data_end - xdp->data); break; case XDP_PASS: break; default: bpf_warn_invalid_xdp_action(tun->dev, xdp_prog, act); fallthrough; case XDP_ABORTED: trace_xdp_exception(tun->dev, xdp_prog, act); fallthrough; case XDP_DROP: dev_core_stats_rx_dropped_inc(tun->dev); break; } return act; } static struct sk_buff *tun_build_skb(struct tun_struct *tun, struct tun_file *tfile, struct iov_iter *from, struct virtio_net_hdr *hdr, int len, int *skb_xdp) { struct page_frag *alloc_frag = &current->task_frag; struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; struct bpf_prog *xdp_prog; int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); char *buf; size_t copied; int pad = TUN_RX_PAD; int metasize = 0; int err = 0; rcu_read_lock(); xdp_prog = rcu_dereference(tun->xdp_prog); if (xdp_prog) pad += 
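/*
 * tun_rx_batched() above only coalesces when tun->rx_batched is non-zero.
 * That knob is exposed through the ethtool coalesce ops further down in
 * this file (rx_max_coalesced_frames, capped at NAPI_POLL_WEIGHT). A rough
 * user-space sketch, assuming CAP_NET_ADMIN; the device name and value are
 * placeholders:
 *
 *	struct ethtool_coalesce ec = {
 *		.cmd = ETHTOOL_SCOALESCE,
 *		.rx_max_coalesced_frames = 32,
 *	};
 *	struct ifreq ifr = { 0 };
 *	int sock = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strncpy(ifr.ifr_name, "tap0", IFNAMSIZ);
 *	ifr.ifr_data = (void *)&ec;
 *	ioctl(sock, SIOCETHTOOL, &ifr);
 *
 * The same setting is reachable via "ethtool -C tap0 rx-frames 32".
 */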
XDP_PACKET_HEADROOM; buflen += SKB_DATA_ALIGN(len + pad); rcu_read_unlock(); alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES); if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL))) return ERR_PTR(-ENOMEM); buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; copied = copy_page_from_iter(alloc_frag->page, alloc_frag->offset + pad, len, from); if (copied != len) return ERR_PTR(-EFAULT); /* There's a small window that XDP may be set after the check * of xdp_prog above, this should be rare and for simplicity * we do XDP on skb in case the headroom is not enough. */ if (hdr->gso_type || !xdp_prog) { *skb_xdp = 1; return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad, metasize); } *skb_xdp = 0; local_bh_disable(); rcu_read_lock(); bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); xdp_prog = rcu_dereference(tun->xdp_prog); if (xdp_prog) { struct xdp_buff xdp; u32 act; xdp_init_buff(&xdp, buflen, &tfile->xdp_rxq); xdp_prepare_buff(&xdp, buf, pad, len, true); act = bpf_prog_run_xdp(xdp_prog, &xdp); if (act == XDP_REDIRECT || act == XDP_TX) { get_page(alloc_frag->page); alloc_frag->offset += buflen; } err = tun_xdp_act(tun, xdp_prog, &xdp, act); if (err < 0) { if (act == XDP_REDIRECT || act == XDP_TX) put_page(alloc_frag->page); goto out; } if (err == XDP_REDIRECT) xdp_do_flush(); if (err != XDP_PASS) goto out; pad = xdp.data - xdp.data_hard_start; len = xdp.data_end - xdp.data; /* It is known that the xdp_buff was prepared with metadata * support, so the metasize will never be negative. */ metasize = xdp.data - xdp.data_meta; } bpf_net_ctx_clear(bpf_net_ctx); rcu_read_unlock(); local_bh_enable(); return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad, metasize); out: bpf_net_ctx_clear(bpf_net_ctx); rcu_read_unlock(); local_bh_enable(); return NULL; } /* Get packet from user space buffer */ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, void *msg_control, struct iov_iter *from, int noblock, bool more) { struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; struct sk_buff *skb; size_t total_len = iov_iter_count(from); size_t len = total_len, align = tun->align, linear; struct virtio_net_hdr gso = { 0 }; int good_linear; int copylen; int hdr_len = 0; bool zerocopy = false; int err; u32 rxhash = 0; int skb_xdp = 1; bool frags = tun_napi_frags_enabled(tfile); enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; if (!(tun->flags & IFF_NO_PI)) { if (len < sizeof(pi)) return -EINVAL; len -= sizeof(pi); if (!copy_from_iter_full(&pi, sizeof(pi), from)) return -EFAULT; } if (tun->flags & IFF_VNET_HDR) { int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); hdr_len = tun_vnet_hdr_get(vnet_hdr_sz, tun->flags, from, &gso); if (hdr_len < 0) return hdr_len; len -= vnet_hdr_sz; } if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) { align += NET_IP_ALIGN; if (unlikely(len < ETH_HLEN || (hdr_len && hdr_len < ETH_HLEN))) return -EINVAL; } good_linear = SKB_MAX_HEAD(align); if (msg_control) { struct iov_iter i = *from; /* There are 256 bytes to be copied in skb, so there is * enough room for skb expand head in case it is used. * The rest of the buffer is mapped from userspace. */ copylen = min(hdr_len ? 
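/*
 * tun_get_user() above expects each write to carry, in order: an optional
 * struct tun_pi (absent with IFF_NO_PI), then vnet_hdr_sz bytes of
 * virtio_net_hdr (only with IFF_VNET_HDR), then the packet itself. A
 * minimal sketch for a device created with IFF_TAP | IFF_NO_PI |
 * IFF_VNET_HDR and the default header size (frame/frame_len are
 * placeholders):
 *
 *	struct virtio_net_hdr gso = { 0 };	// no offloads requested
 *	struct iovec iov[2] = {
 *		{ &gso, sizeof(gso) },
 *		{ frame, frame_len },		// complete Ethernet frame
 *	};
 *
 *	writev(fd, iov, 2);
 */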
hdr_len : GOODCOPY_LEN, good_linear); linear = copylen; iov_iter_advance(&i, copylen); if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS) zerocopy = true; } if (!frags && tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) { /* For the packet that is not easy to be processed * (e.g gso or jumbo packet), we will do it at after * skb was created with generic XDP routine. */ skb = tun_build_skb(tun, tfile, from, &gso, len, &skb_xdp); err = PTR_ERR_OR_ZERO(skb); if (err) goto drop; if (!skb) return total_len; } else { if (!zerocopy) { copylen = len; linear = min(hdr_len, good_linear); } if (frags) { mutex_lock(&tfile->napi_mutex); skb = tun_napi_alloc_frags(tfile, copylen, from); /* tun_napi_alloc_frags() enforces a layout for the skb. * If zerocopy is enabled, then this layout will be * overwritten by zerocopy_sg_from_iter(). */ zerocopy = false; } else { if (!linear) linear = min_t(size_t, good_linear, copylen); skb = tun_alloc_skb(tfile, align, copylen, linear, noblock); } err = PTR_ERR_OR_ZERO(skb); if (err) goto drop; if (zerocopy) err = zerocopy_sg_from_iter(skb, from); else err = skb_copy_datagram_from_iter(skb, 0, from, len); if (err) { err = -EFAULT; drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT; goto drop; } } if (tun_vnet_hdr_to_skb(tun->flags, skb, &gso)) { atomic_long_inc(&tun->rx_frame_errors); err = -EINVAL; goto free_skb; } switch (tun->flags & TUN_TYPE_MASK) { case IFF_TUN: if (tun->flags & IFF_NO_PI) { u8 ip_version = skb->len ? (skb->data[0] >> 4) : 0; switch (ip_version) { case 4: pi.proto = htons(ETH_P_IP); break; case 6: pi.proto = htons(ETH_P_IPV6); break; default: err = -EINVAL; goto drop; } } skb_reset_mac_header(skb); skb->protocol = pi.proto; skb->dev = tun->dev; break; case IFF_TAP: if (frags && !pskb_may_pull(skb, ETH_HLEN)) { err = -ENOMEM; drop_reason = SKB_DROP_REASON_HDR_TRUNC; goto drop; } skb->protocol = eth_type_trans(skb, tun->dev); break; } /* copy skb_ubuf_info for callback when skb has no error */ if (zerocopy) { skb_zcopy_init(skb, msg_control); } else if (msg_control) { struct ubuf_info *uarg = msg_control; uarg->ops->complete(NULL, uarg, false); } skb_reset_network_header(skb); skb_probe_transport_header(skb); skb_record_rx_queue(skb, tfile->queue_index); if (skb_xdp) { struct bpf_prog *xdp_prog; int ret; local_bh_disable(); rcu_read_lock(); xdp_prog = rcu_dereference(tun->xdp_prog); if (xdp_prog) { ret = do_xdp_generic(xdp_prog, &skb); if (ret != XDP_PASS) { rcu_read_unlock(); local_bh_enable(); goto unlock_frags; } } rcu_read_unlock(); local_bh_enable(); } /* Compute the costly rx hash only if needed for flow updates. * We may get a very small possibility of OOO during switching, not * worth to optimize. */ if (!rcu_access_pointer(tun->steering_prog) && tun->numqueues > 1 && !tfile->detached) rxhash = __skb_get_hash_symmetric(skb); rcu_read_lock(); if (unlikely(!(tun->dev->flags & IFF_UP))) { err = -EIO; rcu_read_unlock(); drop_reason = SKB_DROP_REASON_DEV_READY; goto drop; } if (frags) { u32 headlen; /* Exercise flow dissector code path. 
*/ skb_push(skb, ETH_HLEN); headlen = eth_get_headlen(tun->dev, skb->data, skb_headlen(skb)); if (unlikely(headlen > skb_headlen(skb))) { WARN_ON_ONCE(1); err = -ENOMEM; dev_core_stats_rx_dropped_inc(tun->dev); napi_busy: napi_free_frags(&tfile->napi); rcu_read_unlock(); mutex_unlock(&tfile->napi_mutex); return err; } if (likely(napi_schedule_prep(&tfile->napi))) { local_bh_disable(); napi_gro_frags(&tfile->napi); napi_complete(&tfile->napi); local_bh_enable(); } else { err = -EBUSY; goto napi_busy; } mutex_unlock(&tfile->napi_mutex); } else if (tfile->napi_enabled) { struct sk_buff_head *queue = &tfile->sk.sk_write_queue; int queue_len; spin_lock_bh(&queue->lock); if (unlikely(tfile->detached)) { spin_unlock_bh(&queue->lock); rcu_read_unlock(); err = -EBUSY; goto free_skb; } __skb_queue_tail(queue, skb); queue_len = skb_queue_len(queue); spin_unlock(&queue->lock); if (!more || queue_len > NAPI_POLL_WEIGHT) napi_schedule(&tfile->napi); local_bh_enable(); } else if (!IS_ENABLED(CONFIG_4KSTACKS)) { tun_rx_batched(tun, tfile, skb, more); } else { netif_rx(skb); } rcu_read_unlock(); preempt_disable(); dev_sw_netstats_rx_add(tun->dev, len); preempt_enable(); if (rxhash) tun_flow_update(tun, rxhash, tfile); return total_len; drop: if (err != -EAGAIN) dev_core_stats_rx_dropped_inc(tun->dev); free_skb: if (!IS_ERR_OR_NULL(skb)) kfree_skb_reason(skb, drop_reason); unlock_frags: if (frags) { tfile->napi.skb = NULL; mutex_unlock(&tfile->napi_mutex); } return err ?: total_len; } static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct tun_file *tfile = file->private_data; struct tun_struct *tun = tun_get(tfile); ssize_t result; int noblock = 0; if (!tun) return -EBADFD; if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT)) noblock = 1; result = tun_get_user(tun, tfile, NULL, from, noblock, false); tun_put(tun); return result; } static ssize_t tun_put_user_xdp(struct tun_struct *tun, struct tun_file *tfile, struct xdp_frame *xdp_frame, struct iov_iter *iter) { int vnet_hdr_sz = 0; size_t size = xdp_frame->len; ssize_t ret; if (tun->flags & IFF_VNET_HDR) { struct virtio_net_hdr gso = { 0 }; vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso); if (ret) return ret; } ret = copy_to_iter(xdp_frame->data, size, iter) + vnet_hdr_sz; preempt_disable(); dev_sw_netstats_tx_add(tun->dev, 1, ret); preempt_enable(); return ret; } /* Put packet to the user space buffer */ static ssize_t tun_put_user(struct tun_struct *tun, struct tun_file *tfile, struct sk_buff *skb, struct iov_iter *iter) { struct tun_pi pi = { 0, skb->protocol }; ssize_t total; int vlan_offset = 0; int vlan_hlen = 0; int vnet_hdr_sz = 0; int ret; if (skb_vlan_tag_present(skb)) vlan_hlen = VLAN_HLEN; if (tun->flags & IFF_VNET_HDR) vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); total = skb->len + vlan_hlen + vnet_hdr_sz; if (!(tun->flags & IFF_NO_PI)) { if (iov_iter_count(iter) < sizeof(pi)) return -EINVAL; total += sizeof(pi); if (iov_iter_count(iter) < total) { /* Packet will be striped */ pi.flags |= TUN_PKT_STRIP; } if (copy_to_iter(&pi, sizeof(pi), iter) != sizeof(pi)) return -EFAULT; } if (vnet_hdr_sz) { struct virtio_net_hdr gso; ret = tun_vnet_hdr_from_skb(tun->flags, tun->dev, skb, &gso); if (ret) return ret; ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso); if (ret) return ret; } if (vlan_hlen) { int ret; struct veth veth; veth.h_vlan_proto = skb->vlan_proto; veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); vlan_offset = 
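/*
 * tun_put_user() above sets TUN_PKT_STRIP in the tun_pi header when the
 * reader's buffer is smaller than the frame, and then truncates the copy
 * rather than failing. A reader that uses the packet-information header
 * (i.e. no IFF_NO_PI) can detect this (fd and buffer size are
 * placeholders):
 *
 *	char buf[1500];
 *	struct tun_pi *pi = (struct tun_pi *)buf;
 *	ssize_t n = read(fd, buf, sizeof(buf));
 *
 *	if (n >= (ssize_t)sizeof(*pi) && (pi->flags & TUN_PKT_STRIP))
 *		fprintf(stderr, "frame truncated, enlarge the buffer\n");
 */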
offsetof(struct vlan_ethhdr, h_vlan_proto); ret = skb_copy_datagram_iter(skb, 0, iter, vlan_offset); if (ret || !iov_iter_count(iter)) goto done; ret = copy_to_iter(&veth, sizeof(veth), iter); if (ret != sizeof(veth) || !iov_iter_count(iter)) goto done; } skb_copy_datagram_iter(skb, vlan_offset, iter, skb->len - vlan_offset); done: /* caller is in process context, */ preempt_disable(); dev_sw_netstats_tx_add(tun->dev, 1, skb->len + vlan_hlen); preempt_enable(); return total; } static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err) { DECLARE_WAITQUEUE(wait, current); void *ptr = NULL; int error = 0; ptr = ptr_ring_consume(&tfile->tx_ring); if (ptr) goto out; if (noblock) { error = -EAGAIN; goto out; } add_wait_queue(&tfile->socket.wq.wait, &wait); while (1) { set_current_state(TASK_INTERRUPTIBLE); ptr = ptr_ring_consume(&tfile->tx_ring); if (ptr) break; if (signal_pending(current)) { error = -ERESTARTSYS; break; } if (tfile->socket.sk->sk_shutdown & RCV_SHUTDOWN) { error = -EFAULT; break; } schedule(); } __set_current_state(TASK_RUNNING); remove_wait_queue(&tfile->socket.wq.wait, &wait); out: *err = error; return ptr; } static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, struct iov_iter *to, int noblock, void *ptr) { ssize_t ret; int err; if (!iov_iter_count(to)) { tun_ptr_free(ptr); return 0; } if (!ptr) { /* Read frames from ring */ ptr = tun_ring_recv(tfile, noblock, &err); if (!ptr) return err; } if (tun_is_xdp_frame(ptr)) { struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr); ret = tun_put_user_xdp(tun, tfile, xdpf, to); xdp_return_frame(xdpf); } else { struct sk_buff *skb = ptr; ret = tun_put_user(tun, tfile, skb, to); if (unlikely(ret < 0)) kfree_skb(skb); else consume_skb(skb); } return ret; } static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct tun_file *tfile = file->private_data; struct tun_struct *tun = tun_get(tfile); ssize_t len = iov_iter_count(to), ret; int noblock = 0; if (!tun) return -EBADFD; if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT)) noblock = 1; ret = tun_do_read(tun, tfile, to, noblock, NULL); ret = min_t(ssize_t, ret, len); if (ret > 0) iocb->ki_pos = ret; tun_put(tun); return ret; } static void tun_prog_free(struct rcu_head *rcu) { struct tun_prog *prog = container_of(rcu, struct tun_prog, rcu); bpf_prog_destroy(prog->prog); kfree(prog); } static int __tun_set_ebpf(struct tun_struct *tun, struct tun_prog __rcu **prog_p, struct bpf_prog *prog) { struct tun_prog *old, *new = NULL; if (prog) { new = kmalloc(sizeof(*new), GFP_KERNEL); if (!new) return -ENOMEM; new->prog = prog; } spin_lock_bh(&tun->lock); old = rcu_dereference_protected(*prog_p, lockdep_is_held(&tun->lock)); rcu_assign_pointer(*prog_p, new); spin_unlock_bh(&tun->lock); if (old) call_rcu(&old->rcu, tun_prog_free); return 0; } static void tun_free_netdev(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); BUG_ON(!(list_empty(&tun->disabled))); tun_flow_uninit(tun); security_tun_dev_free_security(tun->security); __tun_set_ebpf(tun, &tun->steering_prog, NULL); __tun_set_ebpf(tun, &tun->filter_prog, NULL); } static void tun_setup(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); tun->owner = INVALID_UID; tun->group = INVALID_GID; tun_default_link_ksettings(dev, &tun->link_ksettings); dev->ethtool_ops = &tun_ethtool_ops; dev->needs_free_netdev = true; dev->priv_destructor = tun_free_netdev; /* We prefer our own queue length */ dev->tx_queue_len = 
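/*
 * tun_ring_recv() above returns -EAGAIN to non-blocking readers when the
 * ring is empty, and tun_chr_poll() earlier in this file reports EPOLLIN as
 * soon as it is not. A typical non-blocking reader therefore looks roughly
 * like this (fd was opened with O_NONBLOCK, handle_frame() is a
 * placeholder):
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *	char buf[2048];
 *
 *	while (poll(&pfd, 1, -1) > 0) {
 *		ssize_t n = read(fd, buf, sizeof(buf));
 *
 *		if (n < 0 && errno == EAGAIN)
 *			continue;	// raced with another reader
 *		if (n <= 0)
 *			break;
 *		handle_frame(buf, n);
 *	}
 */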
TUN_READQ_SIZE; } /* Trivial set of netlink ops to allow deleting tun or tap * device with netlink. */ static int tun_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { NL_SET_ERR_MSG(extack, "tun/tap creation via rtnetlink is not supported."); return -EOPNOTSUPP; } static size_t tun_get_size(const struct net_device *dev) { BUILD_BUG_ON(sizeof(u32) != sizeof(uid_t)); BUILD_BUG_ON(sizeof(u32) != sizeof(gid_t)); return nla_total_size(sizeof(uid_t)) + /* OWNER */ nla_total_size(sizeof(gid_t)) + /* GROUP */ nla_total_size(sizeof(u8)) + /* TYPE */ nla_total_size(sizeof(u8)) + /* PI */ nla_total_size(sizeof(u8)) + /* VNET_HDR */ nla_total_size(sizeof(u8)) + /* PERSIST */ nla_total_size(sizeof(u8)) + /* MULTI_QUEUE */ nla_total_size(sizeof(u32)) + /* NUM_QUEUES */ nla_total_size(sizeof(u32)) + /* NUM_DISABLED_QUEUES */ 0; } static int tun_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); if (nla_put_u8(skb, IFLA_TUN_TYPE, tun->flags & TUN_TYPE_MASK)) goto nla_put_failure; if (uid_valid(tun->owner) && nla_put_u32(skb, IFLA_TUN_OWNER, from_kuid_munged(current_user_ns(), tun->owner))) goto nla_put_failure; if (gid_valid(tun->group) && nla_put_u32(skb, IFLA_TUN_GROUP, from_kgid_munged(current_user_ns(), tun->group))) goto nla_put_failure; if (nla_put_u8(skb, IFLA_TUN_PI, !(tun->flags & IFF_NO_PI))) goto nla_put_failure; if (nla_put_u8(skb, IFLA_TUN_VNET_HDR, !!(tun->flags & IFF_VNET_HDR))) goto nla_put_failure; if (nla_put_u8(skb, IFLA_TUN_PERSIST, !!(tun->flags & IFF_PERSIST))) goto nla_put_failure; if (nla_put_u8(skb, IFLA_TUN_MULTI_QUEUE, !!(tun->flags & IFF_MULTI_QUEUE))) goto nla_put_failure; if (tun->flags & IFF_MULTI_QUEUE) { if (nla_put_u32(skb, IFLA_TUN_NUM_QUEUES, tun->numqueues)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_TUN_NUM_DISABLED_QUEUES, tun->numdisabled)) goto nla_put_failure; } return 0; nla_put_failure: return -EMSGSIZE; } static struct rtnl_link_ops tun_link_ops __read_mostly = { .kind = DRV_NAME, .priv_size = sizeof(struct tun_struct), .setup = tun_setup, .validate = tun_validate, .get_size = tun_get_size, .fill_info = tun_fill_info, }; static void tun_sock_write_space(struct sock *sk) { struct tun_file *tfile; wait_queue_head_t *wqueue; if (!sock_writeable(sk)) return; if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags)) return; wqueue = sk_sleep(sk); if (wqueue && waitqueue_active(wqueue)) wake_up_interruptible_sync_poll(wqueue, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); tfile = container_of(sk, struct tun_file, sk); kill_fasync(&tfile->fasync, SIGIO, POLL_OUT); } static void tun_put_page(struct tun_page *tpage) { if (tpage->page) __page_frag_cache_drain(tpage->page, tpage->count); } static int tun_xdp_one(struct tun_struct *tun, struct tun_file *tfile, struct xdp_buff *xdp, int *flush, struct tun_page *tpage) { unsigned int datasize = xdp->data_end - xdp->data; struct tun_xdp_hdr *hdr = xdp->data_hard_start; struct virtio_net_hdr *gso = &hdr->gso; struct bpf_prog *xdp_prog; struct sk_buff *skb = NULL; struct sk_buff_head *queue; u32 rxhash = 0, act; int buflen = hdr->buflen; int metasize = 0; int ret = 0; bool skb_xdp = false; struct page *page; if (unlikely(datasize < ETH_HLEN)) return -EINVAL; xdp_prog = rcu_dereference(tun->xdp_prog); if (xdp_prog) { if (gso->gso_type) { skb_xdp = true; goto build; } xdp_init_buff(xdp, buflen, &tfile->xdp_rxq); act = bpf_prog_run_xdp(xdp_prog, xdp); ret = tun_xdp_act(tun, xdp_prog, xdp, act); if (ret < 0) { 
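/*
 * tun_sock_write_space() above is the counterpart of the EPOLLOUT handling
 * in tun_chr_poll(): it only matters once TUNSETSNDBUF has shrunk the
 * per-queue socket send buffer from its INT_MAX default. A rough sketch of
 * a writer that waits for space (frame/frame_len are placeholders):
 *
 *	int sndbuf = 1 << 16;
 *
 *	ioctl(fd, TUNSETSNDBUF, &sndbuf);
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLOUT))
 *		write(fd, frame, frame_len);
 */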
put_page(virt_to_head_page(xdp->data)); return ret; } switch (ret) { case XDP_REDIRECT: *flush = true; fallthrough; case XDP_TX: return 0; case XDP_PASS: break; default: page = virt_to_head_page(xdp->data); if (tpage->page == page) { ++tpage->count; } else { tun_put_page(tpage); tpage->page = page; tpage->count = 1; } return 0; } } build: skb = build_skb(xdp->data_hard_start, buflen); if (!skb) { ret = -ENOMEM; goto out; } skb_reserve(skb, xdp->data - xdp->data_hard_start); skb_put(skb, xdp->data_end - xdp->data); /* The externally provided xdp_buff may have no metadata support, which * is marked by xdp->data_meta being xdp->data + 1. This will lead to a * metasize of -1 and is the reason why the condition checks for > 0. */ metasize = xdp->data - xdp->data_meta; if (metasize > 0) skb_metadata_set(skb, metasize); if (tun_vnet_hdr_to_skb(tun->flags, skb, gso)) { atomic_long_inc(&tun->rx_frame_errors); kfree_skb(skb); ret = -EINVAL; goto out; } skb->protocol = eth_type_trans(skb, tun->dev); skb_reset_network_header(skb); skb_probe_transport_header(skb); skb_record_rx_queue(skb, tfile->queue_index); if (skb_xdp) { ret = do_xdp_generic(xdp_prog, &skb); if (ret != XDP_PASS) { ret = 0; goto out; } } if (!rcu_dereference(tun->steering_prog) && tun->numqueues > 1 && !tfile->detached) rxhash = __skb_get_hash_symmetric(skb); if (tfile->napi_enabled) { queue = &tfile->sk.sk_write_queue; spin_lock(&queue->lock); if (unlikely(tfile->detached)) { spin_unlock(&queue->lock); kfree_skb(skb); return -EBUSY; } __skb_queue_tail(queue, skb); spin_unlock(&queue->lock); ret = 1; } else { netif_receive_skb(skb); ret = 0; } /* No need to disable preemption here since this function is * always called with bh disabled */ dev_sw_netstats_rx_add(tun->dev, datasize); if (rxhash) tun_flow_update(tun, rxhash, tfile); out: return ret; } static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { int ret, i; struct tun_file *tfile = container_of(sock, struct tun_file, socket); struct tun_struct *tun = tun_get(tfile); struct tun_msg_ctl *ctl = m->msg_control; struct xdp_buff *xdp; if (!tun) return -EBADFD; if (m->msg_controllen == sizeof(struct tun_msg_ctl) && ctl && ctl->type == TUN_MSG_PTR) { struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; struct tun_page tpage; int n = ctl->num; int flush = 0, queued = 0; memset(&tpage, 0, sizeof(tpage)); local_bh_disable(); rcu_read_lock(); bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); for (i = 0; i < n; i++) { xdp = &((struct xdp_buff *)ctl->ptr)[i]; ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage); if (ret > 0) queued += ret; } if (flush) xdp_do_flush(); if (tfile->napi_enabled && queued > 0) napi_schedule(&tfile->napi); bpf_net_ctx_clear(bpf_net_ctx); rcu_read_unlock(); local_bh_enable(); tun_put_page(&tpage); ret = total_len; goto out; } ret = tun_get_user(tun, tfile, ctl ? 
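/*
 * The TUN_MSG_PTR branch of tun_sendmsg() above is exercised by in-kernel
 * users (vhost-net), not by ordinary sendmsg() from user space. Roughly,
 * such a caller batches prepared xdp_buffs behind a tun_msg_ctl (sketch
 * only, BATCH is a placeholder):
 *
 *	struct xdp_buff xdp[BATCH];		// filled in by the caller
 *	struct tun_msg_ctl ctl = {
 *		.type	= TUN_MSG_PTR,
 *		.num	= BATCH,
 *		.ptr	= xdp,
 *	};
 *	struct msghdr msg = {
 *		.msg_control	= &ctl,
 *		.msg_controllen	= sizeof(ctl),
 *	};
 *
 * and hands the msghdr to this function through sock->ops->sendmsg(), so
 * each buffer is processed by tun_xdp_one() above.
 */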
ctl->ptr : NULL, &m->msg_iter, m->msg_flags & MSG_DONTWAIT, m->msg_flags & MSG_MORE); out: tun_put(tun); return ret; } static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, int flags) { struct tun_file *tfile = container_of(sock, struct tun_file, socket); struct tun_struct *tun = tun_get(tfile); void *ptr = m->msg_control; int ret; if (!tun) { ret = -EBADFD; goto out_free; } if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) { ret = -EINVAL; goto out_put_tun; } if (flags & MSG_ERRQUEUE) { ret = sock_recv_errqueue(sock->sk, m, total_len, SOL_PACKET, TUN_TX_TIMESTAMP); goto out; } ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, ptr); if (ret > (ssize_t)total_len) { m->msg_flags |= MSG_TRUNC; ret = flags & MSG_TRUNC ? ret : total_len; } out: tun_put(tun); return ret; out_put_tun: tun_put(tun); out_free: tun_ptr_free(ptr); return ret; } static int tun_ptr_peek_len(void *ptr) { if (likely(ptr)) { if (tun_is_xdp_frame(ptr)) { struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr); return xdpf->len; } return __skb_array_len_with_tag(ptr); } else { return 0; } } static int tun_peek_len(struct socket *sock) { struct tun_file *tfile = container_of(sock, struct tun_file, socket); struct tun_struct *tun; int ret = 0; tun = tun_get(tfile); if (!tun) return 0; ret = PTR_RING_PEEK_CALL(&tfile->tx_ring, tun_ptr_peek_len); tun_put(tun); return ret; } /* Ops structure to mimic raw sockets with tun */ static const struct proto_ops tun_socket_ops = { .peek_len = tun_peek_len, .sendmsg = tun_sendmsg, .recvmsg = tun_recvmsg, }; static struct proto tun_proto = { .name = "tun", .owner = THIS_MODULE, .obj_size = sizeof(struct tun_file), }; static int tun_flags(struct tun_struct *tun) { return tun->flags & (TUN_FEATURES | IFF_PERSIST | IFF_TUN | IFF_TAP); } static ssize_t tun_flags_show(struct device *dev, struct device_attribute *attr, char *buf) { struct tun_struct *tun = netdev_priv(to_net_dev(dev)); return sysfs_emit(buf, "0x%x\n", tun_flags(tun)); } static ssize_t owner_show(struct device *dev, struct device_attribute *attr, char *buf) { struct tun_struct *tun = netdev_priv(to_net_dev(dev)); return uid_valid(tun->owner)? sysfs_emit(buf, "%u\n", from_kuid_munged(current_user_ns(), tun->owner)) : sysfs_emit(buf, "-1\n"); } static ssize_t group_show(struct device *dev, struct device_attribute *attr, char *buf) { struct tun_struct *tun = netdev_priv(to_net_dev(dev)); return gid_valid(tun->group) ? 
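/*
 * The tun_flags, owner and group attributes above are plain sysfs files
 * under /sys/class/net/<dev>/, so scripts can inspect a device without
 * holding its fd. A small sketch ("tap0" is a placeholder name):
 *
 *	unsigned int flags;
 *	FILE *f = fopen("/sys/class/net/tap0/tun_flags", "r");
 *
 *	if (f && fscanf(f, "%x", &flags) == 1)
 *		printf("tun_flags 0x%x\n", flags);
 *
 * owner and group print a uid/gid, or -1 when unset.
 */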
sysfs_emit(buf, "%u\n", from_kgid_munged(current_user_ns(), tun->group)) : sysfs_emit(buf, "-1\n"); } static DEVICE_ATTR_RO(tun_flags); static DEVICE_ATTR_RO(owner); static DEVICE_ATTR_RO(group); static struct attribute *tun_dev_attrs[] = { &dev_attr_tun_flags.attr, &dev_attr_owner.attr, &dev_attr_group.attr, NULL }; static const struct attribute_group tun_attr_group = { .attrs = tun_dev_attrs }; static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) { struct tun_struct *tun; struct tun_file *tfile = file->private_data; struct net_device *dev; int err; if (tfile->detached) return -EINVAL; if ((ifr->ifr_flags & IFF_NAPI_FRAGS)) { if (!capable(CAP_NET_ADMIN)) return -EPERM; if (!(ifr->ifr_flags & IFF_NAPI) || (ifr->ifr_flags & TUN_TYPE_MASK) != IFF_TAP) return -EINVAL; } dev = __dev_get_by_name(net, ifr->ifr_name); if (dev) { if (ifr->ifr_flags & IFF_TUN_EXCL) return -EBUSY; if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops) tun = netdev_priv(dev); else if ((ifr->ifr_flags & IFF_TAP) && dev->netdev_ops == &tap_netdev_ops) tun = netdev_priv(dev); else return -EINVAL; if (!!(ifr->ifr_flags & IFF_MULTI_QUEUE) != !!(tun->flags & IFF_MULTI_QUEUE)) return -EINVAL; if (tun_not_capable(tun)) return -EPERM; err = security_tun_dev_open(tun->security); if (err < 0) return err; err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER, ifr->ifr_flags & IFF_NAPI, ifr->ifr_flags & IFF_NAPI_FRAGS, true); if (err < 0) return err; if (tun->flags & IFF_MULTI_QUEUE && (tun->numqueues + tun->numdisabled > 1)) { /* One or more queue has already been attached, no need * to initialize the device again. */ netdev_state_change(dev); return 0; } tun->flags = (tun->flags & ~TUN_FEATURES) | (ifr->ifr_flags & TUN_FEATURES); netdev_state_change(dev); } else { char *name; unsigned long flags = 0; int queues = ifr->ifr_flags & IFF_MULTI_QUEUE ? MAX_TAP_QUEUES : 1; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; err = security_tun_dev_create(); if (err < 0) return err; /* Set dev type */ if (ifr->ifr_flags & IFF_TUN) { /* TUN device */ flags |= IFF_TUN; name = "tun%d"; } else if (ifr->ifr_flags & IFF_TAP) { /* TAP device */ flags |= IFF_TAP; name = "tap%d"; } else return -EINVAL; if (*ifr->ifr_name) name = ifr->ifr_name; dev = alloc_netdev_mqs(sizeof(struct tun_struct), name, NET_NAME_UNKNOWN, tun_setup, queues, queues); if (!dev) return -ENOMEM; dev_net_set(dev, net); dev->rtnl_link_ops = &tun_link_ops; dev->ifindex = tfile->ifindex; dev->sysfs_groups[0] = &tun_attr_group; tun = netdev_priv(dev); tun->dev = dev; tun->flags = flags; tun->txflt.count = 0; tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr); tun->align = NET_SKB_PAD; tun->filter_attached = false; tun->sndbuf = tfile->socket.sk->sk_sndbuf; tun->rx_batched = 0; RCU_INIT_POINTER(tun->steering_prog, NULL); tun->ifr = ifr; tun->file = file; tun_net_initialize(dev); err = register_netdevice(tun->dev); if (err < 0) { free_netdev(dev); return err; } /* free_netdev() won't check refcnt, to avoid race * with dev_put() we need publish tun after registration. */ rcu_assign_pointer(tfile->tun, tun); } if (ifr->ifr_flags & IFF_NO_CARRIER) netif_carrier_off(tun->dev); else netif_carrier_on(tun->dev); /* Make sure persistent devices do not get stuck in * xoff state. 
*/ if (netif_running(tun->dev)) netif_tx_wake_all_queues(tun->dev); strcpy(ifr->ifr_name, tun->dev->name); return 0; } static void tun_get_iff(struct tun_struct *tun, struct ifreq *ifr) { strcpy(ifr->ifr_name, tun->dev->name); ifr->ifr_flags = tun_flags(tun); } /* This is like a cut-down ethtool ops, except done via tun fd so no * privs required. */ static int set_offload(struct tun_struct *tun, unsigned long arg) { netdev_features_t features = 0; if (arg & TUN_F_CSUM) { features |= NETIF_F_HW_CSUM; arg &= ~TUN_F_CSUM; if (arg & (TUN_F_TSO4|TUN_F_TSO6)) { if (arg & TUN_F_TSO_ECN) { features |= NETIF_F_TSO_ECN; arg &= ~TUN_F_TSO_ECN; } if (arg & TUN_F_TSO4) features |= NETIF_F_TSO; if (arg & TUN_F_TSO6) features |= NETIF_F_TSO6; arg &= ~(TUN_F_TSO4|TUN_F_TSO6); } arg &= ~TUN_F_UFO; /* TODO: for now USO4 and USO6 should work simultaneously */ if (arg & TUN_F_USO4 && arg & TUN_F_USO6) { features |= NETIF_F_GSO_UDP_L4; arg &= ~(TUN_F_USO4 | TUN_F_USO6); } } /* This gives the user a way to test for new features in future by * trying to set them. */ if (arg) return -EINVAL; tun->set_features = features; tun->dev->wanted_features &= ~TUN_USER_FEATURES; tun->dev->wanted_features |= features; netdev_update_features(tun->dev); return 0; } static void tun_detach_filter(struct tun_struct *tun, int n) { int i; struct tun_file *tfile; for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); lock_sock(tfile->socket.sk); sk_detach_filter(tfile->socket.sk); release_sock(tfile->socket.sk); } tun->filter_attached = false; } static int tun_attach_filter(struct tun_struct *tun) { int i, ret = 0; struct tun_file *tfile; for (i = 0; i < tun->numqueues; i++) { tfile = rtnl_dereference(tun->tfiles[i]); lock_sock(tfile->socket.sk); ret = sk_attach_filter(&tun->fprog, tfile->socket.sk); release_sock(tfile->socket.sk); if (ret) { tun_detach_filter(tun, i); return ret; } } tun->filter_attached = true; return ret; } static void tun_set_sndbuf(struct tun_struct *tun) { struct tun_file *tfile; int i; for (i = 0; i < tun->numqueues; i++) { tfile = rtnl_dereference(tun->tfiles[i]); tfile->socket.sk->sk_sndbuf = tun->sndbuf; } } static int tun_set_queue(struct file *file, struct ifreq *ifr) { struct tun_file *tfile = file->private_data; struct tun_struct *tun; int ret = 0; rtnl_lock(); if (ifr->ifr_flags & IFF_ATTACH_QUEUE) { tun = tfile->detached; if (!tun) { ret = -EINVAL; goto unlock; } ret = security_tun_dev_attach_queue(tun->security); if (ret < 0) goto unlock; ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI, tun->flags & IFF_NAPI_FRAGS, true); } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { tun = rtnl_dereference(tfile->tun); if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached) ret = -EINVAL; else __tun_detach(tfile, false); } else ret = -EINVAL; if (ret >= 0) netdev_state_change(tun->dev); unlock: rtnl_unlock(); return ret; } static int tun_set_ebpf(struct tun_struct *tun, struct tun_prog __rcu **prog_p, void __user *data) { struct bpf_prog *prog; int fd; if (copy_from_user(&fd, data, sizeof(fd))) return -EFAULT; if (fd == -1) { prog = NULL; } else { prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); if (IS_ERR(prog)) return PTR_ERR(prog); } return __tun_set_ebpf(tun, prog_p, prog); } /* Return correct value for tun->dev->addr_len based on tun->dev->type. 
*/ static unsigned char tun_get_addr_len(unsigned short type) { switch (type) { case ARPHRD_IP6GRE: case ARPHRD_TUNNEL6: return sizeof(struct in6_addr); case ARPHRD_IPGRE: case ARPHRD_TUNNEL: case ARPHRD_SIT: return 4; case ARPHRD_ETHER: return ETH_ALEN; case ARPHRD_IEEE802154: case ARPHRD_IEEE802154_MONITOR: return IEEE802154_EXTENDED_ADDR_LEN; case ARPHRD_PHONET_PIPE: case ARPHRD_PPP: case ARPHRD_NONE: return 0; case ARPHRD_6LOWPAN: return EUI64_ADDR_LEN; case ARPHRD_FDDI: return FDDI_K_ALEN; case ARPHRD_HIPPI: return HIPPI_ALEN; case ARPHRD_IEEE802: return FC_ALEN; case ARPHRD_ROSE: return ROSE_ADDR_LEN; case ARPHRD_NETROM: return AX25_ADDR_LEN; case ARPHRD_LOCALTLK: return LTALK_ALEN; default: return 0; } } static long __tun_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg, int ifreq_len) { struct tun_file *tfile = file->private_data; struct net *net = sock_net(&tfile->sk); struct tun_struct *tun; void __user* argp = (void __user*)arg; unsigned int carrier; struct ifreq ifr; kuid_t owner; kgid_t group; int ifindex; int sndbuf; int ret; bool do_notify = false; if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) { if (copy_from_user(&ifr, argp, ifreq_len)) return -EFAULT; } else { memset(&ifr, 0, sizeof(ifr)); } if (cmd == TUNGETFEATURES) { /* Currently this just means: "what IFF flags are valid?". * This is needed because we never checked for invalid flags on * TUNSETIFF. */ return put_user(IFF_TUN | IFF_TAP | IFF_NO_CARRIER | TUN_FEATURES, (unsigned int __user*)argp); } else if (cmd == TUNSETQUEUE) { return tun_set_queue(file, &ifr); } else if (cmd == SIOCGSKNS) { if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; return open_related_ns(&net->ns, get_net_ns); } rtnl_lock(); tun = tun_get(tfile); if (cmd == TUNSETIFF) { ret = -EEXIST; if (tun) goto unlock; ifr.ifr_name[IFNAMSIZ-1] = '\0'; ret = tun_set_iff(net, file, &ifr); if (ret) goto unlock; if (copy_to_user(argp, &ifr, ifreq_len)) ret = -EFAULT; goto unlock; } if (cmd == TUNSETIFINDEX) { ret = -EPERM; if (tun) goto unlock; ret = -EFAULT; if (copy_from_user(&ifindex, argp, sizeof(ifindex))) goto unlock; ret = -EINVAL; if (ifindex < 0) goto unlock; ret = 0; tfile->ifindex = ifindex; goto unlock; } ret = -EBADFD; if (!tun) goto unlock; netif_info(tun, drv, tun->dev, "tun_chr_ioctl cmd %u\n", cmd); net = dev_net(tun->dev); ret = 0; switch (cmd) { case TUNGETIFF: tun_get_iff(tun, &ifr); if (tfile->detached) ifr.ifr_flags |= IFF_DETACH_QUEUE; if (!tfile->socket.sk->sk_filter) ifr.ifr_flags |= IFF_NOFILTER; if (copy_to_user(argp, &ifr, ifreq_len)) ret = -EFAULT; break; case TUNSETNOCSUM: /* Disable/Enable checksum */ /* [unimplemented] */ netif_info(tun, drv, tun->dev, "ignored: set checksum %s\n", arg ? "disabled" : "enabled"); break; case TUNSETPERSIST: /* Disable/Enable persist mode. Keep an extra reference to the * module to prevent the module being unprobed. */ if (arg && !(tun->flags & IFF_PERSIST)) { tun->flags |= IFF_PERSIST; __module_get(THIS_MODULE); do_notify = true; } if (!arg && (tun->flags & IFF_PERSIST)) { tun->flags &= ~IFF_PERSIST; module_put(THIS_MODULE); do_notify = true; } netif_info(tun, drv, tun->dev, "persist %s\n", arg ? 
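/*
 * TUNSETIFF and TUNSETPERSIST above, combined with TUNSETOWNER just below,
 * are what tools use to pre-create a device that an unprivileged process
 * can open later. A minimal sketch (interface name and uid are
 * placeholders):
 *
 *	struct ifreq ifr = { 0 };
 *	int fd = open("/dev/net/tun", O_RDWR);
 *
 *	strncpy(ifr.ifr_name, "tap0", IFNAMSIZ);
 *	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
 *	ioctl(fd, TUNSETIFF, &ifr);
 *	ioctl(fd, TUNSETOWNER, 1000);	// uid allowed to attach later
 *	ioctl(fd, TUNSETPERSIST, 1);	// keep the device after close(fd)
 */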
"enabled" : "disabled"); break; case TUNSETOWNER: /* Set owner of the device */ owner = make_kuid(current_user_ns(), arg); if (!uid_valid(owner)) { ret = -EINVAL; break; } tun->owner = owner; do_notify = true; netif_info(tun, drv, tun->dev, "owner set to %u\n", from_kuid(&init_user_ns, tun->owner)); break; case TUNSETGROUP: /* Set group of the device */ group = make_kgid(current_user_ns(), arg); if (!gid_valid(group)) { ret = -EINVAL; break; } tun->group = group; do_notify = true; netif_info(tun, drv, tun->dev, "group set to %u\n", from_kgid(&init_user_ns, tun->group)); break; case TUNSETLINK: /* Only allow setting the type when the interface is down */ if (tun->dev->flags & IFF_UP) { netif_info(tun, drv, tun->dev, "Linktype set failed because interface is up\n"); ret = -EBUSY; } else { ret = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE, tun->dev); ret = notifier_to_errno(ret); if (ret) { netif_info(tun, drv, tun->dev, "Refused to change device type\n"); break; } tun->dev->type = (int) arg; tun->dev->addr_len = tun_get_addr_len(tun->dev->type); netif_info(tun, drv, tun->dev, "linktype set to %d\n", tun->dev->type); call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, tun->dev); } break; case TUNSETDEBUG: tun->msg_enable = (u32)arg; break; case TUNSETOFFLOAD: ret = set_offload(tun, arg); break; case TUNSETTXFILTER: /* Can be set only for TAPs */ ret = -EINVAL; if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) break; ret = update_filter(&tun->txflt, (void __user *)arg); break; case SIOCGIFHWADDR: /* Get hw address */ dev_get_mac_address(&ifr.ifr_hwaddr, net, tun->dev->name); if (copy_to_user(argp, &ifr, ifreq_len)) ret = -EFAULT; break; case SIOCSIFHWADDR: /* Set hw address */ ret = dev_set_mac_address_user(tun->dev, &ifr.ifr_hwaddr, NULL); break; case TUNGETSNDBUF: sndbuf = tfile->socket.sk->sk_sndbuf; if (copy_to_user(argp, &sndbuf, sizeof(sndbuf))) ret = -EFAULT; break; case TUNSETSNDBUF: if (copy_from_user(&sndbuf, argp, sizeof(sndbuf))) { ret = -EFAULT; break; } if (sndbuf <= 0) { ret = -EINVAL; break; } tun->sndbuf = sndbuf; tun_set_sndbuf(tun); break; case TUNATTACHFILTER: /* Can be set only for TAPs */ ret = -EINVAL; if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) break; ret = -EFAULT; if (copy_from_user(&tun->fprog, argp, sizeof(tun->fprog))) break; ret = tun_attach_filter(tun); break; case TUNDETACHFILTER: /* Can be set only for TAPs */ ret = -EINVAL; if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) break; ret = 0; tun_detach_filter(tun, tun->numqueues); break; case TUNGETFILTER: ret = -EINVAL; if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP) break; ret = -EFAULT; if (copy_to_user(argp, &tun->fprog, sizeof(tun->fprog))) break; ret = 0; break; case TUNSETSTEERINGEBPF: ret = tun_set_ebpf(tun, &tun->steering_prog, argp); break; case TUNSETFILTEREBPF: ret = tun_set_ebpf(tun, &tun->filter_prog, argp); break; case TUNSETCARRIER: ret = -EFAULT; if (copy_from_user(&carrier, argp, sizeof(carrier))) goto unlock; ret = tun_net_change_carrier(tun->dev, (bool)carrier); break; case TUNGETDEVNETNS: ret = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) goto unlock; ret = open_related_ns(&net->ns, get_net_ns); break; default: ret = tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags, cmd, argp); break; } if (do_notify) netdev_state_change(tun->dev); unlock: rtnl_unlock(); if (tun) tun_put(tun); return ret; } static long tun_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return __tun_chr_ioctl(file, cmd, arg, sizeof (struct ifreq)); } #ifdef CONFIG_COMPAT static long tun_chr_compat_ioctl(struct 
file *file, unsigned int cmd, unsigned long arg) { switch (cmd) { case TUNSETIFF: case TUNGETIFF: case TUNSETTXFILTER: case TUNGETSNDBUF: case TUNSETSNDBUF: case SIOCGIFHWADDR: case SIOCSIFHWADDR: arg = (unsigned long)compat_ptr(arg); break; default: arg = (compat_ulong_t)arg; break; } /* * compat_ifreq is shorter than ifreq, so we must not access beyond * the end of that structure. All fields that are used in this * driver are compatible though, we don't need to convert the * contents. */ return __tun_chr_ioctl(file, cmd, arg, sizeof(struct compat_ifreq)); } #endif /* CONFIG_COMPAT */ static int tun_chr_fasync(int fd, struct file *file, int on) { struct tun_file *tfile = file->private_data; int ret; if (on) { ret = file_f_owner_allocate(file); if (ret) goto out; } if ((ret = fasync_helper(fd, file, on, &tfile->fasync)) < 0) goto out; if (on) { __f_setown(file, task_pid(current), PIDTYPE_TGID, 0); tfile->flags |= TUN_FASYNC; } else tfile->flags &= ~TUN_FASYNC; ret = 0; out: return ret; } static int tun_chr_open(struct inode *inode, struct file * file) { struct net *net = current->nsproxy->net_ns; struct tun_file *tfile; tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, &tun_proto, 0); if (!tfile) return -ENOMEM; if (ptr_ring_init(&tfile->tx_ring, 0, GFP_KERNEL)) { sk_free(&tfile->sk); return -ENOMEM; } mutex_init(&tfile->napi_mutex); RCU_INIT_POINTER(tfile->tun, NULL); tfile->flags = 0; tfile->ifindex = 0; init_waitqueue_head(&tfile->socket.wq.wait); tfile->socket.file = file; tfile->socket.ops = &tun_socket_ops; sock_init_data_uid(&tfile->socket, &tfile->sk, current_fsuid()); tfile->sk.sk_write_space = tun_sock_write_space; tfile->sk.sk_sndbuf = INT_MAX; file->private_data = tfile; INIT_LIST_HEAD(&tfile->next); sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); /* tun groks IOCB_NOWAIT just fine, mark it as such */ file->f_mode |= FMODE_NOWAIT; return 0; } static int tun_chr_close(struct inode *inode, struct file *file) { struct tun_file *tfile = file->private_data; tun_detach(tfile, true); return 0; } #ifdef CONFIG_PROC_FS static void tun_chr_show_fdinfo(struct seq_file *m, struct file *file) { struct tun_file *tfile = file->private_data; struct tun_struct *tun; struct ifreq ifr; memset(&ifr, 0, sizeof(ifr)); rtnl_lock(); tun = tun_get(tfile); if (tun) tun_get_iff(tun, &ifr); rtnl_unlock(); if (tun) tun_put(tun); seq_printf(m, "iff:\t%s\n", ifr.ifr_name); } #endif static const struct file_operations tun_fops = { .owner = THIS_MODULE, .read_iter = tun_chr_read_iter, .write_iter = tun_chr_write_iter, .poll = tun_chr_poll, .unlocked_ioctl = tun_chr_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = tun_chr_compat_ioctl, #endif .open = tun_chr_open, .release = tun_chr_close, .fasync = tun_chr_fasync, #ifdef CONFIG_PROC_FS .show_fdinfo = tun_chr_show_fdinfo, #endif }; static struct miscdevice tun_miscdev = { .minor = TUN_MINOR, .name = "tun", .nodename = "net/tun", .fops = &tun_fops, }; /* ethtool interface */ static void tun_default_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { ethtool_link_ksettings_zero_link_mode(cmd, supported); ethtool_link_ksettings_zero_link_mode(cmd, advertising); cmd->base.speed = SPEED_10000; cmd->base.duplex = DUPLEX_FULL; cmd->base.port = PORT_TP; cmd->base.phy_address = 0; cmd->base.autoneg = AUTONEG_DISABLE; } static int tun_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { struct tun_struct *tun = netdev_priv(dev); memcpy(cmd, &tun->link_ksettings, sizeof(*cmd)); return 0; } static int 
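/*
 * tun_chr_fasync() above is what makes O_ASYNC delivery work: once enabled,
 * kill_fasync() in the xmit and write_space paths raises SIGIO for every
 * new readable or writable event. Enabling it from user space is the usual
 * fcntl sequence (sigio_handler is a placeholder):
 *
 *	signal(SIGIO, sigio_handler);
 *	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC | O_NONBLOCK);
 *
 * Note that tun_chr_fasync() already assigns the calling thread group as
 * the fd owner, so an explicit F_SETOWN is not required here.
 */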
tun_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd) { struct tun_struct *tun = netdev_priv(dev); memcpy(&tun->link_ksettings, cmd, sizeof(*cmd)); return 0; } static void tun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct tun_struct *tun = netdev_priv(dev); strscpy(info->driver, DRV_NAME, sizeof(info->driver)); strscpy(info->version, DRV_VERSION, sizeof(info->version)); switch (tun->flags & TUN_TYPE_MASK) { case IFF_TUN: strscpy(info->bus_info, "tun", sizeof(info->bus_info)); break; case IFF_TAP: strscpy(info->bus_info, "tap", sizeof(info->bus_info)); break; } } static u32 tun_get_msglevel(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); return tun->msg_enable; } static void tun_set_msglevel(struct net_device *dev, u32 value) { struct tun_struct *tun = netdev_priv(dev); tun->msg_enable = value; } static int tun_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { struct tun_struct *tun = netdev_priv(dev); ec->rx_max_coalesced_frames = tun->rx_batched; return 0; } static int tun_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { struct tun_struct *tun = netdev_priv(dev); if (ec->rx_max_coalesced_frames > NAPI_POLL_WEIGHT) tun->rx_batched = NAPI_POLL_WEIGHT; else tun->rx_batched = ec->rx_max_coalesced_frames; return 0; } static void tun_get_channels(struct net_device *dev, struct ethtool_channels *channels) { struct tun_struct *tun = netdev_priv(dev); channels->combined_count = tun->numqueues; channels->max_combined = tun->flags & IFF_MULTI_QUEUE ? MAX_TAP_QUEUES : 1; } static const struct ethtool_ops tun_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_RX_MAX_FRAMES, .get_drvinfo = tun_get_drvinfo, .get_msglevel = tun_get_msglevel, .set_msglevel = tun_set_msglevel, .get_link = ethtool_op_get_link, .get_channels = tun_get_channels, .get_ts_info = ethtool_op_get_ts_info, .get_coalesce = tun_get_coalesce, .set_coalesce = tun_set_coalesce, .get_link_ksettings = tun_get_link_ksettings, .set_link_ksettings = tun_set_link_ksettings, }; static int tun_queue_resize(struct tun_struct *tun) { struct net_device *dev = tun->dev; struct tun_file *tfile; struct ptr_ring **rings; int n = tun->numqueues + tun->numdisabled; int ret, i; rings = kmalloc_array(n, sizeof(*rings), GFP_KERNEL); if (!rings) return -ENOMEM; for (i = 0; i < tun->numqueues; i++) { tfile = rtnl_dereference(tun->tfiles[i]); rings[i] = &tfile->tx_ring; } list_for_each_entry(tfile, &tun->disabled, next) rings[i++] = &tfile->tx_ring; ret = ptr_ring_resize_multiple_bh(rings, n, dev->tx_queue_len, GFP_KERNEL, tun_ptr_free); kfree(rings); return ret; } static int tun_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct tun_struct *tun = netdev_priv(dev); int i; if (dev->rtnl_link_ops != &tun_link_ops) return NOTIFY_DONE; switch (event) { case NETDEV_CHANGE_TX_QUEUE_LEN: if (tun_queue_resize(tun)) return NOTIFY_BAD; break; case NETDEV_UP: for (i = 0; i < tun->numqueues; i++) { struct tun_file *tfile; tfile = rtnl_dereference(tun->tfiles[i]); tfile->socket.sk->sk_write_space(tfile->socket.sk); } break; default: break; } return NOTIFY_DONE; } static struct notifier_block tun_notifier_block __read_mostly = { .notifier_call = tun_device_event, }; static int __init tun_init(void) { 
int ret = 0; pr_info("%s, %s\n", DRV_DESCRIPTION, DRV_VERSION); ret = rtnl_link_register(&tun_link_ops); if (ret) { pr_err("Can't register link_ops\n"); goto err_linkops; } ret = misc_register(&tun_miscdev); if (ret) { pr_err("Can't register misc device %d\n", TUN_MINOR); goto err_misc; } ret = register_netdevice_notifier(&tun_notifier_block); if (ret) { pr_err("Can't register netdevice notifier\n"); goto err_notifier; } return 0; err_notifier: misc_deregister(&tun_miscdev); err_misc: rtnl_link_unregister(&tun_link_ops); err_linkops: return ret; } static void __exit tun_cleanup(void) { misc_deregister(&tun_miscdev); rtnl_link_unregister(&tun_link_ops); unregister_netdevice_notifier(&tun_notifier_block); } /* Get an underlying socket object from tun file. Returns error unless file is * attached to a device. The returned object works like a packet socket, it * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for * holding a reference to the file for as long as the socket is in use. */ struct socket *tun_get_socket(struct file *file) { struct tun_file *tfile; if (file->f_op != &tun_fops) return ERR_PTR(-EINVAL); tfile = file->private_data; if (!tfile) return ERR_PTR(-EBADFD); return &tfile->socket; } EXPORT_SYMBOL_GPL(tun_get_socket); struct ptr_ring *tun_get_tx_ring(struct file *file) { struct tun_file *tfile; if (file->f_op != &tun_fops) return ERR_PTR(-EINVAL); tfile = file->private_data; if (!tfile) return ERR_PTR(-EBADFD); return &tfile->tx_ring; } EXPORT_SYMBOL_GPL(tun_get_tx_ring); module_init(tun_init); module_exit(tun_cleanup); MODULE_DESCRIPTION(DRV_DESCRIPTION); MODULE_AUTHOR(DRV_COPYRIGHT); MODULE_LICENSE("GPL"); MODULE_ALIAS_MISCDEV(TUN_MINOR); MODULE_ALIAS("devname:net/tun");
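The ioctl cases handled above (TUNSETOWNER, TUNSETSNDBUF/TUNGETSNDBUF, and so on) are reached from userspace through the /dev/net/tun character device registered by tun_miscdev. The following is a minimal userspace sketch, not part of the driver, showing how that path is typically exercised; it assumes the standard <linux/if_tun.h> UAPI header and CAP_NET_ADMIN, and the chosen sndbuf value is purely illustrative.

/* Minimal userspace sketch (illustrative only): create a tun device and
 * exercise a couple of the ioctls dispatched by __tun_chr_ioctl() above.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/if.h>
#include <linux/if_tun.h>

int main(void)
{
	struct ifreq ifr;
	int fd, sndbuf;

	fd = open("/dev/net/tun", O_RDWR);		/* ends up in tun_chr_open() */
	if (fd < 0) {
		perror("open /dev/net/tun");
		return 1;
	}

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_TUN | IFF_NO_PI;		/* L3 device, no packet-info header */
	/* ifr_name left empty: the kernel assigns tunN */
	if (ioctl(fd, TUNSETIFF, &ifr) < 0) {		/* requires CAP_NET_ADMIN */
		perror("TUNSETIFF");
		return 1;
	}
	printf("attached to %s\n", ifr.ifr_name);

	if (ioctl(fd, TUNGETSNDBUF, &sndbuf) == 0)	/* reads tfile->socket.sk->sk_sndbuf */
		printf("sndbuf = %d\n", sndbuf);

	sndbuf = 1 << 20;				/* illustrative value; <= 0 is rejected with -EINVAL */
	if (ioctl(fd, TUNSETSNDBUF, &sndbuf) < 0)
		perror("TUNSETSNDBUF");

	/* read(fd, ...) would now return one IP packet per call; omitted here. */
	close(fd);
	return 0;
}

On 32-bit userspace with a 64-bit kernel the same requests go through tun_chr_compat_ioctl(), which only adjusts the argument pointer and the copied ifreq size before calling __tun_chr_ioctl().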
// SPDX-License-Identifier: GPL-2.0 /* * Copyright 2018 Noralf Trønnes */ #include <linux/dma-buf.h> #include <linux/export.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/shmem_fs.h> #include <linux/slab.h> #include <linux/vmalloc.h> #ifdef CONFIG_X86 #include
<asm/set_memory.h> #endif #include <drm/drm.h> #include <drm/drm_device.h> #include <drm/drm_drv.h> #include <drm/drm_gem_shmem_helper.h> #include <drm/drm_prime.h> #include <drm/drm_print.h> MODULE_IMPORT_NS("DMA_BUF"); /** * DOC: overview * * This library provides helpers for GEM objects backed by shmem buffers * allocated using anonymous pageable memory. * * Functions that operate on the GEM object receive struct &drm_gem_shmem_object. * For GEM callback helpers in struct &drm_gem_object functions, see likewise * named functions with an _object_ infix (e.g., drm_gem_shmem_object_vmap() wraps * drm_gem_shmem_vmap()). These helpers perform the necessary type conversion. */ static const struct drm_gem_object_funcs drm_gem_shmem_funcs = { .free = drm_gem_shmem_object_free, .print_info = drm_gem_shmem_object_print_info, .pin = drm_gem_shmem_object_pin, .unpin = drm_gem_shmem_object_unpin, .get_sg_table = drm_gem_shmem_object_get_sg_table, .vmap = drm_gem_shmem_object_vmap, .vunmap = drm_gem_shmem_object_vunmap, .mmap = drm_gem_shmem_object_mmap, .vm_ops = &drm_gem_shmem_vm_ops, }; static struct drm_gem_shmem_object * __drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private, struct vfsmount *gemfs) { struct drm_gem_shmem_object *shmem; struct drm_gem_object *obj; int ret = 0; size = PAGE_ALIGN(size); if (dev->driver->gem_create_object) { obj = dev->driver->gem_create_object(dev, size); if (IS_ERR(obj)) return ERR_CAST(obj); shmem = to_drm_gem_shmem_obj(obj); } else { shmem = kzalloc(sizeof(*shmem), GFP_KERNEL); if (!shmem) return ERR_PTR(-ENOMEM); obj = &shmem->base; } if (!obj->funcs) obj->funcs = &drm_gem_shmem_funcs; if (private) { drm_gem_private_object_init(dev, obj, size); shmem->map_wc = false; /* dma-buf mappings use always writecombine */ } else { ret = drm_gem_object_init_with_mnt(dev, obj, size, gemfs); } if (ret) { drm_gem_private_object_fini(obj); goto err_free; } ret = drm_gem_create_mmap_offset(obj); if (ret) goto err_release; INIT_LIST_HEAD(&shmem->madv_list); if (!private) { /* * Our buffers are kept pinned, so allocating them * from the MOVABLE zone is a really bad idea, and * conflicts with CMA. See comments above new_inode() * why this is required _and_ expected if you're * going to pin these pages. */ mapping_set_gfp_mask(obj->filp->f_mapping, GFP_HIGHUSER | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); } return shmem; err_release: drm_gem_object_release(obj); err_free: kfree(obj); return ERR_PTR(ret); } /** * drm_gem_shmem_create - Allocate an object with the given size * @dev: DRM device * @size: Size of the object to allocate * * This function creates a shmem GEM object. * * Returns: * A struct drm_gem_shmem_object * on success or an ERR_PTR()-encoded negative * error code on failure. */ struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t size) { return __drm_gem_shmem_create(dev, size, false, NULL); } EXPORT_SYMBOL_GPL(drm_gem_shmem_create); /** * drm_gem_shmem_create_with_mnt - Allocate an object with the given size in a * given mountpoint * @dev: DRM device * @size: Size of the object to allocate * @gemfs: tmpfs mount where the GEM object will be created * * This function creates a shmem GEM object in a given tmpfs mountpoint. * * Returns: * A struct drm_gem_shmem_object * on success or an ERR_PTR()-encoded negative * error code on failure. 
*/ struct drm_gem_shmem_object *drm_gem_shmem_create_with_mnt(struct drm_device *dev, size_t size, struct vfsmount *gemfs) { return __drm_gem_shmem_create(dev, size, false, gemfs); } EXPORT_SYMBOL_GPL(drm_gem_shmem_create_with_mnt); /** * drm_gem_shmem_free - Free resources associated with a shmem GEM object * @shmem: shmem GEM object to free * * This function cleans up the GEM object state and frees the memory used to * store the object itself. */ void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem) { struct drm_gem_object *obj = &shmem->base; if (drm_gem_is_imported(obj)) { drm_prime_gem_destroy(obj, shmem->sgt); } else { dma_resv_lock(shmem->base.resv, NULL); drm_WARN_ON(obj->dev, shmem->vmap_use_count); if (shmem->sgt) { dma_unmap_sgtable(obj->dev->dev, shmem->sgt, DMA_BIDIRECTIONAL, 0); sg_free_table(shmem->sgt); kfree(shmem->sgt); } if (shmem->pages) drm_gem_shmem_put_pages(shmem); drm_WARN_ON(obj->dev, shmem->pages_use_count); dma_resv_unlock(shmem->base.resv); } drm_gem_object_release(obj); kfree(shmem); } EXPORT_SYMBOL_GPL(drm_gem_shmem_free); static int drm_gem_shmem_get_pages(struct drm_gem_shmem_object *shmem) { struct drm_gem_object *obj = &shmem->base; struct page **pages; dma_resv_assert_held(shmem->base.resv); if (shmem->pages_use_count++ > 0) return 0; pages = drm_gem_get_pages(obj); if (IS_ERR(pages)) { drm_dbg_kms(obj->dev, "Failed to get pages (%ld)\n", PTR_ERR(pages)); shmem->pages_use_count = 0; return PTR_ERR(pages); } /* * TODO: Allocating WC pages which are correctly flushed is only * supported on x86. Ideal solution would be a GFP_WC flag, which also * ttm_pool.c could use. */ #ifdef CONFIG_X86 if (shmem->map_wc) set_pages_array_wc(pages, obj->size >> PAGE_SHIFT); #endif shmem->pages = pages; return 0; } /* * drm_gem_shmem_put_pages - Decrease use count on the backing pages for a shmem GEM object * @shmem: shmem GEM object * * This function decreases the use count and puts the backing pages when use drops to zero. */ void drm_gem_shmem_put_pages(struct drm_gem_shmem_object *shmem) { struct drm_gem_object *obj = &shmem->base; dma_resv_assert_held(shmem->base.resv); if (drm_WARN_ON_ONCE(obj->dev, !shmem->pages_use_count)) return; if (--shmem->pages_use_count > 0) return; #ifdef CONFIG_X86 if (shmem->map_wc) set_pages_array_wb(shmem->pages, obj->size >> PAGE_SHIFT); #endif drm_gem_put_pages(obj, shmem->pages, shmem->pages_mark_dirty_on_put, shmem->pages_mark_accessed_on_put); shmem->pages = NULL; } EXPORT_SYMBOL(drm_gem_shmem_put_pages); int drm_gem_shmem_pin_locked(struct drm_gem_shmem_object *shmem) { int ret; dma_resv_assert_held(shmem->base.resv); drm_WARN_ON(shmem->base.dev, drm_gem_is_imported(&shmem->base)); ret = drm_gem_shmem_get_pages(shmem); return ret; } EXPORT_SYMBOL(drm_gem_shmem_pin_locked); void drm_gem_shmem_unpin_locked(struct drm_gem_shmem_object *shmem) { dma_resv_assert_held(shmem->base.resv); drm_gem_shmem_put_pages(shmem); } EXPORT_SYMBOL(drm_gem_shmem_unpin_locked); /** * drm_gem_shmem_pin - Pin backing pages for a shmem GEM object * @shmem: shmem GEM object * * This function makes sure the backing pages are pinned in memory while the * buffer is exported. * * Returns: * 0 on success or a negative error code on failure. 
*/ int drm_gem_shmem_pin(struct drm_gem_shmem_object *shmem) { struct drm_gem_object *obj = &shmem->base; int ret; drm_WARN_ON(obj->dev, drm_gem_is_imported(obj)); ret = dma_resv_lock_interruptible(shmem->base.resv, NULL); if (ret) return ret; ret = drm_gem_shmem_pin_locked(shmem); dma_resv_unlock(shmem->base.resv); return ret; } EXPORT_SYMBOL(drm_gem_shmem_pin); /** * drm_gem_shmem_unpin - Unpin backing pages for a shmem GEM object * @shmem: shmem GEM object * * This function removes the requirement that the backing pages are pinned in * memory. */ void drm_gem_shmem_unpin(struct drm_gem_shmem_object *shmem) { struct drm_gem_object *obj = &shmem->base; drm_WARN_ON(obj->dev, drm_gem_is_imported(obj)); dma_resv_lock(shmem->base.resv, NULL); drm_gem_shmem_unpin_locked(shmem); dma_resv_unlock(shmem->base.resv); } EXPORT_SYMBOL(drm_gem_shmem_unpin); /* * drm_gem_shmem_vmap - Create a virtual mapping for a shmem GEM object * @shmem: shmem GEM object * @map: Returns the kernel virtual address of the SHMEM GEM object's backing * store. * * This function makes sure that a contiguous kernel virtual address mapping * exists for the buffer backing the shmem GEM object. It hides the differences * between dma-buf imported and natively allocated objects. * * Acquired mappings should be cleaned up by calling drm_gem_shmem_vunmap(). * * Returns: * 0 on success or a negative error code on failure. */ int drm_gem_shmem_vmap(struct drm_gem_shmem_object *shmem, struct iosys_map *map) { struct drm_gem_object *obj = &shmem->base; int ret = 0; if (drm_gem_is_imported(obj)) { ret = dma_buf_vmap(obj->dma_buf, map); if (!ret) { if (drm_WARN_ON(obj->dev, map->is_iomem)) { dma_buf_vunmap(obj->dma_buf, map); return -EIO; } } } else { pgprot_t prot = PAGE_KERNEL; dma_resv_assert_held(shmem->base.resv); if (shmem->vmap_use_count++ > 0) { iosys_map_set_vaddr(map, shmem->vaddr); return 0; } ret = drm_gem_shmem_get_pages(shmem); if (ret) goto err_zero_use; if (shmem->map_wc) prot = pgprot_writecombine(prot); shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT, VM_MAP, prot); if (!shmem->vaddr) ret = -ENOMEM; else iosys_map_set_vaddr(map, shmem->vaddr); } if (ret) { drm_dbg_kms(obj->dev, "Failed to vmap pages, error %d\n", ret); goto err_put_pages; } return 0; err_put_pages: if (!drm_gem_is_imported(obj)) drm_gem_shmem_put_pages(shmem); err_zero_use: shmem->vmap_use_count = 0; return ret; } EXPORT_SYMBOL(drm_gem_shmem_vmap); /* * drm_gem_shmem_vunmap - Unmap a virtual mapping for a shmem GEM object * @shmem: shmem GEM object * @map: Kernel virtual address where the SHMEM GEM object was mapped * * This function cleans up a kernel virtual address mapping acquired by * drm_gem_shmem_vmap(). The mapping is only removed when the use count drops to * zero. * * This function hides the differences between dma-buf imported and natively * allocated objects. 
*/ void drm_gem_shmem_vunmap(struct drm_gem_shmem_object *shmem, struct iosys_map *map) { struct drm_gem_object *obj = &shmem->base; if (drm_gem_is_imported(obj)) { dma_buf_vunmap(obj->dma_buf, map); } else { dma_resv_assert_held(shmem->base.resv); if (drm_WARN_ON_ONCE(obj->dev, !shmem->vmap_use_count)) return; if (--shmem->vmap_use_count > 0) return; vunmap(shmem->vaddr); drm_gem_shmem_put_pages(shmem); } shmem->vaddr = NULL; } EXPORT_SYMBOL(drm_gem_shmem_vunmap); static int drm_gem_shmem_create_with_handle(struct drm_file *file_priv, struct drm_device *dev, size_t size, uint32_t *handle) { struct drm_gem_shmem_object *shmem; int ret; shmem = drm_gem_shmem_create(dev, size); if (IS_ERR(shmem)) return PTR_ERR(shmem); /* * Allocate an id of idr table where the obj is registered * and handle has the id what user can see. */ ret = drm_gem_handle_create(file_priv, &shmem->base, handle); /* drop reference from allocate - handle holds it now. */ drm_gem_object_put(&shmem->base); return ret; } /* Update madvise status, returns true if not purged, else * false or -errno. */ int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv) { dma_resv_assert_held(shmem->base.resv); if (shmem->madv >= 0) shmem->madv = madv; madv = shmem->madv; return (madv >= 0); } EXPORT_SYMBOL(drm_gem_shmem_madvise); void drm_gem_shmem_purge(struct drm_gem_shmem_object *shmem) { struct drm_gem_object *obj = &shmem->base; struct drm_device *dev = obj->dev; dma_resv_assert_held(shmem->base.resv); drm_WARN_ON(obj->dev, !drm_gem_shmem_is_purgeable(shmem)); dma_unmap_sgtable(dev->dev, shmem->sgt, DMA_BIDIRECTIONAL, 0); sg_free_table(shmem->sgt); kfree(shmem->sgt); shmem->sgt = NULL; drm_gem_shmem_put_pages(shmem); shmem->madv = -1; drm_vma_node_unmap(&obj->vma_node, dev->anon_inode->i_mapping); drm_gem_free_mmap_offset(obj); /* Our goal here is to return as much of the memory as * is possible back to the system as we are called from OOM. * To do this we must instruct the shmfs to drop all of its * backing pages, *now*. */ shmem_truncate_range(file_inode(obj->filp), 0, (loff_t)-1); invalidate_mapping_pages(file_inode(obj->filp)->i_mapping, 0, (loff_t)-1); } EXPORT_SYMBOL(drm_gem_shmem_purge); /** * drm_gem_shmem_dumb_create - Create a dumb shmem buffer object * @file: DRM file structure to create the dumb buffer for * @dev: DRM device * @args: IOCTL data * * This function computes the pitch of the dumb buffer and rounds it up to an * integer number of bytes per pixel. Drivers for hardware that doesn't have * any additional restrictions on the pitch can directly use this function as * their &drm_driver.dumb_create callback. * * For hardware with additional restrictions, drivers can adjust the fields * set up by userspace before calling into this function. * * Returns: * 0 on success or a negative error code on failure. 
*/ int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev, struct drm_mode_create_dumb *args) { u32 min_pitch = DIV_ROUND_UP(args->width * args->bpp, 8); if (!args->pitch || !args->size) { args->pitch = min_pitch; args->size = PAGE_ALIGN(args->pitch * args->height); } else { /* ensure sane minimum values */ if (args->pitch < min_pitch) args->pitch = min_pitch; if (args->size < args->pitch * args->height) args->size = PAGE_ALIGN(args->pitch * args->height); } return drm_gem_shmem_create_with_handle(file, dev, args->size, &args->handle); } EXPORT_SYMBOL_GPL(drm_gem_shmem_dumb_create); static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct drm_gem_object *obj = vma->vm_private_data; struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); loff_t num_pages = obj->size >> PAGE_SHIFT; vm_fault_t ret; struct page *page; pgoff_t page_offset; /* We don't use vmf->pgoff since that has the fake offset */ page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; dma_resv_lock(shmem->base.resv, NULL); if (page_offset >= num_pages || drm_WARN_ON_ONCE(obj->dev, !shmem->pages) || shmem->madv < 0) { ret = VM_FAULT_SIGBUS; } else { page = shmem->pages[page_offset]; ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page)); } dma_resv_unlock(shmem->base.resv); return ret; } static void drm_gem_shmem_vm_open(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); drm_WARN_ON(obj->dev, drm_gem_is_imported(obj)); dma_resv_lock(shmem->base.resv, NULL); /* * We should have already pinned the pages when the buffer was first * mmap'd, vm_open() just grabs an additional reference for the new * mm the vma is getting copied into (ie. on fork()). */ if (!drm_WARN_ON_ONCE(obj->dev, !shmem->pages_use_count)) shmem->pages_use_count++; dma_resv_unlock(shmem->base.resv); drm_gem_vm_open(vma); } static void drm_gem_shmem_vm_close(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); dma_resv_lock(shmem->base.resv, NULL); drm_gem_shmem_put_pages(shmem); dma_resv_unlock(shmem->base.resv); drm_gem_vm_close(vma); } const struct vm_operations_struct drm_gem_shmem_vm_ops = { .fault = drm_gem_shmem_fault, .open = drm_gem_shmem_vm_open, .close = drm_gem_shmem_vm_close, }; EXPORT_SYMBOL_GPL(drm_gem_shmem_vm_ops); /** * drm_gem_shmem_mmap - Memory-map a shmem GEM object * @shmem: shmem GEM object * @vma: VMA for the area to be mapped * * This function implements an augmented version of the GEM DRM file mmap * operation for shmem objects. * * Returns: * 0 on success or a negative error code on failure. */ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct *vma) { struct drm_gem_object *obj = &shmem->base; int ret; if (drm_gem_is_imported(obj)) { /* Reset both vm_ops and vm_private_data, so we don't end up with * vm_ops pointing to our implementation if the dma-buf backend * doesn't set those fields. 
*/ vma->vm_private_data = NULL; vma->vm_ops = NULL; ret = dma_buf_mmap(obj->dma_buf, vma, 0); /* Drop the reference drm_gem_mmap_obj() acquired.*/ if (!ret) drm_gem_object_put(obj); return ret; } if (is_cow_mapping(vma->vm_flags)) return -EINVAL; dma_resv_lock(shmem->base.resv, NULL); ret = drm_gem_shmem_get_pages(shmem); dma_resv_unlock(shmem->base.resv); if (ret) return ret; vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); if (shmem->map_wc) vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); return 0; } EXPORT_SYMBOL_GPL(drm_gem_shmem_mmap); /** * drm_gem_shmem_print_info() - Print &drm_gem_shmem_object info for debugfs * @shmem: shmem GEM object * @p: DRM printer * @indent: Tab indentation level */ void drm_gem_shmem_print_info(const struct drm_gem_shmem_object *shmem, struct drm_printer *p, unsigned int indent) { if (drm_gem_is_imported(&shmem->base)) return; drm_printf_indent(p, indent, "pages_use_count=%u\n", shmem->pages_use_count); drm_printf_indent(p, indent, "vmap_use_count=%u\n", shmem->vmap_use_count); drm_printf_indent(p, indent, "vaddr=%p\n", shmem->vaddr); } EXPORT_SYMBOL(drm_gem_shmem_print_info); /** * drm_gem_shmem_get_sg_table - Provide a scatter/gather table of pinned * pages for a shmem GEM object * @shmem: shmem GEM object * * This function exports a scatter/gather table suitable for PRIME usage by * calling the standard DMA mapping API. * * Drivers who need to acquire an scatter/gather table for objects need to call * drm_gem_shmem_get_pages_sgt() instead. * * Returns: * A pointer to the scatter/gather table of pinned pages or error pointer on failure. */ struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_shmem_object *shmem) { struct drm_gem_object *obj = &shmem->base; drm_WARN_ON(obj->dev, drm_gem_is_imported(obj)); return drm_prime_pages_to_sg(obj->dev, shmem->pages, obj->size >> PAGE_SHIFT); } EXPORT_SYMBOL_GPL(drm_gem_shmem_get_sg_table); static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_object *shmem) { struct drm_gem_object *obj = &shmem->base; int ret; struct sg_table *sgt; if (shmem->sgt) return shmem->sgt; drm_WARN_ON(obj->dev, drm_gem_is_imported(obj)); ret = drm_gem_shmem_get_pages(shmem); if (ret) return ERR_PTR(ret); sgt = drm_gem_shmem_get_sg_table(shmem); if (IS_ERR(sgt)) { ret = PTR_ERR(sgt); goto err_put_pages; } /* Map the pages for use by the h/w. */ ret = dma_map_sgtable(obj->dev->dev, sgt, DMA_BIDIRECTIONAL, 0); if (ret) goto err_free_sgt; shmem->sgt = sgt; return sgt; err_free_sgt: sg_free_table(sgt); kfree(sgt); err_put_pages: drm_gem_shmem_put_pages(shmem); return ERR_PTR(ret); } /** * drm_gem_shmem_get_pages_sgt - Pin pages, dma map them, and return a * scatter/gather table for a shmem GEM object. * @shmem: shmem GEM object * * This function returns a scatter/gather table suitable for driver usage. If * the sg table doesn't exist, the pages are pinned, dma-mapped, and a sg * table created. * * This is the main function for drivers to get at backing storage, and it hides * and difference between dma-buf imported and natively allocated objects. * drm_gem_shmem_get_sg_table() should not be directly called by drivers. * * Returns: * A pointer to the scatter/gather table of pinned pages or errno on failure. 
*/ struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_shmem_object *shmem) { int ret; struct sg_table *sgt; ret = dma_resv_lock_interruptible(shmem->base.resv, NULL); if (ret) return ERR_PTR(ret); sgt = drm_gem_shmem_get_pages_sgt_locked(shmem); dma_resv_unlock(shmem->base.resv); return sgt; } EXPORT_SYMBOL_GPL(drm_gem_shmem_get_pages_sgt); /** * drm_gem_shmem_prime_import_sg_table - Produce a shmem GEM object from * another driver's scatter/gather table of pinned pages * @dev: Device to import into * @attach: DMA-BUF attachment * @sgt: Scatter/gather table of pinned pages * * This function imports a scatter/gather table exported via DMA-BUF by * another driver. Drivers that use the shmem helpers should set this as their * &drm_driver.gem_prime_import_sg_table callback. * * Returns: * A pointer to a newly created GEM object or an ERR_PTR-encoded negative * error code on failure. */ struct drm_gem_object * drm_gem_shmem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sgt) { size_t size = PAGE_ALIGN(attach->dmabuf->size); struct drm_gem_shmem_object *shmem; shmem = __drm_gem_shmem_create(dev, size, true, NULL); if (IS_ERR(shmem)) return ERR_CAST(shmem); shmem->sgt = sgt; drm_dbg_prime(dev, "size = %zu\n", size); return &shmem->base; } EXPORT_SYMBOL_GPL(drm_gem_shmem_prime_import_sg_table); MODULE_DESCRIPTION("DRM SHMEM memory-management helpers"); MODULE_IMPORT_NS("DMA_BUF"); MODULE_LICENSE("GPL v2");
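To tie the helpers above together, here is a minimal, hedged sketch of the allocate/vmap/vunmap/release lifecycle from a hypothetical driver's point of view. It uses only functions defined in this file plus the core GEM reference drop; the function name example_shmem_scratch() and the zero-fill are illustrative assumptions, not part of the helper API. As the dma_resv_assert_held() calls above require, mapping a natively allocated (non-imported) object is done under the object's reservation lock.

/* Illustrative sketch only: allocate a shmem-backed GEM object, map it into
 * the kernel, touch it, and drop the reference again.
 */
#include <linux/dma-resv.h>
#include <linux/err.h>
#include <linux/iosys-map.h>
#include <linux/string.h>
#include <drm/drm_device.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>

static int example_shmem_scratch(struct drm_device *dev, size_t size)
{
	struct drm_gem_shmem_object *shmem;
	struct iosys_map map;
	int ret;

	shmem = drm_gem_shmem_create(dev, size);	/* returns with one GEM reference */
	if (IS_ERR(shmem))
		return PTR_ERR(shmem);

	/* Natively allocated objects are vmapped under the reservation lock. */
	dma_resv_lock(shmem->base.resv, NULL);
	ret = drm_gem_shmem_vmap(shmem, &map);
	if (!ret) {
		memset(map.vaddr, 0, shmem->base.size);	/* CPU access through the vmap */
		drm_gem_shmem_vunmap(shmem, &map);
	}
	dma_resv_unlock(shmem->base.resv);

	/* Dropping the last reference ends up in drm_gem_shmem_free(). */
	drm_gem_object_put(&shmem->base);
	return ret;
}

A driver that only needs dumb-buffer support would typically not open-code any of this and would instead point &drm_driver.dumb_create at drm_gem_shmem_dumb_create(), as the kernel-doc above notes.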
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. */ #include <linux/blkdev.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/highmem.h> #include <linux/time.h> #include <linux/init.h> #include <linux/seq_file.h> #include <linux/string.h> #include <linux/backing-dev.h> #include <linux/mount.h> #include <linux/writeback.h> #include <linux/statfs.h> #include <linux/compat.h> #include <linux/parser.h> #include <linux/ctype.h> #include <linux/namei.h> #include <linux/miscdevice.h> #include <linux/magic.h> #include <linux/slab.h> #include <linux/ratelimit.h> #include <linux/crc32c.h> #include <linux/btrfs.h> #include <linux/security.h> #include <linux/fs_parser.h> #include "messages.h" #include "delayed-inode.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" #include "direct-io.h" #include "props.h" #include "xattr.h" #include "bio.h" #include "export.h" #include "compression.h" #include "dev-replace.h" #include "free-space-cache.h" #include "backref.h" #include "space-info.h" #include "sysfs.h" #include "zoned.h" #include "tests/btrfs-tests.h" #include "block-group.h" #include "discard.h" #include "qgroup.h" #include "raid56.h" #include "fs.h" #include "accessors.h" #include "defrag.h" #include "dir-item.h" #include "ioctl.h" #include "scrub.h" #include "verity.h" #include "super.h" #include "extent-tree.h" #define CREATE_TRACE_POINTS #include <trace/events/btrfs.h> static const struct super_operations btrfs_super_ops; static struct file_system_type btrfs_fs_type; static void btrfs_put_super(struct super_block *sb) { struct btrfs_fs_info *fs_info = btrfs_sb(sb); btrfs_info(fs_info, "last unmount of filesystem %pU", fs_info->fs_devices->fsid); close_ctree(fs_info); } /* Store the mount options related information.
*/ struct btrfs_fs_context { char *subvol_name; u64 subvol_objectid; u64 max_inline; u32 commit_interval; u32 metadata_ratio; u32 thread_pool_size; unsigned long long mount_opt; unsigned long compress_type:4; int compress_level; refcount_t refs; }; enum { Opt_acl, Opt_clear_cache, Opt_commit_interval, Opt_compress, Opt_compress_force, Opt_compress_force_type, Opt_compress_type, Opt_degraded, Opt_device, Opt_fatal_errors, Opt_flushoncommit, Opt_max_inline, Opt_barrier, Opt_datacow, Opt_datasum, Opt_defrag, Opt_discard, Opt_discard_mode, Opt_ratio, Opt_rescan_uuid_tree, Opt_skip_balance, Opt_space_cache, Opt_space_cache_version, Opt_ssd, Opt_ssd_spread, Opt_subvol, Opt_subvol_empty, Opt_subvolid, Opt_thread_pool, Opt_treelog, Opt_user_subvol_rm_allowed, Opt_norecovery, /* Rescue options */ Opt_rescue, Opt_usebackuproot, Opt_nologreplay, /* Debugging options */ Opt_enospc_debug, #ifdef CONFIG_BTRFS_DEBUG Opt_fragment, Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all, #endif #ifdef CONFIG_BTRFS_FS_REF_VERIFY Opt_ref_verify, #endif Opt_err, }; enum { Opt_fatal_errors_panic, Opt_fatal_errors_bug, }; static const struct constant_table btrfs_parameter_fatal_errors[] = { { "panic", Opt_fatal_errors_panic }, { "bug", Opt_fatal_errors_bug }, {} }; enum { Opt_discard_sync, Opt_discard_async, }; static const struct constant_table btrfs_parameter_discard[] = { { "sync", Opt_discard_sync }, { "async", Opt_discard_async }, {} }; enum { Opt_space_cache_v1, Opt_space_cache_v2, }; static const struct constant_table btrfs_parameter_space_cache[] = { { "v1", Opt_space_cache_v1 }, { "v2", Opt_space_cache_v2 }, {} }; enum { Opt_rescue_usebackuproot, Opt_rescue_nologreplay, Opt_rescue_ignorebadroots, Opt_rescue_ignoredatacsums, Opt_rescue_ignoremetacsums, Opt_rescue_ignoresuperflags, Opt_rescue_parameter_all, }; static const struct constant_table btrfs_parameter_rescue[] = { { "usebackuproot", Opt_rescue_usebackuproot }, { "nologreplay", Opt_rescue_nologreplay }, { "ignorebadroots", Opt_rescue_ignorebadroots }, { "ibadroots", Opt_rescue_ignorebadroots }, { "ignoredatacsums", Opt_rescue_ignoredatacsums }, { "ignoremetacsums", Opt_rescue_ignoremetacsums}, { "ignoresuperflags", Opt_rescue_ignoresuperflags}, { "idatacsums", Opt_rescue_ignoredatacsums }, { "imetacsums", Opt_rescue_ignoremetacsums}, { "isuperflags", Opt_rescue_ignoresuperflags}, { "all", Opt_rescue_parameter_all }, {} }; #ifdef CONFIG_BTRFS_DEBUG enum { Opt_fragment_parameter_data, Opt_fragment_parameter_metadata, Opt_fragment_parameter_all, }; static const struct constant_table btrfs_parameter_fragment[] = { { "data", Opt_fragment_parameter_data }, { "metadata", Opt_fragment_parameter_metadata }, { "all", Opt_fragment_parameter_all }, {} }; #endif static const struct fs_parameter_spec btrfs_fs_parameters[] = { fsparam_flag_no("acl", Opt_acl), fsparam_flag_no("autodefrag", Opt_defrag), fsparam_flag_no("barrier", Opt_barrier), fsparam_flag("clear_cache", Opt_clear_cache), fsparam_u32("commit", Opt_commit_interval), fsparam_flag("compress", Opt_compress), fsparam_string("compress", Opt_compress_type), fsparam_flag("compress-force", Opt_compress_force), fsparam_string("compress-force", Opt_compress_force_type), fsparam_flag_no("datacow", Opt_datacow), fsparam_flag_no("datasum", Opt_datasum), fsparam_flag("degraded", Opt_degraded), fsparam_string("device", Opt_device), fsparam_flag_no("discard", Opt_discard), fsparam_enum("discard", Opt_discard_mode, btrfs_parameter_discard), fsparam_enum("fatal_errors", Opt_fatal_errors, 
btrfs_parameter_fatal_errors), fsparam_flag_no("flushoncommit", Opt_flushoncommit), fsparam_string("max_inline", Opt_max_inline), fsparam_u32("metadata_ratio", Opt_ratio), fsparam_flag("rescan_uuid_tree", Opt_rescan_uuid_tree), fsparam_flag("skip_balance", Opt_skip_balance), fsparam_flag_no("space_cache", Opt_space_cache), fsparam_enum("space_cache", Opt_space_cache_version, btrfs_parameter_space_cache), fsparam_flag_no("ssd", Opt_ssd), fsparam_flag_no("ssd_spread", Opt_ssd_spread), fsparam_string("subvol", Opt_subvol), fsparam_flag("subvol=", Opt_subvol_empty), fsparam_u64("subvolid", Opt_subvolid), fsparam_u32("thread_pool", Opt_thread_pool), fsparam_flag_no("treelog", Opt_treelog), fsparam_flag("user_subvol_rm_allowed", Opt_user_subvol_rm_allowed), /* Rescue options. */ fsparam_enum("rescue", Opt_rescue, btrfs_parameter_rescue), /* Deprecated, with alias rescue=nologreplay */ __fsparam(NULL, "nologreplay", Opt_nologreplay, fs_param_deprecated, NULL), /* Deprecated, with alias rescue=usebackuproot */ __fsparam(NULL, "usebackuproot", Opt_usebackuproot, fs_param_deprecated, NULL), /* For compatibility only, alias for "rescue=nologreplay". */ fsparam_flag("norecovery", Opt_norecovery), /* Debugging options. */ fsparam_flag_no("enospc_debug", Opt_enospc_debug), #ifdef CONFIG_BTRFS_DEBUG fsparam_enum("fragment", Opt_fragment, btrfs_parameter_fragment), #endif #ifdef CONFIG_BTRFS_FS_REF_VERIFY fsparam_flag("ref_verify", Opt_ref_verify), #endif {} }; /* No support for restricting writes to btrfs devices yet... */ static inline blk_mode_t btrfs_open_mode(struct fs_context *fc) { return sb_open_mode(fc->sb_flags) & ~BLK_OPEN_RESTRICT_WRITES; } static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct btrfs_fs_context *ctx = fc->fs_private; struct fs_parse_result result; int opt; opt = fs_parse(fc, btrfs_fs_parameters, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_degraded: btrfs_set_opt(ctx->mount_opt, DEGRADED); break; case Opt_subvol_empty: /* * This exists because we used to allow it on accident, so we're * keeping it to maintain ABI. See 37becec95ac3 ("Btrfs: allow * empty subvol= again"). */ break; case Opt_subvol: kfree(ctx->subvol_name); ctx->subvol_name = kstrdup(param->string, GFP_KERNEL); if (!ctx->subvol_name) return -ENOMEM; break; case Opt_subvolid: ctx->subvol_objectid = result.uint_64; /* subvolid=0 means give me the original fs_tree. 
*/ if (!ctx->subvol_objectid) ctx->subvol_objectid = BTRFS_FS_TREE_OBJECTID; break; case Opt_device: { struct btrfs_device *device; blk_mode_t mode = btrfs_open_mode(fc); mutex_lock(&uuid_mutex); device = btrfs_scan_one_device(param->string, mode, false); mutex_unlock(&uuid_mutex); if (IS_ERR(device)) return PTR_ERR(device); break; } case Opt_datasum: if (result.negated) { btrfs_set_opt(ctx->mount_opt, NODATASUM); } else { btrfs_clear_opt(ctx->mount_opt, NODATACOW); btrfs_clear_opt(ctx->mount_opt, NODATASUM); } break; case Opt_datacow: if (result.negated) { btrfs_clear_opt(ctx->mount_opt, COMPRESS); btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS); btrfs_set_opt(ctx->mount_opt, NODATACOW); btrfs_set_opt(ctx->mount_opt, NODATASUM); } else { btrfs_clear_opt(ctx->mount_opt, NODATACOW); } break; case Opt_compress_force: case Opt_compress_force_type: btrfs_set_opt(ctx->mount_opt, FORCE_COMPRESS); fallthrough; case Opt_compress: case Opt_compress_type: /* * Provide the same semantics as older kernels that don't use fs * context, specifying the "compress" option clears * "force-compress" without the need to pass * "compress-force=[no|none]" before specifying "compress". */ if (opt != Opt_compress_force && opt != Opt_compress_force_type) btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS); if (opt == Opt_compress || opt == Opt_compress_force) { ctx->compress_type = BTRFS_COMPRESS_ZLIB; ctx->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL; btrfs_set_opt(ctx->mount_opt, COMPRESS); btrfs_clear_opt(ctx->mount_opt, NODATACOW); btrfs_clear_opt(ctx->mount_opt, NODATASUM); } else if (strncmp(param->string, "zlib", 4) == 0) { ctx->compress_type = BTRFS_COMPRESS_ZLIB; ctx->compress_level = btrfs_compress_str2level(BTRFS_COMPRESS_ZLIB, param->string + 4); btrfs_set_opt(ctx->mount_opt, COMPRESS); btrfs_clear_opt(ctx->mount_opt, NODATACOW); btrfs_clear_opt(ctx->mount_opt, NODATASUM); } else if (strncmp(param->string, "lzo", 3) == 0) { ctx->compress_type = BTRFS_COMPRESS_LZO; ctx->compress_level = 0; btrfs_set_opt(ctx->mount_opt, COMPRESS); btrfs_clear_opt(ctx->mount_opt, NODATACOW); btrfs_clear_opt(ctx->mount_opt, NODATASUM); } else if (strncmp(param->string, "zstd", 4) == 0) { ctx->compress_type = BTRFS_COMPRESS_ZSTD; ctx->compress_level = btrfs_compress_str2level(BTRFS_COMPRESS_ZSTD, param->string + 4); btrfs_set_opt(ctx->mount_opt, COMPRESS); btrfs_clear_opt(ctx->mount_opt, NODATACOW); btrfs_clear_opt(ctx->mount_opt, NODATASUM); } else if (strncmp(param->string, "no", 2) == 0) { ctx->compress_level = 0; ctx->compress_type = 0; btrfs_clear_opt(ctx->mount_opt, COMPRESS); btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS); } else { btrfs_err(NULL, "unrecognized compression value %s", param->string); return -EINVAL; } break; case Opt_ssd: if (result.negated) { btrfs_set_opt(ctx->mount_opt, NOSSD); btrfs_clear_opt(ctx->mount_opt, SSD); btrfs_clear_opt(ctx->mount_opt, SSD_SPREAD); } else { btrfs_set_opt(ctx->mount_opt, SSD); btrfs_clear_opt(ctx->mount_opt, NOSSD); } break; case Opt_ssd_spread: if (result.negated) { btrfs_clear_opt(ctx->mount_opt, SSD_SPREAD); } else { btrfs_set_opt(ctx->mount_opt, SSD); btrfs_set_opt(ctx->mount_opt, SSD_SPREAD); btrfs_clear_opt(ctx->mount_opt, NOSSD); } break; case Opt_barrier: if (result.negated) btrfs_set_opt(ctx->mount_opt, NOBARRIER); else btrfs_clear_opt(ctx->mount_opt, NOBARRIER); break; case Opt_thread_pool: if (result.uint_32 == 0) { btrfs_err(NULL, "invalid value 0 for thread_pool"); return -EINVAL; }