/*
* The AEGIS-128 Authenticated-Encryption Algorithm
* Glue for AES-NI + SSE2 implementation
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <crypto/cryptd.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
#include <asm/cpu_device_id.h>
#define AEGIS128_BLOCK_ALIGN 16
#define AEGIS128_BLOCK_SIZE 16
#define AEGIS128_NONCE_SIZE 16
#define AEGIS128_STATE_BLOCKS 5
#define AEGIS128_KEY_SIZE 16
#define AEGIS128_MIN_AUTH_SIZE 8
#define AEGIS128_MAX_AUTH_SIZE 16
asmlinkage void crypto_aegis128_aesni_init(void *state, void *key, void *iv);
asmlinkage void crypto_aegis128_aesni_ad(
void *state, unsigned int length, const void *data);
asmlinkage void crypto_aegis128_aesni_enc(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis128_aesni_dec(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis128_aesni_enc_tail(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis128_aesni_dec_tail(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis128_aesni_final(
void *state, void *tag_xor, unsigned int cryptlen,
unsigned int assoclen);
struct aegis_block {
u8 bytes[AEGIS128_BLOCK_SIZE] __aligned(AEGIS128_BLOCK_ALIGN);
};
struct aegis_state {
struct aegis_block blocks[AEGIS128_STATE_BLOCKS];
};
struct aegis_ctx {
struct aegis_block key;
};
struct aegis_crypt_ops {
int (*skcipher_walk_init)(struct skcipher_walk *walk,
struct aead_request *req, bool atomic);
void (*crypt_blocks)(void *state, unsigned int length, const void *src,
void *dst);
void (*crypt_tail)(void *state, unsigned int length, const void *src,
void *dst);
};
static void crypto_aegis128_aesni_process_ad(
struct aegis_state *state, struct scatterlist *sg_src,
unsigned int assoclen)
{
struct scatter_walk walk;
struct aegis_block buf;
unsigned int pos = 0;
scatterwalk_start(&walk, sg_src);
while (assoclen != 0) {
unsigned int size = scatterwalk_clamp(&walk, assoclen);
unsigned int left = size;
void *mapped = scatterwalk_map(&walk);
const u8 *src = (const u8 *)mapped;
if (pos + size >= AEGIS128_BLOCK_SIZE) {
if (pos > 0) {
unsigned int fill = AEGIS128_BLOCK_SIZE - pos;
memcpy(buf.bytes + pos, src, fill);
crypto_aegis128_aesni_ad(state,
AEGIS128_BLOCK_SIZE,
buf.bytes);
pos = 0;
left -= fill;
src += fill;
}
crypto_aegis128_aesni_ad(state, left, src);
src += left & ~(AEGIS128_BLOCK_SIZE - 1);
left &= AEGIS128_BLOCK_SIZE - 1;
}
memcpy(buf.bytes + pos, src, left);
pos += left;
assoclen -= size;
scatterwalk_unmap(mapped);
scatterwalk_advance(&walk, size);
scatterwalk_done(&walk, 0, assoclen);
}
if (pos > 0) {
memset(buf.bytes + pos, 0, AEGIS128_BLOCK_SIZE - pos);
crypto_aegis128_aesni_ad(state, AEGIS128_BLOCK_SIZE, buf.bytes); /*covered*/
}
}
static void crypto_aegis128_aesni_process_crypt(
struct aegis_state *state, struct aead_request *req,
const struct aegis_crypt_ops *ops)
{
struct skcipher_walk walk;
u8 *src, *dst;
unsigned int chunksize, base;
ops->skcipher_walk_init(&walk, req, false); /*covered*/
while (walk.nbytes) { /*covered*/
src = walk.src.virt.addr;
dst = walk.dst.virt.addr;
chunksize = walk.nbytes;
ops->crypt_blocks(state, chunksize, src, dst);
base = chunksize & ~(AEGIS128_BLOCK_SIZE - 1);
src += base;
dst += base;
chunksize &= AEGIS128_BLOCK_SIZE - 1;
if (chunksize > 0)
ops->crypt_tail(state, chunksize, src, dst);
skcipher_walk_done(&walk, 0);
}
}
static struct aegis_ctx *crypto_aegis128_aesni_ctx(struct crypto_aead *aead)
{
u8 *ctx = crypto_aead_ctx(aead); /*covered*/
ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx));
return (void *)ctx;
}
static int crypto_aegis128_aesni_setkey(struct crypto_aead *aead, const u8 *key,
unsigned int keylen)
{
struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(aead); /*covered*/
if (keylen != AEGIS128_KEY_SIZE) {
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
memcpy(ctx->key.bytes, key, AEGIS128_KEY_SIZE);
return 0; /*covered*/
}
static int crypto_aegis128_aesni_setauthsize(struct crypto_aead *tfm,
unsigned int authsize)
{
if (authsize > AEGIS128_MAX_AUTH_SIZE) /*covered*/
return -EINVAL;
if (authsize < AEGIS128_MIN_AUTH_SIZE) /*covered*/
return -EINVAL;
return 0; /*covered*/
}
static void crypto_aegis128_aesni_crypt(struct aead_request *req,
struct aegis_block *tag_xor,
unsigned int cryptlen,
const struct aegis_crypt_ops *ops)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req); /*covered*/
struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(tfm);
struct aegis_state state;
kernel_fpu_begin();
crypto_aegis128_aesni_init(&state, ctx->key.bytes, req->iv);
crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen); /*covered*/
crypto_aegis128_aesni_process_crypt(&state, req, ops);
crypto_aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
kernel_fpu_end();
}
static int crypto_aegis128_aesni_encrypt(struct aead_request *req)
{
static const struct aegis_crypt_ops OPS = {
.skcipher_walk_init = skcipher_walk_aead_encrypt,
.crypt_blocks = crypto_aegis128_aesni_enc,
.crypt_tail = crypto_aegis128_aesni_enc_tail,
};
struct crypto_aead *tfm = crypto_aead_reqtfm(req); /*covered*/
struct aegis_block tag = {};
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen;
crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS);
scatterwalk_map_and_copy(tag.bytes, req->dst,
req->assoclen + cryptlen, authsize, 1);
return 0;
}
static int crypto_aegis128_aesni_decrypt(struct aead_request *req)
{
static const struct aegis_block zeros = {};
static const struct aegis_crypt_ops OPS = {
.skcipher_walk_init = skcipher_walk_aead_decrypt,
.crypt_blocks = crypto_aegis128_aesni_dec,
.crypt_tail = crypto_aegis128_aesni_dec_tail,
};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_block tag;
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen - authsize;
scatterwalk_map_and_copy(tag.bytes, req->src,
req->assoclen + cryptlen, authsize, 0);
crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS);
return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0;
}
static int crypto_aegis128_aesni_init_tfm(struct crypto_aead *aead)
{
return 0; /*covered*/
}
static void crypto_aegis128_aesni_exit_tfm(struct crypto_aead *aead)
{
}
static int cryptd_aegis128_aesni_setkey(struct crypto_aead *aead,
const u8 *key, unsigned int keylen)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx; /*covered*/
return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
}
static int cryptd_aegis128_aesni_setauthsize(struct crypto_aead *aead,
unsigned int authsize)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx; /*covered*/
return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
}
static int cryptd_aegis128_aesni_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req); /*covered*/
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
aead = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() || /*covered*/
!cryptd_aead_queued(cryptd_tfm)))
aead = cryptd_aead_child(cryptd_tfm); /*covered*/
aead_request_set_tfm(req, aead); /*covered*/
return crypto_aead_encrypt(req); /*covered*/
}
static int cryptd_aegis128_aesni_decrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
aead = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() ||
!cryptd_aead_queued(cryptd_tfm)))
aead = cryptd_aead_child(cryptd_tfm);
aead_request_set_tfm(req, aead);
return crypto_aead_decrypt(req);
}
static int cryptd_aegis128_aesni_init_tfm(struct crypto_aead *aead)
{
struct cryptd_aead *cryptd_tfm;
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
cryptd_tfm = cryptd_alloc_aead("__aegis128-aesni", CRYPTO_ALG_INTERNAL, /*covered*/
CRYPTO_ALG_INTERNAL);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
*ctx = cryptd_tfm; /*covered*/
crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
return 0; /*covered*/
}
static void cryptd_aegis128_aesni_exit_tfm(struct crypto_aead *aead)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
cryptd_free_aead(*ctx);
}
static struct aead_alg crypto_aegis128_aesni_alg[] = {
{
.setkey = crypto_aegis128_aesni_setkey,
.setauthsize = crypto_aegis128_aesni_setauthsize,
.encrypt = crypto_aegis128_aesni_encrypt,
.decrypt = crypto_aegis128_aesni_decrypt,
.init = crypto_aegis128_aesni_init_tfm,
.exit = crypto_aegis128_aesni_exit_tfm,
.ivsize = AEGIS128_NONCE_SIZE,
.maxauthsize = AEGIS128_MAX_AUTH_SIZE,
.chunksize = AEGIS128_BLOCK_SIZE,
.base = {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct aegis_ctx) +
__alignof__(struct aegis_ctx),
.cra_alignmask = 0,
.cra_name = "__aegis128",
.cra_driver_name = "__aegis128-aesni",
.cra_module = THIS_MODULE,
}
}, {
.setkey = cryptd_aegis128_aesni_setkey,
.setauthsize = cryptd_aegis128_aesni_setauthsize,
.encrypt = cryptd_aegis128_aesni_encrypt,
.decrypt = cryptd_aegis128_aesni_decrypt,
.init = cryptd_aegis128_aesni_init_tfm,
.exit = cryptd_aegis128_aesni_exit_tfm,
.ivsize = AEGIS128_NONCE_SIZE,
.maxauthsize = AEGIS128_MAX_AUTH_SIZE,
.chunksize = AEGIS128_BLOCK_SIZE,
.base = {
.cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct cryptd_aead *),
.cra_alignmask = 0,
.cra_priority = 400,
.cra_name = "aegis128",
.cra_driver_name = "aegis128-aesni",
.cra_module = THIS_MODULE,
}
}
};
static int __init crypto_aegis128_aesni_module_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2) ||
!boot_cpu_has(X86_FEATURE_AES) ||
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
return crypto_register_aeads(crypto_aegis128_aesni_alg,
ARRAY_SIZE(crypto_aegis128_aesni_alg));
}
static void __exit crypto_aegis128_aesni_module_exit(void)
{
crypto_unregister_aeads(crypto_aegis128_aesni_alg,
ARRAY_SIZE(crypto_aegis128_aesni_alg));
}
module_init(crypto_aegis128_aesni_module_init);
module_exit(crypto_aegis128_aesni_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm -- AESNI+SSE2 implementation");
MODULE_ALIAS_CRYPTO("aegis128");
MODULE_ALIAS_CRYPTO("aegis128-aesni");
/*
* The AEGIS-128L Authenticated-Encryption Algorithm
* Glue for AES-NI + SSE2 implementation
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <crypto/cryptd.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
#include <asm/cpu_device_id.h>
#define AEGIS128L_BLOCK_ALIGN 16
#define AEGIS128L_BLOCK_SIZE 32
#define AEGIS128L_NONCE_SIZE 16
#define AEGIS128L_STATE_BLOCKS 8
#define AEGIS128L_KEY_SIZE 16
#define AEGIS128L_MIN_AUTH_SIZE 8
#define AEGIS128L_MAX_AUTH_SIZE 16
asmlinkage void crypto_aegis128l_aesni_init(void *state, void *key, void *iv);
asmlinkage void crypto_aegis128l_aesni_ad(
void *state, unsigned int length, const void *data);
asmlinkage void crypto_aegis128l_aesni_enc(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis128l_aesni_dec(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis128l_aesni_enc_tail(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis128l_aesni_dec_tail(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis128l_aesni_final(
void *state, void *tag_xor, unsigned int cryptlen,
unsigned int assoclen);
struct aegis_block {
u8 bytes[AEGIS128L_BLOCK_SIZE] __aligned(AEGIS128L_BLOCK_ALIGN);
};
struct aegis_state {
struct aegis_block blocks[AEGIS128L_STATE_BLOCKS];
};
struct aegis_ctx {
struct aegis_block key;
};
struct aegis_crypt_ops {
int (*skcipher_walk_init)(struct skcipher_walk *walk,
struct aead_request *req, bool atomic);
void (*crypt_blocks)(void *state, unsigned int length, const void *src,
void *dst);
void (*crypt_tail)(void *state, unsigned int length, const void *src,
void *dst);
};
static void crypto_aegis128l_aesni_process_ad(
struct aegis_state *state, struct scatterlist *sg_src,
unsigned int assoclen)
{
struct scatter_walk walk;
struct aegis_block buf;
unsigned int pos = 0;
scatterwalk_start(&walk, sg_src);
while (assoclen != 0) {
unsigned int size = scatterwalk_clamp(&walk, assoclen);
unsigned int left = size;
void *mapped = scatterwalk_map(&walk);
const u8 *src = (const u8 *)mapped;
if (pos + size >= AEGIS128L_BLOCK_SIZE) {
if (pos > 0) {
unsigned int fill = AEGIS128L_BLOCK_SIZE - pos;
memcpy(buf.bytes + pos, src, fill);
crypto_aegis128l_aesni_ad(state,
AEGIS128L_BLOCK_SIZE,
buf.bytes);
pos = 0;
left -= fill;
src += fill;
}
crypto_aegis128l_aesni_ad(state, left, src);
src += left & ~(AEGIS128L_BLOCK_SIZE - 1);
left &= AEGIS128L_BLOCK_SIZE - 1;
}
memcpy(buf.bytes + pos, src, left);
pos += left;
assoclen -= size;
scatterwalk_unmap(mapped);
scatterwalk_advance(&walk, size);
scatterwalk_done(&walk, 0, assoclen);
}
if (pos > 0) {
memset(buf.bytes + pos, 0, AEGIS128L_BLOCK_SIZE - pos);
crypto_aegis128l_aesni_ad(state, AEGIS128L_BLOCK_SIZE, buf.bytes);
}
}
static void crypto_aegis128l_aesni_process_crypt(
struct aegis_state *state, struct aead_request *req,
const struct aegis_crypt_ops *ops)
{
struct skcipher_walk walk;
u8 *src, *dst;
unsigned int chunksize, base;
ops->skcipher_walk_init(&walk, req, false);
while (walk.nbytes) {
src = walk.src.virt.addr;
dst = walk.dst.virt.addr;
chunksize = walk.nbytes;
ops->crypt_blocks(state, chunksize, src, dst);
base = chunksize & ~(AEGIS128L_BLOCK_SIZE - 1);
src += base;
dst += base;
chunksize &= AEGIS128L_BLOCK_SIZE - 1;
if (chunksize > 0)
ops->crypt_tail(state, chunksize, src, dst);
skcipher_walk_done(&walk, 0);
}
}
static struct aegis_ctx *crypto_aegis128l_aesni_ctx(struct crypto_aead *aead)
{
u8 *ctx = crypto_aead_ctx(aead); /*covered*/
ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx));
return (void *)ctx;
}
static int crypto_aegis128l_aesni_setkey(struct crypto_aead *aead,
const u8 *key, unsigned int keylen)
{
struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(aead); /*covered*/
if (keylen != AEGIS128L_KEY_SIZE) {
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); /*covered*/
return -EINVAL;
}
memcpy(ctx->key.bytes, key, AEGIS128L_KEY_SIZE);
return 0; /*covered*/
}
static int crypto_aegis128l_aesni_setauthsize(struct crypto_aead *tfm,
unsigned int authsize)
{
if (authsize > AEGIS128L_MAX_AUTH_SIZE) /*covered*/
return -EINVAL;
if (authsize < AEGIS128L_MIN_AUTH_SIZE) /*covered*/
return -EINVAL;
return 0; /*covered*/
}
static void crypto_aegis128l_aesni_crypt(struct aead_request *req,
struct aegis_block *tag_xor,
unsigned int cryptlen,
const struct aegis_crypt_ops *ops)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(tfm);
struct aegis_state state;
kernel_fpu_begin();
crypto_aegis128l_aesni_init(&state, ctx->key.bytes, req->iv);
crypto_aegis128l_aesni_process_ad(&state, req->src, req->assoclen);
crypto_aegis128l_aesni_process_crypt(&state, req, ops);
crypto_aegis128l_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
kernel_fpu_end();
}
static int crypto_aegis128l_aesni_encrypt(struct aead_request *req)
{
static const struct aegis_crypt_ops OPS = {
.skcipher_walk_init = skcipher_walk_aead_encrypt,
.crypt_blocks = crypto_aegis128l_aesni_enc,
.crypt_tail = crypto_aegis128l_aesni_enc_tail,
};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_block tag = {};
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen;
crypto_aegis128l_aesni_crypt(req, &tag, cryptlen, &OPS);
scatterwalk_map_and_copy(tag.bytes, req->dst,
req->assoclen + cryptlen, authsize, 1);
return 0;
}
static int crypto_aegis128l_aesni_decrypt(struct aead_request *req)
{
static const struct aegis_block zeros = {};
static const struct aegis_crypt_ops OPS = {
.skcipher_walk_init = skcipher_walk_aead_decrypt,
.crypt_blocks = crypto_aegis128l_aesni_dec,
.crypt_tail = crypto_aegis128l_aesni_dec_tail,
};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_block tag;
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen - authsize;
scatterwalk_map_and_copy(tag.bytes, req->src,
req->assoclen + cryptlen, authsize, 0);
crypto_aegis128l_aesni_crypt(req, &tag, cryptlen, &OPS);
return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0;
}
static int crypto_aegis128l_aesni_init_tfm(struct crypto_aead *aead)
{
return 0; /*covered*/
}
static void crypto_aegis128l_aesni_exit_tfm(struct crypto_aead *aead)
{
} /*covered*/
static int cryptd_aegis128l_aesni_setkey(struct crypto_aead *aead,
const u8 *key, unsigned int keylen)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx; /*covered*/
return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
}
static int cryptd_aegis128l_aesni_setauthsize(struct crypto_aead *aead,
unsigned int authsize)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx; /*covered*/
return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
}
static int cryptd_aegis128l_aesni_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
aead = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() ||
!cryptd_aead_queued(cryptd_tfm)))
aead = cryptd_aead_child(cryptd_tfm);
aead_request_set_tfm(req, aead);
return crypto_aead_encrypt(req);
}
static int cryptd_aegis128l_aesni_decrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
aead = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() ||
!cryptd_aead_queued(cryptd_tfm)))
aead = cryptd_aead_child(cryptd_tfm);
aead_request_set_tfm(req, aead);
return crypto_aead_decrypt(req);
}
static int cryptd_aegis128l_aesni_init_tfm(struct crypto_aead *aead)
{
struct cryptd_aead *cryptd_tfm;
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
cryptd_tfm = cryptd_alloc_aead("__aegis128l-aesni", CRYPTO_ALG_INTERNAL, /*covered*/
CRYPTO_ALG_INTERNAL);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
*ctx = cryptd_tfm; /*covered*/
crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
return 0; /*covered*/
}
static void cryptd_aegis128l_aesni_exit_tfm(struct crypto_aead *aead)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
cryptd_free_aead(*ctx); /*covered*/
}
static struct aead_alg crypto_aegis128l_aesni_alg[] = {
{
.setkey = crypto_aegis128l_aesni_setkey,
.setauthsize = crypto_aegis128l_aesni_setauthsize,
.encrypt = crypto_aegis128l_aesni_encrypt,
.decrypt = crypto_aegis128l_aesni_decrypt,
.init = crypto_aegis128l_aesni_init_tfm,
.exit = crypto_aegis128l_aesni_exit_tfm,
.ivsize = AEGIS128L_NONCE_SIZE,
.maxauthsize = AEGIS128L_MAX_AUTH_SIZE,
.chunksize = AEGIS128L_BLOCK_SIZE,
.base = {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct aegis_ctx) +
__alignof__(struct aegis_ctx),
.cra_alignmask = 0,
.cra_name = "__aegis128l",
.cra_driver_name = "__aegis128l-aesni",
.cra_module = THIS_MODULE,
}
}, {
.setkey = cryptd_aegis128l_aesni_setkey,
.setauthsize = cryptd_aegis128l_aesni_setauthsize,
.encrypt = cryptd_aegis128l_aesni_encrypt,
.decrypt = cryptd_aegis128l_aesni_decrypt,
.init = cryptd_aegis128l_aesni_init_tfm,
.exit = cryptd_aegis128l_aesni_exit_tfm,
.ivsize = AEGIS128L_NONCE_SIZE,
.maxauthsize = AEGIS128L_MAX_AUTH_SIZE,
.chunksize = AEGIS128L_BLOCK_SIZE,
.base = {
.cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct cryptd_aead *),
.cra_alignmask = 0,
.cra_priority = 400,
.cra_name = "aegis128l",
.cra_driver_name = "aegis128l-aesni",
.cra_module = THIS_MODULE,
}
}
};
static int __init crypto_aegis128l_aesni_module_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2) ||
!boot_cpu_has(X86_FEATURE_AES) ||
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
return crypto_register_aeads(crypto_aegis128l_aesni_alg,
ARRAY_SIZE(crypto_aegis128l_aesni_alg));
}
static void __exit crypto_aegis128l_aesni_module_exit(void)
{
crypto_unregister_aeads(crypto_aegis128l_aesni_alg,
ARRAY_SIZE(crypto_aegis128l_aesni_alg));
}
module_init(crypto_aegis128l_aesni_module_init);
module_exit(crypto_aegis128l_aesni_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("AEGIS-128L AEAD algorithm -- AESNI+SSE2 implementation");
MODULE_ALIAS_CRYPTO("aegis128l");
MODULE_ALIAS_CRYPTO("aegis128l-aesni");
/*
* The AEGIS-256 Authenticated-Encryption Algorithm
* Glue for AES-NI + SSE2 implementation
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <crypto/cryptd.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
#include <asm/cpu_device_id.h>
#define AEGIS256_BLOCK_ALIGN 16
#define AEGIS256_BLOCK_SIZE 16
#define AEGIS256_NONCE_SIZE 32
#define AEGIS256_STATE_BLOCKS 6
#define AEGIS256_KEY_SIZE 32
#define AEGIS256_MIN_AUTH_SIZE 8
#define AEGIS256_MAX_AUTH_SIZE 16
asmlinkage void crypto_aegis256_aesni_init(void *state, void *key, void *iv);
asmlinkage void crypto_aegis256_aesni_ad(
void *state, unsigned int length, const void *data);
asmlinkage void crypto_aegis256_aesni_enc(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis256_aesni_dec(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis256_aesni_enc_tail(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis256_aesni_dec_tail(
void *state, unsigned int length, const void *src, void *dst);
asmlinkage void crypto_aegis256_aesni_final(
void *state, void *tag_xor, unsigned int cryptlen,
unsigned int assoclen);
struct aegis_block {
u8 bytes[AEGIS256_BLOCK_SIZE] __aligned(AEGIS256_BLOCK_ALIGN);
};
struct aegis_state {
struct aegis_block blocks[AEGIS256_STATE_BLOCKS];
};
struct aegis_ctx {
struct aegis_block key[AEGIS256_KEY_SIZE / AEGIS256_BLOCK_SIZE];
};
struct aegis_crypt_ops {
int (*skcipher_walk_init)(struct skcipher_walk *walk,
struct aead_request *req, bool atomic);
void (*crypt_blocks)(void *state, unsigned int length, const void *src,
void *dst);
void (*crypt_tail)(void *state, unsigned int length, const void *src,
void *dst);
};
static void crypto_aegis256_aesni_process_ad(
struct aegis_state *state, struct scatterlist *sg_src,
unsigned int assoclen)
{
struct scatter_walk walk;
struct aegis_block buf;
unsigned int pos = 0;
scatterwalk_start(&walk, sg_src);
while (assoclen != 0) {
unsigned int size = scatterwalk_clamp(&walk, assoclen);
unsigned int left = size;
void *mapped = scatterwalk_map(&walk);
const u8 *src = (const u8 *)mapped;
if (pos + size >= AEGIS256_BLOCK_SIZE) {
if (pos > 0) {
unsigned int fill = AEGIS256_BLOCK_SIZE - pos;
memcpy(buf.bytes + pos, src, fill);
crypto_aegis256_aesni_ad(state,
AEGIS256_BLOCK_SIZE,
buf.bytes);
pos = 0;
left -= fill;
src += fill;
}
crypto_aegis256_aesni_ad(state, left, src);
src += left & ~(AEGIS256_BLOCK_SIZE - 1);
left &= AEGIS256_BLOCK_SIZE - 1;
}
memcpy(buf.bytes + pos, src, left);
pos += left;
assoclen -= size;
scatterwalk_unmap(mapped);
scatterwalk_advance(&walk, size);
scatterwalk_done(&walk, 0, assoclen);
}
if (pos > 0) {
memset(buf.bytes + pos, 0, AEGIS256_BLOCK_SIZE - pos);
crypto_aegis256_aesni_ad(state, AEGIS256_BLOCK_SIZE, buf.bytes);
}
}
static void crypto_aegis256_aesni_process_crypt(
struct aegis_state *state, struct aead_request *req,
const struct aegis_crypt_ops *ops)
{
struct skcipher_walk walk;
u8 *src, *dst;
unsigned int chunksize, base;
ops->skcipher_walk_init(&walk, req, false);
while (walk.nbytes) {
src = walk.src.virt.addr;
dst = walk.dst.virt.addr;
chunksize = walk.nbytes;
ops->crypt_blocks(state, chunksize, src, dst);
base = chunksize & ~(AEGIS256_BLOCK_SIZE - 1);
src += base;
dst += base;
chunksize &= AEGIS256_BLOCK_SIZE - 1;
if (chunksize > 0)
ops->crypt_tail(state, chunksize, src, dst);
skcipher_walk_done(&walk, 0);
}
}
static struct aegis_ctx *crypto_aegis256_aesni_ctx(struct crypto_aead *aead)
{
u8 *ctx = crypto_aead_ctx(aead); /*covered*/
ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx));
return (void *)ctx;
}
static int crypto_aegis256_aesni_setkey(struct crypto_aead *aead, const u8 *key,
unsigned int keylen)
{
struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(aead); /*covered*/
if (keylen != AEGIS256_KEY_SIZE) {
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); /*covered*/
return -EINVAL;
}
memcpy(ctx->key, key, AEGIS256_KEY_SIZE); /*covered*/
return 0; /*covered*/
}
static int crypto_aegis256_aesni_setauthsize(struct crypto_aead *tfm,
unsigned int authsize)
{
if (authsize > AEGIS256_MAX_AUTH_SIZE) /*covered*/
return -EINVAL;
if (authsize < AEGIS256_MIN_AUTH_SIZE) /*covered*/
return -EINVAL;
return 0; /*covered*/
}
static void crypto_aegis256_aesni_crypt(struct aead_request *req,
struct aegis_block *tag_xor,
unsigned int cryptlen,
const struct aegis_crypt_ops *ops)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(tfm);
struct aegis_state state;
kernel_fpu_begin();
crypto_aegis256_aesni_init(&state, ctx->key, req->iv);
crypto_aegis256_aesni_process_ad(&state, req->src, req->assoclen);
crypto_aegis256_aesni_process_crypt(&state, req, ops);
crypto_aegis256_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
kernel_fpu_end();
}
static int crypto_aegis256_aesni_encrypt(struct aead_request *req)
{
static const struct aegis_crypt_ops OPS = {
.skcipher_walk_init = skcipher_walk_aead_encrypt,
.crypt_blocks = crypto_aegis256_aesni_enc,
.crypt_tail = crypto_aegis256_aesni_enc_tail,
};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_block tag = {};
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen;
crypto_aegis256_aesni_crypt(req, &tag, cryptlen, &OPS);
scatterwalk_map_and_copy(tag.bytes, req->dst,
req->assoclen + cryptlen, authsize, 1);
return 0;
}
static int crypto_aegis256_aesni_decrypt(struct aead_request *req)
{
static const struct aegis_block zeros = {};
static const struct aegis_crypt_ops OPS = {
.skcipher_walk_init = skcipher_walk_aead_decrypt,
.crypt_blocks = crypto_aegis256_aesni_dec,
.crypt_tail = crypto_aegis256_aesni_dec_tail,
};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_block tag;
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen - authsize;
scatterwalk_map_and_copy(tag.bytes, req->src,
req->assoclen + cryptlen, authsize, 0);
crypto_aegis256_aesni_crypt(req, &tag, cryptlen, &OPS);
return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0;
}
static int crypto_aegis256_aesni_init_tfm(struct crypto_aead *aead)
{
return 0; /*covered*/
}
static void crypto_aegis256_aesni_exit_tfm(struct crypto_aead *aead)
{
}
static int cryptd_aegis256_aesni_setkey(struct crypto_aead *aead,
const u8 *key, unsigned int keylen)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx; /*covered*/
return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
}
static int cryptd_aegis256_aesni_setauthsize(struct crypto_aead *aead,
unsigned int authsize)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx; /*covered*/
return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
}
static int cryptd_aegis256_aesni_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
aead = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() ||
!cryptd_aead_queued(cryptd_tfm)))
aead = cryptd_aead_child(cryptd_tfm);
aead_request_set_tfm(req, aead);
return crypto_aead_encrypt(req);
}
static int cryptd_aegis256_aesni_decrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
aead = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() ||
!cryptd_aead_queued(cryptd_tfm)))
aead = cryptd_aead_child(cryptd_tfm);
aead_request_set_tfm(req, aead);
return crypto_aead_decrypt(req);
}
static int cryptd_aegis256_aesni_init_tfm(struct crypto_aead *aead)
{
struct cryptd_aead *cryptd_tfm;
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
cryptd_tfm = cryptd_alloc_aead("__aegis256-aesni", CRYPTO_ALG_INTERNAL, /*covered*/
CRYPTO_ALG_INTERNAL);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
*ctx = cryptd_tfm; /*covered*/
crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
return 0; /*covered*/
}
static void cryptd_aegis256_aesni_exit_tfm(struct crypto_aead *aead)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
cryptd_free_aead(*ctx);
}
static struct aead_alg crypto_aegis256_aesni_alg[] = {
{
.setkey = crypto_aegis256_aesni_setkey,
.setauthsize = crypto_aegis256_aesni_setauthsize,
.encrypt = crypto_aegis256_aesni_encrypt,
.decrypt = crypto_aegis256_aesni_decrypt,
.init = crypto_aegis256_aesni_init_tfm,
.exit = crypto_aegis256_aesni_exit_tfm,
.ivsize = AEGIS256_NONCE_SIZE,
.maxauthsize = AEGIS256_MAX_AUTH_SIZE,
.chunksize = AEGIS256_BLOCK_SIZE,
.base = {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct aegis_ctx) +
__alignof__(struct aegis_ctx),
.cra_alignmask = 0,
.cra_name = "__aegis256",
.cra_driver_name = "__aegis256-aesni",
.cra_module = THIS_MODULE,
}
}, {
.setkey = cryptd_aegis256_aesni_setkey,
.setauthsize = cryptd_aegis256_aesni_setauthsize,
.encrypt = cryptd_aegis256_aesni_encrypt,
.decrypt = cryptd_aegis256_aesni_decrypt,
.init = cryptd_aegis256_aesni_init_tfm,
.exit = cryptd_aegis256_aesni_exit_tfm,
.ivsize = AEGIS256_NONCE_SIZE,
.maxauthsize = AEGIS256_MAX_AUTH_SIZE,
.chunksize = AEGIS256_BLOCK_SIZE,
.base = {
.cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct cryptd_aead *),
.cra_alignmask = 0,
.cra_priority = 400,
.cra_name = "aegis256",
.cra_driver_name = "aegis256-aesni",
.cra_module = THIS_MODULE,
}
}
};
static int __init crypto_aegis256_aesni_module_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2) ||
!boot_cpu_has(X86_FEATURE_AES) ||
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
return crypto_register_aeads(crypto_aegis256_aesni_alg,
ARRAY_SIZE(crypto_aegis256_aesni_alg));
}
static void __exit crypto_aegis256_aesni_module_exit(void)
{
crypto_unregister_aeads(crypto_aegis256_aesni_alg,
ARRAY_SIZE(crypto_aegis256_aesni_alg));
}
module_init(crypto_aegis256_aesni_module_init);
module_exit(crypto_aegis256_aesni_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("AEGIS-256 AEAD algorithm -- AESNI+SSE2 implementation");
MODULE_ALIAS_CRYPTO("aegis256");
MODULE_ALIAS_CRYPTO("aegis256-aesni");
/*
* Support for Intel AES-NI instructions. This file contains glue
* code, the real AES implementation is in intel-aes_asm.S.
*
* Copyright (C) 2008, Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
*
* Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
* interface for 64-bit kernels.
* Authors: Adrian Hoban <adrian.hoban@intel.com>
* Gabriele Paoloni <gabriele.paoloni@intel.com>
* Tadeusz Struk (tadeusz.struk@intel.com)
* Aidan O'Mahony (aidan.o.mahony@intel.com)
* Copyright (c) 2010, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/aes.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <crypto/b128ops.h>
#include <crypto/gcm.h>
#include <crypto/xts.h>
#include <asm/cpu_device_id.h>
#include <asm/fpu/api.h>
#include <asm/crypto/aes.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#ifdef CONFIG_X86_64
#include <asm/crypto/glue_helper.h>
#endif
#define AESNI_ALIGN 16
#define AESNI_ALIGN_ATTR __attribute__ ((__aligned__(AESNI_ALIGN)))
#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE - 1))
#define RFC4106_HASH_SUBKEY_SIZE 16
#define AESNI_ALIGN_EXTRA ((AESNI_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1))
#define CRYPTO_AES_CTX_SIZE (sizeof(struct crypto_aes_ctx) + AESNI_ALIGN_EXTRA)
#define XTS_AES_CTX_SIZE (sizeof(struct aesni_xts_ctx) + AESNI_ALIGN_EXTRA)
/* This data is stored at the end of the crypto_tfm struct.
* It's a type of per "session" data storage location.
* This needs to be 16 byte aligned.
*/
struct aesni_rfc4106_gcm_ctx {
u8 hash_subkey[16] AESNI_ALIGN_ATTR;
struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR;
u8 nonce[4];
};
struct generic_gcmaes_ctx {
u8 hash_subkey[16] AESNI_ALIGN_ATTR;
struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR;
};
struct aesni_xts_ctx {
u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
};
#define GCM_BLOCK_LEN 16
struct gcm_context_data {
/* init, update and finalize context data */
u8 aad_hash[GCM_BLOCK_LEN];
u64 aad_length;
u64 in_length;
u8 partial_block_enc_key[GCM_BLOCK_LEN];
u8 orig_IV[GCM_BLOCK_LEN];
u8 current_counter[GCM_BLOCK_LEN];
u64 partial_block_len;
u64 unused;
u8 hash_keys[GCM_BLOCK_LEN * 16];
};
asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
unsigned int key_len);
asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in);
asmlinkage void aesni_dec(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in);
asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len);
asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len);
asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
#define AVX_GEN2_OPTSIZE 640
#define AVX_GEN4_OPTSIZE 4096
#ifdef CONFIG_X86_64
static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, bool enc, u8 *iv);
/* asmlinkage void aesni_gcm_enc()
* void *ctx, AES Key schedule. Starts on a 16 byte boundary.
* struct gcm_context_data. May be uninitialized.
* u8 *out, Ciphertext output. Encrypt in-place is allowed.
* const u8 *in, Plaintext input
* unsigned long plaintext_len, Length of data in bytes for encryption.
* u8 *iv, Pre-counter block j0: 12 byte IV concatenated with 0x00000001.
* 16-byte aligned pointer.
* u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
* const u8 *aad, Additional Authentication Data (AAD)
* unsigned long aad_len, Length of AAD in bytes.
* u8 *auth_tag, Authenticated Tag output.
* unsigned long auth_tag_len), Authenticated Tag Length in bytes.
* Valid values are 16 (most likely), 12 or 8.
*/
asmlinkage void aesni_gcm_enc(void *ctx,
struct gcm_context_data *gdata, u8 *out,
const u8 *in, unsigned long plaintext_len, u8 *iv,
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len);
/* asmlinkage void aesni_gcm_dec()
* void *ctx, AES Key schedule. Starts on a 16 byte boundary.
* struct gcm_context_data. May be uninitialized.
* u8 *out, Plaintext output. Decrypt in-place is allowed.
* const u8 *in, Ciphertext input
* unsigned long ciphertext_len, Length of data in bytes for decryption.
* u8 *iv, Pre-counter block j0: 12 byte IV concatenated with 0x00000001.
* 16-byte aligned pointer.
* u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
* const u8 *aad, Additional Authentication Data (AAD)
* unsigned long aad_len, Length of AAD in bytes. With RFC4106 this is going
* to be 8 or 12 bytes
* u8 *auth_tag, Authenticated Tag output.
* unsigned long auth_tag_len) Authenticated Tag Length in bytes.
* Valid values are 16 (most likely), 12 or 8.
*/
asmlinkage void aesni_gcm_dec(void *ctx,
struct gcm_context_data *gdata, u8 *out,
const u8 *in, unsigned long ciphertext_len, u8 *iv,
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len);
/* Scatter / Gather routines, with args similar to above */
asmlinkage void aesni_gcm_init(void *ctx,
struct gcm_context_data *gdata,
u8 *iv,
u8 *hash_subkey, const u8 *aad,
unsigned long aad_len);
asmlinkage void aesni_gcm_enc_update(void *ctx,
struct gcm_context_data *gdata, u8 *out,
const u8 *in, unsigned long plaintext_len);
asmlinkage void aesni_gcm_dec_update(void *ctx,
struct gcm_context_data *gdata, u8 *out,
const u8 *in,
unsigned long ciphertext_len);
asmlinkage void aesni_gcm_finalize(void *ctx,
struct gcm_context_data *gdata,
u8 *auth_tag, unsigned long auth_tag_len);
static struct aesni_gcm_tfm_s {
void (*init)(void *ctx,
struct gcm_context_data *gdata,
u8 *iv,
u8 *hash_subkey, const u8 *aad,
unsigned long aad_len);
void (*enc_update)(void *ctx,
struct gcm_context_data *gdata, u8 *out,
const u8 *in,
unsigned long plaintext_len);
void (*dec_update)(void *ctx,
struct gcm_context_data *gdata, u8 *out,
const u8 *in,
unsigned long ciphertext_len);
void (*finalize)(void *ctx,
struct gcm_context_data *gdata,
u8 *auth_tag, unsigned long auth_tag_len);
} *aesni_gcm_tfm;
struct aesni_gcm_tfm_s aesni_gcm_tfm_sse = {
.init = &aesni_gcm_init,
.enc_update = &aesni_gcm_enc_update,
.dec_update = &aesni_gcm_dec_update,
.finalize = &aesni_gcm_finalize,
};
#ifdef CONFIG_AS_AVX
asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv,
void *keys, u8 *out, unsigned int num_bytes);
asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv,
void *keys, u8 *out, unsigned int num_bytes);
asmlinkage void aes_ctr_enc_256_avx_by8(const u8 *in, u8 *iv,
void *keys, u8 *out, unsigned int num_bytes);
/*
* asmlinkage void aesni_gcm_init_avx_gen2()
* gcm_data *my_ctx_data, context data
* u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
*/
asmlinkage void aesni_gcm_init_avx_gen2(void *my_ctx_data,
struct gcm_context_data *gdata,
u8 *iv,
u8 *hash_subkey,
const u8 *aad,
unsigned long aad_len);
asmlinkage void aesni_gcm_enc_update_avx_gen2(void *ctx,
struct gcm_context_data *gdata, u8 *out,
const u8 *in, unsigned long plaintext_len);
asmlinkage void aesni_gcm_dec_update_avx_gen2(void *ctx,
struct gcm_context_data *gdata, u8 *out,
const u8 *in,
unsigned long ciphertext_len);
asmlinkage void aesni_gcm_finalize_avx_gen2(void *ctx,
struct gcm_context_data *gdata,
u8 *auth_tag, unsigned long auth_tag_len);
asmlinkage void aesni_gcm_enc_avx_gen2(void *ctx,
struct gcm_context_data *gdata, u8 *out,
const u8 *in, unsigned long plaintext_len, u8 *iv,
const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len);
asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx,
struct gcm_context_data *gdata, u8 *out,
const u8 *in, unsigned long ciphertext_len, u8 *iv,
const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len);
struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen2 = {
.init = &aesni_gcm_init_avx_gen2,
.enc_update = &aesni_gcm_enc_update_avx_gen2,
.dec_update = &aesni_gcm_dec_update_avx_gen2,
.finalize = &aesni_gcm_finalize_avx_gen2,
};
#endif
#ifdef CONFIG_AS_AVX2
/*
* asmlinkage void aesni_gcm_init_avx_gen4()
* gcm_data *my_ctx_data, context data
* u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
*/
asmlinkage void aesni_gcm_init_avx_gen4(void *my_ctx_data,
struct gcm_context_data *gdata,
u8 *iv,
u8 *hash_subkey,
const u8 *aad,
unsigned long aad_len);
asmlinkage void aesni_gcm_enc_update_avx_gen4(void *ctx,
struct gcm_context_data *gdata, u8 *out,
const u8 *in, unsigned long plaintext_len);
asmlinkage void aesni_gcm_dec_update_avx_gen4(void *ctx,
struct gcm_context_data *gdata, u8 *out,
const u8 *in,
unsigned long ciphertext_len);
asmlinkage void aesni_gcm_finalize_avx_gen4(void *ctx,
struct gcm_context_data *gdata,
u8 *auth_tag, unsigned long auth_tag_len);
asmlinkage void aesni_gcm_enc_avx_gen4(void *ctx,
struct gcm_context_data *gdata, u8 *out,
const u8 *in, unsigned long plaintext_len, u8 *iv,
const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len);
asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx,
struct gcm_context_data *gdata, u8 *out,
const u8 *in, unsigned long ciphertext_len, u8 *iv,
const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len);
struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen4 = {
.init = &aesni_gcm_init_avx_gen4,
.enc_update = &aesni_gcm_enc_update_avx_gen4,
.dec_update = &aesni_gcm_dec_update_avx_gen4,
.finalize = &aesni_gcm_finalize_avx_gen4,
};
#endif
static inline struct
aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
{
unsigned long align = AESNI_ALIGN;
if (align <= crypto_tfm_ctx_alignment())
align = 1;
return PTR_ALIGN(crypto_aead_ctx(tfm), align); /*covered*/
}
static inline struct
generic_gcmaes_ctx *generic_gcmaes_ctx_get(struct crypto_aead *tfm)
{
unsigned long align = AESNI_ALIGN;
if (align <= crypto_tfm_ctx_alignment())
align = 1;
return PTR_ALIGN(crypto_aead_ctx(tfm), align); /*covered*/
}
#endif
static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
{
unsigned long addr = (unsigned long)raw_ctx; /*covered*/
unsigned long align = AESNI_ALIGN;
if (align <= crypto_tfm_ctx_alignment())
align = 1;
return (struct crypto_aes_ctx *)ALIGN(addr, align);
}
static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx,
const u8 *in_key, unsigned int key_len)
{
struct crypto_aes_ctx *ctx = aes_ctx(raw_ctx); /*covered*/
u32 *flags = &tfm->crt_flags;
int err;
if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 && /*covered*/
key_len != AES_KEYSIZE_256) {
*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; /*covered*/
return -EINVAL;
}
if (!irq_fpu_usable()) /*covered*/
err = crypto_aes_expand_key(ctx, in_key, key_len);
else {
kernel_fpu_begin(); /*covered*/
err = aesni_set_key(ctx, in_key, key_len);
kernel_fpu_end(); /*covered*/
}
return err;
}
static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
{
return aes_set_key_common(tfm, crypto_tfm_ctx(tfm), in_key, key_len); /*covered*/
}
static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); /*covered*/
if (!irq_fpu_usable())
crypto_aes_encrypt_x86(ctx, dst, src);
else {
kernel_fpu_begin(); /*covered*/
aesni_enc(ctx, dst, src);
kernel_fpu_end();
}
} /*covered*/
static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); /*covered*/
if (!irq_fpu_usable())
crypto_aes_decrypt_x86(ctx, dst, src);
else {
kernel_fpu_begin(); /*covered*/
aesni_dec(ctx, dst, src);
kernel_fpu_end();
}
} /*covered*/
static void __aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
aesni_enc(ctx, dst, src);
}
static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
aesni_dec(ctx, dst, src);
}
static int aesni_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int len)
{
return aes_set_key_common(crypto_skcipher_tfm(tfm), /*covered*/
crypto_skcipher_ctx(tfm), key, len);
}
static int ecb_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, true);
kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK);
nbytes &= AES_BLOCK_SIZE - 1;
err = skcipher_walk_done(&walk, nbytes);
}
kernel_fpu_end();
return err;
}
static int ecb_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, true);
kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK);
nbytes &= AES_BLOCK_SIZE - 1;
err = skcipher_walk_done(&walk, nbytes);
}
kernel_fpu_end();
return err;
}
static int cbc_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, true);
kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK, walk.iv);
nbytes &= AES_BLOCK_SIZE - 1;
err = skcipher_walk_done(&walk, nbytes);
}
kernel_fpu_end();
return err;
}
static int cbc_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, true);
kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK, walk.iv);
nbytes &= AES_BLOCK_SIZE - 1;
err = skcipher_walk_done(&walk, nbytes);
}
kernel_fpu_end();
return err;
}
#ifdef CONFIG_X86_64
static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
struct skcipher_walk *walk)
{
u8 *ctrblk = walk->iv; /*covered*/
u8 keystream[AES_BLOCK_SIZE];
u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
unsigned int nbytes = walk->nbytes;
aesni_enc(ctx, keystream, ctrblk);
crypto_xor_cpy(dst, keystream, src, nbytes);
crypto_inc(ctrblk, AES_BLOCK_SIZE);
}
#ifdef CONFIG_AS_AVX
static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv)
{
/*
* based on key length, override with the by8 version
* of ctr mode encryption/decryption for improved performance
* aes_set_key_common() ensures that key length is one of
* {128,192,256}
*/
if (ctx->key_length == AES_KEYSIZE_128) /*covered*/
aes_ctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len); /*covered*/
else if (ctx->key_length == AES_KEYSIZE_192) /*covered*/
aes_ctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len); /*covered*/
else
aes_ctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len); /*covered*/
} /*covered*/
#endif
static int ctr_crypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); /*covered*/
struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, true);
kernel_fpu_begin();
while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr, /*covered*/
nbytes & AES_BLOCK_MASK, walk.iv);
nbytes &= AES_BLOCK_SIZE - 1;
err = skcipher_walk_done(&walk, nbytes);
}
if (walk.nbytes) { /*covered*/
ctr_crypt_final(ctx, &walk); /*covered*/
err = skcipher_walk_done(&walk, 0);
}
kernel_fpu_end(); /*covered*/
return err;
}
static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen)
{
struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int err;
err = xts_verify_key(tfm, key, keylen);
if (err)
return err;
keylen /= 2;
/* first half of xts-key is for crypt */
err = aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_crypt_ctx,
key, keylen);
if (err)
return err;
/* second half of xts-key is for tweak */
return aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_tweak_ctx,
key + keylen, keylen);
}
static void aesni_xts_tweak(void *ctx, u8 *out, const u8 *in)
{
aesni_enc(ctx, out, in);
}
static void aesni_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_enc));
}
static void aesni_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_dec));
}
static void aesni_xts_enc8(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, true, (u8 *)iv);
}
static void aesni_xts_dec8(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, false, (u8 *)iv);
}
static const struct common_glue_ctx aesni_enc_xts = {
.num_funcs = 2,
.fpu_blocks_limit = 1,
.funcs = { {
.num_blocks = 8,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc8) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc) }
} }
};
static const struct common_glue_ctx aesni_dec_xts = {
.num_funcs = 2,
.fpu_blocks_limit = 1,
.funcs = { {
.num_blocks = 8,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec8) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec) }
} }
};
static int xts_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
return glue_xts_req_128bit(&aesni_enc_xts, req,
XTS_TWEAK_CAST(aesni_xts_tweak),
aes_ctx(ctx->raw_tweak_ctx),
aes_ctx(ctx->raw_crypt_ctx));
}
static int xts_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
return glue_xts_req_128bit(&aesni_dec_xts, req,
XTS_TWEAK_CAST(aesni_xts_tweak),
aes_ctx(ctx->raw_tweak_ctx),
aes_ctx(ctx->raw_crypt_ctx));
}
static int rfc4106_init(struct crypto_aead *aead)
{
struct cryptd_aead *cryptd_tfm;
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", /*covered*/
CRYPTO_ALG_INTERNAL,
CRYPTO_ALG_INTERNAL);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
*ctx = cryptd_tfm; /*covered*/
crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
return 0; /*covered*/
}
static void rfc4106_exit(struct crypto_aead *aead)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
cryptd_free_aead(*ctx); /*covered*/
}
static int
rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
{
struct crypto_cipher *tfm;
int ret;
tfm = crypto_alloc_cipher("aes", 0, 0); /*covered*/
if (IS_ERR(tfm))
return PTR_ERR(tfm);
ret = crypto_cipher_setkey(tfm, key, key_len); /*covered*/
if (ret)
goto out_free_cipher;
/* Clear the data in the hash sub key container to zero.*/
/* We want to cipher all zeros to create the hash sub key. */
memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE); /*covered*/
crypto_cipher_encrypt_one(tfm, hash_subkey, hash_subkey);
out_free_cipher:
crypto_free_cipher(tfm); /*covered*/
return ret; /*covered*/
}
static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key,
unsigned int key_len)
{
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(aead); /*covered*/
if (key_len < 4) {
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); /*covered*/
return -EINVAL;
}
/*Account for 4 byte nonce at the end.*/
key_len -= 4; /*covered*/
memcpy(ctx->nonce, key + key_len, sizeof(ctx->nonce));
return aes_set_key_common(crypto_aead_tfm(aead),
&ctx->aes_key_expanded, key, key_len) ?: /*covered*/
rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
}
static int gcmaes_wrapper_set_key(struct crypto_aead *parent, const u8 *key,
unsigned int key_len)
{
struct cryptd_aead **ctx = crypto_aead_ctx(parent);
struct cryptd_aead *cryptd_tfm = *ctx; /*covered*/
return crypto_aead_setkey(&cryptd_tfm->base, key, key_len);
}
static int common_rfc4106_set_authsize(struct crypto_aead *aead,
unsigned int authsize)
{
switch (authsize) { /*covered*/
case 8:
case 12:
case 16:
break;
default:
return -EINVAL;
}
return 0;
}
/* This is the Integrity Check Value (aka the authentication tag length and can
* be 8, 12 or 16 bytes long. */
static int gcmaes_wrapper_set_authsize(struct crypto_aead *parent,
unsigned int authsize)
{
struct cryptd_aead **ctx = crypto_aead_ctx(parent);
struct cryptd_aead *cryptd_tfm = *ctx; /*covered*/
return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
}
static int generic_gcmaes_set_authsize(struct crypto_aead *tfm,
unsigned int authsize)
{
switch (authsize) { /*covered*/
case 4:
case 8:
case 12:
case 13:
case 14:
case 15:
case 16:
break;
default:
return -EINVAL;
}
return 0;
}
static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
unsigned int assoclen, u8 *hash_subkey,
u8 *iv, void *aes_ctx)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req); /*covered*/
unsigned long auth_tag_len = crypto_aead_authsize(tfm);
struct aesni_gcm_tfm_s *gcm_tfm = aesni_gcm_tfm;
struct gcm_context_data data AESNI_ALIGN_ATTR;
struct scatter_walk dst_sg_walk = {};
unsigned long left = req->cryptlen;
unsigned long len, srclen, dstlen;
struct scatter_walk assoc_sg_walk;
struct scatter_walk src_sg_walk;
struct scatterlist src_start[2];
struct scatterlist dst_start[2];
struct scatterlist *src_sg;
struct scatterlist *dst_sg;
u8 *src, *dst, *assoc;
u8 *assocmem = NULL;
u8 authTag[16];
if (!enc)
left -= auth_tag_len; /*covered*/
#ifdef CONFIG_AS_AVX2
if (left < AVX_GEN4_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen4) /*covered*/
gcm_tfm = &aesni_gcm_tfm_avx_gen2;
#endif
#ifdef CONFIG_AS_AVX
if (left < AVX_GEN2_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen2) /*covered*/
gcm_tfm = &aesni_gcm_tfm_sse;
#endif
/* Linearize assoc, if not already linear */
if (req->src->length >= assoclen && req->src->length && /*covered*/
(!PageHighMem(sg_page(req->src)) || /*covered*/
req->src->offset + req->src->length <= PAGE_SIZE)) {
scatterwalk_start(&assoc_sg_walk, req->src); /*covered*/
assoc = scatterwalk_map(&assoc_sg_walk);
} else {
/* assoc can be any length, so must be on heap */
assocmem = kmalloc(assoclen, GFP_ATOMIC);
if (unlikely(!assocmem))
return -ENOMEM;
assoc = assocmem;
scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0);
}
src_sg = scatterwalk_ffwd(src_start, req->src, req->assoclen); /*covered*/
scatterwalk_start(&src_sg_walk, src_sg);
if (req->src != req->dst) {
dst_sg = scatterwalk_ffwd(dst_start, req->dst, req->assoclen); /*covered*/
scatterwalk_start(&dst_sg_walk, dst_sg);
}
kernel_fpu_begin(); /*covered*/
gcm_tfm->init(aes_ctx, &data, iv,
hash_subkey, assoc, assoclen);
if (req->src != req->dst) {
while (left) { /*covered*/
src = scatterwalk_map(&src_sg_walk); /*covered*/
dst = scatterwalk_map(&dst_sg_walk); /*covered*/
srclen = scatterwalk_clamp(&src_sg_walk, left);
dstlen = scatterwalk_clamp(&dst_sg_walk, left);
len = min(srclen, dstlen);
if (len) { /*covered*/
if (enc)
gcm_tfm->enc_update(aes_ctx, &data, /*covered*/
dst, src, len);
else
gcm_tfm->dec_update(aes_ctx, &data,
dst, src, len);
}
left -= len; /*covered*/
scatterwalk_unmap(src);
scatterwalk_unmap(dst); /*covered*/
scatterwalk_advance(&src_sg_walk, len); /*covered*/
scatterwalk_advance(&dst_sg_walk, len);
scatterwalk_done(&src_sg_walk, 0, left); /*covered*/
scatterwalk_done(&dst_sg_walk, 1, left); /*covered*/
}
} else {
while (left) { /*covered*/
dst = src = scatterwalk_map(&src_sg_walk); /*covered*/
len = scatterwalk_clamp(&src_sg_walk, left);
if (len) { /*covered*/
if (enc)
gcm_tfm->enc_update(aes_ctx, &data, /*covered*/
src, src, len);
else
gcm_tfm->dec_update(aes_ctx, &data, /*covered*/
src, src, len);
}
left -= len; /*covered*/
scatterwalk_unmap(src);
scatterwalk_advance(&src_sg_walk, len); /*covered*/
scatterwalk_done(&src_sg_walk, 1, left); /*covered*/
}
}
gcm_tfm->finalize(aes_ctx, &data, authTag, auth_tag_len); /*covered*/
kernel_fpu_end();
if (!assocmem)
scatterwalk_unmap(assoc); /*covered*/
else
kfree(assocmem);
if (!enc) { /*covered*/
u8 authTagMsg[16];
/* Copy out original authTag */
scatterwalk_map_and_copy(authTagMsg, req->src, /*covered*/
req->assoclen + req->cryptlen -
auth_tag_len,
auth_tag_len, 0);
/* Compare generated tag with passed in tag. */
return crypto_memneq(authTagMsg, authTag, auth_tag_len) ?
-EBADMSG : 0; /*covered*/
}
/* Copy in the authTag */
scatterwalk_map_and_copy(authTag, req->dst, /*covered*/
req->assoclen + req->cryptlen,
auth_tag_len, 1);
return 0; /*covered*/
}
static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
u8 *hash_subkey, u8 *iv, void *aes_ctx)
{
return gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv,
aes_ctx);
}
static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
u8 *hash_subkey, u8 *iv, void *aes_ctx)
{
return gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv,
aes_ctx);
}
static int helper_rfc4106_encrypt(struct aead_request *req)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
void *aes_ctx = &(ctx->aes_key_expanded);
u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
unsigned int i;
__be32 counter = cpu_to_be32(1);
/* Assuming we are supporting rfc4106 64-bit extended */
/* sequence numbers We need to have the AAD length equal */
/* to 16 or 20 bytes */
if (unlikely(req->assoclen != 16 && req->assoclen != 20))
return -EINVAL;
/* IV below built */
for (i = 0; i < 4; i++)
*(iv+i) = ctx->nonce[i];
for (i = 0; i < 8; i++)
*(iv+4+i) = req->iv[i];
*((__be32 *)(iv+12)) = counter;
return gcmaes_encrypt(req, req->assoclen - 8, ctx->hash_subkey, iv,
aes_ctx);
}
static int helper_rfc4106_decrypt(struct aead_request *req)
{
__be32 counter = cpu_to_be32(1);
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
void *aes_ctx = &(ctx->aes_key_expanded);
u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
unsigned int i;
if (unlikely(req->assoclen != 16 && req->assoclen != 20))
return -EINVAL;
/* Assuming we are supporting rfc4106 64-bit extended */
/* sequence numbers We need to have the AAD length */
/* equal to 16 or 20 bytes */
/* IV below built */
for (i = 0; i < 4; i++)
*(iv+i) = ctx->nonce[i];
for (i = 0; i < 8; i++)
*(iv+4+i) = req->iv[i];
*((__be32 *)(iv+12)) = counter;
return gcmaes_decrypt(req, req->assoclen - 8, ctx->hash_subkey, iv,
aes_ctx);
}
static int gcmaes_wrapper_encrypt(struct aead_request *req)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req); /*covered*/
struct cryptd_aead **ctx = crypto_aead_ctx(tfm);
struct cryptd_aead *cryptd_tfm = *ctx;
tfm = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() || /*covered*/
!cryptd_aead_queued(cryptd_tfm)))
tfm = cryptd_aead_child(cryptd_tfm); /*covered*/
aead_request_set_tfm(req, tfm); /*covered*/
return crypto_aead_encrypt(req); /*covered*/
}
static int gcmaes_wrapper_decrypt(struct aead_request *req)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req); /*covered*/
struct cryptd_aead **ctx = crypto_aead_ctx(tfm);
struct cryptd_aead *cryptd_tfm = *ctx;
tfm = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() || /*covered*/
!cryptd_aead_queued(cryptd_tfm)))
tfm = cryptd_aead_child(cryptd_tfm); /*covered*/
aead_request_set_tfm(req, tfm); /*covered*/
return crypto_aead_decrypt(req); /*covered*/
}
#endif
static struct crypto_alg aesni_algs[] = { {
.cra_name = "aes",
.cra_driver_name = "aes-aesni",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = CRYPTO_AES_CTX_SIZE,
.cra_module = THIS_MODULE,
.cra_u = {
.cipher = {
.cia_min_keysize = AES_MIN_KEY_SIZE,
.cia_max_keysize = AES_MAX_KEY_SIZE,
.cia_setkey = aes_set_key,
.cia_encrypt = aes_encrypt,
.cia_decrypt = aes_decrypt
}
}
}, {
.cra_name = "__aes",
.cra_driver_name = "__aes-aesni",
.cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = CRYPTO_AES_CTX_SIZE,
.cra_module = THIS_MODULE,
.cra_u = {
.cipher = {
.cia_min_keysize = AES_MIN_KEY_SIZE,
.cia_max_keysize = AES_MAX_KEY_SIZE,
.cia_setkey = aes_set_key,
.cia_encrypt = __aes_encrypt,
.cia_decrypt = __aes_decrypt
}
}
} };
static struct skcipher_alg aesni_skciphers[] = {
{
.base = {
.cra_name = "__ecb(aes)",
.cra_driver_name = "__ecb-aes-aesni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = CRYPTO_AES_CTX_SIZE,
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.setkey = aesni_skcipher_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
.base = {
.cra_name = "__cbc(aes)",
.cra_driver_name = "__cbc-aes-aesni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = CRYPTO_AES_CTX_SIZE,
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = aesni_skcipher_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
#ifdef CONFIG_X86_64
}, {
.base = {
.cra_name = "__ctr(aes)",
.cra_driver_name = "__ctr-aes-aesni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = CRYPTO_AES_CTX_SIZE,
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.chunksize = AES_BLOCK_SIZE,
.setkey = aesni_skcipher_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
}, {
.base = {
.cra_name = "__xts(aes)",
.cra_driver_name = "__xts-aes-aesni",
.cra_priority = 401,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = XTS_AES_CTX_SIZE,
.cra_module = THIS_MODULE,
},
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = xts_aesni_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
#endif
}
};
static
struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)];
#ifdef CONFIG_X86_64
static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key,
unsigned int key_len)
{
struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(aead); /*covered*/
return aes_set_key_common(crypto_aead_tfm(aead),
&ctx->aes_key_expanded, key, key_len) ?: /*covered*/
rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); /*covered*/
}
static int generic_gcmaes_encrypt(struct aead_request *req)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req); /*covered*/
struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm);
void *aes_ctx = &(ctx->aes_key_expanded);
u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
__be32 counter = cpu_to_be32(1);
memcpy(iv, req->iv, 12);
*((__be32 *)(iv+12)) = counter;
return gcmaes_encrypt(req, req->assoclen, ctx->hash_subkey, iv,
aes_ctx);
}
static int generic_gcmaes_decrypt(struct aead_request *req)
{
__be32 counter = cpu_to_be32(1);
struct crypto_aead *tfm = crypto_aead_reqtfm(req); /*covered*/
struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm);
void *aes_ctx = &(ctx->aes_key_expanded);
u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
memcpy(iv, req->iv, 12);
*((__be32 *)(iv+12)) = counter;
return gcmaes_decrypt(req, req->assoclen, ctx->hash_subkey, iv,
aes_ctx);
}
static int generic_gcmaes_init(struct crypto_aead *aead)
{
struct cryptd_aead *cryptd_tfm;
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
cryptd_tfm = cryptd_alloc_aead("__driver-generic-gcm-aes-aesni", /*covered*/
CRYPTO_ALG_INTERNAL,
CRYPTO_ALG_INTERNAL);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
*ctx = cryptd_tfm; /*covered*/
crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
return 0; /*covered*/
}
static void generic_gcmaes_exit(struct crypto_aead *aead)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
cryptd_free_aead(*ctx);
}
static struct aead_alg aesni_aead_algs[] = { {
.setkey = common_rfc4106_set_key,
.setauthsize = common_rfc4106_set_authsize,
.encrypt = helper_rfc4106_encrypt,
.decrypt = helper_rfc4106_decrypt,
.ivsize = GCM_RFC4106_IV_SIZE,
.maxauthsize = 16,
.base = {
.cra_name = "__gcm-aes-aesni",
.cra_driver_name = "__driver-gcm-aes-aesni",
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx),
.cra_alignmask = AESNI_ALIGN - 1,
.cra_module = THIS_MODULE,
},
}, {
.init = rfc4106_init,
.exit = rfc4106_exit,
.setkey = gcmaes_wrapper_set_key,
.setauthsize = gcmaes_wrapper_set_authsize,
.encrypt = gcmaes_wrapper_encrypt,
.decrypt = gcmaes_wrapper_decrypt,
.ivsize = GCM_RFC4106_IV_SIZE,
.maxauthsize = 16,
.base = {
.cra_name = "rfc4106(gcm(aes))",
.cra_driver_name = "rfc4106-gcm-aesni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct cryptd_aead *),
.cra_module = THIS_MODULE,
},
}, {
.setkey = generic_gcmaes_set_key,
.setauthsize = generic_gcmaes_set_authsize,
.encrypt = generic_gcmaes_encrypt,
.decrypt = generic_gcmaes_decrypt,
.ivsize = GCM_AES_IV_SIZE,
.maxauthsize = 16,
.base = {
.cra_name = "__generic-gcm-aes-aesni",
.cra_driver_name = "__driver-generic-gcm-aes-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct generic_gcmaes_ctx),
.cra_alignmask = AESNI_ALIGN - 1,
.cra_module = THIS_MODULE,
},
}, {
.init = generic_gcmaes_init,
.exit = generic_gcmaes_exit,
.setkey = gcmaes_wrapper_set_key,
.setauthsize = gcmaes_wrapper_set_authsize,
.encrypt = gcmaes_wrapper_encrypt,
.decrypt = gcmaes_wrapper_decrypt,
.ivsize = GCM_AES_IV_SIZE,
.maxauthsize = 16,
.base = {
.cra_name = "gcm(aes)",
.cra_driver_name = "generic-gcm-aesni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct cryptd_aead *),
.cra_module = THIS_MODULE,
},
} };
#else
static struct aead_alg aesni_aead_algs[0];
#endif
static const struct x86_cpu_id aesni_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_AES),
{}
};
MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
static void aesni_free_simds(void)
{
int i;
for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers) &&
aesni_simd_skciphers[i]; i++)
simd_skcipher_free(aesni_simd_skciphers[i]);
}
static int __init aesni_init(void)
{
struct simd_skcipher_alg *simd;
const char *basename;
const char *algname;
const char *drvname;
int err;
int i;
if (!x86_match_cpu(aesni_cpu_id))
return -ENODEV;
#ifdef CONFIG_X86_64
#ifdef CONFIG_AS_AVX2
if (boot_cpu_has(X86_FEATURE_AVX2)) {
pr_info("AVX2 version of gcm_enc/dec engaged.\n");
aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen4;
} else
#endif
#ifdef CONFIG_AS_AVX
if (boot_cpu_has(X86_FEATURE_AVX)) {
pr_info("AVX version of gcm_enc/dec engaged.\n");
aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen2;
} else
#endif
{
pr_info("SSE version of gcm_enc/dec engaged.\n");
aesni_gcm_tfm = &aesni_gcm_tfm_sse;
}
aesni_ctr_enc_tfm = aesni_ctr_enc;
#ifdef CONFIG_AS_AVX
if (boot_cpu_has(X86_FEATURE_AVX)) {
/* optimize performance of ctr mode encryption transform */
aesni_ctr_enc_tfm = aesni_ctr_enc_avx_tfm;
pr_info("AES CTR mode by8 optimization enabled\n");
}
#endif
#endif
err = crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
if (err)
return err;
err = crypto_register_skciphers(aesni_skciphers,
ARRAY_SIZE(aesni_skciphers));
if (err)
goto unregister_algs;
err = crypto_register_aeads(aesni_aead_algs,
ARRAY_SIZE(aesni_aead_algs));
if (err)
goto unregister_skciphers;
for (i = 0; i < ARRAY_SIZE(aesni_skciphers); i++) {
algname = aesni_skciphers[i].base.cra_name + 2;
drvname = aesni_skciphers[i].base.cra_driver_name + 2;
basename = aesni_skciphers[i].base.cra_driver_name;
simd = simd_skcipher_create_compat(algname, drvname, basename);
err = PTR_ERR(simd);
if (IS_ERR(simd))
goto unregister_simds;
aesni_simd_skciphers[i] = simd;
}
return 0;
unregister_simds:
aesni_free_simds();
crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs));
unregister_skciphers:
crypto_unregister_skciphers(aesni_skciphers,
ARRAY_SIZE(aesni_skciphers));
unregister_algs:
crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
return err;
}
static void __exit aesni_exit(void)
{
aesni_free_simds();
crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs));
crypto_unregister_skciphers(aesni_skciphers,
ARRAY_SIZE(aesni_skciphers));
crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
}
late_initcall(aesni_init);
module_exit(aesni_exit);
MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("aes");
/*
* Glue Code for assembler optimized version of Blowfish
*
* Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
* CTR part based on code (crypto/ctr.c) by:
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <crypto/algapi.h>
#include <crypto/blowfish.h>
#include <crypto/internal/skcipher.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
/* regular block cipher functions */
asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
bool xor);
asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
/* 4-way parallel cipher functions */
asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src);
static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
{
__blowfish_enc_blk(ctx, dst, src, false); /*covered*/
}
static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk(ctx, dst, src, true);
}
static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk_4way(ctx, dst, src, false); /*covered*/
}
static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk_4way(ctx, dst, src, true);
}
static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src);
}
static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src);
}
static int blowfish_setkey_skcipher(struct crypto_skcipher *tfm,
const u8 *key, unsigned int keylen)
{
return blowfish_setkey(&tfm->base, key, keylen); /*covered*/
}
static int ecb_crypt(struct skcipher_request *req,
void (*fn)(struct bf_ctx *, u8 *, const u8 *),
void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
{
unsigned int bsize = BF_BLOCK_SIZE;
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); /*covered*/
struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes)) {
u8 *wsrc = walk.src.virt.addr; /*covered*/
u8 *wdst = walk.dst.virt.addr;
/* Process four block batch */
if (nbytes >= bsize * 4) {
do {
fn_4way(ctx, wdst, wsrc); /*covered*/
wsrc += bsize * 4;
wdst += bsize * 4;
nbytes -= bsize * 4;
} while (nbytes >= bsize * 4);
if (nbytes < bsize) /*covered*/
goto done;
}
/* Handle leftovers */
do {
fn(ctx, wdst, wsrc); /*covered*/
wsrc += bsize;
wdst += bsize;
nbytes -= bsize;
} while (nbytes >= bsize);
done:
err = skcipher_walk_done(&walk, nbytes); /*covered*/
}
return err; /*covered*/
}
static int ecb_encrypt(struct skcipher_request *req)
{
return ecb_crypt(req, blowfish_enc_blk, blowfish_enc_blk_4way); /*covered*/
}
static int ecb_decrypt(struct skcipher_request *req)
{
return ecb_crypt(req, blowfish_dec_blk, blowfish_dec_blk_4way); /*covered*/
}
static unsigned int __cbc_encrypt(struct bf_ctx *ctx,
struct skcipher_walk *walk)
{
unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr; /*covered*/
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 *iv = (u64 *)walk->iv;
do {
*dst = *src ^ *iv; /*covered*/
blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
iv = dst;
src += 1;
dst += 1;
nbytes -= bsize;
} while (nbytes >= bsize);
*(u64 *)walk->iv = *iv; /*covered*/
return nbytes;
}
static int cbc_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); /*covered*/
struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes)) {
nbytes = __cbc_encrypt(ctx, &walk); /*covered*/
err = skcipher_walk_done(&walk, nbytes);
}
return err; /*covered*/
}
static unsigned int __cbc_decrypt(struct bf_ctx *ctx,
struct skcipher_walk *walk)
{
unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr; /*covered*/
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 ivs[4 - 1];
u64 last_iv;
/* Start of the last block. */
src += nbytes / bsize - 1;
dst += nbytes / bsize - 1;
last_iv = *src;
/* Process four block batch */
if (nbytes >= bsize * 4) {
do {
nbytes -= bsize * 4 - bsize;
src -= 4 - 1; /*covered*/
dst -= 4 - 1;
ivs[0] = src[0];
ivs[1] = src[1];
ivs[2] = src[2];
blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src);
dst[1] ^= ivs[0];
dst[2] ^= ivs[1];
dst[3] ^= ivs[2];
nbytes -= bsize;
if (nbytes < bsize)
goto done;
*dst ^= *(src - 1); /*covered*/
src -= 1;
dst -= 1;
} while (nbytes >= bsize * 4);
}
/* Handle leftovers */
for (;;) {
blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src); /*covered*/
nbytes -= bsize;
if (nbytes < bsize)
break;
*dst ^= *(src - 1); /*covered*/
src -= 1;
dst -= 1;
}
done:
*dst ^= *(u64 *)walk->iv; /*covered*/
*(u64 *)walk->iv = last_iv;
return nbytes;
}
static int cbc_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); /*covered*/
struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes)) {
nbytes = __cbc_decrypt(ctx, &walk); /*covered*/
err = skcipher_walk_done(&walk, nbytes);
}
return err; /*covered*/
}
static void ctr_crypt_final(struct bf_ctx *ctx, struct skcipher_walk *walk)
{
u8 *ctrblk = walk->iv; /*covered*/
u8 keystream[BF_BLOCK_SIZE];
u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
unsigned int nbytes = walk->nbytes;
blowfish_enc_blk(ctx, keystream, ctrblk);
crypto_xor_cpy(dst, keystream, src, nbytes);
crypto_inc(ctrblk, BF_BLOCK_SIZE);
}
static unsigned int __ctr_crypt(struct bf_ctx *ctx, struct skcipher_walk *walk)
{
unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr; /*covered*/
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
__be64 ctrblocks[4];
/* Process four block batch */
if (nbytes >= bsize * 4) {
do {
if (dst != src) { /*covered*/
dst[0] = src[0]; /*covered*/
dst[1] = src[1];
dst[2] = src[2];
dst[3] = src[3];
}
/* create ctrblks for parallel encrypt */
ctrblocks[0] = cpu_to_be64(ctrblk++); /*covered*/
ctrblocks[1] = cpu_to_be64(ctrblk++);
ctrblocks[2] = cpu_to_be64(ctrblk++);
ctrblocks[3] = cpu_to_be64(ctrblk++);
blowfish_enc_blk_xor_4way(ctx, (u8 *)dst,
(u8 *)ctrblocks);
src += 4;
dst += 4;
} while ((nbytes -= bsize * 4) >= bsize * 4);
if (nbytes < bsize) /*covered*/
goto done;
}
/* Handle leftovers */
do {
if (dst != src) /*covered*/
*dst = *src; /*covered*/
ctrblocks[0] = cpu_to_be64(ctrblk++); /*covered*/
blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks);
src += 1;
dst += 1;
} while ((nbytes -= bsize) >= bsize);
done:
*(__be64 *)walk->iv = cpu_to_be64(ctrblk); /*covered*/
return nbytes;
}
static int ctr_crypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); /*covered*/
struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
nbytes = __ctr_crypt(ctx, &walk); /*covered*/
err = skcipher_walk_done(&walk, nbytes);
}
if (nbytes) { /*covered*/
ctr_crypt_final(ctx, &walk); /*covered*/
err = skcipher_walk_done(&walk, 0);
}
return err; /*covered*/
}
static struct crypto_alg bf_cipher_alg = {
.cra_name = "blowfish",
.cra_driver_name = "blowfish-asm",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = BF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct bf_ctx),
.cra_alignmask = 0,
.cra_module = THIS_MODULE,
.cra_u = {
.cipher = {
.cia_min_keysize = BF_MIN_KEY_SIZE,
.cia_max_keysize = BF_MAX_KEY_SIZE,
.cia_setkey = blowfish_setkey,
.cia_encrypt = blowfish_encrypt,
.cia_decrypt = blowfish_decrypt,
}
}
};
static struct skcipher_alg bf_skcipher_algs[] = {
{
.base.cra_name = "ecb(blowfish)",
.base.cra_driver_name = "ecb-blowfish-asm",
.base.cra_priority = 300,
.base.cra_blocksize = BF_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct bf_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = BF_MIN_KEY_SIZE,
.max_keysize = BF_MAX_KEY_SIZE,
.setkey = blowfish_setkey_skcipher,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
.base.cra_name = "cbc(blowfish)",
.base.cra_driver_name = "cbc-blowfish-asm",
.base.cra_priority = 300,
.base.cra_blocksize = BF_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct bf_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = BF_MIN_KEY_SIZE,
.max_keysize = BF_MAX_KEY_SIZE,
.ivsize = BF_BLOCK_SIZE,
.setkey = blowfish_setkey_skcipher,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
}, {
.base.cra_name = "ctr(blowfish)",
.base.cra_driver_name = "ctr-blowfish-asm",
.base.cra_priority = 300,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct bf_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = BF_MIN_KEY_SIZE,
.max_keysize = BF_MAX_KEY_SIZE,
.ivsize = BF_BLOCK_SIZE,
.chunksize = BF_BLOCK_SIZE,
.setkey = blowfish_setkey_skcipher,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
},
};
static bool is_blacklisted_cpu(void)
{
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return false;
if (boot_cpu_data.x86 == 0x0f) {
/*
* On Pentium 4, blowfish-x86_64 is slower than generic C
* implementation because use of 64bit rotates (which are really
* slow on P4). Therefore blacklist P4s.
*/
return true;
}
return false;
}
static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
static int __init init(void)
{
int err;
if (!force && is_blacklisted_cpu()) {
printk(KERN_INFO
"blowfish-x86_64: performance on this CPU "
"would be suboptimal: disabling "
"blowfish-x86_64.\n");
return -ENODEV;
}
err = crypto_register_alg(&bf_cipher_alg);
if (err)
return err;
err = crypto_register_skciphers(bf_skcipher_algs,
ARRAY_SIZE(bf_skcipher_algs));
if (err)
crypto_unregister_alg(&bf_cipher_alg);
return err;
}
static void __exit fini(void)
{
crypto_unregister_alg(&bf_cipher_alg);
crypto_unregister_skciphers(bf_skcipher_algs,
ARRAY_SIZE(bf_skcipher_algs));
}
module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized");
MODULE_ALIAS_CRYPTO("blowfish");
MODULE_ALIAS_CRYPTO("blowfish-asm");
/*
* Glue Code for x86_64/AVX2/AES-NI assembler optimized version of Camellia
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*/
#include <asm/crypto/camellia.h>
#include <asm/crypto/glue_helper.h>
#include <crypto/algapi.h>
#include <crypto/internal/simd.h>
#include <crypto/xts.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/types.h>
#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
#define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32
/* 32-way AVX2/AES-NI parallel cipher functions */
asmlinkage void camellia_ecb_enc_32way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void camellia_ecb_dec_32way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void camellia_cbc_dec_32way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void camellia_ctr_32way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
asmlinkage void camellia_xts_enc_32way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
asmlinkage void camellia_xts_dec_32way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
static const struct common_glue_ctx camellia_enc = {
.num_funcs = 4,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_32way) }
}, {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) }
}, {
.num_blocks = 2,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
} }
};
static const struct common_glue_ctx camellia_ctr = {
.num_funcs = 4,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_32way) }
}, {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) }
}, {
.num_blocks = 2,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
}, {
.num_blocks = 1,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
} }
};
static const struct common_glue_ctx camellia_enc_xts = {
.num_funcs = 3,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_32way) }
}, {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) }
} }
};
static const struct common_glue_ctx camellia_dec = {
.num_funcs = 4,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_32way) }
}, {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) }
}, {
.num_blocks = 2,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
} }
};
static const struct common_glue_ctx camellia_dec_cbc = {
.num_funcs = 4,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_32way) }
}, {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) }
}, {
.num_blocks = 2,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
}, {
.num_blocks = 1,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
} }
};
static const struct common_glue_ctx camellia_dec_xts = {
.num_funcs = 3,
.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_32way) }
}, {
.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) }
} }
};
static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen)
{
return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen, /*covered*/
&tfm->base.crt_flags);
}
static int ecb_encrypt(struct skcipher_request *req)
{
return glue_ecb_req_128bit(&camellia_enc, req);
}
static int ecb_decrypt(struct skcipher_request *req)
{
return glue_ecb_req_128bit(&camellia_dec, req);
}
static int cbc_encrypt(struct skcipher_request *req)
{
return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
req);
}
static int cbc_decrypt(struct skcipher_request *req)
{
return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req);
}
static int ctr_crypt(struct skcipher_request *req)
{
return glue_ctr_req_128bit(&camellia_ctr, req);
}
static int xts_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
return glue_xts_req_128bit(&camellia_enc_xts, req,
XTS_TWEAK_CAST(camellia_enc_blk),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static int xts_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
return glue_xts_req_128bit(&camellia_dec_xts, req,
XTS_TWEAK_CAST(camellia_enc_blk),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static struct skcipher_alg camellia_algs[] = {
{
.base.cra_name = "__ecb(camellia)",
.base.cra_driver_name = "__ecb-camellia-aesni-avx2",
.base.cra_priority = 500,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct camellia_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.setkey = camellia_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
.base.cra_name = "__cbc(camellia)",
.base.cra_driver_name = "__cbc-camellia-aesni-avx2",
.base.cra_priority = 500,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct camellia_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = camellia_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
}, {
.base.cra_name = "__ctr(camellia)",
.base.cra_driver_name = "__ctr-camellia-aesni-avx2",
.base.cra_priority = 500,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct camellia_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.chunksize = CAMELLIA_BLOCK_SIZE,
.setkey = camellia_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
}, {
.base.cra_name = "__xts(camellia)",
.base.cra_driver_name = "__xts-camellia-aesni-avx2",
.base.cra_priority = 500,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct camellia_xts_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = 2 * CAMELLIA_MIN_KEY_SIZE,
.max_keysize = 2 * CAMELLIA_MAX_KEY_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = xts_camellia_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
},
};
static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)];
static int __init camellia_aesni_init(void)
{
const char *feature_name;
if (!boot_cpu_has(X86_FEATURE_AVX) ||
!boot_cpu_has(X86_FEATURE_AVX2) ||
!boot_cpu_has(X86_FEATURE_AES) ||
!boot_cpu_has(X86_FEATURE_OSXSAVE)) {
pr_info("AVX2 or AES-NI instructions are not detected.\n");
return -ENODEV;
}
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
&feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
return simd_register_skciphers_compat(camellia_algs,
ARRAY_SIZE(camellia_algs),
camellia_simd_algs);
}
static void __exit camellia_aesni_fini(void)
{
simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs),
camellia_simd_algs);
}
module_init(camellia_aesni_init);
module_exit(camellia_aesni_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX2 optimized");
MODULE_ALIAS_CRYPTO("camellia");
MODULE_ALIAS_CRYPTO("camellia-asm");
/*
* Glue Code for assembler optimized version of Camellia
*
* Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* Camellia parts based on code by:
* Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <asm/unaligned.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
#include <crypto/algapi.h>
#include <asm/crypto/camellia.h>
#include <asm/crypto/glue_helper.h>
/* regular block cipher functions */
asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
EXPORT_SYMBOL_GPL(__camellia_enc_blk);
asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(camellia_dec_blk);
/* 2-way parallel cipher functions */
asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
EXPORT_SYMBOL_GPL(__camellia_enc_blk_2way);
asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(camellia_dec_blk_2way);
static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
camellia_enc_blk(crypto_tfm_ctx(tfm), dst, src);
}
static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
camellia_dec_blk(crypto_tfm_ctx(tfm), dst, src);
}
/* camellia sboxes */
__visible const u64 camellia_sp10011110[256] = {
0x7000007070707000ULL, 0x8200008282828200ULL, 0x2c00002c2c2c2c00ULL,
0xec0000ecececec00ULL, 0xb30000b3b3b3b300ULL, 0x2700002727272700ULL,
0xc00000c0c0c0c000ULL, 0xe50000e5e5e5e500ULL, 0xe40000e4e4e4e400ULL,
0x8500008585858500ULL, 0x5700005757575700ULL, 0x3500003535353500ULL,
0xea0000eaeaeaea00ULL, 0x0c00000c0c0c0c00ULL, 0xae0000aeaeaeae00ULL,
0x4100004141414100ULL, 0x2300002323232300ULL, 0xef0000efefefef00ULL,
0x6b00006b6b6b6b00ULL, 0x9300009393939300ULL, 0x4500004545454500ULL,
0x1900001919191900ULL, 0xa50000a5a5a5a500ULL, 0x2100002121212100ULL,
0xed0000edededed00ULL, 0x0e00000e0e0e0e00ULL, 0x4f00004f4f4f4f00ULL,
0x4e00004e4e4e4e00ULL, 0x1d00001d1d1d1d00ULL, 0x6500006565656500ULL,
0x9200009292929200ULL, 0xbd0000bdbdbdbd00ULL, 0x8600008686868600ULL,
0xb80000b8b8b8b800ULL, 0xaf0000afafafaf00ULL, 0x8f00008f8f8f8f00ULL,
0x7c00007c7c7c7c00ULL, 0xeb0000ebebebeb00ULL, 0x1f00001f1f1f1f00ULL,
0xce0000cececece00ULL, 0x3e00003e3e3e3e00ULL, 0x3000003030303000ULL,
0xdc0000dcdcdcdc00ULL, 0x5f00005f5f5f5f00ULL, 0x5e00005e5e5e5e00ULL,
0xc50000c5c5c5c500ULL, 0x0b00000b0b0b0b00ULL, 0x1a00001a1a1a1a00ULL,
0xa60000a6a6a6a600ULL, 0xe10000e1e1e1e100ULL, 0x3900003939393900ULL,
0xca0000cacacaca00ULL, 0xd50000d5d5d5d500ULL, 0x4700004747474700ULL,
0x5d00005d5d5d5d00ULL, 0x3d00003d3d3d3d00ULL, 0xd90000d9d9d9d900ULL,
0x0100000101010100ULL, 0x5a00005a5a5a5a00ULL, 0xd60000d6d6d6d600ULL,
0x5100005151515100ULL, 0x5600005656565600ULL, 0x6c00006c6c6c6c00ULL,
0x4d00004d4d4d4d00ULL, 0x8b00008b8b8b8b00ULL, 0x0d00000d0d0d0d00ULL,
0x9a00009a9a9a9a00ULL, 0x6600006666666600ULL, 0xfb0000fbfbfbfb00ULL,
0xcc0000cccccccc00ULL, 0xb00000b0b0b0b000ULL, 0x2d00002d2d2d2d00ULL,
0x7400007474747400ULL, 0x1200001212121200ULL, 0x2b00002b2b2b2b00ULL,
0x2000002020202000ULL, 0xf00000f0f0f0f000ULL, 0xb10000b1b1b1b100ULL,
0x8400008484848400ULL, 0x9900009999999900ULL, 0xdf0000dfdfdfdf00ULL,
0x4c00004c4c4c4c00ULL, 0xcb0000cbcbcbcb00ULL, 0xc20000c2c2c2c200ULL,
0x3400003434343400ULL, 0x7e00007e7e7e7e00ULL, 0x7600007676767600ULL,
0x0500000505050500ULL, 0x6d00006d6d6d6d00ULL, 0xb70000b7b7b7b700ULL,
0xa90000a9a9a9a900ULL, 0x3100003131313100ULL, 0xd10000d1d1d1d100ULL,
0x1700001717171700ULL, 0x0400000404040400ULL, 0xd70000d7d7d7d700ULL,
0x1400001414141400ULL, 0x5800005858585800ULL, 0x3a00003a3a3a3a00ULL,
0x6100006161616100ULL, 0xde0000dededede00ULL, 0x1b00001b1b1b1b00ULL,
0x1100001111111100ULL, 0x1c00001c1c1c1c00ULL, 0x3200003232323200ULL,
0x0f00000f0f0f0f00ULL, 0x9c00009c9c9c9c00ULL, 0x1600001616161600ULL,
0x5300005353535300ULL, 0x1800001818181800ULL, 0xf20000f2f2f2f200ULL,
0x2200002222222200ULL, 0xfe0000fefefefe00ULL, 0x4400004444444400ULL,
0xcf0000cfcfcfcf00ULL, 0xb20000b2b2b2b200ULL, 0xc30000c3c3c3c300ULL,
0xb50000b5b5b5b500ULL, 0x7a00007a7a7a7a00ULL, 0x9100009191919100ULL,
0x2400002424242400ULL, 0x0800000808080800ULL, 0xe80000e8e8e8e800ULL,
0xa80000a8a8a8a800ULL, 0x6000006060606000ULL, 0xfc0000fcfcfcfc00ULL,
0x6900006969696900ULL, 0x5000005050505000ULL, 0xaa0000aaaaaaaa00ULL,
0xd00000d0d0d0d000ULL, 0xa00000a0a0a0a000ULL, 0x7d00007d7d7d7d00ULL,
0xa10000a1a1a1a100ULL, 0x8900008989898900ULL, 0x6200006262626200ULL,
0x9700009797979700ULL, 0x5400005454545400ULL, 0x5b00005b5b5b5b00ULL,
0x1e00001e1e1e1e00ULL, 0x9500009595959500ULL, 0xe00000e0e0e0e000ULL,
0xff0000ffffffff00ULL, 0x6400006464646400ULL, 0xd20000d2d2d2d200ULL,
0x1000001010101000ULL, 0xc40000c4c4c4c400ULL, 0x0000000000000000ULL,
0x4800004848484800ULL, 0xa30000a3a3a3a300ULL, 0xf70000f7f7f7f700ULL,
0x7500007575757500ULL, 0xdb0000dbdbdbdb00ULL, 0x8a00008a8a8a8a00ULL,
0x0300000303030300ULL, 0xe60000e6e6e6e600ULL, 0xda0000dadadada00ULL,
0x0900000909090900ULL, 0x3f00003f3f3f3f00ULL, 0xdd0000dddddddd00ULL,
0x9400009494949400ULL, 0x8700008787878700ULL, 0x5c00005c5c5c5c00ULL,
0x8300008383838300ULL, 0x0200000202020200ULL, 0xcd0000cdcdcdcd00ULL,
0x4a00004a4a4a4a00ULL, 0x9000009090909000ULL, 0x3300003333333300ULL,
0x7300007373737300ULL, 0x6700006767676700ULL, 0xf60000f6f6f6f600ULL,
0xf30000f3f3f3f300ULL, 0x9d00009d9d9d9d00ULL, 0x7f00007f7f7f7f00ULL,
0xbf0000bfbfbfbf00ULL, 0xe20000e2e2e2e200ULL, 0x5200005252525200ULL,
0x9b00009b9b9b9b00ULL, 0xd80000d8d8d8d800ULL, 0x2600002626262600ULL,
0xc80000c8c8c8c800ULL, 0x3700003737373700ULL, 0xc60000c6c6c6c600ULL,
0x3b00003b3b3b3b00ULL, 0x8100008181818100ULL, 0x9600009696969600ULL,
0x6f00006f6f6f6f00ULL, 0x4b00004b4b4b4b00ULL, 0x1300001313131300ULL,
0xbe0000bebebebe00ULL, 0x6300006363636300ULL, 0x2e00002e2e2e2e00ULL,
0xe90000e9e9e9e900ULL, 0x7900007979797900ULL, 0xa70000a7a7a7a700ULL,
0x8c00008c8c8c8c00ULL, 0x9f00009f9f9f9f00ULL, 0x6e00006e6e6e6e00ULL,
0xbc0000bcbcbcbc00ULL, 0x8e00008e8e8e8e00ULL, 0x2900002929292900ULL,
0xf50000f5f5f5f500ULL, 0xf90000f9f9f9f900ULL, 0xb60000b6b6b6b600ULL,
0x2f00002f2f2f2f00ULL, 0xfd0000fdfdfdfd00ULL, 0xb40000b4b4b4b400ULL,
0x5900005959595900ULL, 0x7800007878787800ULL, 0x9800009898989800ULL,
0x0600000606060600ULL, 0x6a00006a6a6a6a00ULL, 0xe70000e7e7e7e700ULL,
0x4600004646464600ULL, 0x7100007171717100ULL, 0xba0000babababa00ULL,
0xd40000d4d4d4d400ULL, 0x2500002525252500ULL, 0xab0000abababab00ULL,
0x4200004242424200ULL, 0x8800008888888800ULL, 0xa20000a2a2a2a200ULL,
0x8d00008d8d8d8d00ULL, 0xfa0000fafafafa00ULL, 0x7200007272727200ULL,
0x0700000707070700ULL, 0xb90000b9b9b9b900ULL, 0x5500005555555500ULL,
0xf80000f8f8f8f800ULL, 0xee0000eeeeeeee00ULL, 0xac0000acacacac00ULL,
0x0a00000a0a0a0a00ULL, 0x3600003636363600ULL, 0x4900004949494900ULL,
0x2a00002a2a2a2a00ULL, 0x6800006868686800ULL, 0x3c00003c3c3c3c00ULL,
0x3800003838383800ULL, 0xf10000f1f1f1f100ULL, 0xa40000a4a4a4a400ULL,
0x4000004040404000ULL, 0x2800002828282800ULL, 0xd30000d3d3d3d300ULL,
0x7b00007b7b7b7b00ULL, 0xbb0000bbbbbbbb00ULL, 0xc90000c9c9c9c900ULL,
0x4300004343434300ULL, 0xc10000c1c1c1c100ULL, 0x1500001515151500ULL,
0xe30000e3e3e3e300ULL, 0xad0000adadadad00ULL, 0xf40000f4f4f4f400ULL,
0x7700007777777700ULL, 0xc70000c7c7c7c700ULL, 0x8000008080808000ULL,
0x9e00009e9e9e9e00ULL,
};
__visible const u64 camellia_sp22000222[256] = {
0xe0e0000000e0e0e0ULL, 0x0505000000050505ULL, 0x5858000000585858ULL,
0xd9d9000000d9d9d9ULL, 0x6767000000676767ULL, 0x4e4e0000004e4e4eULL,
0x8181000000818181ULL, 0xcbcb000000cbcbcbULL, 0xc9c9000000c9c9c9ULL,
0x0b0b0000000b0b0bULL, 0xaeae000000aeaeaeULL, 0x6a6a0000006a6a6aULL,
0xd5d5000000d5d5d5ULL, 0x1818000000181818ULL, 0x5d5d0000005d5d5dULL,
0x8282000000828282ULL, 0x4646000000464646ULL, 0xdfdf000000dfdfdfULL,
0xd6d6000000d6d6d6ULL, 0x2727000000272727ULL, 0x8a8a0000008a8a8aULL,
0x3232000000323232ULL, 0x4b4b0000004b4b4bULL, 0x4242000000424242ULL,
0xdbdb000000dbdbdbULL, 0x1c1c0000001c1c1cULL, 0x9e9e0000009e9e9eULL,
0x9c9c0000009c9c9cULL, 0x3a3a0000003a3a3aULL, 0xcaca000000cacacaULL,
0x2525000000252525ULL, 0x7b7b0000007b7b7bULL, 0x0d0d0000000d0d0dULL,
0x7171000000717171ULL, 0x5f5f0000005f5f5fULL, 0x1f1f0000001f1f1fULL,
0xf8f8000000f8f8f8ULL, 0xd7d7000000d7d7d7ULL, 0x3e3e0000003e3e3eULL,
0x9d9d0000009d9d9dULL, 0x7c7c0000007c7c7cULL, 0x6060000000606060ULL,
0xb9b9000000b9b9b9ULL, 0xbebe000000bebebeULL, 0xbcbc000000bcbcbcULL,
0x8b8b0000008b8b8bULL, 0x1616000000161616ULL, 0x3434000000343434ULL,
0x4d4d0000004d4d4dULL, 0xc3c3000000c3c3c3ULL, 0x7272000000727272ULL,
0x9595000000959595ULL, 0xabab000000abababULL, 0x8e8e0000008e8e8eULL,
0xbaba000000bababaULL, 0x7a7a0000007a7a7aULL, 0xb3b3000000b3b3b3ULL,
0x0202000000020202ULL, 0xb4b4000000b4b4b4ULL, 0xadad000000adadadULL,
0xa2a2000000a2a2a2ULL, 0xacac000000acacacULL, 0xd8d8000000d8d8d8ULL,
0x9a9a0000009a9a9aULL, 0x1717000000171717ULL, 0x1a1a0000001a1a1aULL,
0x3535000000353535ULL, 0xcccc000000ccccccULL, 0xf7f7000000f7f7f7ULL,
0x9999000000999999ULL, 0x6161000000616161ULL, 0x5a5a0000005a5a5aULL,
0xe8e8000000e8e8e8ULL, 0x2424000000242424ULL, 0x5656000000565656ULL,
0x4040000000404040ULL, 0xe1e1000000e1e1e1ULL, 0x6363000000636363ULL,
0x0909000000090909ULL, 0x3333000000333333ULL, 0xbfbf000000bfbfbfULL,
0x9898000000989898ULL, 0x9797000000979797ULL, 0x8585000000858585ULL,
0x6868000000686868ULL, 0xfcfc000000fcfcfcULL, 0xecec000000ecececULL,
0x0a0a0000000a0a0aULL, 0xdada000000dadadaULL, 0x6f6f0000006f6f6fULL,
0x5353000000535353ULL, 0x6262000000626262ULL, 0xa3a3000000a3a3a3ULL,
0x2e2e0000002e2e2eULL, 0x0808000000080808ULL, 0xafaf000000afafafULL,
0x2828000000282828ULL, 0xb0b0000000b0b0b0ULL, 0x7474000000747474ULL,
0xc2c2000000c2c2c2ULL, 0xbdbd000000bdbdbdULL, 0x3636000000363636ULL,
0x2222000000222222ULL, 0x3838000000383838ULL, 0x6464000000646464ULL,
0x1e1e0000001e1e1eULL, 0x3939000000393939ULL, 0x2c2c0000002c2c2cULL,
0xa6a6000000a6a6a6ULL, 0x3030000000303030ULL, 0xe5e5000000e5e5e5ULL,
0x4444000000444444ULL, 0xfdfd000000fdfdfdULL, 0x8888000000888888ULL,
0x9f9f0000009f9f9fULL, 0x6565000000656565ULL, 0x8787000000878787ULL,
0x6b6b0000006b6b6bULL, 0xf4f4000000f4f4f4ULL, 0x2323000000232323ULL,
0x4848000000484848ULL, 0x1010000000101010ULL, 0xd1d1000000d1d1d1ULL,
0x5151000000515151ULL, 0xc0c0000000c0c0c0ULL, 0xf9f9000000f9f9f9ULL,
0xd2d2000000d2d2d2ULL, 0xa0a0000000a0a0a0ULL, 0x5555000000555555ULL,
0xa1a1000000a1a1a1ULL, 0x4141000000414141ULL, 0xfafa000000fafafaULL,
0x4343000000434343ULL, 0x1313000000131313ULL, 0xc4c4000000c4c4c4ULL,
0x2f2f0000002f2f2fULL, 0xa8a8000000a8a8a8ULL, 0xb6b6000000b6b6b6ULL,
0x3c3c0000003c3c3cULL, 0x2b2b0000002b2b2bULL, 0xc1c1000000c1c1c1ULL,
0xffff000000ffffffULL, 0xc8c8000000c8c8c8ULL, 0xa5a5000000a5a5a5ULL,
0x2020000000202020ULL, 0x8989000000898989ULL, 0x0000000000000000ULL,
0x9090000000909090ULL, 0x4747000000474747ULL, 0xefef000000efefefULL,
0xeaea000000eaeaeaULL, 0xb7b7000000b7b7b7ULL, 0x1515000000151515ULL,
0x0606000000060606ULL, 0xcdcd000000cdcdcdULL, 0xb5b5000000b5b5b5ULL,
0x1212000000121212ULL, 0x7e7e0000007e7e7eULL, 0xbbbb000000bbbbbbULL,
0x2929000000292929ULL, 0x0f0f0000000f0f0fULL, 0xb8b8000000b8b8b8ULL,
0x0707000000070707ULL, 0x0404000000040404ULL, 0x9b9b0000009b9b9bULL,
0x9494000000949494ULL, 0x2121000000212121ULL, 0x6666000000666666ULL,
0xe6e6000000e6e6e6ULL, 0xcece000000cececeULL, 0xeded000000edededULL,
0xe7e7000000e7e7e7ULL, 0x3b3b0000003b3b3bULL, 0xfefe000000fefefeULL,
0x7f7f0000007f7f7fULL, 0xc5c5000000c5c5c5ULL, 0xa4a4000000a4a4a4ULL,
0x3737000000373737ULL, 0xb1b1000000b1b1b1ULL, 0x4c4c0000004c4c4cULL,
0x9191000000919191ULL, 0x6e6e0000006e6e6eULL, 0x8d8d0000008d8d8dULL,
0x7676000000767676ULL, 0x0303000000030303ULL, 0x2d2d0000002d2d2dULL,
0xdede000000dededeULL, 0x9696000000969696ULL, 0x2626000000262626ULL,
0x7d7d0000007d7d7dULL, 0xc6c6000000c6c6c6ULL, 0x5c5c0000005c5c5cULL,
0xd3d3000000d3d3d3ULL, 0xf2f2000000f2f2f2ULL, 0x4f4f0000004f4f4fULL,
0x1919000000191919ULL, 0x3f3f0000003f3f3fULL, 0xdcdc000000dcdcdcULL,
0x7979000000797979ULL, 0x1d1d0000001d1d1dULL, 0x5252000000525252ULL,
0xebeb000000ebebebULL, 0xf3f3000000f3f3f3ULL, 0x6d6d0000006d6d6dULL,
0x5e5e0000005e5e5eULL, 0xfbfb000000fbfbfbULL, 0x6969000000696969ULL,
0xb2b2000000b2b2b2ULL, 0xf0f0000000f0f0f0ULL, 0x3131000000313131ULL,
0x0c0c0000000c0c0cULL, 0xd4d4000000d4d4d4ULL, 0xcfcf000000cfcfcfULL,
0x8c8c0000008c8c8cULL, 0xe2e2000000e2e2e2ULL, 0x7575000000757575ULL,
0xa9a9000000a9a9a9ULL, 0x4a4a0000004a4a4aULL, 0x5757000000575757ULL,
0x8484000000848484ULL, 0x1111000000111111ULL, 0x4545000000454545ULL,
0x1b1b0000001b1b1bULL, 0xf5f5000000f5f5f5ULL, 0xe4e4000000e4e4e4ULL,
0x0e0e0000000e0e0eULL, 0x7373000000737373ULL, 0xaaaa000000aaaaaaULL,
0xf1f1000000f1f1f1ULL, 0xdddd000000ddddddULL, 0x5959000000595959ULL,
0x1414000000141414ULL, 0x6c6c0000006c6c6cULL, 0x9292000000929292ULL,
0x5454000000545454ULL, 0xd0d0000000d0d0d0ULL, 0x7878000000787878ULL,
0x7070000000707070ULL, 0xe3e3000000e3e3e3ULL, 0x4949000000494949ULL,
0x8080000000808080ULL, 0x5050000000505050ULL, 0xa7a7000000a7a7a7ULL,
0xf6f6000000f6f6f6ULL, 0x7777000000777777ULL, 0x9393000000939393ULL,
0x8686000000868686ULL, 0x8383000000838383ULL, 0x2a2a0000002a2a2aULL,
0xc7c7000000c7c7c7ULL, 0x5b5b0000005b5b5bULL, 0xe9e9000000e9e9e9ULL,
0xeeee000000eeeeeeULL, 0x8f8f0000008f8f8fULL, 0x0101000000010101ULL,
0x3d3d0000003d3d3dULL,
};
__visible const u64 camellia_sp03303033[256] = {
0x0038380038003838ULL, 0x0041410041004141ULL, 0x0016160016001616ULL,
0x0076760076007676ULL, 0x00d9d900d900d9d9ULL, 0x0093930093009393ULL,
0x0060600060006060ULL, 0x00f2f200f200f2f2ULL, 0x0072720072007272ULL,
0x00c2c200c200c2c2ULL, 0x00abab00ab00ababULL, 0x009a9a009a009a9aULL,
0x0075750075007575ULL, 0x0006060006000606ULL, 0x0057570057005757ULL,
0x00a0a000a000a0a0ULL, 0x0091910091009191ULL, 0x00f7f700f700f7f7ULL,
0x00b5b500b500b5b5ULL, 0x00c9c900c900c9c9ULL, 0x00a2a200a200a2a2ULL,
0x008c8c008c008c8cULL, 0x00d2d200d200d2d2ULL, 0x0090900090009090ULL,
0x00f6f600f600f6f6ULL, 0x0007070007000707ULL, 0x00a7a700a700a7a7ULL,
0x0027270027002727ULL, 0x008e8e008e008e8eULL, 0x00b2b200b200b2b2ULL,
0x0049490049004949ULL, 0x00dede00de00dedeULL, 0x0043430043004343ULL,
0x005c5c005c005c5cULL, 0x00d7d700d700d7d7ULL, 0x00c7c700c700c7c7ULL,
0x003e3e003e003e3eULL, 0x00f5f500f500f5f5ULL, 0x008f8f008f008f8fULL,
0x0067670067006767ULL, 0x001f1f001f001f1fULL, 0x0018180018001818ULL,
0x006e6e006e006e6eULL, 0x00afaf00af00afafULL, 0x002f2f002f002f2fULL,
0x00e2e200e200e2e2ULL, 0x0085850085008585ULL, 0x000d0d000d000d0dULL,
0x0053530053005353ULL, 0x00f0f000f000f0f0ULL, 0x009c9c009c009c9cULL,
0x0065650065006565ULL, 0x00eaea00ea00eaeaULL, 0x00a3a300a300a3a3ULL,
0x00aeae00ae00aeaeULL, 0x009e9e009e009e9eULL, 0x00ecec00ec00ececULL,
0x0080800080008080ULL, 0x002d2d002d002d2dULL, 0x006b6b006b006b6bULL,
0x00a8a800a800a8a8ULL, 0x002b2b002b002b2bULL, 0x0036360036003636ULL,
0x00a6a600a600a6a6ULL, 0x00c5c500c500c5c5ULL, 0x0086860086008686ULL,
0x004d4d004d004d4dULL, 0x0033330033003333ULL, 0x00fdfd00fd00fdfdULL,
0x0066660066006666ULL, 0x0058580058005858ULL, 0x0096960096009696ULL,
0x003a3a003a003a3aULL, 0x0009090009000909ULL, 0x0095950095009595ULL,
0x0010100010001010ULL, 0x0078780078007878ULL, 0x00d8d800d800d8d8ULL,
0x0042420042004242ULL, 0x00cccc00cc00ccccULL, 0x00efef00ef00efefULL,
0x0026260026002626ULL, 0x00e5e500e500e5e5ULL, 0x0061610061006161ULL,
0x001a1a001a001a1aULL, 0x003f3f003f003f3fULL, 0x003b3b003b003b3bULL,
0x0082820082008282ULL, 0x00b6b600b600b6b6ULL, 0x00dbdb00db00dbdbULL,
0x00d4d400d400d4d4ULL, 0x0098980098009898ULL, 0x00e8e800e800e8e8ULL,
0x008b8b008b008b8bULL, 0x0002020002000202ULL, 0x00ebeb00eb00ebebULL,
0x000a0a000a000a0aULL, 0x002c2c002c002c2cULL, 0x001d1d001d001d1dULL,
0x00b0b000b000b0b0ULL, 0x006f6f006f006f6fULL, 0x008d8d008d008d8dULL,
0x0088880088008888ULL, 0x000e0e000e000e0eULL, 0x0019190019001919ULL,
0x0087870087008787ULL, 0x004e4e004e004e4eULL, 0x000b0b000b000b0bULL,
0x00a9a900a900a9a9ULL, 0x000c0c000c000c0cULL, 0x0079790079007979ULL,
0x0011110011001111ULL, 0x007f7f007f007f7fULL, 0x0022220022002222ULL,
0x00e7e700e700e7e7ULL, 0x0059590059005959ULL, 0x00e1e100e100e1e1ULL,
0x00dada00da00dadaULL, 0x003d3d003d003d3dULL, 0x00c8c800c800c8c8ULL,
0x0012120012001212ULL, 0x0004040004000404ULL, 0x0074740074007474ULL,
0x0054540054005454ULL, 0x0030300030003030ULL, 0x007e7e007e007e7eULL,
0x00b4b400b400b4b4ULL, 0x0028280028002828ULL, 0x0055550055005555ULL,
0x0068680068006868ULL, 0x0050500050005050ULL, 0x00bebe00be00bebeULL,
0x00d0d000d000d0d0ULL, 0x00c4c400c400c4c4ULL, 0x0031310031003131ULL,
0x00cbcb00cb00cbcbULL, 0x002a2a002a002a2aULL, 0x00adad00ad00adadULL,
0x000f0f000f000f0fULL, 0x00caca00ca00cacaULL, 0x0070700070007070ULL,
0x00ffff00ff00ffffULL, 0x0032320032003232ULL, 0x0069690069006969ULL,
0x0008080008000808ULL, 0x0062620062006262ULL, 0x0000000000000000ULL,
0x0024240024002424ULL, 0x00d1d100d100d1d1ULL, 0x00fbfb00fb00fbfbULL,
0x00baba00ba00babaULL, 0x00eded00ed00ededULL, 0x0045450045004545ULL,
0x0081810081008181ULL, 0x0073730073007373ULL, 0x006d6d006d006d6dULL,
0x0084840084008484ULL, 0x009f9f009f009f9fULL, 0x00eeee00ee00eeeeULL,
0x004a4a004a004a4aULL, 0x00c3c300c300c3c3ULL, 0x002e2e002e002e2eULL,
0x00c1c100c100c1c1ULL, 0x0001010001000101ULL, 0x00e6e600e600e6e6ULL,
0x0025250025002525ULL, 0x0048480048004848ULL, 0x0099990099009999ULL,
0x00b9b900b900b9b9ULL, 0x00b3b300b300b3b3ULL, 0x007b7b007b007b7bULL,
0x00f9f900f900f9f9ULL, 0x00cece00ce00ceceULL, 0x00bfbf00bf00bfbfULL,
0x00dfdf00df00dfdfULL, 0x0071710071007171ULL, 0x0029290029002929ULL,
0x00cdcd00cd00cdcdULL, 0x006c6c006c006c6cULL, 0x0013130013001313ULL,
0x0064640064006464ULL, 0x009b9b009b009b9bULL, 0x0063630063006363ULL,
0x009d9d009d009d9dULL, 0x00c0c000c000c0c0ULL, 0x004b4b004b004b4bULL,
0x00b7b700b700b7b7ULL, 0x00a5a500a500a5a5ULL, 0x0089890089008989ULL,
0x005f5f005f005f5fULL, 0x00b1b100b100b1b1ULL, 0x0017170017001717ULL,
0x00f4f400f400f4f4ULL, 0x00bcbc00bc00bcbcULL, 0x00d3d300d300d3d3ULL,
0x0046460046004646ULL, 0x00cfcf00cf00cfcfULL, 0x0037370037003737ULL,
0x005e5e005e005e5eULL, 0x0047470047004747ULL, 0x0094940094009494ULL,
0x00fafa00fa00fafaULL, 0x00fcfc00fc00fcfcULL, 0x005b5b005b005b5bULL,
0x0097970097009797ULL, 0x00fefe00fe00fefeULL, 0x005a5a005a005a5aULL,
0x00acac00ac00acacULL, 0x003c3c003c003c3cULL, 0x004c4c004c004c4cULL,
0x0003030003000303ULL, 0x0035350035003535ULL, 0x00f3f300f300f3f3ULL,
0x0023230023002323ULL, 0x00b8b800b800b8b8ULL, 0x005d5d005d005d5dULL,
0x006a6a006a006a6aULL, 0x0092920092009292ULL, 0x00d5d500d500d5d5ULL,
0x0021210021002121ULL, 0x0044440044004444ULL, 0x0051510051005151ULL,
0x00c6c600c600c6c6ULL, 0x007d7d007d007d7dULL, 0x0039390039003939ULL,
0x0083830083008383ULL, 0x00dcdc00dc00dcdcULL, 0x00aaaa00aa00aaaaULL,
0x007c7c007c007c7cULL, 0x0077770077007777ULL, 0x0056560056005656ULL,
0x0005050005000505ULL, 0x001b1b001b001b1bULL, 0x00a4a400a400a4a4ULL,
0x0015150015001515ULL, 0x0034340034003434ULL, 0x001e1e001e001e1eULL,
0x001c1c001c001c1cULL, 0x00f8f800f800f8f8ULL, 0x0052520052005252ULL,
0x0020200020002020ULL, 0x0014140014001414ULL, 0x00e9e900e900e9e9ULL,
0x00bdbd00bd00bdbdULL, 0x00dddd00dd00ddddULL, 0x00e4e400e400e4e4ULL,
0x00a1a100a100a1a1ULL, 0x00e0e000e000e0e0ULL, 0x008a8a008a008a8aULL,
0x00f1f100f100f1f1ULL, 0x00d6d600d600d6d6ULL, 0x007a7a007a007a7aULL,
0x00bbbb00bb00bbbbULL, 0x00e3e300e300e3e3ULL, 0x0040400040004040ULL,
0x004f4f004f004f4fULL,
};
__visible const u64 camellia_sp00444404[256] = {
0x0000707070700070ULL, 0x00002c2c2c2c002cULL, 0x0000b3b3b3b300b3ULL,
0x0000c0c0c0c000c0ULL, 0x0000e4e4e4e400e4ULL, 0x0000575757570057ULL,
0x0000eaeaeaea00eaULL, 0x0000aeaeaeae00aeULL, 0x0000232323230023ULL,
0x00006b6b6b6b006bULL, 0x0000454545450045ULL, 0x0000a5a5a5a500a5ULL,
0x0000edededed00edULL, 0x00004f4f4f4f004fULL, 0x00001d1d1d1d001dULL,
0x0000929292920092ULL, 0x0000868686860086ULL, 0x0000afafafaf00afULL,
0x00007c7c7c7c007cULL, 0x00001f1f1f1f001fULL, 0x00003e3e3e3e003eULL,
0x0000dcdcdcdc00dcULL, 0x00005e5e5e5e005eULL, 0x00000b0b0b0b000bULL,
0x0000a6a6a6a600a6ULL, 0x0000393939390039ULL, 0x0000d5d5d5d500d5ULL,
0x00005d5d5d5d005dULL, 0x0000d9d9d9d900d9ULL, 0x00005a5a5a5a005aULL,
0x0000515151510051ULL, 0x00006c6c6c6c006cULL, 0x00008b8b8b8b008bULL,
0x00009a9a9a9a009aULL, 0x0000fbfbfbfb00fbULL, 0x0000b0b0b0b000b0ULL,
0x0000747474740074ULL, 0x00002b2b2b2b002bULL, 0x0000f0f0f0f000f0ULL,
0x0000848484840084ULL, 0x0000dfdfdfdf00dfULL, 0x0000cbcbcbcb00cbULL,
0x0000343434340034ULL, 0x0000767676760076ULL, 0x00006d6d6d6d006dULL,
0x0000a9a9a9a900a9ULL, 0x0000d1d1d1d100d1ULL, 0x0000040404040004ULL,
0x0000141414140014ULL, 0x00003a3a3a3a003aULL, 0x0000dededede00deULL,
0x0000111111110011ULL, 0x0000323232320032ULL, 0x00009c9c9c9c009cULL,
0x0000535353530053ULL, 0x0000f2f2f2f200f2ULL, 0x0000fefefefe00feULL,
0x0000cfcfcfcf00cfULL, 0x0000c3c3c3c300c3ULL, 0x00007a7a7a7a007aULL,
0x0000242424240024ULL, 0x0000e8e8e8e800e8ULL, 0x0000606060600060ULL,
0x0000696969690069ULL, 0x0000aaaaaaaa00aaULL, 0x0000a0a0a0a000a0ULL,
0x0000a1a1a1a100a1ULL, 0x0000626262620062ULL, 0x0000545454540054ULL,
0x00001e1e1e1e001eULL, 0x0000e0e0e0e000e0ULL, 0x0000646464640064ULL,
0x0000101010100010ULL, 0x0000000000000000ULL, 0x0000a3a3a3a300a3ULL,
0x0000757575750075ULL, 0x00008a8a8a8a008aULL, 0x0000e6e6e6e600e6ULL,
0x0000090909090009ULL, 0x0000dddddddd00ddULL, 0x0000878787870087ULL,
0x0000838383830083ULL, 0x0000cdcdcdcd00cdULL, 0x0000909090900090ULL,
0x0000737373730073ULL, 0x0000f6f6f6f600f6ULL, 0x00009d9d9d9d009dULL,
0x0000bfbfbfbf00bfULL, 0x0000525252520052ULL, 0x0000d8d8d8d800d8ULL,
0x0000c8c8c8c800c8ULL, 0x0000c6c6c6c600c6ULL, 0x0000818181810081ULL,
0x00006f6f6f6f006fULL, 0x0000131313130013ULL, 0x0000636363630063ULL,
0x0000e9e9e9e900e9ULL, 0x0000a7a7a7a700a7ULL, 0x00009f9f9f9f009fULL,
0x0000bcbcbcbc00bcULL, 0x0000292929290029ULL, 0x0000f9f9f9f900f9ULL,
0x00002f2f2f2f002fULL, 0x0000b4b4b4b400b4ULL, 0x0000787878780078ULL,
0x0000060606060006ULL, 0x0000e7e7e7e700e7ULL, 0x0000717171710071ULL,
0x0000d4d4d4d400d4ULL, 0x0000abababab00abULL, 0x0000888888880088ULL,
0x00008d8d8d8d008dULL, 0x0000727272720072ULL, 0x0000b9b9b9b900b9ULL,
0x0000f8f8f8f800f8ULL, 0x0000acacacac00acULL, 0x0000363636360036ULL,
0x00002a2a2a2a002aULL, 0x00003c3c3c3c003cULL, 0x0000f1f1f1f100f1ULL,
0x0000404040400040ULL, 0x0000d3d3d3d300d3ULL, 0x0000bbbbbbbb00bbULL,
0x0000434343430043ULL, 0x0000151515150015ULL, 0x0000adadadad00adULL,
0x0000777777770077ULL, 0x0000808080800080ULL, 0x0000828282820082ULL,
0x0000ecececec00ecULL, 0x0000272727270027ULL, 0x0000e5e5e5e500e5ULL,
0x0000858585850085ULL, 0x0000353535350035ULL, 0x00000c0c0c0c000cULL,
0x0000414141410041ULL, 0x0000efefefef00efULL, 0x0000939393930093ULL,
0x0000191919190019ULL, 0x0000212121210021ULL, 0x00000e0e0e0e000eULL,
0x00004e4e4e4e004eULL, 0x0000656565650065ULL, 0x0000bdbdbdbd00bdULL,
0x0000b8b8b8b800b8ULL, 0x00008f8f8f8f008fULL, 0x0000ebebebeb00ebULL,
0x0000cececece00ceULL, 0x0000303030300030ULL, 0x00005f5f5f5f005fULL,
0x0000c5c5c5c500c5ULL, 0x00001a1a1a1a001aULL, 0x0000e1e1e1e100e1ULL,
0x0000cacacaca00caULL, 0x0000474747470047ULL, 0x00003d3d3d3d003dULL,
0x0000010101010001ULL, 0x0000d6d6d6d600d6ULL, 0x0000565656560056ULL,
0x00004d4d4d4d004dULL, 0x00000d0d0d0d000dULL, 0x0000666666660066ULL,
0x0000cccccccc00ccULL, 0x00002d2d2d2d002dULL, 0x0000121212120012ULL,
0x0000202020200020ULL, 0x0000b1b1b1b100b1ULL, 0x0000999999990099ULL,
0x00004c4c4c4c004cULL, 0x0000c2c2c2c200c2ULL, 0x00007e7e7e7e007eULL,
0x0000050505050005ULL, 0x0000b7b7b7b700b7ULL, 0x0000313131310031ULL,
0x0000171717170017ULL, 0x0000d7d7d7d700d7ULL, 0x0000585858580058ULL,
0x0000616161610061ULL, 0x00001b1b1b1b001bULL, 0x00001c1c1c1c001cULL,
0x00000f0f0f0f000fULL, 0x0000161616160016ULL, 0x0000181818180018ULL,
0x0000222222220022ULL, 0x0000444444440044ULL, 0x0000b2b2b2b200b2ULL,
0x0000b5b5b5b500b5ULL, 0x0000919191910091ULL, 0x0000080808080008ULL,
0x0000a8a8a8a800a8ULL, 0x0000fcfcfcfc00fcULL, 0x0000505050500050ULL,
0x0000d0d0d0d000d0ULL, 0x00007d7d7d7d007dULL, 0x0000898989890089ULL,
0x0000979797970097ULL, 0x00005b5b5b5b005bULL, 0x0000959595950095ULL,
0x0000ffffffff00ffULL, 0x0000d2d2d2d200d2ULL, 0x0000c4c4c4c400c4ULL,
0x0000484848480048ULL, 0x0000f7f7f7f700f7ULL, 0x0000dbdbdbdb00dbULL,
0x0000030303030003ULL, 0x0000dadadada00daULL, 0x00003f3f3f3f003fULL,
0x0000949494940094ULL, 0x00005c5c5c5c005cULL, 0x0000020202020002ULL,
0x00004a4a4a4a004aULL, 0x0000333333330033ULL, 0x0000676767670067ULL,
0x0000f3f3f3f300f3ULL, 0x00007f7f7f7f007fULL, 0x0000e2e2e2e200e2ULL,
0x00009b9b9b9b009bULL, 0x0000262626260026ULL, 0x0000373737370037ULL,
0x00003b3b3b3b003bULL, 0x0000969696960096ULL, 0x00004b4b4b4b004bULL,
0x0000bebebebe00beULL, 0x00002e2e2e2e002eULL, 0x0000797979790079ULL,
0x00008c8c8c8c008cULL, 0x00006e6e6e6e006eULL, 0x00008e8e8e8e008eULL,
0x0000f5f5f5f500f5ULL, 0x0000b6b6b6b600b6ULL, 0x0000fdfdfdfd00fdULL,
0x0000595959590059ULL, 0x0000989898980098ULL, 0x00006a6a6a6a006aULL,
0x0000464646460046ULL, 0x0000babababa00baULL, 0x0000252525250025ULL,
0x0000424242420042ULL, 0x0000a2a2a2a200a2ULL, 0x0000fafafafa00faULL,
0x0000070707070007ULL, 0x0000555555550055ULL, 0x0000eeeeeeee00eeULL,
0x00000a0a0a0a000aULL, 0x0000494949490049ULL, 0x0000686868680068ULL,
0x0000383838380038ULL, 0x0000a4a4a4a400a4ULL, 0x0000282828280028ULL,
0x00007b7b7b7b007bULL, 0x0000c9c9c9c900c9ULL, 0x0000c1c1c1c100c1ULL,
0x0000e3e3e3e300e3ULL, 0x0000f4f4f4f400f4ULL, 0x0000c7c7c7c700c7ULL,
0x00009e9e9e9e009eULL,
};
__visible const u64 camellia_sp02220222[256] = {
0x00e0e0e000e0e0e0ULL, 0x0005050500050505ULL, 0x0058585800585858ULL,
0x00d9d9d900d9d9d9ULL, 0x0067676700676767ULL, 0x004e4e4e004e4e4eULL,
0x0081818100818181ULL, 0x00cbcbcb00cbcbcbULL, 0x00c9c9c900c9c9c9ULL,
0x000b0b0b000b0b0bULL, 0x00aeaeae00aeaeaeULL, 0x006a6a6a006a6a6aULL,
0x00d5d5d500d5d5d5ULL, 0x0018181800181818ULL, 0x005d5d5d005d5d5dULL,
0x0082828200828282ULL, 0x0046464600464646ULL, 0x00dfdfdf00dfdfdfULL,
0x00d6d6d600d6d6d6ULL, 0x0027272700272727ULL, 0x008a8a8a008a8a8aULL,
0x0032323200323232ULL, 0x004b4b4b004b4b4bULL, 0x0042424200424242ULL,
0x00dbdbdb00dbdbdbULL, 0x001c1c1c001c1c1cULL, 0x009e9e9e009e9e9eULL,
0x009c9c9c009c9c9cULL, 0x003a3a3a003a3a3aULL, 0x00cacaca00cacacaULL,
0x0025252500252525ULL, 0x007b7b7b007b7b7bULL, 0x000d0d0d000d0d0dULL,
0x0071717100717171ULL, 0x005f5f5f005f5f5fULL, 0x001f1f1f001f1f1fULL,
0x00f8f8f800f8f8f8ULL, 0x00d7d7d700d7d7d7ULL, 0x003e3e3e003e3e3eULL,
0x009d9d9d009d9d9dULL, 0x007c7c7c007c7c7cULL, 0x0060606000606060ULL,
0x00b9b9b900b9b9b9ULL, 0x00bebebe00bebebeULL, 0x00bcbcbc00bcbcbcULL,
0x008b8b8b008b8b8bULL, 0x0016161600161616ULL, 0x0034343400343434ULL,
0x004d4d4d004d4d4dULL, 0x00c3c3c300c3c3c3ULL, 0x0072727200727272ULL,
0x0095959500959595ULL, 0x00ababab00abababULL, 0x008e8e8e008e8e8eULL,
0x00bababa00bababaULL, 0x007a7a7a007a7a7aULL, 0x00b3b3b300b3b3b3ULL,
0x0002020200020202ULL, 0x00b4b4b400b4b4b4ULL, 0x00adadad00adadadULL,
0x00a2a2a200a2a2a2ULL, 0x00acacac00acacacULL, 0x00d8d8d800d8d8d8ULL,
0x009a9a9a009a9a9aULL, 0x0017171700171717ULL, 0x001a1a1a001a1a1aULL,
0x0035353500353535ULL, 0x00cccccc00ccccccULL, 0x00f7f7f700f7f7f7ULL,
0x0099999900999999ULL, 0x0061616100616161ULL, 0x005a5a5a005a5a5aULL,
0x00e8e8e800e8e8e8ULL, 0x0024242400242424ULL, 0x0056565600565656ULL,
0x0040404000404040ULL, 0x00e1e1e100e1e1e1ULL, 0x0063636300636363ULL,
0x0009090900090909ULL, 0x0033333300333333ULL, 0x00bfbfbf00bfbfbfULL,
0x0098989800989898ULL, 0x0097979700979797ULL, 0x0085858500858585ULL,
0x0068686800686868ULL, 0x00fcfcfc00fcfcfcULL, 0x00ececec00ecececULL,
0x000a0a0a000a0a0aULL, 0x00dadada00dadadaULL, 0x006f6f6f006f6f6fULL,
0x0053535300535353ULL, 0x0062626200626262ULL, 0x00a3a3a300a3a3a3ULL,
0x002e2e2e002e2e2eULL, 0x0008080800080808ULL, 0x00afafaf00afafafULL,
0x0028282800282828ULL, 0x00b0b0b000b0b0b0ULL, 0x0074747400747474ULL,
0x00c2c2c200c2c2c2ULL, 0x00bdbdbd00bdbdbdULL, 0x0036363600363636ULL,
0x0022222200222222ULL, 0x0038383800383838ULL, 0x0064646400646464ULL,
0x001e1e1e001e1e1eULL, 0x0039393900393939ULL, 0x002c2c2c002c2c2cULL,
0x00a6a6a600a6a6a6ULL, 0x0030303000303030ULL, 0x00e5e5e500e5e5e5ULL,
0x0044444400444444ULL, 0x00fdfdfd00fdfdfdULL, 0x0088888800888888ULL,
0x009f9f9f009f9f9fULL, 0x0065656500656565ULL, 0x0087878700878787ULL,
0x006b6b6b006b6b6bULL, 0x00f4f4f400f4f4f4ULL, 0x0023232300232323ULL,
0x0048484800484848ULL, 0x0010101000101010ULL, 0x00d1d1d100d1d1d1ULL,
0x0051515100515151ULL, 0x00c0c0c000c0c0c0ULL, 0x00f9f9f900f9f9f9ULL,
0x00d2d2d200d2d2d2ULL, 0x00a0a0a000a0a0a0ULL, 0x0055555500555555ULL,
0x00a1a1a100a1a1a1ULL, 0x0041414100414141ULL, 0x00fafafa00fafafaULL,
0x0043434300434343ULL, 0x0013131300131313ULL, 0x00c4c4c400c4c4c4ULL,
0x002f2f2f002f2f2fULL, 0x00a8a8a800a8a8a8ULL, 0x00b6b6b600b6b6b6ULL,
0x003c3c3c003c3c3cULL, 0x002b2b2b002b2b2bULL, 0x00c1c1c100c1c1c1ULL,
0x00ffffff00ffffffULL, 0x00c8c8c800c8c8c8ULL, 0x00a5a5a500a5a5a5ULL,
0x0020202000202020ULL, 0x0089898900898989ULL, 0x0000000000000000ULL,
0x0090909000909090ULL, 0x0047474700474747ULL, 0x00efefef00efefefULL,
0x00eaeaea00eaeaeaULL, 0x00b7b7b700b7b7b7ULL, 0x0015151500151515ULL,
0x0006060600060606ULL, 0x00cdcdcd00cdcdcdULL, 0x00b5b5b500b5b5b5ULL,
0x0012121200121212ULL, 0x007e7e7e007e7e7eULL, 0x00bbbbbb00bbbbbbULL,
0x0029292900292929ULL, 0x000f0f0f000f0f0fULL, 0x00b8b8b800b8b8b8ULL,
0x0007070700070707ULL, 0x0004040400040404ULL, 0x009b9b9b009b9b9bULL,
0x0094949400949494ULL, 0x0021212100212121ULL, 0x0066666600666666ULL,
0x00e6e6e600e6e6e6ULL, 0x00cecece00cececeULL, 0x00ededed00edededULL,
0x00e7e7e700e7e7e7ULL, 0x003b3b3b003b3b3bULL, 0x00fefefe00fefefeULL,
0x007f7f7f007f7f7fULL, 0x00c5c5c500c5c5c5ULL, 0x00a4a4a400a4a4a4ULL,
0x0037373700373737ULL, 0x00b1b1b100b1b1b1ULL, 0x004c4c4c004c4c4cULL,
0x0091919100919191ULL, 0x006e6e6e006e6e6eULL, 0x008d8d8d008d8d8dULL,
0x0076767600767676ULL, 0x0003030300030303ULL, 0x002d2d2d002d2d2dULL,
0x00dedede00dededeULL, 0x0096969600969696ULL, 0x0026262600262626ULL,
0x007d7d7d007d7d7dULL, 0x00c6c6c600c6c6c6ULL, 0x005c5c5c005c5c5cULL,
0x00d3d3d300d3d3d3ULL, 0x00f2f2f200f2f2f2ULL, 0x004f4f4f004f4f4fULL,
0x0019191900191919ULL, 0x003f3f3f003f3f3fULL, 0x00dcdcdc00dcdcdcULL,
0x0079797900797979ULL, 0x001d1d1d001d1d1dULL, 0x0052525200525252ULL,
0x00ebebeb00ebebebULL, 0x00f3f3f300f3f3f3ULL, 0x006d6d6d006d6d6dULL,
0x005e5e5e005e5e5eULL, 0x00fbfbfb00fbfbfbULL, 0x0069696900696969ULL,
0x00b2b2b200b2b2b2ULL, 0x00f0f0f000f0f0f0ULL, 0x0031313100313131ULL,
0x000c0c0c000c0c0cULL, 0x00d4d4d400d4d4d4ULL, 0x00cfcfcf00cfcfcfULL,
0x008c8c8c008c8c8cULL, 0x00e2e2e200e2e2e2ULL, 0x0075757500757575ULL,
0x00a9a9a900a9a9a9ULL, 0x004a4a4a004a4a4aULL, 0x0057575700575757ULL,
0x0084848400848484ULL, 0x0011111100111111ULL, 0x0045454500454545ULL,
0x001b1b1b001b1b1bULL, 0x00f5f5f500f5f5f5ULL, 0x00e4e4e400e4e4e4ULL,
0x000e0e0e000e0e0eULL, 0x0073737300737373ULL, 0x00aaaaaa00aaaaaaULL,
0x00f1f1f100f1f1f1ULL, 0x00dddddd00ddddddULL, 0x0059595900595959ULL,
0x0014141400141414ULL, 0x006c6c6c006c6c6cULL, 0x0092929200929292ULL,
0x0054545400545454ULL, 0x00d0d0d000d0d0d0ULL, 0x0078787800787878ULL,
0x0070707000707070ULL, 0x00e3e3e300e3e3e3ULL, 0x0049494900494949ULL,
0x0080808000808080ULL, 0x0050505000505050ULL, 0x00a7a7a700a7a7a7ULL,
0x00f6f6f600f6f6f6ULL, 0x0077777700777777ULL, 0x0093939300939393ULL,
0x0086868600868686ULL, 0x0083838300838383ULL, 0x002a2a2a002a2a2aULL,
0x00c7c7c700c7c7c7ULL, 0x005b5b5b005b5b5bULL, 0x00e9e9e900e9e9e9ULL,
0x00eeeeee00eeeeeeULL, 0x008f8f8f008f8f8fULL, 0x0001010100010101ULL,
0x003d3d3d003d3d3dULL,
};
__visible const u64 camellia_sp30333033[256] = {
0x3800383838003838ULL, 0x4100414141004141ULL, 0x1600161616001616ULL,
0x7600767676007676ULL, 0xd900d9d9d900d9d9ULL, 0x9300939393009393ULL,
0x6000606060006060ULL, 0xf200f2f2f200f2f2ULL, 0x7200727272007272ULL,
0xc200c2c2c200c2c2ULL, 0xab00ababab00ababULL, 0x9a009a9a9a009a9aULL,
0x7500757575007575ULL, 0x0600060606000606ULL, 0x5700575757005757ULL,
0xa000a0a0a000a0a0ULL, 0x9100919191009191ULL, 0xf700f7f7f700f7f7ULL,
0xb500b5b5b500b5b5ULL, 0xc900c9c9c900c9c9ULL, 0xa200a2a2a200a2a2ULL,
0x8c008c8c8c008c8cULL, 0xd200d2d2d200d2d2ULL, 0x9000909090009090ULL,
0xf600f6f6f600f6f6ULL, 0x0700070707000707ULL, 0xa700a7a7a700a7a7ULL,
0x2700272727002727ULL, 0x8e008e8e8e008e8eULL, 0xb200b2b2b200b2b2ULL,
0x4900494949004949ULL, 0xde00dedede00dedeULL, 0x4300434343004343ULL,
0x5c005c5c5c005c5cULL, 0xd700d7d7d700d7d7ULL, 0xc700c7c7c700c7c7ULL,
0x3e003e3e3e003e3eULL, 0xf500f5f5f500f5f5ULL, 0x8f008f8f8f008f8fULL,
0x6700676767006767ULL, 0x1f001f1f1f001f1fULL, 0x1800181818001818ULL,
0x6e006e6e6e006e6eULL, 0xaf00afafaf00afafULL, 0x2f002f2f2f002f2fULL,
0xe200e2e2e200e2e2ULL, 0x8500858585008585ULL, 0x0d000d0d0d000d0dULL,
0x5300535353005353ULL, 0xf000f0f0f000f0f0ULL, 0x9c009c9c9c009c9cULL,
0x6500656565006565ULL, 0xea00eaeaea00eaeaULL, 0xa300a3a3a300a3a3ULL,
0xae00aeaeae00aeaeULL, 0x9e009e9e9e009e9eULL, 0xec00ececec00ececULL,
0x8000808080008080ULL, 0x2d002d2d2d002d2dULL, 0x6b006b6b6b006b6bULL,
0xa800a8a8a800a8a8ULL, 0x2b002b2b2b002b2bULL, 0x3600363636003636ULL,
0xa600a6a6a600a6a6ULL, 0xc500c5c5c500c5c5ULL, 0x8600868686008686ULL,
0x4d004d4d4d004d4dULL, 0x3300333333003333ULL, 0xfd00fdfdfd00fdfdULL,
0x6600666666006666ULL, 0x5800585858005858ULL, 0x9600969696009696ULL,
0x3a003a3a3a003a3aULL, 0x0900090909000909ULL, 0x9500959595009595ULL,
0x1000101010001010ULL, 0x7800787878007878ULL, 0xd800d8d8d800d8d8ULL,
0x4200424242004242ULL, 0xcc00cccccc00ccccULL, 0xef00efefef00efefULL,
0x2600262626002626ULL, 0xe500e5e5e500e5e5ULL, 0x6100616161006161ULL,
0x1a001a1a1a001a1aULL, 0x3f003f3f3f003f3fULL, 0x3b003b3b3b003b3bULL,
0x8200828282008282ULL, 0xb600b6b6b600b6b6ULL, 0xdb00dbdbdb00dbdbULL,
0xd400d4d4d400d4d4ULL, 0x9800989898009898ULL, 0xe800e8e8e800e8e8ULL,
0x8b008b8b8b008b8bULL, 0x0200020202000202ULL, 0xeb00ebebeb00ebebULL,
0x0a000a0a0a000a0aULL, 0x2c002c2c2c002c2cULL, 0x1d001d1d1d001d1dULL,
0xb000b0b0b000b0b0ULL, 0x6f006f6f6f006f6fULL, 0x8d008d8d8d008d8dULL,
0x8800888888008888ULL, 0x0e000e0e0e000e0eULL, 0x1900191919001919ULL,
0x8700878787008787ULL, 0x4e004e4e4e004e4eULL, 0x0b000b0b0b000b0bULL,
0xa900a9a9a900a9a9ULL, 0x0c000c0c0c000c0cULL, 0x7900797979007979ULL,
0x1100111111001111ULL, 0x7f007f7f7f007f7fULL, 0x2200222222002222ULL,
0xe700e7e7e700e7e7ULL, 0x5900595959005959ULL, 0xe100e1e1e100e1e1ULL,
0xda00dadada00dadaULL, 0x3d003d3d3d003d3dULL, 0xc800c8c8c800c8c8ULL,
0x1200121212001212ULL, 0x0400040404000404ULL, 0x7400747474007474ULL,
0x5400545454005454ULL, 0x3000303030003030ULL, 0x7e007e7e7e007e7eULL,
0xb400b4b4b400b4b4ULL, 0x2800282828002828ULL, 0x5500555555005555ULL,
0x6800686868006868ULL, 0x5000505050005050ULL, 0xbe00bebebe00bebeULL,
0xd000d0d0d000d0d0ULL, 0xc400c4c4c400c4c4ULL, 0x3100313131003131ULL,
0xcb00cbcbcb00cbcbULL, 0x2a002a2a2a002a2aULL, 0xad00adadad00adadULL,
0x0f000f0f0f000f0fULL, 0xca00cacaca00cacaULL, 0x7000707070007070ULL,
0xff00ffffff00ffffULL, 0x3200323232003232ULL, 0x6900696969006969ULL,
0x0800080808000808ULL, 0x6200626262006262ULL, 0x0000000000000000ULL,
0x2400242424002424ULL, 0xd100d1d1d100d1d1ULL, 0xfb00fbfbfb00fbfbULL,
0xba00bababa00babaULL, 0xed00ededed00ededULL, 0x4500454545004545ULL,
0x8100818181008181ULL, 0x7300737373007373ULL, 0x6d006d6d6d006d6dULL,
0x8400848484008484ULL, 0x9f009f9f9f009f9fULL, 0xee00eeeeee00eeeeULL,
0x4a004a4a4a004a4aULL, 0xc300c3c3c300c3c3ULL, 0x2e002e2e2e002e2eULL,
0xc100c1c1c100c1c1ULL, 0x0100010101000101ULL, 0xe600e6e6e600e6e6ULL,
0x2500252525002525ULL, 0x4800484848004848ULL, 0x9900999999009999ULL,
0xb900b9b9b900b9b9ULL, 0xb300b3b3b300b3b3ULL, 0x7b007b7b7b007b7bULL,
0xf900f9f9f900f9f9ULL, 0xce00cecece00ceceULL, 0xbf00bfbfbf00bfbfULL,
0xdf00dfdfdf00dfdfULL, 0x7100717171007171ULL, 0x2900292929002929ULL,
0xcd00cdcdcd00cdcdULL, 0x6c006c6c6c006c6cULL, 0x1300131313001313ULL,
0x6400646464006464ULL, 0x9b009b9b9b009b9bULL, 0x6300636363006363ULL,
0x9d009d9d9d009d9dULL, 0xc000c0c0c000c0c0ULL, 0x4b004b4b4b004b4bULL,
0xb700b7b7b700b7b7ULL, 0xa500a5a5a500a5a5ULL, 0x8900898989008989ULL,
0x5f005f5f5f005f5fULL, 0xb100b1b1b100b1b1ULL, 0x1700171717001717ULL,
0xf400f4f4f400f4f4ULL, 0xbc00bcbcbc00bcbcULL, 0xd300d3d3d300d3d3ULL,
0x4600464646004646ULL, 0xcf00cfcfcf00cfcfULL, 0x3700373737003737ULL,
0x5e005e5e5e005e5eULL, 0x4700474747004747ULL, 0x9400949494009494ULL,
0xfa00fafafa00fafaULL, 0xfc00fcfcfc00fcfcULL, 0x5b005b5b5b005b5bULL,
0x9700979797009797ULL, 0xfe00fefefe00fefeULL, 0x5a005a5a5a005a5aULL,
0xac00acacac00acacULL, 0x3c003c3c3c003c3cULL, 0x4c004c4c4c004c4cULL,
0x0300030303000303ULL, 0x3500353535003535ULL, 0xf300f3f3f300f3f3ULL,
0x2300232323002323ULL, 0xb800b8b8b800b8b8ULL, 0x5d005d5d5d005d5dULL,
0x6a006a6a6a006a6aULL, 0x9200929292009292ULL, 0xd500d5d5d500d5d5ULL,
0x2100212121002121ULL, 0x4400444444004444ULL, 0x5100515151005151ULL,
0xc600c6c6c600c6c6ULL, 0x7d007d7d7d007d7dULL, 0x3900393939003939ULL,
0x8300838383008383ULL, 0xdc00dcdcdc00dcdcULL, 0xaa00aaaaaa00aaaaULL,
0x7c007c7c7c007c7cULL, 0x7700777777007777ULL, 0x5600565656005656ULL,
0x0500050505000505ULL, 0x1b001b1b1b001b1bULL, 0xa400a4a4a400a4a4ULL,
0x1500151515001515ULL, 0x3400343434003434ULL, 0x1e001e1e1e001e1eULL,
0x1c001c1c1c001c1cULL, 0xf800f8f8f800f8f8ULL, 0x5200525252005252ULL,
0x2000202020002020ULL, 0x1400141414001414ULL, 0xe900e9e9e900e9e9ULL,
0xbd00bdbdbd00bdbdULL, 0xdd00dddddd00ddddULL, 0xe400e4e4e400e4e4ULL,
0xa100a1a1a100a1a1ULL, 0xe000e0e0e000e0e0ULL, 0x8a008a8a8a008a8aULL,
0xf100f1f1f100f1f1ULL, 0xd600d6d6d600d6d6ULL, 0x7a007a7a7a007a7aULL,
0xbb00bbbbbb00bbbbULL, 0xe300e3e3e300e3e3ULL, 0x4000404040004040ULL,
0x4f004f4f4f004f4fULL,
};
__visible const u64 camellia_sp44044404[256] = {
0x7070007070700070ULL, 0x2c2c002c2c2c002cULL, 0xb3b300b3b3b300b3ULL,
0xc0c000c0c0c000c0ULL, 0xe4e400e4e4e400e4ULL, 0x5757005757570057ULL,
0xeaea00eaeaea00eaULL, 0xaeae00aeaeae00aeULL, 0x2323002323230023ULL,
0x6b6b006b6b6b006bULL, 0x4545004545450045ULL, 0xa5a500a5a5a500a5ULL,
0xeded00ededed00edULL, 0x4f4f004f4f4f004fULL, 0x1d1d001d1d1d001dULL,
0x9292009292920092ULL, 0x8686008686860086ULL, 0xafaf00afafaf00afULL,
0x7c7c007c7c7c007cULL, 0x1f1f001f1f1f001fULL, 0x3e3e003e3e3e003eULL,
0xdcdc00dcdcdc00dcULL, 0x5e5e005e5e5e005eULL, 0x0b0b000b0b0b000bULL,
0xa6a600a6a6a600a6ULL, 0x3939003939390039ULL, 0xd5d500d5d5d500d5ULL,
0x5d5d005d5d5d005dULL, 0xd9d900d9d9d900d9ULL, 0x5a5a005a5a5a005aULL,
0x5151005151510051ULL, 0x6c6c006c6c6c006cULL, 0x8b8b008b8b8b008bULL,
0x9a9a009a9a9a009aULL, 0xfbfb00fbfbfb00fbULL, 0xb0b000b0b0b000b0ULL,
0x7474007474740074ULL, 0x2b2b002b2b2b002bULL, 0xf0f000f0f0f000f0ULL,
0x8484008484840084ULL, 0xdfdf00dfdfdf00dfULL, 0xcbcb00cbcbcb00cbULL,
0x3434003434340034ULL, 0x7676007676760076ULL, 0x6d6d006d6d6d006dULL,
0xa9a900a9a9a900a9ULL, 0xd1d100d1d1d100d1ULL, 0x0404000404040004ULL,
0x1414001414140014ULL, 0x3a3a003a3a3a003aULL, 0xdede00dedede00deULL,
0x1111001111110011ULL, 0x3232003232320032ULL, 0x9c9c009c9c9c009cULL,
0x5353005353530053ULL, 0xf2f200f2f2f200f2ULL, 0xfefe00fefefe00feULL,
0xcfcf00cfcfcf00cfULL, 0xc3c300c3c3c300c3ULL, 0x7a7a007a7a7a007aULL,
0x2424002424240024ULL, 0xe8e800e8e8e800e8ULL, 0x6060006060600060ULL,
0x6969006969690069ULL, 0xaaaa00aaaaaa00aaULL, 0xa0a000a0a0a000a0ULL,
0xa1a100a1a1a100a1ULL, 0x6262006262620062ULL, 0x5454005454540054ULL,
0x1e1e001e1e1e001eULL, 0xe0e000e0e0e000e0ULL, 0x6464006464640064ULL,
0x1010001010100010ULL, 0x0000000000000000ULL, 0xa3a300a3a3a300a3ULL,
0x7575007575750075ULL, 0x8a8a008a8a8a008aULL, 0xe6e600e6e6e600e6ULL,
0x0909000909090009ULL, 0xdddd00dddddd00ddULL, 0x8787008787870087ULL,
0x8383008383830083ULL, 0xcdcd00cdcdcd00cdULL, 0x9090009090900090ULL,
0x7373007373730073ULL, 0xf6f600f6f6f600f6ULL, 0x9d9d009d9d9d009dULL,
0xbfbf00bfbfbf00bfULL, 0x5252005252520052ULL, 0xd8d800d8d8d800d8ULL,
0xc8c800c8c8c800c8ULL, 0xc6c600c6c6c600c6ULL, 0x8181008181810081ULL,
0x6f6f006f6f6f006fULL, 0x1313001313130013ULL, 0x6363006363630063ULL,
0xe9e900e9e9e900e9ULL, 0xa7a700a7a7a700a7ULL, 0x9f9f009f9f9f009fULL,
0xbcbc00bcbcbc00bcULL, 0x2929002929290029ULL, 0xf9f900f9f9f900f9ULL,
0x2f2f002f2f2f002fULL, 0xb4b400b4b4b400b4ULL, 0x7878007878780078ULL,
0x0606000606060006ULL, 0xe7e700e7e7e700e7ULL, 0x7171007171710071ULL,
0xd4d400d4d4d400d4ULL, 0xabab00ababab00abULL, 0x8888008888880088ULL,
0x8d8d008d8d8d008dULL, 0x7272007272720072ULL, 0xb9b900b9b9b900b9ULL,
0xf8f800f8f8f800f8ULL, 0xacac00acacac00acULL, 0x3636003636360036ULL,
0x2a2a002a2a2a002aULL, 0x3c3c003c3c3c003cULL, 0xf1f100f1f1f100f1ULL,
0x4040004040400040ULL, 0xd3d300d3d3d300d3ULL, 0xbbbb00bbbbbb00bbULL,
0x4343004343430043ULL, 0x1515001515150015ULL, 0xadad00adadad00adULL,
0x7777007777770077ULL, 0x8080008080800080ULL, 0x8282008282820082ULL,
0xecec00ececec00ecULL, 0x2727002727270027ULL, 0xe5e500e5e5e500e5ULL,
0x8585008585850085ULL, 0x3535003535350035ULL, 0x0c0c000c0c0c000cULL,
0x4141004141410041ULL, 0xefef00efefef00efULL, 0x9393009393930093ULL,
0x1919001919190019ULL, 0x2121002121210021ULL, 0x0e0e000e0e0e000eULL,
0x4e4e004e4e4e004eULL, 0x6565006565650065ULL, 0xbdbd00bdbdbd00bdULL,
0xb8b800b8b8b800b8ULL, 0x8f8f008f8f8f008fULL, 0xebeb00ebebeb00ebULL,
0xcece00cecece00ceULL, 0x3030003030300030ULL, 0x5f5f005f5f5f005fULL,
0xc5c500c5c5c500c5ULL, 0x1a1a001a1a1a001aULL, 0xe1e100e1e1e100e1ULL,
0xcaca00cacaca00caULL, 0x4747004747470047ULL, 0x3d3d003d3d3d003dULL,
0x0101000101010001ULL, 0xd6d600d6d6d600d6ULL, 0x5656005656560056ULL,
0x4d4d004d4d4d004dULL, 0x0d0d000d0d0d000dULL, 0x6666006666660066ULL,
0xcccc00cccccc00ccULL, 0x2d2d002d2d2d002dULL, 0x1212001212120012ULL,
0x2020002020200020ULL, 0xb1b100b1b1b100b1ULL, 0x9999009999990099ULL,
0x4c4c004c4c4c004cULL, 0xc2c200c2c2c200c2ULL, 0x7e7e007e7e7e007eULL,
0x0505000505050005ULL, 0xb7b700b7b7b700b7ULL, 0x3131003131310031ULL,
0x1717001717170017ULL, 0xd7d700d7d7d700d7ULL, 0x5858005858580058ULL,
0x6161006161610061ULL, 0x1b1b001b1b1b001bULL, 0x1c1c001c1c1c001cULL,
0x0f0f000f0f0f000fULL, 0x1616001616160016ULL, 0x1818001818180018ULL,
0x2222002222220022ULL, 0x4444004444440044ULL, 0xb2b200b2b2b200b2ULL,
0xb5b500b5b5b500b5ULL, 0x9191009191910091ULL, 0x0808000808080008ULL,
0xa8a800a8a8a800a8ULL, 0xfcfc00fcfcfc00fcULL, 0x5050005050500050ULL,
0xd0d000d0d0d000d0ULL, 0x7d7d007d7d7d007dULL, 0x8989008989890089ULL,
0x9797009797970097ULL, 0x5b5b005b5b5b005bULL, 0x9595009595950095ULL,
0xffff00ffffff00ffULL, 0xd2d200d2d2d200d2ULL, 0xc4c400c4c4c400c4ULL,
0x4848004848480048ULL, 0xf7f700f7f7f700f7ULL, 0xdbdb00dbdbdb00dbULL,
0x0303000303030003ULL, 0xdada00dadada00daULL, 0x3f3f003f3f3f003fULL,
0x9494009494940094ULL, 0x5c5c005c5c5c005cULL, 0x0202000202020002ULL,
0x4a4a004a4a4a004aULL, 0x3333003333330033ULL, 0x6767006767670067ULL,
0xf3f300f3f3f300f3ULL, 0x7f7f007f7f7f007fULL, 0xe2e200e2e2e200e2ULL,
0x9b9b009b9b9b009bULL, 0x2626002626260026ULL, 0x3737003737370037ULL,
0x3b3b003b3b3b003bULL, 0x9696009696960096ULL, 0x4b4b004b4b4b004bULL,
0xbebe00bebebe00beULL, 0x2e2e002e2e2e002eULL, 0x7979007979790079ULL,
0x8c8c008c8c8c008cULL, 0x6e6e006e6e6e006eULL, 0x8e8e008e8e8e008eULL,
0xf5f500f5f5f500f5ULL, 0xb6b600b6b6b600b6ULL, 0xfdfd00fdfdfd00fdULL,
0x5959005959590059ULL, 0x9898009898980098ULL, 0x6a6a006a6a6a006aULL,
0x4646004646460046ULL, 0xbaba00bababa00baULL, 0x2525002525250025ULL,
0x4242004242420042ULL, 0xa2a200a2a2a200a2ULL, 0xfafa00fafafa00faULL,
0x0707000707070007ULL, 0x5555005555550055ULL, 0xeeee00eeeeee00eeULL,
0x0a0a000a0a0a000aULL, 0x4949004949490049ULL, 0x6868006868680068ULL,
0x3838003838380038ULL, 0xa4a400a4a4a400a4ULL, 0x2828002828280028ULL,
0x7b7b007b7b7b007bULL, 0xc9c900c9c9c900c9ULL, 0xc1c100c1c1c100c1ULL,
0xe3e300e3e3e300e3ULL, 0xf4f400f4f4f400f4ULL, 0xc7c700c7c7c700c7ULL,
0x9e9e009e9e9e009eULL,
};
__visible const u64 camellia_sp11101110[256] = {
0x7070700070707000ULL, 0x8282820082828200ULL, 0x2c2c2c002c2c2c00ULL,
0xececec00ececec00ULL, 0xb3b3b300b3b3b300ULL, 0x2727270027272700ULL,
0xc0c0c000c0c0c000ULL, 0xe5e5e500e5e5e500ULL, 0xe4e4e400e4e4e400ULL,
0x8585850085858500ULL, 0x5757570057575700ULL, 0x3535350035353500ULL,
0xeaeaea00eaeaea00ULL, 0x0c0c0c000c0c0c00ULL, 0xaeaeae00aeaeae00ULL,
0x4141410041414100ULL, 0x2323230023232300ULL, 0xefefef00efefef00ULL,
0x6b6b6b006b6b6b00ULL, 0x9393930093939300ULL, 0x4545450045454500ULL,
0x1919190019191900ULL, 0xa5a5a500a5a5a500ULL, 0x2121210021212100ULL,
0xededed00ededed00ULL, 0x0e0e0e000e0e0e00ULL, 0x4f4f4f004f4f4f00ULL,
0x4e4e4e004e4e4e00ULL, 0x1d1d1d001d1d1d00ULL, 0x6565650065656500ULL,
0x9292920092929200ULL, 0xbdbdbd00bdbdbd00ULL, 0x8686860086868600ULL,
0xb8b8b800b8b8b800ULL, 0xafafaf00afafaf00ULL, 0x8f8f8f008f8f8f00ULL,
0x7c7c7c007c7c7c00ULL, 0xebebeb00ebebeb00ULL, 0x1f1f1f001f1f1f00ULL,
0xcecece00cecece00ULL, 0x3e3e3e003e3e3e00ULL, 0x3030300030303000ULL,
0xdcdcdc00dcdcdc00ULL, 0x5f5f5f005f5f5f00ULL, 0x5e5e5e005e5e5e00ULL,
0xc5c5c500c5c5c500ULL, 0x0b0b0b000b0b0b00ULL, 0x1a1a1a001a1a1a00ULL,
0xa6a6a600a6a6a600ULL, 0xe1e1e100e1e1e100ULL, 0x3939390039393900ULL,
0xcacaca00cacaca00ULL, 0xd5d5d500d5d5d500ULL, 0x4747470047474700ULL,
0x5d5d5d005d5d5d00ULL, 0x3d3d3d003d3d3d00ULL, 0xd9d9d900d9d9d900ULL,
0x0101010001010100ULL, 0x5a5a5a005a5a5a00ULL, 0xd6d6d600d6d6d600ULL,
0x5151510051515100ULL, 0x5656560056565600ULL, 0x6c6c6c006c6c6c00ULL,
0x4d4d4d004d4d4d00ULL, 0x8b8b8b008b8b8b00ULL, 0x0d0d0d000d0d0d00ULL,
0x9a9a9a009a9a9a00ULL, 0x6666660066666600ULL, 0xfbfbfb00fbfbfb00ULL,
0xcccccc00cccccc00ULL, 0xb0b0b000b0b0b000ULL, 0x2d2d2d002d2d2d00ULL,
0x7474740074747400ULL, 0x1212120012121200ULL, 0x2b2b2b002b2b2b00ULL,
0x2020200020202000ULL, 0xf0f0f000f0f0f000ULL, 0xb1b1b100b1b1b100ULL,
0x8484840084848400ULL, 0x9999990099999900ULL, 0xdfdfdf00dfdfdf00ULL,
0x4c4c4c004c4c4c00ULL, 0xcbcbcb00cbcbcb00ULL, 0xc2c2c200c2c2c200ULL,
0x3434340034343400ULL, 0x7e7e7e007e7e7e00ULL, 0x7676760076767600ULL,
0x0505050005050500ULL, 0x6d6d6d006d6d6d00ULL, 0xb7b7b700b7b7b700ULL,
0xa9a9a900a9a9a900ULL, 0x3131310031313100ULL, 0xd1d1d100d1d1d100ULL,
0x1717170017171700ULL, 0x0404040004040400ULL, 0xd7d7d700d7d7d700ULL,
0x1414140014141400ULL, 0x5858580058585800ULL, 0x3a3a3a003a3a3a00ULL,
0x6161610061616100ULL, 0xdedede00dedede00ULL, 0x1b1b1b001b1b1b00ULL,
0x1111110011111100ULL, 0x1c1c1c001c1c1c00ULL, 0x3232320032323200ULL,
0x0f0f0f000f0f0f00ULL, 0x9c9c9c009c9c9c00ULL, 0x1616160016161600ULL,
0x5353530053535300ULL, 0x1818180018181800ULL, 0xf2f2f200f2f2f200ULL,
0x2222220022222200ULL, 0xfefefe00fefefe00ULL, 0x4444440044444400ULL,
0xcfcfcf00cfcfcf00ULL, 0xb2b2b200b2b2b200ULL, 0xc3c3c300c3c3c300ULL,
0xb5b5b500b5b5b500ULL, 0x7a7a7a007a7a7a00ULL, 0x9191910091919100ULL,
0x2424240024242400ULL, 0x0808080008080800ULL, 0xe8e8e800e8e8e800ULL,
0xa8a8a800a8a8a800ULL, 0x6060600060606000ULL, 0xfcfcfc00fcfcfc00ULL,
0x6969690069696900ULL, 0x5050500050505000ULL, 0xaaaaaa00aaaaaa00ULL,
0xd0d0d000d0d0d000ULL, 0xa0a0a000a0a0a000ULL, 0x7d7d7d007d7d7d00ULL,
0xa1a1a100a1a1a100ULL, 0x8989890089898900ULL, 0x6262620062626200ULL,
0x9797970097979700ULL, 0x5454540054545400ULL, 0x5b5b5b005b5b5b00ULL,
0x1e1e1e001e1e1e00ULL, 0x9595950095959500ULL, 0xe0e0e000e0e0e000ULL,
0xffffff00ffffff00ULL, 0x6464640064646400ULL, 0xd2d2d200d2d2d200ULL,
0x1010100010101000ULL, 0xc4c4c400c4c4c400ULL, 0x0000000000000000ULL,
0x4848480048484800ULL, 0xa3a3a300a3a3a300ULL, 0xf7f7f700f7f7f700ULL,
0x7575750075757500ULL, 0xdbdbdb00dbdbdb00ULL, 0x8a8a8a008a8a8a00ULL,
0x0303030003030300ULL, 0xe6e6e600e6e6e600ULL, 0xdadada00dadada00ULL,
0x0909090009090900ULL, 0x3f3f3f003f3f3f00ULL, 0xdddddd00dddddd00ULL,
0x9494940094949400ULL, 0x8787870087878700ULL, 0x5c5c5c005c5c5c00ULL,
0x8383830083838300ULL, 0x0202020002020200ULL, 0xcdcdcd00cdcdcd00ULL,
0x4a4a4a004a4a4a00ULL, 0x9090900090909000ULL, 0x3333330033333300ULL,
0x7373730073737300ULL, 0x6767670067676700ULL, 0xf6f6f600f6f6f600ULL,
0xf3f3f300f3f3f300ULL, 0x9d9d9d009d9d9d00ULL, 0x7f7f7f007f7f7f00ULL,
0xbfbfbf00bfbfbf00ULL, 0xe2e2e200e2e2e200ULL, 0x5252520052525200ULL,
0x9b9b9b009b9b9b00ULL, 0xd8d8d800d8d8d800ULL, 0x2626260026262600ULL,
0xc8c8c800c8c8c800ULL, 0x3737370037373700ULL, 0xc6c6c600c6c6c600ULL,
0x3b3b3b003b3b3b00ULL, 0x8181810081818100ULL, 0x9696960096969600ULL,
0x6f6f6f006f6f6f00ULL, 0x4b4b4b004b4b4b00ULL, 0x1313130013131300ULL,
0xbebebe00bebebe00ULL, 0x6363630063636300ULL, 0x2e2e2e002e2e2e00ULL,
0xe9e9e900e9e9e900ULL, 0x7979790079797900ULL, 0xa7a7a700a7a7a700ULL,
0x8c8c8c008c8c8c00ULL, 0x9f9f9f009f9f9f00ULL, 0x6e6e6e006e6e6e00ULL,
0xbcbcbc00bcbcbc00ULL, 0x8e8e8e008e8e8e00ULL, 0x2929290029292900ULL,
0xf5f5f500f5f5f500ULL, 0xf9f9f900f9f9f900ULL, 0xb6b6b600b6b6b600ULL,
0x2f2f2f002f2f2f00ULL, 0xfdfdfd00fdfdfd00ULL, 0xb4b4b400b4b4b400ULL,
0x5959590059595900ULL, 0x7878780078787800ULL, 0x9898980098989800ULL,
0x0606060006060600ULL, 0x6a6a6a006a6a6a00ULL, 0xe7e7e700e7e7e700ULL,
0x4646460046464600ULL, 0x7171710071717100ULL, 0xbababa00bababa00ULL,
0xd4d4d400d4d4d400ULL, 0x2525250025252500ULL, 0xababab00ababab00ULL,
0x4242420042424200ULL, 0x8888880088888800ULL, 0xa2a2a200a2a2a200ULL,
0x8d8d8d008d8d8d00ULL, 0xfafafa00fafafa00ULL, 0x7272720072727200ULL,
0x0707070007070700ULL, 0xb9b9b900b9b9b900ULL, 0x5555550055555500ULL,
0xf8f8f800f8f8f800ULL, 0xeeeeee00eeeeee00ULL, 0xacacac00acacac00ULL,
0x0a0a0a000a0a0a00ULL, 0x3636360036363600ULL, 0x4949490049494900ULL,
0x2a2a2a002a2a2a00ULL, 0x6868680068686800ULL, 0x3c3c3c003c3c3c00ULL,
0x3838380038383800ULL, 0xf1f1f100f1f1f100ULL, 0xa4a4a400a4a4a400ULL,
0x4040400040404000ULL, 0x2828280028282800ULL, 0xd3d3d300d3d3d300ULL,
0x7b7b7b007b7b7b00ULL, 0xbbbbbb00bbbbbb00ULL, 0xc9c9c900c9c9c900ULL,
0x4343430043434300ULL, 0xc1c1c100c1c1c100ULL, 0x1515150015151500ULL,
0xe3e3e300e3e3e300ULL, 0xadadad00adadad00ULL, 0xf4f4f400f4f4f400ULL,
0x7777770077777700ULL, 0xc7c7c700c7c7c700ULL, 0x8080800080808000ULL,
0x9e9e9e009e9e9e00ULL,
};
/* key constants */
#define CAMELLIA_SIGMA1L (0xA09E667FL)
#define CAMELLIA_SIGMA1R (0x3BCC908BL)
#define CAMELLIA_SIGMA2L (0xB67AE858L)
#define CAMELLIA_SIGMA2R (0x4CAA73B2L)
#define CAMELLIA_SIGMA3L (0xC6EF372FL)
#define CAMELLIA_SIGMA3R (0xE94F82BEL)
#define CAMELLIA_SIGMA4L (0x54FF53A5L)
#define CAMELLIA_SIGMA4R (0xF1D36F1CL)
#define CAMELLIA_SIGMA5L (0x10E527FAL)
#define CAMELLIA_SIGMA5R (0xDE682D1DL)
#define CAMELLIA_SIGMA6L (0xB05688C2L)
#define CAMELLIA_SIGMA6R (0xB3E6C1FDL)
/* macros */
#define ROLDQ(l, r, bits) ({ \
u64 t = l; \
l = (l << bits) | (r >> (64 - bits)); \
r = (r << bits) | (t >> (64 - bits)); \
})
#define CAMELLIA_F(x, kl, kr, y) ({ \
u64 ii = x ^ (((u64)kl << 32) | kr); \
y = camellia_sp11101110[(uint8_t)ii]; \
y ^= camellia_sp44044404[(uint8_t)(ii >> 8)]; \
ii >>= 16; \
y ^= camellia_sp30333033[(uint8_t)ii]; \
y ^= camellia_sp02220222[(uint8_t)(ii >> 8)]; \
ii >>= 16; \
y ^= camellia_sp00444404[(uint8_t)ii]; \
y ^= camellia_sp03303033[(uint8_t)(ii >> 8)]; \
ii >>= 16; \
y ^= camellia_sp22000222[(uint8_t)ii]; \
y ^= camellia_sp10011110[(uint8_t)(ii >> 8)]; \
y = ror64(y, 32); \
})
#define SET_SUBKEY_LR(INDEX, sRL) (subkey[(INDEX)] = ror64((sRL), 32))
static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max)
{
u64 kw4, tt;
u32 dw, tl, tr;
/* absorb kw2 to other subkeys */
/* round 2 */
subRL[3] ^= subRL[1]; /*covered*/
/* round 4 */
subRL[5] ^= subRL[1];
/* round 6 */
subRL[7] ^= subRL[1];
subRL[1] ^= (subRL[1] & ~subRL[9]) << 32;
/* modified for FLinv(kl2) */
dw = (subRL[1] & subRL[9]) >> 32;
subRL[1] ^= rol32(dw, 1);
/* round 8 */
subRL[11] ^= subRL[1];
/* round 10 */
subRL[13] ^= subRL[1];
/* round 12 */
subRL[15] ^= subRL[1];
subRL[1] ^= (subRL[1] & ~subRL[17]) << 32;
/* modified for FLinv(kl4) */
dw = (subRL[1] & subRL[17]) >> 32;
subRL[1] ^= rol32(dw, 1);
/* round 14 */
subRL[19] ^= subRL[1];
/* round 16 */
subRL[21] ^= subRL[1];
/* round 18 */
subRL[23] ^= subRL[1];
if (max == 24) {
/* kw3 */
subRL[24] ^= subRL[1]; /*covered*/
/* absorb kw4 to other subkeys */
kw4 = subRL[25];
} else {
subRL[1] ^= (subRL[1] & ~subRL[25]) << 32; /*covered*/
/* modified for FLinv(kl6) */
dw = (subRL[1] & subRL[25]) >> 32;
subRL[1] ^= rol32(dw, 1);
/* round 20 */
subRL[27] ^= subRL[1];
/* round 22 */
subRL[29] ^= subRL[1];
/* round 24 */
subRL[31] ^= subRL[1];
/* kw3 */
subRL[32] ^= subRL[1];
/* absorb kw4 to other subkeys */
kw4 = subRL[33];
/* round 23 */
subRL[30] ^= kw4;
/* round 21 */
subRL[28] ^= kw4;
/* round 19 */
subRL[26] ^= kw4;
kw4 ^= (kw4 & ~subRL[24]) << 32;
/* modified for FL(kl5) */
dw = (kw4 & subRL[24]) >> 32;
kw4 ^= rol32(dw, 1);
}
/* round 17 */
subRL[22] ^= kw4; /*covered*/
/* round 15 */
subRL[20] ^= kw4;
/* round 13 */
subRL[18] ^= kw4;
kw4 ^= (kw4 & ~subRL[16]) << 32;
/* modified for FL(kl3) */
dw = (kw4 & subRL[16]) >> 32;
kw4 ^= rol32(dw, 1);
/* round 11 */
subRL[14] ^= kw4;
/* round 9 */
subRL[12] ^= kw4;
/* round 7 */
subRL[10] ^= kw4;
kw4 ^= (kw4 & ~subRL[8]) << 32;
/* modified for FL(kl1) */
dw = (kw4 & subRL[8]) >> 32;
kw4 ^= rol32(dw, 1);
/* round 5 */
subRL[6] ^= kw4;
/* round 3 */
subRL[4] ^= kw4;
/* round 1 */
subRL[2] ^= kw4;
/* kw1 */
subRL[0] ^= kw4;
/* key XOR is end of F-function */
SET_SUBKEY_LR(0, subRL[0] ^ subRL[2]); /* kw1 */
SET_SUBKEY_LR(2, subRL[3]); /* round 1 */
SET_SUBKEY_LR(3, subRL[2] ^ subRL[4]); /* round 2 */
SET_SUBKEY_LR(4, subRL[3] ^ subRL[5]); /* round 3 */
SET_SUBKEY_LR(5, subRL[4] ^ subRL[6]); /* round 4 */
SET_SUBKEY_LR(6, subRL[5] ^ subRL[7]); /* round 5 */
tl = (subRL[10] >> 32) ^ (subRL[10] & ~subRL[8]);
dw = tl & (subRL[8] >> 32); /* FL(kl1) */
tr = subRL[10] ^ rol32(dw, 1);
tt = (tr | ((u64)tl << 32));
SET_SUBKEY_LR(7, subRL[6] ^ tt); /* round 6 */
SET_SUBKEY_LR(8, subRL[8]); /* FL(kl1) */
SET_SUBKEY_LR(9, subRL[9]); /* FLinv(kl2) */
tl = (subRL[7] >> 32) ^ (subRL[7] & ~subRL[9]);
dw = tl & (subRL[9] >> 32); /* FLinv(kl2) */
tr = subRL[7] ^ rol32(dw, 1);
tt = (tr | ((u64)tl << 32));
SET_SUBKEY_LR(10, subRL[11] ^ tt); /* round 7 */
SET_SUBKEY_LR(11, subRL[10] ^ subRL[12]); /* round 8 */
SET_SUBKEY_LR(12, subRL[11] ^ subRL[13]); /* round 9 */
SET_SUBKEY_LR(13, subRL[12] ^ subRL[14]); /* round 10 */
SET_SUBKEY_LR(14, subRL[13] ^ subRL[15]); /* round 11 */
tl = (subRL[18] >> 32) ^ (subRL[18] & ~subRL[16]);
dw = tl & (subRL[16] >> 32); /* FL(kl3) */
tr = subRL[18] ^ rol32(dw, 1);
tt = (tr | ((u64)tl << 32));
SET_SUBKEY_LR(15, subRL[14] ^ tt); /* round 12 */
SET_SUBKEY_LR(16, subRL[16]); /* FL(kl3) */
SET_SUBKEY_LR(17, subRL[17]); /* FLinv(kl4) */
tl = (subRL[15] >> 32) ^ (subRL[15] & ~subRL[17]);
dw = tl & (subRL[17] >> 32); /* FLinv(kl4) */
tr = subRL[15] ^ rol32(dw, 1);
tt = (tr | ((u64)tl << 32));
SET_SUBKEY_LR(18, subRL[19] ^ tt); /* round 13 */
SET_SUBKEY_LR(19, subRL[18] ^ subRL[20]); /* round 14 */
SET_SUBKEY_LR(20, subRL[19] ^ subRL[21]); /* round 15 */
SET_SUBKEY_LR(21, subRL[20] ^ subRL[22]); /* round 16 */
SET_SUBKEY_LR(22, subRL[21] ^ subRL[23]); /* round 17 */
if (max == 24) {
SET_SUBKEY_LR(23, subRL[22]); /* round 18 */ /*covered*/
SET_SUBKEY_LR(24, subRL[24] ^ subRL[23]); /* kw3 */
} else {
tl = (subRL[26] >> 32) ^ (subRL[26] & ~subRL[24]); /*covered*/
dw = tl & (subRL[24] >> 32); /* FL(kl5) */
tr = subRL[26] ^ rol32(dw, 1);
tt = (tr | ((u64)tl << 32));
SET_SUBKEY_LR(23, subRL[22] ^ tt); /* round 18 */
SET_SUBKEY_LR(24, subRL[24]); /* FL(kl5) */
SET_SUBKEY_LR(25, subRL[25]); /* FLinv(kl6) */
tl = (subRL[23] >> 32) ^ (subRL[23] & ~subRL[25]);
dw = tl & (subRL[25] >> 32); /* FLinv(kl6) */
tr = subRL[23] ^ rol32(dw, 1);
tt = (tr | ((u64)tl << 32));
SET_SUBKEY_LR(26, subRL[27] ^ tt); /* round 19 */
SET_SUBKEY_LR(27, subRL[26] ^ subRL[28]); /* round 20 */
SET_SUBKEY_LR(28, subRL[27] ^ subRL[29]); /* round 21 */
SET_SUBKEY_LR(29, subRL[28] ^ subRL[30]); /* round 22 */
SET_SUBKEY_LR(30, subRL[29] ^ subRL[31]); /* round 23 */
SET_SUBKEY_LR(31, subRL[30]); /* round 24 */
SET_SUBKEY_LR(32, subRL[32] ^ subRL[31]); /* kw3 */
}
} /*covered*/
static void camellia_setup128(const unsigned char *key, u64 *subkey)
{
u64 kl, kr, ww;
u64 subRL[26];
/**
* k == kl || kr (|| is concatenation)
*/
kl = get_unaligned_be64(key); /*covered*/
kr = get_unaligned_be64(key + 8);
/* generate KL dependent subkeys */
/* kw1 */
subRL[0] = kl;
/* kw2 */
subRL[1] = kr;
/* rotation left shift 15bit */
ROLDQ(kl, kr, 15);
/* k3 */
subRL[4] = kl;
/* k4 */
subRL[5] = kr;
/* rotation left shift 15+30bit */
ROLDQ(kl, kr, 30);
/* k7 */
subRL[10] = kl;
/* k8 */
subRL[11] = kr;
/* rotation left shift 15+30+15bit */
ROLDQ(kl, kr, 15);
/* k10 */
subRL[13] = kr;
/* rotation left shift 15+30+15+17 bit */
ROLDQ(kl, kr, 17);
/* kl3 */
subRL[16] = kl;
/* kl4 */
subRL[17] = kr;
/* rotation left shift 15+30+15+17+17 bit */
ROLDQ(kl, kr, 17);
/* k13 */
subRL[18] = kl;
/* k14 */
subRL[19] = kr;
/* rotation left shift 15+30+15+17+17+17 bit */
ROLDQ(kl, kr, 17);
/* k17 */
subRL[22] = kl;
/* k18 */
subRL[23] = kr;
/* generate KA */
kl = subRL[0];
kr = subRL[1];
CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww);
kr ^= ww;
CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl);
/* current status == (kll, klr, w0, w1) */
CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr);
kr ^= ww;
CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww);
kl ^= ww;
/* generate KA dependent subkeys */
/* k1, k2 */
subRL[2] = kl;
subRL[3] = kr;
ROLDQ(kl, kr, 15);
/* k5,k6 */
subRL[6] = kl;
subRL[7] = kr;
ROLDQ(kl, kr, 15);
/* kl1, kl2 */
subRL[8] = kl;
subRL[9] = kr;
ROLDQ(kl, kr, 15);
/* k9 */
subRL[12] = kl;
ROLDQ(kl, kr, 15);
/* k11, k12 */
subRL[14] = kl;
subRL[15] = kr;
ROLDQ(kl, kr, 34);
/* k15, k16 */
subRL[20] = kl;
subRL[21] = kr;
ROLDQ(kl, kr, 17);
/* kw3, kw4 */
subRL[24] = kl;
subRL[25] = kr;
camellia_setup_tail(subkey, subRL, 24);
}
static void camellia_setup256(const unsigned char *key, u64 *subkey)
{
u64 kl, kr; /* left half of key */
u64 krl, krr; /* right half of key */
u64 ww; /* temporary variables */
u64 subRL[34];
/**
* key = (kl || kr || krl || krr) (|| is concatenation)
*/
kl = get_unaligned_be64(key); /*covered*/
kr = get_unaligned_be64(key + 8);
krl = get_unaligned_be64(key + 16);
krr = get_unaligned_be64(key + 24);
/* generate KL dependent subkeys */
/* kw1 */
subRL[0] = kl;
/* kw2 */
subRL[1] = kr;
ROLDQ(kl, kr, 45);
/* k9 */
subRL[12] = kl;
/* k10 */
subRL[13] = kr;
ROLDQ(kl, kr, 15);
/* kl3 */
subRL[16] = kl;
/* kl4 */
subRL[17] = kr;
ROLDQ(kl, kr, 17);
/* k17 */
subRL[22] = kl;
/* k18 */
subRL[23] = kr;
ROLDQ(kl, kr, 34);
/* k23 */
subRL[30] = kl;
/* k24 */
subRL[31] = kr;
/* generate KR dependent subkeys */
ROLDQ(krl, krr, 15);
/* k3 */
subRL[4] = krl;
/* k4 */
subRL[5] = krr;
ROLDQ(krl, krr, 15);
/* kl1 */
subRL[8] = krl;
/* kl2 */
subRL[9] = krr;
ROLDQ(krl, krr, 30);
/* k13 */
subRL[18] = krl;
/* k14 */
subRL[19] = krr;
ROLDQ(krl, krr, 34);
/* k19 */
subRL[26] = krl;
/* k20 */
subRL[27] = krr;
ROLDQ(krl, krr, 34);
/* generate KA */
kl = subRL[0] ^ krl;
kr = subRL[1] ^ krr;
CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww);
kr ^= ww;
CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl);
kl ^= krl;
CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr);
kr ^= ww ^ krr;
CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww);
kl ^= ww;
/* generate KB */
krl ^= kl;
krr ^= kr;
CAMELLIA_F(krl, CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R, ww);
krr ^= ww;
CAMELLIA_F(krr, CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R, ww);
krl ^= ww;
/* generate KA dependent subkeys */
ROLDQ(kl, kr, 15);
/* k5 */
subRL[6] = kl;
/* k6 */
subRL[7] = kr;
ROLDQ(kl, kr, 30);
/* k11 */
subRL[14] = kl;
/* k12 */
subRL[15] = kr;
/* rotation left shift 32bit */
ROLDQ(kl, kr, 32);
/* kl5 */
subRL[24] = kl;
/* kl6 */
subRL[25] = kr;
/* rotation left shift 17 from k11,k12 -> k21,k22 */
ROLDQ(kl, kr, 17);
/* k21 */
subRL[28] = kl;
/* k22 */
subRL[29] = kr;
/* generate KB dependent subkeys */
/* k1 */
subRL[2] = krl;
/* k2 */
subRL[3] = krr;
ROLDQ(krl, krr, 30);
/* k7 */
subRL[10] = krl;
/* k8 */
subRL[11] = krr;
ROLDQ(krl, krr, 30);
/* k15 */
subRL[20] = krl;
/* k16 */
subRL[21] = krr;
ROLDQ(krl, krr, 51);
/* kw3 */
subRL[32] = krl;
/* kw4 */
subRL[33] = krr;
camellia_setup_tail(subkey, subRL, 32);
}
static void camellia_setup192(const unsigned char *key, u64 *subkey)
{
unsigned char kk[32];
u64 krl, krr;
memcpy(kk, key, 24);
memcpy((unsigned char *)&krl, key+16, 8);
krr = ~krl;
memcpy(kk+24, (unsigned char *)&krr, 8);
camellia_setup256(kk, subkey); /*covered*/
}
int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key,
unsigned int key_len, u32 *flags)
{
if (key_len != 16 && key_len != 24 && key_len != 32) { /*covered*/
*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; /*covered*/
return -EINVAL;
}
cctx->key_length = key_len; /*covered*/
switch (key_len) {
case 16:
camellia_setup128(key, cctx->key_table);
break;
case 24:
camellia_setup192(key, cctx->key_table); /*covered*/
break;
case 32:
camellia_setup256(key, cctx->key_table);
break;
}
return 0;
}
EXPORT_SYMBOL_GPL(__camellia_setkey);
static int camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int key_len)
{
return __camellia_setkey(crypto_tfm_ctx(tfm), key, key_len, /*covered*/
&tfm->crt_flags);
}
static int camellia_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
unsigned int key_len)
{
return camellia_setkey(&tfm->base, key, key_len); /*covered*/
}
void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
{
u128 iv = *src; /*covered*/
camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
u128_xor(&dst[1], &dst[1], &iv);
}
EXPORT_SYMBOL_GPL(camellia_decrypt_cbc_2way);
void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
be128 ctrblk;
if (dst != src) /*covered*/
*dst = *src; /*covered*/
le128_to_be128(&ctrblk, iv); /*covered*/
le128_inc(iv); /*covered*/
camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
}
EXPORT_SYMBOL_GPL(camellia_crypt_ctr);
void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
be128 ctrblks[2];
if (dst != src) { /*covered*/
dst[0] = src[0]; /*covered*/
dst[1] = src[1];
}
le128_to_be128(&ctrblks[0], iv); /*covered*/
le128_inc(iv);
le128_to_be128(&ctrblks[1], iv); /*covered*/
le128_inc(iv); /*covered*/
camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks);
}
EXPORT_SYMBOL_GPL(camellia_crypt_ctr_2way);
static const struct common_glue_ctx camellia_enc = {
.num_funcs = 2,
.fpu_blocks_limit = -1,
.funcs = { {
.num_blocks = 2,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
} }
};
static const struct common_glue_ctx camellia_ctr = {
.num_funcs = 2,
.fpu_blocks_limit = -1,
.funcs = { {
.num_blocks = 2,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
}, {
.num_blocks = 1,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
} }
};
static const struct common_glue_ctx camellia_dec = {
.num_funcs = 2,
.fpu_blocks_limit = -1,
.funcs = { {
.num_blocks = 2,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
} }
};
static const struct common_glue_ctx camellia_dec_cbc = {
.num_funcs = 2,
.fpu_blocks_limit = -1,
.funcs = { {
.num_blocks = 2,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
}, {
.num_blocks = 1,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
} }
};
static int ecb_encrypt(struct skcipher_request *req)
{
return glue_ecb_req_128bit(&camellia_enc, req); /*covered*/
}
static int ecb_decrypt(struct skcipher_request *req)
{
return glue_ecb_req_128bit(&camellia_dec, req); /*covered*/
}
static int cbc_encrypt(struct skcipher_request *req)
{
return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk), /*covered*/
req);
}
static int cbc_decrypt(struct skcipher_request *req)
{
return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); /*covered*/
}
static int ctr_crypt(struct skcipher_request *req)
{
return glue_ctr_req_128bit(&camellia_ctr, req); /*covered*/
}
static struct crypto_alg camellia_cipher_alg = {
.cra_name = "camellia",
.cra_driver_name = "camellia-asm",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
.cra_module = THIS_MODULE,
.cra_u = {
.cipher = {
.cia_min_keysize = CAMELLIA_MIN_KEY_SIZE,
.cia_max_keysize = CAMELLIA_MAX_KEY_SIZE,
.cia_setkey = camellia_setkey,
.cia_encrypt = camellia_encrypt,
.cia_decrypt = camellia_decrypt
}
}
};
static struct skcipher_alg camellia_skcipher_algs[] = {
{
.base.cra_name = "ecb(camellia)",
.base.cra_driver_name = "ecb-camellia-asm",
.base.cra_priority = 300,
.base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct camellia_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.setkey = camellia_setkey_skcipher,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
.base.cra_name = "cbc(camellia)",
.base.cra_driver_name = "cbc-camellia-asm",
.base.cra_priority = 300,
.base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct camellia_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.setkey = camellia_setkey_skcipher,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
}, {
.base.cra_name = "ctr(camellia)",
.base.cra_driver_name = "ctr-camellia-asm",
.base.cra_priority = 300,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct camellia_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = CAMELLIA_MIN_KEY_SIZE,
.max_keysize = CAMELLIA_MAX_KEY_SIZE,
.ivsize = CAMELLIA_BLOCK_SIZE,
.chunksize = CAMELLIA_BLOCK_SIZE,
.setkey = camellia_setkey_skcipher,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
}
};
static bool is_blacklisted_cpu(void)
{
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return false;
if (boot_cpu_data.x86 == 0x0f) {
/*
* On Pentium 4, camellia-asm is slower than original assembler
* implementation because excessive uses of 64bit rotate and
* left-shifts (which are really slow on P4) needed to store and
* handle 128bit block in two 64bit registers.
*/
return true;
}
return false;
}
static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
static int __init init(void)
{
int err;
if (!force && is_blacklisted_cpu()) {
printk(KERN_INFO
"camellia-x86_64: performance on this CPU "
"would be suboptimal: disabling "
"camellia-x86_64.\n");
return -ENODEV;
}
err = crypto_register_alg(&camellia_cipher_alg);
if (err)
return err;
err = crypto_register_skciphers(camellia_skcipher_algs,
ARRAY_SIZE(camellia_skcipher_algs));
if (err)
crypto_unregister_alg(&camellia_cipher_alg);
return err;
}
static void __exit fini(void)
{
crypto_unregister_alg(&camellia_cipher_alg);
crypto_unregister_skciphers(camellia_skcipher_algs,
ARRAY_SIZE(camellia_skcipher_algs));
}
module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Camellia Cipher Algorithm, asm optimized");
MODULE_ALIAS_CRYPTO("camellia");
MODULE_ALIAS_CRYPTO("camellia-asm");
/*
* Glue Code for the AVX assembler implementation of the Cast5 Cipher
*
* Copyright (C) 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <asm/crypto/glue_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/internal/simd.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/types.h>
#define CAST5_PARALLEL_BLOCKS 16
asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
__be64 *iv);
static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen)
{
return cast5_setkey(&tfm->base, key, keylen); /*covered*/
}
static inline bool cast5_fpu_begin(bool fpu_enabled, struct skcipher_walk *walk,
unsigned int nbytes)
{
return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
walk, fpu_enabled, nbytes);
}
static inline void cast5_fpu_end(bool fpu_enabled)
{
return glue_fpu_end(fpu_enabled);
}
static int ecb_crypt(struct skcipher_request *req, bool enc)
{
bool fpu_enabled = false;
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
const unsigned int bsize = CAST5_BLOCK_SIZE;
unsigned int nbytes;
void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes)) {
u8 *wsrc = walk.src.virt.addr;
u8 *wdst = walk.dst.virt.addr;
fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
/* Process multi-block batch */
if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
do {
fn(ctx, wdst, wsrc);
wsrc += bsize * CAST5_PARALLEL_BLOCKS;
wdst += bsize * CAST5_PARALLEL_BLOCKS;
nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
if (nbytes < bsize)
goto done;
}
fn = (enc) ? __cast5_encrypt : __cast5_decrypt;
/* Handle leftovers */
do {
fn(ctx, wdst, wsrc);
wsrc += bsize;
wdst += bsize;
nbytes -= bsize;
} while (nbytes >= bsize);
done:
err = skcipher_walk_done(&walk, nbytes);
}
cast5_fpu_end(fpu_enabled);
return err;
}
static int ecb_encrypt(struct skcipher_request *req)
{
return ecb_crypt(req, true);
}
static int ecb_decrypt(struct skcipher_request *req)
{
return ecb_crypt(req, false);
}
static int cbc_encrypt(struct skcipher_request *req)
{
const unsigned int bsize = CAST5_BLOCK_SIZE;
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes)) {
u64 *src = (u64 *)walk.src.virt.addr;
u64 *dst = (u64 *)walk.dst.virt.addr;
u64 *iv = (u64 *)walk.iv;
do {
*dst = *src ^ *iv;
__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
iv = dst;
src++;
dst++;
nbytes -= bsize;
} while (nbytes >= bsize);
*(u64 *)walk.iv = *iv;
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static unsigned int __cbc_decrypt(struct cast5_ctx *ctx,
struct skcipher_walk *walk)
{
const unsigned int bsize = CAST5_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 last_iv;
/* Start of the last block. */
src += nbytes / bsize - 1;
dst += nbytes / bsize - 1;
last_iv = *src;
/* Process multi-block batch */
if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
do {
nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
src -= CAST5_PARALLEL_BLOCKS - 1;
dst -= CAST5_PARALLEL_BLOCKS - 1;
cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);
nbytes -= bsize;
if (nbytes < bsize)
goto done;
*dst ^= *(src - 1);
src -= 1;
dst -= 1;
} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
}
/* Handle leftovers */
for (;;) {
__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);
nbytes -= bsize;
if (nbytes < bsize)
break;
*dst ^= *(src - 1);
src -= 1;
dst -= 1;
}
done:
*dst ^= *(u64 *)walk->iv;
*(u64 *)walk->iv = last_iv;
return nbytes;
}
static int cbc_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
bool fpu_enabled = false;
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes)) {
fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
nbytes = __cbc_decrypt(ctx, &walk);
err = skcipher_walk_done(&walk, nbytes);
}
cast5_fpu_end(fpu_enabled);
return err;
}
static void ctr_crypt_final(struct skcipher_walk *walk, struct cast5_ctx *ctx)
{
u8 *ctrblk = walk->iv;
u8 keystream[CAST5_BLOCK_SIZE];
u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
unsigned int nbytes = walk->nbytes;
__cast5_encrypt(ctx, keystream, ctrblk);
crypto_xor_cpy(dst, keystream, src, nbytes);
crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}
static unsigned int __ctr_crypt(struct skcipher_walk *walk,
struct cast5_ctx *ctx)
{
const unsigned int bsize = CAST5_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
/* Process multi-block batch */
if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
do {
cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
(__be64 *)walk->iv);
src += CAST5_PARALLEL_BLOCKS;
dst += CAST5_PARALLEL_BLOCKS;
nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
do {
u64 ctrblk;
if (dst != src)
*dst = *src;
ctrblk = *(u64 *)walk->iv;
be64_add_cpu((__be64 *)walk->iv, 1);
__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
*dst ^= ctrblk;
src += 1;
dst += 1;
nbytes -= bsize;
} while (nbytes >= bsize);
done:
return nbytes;
}
static int ctr_crypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
bool fpu_enabled = false;
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
nbytes = __ctr_crypt(&walk, ctx);
err = skcipher_walk_done(&walk, nbytes);
}
cast5_fpu_end(fpu_enabled);
if (walk.nbytes) {
ctr_crypt_final(&walk, ctx);
err = skcipher_walk_done(&walk, 0);
}
return err;
}
static struct skcipher_alg cast5_algs[] = {
{
.base.cra_name = "__ecb(cast5)",
.base.cra_driver_name = "__ecb-cast5-avx",
.base.cra_priority = 200,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = CAST5_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct cast5_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = CAST5_MIN_KEY_SIZE,
.max_keysize = CAST5_MAX_KEY_SIZE,
.setkey = cast5_setkey_skcipher,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
.base.cra_name = "__cbc(cast5)",
.base.cra_driver_name = "__cbc-cast5-avx",
.base.cra_priority = 200,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = CAST5_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct cast5_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = CAST5_MIN_KEY_SIZE,
.max_keysize = CAST5_MAX_KEY_SIZE,
.ivsize = CAST5_BLOCK_SIZE,
.setkey = cast5_setkey_skcipher,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
}, {
.base.cra_name = "__ctr(cast5)",
.base.cra_driver_name = "__ctr-cast5-avx",
.base.cra_priority = 200,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct cast5_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = CAST5_MIN_KEY_SIZE,
.max_keysize = CAST5_MAX_KEY_SIZE,
.ivsize = CAST5_BLOCK_SIZE,
.chunksize = CAST5_BLOCK_SIZE,
.setkey = cast5_setkey_skcipher,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
}
};
static struct simd_skcipher_alg *cast5_simd_algs[ARRAY_SIZE(cast5_algs)];
static int __init cast5_init(void)
{
const char *feature_name;
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
&feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
return simd_register_skciphers_compat(cast5_algs,
ARRAY_SIZE(cast5_algs),
cast5_simd_algs);
}
static void __exit cast5_exit(void)
{
simd_unregister_skciphers(cast5_algs, ARRAY_SIZE(cast5_algs),
cast5_simd_algs);
}
module_init(cast5_init);
module_exit(cast5_exit);
MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("cast5");
/*
* Glue Code for the AVX assembler implementation of the Cast6 Cipher
*
* Copyright (C) 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/cast6.h>
#include <crypto/internal/simd.h>
#include <crypto/xts.h>
#include <asm/crypto/glue_helper.h>
#define CAST6_PARALLEL_BLOCKS 8
asmlinkage void cast6_ecb_enc_8way(struct cast6_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void cast6_ecb_dec_8way(struct cast6_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void cast6_cbc_dec_8way(struct cast6_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src,
le128 *iv);
asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
static int cast6_setkey_skcipher(struct crypto_skcipher *tfm,
const u8 *key, unsigned int keylen)
{
return cast6_setkey(&tfm->base, key, keylen); /*covered*/
}
static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(__cast6_encrypt));
}
static void cast6_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(__cast6_decrypt));
}
static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
be128 ctrblk;
le128_to_be128(&ctrblk, iv);
le128_inc(iv);
__cast6_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
u128_xor(dst, src, (u128 *)&ctrblk);
}
static const struct common_glue_ctx cast6_enc = {
.num_funcs = 2,
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_enc_8way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) }
} }
};
static const struct common_glue_ctx cast6_ctr = {
.num_funcs = 2,
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_ctr_8way) }
}, {
.num_blocks = 1,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) }
} }
};
static const struct common_glue_ctx cast6_enc_xts = {
.num_funcs = 2,
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc_8way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc) }
} }
};
static const struct common_glue_ctx cast6_dec = {
.num_funcs = 2,
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_dec_8way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) }
} }
};
static const struct common_glue_ctx cast6_dec_cbc = {
.num_funcs = 2,
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_cbc_dec_8way) }
}, {
.num_blocks = 1,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) }
} }
};
static const struct common_glue_ctx cast6_dec_xts = {
.num_funcs = 2,
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec_8way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec) }
} }
};
static int ecb_encrypt(struct skcipher_request *req)
{
return glue_ecb_req_128bit(&cast6_enc, req);
}
static int ecb_decrypt(struct skcipher_request *req)
{
return glue_ecb_req_128bit(&cast6_dec, req);
}
static int cbc_encrypt(struct skcipher_request *req)
{
return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__cast6_encrypt),
req);
}
static int cbc_decrypt(struct skcipher_request *req)
{
return glue_cbc_decrypt_req_128bit(&cast6_dec_cbc, req);
}
static int ctr_crypt(struct skcipher_request *req)
{
return glue_ctr_req_128bit(&cast6_ctr, req);
}
struct cast6_xts_ctx {
struct cast6_ctx tweak_ctx;
struct cast6_ctx crypt_ctx;
};
static int xts_cast6_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen)
{
struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
u32 *flags = &tfm->base.crt_flags;
int err;
err = xts_verify_key(tfm, key, keylen);
if (err)
return err;
/* first half of xts-key is for crypt */
err = __cast6_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
if (err)
return err;
/* second half of xts-key is for tweak */
return __cast6_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
flags);
}
static int xts_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
return glue_xts_req_128bit(&cast6_enc_xts, req,
XTS_TWEAK_CAST(__cast6_encrypt),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static int xts_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
return glue_xts_req_128bit(&cast6_dec_xts, req,
XTS_TWEAK_CAST(__cast6_encrypt),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static struct skcipher_alg cast6_algs[] = {
{
.base.cra_name = "__ecb(cast6)",
.base.cra_driver_name = "__ecb-cast6-avx",
.base.cra_priority = 200,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = CAST6_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct cast6_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = CAST6_MIN_KEY_SIZE,
.max_keysize = CAST6_MAX_KEY_SIZE,
.setkey = cast6_setkey_skcipher,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
.base.cra_name = "__cbc(cast6)",
.base.cra_driver_name = "__cbc-cast6-avx",
.base.cra_priority = 200,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = CAST6_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct cast6_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = CAST6_MIN_KEY_SIZE,
.max_keysize = CAST6_MAX_KEY_SIZE,
.ivsize = CAST6_BLOCK_SIZE,
.setkey = cast6_setkey_skcipher,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
}, {
.base.cra_name = "__ctr(cast6)",
.base.cra_driver_name = "__ctr-cast6-avx",
.base.cra_priority = 200,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct cast6_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = CAST6_MIN_KEY_SIZE,
.max_keysize = CAST6_MAX_KEY_SIZE,
.ivsize = CAST6_BLOCK_SIZE,
.chunksize = CAST6_BLOCK_SIZE,
.setkey = cast6_setkey_skcipher,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
}, {
.base.cra_name = "__xts(cast6)",
.base.cra_driver_name = "__xts-cast6-avx",
.base.cra_priority = 200,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = CAST6_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct cast6_xts_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = 2 * CAST6_MIN_KEY_SIZE,
.max_keysize = 2 * CAST6_MAX_KEY_SIZE,
.ivsize = CAST6_BLOCK_SIZE,
.setkey = xts_cast6_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
},
};
static struct simd_skcipher_alg *cast6_simd_algs[ARRAY_SIZE(cast6_algs)];
static int __init cast6_init(void)
{
const char *feature_name;
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
&feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
return simd_register_skciphers_compat(cast6_algs,
ARRAY_SIZE(cast6_algs),
cast6_simd_algs);
}
static void __exit cast6_exit(void)
{
simd_unregister_skciphers(cast6_algs, ARRAY_SIZE(cast6_algs),
cast6_simd_algs);
}
module_init(cast6_init);
module_exit(cast6_exit);
MODULE_DESCRIPTION("Cast6 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("cast6");
/*
* x64 SIMD accelerated ChaCha and XChaCha stream ciphers,
* including ChaCha20 (RFC7539)
*
* Copyright (C) 2015 Martin Willi
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <crypto/algapi.h>
#include <crypto/chacha.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
#include <asm/simd.h>
#define CHACHA_STATE_ALIGN 16
asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds);
#ifdef CONFIG_AS_AVX2
asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
static bool chacha_use_avx2;
#ifdef CONFIG_AS_AVX512
asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
static bool chacha_use_avx512vl;
#endif
#endif
static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
{
len = min(len, maxblocks * CHACHA_BLOCK_SIZE);
return round_up(len, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE;
}
static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
unsigned int bytes, int nrounds)
{
#ifdef CONFIG_AS_AVX2
#ifdef CONFIG_AS_AVX512
if (chacha_use_avx512vl) {
while (bytes >= CHACHA_BLOCK_SIZE * 8) {
chacha_8block_xor_avx512vl(state, dst, src, bytes,
nrounds);
bytes -= CHACHA_BLOCK_SIZE * 8;
src += CHACHA_BLOCK_SIZE * 8;
dst += CHACHA_BLOCK_SIZE * 8;
state[12] += 8;
}
if (bytes > CHACHA_BLOCK_SIZE * 4) {
chacha_8block_xor_avx512vl(state, dst, src, bytes,
nrounds);
state[12] += chacha_advance(bytes, 8);
return;
}
if (bytes > CHACHA_BLOCK_SIZE * 2) {
chacha_4block_xor_avx512vl(state, dst, src, bytes,
nrounds);
state[12] += chacha_advance(bytes, 4);
return;
}
if (bytes) {
chacha_2block_xor_avx512vl(state, dst, src, bytes,
nrounds);
state[12] += chacha_advance(bytes, 2);
return;
}
}
#endif
if (chacha_use_avx2) { /*covered*/
while (bytes >= CHACHA_BLOCK_SIZE * 8) { /*covered*/
chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
bytes -= CHACHA_BLOCK_SIZE * 8;
src += CHACHA_BLOCK_SIZE * 8;
dst += CHACHA_BLOCK_SIZE * 8;
state[12] += 8;
}
if (bytes > CHACHA_BLOCK_SIZE * 4) { /*covered*/
chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); /*covered*/
state[12] += chacha_advance(bytes, 8);
return;
}
if (bytes > CHACHA_BLOCK_SIZE * 2) { /*covered*/
chacha_4block_xor_avx2(state, dst, src, bytes, nrounds); /*covered*/
state[12] += chacha_advance(bytes, 4);
return;
}
if (bytes > CHACHA_BLOCK_SIZE) { /*covered*/
chacha_2block_xor_avx2(state, dst, src, bytes, nrounds); /*covered*/
state[12] += chacha_advance(bytes, 2);
return;
}
}
#endif
while (bytes >= CHACHA_BLOCK_SIZE * 4) {
chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
bytes -= CHACHA_BLOCK_SIZE * 4;
src += CHACHA_BLOCK_SIZE * 4;
dst += CHACHA_BLOCK_SIZE * 4;
state[12] += 4;
}
if (bytes > CHACHA_BLOCK_SIZE) { /*covered*/
chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
state[12] += chacha_advance(bytes, 4);
return;
}
if (bytes) { /*covered*/
chacha_block_xor_ssse3(state, dst, src, bytes, nrounds); /*covered*/
state[12]++;
}
}
static int chacha_simd_stream_xor(struct skcipher_walk *walk,
struct chacha_ctx *ctx, u8 *iv)
{
u32 *state, state_buf[16 + 2] __aligned(8);
int next_yield = 4096; /* bytes until next FPU yield */
int err = 0;
BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN); /*covered*/
crypto_chacha_init(state, ctx, iv);
while (walk->nbytes > 0) {
unsigned int nbytes = walk->nbytes;
if (nbytes < walk->total) { /*covered*/
nbytes = round_down(nbytes, walk->stride); /*covered*/
next_yield -= nbytes;
}
chacha_dosimd(state, walk->dst.virt.addr, walk->src.virt.addr, /*covered*/
nbytes, ctx->nrounds);
if (next_yield <= 0) { /*covered*/
/* temporarily allow preemption */
kernel_fpu_end();
kernel_fpu_begin();
next_yield = 4096;
}
err = skcipher_walk_done(walk, walk->nbytes - nbytes); /*covered*/
}
return err; /*covered*/
}
static int chacha_simd(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); /*covered*/
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
int err;
if (req->cryptlen <= CHACHA_BLOCK_SIZE || !irq_fpu_usable()) /*covered*/
return crypto_chacha_crypt(req); /*covered*/
err = skcipher_walk_virt(&walk, req, true); /*covered*/
if (err)
return err;
kernel_fpu_begin(); /*covered*/
err = chacha_simd_stream_xor(&walk, ctx, req->iv);
kernel_fpu_end();
return err;
}
static int xchacha_simd(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); /*covered*/
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
struct chacha_ctx subctx;
u32 *state, state_buf[16 + 2] __aligned(8);
u8 real_iv[16];
int err;
if (req->cryptlen <= CHACHA_BLOCK_SIZE || !irq_fpu_usable()) /*covered*/
return crypto_xchacha_crypt(req); /*covered*/
err = skcipher_walk_virt(&walk, req, true); /*covered*/
if (err)
return err;
BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN); /*covered*/
crypto_chacha_init(state, ctx, req->iv);
kernel_fpu_begin();
hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
subctx.nrounds = ctx->nrounds;
memcpy(&real_iv[0], req->iv + 24, 8);
memcpy(&real_iv[8], req->iv + 16, 8);
err = chacha_simd_stream_xor(&walk, &subctx, real_iv);
kernel_fpu_end();
return err;
}
static struct skcipher_alg algs[] = {
{
.base.cra_name = "chacha20",
.base.cra_driver_name = "chacha20-simd",
.base.cra_priority = 300,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct chacha_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = CHACHA_KEY_SIZE,
.max_keysize = CHACHA_KEY_SIZE,
.ivsize = CHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
.setkey = crypto_chacha20_setkey,
.encrypt = chacha_simd,
.decrypt = chacha_simd,
}, {
.base.cra_name = "xchacha20",
.base.cra_driver_name = "xchacha20-simd",
.base.cra_priority = 300,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct chacha_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = CHACHA_KEY_SIZE,
.max_keysize = CHACHA_KEY_SIZE,
.ivsize = XCHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
.setkey = crypto_chacha20_setkey,
.encrypt = xchacha_simd,
.decrypt = xchacha_simd,
}, {
.base.cra_name = "xchacha12",
.base.cra_driver_name = "xchacha12-simd",
.base.cra_priority = 300,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct chacha_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = CHACHA_KEY_SIZE,
.max_keysize = CHACHA_KEY_SIZE,
.ivsize = XCHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
.setkey = crypto_chacha12_setkey,
.encrypt = xchacha_simd,
.decrypt = xchacha_simd,
},
};
static int __init chacha_simd_mod_init(void)
{
if (!boot_cpu_has(X86_FEATURE_SSSE3))
return -ENODEV;
#ifdef CONFIG_AS_AVX2
chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
boot_cpu_has(X86_FEATURE_AVX2) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
#ifdef CONFIG_AS_AVX512
chacha_use_avx512vl = chacha_use_avx2 &&
boot_cpu_has(X86_FEATURE_AVX512VL) &&
boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */
#endif
#endif
return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
}
static void __exit chacha_simd_mod_fini(void)
{
crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}
module_init(chacha_simd_mod_init);
module_exit(chacha_simd_mod_fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (x64 SIMD accelerated)");
MODULE_ALIAS_CRYPTO("chacha20");
MODULE_ALIAS_CRYPTO("chacha20-simd");
MODULE_ALIAS_CRYPTO("xchacha20");
MODULE_ALIAS_CRYPTO("xchacha20-simd");
MODULE_ALIAS_CRYPTO("xchacha12");
MODULE_ALIAS_CRYPTO("xchacha12-simd");
/* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see http://www.gnu.org/licenses
*
* Please visit http://www.xyratex.com/contact if you need additional
* information or have any questions.
*
* GPL HEADER END
*/
/*
* Copyright 2012 Xyratex Technology Limited
*
* Wrappers for kernel crypto shash api to pclmulqdq crc32 imlementation.
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/crc32.h>
#include <crypto/internal/hash.h>
#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
#include <asm/fpu/api.h>
#define CHKSUM_BLOCK_SIZE 1
#define CHKSUM_DIGEST_SIZE 4
#define PCLMUL_MIN_LEN 64L /* minimum size of buffer
* for crc32_pclmul_le_16 */
#define SCALE_F 16L /* size of xmm register */
#define SCALE_F_MASK (SCALE_F - 1)
u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32);
static u32 __attribute__((pure))
crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len) /*covered*/
{
unsigned int iquotient;
unsigned int iremainder;
unsigned int prealign;
if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !irq_fpu_usable()) /*covered*/
return crc32_le(crc, p, len); /*covered*/
if ((long)p & SCALE_F_MASK) { /*covered*/
/* align p to 16 byte */
prealign = SCALE_F - ((long)p & SCALE_F_MASK); /*covered*/
crc = crc32_le(crc, p, prealign);
len -= prealign;
p = (unsigned char *)(((unsigned long)p + SCALE_F_MASK) &
~SCALE_F_MASK);
}
iquotient = len & (~SCALE_F_MASK); /*covered*/
iremainder = len & SCALE_F_MASK;
kernel_fpu_begin();
crc = crc32_pclmul_le_16(p, iquotient, crc);
kernel_fpu_end();
if (iremainder)
crc = crc32_le(crc, p + iquotient, iremainder); /*covered*/
return crc;
}
static int crc32_pclmul_cra_init(struct crypto_tfm *tfm)
{
u32 *key = crypto_tfm_ctx(tfm);
*key = 0; /*covered*/
return 0;
}
static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key,
unsigned int keylen)
{
u32 *mctx = crypto_shash_ctx(hash);
if (keylen != sizeof(u32)) { /*covered*/
crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); /*covered*/
return -EINVAL;
}
*mctx = le32_to_cpup((__le32 *)key); /*covered*/
return 0; /*covered*/
}
static int crc32_pclmul_init(struct shash_desc *desc)
{
u32 *mctx = crypto_shash_ctx(desc->tfm); /*covered*/
u32 *crcp = shash_desc_ctx(desc);
*crcp = *mctx;
return 0;
}
static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
u32 *crcp = shash_desc_ctx(desc);
*crcp = crc32_pclmul_le(*crcp, data, len); /*covered*/
return 0;
}
/* No final XOR 0xFFFFFFFF, like crc32_le */
static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int len,
u8 *out)
{
*(__le32 *)out = cpu_to_le32(crc32_pclmul_le(*crcp, data, len));
return 0;
}
static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return __crc32_pclmul_finup(shash_desc_ctx(desc), data, len, out); /*covered*/
}
static int crc32_pclmul_final(struct shash_desc *desc, u8 *out)
{
u32 *crcp = shash_desc_ctx(desc);
*(__le32 *)out = cpu_to_le32p(crcp); /*covered*/
return 0;
}
static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return __crc32_pclmul_finup(crypto_shash_ctx(desc->tfm), data, len,
out);
}
static struct shash_alg alg = {
.setkey = crc32_pclmul_setkey,
.init = crc32_pclmul_init,
.update = crc32_pclmul_update,
.final = crc32_pclmul_final,
.finup = crc32_pclmul_finup,
.digest = crc32_pclmul_digest,
.descsize = sizeof(u32),
.digestsize = CHKSUM_DIGEST_SIZE,
.base = {
.cra_name = "crc32",
.cra_driver_name = "crc32-pclmul",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
.cra_blocksize = CHKSUM_BLOCK_SIZE,
.cra_ctxsize = sizeof(u32),
.cra_module = THIS_MODULE,
.cra_init = crc32_pclmul_cra_init,
}
};
static const struct x86_cpu_id crc32pclmul_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
{}
};
MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id);
static int __init crc32_pclmul_mod_init(void)
{
if (!x86_match_cpu(crc32pclmul_cpu_id)) {
pr_info("PCLMULQDQ-NI instructions are not detected.\n");
return -ENODEV;
}
return crypto_register_shash(&alg);
}
static void __exit crc32_pclmul_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_init(crc32_pclmul_mod_init);
module_exit(crc32_pclmul_mod_fini);
MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("crc32");
MODULE_ALIAS_CRYPTO("crc32-pclmul");
/*
* Using hardware provided CRC32 instruction to accelerate the CRC32 disposal.
* CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
* CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
* http://www.intel.com/products/processor/manuals/
* Intel(R) 64 and IA-32 Architectures Software Developer's Manual
* Volume 2A: Instruction Set Reference, A-M
*
* Copyright (C) 2008 Intel Corporation
* Authors: Austin Zhang <austin_zhang@linux.intel.com>
* Kent Liu <kent.liu@intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <crypto/internal/hash.h>
#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
#include <asm/fpu/internal.h>
#define CHKSUM_BLOCK_SIZE 1
#define CHKSUM_DIGEST_SIZE 4
#define SCALE_F sizeof(unsigned long)
#ifdef CONFIG_X86_64
#define REX_PRE "0x48, "
#else
#define REX_PRE
#endif
#ifdef CONFIG_X86_64
/*
* use carryless multiply version of crc32c when buffer
* size is >= 512 to account
* for fpu state save/restore overhead.
*/
#define CRC32C_PCL_BREAKEVEN 512
asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
unsigned int crc_init);
#endif /* CONFIG_X86_64 */
static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
{
while (length--) {
__asm__ __volatile__(
".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
:"=S"(crc)
:"0"(crc), "c"(*data) /*covered*/
);
data++;
}
return crc;
}
static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len)
{
unsigned int iquotient = len / SCALE_F;
unsigned int iremainder = len % SCALE_F;
unsigned long *ptmp = (unsigned long *)p;
while (iquotient--) {
__asm__ __volatile__(
".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
:"=S"(crc)
:"0"(crc), "c"(*ptmp) /*covered*/
);
ptmp++;
}
if (iremainder) /*covered*/
crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp, /*covered*/
iremainder);
return crc;
}
/*
* Setting the seed allows arbitrary accumulators and flexible XOR policy
* If your algorithm starts with ~0, then XOR with ~0 before you set
* the seed.
*/
static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key,
unsigned int keylen)
{
u32 *mctx = crypto_shash_ctx(hash);
if (keylen != sizeof(u32)) { /*covered*/
crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); /*covered*/
return -EINVAL;
}
*mctx = le32_to_cpup((__le32 *)key); /*covered*/
return 0; /*covered*/
}
static int crc32c_intel_init(struct shash_desc *desc)
{
u32 *mctx = crypto_shash_ctx(desc->tfm); /*covered*/
u32 *crcp = shash_desc_ctx(desc);
*crcp = *mctx;
return 0;
}
static int crc32c_intel_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
u32 *crcp = shash_desc_ctx(desc);
*crcp = crc32c_intel_le_hw(*crcp, data, len);
return 0;
}
static int __crc32c_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
u8 *out)
{
*(__le32 *)out = ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
return 0;
}
static int crc32c_intel_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return __crc32c_intel_finup(shash_desc_ctx(desc), data, len, out);
}
static int crc32c_intel_final(struct shash_desc *desc, u8 *out)
{
u32 *crcp = shash_desc_ctx(desc);
*(__le32 *)out = ~cpu_to_le32p(crcp); /*covered*/
return 0;
}
static int crc32c_intel_digest(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return __crc32c_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
out);
}
static int crc32c_intel_cra_init(struct crypto_tfm *tfm)
{
u32 *key = crypto_tfm_ctx(tfm);
*key = ~0; /*covered*/
return 0;
}
#ifdef CONFIG_X86_64
static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data, /*covered*/
unsigned int len)
{
u32 *crcp = shash_desc_ctx(desc);
/*
* use faster PCL version if datasize is large enough to
* overcome kernel fpu state save/restore overhead
*/
if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) { /*covered*/
kernel_fpu_begin(); /*covered*/
*crcp = crc_pcl(data, len, *crcp);
kernel_fpu_end();
} else
*crcp = crc32c_intel_le_hw(*crcp, data, len); /*covered*/
return 0; /*covered*/
}
static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len, /*covered*/
u8 *out)
{
if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) { /*covered*/
kernel_fpu_begin(); /*covered*/
*(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
kernel_fpu_end();
} else
*(__le32 *)out =
~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len)); /*covered*/
return 0; /*covered*/
}
static int crc32c_pcl_intel_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return __crc32c_pcl_intel_finup(shash_desc_ctx(desc), data, len, out); /*covered*/
}
static int crc32c_pcl_intel_digest(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return __crc32c_pcl_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
out);
}
#endif /* CONFIG_X86_64 */
static struct shash_alg alg = {
.setkey = crc32c_intel_setkey,
.init = crc32c_intel_init,
.update = crc32c_intel_update,
.final = crc32c_intel_final,
.finup = crc32c_intel_finup,
.digest = crc32c_intel_digest,
.descsize = sizeof(u32),
.digestsize = CHKSUM_DIGEST_SIZE,
.base = {
.cra_name = "crc32c",
.cra_driver_name = "crc32c-intel",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
.cra_blocksize = CHKSUM_BLOCK_SIZE,
.cra_ctxsize = sizeof(u32),
.cra_module = THIS_MODULE,
.cra_init = crc32c_intel_cra_init,
}
};
static const struct x86_cpu_id crc32c_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_XMM4_2),
{}
};
MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);
static int __init crc32c_intel_mod_init(void)
{
if (!x86_match_cpu(crc32c_cpu_id))
return -ENODEV;
#ifdef CONFIG_X86_64
if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
alg.update = crc32c_pcl_intel_update;
alg.finup = crc32c_pcl_intel_finup;
alg.digest = crc32c_pcl_intel_digest;
}
#endif
return crypto_register_shash(&alg);
}
static void __exit crc32c_intel_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_init(crc32c_intel_mod_init);
module_exit(crc32c_intel_mod_fini);
MODULE_AUTHOR("Austin Zhang <austin.zhang@intel.com>, Kent Liu <kent.liu@intel.com>");
MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware.");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("crc32c");
MODULE_ALIAS_CRYPTO("crc32c-intel");
/*
* Cryptographic API.
*
* T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions
*
* Copyright (C) 2013 Intel Corporation
* Author: Tim Chen <tim.c.chen@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/types.h>
#include <linux/module.h>
#include <linux/crc-t10dif.h>
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <asm/fpu/api.h>
#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
size_t len);
struct chksum_desc_ctx {
__u16 crc;
};
/*
* Steps through buffer one byte at at time, calculates reflected
* crc using table.
*/
static int chksum_init(struct shash_desc *desc)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = 0; /*covered*/
return 0;
}
static int chksum_update(struct shash_desc *desc, const u8 *data, /*covered*/
unsigned int length)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
if (irq_fpu_usable()) { /*covered*/
kernel_fpu_begin(); /*covered*/
ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
kernel_fpu_end();
} else
ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
return 0; /*covered*/
}
static int chksum_final(struct shash_desc *desc, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
*(__u16 *)out = ctx->crc; /*covered*/
return 0;
}
static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, /*covered*/
u8 *out)
{
if (irq_fpu_usable()) { /*covered*/
kernel_fpu_begin(); /*covered*/
*(__u16 *)out = crc_t10dif_pcl(*crcp, data, len);
kernel_fpu_end();
} else
*(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
return 0; /*covered*/
}
static int chksum_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
return __chksum_finup(&ctx->crc, data, len, out); /*covered*/
}
static int chksum_digest(struct shash_desc *desc, const u8 *data,
unsigned int length, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
return __chksum_finup(&ctx->crc, data, length, out);
}
static struct shash_alg alg = {
.digestsize = CRC_T10DIF_DIGEST_SIZE,
.init = chksum_init,
.update = chksum_update,
.final = chksum_final,
.finup = chksum_finup,
.digest = chksum_digest,
.descsize = sizeof(struct chksum_desc_ctx),
.base = {
.cra_name = "crct10dif",
.cra_driver_name = "crct10dif-pclmul",
.cra_priority = 200,
.cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static const struct x86_cpu_id crct10dif_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
{}
};
MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id);
static int __init crct10dif_intel_mod_init(void)
{
if (!x86_match_cpu(crct10dif_cpu_id))
return -ENODEV;
return crypto_register_shash(&alg);
}
static void __exit crct10dif_intel_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_init(crct10dif_intel_mod_init);
module_exit(crct10dif_intel_mod_fini);
MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ.");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("crct10dif");
MODULE_ALIAS_CRYPTO("crct10dif-pclmul");
/*
* Glue Code for assembler optimized version of 3DES
*
* Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
* CTR part based on code (crypto/ctr.c) by:
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include <crypto/algapi.h>
#include <crypto/des.h>
#include <crypto/internal/skcipher.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
struct des3_ede_x86_ctx {
u32 enc_expkey[DES3_EDE_EXPKEY_WORDS];
u32 dec_expkey[DES3_EDE_EXPKEY_WORDS];
};
/* regular block cipher functions */
asmlinkage void des3_ede_x86_64_crypt_blk(const u32 *expkey, u8 *dst,
const u8 *src);
/* 3-way parallel cipher functions */
asmlinkage void des3_ede_x86_64_crypt_blk_3way(const u32 *expkey, u8 *dst,
const u8 *src);
static inline void des3_ede_enc_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
const u8 *src)
{
u32 *enc_ctx = ctx->enc_expkey; /*covered*/
des3_ede_x86_64_crypt_blk(enc_ctx, dst, src);
}
static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
const u8 *src)
{
u32 *dec_ctx = ctx->dec_expkey;
des3_ede_x86_64_crypt_blk(dec_ctx, dst, src); /*covered*/
}
static inline void des3_ede_enc_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
const u8 *src)
{
u32 *enc_ctx = ctx->enc_expkey;
des3_ede_x86_64_crypt_blk_3way(enc_ctx, dst, src);
}
static inline void des3_ede_dec_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
const u8 *src)
{
u32 *dec_ctx = ctx->dec_expkey;
des3_ede_x86_64_crypt_blk_3way(dec_ctx, dst, src);
}
static void des3_ede_x86_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
des3_ede_enc_blk(crypto_tfm_ctx(tfm), dst, src); /*covered*/
}
static void des3_ede_x86_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
des3_ede_dec_blk(crypto_tfm_ctx(tfm), dst, src);
}
static int ecb_crypt(struct skcipher_request *req, const u32 *expkey)
{
const unsigned int bsize = DES3_EDE_BLOCK_SIZE;
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false); /*covered*/
while ((nbytes = walk.nbytes)) {
u8 *wsrc = walk.src.virt.addr; /*covered*/
u8 *wdst = walk.dst.virt.addr;
/* Process four block batch */
if (nbytes >= bsize * 3) {
do {
des3_ede_x86_64_crypt_blk_3way(expkey, wdst, /*covered*/
wsrc);
wsrc += bsize * 3;
wdst += bsize * 3;
nbytes -= bsize * 3;
} while (nbytes >= bsize * 3);
if (nbytes < bsize) /*covered*/
goto done;
}
/* Handle leftovers */
do {
des3_ede_x86_64_crypt_blk(expkey, wdst, wsrc); /*covered*/
wsrc += bsize;
wdst += bsize;
nbytes -= bsize;
} while (nbytes >= bsize);
done:
err = skcipher_walk_done(&walk, nbytes); /*covered*/
}
return err; /*covered*/
}
static int ecb_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); /*covered*/
struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
return ecb_crypt(req, ctx->enc_expkey);
}
static int ecb_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); /*covered*/
struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
return ecb_crypt(req, ctx->dec_expkey);
}
static unsigned int __cbc_encrypt(struct des3_ede_x86_ctx *ctx,
struct skcipher_walk *walk)
{
unsigned int bsize = DES3_EDE_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr; /*covered*/
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 *iv = (u64 *)walk->iv;
do {
*dst = *src ^ *iv; /*covered*/
des3_ede_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
iv = dst;
src += 1;
dst += 1;
nbytes -= bsize;
} while (nbytes >= bsize);
*(u64 *)walk->iv = *iv; /*covered*/
return nbytes;
}
static int cbc_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); /*covered*/
struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes)) { /*covered*/
nbytes = __cbc_encrypt(ctx, &walk); /*covered*/
err = skcipher_walk_done(&walk, nbytes);
}
return err; /*covered*/
}
static unsigned int __cbc_decrypt(struct des3_ede_x86_ctx *ctx,
struct skcipher_walk *walk)
{
unsigned int bsize = DES3_EDE_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr; /*covered*/
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 ivs[3 - 1];
u64 last_iv;
/* Start of the last block. */
src += nbytes / bsize - 1;
dst += nbytes / bsize - 1;
last_iv = *src;
/* Process four block batch */
if (nbytes >= bsize * 3) {
do {
nbytes -= bsize * 3 - bsize;
src -= 3 - 1; /*covered*/
dst -= 3 - 1;
ivs[0] = src[0];
ivs[1] = src[1];
des3_ede_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
dst[1] ^= ivs[0];
dst[2] ^= ivs[1];
nbytes -= bsize;
if (nbytes < bsize)
goto done;
*dst ^= *(src - 1); /*covered*/
src -= 1;
dst -= 1;
} while (nbytes >= bsize * 3);
}
/* Handle leftovers */
for (;;) {
des3_ede_dec_blk(ctx, (u8 *)dst, (u8 *)src); /*covered*/
nbytes -= bsize;
if (nbytes < bsize)
break;
*dst ^= *(src - 1); /*covered*/
src -= 1;
dst -= 1;
}
done:
*dst ^= *(u64 *)walk->iv; /*covered*/
*(u64 *)walk->iv = last_iv;
return nbytes;
}
static int cbc_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); /*covered*/
struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes)) { /*covered*/
nbytes = __cbc_decrypt(ctx, &walk); /*covered*/
err = skcipher_walk_done(&walk, nbytes);
}
return err; /*covered*/
}
static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
struct skcipher_walk *walk)
{
u8 *ctrblk = walk->iv; /*covered*/
u8 keystream[DES3_EDE_BLOCK_SIZE];
u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
unsigned int nbytes = walk->nbytes;
des3_ede_enc_blk(ctx, keystream, ctrblk);
crypto_xor_cpy(dst, keystream, src, nbytes);
crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE);
}
static unsigned int __ctr_crypt(struct des3_ede_x86_ctx *ctx,
struct skcipher_walk *walk)
{
unsigned int bsize = DES3_EDE_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
__be64 *src = (__be64 *)walk->src.virt.addr; /*covered*/
__be64 *dst = (__be64 *)walk->dst.virt.addr;
u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
__be64 ctrblocks[3];
/* Process four block batch */
if (nbytes >= bsize * 3) {
do {
/* create ctrblks for parallel encrypt */
ctrblocks[0] = cpu_to_be64(ctrblk++); /*covered*/
ctrblocks[1] = cpu_to_be64(ctrblk++);
ctrblocks[2] = cpu_to_be64(ctrblk++);
des3_ede_enc_blk_3way(ctx, (u8 *)ctrblocks,
(u8 *)ctrblocks);
dst[0] = src[0] ^ ctrblocks[0];
dst[1] = src[1] ^ ctrblocks[1];
dst[2] = src[2] ^ ctrblocks[2];
src += 3;
dst += 3;
} while ((nbytes -= bsize * 3) >= bsize * 3);
if (nbytes < bsize) /*covered*/
goto done;
}
/* Handle leftovers */
do {
ctrblocks[0] = cpu_to_be64(ctrblk++); /*covered*/
des3_ede_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
dst[0] = src[0] ^ ctrblocks[0];
src += 1;
dst += 1;
} while ((nbytes -= bsize) >= bsize);
done:
*(__be64 *)walk->iv = cpu_to_be64(ctrblk); /*covered*/
return nbytes;
}
static int ctr_crypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); /*covered*/
struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) >= DES3_EDE_BLOCK_SIZE) { /*covered*/
nbytes = __ctr_crypt(ctx, &walk); /*covered*/
err = skcipher_walk_done(&walk, nbytes);
}
if (nbytes) { /*covered*/
ctr_crypt_final(ctx, &walk); /*covered*/
err = skcipher_walk_done(&walk, 0);
}
return err; /*covered*/
}
static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key, /*covered*/
unsigned int keylen)
{
struct des3_ede_x86_ctx *ctx = crypto_tfm_ctx(tfm);
u32 i, j, tmp;
int err;
/* Generate encryption context using generic implementation. */
err = __des3_ede_setkey(ctx->enc_expkey, &tfm->crt_flags, key, keylen); /*covered*/
if (err < 0)
return err;
/* Fix encryption context for this implementation and form decryption
* context. */
j = DES3_EDE_EXPKEY_WORDS - 2;
for (i = 0; i < DES3_EDE_EXPKEY_WORDS; i += 2, j -= 2) { /*covered*/
tmp = ror32(ctx->enc_expkey[i + 1], 4); /*covered*/
ctx->enc_expkey[i + 1] = tmp;
ctx->dec_expkey[j + 0] = ctx->enc_expkey[i + 0];
ctx->dec_expkey[j + 1] = tmp;
}
return 0;
}
static int des3_ede_x86_setkey_skcipher(struct crypto_skcipher *tfm,
const u8 *key,
unsigned int keylen)
{
return des3_ede_x86_setkey(&tfm->base, key, keylen); /*covered*/
}
static struct crypto_alg des3_ede_cipher = {
.cra_name = "des3_ede",
.cra_driver_name = "des3_ede-asm",
.cra_priority = 200,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
.cra_alignmask = 0,
.cra_module = THIS_MODULE,
.cra_u = {
.cipher = {
.cia_min_keysize = DES3_EDE_KEY_SIZE,
.cia_max_keysize = DES3_EDE_KEY_SIZE,
.cia_setkey = des3_ede_x86_setkey,
.cia_encrypt = des3_ede_x86_encrypt,
.cia_decrypt = des3_ede_x86_decrypt,
}
}
};
static struct skcipher_alg des3_ede_skciphers[] = {
{
.base.cra_name = "ecb(des3_ede)",
.base.cra_driver_name = "ecb-des3_ede-asm",
.base.cra_priority = 300,
.base.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = DES3_EDE_KEY_SIZE,
.max_keysize = DES3_EDE_KEY_SIZE,
.setkey = des3_ede_x86_setkey_skcipher,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
.base.cra_name = "cbc(des3_ede)",
.base.cra_driver_name = "cbc-des3_ede-asm",
.base.cra_priority = 300,
.base.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = DES3_EDE_KEY_SIZE,
.max_keysize = DES3_EDE_KEY_SIZE,
.ivsize = DES3_EDE_BLOCK_SIZE,
.setkey = des3_ede_x86_setkey_skcipher,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
}, {
.base.cra_name = "ctr(des3_ede)",
.base.cra_driver_name = "ctr-des3_ede-asm",
.base.cra_priority = 300,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = DES3_EDE_KEY_SIZE,
.max_keysize = DES3_EDE_KEY_SIZE,
.ivsize = DES3_EDE_BLOCK_SIZE,
.chunksize = DES3_EDE_BLOCK_SIZE,
.setkey = des3_ede_x86_setkey_skcipher,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
}
};
static bool is_blacklisted_cpu(void)
{
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return false;
if (boot_cpu_data.x86 == 0x0f) {
/*
* On Pentium 4, des3_ede-x86_64 is slower than generic C
* implementation because use of 64bit rotates (which are really
* slow on P4). Therefore blacklist P4s.
*/
return true;
}
return false;
}
static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
static int __init des3_ede_x86_init(void)
{
int err;
if (!force && is_blacklisted_cpu()) {
pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n");
return -ENODEV;
}
err = crypto_register_alg(&des3_ede_cipher);
if (err)
return err;
err = crypto_register_skciphers(des3_ede_skciphers,
ARRAY_SIZE(des3_ede_skciphers));
if (err)
crypto_unregister_alg(&des3_ede_cipher);
return err;
}
static void __exit des3_ede_x86_fini(void)
{
crypto_unregister_alg(&des3_ede_cipher);
crypto_unregister_skciphers(des3_ede_skciphers,
ARRAY_SIZE(des3_ede_skciphers));
}
module_init(des3_ede_x86_init);
module_exit(des3_ede_x86_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Triple DES EDE Cipher Algorithm, asm optimized");
MODULE_ALIAS_CRYPTO("des3_ede");
MODULE_ALIAS_CRYPTO("des3_ede-asm");
MODULE_AUTHOR("Jussi Kivilinna <jussi.kivilinna@iki.fi>");
/*
* Accelerated GHASH implementation with Intel PCLMULQDQ-NI
* instructions. This file contains glue code.
*
* Copyright (c) 2009 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*/
#include <linux/err.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/crypto.h>
#include <crypto/algapi.h>
#include <crypto/cryptd.h>
#include <crypto/gf128mul.h>
#include <crypto/internal/hash.h>
#include <asm/fpu/api.h>
#include <asm/cpu_device_id.h>
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
void clmul_ghash_mul(char *dst, const u128 *shash);
void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
const u128 *shash);
struct ghash_async_ctx {
struct cryptd_ahash *cryptd_tfm;
};
struct ghash_ctx {
u128 shash;
};
struct ghash_desc_ctx {
u8 buffer[GHASH_BLOCK_SIZE];
u32 bytes;
};
static int ghash_init(struct shash_desc *desc)
{
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); /*covered*/
memset(dctx, 0, sizeof(*dctx));
return 0;
}
static int ghash_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
be128 *x = (be128 *)key;
u64 a, b;
if (keylen != GHASH_BLOCK_SIZE) { /*covered*/
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); /*covered*/
return -EINVAL;
}
/* perform multiplication by 'x' in GF(2^128) */
a = be64_to_cpu(x->a); /*covered*/
b = be64_to_cpu(x->b);
ctx->shash.a = (b << 1) | (a >> 63);
ctx->shash.b = (a << 1) | (b >> 63); /*covered*/
if (a >> 63)
ctx->shash.b ^= ((u64)0xc2) << 56; /*covered*/
return 0; /*covered*/
}
static int ghash_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); /*covered*/
u8 *dst = dctx->buffer;
kernel_fpu_begin();
if (dctx->bytes) {
int n = min(srclen, dctx->bytes); /*covered*/
u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
dctx->bytes -= n;
srclen -= n;
while (n--)
*pos++ ^= *src++; /*covered*/
if (!dctx->bytes) /*covered*/
clmul_ghash_mul(dst, &ctx->shash); /*covered*/
}
clmul_ghash_update(dst, src, srclen, &ctx->shash); /*covered*/
kernel_fpu_end();
if (srclen & 0xf) {
src += srclen - (srclen & 0xf); /*covered*/
srclen &= 0xf;
dctx->bytes = GHASH_BLOCK_SIZE - srclen;
while (srclen--)
*dst++ ^= *src++; /*covered*/
}
return 0; /*covered*/
}
static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
{
u8 *dst = dctx->buffer;
if (dctx->bytes) { /*covered*/
u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
while (dctx->bytes--) /*covered*/
*tmp++ ^= 0;
kernel_fpu_begin();
clmul_ghash_mul(dst, &ctx->shash);
kernel_fpu_end();
}
dctx->bytes = 0; /*covered*/
}
static int ghash_final(struct shash_desc *desc, u8 *dst)
{
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); /*covered*/
u8 *buf = dctx->buffer;
ghash_flush(ctx, dctx); /*covered*/
memcpy(dst, buf, GHASH_BLOCK_SIZE);
return 0;
}
static struct shash_alg ghash_alg = {
.digestsize = GHASH_DIGEST_SIZE,
.init = ghash_init,
.update = ghash_update,
.final = ghash_final,
.setkey = ghash_setkey,
.descsize = sizeof(struct ghash_desc_ctx),
.base = {
.cra_name = "__ghash",
.cra_driver_name = "__ghash-pclmulqdqni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = GHASH_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct ghash_ctx),
.cra_module = THIS_MODULE,
},
};
static int ghash_async_init(struct ahash_request *req)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); /*covered*/
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
struct ahash_request *cryptd_req = ahash_request_ctx(req);
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
desc->tfm = child;
desc->flags = req->base.flags;
return crypto_shash_init(desc); /*covered*/
}
static int ghash_async_update(struct ahash_request *req)
{
struct ahash_request *cryptd_req = ahash_request_ctx(req); /*covered*/
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
if (!irq_fpu_usable() ||
(in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { /*covered*/
memcpy(cryptd_req, req, sizeof(*req));
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
return crypto_ahash_update(cryptd_req); /*covered*/
} else {
struct shash_desc *desc = cryptd_shash_desc(cryptd_req); /*covered*/
return shash_ahash_update(req, desc);
}
}
static int ghash_async_final(struct ahash_request *req)
{
struct ahash_request *cryptd_req = ahash_request_ctx(req); /*covered*/
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
if (!irq_fpu_usable() ||
(in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { /*covered*/
memcpy(cryptd_req, req, sizeof(*req));
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
return crypto_ahash_final(cryptd_req); /*covered*/
} else {
struct shash_desc *desc = cryptd_shash_desc(cryptd_req); /*covered*/
return crypto_shash_final(desc, req->result);
}
}
static int ghash_async_import(struct ahash_request *req, const void *in)
{
struct ahash_request *cryptd_req = ahash_request_ctx(req);
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
ghash_async_init(req);
memcpy(dctx, in, sizeof(*dctx));
return 0;
}
static int ghash_async_export(struct ahash_request *req, void *out)
{
struct ahash_request *cryptd_req = ahash_request_ctx(req);
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
memcpy(out, dctx, sizeof(*dctx));
return 0;
}
static int ghash_async_digest(struct ahash_request *req)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); /*covered*/
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
struct ahash_request *cryptd_req = ahash_request_ctx(req);
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
if (!irq_fpu_usable() ||
(in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { /*covered*/
memcpy(cryptd_req, req, sizeof(*req));
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
return crypto_ahash_digest(cryptd_req); /*covered*/
} else {
struct shash_desc *desc = cryptd_shash_desc(cryptd_req); /*covered*/
struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
desc->tfm = child;
desc->flags = req->base.flags;
return shash_ahash_digest(req, desc);
}
}
static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
unsigned int keylen)
{
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
struct crypto_ahash *child = &ctx->cryptd_tfm->base; /*covered*/
int err;
crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm)
& CRYPTO_TFM_REQ_MASK);
err = crypto_ahash_setkey(child, key, keylen);
crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child)
& CRYPTO_TFM_RES_MASK);
return err;
}
static int ghash_async_init_tfm(struct crypto_tfm *tfm)
{
struct cryptd_ahash *cryptd_tfm;
struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", /*covered*/
CRYPTO_ALG_INTERNAL,
CRYPTO_ALG_INTERNAL);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
ctx->cryptd_tfm = cryptd_tfm; /*covered*/
crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
sizeof(struct ahash_request) +
crypto_ahash_reqsize(&cryptd_tfm->base));
return 0; /*covered*/
}
static void ghash_async_exit_tfm(struct crypto_tfm *tfm)
{
struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
cryptd_free_ahash(ctx->cryptd_tfm); /*covered*/
}
static struct ahash_alg ghash_async_alg = {
.init = ghash_async_init,
.update = ghash_async_update,
.final = ghash_async_final,
.setkey = ghash_async_setkey,
.digest = ghash_async_digest,
.export = ghash_async_export,
.import = ghash_async_import,
.halg = {
.digestsize = GHASH_DIGEST_SIZE,
.statesize = sizeof(struct ghash_desc_ctx),
.base = {
.cra_name = "ghash",
.cra_driver_name = "ghash-clmulni",
.cra_priority = 400,
.cra_ctxsize = sizeof(struct ghash_async_ctx),
.cra_flags = CRYPTO_ALG_ASYNC,
.cra_blocksize = GHASH_BLOCK_SIZE,
.cra_module = THIS_MODULE,
.cra_init = ghash_async_init_tfm,
.cra_exit = ghash_async_exit_tfm,
},
},
};
static const struct x86_cpu_id pcmul_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), /* Pickle-Mickle-Duck */
{}
};
MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);
static int __init ghash_pclmulqdqni_mod_init(void)
{
int err;
if (!x86_match_cpu(pcmul_cpu_id))
return -ENODEV;
err = crypto_register_shash(&ghash_alg);
if (err)
goto err_out;
err = crypto_register_ahash(&ghash_async_alg);
if (err)
goto err_shash;
return 0;
err_shash:
crypto_unregister_shash(&ghash_alg);
err_out:
return err;
}
static void __exit ghash_pclmulqdqni_mod_exit(void)
{
crypto_unregister_ahash(&ghash_async_alg);
crypto_unregister_shash(&ghash_alg);
}
module_init(ghash_pclmulqdqni_mod_init);
module_exit(ghash_pclmulqdqni_mod_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("GHASH Message Digest Algorithm, "
"accelerated by PCLMULQDQ-NI");
MODULE_ALIAS_CRYPTO("ghash");
/*
* Shared glue code for 128bit block ciphers
*
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
* CTR part based on code (crypto/ctr.c) by:
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <linux/module.h>
#include <crypto/b128ops.h>
#include <crypto/gf128mul.h>
#include <crypto/internal/skcipher.h>
#include <crypto/xts.h>
#include <asm/crypto/glue_helper.h>
int glue_ecb_req_128bit(const struct common_glue_ctx *gctx,
struct skcipher_request *req)
{
void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); /*covered*/
const unsigned int bsize = 128 / 8;
struct skcipher_walk walk;
bool fpu_enabled = false;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes)) {
const u8 *src = walk.src.virt.addr; /*covered*/
u8 *dst = walk.dst.virt.addr;
unsigned int func_bytes;
unsigned int i;
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, /*covered*/
&walk, fpu_enabled, nbytes);
for (i = 0; i < gctx->num_funcs; i++) { /*covered*/
func_bytes = bsize * gctx->funcs[i].num_blocks; /*covered*/
if (nbytes < func_bytes) /*covered*/
continue;
/* Process multi-block batch */
do {
gctx->funcs[i].fn_u.ecb(ctx, dst, src); /*covered*/
src += func_bytes;
dst += func_bytes;
nbytes -= func_bytes;
} while (nbytes >= func_bytes);
if (nbytes < bsize) /*covered*/
break;
}
err = skcipher_walk_done(&walk, nbytes); /*covered*/
}
glue_fpu_end(fpu_enabled); /*covered*/
return err; /*covered*/
}
EXPORT_SYMBOL_GPL(glue_ecb_req_128bit);
int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn,
struct skcipher_request *req)
{
void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); /*covered*/
const unsigned int bsize = 128 / 8;
struct skcipher_walk walk;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes)) {
const u128 *src = (u128 *)walk.src.virt.addr; /*covered*/
u128 *dst = (u128 *)walk.dst.virt.addr;
u128 *iv = (u128 *)walk.iv;
do {
u128_xor(dst, src, iv); /*covered*/
fn(ctx, (u8 *)dst, (u8 *)dst);
iv = dst;
src++;
dst++;
nbytes -= bsize;
} while (nbytes >= bsize);
*(u128 *)walk.iv = *iv; /*covered*/
err = skcipher_walk_done(&walk, nbytes);
}
return err; /*covered*/
}
EXPORT_SYMBOL_GPL(glue_cbc_encrypt_req_128bit);
int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx,
struct skcipher_request *req)
{
void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); /*covered*/
const unsigned int bsize = 128 / 8;
struct skcipher_walk walk;
bool fpu_enabled = false;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes)) {
const u128 *src = walk.src.virt.addr; /*covered*/
u128 *dst = walk.dst.virt.addr;
unsigned int func_bytes, num_blocks;
unsigned int i;
u128 last_iv;
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
&walk, fpu_enabled, nbytes);
/* Start of the last block. */
src += nbytes / bsize - 1; /*covered*/
dst += nbytes / bsize - 1;
last_iv = *src;
for (i = 0; i < gctx->num_funcs; i++) { /*covered*/
num_blocks = gctx->funcs[i].num_blocks; /*covered*/
func_bytes = bsize * num_blocks;
if (nbytes < func_bytes)
continue;
/* Process multi-block batch */
do {
src -= num_blocks - 1; /*covered*/
dst -= num_blocks - 1;
gctx->funcs[i].fn_u.cbc(ctx, dst, src);
nbytes -= func_bytes;
if (nbytes < bsize)
goto done;
u128_xor(dst, dst, --src); /*covered*/
dst--;
} while (nbytes >= func_bytes);
}
done:
u128_xor(dst, dst, (u128 *)walk.iv); /*covered*/
*(u128 *)walk.iv = last_iv;
err = skcipher_walk_done(&walk, nbytes);
}
glue_fpu_end(fpu_enabled); /*covered*/
return err; /*covered*/
}
EXPORT_SYMBOL_GPL(glue_cbc_decrypt_req_128bit);
int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
struct skcipher_request *req)
{
void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); /*covered*/
const unsigned int bsize = 128 / 8;
struct skcipher_walk walk;
bool fpu_enabled = false;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) >= bsize) {
const u128 *src = walk.src.virt.addr; /*covered*/
u128 *dst = walk.dst.virt.addr;
unsigned int func_bytes, num_blocks;
unsigned int i;
le128 ctrblk;
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
&walk, fpu_enabled, nbytes);
be128_to_le128(&ctrblk, (be128 *)walk.iv); /*covered*/
for (i = 0; i < gctx->num_funcs; i++) { /*covered*/
num_blocks = gctx->funcs[i].num_blocks; /*covered*/
func_bytes = bsize * num_blocks;
if (nbytes < func_bytes)
continue;
/* Process multi-block batch */
do {
gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk); /*covered*/
src += num_blocks; /*covered*/
dst += num_blocks;
nbytes -= func_bytes;
} while (nbytes >= func_bytes);
if (nbytes < bsize) /*covered*/
break;
}
le128_to_be128((be128 *)walk.iv, &ctrblk); /*covered*/
err = skcipher_walk_done(&walk, nbytes);
}
glue_fpu_end(fpu_enabled); /*covered*/
if (nbytes) { /*covered*/
le128 ctrblk;
u128 tmp;
be128_to_le128(&ctrblk, (be128 *)walk.iv); /*covered*/
memcpy(&tmp, walk.src.virt.addr, nbytes);
gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, &tmp, &tmp,
&ctrblk);
memcpy(walk.dst.virt.addr, &tmp, nbytes);
le128_to_be128((be128 *)walk.iv, &ctrblk);
err = skcipher_walk_done(&walk, 0);
}
return err; /*covered*/
}
EXPORT_SYMBOL_GPL(glue_ctr_req_128bit);
static unsigned int __glue_xts_req_128bit(const struct common_glue_ctx *gctx,
void *ctx,
struct skcipher_walk *walk)
{
const unsigned int bsize = 128 / 8;
unsigned int nbytes = walk->nbytes;
u128 *src = walk->src.virt.addr;
u128 *dst = walk->dst.virt.addr;
unsigned int num_blocks, func_bytes;
unsigned int i;
/* Process multi-block batch */
for (i = 0; i < gctx->num_funcs; i++) {
num_blocks = gctx->funcs[i].num_blocks;
func_bytes = bsize * num_blocks;
if (nbytes >= func_bytes) {
do {
gctx->funcs[i].fn_u.xts(ctx, dst, src,
walk->iv);
src += num_blocks;
dst += num_blocks;
nbytes -= func_bytes;
} while (nbytes >= func_bytes);
if (nbytes < bsize)
goto done;
}
}
done:
return nbytes;
}
int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
struct skcipher_request *req,
common_glue_func_t tweak_fn, void *tweak_ctx,
void *crypt_ctx)
{
const unsigned int bsize = 128 / 8;
struct skcipher_walk walk;
bool fpu_enabled = false;
unsigned int nbytes;
int err;
err = skcipher_walk_virt(&walk, req, false);
nbytes = walk.nbytes;
if (!nbytes)
return err;
/* set minimum length to bsize, for tweak_fn */
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
&walk, fpu_enabled,
nbytes < bsize ? bsize : nbytes);
/* calculate first value of T */
tweak_fn(tweak_ctx, walk.iv, walk.iv);
while (nbytes) {
nbytes = __glue_xts_req_128bit(gctx, crypt_ctx, &walk);
err = skcipher_walk_done(&walk, nbytes);
nbytes = walk.nbytes;
}
glue_fpu_end(fpu_enabled);
return err;
}
EXPORT_SYMBOL_GPL(glue_xts_req_128bit);
void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv,
common_glue_func_t fn)
{
le128 ivblk = *iv;
/* generate next IV */
gf128mul_x_ble(iv, &ivblk);
/* CC <- T xor C */
u128_xor(dst, src, (u128 *)&ivblk);
/* PP <- D(Key2,CC) */
fn(ctx, (u8 *)dst, (u8 *)dst);
/* P <- T xor PP */
u128_xor(dst, dst, (u128 *)&ivblk);
}
EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit_one);
MODULE_LICENSE("GPL");
/*
* The MORUS-1280 Authenticated-Encryption Algorithm
* Glue for AVX2 implementation
*
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <crypto/internal/aead.h>
#include <crypto/morus1280_glue.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
#include <asm/cpu_device_id.h>
asmlinkage void crypto_morus1280_avx2_init(void *state, const void *key,
const void *iv);
asmlinkage void crypto_morus1280_avx2_ad(void *state, const void *data,
unsigned int length);
asmlinkage void crypto_morus1280_avx2_enc(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus1280_avx2_dec(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus1280_avx2_enc_tail(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus1280_avx2_dec_tail(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus1280_avx2_final(void *state, void *tag_xor,
u64 assoclen, u64 cryptlen);
MORUS1280_DECLARE_ALGS(avx2, "morus1280-avx2", 400); /*covered*/
static int __init crypto_morus1280_avx2_module_init(void)
{
if (!boot_cpu_has(X86_FEATURE_AVX2) ||
!boot_cpu_has(X86_FEATURE_OSXSAVE) ||
!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
return -ENODEV;
return crypto_register_aeads(crypto_morus1280_avx2_algs,
ARRAY_SIZE(crypto_morus1280_avx2_algs));
}
static void __exit crypto_morus1280_avx2_module_exit(void)
{
crypto_unregister_aeads(crypto_morus1280_avx2_algs,
ARRAY_SIZE(crypto_morus1280_avx2_algs));
}
module_init(crypto_morus1280_avx2_module_init);
module_exit(crypto_morus1280_avx2_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- AVX2 implementation");
MODULE_ALIAS_CRYPTO("morus1280");
MODULE_ALIAS_CRYPTO("morus1280-avx2");
/*
* The MORUS-1280 Authenticated-Encryption Algorithm
* Glue for SSE2 implementation
*
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <crypto/internal/aead.h>
#include <crypto/morus1280_glue.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
#include <asm/cpu_device_id.h>
asmlinkage void crypto_morus1280_sse2_init(void *state, const void *key,
const void *iv);
asmlinkage void crypto_morus1280_sse2_ad(void *state, const void *data,
unsigned int length);
asmlinkage void crypto_morus1280_sse2_enc(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus1280_sse2_dec(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus1280_sse2_enc_tail(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus1280_sse2_dec_tail(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus1280_sse2_final(void *state, void *tag_xor,
u64 assoclen, u64 cryptlen);
MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350); /*covered*/
static int __init crypto_morus1280_sse2_module_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2) ||
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
return crypto_register_aeads(crypto_morus1280_sse2_algs,
ARRAY_SIZE(crypto_morus1280_sse2_algs));
}
static void __exit crypto_morus1280_sse2_module_exit(void)
{
crypto_unregister_aeads(crypto_morus1280_sse2_algs,
ARRAY_SIZE(crypto_morus1280_sse2_algs));
}
module_init(crypto_morus1280_sse2_module_init);
module_exit(crypto_morus1280_sse2_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- SSE2 implementation");
MODULE_ALIAS_CRYPTO("morus1280");
MODULE_ALIAS_CRYPTO("morus1280-sse2");
/*
* The MORUS-1280 Authenticated-Encryption Algorithm
* Common x86 SIMD glue skeleton
*
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <crypto/cryptd.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/morus1280_glue.h>
#include <crypto/scatterwalk.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <asm/fpu/api.h>
struct morus1280_state {
struct morus1280_block s[MORUS_STATE_BLOCKS];
};
struct morus1280_ops {
int (*skcipher_walk_init)(struct skcipher_walk *walk,
struct aead_request *req, bool atomic);
void (*crypt_blocks)(void *state, const void *src, void *dst,
unsigned int length);
void (*crypt_tail)(void *state, const void *src, void *dst,
unsigned int length);
};
static void crypto_morus1280_glue_process_ad(
struct morus1280_state *state,
const struct morus1280_glue_ops *ops,
struct scatterlist *sg_src, unsigned int assoclen)
{
struct scatter_walk walk;
struct morus1280_block buf;
unsigned int pos = 0;
scatterwalk_start(&walk, sg_src);
while (assoclen != 0) {
unsigned int size = scatterwalk_clamp(&walk, assoclen);
unsigned int left = size;
void *mapped = scatterwalk_map(&walk);
const u8 *src = (const u8 *)mapped;
if (pos + size >= MORUS1280_BLOCK_SIZE) {
if (pos > 0) {
unsigned int fill = MORUS1280_BLOCK_SIZE - pos;
memcpy(buf.bytes + pos, src, fill);
ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE);
pos = 0;
left -= fill;
src += fill;
}
ops->ad(state, src, left);
src += left & ~(MORUS1280_BLOCK_SIZE - 1);
left &= MORUS1280_BLOCK_SIZE - 1;
}
memcpy(buf.bytes + pos, src, left);
pos += left;
assoclen -= size;
scatterwalk_unmap(mapped);
scatterwalk_advance(&walk, size);
scatterwalk_done(&walk, 0, assoclen);
}
if (pos > 0) {
memset(buf.bytes + pos, 0, MORUS1280_BLOCK_SIZE - pos);
ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE);
}
}
static void crypto_morus1280_glue_process_crypt(struct morus1280_state *state,
struct morus1280_ops ops,
struct aead_request *req)
{
struct skcipher_walk walk;
u8 *cursor_src, *cursor_dst;
unsigned int chunksize, base;
ops.skcipher_walk_init(&walk, req, false);
while (walk.nbytes) {
cursor_src = walk.src.virt.addr;
cursor_dst = walk.dst.virt.addr;
chunksize = walk.nbytes;
ops.crypt_blocks(state, cursor_src, cursor_dst, chunksize);
base = chunksize & ~(MORUS1280_BLOCK_SIZE - 1);
cursor_src += base;
cursor_dst += base;
chunksize &= MORUS1280_BLOCK_SIZE - 1;
if (chunksize > 0)
ops.crypt_tail(state, cursor_src, cursor_dst,
chunksize);
skcipher_walk_done(&walk, 0);
}
}
int crypto_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key,
unsigned int keylen)
{
struct morus1280_ctx *ctx = crypto_aead_ctx(aead);
if (keylen == MORUS1280_BLOCK_SIZE) {
memcpy(ctx->key.bytes, key, MORUS1280_BLOCK_SIZE);
} else if (keylen == MORUS1280_BLOCK_SIZE / 2) {
memcpy(ctx->key.bytes, key, keylen);
memcpy(ctx->key.bytes + keylen, key, keylen);
} else {
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
return 0;
}
EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setkey);
int crypto_morus1280_glue_setauthsize(struct crypto_aead *tfm,
unsigned int authsize)
{
return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setauthsize);
static void crypto_morus1280_glue_crypt(struct aead_request *req,
struct morus1280_ops ops,
unsigned int cryptlen,
struct morus1280_block *tag_xor)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
struct morus1280_state state;
kernel_fpu_begin();
ctx->ops->init(&state, &ctx->key, req->iv);
crypto_morus1280_glue_process_ad(&state, ctx->ops, req->src, req->assoclen);
crypto_morus1280_glue_process_crypt(&state, ops, req);
ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen);
kernel_fpu_end();
}
int crypto_morus1280_glue_encrypt(struct aead_request *req)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
struct morus1280_ops OPS = {
.skcipher_walk_init = skcipher_walk_aead_encrypt,
.crypt_blocks = ctx->ops->enc,
.crypt_tail = ctx->ops->enc_tail,
};
struct morus1280_block tag = {};
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen;
crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag);
scatterwalk_map_and_copy(tag.bytes, req->dst,
req->assoclen + cryptlen, authsize, 1);
return 0;
}
EXPORT_SYMBOL_GPL(crypto_morus1280_glue_encrypt);
int crypto_morus1280_glue_decrypt(struct aead_request *req)
{
static const u8 zeros[MORUS1280_BLOCK_SIZE] = {};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
struct morus1280_ops OPS = {
.skcipher_walk_init = skcipher_walk_aead_decrypt,
.crypt_blocks = ctx->ops->dec,
.crypt_tail = ctx->ops->dec_tail,
};
struct morus1280_block tag;
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen - authsize;
scatterwalk_map_and_copy(tag.bytes, req->src,
req->assoclen + cryptlen, authsize, 0);
crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag);
return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
}
EXPORT_SYMBOL_GPL(crypto_morus1280_glue_decrypt);
void crypto_morus1280_glue_init_ops(struct crypto_aead *aead,
const struct morus1280_glue_ops *ops)
{
struct morus1280_ctx *ctx = crypto_aead_ctx(aead);
ctx->ops = ops; /*covered*/
}
EXPORT_SYMBOL_GPL(crypto_morus1280_glue_init_ops);
int cryptd_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key,
unsigned int keylen)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
}
EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_setkey);
int cryptd_morus1280_glue_setauthsize(struct crypto_aead *aead,
unsigned int authsize)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
}
EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_setauthsize);
int cryptd_morus1280_glue_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
aead = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() ||
!cryptd_aead_queued(cryptd_tfm)))
aead = cryptd_aead_child(cryptd_tfm);
aead_request_set_tfm(req, aead);
return crypto_aead_encrypt(req);
}
EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_encrypt);
int cryptd_morus1280_glue_decrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
aead = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() ||
!cryptd_aead_queued(cryptd_tfm)))
aead = cryptd_aead_child(cryptd_tfm);
aead_request_set_tfm(req, aead);
return crypto_aead_decrypt(req);
}
EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_decrypt);
int cryptd_morus1280_glue_init_tfm(struct crypto_aead *aead)
{
struct cryptd_aead *cryptd_tfm;
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
const char *name = crypto_aead_alg(aead)->base.cra_driver_name; /*covered*/
char internal_name[CRYPTO_MAX_ALG_NAME];
if (snprintf(internal_name, CRYPTO_MAX_ALG_NAME, "__%s", name)
>= CRYPTO_MAX_ALG_NAME)
return -ENAMETOOLONG;
cryptd_tfm = cryptd_alloc_aead(internal_name, CRYPTO_ALG_INTERNAL, /*covered*/
CRYPTO_ALG_INTERNAL);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
*ctx = cryptd_tfm; /*covered*/
crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
return 0; /*covered*/
}
EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_init_tfm);
void cryptd_morus1280_glue_exit_tfm(struct crypto_aead *aead)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
cryptd_free_aead(*ctx); /*covered*/
}
EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_exit_tfm);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("MORUS-1280 AEAD mode -- glue for x86 optimizations");
/*
* The MORUS-640 Authenticated-Encryption Algorithm
* Glue for SSE2 implementation
*
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <crypto/internal/aead.h>
#include <crypto/morus640_glue.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
#include <asm/cpu_device_id.h>
asmlinkage void crypto_morus640_sse2_init(void *state, const void *key,
const void *iv);
asmlinkage void crypto_morus640_sse2_ad(void *state, const void *data,
unsigned int length);
asmlinkage void crypto_morus640_sse2_enc(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus640_sse2_dec(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus640_sse2_enc_tail(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus640_sse2_dec_tail(void *state, const void *src,
void *dst, unsigned int length);
asmlinkage void crypto_morus640_sse2_final(void *state, void *tag_xor,
u64 assoclen, u64 cryptlen);
MORUS640_DECLARE_ALGS(sse2, "morus640-sse2", 400); /*covered*/
static int __init crypto_morus640_sse2_module_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2) ||
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
return crypto_register_aeads(crypto_morus640_sse2_algs,
ARRAY_SIZE(crypto_morus640_sse2_algs));
}
static void __exit crypto_morus640_sse2_module_exit(void)
{
crypto_unregister_aeads(crypto_morus640_sse2_algs,
ARRAY_SIZE(crypto_morus640_sse2_algs));
}
module_init(crypto_morus640_sse2_module_init);
module_exit(crypto_morus640_sse2_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("MORUS-640 AEAD algorithm -- SSE2 implementation");
MODULE_ALIAS_CRYPTO("morus640");
MODULE_ALIAS_CRYPTO("morus640-sse2");
/*
* The MORUS-640 Authenticated-Encryption Algorithm
* Common x86 SIMD glue skeleton
*
* Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <crypto/cryptd.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/morus640_glue.h>
#include <crypto/scatterwalk.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <asm/fpu/api.h>
struct morus640_state {
struct morus640_block s[MORUS_STATE_BLOCKS];
};
struct morus640_ops {
int (*skcipher_walk_init)(struct skcipher_walk *walk,
struct aead_request *req, bool atomic);
void (*crypt_blocks)(void *state, const void *src, void *dst,
unsigned int length);
void (*crypt_tail)(void *state, const void *src, void *dst,
unsigned int length);
};
static void crypto_morus640_glue_process_ad(
struct morus640_state *state,
const struct morus640_glue_ops *ops,
struct scatterlist *sg_src, unsigned int assoclen)
{
struct scatter_walk walk;
struct morus640_block buf;
unsigned int pos = 0;
scatterwalk_start(&walk, sg_src);
while (assoclen != 0) {
unsigned int size = scatterwalk_clamp(&walk, assoclen);
unsigned int left = size;
void *mapped = scatterwalk_map(&walk);
const u8 *src = (const u8 *)mapped;
if (pos + size >= MORUS640_BLOCK_SIZE) {
if (pos > 0) {
unsigned int fill = MORUS640_BLOCK_SIZE - pos;
memcpy(buf.bytes + pos, src, fill);
ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE);
pos = 0;
left -= fill;
src += fill;
}
ops->ad(state, src, left);
src += left & ~(MORUS640_BLOCK_SIZE - 1);
left &= MORUS640_BLOCK_SIZE - 1;
}
memcpy(buf.bytes + pos, src, left);
pos += left;
assoclen -= size;
scatterwalk_unmap(mapped);
scatterwalk_advance(&walk, size);
scatterwalk_done(&walk, 0, assoclen);
}
if (pos > 0) {
memset(buf.bytes + pos, 0, MORUS640_BLOCK_SIZE - pos);
ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE); /*covered*/
}
}
static void crypto_morus640_glue_process_crypt(struct morus640_state *state,
struct morus640_ops ops,
struct aead_request *req)
{
struct skcipher_walk walk;
u8 *cursor_src, *cursor_dst;
unsigned int chunksize, base;
ops.skcipher_walk_init(&walk, req, false); /*covered*/
while (walk.nbytes) { /*covered*/
cursor_src = walk.src.virt.addr; /*covered*/
cursor_dst = walk.dst.virt.addr;
chunksize = walk.nbytes;
ops.crypt_blocks(state, cursor_src, cursor_dst, chunksize);
base = chunksize & ~(MORUS640_BLOCK_SIZE - 1); /*covered*/
cursor_src += base;
cursor_dst += base;
chunksize &= MORUS640_BLOCK_SIZE - 1;
if (chunksize > 0)
ops.crypt_tail(state, cursor_src, cursor_dst,
chunksize);
skcipher_walk_done(&walk, 0); /*covered*/
}
}
int crypto_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key,
unsigned int keylen)
{
struct morus640_ctx *ctx = crypto_aead_ctx(aead);
if (keylen != MORUS640_BLOCK_SIZE) { /*covered*/
crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); /*covered*/
return -EINVAL;
}
memcpy(ctx->key.bytes, key, MORUS640_BLOCK_SIZE);
return 0; /*covered*/
}
EXPORT_SYMBOL_GPL(crypto_morus640_glue_setkey);
int crypto_morus640_glue_setauthsize(struct crypto_aead *tfm,
unsigned int authsize)
{
return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(crypto_morus640_glue_setauthsize);
static void crypto_morus640_glue_crypt(struct aead_request *req,
struct morus640_ops ops,
unsigned int cryptlen,
struct morus640_block *tag_xor)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req); /*covered*/
struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
struct morus640_state state;
kernel_fpu_begin();
ctx->ops->init(&state, &ctx->key, req->iv);
crypto_morus640_glue_process_ad(&state, ctx->ops, req->src, req->assoclen); /*covered*/
crypto_morus640_glue_process_crypt(&state, ops, req);
ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen);
kernel_fpu_end();
}
int crypto_morus640_glue_encrypt(struct aead_request *req)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req); /*covered*/
struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
struct morus640_ops OPS = {
.skcipher_walk_init = skcipher_walk_aead_encrypt,
.crypt_blocks = ctx->ops->enc,
.crypt_tail = ctx->ops->enc_tail,
};
struct morus640_block tag = {};
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen;
crypto_morus640_glue_crypt(req, OPS, cryptlen, &tag);
scatterwalk_map_and_copy(tag.bytes, req->dst,
req->assoclen + cryptlen, authsize, 1);
return 0;
}
EXPORT_SYMBOL_GPL(crypto_morus640_glue_encrypt);
int crypto_morus640_glue_decrypt(struct aead_request *req)
{
static const u8 zeros[MORUS640_BLOCK_SIZE] = {};
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
struct morus640_ops OPS = {
.skcipher_walk_init = skcipher_walk_aead_decrypt,
.crypt_blocks = ctx->ops->dec,
.crypt_tail = ctx->ops->dec_tail,
};
struct morus640_block tag;
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen - authsize;
scatterwalk_map_and_copy(tag.bytes, req->src,
req->assoclen + cryptlen, authsize, 0);
crypto_morus640_glue_crypt(req, OPS, cryptlen, &tag);
return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
}
EXPORT_SYMBOL_GPL(crypto_morus640_glue_decrypt);
void crypto_morus640_glue_init_ops(struct crypto_aead *aead,
const struct morus640_glue_ops *ops)
{
struct morus640_ctx *ctx = crypto_aead_ctx(aead);
ctx->ops = ops; /*covered*/
}
EXPORT_SYMBOL_GPL(crypto_morus640_glue_init_ops);
int cryptd_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key,
unsigned int keylen)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx; /*covered*/
return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
}
EXPORT_SYMBOL_GPL(cryptd_morus640_glue_setkey);
int cryptd_morus640_glue_setauthsize(struct crypto_aead *aead,
unsigned int authsize)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
}
EXPORT_SYMBOL_GPL(cryptd_morus640_glue_setauthsize);
int cryptd_morus640_glue_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req); /*covered*/
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
aead = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() || /*covered*/
!cryptd_aead_queued(cryptd_tfm)))
aead = cryptd_aead_child(cryptd_tfm); /*covered*/
aead_request_set_tfm(req, aead); /*covered*/
return crypto_aead_encrypt(req); /*covered*/
}
EXPORT_SYMBOL_GPL(cryptd_morus640_glue_encrypt);
int cryptd_morus640_glue_decrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
struct cryptd_aead *cryptd_tfm = *ctx;
aead = &cryptd_tfm->base;
if (irq_fpu_usable() && (!in_atomic() ||
!cryptd_aead_queued(cryptd_tfm)))
aead = cryptd_aead_child(cryptd_tfm);
aead_request_set_tfm(req, aead);
return crypto_aead_decrypt(req);
}
EXPORT_SYMBOL_GPL(cryptd_morus640_glue_decrypt);
int cryptd_morus640_glue_init_tfm(struct crypto_aead *aead)
{
struct cryptd_aead *cryptd_tfm;
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
const char *name = crypto_aead_alg(aead)->base.cra_driver_name; /*covered*/
char internal_name[CRYPTO_MAX_ALG_NAME];
if (snprintf(internal_name, CRYPTO_MAX_ALG_NAME, "__%s", name)
>= CRYPTO_MAX_ALG_NAME)
return -ENAMETOOLONG;
cryptd_tfm = cryptd_alloc_aead(internal_name, CRYPTO_ALG_INTERNAL, /*covered*/
CRYPTO_ALG_INTERNAL);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
*ctx = cryptd_tfm; /*covered*/
crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
return 0; /*covered*/
}
EXPORT_SYMBOL_GPL(cryptd_morus640_glue_init_tfm);
void cryptd_morus640_glue_exit_tfm(struct crypto_aead *aead)
{
struct cryptd_aead **ctx = crypto_aead_ctx(aead);
cryptd_free_aead(*ctx);
}
EXPORT_SYMBOL_GPL(cryptd_morus640_glue_exit_tfm);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("MORUS-640 AEAD mode -- glue for x86 optimizations");
/*
* Poly1305 authenticator algorithm, RFC7539, SIMD glue code
*
* Copyright (C) 2015 Martin Willi
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/poly1305.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
#include <asm/simd.h>
struct poly1305_simd_desc_ctx {
struct poly1305_desc_ctx base;
/* derived key u set? */
bool uset;
#ifdef CONFIG_AS_AVX2
/* derived keys r^3, r^4 set? */
bool wset;
#endif
/* derived Poly1305 key r^2 */
u32 u[5];
/* ... silently appended r^3 and r^4 when using AVX2 */
};
asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src,
const u32 *r, unsigned int blocks);
asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
unsigned int blocks, const u32 *u);
#ifdef CONFIG_AS_AVX2
asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
unsigned int blocks, const u32 *u);
static bool poly1305_use_avx2;
#endif
static int poly1305_simd_init(struct shash_desc *desc)
{
struct poly1305_simd_desc_ctx *sctx = shash_desc_ctx(desc);
sctx->uset = false; /*covered*/
#ifdef CONFIG_AS_AVX2
sctx->wset = false;
#endif
return crypto_poly1305_init(desc);
}
static void poly1305_simd_mult(u32 *a, const u32 *b)
{
u8 m[POLY1305_BLOCK_SIZE];
memset(m, 0, sizeof(m)); /*covered*/
/* The poly1305 block function adds a hi-bit to the accumulator which
* we don't need for key multiplication; compensate for it. */
a[4] -= 1 << 24;
poly1305_block_sse2(a, m, b, 1);
}
static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
const u8 *src, unsigned int srclen)
{
struct poly1305_simd_desc_ctx *sctx;
unsigned int blocks, datalen;
BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx, base));
sctx = container_of(dctx, struct poly1305_simd_desc_ctx, base);
if (unlikely(!dctx->sset)) { /*covered*/
datalen = crypto_poly1305_setdesckey(dctx, src, srclen); /*covered*/
src += srclen - datalen;
srclen = datalen;
}
#ifdef CONFIG_AS_AVX2
if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) { /*covered*/
if (unlikely(!sctx->wset)) { /*covered*/
if (!sctx->uset) { /*covered*/
memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); /*covered*/
poly1305_simd_mult(sctx->u, dctx->r.r);
sctx->uset = true;
}
memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u)); /*covered*/
poly1305_simd_mult(sctx->u + 5, dctx->r.r);
memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
poly1305_simd_mult(sctx->u + 10, dctx->r.r);
sctx->wset = true;
}
blocks = srclen / (POLY1305_BLOCK_SIZE * 4); /*covered*/
poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks,
sctx->u);
src += POLY1305_BLOCK_SIZE * 4 * blocks;
srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
}
#endif
if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) { /*covered*/
if (unlikely(!sctx->uset)) { /*covered*/
memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
poly1305_simd_mult(sctx->u, dctx->r.r);
sctx->uset = true;
}
blocks = srclen / (POLY1305_BLOCK_SIZE * 2); /*covered*/
poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks,
sctx->u);
src += POLY1305_BLOCK_SIZE * 2 * blocks;
srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
}
if (srclen >= POLY1305_BLOCK_SIZE) { /*covered*/
poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1); /*covered*/
srclen -= POLY1305_BLOCK_SIZE;
}
return srclen; /*covered*/
}
static int poly1305_simd_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); /*covered*/
unsigned int bytes;
/* kernel_fpu_begin/end is costly, use fallback for small updates */
if (srclen <= 288 || !may_use_simd()) /*covered*/
return crypto_poly1305_update(desc, src, srclen); /*covered*/
kernel_fpu_begin();
if (unlikely(dctx->buflen)) {
bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen); /*covered*/
memcpy(dctx->buf + dctx->buflen, src, bytes);
src += bytes;
srclen -= bytes;
dctx->buflen += bytes;
if (dctx->buflen == POLY1305_BLOCK_SIZE) {
poly1305_simd_blocks(dctx, dctx->buf, /*covered*/
POLY1305_BLOCK_SIZE);
dctx->buflen = 0;
}
}
if (likely(srclen >= POLY1305_BLOCK_SIZE)) { /*covered*/
bytes = poly1305_simd_blocks(dctx, src, srclen); /*covered*/
src += srclen - bytes;
srclen = bytes;
}
kernel_fpu_end(); /*covered*/
if (unlikely(srclen)) {
dctx->buflen = srclen; /*covered*/
memcpy(dctx->buf, src, srclen);
}
return 0;
}
static struct shash_alg alg = {
.digestsize = POLY1305_DIGEST_SIZE,
.init = poly1305_simd_init,
.update = poly1305_simd_update,
.final = crypto_poly1305_final,
.descsize = sizeof(struct poly1305_simd_desc_ctx),
.base = {
.cra_name = "poly1305",
.cra_driver_name = "poly1305-simd",
.cra_priority = 300,
.cra_blocksize = POLY1305_BLOCK_SIZE,
.cra_module = THIS_MODULE,
},
};
static int __init poly1305_simd_mod_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2))
return -ENODEV;
#ifdef CONFIG_AS_AVX2
poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
boot_cpu_has(X86_FEATURE_AVX2) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
if (poly1305_use_avx2)
alg.descsize += 10 * sizeof(u32);
#endif
return crypto_register_shash(&alg);
}
static void __exit poly1305_simd_mod_exit(void)
{
crypto_unregister_shash(&alg);
}
module_init(poly1305_simd_mod_init);
module_exit(poly1305_simd_mod_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
MODULE_DESCRIPTION("Poly1305 authenticator");
MODULE_ALIAS_CRYPTO("poly1305");
MODULE_ALIAS_CRYPTO("poly1305-simd");
/*
* Glue Code for x86_64/AVX2 assembler optimized version of Serpent
*
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/internal/simd.h>
#include <crypto/serpent.h>
#include <crypto/xts.h>
#include <asm/crypto/glue_helper.h>
#include <asm/crypto/serpent-avx.h>
#define SERPENT_AVX2_PARALLEL_BLOCKS 16
/* 16-way AVX2 parallel cipher functions */
asmlinkage void serpent_ecb_enc_16way(struct serpent_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void serpent_ecb_dec_16way(struct serpent_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void serpent_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src);
asmlinkage void serpent_ctr_16way(void *ctx, u128 *dst, const u128 *src,
le128 *iv);
asmlinkage void serpent_xts_enc_16way(struct serpent_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
asmlinkage void serpent_xts_dec_16way(struct serpent_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
const u8 *key, unsigned int keylen)
{
return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); /*covered*/
}
static const struct common_glue_ctx serpent_enc = {
.num_funcs = 3,
.fpu_blocks_limit = 8,
.funcs = { {
.num_blocks = 16,
.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_16way) }
}, {
.num_blocks = 8,
.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
} }
};
static const struct common_glue_ctx serpent_ctr = {
.num_funcs = 3,
.fpu_blocks_limit = 8,
.funcs = { {
.num_blocks = 16,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_16way) }
}, {
.num_blocks = 8,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) }
}, {
.num_blocks = 1,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) }
} }
};
static const struct common_glue_ctx serpent_enc_xts = {
.num_funcs = 3,
.fpu_blocks_limit = 8,
.funcs = { {
.num_blocks = 16,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_16way) }
}, {
.num_blocks = 8,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) }
} }
};
static const struct common_glue_ctx serpent_dec = {
.num_funcs = 3,
.fpu_blocks_limit = 8,
.funcs = { {
.num_blocks = 16,
.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_16way) }
}, {
.num_blocks = 8,
.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
} }
};
static const struct common_glue_ctx serpent_dec_cbc = {
.num_funcs = 3,
.fpu_blocks_limit = 8,
.funcs = { {
.num_blocks = 16,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_16way) }
}, {
.num_blocks = 8,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) }
}, {
.num_blocks = 1,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
} }
};
static const struct common_glue_ctx serpent_dec_xts = {
.num_funcs = 3,
.fpu_blocks_limit = 8,
.funcs = { {
.num_blocks = 16,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_16way) }
}, {
.num_blocks = 8,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) }
} }
};
static int ecb_encrypt(struct skcipher_request *req)
{
return glue_ecb_req_128bit(&serpent_enc, req);
}
static int ecb_decrypt(struct skcipher_request *req)
{
return glue_ecb_req_128bit(&serpent_dec, req); /*covered*/
}
static int cbc_encrypt(struct skcipher_request *req)
{
return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
req);
}
static int cbc_decrypt(struct skcipher_request *req)
{
return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req);
}
static int ctr_crypt(struct skcipher_request *req)
{
return glue_ctr_req_128bit(&serpent_ctr, req);
}
static int xts_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
return glue_xts_req_128bit(&serpent_enc_xts, req,
XTS_TWEAK_CAST(__serpent_encrypt),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static int xts_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
return glue_xts_req_128bit(&serpent_dec_xts, req,
XTS_TWEAK_CAST(__serpent_encrypt),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static struct skcipher_alg serpent_algs[] = {
{
.base.cra_name = "__ecb(serpent)",
.base.cra_driver_name = "__ecb-serpent-avx2",
.base.cra_priority = 600,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = SERPENT_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct serpent_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = SERPENT_MIN_KEY_SIZE,
.max_keysize = SERPENT_MAX_KEY_SIZE,
.setkey = serpent_setkey_skcipher,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
.base.cra_name = "__cbc(serpent)",
.base.cra_driver_name = "__cbc-serpent-avx2",
.base.cra_priority = 600,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = SERPENT_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct serpent_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = SERPENT_MIN_KEY_SIZE,
.max_keysize = SERPENT_MAX_KEY_SIZE,
.ivsize = SERPENT_BLOCK_SIZE,
.setkey = serpent_setkey_skcipher,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
}, {
.base.cra_name = "__ctr(serpent)",
.base.cra_driver_name = "__ctr-serpent-avx2",
.base.cra_priority = 600,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct serpent_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = SERPENT_MIN_KEY_SIZE,
.max_keysize = SERPENT_MAX_KEY_SIZE,
.ivsize = SERPENT_BLOCK_SIZE,
.chunksize = SERPENT_BLOCK_SIZE,
.setkey = serpent_setkey_skcipher,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
}, {
.base.cra_name = "__xts(serpent)",
.base.cra_driver_name = "__xts-serpent-avx2",
.base.cra_priority = 600,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = SERPENT_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct serpent_xts_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = 2 * SERPENT_MIN_KEY_SIZE,
.max_keysize = 2 * SERPENT_MAX_KEY_SIZE,
.ivsize = SERPENT_BLOCK_SIZE,
.setkey = xts_serpent_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
},
};
static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];
static int __init init(void)
{
const char *feature_name;
if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
pr_info("AVX2 instructions are not detected.\n");
return -ENODEV;
}
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
&feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
return simd_register_skciphers_compat(serpent_algs,
ARRAY_SIZE(serpent_algs),
serpent_simd_algs);
}
static void __exit fini(void)
{
simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs),
serpent_simd_algs);
}
module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX2 optimized");
MODULE_ALIAS_CRYPTO("serpent");
MODULE_ALIAS_CRYPTO("serpent-asm");
/*
* Cryptographic API.
*
* Glue code for the SHA1 Secure Hash Algorithm assembler implementation using
* Supplemental SSE3 instructions.
*
* This file is based on sha1_generic.c
*
* Copyright (c) Alan Smithee.
* Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
* Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
* Copyright (c) Mathias Krause <minipli@googlemail.com>
* Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <crypto/sha1_base.h>
#include <asm/fpu/api.h>
typedef void (sha1_transform_fn)(u32 *digest, const char *data,
unsigned int rounds);
static int sha1_update(struct shash_desc *desc, const u8 *data, /*covered*/
unsigned int len, sha1_transform_fn *sha1_xform)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
if (!irq_fpu_usable() || /*covered*/
(sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE) /*covered*/
return crypto_sha1_update(desc, data, len); /*covered*/
/* make sure casting to sha1_block_fn() is safe */
BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0);
kernel_fpu_begin(); /*covered*/
sha1_base_do_update(desc, data, len, /*covered*/
(sha1_block_fn *)sha1_xform);
kernel_fpu_end(); /*covered*/
return 0;
}
static int sha1_finup(struct shash_desc *desc, const u8 *data, /*covered*/
unsigned int len, u8 *out, sha1_transform_fn *sha1_xform)
{
if (!irq_fpu_usable()) /*covered*/
return crypto_sha1_finup(desc, data, len, out);
kernel_fpu_begin(); /*covered*/
if (len)
sha1_base_do_update(desc, data, len, /*covered*/
(sha1_block_fn *)sha1_xform);
sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_xform); /*covered*/
kernel_fpu_end();
return sha1_base_finish(desc, out); /*covered*/
}
asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
unsigned int rounds);
static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
return sha1_update(desc, data, len, /*covered*/
(sha1_transform_fn *) sha1_transform_ssse3);
}
static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return sha1_finup(desc, data, len, out, /*covered*/
(sha1_transform_fn *) sha1_transform_ssse3);
}
/* Add padding and return the message digest. */
static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
{
return sha1_ssse3_finup(desc, NULL, 0, out); /*covered*/
}
static struct shash_alg sha1_ssse3_alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_base_init,
.update = sha1_ssse3_update,
.final = sha1_ssse3_final,
.finup = sha1_ssse3_finup,
.descsize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-ssse3",
.cra_priority = 150,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static int register_sha1_ssse3(void)
{
if (boot_cpu_has(X86_FEATURE_SSSE3))
return crypto_register_shash(&sha1_ssse3_alg);
return 0;
}
static void unregister_sha1_ssse3(void)
{
if (boot_cpu_has(X86_FEATURE_SSSE3))
crypto_unregister_shash(&sha1_ssse3_alg);
}
#ifdef CONFIG_AS_AVX
asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
unsigned int rounds);
static int sha1_avx_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
return sha1_update(desc, data, len, /*covered*/
(sha1_transform_fn *) sha1_transform_avx);
}
static int sha1_avx_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return sha1_finup(desc, data, len, out, /*covered*/
(sha1_transform_fn *) sha1_transform_avx);
}
static int sha1_avx_final(struct shash_desc *desc, u8 *out)
{
return sha1_avx_finup(desc, NULL, 0, out); /*covered*/
}
static struct shash_alg sha1_avx_alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_base_init,
.update = sha1_avx_update,
.final = sha1_avx_final,
.finup = sha1_avx_finup,
.descsize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-avx",
.cra_priority = 160,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static bool avx_usable(void)
{
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
if (boot_cpu_has(X86_FEATURE_AVX))
pr_info("AVX detected but unusable.\n");
return false;
}
return true;
}
static int register_sha1_avx(void)
{
if (avx_usable())
return crypto_register_shash(&sha1_avx_alg);
return 0;
}
static void unregister_sha1_avx(void)
{
if (avx_usable())
crypto_unregister_shash(&sha1_avx_alg);
}
#else /* CONFIG_AS_AVX */
static inline int register_sha1_avx(void) { return 0; }
static inline void unregister_sha1_avx(void) { }
#endif /* CONFIG_AS_AVX */
#if defined(CONFIG_AS_AVX2) && (CONFIG_AS_AVX)
#define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */
asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
unsigned int rounds);
static bool avx2_usable(void)
{
if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
&& boot_cpu_has(X86_FEATURE_BMI1)
&& boot_cpu_has(X86_FEATURE_BMI2))
return true;
return false;
}
static void sha1_apply_transform_avx2(u32 *digest, const char *data,
unsigned int rounds)
{
/* Select the optimal transform based on data block size */
if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE) /*covered*/
sha1_transform_avx2(digest, data, rounds); /*covered*/
else
sha1_transform_avx(digest, data, rounds); /*covered*/
} /*covered*/
static int sha1_avx2_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
return sha1_update(desc, data, len, /*covered*/
(sha1_transform_fn *) sha1_apply_transform_avx2);
}
static int sha1_avx2_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return sha1_finup(desc, data, len, out, /*covered*/
(sha1_transform_fn *) sha1_apply_transform_avx2);
}
static int sha1_avx2_final(struct shash_desc *desc, u8 *out)
{
return sha1_avx2_finup(desc, NULL, 0, out); /*covered*/
}
static struct shash_alg sha1_avx2_alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_base_init,
.update = sha1_avx2_update,
.final = sha1_avx2_final,
.finup = sha1_avx2_finup,
.descsize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-avx2",
.cra_priority = 170,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static int register_sha1_avx2(void)
{
if (avx2_usable())
return crypto_register_shash(&sha1_avx2_alg);
return 0;
}
static void unregister_sha1_avx2(void)
{
if (avx2_usable())
crypto_unregister_shash(&sha1_avx2_alg);
}
#else
static inline int register_sha1_avx2(void) { return 0; }
static inline void unregister_sha1_avx2(void) { }
#endif
#ifdef CONFIG_AS_SHA1_NI
asmlinkage void sha1_ni_transform(u32 *digest, const char *data,
unsigned int rounds);
static int sha1_ni_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
return sha1_update(desc, data, len,
(sha1_transform_fn *) sha1_ni_transform);
}
static int sha1_ni_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return sha1_finup(desc, data, len, out,
(sha1_transform_fn *) sha1_ni_transform);
}
static int sha1_ni_final(struct shash_desc *desc, u8 *out)
{
return sha1_ni_finup(desc, NULL, 0, out);
}
static struct shash_alg sha1_ni_alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_base_init,
.update = sha1_ni_update,
.final = sha1_ni_final,
.finup = sha1_ni_finup,
.descsize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-ni",
.cra_priority = 250,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static int register_sha1_ni(void)
{
if (boot_cpu_has(X86_FEATURE_SHA_NI))
return crypto_register_shash(&sha1_ni_alg);
return 0;
}
static void unregister_sha1_ni(void)
{
if (boot_cpu_has(X86_FEATURE_SHA_NI))
crypto_unregister_shash(&sha1_ni_alg);
}
#else
static inline int register_sha1_ni(void) { return 0; }
static inline void unregister_sha1_ni(void) { }
#endif
static int __init sha1_ssse3_mod_init(void)
{
if (register_sha1_ssse3())
goto fail;
if (register_sha1_avx()) {
unregister_sha1_ssse3();
goto fail;
}
if (register_sha1_avx2()) {
unregister_sha1_avx();
unregister_sha1_ssse3();
goto fail;
}
if (register_sha1_ni()) {
unregister_sha1_avx2();
unregister_sha1_avx();
unregister_sha1_ssse3();
goto fail;
}
return 0;
fail:
return -ENODEV;
}
static void __exit sha1_ssse3_mod_fini(void)
{
unregister_sha1_ni();
unregister_sha1_avx2();
unregister_sha1_avx();
unregister_sha1_ssse3();
}
module_init(sha1_ssse3_mod_init);
module_exit(sha1_ssse3_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, Supplemental SSE3 accelerated");
MODULE_ALIAS_CRYPTO("sha1");
MODULE_ALIAS_CRYPTO("sha1-ssse3");
MODULE_ALIAS_CRYPTO("sha1-avx");
MODULE_ALIAS_CRYPTO("sha1-avx2");
#ifdef CONFIG_AS_SHA1_NI
MODULE_ALIAS_CRYPTO("sha1-ni");
#endif
/*
* Cryptographic API.
*
* Glue code for the SHA256 Secure Hash Algorithm assembler
* implementation using supplemental SSE3 / AVX / AVX2 instructions.
*
* This file is based on sha256_generic.c
*
* Copyright (C) 2013 Intel Corporation.
*
* Author:
* Tim Chen <tim.c.chen@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <crypto/sha256_base.h>
#include <asm/fpu/api.h>
#include <linux/string.h>
asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data,
u64 rounds);
typedef void (sha256_transform_fn)(u32 *digest, const char *data, u64 rounds);
static int sha256_update(struct shash_desc *desc, const u8 *data, /*covered*/
unsigned int len, sha256_transform_fn *sha256_xform)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
if (!irq_fpu_usable() || /*covered*/
(sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) /*covered*/
return crypto_sha256_update(desc, data, len); /*covered*/
/* make sure casting to sha256_block_fn() is safe */
BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
kernel_fpu_begin(); /*covered*/
sha256_base_do_update(desc, data, len, /*covered*/
(sha256_block_fn *)sha256_xform);
kernel_fpu_end(); /*covered*/
return 0;
}
static int sha256_finup(struct shash_desc *desc, const u8 *data, /*covered*/
unsigned int len, u8 *out, sha256_transform_fn *sha256_xform)
{
if (!irq_fpu_usable()) /*covered*/
return crypto_sha256_finup(desc, data, len, out);
kernel_fpu_begin(); /*covered*/
if (len)
sha256_base_do_update(desc, data, len, /*covered*/
(sha256_block_fn *)sha256_xform);
sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_xform); /*covered*/
kernel_fpu_end();
return sha256_base_finish(desc, out); /*covered*/
}
static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
return sha256_update(desc, data, len, sha256_transform_ssse3); /*covered*/
}
static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return sha256_finup(desc, data, len, out, sha256_transform_ssse3); /*covered*/
}
/* Add padding and return the message digest. */
static int sha256_ssse3_final(struct shash_desc *desc, u8 *out)
{
return sha256_ssse3_finup(desc, NULL, 0, out); /*covered*/
}
static struct shash_alg sha256_ssse3_algs[] = { {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_base_init,
.update = sha256_ssse3_update,
.final = sha256_ssse3_final,
.finup = sha256_ssse3_finup,
.descsize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-ssse3",
.cra_priority = 150,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.digestsize = SHA224_DIGEST_SIZE,
.init = sha224_base_init,
.update = sha256_ssse3_update,
.final = sha256_ssse3_final,
.finup = sha256_ssse3_finup,
.descsize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-ssse3",
.cra_priority = 150,
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static int register_sha256_ssse3(void)
{
if (boot_cpu_has(X86_FEATURE_SSSE3))
return crypto_register_shashes(sha256_ssse3_algs,
ARRAY_SIZE(sha256_ssse3_algs));
return 0;
}
static void unregister_sha256_ssse3(void)
{
if (boot_cpu_has(X86_FEATURE_SSSE3))
crypto_unregister_shashes(sha256_ssse3_algs,
ARRAY_SIZE(sha256_ssse3_algs));
}
#ifdef CONFIG_AS_AVX
asmlinkage void sha256_transform_avx(u32 *digest, const char *data,
u64 rounds);
static int sha256_avx_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
return sha256_update(desc, data, len, sha256_transform_avx); /*covered*/
}
static int sha256_avx_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return sha256_finup(desc, data, len, out, sha256_transform_avx); /*covered*/
}
static int sha256_avx_final(struct shash_desc *desc, u8 *out)
{
return sha256_avx_finup(desc, NULL, 0, out); /*covered*/
}
static struct shash_alg sha256_avx_algs[] = { {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_base_init,
.update = sha256_avx_update,
.final = sha256_avx_final,
.finup = sha256_avx_finup,
.descsize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-avx",
.cra_priority = 160,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.digestsize = SHA224_DIGEST_SIZE,
.init = sha224_base_init,
.update = sha256_avx_update,
.final = sha256_avx_final,
.finup = sha256_avx_finup,
.descsize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-avx",
.cra_priority = 160,
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static bool avx_usable(void)
{
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
if (boot_cpu_has(X86_FEATURE_AVX))
pr_info("AVX detected but unusable.\n");
return false;
}
return true;
}
static int register_sha256_avx(void)
{
if (avx_usable())
return crypto_register_shashes(sha256_avx_algs,
ARRAY_SIZE(sha256_avx_algs));
return 0;
}
static void unregister_sha256_avx(void)
{
if (avx_usable())
crypto_unregister_shashes(sha256_avx_algs,
ARRAY_SIZE(sha256_avx_algs));
}
#else
static inline int register_sha256_avx(void) { return 0; }
static inline void unregister_sha256_avx(void) { }
#endif
#if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX)
asmlinkage void sha256_transform_rorx(u32 *digest, const char *data,
u64 rounds);
static int sha256_avx2_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
return sha256_update(desc, data, len, sha256_transform_rorx); /*covered*/
}
static int sha256_avx2_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return sha256_finup(desc, data, len, out, sha256_transform_rorx); /*covered*/
}
static int sha256_avx2_final(struct shash_desc *desc, u8 *out)
{
return sha256_avx2_finup(desc, NULL, 0, out); /*covered*/
}
static struct shash_alg sha256_avx2_algs[] = { {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_base_init,
.update = sha256_avx2_update,
.final = sha256_avx2_final,
.finup = sha256_avx2_finup,
.descsize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-avx2",
.cra_priority = 170,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.digestsize = SHA224_DIGEST_SIZE,
.init = sha224_base_init,
.update = sha256_avx2_update,
.final = sha256_avx2_final,
.finup = sha256_avx2_finup,
.descsize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-avx2",
.cra_priority = 170,
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static bool avx2_usable(void)
{
if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) &&
boot_cpu_has(X86_FEATURE_BMI2))
return true;
return false;
}
static int register_sha256_avx2(void)
{
if (avx2_usable())
return crypto_register_shashes(sha256_avx2_algs,
ARRAY_SIZE(sha256_avx2_algs));
return 0;
}
static void unregister_sha256_avx2(void)
{
if (avx2_usable())
crypto_unregister_shashes(sha256_avx2_algs,
ARRAY_SIZE(sha256_avx2_algs));
}
#else
static inline int register_sha256_avx2(void) { return 0; }
static inline void unregister_sha256_avx2(void) { }
#endif
#ifdef CONFIG_AS_SHA256_NI
asmlinkage void sha256_ni_transform(u32 *digest, const char *data,
u64 rounds); /*unsigned int rounds);*/
static int sha256_ni_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
return sha256_update(desc, data, len, sha256_ni_transform);
}
static int sha256_ni_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return sha256_finup(desc, data, len, out, sha256_ni_transform);
}
static int sha256_ni_final(struct shash_desc *desc, u8 *out)
{
return sha256_ni_finup(desc, NULL, 0, out);
}
static struct shash_alg sha256_ni_algs[] = { {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_base_init,
.update = sha256_ni_update,
.final = sha256_ni_final,
.finup = sha256_ni_finup,
.descsize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-ni",
.cra_priority = 250,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.digestsize = SHA224_DIGEST_SIZE,
.init = sha224_base_init,
.update = sha256_ni_update,
.final = sha256_ni_final,
.finup = sha256_ni_finup,
.descsize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-ni",
.cra_priority = 250,
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static int register_sha256_ni(void)
{
if (boot_cpu_has(X86_FEATURE_SHA_NI))
return crypto_register_shashes(sha256_ni_algs,
ARRAY_SIZE(sha256_ni_algs));
return 0;
}
static void unregister_sha256_ni(void)
{
if (boot_cpu_has(X86_FEATURE_SHA_NI))
crypto_unregister_shashes(sha256_ni_algs,
ARRAY_SIZE(sha256_ni_algs));
}
#else
static inline int register_sha256_ni(void) { return 0; }
static inline void unregister_sha256_ni(void) { }
#endif
static int __init sha256_ssse3_mod_init(void)
{
if (register_sha256_ssse3())
goto fail;
if (register_sha256_avx()) {
unregister_sha256_ssse3();
goto fail;
}
if (register_sha256_avx2()) {
unregister_sha256_avx();
unregister_sha256_ssse3();
goto fail;
}
if (register_sha256_ni()) {
unregister_sha256_avx2();
unregister_sha256_avx();
unregister_sha256_ssse3();
goto fail;
}
return 0;
fail:
return -ENODEV;
}
static void __exit sha256_ssse3_mod_fini(void)
{
unregister_sha256_ni();
unregister_sha256_avx2();
unregister_sha256_avx();
unregister_sha256_ssse3();
}
module_init(sha256_ssse3_mod_init);
module_exit(sha256_ssse3_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated");
MODULE_ALIAS_CRYPTO("sha256");
MODULE_ALIAS_CRYPTO("sha256-ssse3");
MODULE_ALIAS_CRYPTO("sha256-avx");
MODULE_ALIAS_CRYPTO("sha256-avx2");
MODULE_ALIAS_CRYPTO("sha224");
MODULE_ALIAS_CRYPTO("sha224-ssse3");
MODULE_ALIAS_CRYPTO("sha224-avx");
MODULE_ALIAS_CRYPTO("sha224-avx2");
#ifdef CONFIG_AS_SHA256_NI
MODULE_ALIAS_CRYPTO("sha256-ni");
MODULE_ALIAS_CRYPTO("sha224-ni");
#endif
/*
* Cryptographic API.
*
* Glue code for the SHA512 Secure Hash Algorithm assembler
* implementation using supplemental SSE3 / AVX / AVX2 instructions.
*
* This file is based on sha512_generic.c
*
* Copyright (C) 2013 Intel Corporation
* Author: Tim Chen <tim.c.chen@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <crypto/sha512_base.h>
#include <asm/fpu/api.h>
#include <linux/string.h>
asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data,
u64 rounds);
typedef void (sha512_transform_fn)(u64 *digest, const char *data, u64 rounds);
static int sha512_update(struct shash_desc *desc, const u8 *data, /*covered*/
unsigned int len, sha512_transform_fn *sha512_xform)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
if (!irq_fpu_usable() || /*covered*/
(sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE) /*covered*/
return crypto_sha512_update(desc, data, len); /*covered*/
/* make sure casting to sha512_block_fn() is safe */
BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0);
kernel_fpu_begin(); /*covered*/
sha512_base_do_update(desc, data, len, /*covered*/
(sha512_block_fn *)sha512_xform);
kernel_fpu_end(); /*covered*/
return 0;
}
static int sha512_finup(struct shash_desc *desc, const u8 *data, /*covered*/
unsigned int len, u8 *out, sha512_transform_fn *sha512_xform)
{
if (!irq_fpu_usable()) /*covered*/
return crypto_sha512_finup(desc, data, len, out);
kernel_fpu_begin(); /*covered*/
if (len)
sha512_base_do_update(desc, data, len, /*covered*/
(sha512_block_fn *)sha512_xform);
sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_xform); /*covered*/
kernel_fpu_end();
return sha512_base_finish(desc, out); /*covered*/
}
static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
return sha512_update(desc, data, len, sha512_transform_ssse3); /*covered*/
}
static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return sha512_finup(desc, data, len, out, sha512_transform_ssse3); /*covered*/
}
/* Add padding and return the message digest. */
static int sha512_ssse3_final(struct shash_desc *desc, u8 *out)
{
return sha512_ssse3_finup(desc, NULL, 0, out); /*covered*/
}
static struct shash_alg sha512_ssse3_algs[] = { {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_base_init,
.update = sha512_ssse3_update,
.final = sha512_ssse3_final,
.finup = sha512_ssse3_finup,
.descsize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-ssse3",
.cra_priority = 150,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.digestsize = SHA384_DIGEST_SIZE,
.init = sha384_base_init,
.update = sha512_ssse3_update,
.final = sha512_ssse3_final,
.finup = sha512_ssse3_finup,
.descsize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-ssse3",
.cra_priority = 150,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static int register_sha512_ssse3(void)
{
if (boot_cpu_has(X86_FEATURE_SSSE3))
return crypto_register_shashes(sha512_ssse3_algs,
ARRAY_SIZE(sha512_ssse3_algs));
return 0;
}
static void unregister_sha512_ssse3(void)
{
if (boot_cpu_has(X86_FEATURE_SSSE3))
crypto_unregister_shashes(sha512_ssse3_algs,
ARRAY_SIZE(sha512_ssse3_algs));
}
#ifdef CONFIG_AS_AVX
asmlinkage void sha512_transform_avx(u64 *digest, const char *data,
u64 rounds);
static bool avx_usable(void)
{
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
if (boot_cpu_has(X86_FEATURE_AVX))
pr_info("AVX detected but unusable.\n");
return false;
}
return true;
}
static int sha512_avx_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
return sha512_update(desc, data, len, sha512_transform_avx); /*covered*/
}
static int sha512_avx_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return sha512_finup(desc, data, len, out, sha512_transform_avx); /*covered*/
}
/* Add padding and return the message digest. */
static int sha512_avx_final(struct shash_desc *desc, u8 *out)
{
return sha512_avx_finup(desc, NULL, 0, out); /*covered*/
}
static struct shash_alg sha512_avx_algs[] = { {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_base_init,
.update = sha512_avx_update,
.final = sha512_avx_final,
.finup = sha512_avx_finup,
.descsize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-avx",
.cra_priority = 160,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.digestsize = SHA384_DIGEST_SIZE,
.init = sha384_base_init,
.update = sha512_avx_update,
.final = sha512_avx_final,
.finup = sha512_avx_finup,
.descsize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-avx",
.cra_priority = 160,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static int register_sha512_avx(void)
{
if (avx_usable())
return crypto_register_shashes(sha512_avx_algs,
ARRAY_SIZE(sha512_avx_algs));
return 0;
}
static void unregister_sha512_avx(void)
{
if (avx_usable())
crypto_unregister_shashes(sha512_avx_algs,
ARRAY_SIZE(sha512_avx_algs));
}
#else
static inline int register_sha512_avx(void) { return 0; }
static inline void unregister_sha512_avx(void) { }
#endif
#if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX)
asmlinkage void sha512_transform_rorx(u64 *digest, const char *data,
u64 rounds);
static int sha512_avx2_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
return sha512_update(desc, data, len, sha512_transform_rorx); /*covered*/
}
static int sha512_avx2_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
return sha512_finup(desc, data, len, out, sha512_transform_rorx); /*covered*/
}
/* Add padding and return the message digest. */
static int sha512_avx2_final(struct shash_desc *desc, u8 *out)
{
return sha512_avx2_finup(desc, NULL, 0, out); /*covered*/
}
static struct shash_alg sha512_avx2_algs[] = { {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_base_init,
.update = sha512_avx2_update,
.final = sha512_avx2_final,
.finup = sha512_avx2_finup,
.descsize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-avx2",
.cra_priority = 170,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.digestsize = SHA384_DIGEST_SIZE,
.init = sha384_base_init,
.update = sha512_avx2_update,
.final = sha512_avx2_final,
.finup = sha512_avx2_finup,
.descsize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-avx2",
.cra_priority = 170,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static bool avx2_usable(void)
{
if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) &&
boot_cpu_has(X86_FEATURE_BMI2))
return true;
return false;
}
static int register_sha512_avx2(void)
{
if (avx2_usable())
return crypto_register_shashes(sha512_avx2_algs,
ARRAY_SIZE(sha512_avx2_algs));
return 0;
}
static void unregister_sha512_avx2(void)
{
if (avx2_usable())
crypto_unregister_shashes(sha512_avx2_algs,
ARRAY_SIZE(sha512_avx2_algs));
}
#else
static inline int register_sha512_avx2(void) { return 0; }
static inline void unregister_sha512_avx2(void) { }
#endif
static int __init sha512_ssse3_mod_init(void)
{
if (register_sha512_ssse3())
goto fail;
if (register_sha512_avx()) {
unregister_sha512_ssse3();
goto fail;
}
if (register_sha512_avx2()) {
unregister_sha512_avx();
unregister_sha512_ssse3();
goto fail;
}
return 0;
fail:
return -ENODEV;
}
static void __exit sha512_ssse3_mod_fini(void)
{
unregister_sha512_avx2();
unregister_sha512_avx();
unregister_sha512_ssse3();
}
module_init(sha512_ssse3_mod_init);
module_exit(sha512_ssse3_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated");
MODULE_ALIAS_CRYPTO("sha512");
MODULE_ALIAS_CRYPTO("sha512-ssse3");
MODULE_ALIAS_CRYPTO("sha512-avx");
MODULE_ALIAS_CRYPTO("sha512-avx2");
MODULE_ALIAS_CRYPTO("sha384");
MODULE_ALIAS_CRYPTO("sha384-ssse3");
MODULE_ALIAS_CRYPTO("sha384-avx");
MODULE_ALIAS_CRYPTO("sha384-avx2");
/*
* Glue Code for AVX assembler version of Twofish Cipher
*
* Copyright (C) 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/internal/simd.h>
#include <crypto/twofish.h>
#include <crypto/xts.h>
#include <asm/crypto/glue_helper.h>
#include <asm/crypto/twofish.h>
#define TWOFISH_PARALLEL_BLOCKS 8
/* 8-way parallel cipher functions */
asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
const u8 *key, unsigned int keylen)
{
return twofish_setkey(&tfm->base, key, keylen); /*covered*/
}
static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src)
{
__twofish_enc_blk_3way(ctx, dst, src, false);
}
static void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(twofish_enc_blk));
}
static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(twofish_dec_blk));
}
struct twofish_xts_ctx {
struct twofish_ctx tweak_ctx;
struct twofish_ctx crypt_ctx;
};
static int xts_twofish_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen)
{
struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
u32 *flags = &tfm->base.crt_flags; /*covered*/
int err;
err = xts_verify_key(tfm, key, keylen); /*covered*/
if (err)
return err;
/* first half of xts-key is for crypt */
err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); /*covered*/
if (err) /*covered*/
return err;
/* second half of xts-key is for tweak */
return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, /*covered*/
flags);
}
static const struct common_glue_ctx twofish_enc = {
.num_funcs = 3,
.fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = TWOFISH_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) }
}, {
.num_blocks = 3,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
} }
};
static const struct common_glue_ctx twofish_ctr = {
.num_funcs = 3,
.fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = TWOFISH_PARALLEL_BLOCKS,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) }
}, {
.num_blocks = 3,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
}, {
.num_blocks = 1,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) }
} }
};
static const struct common_glue_ctx twofish_enc_xts = {
.num_funcs = 2,
.fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = TWOFISH_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) }
} }
};
static const struct common_glue_ctx twofish_dec = {
.num_funcs = 3,
.fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = TWOFISH_PARALLEL_BLOCKS,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) }
}, {
.num_blocks = 3,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
} }
};
static const struct common_glue_ctx twofish_dec_cbc = {
.num_funcs = 3,
.fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = TWOFISH_PARALLEL_BLOCKS,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) }
}, {
.num_blocks = 3,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
}, {
.num_blocks = 1,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
} }
};
static const struct common_glue_ctx twofish_dec_xts = {
.num_funcs = 2,
.fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = TWOFISH_PARALLEL_BLOCKS,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) }
} }
};
static int ecb_encrypt(struct skcipher_request *req)
{
return glue_ecb_req_128bit(&twofish_enc, req);
}
static int ecb_decrypt(struct skcipher_request *req)
{
return glue_ecb_req_128bit(&twofish_dec, req);
}
static int cbc_encrypt(struct skcipher_request *req)
{
return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk),
req);
}
static int cbc_decrypt(struct skcipher_request *req)
{
return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req);
}
static int ctr_crypt(struct skcipher_request *req)
{
return glue_ctr_req_128bit(&twofish_ctr, req);
}
static int xts_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
return glue_xts_req_128bit(&twofish_enc_xts, req,
XTS_TWEAK_CAST(twofish_enc_blk),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static int xts_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
return glue_xts_req_128bit(&twofish_dec_xts, req,
XTS_TWEAK_CAST(twofish_enc_blk),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static struct skcipher_alg twofish_algs[] = {
{
.base.cra_name = "__ecb(twofish)",
.base.cra_driver_name = "__ecb-twofish-avx",
.base.cra_priority = 400,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = TF_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct twofish_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = TF_MIN_KEY_SIZE,
.max_keysize = TF_MAX_KEY_SIZE,
.setkey = twofish_setkey_skcipher,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
.base.cra_name = "__cbc(twofish)",
.base.cra_driver_name = "__cbc-twofish-avx",
.base.cra_priority = 400,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = TF_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct twofish_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = TF_MIN_KEY_SIZE,
.max_keysize = TF_MAX_KEY_SIZE,
.ivsize = TF_BLOCK_SIZE,
.setkey = twofish_setkey_skcipher,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
}, {
.base.cra_name = "__ctr(twofish)",
.base.cra_driver_name = "__ctr-twofish-avx",
.base.cra_priority = 400,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct twofish_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = TF_MIN_KEY_SIZE,
.max_keysize = TF_MAX_KEY_SIZE,
.ivsize = TF_BLOCK_SIZE,
.chunksize = TF_BLOCK_SIZE,
.setkey = twofish_setkey_skcipher,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
}, {
.base.cra_name = "__xts(twofish)",
.base.cra_driver_name = "__xts-twofish-avx",
.base.cra_priority = 400,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_blocksize = TF_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct twofish_xts_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = 2 * TF_MIN_KEY_SIZE,
.max_keysize = 2 * TF_MAX_KEY_SIZE,
.ivsize = TF_BLOCK_SIZE,
.setkey = xts_twofish_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
},
};
static struct simd_skcipher_alg *twofish_simd_algs[ARRAY_SIZE(twofish_algs)];
static int __init twofish_init(void)
{
const char *feature_name;
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, &feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
return simd_register_skciphers_compat(twofish_algs,
ARRAY_SIZE(twofish_algs),
twofish_simd_algs);
}
static void __exit twofish_exit(void)
{
simd_unregister_skciphers(twofish_algs, ARRAY_SIZE(twofish_algs),
twofish_simd_algs);
}
module_init(twofish_init);
module_exit(twofish_exit);
MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("twofish");
/*
* Glue Code for 3-way parallel assembler optimized version of Twofish
*
* Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
#include <asm/crypto/glue_helper.h>
#include <asm/crypto/twofish.h>
#include <crypto/algapi.h>
#include <crypto/b128ops.h>
#include <crypto/internal/skcipher.h>
#include <crypto/twofish.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
const u8 *key, unsigned int keylen)
{
return twofish_setkey(&tfm->base, key, keylen); /*covered*/
}
static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src)
{
__twofish_enc_blk_3way(ctx, dst, src, false); /*covered*/
}
static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src)
{
__twofish_enc_blk_3way(ctx, dst, src, true);
}
void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src)
{
u128 ivs[2];
ivs[0] = src[0]; /*covered*/
ivs[1] = src[1];
twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
u128_xor(&dst[1], &dst[1], &ivs[0]);
u128_xor(&dst[2], &dst[2], &ivs[1]);
}
EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
be128 ctrblk;
if (dst != src) /*covered*/
*dst = *src; /*covered*/
le128_to_be128(&ctrblk, iv); /*covered*/
le128_inc(iv); /*covered*/
twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
u128_xor(dst, dst, (u128 *)&ctrblk);
}
EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
le128 *iv)
{
be128 ctrblks[3];
if (dst != src) { /*covered*/
dst[0] = src[0]; /*covered*/
dst[1] = src[1];
dst[2] = src[2];
}
le128_to_be128(&ctrblks[0], iv); /*covered*/
le128_inc(iv);
le128_to_be128(&ctrblks[1], iv); /*covered*/
le128_inc(iv);
le128_to_be128(&ctrblks[2], iv); /*covered*/
le128_inc(iv); /*covered*/
twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks);
}
EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way);
static const struct common_glue_ctx twofish_enc = {
.num_funcs = 2,
.fpu_blocks_limit = -1,
.funcs = { {
.num_blocks = 3,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
} }
};
static const struct common_glue_ctx twofish_ctr = {
.num_funcs = 2,
.fpu_blocks_limit = -1,
.funcs = { {
.num_blocks = 3,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr) }
} }
};
static const struct common_glue_ctx twofish_dec = {
.num_funcs = 2,
.fpu_blocks_limit = -1,
.funcs = { {
.num_blocks = 3,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
} }
};
static const struct common_glue_ctx twofish_dec_cbc = {
.num_funcs = 2,
.fpu_blocks_limit = -1,
.funcs = { {
.num_blocks = 3,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
}, {
.num_blocks = 1,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
} }
};
static int ecb_encrypt(struct skcipher_request *req)
{
return glue_ecb_req_128bit(&twofish_enc, req); /*covered*/
}
static int ecb_decrypt(struct skcipher_request *req)
{
return glue_ecb_req_128bit(&twofish_dec, req); /*covered*/
}
static int cbc_encrypt(struct skcipher_request *req)
{
return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk), /*covered*/
req);
}
static int cbc_decrypt(struct skcipher_request *req)
{
return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req); /*covered*/
}
static int ctr_crypt(struct skcipher_request *req)
{
return glue_ctr_req_128bit(&twofish_ctr, req); /*covered*/
}
static struct skcipher_alg tf_skciphers[] = {
{
.base.cra_name = "ecb(twofish)",
.base.cra_driver_name = "ecb-twofish-3way",
.base.cra_priority = 300,
.base.cra_blocksize = TF_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct twofish_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = TF_MIN_KEY_SIZE,
.max_keysize = TF_MAX_KEY_SIZE,
.setkey = twofish_setkey_skcipher,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
}, {
.base.cra_name = "cbc(twofish)",
.base.cra_driver_name = "cbc-twofish-3way",
.base.cra_priority = 300,
.base.cra_blocksize = TF_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct twofish_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = TF_MIN_KEY_SIZE,
.max_keysize = TF_MAX_KEY_SIZE,
.ivsize = TF_BLOCK_SIZE,
.setkey = twofish_setkey_skcipher,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
}, {
.base.cra_name = "ctr(twofish)",
.base.cra_driver_name = "ctr-twofish-3way",
.base.cra_priority = 300,
.base.cra_blocksize = 1,
.base.cra_ctxsize = sizeof(struct twofish_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = TF_MIN_KEY_SIZE,
.max_keysize = TF_MAX_KEY_SIZE,
.ivsize = TF_BLOCK_SIZE,
.chunksize = TF_BLOCK_SIZE,
.setkey = twofish_setkey_skcipher,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
},
};
static bool is_blacklisted_cpu(void)
{
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return false;
if (boot_cpu_data.x86 == 0x06 &&
(boot_cpu_data.x86_model == 0x1c ||
boot_cpu_data.x86_model == 0x26 ||
boot_cpu_data.x86_model == 0x36)) {
/*
* On Atom, twofish-3way is slower than original assembler
* implementation. Twofish-3way trades off some performance in
* storing blocks in 64bit registers to allow three blocks to
* be processed parallel. Parallel operation then allows gaining
* more performance than was trade off, on out-of-order CPUs.
* However Atom does not benefit from this parallellism and
* should be blacklisted.
*/
return true;
}
if (boot_cpu_data.x86 == 0x0f) {
/*
* On Pentium 4, twofish-3way is slower than original assembler
* implementation because excessive uses of 64bit rotate and
* left-shifts (which are really slow on P4) needed to store and
* handle 128bit block in two 64bit registers.
*/
return true;
}
return false;
}
static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
static int __init init(void)
{
if (!force && is_blacklisted_cpu()) {
printk(KERN_INFO
"twofish-x86_64-3way: performance on this CPU "
"would be suboptimal: disabling "
"twofish-x86_64-3way.\n");
return -ENODEV;
}
return crypto_register_skciphers(tf_skciphers,
ARRAY_SIZE(tf_skciphers));
}
static void __exit fini(void)
{
crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers));
}
module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized");
MODULE_ALIAS_CRYPTO("twofish");
MODULE_ALIAS_CRYPTO("twofish-asm");
/*
* common.c - C code for kernel entry and exit
* Copyright (c) 2015 Andrew Lutomirski
* GPL v2
*
* Based on asm and ptrace code by many authors. The code here originated
* in ptrace.c and signal.c.
*/
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/errno.h>
#include <linux/ptrace.h>
#include <linux/tracehook.h>
#include <linux/audit.h>
#include <linux/seccomp.h>
#include <linux/signal.h>
#include <linux/export.h>
#include <linux/context_tracking.h>
#include <linux/user-return-notifier.h>
#include <linux/nospec.h>
#include <linux/uprobes.h>
#include <linux/livepatch.h>
#include <linux/syscalls.h>
#include <asm/desc.h>
#include <asm/traps.h>
#include <asm/vdso.h>
#include <linux/uaccess.h>
#include <asm/cpufeature.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
#ifdef CONFIG_CONTEXT_TRACKING
/* Called on entry from user mode with IRQs off. */
__visible inline void enter_from_user_mode(void)
{
CT_WARN_ON(ct_state() != CONTEXT_USER);
user_exit_irqoff();
}
#else
static inline void enter_from_user_mode(void) {}
#endif
static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
{
#ifdef CONFIG_X86_64
if (arch == AUDIT_ARCH_X86_64) { /*covered*/
audit_syscall_entry(regs->orig_ax, regs->di,
regs->si, regs->dx, regs->r10);
} else
#endif
{
audit_syscall_entry(regs->orig_ax, regs->bx, /*covered*/
regs->cx, regs->dx, regs->si);
}
}
/*
* Returns the syscall nr to run (which should match regs->orig_ax) or -1
* to skip the syscall.
*/
static long syscall_trace_enter(struct pt_regs *regs)
{
u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; /*covered*/
struct thread_info *ti = current_thread_info(); /*covered*/
unsigned long ret = 0;
bool emulated = false;
u32 work;
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
BUG_ON(regs != task_pt_regs(current));
work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
if (unlikely(work & _TIF_SYSCALL_EMU))
emulated = true;
if ((emulated || (work & _TIF_SYSCALL_TRACE)) && /*covered*/
tracehook_report_syscall_entry(regs)) /*covered*/
return -1L;
if (emulated) /*covered*/
return -1L;
#ifdef CONFIG_SECCOMP
/*
* Do seccomp after ptrace, to catch any tracer changes.
*/
if (work & _TIF_SECCOMP) { /*covered*/
struct seccomp_data sd;
sd.arch = arch; /*covered*/
sd.nr = regs->orig_ax;
sd.instruction_pointer = regs->ip;
#ifdef CONFIG_X86_64
if (arch == AUDIT_ARCH_X86_64) {
sd.args[0] = regs->di;
sd.args[1] = regs->si;
sd.args[2] = regs->dx;
sd.args[3] = regs->r10;
sd.args[4] = regs->r8;
sd.args[5] = regs->r9;
} else
#endif
{
sd.args[0] = regs->bx; /*covered*/
sd.args[1] = regs->cx;
sd.args[2] = regs->dx;
sd.args[3] = regs->si;
sd.args[4] = regs->di;
sd.args[5] = regs->bp;
}
ret = __secure_computing(&sd); /*covered*/
if (ret == -1) /*covered*/
return ret;
}
#endif
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) /*covered*/
trace_sys_enter(regs, regs->orig_ax); /*covered*/
do_audit_syscall_entry(regs, arch); /*covered*/
return ret ?: regs->orig_ax; /*covered*/
}
#define EXIT_TO_USERMODE_LOOP_FLAGS \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
_TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY | _TIF_PATCH_PENDING)
static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
{
/*
* In order to return to user mode, we need to have IRQs off with
* none of EXIT_TO_USERMODE_LOOP_FLAGS set. Several of these flags
* can be set at any time on preemptible kernels if we have IRQs on,
* so we need to loop. Disabling preemption wouldn't help: doing the
* work to clear some of the flags can sleep.
*/
while (true) {
/* We have work to do. */
local_irq_enable(); /*covered*/
if (cached_flags & _TIF_NEED_RESCHED)
schedule(); /*covered*/
if (cached_flags & _TIF_UPROBE) /*covered*/
uprobe_notify_resume(regs);
if (cached_flags & _TIF_PATCH_PENDING)
klp_update_patch_state(current); /*covered*/
/* deal with pending signal delivery */
if (cached_flags & _TIF_SIGPENDING) /*covered*/
do_signal(regs); /*covered*/
if (cached_flags & _TIF_NOTIFY_RESUME) { /*covered*/
clear_thread_flag(TIF_NOTIFY_RESUME); /*covered*/
tracehook_notify_resume(regs); /*covered*/
rseq_handle_notify_resume(NULL, regs); /*covered*/
}
if (cached_flags & _TIF_USER_RETURN_NOTIFY) /*covered*/
fire_user_return_notifiers();
/* Disable IRQs and retry */
local_irq_disable(); /*covered*/
cached_flags = READ_ONCE(current_thread_info()->flags);
if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
break;
}
} /*covered*/
/* Called with IRQs disabled. */
__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
{
struct thread_info *ti = current_thread_info(); /*covered*/
u32 cached_flags;
addr_limit_user_check(); /*covered*/
lockdep_assert_irqs_disabled(); /*covered*/
lockdep_sys_exit(); /*covered*/
cached_flags = READ_ONCE(ti->flags);
if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
exit_to_usermode_loop(regs, cached_flags); /*covered*/
#ifdef CONFIG_COMPAT
/*
* Compat syscalls set TS_COMPAT. Make sure we clear it before
* returning to user mode. We need to clear it *after* signal
* handling, because syscall restart has a fixup for compat
* syscalls. The fixup is exercised by the ptrace_syscall_32
* selftest.
*
* We also need to clear TS_REGS_POKED_I386: the 32-bit tracer
* special case only applies after poking regs and before the
* very next return to user mode.
*/
ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); /*covered*/
#endif
user_enter_irqoff();
}
#define SYSCALL_EXIT_WORK_FLAGS \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags)
{
bool step;
audit_syscall_exit(regs); /*covered*/
if (cached_flags & _TIF_SYSCALL_TRACEPOINT) /*covered*/
trace_sys_exit(regs, regs->ax); /*covered*/
/*
* If TIF_SYSCALL_EMU is set, we only get here because of
* TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
* We already reported this syscall instruction in
* syscall_trace_enter().
*/
step = unlikely( /*covered*/
(cached_flags & (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU))
== _TIF_SINGLESTEP);
if (step || cached_flags & _TIF_SYSCALL_TRACE) /*covered*/
tracehook_report_syscall_exit(regs, step); /*covered*/
} /*covered*/
/*
* Called with IRQs on and fully valid regs. Returns with IRQs off in a
* state such that we can immediately switch to user mode.
*/
__visible inline void syscall_return_slowpath(struct pt_regs *regs)
{
struct thread_info *ti = current_thread_info(); /*covered*/
u32 cached_flags = READ_ONCE(ti->flags);
CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
if (IS_ENABLED(CONFIG_PROVE_LOCKING) &&
WARN(irqs_disabled(), "syscall %ld left IRQs disabled", regs->orig_ax)) /*covered*/
local_irq_enable();
rseq_syscall(regs);
/*
* First do one-time work. If these work items are enabled, we
* want to run them exactly once per syscall exit with IRQs on.
*/
if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS)) /*covered*/
syscall_slow_exit_work(regs, cached_flags); /*covered*/
local_irq_disable(); /*covered*/
prepare_exit_to_usermode(regs); /*covered*/
}
#ifdef CONFIG_X86_64
__visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
{
struct thread_info *ti;
enter_from_user_mode();
local_irq_enable();
ti = current_thread_info();
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
nr = syscall_trace_enter(regs);
/*
* NB: Native and x32 syscalls are dispatched from the same
* table. The only functional difference is the x32 bit in
* regs->orig_ax, which changes the behavior of some syscalls.
*/
nr &= __SYSCALL_MASK;
if (likely(nr < NR_syscalls)) {
nr = array_index_nospec(nr, NR_syscalls);
regs->ax = sys_call_table[nr](regs);
}
syscall_return_slowpath(regs);
}
#endif
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
/*
* Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does
* all entry and exit work and returns with IRQs off. This function is
* extremely hot in workloads that use it, and it's usually called from
* do_fast_syscall_32, so forcibly inline it to improve performance.
*/
static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
{
struct thread_info *ti = current_thread_info(); /*covered*/
unsigned int nr = (unsigned int)regs->orig_ax; /*covered*/
#ifdef CONFIG_IA32_EMULATION
ti->status |= TS_COMPAT;
#endif
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
/*
* Subtlety here: if ptrace pokes something larger than
* 2^32-1 into orig_ax, this truncates it. This may or
* may not be necessary, but it matches the old asm
* behavior.
*/
nr = syscall_trace_enter(regs); /*covered*/
}
if (likely(nr < IA32_NR_syscalls)) { /*covered*/
nr = array_index_nospec(nr, IA32_NR_syscalls); /*covered*/
#ifdef CONFIG_IA32_EMULATION
regs->ax = ia32_sys_call_table[nr](regs);
#else
/*
* It's possible that a 32-bit syscall implementation
* takes a 64-bit parameter but nonetheless assumes that
* the high bits are zero. Make sure we zero-extend all
* of the args.
*/
regs->ax = ia32_sys_call_table[nr](
(unsigned int)regs->bx, (unsigned int)regs->cx,
(unsigned int)regs->dx, (unsigned int)regs->si,
(unsigned int)regs->di, (unsigned int)regs->bp);
#endif /* CONFIG_IA32_EMULATION */
}
syscall_return_slowpath(regs); /*covered*/
}
/* Handles int $0x80 */
__visible void do_int80_syscall_32(struct pt_regs *regs)
{
enter_from_user_mode();
local_irq_enable(); /*covered*/
do_syscall_32_irqs_on(regs); /*covered*/
}
/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
__visible long do_fast_syscall_32(struct pt_regs *regs)
{
/*
* Called using the internal vDSO SYSENTER/SYSCALL32 calling
* convention. Adjust regs so it looks like we entered using int80.
*/
unsigned long landing_pad = (unsigned long)current->mm->context.vdso + /*covered*/
vdso_image_32.sym_int80_landing_pad;
/*
* SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
* so that 'regs->ip -= 2' lands back on an int $0x80 instruction.
* Fix it up.
*/
regs->ip = landing_pad;
enter_from_user_mode();
local_irq_enable(); /*covered*/
/* Fetch EBP from where the vDSO stashed it. */
if (
#ifdef CONFIG_X86_64
/*
* Micro-optimization: the pointer we're following is explicitly
* 32 bits, so it can't be out of range.
*/
__get_user(*(u32 *)®s->bp,
(u32 __user __force *)(unsigned long)(u32)regs->sp)
#else
get_user(*(u32 *)®s->bp,
(u32 __user __force *)(unsigned long)(u32)regs->sp)
#endif
) {
/* User code screwed up. */
local_irq_disable();
regs->ax = -EFAULT;
prepare_exit_to_usermode(regs);
return 0; /* Keep it simple: use IRET. */
}
/* Now this is just like a normal syscall. */
do_syscall_32_irqs_on(regs); /*covered*/
#ifdef CONFIG_X86_64
/*
* Opportunistic SYSRETL: if possible, try to return using SYSRETL.
* SYSRETL is available on all 64-bit CPUs, so we don't need to
* bother with SYSEXIT.
*
* Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
* because the ECX fixup above will ensure that this is essentially
* never the case.
*/
return regs->cs == __USER32_CS && regs->ss == __USER_DS && /*covered*/
regs->ip == landing_pad && /*covered*/
(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0; /*covered*/
#else
/*
* Opportunistic SYSEXIT: if possible, try to return using SYSEXIT.
*
* Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
* because the ECX fixup above will ensure that this is essentially
* never the case.
*
* We don't allow syscalls at all from VM86 mode, but we still
* need to check VM, because we might be returning from sys_vm86.
*/
return static_cpu_has(X86_FEATURE_SEP) &&
regs->cs == __USER_CS && regs->ss == __USER_DS &&
regs->ip == landing_pad &&
(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0;
#endif
}
#endif
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2012-2014 Andy Lutomirski <luto@amacapital.net>
*
* Based on the original implementation which is:
* Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
* Copyright 2003 Andi Kleen, SuSE Labs.
*
* Parts of the original code have been moved to arch/x86/vdso/vma.c
*
* This file implements vsyscall emulation. vsyscalls are a legacy ABI:
* Userspace can request certain kernel services by calling fixed
* addresses. This concept is problematic:
*
* - It interferes with ASLR.
* - It's awkward to write code that lives in kernel addresses but is
* callable by userspace at fixed addresses.
* - The whole concept is impossible for 32-bit compat userspace.
* - UML cannot easily virtualize a vsyscall.
*
* As of mid-2014, I believe that there is no new userspace code that
* will use a vsyscall if the vDSO is present. I hope that there will
* soon be no new userspace code that will ever use a vsyscall.
*
* The code in this file emulates vsyscalls when notified of a page
* fault to a vsyscall address.
*/
#include <linux/kernel.h>
#include <linux/timer.h>
#include <linux/sched/signal.h>
#include <linux/mm_types.h>
#include <linux/syscalls.h>
#include <linux/ratelimit.h>
#include <asm/vsyscall.h>
#include <asm/unistd.h>
#include <asm/fixmap.h>
#include <asm/traps.h>
#include <asm/paravirt.h>
#define CREATE_TRACE_POINTS
#include "vsyscall_trace.h"
static enum { EMULATE, NONE } vsyscall_mode =
#ifdef CONFIG_LEGACY_VSYSCALL_NONE
NONE;
#else
EMULATE;
#endif
static int __init vsyscall_setup(char *str)
{
if (str) {
if (!strcmp("emulate", str))
vsyscall_mode = EMULATE;
else if (!strcmp("none", str))
vsyscall_mode = NONE;
else
return -EINVAL;
return 0;
}
return -EINVAL;
}
early_param("vsyscall", vsyscall_setup);
static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
const char *message)
{
if (!show_unhandled_signals)
return;
printk_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
level, current->comm, task_pid_nr(current),
message, regs->ip, regs->cs,
regs->sp, regs->ax, regs->si, regs->di);
}
static int addr_to_vsyscall_nr(unsigned long addr)
{
int nr;
if ((addr & ~0xC00UL) != VSYSCALL_ADDR)
return -EINVAL;
nr = (addr & 0xC00UL) >> 10;
if (nr >= 3)
return -EINVAL;
return nr;
}
static bool write_ok_or_segv(unsigned long ptr, size_t size)
{
/*
* XXX: if access_ok, get_user, and put_user handled
* sig_on_uaccess_err, this could go away.
*/
if (!access_ok((void __user *)ptr, size)) {
struct thread_struct *thread = ¤t->thread;
thread->error_code = X86_PF_USER | X86_PF_WRITE;
thread->cr2 = ptr;
thread->trap_nr = X86_TRAP_PF;
force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)ptr, current);
return false;
} else {
return true;
}
}
bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
{
struct task_struct *tsk;
unsigned long caller;
int vsyscall_nr, syscall_nr, tmp;
int prev_sig_on_uaccess_err;
long ret;
unsigned long orig_dx;
/*
* No point in checking CS -- the only way to get here is a user mode
* trap to a high address, which means that we're in 64-bit user code.
*/
WARN_ON_ONCE(address != regs->ip);
if (vsyscall_mode == NONE) {
warn_bad_vsyscall(KERN_INFO, regs,
"vsyscall attempted with vsyscall=none");
return false;
}
vsyscall_nr = addr_to_vsyscall_nr(address);
trace_emulate_vsyscall(vsyscall_nr);
if (vsyscall_nr < 0) {
warn_bad_vsyscall(KERN_WARNING, regs,
"misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround");
goto sigsegv;
}
if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
warn_bad_vsyscall(KERN_WARNING, regs,
"vsyscall with bad stack (exploit attempt?)");
goto sigsegv;
}
tsk = current;
/*
* Check for access_ok violations and find the syscall nr.
*
* NULL is a valid user pointer (in the access_ok sense) on 32-bit and
* 64-bit, so we don't need to special-case it here. For all the
* vsyscalls, NULL means "don't write anything" not "write it at
* address 0".
*/
switch (vsyscall_nr) {
case 0:
if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
!write_ok_or_segv(regs->si, sizeof(struct timezone))) {
ret = -EFAULT;
goto check_fault;
}
syscall_nr = __NR_gettimeofday;
break;
case 1:
if (!write_ok_or_segv(regs->di, sizeof(time_t))) {
ret = -EFAULT;
goto check_fault;
}
syscall_nr = __NR_time;
break;
case 2:
if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
!write_ok_or_segv(regs->si, sizeof(unsigned))) {
ret = -EFAULT;
goto check_fault;
}
syscall_nr = __NR_getcpu;
break;
}
/*
* Handle seccomp. regs->ip must be the original value.
* See seccomp_send_sigsys and Documentation/userspace-api/seccomp_filter.rst.
*
* We could optimize the seccomp disabled case, but performance
* here doesn't matter.
*/
regs->orig_ax = syscall_nr;
regs->ax = -ENOSYS;
tmp = secure_computing(NULL);
if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
warn_bad_vsyscall(KERN_DEBUG, regs,
"seccomp tried to change syscall nr or ip");
do_exit(SIGSYS);
}
regs->orig_ax = -1;
if (tmp)
goto do_ret; /* skip requested */
/*
* With a real vsyscall, page faults cause SIGSEGV. We want to
* preserve that behavior to make writing exploits harder.
*/
prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err;
current->thread.sig_on_uaccess_err = 1;
ret = -EFAULT;
switch (vsyscall_nr) {
case 0:
/* this decodes regs->di and regs->si on its own */
ret = __x64_sys_gettimeofday(regs);
break;
case 1:
/* this decodes regs->di on its own */
ret = __x64_sys_time(regs);
break;
case 2:
/* while we could clobber regs->dx, we didn't in the past... */
orig_dx = regs->dx;
regs->dx = 0;
/* this decodes regs->di, regs->si and regs->dx on its own */
ret = __x64_sys_getcpu(regs);
regs->dx = orig_dx;
break;
}
current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err;
check_fault:
if (ret == -EFAULT) {
/* Bad news -- userspace fed a bad pointer to a vsyscall. */
warn_bad_vsyscall(KERN_INFO, regs,
"vsyscall fault (exploit attempt?)");
/*
* If we failed to generate a signal for any reason,
* generate one here. (This should be impossible.)
*/
if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) &&
!sigismember(&tsk->pending.signal, SIGSEGV)))
goto sigsegv;
return true; /* Don't emulate the ret. */
}
regs->ax = ret;
do_ret:
/* Emulate a ret instruction. */
regs->ip = caller;
regs->sp += 8;
return true;
sigsegv:
force_sig(SIGSEGV, current);
return true;
}
/*
* A pseudo VMA to allow ptrace access for the vsyscall page. This only
* covers the 64bit vsyscall page now. 32bit has a real VMA now and does
* not need special handling anymore:
*/
static const char *gate_vma_name(struct vm_area_struct *vma)
{
return "[vsyscall]";
}
static const struct vm_operations_struct gate_vma_ops = {
.name = gate_vma_name,
};
static struct vm_area_struct gate_vma = {
.vm_start = VSYSCALL_ADDR,
.vm_end = VSYSCALL_ADDR + PAGE_SIZE,
.vm_page_prot = PAGE_READONLY_EXEC,
.vm_flags = VM_READ | VM_EXEC,
.vm_ops = &gate_vma_ops,
};
struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
#ifdef CONFIG_COMPAT
if (!mm || mm->context.ia32_compat) /*covered*/
return NULL;
#endif
if (vsyscall_mode == NONE) /*covered*/
return NULL; /*covered*/
return &gate_vma;
}
int in_gate_area(struct mm_struct *mm, unsigned long addr)
{
struct vm_area_struct *vma = get_gate_vma(mm); /*covered*/
if (!vma)
return 0;
return (addr >= vma->vm_start) && (addr < vma->vm_end); /*covered*/
}
/*
* Use this when you have no reliable mm, typically from interrupt
* context. It is less reliable than using a task's mm and may give
* false positives.
*/
int in_gate_area_no_mm(unsigned long addr)
{
return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
}
/*
* The VSYSCALL page is the only user-accessible page in the kernel address
* range. Normally, the kernel page tables can have _PAGE_USER clear, but
* the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
* are enabled.
*
* Some day we may create a "minimal" vsyscall mode in which we emulate
* vsyscalls but leave the page not present. If so, we skip calling
* this.
*/
void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
{
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
p4d = p4d_offset(pgd, VSYSCALL_ADDR);
#if CONFIG_PGTABLE_LEVELS >= 5
set_p4d(p4d, __p4d(p4d_val(*p4d) | _PAGE_USER));
#endif
pud = pud_offset(p4d, VSYSCALL_ADDR);
set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
pmd = pmd_offset(pud, VSYSCALL_ADDR);
set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
}
void __init map_vsyscall(void)
{
extern char __vsyscall_page;
unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
if (vsyscall_mode != NONE) {
__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
PAGE_KERNEL_VVAR);
set_vsyscall_pgtable_user_bits(swapper_pg_dir);
}
BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
(unsigned long)VSYSCALL_ADDR);
}
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
* Copyright 2003 Andi Kleen, SuSE Labs.
*
* Modified for x86 32 bit architecture by
* Stefani Seibold <stefani@seibold.net>
* sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
*
* Thanks to hpa@transmeta.com for some useful hint.
* Special thanks to Ingo Molnar for his early experience with
* a different vsyscall implementation for Linux/IA32 and for the name.
*
*/
#include <linux/timekeeper_internal.h>
#include <asm/vgtod.h>
#include <asm/vvar.h>
int vclocks_used __read_mostly;
DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
void update_vsyscall_tz(void)
{
vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest;
vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime;
}
void update_vsyscall(struct timekeeper *tk)
{
int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; /*covered*/
struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
struct vgtod_ts *base;
u64 nsec;
/* Mark the new vclock used. */
BUILD_BUG_ON(VCLOCK_MAX >= 32);
WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));
gtod_write_begin(vdata);
/* copy vsyscall data */
vdata->vclock_mode = vclock_mode;
vdata->cycle_last = tk->tkr_mono.cycle_last;
vdata->mask = tk->tkr_mono.mask;
vdata->mult = tk->tkr_mono.mult;
vdata->shift = tk->tkr_mono.shift;
base = &vdata->basetime[CLOCK_REALTIME];
base->sec = tk->xtime_sec;
base->nsec = tk->tkr_mono.xtime_nsec;
base = &vdata->basetime[CLOCK_TAI];
base->sec = tk->xtime_sec + (s64)tk->tai_offset;
base->nsec = tk->tkr_mono.xtime_nsec;
base = &vdata->basetime[CLOCK_MONOTONIC];
base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
nsec = tk->tkr_mono.xtime_nsec;
nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
nsec -= ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift; /*covered*/
base->sec++;
}
base->nsec = nsec; /*covered*/
base = &vdata->basetime[CLOCK_REALTIME_COARSE];
base->sec = tk->xtime_sec;
base->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
base = &vdata->basetime[CLOCK_MONOTONIC_COARSE];
base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
nsec += tk->wall_to_monotonic.tv_nsec;
while (nsec >= NSEC_PER_SEC) { /*covered*/
nsec -= NSEC_PER_SEC; /*covered*/
base->sec++;
}
base->nsec = nsec; /*covered*/
gtod_write_end(vdata);
}
/*
* Performance events x86 architecture code
*
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2009 Jaswinder Singh Rajput
* Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
* Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
* Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
* Copyright (C) 2009 Google, Inc., Stephane Eranian
*
* For licencing details see kernel-base/COPYING
*/
#include <linux/perf_event.h>
#include <linux/capability.h>
#include <linux/notifier.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/kdebug.h>
#include <linux/sched/mm.h>
#include <linux/sched/clock.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/device.h>
#include <linux/nospec.h>
#include <asm/apic.h>
#include <asm/stacktrace.h>
#include <asm/nmi.h>
#include <asm/smp.h>
#include <asm/alternative.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/timer.h>
#include <asm/desc.h>
#include <asm/ldt.h>
#include <asm/unwind.h>
#include "perf_event.h"
struct x86_pmu x86_pmu __read_mostly;
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
.enabled = 1,
};
DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key);
u64 __read_mostly hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
u64 __read_mostly hw_cache_extra_regs
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
/*
* Propagate event elapsed time into the generic event.
* Can only be executed on the CPU where the event is active.
* Returns the delta events processed.
*/
u64 x86_perf_event_update(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
int shift = 64 - x86_pmu.cntval_bits;
u64 prev_raw_count, new_raw_count;
int idx = hwc->idx;
u64 delta;
if (idx == INTEL_PMC_IDX_FIXED_BTS)
return 0;
/*
* Careful: an NMI might modify the previous event value.
*
* Our tactic to handle this is to first atomically read and
* exchange a new raw count - then add that new-prev delta
* count to the generic event atomically:
*/
again:
prev_raw_count = local64_read(&hwc->prev_count);
rdpmcl(hwc->event_base_rdpmc, new_raw_count);
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
goto again;
/*
* Now we have the new raw value and have updated the prev
* timestamp already. We can now calculate the elapsed delta
* (event-)time and add that to the generic event.
*
* Careful, not all hw sign-extends above the physical width
* of the count.
*/
delta = (new_raw_count << shift) - (prev_raw_count << shift);
delta >>= shift;
local64_add(delta, &event->count);
local64_sub(delta, &hwc->period_left);
return new_raw_count;
}
/*
* Find and validate any extra registers to set up.
*/
static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
{
struct hw_perf_event_extra *reg;
struct extra_reg *er;
reg = &event->hw.extra_reg;
if (!x86_pmu.extra_regs)
return 0;
for (er = x86_pmu.extra_regs; er->msr; er++) {
if (er->event != (config & er->config_mask))
continue;
if (event->attr.config1 & ~er->valid_mask)
return -EINVAL;
/* Check if the extra msrs can be safely accessed*/
if (!er->extra_msr_access)
return -ENXIO;
reg->idx = er->idx;
reg->config = event->attr.config1;
reg->reg = er->msr;
break;
}
return 0;
}
static atomic_t active_events;
static atomic_t pmc_refcount;
static DEFINE_MUTEX(pmc_reserve_mutex);
#ifdef CONFIG_X86_LOCAL_APIC
static bool reserve_pmc_hardware(void)
{
int i;
for (i = 0; i < x86_pmu.num_counters; i++) {
if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
goto perfctr_fail;
}
for (i = 0; i < x86_pmu.num_counters; i++) {
if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
goto eventsel_fail;
}
return true;
eventsel_fail:
for (i--; i >= 0; i--)
release_evntsel_nmi(x86_pmu_config_addr(i));
i = x86_pmu.num_counters;
perfctr_fail:
for (i--; i >= 0; i--)
release_perfctr_nmi(x86_pmu_event_addr(i));
return false;
}
static void release_pmc_hardware(void)
{
int i;
for (i = 0; i < x86_pmu.num_counters; i++) {
release_perfctr_nmi(x86_pmu_event_addr(i));
release_evntsel_nmi(x86_pmu_config_addr(i));
}
}
#else
static bool reserve_pmc_hardware(void) { return true; }
static void release_pmc_hardware(void) {}
#endif
static bool check_hw_exists(void)
{
u64 val, val_fail = -1, val_new= ~0;
int i, reg, reg_fail = -1, ret = 0;
int bios_fail = 0;
int reg_safe = -1;
/*
* Check to see if the BIOS enabled any of the counters, if so
* complain and bail.
*/
for (i = 0; i < x86_pmu.num_counters; i++) {
reg = x86_pmu_config_addr(i);
ret = rdmsrl_safe(reg, &val);
if (ret)
goto msr_fail;
if (val & ARCH_PERFMON_EVENTSEL_ENABLE) {
bios_fail = 1;
val_fail = val;
reg_fail = reg;
} else {
reg_safe = i;
}
}
if (x86_pmu.num_counters_fixed) {
reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
ret = rdmsrl_safe(reg, &val);
if (ret)
goto msr_fail;
for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
if (val & (0x03 << i*4)) {
bios_fail = 1;
val_fail = val;
reg_fail = reg;
}
}
}
/*
* If all the counters are enabled, the below test will always
* fail. The tools will also become useless in this scenario.
* Just fail and disable the hardware counters.
*/
if (reg_safe == -1) {
reg = reg_safe;
goto msr_fail;
}
/*
* Read the current value, change it and read it back to see if it
* matches, this is needed to detect certain hardware emulators
* (qemu/kvm) that don't trap on the MSR access and always return 0s.
*/
reg = x86_pmu_event_addr(reg_safe);
if (rdmsrl_safe(reg, &val))
goto msr_fail;
val ^= 0xffffUL;
ret = wrmsrl_safe(reg, val);
ret |= rdmsrl_safe(reg, &val_new);
if (ret || val != val_new)
goto msr_fail;
/*
* We still allow the PMU driver to operate:
*/
if (bios_fail) {
pr_cont("Broken BIOS detected, complain to your hardware vendor.\n");
pr_err(FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n",
reg_fail, val_fail);
}
return true;
msr_fail:
if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
pr_cont("PMU not available due to virtualization, using software events only.\n");
} else {
pr_cont("Broken PMU hardware detected, using software events only.\n");
pr_err("Failed to access perfctr msr (MSR %x is %Lx)\n",
reg, val_new);
}
return false;
}
static void hw_perf_event_destroy(struct perf_event *event)
{
x86_release_hardware();
atomic_dec(&active_events);
}
void hw_perf_lbr_event_destroy(struct perf_event *event)
{
hw_perf_event_destroy(event);
/* undo the lbr/bts event accounting */
x86_del_exclusive(x86_lbr_exclusive_lbr);
}
static inline int x86_pmu_initialized(void)
{
return x86_pmu.handle_irq != NULL;
}
static inline int
set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
{
struct perf_event_attr *attr = &event->attr;
unsigned int cache_type, cache_op, cache_result;
u64 config, val;
config = attr->config;
cache_type = (config >> 0) & 0xff;
if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
return -EINVAL;
cache_type = array_index_nospec(cache_type, PERF_COUNT_HW_CACHE_MAX);
cache_op = (config >> 8) & 0xff;
if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
return -EINVAL;
cache_op = array_index_nospec(cache_op, PERF_COUNT_HW_CACHE_OP_MAX);
cache_result = (config >> 16) & 0xff;
if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
return -EINVAL;
cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX);
val = hw_cache_event_ids[cache_type][cache_op][cache_result];
if (val == 0)
return -ENOENT;
if (val == -1)
return -EINVAL;
hwc->config |= val;
attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
return x86_pmu_extra_regs(val, event);
}
int x86_reserve_hardware(void)
{
int err = 0;
if (!atomic_inc_not_zero(&pmc_refcount)) {
mutex_lock(&pmc_reserve_mutex);
if (atomic_read(&pmc_refcount) == 0) {
if (!reserve_pmc_hardware())
err = -EBUSY;
else
reserve_ds_buffers();
}
if (!err)
atomic_inc(&pmc_refcount);
mutex_unlock(&pmc_reserve_mutex);
}
return err;
}
void x86_release_hardware(void)
{
if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
release_pmc_hardware();
release_ds_buffers();
mutex_unlock(&pmc_reserve_mutex);
}
}
/*
* Check if we can create event of a certain type (that no conflicting events
* are present).
*/
int x86_add_exclusive(unsigned int what)
{
int i;
/*
* When lbr_pt_coexist we allow PT to coexist with either LBR or BTS.
* LBR and BTS are still mutually exclusive.
*/
if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt)
return 0;
if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
mutex_lock(&pmc_reserve_mutex);
for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
goto fail_unlock;
}
atomic_inc(&x86_pmu.lbr_exclusive[what]);
mutex_unlock(&pmc_reserve_mutex);
}
atomic_inc(&active_events);
return 0;
fail_unlock:
mutex_unlock(&pmc_reserve_mutex);
return -EBUSY;
}
void x86_del_exclusive(unsigned int what)
{
if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt)
return;
atomic_dec(&x86_pmu.lbr_exclusive[what]);
atomic_dec(&active_events);
}
int x86_setup_perfctr(struct perf_event *event)
{
struct perf_event_attr *attr = &event->attr;
struct hw_perf_event *hwc = &event->hw;
u64 config;
if (!is_sampling_event(event)) {
hwc->sample_period = x86_pmu.max_period;
hwc->last_period = hwc->sample_period;
local64_set(&hwc->period_left, hwc->sample_period);
}
if (attr->type == PERF_TYPE_RAW)
return x86_pmu_extra_regs(event->attr.config, event);
if (attr->type == PERF_TYPE_HW_CACHE)
return set_ext_hw_attr(hwc, event);
if (attr->config >= x86_pmu.max_events)
return -EINVAL;
attr->config = array_index_nospec((unsigned long)attr->config, x86_pmu.max_events);
/*
* The generic map:
*/
config = x86_pmu.event_map(attr->config);
if (config == 0)
return -ENOENT;
if (config == -1LL)
return -EINVAL;
hwc->config |= config;
return 0;
}
/*
* check that branch_sample_type is compatible with
* settings needed for precise_ip > 1 which implies
* using the LBR to capture ALL taken branches at the
* priv levels of the measurement
*/
static inline int precise_br_compat(struct perf_event *event)
{
u64 m = event->attr.branch_sample_type;
u64 b = 0;
/* must capture all branches */
if (!(m & PERF_SAMPLE_BRANCH_ANY))
return 0;
m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
if (!event->attr.exclude_user)
b |= PERF_SAMPLE_BRANCH_USER;
if (!event->attr.exclude_kernel)
b |= PERF_SAMPLE_BRANCH_KERNEL;
/*
* ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
*/
return m == b;
}
int x86_pmu_max_precise(void)
{
int precise = 0;
/* Support for constant skid */
if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
precise++;
/* Support for IP fixup */
if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
precise++;
if (x86_pmu.pebs_prec_dist)
precise++;
}
return precise;
}
int x86_pmu_hw_config(struct perf_event *event)
{
if (event->attr.precise_ip) {
int precise = x86_pmu_max_precise();
if (event->attr.precise_ip > precise)
return -EOPNOTSUPP;
/* There's no sense in having PEBS for non sampling events: */
if (!is_sampling_event(event))
return -EINVAL;
}
/*
* check that PEBS LBR correction does not conflict with
* whatever the user is asking with attr->branch_sample_type
*/
if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) {
u64 *br_type = &event->attr.branch_sample_type;
if (has_branch_stack(event)) {
if (!precise_br_compat(event))
return -EOPNOTSUPP;
/* branch_sample_type is compatible */
} else {
/*
* user did not specify branch_sample_type
*
* For PEBS fixups, we capture all
* the branches at the priv level of the
* event.
*/
*br_type = PERF_SAMPLE_BRANCH_ANY;
if (!event->attr.exclude_user)
*br_type |= PERF_SAMPLE_BRANCH_USER;
if (!event->attr.exclude_kernel)
*br_type |= PERF_SAMPLE_BRANCH_KERNEL;
}
}
if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK)
event->attach_state |= PERF_ATTACH_TASK_DATA;
/*
* Generate PMC IRQs:
* (keep 'enabled' bit clear for now)
*/
event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
/*
* Count user and OS events unless requested not to
*/
if (!event->attr.exclude_user)
event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
if (!event->attr.exclude_kernel)
event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
if (event->attr.type == PERF_TYPE_RAW)
event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
if (event->attr.sample_period && x86_pmu.limit_period) {
if (x86_pmu.limit_period(event, event->attr.sample_period) >
event->attr.sample_period)
return -EINVAL;
}
return x86_setup_perfctr(event);
}
/*
* Setup the hardware configuration for a given attr_type
*/
static int __x86_pmu_event_init(struct perf_event *event)
{
int err;
if (!x86_pmu_initialized())
return -ENODEV;
err = x86_reserve_hardware();
if (err)
return err;
atomic_inc(&active_events);
event->destroy = hw_perf_event_destroy;
event->hw.idx = -1;
event->hw.last_cpu = -1;
event->hw.last_tag = ~0ULL;
/* mark unused */
event->hw.extra_reg.idx = EXTRA_REG_NONE;
event->hw.branch_reg.idx = EXTRA_REG_NONE;
return x86_pmu.hw_config(event);
}
void x86_pmu_disable_all(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
u64 val;
if (!test_bit(idx, cpuc->active_mask))
continue;
rdmsrl(x86_pmu_config_addr(idx), val);
if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
continue;
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
wrmsrl(x86_pmu_config_addr(idx), val);
}
}
/*
* There may be PMI landing after enabled=0. The PMI hitting could be before or
* after disable_all.
*
* If PMI hits before disable_all, the PMU will be disabled in the NMI handler.
* It will not be re-enabled in the NMI handler again, because enabled=0. After
* handling the NMI, disable_all will be called, which will not change the
* state either. If PMI hits after disable_all, the PMU is already disabled
* before entering NMI handler. The NMI handler will not change the state
* either.
*
* So either situation is harmless.
*/
static void x86_pmu_disable(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
if (!x86_pmu_initialized())
return;
if (!cpuc->enabled)
return;
cpuc->n_added = 0;
cpuc->enabled = 0;
barrier();
x86_pmu.disable_all();
}
void x86_pmu_enable_all(int added)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
if (!test_bit(idx, cpuc->active_mask))
continue;
__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
}
}
static struct pmu pmu;
static inline int is_x86_event(struct perf_event *event)
{
return event->pmu == &pmu;
}
/*
* Event scheduler state:
*
* Assign events iterating over all events and counters, beginning
* with events with least weights first. Keep the current iterator
* state in struct sched_state.
*/
struct sched_state {
int weight;
int event; /* event index */
int counter; /* counter index */
int unassigned; /* number of events to be assigned left */
int nr_gp; /* number of GP counters used */
unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};
/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
#define SCHED_STATES_MAX 2
struct perf_sched {
int max_weight;
int max_events;
int max_gp;
int saved_states;
struct event_constraint **constraints;
struct sched_state state;
struct sched_state saved[SCHED_STATES_MAX];
};
/*
* Initialize interator that runs through all events and counters.
*/
static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
int num, int wmin, int wmax, int gpmax)
{
int idx;
memset(sched, 0, sizeof(*sched));
sched->max_events = num;
sched->max_weight = wmax;
sched->max_gp = gpmax;
sched->constraints = constraints;
for (idx = 0; idx < num; idx++) {
if (constraints[idx]->weight == wmin)
break;
}
sched->state.event = idx; /* start with min weight */
sched->state.weight = wmin;
sched->state.unassigned = num;
}
static void perf_sched_save_state(struct perf_sched *sched)
{
if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
return;
sched->saved[sched->saved_states] = sched->state;
sched->saved_states++;
}
static bool perf_sched_restore_state(struct perf_sched *sched)
{
if (!sched->saved_states)
return false;
sched->saved_states--;
sched->state = sched->saved[sched->saved_states];
/* continue with next counter: */
clear_bit(sched->state.counter++, sched->state.used);
return true;
}
/*
* Select a counter for the current event to schedule. Return true on
* success.
*/
static bool __perf_sched_find_counter(struct perf_sched *sched)
{
struct event_constraint *c;
int idx;
if (!sched->state.unassigned)
return false;
if (sched->state.event >= sched->max_events)
return false;
c = sched->constraints[sched->state.event];
/* Prefer fixed purpose counters */
if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
idx = INTEL_PMC_IDX_FIXED;
for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
if (!__test_and_set_bit(idx, sched->state.used))
goto done;
}
}
/* Grab the first unused counter starting with idx */
idx = sched->state.counter;
for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
if (!__test_and_set_bit(idx, sched->state.used)) {
if (sched->state.nr_gp++ >= sched->max_gp)
return false;
goto done;
}
}
return false;
done:
sched->state.counter = idx;
if (c->overlap)
perf_sched_save_state(sched);
return true;
}
static bool perf_sched_find_counter(struct perf_sched *sched)
{
while (!__perf_sched_find_counter(sched)) {
if (!perf_sched_restore_state(sched))
return false;
}
return true;
}
/*
* Go through all unassigned events and find the next one to schedule.
* Take events with the least weight first. Return true on success.
*/
static bool perf_sched_next_event(struct perf_sched *sched)
{
struct event_constraint *c;
if (!sched->state.unassigned || !--sched->state.unassigned)
return false;
do {
/* next event */
sched->state.event++;
if (sched->state.event >= sched->max_events) {
/* next weight */
sched->state.event = 0;
sched->state.weight++;
if (sched->state.weight > sched->max_weight)
return false;
}
c = sched->constraints[sched->state.event];
} while (c->weight != sched->state.weight);
sched->state.counter = 0; /* start with first counter */
return true;
}
/*
* Assign a counter for each event.
*/
int perf_assign_events(struct event_constraint **constraints, int n,
int wmin, int wmax, int gpmax, int *assign)
{
struct perf_sched sched;
perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax);
do {
if (!perf_sched_find_counter(&sched))
break; /* failed */
if (assign)
assign[sched.state.event] = sched.state.counter;
} while (perf_sched_next_event(&sched));
return sched.state.unassigned;
}
EXPORT_SYMBOL_GPL(perf_assign_events);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
struct event_constraint *c;
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
struct perf_event *e;
int i, wmin, wmax, unsched = 0;
struct hw_perf_event *hwc;
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
if (x86_pmu.start_scheduling)
x86_pmu.start_scheduling(cpuc);
for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
cpuc->event_constraint[i] = NULL;
c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
cpuc->event_constraint[i] = c;
wmin = min(wmin, c->weight);
wmax = max(wmax, c->weight);
}
/*
* fastpath, try to reuse previous register
*/
for (i = 0; i < n; i++) {
hwc = &cpuc->event_list[i]->hw;
c = cpuc->event_constraint[i];
/* never assigned */
if (hwc->idx == -1)
break;
/* constraint still honored */
if (!test_bit(hwc->idx, c->idxmsk))
break;
/* not already used */
if (test_bit(hwc->idx, used_mask))
break;
__set_bit(hwc->idx, used_mask);
if (assign)
assign[i] = hwc->idx;
}
/* slow path */
if (i != n) {
int gpmax = x86_pmu.num_counters;
/*
* Do not allow scheduling of more than half the available
* generic counters.
*
* This helps avoid counter starvation of sibling thread by
* ensuring at most half the counters cannot be in exclusive
* mode. There is no designated counters for the limits. Any
* N/2 counters can be used. This helps with events with
* specific counter constraints.
*/
if (is_ht_workaround_enabled() && !cpuc->is_fake &&
READ_ONCE(cpuc->excl_cntrs->exclusive_present))
gpmax /= 2;
unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
wmax, gpmax, assign);
}
/*
* In case of success (unsched = 0), mark events as committed,
* so we do not put_constraint() in case new events are added
* and fail to be scheduled
*
* We invoke the lower level commit callback to lock the resource
*
* We do not need to do all of this in case we are called to
* validate an event group (assign == NULL)
*/
if (!unsched && assign) {
for (i = 0; i < n; i++) {
e = cpuc->event_list[i];
e->hw.flags |= PERF_X86_EVENT_COMMITTED;
if (x86_pmu.commit_scheduling)
x86_pmu.commit_scheduling(cpuc, i, assign[i]);
}
} else {
for (i = 0; i < n; i++) {
e = cpuc->event_list[i];
/*
* do not put_constraint() on comitted events,
* because they are good to go
*/
if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
continue;
/*
* release events that failed scheduling
*/
if (x86_pmu.put_event_constraints)
x86_pmu.put_event_constraints(cpuc, e);
}
}
if (x86_pmu.stop_scheduling)
x86_pmu.stop_scheduling(cpuc);
return unsched ? -EINVAL : 0;
}
/*
* dogrp: true if must collect siblings events (group)
* returns total number of events and error code
*/
static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
{
struct perf_event *event;
int n, max_count;
max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
/* current number of events already accepted */
n = cpuc->n_events;
if (is_x86_event(leader)) {
if (n >= max_count)
return -EINVAL;
cpuc->event_list[n] = leader;
n++;
}
if (!dogrp)
return n;
for_each_sibling_event(event, leader) {
if (!is_x86_event(event) ||
event->state <= PERF_EVENT_STATE_OFF)
continue;
if (n >= max_count)
return -EINVAL;
cpuc->event_list[n] = event;
n++;
}
return n;
}
static inline void x86_assign_hw_event(struct perf_event *event,
struct cpu_hw_events *cpuc, int i)
{
struct hw_perf_event *hwc = &event->hw;
hwc->idx = cpuc->assign[i];
hwc->last_cpu = smp_processor_id();
hwc->last_tag = ++cpuc->tags[i];
if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) {
hwc->config_base = 0;
hwc->event_base = 0;
} else if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED);
hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30;
} else {
hwc->config_base = x86_pmu_config_addr(hwc->idx);
hwc->event_base = x86_pmu_event_addr(hwc->idx);
hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
}
}
/**
* x86_perf_rdpmc_index - Return PMC counter used for event
* @event: the perf_event to which the PMC counter was assigned
*
* The counter assigned to this performance event may change if interrupts
* are enabled. This counter should thus never be used while interrupts are
* enabled. Before this function is used to obtain the assigned counter the
* event should be checked for validity using, for example,
* perf_event_read_local(), within the same interrupt disabled section in
* which this counter is planned to be used.
*
* Return: The index of the performance monitoring counter assigned to
* @perf_event.
*/
int x86_perf_rdpmc_index(struct perf_event *event)
{
lockdep_assert_irqs_disabled();
return event->hw.event_base_rdpmc;
}
static inline int match_prev_assignment(struct hw_perf_event *hwc,
struct cpu_hw_events *cpuc,
int i)
{
return hwc->idx == cpuc->assign[i] &&
hwc->last_cpu == smp_processor_id() &&
hwc->last_tag == cpuc->tags[i];
}
static void x86_pmu_start(struct perf_event *event, int flags);
static void x86_pmu_enable(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_event *event;
struct hw_perf_event *hwc;
int i, added = cpuc->n_added;
if (!x86_pmu_initialized())
return;
if (cpuc->enabled)
return;
if (cpuc->n_added) {
int n_running = cpuc->n_events - cpuc->n_added;
/*
* apply assignment obtained either from
* hw_perf_group_sched_in() or x86_pmu_enable()
*
* step1: save events moving to new counters
*/
for (i = 0; i < n_running; i++) {
event = cpuc->event_list[i];
hwc = &event->hw;
/*
* we can avoid reprogramming counter if:
* - assigned same counter as last time
* - running on same CPU as last time
* - no other event has used the counter since
*/
if (hwc->idx == -1 ||
match_prev_assignment(hwc, cpuc, i))
continue;
/*
* Ensure we don't accidentally enable a stopped
* counter simply because we rescheduled.
*/
if (hwc->state & PERF_HES_STOPPED)
hwc->state |= PERF_HES_ARCH;
x86_pmu_stop(event, PERF_EF_UPDATE);
}
/*
* step2: reprogram moved events into new counters
*/
for (i = 0; i < cpuc->n_events; i++) {
event = cpuc->event_list[i];
hwc = &event->hw;
if (!match_prev_assignment(hwc, cpuc, i))
x86_assign_hw_event(event, cpuc, i);
else if (i < n_running)
continue;
if (hwc->state & PERF_HES_ARCH)
continue;
x86_pmu_start(event, PERF_EF_RELOAD);
}
cpuc->n_added = 0;
perf_events_lapic_init();
}
cpuc->enabled = 1;
barrier();
x86_pmu.enable_all(added);
}
static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
/*
* Set the next IRQ period, based on the hwc->period_left value.
* To be called with the event disabled in hw:
*/
int x86_perf_event_set_period(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
s64 left = local64_read(&hwc->period_left);
s64 period = hwc->sample_period;
int ret = 0, idx = hwc->idx;
if (idx == INTEL_PMC_IDX_FIXED_BTS)
return 0;
/*
* If we are way outside a reasonable range then just skip forward:
*/
if (unlikely(left <= -period)) {
left = period;
local64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
if (unlikely(left <= 0)) {
left += period;
local64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
/*
* Quirk: certain CPUs dont like it if just 1 hw_event is left:
*/
if (unlikely(left < 2))
left = 2;
if (left > x86_pmu.max_period)
left = x86_pmu.max_period;
if (x86_pmu.limit_period)
left = x86_pmu.limit_period(event, left);
per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
/*
* The hw event starts counting from this event offset,
* mark it to be able to extra future deltas:
*/
local64_set(&hwc->prev_count, (u64)-left);
wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
/*
* Due to erratum on certan cpu we need
* a second write to be sure the register
* is updated properly
*/
if (x86_pmu.perfctr_second_write) {
wrmsrl(hwc->event_base,
(u64)(-left) & x86_pmu.cntval_mask);
}
perf_event_update_userpage(event);
return ret;
}
void x86_pmu_enable_event(struct perf_event *event)
{
if (__this_cpu_read(cpu_hw_events.enabled))
__x86_pmu_enable_event(&event->hw,
ARCH_PERFMON_EVENTSEL_ENABLE);
}
/*
* Add a single event to the PMU.
*
* The event is added to the group of enabled events
* but only if it can be scehduled with existing events.
*/
static int x86_pmu_add(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc;
int assign[X86_PMC_IDX_MAX];
int n, n0, ret;
hwc = &event->hw;
n0 = cpuc->n_events;
ret = n = collect_events(cpuc, event, false);
if (ret < 0)
goto out;
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
if (!(flags & PERF_EF_START))
hwc->state |= PERF_HES_ARCH;
/*
* If group events scheduling transaction was started,
* skip the schedulability test here, it will be performed
* at commit time (->commit_txn) as a whole.
*
* If commit fails, we'll call ->del() on all events
* for which ->add() was called.
*/
if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
goto done_collect;
ret = x86_pmu.schedule_events(cpuc, n, assign);
if (ret)
goto out;
/*
* copy new assignment, now we know it is possible
* will be used by hw_perf_enable()
*/
memcpy(cpuc->assign, assign, n*sizeof(int));
done_collect:
/*
* Commit the collect_events() state. See x86_pmu_del() and
* x86_pmu_*_txn().
*/
cpuc->n_events = n;
cpuc->n_added += n - n0;
cpuc->n_txn += n - n0;
if (x86_pmu.add) {
/*
* This is before x86_pmu_enable() will call x86_pmu_start(),
* so we enable LBRs before an event needs them etc..
*/
x86_pmu.add(event);
}
ret = 0;
out:
return ret;
}
static void x86_pmu_start(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx = event->hw.idx;
if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
return;
if (WARN_ON_ONCE(idx == -1))
return;
if (flags & PERF_EF_RELOAD) {
WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
x86_perf_event_set_period(event);
}
event->hw.state = 0;
cpuc->events[idx] = event;
__set_bit(idx, cpuc->active_mask);
__set_bit(idx, cpuc->running);
x86_pmu.enable(event);
perf_event_update_userpage(event);
}
void perf_event_print_debug(void)
{
u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
u64 pebs, debugctl;
struct cpu_hw_events *cpuc;
unsigned long flags;
int cpu, idx;
if (!x86_pmu.num_counters)
return;
local_irq_save(flags);
cpu = smp_processor_id();
cpuc = &per_cpu(cpu_hw_events, cpu);
if (x86_pmu.version >= 2) {
rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
pr_info("\n");
pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
pr_info("CPU#%d: status: %016llx\n", cpu, status);
pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
if (x86_pmu.pebs_constraints) {
rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs);
}
if (x86_pmu.lbr_nr) {
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
pr_info("CPU#%d: debugctl: %016llx\n", cpu, debugctl);
}
}
pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
rdmsrl(x86_pmu_event_addr(idx), pmc_count);
prev_left = per_cpu(pmc_prev_left[idx], cpu);
pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
cpu, idx, pmc_ctrl);
pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
cpu, idx, pmc_count);
pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
cpu, idx, prev_left);
}
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
cpu, idx, pmc_count);
}
local_irq_restore(flags);
}
void x86_pmu_stop(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
x86_pmu.disable(event);
cpuc->events[hwc->idx] = NULL;
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
hwc->state |= PERF_HES_STOPPED;
}
if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
/*
* Drain the remaining delta count out of a event
* that we are disabling:
*/
x86_perf_event_update(event);
hwc->state |= PERF_HES_UPTODATE;
}
}
static void x86_pmu_del(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int i;
/*
* event is descheduled
*/
event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
/*
* If we're called during a txn, we only need to undo x86_pmu.add.
* The events never got scheduled and ->cancel_txn will truncate
* the event_list.
*
* XXX assumes any ->del() called during a TXN will only be on
* an event added during that same TXN.
*/
if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
goto do_del;
/*
* Not a TXN, therefore cleanup properly.
*/
x86_pmu_stop(event, PERF_EF_UPDATE);
for (i = 0; i < cpuc->n_events; i++) {
if (event == cpuc->event_list[i])
break;
}
if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */
return;
/* If we have a newly added event; make sure to decrease n_added. */
if (i >= cpuc->n_events - cpuc->n_added)
--cpuc->n_added;
if (x86_pmu.put_event_constraints)
x86_pmu.put_event_constraints(cpuc, event);
/* Delete the array entry. */
while (++i < cpuc->n_events) {
cpuc->event_list[i-1] = cpuc->event_list[i];
cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
}
--cpuc->n_events;
perf_event_update_userpage(event);
do_del:
if (x86_pmu.del) {
/*
* This is after x86_pmu_stop(); so we disable LBRs after any
* event can need them etc..
*/
x86_pmu.del(event);
}
}
int x86_pmu_handle_irq(struct pt_regs *regs)
{
struct perf_sample_data data;
struct cpu_hw_events *cpuc;
struct perf_event *event;
int idx, handled = 0;
u64 val;
cpuc = this_cpu_ptr(&cpu_hw_events);
/*
* Some chipsets need to unmask the LVTPC in a particular spot
* inside the nmi handler. As a result, the unmasking was pushed
* into all the nmi handlers.
*
* This generic handler doesn't seem to have any issues where the
* unmasking occurs so it was left at the top.
*/
apic_write(APIC_LVTPC, APIC_DM_NMI);
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
if (!test_bit(idx, cpuc->active_mask)) {
/*
* Though we deactivated the counter some cpus
* might still deliver spurious interrupts still
* in flight. Catch them:
*/
if (__test_and_clear_bit(idx, cpuc->running))
handled++;
continue;
}
event = cpuc->events[idx];
val = x86_perf_event_update(event);
if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
continue;
/*
* event overflow
*/
handled++;
perf_sample_data_init(&data, 0, event->hw.last_period);
if (!x86_perf_event_set_period(event))
continue;
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
}
if (handled)
inc_irq_stat(apic_perf_irqs);
return handled;
}
void perf_events_lapic_init(void)
{
if (!x86_pmu.apic || !x86_pmu_initialized())
return;
/*
* Always use NMI for PMU
*/
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
static int
perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
u64 start_clock;
u64 finish_clock;
int ret;
/*
* All PMUs/events that share this PMI handler should make sure to
* increment active_events for their events.
*/
if (!atomic_read(&active_events))
return NMI_DONE;
start_clock = sched_clock();
ret = x86_pmu.handle_irq(regs);
finish_clock = sched_clock();
perf_sample_event_took(finish_clock - start_clock);
return ret;
}
NOKPROBE_SYMBOL(perf_event_nmi_handler);
struct event_constraint emptyconstraint;
struct event_constraint unconstrained;
static int x86_pmu_prepare_cpu(unsigned int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
int i;
for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
cpuc->kfree_on_online[i] = NULL;
if (x86_pmu.cpu_prepare)
return x86_pmu.cpu_prepare(cpu);
return 0;
}
static int x86_pmu_dead_cpu(unsigned int cpu)
{
if (x86_pmu.cpu_dead)
x86_pmu.cpu_dead(cpu);
return 0;
}
static int x86_pmu_online_cpu(unsigned int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
int i;
for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
kfree(cpuc->kfree_on_online[i]);
cpuc->kfree_on_online[i] = NULL;
}
return 0;
}
static int x86_pmu_starting_cpu(unsigned int cpu)
{
if (x86_pmu.cpu_starting)
x86_pmu.cpu_starting(cpu);
return 0;
}
static int x86_pmu_dying_cpu(unsigned int cpu)
{
if (x86_pmu.cpu_dying)
x86_pmu.cpu_dying(cpu);
return 0;
}
static void __init pmu_check_apic(void)
{
if (boot_cpu_has(X86_FEATURE_APIC))
return;
x86_pmu.apic = 0;
pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
pr_info("no hardware sampling interrupt available.\n");
/*
* If we have a PMU initialized but no APIC
* interrupts, we cannot sample hardware
* events (user-space has to fall back and
* sample via a hrtimer based software event):
*/
pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
}
static struct attribute_group x86_pmu_format_group __ro_after_init = {
.name = "format",
.attrs = NULL,
};
/*
* Remove all undefined events (x86_pmu.event_map(id) == 0)
* out of events_attr attributes.
*/
static void __init filter_events(struct attribute **attrs)
{
struct device_attribute *d;
struct perf_pmu_events_attr *pmu_attr;
int offset = 0;
int i, j;
for (i = 0; attrs[i]; i++) {
d = (struct device_attribute *)attrs[i];
pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
/* str trumps id */
if (pmu_attr->event_str)
continue;
if (x86_pmu.event_map(i + offset))
continue;
for (j = i; attrs[j]; j++)
attrs[j] = attrs[j + 1];
/* Check the shifted attr. */
i--;
/*
* event_map() is index based, the attrs array is organized
* by increasing event index. If we shift the events, then
* we need to compensate for the event_map(), otherwise
* we are looking up the wrong event in the map
*/
offset++;
}
}
/* Merge two pointer arrays */
__init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
{
struct attribute **new;
int j, i;
for (j = 0; a && a[j]; j++)
;
for (i = 0; b && b[i]; i++)
j++;
j++;
new = kmalloc_array(j, sizeof(struct attribute *), GFP_KERNEL);
if (!new)
return NULL;
j = 0;
for (i = 0; a && a[i]; i++)
new[j++] = a[i];
for (i = 0; b && b[i]; i++)
new[j++] = b[i];
new[j] = NULL;
return new;
}
ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page)
{
struct perf_pmu_events_attr *pmu_attr = \
container_of(attr, struct perf_pmu_events_attr, attr);
u64 config = x86_pmu.event_map(pmu_attr->id);
/* string trumps id */
if (pmu_attr->event_str)
return sprintf(page, "%s", pmu_attr->event_str);
return x86_pmu.events_sysfs_show(page, config);
}
EXPORT_SYMBOL_GPL(events_sysfs_show);
ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page)
{
struct perf_pmu_events_ht_attr *pmu_attr =
container_of(attr, struct perf_pmu_events_ht_attr, attr);
/*
* Report conditional events depending on Hyper-Threading.
*
* This is overly conservative as usually the HT special
* handling is not needed if the other CPU thread is idle.
*
* Note this does not (and cannot) handle the case when thread
* siblings are invisible, for example with virtualization
* if they are owned by some other guest. The user tool
* has to re-read when a thread sibling gets onlined later.
*/
return sprintf(page, "%s",
topology_max_smt_threads() > 1 ?
pmu_attr->event_str_ht :
pmu_attr->event_str_noht);
}
EVENT_ATTR(cpu-cycles, CPU_CYCLES );
EVENT_ATTR(instructions, INSTRUCTIONS );
EVENT_ATTR(cache-references, CACHE_REFERENCES );
EVENT_ATTR(cache-misses, CACHE_MISSES );
EVENT_ATTR(branch-instructions, BRANCH_INSTRUCTIONS );
EVENT_ATTR(branch-misses, BRANCH_MISSES );
EVENT_ATTR(bus-cycles, BUS_CYCLES );
EVENT_ATTR(stalled-cycles-frontend, STALLED_CYCLES_FRONTEND );
EVENT_ATTR(stalled-cycles-backend, STALLED_CYCLES_BACKEND );
EVENT_ATTR(ref-cycles, REF_CPU_CYCLES );
static struct attribute *empty_attrs;
static struct attribute *events_attr[] = {
EVENT_PTR(CPU_CYCLES),
EVENT_PTR(INSTRUCTIONS),
EVENT_PTR(CACHE_REFERENCES),
EVENT_PTR(CACHE_MISSES),
EVENT_PTR(BRANCH_INSTRUCTIONS),
EVENT_PTR(BRANCH_MISSES),
EVENT_PTR(BUS_CYCLES),
EVENT_PTR(STALLED_CYCLES_FRONTEND),
EVENT_PTR(STALLED_CYCLES_BACKEND),
EVENT_PTR(REF_CPU_CYCLES),
NULL,
};
static struct attribute_group x86_pmu_events_group __ro_after_init = {
.name = "events",
.attrs = events_attr,
};
ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
{
u64 umask = (config & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
u64 cmask = (config & ARCH_PERFMON_EVENTSEL_CMASK) >> 24;
bool edge = (config & ARCH_PERFMON_EVENTSEL_EDGE);
bool pc = (config & ARCH_PERFMON_EVENTSEL_PIN_CONTROL);
bool any = (config & ARCH_PERFMON_EVENTSEL_ANY);
bool inv = (config & ARCH_PERFMON_EVENTSEL_INV);
ssize_t ret;
/*
* We have whole page size to spend and just little data
* to write, so we can safely use sprintf.
*/
ret = sprintf(page, "event=0x%02llx", event);
if (umask)
ret += sprintf(page + ret, ",umask=0x%02llx", umask);
if (edge)
ret += sprintf(page + ret, ",edge");
if (pc)
ret += sprintf(page + ret, ",pc");
if (any)
ret += sprintf(page + ret, ",any");
if (inv)
ret += sprintf(page + ret, ",inv");
if (cmask)
ret += sprintf(page + ret, ",cmask=0x%02llx", cmask);
ret += sprintf(page + ret, "\n");
return ret;
}
static struct attribute_group x86_pmu_attr_group;
static struct attribute_group x86_pmu_caps_group;
static int __init init_hw_perf_events(void)
{
struct x86_pmu_quirk *quirk;
int err;
pr_info("Performance Events: ");
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
err = intel_pmu_init();
break;
case X86_VENDOR_AMD:
err = amd_pmu_init();
break;
case X86_VENDOR_HYGON:
err = amd_pmu_init();
x86_pmu.name = "HYGON";
break;
default:
err = -ENOTSUPP;
}
if (err != 0) {
pr_cont("no PMU driver, software events only.\n");
return 0;
}
pmu_check_apic();
/* sanity check that the hardware exists or is emulated */
if (!check_hw_exists())
return 0;
pr_cont("%s PMU driver.\n", x86_pmu.name);
x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
quirk->func();
if (!x86_pmu.intel_ctrl)
x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
perf_events_lapic_init();
register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
0, x86_pmu.num_counters, 0, 0);
x86_pmu_format_group.attrs = x86_pmu.format_attrs;
if (x86_pmu.caps_attrs) {
struct attribute **tmp;
tmp = merge_attr(x86_pmu_caps_group.attrs, x86_pmu.caps_attrs);
if (!WARN_ON(!tmp))
x86_pmu_caps_group.attrs = tmp;
}
if (x86_pmu.event_attrs)
x86_pmu_events_group.attrs = x86_pmu.event_attrs;
if (!x86_pmu.events_sysfs_show)
x86_pmu_events_group.attrs = &empty_attrs;
else
filter_events(x86_pmu_events_group.attrs);
if (x86_pmu.cpu_events) {
struct attribute **tmp;
tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
if (!WARN_ON(!tmp))
x86_pmu_events_group.attrs = tmp;
}
if (x86_pmu.attrs) {
struct attribute **tmp;
tmp = merge_attr(x86_pmu_attr_group.attrs, x86_pmu.attrs);
if (!WARN_ON(!tmp))
x86_pmu_attr_group.attrs = tmp;
}
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
pr_info("... generic registers: %d\n", x86_pmu.num_counters);
pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
pr_info("... max period: %016Lx\n", x86_pmu.max_period);
pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
/*
* Install callbacks. Core will call them for each online
* cpu.
*/
err = cpuhp_setup_state(CPUHP_PERF_X86_PREPARE, "perf/x86:prepare",
x86_pmu_prepare_cpu, x86_pmu_dead_cpu);
if (err)
return err;
err = cpuhp_setup_state(CPUHP_AP_PERF_X86_STARTING,
"perf/x86:starting", x86_pmu_starting_cpu,
x86_pmu_dying_cpu);
if (err)
goto out;
err = cpuhp_setup_state(CPUHP_AP_PERF_X86_ONLINE, "perf/x86:online",
x86_pmu_online_cpu, NULL);
if (err)
goto out1;
err = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
if (err)
goto out2;
return 0;
out2:
cpuhp_remove_state(CPUHP_AP_PERF_X86_ONLINE);
out1:
cpuhp_remove_state(CPUHP_AP_PERF_X86_STARTING);
out:
cpuhp_remove_state(CPUHP_PERF_X86_PREPARE);
return err;
}
early_initcall(init_hw_perf_events);
static inline void x86_pmu_read(struct perf_event *event)
{
if (x86_pmu.read)
return x86_pmu.read(event);
x86_perf_event_update(event);
}
/*
* Start group events scheduling transaction
* Set the flag to make pmu::enable() not perform the
* schedulability test, it will be performed at commit time
*
* We only support PERF_PMU_TXN_ADD transactions. Save the
* transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
* transactions.
*/
static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
WARN_ON_ONCE(cpuc->txn_flags); /* txn already in flight */
cpuc->txn_flags = txn_flags;
if (txn_flags & ~PERF_PMU_TXN_ADD)
return;
perf_pmu_disable(pmu);
__this_cpu_write(cpu_hw_events.n_txn, 0);
}
/*
* Stop group events scheduling transaction
* Clear the flag and pmu::enable() will perform the
* schedulability test.
*/
static void x86_pmu_cancel_txn(struct pmu *pmu)
{
unsigned int txn_flags;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
txn_flags = cpuc->txn_flags;
cpuc->txn_flags = 0;
if (txn_flags & ~PERF_PMU_TXN_ADD)
return;
/*
* Truncate collected array by the number of events added in this
* transaction. See x86_pmu_add() and x86_pmu_*_txn().
*/
__this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
__this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
perf_pmu_enable(pmu);
}
/*
* Commit group events scheduling transaction
* Perform the group schedulability test as a whole
* Return 0 if success
*
* Does not cancel the transaction on failure; expects the caller to do this.
*/
static int x86_pmu_commit_txn(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int assign[X86_PMC_IDX_MAX];
int n, ret;
WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) {
cpuc->txn_flags = 0;
return 0;
}
n = cpuc->n_events;
if (!x86_pmu_initialized())
return -EAGAIN;
ret = x86_pmu.schedule_events(cpuc, n, assign);
if (ret)
return ret;
/*
* copy new assignment, now we know it is possible
* will be used by hw_perf_enable()
*/
memcpy(cpuc->assign, assign, n*sizeof(int));
cpuc->txn_flags = 0;
perf_pmu_enable(pmu);
return 0;
}
/*
* a fake_cpuc is used to validate event groups. Due to
* the extra reg logic, we need to also allocate a fake
* per_core and per_cpu structure. Otherwise, group events
* using extra reg may conflict without the kernel being
* able to catch this when the last event gets added to
* the group.
*/
static void free_fake_cpuc(struct cpu_hw_events *cpuc)
{
kfree(cpuc->shared_regs);
kfree(cpuc);
}
static struct cpu_hw_events *allocate_fake_cpuc(void)
{
struct cpu_hw_events *cpuc;
int cpu = raw_smp_processor_id();
cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);
if (!cpuc)
return ERR_PTR(-ENOMEM);
/* only needed, if we have extra_regs */
if (x86_pmu.extra_regs) {
cpuc->shared_regs = allocate_shared_regs(cpu);
if (!cpuc->shared_regs)
goto error;
}
cpuc->is_fake = 1;
return cpuc;
error:
free_fake_cpuc(cpuc);
return ERR_PTR(-ENOMEM);
}
/*
* validate that we can schedule this event
*/
static int validate_event(struct perf_event *event)
{
struct cpu_hw_events *fake_cpuc;
struct event_constraint *c;
int ret = 0;
fake_cpuc = allocate_fake_cpuc();
if (IS_ERR(fake_cpuc))
return PTR_ERR(fake_cpuc);
c = x86_pmu.get_event_constraints(fake_cpuc, -1, event);
if (!c || !c->weight)
ret = -EINVAL;
if (x86_pmu.put_event_constraints)
x86_pmu.put_event_constraints(fake_cpuc, event);
free_fake_cpuc(fake_cpuc);
return ret;
}
/*
* validate a single event group
*
* validation include:
* - check events are compatible which each other
* - events do not compete for the same counter
* - number of events <= number of counters
*
* validation ensures the group can be loaded onto the
* PMU if it was the only group available.
*/
static int validate_group(struct perf_event *event)
{
struct perf_event *leader = event->group_leader;
struct cpu_hw_events *fake_cpuc;
int ret = -EINVAL, n;
fake_cpuc = allocate_fake_cpuc();
if (IS_ERR(fake_cpuc))
return PTR_ERR(fake_cpuc);
/*
* the event is not yet connected with its
* siblings therefore we must first collect
* existing siblings, then add the new event
* before we can simulate the scheduling
*/
n = collect_events(fake_cpuc, leader, true);
if (n < 0)
goto out;
fake_cpuc->n_events = n;
n = collect_events(fake_cpuc, event, false);
if (n < 0)
goto out;
fake_cpuc->n_events = n;
ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
out:
free_fake_cpuc(fake_cpuc);
return ret;
}
static int x86_pmu_event_init(struct perf_event *event)
{
struct pmu *tmp;
int err;
switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE:
case PERF_TYPE_HW_CACHE:
break;
default:
return -ENOENT;
}
err = __x86_pmu_event_init(event);
if (!err) {
/*
* we temporarily connect event to its pmu
* such that validate_group() can classify
* it as an x86 event using is_x86_event()
*/
tmp = event->pmu;
event->pmu = &pmu;
if (event->group_leader != event)
err = validate_group(event);
else
err = validate_event(event);
event->pmu = tmp;
}
if (err) {
if (event->destroy)
event->destroy(event);
}
if (READ_ONCE(x86_pmu.attr_rdpmc) &&
!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
return err;
}
static void refresh_pce(void *ignored)
{
load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm));
}
static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
{
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return;
/*
* This function relies on not being called concurrently in two
* tasks in the same mm. Otherwise one task could observe
* perf_rdpmc_allowed > 1 and return all the way back to
* userspace with CR4.PCE clear while another task is still
* doing on_each_cpu_mask() to propagate CR4.PCE.
*
* For now, this can't happen because all callers hold mmap_sem
* for write. If this changes, we'll need a different solution.
*/
lockdep_assert_held_exclusive(&mm->mmap_sem);
if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1)
on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
}
static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
{
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return;
if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed))
on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
}
static int x86_pmu_event_idx(struct perf_event *event)
{
int idx = event->hw.idx;
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return 0;
if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
idx -= INTEL_PMC_IDX_FIXED;
idx |= 1 << 30;
}
return idx + 1;
}
static ssize_t get_attr_rdpmc(struct device *cdev,
struct device_attribute *attr,
char *buf)
{
return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
}
static ssize_t set_attr_rdpmc(struct device *cdev,
struct device_attribute *attr,
const char *buf, size_t count)
{
unsigned long val;
ssize_t ret;
ret = kstrtoul(buf, 0, &val);
if (ret)
return ret;
if (val > 2)
return -EINVAL;
if (x86_pmu.attr_rdpmc_broken)
return -ENOTSUPP;
if ((val == 2) != (x86_pmu.attr_rdpmc == 2)) {
/*
* Changing into or out of always available, aka
* perf-event-bypassing mode. This path is extremely slow,
* but only root can trigger it, so it's okay.
*/
if (val == 2)
static_branch_inc(&rdpmc_always_available_key);
else
static_branch_dec(&rdpmc_always_available_key);
on_each_cpu(refresh_pce, NULL, 1);
}
x86_pmu.attr_rdpmc = val;
return count;
}
static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
static struct attribute *x86_pmu_attrs[] = {
&dev_attr_rdpmc.attr,
NULL,
};
static struct attribute_group x86_pmu_attr_group __ro_after_init = {
.attrs = x86_pmu_attrs,
};
static ssize_t max_precise_show(struct device *cdev,
struct device_attribute *attr,
char *buf)
{
return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise());
}
static DEVICE_ATTR_RO(max_precise);
static struct attribute *x86_pmu_caps_attrs[] = {
&dev_attr_max_precise.attr,
NULL
};
static struct attribute_group x86_pmu_caps_group __ro_after_init = {
.name = "caps",
.attrs = x86_pmu_caps_attrs,
};
static const struct attribute_group *x86_pmu_attr_groups[] = {
&x86_pmu_attr_group,
&x86_pmu_format_group,
&x86_pmu_events_group,
&x86_pmu_caps_group,
NULL,
};
static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
{
if (x86_pmu.sched_task)
x86_pmu.sched_task(ctx, sched_in);
}
void perf_check_microcode(void)
{
if (x86_pmu.check_microcode)
x86_pmu.check_microcode();
}
static int x86_pmu_check_period(struct perf_event *event, u64 value)
{
if (x86_pmu.check_period && x86_pmu.check_period(event, value))
return -EINVAL;
if (value && x86_pmu.limit_period) {
if (x86_pmu.limit_period(event, value) > value)
return -EINVAL;
}
return 0;
}
static struct pmu pmu = {
.pmu_enable = x86_pmu_enable,
.pmu_disable = x86_pmu_disable,
.attr_groups = x86_pmu_attr_groups,
.event_init = x86_pmu_event_init,
.event_mapped = x86_pmu_event_mapped,
.event_unmapped = x86_pmu_event_unmapped,
.add = x86_pmu_add,
.del = x86_pmu_del,
.start = x86_pmu_start,
.stop = x86_pmu_stop,
.read = x86_pmu_read,
.start_txn = x86_pmu_start_txn,
.cancel_txn = x86_pmu_cancel_txn,
.commit_txn = x86_pmu_commit_txn,
.event_idx = x86_pmu_event_idx,
.sched_task = x86_pmu_sched_task,
.task_ctx_size = sizeof(struct x86_perf_task_context),
.check_period = x86_pmu_check_period,
};
void arch_perf_update_userpage(struct perf_event *event,
struct perf_event_mmap_page *userpg, u64 now)
{
struct cyc2ns_data data;
u64 offset;
userpg->cap_user_time = 0; /*covered*/
userpg->cap_user_time_zero = 0;
userpg->cap_user_rdpmc =
!!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
userpg->pmc_width = x86_pmu.cntval_bits;
if (!using_native_sched_clock() || !sched_clock_stable())
return; /*covered*/
cyc2ns_read_begin(&data);
offset = data.cyc2ns_offset + __sched_clock_offset;
/*
* Internal timekeeping for enabled/running/stopped times
* is always in the local_clock domain.
*/
userpg->cap_user_time = 1;
userpg->time_mult = data.cyc2ns_mul;
userpg->time_shift = data.cyc2ns_shift;
userpg->time_offset = offset - now;
/*
* cap_user_time_zero doesn't make sense when we're using a different
* time base for the records.
*/
if (!event->attr.use_clockid) {
userpg->cap_user_time_zero = 1;
userpg->time_zero = offset;
}
cyc2ns_read_end();
}
void
perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
{
struct unwind_state state;
unsigned long addr;
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
/* TODO: We don't support guest os callchain now */
return;
}
if (perf_callchain_store(entry, regs->ip))
return;
for (unwind_start(&state, current, regs, NULL); !unwind_done(&state);
unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);
if (!addr || perf_callchain_store(entry, addr))
return;
}
}
static inline int
valid_user_frame(const void __user *fp, unsigned long size)
{
return (__range_not_ok(fp, size, TASK_SIZE) == 0); /*covered*/
}
static unsigned long get_segment_base(unsigned int segment)
{
struct desc_struct *desc;
unsigned int idx = segment >> 3; /*covered*/
if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
#ifdef CONFIG_MODIFY_LDT_SYSCALL
struct ldt_struct *ldt;
/* IRQs are off, so this synchronizes with smp_store_release */
ldt = READ_ONCE(current->active_mm->context.ldt);
if (!ldt || idx >= ldt->nr_entries)
return 0;
desc = &ldt->entries[idx];
#else
return 0;
#endif
} else {
if (idx >= GDT_ENTRIES) /*covered*/
return 0;
desc = raw_cpu_ptr(gdt_page.gdt) + idx; /*covered*/
}
return get_desc_base(desc); /*covered*/
}
#ifdef CONFIG_IA32_EMULATION
#include <linux/compat.h>
static inline int
perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *entry)
{
/* 32-bit process in 64-bit kernel. */
unsigned long ss_base, cs_base;
struct stack_frame_ia32 frame;
const void __user *fp;
if (!test_thread_flag(TIF_IA32)) /*covered*/
return 0;
cs_base = get_segment_base(regs->cs); /*covered*/
ss_base = get_segment_base(regs->ss);
fp = compat_ptr(ss_base + regs->bp);
pagefault_disable();
while (entry->nr < entry->max_stack) {
unsigned long bytes;
frame.next_frame = 0; /*covered*/
frame.return_address = 0;
if (!valid_user_frame(fp, sizeof(frame))) /*covered*/
break;
bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4); /*covered*/
if (bytes != 0)
break;
bytes = __copy_from_user_nmi(&frame.return_address, fp+4, 4); /*covered*/
if (bytes != 0)
break;
perf_callchain_store(entry, cs_base + frame.return_address); /*covered*/
fp = compat_ptr(ss_base + frame.next_frame);
}
pagefault_enable(); /*covered*/
return 1;
}
#else
static inline int
perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *entry)
{
return 0;
}
#endif
void
perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
{
struct stack_frame frame;
const unsigned long __user *fp;
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { /*covered*/
/* TODO: We don't support guest os callchain now */
return; /*covered*/
}
/*
* We don't know what to do with VM86 stacks.. ignore them for now.
*/
if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM)) /*covered*/
return;
fp = (unsigned long __user *)regs->bp; /*covered*/
perf_callchain_store(entry, regs->ip); /*covered*/
if (!nmi_uaccess_okay()) /*covered*/
return;
if (perf_callchain_user32(regs, entry)) /*covered*/
return;
pagefault_disable();
while (entry->nr < entry->max_stack) {
unsigned long bytes;
frame.next_frame = NULL;
frame.return_address = 0;
if (!valid_user_frame(fp, sizeof(frame)))
break;
bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp));
if (bytes != 0)
break;
bytes = __copy_from_user_nmi(&frame.return_address, fp + 1, sizeof(*fp));
if (bytes != 0)
break;
perf_callchain_store(entry, frame.return_address);
fp = (void __user *)frame.next_frame;
}
pagefault_enable();
}
/*
* Deal with code segment offsets for the various execution modes:
*
* VM86 - the good olde 16 bit days, where the linear address is
* 20 bits and we use regs->ip + 0x10 * regs->cs.
*
* IA32 - Where we need to look at GDT/LDT segment descriptor tables
* to figure out what the 32bit base address is.
*
* X32 - has TIF_X32 set, but is running in x86_64
*
* X86_64 - CS,DS,SS,ES are all zero based.
*/
static unsigned long code_segment_base(struct pt_regs *regs)
{
/*
* For IA32 we look at the GDT/LDT segment base to convert the
* effective IP to a linear address.
*/
#ifdef CONFIG_X86_32
/*
* If we are in VM86 mode, add the segment offset to convert to a
* linear address.
*/
if (regs->flags & X86_VM_MASK)
return 0x10 * regs->cs;
if (user_mode(regs) && regs->cs != __USER_CS)
return get_segment_base(regs->cs);
#else
if (user_mode(regs) && !user_64bit_mode(regs) && /*covered*/
regs->cs != __USER32_CS)
return get_segment_base(regs->cs); /*covered*/
#endif
return 0;
}
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) /*covered*/
return perf_guest_cbs->get_guest_ip();
return regs->ip + code_segment_base(regs); /*covered*/
}
unsigned long perf_misc_flags(struct pt_regs *regs)
{
int misc = 0;
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { /*covered*/
if (perf_guest_cbs->is_user_mode())
misc |= PERF_RECORD_MISC_GUEST_USER;
else
misc |= PERF_RECORD_MISC_GUEST_KERNEL;
} else {
if (user_mode(regs)) /*covered*/
misc |= PERF_RECORD_MISC_USER;
else
misc |= PERF_RECORD_MISC_KERNEL;
}
if (regs->flags & PERF_EFLAGS_EXACT) /*covered*/
misc |= PERF_RECORD_MISC_EXACT_IP;
return misc; /*covered*/
}
void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
{
cap->version = x86_pmu.version; /*covered*/
cap->num_counters_gp = x86_pmu.num_counters;
cap->num_counters_fixed = x86_pmu.num_counters_fixed;
cap->bit_width_gp = x86_pmu.cntval_bits;
cap->bit_width_fixed = x86_pmu.cntval_bits;
cap->events_mask = (unsigned int)x86_pmu.events_maskl;
cap->events_mask_len = x86_pmu.events_mask_len;
}
EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
/*
* Per core/cpu state
*
* Used to coordinate shared registers between HT threads or
* among events on a single PMU.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/nmi.h>
#include <asm/cpufeature.h>
#include <asm/hardirq.h>
#include <asm/intel-family.h>
#include <asm/apic.h>
#include "../perf_event.h"
/*
* Intel PerfMon, used on Core and later.
*/
static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
[PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
[PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
[PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */
};
static struct event_constraint intel_core_event_constraints[] __read_mostly =
{
INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
EVENT_CONSTRAINT_END
};
static struct event_constraint intel_core2_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
EVENT_CONSTRAINT_END
};
static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
EVENT_CONSTRAINT_END
};
static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
{
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
EVENT_EXTRA_END
};
static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
EVENT_CONSTRAINT_END
};
static struct event_constraint intel_snb_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
/*
* When HT is off these events can only run on the bottom 4 counters
* When HT is on, they are impacted by the HT bug and require EXCL access
*/
INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
EVENT_CONSTRAINT_END
};
static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */
INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMTPY */
INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */
INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */
INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */
INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
/*
* When HT is off these events can only run on the bottom 4 counters
* When HT is on, they are impacted by the HT bug and require EXCL access
*/
INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
EVENT_CONSTRAINT_END
};
static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
{
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
EVENT_EXTRA_END
};
static struct event_constraint intel_v1_event_constraints[] __read_mostly =
{
EVENT_CONSTRAINT_END
};
static struct event_constraint intel_gen_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
EVENT_CONSTRAINT_END
};
static struct event_constraint intel_slm_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
EVENT_CONSTRAINT_END
};
static struct event_constraint intel_skl_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
/*
* when HT is off, these can only run on the bottom 4 counters
*/
INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xc6, 0xf), /* FRONTEND_RETIRED.* */
EVENT_CONSTRAINT_END
};
static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1),
EVENT_EXTRA_END
};
static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
EVENT_EXTRA_END
};
static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
EVENT_EXTRA_END
};
static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
/*
* Note the low 8 bits eventsel code is not a continuous field, containing
* some #GPing bits. These are masked out.
*/
INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
EVENT_EXTRA_END
};
EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
static struct attribute *nhm_mem_events_attrs[] = {
EVENT_PTR(mem_ld_nhm),
NULL,
};
/*
* topdown events for Intel Core CPUs.
*
* The events are all in slots, which is a free slot in a 4 wide
* pipeline. Some events are already reported in slots, for cycle
* events we multiply by the pipeline width (4).
*
* With Hyper Threading on, topdown metrics are either summed or averaged
* between the threads of a core: (count_t0 + count_t1).
*
* For the average case the metric is always scaled to pipeline width,
* so we use factor 2 ((count_t0 + count_t1) / 2 * 4)
*/
EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots,
"event=0x3c,umask=0x0", /* cpu_clk_unhalted.thread */
"event=0x3c,umask=0x0,any=1"); /* cpu_clk_unhalted.thread_any */
EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2");
EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued,
"event=0xe,umask=0x1"); /* uops_issued.any */
EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired,
"event=0xc2,umask=0x2"); /* uops_retired.retire_slots */
EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles,
"event=0x9c,umask=0x1"); /* idq_uops_not_delivered_core */
EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
"event=0xd,umask=0x3,cmask=1", /* int_misc.recovery_cycles */
"event=0xd,umask=0x3,cmask=1,any=1"); /* int_misc.recovery_cycles_any */
EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
"4", "2");
static struct attribute *snb_events_attrs[] = {
EVENT_PTR(td_slots_issued),
EVENT_PTR(td_slots_retired),
EVENT_PTR(td_fetch_bubbles),
EVENT_PTR(td_total_slots),
EVENT_PTR(td_total_slots_scale),
EVENT_PTR(td_recovery_bubbles),
EVENT_PTR(td_recovery_bubbles_scale),
NULL,
};
static struct attribute *snb_mem_events_attrs[] = {
EVENT_PTR(mem_ld_snb),
EVENT_PTR(mem_st_snb),
NULL,
};
static struct event_constraint intel_hsw_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
/* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4),
/* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
/* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf),
/*
* When HT is off these events can only run on the bottom 4 counters
* When HT is on, they are impacted by the HT bug and require EXCL access
*/
INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
EVENT_CONSTRAINT_END
};
static struct event_constraint intel_bdw_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
/*
* when HT is off, these can only run on the bottom 4 counters
*/
INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */
EVENT_CONSTRAINT_END
};
static u64 intel_pmu_event_map(int hw_event)
{
return intel_perfmon_event_map[hw_event];
}
/*
* Notes on the events:
* - data reads do not include code reads (comparable to earlier tables)
* - data counts include speculative execution (except L1 write, dtlb, bpu)
* - remote node access includes remote memory, remote cache, remote mmio.
* - prefetches are not included in the counts.
* - icache miss does not include decoded icache
*/
#define SKL_DEMAND_DATA_RD BIT_ULL(0)
#define SKL_DEMAND_RFO BIT_ULL(1)
#define SKL_ANY_RESPONSE BIT_ULL(16)
#define SKL_SUPPLIER_NONE BIT_ULL(17)
#define SKL_L3_MISS_LOCAL_DRAM BIT_ULL(26)
#define SKL_L3_MISS_REMOTE_HOP0_DRAM BIT_ULL(27)
#define SKL_L3_MISS_REMOTE_HOP1_DRAM BIT_ULL(28)
#define SKL_L3_MISS_REMOTE_HOP2P_DRAM BIT_ULL(29)
#define SKL_L3_MISS (SKL_L3_MISS_LOCAL_DRAM| \
SKL_L3_MISS_REMOTE_HOP0_DRAM| \
SKL_L3_MISS_REMOTE_HOP1_DRAM| \
SKL_L3_MISS_REMOTE_HOP2P_DRAM)
#define SKL_SPL_HIT BIT_ULL(30)
#define SKL_SNOOP_NONE BIT_ULL(31)
#define SKL_SNOOP_NOT_NEEDED BIT_ULL(32)
#define SKL_SNOOP_MISS BIT_ULL(33)
#define SKL_SNOOP_HIT_NO_FWD BIT_ULL(34)
#define SKL_SNOOP_HIT_WITH_FWD BIT_ULL(35)
#define SKL_SNOOP_HITM BIT_ULL(36)
#define SKL_SNOOP_NON_DRAM BIT_ULL(37)
#define SKL_ANY_SNOOP (SKL_SPL_HIT|SKL_SNOOP_NONE| \
SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
SKL_SNOOP_HITM|SKL_SNOOP_NON_DRAM)
#define SKL_DEMAND_READ SKL_DEMAND_DATA_RD
#define SKL_SNOOP_DRAM (SKL_SNOOP_NONE| \
SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
SKL_SNOOP_HITM|SKL_SPL_HIT)
#define SKL_DEMAND_WRITE SKL_DEMAND_RFO
#define SKL_LLC_ACCESS SKL_ANY_RESPONSE
#define SKL_L3_MISS_REMOTE (SKL_L3_MISS_REMOTE_HOP0_DRAM| \
SKL_L3_MISS_REMOTE_HOP1_DRAM| \
SKL_L3_MISS_REMOTE_HOP2P_DRAM)
static __initconst const u64 skl_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(L1D ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
[ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
[ C(RESULT_MISS) ] = 0x0,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(L1I ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x283, /* ICACHE_64B.MISS */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
[ C(RESULT_MISS) ] = 0xe08, /* DTLB_LOAD_MISSES.WALK_COMPLETED */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
[ C(RESULT_MISS) ] = 0xe49, /* DTLB_STORE_MISSES.WALK_COMPLETED */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x2085, /* ITLB_MISSES.STLB_HIT */
[ C(RESULT_MISS) ] = 0xe85, /* ITLB_MISSES.WALK_COMPLETED */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(BPU ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */
[ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(NODE) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
};
static __initconst const u64 skl_hw_cache_extra_regs
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
SKL_LLC_ACCESS|SKL_ANY_SNOOP,
[ C(RESULT_MISS) ] = SKL_DEMAND_READ|
SKL_L3_MISS|SKL_ANY_SNOOP|
SKL_SUPPLIER_NONE,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
SKL_LLC_ACCESS|SKL_ANY_SNOOP,
[ C(RESULT_MISS) ] = SKL_DEMAND_WRITE|
SKL_L3_MISS|SKL_ANY_SNOOP|
SKL_SUPPLIER_NONE,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(NODE) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
[ C(RESULT_MISS) ] = SKL_DEMAND_READ|
SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
[ C(RESULT_MISS) ] = SKL_DEMAND_WRITE|
SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
};
#define SNB_DMND_DATA_RD (1ULL << 0)
#define SNB_DMND_RFO (1ULL << 1)
#define SNB_DMND_IFETCH (1ULL << 2)
#define SNB_DMND_WB (1ULL << 3)
#define SNB_PF_DATA_RD (1ULL << 4)
#define SNB_PF_RFO (1ULL << 5)
#define SNB_PF_IFETCH (1ULL << 6)
#define SNB_LLC_DATA_RD (1ULL << 7)
#define SNB_LLC_RFO (1ULL << 8)
#define SNB_LLC_IFETCH (1ULL << 9)
#define SNB_BUS_LOCKS (1ULL << 10)
#define SNB_STRM_ST (1ULL << 11)
#define SNB_OTHER (1ULL << 15)
#define SNB_RESP_ANY (1ULL << 16)
#define SNB_NO_SUPP (1ULL << 17)
#define SNB_LLC_HITM (1ULL << 18)
#define SNB_LLC_HITE (1ULL << 19)
#define SNB_LLC_HITS (1ULL << 20)
#define SNB_LLC_HITF (1ULL << 21)
#define SNB_LOCAL (1ULL << 22)
#define SNB_REMOTE (0xffULL << 23)
#define SNB_SNP_NONE (1ULL << 31)
#define SNB_SNP_NOT_NEEDED (1ULL << 32)
#define SNB_SNP_MISS (1ULL << 33)
#define SNB_NO_FWD (1ULL << 34)
#define SNB_SNP_FWD (1ULL << 35)
#define SNB_HITM (1ULL << 36)
#define SNB_NON_DRAM (1ULL << 37)
#define SNB_DMND_READ (SNB_DMND_DATA_RD|SNB_LLC_DATA_RD)
#define SNB_DMND_WRITE (SNB_DMND_RFO|SNB_LLC_RFO)
#define SNB_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO)
#define SNB_SNP_ANY (SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \
SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \
SNB_HITM)
#define SNB_DRAM_ANY (SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY)
#define SNB_DRAM_REMOTE (SNB_REMOTE|SNB_SNP_ANY)
#define SNB_L3_ACCESS SNB_RESP_ANY
#define SNB_L3_MISS (SNB_DRAM_ANY|SNB_NON_DRAM)
static __initconst const u64 snb_hw_cache_extra_regs
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS,
[ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_L3_MISS,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS,
[ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_L3_MISS,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS,
[ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_L3_MISS,
},
},
[ C(NODE) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY,
[ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_DRAM_REMOTE,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY,
[ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY,
[ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE,
},
},
};
static __initconst const u64 snb_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(L1D) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
[ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
[ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
},
},
[ C(L1I ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7,
/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
[ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_WRITE) ] = {
/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7,
/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
[ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_PREFETCH) ] = {
/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7,
/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
[ C(RESULT_MISS) ] = 0x01b7,
},
},
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
[ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
[ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
[ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(BPU ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
[ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(NODE) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x01b7,
[ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x01b7,
[ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x01b7,
[ C(RESULT_MISS) ] = 0x01b7,
},
},
};
/*
* Notes on the events:
* - data reads do not include code reads (comparable to earlier tables)
* - data counts include speculative execution (except L1 write, dtlb, bpu)
* - remote node access includes remote memory, remote cache, remote mmio.
* - prefetches are not included in the counts because they are not
* reliably counted.
*/
#define HSW_DEMAND_DATA_RD BIT_ULL(0)
#define HSW_DEMAND_RFO BIT_ULL(1)
#define HSW_ANY_RESPONSE BIT_ULL(16)
#define HSW_SUPPLIER_NONE BIT_ULL(17)
#define HSW_L3_MISS_LOCAL_DRAM BIT_ULL(22)
#define HSW_L3_MISS_REMOTE_HOP0 BIT_ULL(27)
#define HSW_L3_MISS_REMOTE_HOP1 BIT_ULL(28)
#define HSW_L3_MISS_REMOTE_HOP2P BIT_ULL(29)
#define HSW_L3_MISS (HSW_L3_MISS_LOCAL_DRAM| \
HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
HSW_L3_MISS_REMOTE_HOP2P)
#define HSW_SNOOP_NONE BIT_ULL(31)
#define HSW_SNOOP_NOT_NEEDED BIT_ULL(32)
#define HSW_SNOOP_MISS BIT_ULL(33)
#define HSW_SNOOP_HIT_NO_FWD BIT_ULL(34)
#define HSW_SNOOP_HIT_WITH_FWD BIT_ULL(35)
#define HSW_SNOOP_HITM BIT_ULL(36)
#define HSW_SNOOP_NON_DRAM BIT_ULL(37)
#define HSW_ANY_SNOOP (HSW_SNOOP_NONE| \
HSW_SNOOP_NOT_NEEDED|HSW_SNOOP_MISS| \
HSW_SNOOP_HIT_NO_FWD|HSW_SNOOP_HIT_WITH_FWD| \
HSW_SNOOP_HITM|HSW_SNOOP_NON_DRAM)
#define HSW_SNOOP_DRAM (HSW_ANY_SNOOP & ~HSW_SNOOP_NON_DRAM)
#define HSW_DEMAND_READ HSW_DEMAND_DATA_RD
#define HSW_DEMAND_WRITE HSW_DEMAND_RFO
#define HSW_L3_MISS_REMOTE (HSW_L3_MISS_REMOTE_HOP0|\
HSW_L3_MISS_REMOTE_HOP1|HSW_L3_MISS_REMOTE_HOP2P)
#define HSW_LLC_ACCESS HSW_ANY_RESPONSE
#define BDW_L3_MISS_LOCAL BIT(26)
#define BDW_L3_MISS (BDW_L3_MISS_LOCAL| \
HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
HSW_L3_MISS_REMOTE_HOP2P)
static __initconst const u64 hsw_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(L1D ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
[ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
[ C(RESULT_MISS) ] = 0x0,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(L1I ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x280, /* ICACHE.MISSES */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
[ C(RESULT_MISS) ] = 0x108, /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
[ C(RESULT_MISS) ] = 0x149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x6085, /* ITLB_MISSES.STLB_HIT */
[ C(RESULT_MISS) ] = 0x185, /* ITLB_MISSES.MISS_CAUSES_A_WALK */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(BPU ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */
[ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(NODE) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
};
static __initconst const u64 hsw_hw_cache_extra_regs
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
HSW_LLC_ACCESS,
[ C(RESULT_MISS) ] = HSW_DEMAND_READ|
HSW_L3_MISS|HSW_ANY_SNOOP,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
HSW_LLC_ACCESS,
[ C(RESULT_MISS) ] = HSW_DEMAND_WRITE|
HSW_L3_MISS|HSW_ANY_SNOOP,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(NODE) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
HSW_L3_MISS_LOCAL_DRAM|
HSW_SNOOP_DRAM,
[ C(RESULT_MISS) ] = HSW_DEMAND_READ|
HSW_L3_MISS_REMOTE|
HSW_SNOOP_DRAM,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
HSW_L3_MISS_LOCAL_DRAM|
HSW_SNOOP_DRAM,
[ C(RESULT_MISS) ] = HSW_DEMAND_WRITE|
HSW_L3_MISS_REMOTE|
HSW_SNOOP_DRAM,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
};
static __initconst const u64 westmere_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(L1D) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
[ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */
[ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
[ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
},
},
[ C(L1I ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
[ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7,
/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
[ C(RESULT_MISS) ] = 0x01b7,
},
/*
* Use RFO, not WRITEBACK, because a write miss would typically occur
* on RFO.
*/
[ C(OP_WRITE) ] = {
/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7,
/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
[ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_PREFETCH) ] = {
/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7,
/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
[ C(RESULT_MISS) ] = 0x01b7,
},
},
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
[ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */
[ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
[ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(BPU ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
[ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(NODE) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x01b7,
[ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x01b7,
[ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x01b7,
[ C(RESULT_MISS) ] = 0x01b7,
},
},
};
/*
* Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
* See IA32 SDM Vol 3B 30.6.1.3
*/
#define NHM_DMND_DATA_RD (1 << 0)
#define NHM_DMND_RFO (1 << 1)
#define NHM_DMND_IFETCH (1 << 2)
#define NHM_DMND_WB (1 << 3)
#define NHM_PF_DATA_RD (1 << 4)
#define NHM_PF_DATA_RFO (1 << 5)
#define NHM_PF_IFETCH (1 << 6)
#define NHM_OFFCORE_OTHER (1 << 7)
#define NHM_UNCORE_HIT (1 << 8)
#define NHM_OTHER_CORE_HIT_SNP (1 << 9)
#define NHM_OTHER_CORE_HITM (1 << 10)
/* reserved */
#define NHM_REMOTE_CACHE_FWD (1 << 12)
#define NHM_REMOTE_DRAM (1 << 13)
#define NHM_LOCAL_DRAM (1 << 14)
#define NHM_NON_DRAM (1 << 15)
#define NHM_LOCAL (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
#define NHM_REMOTE (NHM_REMOTE_DRAM)
#define NHM_DMND_READ (NHM_DMND_DATA_RD)
#define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB)
#define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
#define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
#define NHM_L3_MISS (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
#define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS)
static __initconst const u64 nehalem_hw_cache_extra_regs
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
[ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
[ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
[ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
},
},
[ C(NODE) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
[ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
[ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
[ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE,
},
},
};
static __initconst const u64 nehalem_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(L1D) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
[ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */
[ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
[ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
},
},
[ C(L1I ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
[ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7,
/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
[ C(RESULT_MISS) ] = 0x01b7,
},
/*
* Use RFO, not WRITEBACK, because a write miss would typically occur
* on RFO.
*/
[ C(OP_WRITE) ] = {
/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7,
/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
[ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_PREFETCH) ] = {
/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7,
/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
[ C(RESULT_MISS) ] = 0x01b7,
},
},
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
[ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
[ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0x0,
},
},
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
[ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(BPU ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
[ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(NODE) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x01b7,
[ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x01b7,
[ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x01b7,
[ C(RESULT_MISS) ] = 0x01b7,
},
},
};
static __initconst const u64 core2_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(L1D) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
[ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
[ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
[ C(RESULT_MISS) ] = 0,
},
},
[ C(L1I ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
[ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
[ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
[ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
},
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
[ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
[ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
},
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
[ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(BPU ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
[ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
};
static __initconst const u64 atom_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(L1D) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
[ C(RESULT_MISS) ] = 0,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
[ C(RESULT_MISS) ] = 0,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = 0,
},
},
[ C(L1I ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
[ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
[ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
[ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
},
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
[ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
[ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
},
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
[ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(BPU ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
[ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
};
EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c");
EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2");
/* no_alloc_cycles.not_delivered */
EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm,
"event=0xca,umask=0x50");
EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2");
/* uops_retired.all */
EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm,
"event=0xc2,umask=0x10");
/* uops_retired.all */
EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm,
"event=0xc2,umask=0x10");
static struct attribute *slm_events_attrs[] = {
EVENT_PTR(td_total_slots_slm),
EVENT_PTR(td_total_slots_scale_slm),
EVENT_PTR(td_fetch_bubbles_slm),
EVENT_PTR(td_fetch_bubbles_scale_slm),
EVENT_PTR(td_slots_issued_slm),
EVENT_PTR(td_slots_retired_slm),
NULL
};
static struct extra_reg intel_slm_extra_regs[] __read_mostly =
{
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x368005ffffull, RSP_1),
EVENT_EXTRA_END
};
#define SLM_DMND_READ SNB_DMND_DATA_RD
#define SLM_DMND_WRITE SNB_DMND_RFO
#define SLM_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO)
#define SLM_SNP_ANY (SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM)
#define SLM_LLC_ACCESS SNB_RESP_ANY
#define SLM_LLC_MISS (SLM_SNP_ANY|SNB_NON_DRAM)
static __initconst const u64 slm_hw_cache_extra_regs
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
[ C(RESULT_MISS) ] = 0,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
[ C(RESULT_MISS) ] = SLM_DMND_WRITE|SLM_LLC_MISS,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS,
[ C(RESULT_MISS) ] = SLM_DMND_PREFETCH|SLM_LLC_MISS,
},
},
};
static __initconst const u64 slm_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
[ C(L1D) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0x0104, /* LD_DCU_MISS */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
},
[ C(L1I ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */
[ C(RESULT_MISS) ] = 0x0280, /* ICACGE.MISSES */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7,
[ C(RESULT_MISS) ] = 0,
},
[ C(OP_WRITE) ] = {
/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7,
/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
[ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_PREFETCH) ] = {
/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7,
/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
[ C(RESULT_MISS) ] = 0x01b7,
},
},
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0x0804, /* LD_DTLB_MISS */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
},
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
[ C(RESULT_MISS) ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
[ C(BPU ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
[ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = -1,
[ C(RESULT_MISS) ] = -1,
},
},
};
EVENT_ATTR_STR(topdown-total-slots, td_total_slots_glm, "event=0x3c");
EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_glm, "3");
/* UOPS_NOT_DELIVERED.ANY */
EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_glm, "event=0x9c");
/* ISSUE_SLOTS_NOT_CONSUMED.RECOVERY */
EVENT_ATTR_STR(topdown-recovery-bubbles, td_recovery_bubbles_glm, "event=0xca,umask=0x02");
/* UOPS_RETIRED.ANY */
EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_glm, "event=0xc2");
/* UOPS_ISSUED.ANY */
EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_glm, "event=0x0e");
static struct attribute *glm_events_attrs[] = {
EVENT_PTR(td_total_slots_glm),
EVENT_PTR(td_total_slots_scale_glm),
EVENT_PTR(td_fetch_bubbles_glm),
EVENT_PTR(td_recovery_bubbles_glm),
EVENT_PTR(td_slots_issued_glm),
EVENT_PTR(td_slots_retired_glm),
NULL
};
static struct extra_reg intel_glm_extra_regs[] __read_mostly = {
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x360005ffbfull, RSP_1),
EVENT_EXTRA_END
};
#define GLM_DEMAND_DATA_RD BIT_ULL(0)
#define GLM_DEMAND_RFO BIT_ULL(1)
#define GLM_ANY_RESPONSE BIT_ULL(16)
#define GLM_SNP_NONE_OR_MISS BIT_ULL(33)
#define GLM_DEMAND_READ GLM_DEMAND_DATA_RD
#define GLM_DEMAND_WRITE GLM_DEMAND_RFO
#define GLM_DEMAND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO)
#define GLM_LLC_ACCESS GLM_ANY_RESPONSE
#define GLM_SNP_ANY (GLM_SNP_NONE_OR_MISS|SNB_NO_FWD|SNB_HITM)
#define GLM_LLC_MISS (GLM_SNP_ANY|SNB_NON_DRAM)
static __initconst const u64 glm_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
[C(RESULT_MISS)] = 0x0,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
[C(RESULT_MISS)] = 0x0,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = 0x0,
[C(RESULT_MISS)] = 0x0,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0x0380, /* ICACHE.ACCESSES */
[C(RESULT_MISS)] = 0x0280, /* ICACHE.MISSES */
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = 0x0,
[C(RESULT_MISS)] = 0x0,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
[C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
[C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
[C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
},
},
[C(DTLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
[C(RESULT_MISS)] = 0x0,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
[C(RESULT_MISS)] = 0x0,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = 0x0,
[C(RESULT_MISS)] = 0x0,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0x00c0, /* INST_RETIRED.ANY_P */
[C(RESULT_MISS)] = 0x0481, /* ITLB.MISS */
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
[C(RESULT_MISS)] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
},
};
static __initconst const u64 glm_hw_cache_extra_regs
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = GLM_DEMAND_READ|
GLM_LLC_ACCESS,
[C(RESULT_MISS)] = GLM_DEMAND_READ|
GLM_LLC_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = GLM_DEMAND_WRITE|
GLM_LLC_ACCESS,
[C(RESULT_MISS)] = GLM_DEMAND_WRITE|
GLM_LLC_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = GLM_DEMAND_PREFETCH|
GLM_LLC_ACCESS,
[C(RESULT_MISS)] = GLM_DEMAND_PREFETCH|
GLM_LLC_MISS,
},
},
};
static __initconst const u64 glp_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
[C(RESULT_MISS)] = 0x0,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
[C(RESULT_MISS)] = 0x0,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = 0x0,
[C(RESULT_MISS)] = 0x0,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0x0380, /* ICACHE.ACCESSES */
[C(RESULT_MISS)] = 0x0280, /* ICACHE.MISSES */
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = 0x0,
[C(RESULT_MISS)] = 0x0,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
[C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
[C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = 0x0,
[C(RESULT_MISS)] = 0x0,
},
},
[C(DTLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
[C(RESULT_MISS)] = 0xe08, /* DTLB_LOAD_MISSES.WALK_COMPLETED */
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
[C(RESULT_MISS)] = 0xe49, /* DTLB_STORE_MISSES.WALK_COMPLETED */
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = 0x0,
[C(RESULT_MISS)] = 0x0,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0x00c0, /* INST_RETIRED.ANY_P */
[C(RESULT_MISS)] = 0x0481, /* ITLB.MISS */
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
[C(RESULT_MISS)] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = -1,
[C(RESULT_MISS)] = -1,
},
},
};
static __initconst const u64 glp_hw_cache_extra_regs
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = GLM_DEMAND_READ|
GLM_LLC_ACCESS,
[C(RESULT_MISS)] = GLM_DEMAND_READ|
GLM_LLC_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = GLM_DEMAND_WRITE|
GLM_LLC_ACCESS,
[C(RESULT_MISS)] = GLM_DEMAND_WRITE|
GLM_LLC_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = 0x0,
[C(RESULT_MISS)] = 0x0,
},
},
};
#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */
#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */
#define KNL_MCDRAM_LOCAL BIT_ULL(21)
#define KNL_MCDRAM_FAR BIT_ULL(22)
#define KNL_DDR_LOCAL BIT_ULL(23)
#define KNL_DDR_FAR BIT_ULL(24)
#define KNL_DRAM_ANY (KNL_MCDRAM_LOCAL | KNL_MCDRAM_FAR | \
KNL_DDR_LOCAL | KNL_DDR_FAR)
#define KNL_L2_READ SLM_DMND_READ
#define KNL_L2_WRITE SLM_DMND_WRITE
#define KNL_L2_PREFETCH SLM_DMND_PREFETCH
#define KNL_L2_ACCESS SLM_LLC_ACCESS
#define KNL_L2_MISS (KNL_OT_L2_HITE | KNL_OT_L2_HITF | \
KNL_DRAM_ANY | SNB_SNP_ANY | \
SNB_NON_DRAM)
static __initconst const u64 knl_hw_cache_extra_regs
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = KNL_L2_READ | KNL_L2_ACCESS,
[C(RESULT_MISS)] = 0,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = KNL_L2_WRITE | KNL_L2_ACCESS,
[C(RESULT_MISS)] = KNL_L2_WRITE | KNL_L2_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = KNL_L2_PREFETCH | KNL_L2_ACCESS,
[C(RESULT_MISS)] = KNL_L2_PREFETCH | KNL_L2_MISS,
},
},
};
/*
* Used from PMIs where the LBRs are already disabled.
*
* This function could be called consecutively. It is required to remain in
* disabled state if called consecutively.
*
* During consecutive calls, the same disable value will be written to related
* registers, so the PMU state remains unchanged.
*
* intel_bts events don't coexist with intel PMU's BTS events because of
* x86_add_exclusive(x86_lbr_exclusive_lbr); there's no need to keep them
* disabled around intel PMU's event batching etc, only inside the PMI handler.
*/
static void __intel_pmu_disable_all(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
intel_pmu_disable_bts();
intel_pmu_pebs_disable_all();
}
static void intel_pmu_disable_all(void)
{
__intel_pmu_disable_all();
intel_pmu_lbr_disable_all();
}
static void __intel_pmu_enable_all(int added, bool pmi)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
intel_pmu_pebs_enable_all();
intel_pmu_lbr_enable_all(pmi);
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
struct perf_event *event =
cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
if (WARN_ON_ONCE(!event))
return;
intel_pmu_enable_bts(event->hw.config);
}
}
static void intel_pmu_enable_all(int added)
{
__intel_pmu_enable_all(added, false);
}
/*
* Workaround for:
* Intel Errata AAK100 (model 26)
* Intel Errata AAP53 (model 30)
* Intel Errata BD53 (model 44)
*
* The official story:
* These chips need to be 'reset' when adding counters by programming the
* magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
* in sequence on the same PMC or on different PMCs.
*
* In practise it appears some of these events do in fact count, and
* we need to program all 4 events.
*/
static void intel_pmu_nhm_workaround(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
static const unsigned long nhm_magic[4] = {
0x4300B5,
0x4300D2,
0x4300B1,
0x4300B1
};
struct perf_event *event;
int i;
/*
* The Errata requires below steps:
* 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
* 2) Configure 4 PERFEVTSELx with the magic events and clear
* the corresponding PMCx;
* 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
* 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
* 5) Clear 4 pairs of ERFEVTSELx and PMCx;
*/
/*
* The real steps we choose are a little different from above.
* A) To reduce MSR operations, we don't run step 1) as they
* are already cleared before this function is called;
* B) Call x86_perf_event_update to save PMCx before configuring
* PERFEVTSELx with magic number;
* C) With step 5), we do clear only when the PERFEVTSELx is
* not used currently.
* D) Call x86_perf_event_set_period to restore PMCx;
*/
/* We always operate 4 pairs of PERF Counters */
for (i = 0; i < 4; i++) {
event = cpuc->events[i];
if (event)
x86_perf_event_update(event);
}
for (i = 0; i < 4; i++) {
wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
}
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
for (i = 0; i < 4; i++) {
event = cpuc->events[i];
if (event) {
x86_perf_event_set_period(event);
__x86_pmu_enable_event(&event->hw,
ARCH_PERFMON_EVENTSEL_ENABLE);
} else
wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
}
}
static void intel_pmu_nhm_enable_all(int added)
{
if (added)
intel_pmu_nhm_workaround();
intel_pmu_enable_all(added);
}
static void enable_counter_freeze(void)
{
update_debugctlmsr(get_debugctlmsr() |
DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI);
}
static void disable_counter_freeze(void)
{
update_debugctlmsr(get_debugctlmsr() &
~DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI);
}
static inline u64 intel_pmu_get_status(void)
{
u64 status;
rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
return status;
}
static inline void intel_pmu_ack_status(u64 ack)
{
wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}
static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
{
int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
u64 ctrl_val, mask;
mask = 0xfULL << (idx * 4);
rdmsrl(hwc->config_base, ctrl_val);
ctrl_val &= ~mask;
wrmsrl(hwc->config_base, ctrl_val);
}
static inline bool event_is_checkpointed(struct perf_event *event)
{
return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
}
static void intel_pmu_disable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
intel_pmu_disable_bts();
intel_pmu_drain_bts_buffer();
return;
}
cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
cpuc->intel_cp_status &= ~(1ull << hwc->idx);
if (unlikely(event->attr.precise_ip))
intel_pmu_pebs_disable(event);
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_disable_fixed(hwc);
return;
}
x86_pmu_disable_event(event);
}
static void intel_pmu_del_event(struct perf_event *event)
{
if (needs_branch_stack(event))
intel_pmu_lbr_del(event);
if (event->attr.precise_ip)
intel_pmu_pebs_del(event);
}
static void intel_pmu_read_event(struct perf_event *event)
{
if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
intel_pmu_auto_reload_read(event);
else
x86_perf_event_update(event);
}
static void intel_pmu_enable_fixed(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
u64 ctrl_val, mask, bits = 0;
/*
* Enable IRQ generation (0x8), if not PEBS,
* and enable ring-3 counting (0x2) and ring-0 counting (0x1)
* if requested:
*/
if (!event->attr.precise_ip)
bits |= 0x8;
if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
bits |= 0x2;
if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
bits |= 0x1;
/*
* ANY bit is supported in v3 and up
*/
if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
bits |= 0x4;
bits <<= (idx * 4);
mask = 0xfULL << (idx * 4);
rdmsrl(hwc->config_base, ctrl_val);
ctrl_val &= ~mask;
ctrl_val |= bits;
wrmsrl(hwc->config_base, ctrl_val);
}
static void intel_pmu_enable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
if (!__this_cpu_read(cpu_hw_events.enabled))
return;
intel_pmu_enable_bts(hwc->config);
return;
}
if (event->attr.exclude_host)
cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
if (event->attr.exclude_guest)
cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
if (unlikely(event_is_checkpointed(event)))
cpuc->intel_cp_status |= (1ull << hwc->idx);
if (unlikely(event->attr.precise_ip))
intel_pmu_pebs_enable(event);
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_enable_fixed(event);
return;
}
__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
}
static void intel_pmu_add_event(struct perf_event *event)
{
if (event->attr.precise_ip)
intel_pmu_pebs_add(event);
if (needs_branch_stack(event))
intel_pmu_lbr_add(event);
}
/*
* Save and restart an expired event. Called by NMI contexts,
* so it has to be careful about preempting normal event ops:
*/
int intel_pmu_save_and_restart(struct perf_event *event)
{
x86_perf_event_update(event);
/*
* For a checkpointed counter always reset back to 0. This
* avoids a situation where the counter overflows, aborts the
* transaction and is then set back to shortly before the
* overflow, and overflows and aborts again.
*/
if (unlikely(event_is_checkpointed(event))) {
/* No race with NMIs because the counter should not be armed */
wrmsrl(event->hw.event_base, 0);
local64_set(&event->hw.prev_count, 0);
}
return x86_perf_event_set_period(event);
}
static void intel_pmu_reset(void)
{
struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
unsigned long flags;
int idx;
if (!x86_pmu.num_counters)
return;
local_irq_save(flags);
pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
wrmsrl_safe(x86_pmu_event_addr(idx), 0ull);
}
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
if (ds)
ds->bts_index = ds->bts_buffer_base;
/* Ack all overflows and disable fixed counters */
if (x86_pmu.version >= 2) {
intel_pmu_ack_status(intel_pmu_get_status());
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}
/* Reset LBRs and LBR freezing */
if (x86_pmu.lbr_nr) {
update_debugctlmsr(get_debugctlmsr() &
~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR));
}
local_irq_restore(flags);
}
static int handle_pmi_common(struct pt_regs *regs, u64 status)
{
struct perf_sample_data data;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int bit;
int handled = 0;
inc_irq_stat(apic_perf_irqs);
/*
* Ignore a range of extra bits in status that do not indicate
* overflow by themselves.
*/
status &= ~(GLOBAL_STATUS_COND_CHG |
GLOBAL_STATUS_ASIF |
GLOBAL_STATUS_LBRS_FROZEN);
if (!status)
return 0;
/*
* In case multiple PEBS events are sampled at the same time,
* it is possible to have GLOBAL_STATUS bit 62 set indicating
* PEBS buffer overflow and also seeing at most 3 PEBS counters
* having their bits set in the status register. This is a sign
* that there was at least one PEBS record pending at the time
* of the PMU interrupt. PEBS counters must only be processed
* via the drain_pebs() calls and not via the regular sample
* processing loop coming after that the function, otherwise
* phony regular samples may be generated in the sampling buffer
* not marked with the EXACT tag. Another possibility is to have
* one PEBS event and at least one non-PEBS event whic hoverflows
* while PEBS has armed. In this case, bit 62 of GLOBAL_STATUS will
* not be set, yet the overflow status bit for the PEBS counter will
* be on Skylake.
*
* To avoid this problem, we systematically ignore the PEBS-enabled
* counters from the GLOBAL_STATUS mask and we always process PEBS
* events via drain_pebs().
*/
if (x86_pmu.flags & PMU_FL_PEBS_ALL)
status &= ~cpuc->pebs_enabled;
else
status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
/*
* PEBS overflow sets bit 62 in the global status register
*/
if (__test_and_clear_bit(62, (unsigned long *)&status)) {
handled++;
x86_pmu.drain_pebs(regs);
status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
}
/*
* Intel PT
*/
if (__test_and_clear_bit(55, (unsigned long *)&status)) {
handled++;
intel_pt_interrupt();
}
/*
* Checkpointed counters can lead to 'spurious' PMIs because the
* rollback caused by the PMI will have cleared the overflow status
* bit. Therefore always force probe these counters.
*/
status |= cpuc->intel_cp_status;
for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[bit];
handled++;
if (!test_bit(bit, cpuc->active_mask))
continue;
if (!intel_pmu_save_and_restart(event))
continue;
perf_sample_data_init(&data, 0, event->hw.last_period);
if (has_branch_stack(event))
data.br_stack = &cpuc->lbr_stack;
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
}
return handled;
}
static bool disable_counter_freezing = true;
static int __init intel_perf_counter_freezing_setup(char *s)
{
bool res;
if (kstrtobool(s, &res))
return -EINVAL;
disable_counter_freezing = !res;
return 1;
}
__setup("perf_v4_pmi=", intel_perf_counter_freezing_setup);
/*
* Simplified handler for Arch Perfmon v4:
* - We rely on counter freezing/unfreezing to enable/disable the PMU.
* This is done automatically on PMU ack.
* - Ack the PMU only after the APIC.
*/
static int intel_pmu_handle_irq_v4(struct pt_regs *regs)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int handled = 0;
bool bts = false;
u64 status;
int pmu_enabled = cpuc->enabled;
int loops = 0;
/* PMU has been disabled because of counter freezing */
cpuc->enabled = 0;
if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
bts = true;
intel_bts_disable_local();
handled = intel_pmu_drain_bts_buffer();
handled += intel_bts_interrupt();
}
status = intel_pmu_get_status();
if (!status)
goto done;
again:
intel_pmu_lbr_read();
if (++loops > 100) {
static bool warned;
if (!warned) {
WARN(1, "perfevents: irq loop stuck!\n");
perf_event_print_debug();
warned = true;
}
intel_pmu_reset();
goto done;
}
handled += handle_pmi_common(regs, status);
done:
/* Ack the PMI in the APIC */
apic_write(APIC_LVTPC, APIC_DM_NMI);
/*
* The counters start counting immediately while ack the status.
* Make it as close as possible to IRET. This avoids bogus
* freezing on Skylake CPUs.
*/
if (status) {
intel_pmu_ack_status(status);
} else {
/*
* CPU may issues two PMIs very close to each other.
* When the PMI handler services the first one, the
* GLOBAL_STATUS is already updated to reflect both.
* When it IRETs, the second PMI is immediately
* handled and it sees clear status. At the meantime,
* there may be a third PMI, because the freezing bit
* isn't set since the ack in first PMI handlers.
* Double check if there is more work to be done.
*/
status = intel_pmu_get_status();
if (status)
goto again;
}
if (bts)
intel_bts_enable_local();
cpuc->enabled = pmu_enabled;
return handled;
}
/*
* This handler is triggered by the local APIC, so the APIC IRQ handling
* rules apply:
*/
static int intel_pmu_handle_irq(struct pt_regs *regs)
{
struct cpu_hw_events *cpuc;
int loops;
u64 status;
int handled;
int pmu_enabled;
cpuc = this_cpu_ptr(&cpu_hw_events);
/*
* Save the PMU state.
* It needs to be restored when leaving the handler.
*/
pmu_enabled = cpuc->enabled;
/*
* No known reason to not always do late ACK,
* but just in case do it opt-in.
*/
if (!x86_pmu.late_ack)
apic_write(APIC_LVTPC, APIC_DM_NMI);
intel_bts_disable_local();
cpuc->enabled = 0;
__intel_pmu_disable_all();
handled = intel_pmu_drain_bts_buffer();
handled += intel_bts_interrupt();
status = intel_pmu_get_status();
if (!status)
goto done;
loops = 0;
again:
intel_pmu_lbr_read();
intel_pmu_ack_status(status);
if (++loops > 100) {
static bool warned;
if (!warned) {
WARN(1, "perfevents: irq loop stuck!\n");
perf_event_print_debug();
warned = true;
}
intel_pmu_reset();
goto done;
}
handled += handle_pmi_common(regs, status);
/*
* Repeat if there is more work to be done:
*/
status = intel_pmu_get_status();
if (status)
goto again;
done:
/* Only restore PMU state when it's active. See x86_pmu_disable(). */
cpuc->enabled = pmu_enabled;
if (pmu_enabled)
__intel_pmu_enable_all(0, true);
intel_bts_enable_local();
/*
* Only unmask the NMI after the overflow counters
* have been reset. This avoids spurious NMIs on
* Haswell CPUs.
*/
if (x86_pmu.late_ack)
apic_write(APIC_LVTPC, APIC_DM_NMI);
return handled;
}
static struct event_constraint *
intel_bts_constraints(struct perf_event *event)
{
if (unlikely(intel_pmu_has_bts(event)))
return &bts_constraint;
return NULL;
}
static int intel_alt_er(int idx, u64 config)
{
int alt_idx = idx;
if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
return idx;
if (idx == EXTRA_REG_RSP_0)
alt_idx = EXTRA_REG_RSP_1;
if (idx == EXTRA_REG_RSP_1)
alt_idx = EXTRA_REG_RSP_0;
if (config & ~x86_pmu.extra_regs[alt_idx].valid_mask)
return idx;
return alt_idx;
}
static void intel_fixup_er(struct perf_event *event, int idx)
{
event->hw.extra_reg.idx = idx;
if (idx == EXTRA_REG_RSP_0) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
} else if (idx == EXTRA_REG_RSP_1) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
}
}
/*
* manage allocation of shared extra msr for certain events
*
* sharing can be:
* per-cpu: to be shared between the various events on a single PMU
* per-core: per-cpu + shared by HT threads
*/
static struct event_constraint *
__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event,
struct hw_perf_event_extra *reg)
{
struct event_constraint *c = &emptyconstraint;
struct er_account *era;
unsigned long flags;
int idx = reg->idx;
/*
* reg->alloc can be set due to existing state, so for fake cpuc we
* need to ignore this, otherwise we might fail to allocate proper fake
* state for this extra reg constraint. Also see the comment below.
*/
if (reg->alloc && !cpuc->is_fake)
return NULL; /* call x86_get_event_constraint() */
again:
era = &cpuc->shared_regs->regs[idx];
/*
* we use spin_lock_irqsave() to avoid lockdep issues when
* passing a fake cpuc
*/
raw_spin_lock_irqsave(&era->lock, flags);
if (!atomic_read(&era->ref) || era->config == reg->config) {
/*
* If its a fake cpuc -- as per validate_{group,event}() we
* shouldn't touch event state and we can avoid doing so
* since both will only call get_event_constraints() once
* on each event, this avoids the need for reg->alloc.
*
* Not doing the ER fixup will only result in era->reg being
* wrong, but since we won't actually try and program hardware
* this isn't a problem either.
*/
if (!cpuc->is_fake) {
if (idx != reg->idx)
intel_fixup_er(event, idx);
/*
* x86_schedule_events() can call get_event_constraints()
* multiple times on events in the case of incremental
* scheduling(). reg->alloc ensures we only do the ER
* allocation once.
*/
reg->alloc = 1;
}
/* lock in msr value */
era->config = reg->config;
era->reg = reg->reg;
/* one more user */
atomic_inc(&era->ref);
/*
* need to call x86_get_event_constraint()
* to check if associated event has constraints
*/
c = NULL;
} else {
idx = intel_alt_er(idx, reg->config);
if (idx != reg->idx) {
raw_spin_unlock_irqrestore(&era->lock, flags);
goto again;
}
}
raw_spin_unlock_irqrestore(&era->lock, flags);
return c;
}
static void
__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
struct hw_perf_event_extra *reg)
{
struct er_account *era;
/*
* Only put constraint if extra reg was actually allocated. Also takes
* care of event which do not use an extra shared reg.
*
* Also, if this is a fake cpuc we shouldn't touch any event state
* (reg->alloc) and we don't care about leaving inconsistent cpuc state
* either since it'll be thrown out.
*/
if (!reg->alloc || cpuc->is_fake)
return;
era = &cpuc->shared_regs->regs[reg->idx];
/* one fewer user */
atomic_dec(&era->ref);
/* allocate again next time */
reg->alloc = 0;
}
static struct event_constraint *
intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
struct event_constraint *c = NULL, *d;
struct hw_perf_event_extra *xreg, *breg;
xreg = &event->hw.extra_reg;
if (xreg->idx != EXTRA_REG_NONE) {
c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
if (c == &emptyconstraint)
return c;
}
breg = &event->hw.branch_reg;
if (breg->idx != EXTRA_REG_NONE) {
d = __intel_shared_reg_get_constraints(cpuc, event, breg);
if (d == &emptyconstraint) {
__intel_shared_reg_put_constraints(cpuc, xreg);
c = d;
}
}
return c;
}
struct event_constraint *
x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
struct event_constraint *c;
if (x86_pmu.event_constraints) {
for_each_event_constraint(c, x86_pmu.event_constraints) {
if ((event->hw.config & c->cmask) == c->code) {
event->hw.flags |= c->flags;
return c;
}
}
}
return &unconstrained;
}
static struct event_constraint *
__intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
struct event_constraint *c;
c = intel_bts_constraints(event);
if (c)
return c;
c = intel_shared_regs_constraints(cpuc, event);
if (c)
return c;
c = intel_pebs_constraints(event);
if (c)
return c;
return x86_get_event_constraints(cpuc, idx, event);
}
static void
intel_start_scheduling(struct cpu_hw_events *cpuc)
{
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
struct intel_excl_states *xl;
int tid = cpuc->excl_thread_id;
/*
* nothing needed if in group validation mode
*/
if (cpuc->is_fake || !is_ht_workaround_enabled())
return;
/*
* no exclusion needed
*/
if (WARN_ON_ONCE(!excl_cntrs))
return;
xl = &excl_cntrs->states[tid];
xl->sched_started = true;
/*
* lock shared state until we are done scheduling
* in stop_event_scheduling()
* makes scheduling appear as a transaction
*/
raw_spin_lock(&excl_cntrs->lock);
}
static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
{
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
struct event_constraint *c = cpuc->event_constraint[idx];
struct intel_excl_states *xl;
int tid = cpuc->excl_thread_id;
if (cpuc->is_fake || !is_ht_workaround_enabled())
return;
if (WARN_ON_ONCE(!excl_cntrs))
return;
if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
return;
xl = &excl_cntrs->states[tid];
lockdep_assert_held(&excl_cntrs->lock);
if (c->flags & PERF_X86_EVENT_EXCL)
xl->state[cntr] = INTEL_EXCL_EXCLUSIVE;
else
xl->state[cntr] = INTEL_EXCL_SHARED;
}
static void
intel_stop_scheduling(struct cpu_hw_events *cpuc)
{
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
struct intel_excl_states *xl;
int tid = cpuc->excl_thread_id;
/*
* nothing needed if in group validation mode
*/
if (cpuc->is_fake || !is_ht_workaround_enabled())
return;
/*
* no exclusion needed
*/
if (WARN_ON_ONCE(!excl_cntrs))
return;
xl = &excl_cntrs->states[tid];
xl->sched_started = false;
/*
* release shared state lock (acquired in intel_start_scheduling())
*/
raw_spin_unlock(&excl_cntrs->lock);
}
static struct event_constraint *
intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
int idx, struct event_constraint *c)
{
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
struct intel_excl_states *xlo;
int tid = cpuc->excl_thread_id;
int is_excl, i;
/*
* validating a group does not require
* enforcing cross-thread exclusion
*/
if (cpuc->is_fake || !is_ht_workaround_enabled())
return c;
/*
* no exclusion needed
*/
if (WARN_ON_ONCE(!excl_cntrs))
return c;
/*
* because we modify the constraint, we need
* to make a copy. Static constraints come
* from static const tables.
*
* only needed when constraint has not yet
* been cloned (marked dynamic)
*/
if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
struct event_constraint *cx;
/*
* grab pre-allocated constraint entry
*/
cx = &cpuc->constraint_list[idx];
/*
* initialize dynamic constraint
* with static constraint
*/
*cx = *c;
/*
* mark constraint as dynamic, so we
* can free it later on
*/
cx->flags |= PERF_X86_EVENT_DYNAMIC;
c = cx;
}
/*
* From here on, the constraint is dynamic.
* Either it was just allocated above, or it
* was allocated during a earlier invocation
* of this function
*/
/*
* state of sibling HT
*/
xlo = &excl_cntrs->states[tid ^ 1];
/*
* event requires exclusive counter access
* across HT threads
*/
is_excl = c->flags & PERF_X86_EVENT_EXCL;
if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
if (!cpuc->n_excl++)
WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
}
/*
* Modify static constraint with current dynamic
* state of thread
*
* EXCLUSIVE: sibling counter measuring exclusive event
* SHARED : sibling counter measuring non-exclusive event
* UNUSED : sibling counter unused
*/
for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
/*
* exclusive event in sibling counter
* our corresponding counter cannot be used
* regardless of our event
*/
if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
__clear_bit(i, c->idxmsk);
/*
* if measuring an exclusive event, sibling
* measuring non-exclusive, then counter cannot
* be used
*/
if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
__clear_bit(i, c->idxmsk);
}
/*
* recompute actual bit weight for scheduling algorithm
*/
c->weight = hweight64(c->idxmsk64);
/*
* if we return an empty mask, then switch
* back to static empty constraint to avoid
* the cost of freeing later on
*/
if (c->weight == 0)
c = &emptyconstraint;
return c;
}
static struct event_constraint *
intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
struct event_constraint *c1 = NULL;
struct event_constraint *c2;
if (idx >= 0) /* fake does < 0 */
c1 = cpuc->event_constraint[idx];
/*
* first time only
* - static constraint: no change across incremental scheduling calls
* - dynamic constraint: handled by intel_get_excl_constraints()
*/
c2 = __intel_get_event_constraints(cpuc, idx, event);
if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) {
bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX);
c1->weight = c2->weight;
c2 = c1;
}
if (cpuc->excl_cntrs)
return intel_get_excl_constraints(cpuc, event, idx, c2);
return c2;
}
static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
int tid = cpuc->excl_thread_id;
struct intel_excl_states *xl;
/*
* nothing needed if in group validation mode
*/
if (cpuc->is_fake)
return;
if (WARN_ON_ONCE(!excl_cntrs))
return;
if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
if (!--cpuc->n_excl)
WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
}
/*
* If event was actually assigned, then mark the counter state as
* unused now.
*/
if (hwc->idx >= 0) {
xl = &excl_cntrs->states[tid];
/*
* put_constraint may be called from x86_schedule_events()
* which already has the lock held so here make locking
* conditional.
*/
if (!xl->sched_started)
raw_spin_lock(&excl_cntrs->lock);
xl->state[hwc->idx] = INTEL_EXCL_UNUSED;
if (!xl->sched_started)
raw_spin_unlock(&excl_cntrs->lock);
}
}
static void
intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
struct hw_perf_event_extra *reg;
reg = &event->hw.extra_reg;
if (reg->idx != EXTRA_REG_NONE)
__intel_shared_reg_put_constraints(cpuc, reg);
reg = &event->hw.branch_reg;
if (reg->idx != EXTRA_REG_NONE)
__intel_shared_reg_put_constraints(cpuc, reg);
}
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
intel_put_shared_regs_event_constraints(cpuc, event);
/*
* is PMU has exclusive counter restrictions, then
* all events are subject to and must call the
* put_excl_constraints() routine
*/
if (cpuc->excl_cntrs)
intel_put_excl_constraints(cpuc, event);
}
static void intel_pebs_aliases_core2(struct perf_event *event)
{
if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
/*
* Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
* (0x003c) so that we can use it with PEBS.
*
* The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
* PEBS capable. However we can use INST_RETIRED.ANY_P
* (0x00c0), which is a PEBS capable event, to get the same
* count.
*
* INST_RETIRED.ANY_P counts the number of cycles that retires
* CNTMASK instructions. By setting CNTMASK to a value (16)
* larger than the maximum number of instructions that can be
* retired per cycle (4) and then inverting the condition, we
* count all cycles that retire 16 or less instructions, which
* is every cycle.
*
* Thereby we gain a PEBS capable cycle counter.
*/
u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
event->hw.config = alt_config;
}
}
static void intel_pebs_aliases_snb(struct perf_event *event)
{
if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
/*
* Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
* (0x003c) so that we can use it with PEBS.
*
* The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
* PEBS capable. However we can use UOPS_RETIRED.ALL
* (0x01c2), which is a PEBS capable event, to get the same
* count.
*
* UOPS_RETIRED.ALL counts the number of cycles that retires
* CNTMASK micro-ops. By setting CNTMASK to a value (16)
* larger than the maximum number of micro-ops that can be
* retired per cycle (4) and then inverting the condition, we
* count all cycles that retire 16 or less micro-ops, which
* is every cycle.
*
* Thereby we gain a PEBS capable cycle counter.
*/
u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
event->hw.config = alt_config;
}
}
static void intel_pebs_aliases_precdist(struct perf_event *event)
{
if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
/*
* Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
* (0x003c) so that we can use it with PEBS.
*
* The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
* PEBS capable. However we can use INST_RETIRED.PREC_DIST
* (0x01c0), which is a PEBS capable event, to get the same
* count.
*
* The PREC_DIST event has special support to minimize sample
* shadowing effects. One drawback is that it can be
* only programmed on counter 1, but that seems like an
* acceptable trade off.
*/
u64 alt_config = X86_CONFIG(.event=0xc0, .umask=0x01, .inv=1, .cmask=16);
alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
event->hw.config = alt_config;
}
}
static void intel_pebs_aliases_ivb(struct perf_event *event)
{
if (event->attr.precise_ip < 3)
return intel_pebs_aliases_snb(event);
return intel_pebs_aliases_precdist(event);
}
static void intel_pebs_aliases_skl(struct perf_event *event)
{
if (event->attr.precise_ip < 3)
return intel_pebs_aliases_core2(event);
return intel_pebs_aliases_precdist(event);
}
static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)
{
unsigned long flags = x86_pmu.large_pebs_flags;
if (event->attr.use_clockid)
flags &= ~PERF_SAMPLE_TIME;
if (!event->attr.exclude_kernel)
flags &= ~PERF_SAMPLE_REGS_USER;
if (event->attr.sample_regs_user & ~PEBS_REGS)
flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR);
return flags;
}
static int intel_pmu_bts_config(struct perf_event *event)
{
struct perf_event_attr *attr = &event->attr;
if (unlikely(intel_pmu_has_bts(event))) {
/* BTS is not supported by this architecture. */
if (!x86_pmu.bts_active)
return -EOPNOTSUPP;
/* BTS is currently only allowed for user-mode. */
if (!attr->exclude_kernel)
return -EOPNOTSUPP;
/* BTS is not allowed for precise events. */
if (attr->precise_ip)
return -EOPNOTSUPP;
/* disallow bts if conflicting events are present */
if (x86_add_exclusive(x86_lbr_exclusive_lbr))
return -EBUSY;
event->destroy = hw_perf_lbr_event_destroy;
}
return 0;
}
static int core_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);
if (ret)
return ret;
return intel_pmu_bts_config(event);
}
static int intel_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);
if (ret)
return ret;
ret = intel_pmu_bts_config(event);
if (ret)
return ret;
if (event->attr.precise_ip) {
if (!event->attr.freq) {
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
if (!(event->attr.sample_type &
~intel_pmu_large_pebs_flags(event)))
event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS;
}
if (x86_pmu.pebs_aliases)
x86_pmu.pebs_aliases(event);
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
}
if (needs_branch_stack(event)) {
ret = intel_pmu_setup_lbr_filter(event);
if (ret)
return ret;
/*
* BTS is set up earlier in this path, so don't account twice
*/
if (!unlikely(intel_pmu_has_bts(event))) {
/* disallow lbr if conflicting events are present */
if (x86_add_exclusive(x86_lbr_exclusive_lbr))
return -EBUSY;
event->destroy = hw_perf_lbr_event_destroy;
}
}
if (event->attr.type != PERF_TYPE_RAW)
return 0;
if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
return 0;
if (x86_pmu.version < 3)
return -EINVAL;
if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
return -EACCES;
event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
return 0;
}
struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
{
if (x86_pmu.guest_get_msrs) /*covered*/
return x86_pmu.guest_get_msrs(nr);
*nr = 0; /*covered*/
return NULL; /*covered*/
}
EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
/*
* If PMU counter has PEBS enabled it is not enough to disable counter
* on a guest entry since PEBS memory write can overshoot guest entry
* and corrupt guest memory. Disabling PEBS solves the problem.
*/
arr[1].msr = MSR_IA32_PEBS_ENABLE;
arr[1].host = cpuc->pebs_enabled;
arr[1].guest = 0;
*nr = 2;
return arr;
}
static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
struct perf_event *event = cpuc->events[idx];
arr[idx].msr = x86_pmu_config_addr(idx);
arr[idx].host = arr[idx].guest = 0;
if (!test_bit(idx, cpuc->active_mask))
continue;
arr[idx].host = arr[idx].guest =
event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;
if (event->attr.exclude_host)
arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
else if (event->attr.exclude_guest)
arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
}
*nr = x86_pmu.num_counters;
return arr;
}
static void core_pmu_enable_event(struct perf_event *event)
{
if (!event->attr.exclude_host)
x86_pmu_enable_event(event);
}
static void core_pmu_enable_all(int added)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
if (!test_bit(idx, cpuc->active_mask) ||
cpuc->events[idx]->attr.exclude_host)
continue;
__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
}
}
static int hsw_hw_config(struct perf_event *event)
{
int ret = intel_pmu_hw_config(event);
if (ret)
return ret;
if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
return 0;
event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
/*
* IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
* PEBS or in ANY thread mode. Since the results are non-sensical forbid
* this combination.
*/
if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
event->attr.precise_ip > 0))
return -EOPNOTSUPP;
if (event_is_checkpointed(event)) {
/*
* Sampling of checkpointed events can cause situations where
* the CPU constantly aborts because of a overflow, which is
* then checkpointed back and ignored. Forbid checkpointing
* for sampling.
*
* But still allow a long sampling period, so that perf stat
* from KVM works.
*/
if (event->attr.sample_period > 0 &&
event->attr.sample_period < 0x7fffffff)
return -EOPNOTSUPP;
}
return 0;
}
static struct event_constraint counter0_constraint =
INTEL_ALL_EVENT_CONSTRAINT(0, 0x1);
static struct event_constraint counter2_constraint =
EVENT_CONSTRAINT(0, 0x4, 0);
static struct event_constraint *
hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
struct event_constraint *c;
c = intel_get_event_constraints(cpuc, idx, event);
/* Handle special quirk on in_tx_checkpointed only in counter 2 */
if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
if (c->idxmsk64 & (1U << 2))
return &counter2_constraint;
return &emptyconstraint;
}
return c;
}
static struct event_constraint *
glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
struct event_constraint *c;
/* :ppp means to do reduced skid PEBS which is PMC0 only. */
if (event->attr.precise_ip == 3)
return &counter0_constraint;
c = intel_get_event_constraints(cpuc, idx, event);
return c;
}
/*
* Broadwell:
*
* The INST_RETIRED.ALL period always needs to have lowest 6 bits cleared
* (BDM55) and it must not use a period smaller than 100 (BDM11). We combine
* the two to enforce a minimum period of 128 (the smallest value that has bits
* 0-5 cleared and >= 100).
*
* Because of how the code in x86_perf_event_set_period() works, the truncation
* of the lower 6 bits is 'harmless' as we'll occasionally add a longer period
* to make up for the 'lost' events due to carrying the 'error' in period_left.
*
* Therefore the effective (average) period matches the requested period,
* despite coarser hardware granularity.
*/
static u64 bdw_limit_period(struct perf_event *event, u64 left)
{
if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
X86_CONFIG(.event=0xc0, .umask=0x01)) {
if (left < 128)
left = 128;
left &= ~0x3fULL;
}
return left;
}
PMU_FORMAT_ATTR(event, "config:0-7" );
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
PMU_FORMAT_ATTR(pc, "config:19" );
PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */
PMU_FORMAT_ATTR(inv, "config:23" );
PMU_FORMAT_ATTR(cmask, "config:24-31" );
PMU_FORMAT_ATTR(in_tx, "config:32");
PMU_FORMAT_ATTR(in_tx_cp, "config:33");
static struct attribute *intel_arch_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
&format_attr_edge.attr,
&format_attr_pc.attr,
&format_attr_inv.attr,
&format_attr_cmask.attr,
NULL,
};
ssize_t intel_event_sysfs_show(char *page, u64 config)
{
u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);
return x86_event_sysfs_show(page, config, event);
}
struct intel_shared_regs *allocate_shared_regs(int cpu)
{
struct intel_shared_regs *regs;
int i;
regs = kzalloc_node(sizeof(struct intel_shared_regs),
GFP_KERNEL, cpu_to_node(cpu));
if (regs) {
/*
* initialize the locks to keep lockdep happy
*/
for (i = 0; i < EXTRA_REG_MAX; i++)
raw_spin_lock_init(®s->regs[i].lock);
regs->core_id = -1;
}
return regs;
}
static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
{
struct intel_excl_cntrs *c;
c = kzalloc_node(sizeof(struct intel_excl_cntrs),
GFP_KERNEL, cpu_to_node(cpu));
if (c) {
raw_spin_lock_init(&c->lock);
c->core_id = -1;
}
return c;
}
static int intel_pmu_cpu_prepare(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
cpuc->shared_regs = allocate_shared_regs(cpu);
if (!cpuc->shared_regs)
goto err;
}
if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
if (!cpuc->constraint_list)
goto err_shared_regs;
cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
if (!cpuc->excl_cntrs)
goto err_constraint_list;
cpuc->excl_thread_id = 0;
}
return 0;
err_constraint_list:
kfree(cpuc->constraint_list);
cpuc->constraint_list = NULL;
err_shared_regs:
kfree(cpuc->shared_regs);
cpuc->shared_regs = NULL;
err:
return -ENOMEM;
}
static void flip_smm_bit(void *data)
{
unsigned long set = *(unsigned long *)data;
if (set > 0) {
msr_set_bit(MSR_IA32_DEBUGCTLMSR,
DEBUGCTLMSR_FREEZE_IN_SMM_BIT);
} else {
msr_clear_bit(MSR_IA32_DEBUGCTLMSR,
DEBUGCTLMSR_FREEZE_IN_SMM_BIT);
}
}
static void intel_pmu_cpu_starting(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
int core_id = topology_core_id(cpu);
int i;
init_debug_store_on_cpu(cpu);
/*
* Deal with CPUs that don't clear their LBRs on power-up.
*/
intel_pmu_lbr_reset();
cpuc->lbr_sel = NULL;
if (x86_pmu.version > 1)
flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
if (x86_pmu.counter_freezing)
enable_counter_freeze();
if (!cpuc->shared_regs)
return;
if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
for_each_cpu(i, topology_sibling_cpumask(cpu)) {
struct intel_shared_regs *pc;
pc = per_cpu(cpu_hw_events, i).shared_regs;
if (pc && pc->core_id == core_id) {
cpuc->kfree_on_online[0] = cpuc->shared_regs;
cpuc->shared_regs = pc;
break;
}
}
cpuc->shared_regs->core_id = core_id;
cpuc->shared_regs->refcnt++;
}
if (x86_pmu.lbr_sel_map)
cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
for_each_cpu(i, topology_sibling_cpumask(cpu)) {
struct cpu_hw_events *sibling;
struct intel_excl_cntrs *c;
sibling = &per_cpu(cpu_hw_events, i);
c = sibling->excl_cntrs;
if (c && c->core_id == core_id) {
cpuc->kfree_on_online[1] = cpuc->excl_cntrs;
cpuc->excl_cntrs = c;
if (!sibling->excl_thread_id)
cpuc->excl_thread_id = 1;
break;
}
}
cpuc->excl_cntrs->core_id = core_id;
cpuc->excl_cntrs->refcnt++;
}
}
static void free_excl_cntrs(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
struct intel_excl_cntrs *c;
c = cpuc->excl_cntrs;
if (c) {
if (c->core_id == -1 || --c->refcnt == 0)
kfree(c);
cpuc->excl_cntrs = NULL;
kfree(cpuc->constraint_list);
cpuc->constraint_list = NULL;
}
}
static void intel_pmu_cpu_dying(int cpu)
{
fini_debug_store_on_cpu(cpu);
if (x86_pmu.counter_freezing)
disable_counter_freeze();
}
static void intel_pmu_cpu_dead(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
struct intel_shared_regs *pc;
pc = cpuc->shared_regs;
if (pc) {
if (pc->core_id == -1 || --pc->refcnt == 0)
kfree(pc);
cpuc->shared_regs = NULL;
}
free_excl_cntrs(cpu);
}
static void intel_pmu_sched_task(struct perf_event_context *ctx,
bool sched_in)
{
intel_pmu_pebs_sched_task(ctx, sched_in);
intel_pmu_lbr_sched_task(ctx, sched_in);
}
static int intel_pmu_check_period(struct perf_event *event, u64 value)
{
return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
}
PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
PMU_FORMAT_ATTR(ldlat, "config1:0-15");
PMU_FORMAT_ATTR(frontend, "config1:0-23");
static struct attribute *intel_arch3_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
&format_attr_edge.attr,
&format_attr_pc.attr,
&format_attr_any.attr,
&format_attr_inv.attr,
&format_attr_cmask.attr,
NULL,
};
static struct attribute *hsw_format_attr[] = {
&format_attr_in_tx.attr,
&format_attr_in_tx_cp.attr,
&format_attr_offcore_rsp.attr,
&format_attr_ldlat.attr,
NULL
};
static struct attribute *nhm_format_attr[] = {
&format_attr_offcore_rsp.attr,
&format_attr_ldlat.attr,
NULL
};
static struct attribute *slm_format_attr[] = {
&format_attr_offcore_rsp.attr,
NULL
};
static struct attribute *skl_format_attr[] = {
&format_attr_frontend.attr,
NULL,
};
static __initconst const struct x86_pmu core_pmu = {
.name = "core",
.handle_irq = x86_pmu_handle_irq,
.disable_all = x86_pmu_disable_all,
.enable_all = core_pmu_enable_all,
.enable = core_pmu_enable_event,
.disable = x86_pmu_disable_event,
.hw_config = core_pmu_hw_config,
.schedule_events = x86_schedule_events,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
.event_map = intel_pmu_event_map,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
.apic = 1,
.large_pebs_flags = LARGE_PEBS_FLAGS,
/*
* Intel PMCs cannot be accessed sanely above 32-bit width,
* so we install an artificial 1<<31 period regardless of
* the generic event period:
*/
.max_period = (1ULL<<31) - 1,
.get_event_constraints = intel_get_event_constraints,
.put_event_constraints = intel_put_event_constraints,
.event_constraints = intel_core_event_constraints,
.guest_get_msrs = core_guest_get_msrs,
.format_attrs = intel_arch_formats_attr,
.events_sysfs_show = intel_event_sysfs_show,
/*
* Virtual (or funny metal) CPU can define x86_pmu.extra_regs
* together with PMU version 1 and thus be using core_pmu with
* shared_regs. We need following callbacks here to allocate
* it properly.
*/
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
.cpu_dead = intel_pmu_cpu_dead,
.check_period = intel_pmu_check_period,
};
static struct attribute *intel_pmu_attrs[];
static __initconst const struct x86_pmu intel_pmu = {
.name = "Intel",
.handle_irq = intel_pmu_handle_irq,
.disable_all = intel_pmu_disable_all,
.enable_all = intel_pmu_enable_all,
.enable = intel_pmu_enable_event,
.disable = intel_pmu_disable_event,
.add = intel_pmu_add_event,
.del = intel_pmu_del_event,
.read = intel_pmu_read_event,
.hw_config = intel_pmu_hw_config,
.schedule_events = x86_schedule_events,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
.event_map = intel_pmu_event_map,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
.apic = 1,
.large_pebs_flags = LARGE_PEBS_FLAGS,
/*
* Intel PMCs cannot be accessed sanely above 32 bit width,
* so we install an artificial 1<<31 period regardless of
* the generic event period:
*/
.max_period = (1ULL << 31) - 1,
.get_event_constraints = intel_get_event_constraints,
.put_event_constraints = intel_put_event_constraints,
.pebs_aliases = intel_pebs_aliases_core2,
.format_attrs = intel_arch3_formats_attr,
.events_sysfs_show = intel_event_sysfs_show,
.attrs = intel_pmu_attrs,
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
.cpu_dead = intel_pmu_cpu_dead,
.guest_get_msrs = intel_guest_get_msrs,
.sched_task = intel_pmu_sched_task,
.check_period = intel_pmu_check_period,
};
static __init void intel_clovertown_quirk(void)
{
/*
* PEBS is unreliable due to:
*
* AJ67 - PEBS may experience CPL leaks
* AJ68 - PEBS PMI may be delayed by one event
* AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
* AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
*
* AJ67 could be worked around by restricting the OS/USR flags.
* AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
*
* AJ106 could possibly be worked around by not allowing LBR
* usage from PEBS, including the fixup.
* AJ68 could possibly be worked around by always programming
* a pebs_event_reset[0] value and coping with the lost events.
*
* But taken together it might just make sense to not enable PEBS on
* these chips.
*/
pr_warn("PEBS disabled due to CPU errata\n");
x86_pmu.pebs = 0;
x86_pmu.pebs_constraints = NULL;
}
static int intel_snb_pebs_broken(int cpu)
{
u32 rev = UINT_MAX; /* default to broken for unknown models */
switch (cpu_data(cpu).x86_model) {
case INTEL_FAM6_SANDYBRIDGE:
rev = 0x28;
break;
case INTEL_FAM6_SANDYBRIDGE_X:
switch (cpu_data(cpu).x86_stepping) {
case 6: rev = 0x618; break;
case 7: rev = 0x70c; break;
}
}
return (cpu_data(cpu).microcode < rev);
}
static void intel_snb_check_microcode(void)
{
int pebs_broken = 0;
int cpu;
for_each_online_cpu(cpu) {
if ((pebs_broken = intel_snb_pebs_broken(cpu)))
break;
}
if (pebs_broken == x86_pmu.pebs_broken)
return;
/*
* Serialized by the microcode lock..
*/
if (x86_pmu.pebs_broken) {
pr_info("PEBS enabled due to microcode update\n");
x86_pmu.pebs_broken = 0;
} else {
pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n");
x86_pmu.pebs_broken = 1;
}
}
static bool is_lbr_from(unsigned long msr)
{
unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr;
return x86_pmu.lbr_from <= msr && msr < lbr_from_nr;
}
/*
* Under certain circumstances, access certain MSR may cause #GP.
* The function tests if the input MSR can be safely accessed.
*/
static bool check_msr(unsigned long msr, u64 mask)
{
u64 val_old, val_new, val_tmp;
/*
* Read the current value, change it and read it back to see if it
* matches, this is needed to detect certain hardware emulators
* (qemu/kvm) that don't trap on the MSR access and always return 0s.
*/
if (rdmsrl_safe(msr, &val_old))
return false;
/*
* Only change the bits which can be updated by wrmsrl.
*/
val_tmp = val_old ^ mask;
if (is_lbr_from(msr))
val_tmp = lbr_from_signext_quirk_wr(val_tmp);
if (wrmsrl_safe(msr, val_tmp) ||
rdmsrl_safe(msr, &val_new))
return false;
/*
* Quirk only affects validation in wrmsr(), so wrmsrl()'s value
* should equal rdmsrl()'s even with the quirk.
*/
if (val_new != val_tmp)
return false;
if (is_lbr_from(msr))
val_old = lbr_from_signext_quirk_wr(val_old);
/* Here it's sure that the MSR can be safely accessed.
* Restore the old value and return.
*/
wrmsrl(msr, val_old);
return true;
}
static __init void intel_sandybridge_quirk(void)
{
x86_pmu.check_microcode = intel_snb_check_microcode;
cpus_read_lock();
intel_snb_check_microcode();
cpus_read_unlock();
}
static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
{ PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
{ PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
{ PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
{ PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
{ PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
{ PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
{ PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
};
static __init void intel_arch_events_quirk(void)
{
int bit;
/* disable event that reported as not presend by cpuid */
for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
pr_warn("CPUID marked event: \'%s\' unavailable\n",
intel_arch_events_map[bit].name);
}
}
static __init void intel_nehalem_quirk(void)
{
union cpuid10_ebx ebx;
ebx.full = x86_pmu.events_maskl;
if (ebx.split.no_branch_misses_retired) {
/*
* Erratum AAJ80 detected, we work it around by using
* the BR_MISP_EXEC.ANY event. This will over-count
* branch-misses, but it's still much better than the
* architectural event which is often completely bogus:
*/
intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
ebx.split.no_branch_misses_retired = 0;
x86_pmu.events_maskl = ebx.full;
pr_info("CPU erratum AAJ80 worked around\n");
}
}
static bool intel_glp_counter_freezing_broken(int cpu)
{
u32 rev = UINT_MAX; /* default to broken for unknown stepping */
switch (cpu_data(cpu).x86_stepping) {
case 1:
rev = 0x28;
break;
case 8:
rev = 0x6;
break;
}
return (cpu_data(cpu).microcode < rev);
}
static __init void intel_glp_counter_freezing_quirk(void)
{
/* Check if it's already disabled */
if (disable_counter_freezing)
return;
/*
* If the system starts with the wrong ucode, leave the
* counter-freezing feature permanently disabled.
*/
if (intel_glp_counter_freezing_broken(raw_smp_processor_id())) {
pr_info("PMU counter freezing disabled due to CPU errata,"
"please upgrade microcode\n");
x86_pmu.counter_freezing = false;
x86_pmu.handle_irq = intel_pmu_handle_irq;
}
}
/*
* enable software workaround for errata:
* SNB: BJ122
* IVB: BV98
* HSW: HSD29
*
* Only needed when HT is enabled. However detecting
* if HT is enabled is difficult (model specific). So instead,
* we enable the workaround in the early boot, and verify if
* it is needed in a later initcall phase once we have valid
* topology information to check if HT is actually enabled
*/
static __init void intel_ht_bug(void)
{
x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED;
x86_pmu.start_scheduling = intel_start_scheduling;
x86_pmu.commit_scheduling = intel_commit_scheduling;
x86_pmu.stop_scheduling = intel_stop_scheduling;
}
EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82")
/* Haswell special events */
EVENT_ATTR_STR(tx-start, tx_start, "event=0xc9,umask=0x1");
EVENT_ATTR_STR(tx-commit, tx_commit, "event=0xc9,umask=0x2");
EVENT_ATTR_STR(tx-abort, tx_abort, "event=0xc9,umask=0x4");
EVENT_ATTR_STR(tx-capacity, tx_capacity, "event=0x54,umask=0x2");
EVENT_ATTR_STR(tx-conflict, tx_conflict, "event=0x54,umask=0x1");
EVENT_ATTR_STR(el-start, el_start, "event=0xc8,umask=0x1");
EVENT_ATTR_STR(el-commit, el_commit, "event=0xc8,umask=0x2");
EVENT_ATTR_STR(el-abort, el_abort, "event=0xc8,umask=0x4");
EVENT_ATTR_STR(el-capacity, el_capacity, "event=0x54,umask=0x2");
EVENT_ATTR_STR(el-conflict, el_conflict, "event=0x54,umask=0x1");
EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1");
EVENT_ATTR_STR(cycles-ct, cycles_ct, "event=0x3c,in_tx=1,in_tx_cp=1");
static struct attribute *hsw_events_attrs[] = {
EVENT_PTR(td_slots_issued),
EVENT_PTR(td_slots_retired),
EVENT_PTR(td_fetch_bubbles),
EVENT_PTR(td_total_slots),
EVENT_PTR(td_total_slots_scale),
EVENT_PTR(td_recovery_bubbles),
EVENT_PTR(td_recovery_bubbles_scale),
NULL
};
static struct attribute *hsw_mem_events_attrs[] = {
EVENT_PTR(mem_ld_hsw),
EVENT_PTR(mem_st_hsw),
NULL,
};
static struct attribute *hsw_tsx_events_attrs[] = {
EVENT_PTR(tx_start),
EVENT_PTR(tx_commit),
EVENT_PTR(tx_abort),
EVENT_PTR(tx_capacity),
EVENT_PTR(tx_conflict),
EVENT_PTR(el_start),
EVENT_PTR(el_commit),
EVENT_PTR(el_abort),
EVENT_PTR(el_capacity),
EVENT_PTR(el_conflict),
EVENT_PTR(cycles_t),
EVENT_PTR(cycles_ct),
NULL
};
static ssize_t freeze_on_smi_show(struct device *cdev,
struct device_attribute *attr,
char *buf)
{
return sprintf(buf, "%lu\n", x86_pmu.attr_freeze_on_smi);
}
static DEFINE_MUTEX(freeze_on_smi_mutex);
static ssize_t freeze_on_smi_store(struct device *cdev,
struct device_attribute *attr,
const char *buf, size_t count)
{
unsigned long val;
ssize_t ret;
ret = kstrtoul(buf, 0, &val);
if (ret)
return ret;
if (val > 1)
return -EINVAL;
mutex_lock(&freeze_on_smi_mutex);
if (x86_pmu.attr_freeze_on_smi == val)
goto done;
x86_pmu.attr_freeze_on_smi = val;
get_online_cpus();
on_each_cpu(flip_smm_bit, &val, 1);
put_online_cpus();
done:
mutex_unlock(&freeze_on_smi_mutex);
return count;
}
static DEVICE_ATTR_RW(freeze_on_smi);
static ssize_t branches_show(struct device *cdev,
struct device_attribute *attr,
char *buf)
{
return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr);
}
static DEVICE_ATTR_RO(branches);
static struct attribute *lbr_attrs[] = {
&dev_attr_branches.attr,
NULL
};
static char pmu_name_str[30];
static ssize_t pmu_name_show(struct device *cdev,
struct device_attribute *attr,
char *buf)
{
return snprintf(buf, PAGE_SIZE, "%s\n", pmu_name_str);
}
static DEVICE_ATTR_RO(pmu_name);
static struct attribute *intel_pmu_caps_attrs[] = {
&dev_attr_pmu_name.attr,
NULL
};
static struct attribute *intel_pmu_attrs[] = {
&dev_attr_freeze_on_smi.attr,
NULL,
};
static __init struct attribute **
get_events_attrs(struct attribute **base,
struct attribute **mem,
struct attribute **tsx)
{
struct attribute **attrs = base;
struct attribute **old;
if (mem && x86_pmu.pebs)
attrs = merge_attr(attrs, mem);
if (tsx && boot_cpu_has(X86_FEATURE_RTM)) {
old = attrs;
attrs = merge_attr(attrs, tsx);
if (old != base)
kfree(old);
}
return attrs;
}
__init int intel_pmu_init(void)
{
struct attribute **extra_attr = NULL;
struct attribute **mem_attr = NULL;
struct attribute **tsx_attr = NULL;
struct attribute **to_free = NULL;
union cpuid10_edx edx;
union cpuid10_eax eax;
union cpuid10_ebx ebx;
struct event_constraint *c;
unsigned int unused;
struct extra_reg *er;
int version, i;
char *name;
if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
switch (boot_cpu_data.x86) {
case 0x6:
return p6_pmu_init();
case 0xb:
return knc_pmu_init();
case 0xf:
return p4_pmu_init();
}
return -ENODEV;
}
/*
* Check whether the Architectural PerfMon supports
* Branch Misses Retired hw_event or not.
*/
cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
return -ENODEV;
version = eax.split.version_id;
if (version < 2)
x86_pmu = core_pmu;
else
x86_pmu = intel_pmu;
x86_pmu.version = version;
x86_pmu.num_counters = eax.split.num_counters;
x86_pmu.cntval_bits = eax.split.bit_width;
x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
x86_pmu.events_maskl = ebx.full;
x86_pmu.events_mask_len = eax.split.mask_length;
x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
/*
* Quirk: v2 perfmon does not report fixed-purpose events, so
* assume at least 3 events, when not running in a hypervisor:
*/
if (version > 1) {
int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR);
x86_pmu.num_counters_fixed =
max((int)edx.split.num_counters_fixed, assume);
}
if (version >= 4)
x86_pmu.counter_freezing = !disable_counter_freezing;
if (boot_cpu_has(X86_FEATURE_PDCM)) {
u64 capabilities;
rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
x86_pmu.intel_cap.capabilities = capabilities;
}
intel_ds_init();
x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
/*
* Install the hw-cache-events table:
*/
switch (boot_cpu_data.x86_model) {
case INTEL_FAM6_CORE_YONAH:
pr_cont("Core events, ");
name = "core";
break;
case INTEL_FAM6_CORE2_MEROM:
x86_add_quirk(intel_clovertown_quirk);
case INTEL_FAM6_CORE2_MEROM_L:
case INTEL_FAM6_CORE2_PENRYN:
case INTEL_FAM6_CORE2_DUNNINGTON:
memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
intel_pmu_lbr_init_core();
x86_pmu.event_constraints = intel_core2_event_constraints;
x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
pr_cont("Core2 events, ");
name = "core2";
break;
case INTEL_FAM6_NEHALEM:
case INTEL_FAM6_NEHALEM_EP:
case INTEL_FAM6_NEHALEM_EX:
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_nhm();
x86_pmu.event_constraints = intel_nehalem_event_constraints;
x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
x86_pmu.extra_regs = intel_nehalem_extra_regs;
mem_attr = nhm_mem_events_attrs;
/* UOPS_ISSUED.STALLED_CYCLES */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
intel_pmu_pebs_data_source_nhm();
x86_add_quirk(intel_nehalem_quirk);
x86_pmu.pebs_no_tlb = 1;
extra_attr = nhm_format_attr;
pr_cont("Nehalem events, ");
name = "nehalem";
break;
case INTEL_FAM6_ATOM_BONNELL:
case INTEL_FAM6_ATOM_BONNELL_MID:
case INTEL_FAM6_ATOM_SALTWELL:
case INTEL_FAM6_ATOM_SALTWELL_MID:
case INTEL_FAM6_ATOM_SALTWELL_TABLET:
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
intel_pmu_lbr_init_atom();
x86_pmu.event_constraints = intel_gen_event_constraints;
x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_core2;
pr_cont("Atom events, ");
name = "bonnell";
break;
case INTEL_FAM6_ATOM_SILVERMONT:
case INTEL_FAM6_ATOM_SILVERMONT_X:
case INTEL_FAM6_ATOM_SILVERMONT_MID:
case INTEL_FAM6_ATOM_AIRMONT:
case INTEL_FAM6_ATOM_AIRMONT_MID:
memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_slm();
x86_pmu.event_constraints = intel_slm_event_constraints;
x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
x86_pmu.extra_regs = intel_slm_extra_regs;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.cpu_events = slm_events_attrs;
extra_attr = slm_format_attr;
pr_cont("Silvermont events, ");
name = "silvermont";
break;
case INTEL_FAM6_ATOM_GOLDMONT:
case INTEL_FAM6_ATOM_GOLDMONT_X:
memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_skl();
x86_pmu.event_constraints = intel_slm_event_constraints;
x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints;
x86_pmu.extra_regs = intel_glm_extra_regs;
/*
* It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
* for precise cycles.
* :pp is identical to :ppp
*/
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
x86_pmu.lbr_pt_coexist = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.cpu_events = glm_events_attrs;
extra_attr = slm_format_attr;
pr_cont("Goldmont events, ");
name = "goldmont";
break;
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
x86_add_quirk(intel_glp_counter_freezing_quirk);
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_skl();
x86_pmu.event_constraints = intel_slm_event_constraints;
x86_pmu.extra_regs = intel_glm_extra_regs;
/*
* It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
* for precise cycles.
*/
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
x86_pmu.lbr_pt_coexist = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_PEBS_ALL;
x86_pmu.get_event_constraints = glp_get_event_constraints;
x86_pmu.cpu_events = glm_events_attrs;
/* Goldmont Plus has 4-wide pipeline */
event_attr_td_total_slots_scale_glm.event_str = "4";
extra_attr = slm_format_attr;
pr_cont("Goldmont plus events, ");
name = "goldmont_plus";
break;
case INTEL_FAM6_WESTMERE:
case INTEL_FAM6_WESTMERE_EP:
case INTEL_FAM6_WESTMERE_EX:
memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_nhm();
x86_pmu.event_constraints = intel_westmere_event_constraints;
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
x86_pmu.extra_regs = intel_westmere_extra_regs;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
mem_attr = nhm_mem_events_attrs;
/* UOPS_ISSUED.STALLED_CYCLES */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
intel_pmu_pebs_data_source_nhm();
extra_attr = nhm_format_attr;
pr_cont("Westmere events, ");
name = "westmere";
break;
case INTEL_FAM6_SANDYBRIDGE:
case INTEL_FAM6_SANDYBRIDGE_X:
x86_add_quirk(intel_sandybridge_quirk);
x86_add_quirk(intel_ht_bug);
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_snb();
x86_pmu.event_constraints = intel_snb_event_constraints;
x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X)
x86_pmu.extra_regs = intel_snbep_extra_regs;
else
x86_pmu.extra_regs = intel_snb_extra_regs;
/* all extra regs are per-cpu when HT is on */
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.cpu_events = snb_events_attrs;
mem_attr = snb_mem_events_attrs;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
extra_attr = nhm_format_attr;
pr_cont("SandyBridge events, ");
name = "sandybridge";
break;
case INTEL_FAM6_IVYBRIDGE:
case INTEL_FAM6_IVYBRIDGE_X:
x86_add_quirk(intel_ht_bug);
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
/* dTLB-load-misses on IVB is different than SNB */
hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */
memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_snb();
x86_pmu.event_constraints = intel_ivb_event_constraints;
x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
x86_pmu.pebs_prec_dist = true;
if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X)
x86_pmu.extra_regs = intel_snbep_extra_regs;
else
x86_pmu.extra_regs = intel_snb_extra_regs;
/* all extra regs are per-cpu when HT is on */
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.cpu_events = snb_events_attrs;
mem_attr = snb_mem_events_attrs;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
extra_attr = nhm_format_attr;
pr_cont("IvyBridge events, ");
name = "ivybridge";
break;
case INTEL_FAM6_HASWELL_CORE:
case INTEL_FAM6_HASWELL_X:
case INTEL_FAM6_HASWELL_ULT:
case INTEL_FAM6_HASWELL_GT3E:
x86_add_quirk(intel_ht_bug);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_hsw();
x86_pmu.event_constraints = intel_hsw_event_constraints;
x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
x86_pmu.extra_regs = intel_snbep_extra_regs;
x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
x86_pmu.pebs_prec_dist = true;
/* all extra regs are per-cpu when HT is on */
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
x86_pmu.cpu_events = hsw_events_attrs;
x86_pmu.lbr_double_abort = true;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
mem_attr = hsw_mem_events_attrs;
tsx_attr = hsw_tsx_events_attrs;
pr_cont("Haswell events, ");
name = "haswell";
break;
case INTEL_FAM6_BROADWELL_CORE:
case INTEL_FAM6_BROADWELL_XEON_D:
case INTEL_FAM6_BROADWELL_GT3E:
case INTEL_FAM6_BROADWELL_X:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
/* L3_MISS_LOCAL_DRAM is BIT(26) in Broadwell */
hw_cache_extra_regs[C(LL)][C(OP_READ)][C(RESULT_MISS)] = HSW_DEMAND_READ |
BDW_L3_MISS|HSW_SNOOP_DRAM;
hw_cache_extra_regs[C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = HSW_DEMAND_WRITE|BDW_L3_MISS|
HSW_SNOOP_DRAM;
hw_cache_extra_regs[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = HSW_DEMAND_READ|
BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE|
BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
intel_pmu_lbr_init_hsw();
x86_pmu.event_constraints = intel_bdw_event_constraints;
x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints;
x86_pmu.extra_regs = intel_snbep_extra_regs;
x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
x86_pmu.pebs_prec_dist = true;
/* all extra regs are per-cpu when HT is on */
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
x86_pmu.cpu_events = hsw_events_attrs;
x86_pmu.limit_period = bdw_limit_period;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
mem_attr = hsw_mem_events_attrs;
tsx_attr = hsw_tsx_events_attrs;
pr_cont("Broadwell events, ");
name = "broadwell";
break;
case INTEL_FAM6_XEON_PHI_KNL:
case INTEL_FAM6_XEON_PHI_KNM:
memcpy(hw_cache_event_ids,
slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs,
knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_knl();
x86_pmu.event_constraints = intel_slm_event_constraints;
x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
x86_pmu.extra_regs = intel_knl_extra_regs;
/* all extra regs are per-cpu when HT is on */
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
extra_attr = slm_format_attr;
pr_cont("Knights Landing/Mill events, ");
name = "knights-landing";
break;
case INTEL_FAM6_SKYLAKE_MOBILE:
case INTEL_FAM6_SKYLAKE_DESKTOP:
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_KABYLAKE_MOBILE:
case INTEL_FAM6_KABYLAKE_DESKTOP:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_skl();
/* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */
event_attr_td_recovery_bubbles.event_str_noht =
"event=0xd,umask=0x1,cmask=1";
event_attr_td_recovery_bubbles.event_str_ht =
"event=0xd,umask=0x1,cmask=1,any=1";
x86_pmu.event_constraints = intel_skl_event_constraints;
x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
x86_pmu.extra_regs = intel_skl_extra_regs;
x86_pmu.pebs_aliases = intel_pebs_aliases_skl;
x86_pmu.pebs_prec_dist = true;
/* all extra regs are per-cpu when HT is on */
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
extra_attr = merge_attr(extra_attr, skl_format_attr);
to_free = extra_attr;
x86_pmu.cpu_events = hsw_events_attrs;
mem_attr = hsw_mem_events_attrs;
tsx_attr = hsw_tsx_events_attrs;
intel_pmu_pebs_data_source_skl(
boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
pr_cont("Skylake events, ");
name = "skylake";
break;
default:
switch (x86_pmu.version) {
case 1:
x86_pmu.event_constraints = intel_v1_event_constraints;
pr_cont("generic architected perfmon v1, ");
name = "generic_arch_v1";
break;
default:
/*
* default constraints for v2 and up
*/
x86_pmu.event_constraints = intel_gen_event_constraints;
pr_cont("generic architected perfmon, ");
name = "generic_arch_v2+";
break;
}
}
snprintf(pmu_name_str, sizeof(pmu_name_str), "%s", name);
if (version >= 2 && extra_attr) {
x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
extra_attr);
WARN_ON(!x86_pmu.format_attrs);
}
x86_pmu.cpu_events = get_events_attrs(x86_pmu.cpu_events,
mem_attr, tsx_attr);
if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
}
x86_pmu.intel_ctrl = (1ULL << x86_pmu.num_counters) - 1;
if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED);
x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
}
x86_pmu.intel_ctrl |=
((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
if (x86_pmu.event_constraints) {
/*
* event on fixed counter2 (REF_CYCLES) only works on this
* counter, so do not extend mask to generic counters
*/
for_each_event_constraint(c, x86_pmu.event_constraints) {
if (c->cmask == FIXED_EVENT_FLAGS
&& c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
}
c->idxmsk64 &=
~(~0ULL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed));
c->weight = hweight64(c->idxmsk64);
}
}
/*
* Access LBR MSR may cause #GP under certain circumstances.
* E.g. KVM doesn't support LBR MSR
* Check all LBT MSR here.
* Disable LBR access if any LBR MSRs can not be accessed.
*/
if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
x86_pmu.lbr_nr = 0;
for (i = 0; i < x86_pmu.lbr_nr; i++) {
if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
x86_pmu.lbr_nr = 0;
}
x86_pmu.caps_attrs = intel_pmu_caps_attrs;
if (x86_pmu.lbr_nr) {
x86_pmu.caps_attrs = merge_attr(x86_pmu.caps_attrs, lbr_attrs);
pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
}
/*
* Access extra MSR may cause #GP under certain circumstances.
* E.g. KVM doesn't support offcore event
* Check all extra_regs here.
*/
if (x86_pmu.extra_regs) {
for (er = x86_pmu.extra_regs; er->msr; er++) {
er->extra_msr_access = check_msr(er->msr, 0x11UL);
/* Disable LBR select mapping */
if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
x86_pmu.lbr_sel_map = NULL;
}
}
/* Support full width counters using alternative MSR range */
if (x86_pmu.intel_cap.full_width_write) {
x86_pmu.max_period = x86_pmu.cntval_mask >> 1;
x86_pmu.perfctr = MSR_IA32_PMC0;
pr_cont("full-width counters, ");
}
/*
* For arch perfmon 4 use counter freezing to avoid
* several MSR accesses in the PMI.
*/
if (x86_pmu.counter_freezing)
x86_pmu.handle_irq = intel_pmu_handle_irq_v4;
kfree(to_free);
return 0;
}
/*
* HT bug: phase 2 init
* Called once we have valid topology information to check
* whether or not HT is enabled
* If HT is off, then we disable the workaround
*/
static __init int fixup_ht_bug(void)
{
int c;
/*
* problem not present on this CPU model, nothing to do
*/
if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
return 0;
if (topology_max_smt_threads() > 1) {
pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
return 0;
}
cpus_read_lock();
hardlockup_detector_perf_stop();
x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
x86_pmu.start_scheduling = NULL;
x86_pmu.commit_scheduling = NULL;
x86_pmu.stop_scheduling = NULL;
hardlockup_detector_perf_restart();
for_each_online_cpu(c)
free_excl_cntrs(c);
cpus_read_unlock();
pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
return 0;
}
subsys_initcall(fixup_ht_bug)
/*
* Intel(R) Processor Trace PMU driver for perf
* Copyright (c) 2013-2014, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* Intel PT is specified in the Intel Architecture Instruction Set Extensions
* Programming Reference:
* http://software.intel.com/en-us/intel-isa-extensions
*/
#undef DEBUG
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <asm/perf_event.h>
#include <asm/insn.h>
#include <asm/io.h>
#include <asm/intel_pt.h>
#include <asm/intel-family.h>
#include "../perf_event.h"
#include "pt.h"
static DEFINE_PER_CPU(struct pt, pt_ctx);
static struct pt_pmu pt_pmu;
/*
* Capabilities of Intel PT hardware, such as number of address bits or
* supported output schemes, are cached and exported to userspace as "caps"
* attribute group of pt pmu device
* (/sys/bus/event_source/devices/intel_pt/caps/) so that userspace can store
* relevant bits together with intel_pt traces.
*
* These are necessary for both trace decoding (payloads_lip, contains address
* width encoded in IP-related packets), and event configuration (bitmasks with
* permitted values for certain bit fields).
*/
#define PT_CAP(_n, _l, _r, _m) \
[PT_CAP_ ## _n] = { .name = __stringify(_n), .leaf = _l, \
.reg = _r, .mask = _m }
static struct pt_cap_desc {
const char *name;
u32 leaf;
u8 reg;
u32 mask;
} pt_caps[] = {
PT_CAP(max_subleaf, 0, CPUID_EAX, 0xffffffff),
PT_CAP(cr3_filtering, 0, CPUID_EBX, BIT(0)),
PT_CAP(psb_cyc, 0, CPUID_EBX, BIT(1)),
PT_CAP(ip_filtering, 0, CPUID_EBX, BIT(2)),
PT_CAP(mtc, 0, CPUID_EBX, BIT(3)),
PT_CAP(ptwrite, 0, CPUID_EBX, BIT(4)),
PT_CAP(power_event_trace, 0, CPUID_EBX, BIT(5)),
PT_CAP(topa_output, 0, CPUID_ECX, BIT(0)),
PT_CAP(topa_multiple_entries, 0, CPUID_ECX, BIT(1)),
PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)),
PT_CAP(output_subsys, 0, CPUID_ECX, BIT(3)),
PT_CAP(payloads_lip, 0, CPUID_ECX, BIT(31)),
PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x3),
PT_CAP(mtc_periods, 1, CPUID_EAX, 0xffff0000),
PT_CAP(cycle_thresholds, 1, CPUID_EBX, 0xffff),
PT_CAP(psb_periods, 1, CPUID_EBX, 0xffff0000),
};
u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities capability)
{
struct pt_cap_desc *cd = &pt_caps[capability];
u32 c = caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];
unsigned int shift = __ffs(cd->mask);
return (c & cd->mask) >> shift;
}
EXPORT_SYMBOL_GPL(intel_pt_validate_cap);
u32 intel_pt_validate_hw_cap(enum pt_capabilities cap)
{
return intel_pt_validate_cap(pt_pmu.caps, cap);
}
EXPORT_SYMBOL_GPL(intel_pt_validate_hw_cap);
static ssize_t pt_cap_show(struct device *cdev,
struct device_attribute *attr,
char *buf)
{
struct dev_ext_attribute *ea =
container_of(attr, struct dev_ext_attribute, attr);
enum pt_capabilities cap = (long)ea->var;
return snprintf(buf, PAGE_SIZE, "%x\n", intel_pt_validate_hw_cap(cap));
}
static struct attribute_group pt_cap_group __ro_after_init = {
.name = "caps",
};
PMU_FORMAT_ATTR(pt, "config:0" );
PMU_FORMAT_ATTR(cyc, "config:1" );
PMU_FORMAT_ATTR(pwr_evt, "config:4" );
PMU_FORMAT_ATTR(fup_on_ptw, "config:5" );
PMU_FORMAT_ATTR(mtc, "config:9" );
PMU_FORMAT_ATTR(tsc, "config:10" );
PMU_FORMAT_ATTR(noretcomp, "config:11" );
PMU_FORMAT_ATTR(ptw, "config:12" );
PMU_FORMAT_ATTR(branch, "config:13" );
PMU_FORMAT_ATTR(mtc_period, "config:14-17" );
PMU_FORMAT_ATTR(cyc_thresh, "config:19-22" );
PMU_FORMAT_ATTR(psb_period, "config:24-27" );
static struct attribute *pt_formats_attr[] = {
&format_attr_pt.attr,
&format_attr_cyc.attr,
&format_attr_pwr_evt.attr,
&format_attr_fup_on_ptw.attr,
&format_attr_mtc.attr,
&format_attr_tsc.attr,
&format_attr_noretcomp.attr,
&format_attr_ptw.attr,
&format_attr_branch.attr,
&format_attr_mtc_period.attr,
&format_attr_cyc_thresh.attr,
&format_attr_psb_period.attr,
NULL,
};
static struct attribute_group pt_format_group = {
.name = "format",
.attrs = pt_formats_attr,
};
static ssize_t
pt_timing_attr_show(struct device *dev, struct device_attribute *attr,
char *page)
{
struct perf_pmu_events_attr *pmu_attr =
container_of(attr, struct perf_pmu_events_attr, attr);
switch (pmu_attr->id) {
case 0:
return sprintf(page, "%lu\n", pt_pmu.max_nonturbo_ratio);
case 1:
return sprintf(page, "%u:%u\n",
pt_pmu.tsc_art_num,
pt_pmu.tsc_art_den);
default:
break;
}
return -EINVAL;
}
PMU_EVENT_ATTR(max_nonturbo_ratio, timing_attr_max_nonturbo_ratio, 0,
pt_timing_attr_show);
PMU_EVENT_ATTR(tsc_art_ratio, timing_attr_tsc_art_ratio, 1,
pt_timing_attr_show);
static struct attribute *pt_timing_attr[] = {
&timing_attr_max_nonturbo_ratio.attr.attr,
&timing_attr_tsc_art_ratio.attr.attr,
NULL,
};
static struct attribute_group pt_timing_group = {
.attrs = pt_timing_attr,
};
static const struct attribute_group *pt_attr_groups[] = {
&pt_cap_group,
&pt_format_group,
&pt_timing_group,
NULL,
};
static int __init pt_pmu_hw_init(void)
{
struct dev_ext_attribute *de_attrs;
struct attribute **attrs;
size_t size;
u64 reg;
int ret;
long i;
rdmsrl(MSR_PLATFORM_INFO, reg);
pt_pmu.max_nonturbo_ratio = (reg & 0xff00) >> 8;
/*
* if available, read in TSC to core crystal clock ratio,
* otherwise, zero for numerator stands for "not enumerated"
* as per SDM
*/
if (boot_cpu_data.cpuid_level >= CPUID_TSC_LEAF) {
u32 eax, ebx, ecx, edx;
cpuid(CPUID_TSC_LEAF, &eax, &ebx, &ecx, &edx);
pt_pmu.tsc_art_num = ebx;
pt_pmu.tsc_art_den = eax;
}
/* model-specific quirks */
switch (boot_cpu_data.x86_model) {
case INTEL_FAM6_BROADWELL_CORE:
case INTEL_FAM6_BROADWELL_XEON_D:
case INTEL_FAM6_BROADWELL_GT3E:
case INTEL_FAM6_BROADWELL_X:
/* not setting BRANCH_EN will #GP, erratum BDM106 */
pt_pmu.branch_en_always_on = true;
break;
default:
break;
}
if (boot_cpu_has(X86_FEATURE_VMX)) {
/*
* Intel SDM, 36.5 "Tracing post-VMXON" says that
* "IA32_VMX_MISC[bit 14]" being 1 means PT can trace
* post-VMXON.
*/
rdmsrl(MSR_IA32_VMX_MISC, reg);
if (reg & BIT(14))
pt_pmu.vmx = true;
}
attrs = NULL;
for (i = 0; i < PT_CPUID_LEAVES; i++) {
cpuid_count(20, i,
&pt_pmu.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM],
&pt_pmu.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM],
&pt_pmu.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM],
&pt_pmu.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM]);
}
ret = -ENOMEM;
size = sizeof(struct attribute *) * (ARRAY_SIZE(pt_caps)+1);
attrs = kzalloc(size, GFP_KERNEL);
if (!attrs)
goto fail;
size = sizeof(struct dev_ext_attribute) * (ARRAY_SIZE(pt_caps)+1);
de_attrs = kzalloc(size, GFP_KERNEL);
if (!de_attrs)
goto fail;
for (i = 0; i < ARRAY_SIZE(pt_caps); i++) {
struct dev_ext_attribute *de_attr = de_attrs + i;
de_attr->attr.attr.name = pt_caps[i].name;
sysfs_attr_init(&de_attr->attr.attr);
de_attr->attr.attr.mode = S_IRUGO;
de_attr->attr.show = pt_cap_show;
de_attr->var = (void *)i;
attrs[i] = &de_attr->attr.attr;
}
pt_cap_group.attrs = attrs;
return 0;
fail:
kfree(attrs);
return ret;
}
#define RTIT_CTL_CYC_PSB (RTIT_CTL_CYCLEACC | \
RTIT_CTL_CYC_THRESH | \
RTIT_CTL_PSB_FREQ)
#define RTIT_CTL_MTC (RTIT_CTL_MTC_EN | \
RTIT_CTL_MTC_RANGE)
#define RTIT_CTL_PTW (RTIT_CTL_PTW_EN | \
RTIT_CTL_FUP_ON_PTW)
/*
* Bit 0 (TraceEn) in the attr.config is meaningless as the
* corresponding bit in the RTIT_CTL can only be controlled
* by the driver; therefore, repurpose it to mean: pass
* through the bit that was previously assumed to be always
* on for PT, thereby allowing the user to *not* set it if
* they so wish. See also pt_event_valid() and pt_config().
*/
#define RTIT_CTL_PASSTHROUGH RTIT_CTL_TRACEEN
#define PT_CONFIG_MASK (RTIT_CTL_TRACEEN | \
RTIT_CTL_TSC_EN | \
RTIT_CTL_DISRETC | \
RTIT_CTL_BRANCH_EN | \
RTIT_CTL_CYC_PSB | \
RTIT_CTL_MTC | \
RTIT_CTL_PWR_EVT_EN | \
RTIT_CTL_FUP_ON_PTW | \
RTIT_CTL_PTW_EN)
static bool pt_event_valid(struct perf_event *event)
{
u64 config = event->attr.config;
u64 allowed, requested;
if ((config & PT_CONFIG_MASK) != config)
return false;
if (config & RTIT_CTL_CYC_PSB) {
if (!intel_pt_validate_hw_cap(PT_CAP_psb_cyc))
return false;
allowed = intel_pt_validate_hw_cap(PT_CAP_psb_periods);
requested = (config & RTIT_CTL_PSB_FREQ) >>
RTIT_CTL_PSB_FREQ_OFFSET;
if (requested && (!(allowed & BIT(requested))))
return false;
allowed = intel_pt_validate_hw_cap(PT_CAP_cycle_thresholds);
requested = (config & RTIT_CTL_CYC_THRESH) >>
RTIT_CTL_CYC_THRESH_OFFSET;
if (requested && (!(allowed & BIT(requested))))
return false;
}
if (config & RTIT_CTL_MTC) {
/*
* In the unlikely case that CPUID lists valid mtc periods,
* but not the mtc capability, drop out here.
*
* Spec says that setting mtc period bits while mtc bit in
* CPUID is 0 will #GP, so better safe than sorry.
*/
if (!intel_pt_validate_hw_cap(PT_CAP_mtc))
return false;
allowed = intel_pt_validate_hw_cap(PT_CAP_mtc_periods);
if (!allowed)
return false;
requested = (config & RTIT_CTL_MTC_RANGE) >>
RTIT_CTL_MTC_RANGE_OFFSET;
if (!(allowed & BIT(requested)))
return false;
}
if (config & RTIT_CTL_PWR_EVT_EN &&
!intel_pt_validate_hw_cap(PT_CAP_power_event_trace))
return false;
if (config & RTIT_CTL_PTW) {
if (!intel_pt_validate_hw_cap(PT_CAP_ptwrite))
return false;
/* FUPonPTW without PTW doesn't make sense */
if ((config & RTIT_CTL_FUP_ON_PTW) &&
!(config & RTIT_CTL_PTW_EN))
return false;
}
/*
* Setting bit 0 (TraceEn in RTIT_CTL MSR) in the attr.config
* clears the assomption that BranchEn must always be enabled,
* as was the case with the first implementation of PT.
* If this bit is not set, the legacy behavior is preserved
* for compatibility with the older userspace.
*
* Re-using bit 0 for this purpose is fine because it is never
* directly set by the user; previous attempts at setting it in
* the attr.config resulted in -EINVAL.
*/
if (config & RTIT_CTL_PASSTHROUGH) {
/*
* Disallow not setting BRANCH_EN where BRANCH_EN is
* always required.
*/
if (pt_pmu.branch_en_always_on &&
!(config & RTIT_CTL_BRANCH_EN))
return false;
} else {
/*
* Disallow BRANCH_EN without the PASSTHROUGH.
*/
if (config & RTIT_CTL_BRANCH_EN)
return false;
}
return true;
}
/*
* PT configuration helpers
* These all are cpu affine and operate on a local PT
*/
/* Address ranges and their corresponding msr configuration registers */
static const struct pt_address_range {
unsigned long msr_a;
unsigned long msr_b;
unsigned int reg_off;
} pt_address_ranges[] = {
{
.msr_a = MSR_IA32_RTIT_ADDR0_A,
.msr_b = MSR_IA32_RTIT_ADDR0_B,
.reg_off = RTIT_CTL_ADDR0_OFFSET,
},
{
.msr_a = MSR_IA32_RTIT_ADDR1_A,
.msr_b = MSR_IA32_RTIT_ADDR1_B,
.reg_off = RTIT_CTL_ADDR1_OFFSET,
},
{
.msr_a = MSR_IA32_RTIT_ADDR2_A,
.msr_b = MSR_IA32_RTIT_ADDR2_B,
.reg_off = RTIT_CTL_ADDR2_OFFSET,
},
{
.msr_a = MSR_IA32_RTIT_ADDR3_A,
.msr_b = MSR_IA32_RTIT_ADDR3_B,
.reg_off = RTIT_CTL_ADDR3_OFFSET,
}
};
static u64 pt_config_filters(struct perf_event *event)
{
struct pt_filters *filters = event->hw.addr_filters;
struct pt *pt = this_cpu_ptr(&pt_ctx);
unsigned int range = 0;
u64 rtit_ctl = 0;
if (!filters)
return 0;
perf_event_addr_filters_sync(event);
for (range = 0; range < filters->nr_filters; range++) {
struct pt_filter *filter = &filters->filter[range];
/*
* Note, if the range has zero start/end addresses due
* to its dynamic object not being loaded yet, we just
* go ahead and program zeroed range, which will simply
* produce no data. Note^2: if executable code at 0x0
* is a concern, we can set up an "invalid" configuration
* such as msr_b < msr_a.
*/
/* avoid redundant msr writes */
if (pt->filters.filter[range].msr_a != filter->msr_a) {
wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a);
pt->filters.filter[range].msr_a = filter->msr_a;
}
if (pt->filters.filter[range].msr_b != filter->msr_b) {
wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b);
pt->filters.filter[range].msr_b = filter->msr_b;
}
rtit_ctl |= filter->config << pt_address_ranges[range].reg_off;
}
return rtit_ctl;
}
static void pt_config(struct perf_event *event)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
u64 reg;
/* First round: clear STATUS, in particular the PSB byte counter. */
if (!event->hw.config) {
perf_event_itrace_started(event);
wrmsrl(MSR_IA32_RTIT_STATUS, 0);
}
reg = pt_config_filters(event);
reg |= RTIT_CTL_TOPA | RTIT_CTL_TRACEEN;
/*
* Previously, we had BRANCH_EN on by default, but now that PT has
* grown features outside of branch tracing, it is useful to allow
* the user to disable it. Setting bit 0 in the event's attr.config
* allows BRANCH_EN to pass through instead of being always on. See
* also the comment in pt_event_valid().
*/
if (event->attr.config & BIT(0)) {
reg |= event->attr.config & RTIT_CTL_BRANCH_EN;
} else {
reg |= RTIT_CTL_BRANCH_EN;
}
if (!event->attr.exclude_kernel)
reg |= RTIT_CTL_OS;
if (!event->attr.exclude_user)
reg |= RTIT_CTL_USR;
reg |= (event->attr.config & PT_CONFIG_MASK);
event->hw.config = reg;
if (READ_ONCE(pt->vmx_on))
perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
else
wrmsrl(MSR_IA32_RTIT_CTL, reg);
}
static void pt_config_stop(struct perf_event *event)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
u64 ctl = READ_ONCE(event->hw.config);
/* may be already stopped by a PMI */
if (!(ctl & RTIT_CTL_TRACEEN))
return;
ctl &= ~RTIT_CTL_TRACEEN;
if (!READ_ONCE(pt->vmx_on))
wrmsrl(MSR_IA32_RTIT_CTL, ctl);
WRITE_ONCE(event->hw.config, ctl);
/*
* A wrmsr that disables trace generation serializes other PT
* registers and causes all data packets to be written to memory,
* but a fence is required for the data to become globally visible.
*
* The below WMB, separating data store and aux_head store matches
* the consumer's RMB that separates aux_head load and data load.
*/
wmb();
}
static void pt_config_buffer(void *buf, unsigned int topa_idx,
unsigned int output_off)
{
u64 reg;
wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(buf));
reg = 0x7f | ((u64)topa_idx << 7) | ((u64)output_off << 32);
wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
}
/*
* Keep ToPA table-related metadata on the same page as the actual table,
* taking up a few words from the top
*/
#define TENTS_PER_PAGE (((PAGE_SIZE - 40) / sizeof(struct topa_entry)) - 1)
/**
* struct topa - page-sized ToPA table with metadata at the top
* @table: actual ToPA table entries, as understood by PT hardware
* @list: linkage to struct pt_buffer's list of tables
* @phys: physical address of this page
* @offset: offset of the first entry in this table in the buffer
* @size: total size of all entries in this table
* @last: index of the last initialized entry in this table
*/
struct topa {
struct topa_entry table[TENTS_PER_PAGE];
struct list_head list;
u64 phys;
u64 offset;
size_t size;
int last;
};
/* make -1 stand for the last table entry */
#define TOPA_ENTRY(t, i) ((i) == -1 ? &(t)->table[(t)->last] : &(t)->table[(i)])
/**
* topa_alloc() - allocate page-sized ToPA table
* @cpu: CPU on which to allocate.
* @gfp: Allocation flags.
*
* Return: On success, return the pointer to ToPA table page.
*/
static struct topa *topa_alloc(int cpu, gfp_t gfp)
{
int node = cpu_to_node(cpu);
struct topa *topa;
struct page *p;
p = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
if (!p)
return NULL;
topa = page_address(p);
topa->last = 0;
topa->phys = page_to_phys(p);
/*
* In case of singe-entry ToPA, always put the self-referencing END
* link as the 2nd entry in the table
*/
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) {
TOPA_ENTRY(topa, 1)->base = topa->phys >> TOPA_SHIFT;
TOPA_ENTRY(topa, 1)->end = 1;
}
return topa;
}
/**
* topa_free() - free a page-sized ToPA table
* @topa: Table to deallocate.
*/
static void topa_free(struct topa *topa)
{
free_page((unsigned long)topa);
}
/**
* topa_insert_table() - insert a ToPA table into a buffer
* @buf: PT buffer that's being extended.
* @topa: New topa table to be inserted.
*
* If it's the first table in this buffer, set up buffer's pointers
* accordingly; otherwise, add a END=1 link entry to @topa to the current
* "last" table and adjust the last table pointer to @topa.
*/
static void topa_insert_table(struct pt_buffer *buf, struct topa *topa)
{
struct topa *last = buf->last;
list_add_tail(&topa->list, &buf->tables);
if (!buf->first) {
buf->first = buf->last = buf->cur = topa;
return;
}
topa->offset = last->offset + last->size;
buf->last = topa;
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
return;
BUG_ON(last->last != TENTS_PER_PAGE - 1);
TOPA_ENTRY(last, -1)->base = topa->phys >> TOPA_SHIFT;
TOPA_ENTRY(last, -1)->end = 1;
}
/**
* topa_table_full() - check if a ToPA table is filled up
* @topa: ToPA table.
*/
static bool topa_table_full(struct topa *topa)
{
/* single-entry ToPA is a special case */
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
return !!topa->last;
return topa->last == TENTS_PER_PAGE - 1;
}
/**
* topa_insert_pages() - create a list of ToPA tables
* @buf: PT buffer being initialized.
* @gfp: Allocation flags.
*
* This initializes a list of ToPA tables with entries from
* the data_pages provided by rb_alloc_aux().
*
* Return: 0 on success or error code.
*/
static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp)
{
struct topa *topa = buf->last;
int order = 0;
struct page *p;
p = virt_to_page(buf->data_pages[buf->nr_pages]);
if (PagePrivate(p))
order = page_private(p);
if (topa_table_full(topa)) {
topa = topa_alloc(buf->cpu, gfp);
if (!topa)
return -ENOMEM;
topa_insert_table(buf, topa);
}
TOPA_ENTRY(topa, -1)->base = page_to_phys(p) >> TOPA_SHIFT;
TOPA_ENTRY(topa, -1)->size = order;
if (!buf->snapshot &&
!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) {
TOPA_ENTRY(topa, -1)->intr = 1;
TOPA_ENTRY(topa, -1)->stop = 1;
}
topa->last++;
topa->size += sizes(order);
buf->nr_pages += 1ul << order;
return 0;
}
/**
* pt_topa_dump() - print ToPA tables and their entries
* @buf: PT buffer.
*/
static void pt_topa_dump(struct pt_buffer *buf)
{
struct topa *topa;
list_for_each_entry(topa, &buf->tables, list) {
int i;
pr_debug("# table @%p (%016Lx), off %llx size %zx\n", topa->table,
topa->phys, topa->offset, topa->size);
for (i = 0; i < TENTS_PER_PAGE; i++) {
pr_debug("# entry @%p (%lx sz %u %c%c%c) raw=%16llx\n",
&topa->table[i],
(unsigned long)topa->table[i].base << TOPA_SHIFT,
sizes(topa->table[i].size),
topa->table[i].end ? 'E' : ' ',
topa->table[i].intr ? 'I' : ' ',
topa->table[i].stop ? 'S' : ' ',
*(u64 *)&topa->table[i]);
if ((intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) &&
topa->table[i].stop) ||
topa->table[i].end)
break;
}
}
}
/**
* pt_buffer_advance() - advance to the next output region
* @buf: PT buffer.
*
* Advance the current pointers in the buffer to the next ToPA entry.
*/
static void pt_buffer_advance(struct pt_buffer *buf)
{
buf->output_off = 0;
buf->cur_idx++;
if (buf->cur_idx == buf->cur->last) {
if (buf->cur == buf->last)
buf->cur = buf->first;
else
buf->cur = list_entry(buf->cur->list.next, struct topa,
list);
buf->cur_idx = 0;
}
}
/**
* pt_update_head() - calculate current offsets and sizes
* @pt: Per-cpu pt context.
*
* Update buffer's current write pointer position and data size.
*/
static void pt_update_head(struct pt *pt)
{
struct pt_buffer *buf = perf_get_aux(&pt->handle);
u64 topa_idx, base, old;
/* offset of the first region in this table from the beginning of buf */
base = buf->cur->offset + buf->output_off;
/* offset of the current output region within this table */
for (topa_idx = 0; topa_idx < buf->cur_idx; topa_idx++)
base += sizes(buf->cur->table[topa_idx].size);
if (buf->snapshot) {
local_set(&buf->data_size, base);
} else {
old = (local64_xchg(&buf->head, base) &
((buf->nr_pages << PAGE_SHIFT) - 1));
if (base < old)
base += buf->nr_pages << PAGE_SHIFT;
local_add(base - old, &buf->data_size);
}
}
/**
* pt_buffer_region() - obtain current output region's address
* @buf: PT buffer.
*/
static void *pt_buffer_region(struct pt_buffer *buf)
{
return phys_to_virt(buf->cur->table[buf->cur_idx].base << TOPA_SHIFT);
}
/**
* pt_buffer_region_size() - obtain current output region's size
* @buf: PT buffer.
*/
static size_t pt_buffer_region_size(struct pt_buffer *buf)
{
return sizes(buf->cur->table[buf->cur_idx].size);
}
/**
* pt_handle_status() - take care of possible status conditions
* @pt: Per-cpu pt context.
*/
static void pt_handle_status(struct pt *pt)
{
struct pt_buffer *buf = perf_get_aux(&pt->handle);
int advance = 0;
u64 status;
rdmsrl(MSR_IA32_RTIT_STATUS, status);
if (status & RTIT_STATUS_ERROR) {
pr_err_ratelimited("ToPA ERROR encountered, trying to recover\n");
pt_topa_dump(buf);
status &= ~RTIT_STATUS_ERROR;
}
if (status & RTIT_STATUS_STOPPED) {
status &= ~RTIT_STATUS_STOPPED;
/*
* On systems that only do single-entry ToPA, hitting STOP
* means we are already losing data; need to let the decoder
* know.
*/
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) ||
buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
perf_aux_output_flag(&pt->handle,
PERF_AUX_FLAG_TRUNCATED);
advance++;
}
}
/*
* Also on single-entry ToPA implementations, interrupt will come
* before the output reaches its output region's boundary.
*/
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) &&
!buf->snapshot &&
pt_buffer_region_size(buf) - buf->output_off <= TOPA_PMI_MARGIN) {
void *head = pt_buffer_region(buf);
/* everything within this margin needs to be zeroed out */
memset(head + buf->output_off, 0,
pt_buffer_region_size(buf) -
buf->output_off);
advance++;
}
if (advance)
pt_buffer_advance(buf);
wrmsrl(MSR_IA32_RTIT_STATUS, status);
}
/**
* pt_read_offset() - translate registers into buffer pointers
* @buf: PT buffer.
*
* Set buffer's output pointers from MSR values.
*/
static void pt_read_offset(struct pt_buffer *buf)
{
u64 offset, base_topa;
rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa);
buf->cur = phys_to_virt(base_topa);
rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
/* offset within current output region */
buf->output_off = offset >> 32;
/* index of current output region within this table */
buf->cur_idx = (offset & 0xffffff80) >> 7;
}
/**
* pt_topa_next_entry() - obtain index of the first page in the next ToPA entry
* @buf: PT buffer.
* @pg: Page offset in the buffer.
*
* When advancing to the next output region (ToPA entry), given a page offset
* into the buffer, we need to find the offset of the first page in the next
* region.
*/
static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
{
struct topa_entry *te = buf->topa_index[pg];
/* one region */
if (buf->first == buf->last && buf->first->last == 1)
return pg;
do {
pg++;
pg &= buf->nr_pages - 1;
} while (buf->topa_index[pg] == te);
return pg;
}
/**
* pt_buffer_reset_markers() - place interrupt and stop bits in the buffer
* @buf: PT buffer.
* @handle: Current output handle.
*
* Place INT and STOP marks to prevent overwriting old data that the consumer
* hasn't yet collected and waking up the consumer after a certain fraction of
* the buffer has filled up. Only needed and sensible for non-snapshot counters.
*
* This obviously relies on buf::head to figure out buffer markers, so it has
* to be called after pt_buffer_reset_offsets() and before the hardware tracing
* is enabled.
*/
static int pt_buffer_reset_markers(struct pt_buffer *buf,
struct perf_output_handle *handle)
{
unsigned long head = local64_read(&buf->head);
unsigned long idx, npages, wakeup;
/* can't stop in the middle of an output region */
if (buf->output_off + handle->size + 1 <
sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
return -EINVAL;
}
/* single entry ToPA is handled by marking all regions STOP=1 INT=1 */
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
return 0;
/* clear STOP and INT from current entry */
buf->topa_index[buf->stop_pos]->stop = 0;
buf->topa_index[buf->stop_pos]->intr = 0;
buf->topa_index[buf->intr_pos]->intr = 0;
/* how many pages till the STOP marker */
npages = handle->size >> PAGE_SHIFT;
/* if it's on a page boundary, fill up one more page */
if (!offset_in_page(head + handle->size + 1))
npages++;
idx = (head >> PAGE_SHIFT) + npages;
idx &= buf->nr_pages - 1;
buf->stop_pos = idx;
wakeup = handle->wakeup >> PAGE_SHIFT;
/* in the worst case, wake up the consumer one page before hard stop */
idx = (head >> PAGE_SHIFT) + npages - 1;
if (idx > wakeup)
idx = wakeup;
idx &= buf->nr_pages - 1;
buf->intr_pos = idx;
buf->topa_index[buf->stop_pos]->stop = 1;
buf->topa_index[buf->stop_pos]->intr = 1;
buf->topa_index[buf->intr_pos]->intr = 1;
return 0;
}
/**
* pt_buffer_setup_topa_index() - build topa_index[] table of regions
* @buf: PT buffer.
*
* topa_index[] references output regions indexed by offset into the
* buffer for purposes of quick reverse lookup.
*/
static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
{
struct topa *cur = buf->first, *prev = buf->last;
struct topa_entry *te_cur = TOPA_ENTRY(cur, 0),
*te_prev = TOPA_ENTRY(prev, prev->last - 1);
int pg = 0, idx = 0;
while (pg < buf->nr_pages) {
int tidx;
/* pages within one topa entry */
for (tidx = 0; tidx < 1 << te_cur->size; tidx++, pg++)
buf->topa_index[pg] = te_prev;
te_prev = te_cur;
if (idx == cur->last - 1) {
/* advance to next topa table */
idx = 0;
cur = list_entry(cur->list.next, struct topa, list);
} else {
idx++;
}
te_cur = TOPA_ENTRY(cur, idx);
}
}
/**
* pt_buffer_reset_offsets() - adjust buffer's write pointers from aux_head
* @buf: PT buffer.
* @head: Write pointer (aux_head) from AUX buffer.
*
* Find the ToPA table and entry corresponding to given @head and set buffer's
* "current" pointers accordingly. This is done after we have obtained the
* current aux_head position from a successful call to perf_aux_output_begin()
* to make sure the hardware is writing to the right place.
*
* This function modifies buf::{cur,cur_idx,output_off} that will be programmed
* into PT msrs when the tracing is enabled and buf::head and buf::data_size,
* which are used to determine INT and STOP markers' locations by a subsequent
* call to pt_buffer_reset_markers().
*/
static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
{
int pg;
if (buf->snapshot)
head &= (buf->nr_pages << PAGE_SHIFT) - 1;
pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
pg = pt_topa_next_entry(buf, pg);
buf->cur = (struct topa *)((unsigned long)buf->topa_index[pg] & PAGE_MASK);
buf->cur_idx = ((unsigned long)buf->topa_index[pg] -
(unsigned long)buf->cur) / sizeof(struct topa_entry);
buf->output_off = head & (sizes(buf->cur->table[buf->cur_idx].size) - 1);
local64_set(&buf->head, head);
local_set(&buf->data_size, 0);
}
/**
* pt_buffer_fini_topa() - deallocate ToPA structure of a buffer
* @buf: PT buffer.
*/
static void pt_buffer_fini_topa(struct pt_buffer *buf)
{
struct topa *topa, *iter;
list_for_each_entry_safe(topa, iter, &buf->tables, list) {
/*
* right now, this is in free_aux() path only, so
* no need to unlink this table from the list
*/
topa_free(topa);
}
}
/**
* pt_buffer_init_topa() - initialize ToPA table for pt buffer
* @buf: PT buffer.
* @size: Total size of all regions within this ToPA.
* @gfp: Allocation flags.
*/
static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages,
gfp_t gfp)
{
struct topa *topa;
int err;
topa = topa_alloc(buf->cpu, gfp);
if (!topa)
return -ENOMEM;
topa_insert_table(buf, topa);
while (buf->nr_pages < nr_pages) {
err = topa_insert_pages(buf, gfp);
if (err) {
pt_buffer_fini_topa(buf);
return -ENOMEM;
}
}
pt_buffer_setup_topa_index(buf);
/* link last table to the first one, unless we're double buffering */
if (intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) {
TOPA_ENTRY(buf->last, -1)->base = buf->first->phys >> TOPA_SHIFT;
TOPA_ENTRY(buf->last, -1)->end = 1;
}
pt_topa_dump(buf);
return 0;
}
/**
* pt_buffer_setup_aux() - set up topa tables for a PT buffer
* @cpu: Cpu on which to allocate, -1 means current.
* @pages: Array of pointers to buffer pages passed from perf core.
* @nr_pages: Number of pages in the buffer.
* @snapshot: If this is a snapshot/overwrite counter.
*
* This is a pmu::setup_aux callback that sets up ToPA tables and all the
* bookkeeping for an AUX buffer.
*
* Return: Our private PT buffer structure.
*/
static void *
pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot)
{
struct pt_buffer *buf;
int node, ret;
if (!nr_pages)
return NULL;
if (cpu == -1)
cpu = raw_smp_processor_id();
node = cpu_to_node(cpu);
buf = kzalloc_node(offsetof(struct pt_buffer, topa_index[nr_pages]),
GFP_KERNEL, node);
if (!buf)
return NULL;
buf->cpu = cpu;
buf->snapshot = snapshot;
buf->data_pages = pages;
INIT_LIST_HEAD(&buf->tables);
ret = pt_buffer_init_topa(buf, nr_pages, GFP_KERNEL);
if (ret) {
kfree(buf);
return NULL;
}
return buf;
}
/**
* pt_buffer_free_aux() - perf AUX deallocation path callback
* @data: PT buffer.
*/
static void pt_buffer_free_aux(void *data)
{
struct pt_buffer *buf = data;
pt_buffer_fini_topa(buf);
kfree(buf);
}
static int pt_addr_filters_init(struct perf_event *event)
{
struct pt_filters *filters;
int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);
if (!intel_pt_validate_hw_cap(PT_CAP_num_address_ranges))
return 0;
filters = kzalloc_node(sizeof(struct pt_filters), GFP_KERNEL, node);
if (!filters)
return -ENOMEM;
if (event->parent)
memcpy(filters, event->parent->hw.addr_filters,
sizeof(*filters));
event->hw.addr_filters = filters;
return 0;
}
static void pt_addr_filters_fini(struct perf_event *event)
{
kfree(event->hw.addr_filters);
event->hw.addr_filters = NULL;
}
static inline bool valid_kernel_ip(unsigned long ip)
{
return virt_addr_valid(ip) && kernel_ip(ip);
}
static int pt_event_addr_filters_validate(struct list_head *filters)
{
struct perf_addr_filter *filter;
int range = 0;
list_for_each_entry(filter, filters, entry) {
/*
* PT doesn't support single address triggers and
* 'start' filters.
*/
if (!filter->size ||
filter->action == PERF_ADDR_FILTER_ACTION_START)
return -EOPNOTSUPP;
if (!filter->path.dentry) {
if (!valid_kernel_ip(filter->offset))
return -EINVAL;
if (!valid_kernel_ip(filter->offset + filter->size))
return -EINVAL;
}
if (++range > intel_pt_validate_hw_cap(PT_CAP_num_address_ranges))
return -EOPNOTSUPP;
}
return 0;
}
static void pt_event_addr_filters_sync(struct perf_event *event)
{
struct perf_addr_filters_head *head = perf_event_addr_filters(event);
unsigned long msr_a, msr_b, *offs = event->addr_filters_offs;
struct pt_filters *filters = event->hw.addr_filters;
struct perf_addr_filter *filter;
int range = 0;
if (!filters)
return;
list_for_each_entry(filter, &head->list, entry) {
if (filter->path.dentry && !offs[range]) {
msr_a = msr_b = 0;
} else {
/* apply the offset */
msr_a = filter->offset + offs[range];
msr_b = filter->size + msr_a - 1;
}
filters->filter[range].msr_a = msr_a;
filters->filter[range].msr_b = msr_b;
if (filter->action == PERF_ADDR_FILTER_ACTION_FILTER)
filters->filter[range].config = 1;
else
filters->filter[range].config = 2;
range++;
}
filters->nr_filters = range;
}
/**
* intel_pt_interrupt() - PT PMI handler
*/
void intel_pt_interrupt(void)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf;
struct perf_event *event = pt->handle.event;
/*
* There may be a dangling PT bit in the interrupt status register
* after PT has been disabled by pt_event_stop(). Make sure we don't
* do anything (particularly, re-enable) for this event here.
*/
if (!READ_ONCE(pt->handle_nmi))
return;
if (!event)
return;
pt_config_stop(event);
buf = perf_get_aux(&pt->handle);
if (!buf)
return;
pt_read_offset(buf);
pt_handle_status(pt);
pt_update_head(pt);
perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0));
if (!event->hw.state) {
int ret;
buf = perf_aux_output_begin(&pt->handle, event);
if (!buf) {
event->hw.state = PERF_HES_STOPPED;
return;
}
pt_buffer_reset_offsets(buf, pt->handle.head);
/* snapshot counters don't use PMI, so it's safe */
ret = pt_buffer_reset_markers(buf, &pt->handle);
if (ret) {
perf_aux_output_end(&pt->handle, 0);
return;
}
pt_config_buffer(buf->cur->table, buf->cur_idx,
buf->output_off);
pt_config(event);
}
}
void intel_pt_handle_vmx(int on)
{
struct pt *pt = this_cpu_ptr(&pt_ctx); /*covered*/
struct perf_event *event;
unsigned long flags;
/* PT plays nice with VMX, do nothing */
if (pt_pmu.vmx)
return;
/*
* VMXON will clear RTIT_CTL.TraceEn; we need to make
* sure to not try to set it while VMX is on. Disable
* interrupts to avoid racing with pmu callbacks;
* concurrent PMI should be handled fine.
*/
local_irq_save(flags); /*covered*/
WRITE_ONCE(pt->vmx_on, on);
/*
* If an AUX transaction is in progress, it will contain
* gap(s), so flag it PARTIAL to inform the user.
*/
event = pt->handle.event;
if (event)
perf_aux_output_flag(&pt->handle,
PERF_AUX_FLAG_PARTIAL);
/* Turn PTs back on */
if (!on && event)
wrmsrl(MSR_IA32_RTIT_CTL, event->hw.config);
local_irq_restore(flags); /*covered*/
}
EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);
/*
* PMU callbacks
*/
static void pt_event_start(struct perf_event *event, int mode)
{
struct hw_perf_event *hwc = &event->hw;
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf;
buf = perf_aux_output_begin(&pt->handle, event);
if (!buf)
goto fail_stop;
pt_buffer_reset_offsets(buf, pt->handle.head);
if (!buf->snapshot) {
if (pt_buffer_reset_markers(buf, &pt->handle))
goto fail_end_stop;
}
WRITE_ONCE(pt->handle_nmi, 1);
hwc->state = 0;
pt_config_buffer(buf->cur->table, buf->cur_idx,
buf->output_off);
pt_config(event);
return;
fail_end_stop:
perf_aux_output_end(&pt->handle, 0);
fail_stop:
hwc->state = PERF_HES_STOPPED;
}
static void pt_event_stop(struct perf_event *event, int mode)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
/*
* Protect against the PMI racing with disabling wrmsr,
* see comment in intel_pt_interrupt().
*/
WRITE_ONCE(pt->handle_nmi, 0);
pt_config_stop(event);
if (event->hw.state == PERF_HES_STOPPED)
return;
event->hw.state = PERF_HES_STOPPED;
if (mode & PERF_EF_UPDATE) {
struct pt_buffer *buf = perf_get_aux(&pt->handle);
if (!buf)
return;
if (WARN_ON_ONCE(pt->handle.event != event))
return;
pt_read_offset(buf);
pt_handle_status(pt);
pt_update_head(pt);
if (buf->snapshot)
pt->handle.head =
local_xchg(&buf->data_size,
buf->nr_pages << PAGE_SHIFT);
perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0));
}
}
static void pt_event_del(struct perf_event *event, int mode)
{
pt_event_stop(event, PERF_EF_UPDATE);
}
static int pt_event_add(struct perf_event *event, int mode)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct hw_perf_event *hwc = &event->hw;
int ret = -EBUSY;
if (pt->handle.event)
goto fail;
if (mode & PERF_EF_START) {
pt_event_start(event, 0);
ret = -EINVAL;
if (hwc->state == PERF_HES_STOPPED)
goto fail;
} else {
hwc->state = PERF_HES_STOPPED;
}
ret = 0;
fail:
return ret;
}
static void pt_event_read(struct perf_event *event)
{
}
static void pt_event_destroy(struct perf_event *event)
{
pt_addr_filters_fini(event);
x86_del_exclusive(x86_lbr_exclusive_pt);
}
static int pt_event_init(struct perf_event *event)
{
if (event->attr.type != pt_pmu.pmu.type)
return -ENOENT;
if (!pt_event_valid(event))
return -EINVAL;
if (x86_add_exclusive(x86_lbr_exclusive_pt))
return -EBUSY;
if (pt_addr_filters_init(event)) {
x86_del_exclusive(x86_lbr_exclusive_pt);
return -ENOMEM;
}
event->destroy = pt_event_destroy;
return 0;
}
void cpu_emergency_stop_pt(void)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
if (pt->handle.event)
pt_event_stop(pt->handle.event, PERF_EF_UPDATE);
}
static __init int pt_init(void)
{
int ret, cpu, prior_warn = 0;
BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);
if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
return -ENODEV;
get_online_cpus();
for_each_online_cpu(cpu) {
u64 ctl;
ret = rdmsrl_safe_on_cpu(cpu, MSR_IA32_RTIT_CTL, &ctl);
if (!ret && (ctl & RTIT_CTL_TRACEEN))
prior_warn++;
}
put_online_cpus();
if (prior_warn) {
x86_add_exclusive(x86_lbr_exclusive_pt);
pr_warn("PT is enabled at boot time, doing nothing\n");
return -EBUSY;
}
ret = pt_pmu_hw_init();
if (ret)
return ret;
if (!intel_pt_validate_hw_cap(PT_CAP_topa_output)) {
pr_warn("ToPA output is not supported on this CPU\n");
return -ENODEV;
}
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
pt_pmu.pmu.capabilities =
PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
pt_pmu.pmu.attr_groups = pt_attr_groups;
pt_pmu.pmu.task_ctx_nr = perf_sw_context;
pt_pmu.pmu.event_init = pt_event_init;
pt_pmu.pmu.add = pt_event_add;
pt_pmu.pmu.del = pt_event_del;
pt_pmu.pmu.start = pt_event_start;
pt_pmu.pmu.stop = pt_event_stop;
pt_pmu.pmu.read = pt_event_read;
pt_pmu.pmu.setup_aux = pt_buffer_setup_aux;
pt_pmu.pmu.free_aux = pt_buffer_free_aux;
pt_pmu.pmu.addr_filters_sync = pt_event_addr_filters_sync;
pt_pmu.pmu.addr_filters_validate = pt_event_addr_filters_validate;
pt_pmu.pmu.nr_addr_filters =
intel_pt_validate_hw_cap(PT_CAP_num_address_ranges);
ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
return ret;
}
arch_initcall(pt_init);
/*
* Support Intel RAPL energy consumption counters
* Copyright (C) 2013 Google, Inc., Stephane Eranian
*
* Intel RAPL interface is specified in the IA-32 Manual Vol3b
* section 14.7.1 (September 2013)
*
* RAPL provides more controls than just reporting energy consumption
* however here we only expose the 3 energy consumption free running
* counters (pp0, pkg, dram).
*
* Each of those counters increments in a power unit defined by the
* RAPL_POWER_UNIT MSR. On SandyBridge, this unit is 1/(2^16) Joules
* but it can vary.
*
* Counter to rapl events mappings:
*
* pp0 counter: consumption of all physical cores (power plane 0)
* event: rapl_energy_cores
* perf code: 0x1
*
* pkg counter: consumption of the whole processor package
* event: rapl_energy_pkg
* perf code: 0x2
*
* dram counter: consumption of the dram domain (servers only)
* event: rapl_energy_dram
* perf code: 0x3
*
* gpu counter: consumption of the builtin-gpu domain (client only)
* event: rapl_energy_gpu
* perf code: 0x4
*
* psys counter: consumption of the builtin-psys domain (client only)
* event: rapl_energy_psys
* perf code: 0x5
*
* We manage those counters as free running (read-only). They may be
* use simultaneously by other tools, such as turbostat.
*
* The events only support system-wide mode counting. There is no
* sampling support because it does not make sense and is not
* supported by the RAPL hardware.
*
* Because we want to avoid floating-point operations in the kernel,
* the events are all reported in fixed point arithmetic (32.32).
* Tools must adjust the counts to convert them to Watts using
* the duration of the measurement. Tools may use a function such as
* ldexp(raw_count, -32);
*/
#define pr_fmt(fmt) "RAPL PMU: " fmt
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include "../perf_event.h"
MODULE_LICENSE("GPL");
/*
* RAPL energy status counters
*/
#define RAPL_IDX_PP0_NRG_STAT 0 /* all cores */
#define INTEL_RAPL_PP0 0x1 /* pseudo-encoding */
#define RAPL_IDX_PKG_NRG_STAT 1 /* entire package */
#define INTEL_RAPL_PKG 0x2 /* pseudo-encoding */
#define RAPL_IDX_RAM_NRG_STAT 2 /* DRAM */
#define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */
#define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */
#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */
#define RAPL_IDX_PSYS_NRG_STAT 4 /* psys */
#define INTEL_RAPL_PSYS 0x5 /* pseudo-encoding */
#define NR_RAPL_DOMAINS 0x5
static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
"pp0-core",
"package",
"dram",
"pp1-gpu",
"psys",
};
/* Clients have PP0, PKG */
#define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
1<<RAPL_IDX_PKG_NRG_STAT|\
1<<RAPL_IDX_PP1_NRG_STAT)
/* Servers have PP0, PKG, RAM */
#define RAPL_IDX_SRV (1<<RAPL_IDX_PP0_NRG_STAT|\
1<<RAPL_IDX_PKG_NRG_STAT|\
1<<RAPL_IDX_RAM_NRG_STAT)
/* Servers have PP0, PKG, RAM, PP1 */
#define RAPL_IDX_HSW (1<<RAPL_IDX_PP0_NRG_STAT|\
1<<RAPL_IDX_PKG_NRG_STAT|\
1<<RAPL_IDX_RAM_NRG_STAT|\
1<<RAPL_IDX_PP1_NRG_STAT)
/* SKL clients have PP0, PKG, RAM, PP1, PSYS */
#define RAPL_IDX_SKL_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
1<<RAPL_IDX_PKG_NRG_STAT|\
1<<RAPL_IDX_RAM_NRG_STAT|\
1<<RAPL_IDX_PP1_NRG_STAT|\
1<<RAPL_IDX_PSYS_NRG_STAT)
/* Knights Landing has PKG, RAM */
#define RAPL_IDX_KNL (1<<RAPL_IDX_PKG_NRG_STAT|\
1<<RAPL_IDX_RAM_NRG_STAT)
/*
* event code: LSB 8 bits, passed in attr->config
* any other bit is reserved
*/
#define RAPL_EVENT_MASK 0xFFULL
#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format) \
static ssize_t __rapl_##_var##_show(struct kobject *kobj, \
struct kobj_attribute *attr, \
char *page) \
{ \
BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
return sprintf(page, _format "\n"); \
} \
static struct kobj_attribute format_attr_##_var = \
__ATTR(_name, 0444, __rapl_##_var##_show, NULL)
#define RAPL_CNTR_WIDTH 32
#define RAPL_EVENT_ATTR_STR(_name, v, str) \
static struct perf_pmu_events_attr event_attr_##v = { \
.attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
.id = 0, \
.event_str = str, \
};
struct rapl_pmu {
raw_spinlock_t lock;
int n_active;
int cpu;
struct list_head active_list;
struct pmu *pmu;
ktime_t timer_interval;
struct hrtimer hrtimer;
};
struct rapl_pmus {
struct pmu pmu;
unsigned int maxpkg;
struct rapl_pmu *pmus[];
};
/* 1/2^hw_unit Joule */
static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
static struct rapl_pmus *rapl_pmus;
static cpumask_t rapl_cpu_mask;
static unsigned int rapl_cntr_mask;
static u64 rapl_timer_ms;
static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
{
unsigned int pkgid = topology_logical_package_id(cpu);
/*
* The unsigned check also catches the '-1' return value for non
* existent mappings in the topology map.
*/
return pkgid < rapl_pmus->maxpkg ? rapl_pmus->pmus[pkgid] : NULL;
}
static inline u64 rapl_read_counter(struct perf_event *event)
{
u64 raw;
rdmsrl(event->hw.event_base, raw);
return raw;
}
static inline u64 rapl_scale(u64 v, int cfg)
{
if (cfg > NR_RAPL_DOMAINS) {
pr_warn("Invalid domain %d, failed to scale data\n", cfg);
return v;
}
/*
* scale delta to smallest unit (1/2^32)
* users must then scale back: count * 1/(1e9*2^32) to get Joules
* or use ldexp(count, -32).
* Watts = Joules/Time delta
*/
return v << (32 - rapl_hw_unit[cfg - 1]);
}
static u64 rapl_event_update(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
u64 prev_raw_count, new_raw_count;
s64 delta, sdelta;
int shift = RAPL_CNTR_WIDTH;
again:
prev_raw_count = local64_read(&hwc->prev_count);
rdmsrl(event->hw.event_base, new_raw_count);
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count) {
cpu_relax();
goto again;
}
/*
* Now we have the new raw value and have updated the prev
* timestamp already. We can now calculate the elapsed delta
* (event-)time and add that to the generic event.
*
* Careful, not all hw sign-extends above the physical width
* of the count.
*/
delta = (new_raw_count << shift) - (prev_raw_count << shift);
delta >>= shift;
sdelta = rapl_scale(delta, event->hw.config);
local64_add(sdelta, &event->count);
return new_raw_count;
}
static void rapl_start_hrtimer(struct rapl_pmu *pmu)
{
hrtimer_start(&pmu->hrtimer, pmu->timer_interval,
HRTIMER_MODE_REL_PINNED);
}
static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
{
struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer);
struct perf_event *event;
unsigned long flags;
if (!pmu->n_active)
return HRTIMER_NORESTART;
raw_spin_lock_irqsave(&pmu->lock, flags);
list_for_each_entry(event, &pmu->active_list, active_entry)
rapl_event_update(event);
raw_spin_unlock_irqrestore(&pmu->lock, flags);
hrtimer_forward_now(hrtimer, pmu->timer_interval);
return HRTIMER_RESTART;
}
static void rapl_hrtimer_init(struct rapl_pmu *pmu)
{
struct hrtimer *hr = &pmu->hrtimer;
hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hr->function = rapl_hrtimer_handle;
}
static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
struct perf_event *event)
{
if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
return;
event->hw.state = 0;
list_add_tail(&event->active_entry, &pmu->active_list);
local64_set(&event->hw.prev_count, rapl_read_counter(event));
pmu->n_active++;
if (pmu->n_active == 1)
rapl_start_hrtimer(pmu);
}
static void rapl_pmu_event_start(struct perf_event *event, int mode)
{
struct rapl_pmu *pmu = event->pmu_private;
unsigned long flags;
raw_spin_lock_irqsave(&pmu->lock, flags);
__rapl_pmu_event_start(pmu, event);
raw_spin_unlock_irqrestore(&pmu->lock, flags);
}
static void rapl_pmu_event_stop(struct perf_event *event, int mode)
{
struct rapl_pmu *pmu = event->pmu_private;
struct hw_perf_event *hwc = &event->hw;
unsigned long flags;
raw_spin_lock_irqsave(&pmu->lock, flags);
/* mark event as deactivated and stopped */
if (!(hwc->state & PERF_HES_STOPPED)) {
WARN_ON_ONCE(pmu->n_active <= 0);
pmu->n_active--;
if (pmu->n_active == 0)
hrtimer_cancel(&pmu->hrtimer);
list_del(&event->active_entry);
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
hwc->state |= PERF_HES_STOPPED;
}
/* check if update of sw counter is necessary */
if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
/*
* Drain the remaining delta count out of a event
* that we are disabling:
*/
rapl_event_update(event);
hwc->state |= PERF_HES_UPTODATE;
}
raw_spin_unlock_irqrestore(&pmu->lock, flags);
}
static int rapl_pmu_event_add(struct perf_event *event, int mode)
{
struct rapl_pmu *pmu = event->pmu_private;
struct hw_perf_event *hwc = &event->hw;
unsigned long flags;
raw_spin_lock_irqsave(&pmu->lock, flags);
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
if (mode & PERF_EF_START)
__rapl_pmu_event_start(pmu, event);
raw_spin_unlock_irqrestore(&pmu->lock, flags);
return 0;
}
static void rapl_pmu_event_del(struct perf_event *event, int flags)
{
rapl_pmu_event_stop(event, PERF_EF_UPDATE);
}
static int rapl_pmu_event_init(struct perf_event *event)
{
u64 cfg = event->attr.config & RAPL_EVENT_MASK; /*covered*/
int bit, msr, ret = 0;
struct rapl_pmu *pmu;
/* only look at RAPL events */
if (event->attr.type != rapl_pmus->pmu.type)
return -ENOENT;
/* check only supported bits are set */
if (event->attr.config & ~RAPL_EVENT_MASK) /*covered*/
return -EINVAL;
if (event->cpu < 0) /*covered*/
return -EINVAL;
event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; /*covered*/
/*
* check event is known (determines counter)
*/
switch (cfg) { /*covered*/
case INTEL_RAPL_PP0:
bit = RAPL_IDX_PP0_NRG_STAT;
msr = MSR_PP0_ENERGY_STATUS;
break;
case INTEL_RAPL_PKG:
bit = RAPL_IDX_PKG_NRG_STAT;
msr = MSR_PKG_ENERGY_STAT