| 1 2 11 5 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Definitions for the 'struct skb_array' datastructure. * * Author: * Michael S. Tsirkin <mst@redhat.com> * * Copyright (C) 2016 Red Hat, Inc. * * Limited-size FIFO of skbs. Can be used more or less whenever * sk_buff_head can be used, except you need to know the queue size in * advance. * Implemented as a type-safe wrapper around ptr_ring. */ #ifndef _LINUX_SKB_ARRAY_H #define _LINUX_SKB_ARRAY_H 1 #ifdef __KERNEL__ #include <linux/ptr_ring.h> #include <linux/skbuff.h> #include <linux/if_vlan.h> #endif struct skb_array { struct ptr_ring ring; }; /* Might be slightly faster than skb_array_full below, but callers invoking * this in a loop must use a compiler barrier, for example cpu_relax(). */ static inline bool __skb_array_full(struct skb_array *a) { return __ptr_ring_full(&a->ring); } static inline bool skb_array_full(struct skb_array *a) { return ptr_ring_full(&a->ring); } static inline int skb_array_produce(struct skb_array *a, struct sk_buff *skb) { return ptr_ring_produce(&a->ring, skb); } static inline int skb_array_produce_irq(struct skb_array *a, struct sk_buff *skb) { return ptr_ring_produce_irq(&a->ring, skb); } static inline int skb_array_produce_bh(struct skb_array *a, struct sk_buff *skb) { return ptr_ring_produce_bh(&a->ring, skb); } static inline int skb_array_produce_any(struct skb_array *a, struct sk_buff *skb) { return ptr_ring_produce_any(&a->ring, skb); } /* Might be slightly faster than skb_array_empty below, but only safe if the * array is never resized. Also, callers invoking this in a loop must take care * to use a compiler barrier, for example cpu_relax(). */ static inline bool __skb_array_empty(struct skb_array *a) { return __ptr_ring_empty(&a->ring); } static inline struct sk_buff *__skb_array_peek(struct skb_array *a) { return __ptr_ring_peek(&a->ring); } static inline bool skb_array_empty(struct skb_array *a) { return ptr_ring_empty(&a->ring); } static inline bool skb_array_empty_bh(struct skb_array *a) { return ptr_ring_empty_bh(&a->ring); } static inline bool skb_array_empty_irq(struct skb_array *a) { return ptr_ring_empty_irq(&a->ring); } static inline bool skb_array_empty_any(struct skb_array *a) { return ptr_ring_empty_any(&a->ring); } static inline struct sk_buff *__skb_array_consume(struct skb_array *a) { return __ptr_ring_consume(&a->ring); } static inline struct sk_buff *skb_array_consume(struct skb_array *a) { return ptr_ring_consume(&a->ring); } static inline int skb_array_consume_batched(struct skb_array *a, struct sk_buff **array, int n) { return ptr_ring_consume_batched(&a->ring, (void **)array, n); } static inline struct sk_buff *skb_array_consume_irq(struct skb_array *a) { return ptr_ring_consume_irq(&a->ring); } static inline int skb_array_consume_batched_irq(struct skb_array *a, struct sk_buff **array, int n) { return ptr_ring_consume_batched_irq(&a->ring, (void **)array, n); } static inline struct sk_buff *skb_array_consume_any(struct skb_array *a) { return ptr_ring_consume_any(&a->ring); } static inline int skb_array_consume_batched_any(struct skb_array *a, struct sk_buff **array, int n) { return ptr_ring_consume_batched_any(&a->ring, (void **)array, n); } static inline struct sk_buff *skb_array_consume_bh(struct skb_array *a) { return ptr_ring_consume_bh(&a->ring); } static inline int skb_array_consume_batched_bh(struct skb_array *a, struct sk_buff **array, int n) { return ptr_ring_consume_batched_bh(&a->ring, (void **)array, n); } static inline int __skb_array_len_with_tag(struct sk_buff *skb) { if (likely(skb)) { int len = skb->len; if (skb_vlan_tag_present(skb)) len += VLAN_HLEN; return len; } else { return 0; } } static inline int skb_array_peek_len(struct skb_array *a) { return PTR_RING_PEEK_CALL(&a->ring, __skb_array_len_with_tag); } static inline int skb_array_peek_len_irq(struct skb_array *a) { return PTR_RING_PEEK_CALL_IRQ(&a->ring, __skb_array_len_with_tag); } static inline int skb_array_peek_len_bh(struct skb_array *a) { return PTR_RING_PEEK_CALL_BH(&a->ring, __skb_array_len_with_tag); } static inline int skb_array_peek_len_any(struct skb_array *a) { return PTR_RING_PEEK_CALL_ANY(&a->ring, __skb_array_len_with_tag); } static inline int skb_array_init_noprof(struct skb_array *a, int size, gfp_t gfp) { return ptr_ring_init_noprof(&a->ring, size, gfp); } #define skb_array_init(...) alloc_hooks(skb_array_init_noprof(__VA_ARGS__)) static void __skb_array_destroy_skb(void *ptr) { kfree_skb(ptr); } static inline void skb_array_unconsume(struct skb_array *a, struct sk_buff **skbs, int n) { ptr_ring_unconsume(&a->ring, (void **)skbs, n, __skb_array_destroy_skb); } static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) { return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb); } static inline int skb_array_resize_multiple_bh_noprof(struct skb_array **rings, int nrings, unsigned int size, gfp_t gfp) { BUILD_BUG_ON(offsetof(struct skb_array, ring)); return ptr_ring_resize_multiple_bh_noprof((struct ptr_ring **)rings, nrings, size, gfp, __skb_array_destroy_skb); } #define skb_array_resize_multiple_bh(...) \ alloc_hooks(skb_array_resize_multiple_bh_noprof(__VA_ARGS__)) static inline void skb_array_cleanup(struct skb_array *a) { ptr_ring_cleanup(&a->ring, __skb_array_destroy_skb); } #endif /* _LINUX_SKB_ARRAY_H */ |
| 12 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 | #ifndef __LINUX_ERSPAN_H #define __LINUX_ERSPAN_H /* * GRE header for ERSPAN type I encapsulation (4 octets [34:37]) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |0|0|0|0|0|00000|000000000|00000| Protocol Type for ERSPAN | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * The Type I ERSPAN frame format is based on the barebones IP + GRE * encapsulation (as described above) on top of the raw mirrored frame. * There is no extra ERSPAN header. * * * GRE header for ERSPAN type II and II encapsulation (8 octets [34:41]) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |0|0|0|1|0|00000|000000000|00000| Protocol Type for ERSPAN | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Sequence Number (increments per packet per session) | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * Note that in the above GRE header [RFC1701] out of the C, R, K, S, * s, Recur, Flags, Version fields only S (bit 03) is set to 1. The * other fields are set to zero, so only a sequence number follows. * * ERSPAN Version 1 (Type II) header (8 octets [42:49]) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Ver | VLAN | COS | En|T| Session ID | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Reserved | Index | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * * ERSPAN Version 2 (Type III) header (12 octets [42:49]) * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Ver | VLAN | COS |BSO|T| Session ID | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Timestamp | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | SGT |P| FT | Hw ID |D|Gra|O| * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * Platform Specific SubHeader (8 octets, optional) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Platf ID | Platform Specific Info | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Platform Specific Info | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * GRE proto ERSPAN type I/II = 0x88BE, type III = 0x22EB */ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/skbuff.h> #include <uapi/linux/erspan.h> #define ERSPAN_VERSION 0x1 /* ERSPAN type II */ #define VER_MASK 0xf000 #define VLAN_MASK 0x0fff #define COS_MASK 0xe000 #define EN_MASK 0x1800 #define T_MASK 0x0400 #define ID_MASK 0x03ff #define INDEX_MASK 0xfffff #define ERSPAN_VERSION2 0x2 /* ERSPAN type III*/ #define BSO_MASK EN_MASK #define SGT_MASK 0xffff0000 #define P_MASK 0x8000 #define FT_MASK 0x7c00 #define HWID_MASK 0x03f0 #define DIR_MASK 0x0008 #define GRA_MASK 0x0006 #define O_MASK 0x0001 #define HWID_OFFSET 4 #define DIR_OFFSET 3 enum erspan_encap_type { ERSPAN_ENCAP_NOVLAN = 0x0, /* originally without VLAN tag */ ERSPAN_ENCAP_ISL = 0x1, /* originally ISL encapsulated */ ERSPAN_ENCAP_8021Q = 0x2, /* originally 802.1Q encapsulated */ ERSPAN_ENCAP_INFRAME = 0x3, /* VLAN tag preserved in frame */ }; #define ERSPAN_V1_MDSIZE 4 #define ERSPAN_V2_MDSIZE 8 struct erspan_base_hdr { #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 vlan_upper:4, ver:4; __u8 vlan:8; __u8 session_id_upper:2, t:1, en:2, cos:3; __u8 session_id:8; #elif defined(__BIG_ENDIAN_BITFIELD) __u8 ver: 4, vlan_upper:4; __u8 vlan:8; __u8 cos:3, en:2, t:1, session_id_upper:2; __u8 session_id:8; #else #error "Please fix <asm/byteorder.h>" #endif }; static inline void set_session_id(struct erspan_base_hdr *ershdr, u16 id) { ershdr->session_id = id & 0xff; ershdr->session_id_upper = (id >> 8) & 0x3; } static inline u16 get_session_id(const struct erspan_base_hdr *ershdr) { return (ershdr->session_id_upper << 8) + ershdr->session_id; } static inline void set_vlan(struct erspan_base_hdr *ershdr, u16 vlan) { ershdr->vlan = vlan & 0xff; ershdr->vlan_upper = (vlan >> 8) & 0xf; } static inline u16 get_vlan(const struct erspan_base_hdr *ershdr) { return (ershdr->vlan_upper << 8) + ershdr->vlan; } static inline void set_hwid(struct erspan_md2 *md2, u8 hwid) { md2->hwid = hwid & 0xf; md2->hwid_upper = (hwid >> 4) & 0x3; } static inline u8 get_hwid(const struct erspan_md2 *md2) { return (md2->hwid_upper << 4) + md2->hwid; } static inline int erspan_hdr_len(int version) { if (version == 0) return 0; return sizeof(struct erspan_base_hdr) + (version == 1 ? ERSPAN_V1_MDSIZE : ERSPAN_V2_MDSIZE); } static inline u8 tos_to_cos(u8 tos) { u8 dscp, cos; dscp = tos >> 2; cos = dscp >> 3; return cos; } static inline void erspan_build_header(struct sk_buff *skb, u32 id, u32 index, bool truncate, bool is_ipv4) { struct ethhdr *eth = (struct ethhdr *)skb->data; enum erspan_encap_type enc_type; struct erspan_base_hdr *ershdr; struct qtag_prefix { __be16 eth_type; __be16 tci; } *qp; u16 vlan_tci = 0; u8 tos; __be32 *idx; tos = is_ipv4 ? ip_hdr(skb)->tos : (ipv6_hdr(skb)->priority << 4) + (ipv6_hdr(skb)->flow_lbl[0] >> 4); enc_type = ERSPAN_ENCAP_NOVLAN; /* If mirrored packet has vlan tag, extract tci and * preserve vlan header in the mirrored frame. */ if (eth->h_proto == htons(ETH_P_8021Q)) { qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN); vlan_tci = ntohs(qp->tci); enc_type = ERSPAN_ENCAP_INFRAME; } skb_push(skb, sizeof(*ershdr) + ERSPAN_V1_MDSIZE); ershdr = (struct erspan_base_hdr *)skb->data; memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V1_MDSIZE); /* Build base header */ ershdr->ver = ERSPAN_VERSION; ershdr->cos = tos_to_cos(tos); ershdr->en = enc_type; ershdr->t = truncate; set_vlan(ershdr, vlan_tci); set_session_id(ershdr, id); /* Build metadata */ idx = (__be32 *)(ershdr + 1); *idx = htonl(index & INDEX_MASK); } /* ERSPAN GRA: timestamp granularity * 00b --> granularity = 100 microseconds * 01b --> granularity = 100 nanoseconds * 10b --> granularity = IEEE 1588 * Here we only support 100 microseconds. */ static inline __be32 erspan_get_timestamp(void) { u64 h_usecs; ktime_t kt; kt = ktime_get_real(); h_usecs = ktime_divns(kt, 100 * NSEC_PER_USEC); /* ERSPAN base header only has 32-bit, * so it wraps around 4 days. */ return htonl((u32)h_usecs); } /* ERSPAN BSO (Bad/Short/Oversized), see RFC1757 * 00b --> Good frame with no error, or unknown integrity * 01b --> Payload is a Short Frame * 10b --> Payload is an Oversized Frame * 11b --> Payload is a Bad Frame with CRC or Alignment Error */ enum erspan_bso { BSO_NOERROR = 0x0, BSO_SHORT = 0x1, BSO_OVERSIZED = 0x2, BSO_BAD = 0x3, }; static inline u8 erspan_detect_bso(struct sk_buff *skb) { /* BSO_BAD is not handled because the frame CRC * or alignment error information is in FCS. */ if (skb->len < ETH_ZLEN) return BSO_SHORT; if (skb->len > ETH_FRAME_LEN) return BSO_OVERSIZED; return BSO_NOERROR; } static inline void erspan_build_header_v2(struct sk_buff *skb, u32 id, u8 direction, u16 hwid, bool truncate, bool is_ipv4) { struct ethhdr *eth = (struct ethhdr *)skb->data; struct erspan_base_hdr *ershdr; struct erspan_md2 *md2; struct qtag_prefix { __be16 eth_type; __be16 tci; } *qp; u16 vlan_tci = 0; u8 gra = 0; /* 100 usec */ u8 bso = 0; /* Bad/Short/Oversized */ u8 sgt = 0; u8 tos; tos = is_ipv4 ? ip_hdr(skb)->tos : (ipv6_hdr(skb)->priority << 4) + (ipv6_hdr(skb)->flow_lbl[0] >> 4); /* Unlike v1, v2 does not have En field, * so only extract vlan tci field. */ if (eth->h_proto == htons(ETH_P_8021Q)) { qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN); vlan_tci = ntohs(qp->tci); } bso = erspan_detect_bso(skb); skb_push(skb, sizeof(*ershdr) + ERSPAN_V2_MDSIZE); ershdr = (struct erspan_base_hdr *)skb->data; memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V2_MDSIZE); /* Build base header */ ershdr->ver = ERSPAN_VERSION2; ershdr->cos = tos_to_cos(tos); ershdr->en = bso; ershdr->t = truncate; set_vlan(ershdr, vlan_tci); set_session_id(ershdr, id); /* Build metadata */ md2 = (struct erspan_md2 *)(ershdr + 1); md2->timestamp = erspan_get_timestamp(); md2->sgt = htons(sgt); md2->p = 1; md2->ft = 0; md2->dir = direction; md2->gra = gra; md2->o = 0; set_hwid(md2, hwid); } #endif |
| 61 1 1 1 1 2 2 2 1 2 2 1 2 5 2 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 35 35 35 1 1 1 1 1 1 1 1 1 1 38 49 49 49 38 38 38 38 35 35 35 38 37 38 35 35 1 1 34 35 35 3 3 38 38 37 7 7 7 1 7 7 7 7 5 2 2 1 1 2 2 1 1 7 1 1 1 1 1 1 4 3 3 1 1 4 35 34 1 35 35 35 35 1 36 2 2 7 1 7 1 1 7 1 1 1 1 1 38 38 35 35 35 35 49 3 37 3 38 3 3 3 38 38 38 38 38 2 1 38 1 1 1 1 38 1 34 33 1 35 35 34 34 36 36 38 38 38 35 35 34 49 38 34 49 4 4 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 9 9 7 3 1 6 3 1 1 2 1 1 3 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 9 9 9 7 5 3 4 2 1 1 1 1 1 1 1 9 6 6 4 1 3 2 1 1 1 1 1 1 1 1 1 5 1 4 3 1 3 3 1 2 1 1 2 1 2 5 1 1 1 1 2 2 2 4 1 3 2 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 65 65 64 60 65 65 65 65 11 60 64 64 64 65 64 65 65 64 61 61 61 61 61 5 5 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 | // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_api.c Packet classifier API. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * Changes: * * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support */ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/err.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/kmod.h> #include <linux/slab.h> #include <linux/idr.h> #include <linux/jhash.h> #include <linux/rculist.h> #include <linux/rhashtable.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_pedit.h> #include <net/tc_act/tc_mirred.h> #include <net/tc_act/tc_vlan.h> #include <net/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_csum.h> #include <net/tc_act/tc_gact.h> #include <net/tc_act/tc_police.h> #include <net/tc_act/tc_sample.h> #include <net/tc_act/tc_skbedit.h> #include <net/tc_act/tc_ct.h> #include <net/tc_act/tc_mpls.h> #include <net/tc_act/tc_gate.h> #include <net/flow_offload.h> #include <net/tc_wrapper.h> /* The list of all installed classifier types */ static LIST_HEAD(tcf_proto_base); /* Protects list of registered TC modules. It is pure SMP lock. */ static DEFINE_RWLOCK(cls_mod_lock); static struct xarray tcf_exts_miss_cookies_xa; struct tcf_exts_miss_cookie_node { const struct tcf_chain *chain; const struct tcf_proto *tp; const struct tcf_exts *exts; u32 chain_index; u32 tp_prio; u32 handle; u32 miss_cookie_base; struct rcu_head rcu; }; /* Each tc action entry cookie will be comprised of 32bit miss_cookie_base + * action index in the exts tc actions array. */ union tcf_exts_miss_cookie { struct { u32 miss_cookie_base; u32 act_index; }; u64 miss_cookie; }; #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) static int tcf_exts_miss_cookie_base_alloc(struct tcf_exts *exts, struct tcf_proto *tp, u32 handle) { struct tcf_exts_miss_cookie_node *n; static u32 next; int err; if (WARN_ON(!handle || !tp->ops->get_exts)) return -EINVAL; n = kzalloc(sizeof(*n), GFP_KERNEL); if (!n) return -ENOMEM; n->chain_index = tp->chain->index; n->chain = tp->chain; n->tp_prio = tp->prio; n->tp = tp; n->exts = exts; n->handle = handle; err = xa_alloc_cyclic(&tcf_exts_miss_cookies_xa, &n->miss_cookie_base, n, xa_limit_32b, &next, GFP_KERNEL); if (err < 0) goto err_xa_alloc; exts->miss_cookie_node = n; return 0; err_xa_alloc: kfree(n); return err; } static void tcf_exts_miss_cookie_base_destroy(struct tcf_exts *exts) { struct tcf_exts_miss_cookie_node *n; if (!exts->miss_cookie_node) return; n = exts->miss_cookie_node; xa_erase(&tcf_exts_miss_cookies_xa, n->miss_cookie_base); kfree_rcu(n, rcu); } static struct tcf_exts_miss_cookie_node * tcf_exts_miss_cookie_lookup(u64 miss_cookie, int *act_index) { union tcf_exts_miss_cookie mc = { .miss_cookie = miss_cookie, }; *act_index = mc.act_index; return xa_load(&tcf_exts_miss_cookies_xa, mc.miss_cookie_base); } #else /* IS_ENABLED(CONFIG_NET_TC_SKB_EXT) */ static int tcf_exts_miss_cookie_base_alloc(struct tcf_exts *exts, struct tcf_proto *tp, u32 handle) { return 0; } static void tcf_exts_miss_cookie_base_destroy(struct tcf_exts *exts) { } #endif /* IS_ENABLED(CONFIG_NET_TC_SKB_EXT) */ static u64 tcf_exts_miss_cookie_get(u32 miss_cookie_base, int act_index) { union tcf_exts_miss_cookie mc = { .act_index = act_index, }; if (!miss_cookie_base) return 0; mc.miss_cookie_base = miss_cookie_base; return mc.miss_cookie; } #ifdef CONFIG_NET_CLS_ACT DEFINE_STATIC_KEY_FALSE(tc_skb_ext_tc); EXPORT_SYMBOL(tc_skb_ext_tc); void tc_skb_ext_tc_enable(void) { static_branch_inc(&tc_skb_ext_tc); } EXPORT_SYMBOL(tc_skb_ext_tc_enable); void tc_skb_ext_tc_disable(void) { static_branch_dec(&tc_skb_ext_tc); } EXPORT_SYMBOL(tc_skb_ext_tc_disable); #endif static u32 destroy_obj_hashfn(const struct tcf_proto *tp) { return jhash_3words(tp->chain->index, tp->prio, (__force __u32)tp->protocol, 0); } static void tcf_proto_signal_destroying(struct tcf_chain *chain, struct tcf_proto *tp) { struct tcf_block *block = chain->block; mutex_lock(&block->proto_destroy_lock); hash_add_rcu(block->proto_destroy_ht, &tp->destroy_ht_node, destroy_obj_hashfn(tp)); mutex_unlock(&block->proto_destroy_lock); } static bool tcf_proto_cmp(const struct tcf_proto *tp1, const struct tcf_proto *tp2) { return tp1->chain->index == tp2->chain->index && tp1->prio == tp2->prio && tp1->protocol == tp2->protocol; } static bool tcf_proto_exists_destroying(struct tcf_chain *chain, struct tcf_proto *tp) { u32 hash = destroy_obj_hashfn(tp); struct tcf_proto *iter; bool found = false; rcu_read_lock(); hash_for_each_possible_rcu(chain->block->proto_destroy_ht, iter, destroy_ht_node, hash) { if (tcf_proto_cmp(tp, iter)) { found = true; break; } } rcu_read_unlock(); return found; } static void tcf_proto_signal_destroyed(struct tcf_chain *chain, struct tcf_proto *tp) { struct tcf_block *block = chain->block; mutex_lock(&block->proto_destroy_lock); if (hash_hashed(&tp->destroy_ht_node)) hash_del_rcu(&tp->destroy_ht_node); mutex_unlock(&block->proto_destroy_lock); } /* Find classifier type by string name */ static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind) { const struct tcf_proto_ops *t, *res = NULL; if (kind) { read_lock(&cls_mod_lock); list_for_each_entry(t, &tcf_proto_base, head) { if (strcmp(kind, t->kind) == 0) { if (try_module_get(t->owner)) res = t; break; } } read_unlock(&cls_mod_lock); } return res; } static const struct tcf_proto_ops * tcf_proto_lookup_ops(const char *kind, bool rtnl_held, struct netlink_ext_ack *extack) { const struct tcf_proto_ops *ops; ops = __tcf_proto_lookup_ops(kind); if (ops) return ops; #ifdef CONFIG_MODULES if (rtnl_held) rtnl_unlock(); request_module(NET_CLS_ALIAS_PREFIX "%s", kind); if (rtnl_held) rtnl_lock(); ops = __tcf_proto_lookup_ops(kind); /* We dropped the RTNL semaphore in order to perform * the module load. So, even if we succeeded in loading * the module we have to replay the request. We indicate * this using -EAGAIN. */ if (ops) { module_put(ops->owner); return ERR_PTR(-EAGAIN); } #endif NL_SET_ERR_MSG(extack, "TC classifier not found"); return ERR_PTR(-ENOENT); } /* Register(unregister) new classifier type */ int register_tcf_proto_ops(struct tcf_proto_ops *ops) { struct tcf_proto_ops *t; int rc = -EEXIST; write_lock(&cls_mod_lock); list_for_each_entry(t, &tcf_proto_base, head) if (!strcmp(ops->kind, t->kind)) goto out; list_add_tail(&ops->head, &tcf_proto_base); rc = 0; out: write_unlock(&cls_mod_lock); return rc; } EXPORT_SYMBOL(register_tcf_proto_ops); static struct workqueue_struct *tc_filter_wq; void unregister_tcf_proto_ops(struct tcf_proto_ops *ops) { struct tcf_proto_ops *t; int rc = -ENOENT; /* Wait for outstanding call_rcu()s, if any, from a * tcf_proto_ops's destroy() handler. */ rcu_barrier(); flush_workqueue(tc_filter_wq); write_lock(&cls_mod_lock); list_for_each_entry(t, &tcf_proto_base, head) { if (t == ops) { list_del(&t->head); rc = 0; break; } } write_unlock(&cls_mod_lock); WARN(rc, "unregister tc filter kind(%s) failed %d\n", ops->kind, rc); } EXPORT_SYMBOL(unregister_tcf_proto_ops); bool tcf_queue_work(struct rcu_work *rwork, work_func_t func) { INIT_RCU_WORK(rwork, func); return queue_rcu_work(tc_filter_wq, rwork); } EXPORT_SYMBOL(tcf_queue_work); /* Select new prio value from the range, managed by kernel. */ static inline u32 tcf_auto_prio(struct tcf_proto *tp) { u32 first = TC_H_MAKE(0xC0000000U, 0U); if (tp) first = tp->prio - 1; return TC_H_MAJ(first); } static bool tcf_proto_check_kind(struct nlattr *kind, char *name) { if (kind) return nla_strscpy(name, kind, IFNAMSIZ) < 0; memset(name, 0, IFNAMSIZ); return false; } static bool tcf_proto_is_unlocked(const char *kind) { const struct tcf_proto_ops *ops; bool ret; if (strlen(kind) == 0) return false; ops = tcf_proto_lookup_ops(kind, false, NULL); /* On error return false to take rtnl lock. Proto lookup/create * functions will perform lookup again and properly handle errors. */ if (IS_ERR(ops)) return false; ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED); module_put(ops->owner); return ret; } static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, u32 prio, struct tcf_chain *chain, bool rtnl_held, struct netlink_ext_ack *extack) { struct tcf_proto *tp; int err; tp = kzalloc(sizeof(*tp), GFP_KERNEL); if (!tp) return ERR_PTR(-ENOBUFS); tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack); if (IS_ERR(tp->ops)) { err = PTR_ERR(tp->ops); goto errout; } tp->classify = tp->ops->classify; tp->protocol = protocol; tp->prio = prio; tp->chain = chain; tp->usesw = !tp->ops->reoffload; spin_lock_init(&tp->lock); refcount_set(&tp->refcnt, 1); err = tp->ops->init(tp); if (err) { module_put(tp->ops->owner); goto errout; } return tp; errout: kfree(tp); return ERR_PTR(err); } static void tcf_proto_get(struct tcf_proto *tp) { refcount_inc(&tp->refcnt); } static void tcf_proto_count_usesw(struct tcf_proto *tp, bool add) { #ifdef CONFIG_NET_CLS_ACT struct tcf_block *block = tp->chain->block; bool counted = false; if (!add) { if (tp->usesw && tp->counted) { if (!atomic_dec_return(&block->useswcnt)) static_branch_dec(&tcf_sw_enabled_key); tp->counted = false; } return; } spin_lock(&tp->lock); if (tp->usesw && !tp->counted) { counted = true; tp->counted = true; } spin_unlock(&tp->lock); if (counted && atomic_inc_return(&block->useswcnt) == 1) static_branch_inc(&tcf_sw_enabled_key); #endif } static void tcf_chain_put(struct tcf_chain *chain); static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held, bool sig_destroy, struct netlink_ext_ack *extack) { tp->ops->destroy(tp, rtnl_held, extack); tcf_proto_count_usesw(tp, false); if (sig_destroy) tcf_proto_signal_destroyed(tp->chain, tp); tcf_chain_put(tp->chain); module_put(tp->ops->owner); kfree_rcu(tp, rcu); } static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { if (refcount_dec_and_test(&tp->refcnt)) tcf_proto_destroy(tp, rtnl_held, true, extack); } static bool tcf_proto_check_delete(struct tcf_proto *tp) { if (tp->ops->delete_empty) return tp->ops->delete_empty(tp); tp->deleting = true; return tp->deleting; } static void tcf_proto_mark_delete(struct tcf_proto *tp) { spin_lock(&tp->lock); tp->deleting = true; spin_unlock(&tp->lock); } static bool tcf_proto_is_deleting(struct tcf_proto *tp) { bool deleting; spin_lock(&tp->lock); deleting = tp->deleting; spin_unlock(&tp->lock); return deleting; } #define ASSERT_BLOCK_LOCKED(block) \ lockdep_assert_held(&(block)->lock) struct tcf_filter_chain_list_item { struct list_head list; tcf_chain_head_change_t *chain_head_change; void *chain_head_change_priv; }; static struct tcf_chain *tcf_chain_create(struct tcf_block *block, u32 chain_index) { struct tcf_chain *chain; ASSERT_BLOCK_LOCKED(block); chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (!chain) return NULL; list_add_tail_rcu(&chain->list, &block->chain_list); mutex_init(&chain->filter_chain_lock); chain->block = block; chain->index = chain_index; chain->refcnt = 1; if (!chain->index) block->chain0.chain = chain; return chain; } static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item, struct tcf_proto *tp_head) { if (item->chain_head_change) item->chain_head_change(tp_head, item->chain_head_change_priv); } static void tcf_chain0_head_change(struct tcf_chain *chain, struct tcf_proto *tp_head) { struct tcf_filter_chain_list_item *item; struct tcf_block *block = chain->block; if (chain->index) return; mutex_lock(&block->lock); list_for_each_entry(item, &block->chain0.filter_chain_list, list) tcf_chain_head_change_item(item, tp_head); mutex_unlock(&block->lock); } /* Returns true if block can be safely freed. */ static bool tcf_chain_detach(struct tcf_chain *chain) { struct tcf_block *block = chain->block; ASSERT_BLOCK_LOCKED(block); list_del_rcu(&chain->list); if (!chain->index) block->chain0.chain = NULL; if (list_empty(&block->chain_list) && refcount_read(&block->refcnt) == 0) return true; return false; } static void tcf_block_destroy(struct tcf_block *block) { mutex_destroy(&block->lock); mutex_destroy(&block->proto_destroy_lock); xa_destroy(&block->ports); kfree_rcu(block, rcu); } static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block) { struct tcf_block *block = chain->block; mutex_destroy(&chain->filter_chain_lock); kfree_rcu(chain, rcu); if (free_block) tcf_block_destroy(block); } static void tcf_chain_hold(struct tcf_chain *chain) { ASSERT_BLOCK_LOCKED(chain->block); ++chain->refcnt; } static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain) { ASSERT_BLOCK_LOCKED(chain->block); /* In case all the references are action references, this * chain should not be shown to the user. */ return chain->refcnt == chain->action_refcnt; } static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block, u32 chain_index) { struct tcf_chain *chain; ASSERT_BLOCK_LOCKED(block); list_for_each_entry(chain, &block->chain_list, list) { if (chain->index == chain_index) return chain; } return NULL; } #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) static struct tcf_chain *tcf_chain_lookup_rcu(const struct tcf_block *block, u32 chain_index) { struct tcf_chain *chain; list_for_each_entry_rcu(chain, &block->chain_list, list) { if (chain->index == chain_index) return chain; } return NULL; } #endif static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, u32 seq, u16 flags, int event, bool unicast, struct netlink_ext_ack *extack); static struct tcf_chain *__tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create, bool by_act) { struct tcf_chain *chain = NULL; bool is_first_reference; mutex_lock(&block->lock); chain = tcf_chain_lookup(block, chain_index); if (chain) { tcf_chain_hold(chain); } else { if (!create) goto errout; chain = tcf_chain_create(block, chain_index); if (!chain) goto errout; } if (by_act) ++chain->action_refcnt; is_first_reference = chain->refcnt - chain->action_refcnt == 1; mutex_unlock(&block->lock); /* Send notification only in case we got the first * non-action reference. Until then, the chain acts only as * a placeholder for actions pointing to it and user ought * not know about them. */ if (is_first_reference && !by_act) tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL, RTM_NEWCHAIN, false, NULL); return chain; errout: mutex_unlock(&block->lock); return chain; } static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create) { return __tcf_chain_get(block, chain_index, create, false); } struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index) { return __tcf_chain_get(block, chain_index, true, true); } EXPORT_SYMBOL(tcf_chain_get_by_act); static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops, void *tmplt_priv); static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, void *tmplt_priv, u32 chain_index, struct tcf_block *block, struct sk_buff *oskb, u32 seq, u16 flags); static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, bool explicitly_created) { struct tcf_block *block = chain->block; const struct tcf_proto_ops *tmplt_ops; unsigned int refcnt, non_act_refcnt; bool free_block = false; void *tmplt_priv; mutex_lock(&block->lock); if (explicitly_created) { if (!chain->explicitly_created) { mutex_unlock(&block->lock); return; } chain->explicitly_created = false; } if (by_act) chain->action_refcnt--; /* tc_chain_notify_delete can't be called while holding block lock. * However, when block is unlocked chain can be changed concurrently, so * save these to temporary variables. */ refcnt = --chain->refcnt; non_act_refcnt = refcnt - chain->action_refcnt; tmplt_ops = chain->tmplt_ops; tmplt_priv = chain->tmplt_priv; if (non_act_refcnt == chain->explicitly_created && !by_act) { if (non_act_refcnt == 0) tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index, block, NULL, 0, 0); /* Last reference to chain, no need to lock. */ chain->flushing = false; } if (refcnt == 0) free_block = tcf_chain_detach(chain); mutex_unlock(&block->lock); if (refcnt == 0) { tc_chain_tmplt_del(tmplt_ops, tmplt_priv); tcf_chain_destroy(chain, free_block); } } static void tcf_chain_put(struct tcf_chain *chain) { __tcf_chain_put(chain, false, false); } void tcf_chain_put_by_act(struct tcf_chain *chain) { __tcf_chain_put(chain, true, false); } EXPORT_SYMBOL(tcf_chain_put_by_act); static void tcf_chain_put_explicitly_created(struct tcf_chain *chain) { __tcf_chain_put(chain, false, true); } static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held) { struct tcf_proto *tp, *tp_next; mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_dereference(chain->filter_chain, chain); while (tp) { tp_next = rcu_dereference_protected(tp->next, 1); tcf_proto_signal_destroying(chain, tp); tp = tp_next; } tp = tcf_chain_dereference(chain->filter_chain, chain); RCU_INIT_POINTER(chain->filter_chain, NULL); tcf_chain0_head_change(chain, NULL); chain->flushing = true; mutex_unlock(&chain->filter_chain_lock); while (tp) { tp_next = rcu_dereference_protected(tp->next, 1); tcf_proto_put(tp, rtnl_held, NULL); tp = tp_next; } } static int tcf_block_setup(struct tcf_block *block, struct flow_block_offload *bo); static void tcf_block_offload_init(struct flow_block_offload *bo, struct net_device *dev, struct Qdisc *sch, enum flow_block_command command, enum flow_block_binder_type binder_type, struct flow_block *flow_block, bool shared, struct netlink_ext_ack *extack) { bo->net = dev_net(dev); bo->command = command; bo->binder_type = binder_type; bo->block = flow_block; bo->block_shared = shared; bo->extack = extack; bo->sch = sch; bo->cb_list_head = &flow_block->cb_list; INIT_LIST_HEAD(&bo->cb_list); } static void tcf_block_unbind(struct tcf_block *block, struct flow_block_offload *bo); static void tc_block_indr_cleanup(struct flow_block_cb *block_cb) { struct tcf_block *block = block_cb->indr.data; struct net_device *dev = block_cb->indr.dev; struct Qdisc *sch = block_cb->indr.sch; struct netlink_ext_ack extack = {}; struct flow_block_offload bo = {}; tcf_block_offload_init(&bo, dev, sch, FLOW_BLOCK_UNBIND, block_cb->indr.binder_type, &block->flow_block, tcf_block_shared(block), &extack); rtnl_lock(); down_write(&block->cb_lock); list_del(&block_cb->driver_list); list_move(&block_cb->list, &bo.cb_list); tcf_block_unbind(block, &bo); up_write(&block->cb_lock); rtnl_unlock(); } static bool tcf_block_offload_in_use(struct tcf_block *block) { return atomic_read(&block->offloadcnt); } static int tcf_block_offload_cmd(struct tcf_block *block, struct net_device *dev, struct Qdisc *sch, struct tcf_block_ext_info *ei, enum flow_block_command command, struct netlink_ext_ack *extack) { struct flow_block_offload bo = {}; tcf_block_offload_init(&bo, dev, sch, command, ei->binder_type, &block->flow_block, tcf_block_shared(block), extack); if (dev->netdev_ops->ndo_setup_tc) { int err; err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); if (err < 0) { if (err != -EOPNOTSUPP) NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed"); return err; } return tcf_block_setup(block, &bo); } flow_indr_dev_setup_offload(dev, sch, TC_SETUP_BLOCK, block, &bo, tc_block_indr_cleanup); tcf_block_setup(block, &bo); return -EOPNOTSUPP; } static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei, struct netlink_ext_ack *extack) { struct net_device *dev = q->dev_queue->dev; int err; down_write(&block->cb_lock); /* If tc offload feature is disabled and the block we try to bind * to already has some offloaded filters, forbid to bind. */ if (dev->netdev_ops->ndo_setup_tc && !tc_can_offload(dev) && tcf_block_offload_in_use(block)) { NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled"); err = -EOPNOTSUPP; goto err_unlock; } err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_BIND, extack); if (err == -EOPNOTSUPP) goto no_offload_dev_inc; if (err) goto err_unlock; up_write(&block->cb_lock); return 0; no_offload_dev_inc: if (tcf_block_offload_in_use(block)) goto err_unlock; err = 0; block->nooffloaddevcnt++; err_unlock: up_write(&block->cb_lock); return err; } static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei) { struct net_device *dev = q->dev_queue->dev; int err; down_write(&block->cb_lock); err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_UNBIND, NULL); if (err == -EOPNOTSUPP) goto no_offload_dev_dec; up_write(&block->cb_lock); return; no_offload_dev_dec: WARN_ON(block->nooffloaddevcnt-- == 0); up_write(&block->cb_lock); } static int tcf_chain0_head_change_cb_add(struct tcf_block *block, struct tcf_block_ext_info *ei, struct netlink_ext_ack *extack) { struct tcf_filter_chain_list_item *item; struct tcf_chain *chain0; item = kmalloc(sizeof(*item), GFP_KERNEL); if (!item) { NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed"); return -ENOMEM; } item->chain_head_change = ei->chain_head_change; item->chain_head_change_priv = ei->chain_head_change_priv; mutex_lock(&block->lock); chain0 = block->chain0.chain; if (chain0) tcf_chain_hold(chain0); else list_add(&item->list, &block->chain0.filter_chain_list); mutex_unlock(&block->lock); if (chain0) { struct tcf_proto *tp_head; mutex_lock(&chain0->filter_chain_lock); tp_head = tcf_chain_dereference(chain0->filter_chain, chain0); if (tp_head) tcf_chain_head_change_item(item, tp_head); mutex_lock(&block->lock); list_add(&item->list, &block->chain0.filter_chain_list); mutex_unlock(&block->lock); mutex_unlock(&chain0->filter_chain_lock); tcf_chain_put(chain0); } return 0; } static void tcf_chain0_head_change_cb_del(struct tcf_block *block, struct tcf_block_ext_info *ei) { struct tcf_filter_chain_list_item *item; mutex_lock(&block->lock); list_for_each_entry(item, &block->chain0.filter_chain_list, list) { if ((!ei->chain_head_change && !ei->chain_head_change_priv) || (item->chain_head_change == ei->chain_head_change && item->chain_head_change_priv == ei->chain_head_change_priv)) { if (block->chain0.chain) tcf_chain_head_change_item(item, NULL); list_del(&item->list); mutex_unlock(&block->lock); kfree(item); return; } } mutex_unlock(&block->lock); WARN_ON(1); } struct tcf_net { spinlock_t idr_lock; /* Protects idr */ struct idr idr; }; static unsigned int tcf_net_id; static int tcf_block_insert(struct tcf_block *block, struct net *net, struct netlink_ext_ack *extack) { struct tcf_net *tn = net_generic(net, tcf_net_id); int err; idr_preload(GFP_KERNEL); spin_lock(&tn->idr_lock); err = idr_alloc_u32(&tn->idr, block, &block->index, block->index, GFP_NOWAIT); spin_unlock(&tn->idr_lock); idr_preload_end(); return err; } static void tcf_block_remove(struct tcf_block *block, struct net *net) { struct tcf_net *tn = net_generic(net, tcf_net_id); spin_lock(&tn->idr_lock); idr_remove(&tn->idr, block->index); spin_unlock(&tn->idr_lock); } static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, u32 block_index, struct netlink_ext_ack *extack) { struct tcf_block *block; block = kzalloc(sizeof(*block), GFP_KERNEL); if (!block) { NL_SET_ERR_MSG(extack, "Memory allocation for block failed"); return ERR_PTR(-ENOMEM); } mutex_init(&block->lock); mutex_init(&block->proto_destroy_lock); init_rwsem(&block->cb_lock); flow_block_init(&block->flow_block); INIT_LIST_HEAD(&block->chain_list); INIT_LIST_HEAD(&block->owner_list); INIT_LIST_HEAD(&block->chain0.filter_chain_list); refcount_set(&block->refcnt, 1); block->net = net; block->index = block_index; xa_init(&block->ports); /* Don't store q pointer for blocks which are shared */ if (!tcf_block_shared(block)) block->q = q; return block; } struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index) { struct tcf_net *tn = net_generic(net, tcf_net_id); return idr_find(&tn->idr, block_index); } EXPORT_SYMBOL(tcf_block_lookup); static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index) { struct tcf_block *block; rcu_read_lock(); block = tcf_block_lookup(net, block_index); if (block && !refcount_inc_not_zero(&block->refcnt)) block = NULL; rcu_read_unlock(); return block; } static struct tcf_chain * __tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain) { mutex_lock(&block->lock); if (chain) chain = list_is_last(&chain->list, &block->chain_list) ? NULL : list_next_entry(chain, list); else chain = list_first_entry_or_null(&block->chain_list, struct tcf_chain, list); /* skip all action-only chains */ while (chain && tcf_chain_held_by_acts_only(chain)) chain = list_is_last(&chain->list, &block->chain_list) ? NULL : list_next_entry(chain, list); if (chain) tcf_chain_hold(chain); mutex_unlock(&block->lock); return chain; } /* Function to be used by all clients that want to iterate over all chains on * block. It properly obtains block->lock and takes reference to chain before * returning it. Users of this function must be tolerant to concurrent chain * insertion/deletion or ensure that no concurrent chain modification is * possible. Note that all netlink dump callbacks cannot guarantee to provide * consistent dump because rtnl lock is released each time skb is filled with * data and sent to user-space. */ struct tcf_chain * tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain) { struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain); if (chain) tcf_chain_put(chain); return chain_next; } EXPORT_SYMBOL(tcf_get_next_chain); static struct tcf_proto * __tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp) { u32 prio = 0; ASSERT_RTNL(); mutex_lock(&chain->filter_chain_lock); if (!tp) { tp = tcf_chain_dereference(chain->filter_chain, chain); } else if (tcf_proto_is_deleting(tp)) { /* 'deleting' flag is set and chain->filter_chain_lock was * unlocked, which means next pointer could be invalid. Restart * search. */ prio = tp->prio + 1; tp = tcf_chain_dereference(chain->filter_chain, chain); for (; tp; tp = tcf_chain_dereference(tp->next, chain)) if (!tp->deleting && tp->prio >= prio) break; } else { tp = tcf_chain_dereference(tp->next, chain); } if (tp) tcf_proto_get(tp); mutex_unlock(&chain->filter_chain_lock); return tp; } /* Function to be used by all clients that want to iterate over all tp's on * chain. Users of this function must be tolerant to concurrent tp * insertion/deletion or ensure that no concurrent chain modification is * possible. Note that all netlink dump callbacks cannot guarantee to provide * consistent dump because rtnl lock is released each time skb is filled with * data and sent to user-space. */ struct tcf_proto * tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp) { struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp); if (tp) tcf_proto_put(tp, true, NULL); return tp_next; } EXPORT_SYMBOL(tcf_get_next_proto); static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held) { struct tcf_chain *chain; /* Last reference to block. At this point chains cannot be added or * removed concurrently. */ for (chain = tcf_get_next_chain(block, NULL); chain; chain = tcf_get_next_chain(block, chain)) { tcf_chain_put_explicitly_created(chain); tcf_chain_flush(chain, rtnl_held); } } /* Lookup Qdisc and increments its reference counter. * Set parent, if necessary. */ static int __tcf_qdisc_find(struct net *net, struct Qdisc **q, u32 *parent, int ifindex, bool rtnl_held, struct netlink_ext_ack *extack) { const struct Qdisc_class_ops *cops; struct net_device *dev; int err = 0; if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) return 0; rcu_read_lock(); /* Find link */ dev = dev_get_by_index_rcu(net, ifindex); if (!dev) { rcu_read_unlock(); return -ENODEV; } /* Find qdisc */ if (!*parent) { *q = rcu_dereference(dev->qdisc); *parent = (*q)->handle; } else { *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent)); if (!*q) { NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); err = -EINVAL; goto errout_rcu; } } *q = qdisc_refcount_inc_nz(*q); if (!*q) { NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); err = -EINVAL; goto errout_rcu; } /* Is it classful? */ cops = (*q)->ops->cl_ops; if (!cops) { NL_SET_ERR_MSG(extack, "Qdisc not classful"); err = -EINVAL; goto errout_qdisc; } if (!cops->tcf_block) { NL_SET_ERR_MSG(extack, "Class doesn't support blocks"); err = -EOPNOTSUPP; goto errout_qdisc; } errout_rcu: /* At this point we know that qdisc is not noop_qdisc, * which means that qdisc holds a reference to net_device * and we hold a reference to qdisc, so it is safe to release * rcu read lock. */ rcu_read_unlock(); return err; errout_qdisc: rcu_read_unlock(); if (rtnl_held) qdisc_put(*q); else qdisc_put_unlocked(*q); *q = NULL; return err; } static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl, int ifindex, struct netlink_ext_ack *extack) { if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) return 0; /* Do we search for filter, attached to class? */ if (TC_H_MIN(parent)) { const struct Qdisc_class_ops *cops = q->ops->cl_ops; *cl = cops->find(q, parent); if (*cl == 0) { NL_SET_ERR_MSG(extack, "Specified class doesn't exist"); return -ENOENT; } } return 0; } static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q, unsigned long cl, int ifindex, u32 block_index, struct netlink_ext_ack *extack) { struct tcf_block *block; if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) { block = tcf_block_refcnt_get(net, block_index); if (!block) { NL_SET_ERR_MSG(extack, "Block of given index was not found"); return ERR_PTR(-EINVAL); } } else { const struct Qdisc_class_ops *cops = q->ops->cl_ops; block = cops->tcf_block(q, cl, extack); if (!block) return ERR_PTR(-EINVAL); if (tcf_block_shared(block)) { NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters"); return ERR_PTR(-EOPNOTSUPP); } /* Always take reference to block in order to support execution * of rules update path of cls API without rtnl lock. Caller * must release block when it is finished using it. 'if' block * of this conditional obtain reference to block by calling * tcf_block_refcnt_get(). */ refcount_inc(&block->refcnt); } return block; } static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei, bool rtnl_held) { if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) { /* Flushing/putting all chains will cause the block to be * deallocated when last chain is freed. However, if chain_list * is empty, block has to be manually deallocated. After block * reference counter reached 0, it is no longer possible to * increment it or add new chains to block. */ bool free_block = list_empty(&block->chain_list); mutex_unlock(&block->lock); if (tcf_block_shared(block)) tcf_block_remove(block, block->net); if (q) tcf_block_offload_unbind(block, q, ei); if (free_block) tcf_block_destroy(block); else tcf_block_flush_all_chains(block, rtnl_held); } else if (q) { tcf_block_offload_unbind(block, q, ei); } } static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held) { __tcf_block_put(block, NULL, NULL, rtnl_held); } /* Find tcf block. * Set q, parent, cl when appropriate. */ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, u32 *parent, unsigned long *cl, int ifindex, u32 block_index, struct netlink_ext_ack *extack) { struct tcf_block *block; int err = 0; ASSERT_RTNL(); err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack); if (err) goto errout; err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack); if (err) goto errout_qdisc; block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack); if (IS_ERR(block)) { err = PTR_ERR(block); goto errout_qdisc; } return block; errout_qdisc: if (*q) qdisc_put(*q); errout: *q = NULL; return ERR_PTR(err); } static void tcf_block_release(struct Qdisc *q, struct tcf_block *block, bool rtnl_held) { if (!IS_ERR_OR_NULL(block)) tcf_block_refcnt_put(block, rtnl_held); if (q) { if (rtnl_held) qdisc_put(q); else qdisc_put_unlocked(q); } } struct tcf_block_owner_item { struct list_head list; struct Qdisc *q; enum flow_block_binder_type binder_type; }; static void tcf_block_owner_netif_keep_dst(struct tcf_block *block, struct Qdisc *q, enum flow_block_binder_type binder_type) { if (block->keep_dst && binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS && binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) netif_keep_dst(qdisc_dev(q)); } void tcf_block_netif_keep_dst(struct tcf_block *block) { struct tcf_block_owner_item *item; block->keep_dst = true; list_for_each_entry(item, &block->owner_list, list) tcf_block_owner_netif_keep_dst(block, item->q, item->binder_type); } EXPORT_SYMBOL(tcf_block_netif_keep_dst); static int tcf_block_owner_add(struct tcf_block *block, struct Qdisc *q, enum flow_block_binder_type binder_type) { struct tcf_block_owner_item *item; item = kmalloc(sizeof(*item), GFP_KERNEL); if (!item) return -ENOMEM; item->q = q; item->binder_type = binder_type; list_add(&item->list, &block->owner_list); return 0; } static void tcf_block_owner_del(struct tcf_block *block, struct Qdisc *q, enum flow_block_binder_type binder_type) { struct tcf_block_owner_item *item; list_for_each_entry(item, &block->owner_list, list) { if (item->q == q && item->binder_type == binder_type) { list_del(&item->list); kfree(item); return; } } WARN_ON(1); } static bool tcf_block_tracks_dev(struct tcf_block *block, struct tcf_block_ext_info *ei) { return tcf_block_shared(block) && (ei->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS || ei->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS); } int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, struct tcf_block_ext_info *ei, struct netlink_ext_ack *extack) { struct net_device *dev = qdisc_dev(q); struct net *net = qdisc_net(q); struct tcf_block *block = NULL; int err; if (ei->block_index) /* block_index not 0 means the shared block is requested */ block = tcf_block_refcnt_get(net, ei->block_index); if (!block) { block = tcf_block_create(net, q, ei->block_index, extack); if (IS_ERR(block)) return PTR_ERR(block); if (tcf_block_shared(block)) { err = tcf_block_insert(block, net, extack); if (err) goto err_block_insert; } } err = tcf_block_owner_add(block, q, ei->binder_type); if (err) goto err_block_owner_add; tcf_block_owner_netif_keep_dst(block, q, ei->binder_type); err = tcf_chain0_head_change_cb_add(block, ei, extack); if (err) goto err_chain0_head_change_cb_add; err = tcf_block_offload_bind(block, q, ei, extack); if (err) goto err_block_offload_bind; if (tcf_block_tracks_dev(block, ei)) { err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL); if (err) { NL_SET_ERR_MSG(extack, "block dev insert failed"); goto err_dev_insert; } } *p_block = block; return 0; err_dev_insert: tcf_block_offload_unbind(block, q, ei); err_block_offload_bind: tcf_chain0_head_change_cb_del(block, ei); err_chain0_head_change_cb_add: tcf_block_owner_del(block, q, ei->binder_type); err_block_owner_add: err_block_insert: tcf_block_refcnt_put(block, true); return err; } EXPORT_SYMBOL(tcf_block_get_ext); static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv) { struct tcf_proto __rcu **p_filter_chain = priv; rcu_assign_pointer(*p_filter_chain, tp_head); } int tcf_block_get(struct tcf_block **p_block, struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, struct netlink_ext_ack *extack) { struct tcf_block_ext_info ei = { .chain_head_change = tcf_chain_head_change_dflt, .chain_head_change_priv = p_filter_chain, }; WARN_ON(!p_filter_chain); return tcf_block_get_ext(p_block, q, &ei, extack); } EXPORT_SYMBOL(tcf_block_get); /* XXX: Standalone actions are not allowed to jump to any chain, and bound * actions should be all removed after flushing. */ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, struct tcf_block_ext_info *ei) { struct net_device *dev = qdisc_dev(q); if (!block) return; if (tcf_block_tracks_dev(block, ei)) xa_erase(&block->ports, dev->ifindex); tcf_chain0_head_change_cb_del(block, ei); tcf_block_owner_del(block, q, ei->binder_type); __tcf_block_put(block, q, ei, true); } EXPORT_SYMBOL(tcf_block_put_ext); void tcf_block_put(struct tcf_block *block) { struct tcf_block_ext_info ei = {0, }; if (!block) return; tcf_block_put_ext(block, block->q, &ei); } EXPORT_SYMBOL(tcf_block_put); static int tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb, void *cb_priv, bool add, bool offload_in_use, struct netlink_ext_ack *extack) { struct tcf_chain *chain, *chain_prev; struct tcf_proto *tp, *tp_prev; int err; lockdep_assert_held(&block->cb_lock); for (chain = __tcf_get_next_chain(block, NULL); chain; chain_prev = chain, chain = __tcf_get_next_chain(block, chain), tcf_chain_put(chain_prev)) { if (chain->tmplt_ops && add) chain->tmplt_ops->tmplt_reoffload(chain, true, cb, cb_priv); for (tp = __tcf_get_next_proto(chain, NULL); tp; tp_prev = tp, tp = __tcf_get_next_proto(chain, tp), tcf_proto_put(tp_prev, true, NULL)) { if (tp->ops->reoffload) { err = tp->ops->reoffload(tp, add, cb, cb_priv, extack); if (err && add) goto err_playback_remove; } else if (add && offload_in_use) { err = -EOPNOTSUPP; NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support"); goto err_playback_remove; } } if (chain->tmplt_ops && !add) chain->tmplt_ops->tmplt_reoffload(chain, false, cb, cb_priv); } return 0; err_playback_remove: tcf_proto_put(tp, true, NULL); tcf_chain_put(chain); tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use, extack); return err; } static int tcf_block_bind(struct tcf_block *block, struct flow_block_offload *bo) { struct flow_block_cb *block_cb, *next; int err, i = 0; lockdep_assert_held(&block->cb_lock); list_for_each_entry(block_cb, &bo->cb_list, list) { err = tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv, true, tcf_block_offload_in_use(block), bo->extack); if (err) goto err_unroll; if (!bo->unlocked_driver_cb) block->lockeddevcnt++; i++; } list_splice(&bo->cb_list, &block->flow_block.cb_list); return 0; err_unroll: list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) { list_del(&block_cb->driver_list); if (i-- > 0) { list_del(&block_cb->list); tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv, false, tcf_block_offload_in_use(block), NULL); if (!bo->unlocked_driver_cb) block->lockeddevcnt--; } flow_block_cb_free(block_cb); } return err; } static void tcf_block_unbind(struct tcf_block *block, struct flow_block_offload *bo) { struct flow_block_cb *block_cb, *next; lockdep_assert_held(&block->cb_lock); list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) { tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv, false, tcf_block_offload_in_use(block), NULL); list_del(&block_cb->list); flow_block_cb_free(block_cb); if (!bo->unlocked_driver_cb) block->lockeddevcnt--; } } static int tcf_block_setup(struct tcf_block *block, struct flow_block_offload *bo) { int err; switch (bo->command) { case FLOW_BLOCK_BIND: err = tcf_block_bind(block, bo); break; case FLOW_BLOCK_UNBIND: err = 0; tcf_block_unbind(block, bo); break; default: WARN_ON_ONCE(1); err = -EOPNOTSUPP; } return err; } /* Main classifier routine: scans classifier chain attached * to this qdisc, (optionally) tests for protocol and asks * specific classifiers. */ static inline int __tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, const struct tcf_proto *orig_tp, struct tcf_result *res, bool compat_mode, struct tcf_exts_miss_cookie_node *n, int act_index, u32 *last_executed_chain) { #ifdef CONFIG_NET_CLS_ACT const int max_reclassify_loop = 16; const struct tcf_proto *first_tp; int limit = 0; reclassify: #endif for (; tp; tp = rcu_dereference_bh(tp->next)) { __be16 protocol = skb_protocol(skb, false); int err = 0; if (n) { struct tcf_exts *exts; if (n->tp_prio != tp->prio) continue; /* We re-lookup the tp and chain based on index instead * of having hard refs and locks to them, so do a sanity * check if any of tp,chain,exts was replaced by the * time we got here with a cookie from hardware. */ if (unlikely(n->tp != tp || n->tp->chain != n->chain || !tp->ops->get_exts)) { tcf_set_drop_reason(skb, SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } exts = tp->ops->get_exts(tp, n->handle); if (unlikely(!exts || n->exts != exts)) { tcf_set_drop_reason(skb, SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } n = NULL; err = tcf_exts_exec_ex(skb, exts, act_index, res); } else { if (tp->protocol != protocol && tp->protocol != htons(ETH_P_ALL)) continue; err = tc_classify(skb, tp, res); } #ifdef CONFIG_NET_CLS_ACT if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) { first_tp = orig_tp; *last_executed_chain = first_tp->chain->index; goto reset; } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) { first_tp = res->goto_tp; *last_executed_chain = err & TC_ACT_EXT_VAL_MASK; goto reset; } #endif if (err >= 0) return err; } if (unlikely(n)) { tcf_set_drop_reason(skb, SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } return TC_ACT_UNSPEC; /* signal: continue lookup */ #ifdef CONFIG_NET_CLS_ACT reset: if (unlikely(limit++ >= max_reclassify_loop)) { net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n", tp->chain->block->index, tp->prio & 0xffff, ntohs(tp->protocol)); tcf_set_drop_reason(skb, SKB_DROP_REASON_TC_RECLASSIFY_LOOP); return TC_ACT_SHOT; } tp = first_tp; goto reclassify; #endif } int tcf_classify(struct sk_buff *skb, const struct tcf_block *block, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) u32 last_executed_chain = 0; return __tcf_classify(skb, tp, tp, res, compat_mode, NULL, 0, &last_executed_chain); #else u32 last_executed_chain = tp ? tp->chain->index : 0; struct tcf_exts_miss_cookie_node *n = NULL; const struct tcf_proto *orig_tp = tp; struct tc_skb_ext *ext; int act_index = 0; int ret; if (block) { ext = skb_ext_find(skb, TC_SKB_EXT); if (ext && (ext->chain || ext->act_miss)) { struct tcf_chain *fchain; u32 chain; if (ext->act_miss) { n = tcf_exts_miss_cookie_lookup(ext->act_miss_cookie, &act_index); if (!n) { tcf_set_drop_reason(skb, SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } chain = n->chain_index; } else { chain = ext->chain; } fchain = tcf_chain_lookup_rcu(block, chain); if (!fchain) { tcf_set_drop_reason(skb, SKB_DROP_REASON_TC_CHAIN_NOTFOUND); return TC_ACT_SHOT; } /* Consume, so cloned/redirect skbs won't inherit ext */ skb_ext_del(skb, TC_SKB_EXT); tp = rcu_dereference_bh(fchain->filter_chain); last_executed_chain = fchain->index; } } ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode, n, act_index, &last_executed_chain); if (tc_skb_ext_tc_enabled()) { /* If we missed on some chain */ if (ret == TC_ACT_UNSPEC && last_executed_chain) { struct tc_skb_cb *cb = tc_skb_cb(skb); ext = tc_skb_ext_alloc(skb); if (WARN_ON_ONCE(!ext)) { tcf_set_drop_reason(skb, SKB_DROP_REASON_NOMEM); return TC_ACT_SHOT; } ext->chain = last_executed_chain; ext->mru = cb->mru; ext->post_ct = cb->post_ct; ext->post_ct_snat = cb->post_ct_snat; ext->post_ct_dnat = cb->post_ct_dnat; ext->zone = cb->zone; } } return ret; #endif } EXPORT_SYMBOL(tcf_classify); struct tcf_chain_info { struct tcf_proto __rcu **pprev; struct tcf_proto __rcu *next; }; static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain, struct tcf_chain_info *chain_info) { return tcf_chain_dereference(*chain_info->pprev, chain); } static int tcf_chain_tp_insert(struct tcf_chain *chain, struct tcf_chain_info *chain_info, struct tcf_proto *tp) { if (chain->flushing) return -EAGAIN; RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); if (*chain_info->pprev == chain->filter_chain) tcf_chain0_head_change(chain, tp); tcf_proto_get(tp); rcu_assign_pointer(*chain_info->pprev, tp); return 0; } static void tcf_chain_tp_remove(struct tcf_chain *chain, struct tcf_chain_info *chain_info, struct tcf_proto *tp) { struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain); tcf_proto_mark_delete(tp); if (tp == chain->filter_chain) tcf_chain0_head_change(chain, next); RCU_INIT_POINTER(*chain_info->pprev, next); } static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, struct tcf_chain_info *chain_info, u32 protocol, u32 prio, bool prio_allocate, struct netlink_ext_ack *extack); /* Try to insert new proto. * If proto with specified priority already exists, free new proto * and return existing one. */ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain, struct tcf_proto *tp_new, u32 protocol, u32 prio, bool rtnl_held) { struct tcf_chain_info chain_info; struct tcf_proto *tp; int err = 0; mutex_lock(&chain->filter_chain_lock); if (tcf_proto_exists_destroying(chain, tp_new)) { mutex_unlock(&chain->filter_chain_lock); tcf_proto_destroy(tp_new, rtnl_held, false, NULL); return ERR_PTR(-EAGAIN); } tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false, NULL); if (!tp) err = tcf_chain_tp_insert(chain, &chain_info, tp_new); mutex_unlock(&chain->filter_chain_lock); if (tp) { tcf_proto_destroy(tp_new, rtnl_held, false, NULL); tp_new = tp; } else if (err) { tcf_proto_destroy(tp_new, rtnl_held, false, NULL); tp_new = ERR_PTR(err); } return tp_new; } static void tcf_chain_tp_delete_empty(struct tcf_chain *chain, struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct tcf_chain_info chain_info; struct tcf_proto *tp_iter; struct tcf_proto **pprev; struct tcf_proto *next; mutex_lock(&chain->filter_chain_lock); /* Atomically find and remove tp from chain. */ for (pprev = &chain->filter_chain; (tp_iter = tcf_chain_dereference(*pprev, chain)); pprev = &tp_iter->next) { if (tp_iter == tp) { chain_info.pprev = pprev; chain_info.next = tp_iter->next; WARN_ON(tp_iter->deleting); break; } } /* Verify that tp still exists and no new filters were inserted * concurrently. * Mark tp for deletion if it is empty. */ if (!tp_iter || !tcf_proto_check_delete(tp)) { mutex_unlock(&chain->filter_chain_lock); return; } tcf_proto_signal_destroying(chain, tp); next = tcf_chain_dereference(chain_info.next, chain); if (tp == chain->filter_chain) tcf_chain0_head_change(chain, next); RCU_INIT_POINTER(*chain_info.pprev, next); mutex_unlock(&chain->filter_chain_lock); tcf_proto_put(tp, rtnl_held, extack); } static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, struct tcf_chain_info *chain_info, u32 protocol, u32 prio, bool prio_allocate, struct netlink_ext_ack *extack) { struct tcf_proto **pprev; struct tcf_proto *tp; /* Check the chain for existence of proto-tcf with this priority */ for (pprev = &chain->filter_chain; (tp = tcf_chain_dereference(*pprev, chain)); pprev = &tp->next) { if (tp->prio >= prio) { if (tp->prio == prio) { if (prio_allocate) { NL_SET_ERR_MSG(extack, "Lowest ID from auto-alloc range already in use"); return ERR_PTR(-ENOSPC); } if (tp->protocol != protocol && protocol) { NL_SET_ERR_MSG(extack, "Protocol mismatch for filter with specified priority"); return ERR_PTR(-EINVAL); } } else { tp = NULL; } break; } } chain_info->pprev = pprev; if (tp) { chain_info->next = tp->next; tcf_proto_get(tp); } else { chain_info->next = NULL; } return tp; } static int tcf_fill_node(struct net *net, struct sk_buff *skb, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, u32 parent, void *fh, u32 portid, u32 seq, u16 flags, int event, bool terse_dump, bool rtnl_held, struct netlink_ext_ack *extack) { struct tcmsg *tcm; struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); int ret = -EMSGSIZE; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; if (q) { tcm->tcm_ifindex = qdisc_dev(q)->ifindex; tcm->tcm_parent = parent; } else { tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK; tcm->tcm_block_index = block->index; } tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); if (nla_put_string(skb, TCA_KIND, tp->ops->kind)) goto nla_put_failure; if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index)) goto nla_put_failure; if (!fh) { tcm->tcm_handle = 0; } else if (terse_dump) { if (tp->ops->terse_dump) { if (tp->ops->terse_dump(net, tp, fh, skb, tcm, rtnl_held) < 0) goto nla_put_failure; } else { goto cls_op_not_supp; } } else { if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0) goto nla_put_failure; } if (extack && extack->_msg && nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg)) goto nla_put_failure; nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; cls_op_not_supp: ret = -EOPNOTSUPP; out_nlmsg_trim: nla_put_failure: nlmsg_trim(skb, b); return ret; } static struct sk_buff *tfilter_notify_prep(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, u32 parent, void *fh, int event, u32 portid, bool rtnl_held, struct netlink_ext_ack *extack) { unsigned int size = oskb ? max(NLMSG_GOODSIZE, oskb->len) : NLMSG_GOODSIZE; struct sk_buff *skb; int ret; retry: skb = alloc_skb(size, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOBUFS); ret = tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, n->nlmsg_seq, n->nlmsg_flags, event, false, rtnl_held, extack); if (ret <= 0) { kfree_skb(skb); if (ret == -EMSGSIZE) { size += NLMSG_GOODSIZE; goto retry; } return ERR_PTR(-EINVAL); } return skb; } static int tfilter_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, u32 parent, void *fh, int event, bool unicast, bool rtnl_held, struct netlink_ext_ack *extack) { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; int err = 0; if (!unicast && !rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) return 0; skb = tfilter_notify_prep(net, oskb, n, tp, block, q, parent, fh, event, portid, rtnl_held, extack); if (IS_ERR(skb)) return PTR_ERR(skb); if (unicast) err = rtnl_unicast(skb, net, portid); else err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); return err; } static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, u32 parent, void *fh, bool *last, bool rtnl_held, struct netlink_ext_ack *extack) { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; int err; if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) return tp->ops->delete(tp, fh, last, rtnl_held, extack); skb = tfilter_notify_prep(net, oskb, n, tp, block, q, parent, fh, RTM_DELTFILTER, portid, rtnl_held, extack); if (IS_ERR(skb)) { NL_SET_ERR_MSG(extack, "Failed to build del event notification"); return PTR_ERR(skb); } err = tp->ops->delete(tp, fh, last, rtnl_held, extack); if (err) { kfree_skb(skb); return err; } err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send filter delete notification"); return err; } static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, struct tcf_block *block, struct Qdisc *q, u32 parent, struct nlmsghdr *n, struct tcf_chain *chain, int event, struct netlink_ext_ack *extack) { struct tcf_proto *tp; for (tp = tcf_get_next_proto(chain, NULL); tp; tp = tcf_get_next_proto(chain, tp)) tfilter_notify(net, oskb, n, tp, block, q, parent, NULL, event, false, true, extack); } static void tfilter_put(struct tcf_proto *tp, void *fh) { if (tp->ops->put && fh) tp->ops->put(tp, fh); } static bool is_qdisc_ingress(__u32 classid) { return (TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS)); } static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; char name[IFNAMSIZ]; struct tcmsg *t; u32 protocol; u32 prio; bool prio_allocate; u32 parent; u32 chain_index; struct Qdisc *q; struct tcf_chain_info chain_info; struct tcf_chain *chain; struct tcf_block *block; struct tcf_proto *tp; unsigned long cl; void *fh; int err; int tp_created; bool rtnl_held = false; u32 flags; replay: tp_created = 0; err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; t = nlmsg_data(n); protocol = TC_H_MIN(t->tcm_info); prio = TC_H_MAJ(t->tcm_info); prio_allocate = false; parent = t->tcm_parent; tp = NULL; cl = 0; block = NULL; q = NULL; chain = NULL; flags = 0; if (prio == 0) { /* If no priority is provided by the user, * we allocate one. */ if (n->nlmsg_flags & NLM_F_CREATE) { prio = TC_H_MAKE(0x80000000U, 0U); prio_allocate = true; } else { NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero"); return -ENOENT; } } /* Find head of filter chain. */ err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); if (err) return err; if (tcf_proto_check_kind(tca[TCA_KIND], name)) { NL_SET_ERR_MSG(extack, "Specified TC filter name too long"); err = -EINVAL; goto errout; } /* Take rtnl mutex if rtnl_held was set to true on previous iteration, * block is shared (no qdisc found), qdisc is not unlocked, classifier * type is not specified, classifier is not unlocked. */ if (rtnl_held || (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || !tcf_proto_is_unlocked(name)) { rtnl_held = true; rtnl_lock(); } err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); if (err) goto errout; block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, extack); if (IS_ERR(block)) { err = PTR_ERR(block); goto errout; } block->classid = parent; chain_index = nla_get_u32_default(tca[TCA_CHAIN], 0); if (chain_index > TC_ACT_EXT_VAL_MASK) { NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); err = -EINVAL; goto errout; } chain = tcf_chain_get(block, chain_index, true); if (!chain) { NL_SET_ERR_MSG(extack, "Cannot create specified filter chain"); err = -ENOMEM; goto errout; } mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, prio_allocate, extack); if (IS_ERR(tp)) { err = PTR_ERR(tp); goto errout_locked; } if (tp == NULL) { struct tcf_proto *tp_new = NULL; if (chain->flushing) { err = -EAGAIN; goto errout_locked; } /* Proto-tcf does not exist, create new one */ if (tca[TCA_KIND] == NULL || !protocol) { NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified"); err = -EINVAL; goto errout_locked; } if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter"); err = -ENOENT; goto errout_locked; } if (prio_allocate) prio = tcf_auto_prio(tcf_chain_tp_prev(chain, &chain_info)); mutex_unlock(&chain->filter_chain_lock); tp_new = tcf_proto_create(name, protocol, prio, chain, rtnl_held, extack); if (IS_ERR(tp_new)) { err = PTR_ERR(tp_new); goto errout_tp; } tp_created = 1; tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio, rtnl_held); if (IS_ERR(tp)) { err = PTR_ERR(tp); goto errout_tp; } } else { mutex_unlock(&chain->filter_chain_lock); } if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); err = -EINVAL; goto errout; } fh = tp->ops->get(tp, t->tcm_handle); if (!fh) { if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter"); err = -ENOENT; goto errout; } } else if (n->nlmsg_flags & NLM_F_EXCL) { tfilter_put(tp, fh); NL_SET_ERR_MSG(extack, "Filter already exists"); err = -EEXIST; goto errout; } if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) { tfilter_put(tp, fh); NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind"); err = -EINVAL; goto errout; } if (!(n->nlmsg_flags & NLM_F_CREATE)) flags |= TCA_ACT_FLAGS_REPLACE; if (!rtnl_held) flags |= TCA_ACT_FLAGS_NO_RTNL; if (is_qdisc_ingress(parent)) flags |= TCA_ACT_FLAGS_AT_INGRESS; err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, flags, extack); if (err == 0) { tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_NEWTFILTER, false, rtnl_held, extack); tfilter_put(tp, fh); tcf_proto_count_usesw(tp, true); /* q pointer is NULL for shared blocks */ if (q) q->flags &= ~TCQ_F_CAN_BYPASS; } errout: if (err && tp_created) tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL); errout_tp: if (chain) { if (tp && !IS_ERR(tp)) tcf_proto_put(tp, rtnl_held, NULL); if (!tp_created) tcf_chain_put(chain); } tcf_block_release(q, block, rtnl_held); if (rtnl_held) rtnl_unlock(); if (err == -EAGAIN) { /* Take rtnl lock in case EAGAIN is caused by concurrent flush * of target chain. */ rtnl_held = true; /* Replay the request. */ goto replay; } return err; errout_locked: mutex_unlock(&chain->filter_chain_lock); goto errout; } static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; char name[IFNAMSIZ]; struct tcmsg *t; u32 protocol; u32 prio; u32 parent; u32 chain_index; struct Qdisc *q = NULL; struct tcf_chain_info chain_info; struct tcf_chain *chain = NULL; struct tcf_block *block = NULL; struct tcf_proto *tp = NULL; unsigned long cl = 0; void *fh = NULL; int err; bool rtnl_held = false; err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; t = nlmsg_data(n); protocol = TC_H_MIN(t->tcm_info); prio = TC_H_MAJ(t->tcm_info); parent = t->tcm_parent; if (prio == 0 && (protocol || t->tcm_handle || tca[TCA_KIND])) { NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set"); return -ENOENT; } /* Find head of filter chain. */ err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); if (err) return err; if (tcf_proto_check_kind(tca[TCA_KIND], name)) { NL_SET_ERR_MSG(extack, "Specified TC filter name too long"); err = -EINVAL; goto errout; } /* Take rtnl mutex if flushing whole chain, block is shared (no qdisc * found), qdisc is not unlocked, classifier type is not specified, * classifier is not unlocked. */ if (!prio || (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || !tcf_proto_is_unlocked(name)) { rtnl_held = true; rtnl_lock(); } err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); if (err) goto errout; block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, extack); if (IS_ERR(block)) { err = PTR_ERR(block); goto errout; } chain_index = nla_get_u32_default(tca[TCA_CHAIN], 0); if (chain_index > TC_ACT_EXT_VAL_MASK) { NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); err = -EINVAL; goto errout; } chain = tcf_chain_get(block, chain_index, false); if (!chain) { /* User requested flush on non-existent chain. Nothing to do, * so just return success. */ if (prio == 0) { err = 0; goto errout; } NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); err = -ENOENT; goto errout; } if (prio == 0) { tfilter_notify_chain(net, skb, block, q, parent, n, chain, RTM_DELTFILTER, extack); tcf_chain_flush(chain, rtnl_held); err = 0; goto errout; } mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false, extack); if (!tp) { err = -ENOENT; NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); goto errout_locked; } else if (IS_ERR(tp)) { err = PTR_ERR(tp); goto errout_locked; } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); err = -EINVAL; goto errout_locked; } else if (t->tcm_handle == 0) { tcf_proto_signal_destroying(chain, tp); tcf_chain_tp_remove(chain, &chain_info, tp); mutex_unlock(&chain->filter_chain_lock); tcf_proto_put(tp, rtnl_held, NULL); tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_DELTFILTER, false, rtnl_held, extack); err = 0; goto errout; } mutex_unlock(&chain->filter_chain_lock); fh = tp->ops->get(tp, t->tcm_handle); if (!fh) { NL_SET_ERR_MSG(extack, "Specified filter handle not found"); err = -ENOENT; } else { bool last; err = tfilter_del_notify(net, skb, n, tp, block, q, parent, fh, &last, rtnl_held, extack); if (err) goto errout; if (last) tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack); } errout: if (chain) { if (tp && !IS_ERR(tp)) tcf_proto_put(tp, rtnl_held, NULL); tcf_chain_put(chain); } tcf_block_release(q, block, rtnl_held); if (rtnl_held) rtnl_unlock(); return err; errout_locked: mutex_unlock(&chain->filter_chain_lock); goto errout; } static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; char name[IFNAMSIZ]; struct tcmsg *t; u32 protocol; u32 prio; u32 parent; u32 chain_index; struct Qdisc *q = NULL; struct tcf_chain_info chain_info; struct tcf_chain *chain = NULL; struct tcf_block *block = NULL; struct tcf_proto *tp = NULL; unsigned long cl = 0; void *fh = NULL; int err; bool rtnl_held = false; err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; t = nlmsg_data(n); protocol = TC_H_MIN(t->tcm_info); prio = TC_H_MAJ(t->tcm_info); parent = t->tcm_parent; if (prio == 0) { NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero"); return -ENOENT; } /* Find head of filter chain. */ err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); if (err) return err; if (tcf_proto_check_kind(tca[TCA_KIND], name)) { NL_SET_ERR_MSG(extack, "Specified TC filter name too long"); err = -EINVAL; goto errout; } /* Take rtnl mutex if block is shared (no qdisc found), qdisc is not * unlocked, classifier type is not specified, classifier is not * unlocked. */ if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || !tcf_proto_is_unlocked(name)) { rtnl_held = true; rtnl_lock(); } err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); if (err) goto errout; block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, extack); if (IS_ERR(block)) { err = PTR_ERR(block); goto errout; } chain_index = nla_get_u32_default(tca[TCA_CHAIN], 0); if (chain_index > TC_ACT_EXT_VAL_MASK) { NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); err = -EINVAL; goto errout; } chain = tcf_chain_get(block, chain_index, false); if (!chain) { NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); err = -EINVAL; goto errout; } mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false, extack); mutex_unlock(&chain->filter_chain_lock); if (!tp) { err = -ENOENT; NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); goto errout; } else if (IS_ERR(tp)) { err = PTR_ERR(tp); goto errout; } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); err = -EINVAL; goto errout; } fh = tp->ops->get(tp, t->tcm_handle); if (!fh) { NL_SET_ERR_MSG(extack, "Specified filter handle not found"); err = -ENOENT; } else { err = tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_NEWTFILTER, true, rtnl_held, NULL); if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send filter notify message"); } tfilter_put(tp, fh); errout: if (chain) { if (tp && !IS_ERR(tp)) tcf_proto_put(tp, rtnl_held, NULL); tcf_chain_put(chain); } tcf_block_release(q, block, rtnl_held); if (rtnl_held) rtnl_unlock(); return err; } struct tcf_dump_args { struct tcf_walker w; struct sk_buff *skb; struct netlink_callback *cb; struct tcf_block *block; struct Qdisc *q; u32 parent; bool terse_dump; }; static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg) { struct tcf_dump_args *a = (void *)arg; struct net *net = sock_net(a->skb->sk); return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent, n, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER, a->terse_dump, true, NULL); } static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, struct sk_buff *skb, struct netlink_callback *cb, long index_start, long *p_index, bool terse) { struct net *net = sock_net(skb->sk); struct tcf_block *block = chain->block; struct tcmsg *tcm = nlmsg_data(cb->nlh); struct tcf_proto *tp, *tp_prev; struct tcf_dump_args arg; for (tp = __tcf_get_next_proto(chain, NULL); tp; tp_prev = tp, tp = __tcf_get_next_proto(chain, tp), tcf_proto_put(tp_prev, true, NULL), (*p_index)++) { if (*p_index < index_start) continue; if (TC_H_MAJ(tcm->tcm_info) && TC_H_MAJ(tcm->tcm_info) != tp->prio) continue; if (TC_H_MIN(tcm->tcm_info) && TC_H_MIN(tcm->tcm_info) != tp->protocol) continue; if (*p_index > index_start) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); if (cb->args[1] == 0) { if (tcf_fill_node(net, skb, tp, block, q, parent, NULL, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER, false, true, NULL) <= 0) goto errout; cb->args[1] = 1; } if (!tp->ops->walk) continue; arg.w.fn = tcf_node_dump; arg.skb = skb; arg.cb = cb; arg.block = block; arg.q = q; arg.parent = parent; arg.w.stop = 0; arg.w.skip = cb->args[1] - 1; arg.w.count = 0; arg.w.cookie = cb->args[2]; arg.terse_dump = terse; tp->ops->walk(tp, &arg.w, true); cb->args[2] = arg.w.cookie; cb->args[1] = arg.w.count + 1; if (arg.w.stop) goto errout; } return true; errout: tcf_proto_put(tp, true, NULL); return false; } static const struct nla_policy tcf_tfilter_dump_policy[TCA_MAX + 1] = { [TCA_CHAIN] = { .type = NLA_U32 }, [TCA_DUMP_FLAGS] = NLA_POLICY_BITFIELD32(TCA_DUMP_FLAGS_TERSE), }; /* called with RTNL */ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) { struct tcf_chain *chain, *chain_prev; struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; struct Qdisc *q = NULL; struct tcf_block *block; struct tcmsg *tcm = nlmsg_data(cb->nlh); bool terse_dump = false; long index_start; long index; u32 parent; int err; if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX, tcf_tfilter_dump_policy, cb->extack); if (err) return err; if (tca[TCA_DUMP_FLAGS]) { struct nla_bitfield32 flags = nla_get_bitfield32(tca[TCA_DUMP_FLAGS]); terse_dump = flags.value & TCA_DUMP_FLAGS_TERSE; } if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) { block = tcf_block_refcnt_get(net, tcm->tcm_block_index); if (!block) goto out; /* If we work with block index, q is NULL and parent value * will never be used in the following code. The check * in tcf_fill_node prevents it. However, compiler does not * see that far, so set parent to zero to silence the warning * about parent being uninitialized. */ parent = 0; } else { const struct Qdisc_class_ops *cops; struct net_device *dev; unsigned long cl = 0; dev = __dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) return skb->len; parent = tcm->tcm_parent; if (!parent) q = rtnl_dereference(dev->qdisc); else q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); if (!q) goto out; cops = q->ops->cl_ops; if (!cops) goto out; if (!cops->tcf_block) goto out; if (TC_H_MIN(tcm->tcm_parent)) { cl = cops->find(q, tcm->tcm_parent); if (cl == 0) goto out; } block = cops->tcf_block(q, cl, NULL); if (!block) goto out; parent = block->classid; if (tcf_block_shared(block)) q = NULL; } index_start = cb->args[0]; index = 0; for (chain = __tcf_get_next_chain(block, NULL); chain; chain_prev = chain, chain = __tcf_get_next_chain(block, chain), tcf_chain_put(chain_prev)) { if (tca[TCA_CHAIN] && nla_get_u32(tca[TCA_CHAIN]) != chain->index) continue; if (!tcf_chain_dump(chain, q, parent, skb, cb, index_start, &index, terse_dump)) { tcf_chain_put(chain); err = -EMSGSIZE; break; } } if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) tcf_block_refcnt_put(block, true); cb->args[0] = index; out: /* If we did no progress, the error (EMSGSIZE) is real */ if (skb->len == 0 && err) return err; return skb->len; } static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops, void *tmplt_priv, u32 chain_index, struct net *net, struct sk_buff *skb, struct tcf_block *block, u32 portid, u32 seq, u16 flags, int event, struct netlink_ext_ack *extack) { unsigned char *b = skb_tail_pointer(skb); const struct tcf_proto_ops *ops; struct nlmsghdr *nlh; struct tcmsg *tcm; void *priv; ops = tmplt_ops; priv = tmplt_priv; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; tcm->tcm_handle = 0; if (block->q) { tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex; tcm->tcm_parent = block->q->handle; } else { tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK; tcm->tcm_block_index = block->index; } if (nla_put_u32(skb, TCA_CHAIN, chain_index)) goto nla_put_failure; if (ops) { if (nla_put_string(skb, TCA_KIND, ops->kind)) goto nla_put_failure; if (ops->tmplt_dump(skb, net, priv) < 0) goto nla_put_failure; } if (extack && extack->_msg && nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg)) goto out_nlmsg_trim; nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; out_nlmsg_trim: nla_put_failure: nlmsg_trim(skb, b); return -EMSGSIZE; } static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, u32 seq, u16 flags, int event, bool unicast, struct netlink_ext_ack *extack) { u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; struct tcf_block *block = chain->block; struct net *net = block->net; struct sk_buff *skb; int err = 0; if (!unicast && !rtnl_notify_needed(net, flags, RTNLGRP_TC)) return 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv, chain->index, net, skb, block, portid, seq, flags, event, extack) <= 0) { kfree_skb(skb); return -EINVAL; } if (unicast) err = rtnl_unicast(skb, net, portid); else err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); return err; } static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, void *tmplt_priv, u32 chain_index, struct tcf_block *block, struct sk_buff *oskb, u32 seq, u16 flags) { u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; struct net *net = block->net; struct sk_buff *skb; if (!rtnl_notify_needed(net, flags, RTNLGRP_TC)) return 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb, block, portid, seq, flags, RTM_DELCHAIN, NULL) <= 0) { kfree_skb(skb); return -EINVAL; } return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); } static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, struct nlattr **tca, struct netlink_ext_ack *extack) { const struct tcf_proto_ops *ops; char name[IFNAMSIZ]; void *tmplt_priv; /* If kind is not set, user did not specify template. */ if (!tca[TCA_KIND]) return 0; if (tcf_proto_check_kind(tca[TCA_KIND], name)) { NL_SET_ERR_MSG(extack, "Specified TC chain template name too long"); return -EINVAL; } ops = tcf_proto_lookup_ops(name, true, extack); if (IS_ERR(ops)) return PTR_ERR(ops); if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump || !ops->tmplt_reoffload) { NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier"); module_put(ops->owner); return -EOPNOTSUPP; } tmplt_priv = ops->tmplt_create(net, chain, tca, extack); if (IS_ERR(tmplt_priv)) { module_put(ops->owner); return PTR_ERR(tmplt_priv); } chain->tmplt_ops = ops; chain->tmplt_priv = tmplt_priv; return 0; } static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops, void *tmplt_priv) { /* If template ops are set, no work to do for us. */ if (!tmplt_ops) return; tmplt_ops->tmplt_destroy(tmplt_priv); module_put(tmplt_ops->owner); } /* Add/delete/get a chain */ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; struct tcmsg *t; u32 parent; u32 chain_index; struct Qdisc *q; struct tcf_chain *chain; struct tcf_block *block; unsigned long cl; int err; replay: q = NULL; err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; t = nlmsg_data(n); parent = t->tcm_parent; cl = 0; block = tcf_block_find(net, &q, &parent, &cl, t->tcm_ifindex, t->tcm_block_index, extack); if (IS_ERR(block)) return PTR_ERR(block); chain_index = nla_get_u32_default(tca[TCA_CHAIN], 0); if (chain_index > TC_ACT_EXT_VAL_MASK) { NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); err = -EINVAL; goto errout_block; } mutex_lock(&block->lock); chain = tcf_chain_lookup(block, chain_index); if (n->nlmsg_type == RTM_NEWCHAIN) { if (chain) { if (tcf_chain_held_by_acts_only(chain)) { /* The chain exists only because there is * some action referencing it. */ tcf_chain_hold(chain); } else { NL_SET_ERR_MSG(extack, "Filter chain already exists"); err = -EEXIST; goto errout_block_locked; } } else { if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain"); err = -ENOENT; goto errout_block_locked; } chain = tcf_chain_create(block, chain_index); if (!chain) { NL_SET_ERR_MSG(extack, "Failed to create filter chain"); err = -ENOMEM; goto errout_block_locked; } } } else { if (!chain || tcf_chain_held_by_acts_only(chain)) { NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); err = -EINVAL; goto errout_block_locked; } tcf_chain_hold(chain); } if (n->nlmsg_type == RTM_NEWCHAIN) { /* Modifying chain requires holding parent block lock. In case * the chain was successfully added, take a reference to the * chain. This ensures that an empty chain does not disappear at * the end of this function. */ tcf_chain_hold(chain); chain->explicitly_created = true; } mutex_unlock(&block->lock); switch (n->nlmsg_type) { case RTM_NEWCHAIN: err = tc_chain_tmplt_add(chain, net, tca, extack); if (err) { tcf_chain_put_explicitly_created(chain); goto errout; } tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL, RTM_NEWCHAIN, false, extack); break; case RTM_DELCHAIN: tfilter_notify_chain(net, skb, block, q, parent, n, chain, RTM_DELTFILTER, extack); /* Flush the chain first as the user requested chain removal. */ tcf_chain_flush(chain, true); /* In case the chain was successfully deleted, put a reference * to the chain previously taken during addition. */ tcf_chain_put_explicitly_created(chain); break; case RTM_GETCHAIN: err = tc_chain_notify(chain, skb, n->nlmsg_seq, n->nlmsg_flags, n->nlmsg_type, true, extack); if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send chain notify message"); break; default: err = -EOPNOTSUPP; NL_SET_ERR_MSG(extack, "Unsupported message type"); goto errout; } errout: tcf_chain_put(chain); errout_block: tcf_block_release(q, block, true); if (err == -EAGAIN) /* Replay the request. */ goto replay; return err; errout_block_locked: mutex_unlock(&block->lock); goto errout_block; } /* called with RTNL */ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; struct Qdisc *q = NULL; struct tcf_block *block; struct tcmsg *tcm = nlmsg_data(cb->nlh); struct tcf_chain *chain; long index_start; long index; int err; if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, cb->extack); if (err) return err; if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) { block = tcf_block_refcnt_get(net, tcm->tcm_block_index); if (!block) goto out; } else { const struct Qdisc_class_ops *cops; struct net_device *dev; unsigned long cl = 0; dev = __dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) return skb->len; if (!tcm->tcm_parent) q = rtnl_dereference(dev->qdisc); else q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); if (!q) goto out; cops = q->ops->cl_ops; if (!cops) goto out; if (!cops->tcf_block) goto out; if (TC_H_MIN(tcm->tcm_parent)) { cl = cops->find(q, tcm->tcm_parent); if (cl == 0) goto out; } block = cops->tcf_block(q, cl, NULL); if (!block) goto out; if (tcf_block_shared(block)) q = NULL; } index_start = cb->args[0]; index = 0; mutex_lock(&block->lock); list_for_each_entry(chain, &block->chain_list, list) { if ((tca[TCA_CHAIN] && nla_get_u32(tca[TCA_CHAIN]) != chain->index)) continue; if (index < index_start) { index++; continue; } if (tcf_chain_held_by_acts_only(chain)) continue; err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv, chain->index, net, skb, block, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWCHAIN, NULL); if (err <= 0) break; index++; } mutex_unlock(&block->lock); if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) tcf_block_refcnt_put(block, true); cb->args[0] = index; out: /* If we did no progress, the error (EMSGSIZE) is real */ if (skb->len == 0 && err) return err; return skb->len; } int tcf_exts_init_ex(struct tcf_exts *exts, struct net *net, int action, int police, struct tcf_proto *tp, u32 handle, bool use_action_miss) { int err = 0; #ifdef CONFIG_NET_CLS_ACT exts->type = 0; exts->nr_actions = 0; exts->miss_cookie_node = NULL; /* Note: we do not own yet a reference on net. * This reference might be taken later from tcf_exts_get_net(). */ exts->net = net; exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), GFP_KERNEL); if (!exts->actions) return -ENOMEM; #endif exts->action = action; exts->police = police; if (!use_action_miss) return 0; err = tcf_exts_miss_cookie_base_alloc(exts, tp, handle); if (err) goto err_miss_alloc; return 0; err_miss_alloc: tcf_exts_destroy(exts); #ifdef CONFIG_NET_CLS_ACT exts->actions = NULL; #endif return err; } EXPORT_SYMBOL(tcf_exts_init_ex); void tcf_exts_destroy(struct tcf_exts *exts) { tcf_exts_miss_cookie_base_destroy(exts); #ifdef CONFIG_NET_CLS_ACT if (exts->actions) { tcf_action_destroy(exts->actions, TCA_ACT_UNBIND); kfree(exts->actions); } exts->nr_actions = 0; #endif } EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, u32 flags, u32 fl_flags, struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT { int init_res[TCA_ACT_MAX_PRIO] = {}; struct tc_action *act; size_t attr_size = 0; if (exts->police && tb[exts->police]) { struct tc_action_ops *a_o; flags |= TCA_ACT_FLAGS_POLICE | TCA_ACT_FLAGS_BIND; a_o = tc_action_load_ops(tb[exts->police], flags, extack); if (IS_ERR(a_o)) return PTR_ERR(a_o); act = tcf_action_init_1(net, tp, tb[exts->police], rate_tlv, a_o, init_res, flags, extack); module_put(a_o->owner); if (IS_ERR(act)) return PTR_ERR(act); act->type = exts->type = TCA_OLD_COMPAT; exts->actions[0] = act; exts->nr_actions = 1; tcf_idr_insert_many(exts->actions, init_res); } else if (exts->action && tb[exts->action]) { int err; flags |= TCA_ACT_FLAGS_BIND; err = tcf_action_init(net, tp, tb[exts->action], rate_tlv, exts->actions, init_res, &attr_size, flags, fl_flags, extack); if (err < 0) return err; exts->nr_actions = err; } } #else if ((exts->action && tb[exts->action]) || (exts->police && tb[exts->police])) { NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)"); return -EOPNOTSUPP; } #endif return 0; } EXPORT_SYMBOL(tcf_exts_validate_ex); int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, u32 flags, struct netlink_ext_ack *extack) { return tcf_exts_validate_ex(net, tp, tb, rate_tlv, exts, flags, 0, extack); } EXPORT_SYMBOL(tcf_exts_validate); void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src) { #ifdef CONFIG_NET_CLS_ACT struct tcf_exts old = *dst; *dst = *src; tcf_exts_destroy(&old); #endif } EXPORT_SYMBOL(tcf_exts_change); #ifdef CONFIG_NET_CLS_ACT static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts) { if (exts->nr_actions == 0) return NULL; else return exts->actions[0]; } #endif int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT struct nlattr *nest; if (exts->action && tcf_exts_has_actions(exts)) { /* * again for backward compatible mode - we want * to work with both old and new modes of entering * tc data even if iproute2 was newer - jhs */ if (exts->type != TCA_OLD_COMPAT) { nest = nla_nest_start_noflag(skb, exts->action); if (nest == NULL) goto nla_put_failure; if (tcf_action_dump(skb, exts->actions, 0, 0, false) < 0) goto nla_put_failure; nla_nest_end(skb, nest); } else if (exts->police) { struct tc_action *act = tcf_exts_first_act(exts); nest = nla_nest_start_noflag(skb, exts->police); if (nest == NULL || !act) goto nla_put_failure; if (tcf_action_dump_old(skb, act, 0, 0) < 0) goto nla_put_failure; nla_nest_end(skb, nest); } } return 0; nla_put_failure: nla_nest_cancel(skb, nest); return -1; #else return 0; #endif } EXPORT_SYMBOL(tcf_exts_dump); int tcf_exts_terse_dump(struct sk_buff *skb, struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT struct nlattr *nest; if (!exts->action || !tcf_exts_has_actions(exts)) return 0; nest = nla_nest_start_noflag(skb, exts->action); if (!nest) goto nla_put_failure; if (tcf_action_dump(skb, exts->actions, 0, 0, true) < 0) goto nla_put_failure; nla_nest_end(skb, nest); return 0; nla_put_failure: nla_nest_cancel(skb, nest); return -1; #else return 0; #endif } EXPORT_SYMBOL(tcf_exts_terse_dump); int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT struct tc_action *a = tcf_exts_first_act(exts); if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0) return -1; #endif return 0; } EXPORT_SYMBOL(tcf_exts_dump_stats); static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) { if (*flags & TCA_CLS_FLAGS_IN_HW) return; *flags |= TCA_CLS_FLAGS_IN_HW; atomic_inc(&block->offloadcnt); } static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) { if (!(*flags & TCA_CLS_FLAGS_IN_HW)) return; *flags &= ~TCA_CLS_FLAGS_IN_HW; atomic_dec(&block->offloadcnt); } static void tc_cls_offload_cnt_update(struct tcf_block *block, struct tcf_proto *tp, u32 *cnt, u32 *flags, u32 diff, bool add) { lockdep_assert_held(&block->cb_lock); spin_lock(&tp->lock); if (add) { if (!*cnt) tcf_block_offload_inc(block, flags); *cnt += diff; } else { *cnt -= diff; if (!*cnt) tcf_block_offload_dec(block, flags); } spin_unlock(&tp->lock); } static void tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp, u32 *cnt, u32 *flags) { lockdep_assert_held(&block->cb_lock); spin_lock(&tp->lock); tcf_block_offload_dec(block, flags); *cnt = 0; spin_unlock(&tp->lock); } static int __tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop) { struct flow_block_cb *block_cb; int ok_count = 0; int err; list_for_each_entry(block_cb, &block->flow_block.cb_list, list) { err = block_cb->cb(type, type_data, block_cb->cb_priv); if (err) { if (err_stop) return err; } else { ok_count++; } } return ok_count; } int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop, bool rtnl_held) { bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; int ok_count; retry: if (take_rtnl) rtnl_lock(); down_read(&block->cb_lock); /* Need to obtain rtnl lock if block is bound to devs that require it. * In block bind code cb_lock is obtained while holding rtnl, so we must * obtain the locks in same order here. */ if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { up_read(&block->cb_lock); take_rtnl = true; goto retry; } ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); up_read(&block->cb_lock); if (take_rtnl) rtnl_unlock(); return ok_count; } EXPORT_SYMBOL(tc_setup_cb_call); /* Non-destructive filter add. If filter that wasn't already in hardware is * successfully offloaded, increment block offloads counter. On failure, * previously offloaded filter is considered to be intact and offloads counter * is not decremented. */ int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, enum tc_setup_type type, void *type_data, bool err_stop, u32 *flags, unsigned int *in_hw_count, bool rtnl_held) { bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; int ok_count; retry: if (take_rtnl) rtnl_lock(); down_read(&block->cb_lock); /* Need to obtain rtnl lock if block is bound to devs that require it. * In block bind code cb_lock is obtained while holding rtnl, so we must * obtain the locks in same order here. */ if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { up_read(&block->cb_lock); take_rtnl = true; goto retry; } /* Make sure all netdevs sharing this block are offload-capable. */ if (block->nooffloaddevcnt && err_stop) { ok_count = -EOPNOTSUPP; goto err_unlock; } ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); if (ok_count < 0) goto err_unlock; if (tp->ops->hw_add) tp->ops->hw_add(tp, type_data); if (ok_count > 0) tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, ok_count, true); err_unlock: up_read(&block->cb_lock); if (take_rtnl) rtnl_unlock(); return min(ok_count, 0); } EXPORT_SYMBOL(tc_setup_cb_add); /* Destructive filter replace. If filter that wasn't already in hardware is * successfully offloaded, increment block offload counter. On failure, * previously offloaded filter is considered to be destroyed and offload counter * is decremented. */ int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp, enum tc_setup_type type, void *type_data, bool err_stop, u32 *old_flags, unsigned int *old_in_hw_count, u32 *new_flags, unsigned int *new_in_hw_count, bool rtnl_held) { bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; int ok_count; retry: if (take_rtnl) rtnl_lock(); down_read(&block->cb_lock); /* Need to obtain rtnl lock if block is bound to devs that require it. * In block bind code cb_lock is obtained while holding rtnl, so we must * obtain the locks in same order here. */ if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { up_read(&block->cb_lock); take_rtnl = true; goto retry; } /* Make sure all netdevs sharing this block are offload-capable. */ if (block->nooffloaddevcnt && err_stop) { ok_count = -EOPNOTSUPP; goto err_unlock; } tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags); if (tp->ops->hw_del) tp->ops->hw_del(tp, type_data); ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); if (ok_count < 0) goto err_unlock; if (tp->ops->hw_add) tp->ops->hw_add(tp, type_data); if (ok_count > 0) tc_cls_offload_cnt_update(block, tp, new_in_hw_count, new_flags, ok_count, true); err_unlock: up_read(&block->cb_lock); if (take_rtnl) rtnl_unlock(); return min(ok_count, 0); } EXPORT_SYMBOL(tc_setup_cb_replace); /* Destroy filter and decrement block offload counter, if filter was previously * offloaded. */ int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp, enum tc_setup_type type, void *type_data, bool err_stop, u32 *flags, unsigned int *in_hw_count, bool rtnl_held) { bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; int ok_count; retry: if (take_rtnl) rtnl_lock(); down_read(&block->cb_lock); /* Need to obtain rtnl lock if block is bound to devs that require it. * In block bind code cb_lock is obtained while holding rtnl, so we must * obtain the locks in same order here. */ if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { up_read(&block->cb_lock); take_rtnl = true; goto retry; } ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags); if (tp->ops->hw_del) tp->ops->hw_del(tp, type_data); up_read(&block->cb_lock); if (take_rtnl) rtnl_unlock(); return min(ok_count, 0); } EXPORT_SYMBOL(tc_setup_cb_destroy); int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, enum tc_setup_type type, void *type_data, void *cb_priv, u32 *flags, unsigned int *in_hw_count) { int err = cb(type, type_data, cb_priv); if (err) { if (add && tc_skip_sw(*flags)) return err; } else { tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1, add); } return 0; } EXPORT_SYMBOL(tc_setup_cb_reoffload); static int tcf_act_get_user_cookie(struct flow_action_entry *entry, const struct tc_action *act) { struct tc_cookie *user_cookie; int err = 0; rcu_read_lock(); user_cookie = rcu_dereference(act->user_cookie); if (user_cookie) { entry->user_cookie = flow_action_cookie_create(user_cookie->data, user_cookie->len, GFP_ATOMIC); if (!entry->user_cookie) err = -ENOMEM; } rcu_read_unlock(); return err; } static void tcf_act_put_user_cookie(struct flow_action_entry *entry) { flow_action_cookie_destroy(entry->user_cookie); } void tc_cleanup_offload_action(struct flow_action *flow_action) { struct flow_action_entry *entry; int i; flow_action_for_each(i, entry, flow_action) { tcf_act_put_user_cookie(entry); if (entry->destructor) entry->destructor(entry->destructor_priv); } } EXPORT_SYMBOL(tc_cleanup_offload_action); static int tc_setup_offload_act(struct tc_action *act, struct flow_action_entry *entry, u32 *index_inc, struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT if (act->ops->offload_act_setup) { return act->ops->offload_act_setup(act, entry, index_inc, true, extack); } else { NL_SET_ERR_MSG(extack, "Action does not support offload"); return -EOPNOTSUPP; } #else return 0; #endif } int tc_setup_action(struct flow_action *flow_action, struct tc_action *actions[], u32 miss_cookie_base, struct netlink_ext_ack *extack) { int i, j, k, index, err = 0; struct tc_action *act; BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY); BUILD_BUG_ON(TCA_ACT_HW_STATS_IMMEDIATE != FLOW_ACTION_HW_STATS_IMMEDIATE); BUILD_BUG_ON(TCA_ACT_HW_STATS_DELAYED != FLOW_ACTION_HW_STATS_DELAYED); if (!actions) return 0; j = 0; tcf_act_for_each_action(i, act, actions) { struct flow_action_entry *entry; entry = &flow_action->entries[j]; spin_lock_bh(&act->tcfa_lock); err = tcf_act_get_user_cookie(entry, act); if (err) goto err_out_locked; index = 0; err = tc_setup_offload_act(act, entry, &index, extack); if (err) goto err_out_locked; for (k = 0; k < index ; k++) { entry[k].hw_stats = tc_act_hw_stats(act->hw_stats); entry[k].hw_index = act->tcfa_index; entry[k].cookie = (unsigned long)act; entry[k].miss_cookie = tcf_exts_miss_cookie_get(miss_cookie_base, i); } j += index; spin_unlock_bh(&act->tcfa_lock); } err_out: if (err) tc_cleanup_offload_action(flow_action); return err; err_out_locked: spin_unlock_bh(&act->tcfa_lock); goto err_out; } int tc_setup_offload_action(struct flow_action *flow_action, const struct tcf_exts *exts, struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT u32 miss_cookie_base; if (!exts) return 0; miss_cookie_base = exts->miss_cookie_node ? exts->miss_cookie_node->miss_cookie_base : 0; return tc_setup_action(flow_action, exts->actions, miss_cookie_base, extack); #else return 0; #endif } EXPORT_SYMBOL(tc_setup_offload_action); unsigned int tcf_exts_num_actions(struct tcf_exts *exts) { unsigned int num_acts = 0; struct tc_action *act; int i; tcf_exts_for_each_action(i, act, exts) { if (is_tcf_pedit(act)) num_acts += tcf_pedit_nkeys(act); else num_acts++; } return num_acts; } EXPORT_SYMBOL(tcf_exts_num_actions); #ifdef CONFIG_NET_CLS_ACT static int tcf_qevent_parse_block_index(struct nlattr *block_index_attr, u32 *p_block_index, struct netlink_ext_ack *extack) { *p_block_index = nla_get_u32(block_index_attr); if (!*p_block_index) { NL_SET_ERR_MSG(extack, "Block number may not be zero"); return -EINVAL; } return 0; } int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch, enum flow_block_binder_type binder_type, struct nlattr *block_index_attr, struct netlink_ext_ack *extack) { u32 block_index; int err; if (!block_index_attr) return 0; err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack); if (err) return err; qe->info.binder_type = binder_type; qe->info.chain_head_change = tcf_chain_head_change_dflt; qe->info.chain_head_change_priv = &qe->filter_chain; qe->info.block_index = block_index; return tcf_block_get_ext(&qe->block, sch, &qe->info, extack); } EXPORT_SYMBOL(tcf_qevent_init); void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch) { if (qe->info.block_index) tcf_block_put_ext(qe->block, sch, &qe->info); } EXPORT_SYMBOL(tcf_qevent_destroy); int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr, struct netlink_ext_ack *extack) { u32 block_index; int err; if (!block_index_attr) return 0; err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack); if (err) return err; /* Bounce newly-configured block or change in block. */ if (block_index != qe->info.block_index) { NL_SET_ERR_MSG(extack, "Change of blocks is not supported"); return -EINVAL; } return 0; } EXPORT_SYMBOL(tcf_qevent_validate_change); struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb, struct sk_buff **to_free, int *ret) { struct tcf_result cl_res; struct tcf_proto *fl; if (!qe->info.block_index) return skb; fl = rcu_dereference_bh(qe->filter_chain); switch (tcf_classify(skb, NULL, fl, &cl_res, false)) { case TC_ACT_SHOT: qdisc_qstats_drop(sch); __qdisc_drop(skb, to_free); *ret = __NET_XMIT_BYPASS; return NULL; case TC_ACT_STOLEN: case TC_ACT_QUEUED: case TC_ACT_TRAP: __qdisc_drop(skb, to_free); *ret = __NET_XMIT_STOLEN; return NULL; case TC_ACT_REDIRECT: skb_do_redirect(skb); *ret = __NET_XMIT_STOLEN; return NULL; } return skb; } EXPORT_SYMBOL(tcf_qevent_handle); int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe) { if (!qe->info.block_index) return 0; return nla_put_u32(skb, attr_name, qe->info.block_index); } EXPORT_SYMBOL(tcf_qevent_dump); #endif static __net_init int tcf_net_init(struct net *net) { struct tcf_net *tn = net_generic(net, tcf_net_id); spin_lock_init(&tn->idr_lock); idr_init(&tn->idr); return 0; } static void __net_exit tcf_net_exit(struct net *net) { struct tcf_net *tn = net_generic(net, tcf_net_id); idr_destroy(&tn->idr); } static struct pernet_operations tcf_net_ops = { .init = tcf_net_init, .exit = tcf_net_exit, .id = &tcf_net_id, .size = sizeof(struct tcf_net), }; static const struct rtnl_msg_handler tc_filter_rtnl_msg_handlers[] __initconst = { {.msgtype = RTM_NEWTFILTER, .doit = tc_new_tfilter, .flags = RTNL_FLAG_DOIT_UNLOCKED}, {.msgtype = RTM_DELTFILTER, .doit = tc_del_tfilter, .flags = RTNL_FLAG_DOIT_UNLOCKED}, {.msgtype = RTM_GETTFILTER, .doit = tc_get_tfilter, .dumpit = tc_dump_tfilter, .flags = RTNL_FLAG_DOIT_UNLOCKED}, {.msgtype = RTM_NEWCHAIN, .doit = tc_ctl_chain}, {.msgtype = RTM_DELCHAIN, .doit = tc_ctl_chain}, {.msgtype = RTM_GETCHAIN, .doit = tc_ctl_chain, .dumpit = tc_dump_chain}, }; static int __init tc_filter_init(void) { int err; tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0); if (!tc_filter_wq) return -ENOMEM; err = register_pernet_subsys(&tcf_net_ops); if (err) goto err_register_pernet_subsys; xa_init_flags(&tcf_exts_miss_cookies_xa, XA_FLAGS_ALLOC1); rtnl_register_many(tc_filter_rtnl_msg_handlers); return 0; err_register_pernet_subsys: destroy_workqueue(tc_filter_wq); return err; } subsys_initcall(tc_filter_init); |
| 3 3 3 3 3 1 3 3 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2019 FORTH-ICS/CARV * Nick Kossifidis <mick@ics.forth.gr> */ #include <linux/kexec.h> #include <asm/kexec.h> /* For riscv_kexec_* symbol defines */ #include <linux/smp.h> /* For smp_send_stop () */ #include <asm/cacheflush.h> /* For local_flush_icache_all() */ #include <asm/barrier.h> /* For smp_wmb() */ #include <asm/page.h> /* For PAGE_MASK */ #include <linux/libfdt.h> /* For fdt_check_header() */ #include <asm/set_memory.h> /* For set_memory_x() */ #include <linux/compiler.h> /* For unreachable() */ #include <linux/cpu.h> /* For cpu_down() */ #include <linux/reboot.h> #include <linux/interrupt.h> #include <linux/irq.h> /* * machine_kexec_prepare - Initialize kexec * * This function is called from do_kexec_load, when the user has * provided us with an image to be loaded. Its goal is to validate * the image and prepare the control code buffer as needed. * Note that kimage_alloc_init has already been called and the * control buffer has already been allocated. */ int machine_kexec_prepare(struct kimage *image) { struct kimage_arch *internal = &image->arch; struct fdt_header fdt = {0}; void *control_code_buffer = NULL; unsigned int control_code_buffer_sz = 0; int i = 0; /* Find the Flattened Device Tree and save its physical address */ for (i = 0; i < image->nr_segments; i++) { if (image->segment[i].memsz <= sizeof(fdt)) continue; if (image->file_mode) memcpy(&fdt, image->segment[i].buf, sizeof(fdt)); else if (copy_from_user(&fdt, image->segment[i].buf, sizeof(fdt))) continue; if (fdt_check_header(&fdt)) continue; internal->fdt_addr = (unsigned long) image->segment[i].mem; break; } if (!internal->fdt_addr) { pr_err("Device tree not included in the provided image\n"); return -EINVAL; } /* Copy the assembler code for relocation to the control page */ if (image->type != KEXEC_TYPE_CRASH) { control_code_buffer = page_address(image->control_code_page); control_code_buffer_sz = page_size(image->control_code_page); if (unlikely(riscv_kexec_relocate_size > control_code_buffer_sz)) { pr_err("Relocation code doesn't fit within a control page\n"); return -EINVAL; } memcpy(control_code_buffer, riscv_kexec_relocate, riscv_kexec_relocate_size); /* Mark the control page executable */ set_memory_x((unsigned long) control_code_buffer, 1); } return 0; } /* * machine_kexec_cleanup - Cleanup any leftovers from * machine_kexec_prepare * * This function is called by kimage_free to handle any arch-specific * allocations done on machine_kexec_prepare. Since we didn't do any * allocations there, this is just an empty function. Note that the * control buffer is freed by kimage_free. */ void machine_kexec_cleanup(struct kimage *image) { } /* * machine_shutdown - Prepare for a kexec reboot * * This function is called by kernel_kexec just before machine_kexec * below. Its goal is to prepare the rest of the system (the other * harts and possibly devices etc) for a kexec reboot. */ void machine_shutdown(void) { /* * No more interrupts on this hart * until we are back up. */ local_irq_disable(); #if defined(CONFIG_HOTPLUG_CPU) smp_shutdown_nonboot_cpus(smp_processor_id()); #endif } /* * machine_crash_shutdown - Prepare to kexec after a kernel crash * * This function is called by crash_kexec just before machine_kexec * and its goal is to shutdown non-crashing cpus and save registers. */ void machine_crash_shutdown(struct pt_regs *regs) { local_irq_disable(); /* shutdown non-crashing cpus */ crash_smp_send_stop(); crash_save_cpu(regs, smp_processor_id()); machine_kexec_mask_interrupts(); pr_info("Starting crashdump kernel...\n"); } /* * machine_kexec - Jump to the loaded kimage * * This function is called by kernel_kexec which is called by the * reboot system call when the reboot cmd is LINUX_REBOOT_CMD_KEXEC, * or by crash_kernel which is called by the kernel's arch-specific * trap handler in case of a kernel panic. It's the final stage of * the kexec process where the pre-loaded kimage is ready to be * executed. We assume at this point that all other harts are * suspended and this hart will be the new boot hart. */ void __noreturn machine_kexec(struct kimage *image) { struct kimage_arch *internal = &image->arch; unsigned long jump_addr = (unsigned long) image->start; unsigned long first_ind_entry = (unsigned long) &image->head; unsigned long this_cpu_id = __smp_processor_id(); unsigned long this_hart_id = cpuid_to_hartid_map(this_cpu_id); unsigned long fdt_addr = internal->fdt_addr; void *control_code_buffer = page_address(image->control_code_page); riscv_kexec_method kexec_method = NULL; #ifdef CONFIG_SMP WARN(smp_crash_stop_failed(), "Some CPUs may be stale, kdump will be unreliable.\n"); #endif if (image->type != KEXEC_TYPE_CRASH) kexec_method = control_code_buffer; else kexec_method = (riscv_kexec_method) &riscv_kexec_norelocate; pr_notice("Will call new kernel at %08lx from hart id %lx\n", jump_addr, this_hart_id); pr_notice("FDT image at %08lx\n", fdt_addr); /* Make sure the relocation code is visible to the hart */ local_flush_icache_all(); /* Jump to the relocation code */ pr_notice("Bye...\n"); kexec_method(first_ind_entry, jump_addr, fdt_addr, this_hart_id, kernel_map.va_pa_offset); unreachable(); } |
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 | /* SPDX-License-Identifier: GPL-2.0+ */ /* * MACsec netdev header, used for h/w accelerated implementations. * * Copyright (c) 2015 Sabrina Dubroca <sd@queasysnail.net> */ #ifndef _NET_MACSEC_H_ #define _NET_MACSEC_H_ #include <linux/u64_stats_sync.h> #include <linux/if_vlan.h> #include <uapi/linux/if_link.h> #include <uapi/linux/if_macsec.h> #define MACSEC_DEFAULT_PN_LEN 4 #define MACSEC_XPN_PN_LEN 8 #define MACSEC_NUM_AN 4 /* 2 bits for the association number */ #define MACSEC_SCI_LEN 8 #define MACSEC_PORT_ES (htons(0x0001)) #define MACSEC_TCI_VERSION 0x80 #define MACSEC_TCI_ES 0x40 /* end station */ #define MACSEC_TCI_SC 0x20 /* SCI present */ #define MACSEC_TCI_SCB 0x10 /* epon */ #define MACSEC_TCI_E 0x08 /* encryption */ #define MACSEC_TCI_C 0x04 /* changed text */ #define MACSEC_AN_MASK 0x03 /* association number */ #define MACSEC_TCI_CONFID (MACSEC_TCI_E | MACSEC_TCI_C) #define MACSEC_DEFAULT_ICV_LEN 16 typedef u64 __bitwise sci_t; typedef u32 __bitwise ssci_t; struct metadata_dst; typedef union salt { struct { ssci_t ssci; __be64 pn; } __packed; u8 bytes[MACSEC_SALT_LEN]; } __packed salt_t; typedef union pn { struct { #if defined(__LITTLE_ENDIAN_BITFIELD) u32 lower; u32 upper; #elif defined(__BIG_ENDIAN_BITFIELD) u32 upper; u32 lower; #else #error "Please fix <asm/byteorder.h>" #endif }; u64 full64; } pn_t; /** * struct macsec_key - SA key * @id: user-provided key identifier * @tfm: crypto struct, key storage * @salt: salt used to generate IV in XPN cipher suites */ struct macsec_key { u8 id[MACSEC_KEYID_LEN]; struct crypto_aead *tfm; salt_t salt; }; struct macsec_rx_sc_stats { __u64 InOctetsValidated; __u64 InOctetsDecrypted; __u64 InPktsUnchecked; __u64 InPktsDelayed; __u64 InPktsOK; __u64 InPktsInvalid; __u64 InPktsLate; __u64 InPktsNotValid; __u64 InPktsNotUsingSA; __u64 InPktsUnusedSA; }; struct macsec_rx_sa_stats { __u32 InPktsOK; __u32 InPktsInvalid; __u32 InPktsNotValid; __u32 InPktsNotUsingSA; __u32 InPktsUnusedSA; }; struct macsec_tx_sa_stats { __u32 OutPktsProtected; __u32 OutPktsEncrypted; }; struct macsec_tx_sc_stats { __u64 OutPktsProtected; __u64 OutPktsEncrypted; __u64 OutOctetsProtected; __u64 OutOctetsEncrypted; }; struct macsec_dev_stats { __u64 OutPktsUntagged; __u64 InPktsUntagged; __u64 OutPktsTooLong; __u64 InPktsNoTag; __u64 InPktsBadTag; __u64 InPktsUnknownSCI; __u64 InPktsNoSCI; __u64 InPktsOverrun; }; /** * struct macsec_rx_sa - receive secure association * @active: * @next_pn: packet number expected for the next packet * @lock: protects next_pn manipulations * @key: key structure * @ssci: short secure channel identifier * @stats: per-SA stats */ struct macsec_rx_sa { struct macsec_key key; ssci_t ssci; spinlock_t lock; union { pn_t next_pn_halves; u64 next_pn; }; refcount_t refcnt; bool active; struct macsec_rx_sa_stats __percpu *stats; struct macsec_rx_sc *sc; struct rcu_head rcu; }; struct pcpu_rx_sc_stats { struct macsec_rx_sc_stats stats; struct u64_stats_sync syncp; }; struct pcpu_tx_sc_stats { struct macsec_tx_sc_stats stats; struct u64_stats_sync syncp; }; /** * struct macsec_rx_sc - receive secure channel * @sci: secure channel identifier for this SC * @active: channel is active * @sa: array of secure associations * @stats: per-SC stats */ struct macsec_rx_sc { struct macsec_rx_sc __rcu *next; sci_t sci; bool active; struct macsec_rx_sa __rcu *sa[MACSEC_NUM_AN]; struct pcpu_rx_sc_stats __percpu *stats; refcount_t refcnt; struct rcu_head rcu_head; }; /** * struct macsec_tx_sa - transmit secure association * @active: * @next_pn: packet number to use for the next packet * @lock: protects next_pn manipulations * @key: key structure * @ssci: short secure channel identifier * @stats: per-SA stats */ struct macsec_tx_sa { struct macsec_key key; ssci_t ssci; spinlock_t lock; union { pn_t next_pn_halves; u64 next_pn; }; refcount_t refcnt; bool active; struct macsec_tx_sa_stats __percpu *stats; struct rcu_head rcu; }; /** * struct macsec_tx_sc - transmit secure channel * @active: * @encoding_sa: association number of the SA currently in use * @encrypt: encrypt packets on transmit, or authenticate only * @send_sci: always include the SCI in the SecTAG * @end_station: * @scb: single copy broadcast flag * @sa: array of secure associations * @stats: stats for this TXSC * @md_dst: MACsec offload metadata dst */ struct macsec_tx_sc { bool active; u8 encoding_sa; bool encrypt; bool send_sci; bool end_station; bool scb; struct macsec_tx_sa __rcu *sa[MACSEC_NUM_AN]; struct pcpu_tx_sc_stats __percpu *stats; struct metadata_dst *md_dst; }; /** * struct macsec_secy - MACsec Security Entity * @netdev: netdevice for this SecY * @n_rx_sc: number of receive secure channels configured on this SecY * @sci: secure channel identifier used for tx * @key_len: length of keys used by the cipher suite * @icv_len: length of ICV used by the cipher suite * @validate_frames: validation mode * @xpn: enable XPN for this SecY * @operational: MAC_Operational flag * @protect_frames: enable protection for this SecY * @replay_protect: enable packet number checks on receive * @replay_window: size of the replay window * @tx_sc: transmit secure channel * @rx_sc: linked list of receive secure channels */ struct macsec_secy { struct net_device *netdev; unsigned int n_rx_sc; sci_t sci; u16 key_len; u16 icv_len; enum macsec_validation_type validate_frames; bool xpn; bool operational; bool protect_frames; bool replay_protect; u32 replay_window; struct macsec_tx_sc tx_sc; struct macsec_rx_sc __rcu *rx_sc; }; /** * struct macsec_context - MACsec context for hardware offloading * @netdev: a valid pointer to a struct net_device if @offload == * MACSEC_OFFLOAD_MAC * @phydev: a valid pointer to a struct phy_device if @offload == * MACSEC_OFFLOAD_PHY * @offload: MACsec offload status * @secy: pointer to a MACsec SecY * @rx_sc: pointer to a RX SC * @update_pn: when updating the SA, update the next PN * @assoc_num: association number of the target SA * @key: key of the target SA * @rx_sa: pointer to an RX SA if a RX SA is added/updated/removed * @tx_sa: pointer to an TX SA if a TX SA is added/updated/removed * @tx_sc_stats: pointer to TX SC stats structure * @tx_sa_stats: pointer to TX SA stats structure * @rx_sc_stats: pointer to RX SC stats structure * @rx_sa_stats: pointer to RX SA stats structure * @dev_stats: pointer to dev stats structure */ struct macsec_context { union { struct net_device *netdev; struct phy_device *phydev; }; enum macsec_offload offload; struct macsec_secy *secy; struct macsec_rx_sc *rx_sc; struct { bool update_pn; unsigned char assoc_num; u8 key[MACSEC_MAX_KEY_LEN]; union { struct macsec_rx_sa *rx_sa; struct macsec_tx_sa *tx_sa; }; } sa; union { struct macsec_tx_sc_stats *tx_sc_stats; struct macsec_tx_sa_stats *tx_sa_stats; struct macsec_rx_sc_stats *rx_sc_stats; struct macsec_rx_sa_stats *rx_sa_stats; struct macsec_dev_stats *dev_stats; } stats; }; /** * struct macsec_ops - MACsec offloading operations * @mdo_dev_open: called when the MACsec interface transitions to the up state * @mdo_dev_stop: called when the MACsec interface transitions to the down * state * @mdo_add_secy: called when a new SecY is added * @mdo_upd_secy: called when the SecY flags are changed or the MAC address of * the MACsec interface is changed * @mdo_del_secy: called when the hw offload is disabled or the MACsec * interface is removed * @mdo_add_rxsc: called when a new RX SC is added * @mdo_upd_rxsc: called when a certain RX SC is updated * @mdo_del_rxsc: called when a certain RX SC is removed * @mdo_add_rxsa: called when a new RX SA is added * @mdo_upd_rxsa: called when a certain RX SA is updated * @mdo_del_rxsa: called when a certain RX SA is removed * @mdo_add_txsa: called when a new TX SA is added * @mdo_upd_txsa: called when a certain TX SA is updated * @mdo_del_txsa: called when a certain TX SA is removed * @mdo_get_dev_stats: called when dev stats are read * @mdo_get_tx_sc_stats: called when TX SC stats are read * @mdo_get_tx_sa_stats: called when TX SA stats are read * @mdo_get_rx_sc_stats: called when RX SC stats are read * @mdo_get_rx_sa_stats: called when RX SA stats are read * @mdo_insert_tx_tag: called to insert the TX tag * @needed_headroom: number of bytes reserved at the beginning of the sk_buff * for the TX tag * @needed_tailroom: number of bytes reserved at the end of the sk_buff for the * TX tag * @rx_uses_md_dst: whether MACsec device offload supports sk_buff md_dst */ struct macsec_ops { /* Device wide */ int (*mdo_dev_open)(struct macsec_context *ctx); int (*mdo_dev_stop)(struct macsec_context *ctx); /* SecY */ int (*mdo_add_secy)(struct macsec_context *ctx); int (*mdo_upd_secy)(struct macsec_context *ctx); int (*mdo_del_secy)(struct macsec_context *ctx); /* Security channels */ int (*mdo_add_rxsc)(struct macsec_context *ctx); int (*mdo_upd_rxsc)(struct macsec_context *ctx); int (*mdo_del_rxsc)(struct macsec_context *ctx); /* Security associations */ int (*mdo_add_rxsa)(struct macsec_context *ctx); int (*mdo_upd_rxsa)(struct macsec_context *ctx); int (*mdo_del_rxsa)(struct macsec_context *ctx); int (*mdo_add_txsa)(struct macsec_context *ctx); int (*mdo_upd_txsa)(struct macsec_context *ctx); int (*mdo_del_txsa)(struct macsec_context *ctx); /* Statistics */ int (*mdo_get_dev_stats)(struct macsec_context *ctx); int (*mdo_get_tx_sc_stats)(struct macsec_context *ctx); int (*mdo_get_tx_sa_stats)(struct macsec_context *ctx); int (*mdo_get_rx_sc_stats)(struct macsec_context *ctx); int (*mdo_get_rx_sa_stats)(struct macsec_context *ctx); /* Offload tag */ int (*mdo_insert_tx_tag)(struct phy_device *phydev, struct sk_buff *skb); unsigned int needed_headroom; unsigned int needed_tailroom; bool rx_uses_md_dst; }; void macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa); static inline bool macsec_send_sci(const struct macsec_secy *secy) { const struct macsec_tx_sc *tx_sc = &secy->tx_sc; return tx_sc->send_sci || (secy->n_rx_sc > 1 && !tx_sc->end_station && !tx_sc->scb); } struct net_device *macsec_get_real_dev(const struct net_device *dev); bool macsec_netdev_is_offloaded(struct net_device *dev); static inline void *macsec_netdev_priv(const struct net_device *dev) { #if IS_ENABLED(CONFIG_VLAN_8021Q) if (is_vlan_dev(dev)) return netdev_priv(vlan_dev_priv(dev)->real_dev); #endif return netdev_priv(dev); } static inline u64 sci_to_cpu(sci_t sci) { return be64_to_cpu((__force __be64)sci); } #endif /* _NET_MACSEC_H_ */ |
| 11 11 11 1 1 1 28 2 2 2 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 | // SPDX-License-Identifier: MIT #include <linux/export.h> #include <linux/fb.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_shmem.h> #include <drm/drm_fb_helper.h> #include <drm/drm_framebuffer.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_gem_shmem_helper.h> /* * struct fb_ops */ static int drm_fbdev_shmem_fb_open(struct fb_info *info, int user) { struct drm_fb_helper *fb_helper = info->par; /* No need to take a ref for fbcon because it unbinds on unregister */ if (user && !try_module_get(fb_helper->dev->driver->fops->owner)) return -ENODEV; return 0; } static int drm_fbdev_shmem_fb_release(struct fb_info *info, int user) { struct drm_fb_helper *fb_helper = info->par; if (user) module_put(fb_helper->dev->driver->fops->owner); return 0; } FB_GEN_DEFAULT_DEFERRED_SYSMEM_OPS(drm_fbdev_shmem, drm_fb_helper_damage_range, drm_fb_helper_damage_area); static int drm_fbdev_shmem_fb_mmap(struct fb_info *info, struct vm_area_struct *vma) { struct drm_fb_helper *fb_helper = info->par; struct drm_framebuffer *fb = fb_helper->fb; struct drm_gem_object *obj = drm_gem_fb_get_obj(fb, 0); struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); if (shmem->map_wc) vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); return fb_deferred_io_mmap(info, vma); } static void drm_fbdev_shmem_fb_destroy(struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; if (!fb_helper->dev) return; fb_deferred_io_cleanup(info); drm_fb_helper_fini(fb_helper); drm_client_buffer_vunmap(fb_helper->buffer); drm_client_framebuffer_delete(fb_helper->buffer); drm_client_release(&fb_helper->client); drm_fb_helper_unprepare(fb_helper); kfree(fb_helper); } static const struct fb_ops drm_fbdev_shmem_fb_ops = { .owner = THIS_MODULE, .fb_open = drm_fbdev_shmem_fb_open, .fb_release = drm_fbdev_shmem_fb_release, __FB_DEFAULT_DEFERRED_OPS_RDWR(drm_fbdev_shmem), DRM_FB_HELPER_DEFAULT_OPS, __FB_DEFAULT_DEFERRED_OPS_DRAW(drm_fbdev_shmem), .fb_mmap = drm_fbdev_shmem_fb_mmap, .fb_destroy = drm_fbdev_shmem_fb_destroy, }; static struct page *drm_fbdev_shmem_get_page(struct fb_info *info, unsigned long offset) { struct drm_fb_helper *fb_helper = info->par; struct drm_framebuffer *fb = fb_helper->fb; struct drm_gem_object *obj = drm_gem_fb_get_obj(fb, 0); struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); unsigned int i = offset >> PAGE_SHIFT; struct page *page; if (fb_WARN_ON_ONCE(info, offset > obj->size)) return NULL; page = shmem->pages[i]; // protected by active vmap if (page) get_page(page); fb_WARN_ON_ONCE(info, !page); return page; } /* * struct drm_fb_helper */ static int drm_fbdev_shmem_helper_fb_dirty(struct drm_fb_helper *helper, struct drm_clip_rect *clip) { struct drm_device *dev = helper->dev; int ret; /* Call damage handlers only if necessary */ if (!(clip->x1 < clip->x2 && clip->y1 < clip->y2)) return 0; if (helper->fb->funcs->dirty) { ret = helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, clip, 1); if (drm_WARN_ONCE(dev, ret, "Dirty helper failed: ret=%d\n", ret)) return ret; } return 0; } static const struct drm_fb_helper_funcs drm_fbdev_shmem_helper_funcs = { .fb_dirty = drm_fbdev_shmem_helper_fb_dirty, }; /* * struct drm_driver */ int drm_fbdev_shmem_driver_fbdev_probe(struct drm_fb_helper *fb_helper, struct drm_fb_helper_surface_size *sizes) { struct drm_client_dev *client = &fb_helper->client; struct drm_device *dev = fb_helper->dev; struct drm_client_buffer *buffer; struct drm_gem_shmem_object *shmem; struct drm_framebuffer *fb; struct fb_info *info; u32 format; struct iosys_map map; int ret; drm_dbg_kms(dev, "surface width(%d), height(%d) and bpp(%d)\n", sizes->surface_width, sizes->surface_height, sizes->surface_bpp); format = drm_driver_legacy_fb_format(dev, sizes->surface_bpp, sizes->surface_depth); buffer = drm_client_framebuffer_create(client, sizes->surface_width, sizes->surface_height, format); if (IS_ERR(buffer)) return PTR_ERR(buffer); shmem = to_drm_gem_shmem_obj(buffer->gem); fb = buffer->fb; ret = drm_client_buffer_vmap(buffer, &map); if (ret) { goto err_drm_client_buffer_delete; } else if (drm_WARN_ON(dev, map.is_iomem)) { ret = -ENODEV; /* I/O memory not supported; use generic emulation */ goto err_drm_client_buffer_delete; } fb_helper->funcs = &drm_fbdev_shmem_helper_funcs; fb_helper->buffer = buffer; fb_helper->fb = fb; info = drm_fb_helper_alloc_info(fb_helper); if (IS_ERR(info)) { ret = PTR_ERR(info); goto err_drm_client_buffer_vunmap; } drm_fb_helper_fill_info(info, fb_helper, sizes); info->fbops = &drm_fbdev_shmem_fb_ops; /* screen */ info->flags |= FBINFO_VIRTFB; /* system memory */ if (!shmem->map_wc) info->flags |= FBINFO_READS_FAST; /* signal caching */ info->screen_size = sizes->surface_height * fb->pitches[0]; info->screen_buffer = map.vaddr; info->fix.smem_len = info->screen_size; /* deferred I/O */ fb_helper->fbdefio.delay = HZ / 20; fb_helper->fbdefio.get_page = drm_fbdev_shmem_get_page; fb_helper->fbdefio.deferred_io = drm_fb_helper_deferred_io; info->fbdefio = &fb_helper->fbdefio; ret = fb_deferred_io_init(info); if (ret) goto err_drm_fb_helper_release_info; return 0; err_drm_fb_helper_release_info: drm_fb_helper_release_info(fb_helper); err_drm_client_buffer_vunmap: fb_helper->fb = NULL; fb_helper->buffer = NULL; drm_client_buffer_vunmap(buffer); err_drm_client_buffer_delete: drm_client_framebuffer_delete(buffer); return ret; } EXPORT_SYMBOL(drm_fbdev_shmem_driver_fbdev_probe); |
| 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 | // SPDX-License-Identifier: GPL-2.0 /* * seq_buf.c * * Copyright (C) 2014 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> * * The seq_buf is a handy tool that allows you to pass a descriptor around * to a buffer that other functions can write to. It is similar to the * seq_file functionality but has some differences. * * To use it, the seq_buf must be initialized with seq_buf_init(). * This will set up the counters within the descriptor. You can call * seq_buf_init() more than once to reset the seq_buf to start * from scratch. */ #include <linux/bug.h> #include <linux/err.h> #include <linux/export.h> #include <linux/hex.h> #include <linux/minmax.h> #include <linux/printk.h> #include <linux/seq_buf.h> #include <linux/seq_file.h> #include <linux/sprintf.h> #include <linux/string.h> #include <linux/types.h> #include <linux/uaccess.h> /** * seq_buf_can_fit - can the new data fit in the current buffer? * @s: the seq_buf descriptor * @len: The length to see if it can fit in the current buffer * * Returns: true if there's enough unused space in the seq_buf buffer * to fit the amount of new data according to @len. */ static bool seq_buf_can_fit(struct seq_buf *s, size_t len) { return s->len + len <= s->size; } /** * seq_buf_print_seq - move the contents of seq_buf into a seq_file * @m: the seq_file descriptor that is the destination * @s: the seq_buf descriptor that is the source. * * Returns: zero on success, non-zero otherwise. */ int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s) { unsigned int len = seq_buf_used(s); return seq_write(m, s->buffer, len); } /** * seq_buf_vprintf - sequence printing of information. * @s: seq_buf descriptor * @fmt: printf format string * @args: va_list of arguments from a printf() type function * * Writes a vnprintf() format into the sequence buffer. * * Returns: zero on success, -1 on overflow. */ int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args) { int len; WARN_ON(s->size == 0); if (s->len < s->size) { len = vsnprintf(s->buffer + s->len, s->size - s->len, fmt, args); if (s->len + len < s->size) { s->len += len; return 0; } } seq_buf_set_overflow(s); return -1; } /** * seq_buf_printf - sequence printing of information * @s: seq_buf descriptor * @fmt: printf format string * * Writes a printf() format into the sequence buffer. * * Returns: zero on success, -1 on overflow. */ int seq_buf_printf(struct seq_buf *s, const char *fmt, ...) { va_list ap; int ret; va_start(ap, fmt); ret = seq_buf_vprintf(s, fmt, ap); va_end(ap); return ret; } EXPORT_SYMBOL_GPL(seq_buf_printf); /** * seq_buf_do_printk - printk() seq_buf line by line * @s: seq_buf descriptor * @lvl: printk level * * printk()-s a multi-line sequential buffer line by line. The function * makes sure that the buffer in @s is NUL-terminated and safe to read * as a string. */ void seq_buf_do_printk(struct seq_buf *s, const char *lvl) { const char *start, *lf; if (s->size == 0 || s->len == 0) return; start = seq_buf_str(s); while ((lf = strchr(start, '\n'))) { int len = lf - start + 1; printk("%s%.*s", lvl, len, start); start = ++lf; } /* No trailing LF */ if (start < s->buffer + s->len) printk("%s%s\n", lvl, start); } EXPORT_SYMBOL_GPL(seq_buf_do_printk); #ifdef CONFIG_BINARY_PRINTF /** * seq_buf_bprintf - Write the printf string from binary arguments * @s: seq_buf descriptor * @fmt: The format string for the @binary arguments * @binary: The binary arguments for @fmt. * * When recording in a fast path, a printf may be recorded with just * saving the format and the arguments as they were passed to the * function, instead of wasting cycles converting the arguments into * ASCII characters. Instead, the arguments are saved in a 32 bit * word array that is defined by the format string constraints. * * This function will take the format and the binary array and finish * the conversion into the ASCII string within the buffer. * * Returns: zero on success, -1 on overflow. */ int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary) { unsigned int len = seq_buf_buffer_left(s); int ret; WARN_ON(s->size == 0); if (s->len < s->size) { ret = bstr_printf(s->buffer + s->len, len, fmt, binary); if (s->len + ret < s->size) { s->len += ret; return 0; } } seq_buf_set_overflow(s); return -1; } #endif /* CONFIG_BINARY_PRINTF */ /** * seq_buf_puts - sequence printing of simple string * @s: seq_buf descriptor * @str: simple string to record * * Copy a simple string into the sequence buffer. * * Returns: zero on success, -1 on overflow. */ int seq_buf_puts(struct seq_buf *s, const char *str) { size_t len = strlen(str); WARN_ON(s->size == 0); /* Add 1 to len for the trailing null byte which must be there */ len += 1; if (seq_buf_can_fit(s, len)) { memcpy(s->buffer + s->len, str, len); /* Don't count the trailing null byte against the capacity */ s->len += len - 1; return 0; } seq_buf_set_overflow(s); return -1; } EXPORT_SYMBOL_GPL(seq_buf_puts); /** * seq_buf_putc - sequence printing of simple character * @s: seq_buf descriptor * @c: simple character to record * * Copy a single character into the sequence buffer. * * Returns: zero on success, -1 on overflow. */ int seq_buf_putc(struct seq_buf *s, unsigned char c) { WARN_ON(s->size == 0); if (seq_buf_can_fit(s, 1)) { s->buffer[s->len++] = c; return 0; } seq_buf_set_overflow(s); return -1; } EXPORT_SYMBOL_GPL(seq_buf_putc); /** * seq_buf_putmem - write raw data into the sequence buffer * @s: seq_buf descriptor * @mem: The raw memory to copy into the buffer * @len: The length of the raw memory to copy (in bytes) * * There may be cases where raw memory needs to be written into the * buffer and a strcpy() would not work. Using this function allows * for such cases. * * Returns: zero on success, -1 on overflow. */ int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len) { WARN_ON(s->size == 0); if (seq_buf_can_fit(s, len)) { memcpy(s->buffer + s->len, mem, len); s->len += len; return 0; } seq_buf_set_overflow(s); return -1; } #define MAX_MEMHEX_BYTES 8U #define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) /** * seq_buf_putmem_hex - write raw memory into the buffer in ASCII hex * @s: seq_buf descriptor * @mem: The raw memory to write its hex ASCII representation of * @len: The length of the raw memory to copy (in bytes) * * This is similar to seq_buf_putmem() except instead of just copying the * raw memory into the buffer it writes its ASCII representation of it * in hex characters. * * Returns: zero on success, -1 on overflow. */ int seq_buf_putmem_hex(struct seq_buf *s, const void *mem, unsigned int len) { unsigned char hex[HEX_CHARS]; const unsigned char *data = mem; unsigned int start_len; int i, j; WARN_ON(s->size == 0); BUILD_BUG_ON(MAX_MEMHEX_BYTES * 2 >= HEX_CHARS); while (len) { start_len = min(len, MAX_MEMHEX_BYTES); #ifdef __BIG_ENDIAN for (i = 0, j = 0; i < start_len; i++) { #else for (i = start_len-1, j = 0; i >= 0; i--) { #endif hex[j++] = hex_asc_hi(data[i]); hex[j++] = hex_asc_lo(data[i]); } if (WARN_ON_ONCE(j == 0 || j/2 > len)) break; /* j increments twice per loop */ hex[j++] = ' '; seq_buf_putmem(s, hex, j); if (seq_buf_has_overflowed(s)) return -1; len -= start_len; data += start_len; } return 0; } /** * seq_buf_path - copy a path into the sequence buffer * @s: seq_buf descriptor * @path: path to write into the sequence buffer. * @esc: set of characters to escape in the output * * Write a path name into the sequence buffer. * * Returns: the number of written bytes on success, -1 on overflow. */ int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc) { char *buf; size_t size = seq_buf_get_buf(s, &buf); int res = -1; WARN_ON(s->size == 0); if (size) { char *p = d_path(path, buf, size); if (!IS_ERR(p)) { char *end = mangle_path(buf, p, esc); if (end) res = end - buf; } } seq_buf_commit(s, res); return res; } /** * seq_buf_to_user - copy the sequence buffer to user space * @s: seq_buf descriptor * @ubuf: The userspace memory location to copy to * @start: The first byte in the buffer to copy * @cnt: The amount to copy * * Copies the sequence buffer into the userspace memory pointed to * by @ubuf. It starts from @start and writes up to @cnt characters * or until it reaches the end of the content in the buffer (@s->len), * whichever comes first. * * Returns: * On success, it returns a positive number of the number of bytes * it copied. * * On failure it returns -EBUSY if all of the content in the * sequence has been already read, which includes nothing in the * sequence (@s->len == @start). * * Returns -EFAULT if the copy to userspace fails. */ int seq_buf_to_user(struct seq_buf *s, char __user *ubuf, size_t start, int cnt) { int len; int ret; if (!cnt) return 0; len = seq_buf_used(s); if (len <= start) return -EBUSY; len -= start; if (cnt > len) cnt = len; ret = copy_to_user(ubuf, s->buffer + start, cnt); if (ret == cnt) return -EFAULT; return cnt - ret; } /** * seq_buf_hex_dump - print formatted hex dump into the sequence buffer * @s: seq_buf descriptor * @prefix_str: string to prefix each line with; * caller supplies trailing spaces for alignment if desired * @prefix_type: controls whether prefix of an offset, address, or none * is printed (%DUMP_PREFIX_OFFSET, %DUMP_PREFIX_ADDRESS, %DUMP_PREFIX_NONE) * @rowsize: number of bytes to print per line; must be 16 or 32 * @groupsize: number of bytes to print at a time (1, 2, 4, 8; default = 1) * @buf: data blob to dump * @len: number of bytes in the @buf * @ascii: include ASCII after the hex output * * Function is an analogue of print_hex_dump() and thus has similar interface. * * linebuf size is maximal length for one line. * 32 * 3 - maximum bytes per line, each printed into 2 chars + 1 for * separating space * 2 - spaces separating hex dump and ASCII representation * 32 - ASCII representation * 1 - terminating '\0' * * Returns: zero on success, -1 on overflow. */ int seq_buf_hex_dump(struct seq_buf *s, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii) { const u8 *ptr = buf; int i, linelen, remaining = len; unsigned char linebuf[32 * 3 + 2 + 32 + 1]; int ret; if (rowsize != 16 && rowsize != 32) rowsize = 16; for (i = 0; i < len; i += rowsize) { linelen = min(remaining, rowsize); remaining -= rowsize; hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, linebuf, sizeof(linebuf), ascii); switch (prefix_type) { case DUMP_PREFIX_ADDRESS: ret = seq_buf_printf(s, "%s%p: %s\n", prefix_str, ptr + i, linebuf); break; case DUMP_PREFIX_OFFSET: ret = seq_buf_printf(s, "%s%.8x: %s\n", prefix_str, i, linebuf); break; default: ret = seq_buf_printf(s, "%s%s\n", prefix_str, linebuf); break; } if (ret) return ret; } return 0; } |
| 91 18853 22 19031 19031 18847 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) 2020 SiFive */ #ifndef __ASM_RISCV_VECTOR_H #define __ASM_RISCV_VECTOR_H #include <linux/types.h> #include <uapi/asm-generic/errno.h> #ifdef CONFIG_RISCV_ISA_V #include <linux/stringify.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> #include <asm/ptrace.h> #include <asm/cpufeature.h> #include <asm/csr.h> #include <asm/asm.h> #include <asm/vendorid_list.h> #include <asm/vendor_extensions.h> #include <asm/vendor_extensions/thead.h> #define __riscv_v_vstate_or(_val, TYPE) ({ \ typeof(_val) _res = _val; \ if (has_xtheadvector()) \ _res = (_res & ~SR_VS_THEAD) | SR_VS_##TYPE##_THEAD; \ else \ _res = (_res & ~SR_VS) | SR_VS_##TYPE; \ _res; \ }) #define __riscv_v_vstate_check(_val, TYPE) ({ \ bool _res; \ if (has_xtheadvector()) \ _res = ((_val) & SR_VS_THEAD) == SR_VS_##TYPE##_THEAD; \ else \ _res = ((_val) & SR_VS) == SR_VS_##TYPE; \ _res; \ }) extern unsigned long riscv_v_vsize; int riscv_v_setup_vsize(void); bool insn_is_vector(u32 insn_buf); bool riscv_v_first_use_handler(struct pt_regs *regs); void kernel_vector_begin(void); void kernel_vector_end(void); void get_cpu_vector_context(void); void put_cpu_vector_context(void); void riscv_v_thread_free(struct task_struct *tsk); void __init riscv_v_setup_ctx_cache(void); void riscv_v_thread_alloc(struct task_struct *tsk); static inline u32 riscv_v_flags(void) { return READ_ONCE(current->thread.riscv_v_flags); } static __always_inline bool has_vector(void) { return riscv_has_extension_unlikely(RISCV_ISA_EXT_ZVE32X); } static __always_inline bool has_xtheadvector_no_alternatives(void) { if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) return riscv_isa_vendor_extension_available(THEAD_VENDOR_ID, XTHEADVECTOR); else return false; } static __always_inline bool has_xtheadvector(void) { if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) return riscv_has_vendor_extension_unlikely(THEAD_VENDOR_ID, RISCV_ISA_VENDOR_EXT_XTHEADVECTOR); else return false; } static inline void __riscv_v_vstate_clean(struct pt_regs *regs) { regs->status = __riscv_v_vstate_or(regs->status, CLEAN); } static inline void __riscv_v_vstate_dirty(struct pt_regs *regs) { regs->status = __riscv_v_vstate_or(regs->status, DIRTY); } static inline void riscv_v_vstate_off(struct pt_regs *regs) { regs->status = __riscv_v_vstate_or(regs->status, OFF); } static inline void riscv_v_vstate_on(struct pt_regs *regs) { regs->status = __riscv_v_vstate_or(regs->status, INITIAL); } static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return !__riscv_v_vstate_check(regs->status, OFF); } static __always_inline void riscv_v_enable(void) { if (has_xtheadvector()) csr_set(CSR_SSTATUS, SR_VS_THEAD); else csr_set(CSR_SSTATUS, SR_VS); } static __always_inline void riscv_v_disable(void) { if (has_xtheadvector()) csr_clear(CSR_SSTATUS, SR_VS_THEAD); else csr_clear(CSR_SSTATUS, SR_VS); } static __always_inline bool riscv_v_is_on(void) { return !!(csr_read(CSR_SSTATUS) & SR_VS); } static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest) { asm volatile ( "csrr %0, " __stringify(CSR_VSTART) "\n\t" "csrr %1, " __stringify(CSR_VTYPE) "\n\t" "csrr %2, " __stringify(CSR_VL) "\n\t" : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl), "=r" (dest->vcsr) : :); if (has_xtheadvector()) { unsigned long status; /* * CSR_VCSR is defined as * [2:1] - vxrm[1:0] * [0] - vxsat * The earlier vector spec implemented by T-Head uses separate * registers for the same bit-elements, so just combine those * into the existing output field. * * Additionally T-Head cores need FS to be enabled when accessing * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions. * Though the cores do not implement the VXRM and VXSAT fields in the * FCSR CSR that vector-0.7.1 specifies. */ status = csr_read_set(CSR_STATUS, SR_FS_DIRTY); dest->vcsr = csr_read(CSR_VXSAT) | csr_read(CSR_VXRM) << CSR_VXRM_SHIFT; dest->vlenb = riscv_v_vsize / 32; if ((status & SR_FS) != SR_FS_DIRTY) csr_write(CSR_STATUS, status); } else { dest->vcsr = csr_read(CSR_VCSR); dest->vlenb = csr_read(CSR_VLENB); } } static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src) { asm volatile ( ".option push\n\t" ".option arch, +zve32x\n\t" "vsetvl x0, %2, %1\n\t" ".option pop\n\t" "csrw " __stringify(CSR_VSTART) ", %0\n\t" : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl)); if (has_xtheadvector()) { unsigned long status = csr_read(CSR_SSTATUS); /* * Similar to __vstate_csr_save above, restore values for the * separate VXRM and VXSAT CSRs from the vcsr variable. */ status = csr_read_set(CSR_STATUS, SR_FS_DIRTY); csr_write(CSR_VXRM, (src->vcsr >> CSR_VXRM_SHIFT) & CSR_VXRM_MASK); csr_write(CSR_VXSAT, src->vcsr & CSR_VXSAT_MASK); if ((status & SR_FS) != SR_FS_DIRTY) csr_write(CSR_STATUS, status); } else { csr_write(CSR_VCSR, src->vcsr); } } static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, void *datap) { unsigned long vl; riscv_v_enable(); __vstate_csr_save(save_to); if (has_xtheadvector()) { asm volatile ( "mv t0, %0\n\t" THEAD_VSETVLI_T4X0E8M8D1 THEAD_VSB_V_V0T0 "add t0, t0, t4\n\t" THEAD_VSB_V_V8T0 "add t0, t0, t4\n\t" THEAD_VSB_V_V16T0 "add t0, t0, t4\n\t" THEAD_VSB_V_V24T0 : : "r" (datap) : "memory", "t0", "t4"); } else { asm volatile ( ".option push\n\t" ".option arch, +zve32x\n\t" "vsetvli %0, x0, e8, m8, ta, ma\n\t" "vse8.v v0, (%1)\n\t" "add %1, %1, %0\n\t" "vse8.v v8, (%1)\n\t" "add %1, %1, %0\n\t" "vse8.v v16, (%1)\n\t" "add %1, %1, %0\n\t" "vse8.v v24, (%1)\n\t" ".option pop\n\t" : "=&r" (vl) : "r" (datap) : "memory"); } riscv_v_disable(); } static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_from, void *datap) { unsigned long vl; riscv_v_enable(); if (has_xtheadvector()) { asm volatile ( "mv t0, %0\n\t" THEAD_VSETVLI_T4X0E8M8D1 THEAD_VLB_V_V0T0 "add t0, t0, t4\n\t" THEAD_VLB_V_V8T0 "add t0, t0, t4\n\t" THEAD_VLB_V_V16T0 "add t0, t0, t4\n\t" THEAD_VLB_V_V24T0 : : "r" (datap) : "memory", "t0", "t4"); } else { asm volatile ( ".option push\n\t" ".option arch, +zve32x\n\t" "vsetvli %0, x0, e8, m8, ta, ma\n\t" "vle8.v v0, (%1)\n\t" "add %1, %1, %0\n\t" "vle8.v v8, (%1)\n\t" "add %1, %1, %0\n\t" "vle8.v v16, (%1)\n\t" "add %1, %1, %0\n\t" "vle8.v v24, (%1)\n\t" ".option pop\n\t" : "=&r" (vl) : "r" (datap) : "memory"); } __vstate_csr_restore(restore_from); riscv_v_disable(); } static inline void __riscv_v_vstate_discard(void) { unsigned long vl, vtype_inval = 1UL << (BITS_PER_LONG - 1); riscv_v_enable(); if (has_xtheadvector()) asm volatile (THEAD_VSETVLI_T4X0E8M8D1 : : : "t4"); else asm volatile ( ".option push\n\t" ".option arch, +zve32x\n\t" "vsetvli %0, x0, e8, m8, ta, ma\n\t" ".option pop\n\t": "=&r" (vl)); asm volatile ( ".option push\n\t" ".option arch, +zve32x\n\t" "vmv.v.i v0, -1\n\t" "vmv.v.i v8, -1\n\t" "vmv.v.i v16, -1\n\t" "vmv.v.i v24, -1\n\t" "vsetvl %0, x0, %1\n\t" ".option pop\n\t" : "=&r" (vl) : "r" (vtype_inval)); riscv_v_disable(); } static inline void riscv_v_vstate_discard(struct pt_regs *regs) { if (riscv_v_vstate_query(regs)) { __riscv_v_vstate_discard(); __riscv_v_vstate_dirty(regs); } } static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate, struct pt_regs *regs) { if (__riscv_v_vstate_check(regs->status, DIRTY)) { __riscv_v_vstate_save(vstate, vstate->datap); __riscv_v_vstate_clean(regs); } } static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate, struct pt_regs *regs) { if (riscv_v_vstate_query(regs)) { __riscv_v_vstate_restore(vstate, vstate->datap); __riscv_v_vstate_clean(regs); } } static inline void riscv_v_vstate_set_restore(struct task_struct *task, struct pt_regs *regs) { if (riscv_v_vstate_query(regs)) { set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE); riscv_v_vstate_on(regs); } } #ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE static inline bool riscv_preempt_v_dirty(struct task_struct *task) { return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_DIRTY); } static inline bool riscv_preempt_v_restore(struct task_struct *task) { return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_NEED_RESTORE); } static inline void riscv_preempt_v_clear_dirty(struct task_struct *task) { barrier(); task->thread.riscv_v_flags &= ~RISCV_PREEMPT_V_DIRTY; } static inline void riscv_preempt_v_set_restore(struct task_struct *task) { barrier(); task->thread.riscv_v_flags |= RISCV_PREEMPT_V_NEED_RESTORE; } static inline bool riscv_preempt_v_started(struct task_struct *task) { return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V); } #else /* !CONFIG_RISCV_ISA_V_PREEMPTIVE */ static inline bool riscv_preempt_v_dirty(struct task_struct *task) { return false; } static inline bool riscv_preempt_v_restore(struct task_struct *task) { return false; } static inline bool riscv_preempt_v_started(struct task_struct *task) { return false; } #define riscv_preempt_v_clear_dirty(tsk) do {} while (0) #define riscv_preempt_v_set_restore(tsk) do {} while (0) #endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */ static inline void __switch_to_vector(struct task_struct *prev, struct task_struct *next) { struct pt_regs *regs; if (riscv_preempt_v_started(prev)) { if (riscv_v_is_on()) { WARN_ON(prev->thread.riscv_v_flags & RISCV_V_CTX_DEPTH_MASK); riscv_v_disable(); prev->thread.riscv_v_flags |= RISCV_PREEMPT_V_IN_SCHEDULE; } if (riscv_preempt_v_dirty(prev)) { __riscv_v_vstate_save(&prev->thread.kernel_vstate, prev->thread.kernel_vstate.datap); riscv_preempt_v_clear_dirty(prev); } } else { regs = task_pt_regs(prev); riscv_v_vstate_save(&prev->thread.vstate, regs); } if (riscv_preempt_v_started(next)) { if (next->thread.riscv_v_flags & RISCV_PREEMPT_V_IN_SCHEDULE) { next->thread.riscv_v_flags &= ~RISCV_PREEMPT_V_IN_SCHEDULE; riscv_v_enable(); } else { riscv_preempt_v_set_restore(next); } } else { riscv_v_vstate_set_restore(next, task_pt_regs(next)); } } void riscv_v_vstate_ctrl_init(struct task_struct *tsk); bool riscv_v_vstate_ctrl_user_allowed(void); #else /* ! CONFIG_RISCV_ISA_V */ struct pt_regs; static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; } static __always_inline bool has_vector(void) { return false; } static __always_inline bool insn_is_vector(u32 insn_buf) { return false; } static __always_inline bool has_xtheadvector_no_alternatives(void) { return false; } static __always_inline bool has_xtheadvector(void) { return false; } static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; } static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; } #define riscv_v_vsize (0) #define riscv_v_vstate_discard(regs) do {} while (0) #define riscv_v_vstate_save(vstate, regs) do {} while (0) #define riscv_v_vstate_restore(vstate, regs) do {} while (0) #define __switch_to_vector(__prev, __next) do {} while (0) #define riscv_v_vstate_off(regs) do {} while (0) #define riscv_v_vstate_on(regs) do {} while (0) #define riscv_v_thread_free(tsk) do {} while (0) #define riscv_v_setup_ctx_cache() do {} while (0) #define riscv_v_thread_alloc(tsk) do {} while (0) #endif /* CONFIG_RISCV_ISA_V */ /* * Return the implementation's vlen value. * * riscv_v_vsize contains the value of "32 vector registers with vlenb length" * so rebuild the vlen value in bits from it. */ static inline int riscv_vector_vlen(void) { return riscv_v_vsize / 32 * 8; } #endif /* ! __ASM_RISCV_VECTOR_H */ |
| 183 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Filesystem superblock creation and reconfiguration context. * * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #ifndef _LINUX_FS_CONTEXT_H #define _LINUX_FS_CONTEXT_H #include <linux/kernel.h> #include <linux/refcount.h> #include <linux/errno.h> #include <linux/security.h> #include <linux/mutex.h> struct cred; struct dentry; struct file_operations; struct file_system_type; struct mnt_namespace; struct net; struct pid_namespace; struct super_block; struct user_namespace; struct vfsmount; struct path; enum fs_context_purpose { FS_CONTEXT_FOR_MOUNT, /* New superblock for explicit mount */ FS_CONTEXT_FOR_SUBMOUNT, /* New superblock for automatic submount */ FS_CONTEXT_FOR_RECONFIGURE, /* Superblock reconfiguration (remount) */ }; /* * Userspace usage phase for fsopen/fspick. */ enum fs_context_phase { FS_CONTEXT_CREATE_PARAMS, /* Loading params for sb creation */ FS_CONTEXT_CREATING, /* A superblock is being created */ FS_CONTEXT_AWAITING_MOUNT, /* Superblock created, awaiting fsmount() */ FS_CONTEXT_AWAITING_RECONF, /* Awaiting initialisation for reconfiguration */ FS_CONTEXT_RECONF_PARAMS, /* Loading params for reconfiguration */ FS_CONTEXT_RECONFIGURING, /* Reconfiguring the superblock */ FS_CONTEXT_FAILED, /* Failed to correctly transition a context */ }; /* * Type of parameter value. */ enum fs_value_type { fs_value_is_undefined, fs_value_is_flag, /* Value not given a value */ fs_value_is_string, /* Value is a string */ fs_value_is_blob, /* Value is a binary blob */ fs_value_is_filename, /* Value is a filename* + dirfd */ fs_value_is_file, /* Value is a file* */ }; /* * Configuration parameter. */ struct fs_parameter { const char *key; /* Parameter name */ enum fs_value_type type:8; /* The type of value here */ union { char *string; void *blob; struct filename *name; struct file *file; }; size_t size; int dirfd; }; struct p_log { const char *prefix; struct fc_log *log; }; /* * Filesystem context for holding the parameters used in the creation or * reconfiguration of a superblock. * * Superblock creation fills in ->root whereas reconfiguration begins with this * already set. * * See Documentation/filesystems/mount_api.rst */ struct fs_context { const struct fs_context_operations *ops; struct mutex uapi_mutex; /* Userspace access mutex */ struct file_system_type *fs_type; void *fs_private; /* The filesystem's context */ void *sget_key; struct dentry *root; /* The root and superblock */ struct user_namespace *user_ns; /* The user namespace for this mount */ struct net *net_ns; /* The network namespace for this mount */ const struct cred *cred; /* The mounter's credentials */ struct p_log log; /* Logging buffer */ const char *source; /* The source name (eg. dev path) */ void *security; /* LSM options */ void *s_fs_info; /* Proposed s_fs_info */ unsigned int sb_flags; /* Proposed superblock flags (SB_*) */ unsigned int sb_flags_mask; /* Superblock flags that were changed */ unsigned int s_iflags; /* OR'd with sb->s_iflags */ enum fs_context_purpose purpose:8; enum fs_context_phase phase:8; /* The phase the context is in */ bool need_free:1; /* Need to call ops->free() */ bool global:1; /* Goes into &init_user_ns */ bool oldapi:1; /* Coming from mount(2) */ bool exclusive:1; /* create new superblock, reject existing one */ }; struct fs_context_operations { void (*free)(struct fs_context *fc); int (*dup)(struct fs_context *fc, struct fs_context *src_fc); int (*parse_param)(struct fs_context *fc, struct fs_parameter *param); int (*parse_monolithic)(struct fs_context *fc, void *data); int (*get_tree)(struct fs_context *fc); int (*reconfigure)(struct fs_context *fc); }; /* * fs_context manipulation functions. */ extern struct fs_context *fs_context_for_mount(struct file_system_type *fs_type, unsigned int sb_flags); extern struct fs_context *fs_context_for_reconfigure(struct dentry *dentry, unsigned int sb_flags, unsigned int sb_flags_mask); extern struct fs_context *fs_context_for_submount(struct file_system_type *fs_type, struct dentry *reference); extern struct fs_context *vfs_dup_fs_context(struct fs_context *fc); extern int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param); extern int vfs_parse_fs_qstr(struct fs_context *fc, const char *key, const struct qstr *value); static inline int vfs_parse_fs_string(struct fs_context *fc, const char *key, const char *value) { return vfs_parse_fs_qstr(fc, key, value ? &QSTR(value) : NULL); } int vfs_parse_monolithic_sep(struct fs_context *fc, void *data, char *(*sep)(char **)); extern int generic_parse_monolithic(struct fs_context *fc, void *data); extern int vfs_get_tree(struct fs_context *fc); extern void put_fs_context(struct fs_context *fc); extern int vfs_parse_fs_param_source(struct fs_context *fc, struct fs_parameter *param); extern void fc_drop_locked(struct fs_context *fc); extern int get_tree_nodev(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc)); extern int get_tree_single(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc)); extern int get_tree_keyed(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc), void *key); int setup_bdev_super(struct super_block *sb, int sb_flags, struct fs_context *fc); #define GET_TREE_BDEV_QUIET_LOOKUP 0x0001 int get_tree_bdev_flags(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc), unsigned int flags); extern int get_tree_bdev(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc)); extern const struct file_operations fscontext_fops; /* * Mount error, warning and informational message logging. This structure is * shareable between a mount and a subordinate mount. */ struct fc_log { refcount_t usage; u8 head; /* Insertion index in buffer[] */ u8 tail; /* Removal index in buffer[] */ u8 need_free; /* Mask of kfree'able items in buffer[] */ struct module *owner; /* Owner module for strings that don't then need freeing */ char *buffer[8]; }; extern __attribute__((format(printf, 4, 5))) void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, ...); #define __logfc(fc, l, fmt, ...) \ logfc((fc)->log.log, NULL, (l), (fmt), ## __VA_ARGS__) #define __plogp(p, prefix, l, fmt, ...) \ logfc((p)->log, (prefix), (l), (fmt), ## __VA_ARGS__) #define __plog(p, l, fmt, ...) __plogp(p, (p)->prefix, l, fmt, ## __VA_ARGS__) /** * infof - Store supplementary informational message * @fc: The context in which to log the informational message * @fmt: The format string * * Store the supplementary informational message for the process if the process * has enabled the facility. */ #define infof(fc, fmt, ...) __logfc(fc, 'i', fmt, ## __VA_ARGS__) #define info_plog(p, fmt, ...) __plog(p, 'i', fmt, ## __VA_ARGS__) #define infofc(fc, fmt, ...) __plog((&(fc)->log), 'i', fmt, ## __VA_ARGS__) #define infofcp(fc, prefix, fmt, ...) \ __plogp((&(fc)->log), prefix, 'i', fmt, ## __VA_ARGS__) /** * warnf - Store supplementary warning message * @fc: The context in which to log the error message * @fmt: The format string * * Store the supplementary warning message for the process if the process has * enabled the facility. */ #define warnf(fc, fmt, ...) __logfc(fc, 'w', fmt, ## __VA_ARGS__) #define warn_plog(p, fmt, ...) __plog(p, 'w', fmt, ## __VA_ARGS__) #define warnfc(fc, fmt, ...) __plog((&(fc)->log), 'w', fmt, ## __VA_ARGS__) #define warnfcp(fc, prefix, fmt, ...) \ __plogp((&(fc)->log), prefix, 'w', fmt, ## __VA_ARGS__) /** * errorf - Store supplementary error message * @fc: The context in which to log the error message * @fmt: The format string * * Store the supplementary error message for the process if the process has * enabled the facility. */ #define errorf(fc, fmt, ...) __logfc(fc, 'e', fmt, ## __VA_ARGS__) #define error_plog(p, fmt, ...) __plog(p, 'e', fmt, ## __VA_ARGS__) #define errorfc(fc, fmt, ...) __plog((&(fc)->log), 'e', fmt, ## __VA_ARGS__) #define errorfcp(fc, prefix, fmt, ...) \ __plogp((&(fc)->log), prefix, 'e', fmt, ## __VA_ARGS__) /** * invalf - Store supplementary invalid argument error message * @fc: The context in which to log the error message * @fmt: The format string * * Store the supplementary error message for the process if the process has * enabled the facility and return -EINVAL. */ #define invalf(fc, fmt, ...) (errorf(fc, fmt, ## __VA_ARGS__), -EINVAL) #define inval_plog(p, fmt, ...) (error_plog(p, fmt, ## __VA_ARGS__), -EINVAL) #define invalfc(fc, fmt, ...) (errorfc(fc, fmt, ## __VA_ARGS__), -EINVAL) #define invalfcp(fc, prefix, fmt, ...) \ (errorfcp(fc, prefix, fmt, ## __VA_ARGS__), -EINVAL) #endif /* _LINUX_FS_CONTEXT_H */ |
| 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 | // SPDX-License-Identifier: GPL-2.0 #include <linux/sysctl.h> #include <linux/slab.h> #include <net/net_namespace.h> #include <net/xfrm.h> static void __net_init __xfrm_sysctl_init(struct net *net) { net->xfrm.sysctl_aevent_etime = XFRM_AE_ETIME; net->xfrm.sysctl_aevent_rseqth = XFRM_AE_SEQT_SIZE; net->xfrm.sysctl_larval_drop = 1; net->xfrm.sysctl_acq_expires = 30; } #ifdef CONFIG_SYSCTL static struct ctl_table xfrm_table[] = { { .procname = "xfrm_aevent_etime", .maxlen = sizeof(u32), .mode = 0644, .proc_handler = proc_douintvec }, { .procname = "xfrm_aevent_rseqth", .maxlen = sizeof(u32), .mode = 0644, .proc_handler = proc_douintvec }, { .procname = "xfrm_larval_drop", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, { .procname = "xfrm_acq_expires", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, }; int __net_init xfrm_sysctl_init(struct net *net) { struct ctl_table *table; size_t table_size = ARRAY_SIZE(xfrm_table); __xfrm_sysctl_init(net); table = kmemdup(xfrm_table, sizeof(xfrm_table), GFP_KERNEL); if (!table) goto out_kmemdup; table[0].data = &net->xfrm.sysctl_aevent_etime; table[1].data = &net->xfrm.sysctl_aevent_rseqth; table[2].data = &net->xfrm.sysctl_larval_drop; table[3].data = &net->xfrm.sysctl_acq_expires; /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) table_size = 0; net->xfrm.sysctl_hdr = register_net_sysctl_sz(net, "net/core", table, table_size); if (!net->xfrm.sysctl_hdr) goto out_register; return 0; out_register: kfree(table); out_kmemdup: return -ENOMEM; } void __net_exit xfrm_sysctl_fini(struct net *net) { const struct ctl_table *table; table = net->xfrm.sysctl_hdr->ctl_table_arg; unregister_net_sysctl_table(net->xfrm.sysctl_hdr); kfree(table); } #else int __net_init xfrm_sysctl_init(struct net *net) { __xfrm_sysctl_init(net); return 0; } #endif |
| 1 13 13 13 13 2 7 7 7 11 12 12 7 7 7 13 4 13 12 10 7 7 7 10 3 3 3 3 4 4 4 3 36 36 456 36 5 416 65 64 63 64 415 1 4 4 1 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 1 1 1 1 1 1 1 1 12 5 7 1 11 191 191 191 191 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 | // SPDX-License-Identifier: GPL-2.0 /* * drivers/base/power/wakeup.c - System wakeup events framework * * Copyright (c) 2010 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. */ #define pr_fmt(fmt) "PM: " fmt #include <linux/device.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/capability.h> #include <linux/export.h> #include <linux/suspend.h> #include <linux/seq_file.h> #include <linux/debugfs.h> #include <linux/pm_wakeirq.h> #include <trace/events/power.h> #include "power.h" #define list_for_each_entry_rcu_locked(pos, head, member) \ list_for_each_entry_rcu(pos, head, member, \ srcu_read_lock_held(&wakeup_srcu)) /* * If set, the suspend/hibernate code will abort transitions to a sleep state * if wakeup events are registered during or immediately before the transition. */ bool events_check_enabled __read_mostly; /* First wakeup IRQ seen by the kernel in the last cycle. */ static unsigned int wakeup_irq[2] __read_mostly; static DEFINE_RAW_SPINLOCK(wakeup_irq_lock); /* If greater than 0 and the system is suspending, terminate the suspend. */ static atomic_t pm_abort_suspend __read_mostly; /* * Combined counters of registered wakeup events and wakeup events in progress. * They need to be modified together atomically, so it's better to use one * atomic variable to hold them both. */ static atomic_t combined_event_count = ATOMIC_INIT(0); #define IN_PROGRESS_BITS (sizeof(int) * 4) #define MAX_IN_PROGRESS ((1 << IN_PROGRESS_BITS) - 1) static void split_counters(unsigned int *cnt, unsigned int *inpr) { unsigned int comb = atomic_read(&combined_event_count); *cnt = (comb >> IN_PROGRESS_BITS); *inpr = comb & MAX_IN_PROGRESS; } /* A preserved old value of the events counter. */ static unsigned int saved_count; static DEFINE_RAW_SPINLOCK(events_lock); static void pm_wakeup_timer_fn(struct timer_list *t); static LIST_HEAD(wakeup_sources); static DECLARE_WAIT_QUEUE_HEAD(wakeup_count_wait_queue); DEFINE_STATIC_SRCU(wakeup_srcu); static struct wakeup_source deleted_ws = { .name = "deleted", .lock = __SPIN_LOCK_UNLOCKED(deleted_ws.lock), }; static DEFINE_IDA(wakeup_ida); /** * wakeup_source_create - Create a struct wakeup_source object. * @name: Name of the new wakeup source. */ static struct wakeup_source *wakeup_source_create(const char *name) { struct wakeup_source *ws; const char *ws_name; int id; ws = kzalloc(sizeof(*ws), GFP_KERNEL); if (!ws) goto err_ws; ws_name = kstrdup_const(name, GFP_KERNEL); if (!ws_name) goto err_name; ws->name = ws_name; id = ida_alloc(&wakeup_ida, GFP_KERNEL); if (id < 0) goto err_id; ws->id = id; return ws; err_id: kfree_const(ws->name); err_name: kfree(ws); err_ws: return NULL; } /* * Record wakeup_source statistics being deleted into a dummy wakeup_source. */ static void wakeup_source_record(struct wakeup_source *ws) { unsigned long flags; spin_lock_irqsave(&deleted_ws.lock, flags); if (ws->event_count) { deleted_ws.total_time = ktime_add(deleted_ws.total_time, ws->total_time); deleted_ws.prevent_sleep_time = ktime_add(deleted_ws.prevent_sleep_time, ws->prevent_sleep_time); deleted_ws.max_time = ktime_compare(deleted_ws.max_time, ws->max_time) > 0 ? deleted_ws.max_time : ws->max_time; deleted_ws.event_count += ws->event_count; deleted_ws.active_count += ws->active_count; deleted_ws.relax_count += ws->relax_count; deleted_ws.expire_count += ws->expire_count; deleted_ws.wakeup_count += ws->wakeup_count; } spin_unlock_irqrestore(&deleted_ws.lock, flags); } static void wakeup_source_free(struct wakeup_source *ws) { ida_free(&wakeup_ida, ws->id); kfree_const(ws->name); kfree(ws); } /** * wakeup_source_destroy - Destroy a struct wakeup_source object. * @ws: Wakeup source to destroy. * * Use only for wakeup source objects created with wakeup_source_create(). */ static void wakeup_source_destroy(struct wakeup_source *ws) { if (!ws) return; __pm_relax(ws); wakeup_source_record(ws); wakeup_source_free(ws); } /** * wakeup_source_add - Add given object to the list of wakeup sources. * @ws: Wakeup source object to add to the list. */ static void wakeup_source_add(struct wakeup_source *ws) { unsigned long flags; if (WARN_ON(!ws)) return; spin_lock_init(&ws->lock); timer_setup(&ws->timer, pm_wakeup_timer_fn, 0); ws->active = false; raw_spin_lock_irqsave(&events_lock, flags); list_add_rcu(&ws->entry, &wakeup_sources); raw_spin_unlock_irqrestore(&events_lock, flags); } /** * wakeup_source_remove - Remove given object from the wakeup sources list. * @ws: Wakeup source object to remove from the list. */ static void wakeup_source_remove(struct wakeup_source *ws) { unsigned long flags; if (WARN_ON(!ws)) return; raw_spin_lock_irqsave(&events_lock, flags); list_del_rcu(&ws->entry); raw_spin_unlock_irqrestore(&events_lock, flags); synchronize_srcu(&wakeup_srcu); timer_delete_sync(&ws->timer); /* * Clear timer.function to make wakeup_source_not_registered() treat * this wakeup source as not registered. */ ws->timer.function = NULL; } /** * wakeup_source_register - Create wakeup source and add it to the list. * @dev: Device this wakeup source is associated with (or NULL if virtual). * @name: Name of the wakeup source to register. */ struct wakeup_source *wakeup_source_register(struct device *dev, const char *name) { struct wakeup_source *ws; int ret; ws = wakeup_source_create(name); if (ws) { if (!dev || device_is_registered(dev)) { ret = wakeup_source_sysfs_add(dev, ws); if (ret) { wakeup_source_free(ws); return NULL; } } wakeup_source_add(ws); } return ws; } EXPORT_SYMBOL_GPL(wakeup_source_register); /** * wakeup_source_unregister - Remove wakeup source from the list and remove it. * @ws: Wakeup source object to unregister. */ void wakeup_source_unregister(struct wakeup_source *ws) { if (ws) { wakeup_source_remove(ws); if (ws->dev) wakeup_source_sysfs_remove(ws); wakeup_source_destroy(ws); } } EXPORT_SYMBOL_GPL(wakeup_source_unregister); /** * wakeup_sources_read_lock - Lock wakeup source list for read. * * Returns an index of srcu lock for struct wakeup_srcu. * This index must be passed to the matching wakeup_sources_read_unlock(). */ int wakeup_sources_read_lock(void) { return srcu_read_lock(&wakeup_srcu); } EXPORT_SYMBOL_GPL(wakeup_sources_read_lock); /** * wakeup_sources_read_unlock - Unlock wakeup source list. * @idx: return value from corresponding wakeup_sources_read_lock() */ void wakeup_sources_read_unlock(int idx) { srcu_read_unlock(&wakeup_srcu, idx); } EXPORT_SYMBOL_GPL(wakeup_sources_read_unlock); /** * wakeup_sources_walk_start - Begin a walk on wakeup source list * * Returns first object of the list of wakeup sources. * * Note that to be safe, wakeup sources list needs to be locked by calling * wakeup_source_read_lock() for this. */ struct wakeup_source *wakeup_sources_walk_start(void) { struct list_head *ws_head = &wakeup_sources; return list_entry_rcu(ws_head->next, struct wakeup_source, entry); } EXPORT_SYMBOL_GPL(wakeup_sources_walk_start); /** * wakeup_sources_walk_next - Get next wakeup source from the list * @ws: Previous wakeup source object * * Note that to be safe, wakeup sources list needs to be locked by calling * wakeup_source_read_lock() for this. */ struct wakeup_source *wakeup_sources_walk_next(struct wakeup_source *ws) { struct list_head *ws_head = &wakeup_sources; return list_next_or_null_rcu(ws_head, &ws->entry, struct wakeup_source, entry); } EXPORT_SYMBOL_GPL(wakeup_sources_walk_next); /** * device_wakeup_attach - Attach a wakeup source object to a device object. * @dev: Device to handle. * @ws: Wakeup source object to attach to @dev. * * This causes @dev to be treated as a wakeup device. */ static int device_wakeup_attach(struct device *dev, struct wakeup_source *ws) { spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { spin_unlock_irq(&dev->power.lock); return -EEXIST; } dev->power.wakeup = ws; if (dev->power.wakeirq) device_wakeup_attach_irq(dev, dev->power.wakeirq); spin_unlock_irq(&dev->power.lock); return 0; } /** * device_wakeup_enable - Enable given device to be a wakeup source. * @dev: Device to handle. * * Create a wakeup source object, register it and attach it to @dev. */ int device_wakeup_enable(struct device *dev) { struct wakeup_source *ws; int ret; if (!dev || !dev->power.can_wakeup) return -EINVAL; if (pm_sleep_transition_in_progress()) dev_dbg(dev, "Suspicious %s() during system transition!\n", __func__); ws = wakeup_source_register(dev, dev_name(dev)); if (!ws) return -ENOMEM; ret = device_wakeup_attach(dev, ws); if (ret) wakeup_source_unregister(ws); return ret; } EXPORT_SYMBOL_GPL(device_wakeup_enable); /** * device_wakeup_attach_irq - Attach a wakeirq to a wakeup source * @dev: Device to handle * @wakeirq: Device specific wakeirq entry * * Attach a device wakeirq to the wakeup source so the device * wake IRQ can be configured automatically for suspend and * resume. * * Call under the device's power.lock lock. */ void device_wakeup_attach_irq(struct device *dev, struct wake_irq *wakeirq) { struct wakeup_source *ws; ws = dev->power.wakeup; if (!ws) return; if (ws->wakeirq) dev_err(dev, "Leftover wakeup IRQ found, overriding\n"); ws->wakeirq = wakeirq; } /** * device_wakeup_detach_irq - Detach a wakeirq from a wakeup source * @dev: Device to handle * * Removes a device wakeirq from the wakeup source. * * Call under the device's power.lock lock. */ void device_wakeup_detach_irq(struct device *dev) { struct wakeup_source *ws; ws = dev->power.wakeup; if (ws) ws->wakeirq = NULL; } /** * device_wakeup_arm_wake_irqs - * * Iterates over the list of device wakeirqs to arm them. */ void device_wakeup_arm_wake_irqs(void) { struct wakeup_source *ws; int srcuidx; srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu_locked(ws, &wakeup_sources, entry) dev_pm_arm_wake_irq(ws->wakeirq); srcu_read_unlock(&wakeup_srcu, srcuidx); } /** * device_wakeup_disarm_wake_irqs - * * Iterates over the list of device wakeirqs to disarm them. */ void device_wakeup_disarm_wake_irqs(void) { struct wakeup_source *ws; int srcuidx; srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu_locked(ws, &wakeup_sources, entry) dev_pm_disarm_wake_irq(ws->wakeirq); srcu_read_unlock(&wakeup_srcu, srcuidx); } /** * device_wakeup_detach - Detach a device's wakeup source object from it. * @dev: Device to detach the wakeup source object from. * * After it returns, @dev will not be treated as a wakeup device any more. */ static struct wakeup_source *device_wakeup_detach(struct device *dev) { struct wakeup_source *ws; spin_lock_irq(&dev->power.lock); ws = dev->power.wakeup; dev->power.wakeup = NULL; spin_unlock_irq(&dev->power.lock); return ws; } /** * device_wakeup_disable - Do not regard a device as a wakeup source any more. * @dev: Device to handle. * * Detach the @dev's wakeup source object from it, unregister this wakeup source * object and destroy it. */ void device_wakeup_disable(struct device *dev) { struct wakeup_source *ws; if (!dev || !dev->power.can_wakeup) return; ws = device_wakeup_detach(dev); wakeup_source_unregister(ws); } EXPORT_SYMBOL_GPL(device_wakeup_disable); /** * device_set_wakeup_capable - Set/reset device wakeup capability flag. * @dev: Device to handle. * @capable: Whether or not @dev is capable of waking up the system from sleep. * * If @capable is set, set the @dev's power.can_wakeup flag and add its * wakeup-related attributes to sysfs. Otherwise, unset the @dev's * power.can_wakeup flag and remove its wakeup-related attributes from sysfs. * * This function may sleep and it can't be called from any context where * sleeping is not allowed. */ void device_set_wakeup_capable(struct device *dev, bool capable) { if (!!dev->power.can_wakeup == !!capable) return; dev->power.can_wakeup = capable; if (device_is_registered(dev) && !list_empty(&dev->power.entry)) { if (capable) { int ret = wakeup_sysfs_add(dev); if (ret) dev_info(dev, "Wakeup sysfs attributes not added\n"); } else { wakeup_sysfs_remove(dev); } } } EXPORT_SYMBOL_GPL(device_set_wakeup_capable); /** * device_set_wakeup_enable - Enable or disable a device to wake up the system. * @dev: Device to handle. * @enable: enable/disable flag */ int device_set_wakeup_enable(struct device *dev, bool enable) { if (enable) return device_wakeup_enable(dev); device_wakeup_disable(dev); return 0; } EXPORT_SYMBOL_GPL(device_set_wakeup_enable); /** * wakeup_source_not_registered - validate the given wakeup source. * @ws: Wakeup source to be validated. */ static bool wakeup_source_not_registered(struct wakeup_source *ws) { /* * Use timer struct to check if the given source is initialized * by wakeup_source_add. */ return ws->timer.function != pm_wakeup_timer_fn; } /* * The functions below use the observation that each wakeup event starts a * period in which the system should not be suspended. The moment this period * will end depends on how the wakeup event is going to be processed after being * detected and all of the possible cases can be divided into two distinct * groups. * * First, a wakeup event may be detected by the same functional unit that will * carry out the entire processing of it and possibly will pass it to user space * for further processing. In that case the functional unit that has detected * the event may later "close" the "no suspend" period associated with it * directly as soon as it has been dealt with. The pair of pm_stay_awake() and * pm_relax(), balanced with each other, is supposed to be used in such * situations. * * Second, a wakeup event may be detected by one functional unit and processed * by another one. In that case the unit that has detected it cannot really * "close" the "no suspend" period associated with it, unless it knows in * advance what's going to happen to the event during processing. This * knowledge, however, may not be available to it, so it can simply specify time * to wait before the system can be suspended and pass it as the second * argument of pm_wakeup_event(). * * It is valid to call pm_relax() after pm_wakeup_event(), in which case the * "no suspend" period will be ended either by the pm_relax(), or by the timer * function executed when the timer expires, whichever comes first. */ /** * wakeup_source_activate - Mark given wakeup source as active. * @ws: Wakeup source to handle. * * Update the @ws' statistics and, if @ws has just been activated, notify the PM * core of the event by incrementing the counter of the wakeup events being * processed. */ static void wakeup_source_activate(struct wakeup_source *ws) { unsigned int cec; if (WARN_ONCE(wakeup_source_not_registered(ws), "unregistered wakeup source\n")) return; ws->active = true; ws->active_count++; ws->last_time = ktime_get(); if (ws->autosleep_enabled) ws->start_prevent_time = ws->last_time; /* Increment the counter of events in progress. */ cec = atomic_inc_return(&combined_event_count); trace_wakeup_source_activate(ws->name, cec); } /** * wakeup_source_report_event - Report wakeup event using the given source. * @ws: Wakeup source to report the event for. * @hard: If set, abort suspends in progress and wake up from suspend-to-idle. */ static void wakeup_source_report_event(struct wakeup_source *ws, bool hard) { ws->event_count++; /* This is racy, but the counter is approximate anyway. */ if (events_check_enabled) ws->wakeup_count++; if (!ws->active) wakeup_source_activate(ws); if (hard) pm_system_wakeup(); } /** * __pm_stay_awake - Notify the PM core of a wakeup event. * @ws: Wakeup source object associated with the source of the event. * * It is safe to call this function from interrupt context. */ void __pm_stay_awake(struct wakeup_source *ws) { unsigned long flags; if (!ws) return; spin_lock_irqsave(&ws->lock, flags); wakeup_source_report_event(ws, false); timer_delete(&ws->timer); ws->timer_expires = 0; spin_unlock_irqrestore(&ws->lock, flags); } EXPORT_SYMBOL_GPL(__pm_stay_awake); /** * pm_stay_awake - Notify the PM core that a wakeup event is being processed. * @dev: Device the wakeup event is related to. * * Notify the PM core of a wakeup event (signaled by @dev) by calling * __pm_stay_awake for the @dev's wakeup source object. * * Call this function after detecting of a wakeup event if pm_relax() is going * to be called directly after processing the event (and possibly passing it to * user space for further processing). */ void pm_stay_awake(struct device *dev) { unsigned long flags; if (!dev) return; spin_lock_irqsave(&dev->power.lock, flags); __pm_stay_awake(dev->power.wakeup); spin_unlock_irqrestore(&dev->power.lock, flags); } EXPORT_SYMBOL_GPL(pm_stay_awake); #ifdef CONFIG_PM_AUTOSLEEP static void update_prevent_sleep_time(struct wakeup_source *ws, ktime_t now) { ktime_t delta = ktime_sub(now, ws->start_prevent_time); ws->prevent_sleep_time = ktime_add(ws->prevent_sleep_time, delta); } #else static inline void update_prevent_sleep_time(struct wakeup_source *ws, ktime_t now) {} #endif /** * wakeup_source_deactivate - Mark given wakeup source as inactive. * @ws: Wakeup source to handle. * * Update the @ws' statistics and notify the PM core that the wakeup source has * become inactive by decrementing the counter of wakeup events being processed * and incrementing the counter of registered wakeup events. */ static void wakeup_source_deactivate(struct wakeup_source *ws) { unsigned int cnt, inpr, cec; ktime_t duration; ktime_t now; ws->relax_count++; /* * __pm_relax() may be called directly or from a timer function. * If it is called directly right after the timer function has been * started, but before the timer function calls __pm_relax(), it is * possible that __pm_stay_awake() will be called in the meantime and * will set ws->active. Then, ws->active may be cleared immediately * by the __pm_relax() called from the timer function, but in such a * case ws->relax_count will be different from ws->active_count. */ if (ws->relax_count != ws->active_count) { ws->relax_count--; return; } ws->active = false; now = ktime_get(); duration = ktime_sub(now, ws->last_time); ws->total_time = ktime_add(ws->total_time, duration); if (ktime_to_ns(duration) > ktime_to_ns(ws->max_time)) ws->max_time = duration; ws->last_time = now; timer_delete(&ws->timer); ws->timer_expires = 0; if (ws->autosleep_enabled) update_prevent_sleep_time(ws, now); /* * Increment the counter of registered wakeup events and decrement the * counter of wakeup events in progress simultaneously. */ cec = atomic_add_return(MAX_IN_PROGRESS, &combined_event_count); trace_wakeup_source_deactivate(ws->name, cec); split_counters(&cnt, &inpr); if (!inpr && waitqueue_active(&wakeup_count_wait_queue)) wake_up(&wakeup_count_wait_queue); } /** * __pm_relax - Notify the PM core that processing of a wakeup event has ended. * @ws: Wakeup source object associated with the source of the event. * * Call this function for wakeup events whose processing started with calling * __pm_stay_awake(). * * It is safe to call it from interrupt context. */ void __pm_relax(struct wakeup_source *ws) { unsigned long flags; if (!ws) return; spin_lock_irqsave(&ws->lock, flags); if (ws->active) wakeup_source_deactivate(ws); spin_unlock_irqrestore(&ws->lock, flags); } EXPORT_SYMBOL_GPL(__pm_relax); /** * pm_relax - Notify the PM core that processing of a wakeup event has ended. * @dev: Device that signaled the event. * * Execute __pm_relax() for the @dev's wakeup source object. */ void pm_relax(struct device *dev) { unsigned long flags; if (!dev) return; spin_lock_irqsave(&dev->power.lock, flags); __pm_relax(dev->power.wakeup); spin_unlock_irqrestore(&dev->power.lock, flags); } EXPORT_SYMBOL_GPL(pm_relax); /** * pm_wakeup_timer_fn - Delayed finalization of a wakeup event. * @t: timer list * * Call wakeup_source_deactivate() for the wakeup source whose address is stored * in @data if it is currently active and its timer has not been canceled and * the expiration time of the timer is not in future. */ static void pm_wakeup_timer_fn(struct timer_list *t) { struct wakeup_source *ws = timer_container_of(ws, t, timer); unsigned long flags; spin_lock_irqsave(&ws->lock, flags); if (ws->active && ws->timer_expires && time_after_eq(jiffies, ws->timer_expires)) { wakeup_source_deactivate(ws); ws->expire_count++; } spin_unlock_irqrestore(&ws->lock, flags); } /** * pm_wakeup_ws_event - Notify the PM core of a wakeup event. * @ws: Wakeup source object associated with the event source. * @msec: Anticipated event processing time (in milliseconds). * @hard: If set, abort suspends in progress and wake up from suspend-to-idle. * * Notify the PM core of a wakeup event whose source is @ws that will take * approximately @msec milliseconds to be processed by the kernel. If @ws is * not active, activate it. If @msec is nonzero, set up the @ws' timer to * execute pm_wakeup_timer_fn() in future. * * It is safe to call this function from interrupt context. */ void pm_wakeup_ws_event(struct wakeup_source *ws, unsigned int msec, bool hard) { unsigned long flags; unsigned long expires; if (!ws) return; spin_lock_irqsave(&ws->lock, flags); wakeup_source_report_event(ws, hard); if (!msec) { wakeup_source_deactivate(ws); goto unlock; } expires = jiffies + msecs_to_jiffies(msec); if (!expires) expires = 1; if (!ws->timer_expires || time_after(expires, ws->timer_expires)) { mod_timer(&ws->timer, expires); ws->timer_expires = expires; } unlock: spin_unlock_irqrestore(&ws->lock, flags); } EXPORT_SYMBOL_GPL(pm_wakeup_ws_event); /** * pm_wakeup_dev_event - Notify the PM core of a wakeup event. * @dev: Device the wakeup event is related to. * @msec: Anticipated event processing time (in milliseconds). * @hard: If set, abort suspends in progress and wake up from suspend-to-idle. * * Call pm_wakeup_ws_event() for the @dev's wakeup source object. */ void pm_wakeup_dev_event(struct device *dev, unsigned int msec, bool hard) { unsigned long flags; if (!dev) return; spin_lock_irqsave(&dev->power.lock, flags); pm_wakeup_ws_event(dev->power.wakeup, msec, hard); spin_unlock_irqrestore(&dev->power.lock, flags); } EXPORT_SYMBOL_GPL(pm_wakeup_dev_event); void pm_print_active_wakeup_sources(void) { struct wakeup_source *ws; int srcuidx, active = 0; struct wakeup_source *last_activity_ws = NULL; srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu_locked(ws, &wakeup_sources, entry) { if (ws->active) { pm_pr_dbg("active wakeup source: %s\n", ws->name); active = 1; } else if (!active && (!last_activity_ws || ktime_to_ns(ws->last_time) > ktime_to_ns(last_activity_ws->last_time))) { last_activity_ws = ws; } } if (!active && last_activity_ws) pm_pr_dbg("last active wakeup source: %s\n", last_activity_ws->name); srcu_read_unlock(&wakeup_srcu, srcuidx); } EXPORT_SYMBOL_GPL(pm_print_active_wakeup_sources); /** * pm_wakeup_pending - Check if power transition in progress should be aborted. * * Compare the current number of registered wakeup events with its preserved * value from the past and return true if new wakeup events have been registered * since the old value was stored. Also return true if the current number of * wakeup events being processed is different from zero. */ bool pm_wakeup_pending(void) { unsigned long flags; bool ret = false; raw_spin_lock_irqsave(&events_lock, flags); if (events_check_enabled) { unsigned int cnt, inpr; split_counters(&cnt, &inpr); ret = (cnt != saved_count || inpr > 0); events_check_enabled = !ret; } raw_spin_unlock_irqrestore(&events_lock, flags); if (ret) { pm_pr_dbg("Wakeup pending, aborting suspend\n"); pm_print_active_wakeup_sources(); } return ret || atomic_read(&pm_abort_suspend) > 0; } EXPORT_SYMBOL_GPL(pm_wakeup_pending); void pm_system_wakeup(void) { atomic_inc(&pm_abort_suspend); s2idle_wake(); } EXPORT_SYMBOL_GPL(pm_system_wakeup); void pm_system_cancel_wakeup(void) { atomic_dec_if_positive(&pm_abort_suspend); } void pm_wakeup_clear(unsigned int irq_number) { raw_spin_lock_irq(&wakeup_irq_lock); if (irq_number && wakeup_irq[0] == irq_number) wakeup_irq[0] = wakeup_irq[1]; else wakeup_irq[0] = 0; wakeup_irq[1] = 0; raw_spin_unlock_irq(&wakeup_irq_lock); if (!irq_number) atomic_set(&pm_abort_suspend, 0); } void pm_system_irq_wakeup(unsigned int irq_number) { unsigned long flags; raw_spin_lock_irqsave(&wakeup_irq_lock, flags); if (wakeup_irq[0] == 0) wakeup_irq[0] = irq_number; else if (wakeup_irq[1] == 0) wakeup_irq[1] = irq_number; else irq_number = 0; pm_pr_dbg("Triggering wakeup from IRQ %d\n", irq_number); raw_spin_unlock_irqrestore(&wakeup_irq_lock, flags); if (irq_number) pm_system_wakeup(); } unsigned int pm_wakeup_irq(void) { return wakeup_irq[0]; } /** * pm_get_wakeup_count - Read the number of registered wakeup events. * @count: Address to store the value at. * @block: Whether or not to block. * * Store the number of registered wakeup events at the address in @count. If * @block is set, block until the current number of wakeup events being * processed is zero. * * Return 'false' if the current number of wakeup events being processed is * nonzero. Otherwise return 'true'. */ bool pm_get_wakeup_count(unsigned int *count, bool block) { unsigned int cnt, inpr; if (block) { DEFINE_WAIT(wait); for (;;) { prepare_to_wait(&wakeup_count_wait_queue, &wait, TASK_INTERRUPTIBLE); split_counters(&cnt, &inpr); if (inpr == 0 || signal_pending(current)) break; pm_print_active_wakeup_sources(); schedule(); } finish_wait(&wakeup_count_wait_queue, &wait); } split_counters(&cnt, &inpr); *count = cnt; return !inpr; } /** * pm_save_wakeup_count - Save the current number of registered wakeup events. * @count: Value to compare with the current number of registered wakeup events. * * If @count is equal to the current number of registered wakeup events and the * current number of wakeup events being processed is zero, store @count as the * old number of registered wakeup events for pm_check_wakeup_events(), enable * wakeup events detection and return 'true'. Otherwise disable wakeup events * detection and return 'false'. */ bool pm_save_wakeup_count(unsigned int count) { unsigned int cnt, inpr; unsigned long flags; events_check_enabled = false; raw_spin_lock_irqsave(&events_lock, flags); split_counters(&cnt, &inpr); if (cnt == count && inpr == 0) { saved_count = count; events_check_enabled = true; } raw_spin_unlock_irqrestore(&events_lock, flags); return events_check_enabled; } #ifdef CONFIG_PM_AUTOSLEEP /** * pm_wakep_autosleep_enabled - Modify autosleep_enabled for all wakeup sources. * @set: Whether to set or to clear the autosleep_enabled flags. */ void pm_wakep_autosleep_enabled(bool set) { struct wakeup_source *ws; ktime_t now = ktime_get(); int srcuidx; srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu_locked(ws, &wakeup_sources, entry) { spin_lock_irq(&ws->lock); if (ws->autosleep_enabled != set) { ws->autosleep_enabled = set; if (ws->active) { if (set) ws->start_prevent_time = now; else update_prevent_sleep_time(ws, now); } } spin_unlock_irq(&ws->lock); } srcu_read_unlock(&wakeup_srcu, srcuidx); } #endif /* CONFIG_PM_AUTOSLEEP */ /** * print_wakeup_source_stats - Print wakeup source statistics information. * @m: seq_file to print the statistics into. * @ws: Wakeup source object to print the statistics for. */ static int print_wakeup_source_stats(struct seq_file *m, struct wakeup_source *ws) { unsigned long flags; ktime_t total_time; ktime_t max_time; unsigned long active_count; ktime_t active_time; ktime_t prevent_sleep_time; spin_lock_irqsave(&ws->lock, flags); total_time = ws->total_time; max_time = ws->max_time; prevent_sleep_time = ws->prevent_sleep_time; active_count = ws->active_count; if (ws->active) { ktime_t now = ktime_get(); active_time = ktime_sub(now, ws->last_time); total_time = ktime_add(total_time, active_time); if (active_time > max_time) max_time = active_time; if (ws->autosleep_enabled) prevent_sleep_time = ktime_add(prevent_sleep_time, ktime_sub(now, ws->start_prevent_time)); } else { active_time = 0; } seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t%lu\t\t%lld\t\t%lld\t\t%lld\t\t%lld\t\t%lld\n", ws->name, active_count, ws->event_count, ws->wakeup_count, ws->expire_count, ktime_to_ms(active_time), ktime_to_ms(total_time), ktime_to_ms(max_time), ktime_to_ms(ws->last_time), ktime_to_ms(prevent_sleep_time)); spin_unlock_irqrestore(&ws->lock, flags); return 0; } static void *wakeup_sources_stats_seq_start(struct seq_file *m, loff_t *pos) { struct wakeup_source *ws; loff_t n = *pos; int *srcuidx = m->private; if (n == 0) { seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t" "expire_count\tactive_since\ttotal_time\tmax_time\t" "last_change\tprevent_suspend_time\n"); } *srcuidx = srcu_read_lock(&wakeup_srcu); list_for_each_entry_rcu_locked(ws, &wakeup_sources, entry) { if (n-- <= 0) return ws; } return NULL; } static void *wakeup_sources_stats_seq_next(struct seq_file *m, void *v, loff_t *pos) { struct wakeup_source *ws = v; struct wakeup_source *next_ws = NULL; ++(*pos); list_for_each_entry_continue_rcu(ws, &wakeup_sources, entry) { next_ws = ws; break; } if (!next_ws) print_wakeup_source_stats(m, &deleted_ws); return next_ws; } static void wakeup_sources_stats_seq_stop(struct seq_file *m, void *v) { int *srcuidx = m->private; srcu_read_unlock(&wakeup_srcu, *srcuidx); } /** * wakeup_sources_stats_seq_show - Print wakeup sources statistics information. * @m: seq_file to print the statistics into. * @v: wakeup_source of each iteration */ static int wakeup_sources_stats_seq_show(struct seq_file *m, void *v) { struct wakeup_source *ws = v; print_wakeup_source_stats(m, ws); return 0; } static const struct seq_operations wakeup_sources_stats_seq_ops = { .start = wakeup_sources_stats_seq_start, .next = wakeup_sources_stats_seq_next, .stop = wakeup_sources_stats_seq_stop, .show = wakeup_sources_stats_seq_show, }; static int wakeup_sources_stats_open(struct inode *inode, struct file *file) { return seq_open_private(file, &wakeup_sources_stats_seq_ops, sizeof(int)); } static const struct file_operations wakeup_sources_stats_fops = { .owner = THIS_MODULE, .open = wakeup_sources_stats_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release_private, }; static int __init wakeup_sources_debugfs_init(void) { debugfs_create_file("wakeup_sources", 0444, NULL, NULL, &wakeup_sources_stats_fops); return 0; } postcore_initcall(wakeup_sources_debugfs_init); |
| 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 | // SPDX-License-Identifier: GPL-2.0-only #include "netlink.h" #include "common.h" #include <linux/phy.h> #include <linux/phylib_stubs.h> struct linkstate_req_info { struct ethnl_req_info base; }; struct linkstate_reply_data { struct ethnl_reply_data base; int link; int sqi; int sqi_max; struct ethtool_link_ext_stats link_stats; bool link_ext_state_provided; struct ethtool_link_ext_state_info ethtool_link_ext_state_info; }; #define LINKSTATE_REPDATA(__reply_base) \ container_of(__reply_base, struct linkstate_reply_data, base) const struct nla_policy ethnl_linkstate_get_policy[] = { [ETHTOOL_A_LINKSTATE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_stats), }; static int linkstate_get_sqi(struct phy_device *phydev) { int ret; if (!phydev) return -EOPNOTSUPP; mutex_lock(&phydev->lock); if (!phydev->drv || !phydev->drv->get_sqi) ret = -EOPNOTSUPP; else if (!phydev->link) ret = -ENETDOWN; else ret = phydev->drv->get_sqi(phydev); mutex_unlock(&phydev->lock); return ret; } static int linkstate_get_sqi_max(struct phy_device *phydev) { int ret; if (!phydev) return -EOPNOTSUPP; mutex_lock(&phydev->lock); if (!phydev->drv || !phydev->drv->get_sqi_max) ret = -EOPNOTSUPP; else if (!phydev->link) ret = -ENETDOWN; else ret = phydev->drv->get_sqi_max(phydev); mutex_unlock(&phydev->lock); return ret; }; static bool linkstate_sqi_critical_error(int sqi) { return sqi < 0 && sqi != -EOPNOTSUPP && sqi != -ENETDOWN; } static bool linkstate_sqi_valid(struct linkstate_reply_data *data) { return data->sqi >= 0 && data->sqi_max >= 0 && data->sqi <= data->sqi_max; } static int linkstate_get_link_ext_state(struct net_device *dev, struct linkstate_reply_data *data) { int err; if (!dev->ethtool_ops->get_link_ext_state) return -EOPNOTSUPP; err = dev->ethtool_ops->get_link_ext_state(dev, &data->ethtool_link_ext_state_info); if (err) return err; data->link_ext_state_provided = true; return 0; } static int linkstate_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct linkstate_reply_data *data = LINKSTATE_REPDATA(reply_base); struct net_device *dev = reply_base->dev; struct nlattr **tb = info->attrs; struct phy_device *phydev; int ret; phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_LINKSTATE_HEADER, info->extack); if (IS_ERR(phydev)) { ret = PTR_ERR(phydev); goto out; } ret = ethnl_ops_begin(dev); if (ret < 0) return ret; data->link = __ethtool_get_link(dev); ret = linkstate_get_sqi(phydev); if (linkstate_sqi_critical_error(ret)) goto out; data->sqi = ret; ret = linkstate_get_sqi_max(phydev); if (linkstate_sqi_critical_error(ret)) goto out; data->sqi_max = ret; if (dev->flags & IFF_UP) { ret = linkstate_get_link_ext_state(dev, data); if (ret < 0 && ret != -EOPNOTSUPP && ret != -ENODATA) goto out; } ethtool_stats_init((u64 *)&data->link_stats, sizeof(data->link_stats) / 8); if (req_base->flags & ETHTOOL_FLAG_STATS) { if (phydev) phy_ethtool_get_link_ext_stats(phydev, &data->link_stats); if (dev->ethtool_ops->get_link_ext_stats) dev->ethtool_ops->get_link_ext_stats(dev, &data->link_stats); } ret = 0; out: ethnl_ops_complete(dev); return ret; } static int linkstate_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { struct linkstate_reply_data *data = LINKSTATE_REPDATA(reply_base); int len; len = nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */ + 0; if (linkstate_sqi_valid(data)) { len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI */ len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI_MAX */ } if (data->link_ext_state_provided) len += nla_total_size(sizeof(u8)); /* LINKSTATE_EXT_STATE */ if (data->ethtool_link_ext_state_info.__link_ext_substate) len += nla_total_size(sizeof(u8)); /* LINKSTATE_EXT_SUBSTATE */ if (data->link_stats.link_down_events != ETHTOOL_STAT_NOT_SET) len += nla_total_size(sizeof(u32)); return len; } static int linkstate_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { struct linkstate_reply_data *data = LINKSTATE_REPDATA(reply_base); if (data->link >= 0 && nla_put_u8(skb, ETHTOOL_A_LINKSTATE_LINK, !!data->link)) return -EMSGSIZE; if (linkstate_sqi_valid(data)) { if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi)) return -EMSGSIZE; if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, data->sqi_max)) return -EMSGSIZE; } if (data->link_ext_state_provided) { if (nla_put_u8(skb, ETHTOOL_A_LINKSTATE_EXT_STATE, data->ethtool_link_ext_state_info.link_ext_state)) return -EMSGSIZE; if (data->ethtool_link_ext_state_info.__link_ext_substate && nla_put_u8(skb, ETHTOOL_A_LINKSTATE_EXT_SUBSTATE, data->ethtool_link_ext_state_info.__link_ext_substate)) return -EMSGSIZE; } if (data->link_stats.link_down_events != ETHTOOL_STAT_NOT_SET) if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT, data->link_stats.link_down_events)) return -EMSGSIZE; return 0; } const struct ethnl_request_ops ethnl_linkstate_request_ops = { .request_cmd = ETHTOOL_MSG_LINKSTATE_GET, .reply_cmd = ETHTOOL_MSG_LINKSTATE_GET_REPLY, .hdr_attr = ETHTOOL_A_LINKSTATE_HEADER, .req_info_size = sizeof(struct linkstate_req_info), .reply_data_size = sizeof(struct linkstate_reply_data), .prepare_data = linkstate_prepare_data, .reply_size = linkstate_reply_size, .fill_reply = linkstate_fill_reply, }; |
| 664 482 482 482 477 239 4 4 240 5 242 70 242 242 4 647 48 646 650 647 2 13 13 650 650 646 643 646 644 12 650 645 645 643 641 1268 1276 1279 1271 1276 384 1273 1279 1271 373 266 265 254 168 120 98 145 267 12 6 6 12 269 266 269 1261 1263 1268 1264 1111 1262 1137 1130 22 1111 1133 5 5 5 5 5 5 5 7 7 7 7 7 7 7 7 7 7 374 368 374 374 3 374 3 377 373 373 1 6 6 6 8 8 8 8 378 378 374 105 105 105 105 103 105 103 105 103 105 106 105 106 106 106 106 106 43 62 726 724 725 141 732 725 503 9 1 9 476 502 504 500 208 505 723 478 725 579 726 501 500 502 27 27 27 27 27 26 27 39 39 38 15 15 15 15 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 | // SPDX-License-Identifier: GPL-2.0-only /* * linux/mm/swap.c * * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds */ /* * This file contains the default values for the operation of the * Linux VM subsystem. Fine-tuning documentation can be found in * Documentation/admin-guide/sysctl/vm.rst. * Started 18.12.91 * Swap aging added 23.2.95, Stephen Tweedie. * Buffermem limits added 12.3.98, Rik van Riel. */ #include <linux/mm.h> #include <linux/sched.h> #include <linux/kernel_stat.h> #include <linux/swap.h> #include <linux/mman.h> #include <linux/pagemap.h> #include <linux/pagevec.h> #include <linux/init.h> #include <linux/export.h> #include <linux/mm_inline.h> #include <linux/percpu_counter.h> #include <linux/memremap.h> #include <linux/percpu.h> #include <linux/cpu.h> #include <linux/notifier.h> #include <linux/backing-dev.h> #include <linux/memcontrol.h> #include <linux/gfp.h> #include <linux/uio.h> #include <linux/hugetlb.h> #include <linux/page_idle.h> #include <linux/local_lock.h> #include <linux/buffer_head.h> #include "internal.h" #define CREATE_TRACE_POINTS #include <trace/events/pagemap.h> /* How many pages do we try to swap or page in/out together? As a power of 2 */ int page_cluster; static const int page_cluster_max = 31; struct cpu_fbatches { /* * The following folio batches are grouped together because they are protected * by disabling preemption (and interrupts remain enabled). */ local_lock_t lock; struct folio_batch lru_add; struct folio_batch lru_deactivate_file; struct folio_batch lru_deactivate; struct folio_batch lru_lazyfree; #ifdef CONFIG_SMP struct folio_batch lru_activate; #endif /* Protecting the following batches which require disabling interrupts */ local_lock_t lock_irq; struct folio_batch lru_move_tail; }; static DEFINE_PER_CPU(struct cpu_fbatches, cpu_fbatches) = { .lock = INIT_LOCAL_LOCK(lock), .lock_irq = INIT_LOCAL_LOCK(lock_irq), }; static void __page_cache_release(struct folio *folio, struct lruvec **lruvecp, unsigned long *flagsp) { if (folio_test_lru(folio)) { folio_lruvec_relock_irqsave(folio, lruvecp, flagsp); lruvec_del_folio(*lruvecp, folio); __folio_clear_lru_flags(folio); } } /* * This path almost never happens for VM activity - pages are normally freed * in batches. But it gets used by networking - and for compound pages. */ static void page_cache_release(struct folio *folio) { struct lruvec *lruvec = NULL; unsigned long flags; __page_cache_release(folio, &lruvec, &flags); if (lruvec) unlock_page_lruvec_irqrestore(lruvec, flags); } void __folio_put(struct folio *folio) { if (unlikely(folio_is_zone_device(folio))) { free_zone_device_folio(folio); return; } if (folio_test_hugetlb(folio)) { free_huge_folio(folio); return; } page_cache_release(folio); folio_unqueue_deferred_split(folio); mem_cgroup_uncharge(folio); free_frozen_pages(&folio->page, folio_order(folio)); } EXPORT_SYMBOL(__folio_put); typedef void (*move_fn_t)(struct lruvec *lruvec, struct folio *folio); static void lru_add(struct lruvec *lruvec, struct folio *folio) { int was_unevictable = folio_test_clear_unevictable(folio); long nr_pages = folio_nr_pages(folio); VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); /* * Is an smp_mb__after_atomic() still required here, before * folio_evictable() tests the mlocked flag, to rule out the possibility * of stranding an evictable folio on an unevictable LRU? I think * not, because __munlock_folio() only clears the mlocked flag * while the LRU lock is held. * * (That is not true of __page_cache_release(), and not necessarily * true of folios_put(): but those only clear the mlocked flag after * folio_put_testzero() has excluded any other users of the folio.) */ if (folio_evictable(folio)) { if (was_unevictable) __count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages); } else { folio_clear_active(folio); folio_set_unevictable(folio); /* * folio->mlock_count = !!folio_test_mlocked(folio)? * But that leaves __mlock_folio() in doubt whether another * actor has already counted the mlock or not. Err on the * safe side, underestimate, let page reclaim fix it, rather * than leaving a page on the unevictable LRU indefinitely. */ folio->mlock_count = 0; if (!was_unevictable) __count_vm_events(UNEVICTABLE_PGCULLED, nr_pages); } lruvec_add_folio(lruvec, folio); trace_mm_lru_insertion(folio); } static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn) { int i; struct lruvec *lruvec = NULL; unsigned long flags = 0; for (i = 0; i < folio_batch_count(fbatch); i++) { struct folio *folio = fbatch->folios[i]; /* block memcg migration while the folio moves between lru */ if (move_fn != lru_add && !folio_test_clear_lru(folio)) continue; folio_lruvec_relock_irqsave(folio, &lruvec, &flags); move_fn(lruvec, folio); folio_set_lru(folio); } if (lruvec) unlock_page_lruvec_irqrestore(lruvec, flags); folios_put(fbatch); } static void __folio_batch_add_and_move(struct folio_batch __percpu *fbatch, struct folio *folio, move_fn_t move_fn, bool disable_irq) { unsigned long flags; folio_get(folio); if (disable_irq) local_lock_irqsave(&cpu_fbatches.lock_irq, flags); else local_lock(&cpu_fbatches.lock); if (!folio_batch_add(this_cpu_ptr(fbatch), folio) || !folio_may_be_lru_cached(folio) || lru_cache_disabled()) folio_batch_move_lru(this_cpu_ptr(fbatch), move_fn); if (disable_irq) local_unlock_irqrestore(&cpu_fbatches.lock_irq, flags); else local_unlock(&cpu_fbatches.lock); } #define folio_batch_add_and_move(folio, op) \ __folio_batch_add_and_move( \ &cpu_fbatches.op, \ folio, \ op, \ offsetof(struct cpu_fbatches, op) >= \ offsetof(struct cpu_fbatches, lock_irq) \ ) static void lru_move_tail(struct lruvec *lruvec, struct folio *folio) { if (folio_test_unevictable(folio)) return; lruvec_del_folio(lruvec, folio); folio_clear_active(folio); lruvec_add_folio_tail(lruvec, folio); __count_vm_events(PGROTATED, folio_nr_pages(folio)); } /* * Writeback is about to end against a folio which has been marked for * immediate reclaim. If it still appears to be reclaimable, move it * to the tail of the inactive list. * * folio_rotate_reclaimable() must disable IRQs, to prevent nasty races. */ void folio_rotate_reclaimable(struct folio *folio) { if (folio_test_locked(folio) || folio_test_dirty(folio) || folio_test_unevictable(folio) || !folio_test_lru(folio)) return; folio_batch_add_and_move(folio, lru_move_tail); } void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file, unsigned int nr_io, unsigned int nr_rotated) __releases(lruvec->lru_lock) { unsigned long cost; /* * Reflect the relative cost of incurring IO and spending CPU * time on rotations. This doesn't attempt to make a precise * comparison, it just says: if reloads are about comparable * between the LRU lists, or rotations are overwhelmingly * different between them, adjust scan balance for CPU work. */ cost = nr_io * SWAP_CLUSTER_MAX + nr_rotated; if (!cost) { spin_unlock_irq(&lruvec->lru_lock); return; } for (;;) { unsigned long lrusize; /* Record cost event */ if (file) lruvec->file_cost += cost; else lruvec->anon_cost += cost; /* * Decay previous events * * Because workloads change over time (and to avoid * overflow) we keep these statistics as a floating * average, which ends up weighing recent refaults * more than old ones. */ lrusize = lruvec_page_state(lruvec, NR_INACTIVE_ANON) + lruvec_page_state(lruvec, NR_ACTIVE_ANON) + lruvec_page_state(lruvec, NR_INACTIVE_FILE) + lruvec_page_state(lruvec, NR_ACTIVE_FILE); if (lruvec->file_cost + lruvec->anon_cost > lrusize / 4) { lruvec->file_cost /= 2; lruvec->anon_cost /= 2; } spin_unlock_irq(&lruvec->lru_lock); lruvec = parent_lruvec(lruvec); if (!lruvec) break; spin_lock_irq(&lruvec->lru_lock); } } void lru_note_cost_refault(struct folio *folio) { struct lruvec *lruvec; lruvec = folio_lruvec_lock_irq(folio); lru_note_cost_unlock_irq(lruvec, folio_is_file_lru(folio), folio_nr_pages(folio), 0); } static void lru_activate(struct lruvec *lruvec, struct folio *folio) { long nr_pages = folio_nr_pages(folio); if (folio_test_active(folio) || folio_test_unevictable(folio)) return; lruvec_del_folio(lruvec, folio); folio_set_active(folio); lruvec_add_folio(lruvec, folio); trace_mm_lru_activate(folio); __count_vm_events(PGACTIVATE, nr_pages); count_memcg_events(lruvec_memcg(lruvec), PGACTIVATE, nr_pages); } #ifdef CONFIG_SMP static void folio_activate_drain(int cpu) { struct folio_batch *fbatch = &per_cpu(cpu_fbatches.lru_activate, cpu); if (folio_batch_count(fbatch)) folio_batch_move_lru(fbatch, lru_activate); } void folio_activate(struct folio *folio) { if (folio_test_active(folio) || folio_test_unevictable(folio) || !folio_test_lru(folio)) return; folio_batch_add_and_move(folio, lru_activate); } #else static inline void folio_activate_drain(int cpu) { } void folio_activate(struct folio *folio) { struct lruvec *lruvec; if (!folio_test_clear_lru(folio)) return; lruvec = folio_lruvec_lock_irq(folio); lru_activate(lruvec, folio); unlock_page_lruvec_irq(lruvec); folio_set_lru(folio); } #endif static void __lru_cache_activate_folio(struct folio *folio) { struct folio_batch *fbatch; int i; local_lock(&cpu_fbatches.lock); fbatch = this_cpu_ptr(&cpu_fbatches.lru_add); /* * Search backwards on the optimistic assumption that the folio being * activated has just been added to this batch. Note that only * the local batch is examined as a !LRU folio could be in the * process of being released, reclaimed, migrated or on a remote * batch that is currently being drained. Furthermore, marking * a remote batch's folio active potentially hits a race where * a folio is marked active just after it is added to the inactive * list causing accounting errors and BUG_ON checks to trigger. */ for (i = folio_batch_count(fbatch) - 1; i >= 0; i--) { struct folio *batch_folio = fbatch->folios[i]; if (batch_folio == folio) { folio_set_active(folio); break; } } local_unlock(&cpu_fbatches.lock); } #ifdef CONFIG_LRU_GEN static void lru_gen_inc_refs(struct folio *folio) { unsigned long new_flags, old_flags = READ_ONCE(folio->flags.f); if (folio_test_unevictable(folio)) return; /* see the comment on LRU_REFS_FLAGS */ if (!folio_test_referenced(folio)) { set_mask_bits(&folio->flags.f, LRU_REFS_MASK, BIT(PG_referenced)); return; } do { if ((old_flags & LRU_REFS_MASK) == LRU_REFS_MASK) { if (!folio_test_workingset(folio)) folio_set_workingset(folio); return; } new_flags = old_flags + BIT(LRU_REFS_PGOFF); } while (!try_cmpxchg(&folio->flags.f, &old_flags, new_flags)); } static bool lru_gen_clear_refs(struct folio *folio) { struct lru_gen_folio *lrugen; int gen = folio_lru_gen(folio); int type = folio_is_file_lru(folio); if (gen < 0) return true; set_mask_bits(&folio->flags.f, LRU_REFS_FLAGS | BIT(PG_workingset), 0); lrugen = &folio_lruvec(folio)->lrugen; /* whether can do without shuffling under the LRU lock */ return gen == lru_gen_from_seq(READ_ONCE(lrugen->min_seq[type])); } #else /* !CONFIG_LRU_GEN */ static void lru_gen_inc_refs(struct folio *folio) { } static bool lru_gen_clear_refs(struct folio *folio) { return false; } #endif /* CONFIG_LRU_GEN */ /** * folio_mark_accessed - Mark a folio as having seen activity. * @folio: The folio to mark. * * This function will perform one of the following transitions: * * * inactive,unreferenced -> inactive,referenced * * inactive,referenced -> active,unreferenced * * active,unreferenced -> active,referenced * * When a newly allocated folio is not yet visible, so safe for non-atomic ops, * __folio_set_referenced() may be substituted for folio_mark_accessed(). */ void folio_mark_accessed(struct folio *folio) { if (folio_test_dropbehind(folio)) return; if (lru_gen_enabled()) { lru_gen_inc_refs(folio); return; } if (!folio_test_referenced(folio)) { folio_set_referenced(folio); } else if (folio_test_unevictable(folio)) { /* * Unevictable pages are on the "LRU_UNEVICTABLE" list. But, * this list is never rotated or maintained, so marking an * unevictable page accessed has no effect. */ } else if (!folio_test_active(folio)) { /* * If the folio is on the LRU, queue it for activation via * cpu_fbatches.lru_activate. Otherwise, assume the folio is in a * folio_batch, mark it active and it'll be moved to the active * LRU on the next drain. */ if (folio_test_lru(folio)) folio_activate(folio); else __lru_cache_activate_folio(folio); folio_clear_referenced(folio); workingset_activation(folio); } if (folio_test_idle(folio)) folio_clear_idle(folio); } EXPORT_SYMBOL(folio_mark_accessed); /** * folio_add_lru - Add a folio to an LRU list. * @folio: The folio to be added to the LRU. * * Queue the folio for addition to the LRU. The decision on whether * to add the page to the [in]active [file|anon] list is deferred until the * folio_batch is drained. This gives a chance for the caller of folio_add_lru() * have the folio added to the active list using folio_mark_accessed(). */ void folio_add_lru(struct folio *folio) { VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio); VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); /* see the comment in lru_gen_folio_seq() */ if (lru_gen_enabled() && !folio_test_unevictable(folio) && lru_gen_in_fault() && !(current->flags & PF_MEMALLOC)) folio_set_active(folio); folio_batch_add_and_move(folio, lru_add); } EXPORT_SYMBOL(folio_add_lru); /** * folio_add_lru_vma() - Add a folio to the appropate LRU list for this VMA. * @folio: The folio to be added to the LRU. * @vma: VMA in which the folio is mapped. * * If the VMA is mlocked, @folio is added to the unevictable list. * Otherwise, it is treated the same way as folio_add_lru(). */ void folio_add_lru_vma(struct folio *folio, struct vm_area_struct *vma) { VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); if (unlikely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED)) mlock_new_folio(folio); else folio_add_lru(folio); } /* * If the folio cannot be invalidated, it is moved to the * inactive list to speed up its reclaim. It is moved to the * head of the list, rather than the tail, to give the flusher * threads some time to write it out, as this is much more * effective than the single-page writeout from reclaim. * * If the folio isn't mapped and dirty/writeback, the folio * could be reclaimed asap using the reclaim flag. * * 1. active, mapped folio -> none * 2. active, dirty/writeback folio -> inactive, head, reclaim * 3. inactive, mapped folio -> none * 4. inactive, dirty/writeback folio -> inactive, head, reclaim * 5. inactive, clean -> inactive, tail * 6. Others -> none * * In 4, it moves to the head of the inactive list so the folio is * written out by flusher threads as this is much more efficient * than the single-page writeout from reclaim. */ static void lru_deactivate_file(struct lruvec *lruvec, struct folio *folio) { bool active = folio_test_active(folio) || lru_gen_enabled(); long nr_pages = folio_nr_pages(folio); if (folio_test_unevictable(folio)) return; /* Some processes are using the folio */ if (folio_mapped(folio)) return; lruvec_del_folio(lruvec, folio); folio_clear_active(folio); folio_clear_referenced(folio); if (folio_test_writeback(folio) || folio_test_dirty(folio)) { /* * Setting the reclaim flag could race with * folio_end_writeback() and confuse readahead. But the * race window is _really_ small and it's not a critical * problem. */ lruvec_add_folio(lruvec, folio); folio_set_reclaim(folio); } else { /* * The folio's writeback ended while it was in the batch. * We move that folio to the tail of the inactive list. */ lruvec_add_folio_tail(lruvec, folio); __count_vm_events(PGROTATED, nr_pages); } if (active) { __count_vm_events(PGDEACTIVATE, nr_pages); count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_pages); } } static void lru_deactivate(struct lruvec *lruvec, struct folio *folio) { long nr_pages = folio_nr_pages(folio); if (folio_test_unevictable(folio) || !(folio_test_active(folio) || lru_gen_enabled())) return; lruvec_del_folio(lruvec, folio); folio_clear_active(folio); folio_clear_referenced(folio); lruvec_add_folio(lruvec, folio); __count_vm_events(PGDEACTIVATE, nr_pages); count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_pages); } static void lru_lazyfree(struct lruvec *lruvec, struct folio *folio) { long nr_pages = folio_nr_pages(folio); if (!folio_test_anon(folio) || !folio_test_swapbacked(folio) || folio_test_swapcache(folio) || folio_test_unevictable(folio)) return; lruvec_del_folio(lruvec, folio); folio_clear_active(folio); if (lru_gen_enabled()) lru_gen_clear_refs(folio); else folio_clear_referenced(folio); /* * Lazyfree folios are clean anonymous folios. They have * the swapbacked flag cleared, to distinguish them from normal * anonymous folios */ folio_clear_swapbacked(folio); lruvec_add_folio(lruvec, folio); __count_vm_events(PGLAZYFREE, nr_pages); count_memcg_events(lruvec_memcg(lruvec), PGLAZYFREE, nr_pages); } /* * Drain pages out of the cpu's folio_batch. * Either "cpu" is the current CPU, and preemption has already been * disabled; or "cpu" is being hot-unplugged, and is already dead. */ void lru_add_drain_cpu(int cpu) { struct cpu_fbatches *fbatches = &per_cpu(cpu_fbatches, cpu); struct folio_batch *fbatch = &fbatches->lru_add; if (folio_batch_count(fbatch)) folio_batch_move_lru(fbatch, lru_add); fbatch = &fbatches->lru_move_tail; /* Disabling interrupts below acts as a compiler barrier. */ if (data_race(folio_batch_count(fbatch))) { unsigned long flags; /* No harm done if a racing interrupt already did this */ local_lock_irqsave(&cpu_fbatches.lock_irq, flags); folio_batch_move_lru(fbatch, lru_move_tail); local_unlock_irqrestore(&cpu_fbatches.lock_irq, flags); } fbatch = &fbatches->lru_deactivate_file; if (folio_batch_count(fbatch)) folio_batch_move_lru(fbatch, lru_deactivate_file); fbatch = &fbatches->lru_deactivate; if (folio_batch_count(fbatch)) folio_batch_move_lru(fbatch, lru_deactivate); fbatch = &fbatches->lru_lazyfree; if (folio_batch_count(fbatch)) folio_batch_move_lru(fbatch, lru_lazyfree); folio_activate_drain(cpu); } /** * deactivate_file_folio() - Deactivate a file folio. * @folio: Folio to deactivate. * * This function hints to the VM that @folio is a good reclaim candidate, * for example if its invalidation fails due to the folio being dirty * or under writeback. * * Context: Caller holds a reference on the folio. */ void deactivate_file_folio(struct folio *folio) { /* Deactivating an unevictable folio will not accelerate reclaim */ if (folio_test_unevictable(folio) || !folio_test_lru(folio)) return; if (lru_gen_enabled() && lru_gen_clear_refs(folio)) return; folio_batch_add_and_move(folio, lru_deactivate_file); } /* * folio_deactivate - deactivate a folio * @folio: folio to deactivate * * folio_deactivate() moves @folio to the inactive list if @folio was on the * active list and was not unevictable. This is done to accelerate the * reclaim of @folio. */ void folio_deactivate(struct folio *folio) { if (folio_test_unevictable(folio) || !folio_test_lru(folio)) return; if (lru_gen_enabled() ? lru_gen_clear_refs(folio) : !folio_test_active(folio)) return; folio_batch_add_and_move(folio, lru_deactivate); } /** * folio_mark_lazyfree - make an anon folio lazyfree * @folio: folio to deactivate * * folio_mark_lazyfree() moves @folio to the inactive file list. * This is done to accelerate the reclaim of @folio. */ void folio_mark_lazyfree(struct folio *folio) { if (!folio_test_anon(folio) || !folio_test_swapbacked(folio) || !folio_test_lru(folio) || folio_test_swapcache(folio) || folio_test_unevictable(folio)) return; folio_batch_add_and_move(folio, lru_lazyfree); } void lru_add_drain(void) { local_lock(&cpu_fbatches.lock); lru_add_drain_cpu(smp_processor_id()); local_unlock(&cpu_fbatches.lock); mlock_drain_local(); } /* * It's called from per-cpu workqueue context in SMP case so * lru_add_drain_cpu and invalidate_bh_lrus_cpu should run on * the same cpu. It shouldn't be a problem in !SMP case since * the core is only one and the locks will disable preemption. */ static void lru_add_and_bh_lrus_drain(void) { local_lock(&cpu_fbatches.lock); lru_add_drain_cpu(smp_processor_id()); local_unlock(&cpu_fbatches.lock); invalidate_bh_lrus_cpu(); mlock_drain_local(); } void lru_add_drain_cpu_zone(struct zone *zone) { local_lock(&cpu_fbatches.lock); lru_add_drain_cpu(smp_processor_id()); drain_local_pages(zone); local_unlock(&cpu_fbatches.lock); mlock_drain_local(); } #ifdef CONFIG_SMP static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); static void lru_add_drain_per_cpu(struct work_struct *dummy) { lru_add_and_bh_lrus_drain(); } static bool cpu_needs_drain(unsigned int cpu) { struct cpu_fbatches *fbatches = &per_cpu(cpu_fbatches, cpu); /* Check these in order of likelihood that they're not zero */ return folio_batch_count(&fbatches->lru_add) || folio_batch_count(&fbatches->lru_move_tail) || folio_batch_count(&fbatches->lru_deactivate_file) || folio_batch_count(&fbatches->lru_deactivate) || folio_batch_count(&fbatches->lru_lazyfree) || folio_batch_count(&fbatches->lru_activate) || need_mlock_drain(cpu) || has_bh_in_lru(cpu, NULL); } /* * Doesn't need any cpu hotplug locking because we do rely on per-cpu * kworkers being shut down before our page_alloc_cpu_dead callback is * executed on the offlined cpu. * Calling this function with cpu hotplug locks held can actually lead * to obscure indirect dependencies via WQ context. */ static inline void __lru_add_drain_all(bool force_all_cpus) { /* * lru_drain_gen - Global pages generation number * * (A) Definition: global lru_drain_gen = x implies that all generations * 0 < n <= x are already *scheduled* for draining. * * This is an optimization for the highly-contended use case where a * user space workload keeps constantly generating a flow of pages for * each CPU. */ static unsigned int lru_drain_gen; static struct cpumask has_work; static DEFINE_MUTEX(lock); unsigned cpu, this_gen; /* * Make sure nobody triggers this path before mm_percpu_wq is fully * initialized. */ if (WARN_ON(!mm_percpu_wq)) return; /* * Guarantee folio_batch counter stores visible by this CPU * are visible to other CPUs before loading the current drain * generation. */ smp_mb(); /* * (B) Locally cache global LRU draining generation number * * The read barrier ensures that the counter is loaded before the mutex * is taken. It pairs with smp_mb() inside the mutex critical section * at (D). */ this_gen = smp_load_acquire(&lru_drain_gen); /* It helps everyone if we do our own local drain immediately. */ lru_add_drain(); mutex_lock(&lock); /* * (C) Exit the draining operation if a newer generation, from another * lru_add_drain_all(), was already scheduled for draining. Check (A). */ if (unlikely(this_gen != lru_drain_gen && !force_all_cpus)) goto done; /* * (D) Increment global generation number * * Pairs with smp_load_acquire() at (B), outside of the critical * section. Use a full memory barrier to guarantee that the * new global drain generation number is stored before loading * folio_batch counters. * * This pairing must be done here, before the for_each_online_cpu loop * below which drains the page vectors. * * Let x, y, and z represent some system CPU numbers, where x < y < z. * Assume CPU #z is in the middle of the for_each_online_cpu loop * below and has already reached CPU #y's per-cpu data. CPU #x comes * along, adds some pages to its per-cpu vectors, then calls * lru_add_drain_all(). * * If the paired barrier is done at any later step, e.g. after the * loop, CPU #x will just exit at (C) and miss flushing out all of its * added pages. */ WRITE_ONCE(lru_drain_gen, lru_drain_gen + 1); smp_mb(); cpumask_clear(&has_work); for_each_online_cpu(cpu) { struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); if (cpu_needs_drain(cpu)) { INIT_WORK(work, lru_add_drain_per_cpu); queue_work_on(cpu, mm_percpu_wq, work); __cpumask_set_cpu(cpu, &has_work); } } for_each_cpu(cpu, &has_work) flush_work(&per_cpu(lru_add_drain_work, cpu)); done: mutex_unlock(&lock); } void lru_add_drain_all(void) { __lru_add_drain_all(false); } #else void lru_add_drain_all(void) { lru_add_drain(); } #endif /* CONFIG_SMP */ atomic_t lru_disable_count = ATOMIC_INIT(0); /* * lru_cache_disable() needs to be called before we start compiling * a list of folios to be migrated using folio_isolate_lru(). * It drains folios on LRU cache and then disable on all cpus until * lru_cache_enable is called. * * Must be paired with a call to lru_cache_enable(). */ void lru_cache_disable(void) { atomic_inc(&lru_disable_count); /* * Readers of lru_disable_count are protected by either disabling * preemption or rcu_read_lock: * * preempt_disable, local_irq_disable [bh_lru_lock()] * rcu_read_lock [rt_spin_lock CONFIG_PREEMPT_RT] * preempt_disable [local_lock !CONFIG_PREEMPT_RT] * * Since v5.1 kernel, synchronize_rcu() is guaranteed to wait on * preempt_disable() regions of code. So any CPU which sees * lru_disable_count = 0 will have exited the critical * section when synchronize_rcu() returns. */ synchronize_rcu_expedited(); #ifdef CONFIG_SMP __lru_add_drain_all(true); #else lru_add_and_bh_lrus_drain(); #endif } /** * folios_put_refs - Reduce the reference count on a batch of folios. * @folios: The folios. * @refs: The number of refs to subtract from each folio. * * Like folio_put(), but for a batch of folios. This is more efficient * than writing the loop yourself as it will optimise the locks which need * to be taken if the folios are freed. The folios batch is returned * empty and ready to be reused for another batch; there is no need * to reinitialise it. If @refs is NULL, we subtract one from each * folio refcount. * * Context: May be called in process or interrupt context, but not in NMI * context. May be called while holding a spinlock. */ void folios_put_refs(struct folio_batch *folios, unsigned int *refs) { int i, j; struct lruvec *lruvec = NULL; unsigned long flags = 0; for (i = 0, j = 0; i < folios->nr; i++) { struct folio *folio = folios->folios[i]; unsigned int nr_refs = refs ? refs[i] : 1; if (is_huge_zero_folio(folio)) continue; if (folio_is_zone_device(folio)) { if (lruvec) { unlock_page_lruvec_irqrestore(lruvec, flags); lruvec = NULL; } if (folio_ref_sub_and_test(folio, nr_refs)) free_zone_device_folio(folio); continue; } if (!folio_ref_sub_and_test(folio, nr_refs)) continue; /* hugetlb has its own memcg */ if (folio_test_hugetlb(folio)) { if (lruvec) { unlock_page_lruvec_irqrestore(lruvec, flags); lruvec = NULL; } free_huge_folio(folio); continue; } folio_unqueue_deferred_split(folio); __page_cache_release(folio, &lruvec, &flags); if (j != i) folios->folios[j] = folio; j++; } if (lruvec) unlock_page_lruvec_irqrestore(lruvec, flags); if (!j) { folio_batch_reinit(folios); return; } folios->nr = j; mem_cgroup_uncharge_folios(folios); free_unref_folios(folios); } EXPORT_SYMBOL(folios_put_refs); /** * release_pages - batched put_page() * @arg: array of pages to release * @nr: number of pages * * Decrement the reference count on all the pages in @arg. If it * fell to zero, remove the page from the LRU and free it. * * Note that the argument can be an array of pages, encoded pages, * or folio pointers. We ignore any encoded bits, and turn any of * them into just a folio that gets free'd. */ void release_pages(release_pages_arg arg, int nr) { struct folio_batch fbatch; int refs[PAGEVEC_SIZE]; struct encoded_page **encoded = arg.encoded_pages; int i; folio_batch_init(&fbatch); for (i = 0; i < nr; i++) { /* Turn any of the argument types into a folio */ struct folio *folio = page_folio(encoded_page_ptr(encoded[i])); /* Is our next entry actually "nr_pages" -> "nr_refs" ? */ refs[fbatch.nr] = 1; if (unlikely(encoded_page_flags(encoded[i]) & ENCODED_PAGE_BIT_NR_PAGES_NEXT)) refs[fbatch.nr] = encoded_nr_pages(encoded[++i]); if (folio_batch_add(&fbatch, folio) > 0) continue; folios_put_refs(&fbatch, refs); } if (fbatch.nr) folios_put_refs(&fbatch, refs); } EXPORT_SYMBOL(release_pages); /* * The folios which we're about to release may be in the deferred lru-addition * queues. That would prevent them from really being freed right now. That's * OK from a correctness point of view but is inefficient - those folios may be * cache-warm and we want to give them back to the page allocator ASAP. * * So __folio_batch_release() will drain those queues here. * folio_batch_move_lru() calls folios_put() directly to avoid * mutual recursion. */ void __folio_batch_release(struct folio_batch *fbatch) { if (!fbatch->percpu_pvec_drained) { lru_add_drain(); fbatch->percpu_pvec_drained = true; } folios_put(fbatch); } EXPORT_SYMBOL(__folio_batch_release); /** * folio_batch_remove_exceptionals() - Prune non-folios from a batch. * @fbatch: The batch to prune * * find_get_entries() fills a batch with both folios and shadow/swap/DAX * entries. This function prunes all the non-folio entries from @fbatch * without leaving holes, so that it can be passed on to folio-only batch * operations. */ void folio_batch_remove_exceptionals(struct folio_batch *fbatch) { unsigned int i, j; for (i = 0, j = 0; i < folio_batch_count(fbatch); i++) { struct folio *folio = fbatch->folios[i]; if (!xa_is_value(folio)) fbatch->folios[j++] = folio; } fbatch->nr = j; } static const struct ctl_table swap_sysctl_table[] = { { .procname = "page-cluster", .data = &page_cluster, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = (void *)&page_cluster_max, } }; /* * Perform any setup for the swap system */ void __init swap_setup(void) { unsigned long megs = PAGES_TO_MB(totalram_pages()); /* Use a smaller cluster for small-memory machines */ if (megs < 16) page_cluster = 2; else page_cluster = 3; /* * Right now other parts of the system means that we * _really_ don't want to cluster much more */ register_sysctl_init("vm", swap_sysctl_table); } |
| 421 6 416 410 670 401 421 494 6 6 487 7 7 403 414 408 238 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * pm_runtime.h - Device run-time power management helper functions. * * Copyright (C) 2009 Rafael J. Wysocki <rjw@sisk.pl> */ #ifndef _LINUX_PM_RUNTIME_H #define _LINUX_PM_RUNTIME_H #include <linux/device.h> #include <linux/notifier.h> #include <linux/pm.h> #include <linux/jiffies.h> /* Runtime PM flag argument bits */ #define RPM_ASYNC 0x01 /* Request is asynchronous */ #define RPM_NOWAIT 0x02 /* Don't wait for concurrent state change */ #define RPM_GET_PUT 0x04 /* Increment/decrement the usage_count */ #define RPM_AUTO 0x08 /* Use autosuspend_delay */ #define RPM_TRANSPARENT 0x10 /* Succeed if runtime PM is disabled */ /* * Use this for defining a set of PM operations to be used in all situations * (system suspend, hibernation or runtime PM). * * Note that the behaviour differs from the deprecated UNIVERSAL_DEV_PM_OPS() * macro, which uses the provided callbacks for both runtime PM and system * sleep, while DEFINE_RUNTIME_DEV_PM_OPS() uses pm_runtime_force_suspend() * and pm_runtime_force_resume() for its system sleep callbacks. * * If the underlying dev_pm_ops struct symbol has to be exported, use * EXPORT_RUNTIME_DEV_PM_OPS() or EXPORT_GPL_RUNTIME_DEV_PM_OPS() instead. */ #define DEFINE_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ _DEFINE_DEV_PM_OPS(name, pm_runtime_force_suspend, \ pm_runtime_force_resume, suspend_fn, \ resume_fn, idle_fn) #define EXPORT_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ EXPORT_DEV_PM_OPS(name) = { \ RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ } #define EXPORT_GPL_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ EXPORT_GPL_DEV_PM_OPS(name) = { \ RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ } #define EXPORT_NS_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn, ns) \ EXPORT_NS_DEV_PM_OPS(name, ns) = { \ RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ } #define EXPORT_NS_GPL_RUNTIME_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn, ns) \ EXPORT_NS_GPL_DEV_PM_OPS(name, ns) = { \ RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ } #ifdef CONFIG_PM extern struct workqueue_struct *pm_wq; static inline bool queue_pm_work(struct work_struct *work) { return queue_work(pm_wq, work); } extern int pm_generic_runtime_suspend(struct device *dev); extern int pm_generic_runtime_resume(struct device *dev); extern int pm_runtime_force_suspend(struct device *dev); extern int __pm_runtime_idle(struct device *dev, int rpmflags); extern int __pm_runtime_suspend(struct device *dev, int rpmflags); extern int __pm_runtime_resume(struct device *dev, int rpmflags); extern int pm_runtime_get_if_active(struct device *dev); extern int pm_runtime_get_if_in_use(struct device *dev); extern int pm_schedule_suspend(struct device *dev, unsigned int delay); extern int __pm_runtime_set_status(struct device *dev, unsigned int status); extern int pm_runtime_barrier(struct device *dev); extern bool pm_runtime_block_if_disabled(struct device *dev); extern void pm_runtime_unblock(struct device *dev); extern void pm_runtime_enable(struct device *dev); extern void __pm_runtime_disable(struct device *dev, bool check_resume); extern void pm_runtime_allow(struct device *dev); extern void pm_runtime_forbid(struct device *dev); extern void pm_runtime_no_callbacks(struct device *dev); extern void pm_runtime_irq_safe(struct device *dev); extern void __pm_runtime_use_autosuspend(struct device *dev, bool use); extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); extern u64 pm_runtime_autosuspend_expiration(struct device *dev); extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable); extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device_link *link); extern void pm_runtime_release_supplier(struct device_link *link); int devm_pm_runtime_set_active_enabled(struct device *dev); extern int devm_pm_runtime_enable(struct device *dev); int devm_pm_runtime_get_noresume(struct device *dev); /** * pm_suspend_ignore_children - Set runtime PM behavior regarding children. * @dev: Target device. * @enable: Whether or not to ignore possible dependencies on children. * * The dependencies of @dev on its children will not be taken into account by * the runtime PM framework going forward if @enable is %true, or they will * be taken into account otherwise. */ static inline void pm_suspend_ignore_children(struct device *dev, bool enable) { dev->power.ignore_children = enable; } /** * pm_runtime_get_noresume - Bump up runtime PM usage counter of a device. * @dev: Target device. */ static inline void pm_runtime_get_noresume(struct device *dev) { atomic_inc(&dev->power.usage_count); } /** * pm_runtime_put_noidle - Drop runtime PM usage counter of a device. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev unless it is 0 already. */ static inline void pm_runtime_put_noidle(struct device *dev) { atomic_add_unless(&dev->power.usage_count, -1, 0); } /** * pm_runtime_suspended - Check whether or not a device is runtime-suspended. * @dev: Target device. * * Return %true if runtime PM is enabled for @dev and its runtime PM status is * %RPM_SUSPENDED, or %false otherwise. * * Note that the return value of this function can only be trusted if it is * called under the runtime PM lock of @dev or under conditions in which * runtime PM cannot be either disabled or enabled for @dev and its runtime PM * status cannot change. */ static inline bool pm_runtime_suspended(struct device *dev) { return dev->power.runtime_status == RPM_SUSPENDED && !dev->power.disable_depth; } /** * pm_runtime_active - Check whether or not a device is runtime-active. * @dev: Target device. * * Return %true if runtime PM is disabled for @dev or its runtime PM status is * %RPM_ACTIVE, or %false otherwise. * * Note that the return value of this function can only be trusted if it is * called under the runtime PM lock of @dev or under conditions in which * runtime PM cannot be either disabled or enabled for @dev and its runtime PM * status cannot change. */ static inline bool pm_runtime_active(struct device *dev) { return dev->power.runtime_status == RPM_ACTIVE || dev->power.disable_depth; } /** * pm_runtime_status_suspended - Check if runtime PM status is "suspended". * @dev: Target device. * * Return %true if the runtime PM status of @dev is %RPM_SUSPENDED, or %false * otherwise, regardless of whether or not runtime PM has been enabled for @dev. * * Note that the return value of this function can only be trusted if it is * called under the runtime PM lock of @dev or under conditions in which the * runtime PM status of @dev cannot change. */ static inline bool pm_runtime_status_suspended(struct device *dev) { return dev->power.runtime_status == RPM_SUSPENDED; } /** * pm_runtime_enabled - Check if runtime PM is enabled. * @dev: Target device. * * Return %true if runtime PM is enabled for @dev or %false otherwise. * * Note that the return value of this function can only be trusted if it is * called under the runtime PM lock of @dev or under conditions in which * runtime PM cannot be either disabled or enabled for @dev. */ static inline bool pm_runtime_enabled(struct device *dev) { return !dev->power.disable_depth; } /** * pm_runtime_blocked - Check if runtime PM enabling is blocked. * @dev: Target device. * * Do not call this function outside system suspend/resume code paths. */ static inline bool pm_runtime_blocked(struct device *dev) { return dev->power.last_status == RPM_BLOCKED; } /** * pm_runtime_has_no_callbacks - Check if runtime PM callbacks may be present. * @dev: Target device. * * Return %true if @dev is a special device without runtime PM callbacks or * %false otherwise. */ static inline bool pm_runtime_has_no_callbacks(struct device *dev) { return dev->power.no_callbacks; } /** * pm_runtime_mark_last_busy - Update the last access time of a device. * @dev: Target device. * * Update the last access time of @dev used by the runtime PM autosuspend * mechanism to the current time as returned by ktime_get_mono_fast_ns(). */ static inline void pm_runtime_mark_last_busy(struct device *dev) { WRITE_ONCE(dev->power.last_busy, ktime_get_mono_fast_ns()); } /** * pm_runtime_is_irq_safe - Check if runtime PM can work in interrupt context. * @dev: Target device. * * Return %true if @dev has been marked as an "IRQ-safe" device (with respect * to runtime PM), in which case its runtime PM callabcks can be expected to * work correctly when invoked from interrupt handlers. */ static inline bool pm_runtime_is_irq_safe(struct device *dev) { return dev->power.irq_safe; } extern u64 pm_runtime_suspended_time(struct device *dev); #else /* !CONFIG_PM */ static inline bool queue_pm_work(struct work_struct *work) { return false; } static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; } static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } static inline int pm_runtime_force_suspend(struct device *dev) { return 0; } static inline int __pm_runtime_idle(struct device *dev, int rpmflags) { return -ENOSYS; } static inline int __pm_runtime_suspend(struct device *dev, int rpmflags) { return -ENOSYS; } static inline int __pm_runtime_resume(struct device *dev, int rpmflags) { return 1; } static inline int pm_schedule_suspend(struct device *dev, unsigned int delay) { return -ENOSYS; } static inline int pm_runtime_get_if_in_use(struct device *dev) { return -EINVAL; } static inline int pm_runtime_get_if_active(struct device *dev) { return -EINVAL; } static inline int __pm_runtime_set_status(struct device *dev, unsigned int status) { return 0; } static inline int pm_runtime_barrier(struct device *dev) { return 0; } static inline bool pm_runtime_block_if_disabled(struct device *dev) { return true; } static inline void pm_runtime_unblock(struct device *dev) {} static inline void pm_runtime_enable(struct device *dev) {} static inline void __pm_runtime_disable(struct device *dev, bool c) {} static inline bool pm_runtime_blocked(struct device *dev) { return true; } static inline void pm_runtime_allow(struct device *dev) {} static inline void pm_runtime_forbid(struct device *dev) {} static inline int devm_pm_runtime_set_active_enabled(struct device *dev) { return 0; } static inline int devm_pm_runtime_enable(struct device *dev) { return 0; } static inline int devm_pm_runtime_get_noresume(struct device *dev) { return 0; } static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {} static inline void pm_runtime_get_noresume(struct device *dev) {} static inline void pm_runtime_put_noidle(struct device *dev) {} static inline bool pm_runtime_suspended(struct device *dev) { return false; } static inline bool pm_runtime_active(struct device *dev) { return true; } static inline bool pm_runtime_status_suspended(struct device *dev) { return false; } static inline bool pm_runtime_enabled(struct device *dev) { return false; } static inline void pm_runtime_no_callbacks(struct device *dev) {} static inline void pm_runtime_irq_safe(struct device *dev) {} static inline bool pm_runtime_is_irq_safe(struct device *dev) { return false; } static inline bool pm_runtime_has_no_callbacks(struct device *dev) { return false; } static inline void pm_runtime_mark_last_busy(struct device *dev) {} static inline void __pm_runtime_use_autosuspend(struct device *dev, bool use) {} static inline void pm_runtime_set_autosuspend_delay(struct device *dev, int delay) {} static inline u64 pm_runtime_autosuspend_expiration( struct device *dev) { return 0; } static inline void pm_runtime_set_memalloc_noio(struct device *dev, bool enable){} static inline void pm_runtime_get_suppliers(struct device *dev) {} static inline void pm_runtime_put_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} static inline void pm_runtime_drop_link(struct device_link *link) {} static inline void pm_runtime_release_supplier(struct device_link *link) {} #endif /* !CONFIG_PM */ #ifdef CONFIG_PM_SLEEP bool pm_runtime_need_not_resume(struct device *dev); int pm_runtime_force_resume(struct device *dev); #else /* !CONFIG_PM_SLEEP */ static inline bool pm_runtime_need_not_resume(struct device *dev) {return true; } static inline int pm_runtime_force_resume(struct device *dev) { return -ENXIO; } #endif /* CONFIG_PM_SLEEP */ /** * pm_runtime_idle - Conditionally set up autosuspend of a device or suspend it. * @dev: Target device. * * Invoke the "idle check" callback of @dev and, depending on its return value, * set up autosuspend of @dev or suspend it (depending on whether or not * autosuspend has been enabled for it). * * Return: * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. * * -EAGAIN: Runtime PM usage counter non-zero, Runtime PM status change * ongoing or device not in %RPM_ACTIVE state. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. * Other values and conditions for the above values are possible as returned by * Runtime PM idle and suspend callbacks. */ static inline int pm_runtime_idle(struct device *dev) { return __pm_runtime_idle(dev, 0); } /** * pm_runtime_suspend - Suspend a device synchronously. * @dev: Target device. * * Return: * * 1: Success; device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. * * -EAGAIN: Runtime PM usage counter non-zero or Runtime PM status change * ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -ENOSYS: CONFIG_PM not enabled. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. */ static inline int pm_runtime_suspend(struct device *dev) { return __pm_runtime_suspend(dev, 0); } /** * pm_runtime_autosuspend - Update the last access time and set up autosuspend * of a device. * @dev: Target device. * * First update the last access time, then set up autosuspend of @dev or suspend * it (depending on whether or not autosuspend is enabled for it) without * engaging its "idle check" callback. * * Return: * * 1: Success; device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. * * -EAGAIN: Runtime PM usage counter non-zero or Runtime PM status change * ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -ENOSYS: CONFIG_PM not enabled. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. */ static inline int pm_runtime_autosuspend(struct device *dev) { pm_runtime_mark_last_busy(dev); return __pm_runtime_suspend(dev, RPM_AUTO); } /** * pm_runtime_resume - Resume a device synchronously. * @dev: Target device. */ static inline int pm_runtime_resume(struct device *dev) { return __pm_runtime_resume(dev, 0); } /** * pm_request_idle - Queue up "idle check" execution for a device. * @dev: Target device. * * Queue up a work item to run an equivalent of pm_runtime_idle() for @dev * asynchronously. * * Return: * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. * * -EAGAIN: Runtime PM usage counter non-zero, Runtime PM status change * ongoing or device not in %RPM_ACTIVE state. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. */ static inline int pm_request_idle(struct device *dev) { return __pm_runtime_idle(dev, RPM_ASYNC); } /** * pm_request_resume - Queue up runtime-resume of a device. * @dev: Target device. */ static inline int pm_request_resume(struct device *dev) { return __pm_runtime_resume(dev, RPM_ASYNC); } /** * pm_request_autosuspend - Update the last access time and queue up autosuspend * of a device. * @dev: Target device. * * Update the last access time of a device and queue up a work item to run an * equivalent pm_runtime_autosuspend() for @dev asynchronously. * * Return: * * 1: Success; device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. * * -EAGAIN: Runtime PM usage counter non-zero or Runtime PM status change * ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. */ static inline int pm_request_autosuspend(struct device *dev) { pm_runtime_mark_last_busy(dev); return __pm_runtime_suspend(dev, RPM_ASYNC | RPM_AUTO); } /** * pm_runtime_get - Bump up usage counter and queue up resume of a device. * @dev: Target device. * * Bump up the runtime PM usage counter of @dev and queue up a work item to * carry out runtime-resume of it. */ static inline int pm_runtime_get(struct device *dev) { return __pm_runtime_resume(dev, RPM_GET_PUT | RPM_ASYNC); } /** * pm_runtime_get_sync - Bump up usage counter of a device and resume it. * @dev: Target device. * * Bump up the runtime PM usage counter of @dev and carry out runtime-resume of * it synchronously. * * The possible return values of this function are the same as for * pm_runtime_resume() and the runtime PM usage counter of @dev remains * incremented in all cases, even if it returns an error code. * Consider using pm_runtime_resume_and_get() instead of it, especially * if its return value is checked by the caller, as this is likely to result * in cleaner code. */ static inline int pm_runtime_get_sync(struct device *dev) { return __pm_runtime_resume(dev, RPM_GET_PUT); } static inline int pm_runtime_get_active(struct device *dev, int rpmflags) { int ret; ret = __pm_runtime_resume(dev, RPM_GET_PUT | rpmflags); if (ret < 0) { pm_runtime_put_noidle(dev); return ret; } return 0; } /** * pm_runtime_resume_and_get - Bump up usage counter of a device and resume it. * @dev: Target device. * * Resume @dev synchronously and if that is successful, increment its runtime * PM usage counter. Return 0 if the runtime PM usage counter of @dev has been * incremented or a negative error code otherwise. */ static inline int pm_runtime_resume_and_get(struct device *dev) { return pm_runtime_get_active(dev, 0); } /** * pm_runtime_put - Drop device usage counter and queue up "idle check" if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, queue up a work item for @dev like in pm_request_idle(). * * Return: * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. */ static inline int pm_runtime_put(struct device *dev) { return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC); } /** * __pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, queue up a work item for @dev like in pm_request_autosuspend(). * * Return: * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. */ static inline int __pm_runtime_put_autosuspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_ASYNC | RPM_AUTO); } /** * pm_runtime_put_autosuspend - Update the last access time of a device, drop * its usage counter and queue autosuspend if the usage counter becomes 0. * @dev: Target device. * * Update the last access time of @dev, decrement runtime PM usage counter of * @dev and if it turns out to be equal to 0, queue up a work item for @dev like * in pm_request_autosuspend(). * * Return: * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. */ static inline int pm_runtime_put_autosuspend(struct device *dev) { pm_runtime_mark_last_busy(dev); return __pm_runtime_put_autosuspend(dev); } DEFINE_GUARD(pm_runtime_noresume, struct device *, pm_runtime_get_noresume(_T), pm_runtime_put_noidle(_T)); DEFINE_GUARD(pm_runtime_active, struct device *, pm_runtime_get_sync(_T), pm_runtime_put(_T)); DEFINE_GUARD(pm_runtime_active_auto, struct device *, pm_runtime_get_sync(_T), pm_runtime_put_autosuspend(_T)); /* * Use the following guards with ACQUIRE()/ACQUIRE_ERR(). * * The difference between the "_try" and "_try_enabled" variants is that the * former do not produce an error when runtime PM is disabled for the given * device. */ DEFINE_GUARD_COND(pm_runtime_active, _try, pm_runtime_get_active(_T, RPM_TRANSPARENT)) DEFINE_GUARD_COND(pm_runtime_active, _try_enabled, pm_runtime_resume_and_get(_T)) DEFINE_GUARD_COND(pm_runtime_active_auto, _try, pm_runtime_get_active(_T, RPM_TRANSPARENT)) DEFINE_GUARD_COND(pm_runtime_active_auto, _try_enabled, pm_runtime_resume_and_get(_T)) /** * pm_runtime_put_sync - Drop device usage counter and run "idle check" if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, invoke the "idle check" callback of @dev and, depending on its * return value, set up autosuspend of @dev or suspend it (depending on whether * or not autosuspend has been enabled for it). * * The runtime PM usage counter of @dev remains decremented in all cases, even * if it returns an error code. * * Return: * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -ENOSYS: CONFIG_PM not enabled. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. */ static inline int pm_runtime_put_sync(struct device *dev) { return __pm_runtime_idle(dev, RPM_GET_PUT); } /** * pm_runtime_put_sync_suspend - Drop device usage counter and suspend if 0. * @dev: Target device. * * Decrement the runtime PM usage counter of @dev and if it turns out to be * equal to 0, carry out runtime-suspend of @dev synchronously. * * The runtime PM usage counter of @dev remains decremented in all cases, even * if it returns an error code. * * Return: * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -ENOSYS: CONFIG_PM not enabled. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. */ static inline int pm_runtime_put_sync_suspend(struct device *dev) { return __pm_runtime_suspend(dev, RPM_GET_PUT); } /** * pm_runtime_put_sync_autosuspend - Update the last access time of a device, * drop device usage counter and autosuspend if 0. * @dev: Target device. * * Update the last access time of @dev, decrement the runtime PM usage counter * of @dev and if it turns out to be equal to 0, set up autosuspend of @dev or * suspend it synchronously (depending on whether or not autosuspend has been * enabled for it). * * The runtime PM usage counter of @dev remains decremented in all cases, even * if it returns an error code. * * Return: * * 1: Success. Usage counter dropped to zero, but device was already suspended. * * 0: Success. * * -EINVAL: Runtime PM error. * * -EACCES: Runtime PM disabled. * * -EAGAIN: Runtime PM usage counter became non-zero or Runtime PM status * change ongoing. * * -EBUSY: Runtime PM child_count non-zero. * * -EPERM: Device PM QoS resume latency 0. * * -EINPROGRESS: Suspend already in progress. * * -ENOSYS: CONFIG_PM not enabled. * Other values and conditions for the above values are possible as returned by * Runtime PM suspend callbacks. */ static inline int pm_runtime_put_sync_autosuspend(struct device *dev) { pm_runtime_mark_last_busy(dev); return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_AUTO); } /** * pm_runtime_set_active - Set runtime PM status to "active". * @dev: Target device. * * Set the runtime PM status of @dev to %RPM_ACTIVE and ensure that dependencies * of it will be taken into account. * * It is not valid to call this function for devices with runtime PM enabled. */ static inline int pm_runtime_set_active(struct device *dev) { return __pm_runtime_set_status(dev, RPM_ACTIVE); } /** * pm_runtime_set_suspended - Set runtime PM status to "suspended". * @dev: Target device. * * Set the runtime PM status of @dev to %RPM_SUSPENDED and ensure that * dependencies of it will be taken into account. * * It is not valid to call this function for devices with runtime PM enabled. */ static inline int pm_runtime_set_suspended(struct device *dev) { return __pm_runtime_set_status(dev, RPM_SUSPENDED); } /** * pm_runtime_disable - Disable runtime PM for a device. * @dev: Target device. * * Prevent the runtime PM framework from working with @dev by incrementing its * "disable" counter. * * If the counter is zero when this function runs and there is a pending runtime * resume request for @dev, it will be resumed. If the counter is still zero at * that point, all of the pending runtime PM requests for @dev will be canceled * and all runtime PM operations in progress involving it will be waited for to * complete. * * For each invocation of this function for @dev, there must be a matching * pm_runtime_enable() call, so that runtime PM is eventually enabled for it * again. */ static inline void pm_runtime_disable(struct device *dev) { __pm_runtime_disable(dev, true); } /** * pm_runtime_use_autosuspend - Allow autosuspend to be used for a device. * @dev: Target device. * * Allow the runtime PM autosuspend mechanism to be used for @dev whenever * requested (or "autosuspend" will be handled as direct runtime-suspend for * it). * * NOTE: It's important to undo this with pm_runtime_dont_use_autosuspend() * at driver exit time unless your driver initially enabled pm_runtime * with devm_pm_runtime_enable() (which handles it for you). */ static inline void pm_runtime_use_autosuspend(struct device *dev) { __pm_runtime_use_autosuspend(dev, true); } /** * pm_runtime_dont_use_autosuspend - Prevent autosuspend from being used. * @dev: Target device. * * Prevent the runtime PM autosuspend mechanism from being used for @dev which * means that "autosuspend" will be handled as direct runtime-suspend for it * going forward. */ static inline void pm_runtime_dont_use_autosuspend(struct device *dev) { __pm_runtime_use_autosuspend(dev, false); } #endif |
| 24 24 24 8 6 8 7 1 1 1 1 1 1 8 8 1 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 | // SPDX-License-Identifier: GPL-2.0-or-later /* * OSS compatible sequencer driver * * seq_oss_writeq.c - write queue and sync * * Copyright (C) 1998,99 Takashi Iwai <tiwai@suse.de> */ #include "seq_oss_writeq.h" #include "seq_oss_event.h" #include "seq_oss_timer.h" #include <sound/seq_oss_legacy.h> #include "../seq_lock.h" #include "../seq_clientmgr.h" #include <linux/wait.h> #include <linux/slab.h> #include <linux/sched/signal.h> /* * create a write queue record */ struct seq_oss_writeq * snd_seq_oss_writeq_new(struct seq_oss_devinfo *dp, int maxlen) { struct seq_oss_writeq *q; struct snd_seq_client_pool pool; q = kzalloc(sizeof(*q), GFP_KERNEL); if (!q) return NULL; q->dp = dp; q->maxlen = maxlen; spin_lock_init(&q->sync_lock); q->sync_event_put = 0; q->sync_time = 0; init_waitqueue_head(&q->sync_sleep); memset(&pool, 0, sizeof(pool)); pool.client = dp->cseq; pool.output_pool = maxlen; pool.output_room = maxlen / 2; snd_seq_oss_control(dp, SNDRV_SEQ_IOCTL_SET_CLIENT_POOL, &pool); return q; } /* * delete the write queue */ void snd_seq_oss_writeq_delete(struct seq_oss_writeq *q) { if (q) { snd_seq_oss_writeq_clear(q); /* to be sure */ kfree(q); } } /* * reset the write queue */ void snd_seq_oss_writeq_clear(struct seq_oss_writeq *q) { struct snd_seq_remove_events reset; memset(&reset, 0, sizeof(reset)); reset.remove_mode = SNDRV_SEQ_REMOVE_OUTPUT; /* remove all */ snd_seq_oss_control(q->dp, SNDRV_SEQ_IOCTL_REMOVE_EVENTS, &reset); /* wake up sleepers if any */ snd_seq_oss_writeq_wakeup(q, 0); } /* * wait until the write buffer has enough room */ int snd_seq_oss_writeq_sync(struct seq_oss_writeq *q) { struct seq_oss_devinfo *dp = q->dp; abstime_t time; time = snd_seq_oss_timer_cur_tick(dp->timer); if (q->sync_time >= time) return 0; /* already finished */ if (! q->sync_event_put) { struct snd_seq_event ev; union evrec *rec; /* put echoback event */ memset(&ev, 0, sizeof(ev)); ev.flags = 0; ev.type = SNDRV_SEQ_EVENT_ECHO; ev.time.tick = time; /* echo back to itself */ snd_seq_oss_fill_addr(dp, &ev, dp->addr.client, dp->addr.port); rec = (union evrec *)&ev.data; rec->t.code = SEQ_SYNCTIMER; rec->t.time = time; q->sync_event_put = 1; snd_seq_kernel_client_enqueue(dp->cseq, &ev, NULL, true); } wait_event_interruptible_timeout(q->sync_sleep, ! q->sync_event_put, HZ); if (signal_pending(current)) /* interrupted - return 0 to finish sync */ q->sync_event_put = 0; if (! q->sync_event_put || q->sync_time >= time) return 0; return 1; } /* * wake up sync - echo event was catched */ void snd_seq_oss_writeq_wakeup(struct seq_oss_writeq *q, abstime_t time) { guard(spinlock_irqsave)(&q->sync_lock); q->sync_time = time; q->sync_event_put = 0; wake_up(&q->sync_sleep); } /* * return the unused pool size */ int snd_seq_oss_writeq_get_free_size(struct seq_oss_writeq *q) { struct snd_seq_client_pool pool; pool.client = q->dp->cseq; snd_seq_oss_control(q->dp, SNDRV_SEQ_IOCTL_GET_CLIENT_POOL, &pool); return pool.output_free; } /* * set output threshold size from ioctl */ void snd_seq_oss_writeq_set_output(struct seq_oss_writeq *q, int val) { struct snd_seq_client_pool pool; pool.client = q->dp->cseq; snd_seq_oss_control(q->dp, SNDRV_SEQ_IOCTL_GET_CLIENT_POOL, &pool); pool.output_room = val; snd_seq_oss_control(q->dp, SNDRV_SEQ_IOCTL_SET_CLIENT_POOL, &pool); } |
| 21 21 21 21 21 21 200 200 200 21 21 2 2 2 2 2 2 5 5 5 5 5 5 5 4 3 3 3 3 4 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2004 IBM Corporation * * Author: Serge Hallyn <serue@us.ibm.com> */ #include <linux/export.h> #include <linux/uts.h> #include <linux/utsname.h> #include <linux/err.h> #include <linux/slab.h> #include <linux/cred.h> #include <linux/user_namespace.h> #include <linux/proc_ns.h> #include <linux/nstree.h> #include <linux/sched/task.h> static struct kmem_cache *uts_ns_cache __ro_after_init; static struct ucounts *inc_uts_namespaces(struct user_namespace *ns) { return inc_ucount(ns, current_euid(), UCOUNT_UTS_NAMESPACES); } static void dec_uts_namespaces(struct ucounts *ucounts) { dec_ucount(ucounts, UCOUNT_UTS_NAMESPACES); } /* * Clone a new ns copying an original utsname, setting refcount to 1 * @old_ns: namespace to clone * Return ERR_PTR(-ENOMEM) on error (failure to allocate), new ns otherwise */ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns, struct uts_namespace *old_ns) { struct uts_namespace *ns; struct ucounts *ucounts; int err; err = -ENOSPC; ucounts = inc_uts_namespaces(user_ns); if (!ucounts) goto fail; err = -ENOMEM; ns = kmem_cache_zalloc(uts_ns_cache, GFP_KERNEL); if (!ns) goto fail_dec; err = ns_common_init(ns); if (err) goto fail_free; ns->ucounts = ucounts; down_read(&uts_sem); memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); ns->user_ns = get_user_ns(user_ns); up_read(&uts_sem); ns_tree_add(ns); return ns; fail_free: kmem_cache_free(uts_ns_cache, ns); fail_dec: dec_uts_namespaces(ucounts); fail: return ERR_PTR(err); } /* * Copy task tsk's utsname namespace, or clone it if flags * specifies CLONE_NEWUTS. In latter case, changes to the * utsname of this process won't be seen by parent, and vice * versa. */ struct uts_namespace *copy_utsname(u64 flags, struct user_namespace *user_ns, struct uts_namespace *old_ns) { struct uts_namespace *new_ns; BUG_ON(!old_ns); get_uts_ns(old_ns); if (!(flags & CLONE_NEWUTS)) return old_ns; new_ns = clone_uts_ns(user_ns, old_ns); put_uts_ns(old_ns); return new_ns; } void free_uts_ns(struct uts_namespace *ns) { ns_tree_remove(ns); dec_uts_namespaces(ns->ucounts); put_user_ns(ns->user_ns); ns_common_free(ns); /* Concurrent nstree traversal depends on a grace period. */ kfree_rcu(ns, ns.ns_rcu); } static struct ns_common *utsns_get(struct task_struct *task) { struct uts_namespace *ns = NULL; struct nsproxy *nsproxy; task_lock(task); nsproxy = task->nsproxy; if (nsproxy) { ns = nsproxy->uts_ns; get_uts_ns(ns); } task_unlock(task); return ns ? &ns->ns : NULL; } static void utsns_put(struct ns_common *ns) { put_uts_ns(to_uts_ns(ns)); } static int utsns_install(struct nsset *nsset, struct ns_common *new) { struct nsproxy *nsproxy = nsset->nsproxy; struct uts_namespace *ns = to_uts_ns(new); if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) || !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN)) return -EPERM; get_uts_ns(ns); put_uts_ns(nsproxy->uts_ns); nsproxy->uts_ns = ns; return 0; } static struct user_namespace *utsns_owner(struct ns_common *ns) { return to_uts_ns(ns)->user_ns; } const struct proc_ns_operations utsns_operations = { .name = "uts", .get = utsns_get, .put = utsns_put, .install = utsns_install, .owner = utsns_owner, }; void __init uts_ns_init(void) { uts_ns_cache = kmem_cache_create_usercopy( "uts_namespace", sizeof(struct uts_namespace), 0, SLAB_PANIC|SLAB_ACCOUNT, offsetof(struct uts_namespace, name), sizeof_field(struct uts_namespace, name), NULL); ns_tree_add(&init_uts_ns); } |
| 65 41 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 | /* SPDX-License-Identifier: GPL-2.0-only */ #ifndef _LINUX_TRACEPOINT_H #define _LINUX_TRACEPOINT_H /* * Kernel Tracepoint API. * * See Documentation/trace/tracepoints.rst. * * Copyright (C) 2008-2014 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> * * Heavily inspired from the Linux Kernel Markers. */ #include <linux/smp.h> #include <linux/srcu.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/rcupdate.h> #include <linux/rcupdate_trace.h> #include <linux/tracepoint-defs.h> #include <linux/static_call.h> struct module; struct tracepoint; struct notifier_block; struct trace_eval_map { const char *system; const char *eval_string; unsigned long eval_value; }; #define TRACEPOINT_DEFAULT_PRIO 10 extern int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data); extern int tracepoint_probe_register_prio(struct tracepoint *tp, void *probe, void *data, int prio); extern int tracepoint_probe_register_prio_may_exist(struct tracepoint *tp, void *probe, void *data, int prio); extern int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data); static inline int tracepoint_probe_register_may_exist(struct tracepoint *tp, void *probe, void *data) { return tracepoint_probe_register_prio_may_exist(tp, probe, data, TRACEPOINT_DEFAULT_PRIO); } extern void for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv), void *priv); #ifdef CONFIG_MODULES struct tp_module { struct list_head list; struct module *mod; }; bool trace_module_has_bad_taint(struct module *mod); extern int register_tracepoint_module_notifier(struct notifier_block *nb); extern int unregister_tracepoint_module_notifier(struct notifier_block *nb); void for_each_module_tracepoint(void (*fct)(struct tracepoint *, struct module *, void *), void *priv); void for_each_tracepoint_in_module(struct module *, void (*fct)(struct tracepoint *, struct module *, void *), void *priv); #else static inline bool trace_module_has_bad_taint(struct module *mod) { return false; } static inline int register_tracepoint_module_notifier(struct notifier_block *nb) { return 0; } static inline int unregister_tracepoint_module_notifier(struct notifier_block *nb) { return 0; } static inline void for_each_module_tracepoint(void (*fct)(struct tracepoint *, struct module *, void *), void *priv) { } static inline void for_each_tracepoint_in_module(struct module *mod, void (*fct)(struct tracepoint *, struct module *, void *), void *priv) { } #endif /* CONFIG_MODULES */ /* * tracepoint_synchronize_unregister must be called between the last tracepoint * probe unregistration and the end of module exit to make sure there is no * caller executing a probe when it is freed. * * An alternative is to use the following for batch reclaim associated * with a given tracepoint: * * - tracepoint_is_faultable() == false: call_rcu() * - tracepoint_is_faultable() == true: call_rcu_tasks_trace() */ #ifdef CONFIG_TRACEPOINTS static inline void tracepoint_synchronize_unregister(void) { synchronize_rcu_tasks_trace(); synchronize_rcu(); } static inline bool tracepoint_is_faultable(struct tracepoint *tp) { return tp->ext && tp->ext->faultable; } #else static inline void tracepoint_synchronize_unregister(void) { } static inline bool tracepoint_is_faultable(struct tracepoint *tp) { return false; } #endif #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS extern int syscall_regfunc(void); extern void syscall_unregfunc(void); #endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */ #ifndef PARAMS #define PARAMS(args...) args #endif #define TRACE_DEFINE_ENUM(x) #define TRACE_DEFINE_SIZEOF(x) #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) { return offset_to_ptr(p); } #define __TRACEPOINT_ENTRY(name) \ asm(" .section \"__tracepoints_ptrs\", \"a\" \n" \ " .balign 4 \n" \ " .long __tracepoint_" #name " - . \n" \ " .previous \n") #else static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) { return *p; } #define __TRACEPOINT_ENTRY(name) \ static tracepoint_ptr_t __tracepoint_ptr_##name __used \ __section("__tracepoints_ptrs") = &__tracepoint_##name #endif #endif /* _LINUX_TRACEPOINT_H */ /* * Note: we keep the TRACE_EVENT and DECLARE_TRACE outside the include * file ifdef protection. * This is due to the way trace events work. If a file includes two * trace event headers under one "CREATE_TRACE_POINTS" the first include * will override the TRACE_EVENT and break the second include. */ #ifndef DECLARE_TRACE #define TP_PROTO(args...) args #define TP_ARGS(args...) args #define TP_CONDITION(args...) args /* * Individual subsystem my have a separate configuration to * enable their tracepoints. By default, this file will create * the tracepoints if CONFIG_TRACEPOINTS is defined. If a subsystem * wants to be able to disable its tracepoints from being created * it can define NOTRACE before including the tracepoint headers. */ #if defined(CONFIG_TRACEPOINTS) && !defined(NOTRACE) #define TRACEPOINTS_ENABLED #endif #ifdef TRACEPOINTS_ENABLED #ifdef CONFIG_HAVE_STATIC_CALL #define __DO_TRACE_CALL(name, args) \ do { \ struct tracepoint_func *it_func_ptr; \ void *__data; \ it_func_ptr = \ rcu_dereference_raw((&__tracepoint_##name)->funcs); \ if (it_func_ptr) { \ __data = (it_func_ptr)->data; \ static_call(tp_func_##name)(__data, args); \ } \ } while (0) #else #define __DO_TRACE_CALL(name, args) __traceiter_##name(NULL, args) #endif /* CONFIG_HAVE_STATIC_CALL */ /* * Declare an exported function that Rust code can call to trigger this * tracepoint. This function does not include the static branch; that is done * in Rust to avoid a function call when the tracepoint is disabled. */ #define DEFINE_RUST_DO_TRACE(name, proto, args) #define __DEFINE_RUST_DO_TRACE(name, proto, args) \ notrace void rust_do_trace_##name(proto) \ { \ __do_trace_##name(args); \ } /* * Make sure the alignment of the structure in the __tracepoints section will * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. * * When lockdep is enabled, we make sure to always test if RCU is * "watching" regardless if the tracepoint is enabled or not. Tracepoints * require RCU to be active, and it should always warn at the tracepoint * site if it is not watching, as it will need to be active when the * tracepoint is enabled. */ #define __DECLARE_TRACE_COMMON(name, proto, args, data_proto) \ extern int __traceiter_##name(data_proto); \ DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name); \ extern struct tracepoint __tracepoint_##name; \ extern void rust_do_trace_##name(proto); \ static inline int \ register_trace_##name(void (*probe)(data_proto), void *data) \ { \ return tracepoint_probe_register(&__tracepoint_##name, \ (void *)probe, data); \ } \ static inline int \ register_trace_prio_##name(void (*probe)(data_proto), void *data,\ int prio) \ { \ return tracepoint_probe_register_prio(&__tracepoint_##name, \ (void *)probe, data, prio); \ } \ static inline int \ unregister_trace_##name(void (*probe)(data_proto), void *data) \ { \ return tracepoint_probe_unregister(&__tracepoint_##name,\ (void *)probe, data); \ } \ static inline void \ check_trace_callback_type_##name(void (*cb)(data_proto)) \ { \ } \ static inline bool \ trace_##name##_enabled(void) \ { \ return static_branch_unlikely(&__tracepoint_##name.key);\ } #define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) \ static inline void __do_trace_##name(proto) \ { \ if (cond) { \ guard(preempt_notrace)(); \ __DO_TRACE_CALL(name, TP_ARGS(args)); \ } \ } \ static inline void trace_##name(proto) \ { \ if (static_branch_unlikely(&__tracepoint_##name.key)) \ __do_trace_##name(args); \ if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ WARN_ONCE(!rcu_is_watching(), \ "RCU not watching for tracepoint"); \ } \ } #define __DECLARE_TRACE_SYSCALL(name, proto, args, data_proto) \ __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) \ static inline void __do_trace_##name(proto) \ { \ guard(rcu_tasks_trace)(); \ __DO_TRACE_CALL(name, TP_ARGS(args)); \ } \ static inline void trace_##name(proto) \ { \ might_fault(); \ if (static_branch_unlikely(&__tracepoint_##name.key)) \ __do_trace_##name(args); \ if (IS_ENABLED(CONFIG_LOCKDEP)) { \ WARN_ONCE(!rcu_is_watching(), \ "RCU not watching for tracepoint"); \ } \ } /* * We have no guarantee that gcc and the linker won't up-align the tracepoint * structures, so we create an array of pointers that will be used for iteration * on the tracepoints. * * it_func[0] is never NULL because there is at least one element in the array * when the array itself is non NULL. */ #define __DEFINE_TRACE_EXT(_name, _ext, proto, args) \ static const char __tpstrtab_##_name[] \ __section("__tracepoints_strings") = #_name; \ extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name); \ int __traceiter_##_name(void *__data, proto); \ void __probestub_##_name(void *__data, proto); \ struct tracepoint __tracepoint_##_name __used \ __section("__tracepoints") = { \ .name = __tpstrtab_##_name, \ .key = STATIC_KEY_FALSE_INIT, \ .static_call_key = &STATIC_CALL_KEY(tp_func_##_name), \ .static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \ .iterator = &__traceiter_##_name, \ .probestub = &__probestub_##_name, \ .funcs = NULL, \ .ext = _ext, \ }; \ __TRACEPOINT_ENTRY(_name); \ int __traceiter_##_name(void *__data, proto) \ { \ struct tracepoint_func *it_func_ptr; \ void *it_func; \ \ it_func_ptr = \ rcu_dereference_raw((&__tracepoint_##_name)->funcs); \ if (it_func_ptr) { \ do { \ it_func = READ_ONCE((it_func_ptr)->func); \ __data = (it_func_ptr)->data; \ ((void(*)(void *, proto))(it_func))(__data, args); \ } while ((++it_func_ptr)->func); \ } \ return 0; \ } \ void __probestub_##_name(void *__data, proto) \ { \ } \ DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name); \ DEFINE_RUST_DO_TRACE(_name, TP_PROTO(proto), TP_ARGS(args)) #define DEFINE_TRACE_FN(_name, _reg, _unreg, _proto, _args) \ static struct tracepoint_ext __tracepoint_ext_##_name = { \ .regfunc = _reg, \ .unregfunc = _unreg, \ .faultable = false, \ }; \ __DEFINE_TRACE_EXT(_name, &__tracepoint_ext_##_name, PARAMS(_proto), PARAMS(_args)); #define DEFINE_TRACE_SYSCALL(_name, _reg, _unreg, _proto, _args) \ static struct tracepoint_ext __tracepoint_ext_##_name = { \ .regfunc = _reg, \ .unregfunc = _unreg, \ .faultable = true, \ }; \ __DEFINE_TRACE_EXT(_name, &__tracepoint_ext_##_name, PARAMS(_proto), PARAMS(_args)); #define DEFINE_TRACE(_name, _proto, _args) \ __DEFINE_TRACE_EXT(_name, NULL, PARAMS(_proto), PARAMS(_args)); #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \ EXPORT_SYMBOL_GPL(__tracepoint_##name); \ EXPORT_SYMBOL_GPL(__traceiter_##name); \ EXPORT_STATIC_CALL_GPL(tp_func_##name) #define EXPORT_TRACEPOINT_SYMBOL(name) \ EXPORT_SYMBOL(__tracepoint_##name); \ EXPORT_SYMBOL(__traceiter_##name); \ EXPORT_STATIC_CALL(tp_func_##name) #else /* !TRACEPOINTS_ENABLED */ #define __DECLARE_TRACE_COMMON(name, proto, args, data_proto) \ static inline void trace_##name(proto) \ { } \ static inline int \ register_trace_##name(void (*probe)(data_proto), \ void *data) \ { \ return -ENOSYS; \ } \ static inline int \ unregister_trace_##name(void (*probe)(data_proto), \ void *data) \ { \ return -ENOSYS; \ } \ static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \ { \ } \ static inline bool \ trace_##name##_enabled(void) \ { \ return false; \ } #define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) #define __DECLARE_TRACE_SYSCALL(name, proto, args, data_proto) \ __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) #define DEFINE_TRACE_FN(name, reg, unreg, proto, args) #define DEFINE_TRACE_SYSCALL(name, reg, unreg, proto, args) #define DEFINE_TRACE(name, proto, args) #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) #define EXPORT_TRACEPOINT_SYMBOL(name) #endif /* TRACEPOINTS_ENABLED */ #ifdef CONFIG_TRACING /** * tracepoint_string - register constant persistent string to trace system * @str - a constant persistent string that will be referenced in tracepoints * * If constant strings are being used in tracepoints, it is faster and * more efficient to just save the pointer to the string and reference * that with a printf "%s" instead of saving the string in the ring buffer * and wasting space and time. * * The problem with the above approach is that userspace tools that read * the binary output of the trace buffers do not have access to the string. * Instead they just show the address of the string which is not very * useful to users. * * With tracepoint_string(), the string will be registered to the tracing * system and exported to userspace via the debugfs/tracing/printk_formats * file that maps the string address to the string text. This way userspace * tools that read the binary buffers have a way to map the pointers to * the ASCII strings they represent. * * The @str used must be a constant string and persistent as it would not * make sense to show a string that no longer exists. But it is still fine * to be used with modules, because when modules are unloaded, if they * had tracepoints, the ring buffers are cleared too. As long as the string * does not change during the life of the module, it is fine to use * tracepoint_string() within a module. */ #define tracepoint_string(str) \ ({ \ static const char *___tp_str __tracepoint_string = str; \ ___tp_str; \ }) #define __tracepoint_string __used __section("__tracepoint_str") #else /* * tracepoint_string() is used to save the string address for userspace * tracing tools. When tracing isn't configured, there's no need to save * anything. */ # define tracepoint_string(str) str # define __tracepoint_string #endif #define DECLARE_TRACE(name, proto, args) \ __DECLARE_TRACE(name##_tp, PARAMS(proto), PARAMS(args), \ cpu_online(raw_smp_processor_id()), \ PARAMS(void *__data, proto)) #define DECLARE_TRACE_CONDITION(name, proto, args, cond) \ __DECLARE_TRACE(name##_tp, PARAMS(proto), PARAMS(args), \ cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \ PARAMS(void *__data, proto)) #define DECLARE_TRACE_SYSCALL(name, proto, args) \ __DECLARE_TRACE_SYSCALL(name##_tp, PARAMS(proto), PARAMS(args), \ PARAMS(void *__data, proto)) #define DECLARE_TRACE_EVENT(name, proto, args) \ __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ cpu_online(raw_smp_processor_id()), \ PARAMS(void *__data, proto)) #define DECLARE_TRACE_EVENT_CONDITION(name, proto, args, cond) \ __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \ PARAMS(void *__data, proto)) #define DECLARE_TRACE_EVENT_SYSCALL(name, proto, args) \ __DECLARE_TRACE_SYSCALL(name, PARAMS(proto), PARAMS(args), \ PARAMS(void *__data, proto)) #define TRACE_EVENT_FLAGS(event, flag) #define TRACE_EVENT_PERF_PERM(event, expr...) #endif /* DECLARE_TRACE */ #ifndef TRACE_EVENT /* * For use with the TRACE_EVENT macro: * * We define a tracepoint, its arguments, its printk format * and its 'fast binary record' layout. * * Firstly, name your tracepoint via TRACE_EVENT(name : the * 'subsystem_event' notation is fine. * * Think about this whole construct as the * 'trace_sched_switch() function' from now on. * * * TRACE_EVENT(sched_switch, * * * * * A function has a regular function arguments * * prototype, declare it via TP_PROTO(): * * * * TP_PROTO(struct rq *rq, struct task_struct *prev, * struct task_struct *next), * * * * * Define the call signature of the 'function'. * * (Design sidenote: we use this instead of a * * TP_PROTO1/TP_PROTO2/TP_PROTO3 ugliness.) * * * * TP_ARGS(rq, prev, next), * * * * * Fast binary tracing: define the trace record via * * TP_STRUCT__entry(). You can think about it like a * * regular C structure local variable definition. * * * * This is how the trace record is structured and will * * be saved into the ring buffer. These are the fields * * that will be exposed to user-space in * * /sys/kernel/tracing/events/<*>/format. * * * * The declared 'local variable' is called '__entry' * * * * __field(pid_t, prev_pid) is equivalent to a standard declaration: * * * * pid_t prev_pid; * * * * __array(char, prev_comm, TASK_COMM_LEN) is equivalent to: * * * * char prev_comm[TASK_COMM_LEN]; * * * * TP_STRUCT__entry( * __array( char, prev_comm, TASK_COMM_LEN ) * __field( pid_t, prev_pid ) * __field( int, prev_prio ) * __array( char, next_comm, TASK_COMM_LEN ) * __field( pid_t, next_pid ) * __field( int, next_prio ) * ), * * * * * Assign the entry into the trace record, by embedding * * a full C statement block into TP_fast_assign(). You * * can refer to the trace record as '__entry' - * * otherwise you can put arbitrary C code in here. * * * * Note: this C code will execute every time a trace event * * happens, on an active tracepoint. * * * * TP_fast_assign( * memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); * __entry->prev_pid = prev->pid; * __entry->prev_prio = prev->prio; * memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); * __entry->next_pid = next->pid; * __entry->next_prio = next->prio; * ), * * * * * Formatted output of a trace record via TP_printk(). * * This is how the tracepoint will appear under ftrace * * plugins that make use of this tracepoint. * * * * (raw-binary tracing wont actually perform this step.) * * * * TP_printk("task %s:%d [%d] ==> %s:%d [%d]", * __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, * __entry->next_comm, __entry->next_pid, __entry->next_prio), * * ); * * This macro construct is thus used for the regular printk format * tracing setup, it is used to construct a function pointer based * tracepoint callback (this is used by programmatic plugins and * can also by used by generic instrumentation like SystemTap), and * it is also used to expose a structured trace record in * /sys/kernel/tracing/events/. * * A set of (un)registration functions can be passed to the variant * TRACE_EVENT_FN to perform any (un)registration work. */ #define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) #define DEFINE_EVENT(template, name, proto, args) \ DECLARE_TRACE_EVENT(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)\ DECLARE_TRACE_EVENT(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ DECLARE_TRACE_EVENT(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_CONDITION(template, name, proto, \ args, cond) \ DECLARE_TRACE_EVENT_CONDITION(name, PARAMS(proto), \ PARAMS(args), PARAMS(cond)) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE_EVENT(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FN(name, proto, args, struct, \ assign, print, reg, unreg) \ DECLARE_TRACE_EVENT(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FN_COND(name, proto, args, cond, struct, \ assign, print, reg, unreg) \ DECLARE_TRACE_EVENT_CONDITION(name, PARAMS(proto), \ PARAMS(args), PARAMS(cond)) #define TRACE_EVENT_CONDITION(name, proto, args, cond, \ struct, assign, print) \ DECLARE_TRACE_EVENT_CONDITION(name, PARAMS(proto), \ PARAMS(args), PARAMS(cond)) #define TRACE_EVENT_SYSCALL(name, proto, args, struct, assign, \ print, reg, unreg) \ DECLARE_TRACE_EVENT_SYSCALL(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FLAGS(event, flag) #define TRACE_EVENT_PERF_PERM(event, expr...) #define DECLARE_EVENT_NOP(name, proto, args) \ static inline void trace_##name(proto) \ { } \ static inline bool trace_##name##_enabled(void) \ { \ return false; \ } #define TRACE_EVENT_NOP(name, proto, args, struct, assign, print) \ DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args)) #define DECLARE_EVENT_CLASS_NOP(name, proto, args, tstruct, assign, print) #define DEFINE_EVENT_NOP(template, name, proto, args) \ DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args)) #endif /* ifdef TRACE_EVENT (see note above) */ |
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 | /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright(c) 2017 Intel Corporation. */ #ifndef OPA_ADDR_H #define OPA_ADDR_H #include <rdma/opa_smi.h> #define OPA_SPECIAL_OUI (0x00066AULL) #define OPA_MAKE_ID(x) (cpu_to_be64(OPA_SPECIAL_OUI << 40 | (x))) #define OPA_TO_IB_UCAST_LID(x) (((x) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) \ ? 0 : x) #define OPA_GID_INDEX 0x1 /** * 0xF8 - 4 bits of multicast range and 1 bit for collective range * Example: For 24 bit LID space, * Multicast range: 0xF00000 to 0xF7FFFF * Collective range: 0xF80000 to 0xFFFFFE */ #define OPA_MCAST_NR 0x4 /* Number of top bits set */ #define OPA_COLLECTIVE_NR 0x1 /* Number of bits after MCAST_NR */ /** * ib_is_opa_gid: Returns true if the top 24 bits of the gid * contains the OPA_STL_OUI identifier. This identifies that * the provided gid is a special purpose GID meant to carry * extended LID information. * * @gid: The Global identifier */ static inline bool ib_is_opa_gid(const union ib_gid *gid) { return ((be64_to_cpu(gid->global.interface_id) >> 40) == OPA_SPECIAL_OUI); } /** * opa_get_lid_from_gid: Returns the last 32 bits of the gid. * OPA devices use one of the gids in the gid table to also * store the lid. * * @gid: The Global identifier */ static inline u32 opa_get_lid_from_gid(const union ib_gid *gid) { return be64_to_cpu(gid->global.interface_id) & 0xFFFFFFFF; } /** * opa_is_extended_lid: Returns true if dlid or slid are * extended. * * @dlid: The DLID * @slid: The SLID */ static inline bool opa_is_extended_lid(__be32 dlid, __be32 slid) { if ((be32_to_cpu(dlid) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) || (be32_to_cpu(slid) >= be16_to_cpu(IB_MULTICAST_LID_BASE))) return true; return false; } /* Get multicast lid base */ static inline u32 opa_get_mcast_base(u32 nr_top_bits) { return (be32_to_cpu(OPA_LID_PERMISSIVE) << (32 - nr_top_bits)); } /* Check for a valid unicast LID for non-SM traffic types */ static inline bool rdma_is_valid_unicast_lid(struct rdma_ah_attr *attr) { if (attr->type == RDMA_AH_ATTR_TYPE_IB) { if (!rdma_ah_get_dlid(attr) || rdma_ah_get_dlid(attr) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) return false; } else if (attr->type == RDMA_AH_ATTR_TYPE_OPA) { if (!rdma_ah_get_dlid(attr) || rdma_ah_get_dlid(attr) >= opa_get_mcast_base(OPA_MCAST_NR)) return false; } return true; } #endif /* OPA_ADDR_H */ |
| 1 1 1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 | // SPDX-License-Identifier: GPL-2.0-only /* * CAN driver for PEAK System PCAN-USB FD / PCAN-USB Pro FD adapter * * Copyright (C) 2013-2025 PEAK System-Technik GmbH * Author: Stéphane Grosjean <stephane.grosjean@hms-networks.com> */ #include <linux/ethtool.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/usb.h> #include <linux/can.h> #include <linux/can/dev.h> #include <linux/can/error.h> #include <linux/can/dev/peak_canfd.h> #include "pcan_usb_core.h" #include "pcan_usb_pro.h" #define PCAN_USBPROFD_CHANNEL_COUNT 2 #define PCAN_USBFD_CHANNEL_COUNT 1 /* PCAN-USB Pro FD adapter internal clock (Hz) */ #define PCAN_UFD_CRYSTAL_HZ 80000000 #define PCAN_UFD_CMD_BUFFER_SIZE 512 #define PCAN_UFD_LOSPD_PKT_SIZE 64 /* PCAN-USB Pro FD command timeout (ms.) */ #define PCAN_UFD_CMD_TIMEOUT_MS 1000 /* PCAN-USB Pro FD rx/tx buffers size */ #define PCAN_UFD_RX_BUFFER_SIZE 2048 #define PCAN_UFD_TX_BUFFER_SIZE 512 /* struct pcan_ufd_fw_info::type */ #define PCAN_USBFD_TYPE_STD 1 #define PCAN_USBFD_TYPE_EXT 2 /* includes EP numbers */ /* read some versions info from the hw device */ struct __packed pcan_ufd_fw_info { __le16 size_of; /* sizeof this */ __le16 type; /* type of this structure */ u8 hw_type; /* Type of hardware (HW_TYPE_xxx) */ u8 bl_version[3]; /* Bootloader version */ u8 hw_version; /* Hardware version (PCB) */ u8 fw_version[3]; /* Firmware version */ __le32 dev_id[2]; /* "device id" per CAN */ __le32 ser_no; /* S/N */ __le32 flags; /* special functions */ /* extended data when type >= PCAN_USBFD_TYPE_EXT */ u8 cmd_out_ep; /* ep for cmd */ u8 cmd_in_ep; /* ep for replies */ u8 data_out_ep[2]; /* ep for CANx TX */ u8 data_in_ep; /* ep for CAN RX */ u8 dummy[3]; }; /* handle device specific info used by the netdevices */ struct pcan_usb_fd_if { struct peak_usb_device *dev[PCAN_USB_MAX_CHANNEL]; struct pcan_ufd_fw_info fw_info; struct peak_time_ref time_ref; int cm_ignore_count; int dev_opened_count; }; /* device information */ struct pcan_usb_fd_device { struct peak_usb_device dev; struct can_berr_counter bec; struct pcan_usb_fd_if *usb_if; u8 *cmd_buffer_addr; }; /* Extended USB commands (non uCAN commands) */ /* Clock Modes command */ #define PCAN_UFD_CMD_CLK_SET 0x80 #define PCAN_UFD_CLK_80MHZ 0x0 #define PCAN_UFD_CLK_60MHZ 0x1 #define PCAN_UFD_CLK_40MHZ 0x2 #define PCAN_UFD_CLK_30MHZ 0x3 #define PCAN_UFD_CLK_24MHZ 0x4 #define PCAN_UFD_CLK_20MHZ 0x5 #define PCAN_UFD_CLK_DEF PCAN_UFD_CLK_80MHZ struct __packed pcan_ufd_clock { __le16 opcode_channel; u8 mode; u8 unused[5]; }; /* LED control command */ #define PCAN_UFD_CMD_LED_SET 0x86 #define PCAN_UFD_LED_DEV 0x00 #define PCAN_UFD_LED_FAST 0x01 #define PCAN_UFD_LED_SLOW 0x02 #define PCAN_UFD_LED_ON 0x03 #define PCAN_UFD_LED_OFF 0x04 #define PCAN_UFD_LED_DEF PCAN_UFD_LED_DEV struct __packed pcan_ufd_led { __le16 opcode_channel; u8 mode; u8 unused[5]; }; /* Extended usage of uCAN commands CMD_xxx_xx_OPTION for PCAN-USB Pro FD */ #define PCAN_UFD_FLTEXT_CALIBRATION 0x8000 struct __packed pcan_ufd_options { __le16 opcode_channel; __le16 ucan_mask; u16 unused; __le16 usb_mask; }; /* Extended usage of uCAN messages for PCAN-USB Pro FD */ #define PCAN_UFD_MSG_CALIBRATION 0x100 struct __packed pcan_ufd_ts_msg { __le16 size; __le16 type; __le32 ts_low; __le32 ts_high; __le16 usb_frame_index; u16 unused; }; #define PCAN_UFD_MSG_OVERRUN 0x101 #define PCAN_UFD_OVMSG_CHANNEL(o) ((o)->channel & 0xf) struct __packed pcan_ufd_ovr_msg { __le16 size; __le16 type; __le32 ts_low; __le32 ts_high; u8 channel; u8 unused[3]; }; #define PCAN_UFD_CMD_DEVID_SET 0x81 struct __packed pcan_ufd_device_id { __le16 opcode_channel; u16 unused; __le32 device_id; }; static inline int pufd_omsg_get_channel(struct pcan_ufd_ovr_msg *om) { return om->channel & 0xf; } /* Clock mode frequency values */ static const u32 pcan_usb_fd_clk_freq[6] = { [PCAN_UFD_CLK_80MHZ] = 80000000, [PCAN_UFD_CLK_60MHZ] = 60000000, [PCAN_UFD_CLK_40MHZ] = 40000000, [PCAN_UFD_CLK_30MHZ] = 30000000, [PCAN_UFD_CLK_24MHZ] = 24000000, [PCAN_UFD_CLK_20MHZ] = 20000000 }; /* return a device USB interface */ static inline struct pcan_usb_fd_if *pcan_usb_fd_dev_if(struct peak_usb_device *dev) { struct pcan_usb_fd_device *pdev = container_of(dev, struct pcan_usb_fd_device, dev); return pdev->usb_if; } /* return a device USB commands buffer */ static inline void *pcan_usb_fd_cmd_buffer(struct peak_usb_device *dev) { struct pcan_usb_fd_device *pdev = container_of(dev, struct pcan_usb_fd_device, dev); return pdev->cmd_buffer_addr; } /* send PCAN-USB Pro FD commands synchronously */ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail) { struct pcan_usb_fd_device *pdev = container_of(dev, struct pcan_usb_fd_device, dev); struct pcan_ufd_fw_info *fw_info = &pdev->usb_if->fw_info; void *cmd_head = pcan_usb_fd_cmd_buffer(dev); int err = 0; u8 *packet_ptr; int packet_len; ptrdiff_t cmd_len; /* usb device unregistered? */ if (!(dev->state & PCAN_USB_STATE_CONNECTED)) return 0; /* if a packet is not filled completely by commands, the command list * is terminated with an "end of collection" record. */ cmd_len = cmd_tail - cmd_head; if (cmd_len <= (PCAN_UFD_CMD_BUFFER_SIZE - sizeof(u64))) { memset(cmd_tail, 0xff, sizeof(u64)); cmd_len += sizeof(u64); } packet_ptr = cmd_head; packet_len = cmd_len; /* firmware is not able to re-assemble 512 bytes buffer in full-speed */ if (unlikely(dev->udev->speed != USB_SPEED_HIGH)) packet_len = min(packet_len, PCAN_UFD_LOSPD_PKT_SIZE); do { err = usb_bulk_msg(dev->udev, usb_sndbulkpipe(dev->udev, fw_info->cmd_out_ep), packet_ptr, packet_len, NULL, PCAN_UFD_CMD_TIMEOUT_MS); if (err) { netdev_err(dev->netdev, "sending command failure: %d\n", err); break; } packet_ptr += packet_len; cmd_len -= packet_len; if (cmd_len < PCAN_UFD_LOSPD_PKT_SIZE) packet_len = cmd_len; } while (packet_len > 0); return err; } static int pcan_usb_fd_read_fwinfo(struct peak_usb_device *dev, struct pcan_ufd_fw_info *fw_info) { return pcan_usb_pro_send_req(dev, PCAN_USBPRO_REQ_INFO, PCAN_USBPRO_INFO_FW, fw_info, sizeof(*fw_info)); } /* build the commands list in the given buffer, to enter operational mode */ static int pcan_usb_fd_build_restart_cmd(struct peak_usb_device *dev, u8 *buf) { struct pucan_wr_err_cnt *prc; struct pucan_command *cmd; u8 *pc = buf; /* 1st, reset error counters: */ prc = (struct pucan_wr_err_cnt *)pc; prc->opcode_channel = pucan_cmd_opcode_channel(dev->ctrl_idx, PUCAN_CMD_WR_ERR_CNT); /* select both counters */ prc->sel_mask = cpu_to_le16(PUCAN_WRERRCNT_TE|PUCAN_WRERRCNT_RE); /* and reset their values */ prc->tx_counter = 0; prc->rx_counter = 0; /* moves the pointer forward */ pc += sizeof(struct pucan_wr_err_cnt); /* add command to switch from ISO to non-ISO mode, if fw allows it */ if (dev->can.ctrlmode_supported & CAN_CTRLMODE_FD_NON_ISO) { struct pucan_options *puo = (struct pucan_options *)pc; puo->opcode_channel = (dev->can.ctrlmode & CAN_CTRLMODE_FD_NON_ISO) ? pucan_cmd_opcode_channel(dev->ctrl_idx, PUCAN_CMD_CLR_DIS_OPTION) : pucan_cmd_opcode_channel(dev->ctrl_idx, PUCAN_CMD_SET_EN_OPTION); puo->options = cpu_to_le16(PUCAN_OPTION_CANDFDISO); /* to be sure that no other extended bits will be taken into * account */ puo->unused = 0; /* moves the pointer forward */ pc += sizeof(struct pucan_options); } /* next, go back to operational mode */ cmd = (struct pucan_command *)pc; cmd->opcode_channel = pucan_cmd_opcode_channel(dev->ctrl_idx, (dev->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) ? PUCAN_CMD_LISTEN_ONLY_MODE : PUCAN_CMD_NORMAL_MODE); pc += sizeof(struct pucan_command); return pc - buf; } /* set CAN bus on/off */ static int pcan_usb_fd_set_bus(struct peak_usb_device *dev, u8 onoff) { u8 *pc = pcan_usb_fd_cmd_buffer(dev); int l; if (onoff) { /* build the cmds list to enter operational mode */ l = pcan_usb_fd_build_restart_cmd(dev, pc); } else { struct pucan_command *cmd = (struct pucan_command *)pc; /* build cmd to go back to reset mode */ cmd->opcode_channel = pucan_cmd_opcode_channel(dev->ctrl_idx, PUCAN_CMD_RESET_MODE); l = sizeof(struct pucan_command); } /* send the command */ return pcan_usb_fd_send_cmd(dev, pc + l); } /* set filtering masks: * * idx in range [0..63] selects a row #idx, all rows otherwise * mask in range [0..0xffffffff] defines up to 32 CANIDs in the row(s) * * Each bit of this 64 x 32 bits array defines a CANID value: * * bit[i,j] = 1 implies that CANID=(i x 32)+j will be received, while * bit[i,j] = 0 implies that CANID=(i x 32)+j will be discarded. */ static int pcan_usb_fd_set_filter_std(struct peak_usb_device *dev, int idx, u32 mask) { struct pucan_filter_std *cmd = pcan_usb_fd_cmd_buffer(dev); int i, n; /* select all rows when idx is out of range [0..63] */ if ((idx < 0) || (idx >= (1 << PUCAN_FLTSTD_ROW_IDX_BITS))) { n = 1 << PUCAN_FLTSTD_ROW_IDX_BITS; idx = 0; /* select the row (and only the row) otherwise */ } else { n = idx + 1; } for (i = idx; i < n; i++, cmd++) { cmd->opcode_channel = pucan_cmd_opcode_channel(dev->ctrl_idx, PUCAN_CMD_FILTER_STD); cmd->idx = cpu_to_le16(i); cmd->mask = cpu_to_le32(mask); } /* send the command */ return pcan_usb_fd_send_cmd(dev, cmd); } /* set/unset options * * onoff set(1)/unset(0) options * mask each bit defines a kind of options to set/unset */ static int pcan_usb_fd_set_options(struct peak_usb_device *dev, bool onoff, u16 ucan_mask, u16 usb_mask) { struct pcan_ufd_options *cmd = pcan_usb_fd_cmd_buffer(dev); cmd->opcode_channel = pucan_cmd_opcode_channel(dev->ctrl_idx, (onoff) ? PUCAN_CMD_SET_EN_OPTION : PUCAN_CMD_CLR_DIS_OPTION); cmd->ucan_mask = cpu_to_le16(ucan_mask); cmd->usb_mask = cpu_to_le16(usb_mask); /* send the command */ return pcan_usb_fd_send_cmd(dev, ++cmd); } /* setup LED control */ static int pcan_usb_fd_set_can_led(struct peak_usb_device *dev, u8 led_mode) { struct pcan_ufd_led *cmd = pcan_usb_fd_cmd_buffer(dev); cmd->opcode_channel = pucan_cmd_opcode_channel(dev->ctrl_idx, PCAN_UFD_CMD_LED_SET); cmd->mode = led_mode; /* send the command */ return pcan_usb_fd_send_cmd(dev, ++cmd); } /* set CAN clock domain */ static int pcan_usb_fd_set_clock_domain(struct peak_usb_device *dev, u8 clk_mode) { struct pcan_ufd_clock *cmd = pcan_usb_fd_cmd_buffer(dev); cmd->opcode_channel = pucan_cmd_opcode_channel(dev->ctrl_idx, PCAN_UFD_CMD_CLK_SET); cmd->mode = clk_mode; /* send the command */ return pcan_usb_fd_send_cmd(dev, ++cmd); } /* set bittiming for CAN and CAN-FD header */ static int pcan_usb_fd_set_bittiming_slow(struct peak_usb_device *dev, struct can_bittiming *bt) { struct pucan_timing_slow *cmd = pcan_usb_fd_cmd_buffer(dev); cmd->opcode_channel = pucan_cmd_opcode_channel(dev->ctrl_idx, PUCAN_CMD_TIMING_SLOW); cmd->sjw_t = PUCAN_TSLOW_SJW_T(bt->sjw - 1, dev->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES); cmd->tseg2 = PUCAN_TSLOW_TSEG2(bt->phase_seg2 - 1); cmd->tseg1 = PUCAN_TSLOW_TSEG1(bt->prop_seg + bt->phase_seg1 - 1); cmd->brp = cpu_to_le16(PUCAN_TSLOW_BRP(bt->brp - 1)); cmd->ewl = 96; /* default */ /* send the command */ return pcan_usb_fd_send_cmd(dev, ++cmd); } /* set CAN-FD bittiming for data */ static int pcan_usb_fd_set_bittiming_fast(struct peak_usb_device *dev, struct can_bittiming *bt) { struct pucan_timing_fast *cmd = pcan_usb_fd_cmd_buffer(dev); cmd->opcode_channel = pucan_cmd_opcode_channel(dev->ctrl_idx, PUCAN_CMD_TIMING_FAST); cmd->sjw = PUCAN_TFAST_SJW(bt->sjw - 1); cmd->tseg2 = PUCAN_TFAST_TSEG2(bt->phase_seg2 - 1); cmd->tseg1 = PUCAN_TFAST_TSEG1(bt->prop_seg + bt->phase_seg1 - 1); cmd->brp = cpu_to_le16(PUCAN_TFAST_BRP(bt->brp - 1)); /* send the command */ return pcan_usb_fd_send_cmd(dev, ++cmd); } /* read user CAN channel id from device */ static int pcan_usb_fd_get_can_channel_id(struct peak_usb_device *dev, u32 *can_ch_id) { int err; struct pcan_usb_fd_if *usb_if = pcan_usb_fd_dev_if(dev); err = pcan_usb_fd_read_fwinfo(dev, &usb_if->fw_info); if (err) return err; *can_ch_id = le32_to_cpu(usb_if->fw_info.dev_id[dev->ctrl_idx]); return err; } /* set a new CAN channel id in the flash memory of the device */ static int pcan_usb_fd_set_can_channel_id(struct peak_usb_device *dev, u32 can_ch_id) { struct pcan_ufd_device_id *cmd = pcan_usb_fd_cmd_buffer(dev); cmd->opcode_channel = pucan_cmd_opcode_channel(dev->ctrl_idx, PCAN_UFD_CMD_DEVID_SET); cmd->device_id = cpu_to_le32(can_ch_id); /* send the command */ return pcan_usb_fd_send_cmd(dev, ++cmd); } /* handle restart but in asynchronously way * (uses PCAN-USB Pro code to complete asynchronous request) */ static int pcan_usb_fd_restart_async(struct peak_usb_device *dev, struct urb *urb, u8 *buf) { struct pcan_usb_fd_device *pdev = container_of(dev, struct pcan_usb_fd_device, dev); struct pcan_ufd_fw_info *fw_info = &pdev->usb_if->fw_info; u8 *pc = buf; /* build the entire cmds list in the provided buffer, to go back into * operational mode. */ pc += pcan_usb_fd_build_restart_cmd(dev, pc); /* add EOC */ memset(pc, 0xff, sizeof(struct pucan_command)); pc += sizeof(struct pucan_command); /* complete the URB */ usb_fill_bulk_urb(urb, dev->udev, usb_sndbulkpipe(dev->udev, fw_info->cmd_out_ep), buf, pc - buf, pcan_usb_pro_restart_complete, dev); /* and submit it. */ return usb_submit_urb(urb, GFP_ATOMIC); } static int pcan_usb_fd_drv_loaded(struct peak_usb_device *dev, bool loaded) { struct pcan_usb_fd_device *pdev = container_of(dev, struct pcan_usb_fd_device, dev); pdev->cmd_buffer_addr[0] = 0; pdev->cmd_buffer_addr[1] = !!loaded; return pcan_usb_pro_send_req(dev, PCAN_USBPRO_REQ_FCT, PCAN_USBPRO_FCT_DRVLD, pdev->cmd_buffer_addr, PCAN_USBPRO_FCT_DRVLD_REQ_LEN); } static int pcan_usb_fd_decode_canmsg(struct pcan_usb_fd_if *usb_if, struct pucan_msg *rx_msg) { struct pucan_rx_msg *rm = (struct pucan_rx_msg *)rx_msg; struct peak_usb_device *dev; struct net_device *netdev; struct canfd_frame *cfd; struct sk_buff *skb; const u16 rx_msg_flags = le16_to_cpu(rm->flags); if (pucan_msg_get_channel(rm) >= ARRAY_SIZE(usb_if->dev)) return -ENOMEM; dev = usb_if->dev[pucan_msg_get_channel(rm)]; netdev = dev->netdev; if (rx_msg_flags & PUCAN_MSG_EXT_DATA_LEN) { /* CANFD frame case */ skb = alloc_canfd_skb(netdev, &cfd); if (!skb) return -ENOMEM; if (rx_msg_flags & PUCAN_MSG_BITRATE_SWITCH) cfd->flags |= CANFD_BRS; if (rx_msg_flags & PUCAN_MSG_ERROR_STATE_IND) cfd->flags |= CANFD_ESI; cfd->len = can_fd_dlc2len(pucan_msg_get_dlc(rm)); } else { /* CAN 2.0 frame case */ skb = alloc_can_skb(netdev, (struct can_frame **)&cfd); if (!skb) return -ENOMEM; can_frame_set_cc_len((struct can_frame *)cfd, pucan_msg_get_dlc(rm), dev->can.ctrlmode); } cfd->can_id = le32_to_cpu(rm->can_id); if (rx_msg_flags & PUCAN_MSG_EXT_ID) cfd->can_id |= CAN_EFF_FLAG; if (rx_msg_flags & PUCAN_MSG_RTR) { cfd->can_id |= CAN_RTR_FLAG; } else { memcpy(cfd->data, rm->d, cfd->len); netdev->stats.rx_bytes += cfd->len; } netdev->stats.rx_packets++; peak_usb_netif_rx_64(skb, le32_to_cpu(rm->ts_low), le32_to_cpu(rm->ts_high)); return 0; } /* handle uCAN status message */ static int pcan_usb_fd_decode_status(struct pcan_usb_fd_if *usb_if, struct pucan_msg *rx_msg) { struct pucan_status_msg *sm = (struct pucan_status_msg *)rx_msg; struct pcan_usb_fd_device *pdev; enum can_state new_state = CAN_STATE_ERROR_ACTIVE; enum can_state rx_state, tx_state; struct peak_usb_device *dev; struct net_device *netdev; struct can_frame *cf; struct sk_buff *skb; if (pucan_stmsg_get_channel(sm) >= ARRAY_SIZE(usb_if->dev)) return -ENOMEM; dev = usb_if->dev[pucan_stmsg_get_channel(sm)]; pdev = container_of(dev, struct pcan_usb_fd_device, dev); netdev = dev->netdev; /* nothing should be sent while in BUS_OFF state */ if (dev->can.state == CAN_STATE_BUS_OFF) return 0; if (sm->channel_p_w_b & PUCAN_BUS_BUSOFF) { new_state = CAN_STATE_BUS_OFF; } else if (sm->channel_p_w_b & PUCAN_BUS_PASSIVE) { new_state = CAN_STATE_ERROR_PASSIVE; } else if (sm->channel_p_w_b & PUCAN_BUS_WARNING) { new_state = CAN_STATE_ERROR_WARNING; } else { /* back to (or still in) ERROR_ACTIVE state */ new_state = CAN_STATE_ERROR_ACTIVE; pdev->bec.txerr = 0; pdev->bec.rxerr = 0; } /* state hasn't changed */ if (new_state == dev->can.state) return 0; /* handle bus state change */ tx_state = (pdev->bec.txerr >= pdev->bec.rxerr) ? new_state : 0; rx_state = (pdev->bec.txerr <= pdev->bec.rxerr) ? new_state : 0; /* allocate an skb to store the error frame */ skb = alloc_can_err_skb(netdev, &cf); can_change_state(netdev, cf, tx_state, rx_state); /* things must be done even in case of OOM */ if (new_state == CAN_STATE_BUS_OFF) can_bus_off(netdev); if (!skb) return -ENOMEM; peak_usb_netif_rx_64(skb, le32_to_cpu(sm->ts_low), le32_to_cpu(sm->ts_high)); return 0; } /* handle uCAN error message */ static int pcan_usb_fd_decode_error(struct pcan_usb_fd_if *usb_if, struct pucan_msg *rx_msg) { struct pucan_error_msg *er = (struct pucan_error_msg *)rx_msg; struct pcan_usb_fd_device *pdev; struct peak_usb_device *dev; if (pucan_ermsg_get_channel(er) >= ARRAY_SIZE(usb_if->dev)) return -EINVAL; dev = usb_if->dev[pucan_ermsg_get_channel(er)]; pdev = container_of(dev, struct pcan_usb_fd_device, dev); /* keep a trace of tx and rx error counters for later use */ pdev->bec.txerr = er->tx_err_cnt; pdev->bec.rxerr = er->rx_err_cnt; return 0; } /* handle uCAN overrun message */ static int pcan_usb_fd_decode_overrun(struct pcan_usb_fd_if *usb_if, struct pucan_msg *rx_msg) { struct pcan_ufd_ovr_msg *ov = (struct pcan_ufd_ovr_msg *)rx_msg; struct peak_usb_device *dev; struct net_device *netdev; struct can_frame *cf; struct sk_buff *skb; if (pufd_omsg_get_channel(ov) >= ARRAY_SIZE(usb_if->dev)) return -EINVAL; dev = usb_if->dev[pufd_omsg_get_channel(ov)]; netdev = dev->netdev; /* allocate an skb to store the error frame */ skb = alloc_can_err_skb(netdev, &cf); if (!skb) return -ENOMEM; cf->can_id |= CAN_ERR_CRTL; cf->data[1] |= CAN_ERR_CRTL_RX_OVERFLOW; peak_usb_netif_rx_64(skb, le32_to_cpu(ov->ts_low), le32_to_cpu(ov->ts_high)); netdev->stats.rx_over_errors++; netdev->stats.rx_errors++; return 0; } /* handle USB calibration message */ static void pcan_usb_fd_decode_ts(struct pcan_usb_fd_if *usb_if, struct pucan_msg *rx_msg) { struct pcan_ufd_ts_msg *ts = (struct pcan_ufd_ts_msg *)rx_msg; /* should wait until clock is stabilized */ if (usb_if->cm_ignore_count > 0) usb_if->cm_ignore_count--; else peak_usb_set_ts_now(&usb_if->time_ref, le32_to_cpu(ts->ts_low)); } /* callback for bulk IN urb */ static int pcan_usb_fd_decode_buf(struct peak_usb_device *dev, struct urb *urb) { struct pcan_usb_fd_if *usb_if = pcan_usb_fd_dev_if(dev); struct net_device *netdev = dev->netdev; struct pucan_msg *rx_msg; u8 *msg_ptr, *msg_end; int err = 0; /* loop reading all the records from the incoming message */ msg_ptr = urb->transfer_buffer; msg_end = urb->transfer_buffer + urb->actual_length; for (; msg_ptr < msg_end;) { u16 rx_msg_type, rx_msg_size; rx_msg = (struct pucan_msg *)msg_ptr; if (!rx_msg->size) { /* null packet found: end of list */ break; } rx_msg_size = le16_to_cpu(rx_msg->size); rx_msg_type = le16_to_cpu(rx_msg->type); /* check if the record goes out of current packet */ if (msg_ptr + rx_msg_size > msg_end) { netdev_err(netdev, "got frag rec: should inc usb rx buf sze\n"); err = -EBADMSG; break; } switch (rx_msg_type) { case PUCAN_MSG_CAN_RX: err = pcan_usb_fd_decode_canmsg(usb_if, rx_msg); if (err < 0) goto fail; break; case PCAN_UFD_MSG_CALIBRATION: pcan_usb_fd_decode_ts(usb_if, rx_msg); break; case PUCAN_MSG_ERROR: err = pcan_usb_fd_decode_error(usb_if, rx_msg); if (err < 0) goto fail; break; case PUCAN_MSG_STATUS: err = pcan_usb_fd_decode_status(usb_if, rx_msg); if (err < 0) goto fail; break; case PCAN_UFD_MSG_OVERRUN: err = pcan_usb_fd_decode_overrun(usb_if, rx_msg); if (err < 0) goto fail; break; default: netdev_err(netdev, "unhandled msg type 0x%02x (%d): ignored\n", rx_msg_type, rx_msg_type); break; } msg_ptr += rx_msg_size; } fail: if (err) pcan_dump_mem("received msg", urb->transfer_buffer, urb->actual_length); return err; } /* CAN/CANFD frames encoding callback */ static int pcan_usb_fd_encode_msg(struct peak_usb_device *dev, struct sk_buff *skb, u8 *obuf, size_t *size) { struct pucan_tx_msg *tx_msg = (struct pucan_tx_msg *)obuf; struct canfd_frame *cfd = (struct canfd_frame *)skb->data; u16 tx_msg_size, tx_msg_flags; u8 dlc; if (cfd->len > CANFD_MAX_DLEN) return -EINVAL; tx_msg_size = ALIGN(sizeof(struct pucan_tx_msg) + cfd->len, 4); tx_msg->size = cpu_to_le16(tx_msg_size); tx_msg->type = cpu_to_le16(PUCAN_MSG_CAN_TX); tx_msg_flags = 0; if (cfd->can_id & CAN_EFF_FLAG) { tx_msg_flags |= PUCAN_MSG_EXT_ID; tx_msg->can_id = cpu_to_le32(cfd->can_id & CAN_EFF_MASK); } else { tx_msg->can_id = cpu_to_le32(cfd->can_id & CAN_SFF_MASK); } if (can_is_canfd_skb(skb)) { /* considering a CANFD frame */ dlc = can_fd_len2dlc(cfd->len); tx_msg_flags |= PUCAN_MSG_EXT_DATA_LEN; if (cfd->flags & CANFD_BRS) tx_msg_flags |= PUCAN_MSG_BITRATE_SWITCH; if (cfd->flags & CANFD_ESI) tx_msg_flags |= PUCAN_MSG_ERROR_STATE_IND; } else { /* CAND 2.0 frames */ dlc = can_get_cc_dlc((struct can_frame *)cfd, dev->can.ctrlmode); if (cfd->can_id & CAN_RTR_FLAG) tx_msg_flags |= PUCAN_MSG_RTR; } /* Single-Shot frame */ if (dev->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT) tx_msg_flags |= PUCAN_MSG_SINGLE_SHOT; tx_msg->flags = cpu_to_le16(tx_msg_flags); tx_msg->channel_dlc = PUCAN_MSG_CHANNEL_DLC(dev->ctrl_idx, dlc); memcpy(tx_msg->d, cfd->data, cfd->len); /* add null size message to tag the end (messages are 32-bits aligned) */ tx_msg = (struct pucan_tx_msg *)(obuf + tx_msg_size); tx_msg->size = 0; /* set the whole size of the USB packet to send */ *size = tx_msg_size + sizeof(u32); return 0; } /* start the interface (last chance before set bus on) */ static int pcan_usb_fd_start(struct peak_usb_device *dev) { struct pcan_usb_fd_device *pdev = container_of(dev, struct pcan_usb_fd_device, dev); int err; /* set filter mode: all acceptance */ err = pcan_usb_fd_set_filter_std(dev, -1, 0xffffffff); if (err) return err; /* opening first device: */ if (pdev->usb_if->dev_opened_count == 0) { /* reset time_ref */ peak_usb_init_time_ref(&pdev->usb_if->time_ref, &pcan_usb_pro_fd); /* enable USB calibration messages */ err = pcan_usb_fd_set_options(dev, 1, PUCAN_OPTION_ERROR, PCAN_UFD_FLTEXT_CALIBRATION); } pdev->usb_if->dev_opened_count++; /* reset cached error counters */ pdev->bec.txerr = 0; pdev->bec.rxerr = 0; return err; } /* socket callback used to copy berr counters values received through USB */ static int pcan_usb_fd_get_berr_counter(const struct net_device *netdev, struct can_berr_counter *bec) { struct peak_usb_device *dev = netdev_priv(netdev); struct pcan_usb_fd_device *pdev = container_of(dev, struct pcan_usb_fd_device, dev); *bec = pdev->bec; /* must return 0 */ return 0; } /* probe function for all PCAN-USB FD family usb interfaces */ static int pcan_usb_fd_probe(struct usb_interface *intf) { struct usb_host_interface *iface_desc = &intf->altsetting[0]; /* CAN interface is always interface #0 */ return iface_desc->desc.bInterfaceNumber; } /* stop interface (last chance before set bus off) */ static int pcan_usb_fd_stop(struct peak_usb_device *dev) { struct pcan_usb_fd_device *pdev = container_of(dev, struct pcan_usb_fd_device, dev); /* turn off special msgs for that interface if no other dev opened */ if (pdev->usb_if->dev_opened_count == 1) pcan_usb_fd_set_options(dev, 0, PUCAN_OPTION_ERROR, PCAN_UFD_FLTEXT_CALIBRATION); pdev->usb_if->dev_opened_count--; return 0; } /* called when probing, to initialize a device object */ static int pcan_usb_fd_init(struct peak_usb_device *dev) { struct pcan_usb_fd_device *pdev = container_of(dev, struct pcan_usb_fd_device, dev); struct pcan_ufd_fw_info *fw_info; int i, err = -ENOMEM; /* do this for 1st channel only */ if (!dev->prev_siblings) { /* allocate netdevices common structure attached to first one */ pdev->usb_if = kzalloc(sizeof(*pdev->usb_if), GFP_KERNEL); if (!pdev->usb_if) goto err_out; /* allocate command buffer once for all for the interface */ pdev->cmd_buffer_addr = kzalloc(PCAN_UFD_CMD_BUFFER_SIZE, GFP_KERNEL); if (!pdev->cmd_buffer_addr) goto err_out_1; /* number of ts msgs to ignore before taking one into account */ pdev->usb_if->cm_ignore_count = 5; fw_info = &pdev->usb_if->fw_info; err = pcan_usb_fd_read_fwinfo(dev, fw_info); if (err) { dev_err(dev->netdev->dev.parent, "unable to read %s firmware info (err %d)\n", dev->adapter->name, err); goto err_out_2; } /* explicit use of dev_xxx() instead of netdev_xxx() here: * information displayed are related to the device itself, not * to the canx (channel) device. */ dev_info(dev->netdev->dev.parent, "PEAK-System %s v%u fw v%u.%u.%u (%u channels)\n", dev->adapter->name, fw_info->hw_version, fw_info->fw_version[0], fw_info->fw_version[1], fw_info->fw_version[2], dev->adapter->ctrl_count); /* check for ability to switch between ISO/non-ISO modes */ if (fw_info->fw_version[0] >= 2) { /* firmware >= 2.x supports ISO/non-ISO switching */ dev->can.ctrlmode_supported |= CAN_CTRLMODE_FD_NON_ISO; } else { /* firmware < 2.x only supports fixed(!) non-ISO */ dev->can.ctrlmode |= CAN_CTRLMODE_FD_NON_ISO; } /* if vendor rsp type is greater than or equal to 2, then it * contains EP numbers to use for cmds pipes. If not, then * default EP should be used. */ if (le16_to_cpu(fw_info->type) < PCAN_USBFD_TYPE_EXT) { fw_info->cmd_out_ep = PCAN_USBPRO_EP_CMDOUT; fw_info->cmd_in_ep = PCAN_USBPRO_EP_CMDIN; } /* tell the hardware the can driver is running */ err = pcan_usb_fd_drv_loaded(dev, 1); if (err) { dev_err(dev->netdev->dev.parent, "unable to tell %s driver is loaded (err %d)\n", dev->adapter->name, err); goto err_out_2; } } else { /* otherwise, simply copy previous sibling's values */ struct pcan_usb_fd_device *ppdev = container_of(dev->prev_siblings, struct pcan_usb_fd_device, dev); pdev->usb_if = ppdev->usb_if; pdev->cmd_buffer_addr = ppdev->cmd_buffer_addr; /* do a copy of the ctrlmode[_supported] too */ dev->can.ctrlmode = ppdev->dev.can.ctrlmode; dev->can.ctrlmode_supported = ppdev->dev.can.ctrlmode_supported; fw_info = &pdev->usb_if->fw_info; } pdev->usb_if->dev[dev->ctrl_idx] = dev; dev->can_channel_id = le32_to_cpu(pdev->usb_if->fw_info.dev_id[dev->ctrl_idx]); /* if vendor rsp type is greater than or equal to 2, then it contains EP * numbers to use for data pipes. If not, then statically defined EP are * used (see peak_usb_create_dev()). */ if (le16_to_cpu(fw_info->type) >= PCAN_USBFD_TYPE_EXT) { dev->ep_msg_in = fw_info->data_in_ep; dev->ep_msg_out = fw_info->data_out_ep[dev->ctrl_idx]; } /* set clock domain */ for (i = 0; i < ARRAY_SIZE(pcan_usb_fd_clk_freq); i++) if (dev->adapter->clock.freq == pcan_usb_fd_clk_freq[i]) break; if (i >= ARRAY_SIZE(pcan_usb_fd_clk_freq)) { dev_warn(dev->netdev->dev.parent, "incompatible clock frequencies\n"); err = -EINVAL; goto err_out_2; } pcan_usb_fd_set_clock_domain(dev, i); /* set LED in default state (end of init phase) */ pcan_usb_fd_set_can_led(dev, PCAN_UFD_LED_DEF); return 0; err_out_2: kfree(pdev->cmd_buffer_addr); err_out_1: kfree(pdev->usb_if); err_out: return err; } /* called when driver module is being unloaded */ static void pcan_usb_fd_exit(struct peak_usb_device *dev) { struct pcan_usb_fd_device *pdev = container_of(dev, struct pcan_usb_fd_device, dev); /* when rmmod called before unplug and if down, should reset things * before leaving */ if (dev->can.state != CAN_STATE_STOPPED) { /* set bus off on the corresponding channel */ pcan_usb_fd_set_bus(dev, 0); } /* switch off corresponding CAN LEDs */ pcan_usb_fd_set_can_led(dev, PCAN_UFD_LED_OFF); /* if channel #0 (only) */ if (dev->ctrl_idx == 0) { /* turn off calibration message if any device were opened */ if (pdev->usb_if->dev_opened_count > 0) pcan_usb_fd_set_options(dev, 0, PUCAN_OPTION_ERROR, PCAN_UFD_FLTEXT_CALIBRATION); /* tell USB adapter that the driver is being unloaded */ pcan_usb_fd_drv_loaded(dev, 0); } } /* called when the USB adapter is unplugged */ static void pcan_usb_fd_free(struct peak_usb_device *dev) { /* last device: can free shared objects now */ if (!dev->prev_siblings && !dev->next_siblings) { struct pcan_usb_fd_device *pdev = container_of(dev, struct pcan_usb_fd_device, dev); /* free commands buffer */ kfree(pdev->cmd_buffer_addr); /* free usb interface object */ kfree(pdev->usb_if); } } /* blink LED's */ static int pcan_usb_fd_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) { struct peak_usb_device *dev = netdev_priv(netdev); int err = 0; switch (state) { case ETHTOOL_ID_ACTIVE: err = pcan_usb_fd_set_can_led(dev, PCAN_UFD_LED_FAST); break; case ETHTOOL_ID_INACTIVE: err = pcan_usb_fd_set_can_led(dev, PCAN_UFD_LED_DEF); break; default: break; } return err; } static const struct ethtool_ops pcan_usb_fd_ethtool_ops = { .set_phys_id = pcan_usb_fd_set_phys_id, .get_ts_info = pcan_get_ts_info, .get_eeprom_len = peak_usb_get_eeprom_len, .get_eeprom = peak_usb_get_eeprom, .set_eeprom = peak_usb_set_eeprom, }; /* describes the PCAN-USB FD adapter */ static const struct can_bittiming_const pcan_usb_fd_const = { .name = "pcan_usb_fd", .tseg1_min = 1, .tseg1_max = (1 << PUCAN_TSLOW_TSGEG1_BITS), .tseg2_min = 1, .tseg2_max = (1 << PUCAN_TSLOW_TSGEG2_BITS), .sjw_max = (1 << PUCAN_TSLOW_SJW_BITS), .brp_min = 1, .brp_max = (1 << PUCAN_TSLOW_BRP_BITS), .brp_inc = 1, }; static const struct can_bittiming_const pcan_usb_fd_data_const = { .name = "pcan_usb_fd", .tseg1_min = 1, .tseg1_max = (1 << PUCAN_TFAST_TSGEG1_BITS), .tseg2_min = 1, .tseg2_max = (1 << PUCAN_TFAST_TSGEG2_BITS), .sjw_max = (1 << PUCAN_TFAST_SJW_BITS), .brp_min = 1, .brp_max = (1 << PUCAN_TFAST_BRP_BITS), .brp_inc = 1, }; const struct peak_usb_adapter pcan_usb_fd = { .name = "PCAN-USB FD", .device_id = PCAN_USBFD_PRODUCT_ID, .ctrl_count = PCAN_USBFD_CHANNEL_COUNT, .ctrlmode_supported = CAN_CTRLMODE_FD | CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_ONE_SHOT | CAN_CTRLMODE_CC_LEN8_DLC, .clock = { .freq = PCAN_UFD_CRYSTAL_HZ, }, .bittiming_const = &pcan_usb_fd_const, .data_bittiming_const = &pcan_usb_fd_data_const, /* size of device private data */ .sizeof_dev_private = sizeof(struct pcan_usb_fd_device), .ethtool_ops = &pcan_usb_fd_ethtool_ops, /* timestamps usage */ .ts_used_bits = 32, .us_per_ts_scale = 1, /* us = (ts * scale) >> shift */ .us_per_ts_shift = 0, /* give here messages in/out endpoints */ .ep_msg_in = PCAN_USBPRO_EP_MSGIN, .ep_msg_out = {PCAN_USBPRO_EP_MSGOUT_0}, /* size of rx/tx usb buffers */ .rx_buffer_size = PCAN_UFD_RX_BUFFER_SIZE, .tx_buffer_size = PCAN_UFD_TX_BUFFER_SIZE, /* device callbacks */ .intf_probe = pcan_usb_fd_probe, .dev_init = pcan_usb_fd_init, .dev_exit = pcan_usb_fd_exit, .dev_free = pcan_usb_fd_free, .dev_set_bus = pcan_usb_fd_set_bus, .dev_set_bittiming = pcan_usb_fd_set_bittiming_slow, .dev_set_data_bittiming = pcan_usb_fd_set_bittiming_fast, .dev_get_can_channel_id = pcan_usb_fd_get_can_channel_id, .dev_set_can_channel_id = pcan_usb_fd_set_can_channel_id, .dev_decode_buf = pcan_usb_fd_decode_buf, .dev_start = pcan_usb_fd_start, .dev_stop = pcan_usb_fd_stop, .dev_restart_async = pcan_usb_fd_restart_async, .dev_encode_msg = pcan_usb_fd_encode_msg, .do_get_berr_counter = pcan_usb_fd_get_berr_counter, }; /* describes the PCAN-CHIP USB */ static const struct can_bittiming_const pcan_usb_chip_const = { .name = "pcan_chip_usb", .tseg1_min = 1, .tseg1_max = (1 << PUCAN_TSLOW_TSGEG1_BITS), .tseg2_min = 1, .tseg2_max = (1 << PUCAN_TSLOW_TSGEG2_BITS), .sjw_max = (1 << PUCAN_TSLOW_SJW_BITS), .brp_min = 1, .brp_max = (1 << PUCAN_TSLOW_BRP_BITS), .brp_inc = 1, }; static const struct can_bittiming_const pcan_usb_chip_data_const = { .name = "pcan_chip_usb", .tseg1_min = 1, .tseg1_max = (1 << PUCAN_TFAST_TSGEG1_BITS), .tseg2_min = 1, .tseg2_max = (1 << PUCAN_TFAST_TSGEG2_BITS), .sjw_max = (1 << PUCAN_TFAST_SJW_BITS), .brp_min = 1, .brp_max = (1 << PUCAN_TFAST_BRP_BITS), .brp_inc = 1, }; const struct peak_usb_adapter pcan_usb_chip = { .name = "PCAN-Chip USB", .device_id = PCAN_USBCHIP_PRODUCT_ID, .ctrl_count = PCAN_USBFD_CHANNEL_COUNT, .ctrlmode_supported = CAN_CTRLMODE_FD | CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_ONE_SHOT | CAN_CTRLMODE_CC_LEN8_DLC, .clock = { .freq = PCAN_UFD_CRYSTAL_HZ, }, .bittiming_const = &pcan_usb_chip_const, .data_bittiming_const = &pcan_usb_chip_data_const, /* size of device private data */ .sizeof_dev_private = sizeof(struct pcan_usb_fd_device), .ethtool_ops = &pcan_usb_fd_ethtool_ops, /* timestamps usage */ .ts_used_bits = 32, .us_per_ts_scale = 1, /* us = (ts * scale) >> shift */ .us_per_ts_shift = 0, /* give here messages in/out endpoints */ .ep_msg_in = PCAN_USBPRO_EP_MSGIN, .ep_msg_out = {PCAN_USBPRO_EP_MSGOUT_0}, /* size of rx/tx usb buffers */ .rx_buffer_size = PCAN_UFD_RX_BUFFER_SIZE, .tx_buffer_size = PCAN_UFD_TX_BUFFER_SIZE, /* device callbacks */ .intf_probe = pcan_usb_pro_probe, /* same as PCAN-USB Pro */ .dev_init = pcan_usb_fd_init, .dev_exit = pcan_usb_fd_exit, .dev_free = pcan_usb_fd_free, .dev_set_bus = pcan_usb_fd_set_bus, .dev_set_bittiming = pcan_usb_fd_set_bittiming_slow, .dev_set_data_bittiming = pcan_usb_fd_set_bittiming_fast, .dev_get_can_channel_id = pcan_usb_fd_get_can_channel_id, .dev_set_can_channel_id = pcan_usb_fd_set_can_channel_id, .dev_decode_buf = pcan_usb_fd_decode_buf, .dev_start = pcan_usb_fd_start, .dev_stop = pcan_usb_fd_stop, .dev_restart_async = pcan_usb_fd_restart_async, .dev_encode_msg = pcan_usb_fd_encode_msg, .do_get_berr_counter = pcan_usb_fd_get_berr_counter, }; /* describes the PCAN-USB Pro FD adapter */ static const struct can_bittiming_const pcan_usb_pro_fd_const = { .name = "pcan_usb_pro_fd", .tseg1_min = 1, .tseg1_max = (1 << PUCAN_TSLOW_TSGEG1_BITS), .tseg2_min = 1, .tseg2_max = (1 << PUCAN_TSLOW_TSGEG2_BITS), .sjw_max = (1 << PUCAN_TSLOW_SJW_BITS), .brp_min = 1, .brp_max = (1 << PUCAN_TSLOW_BRP_BITS), .brp_inc = 1, }; static const struct can_bittiming_const pcan_usb_pro_fd_data_const = { .name = "pcan_usb_pro_fd", .tseg1_min = 1, .tseg1_max = (1 << PUCAN_TFAST_TSGEG1_BITS), .tseg2_min = 1, .tseg2_max = (1 << PUCAN_TFAST_TSGEG2_BITS), .sjw_max = (1 << PUCAN_TFAST_SJW_BITS), .brp_min = 1, .brp_max = (1 << PUCAN_TFAST_BRP_BITS), .brp_inc = 1, }; const struct peak_usb_adapter pcan_usb_pro_fd = { .name = "PCAN-USB Pro FD", .device_id = PCAN_USBPROFD_PRODUCT_ID, .ctrl_count = PCAN_USBPROFD_CHANNEL_COUNT, .ctrlmode_supported = CAN_CTRLMODE_FD | CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_ONE_SHOT | CAN_CTRLMODE_CC_LEN8_DLC, .clock = { .freq = PCAN_UFD_CRYSTAL_HZ, }, .bittiming_const = &pcan_usb_pro_fd_const, .data_bittiming_const = &pcan_usb_pro_fd_data_const, /* size of device private data */ .sizeof_dev_private = sizeof(struct pcan_usb_fd_device), .ethtool_ops = &pcan_usb_fd_ethtool_ops, /* timestamps usage */ .ts_used_bits = 32, .us_per_ts_scale = 1, /* us = (ts * scale) >> shift */ .us_per_ts_shift = 0, /* give here messages in/out endpoints */ .ep_msg_in = PCAN_USBPRO_EP_MSGIN, .ep_msg_out = {PCAN_USBPRO_EP_MSGOUT_0, PCAN_USBPRO_EP_MSGOUT_1}, /* size of rx/tx usb buffers */ .rx_buffer_size = PCAN_UFD_RX_BUFFER_SIZE, .tx_buffer_size = PCAN_UFD_TX_BUFFER_SIZE, /* device callbacks */ .intf_probe = pcan_usb_pro_probe, /* same as PCAN-USB Pro */ .dev_init = pcan_usb_fd_init, .dev_exit = pcan_usb_fd_exit, .dev_free = pcan_usb_fd_free, .dev_set_bus = pcan_usb_fd_set_bus, .dev_set_bittiming = pcan_usb_fd_set_bittiming_slow, .dev_set_data_bittiming = pcan_usb_fd_set_bittiming_fast, .dev_get_can_channel_id = pcan_usb_fd_get_can_channel_id, .dev_set_can_channel_id = pcan_usb_fd_set_can_channel_id, .dev_decode_buf = pcan_usb_fd_decode_buf, .dev_start = pcan_usb_fd_start, .dev_stop = pcan_usb_fd_stop, .dev_restart_async = pcan_usb_fd_restart_async, .dev_encode_msg = pcan_usb_fd_encode_msg, .do_get_berr_counter = pcan_usb_fd_get_berr_counter, }; /* describes the PCAN-USB X6 adapter */ static const struct can_bittiming_const pcan_usb_x6_const = { .name = "pcan_usb_x6", .tseg1_min = 1, .tseg1_max = (1 << PUCAN_TSLOW_TSGEG1_BITS), .tseg2_min = 1, .tseg2_max = (1 << PUCAN_TSLOW_TSGEG2_BITS), .sjw_max = (1 << PUCAN_TSLOW_SJW_BITS), .brp_min = 1, .brp_max = (1 << PUCAN_TSLOW_BRP_BITS), .brp_inc = 1, }; static const struct can_bittiming_const pcan_usb_x6_data_const = { .name = "pcan_usb_x6", .tseg1_min = 1, .tseg1_max = (1 << PUCAN_TFAST_TSGEG1_BITS), .tseg2_min = 1, .tseg2_max = (1 << PUCAN_TFAST_TSGEG2_BITS), .sjw_max = (1 << PUCAN_TFAST_SJW_BITS), .brp_min = 1, .brp_max = (1 << PUCAN_TFAST_BRP_BITS), .brp_inc = 1, }; const struct peak_usb_adapter pcan_usb_x6 = { .name = "PCAN-USB X6", .device_id = PCAN_USBX6_PRODUCT_ID, .ctrl_count = PCAN_USBPROFD_CHANNEL_COUNT, .ctrlmode_supported = CAN_CTRLMODE_FD | CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_ONE_SHOT | CAN_CTRLMODE_CC_LEN8_DLC, .clock = { .freq = PCAN_UFD_CRYSTAL_HZ, }, .bittiming_const = &pcan_usb_x6_const, .data_bittiming_const = &pcan_usb_x6_data_const, /* size of device private data */ .sizeof_dev_private = sizeof(struct pcan_usb_fd_device), .ethtool_ops = &pcan_usb_fd_ethtool_ops, /* timestamps usage */ .ts_used_bits = 32, .us_per_ts_scale = 1, /* us = (ts * scale) >> shift */ .us_per_ts_shift = 0, /* give here messages in/out endpoints */ .ep_msg_in = PCAN_USBPRO_EP_MSGIN, .ep_msg_out = {PCAN_USBPRO_EP_MSGOUT_0, PCAN_USBPRO_EP_MSGOUT_1}, /* size of rx/tx usb buffers */ .rx_buffer_size = PCAN_UFD_RX_BUFFER_SIZE, .tx_buffer_size = PCAN_UFD_TX_BUFFER_SIZE, /* device callbacks */ .intf_probe = pcan_usb_pro_probe, /* same as PCAN-USB Pro */ .dev_init = pcan_usb_fd_init, .dev_exit = pcan_usb_fd_exit, .dev_free = pcan_usb_fd_free, .dev_set_bus = pcan_usb_fd_set_bus, .dev_set_bittiming = pcan_usb_fd_set_bittiming_slow, .dev_set_data_bittiming = pcan_usb_fd_set_bittiming_fast, .dev_get_can_channel_id = pcan_usb_fd_get_can_channel_id, .dev_set_can_channel_id = pcan_usb_fd_set_can_channel_id, .dev_decode_buf = pcan_usb_fd_decode_buf, .dev_start = pcan_usb_fd_start, .dev_stop = pcan_usb_fd_stop, .dev_restart_async = pcan_usb_fd_restart_async, .dev_encode_msg = pcan_usb_fd_encode_msg, .do_get_berr_counter = pcan_usb_fd_get_berr_counter, }; |
| 11 11 11 4 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 | // SPDX-License-Identifier: GPL-2.0-only /* * llc_output.c - LLC minimal output path * * Copyright (c) 1997 by Procom Technology, Inc. * 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br> */ #include <linux/if_arp.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/export.h> #include <net/llc.h> #include <net/llc_pdu.h> /** * llc_mac_hdr_init - fills MAC header fields * @skb: Address of the frame to initialize its MAC header * @sa: The MAC source address * @da: The MAC destination address * * Fills MAC header fields, depending on MAC type. Returns 0, If MAC type * is a valid type and initialization completes correctly 1, otherwise. */ int llc_mac_hdr_init(struct sk_buff *skb, const unsigned char *sa, const unsigned char *da) { int rc = -EINVAL; switch (skb->dev->type) { case ARPHRD_ETHER: case ARPHRD_LOOPBACK: rc = dev_hard_header(skb, skb->dev, ETH_P_802_2, da, sa, skb->len); if (rc > 0) rc = 0; break; default: break; } return rc; } /** * llc_build_and_send_ui_pkt - unitdata request interface for upper layers * @sap: sap to use * @skb: packet to send * @dmac: destination mac address * @dsap: destination sap * * Upper layers calls this function when upper layer wants to send data * using connection-less mode communication (UI pdu). * * Accept data frame from network layer to be sent using connection- * less mode communication; timeout/retries handled by network layer; * package primitive as an event and send to SAP event handler */ int llc_build_and_send_ui_pkt(struct llc_sap *sap, struct sk_buff *skb, const unsigned char *dmac, unsigned char dsap) { int rc; llc_pdu_header_init(skb, LLC_PDU_TYPE_U, sap->laddr.lsap, dsap, LLC_PDU_CMD); llc_pdu_init_as_ui_cmd(skb); rc = llc_mac_hdr_init(skb, skb->dev->dev_addr, dmac); if (likely(!rc)) rc = dev_queue_xmit(skb); else kfree_skb(skb); return rc; } EXPORT_SYMBOL(llc_mac_hdr_init); EXPORT_SYMBOL(llc_build_and_send_ui_pkt); |
| 48 1968 50 122 6 27 1 27 4 27 15 15 225 49 235 140 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_VMSTAT_H #define _LINUX_VMSTAT_H #include <linux/types.h> #include <linux/percpu.h> #include <linux/mmzone.h> #include <linux/vm_event_item.h> #include <linux/atomic.h> #include <linux/static_key.h> #include <linux/mmdebug.h> #ifdef CONFIG_NUMA DECLARE_STATIC_KEY_TRUE(vm_numa_stat_key); #endif struct reclaim_stat { unsigned nr_dirty; unsigned nr_unqueued_dirty; unsigned nr_congested; unsigned nr_writeback; unsigned nr_immediate; unsigned nr_pageout; unsigned nr_activate[ANON_AND_FILE]; unsigned nr_ref_keep; unsigned nr_unmap_fail; unsigned nr_lazyfree_fail; unsigned nr_demoted; }; /* Stat data for system wide items */ enum vm_stat_item { NR_DIRTY_THRESHOLD, NR_DIRTY_BG_THRESHOLD, NR_MEMMAP_PAGES, /* page metadata allocated through buddy allocator */ NR_MEMMAP_BOOT_PAGES, /* page metadata allocated through boot allocator */ NR_VM_STAT_ITEMS, }; #ifdef CONFIG_VM_EVENT_COUNTERS /* * Light weight per cpu counter implementation. * * Counters should only be incremented and no critical kernel component * should rely on the counter values. * * Counters are handled completely inline. On many platforms the code * generated will simply be the increment of a global address. */ struct vm_event_state { unsigned long event[NR_VM_EVENT_ITEMS]; }; DECLARE_PER_CPU(struct vm_event_state, vm_event_states); /* * vm counters are allowed to be racy. Use raw_cpu_ops to avoid the * local_irq_disable overhead. */ static inline void __count_vm_event(enum vm_event_item item) { raw_cpu_inc(vm_event_states.event[item]); } static inline void count_vm_event(enum vm_event_item item) { this_cpu_inc(vm_event_states.event[item]); } static inline void __count_vm_events(enum vm_event_item item, long delta) { raw_cpu_add(vm_event_states.event[item], delta); } static inline void count_vm_events(enum vm_event_item item, long delta) { this_cpu_add(vm_event_states.event[item], delta); } extern void all_vm_events(unsigned long *); extern void vm_events_fold_cpu(int cpu); #else /* Disable counters */ static inline void count_vm_event(enum vm_event_item item) { } static inline void count_vm_events(enum vm_event_item item, long delta) { } static inline void __count_vm_event(enum vm_event_item item) { } static inline void __count_vm_events(enum vm_event_item item, long delta) { } static inline void all_vm_events(unsigned long *ret) { } static inline void vm_events_fold_cpu(int cpu) { } #endif /* CONFIG_VM_EVENT_COUNTERS */ #ifdef CONFIG_NUMA_BALANCING #define count_vm_numa_event(x) count_vm_event(x) #define count_vm_numa_events(x, y) count_vm_events(x, y) #else #define count_vm_numa_event(x) do {} while (0) #define count_vm_numa_events(x, y) do { (void)(y); } while (0) #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_DEBUG_TLBFLUSH #define count_vm_tlb_event(x) count_vm_event(x) #define count_vm_tlb_events(x, y) count_vm_events(x, y) #else #define count_vm_tlb_event(x) do {} while (0) #define count_vm_tlb_events(x, y) do { (void)(y); } while (0) #endif #ifdef CONFIG_PER_VMA_LOCK_STATS #define count_vm_vma_lock_event(x) count_vm_event(x) #else #define count_vm_vma_lock_event(x) do {} while (0) #endif #define __count_zid_vm_events(item, zid, delta) \ __count_vm_events(item##_NORMAL - ZONE_NORMAL + zid, delta) /* * Zone and node-based page accounting with per cpu differentials. */ extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS]; extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS]; extern atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS]; #ifdef CONFIG_NUMA static inline void zone_numa_event_add(long x, struct zone *zone, enum numa_stat_item item) { atomic_long_add(x, &zone->vm_numa_event[item]); atomic_long_add(x, &vm_numa_event[item]); } static inline unsigned long zone_numa_event_state(struct zone *zone, enum numa_stat_item item) { return atomic_long_read(&zone->vm_numa_event[item]); } static inline unsigned long global_numa_event_state(enum numa_stat_item item) { return atomic_long_read(&vm_numa_event[item]); } #endif /* CONFIG_NUMA */ static inline void zone_page_state_add(long x, struct zone *zone, enum zone_stat_item item) { atomic_long_add(x, &zone->vm_stat[item]); atomic_long_add(x, &vm_zone_stat[item]); } static inline void node_page_state_add(long x, struct pglist_data *pgdat, enum node_stat_item item) { atomic_long_add(x, &pgdat->vm_stat[item]); atomic_long_add(x, &vm_node_stat[item]); } static inline unsigned long global_zone_page_state(enum zone_stat_item item) { long x = atomic_long_read(&vm_zone_stat[item]); #ifdef CONFIG_SMP if (x < 0) x = 0; #endif return x; } static inline unsigned long global_node_page_state_pages(enum node_stat_item item) { long x = atomic_long_read(&vm_node_stat[item]); #ifdef CONFIG_SMP if (x < 0) x = 0; #endif return x; } static inline unsigned long global_node_page_state(enum node_stat_item item) { VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); return global_node_page_state_pages(item); } static inline unsigned long zone_page_state(struct zone *zone, enum zone_stat_item item) { long x = atomic_long_read(&zone->vm_stat[item]); #ifdef CONFIG_SMP if (x < 0) x = 0; #endif return x; } /* * More accurate version that also considers the currently pending * deltas. For that we need to loop over all cpus to find the current * deltas. There is no synchronization so the result cannot be * exactly accurate either. */ static inline unsigned long zone_page_state_snapshot(struct zone *zone, enum zone_stat_item item) { long x = atomic_long_read(&zone->vm_stat[item]); #ifdef CONFIG_SMP int cpu; for_each_online_cpu(cpu) x += per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_stat_diff[item]; if (x < 0) x = 0; #endif return x; } #ifdef CONFIG_NUMA /* See __count_vm_event comment on why raw_cpu_inc is used. */ static inline void __count_numa_event(struct zone *zone, enum numa_stat_item item) { struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; raw_cpu_inc(pzstats->vm_numa_event[item]); } static inline void __count_numa_events(struct zone *zone, enum numa_stat_item item, long delta) { struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; raw_cpu_add(pzstats->vm_numa_event[item], delta); } extern unsigned long sum_zone_node_page_state(int node, enum zone_stat_item item); extern unsigned long sum_zone_numa_event_state(int node, enum numa_stat_item item); extern unsigned long node_page_state(struct pglist_data *pgdat, enum node_stat_item item); extern unsigned long node_page_state_pages(struct pglist_data *pgdat, enum node_stat_item item); extern void fold_vm_numa_events(void); #else #define sum_zone_node_page_state(node, item) global_zone_page_state(item) #define node_page_state(node, item) global_node_page_state(item) #define node_page_state_pages(node, item) global_node_page_state_pages(item) static inline void fold_vm_numa_events(void) { } #endif /* CONFIG_NUMA */ #ifdef CONFIG_SMP void __mod_zone_page_state(struct zone *, enum zone_stat_item item, long); void __inc_zone_page_state(struct page *, enum zone_stat_item); void __dec_zone_page_state(struct page *, enum zone_stat_item); void __mod_node_page_state(struct pglist_data *, enum node_stat_item item, long); void __inc_node_page_state(struct page *, enum node_stat_item); void __dec_node_page_state(struct page *, enum node_stat_item); void mod_zone_page_state(struct zone *, enum zone_stat_item, long); void inc_zone_page_state(struct page *, enum zone_stat_item); void dec_zone_page_state(struct page *, enum zone_stat_item); void mod_node_page_state(struct pglist_data *, enum node_stat_item, long); void inc_node_page_state(struct page *, enum node_stat_item); void dec_node_page_state(struct page *, enum node_stat_item); extern void inc_node_state(struct pglist_data *, enum node_stat_item); extern void __inc_zone_state(struct zone *, enum zone_stat_item); extern void __inc_node_state(struct pglist_data *, enum node_stat_item); extern void dec_zone_state(struct zone *, enum zone_stat_item); extern void __dec_zone_state(struct zone *, enum zone_stat_item); extern void __dec_node_state(struct pglist_data *, enum node_stat_item); void quiet_vmstat(void); void cpu_vm_stats_fold(int cpu); void refresh_zone_stat_thresholds(void); void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *); int calculate_pressure_threshold(struct zone *zone); int calculate_normal_threshold(struct zone *zone); void set_pgdat_percpu_threshold(pg_data_t *pgdat, int (*calculate_pressure)(struct zone *)); #else /* CONFIG_SMP */ /* * We do not maintain differentials in a single processor configuration. * The functions directly modify the zone and global counters. */ static inline void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, long delta) { zone_page_state_add(delta, zone, item); } static inline void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, int delta) { if (vmstat_item_in_bytes(item)) { /* * Only cgroups use subpage accounting right now; at * the global level, these items still change in * multiples of whole pages. Store them as pages * internally to keep the per-cpu counters compact. */ VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1)); delta >>= PAGE_SHIFT; } node_page_state_add(delta, pgdat, item); } static inline void __inc_zone_state(struct zone *zone, enum zone_stat_item item) { atomic_long_inc(&zone->vm_stat[item]); atomic_long_inc(&vm_zone_stat[item]); } static inline void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) { atomic_long_inc(&pgdat->vm_stat[item]); atomic_long_inc(&vm_node_stat[item]); } static inline void __dec_zone_state(struct zone *zone, enum zone_stat_item item) { atomic_long_dec(&zone->vm_stat[item]); atomic_long_dec(&vm_zone_stat[item]); } static inline void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) { atomic_long_dec(&pgdat->vm_stat[item]); atomic_long_dec(&vm_node_stat[item]); } static inline void __inc_zone_page_state(struct page *page, enum zone_stat_item item) { __inc_zone_state(page_zone(page), item); } static inline void __inc_node_page_state(struct page *page, enum node_stat_item item) { __inc_node_state(page_pgdat(page), item); } static inline void __dec_zone_page_state(struct page *page, enum zone_stat_item item) { __dec_zone_state(page_zone(page), item); } static inline void __dec_node_page_state(struct page *page, enum node_stat_item item) { __dec_node_state(page_pgdat(page), item); } /* * We only use atomic operations to update counters. So there is no need to * disable interrupts. */ #define inc_zone_page_state __inc_zone_page_state #define dec_zone_page_state __dec_zone_page_state #define mod_zone_page_state __mod_zone_page_state #define inc_node_page_state __inc_node_page_state #define dec_node_page_state __dec_node_page_state #define mod_node_page_state __mod_node_page_state #define inc_zone_state __inc_zone_state #define inc_node_state __inc_node_state #define dec_zone_state __dec_zone_state #define set_pgdat_percpu_threshold(pgdat, callback) { } static inline void refresh_zone_stat_thresholds(void) { } static inline void cpu_vm_stats_fold(int cpu) { } static inline void quiet_vmstat(void) { } static inline void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats) { } #endif /* CONFIG_SMP */ static inline void __zone_stat_mod_folio(struct folio *folio, enum zone_stat_item item, long nr) { __mod_zone_page_state(folio_zone(folio), item, nr); } static inline void __zone_stat_add_folio(struct folio *folio, enum zone_stat_item item) { __mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio)); } static inline void __zone_stat_sub_folio(struct folio *folio, enum zone_stat_item item) { __mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio)); } static inline void zone_stat_mod_folio(struct folio *folio, enum zone_stat_item item, long nr) { mod_zone_page_state(folio_zone(folio), item, nr); } static inline void zone_stat_add_folio(struct folio *folio, enum zone_stat_item item) { mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio)); } static inline void zone_stat_sub_folio(struct folio *folio, enum zone_stat_item item) { mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio)); } static inline void __node_stat_mod_folio(struct folio *folio, enum node_stat_item item, long nr) { __mod_node_page_state(folio_pgdat(folio), item, nr); } static inline void __node_stat_add_folio(struct folio *folio, enum node_stat_item item) { __mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio)); } static inline void __node_stat_sub_folio(struct folio *folio, enum node_stat_item item) { __mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio)); } static inline void node_stat_mod_folio(struct folio *folio, enum node_stat_item item, long nr) { mod_node_page_state(folio_pgdat(folio), item, nr); } static inline void node_stat_add_folio(struct folio *folio, enum node_stat_item item) { mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio)); } static inline void node_stat_sub_folio(struct folio *folio, enum node_stat_item item) { mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio)); } extern const char * const vmstat_text[]; static inline const char *zone_stat_name(enum zone_stat_item item) { return vmstat_text[item]; } #ifdef CONFIG_NUMA static inline const char *numa_stat_name(enum numa_stat_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + item]; } #endif /* CONFIG_NUMA */ static inline const char *node_stat_name(enum node_stat_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + NR_VM_NUMA_EVENT_ITEMS + item]; } static inline const char *lru_list_name(enum lru_list lru) { return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" } #if defined(CONFIG_VM_EVENT_COUNTERS) static inline const char *vm_event_name(enum vm_event_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + NR_VM_NUMA_EVENT_ITEMS + NR_VM_NODE_STAT_ITEMS + NR_VM_STAT_ITEMS + item]; } #endif /* CONFIG_VM_EVENT_COUNTERS */ #ifdef CONFIG_MEMCG void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); static inline void mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) { unsigned long flags; local_irq_save(flags); __mod_lruvec_state(lruvec, idx, val); local_irq_restore(flags); } void __lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val); static inline void lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val) { unsigned long flags; local_irq_save(flags); __lruvec_stat_mod_folio(folio, idx, val); local_irq_restore(flags); } static inline void mod_lruvec_page_state(struct page *page, enum node_stat_item idx, int val) { lruvec_stat_mod_folio(page_folio(page), idx, val); } #else static inline void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) { __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); } static inline void mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) { mod_node_page_state(lruvec_pgdat(lruvec), idx, val); } static inline void __lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val) { __mod_node_page_state(folio_pgdat(folio), idx, val); } static inline void lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx, int val) { mod_node_page_state(folio_pgdat(folio), idx, val); } static inline void mod_lruvec_page_state(struct page *page, enum node_stat_item idx, int val) { mod_node_page_state(page_pgdat(page), idx, val); } #endif /* CONFIG_MEMCG */ static inline void __lruvec_stat_add_folio(struct folio *folio, enum node_stat_item idx) { __lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio)); } static inline void __lruvec_stat_sub_folio(struct folio *folio, enum node_stat_item idx) { __lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); } static inline void lruvec_stat_add_folio(struct folio *folio, enum node_stat_item idx) { lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio)); } static inline void lruvec_stat_sub_folio(struct folio *folio, enum node_stat_item idx) { lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); } void memmap_boot_pages_add(long delta); void memmap_pages_add(long delta); #endif /* _LINUX_VMSTAT_H */ |
| 52 4 58 19 4 4 20 114 877 7043 445 10 10 5 50 843 24 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_NODEMASK_H #define __LINUX_NODEMASK_H /* * Nodemasks provide a bitmap suitable for representing the * set of Node's in a system, one bit position per Node number. * * See detailed comments in the file linux/bitmap.h describing the * data type on which these nodemasks are based. * * For details of nodemask_parse_user(), see bitmap_parse_user() in * lib/bitmap.c. For details of nodelist_parse(), see bitmap_parselist(), * also in bitmap.c. For details of node_remap(), see bitmap_bitremap in * lib/bitmap.c. For details of nodes_remap(), see bitmap_remap in * lib/bitmap.c. For details of nodes_onto(), see bitmap_onto in * lib/bitmap.c. For details of nodes_fold(), see bitmap_fold in * lib/bitmap.c. * * The available nodemask operations are: * * void node_set(node, mask) turn on bit 'node' in mask * void node_clear(node, mask) turn off bit 'node' in mask * void nodes_setall(mask) set all bits * void nodes_clear(mask) clear all bits * int node_isset(node, mask) true iff bit 'node' set in mask * int node_test_and_set(node, mask) test and set bit 'node' in mask * * void nodes_and(dst, src1, src2) dst = src1 & src2 [intersection] * void nodes_or(dst, src1, src2) dst = src1 | src2 [union] * void nodes_xor(dst, src1, src2) dst = src1 ^ src2 * void nodes_andnot(dst, src1, src2) dst = src1 & ~src2 * void nodes_complement(dst, src) dst = ~src * * int nodes_equal(mask1, mask2) Does mask1 == mask2? * int nodes_intersects(mask1, mask2) Do mask1 and mask2 intersect? * int nodes_subset(mask1, mask2) Is mask1 a subset of mask2? * int nodes_empty(mask) Is mask empty (no bits sets)? * int nodes_full(mask) Is mask full (all bits sets)? * int nodes_weight(mask) Hamming weight - number of set bits * * unsigned int first_node(mask) Number lowest set bit, or MAX_NUMNODES * unsigend int next_node(node, mask) Next node past 'node', or MAX_NUMNODES * unsigned int next_node_in(node, mask) Next node past 'node', or wrap to first, * or MAX_NUMNODES * unsigned int first_unset_node(mask) First node not set in mask, or * MAX_NUMNODES * * nodemask_t nodemask_of_node(node) Return nodemask with bit 'node' set * NODE_MASK_ALL Initializer - all bits set * NODE_MASK_NONE Initializer - no bits set * unsigned long *nodes_addr(mask) Array of unsigned long's in mask * * int nodemask_parse_user(ubuf, ulen, mask) Parse ascii string as nodemask * int nodelist_parse(buf, map) Parse ascii string as nodelist * int node_remap(oldbit, old, new) newbit = map(old, new)(oldbit) * void nodes_remap(dst, src, old, new) *dst = map(old, new)(src) * void nodes_onto(dst, orig, relmap) *dst = orig relative to relmap * void nodes_fold(dst, orig, sz) dst bits = orig bits mod sz * * for_each_node_mask(node, mask) for-loop node over mask * * int num_online_nodes() Number of online Nodes * int num_possible_nodes() Number of all possible Nodes * * int node_random(mask) Random node with set bit in mask * * int node_online(node) Is some node online? * int node_possible(node) Is some node possible? * * node_set_online(node) set bit 'node' in node_online_map * node_set_offline(node) clear bit 'node' in node_online_map * * for_each_node(node) for-loop node over node_possible_map * for_each_online_node(node) for-loop node over node_online_map * * Subtlety: * 1) The 'type-checked' form of node_isset() causes gcc (3.3.2, anyway) * to generate slightly worse code. So use a simple one-line #define * for node_isset(), instead of wrapping an inline inside a macro, the * way we do the other calls. * * NODEMASK_SCRATCH * When doing above logical AND, OR, XOR, Remap operations the callers tend to * need temporary nodemask_t's on the stack. But if NODES_SHIFT is large, * nodemask_t's consume too much stack space. NODEMASK_SCRATCH is a helper * for such situations. See below and CPUMASK_ALLOC also. */ #include <linux/threads.h> #include <linux/bitmap.h> #include <linux/minmax.h> #include <linux/nodemask_types.h> #include <linux/random.h> extern nodemask_t _unused_nodemask_arg_; /** * nodemask_pr_args - printf args to output a nodemask * @maskp: nodemask to be printed * * Can be used to provide arguments for '%*pb[l]' when printing a nodemask. */ #define nodemask_pr_args(maskp) __nodemask_pr_numnodes(maskp), \ __nodemask_pr_bits(maskp) static __always_inline unsigned int __nodemask_pr_numnodes(const nodemask_t *m) { return m ? MAX_NUMNODES : 0; } static __always_inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m) { return m ? m->bits : NULL; } /* * The inline keyword gives the compiler room to decide to inline, or * not inline a function as it sees best. However, as these functions * are called in both __init and non-__init functions, if they are not * inlined we will end up with a section mismatch error (of the type of * freeable items not being freed). So we must use __always_inline here * to fix the problem. If other functions in the future also end up in * this situation they will also need to be annotated as __always_inline */ #define node_set(node, dst) __node_set((node), &(dst)) static __always_inline void __node_set(int node, volatile nodemask_t *dstp) { set_bit(node, dstp->bits); } #define node_clear(node, dst) __node_clear((node), &(dst)) static __always_inline void __node_clear(int node, volatile nodemask_t *dstp) { clear_bit(node, dstp->bits); } #define nodes_setall(dst) __nodes_setall(&(dst), MAX_NUMNODES) static __always_inline void __nodes_setall(nodemask_t *dstp, unsigned int nbits) { bitmap_fill(dstp->bits, nbits); } #define nodes_clear(dst) __nodes_clear(&(dst), MAX_NUMNODES) static __always_inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits) { bitmap_zero(dstp->bits, nbits); } /* No static inline type checking - see Subtlety (1) above. */ #define node_isset(node, nodemask) test_bit((node), (nodemask).bits) #define node_test_and_set(node, nodemask) \ __node_test_and_set((node), &(nodemask)) static __always_inline bool __node_test_and_set(int node, nodemask_t *addr) { return test_and_set_bit(node, addr->bits); } #define nodes_and(dst, src1, src2) \ __nodes_and(&(dst), &(src1), &(src2), MAX_NUMNODES) static __always_inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_or(dst, src1, src2) \ __nodes_or(&(dst), &(src1), &(src2), MAX_NUMNODES) static __always_inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_xor(dst, src1, src2) \ __nodes_xor(&(dst), &(src1), &(src2), MAX_NUMNODES) static __always_inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_andnot(dst, src1, src2) \ __nodes_andnot(&(dst), &(src1), &(src2), MAX_NUMNODES) static __always_inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); } #define nodes_copy(dst, src) __nodes_copy(&(dst), &(src), MAX_NUMNODES) static __always_inline void __nodes_copy(nodemask_t *dstp, const nodemask_t *srcp, unsigned int nbits) { bitmap_copy(dstp->bits, srcp->bits, nbits); } #define nodes_complement(dst, src) \ __nodes_complement(&(dst), &(src), MAX_NUMNODES) static __always_inline void __nodes_complement(nodemask_t *dstp, const nodemask_t *srcp, unsigned int nbits) { bitmap_complement(dstp->bits, srcp->bits, nbits); } #define nodes_equal(src1, src2) \ __nodes_equal(&(src1), &(src2), MAX_NUMNODES) static __always_inline bool __nodes_equal(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_equal(src1p->bits, src2p->bits, nbits); } #define nodes_intersects(src1, src2) \ __nodes_intersects(&(src1), &(src2), MAX_NUMNODES) static __always_inline bool __nodes_intersects(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_intersects(src1p->bits, src2p->bits, nbits); } #define nodes_subset(src1, src2) \ __nodes_subset(&(src1), &(src2), MAX_NUMNODES) static __always_inline bool __nodes_subset(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_subset(src1p->bits, src2p->bits, nbits); } #define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES) static __always_inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits) { return bitmap_empty(srcp->bits, nbits); } #define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES) static __always_inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits) { return bitmap_full(srcp->bits, nbits); } #define nodes_weight(nodemask) __nodes_weight(&(nodemask), MAX_NUMNODES) static __always_inline int __nodes_weight(const nodemask_t *srcp, unsigned int nbits) { return bitmap_weight(srcp->bits, nbits); } /* FIXME: better would be to fix all architectures to never return > MAX_NUMNODES, then the silly min_ts could be dropped. */ #define first_node(src) __first_node(&(src)) static __always_inline unsigned int __first_node(const nodemask_t *srcp) { return min_t(unsigned int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES)); } #define next_node(n, src) __next_node((n), &(src)) static __always_inline unsigned int __next_node(int n, const nodemask_t *srcp) { return min_t(unsigned int, MAX_NUMNODES, find_next_bit(srcp->bits, MAX_NUMNODES, n+1)); } /* * Find the next present node in src, starting after node n, wrapping around to * the first node in src if needed. Returns MAX_NUMNODES if src is empty. */ #define next_node_in(n, src) __next_node_in((n), &(src)) static __always_inline unsigned int __next_node_in(int node, const nodemask_t *srcp) { unsigned int ret = __next_node(node, srcp); if (ret == MAX_NUMNODES) ret = __first_node(srcp); return ret; } static __always_inline void init_nodemask_of_node(nodemask_t *mask, int node) { nodes_clear(*mask); node_set(node, *mask); } #define nodemask_of_node(node) \ ({ \ typeof(_unused_nodemask_arg_) m; \ if (sizeof(m) == sizeof(unsigned long)) { \ m.bits[0] = 1UL << (node); \ } else { \ init_nodemask_of_node(&m, (node)); \ } \ m; \ }) #define first_unset_node(mask) __first_unset_node(&(mask)) static __always_inline unsigned int __first_unset_node(const nodemask_t *maskp) { return min_t(unsigned int, MAX_NUMNODES, find_first_zero_bit(maskp->bits, MAX_NUMNODES)); } #define NODE_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(MAX_NUMNODES) #if MAX_NUMNODES <= BITS_PER_LONG #define NODE_MASK_ALL \ ((nodemask_t) { { \ [BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD \ } }) #else #define NODE_MASK_ALL \ ((nodemask_t) { { \ [0 ... BITS_TO_LONGS(MAX_NUMNODES)-2] = ~0UL, \ [BITS_TO_LONGS(MAX_NUMNODES)-1] = NODE_MASK_LAST_WORD \ } }) #endif #define NODE_MASK_NONE \ ((nodemask_t) { { \ [0 ... BITS_TO_LONGS(MAX_NUMNODES)-1] = 0UL \ } }) #define nodes_addr(src) ((src).bits) #define nodemask_parse_user(ubuf, ulen, dst) \ __nodemask_parse_user((ubuf), (ulen), &(dst), MAX_NUMNODES) static __always_inline int __nodemask_parse_user(const char __user *buf, int len, nodemask_t *dstp, int nbits) { return bitmap_parse_user(buf, len, dstp->bits, nbits); } #define nodelist_parse(buf, dst) __nodelist_parse((buf), &(dst), MAX_NUMNODES) static __always_inline int __nodelist_parse(const char *buf, nodemask_t *dstp, int nbits) { return bitmap_parselist(buf, dstp->bits, nbits); } #define node_remap(oldbit, old, new) \ __node_remap((oldbit), &(old), &(new), MAX_NUMNODES) static __always_inline int __node_remap(int oldbit, const nodemask_t *oldp, const nodemask_t *newp, int nbits) { return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits); } #define nodes_remap(dst, src, old, new) \ __nodes_remap(&(dst), &(src), &(old), &(new), MAX_NUMNODES) static __always_inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp, const nodemask_t *oldp, const nodemask_t *newp, int nbits) { bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits); } #define nodes_onto(dst, orig, relmap) \ __nodes_onto(&(dst), &(orig), &(relmap), MAX_NUMNODES) static __always_inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp, const nodemask_t *relmapp, int nbits) { bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits); } #define nodes_fold(dst, orig, sz) \ __nodes_fold(&(dst), &(orig), sz, MAX_NUMNODES) static __always_inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp, int sz, int nbits) { bitmap_fold(dstp->bits, origp->bits, sz, nbits); } #if MAX_NUMNODES > 1 #define for_each_node_mask(node, mask) \ for ((node) = first_node(mask); \ (node) < MAX_NUMNODES; \ (node) = next_node((node), (mask))) #else /* MAX_NUMNODES == 1 */ #define for_each_node_mask(node, mask) \ for ((node) = 0; (node) < 1 && !nodes_empty(mask); (node)++) #endif /* MAX_NUMNODES */ /* * Bitmasks that are kept for all the nodes. */ enum node_states { N_POSSIBLE, /* The node could become online at some point */ N_ONLINE, /* The node is online */ N_NORMAL_MEMORY, /* The node has regular memory */ #ifdef CONFIG_HIGHMEM N_HIGH_MEMORY, /* The node has regular or high memory */ #else N_HIGH_MEMORY = N_NORMAL_MEMORY, #endif N_MEMORY, /* The node has memory(regular, high, movable) */ N_CPU, /* The node has one or more cpus */ N_GENERIC_INITIATOR, /* The node has one or more Generic Initiators */ NR_NODE_STATES }; /* * The following particular system nodemasks and operations * on them manage all possible and online nodes. */ extern nodemask_t node_states[NR_NODE_STATES]; #if MAX_NUMNODES > 1 static __always_inline int node_state(int node, enum node_states state) { return node_isset(node, node_states[state]); } static __always_inline void node_set_state(int node, enum node_states state) { __node_set(node, &node_states[state]); } static __always_inline void node_clear_state(int node, enum node_states state) { __node_clear(node, &node_states[state]); } static __always_inline int num_node_state(enum node_states state) { return nodes_weight(node_states[state]); } #define for_each_node_state(__node, __state) \ for_each_node_mask((__node), node_states[__state]) #define first_online_node first_node(node_states[N_ONLINE]) #define first_memory_node first_node(node_states[N_MEMORY]) static __always_inline unsigned int next_online_node(int nid) { return next_node(nid, node_states[N_ONLINE]); } static __always_inline unsigned int next_memory_node(int nid) { return next_node(nid, node_states[N_MEMORY]); } extern unsigned int nr_node_ids; extern unsigned int nr_online_nodes; static __always_inline void node_set_online(int nid) { node_set_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); } static __always_inline void node_set_offline(int nid) { node_clear_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); } #else static __always_inline int node_state(int node, enum node_states state) { return node == 0; } static __always_inline void node_set_state(int node, enum node_states state) { } static __always_inline void node_clear_state(int node, enum node_states state) { } static __always_inline int num_node_state(enum node_states state) { return 1; } #define for_each_node_state(node, __state) \ for ( (node) = 0; (node) == 0; (node) = 1) #define first_online_node 0 #define first_memory_node 0 #define next_online_node(nid) (MAX_NUMNODES) #define next_memory_node(nid) (MAX_NUMNODES) #define nr_node_ids 1U #define nr_online_nodes 1U #define node_set_online(node) node_set_state((node), N_ONLINE) #define node_set_offline(node) node_clear_state((node), N_ONLINE) #endif static __always_inline int node_random(const nodemask_t *maskp) { #if defined(CONFIG_NUMA) && (MAX_NUMNODES > 1) int node = find_random_bit(maskp->bits, MAX_NUMNODES); return node < MAX_NUMNODES ? node : NUMA_NO_NODE; #else return 0; #endif } #define node_online_map node_states[N_ONLINE] #define node_possible_map node_states[N_POSSIBLE] #define num_online_nodes() num_node_state(N_ONLINE) #define num_possible_nodes() num_node_state(N_POSSIBLE) #define node_online(node) node_state((node), N_ONLINE) #define node_possible(node) node_state((node), N_POSSIBLE) #define for_each_node(node) for_each_node_state(node, N_POSSIBLE) #define for_each_online_node(node) for_each_node_state(node, N_ONLINE) #define for_each_node_with_cpus(node) for_each_node_state(node, N_CPU) /* * For nodemask scratch area. * NODEMASK_ALLOC(type, name) allocates an object with a specified type and * name. */ #if NODES_SHIFT > 8 /* nodemask_t > 32 bytes */ #define NODEMASK_ALLOC(type, name, gfp_flags) \ type *name = kmalloc(sizeof(*name), gfp_flags) #define NODEMASK_FREE(m) kfree(m) #else #define NODEMASK_ALLOC(type, name, gfp_flags) type _##name, *name = &_##name #define NODEMASK_FREE(m) do {} while (0) #endif /* Example structure for using NODEMASK_ALLOC, used in mempolicy. */ struct nodemask_scratch { nodemask_t mask1; nodemask_t mask2; }; #define NODEMASK_SCRATCH(x) \ NODEMASK_ALLOC(struct nodemask_scratch, x, \ GFP_KERNEL | __GFP_NORETRY) #define NODEMASK_SCRATCH_FREE(x) NODEMASK_FREE(x) #endif /* __LINUX_NODEMASK_H */ |
| 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Common values for SHA-2 algorithms */ #ifndef _CRYPTO_SHA2_H #define _CRYPTO_SHA2_H #include <linux/types.h> #define SHA224_DIGEST_SIZE 28 #define SHA224_BLOCK_SIZE 64 #define SHA256_DIGEST_SIZE 32 #define SHA256_BLOCK_SIZE 64 #define SHA256_STATE_WORDS 8 #define SHA384_DIGEST_SIZE 48 #define SHA384_BLOCK_SIZE 128 #define SHA512_DIGEST_SIZE 64 #define SHA512_BLOCK_SIZE 128 #define SHA512_STATE_SIZE 80 #define SHA224_H0 0xc1059ed8UL #define SHA224_H1 0x367cd507UL #define SHA224_H2 0x3070dd17UL #define SHA224_H3 0xf70e5939UL #define SHA224_H4 0xffc00b31UL #define SHA224_H5 0x68581511UL #define SHA224_H6 0x64f98fa7UL #define SHA224_H7 0xbefa4fa4UL #define SHA256_H0 0x6a09e667UL #define SHA256_H1 0xbb67ae85UL #define SHA256_H2 0x3c6ef372UL #define SHA256_H3 0xa54ff53aUL #define SHA256_H4 0x510e527fUL #define SHA256_H5 0x9b05688cUL #define SHA256_H6 0x1f83d9abUL #define SHA256_H7 0x5be0cd19UL #define SHA384_H0 0xcbbb9d5dc1059ed8ULL #define SHA384_H1 0x629a292a367cd507ULL #define SHA384_H2 0x9159015a3070dd17ULL #define SHA384_H3 0x152fecd8f70e5939ULL #define SHA384_H4 0x67332667ffc00b31ULL #define SHA384_H5 0x8eb44a8768581511ULL #define SHA384_H6 0xdb0c2e0d64f98fa7ULL #define SHA384_H7 0x47b5481dbefa4fa4ULL #define SHA512_H0 0x6a09e667f3bcc908ULL #define SHA512_H1 0xbb67ae8584caa73bULL #define SHA512_H2 0x3c6ef372fe94f82bULL #define SHA512_H3 0xa54ff53a5f1d36f1ULL #define SHA512_H4 0x510e527fade682d1ULL #define SHA512_H5 0x9b05688c2b3e6c1fULL #define SHA512_H6 0x1f83d9abfb41bd6bULL #define SHA512_H7 0x5be0cd19137e2179ULL extern const u8 sha224_zero_message_hash[SHA224_DIGEST_SIZE]; extern const u8 sha256_zero_message_hash[SHA256_DIGEST_SIZE]; extern const u8 sha384_zero_message_hash[SHA384_DIGEST_SIZE]; extern const u8 sha512_zero_message_hash[SHA512_DIGEST_SIZE]; struct crypto_sha256_state { u32 state[SHA256_STATE_WORDS]; u64 count; }; static inline void sha224_block_init(struct crypto_sha256_state *sctx) { sctx->state[0] = SHA224_H0; sctx->state[1] = SHA224_H1; sctx->state[2] = SHA224_H2; sctx->state[3] = SHA224_H3; sctx->state[4] = SHA224_H4; sctx->state[5] = SHA224_H5; sctx->state[6] = SHA224_H6; sctx->state[7] = SHA224_H7; sctx->count = 0; } static inline void sha256_block_init(struct crypto_sha256_state *sctx) { sctx->state[0] = SHA256_H0; sctx->state[1] = SHA256_H1; sctx->state[2] = SHA256_H2; sctx->state[3] = SHA256_H3; sctx->state[4] = SHA256_H4; sctx->state[5] = SHA256_H5; sctx->state[6] = SHA256_H6; sctx->state[7] = SHA256_H7; sctx->count = 0; } struct sha256_state { union { struct crypto_sha256_state ctx; struct { u32 state[SHA256_STATE_WORDS]; u64 count; }; }; u8 buf[SHA256_BLOCK_SIZE]; }; struct sha512_state { u64 state[SHA512_DIGEST_SIZE / 8]; u64 count[2]; u8 buf[SHA512_BLOCK_SIZE]; }; /* State for the SHA-256 (and SHA-224) compression function */ struct sha256_block_state { u32 h[SHA256_STATE_WORDS]; }; /* * Context structure, shared by SHA-224 and SHA-256. The sha224_ctx and * sha256_ctx structs wrap this one so that the API has proper typing and * doesn't allow mixing the SHA-224 and SHA-256 functions arbitrarily. */ struct __sha256_ctx { struct sha256_block_state state; u64 bytecount; u8 buf[SHA256_BLOCK_SIZE] __aligned(__alignof__(__be64)); }; void __sha256_update(struct __sha256_ctx *ctx, const u8 *data, size_t len); /* * HMAC key and message context structs, shared by HMAC-SHA224 and HMAC-SHA256. * The hmac_sha224_* and hmac_sha256_* structs wrap this one so that the API has * proper typing and doesn't allow mixing the functions arbitrarily. */ struct __hmac_sha256_key { struct sha256_block_state istate; struct sha256_block_state ostate; }; struct __hmac_sha256_ctx { struct __sha256_ctx sha_ctx; struct sha256_block_state ostate; }; void __hmac_sha256_init(struct __hmac_sha256_ctx *ctx, const struct __hmac_sha256_key *key); /** * struct sha224_ctx - Context for hashing a message with SHA-224 * @ctx: private */ struct sha224_ctx { struct __sha256_ctx ctx; }; /** * sha224_init() - Initialize a SHA-224 context for a new message * @ctx: the context to initialize * * If you don't need incremental computation, consider sha224() instead. * * Context: Any context. */ void sha224_init(struct sha224_ctx *ctx); /** * sha224_update() - Update a SHA-224 context with message data * @ctx: the context to update; must have been initialized * @data: the message data * @len: the data length in bytes * * This can be called any number of times. * * Context: Any context. */ static inline void sha224_update(struct sha224_ctx *ctx, const u8 *data, size_t len) { __sha256_update(&ctx->ctx, data, len); } /** * sha224_final() - Finish computing a SHA-224 message digest * @ctx: the context to finalize; must have been initialized * @out: (output) the resulting SHA-224 message digest * * After finishing, this zeroizes @ctx. So the caller does not need to do it. * * Context: Any context. */ void sha224_final(struct sha224_ctx *ctx, u8 out[SHA224_DIGEST_SIZE]); /** * sha224() - Compute SHA-224 message digest in one shot * @data: the message data * @len: the data length in bytes * @out: (output) the resulting SHA-224 message digest * * Context: Any context. */ void sha224(const u8 *data, size_t len, u8 out[SHA224_DIGEST_SIZE]); /** * struct hmac_sha224_key - Prepared key for HMAC-SHA224 * @key: private */ struct hmac_sha224_key { struct __hmac_sha256_key key; }; /** * struct hmac_sha224_ctx - Context for computing HMAC-SHA224 of a message * @ctx: private */ struct hmac_sha224_ctx { struct __hmac_sha256_ctx ctx; }; /** * hmac_sha224_preparekey() - Prepare a key for HMAC-SHA224 * @key: (output) the key structure to initialize * @raw_key: the raw HMAC-SHA224 key * @raw_key_len: the key length in bytes. All key lengths are supported. * * Note: the caller is responsible for zeroizing both the struct hmac_sha224_key * and the raw key once they are no longer needed. * * Context: Any context. */ void hmac_sha224_preparekey(struct hmac_sha224_key *key, const u8 *raw_key, size_t raw_key_len); /** * hmac_sha224_init() - Initialize an HMAC-SHA224 context for a new message * @ctx: (output) the HMAC context to initialize * @key: the prepared HMAC key * * If you don't need incremental computation, consider hmac_sha224() instead. * * Context: Any context. */ static inline void hmac_sha224_init(struct hmac_sha224_ctx *ctx, const struct hmac_sha224_key *key) { __hmac_sha256_init(&ctx->ctx, &key->key); } /** * hmac_sha224_init_usingrawkey() - Initialize an HMAC-SHA224 context for a new * message, using a raw key * @ctx: (output) the HMAC context to initialize * @raw_key: the raw HMAC-SHA224 key * @raw_key_len: the key length in bytes. All key lengths are supported. * * If you don't need incremental computation, consider hmac_sha224_usingrawkey() * instead. * * Context: Any context. */ void hmac_sha224_init_usingrawkey(struct hmac_sha224_ctx *ctx, const u8 *raw_key, size_t raw_key_len); /** * hmac_sha224_update() - Update an HMAC-SHA224 context with message data * @ctx: the HMAC context to update; must have been initialized * @data: the message data * @data_len: the data length in bytes * * This can be called any number of times. * * Context: Any context. */ static inline void hmac_sha224_update(struct hmac_sha224_ctx *ctx, const u8 *data, size_t data_len) { __sha256_update(&ctx->ctx.sha_ctx, data, data_len); } /** * hmac_sha224_final() - Finish computing an HMAC-SHA224 value * @ctx: the HMAC context to finalize; must have been initialized * @out: (output) the resulting HMAC-SHA224 value * * After finishing, this zeroizes @ctx. So the caller does not need to do it. * * Context: Any context. */ void hmac_sha224_final(struct hmac_sha224_ctx *ctx, u8 out[SHA224_DIGEST_SIZE]); /** * hmac_sha224() - Compute HMAC-SHA224 in one shot, using a prepared key * @key: the prepared HMAC key * @data: the message data * @data_len: the data length in bytes * @out: (output) the resulting HMAC-SHA224 value * * If you're using the key only once, consider using hmac_sha224_usingrawkey(). * * Context: Any context. */ void hmac_sha224(const struct hmac_sha224_key *key, const u8 *data, size_t data_len, u8 out[SHA224_DIGEST_SIZE]); /** * hmac_sha224_usingrawkey() - Compute HMAC-SHA224 in one shot, using a raw key * @raw_key: the raw HMAC-SHA224 key * @raw_key_len: the key length in bytes. All key lengths are supported. * @data: the message data * @data_len: the data length in bytes * @out: (output) the resulting HMAC-SHA224 value * * If you're using the key multiple times, prefer to use * hmac_sha224_preparekey() followed by multiple calls to hmac_sha224() instead. * * Context: Any context. */ void hmac_sha224_usingrawkey(const u8 *raw_key, size_t raw_key_len, const u8 *data, size_t data_len, u8 out[SHA224_DIGEST_SIZE]); /** * struct sha256_ctx - Context for hashing a message with SHA-256 * @ctx: private */ struct sha256_ctx { struct __sha256_ctx ctx; }; /** * sha256_init() - Initialize a SHA-256 context for a new message * @ctx: the context to initialize * * If you don't need incremental computation, consider sha256() instead. * * Context: Any context. */ void sha256_init(struct sha256_ctx *ctx); /** * sha256_update() - Update a SHA-256 context with message data * @ctx: the context to update; must have been initialized * @data: the message data * @len: the data length in bytes * * This can be called any number of times. * * Context: Any context. */ static inline void sha256_update(struct sha256_ctx *ctx, const u8 *data, size_t len) { __sha256_update(&ctx->ctx, data, len); } /** * sha256_final() - Finish computing a SHA-256 message digest * @ctx: the context to finalize; must have been initialized * @out: (output) the resulting SHA-256 message digest * * After finishing, this zeroizes @ctx. So the caller does not need to do it. * * Context: Any context. */ void sha256_final(struct sha256_ctx *ctx, u8 out[SHA256_DIGEST_SIZE]); /** * sha256() - Compute SHA-256 message digest in one shot * @data: the message data * @len: the data length in bytes * @out: (output) the resulting SHA-256 message digest * * Context: Any context. */ void sha256(const u8 *data, size_t len, u8 out[SHA256_DIGEST_SIZE]); /** * sha256_finup_2x() - Compute two SHA-256 digests from a common initial * context. On some CPUs, this is faster than sequentially * computing each digest. * @ctx: an optional initial context, which may have already processed data. If * NULL, a default initial context is used (equivalent to sha256_init()). * @data1: data for the first message * @data2: data for the second message * @len: the length of each of @data1 and @data2, in bytes * @out1: (output) the first SHA-256 message digest * @out2: (output) the second SHA-256 message digest * * Context: Any context. */ void sha256_finup_2x(const struct sha256_ctx *ctx, const u8 *data1, const u8 *data2, size_t len, u8 out1[SHA256_DIGEST_SIZE], u8 out2[SHA256_DIGEST_SIZE]); /** * sha256_finup_2x_is_optimized() - Check if sha256_finup_2x() is using a real * interleaved implementation, as opposed to a * sequential fallback * @return: true if optimized * * Context: Any context. */ bool sha256_finup_2x_is_optimized(void); /** * struct hmac_sha256_key - Prepared key for HMAC-SHA256 * @key: private */ struct hmac_sha256_key { struct __hmac_sha256_key key; }; /** * struct hmac_sha256_ctx - Context for computing HMAC-SHA256 of a message * @ctx: private */ struct hmac_sha256_ctx { struct __hmac_sha256_ctx ctx; }; /** * hmac_sha256_preparekey() - Prepare a key for HMAC-SHA256 * @key: (output) the key structure to initialize * @raw_key: the raw HMAC-SHA256 key * @raw_key_len: the key length in bytes. All key lengths are supported. * * Note: the caller is responsible for zeroizing both the struct hmac_sha256_key * and the raw key once they are no longer needed. * * Context: Any context. */ void hmac_sha256_preparekey(struct hmac_sha256_key *key, const u8 *raw_key, size_t raw_key_len); /** * hmac_sha256_init() - Initialize an HMAC-SHA256 context for a new message * @ctx: (output) the HMAC context to initialize * @key: the prepared HMAC key * * If you don't need incremental computation, consider hmac_sha256() instead. * * Context: Any context. */ static inline void hmac_sha256_init(struct hmac_sha256_ctx *ctx, const struct hmac_sha256_key *key) { __hmac_sha256_init(&ctx->ctx, &key->key); } /** * hmac_sha256_init_usingrawkey() - Initialize an HMAC-SHA256 context for a new * message, using a raw key * @ctx: (output) the HMAC context to initialize * @raw_key: the raw HMAC-SHA256 key * @raw_key_len: the key length in bytes. All key lengths are supported. * * If you don't need incremental computation, consider hmac_sha256_usingrawkey() * instead. * * Context: Any context. */ void hmac_sha256_init_usingrawkey(struct hmac_sha256_ctx *ctx, const u8 *raw_key, size_t raw_key_len); /** * hmac_sha256_update() - Update an HMAC-SHA256 context with message data * @ctx: the HMAC context to update; must have been initialized * @data: the message data * @data_len: the data length in bytes * * This can be called any number of times. * * Context: Any context. */ static inline void hmac_sha256_update(struct hmac_sha256_ctx *ctx, const u8 *data, size_t data_len) { __sha256_update(&ctx->ctx.sha_ctx, data, data_len); } /** * hmac_sha256_final() - Finish computing an HMAC-SHA256 value * @ctx: the HMAC context to finalize; must have been initialized * @out: (output) the resulting HMAC-SHA256 value * * After finishing, this zeroizes @ctx. So the caller does not need to do it. * * Context: Any context. */ void hmac_sha256_final(struct hmac_sha256_ctx *ctx, u8 out[SHA256_DIGEST_SIZE]); /** * hmac_sha256() - Compute HMAC-SHA256 in one shot, using a prepared key * @key: the prepared HMAC key * @data: the message data * @data_len: the data length in bytes * @out: (output) the resulting HMAC-SHA256 value * * If you're using the key only once, consider using hmac_sha256_usingrawkey(). * * Context: Any context. */ void hmac_sha256(const struct hmac_sha256_key *key, const u8 *data, size_t data_len, u8 out[SHA256_DIGEST_SIZE]); /** * hmac_sha256_usingrawkey() - Compute HMAC-SHA256 in one shot, using a raw key * @raw_key: the raw HMAC-SHA256 key * @raw_key_len: the key length in bytes. All key lengths are supported. * @data: the message data * @data_len: the data length in bytes * @out: (output) the resulting HMAC-SHA256 value * * If you're using the key multiple times, prefer to use * hmac_sha256_preparekey() followed by multiple calls to hmac_sha256() instead. * * Context: Any context. */ void hmac_sha256_usingrawkey(const u8 *raw_key, size_t raw_key_len, const u8 *data, size_t data_len, u8 out[SHA256_DIGEST_SIZE]); /* State for the SHA-512 (and SHA-384) compression function */ struct sha512_block_state { u64 h[8]; }; /* * Context structure, shared by SHA-384 and SHA-512. The sha384_ctx and * sha512_ctx structs wrap this one so that the API has proper typing and * doesn't allow mixing the SHA-384 and SHA-512 functions arbitrarily. */ struct __sha512_ctx { struct sha512_block_state state; u64 bytecount_lo; u64 bytecount_hi; u8 buf[SHA512_BLOCK_SIZE] __aligned(__alignof__(__be64)); }; void __sha512_update(struct __sha512_ctx *ctx, const u8 *data, size_t len); /* * HMAC key and message context structs, shared by HMAC-SHA384 and HMAC-SHA512. * The hmac_sha384_* and hmac_sha512_* structs wrap this one so that the API has * proper typing and doesn't allow mixing the functions arbitrarily. */ struct __hmac_sha512_key { struct sha512_block_state istate; struct sha512_block_state ostate; }; struct __hmac_sha512_ctx { struct __sha512_ctx sha_ctx; struct sha512_block_state ostate; }; void __hmac_sha512_init(struct __hmac_sha512_ctx *ctx, const struct __hmac_sha512_key *key); /** * struct sha384_ctx - Context for hashing a message with SHA-384 * @ctx: private */ struct sha384_ctx { struct __sha512_ctx ctx; }; /** * sha384_init() - Initialize a SHA-384 context for a new message * @ctx: the context to initialize * * If you don't need incremental computation, consider sha384() instead. * * Context: Any context. */ void sha384_init(struct sha384_ctx *ctx); /** * sha384_update() - Update a SHA-384 context with message data * @ctx: the context to update; must have been initialized * @data: the message data * @len: the data length in bytes * * This can be called any number of times. * * Context: Any context. */ static inline void sha384_update(struct sha384_ctx *ctx, const u8 *data, size_t len) { __sha512_update(&ctx->ctx, data, len); } /** * sha384_final() - Finish computing a SHA-384 message digest * @ctx: the context to finalize; must have been initialized * @out: (output) the resulting SHA-384 message digest * * After finishing, this zeroizes @ctx. So the caller does not need to do it. * * Context: Any context. */ void sha384_final(struct sha384_ctx *ctx, u8 out[SHA384_DIGEST_SIZE]); /** * sha384() - Compute SHA-384 message digest in one shot * @data: the message data * @len: the data length in bytes * @out: (output) the resulting SHA-384 message digest * * Context: Any context. */ void sha384(const u8 *data, size_t len, u8 out[SHA384_DIGEST_SIZE]); /** * struct hmac_sha384_key - Prepared key for HMAC-SHA384 * @key: private */ struct hmac_sha384_key { struct __hmac_sha512_key key; }; /** * struct hmac_sha384_ctx - Context for computing HMAC-SHA384 of a message * @ctx: private */ struct hmac_sha384_ctx { struct __hmac_sha512_ctx ctx; }; /** * hmac_sha384_preparekey() - Prepare a key for HMAC-SHA384 * @key: (output) the key structure to initialize * @raw_key: the raw HMAC-SHA384 key * @raw_key_len: the key length in bytes. All key lengths are supported. * * Note: the caller is responsible for zeroizing both the struct hmac_sha384_key * and the raw key once they are no longer needed. * * Context: Any context. */ void hmac_sha384_preparekey(struct hmac_sha384_key *key, const u8 *raw_key, size_t raw_key_len); /** * hmac_sha384_init() - Initialize an HMAC-SHA384 context for a new message * @ctx: (output) the HMAC context to initialize * @key: the prepared HMAC key * * If you don't need incremental computation, consider hmac_sha384() instead. * * Context: Any context. */ static inline void hmac_sha384_init(struct hmac_sha384_ctx *ctx, const struct hmac_sha384_key *key) { __hmac_sha512_init(&ctx->ctx, &key->key); } /** * hmac_sha384_init_usingrawkey() - Initialize an HMAC-SHA384 context for a new * message, using a raw key * @ctx: (output) the HMAC context to initialize * @raw_key: the raw HMAC-SHA384 key * @raw_key_len: the key length in bytes. All key lengths are supported. * * If you don't need incremental computation, consider hmac_sha384_usingrawkey() * instead. * * Context: Any context. */ void hmac_sha384_init_usingrawkey(struct hmac_sha384_ctx *ctx, const u8 *raw_key, size_t raw_key_len); /** * hmac_sha384_update() - Update an HMAC-SHA384 context with message data * @ctx: the HMAC context to update; must have been initialized * @data: the message data * @data_len: the data length in bytes * * This can be called any number of times. * * Context: Any context. */ static inline void hmac_sha384_update(struct hmac_sha384_ctx *ctx, const u8 *data, size_t data_len) { __sha512_update(&ctx->ctx.sha_ctx, data, data_len); } /** * hmac_sha384_final() - Finish computing an HMAC-SHA384 value * @ctx: the HMAC context to finalize; must have been initialized * @out: (output) the resulting HMAC-SHA384 value * * After finishing, this zeroizes @ctx. So the caller does not need to do it. * * Context: Any context. */ void hmac_sha384_final(struct hmac_sha384_ctx *ctx, u8 out[SHA384_DIGEST_SIZE]); /** * hmac_sha384() - Compute HMAC-SHA384 in one shot, using a prepared key * @key: the prepared HMAC key * @data: the message data * @data_len: the data length in bytes * @out: (output) the resulting HMAC-SHA384 value * * If you're using the key only once, consider using hmac_sha384_usingrawkey(). * * Context: Any context. */ void hmac_sha384(const struct hmac_sha384_key *key, const u8 *data, size_t data_len, u8 out[SHA384_DIGEST_SIZE]); /** * hmac_sha384_usingrawkey() - Compute HMAC-SHA384 in one shot, using a raw key * @raw_key: the raw HMAC-SHA384 key * @raw_key_len: the key length in bytes. All key lengths are supported. * @data: the message data * @data_len: the data length in bytes * @out: (output) the resulting HMAC-SHA384 value * * If you're using the key multiple times, prefer to use * hmac_sha384_preparekey() followed by multiple calls to hmac_sha384() instead. * * Context: Any context. */ void hmac_sha384_usingrawkey(const u8 *raw_key, size_t raw_key_len, const u8 *data, size_t data_len, u8 out[SHA384_DIGEST_SIZE]); /** * struct sha512_ctx - Context for hashing a message with SHA-512 * @ctx: private */ struct sha512_ctx { struct __sha512_ctx ctx; }; /** * sha512_init() - Initialize a SHA-512 context for a new message * @ctx: the context to initialize * * If you don't need incremental computation, consider sha512() instead. * * Context: Any context. */ void sha512_init(struct sha512_ctx *ctx); /** * sha512_update() - Update a SHA-512 context with message data * @ctx: the context to update; must have been initialized * @data: the message data * @len: the data length in bytes * * This can be called any number of times. * * Context: Any context. */ static inline void sha512_update(struct sha512_ctx *ctx, const u8 *data, size_t len) { __sha512_update(&ctx->ctx, data, len); } /** * sha512_final() - Finish computing a SHA-512 message digest * @ctx: the context to finalize; must have been initialized * @out: (output) the resulting SHA-512 message digest * * After finishing, this zeroizes @ctx. So the caller does not need to do it. * * Context: Any context. */ void sha512_final(struct sha512_ctx *ctx, u8 out[SHA512_DIGEST_SIZE]); /** * sha512() - Compute SHA-512 message digest in one shot * @data: the message data * @len: the data length in bytes * @out: (output) the resulting SHA-512 message digest * * Context: Any context. */ void sha512(const u8 *data, size_t len, u8 out[SHA512_DIGEST_SIZE]); /** * struct hmac_sha512_key - Prepared key for HMAC-SHA512 * @key: private */ struct hmac_sha512_key { struct __hmac_sha512_key key; }; /** * struct hmac_sha512_ctx - Context for computing HMAC-SHA512 of a message * @ctx: private */ struct hmac_sha512_ctx { struct __hmac_sha512_ctx ctx; }; /** * hmac_sha512_preparekey() - Prepare a key for HMAC-SHA512 * @key: (output) the key structure to initialize * @raw_key: the raw HMAC-SHA512 key * @raw_key_len: the key length in bytes. All key lengths are supported. * * Note: the caller is responsible for zeroizing both the struct hmac_sha512_key * and the raw key once they are no longer needed. * * Context: Any context. */ void hmac_sha512_preparekey(struct hmac_sha512_key *key, const u8 *raw_key, size_t raw_key_len); /** * hmac_sha512_init() - Initialize an HMAC-SHA512 context for a new message * @ctx: (output) the HMAC context to initialize * @key: the prepared HMAC key * * If you don't need incremental computation, consider hmac_sha512() instead. * * Context: Any context. */ static inline void hmac_sha512_init(struct hmac_sha512_ctx *ctx, const struct hmac_sha512_key *key) { __hmac_sha512_init(&ctx->ctx, &key->key); } /** * hmac_sha512_init_usingrawkey() - Initialize an HMAC-SHA512 context for a new * message, using a raw key * @ctx: (output) the HMAC context to initialize * @raw_key: the raw HMAC-SHA512 key * @raw_key_len: the key length in bytes. All key lengths are supported. * * If you don't need incremental computation, consider hmac_sha512_usingrawkey() * instead. * * Context: Any context. */ void hmac_sha512_init_usingrawkey(struct hmac_sha512_ctx *ctx, const u8 *raw_key, size_t raw_key_len); /** * hmac_sha512_update() - Update an HMAC-SHA512 context with message data * @ctx: the HMAC context to update; must have been initialized * @data: the message data * @data_len: the data length in bytes * * This can be called any number of times. * * Context: Any context. */ static inline void hmac_sha512_update(struct hmac_sha512_ctx *ctx, const u8 *data, size_t data_len) { __sha512_update(&ctx->ctx.sha_ctx, data, data_len); } /** * hmac_sha512_final() - Finish computing an HMAC-SHA512 value * @ctx: the HMAC context to finalize; must have been initialized * @out: (output) the resulting HMAC-SHA512 value * * After finishing, this zeroizes @ctx. So the caller does not need to do it. * * Context: Any context. */ void hmac_sha512_final(struct hmac_sha512_ctx *ctx, u8 out[SHA512_DIGEST_SIZE]); /** * hmac_sha512() - Compute HMAC-SHA512 in one shot, using a prepared key * @key: the prepared HMAC key * @data: the message data * @data_len: the data length in bytes * @out: (output) the resulting HMAC-SHA512 value * * If you're using the key only once, consider using hmac_sha512_usingrawkey(). * * Context: Any context. */ void hmac_sha512(const struct hmac_sha512_key *key, const u8 *data, size_t data_len, u8 out[SHA512_DIGEST_SIZE]); /** * hmac_sha512_usingrawkey() - Compute HMAC-SHA512 in one shot, using a raw key * @raw_key: the raw HMAC-SHA512 key * @raw_key_len: the key length in bytes. All key lengths are supported. * @data: the message data * @data_len: the data length in bytes * @out: (output) the resulting HMAC-SHA512 value * * If you're using the key multiple times, prefer to use * hmac_sha512_preparekey() followed by multiple calls to hmac_sha512() instead. * * Context: Any context. */ void hmac_sha512_usingrawkey(const u8 *raw_key, size_t raw_key_len, const u8 *data, size_t data_len, u8 out[SHA512_DIGEST_SIZE]); #endif /* _CRYPTO_SHA2_H */ |
| 1 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Cryptographic utilities * * Copyright (c) 2023 Herbert Xu <herbert@gondor.apana.org.au> */ #ifndef _CRYPTO_UTILS_H #define _CRYPTO_UTILS_H #include <linux/unaligned.h> #include <linux/compiler_attributes.h> #include <linux/types.h> void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int size); static inline void crypto_xor(u8 *dst, const u8 *src, unsigned int size) { if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && __builtin_constant_p(size) && (size % sizeof(unsigned long)) == 0) { unsigned long *d = (unsigned long *)dst; unsigned long *s = (unsigned long *)src; unsigned long l; while (size > 0) { l = get_unaligned(d) ^ get_unaligned(s++); put_unaligned(l, d++); size -= sizeof(unsigned long); } } else { __crypto_xor(dst, dst, src, size); } } static inline void crypto_xor_cpy(u8 *dst, const u8 *src1, const u8 *src2, unsigned int size) { if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && __builtin_constant_p(size) && (size % sizeof(unsigned long)) == 0) { unsigned long *d = (unsigned long *)dst; unsigned long *s1 = (unsigned long *)src1; unsigned long *s2 = (unsigned long *)src2; unsigned long l; while (size > 0) { l = get_unaligned(s1++) ^ get_unaligned(s2++); put_unaligned(l, d++); size -= sizeof(unsigned long); } } else { __crypto_xor(dst, src1, src2, size); } } noinline unsigned long __crypto_memneq(const void *a, const void *b, size_t size); /** * crypto_memneq - Compare two areas of memory without leaking * timing information. * * @a: One area of memory * @b: Another area of memory * @size: The size of the area. * * Returns 0 when data is equal, 1 otherwise. */ static inline int crypto_memneq(const void *a, const void *b, size_t size) { return __crypto_memneq(a, b, size) != 0UL ? 1 : 0; } #endif /* _CRYPTO_UTILS_H */ |
| 680 680 671 670 673 671 673 669 65 64 65 1 362 2 364 368 366 361 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 | // SPDX-License-Identifier: GPL-2.0 /* sysfs entries for device PM */ #include <linux/device.h> #include <linux/kobject.h> #include <linux/string.h> #include <linux/export.h> #include <linux/pm_qos.h> #include <linux/pm_runtime.h> #include <linux/atomic.h> #include <linux/jiffies.h> #include "power.h" /* * control - Report/change current runtime PM setting of the device * * Runtime power management of a device can be blocked with the help of * this attribute. All devices have one of the following two values for * the power/control file: * * + "auto\n" to allow the device to be power managed at run time; * + "on\n" to prevent the device from being power managed at run time; * * The default for all devices is "auto", which means that devices may be * subject to automatic power management, depending on their drivers. * Changing this attribute to "on" prevents the driver from power managing * the device at run time. Doing that while the device is suspended causes * it to be woken up. * * wakeup - Report/change current wakeup option for device * * Some devices support "wakeup" events, which are hardware signals * used to activate devices from suspended or low power states. Such * devices have one of three values for the sysfs power/wakeup file: * * + "enabled\n" to issue the events; * + "disabled\n" not to do so; or * + "\n" for temporary or permanent inability to issue wakeup. * * (For example, unconfigured USB devices can't issue wakeups.) * * Familiar examples of devices that can issue wakeup events include * keyboards and mice (both PS2 and USB styles), power buttons, modems, * "Wake-On-LAN" Ethernet links, GPIO lines, and more. Some events * will wake the entire system from a suspend state; others may just * wake up the device (if the system as a whole is already active). * Some wakeup events use normal IRQ lines; other use special out * of band signaling. * * It is the responsibility of device drivers to enable (or disable) * wakeup signaling as part of changing device power states, respecting * the policy choices provided through the driver model. * * Devices may not be able to generate wakeup events from all power * states. Also, the events may be ignored in some configurations; * for example, they might need help from other devices that aren't * active, or which may have wakeup disabled. Some drivers rely on * wakeup events internally (unless they are disabled), keeping * their hardware in low power modes whenever they're unused. This * saves runtime power, without requiring system-wide sleep states. * * async - Report/change current async suspend setting for the device * * Asynchronous suspend and resume of the device during system-wide power * state transitions can be enabled by writing "enabled" to this file. * Analogously, if "disabled" is written to this file, the device will be * suspended and resumed synchronously. * * All devices have one of the following two values for power/async: * * + "enabled\n" to permit the asynchronous suspend/resume of the device; * + "disabled\n" to forbid it; * * NOTE: It generally is unsafe to permit the asynchronous suspend/resume * of a device unless it is certain that all of the PM dependencies of the * device are known to the PM core. However, for some devices this * attribute is set to "enabled" by bus type code or device drivers and in * that cases it should be safe to leave the default value. * * autosuspend_delay_ms - Report/change a device's autosuspend_delay value * * Some drivers don't want to carry out a runtime suspend as soon as a * device becomes idle; they want it always to remain idle for some period * of time before suspending it. This period is the autosuspend_delay * value (expressed in milliseconds) and it can be controlled by the user. * If the value is negative then the device will never be runtime * suspended. * * NOTE: The autosuspend_delay_ms attribute and the autosuspend_delay * value are used only if the driver calls pm_runtime_use_autosuspend(). * * wakeup_count - Report the number of wakeup events related to the device */ const char power_group_name[] = "power"; EXPORT_SYMBOL_GPL(power_group_name); static const char ctrl_auto[] = "auto"; static const char ctrl_on[] = "on"; static ssize_t control_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", dev->power.runtime_auto ? ctrl_auto : ctrl_on); } static ssize_t control_store(struct device * dev, struct device_attribute *attr, const char * buf, size_t n) { device_lock(dev); if (sysfs_streq(buf, ctrl_auto)) pm_runtime_allow(dev); else if (sysfs_streq(buf, ctrl_on)) pm_runtime_forbid(dev); else n = -EINVAL; device_unlock(dev); return n; } static DEVICE_ATTR_RW(control); static ssize_t runtime_active_time_show(struct device *dev, struct device_attribute *attr, char *buf) { u64 tmp = pm_runtime_active_time(dev); do_div(tmp, NSEC_PER_MSEC); return sysfs_emit(buf, "%llu\n", tmp); } static DEVICE_ATTR_RO(runtime_active_time); static ssize_t runtime_suspended_time_show(struct device *dev, struct device_attribute *attr, char *buf) { u64 tmp = pm_runtime_suspended_time(dev); do_div(tmp, NSEC_PER_MSEC); return sysfs_emit(buf, "%llu\n", tmp); } static DEVICE_ATTR_RO(runtime_suspended_time); static ssize_t runtime_status_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *output; if (dev->power.runtime_error) { output = "error"; } else if (dev->power.disable_depth) { output = "unsupported"; } else { switch (dev->power.runtime_status) { case RPM_SUSPENDED: output = "suspended"; break; case RPM_SUSPENDING: output = "suspending"; break; case RPM_RESUMING: output = "resuming"; break; case RPM_ACTIVE: output = "active"; break; default: return -EIO; } } return sysfs_emit(buf, "%s\n", output); } static DEVICE_ATTR_RO(runtime_status); static ssize_t autosuspend_delay_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { if (!dev->power.use_autosuspend) return -EIO; return sysfs_emit(buf, "%d\n", dev->power.autosuspend_delay); } static ssize_t autosuspend_delay_ms_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { long delay; if (!dev->power.use_autosuspend) return -EIO; if (kstrtol(buf, 10, &delay) != 0 || delay != (int) delay) return -EINVAL; device_lock(dev); pm_runtime_set_autosuspend_delay(dev, delay); device_unlock(dev); return n; } static DEVICE_ATTR_RW(autosuspend_delay_ms); static ssize_t pm_qos_resume_latency_us_show(struct device *dev, struct device_attribute *attr, char *buf) { s32 value = dev_pm_qos_requested_resume_latency(dev); if (value == 0) return sysfs_emit(buf, "n/a\n"); if (value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) value = 0; return sysfs_emit(buf, "%d\n", value); } static ssize_t pm_qos_resume_latency_us_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { s32 value; int ret; if (!kstrtos32(buf, 0, &value)) { /* * Prevent users from writing negative or "no constraint" values * directly. */ if (value < 0 || value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) return -EINVAL; if (value == 0) value = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; } else if (sysfs_streq(buf, "n/a")) { value = 0; } else { return -EINVAL; } ret = dev_pm_qos_update_request(dev->power.qos->resume_latency_req, value); return ret < 0 ? ret : n; } static DEVICE_ATTR_RW(pm_qos_resume_latency_us); static ssize_t pm_qos_latency_tolerance_us_show(struct device *dev, struct device_attribute *attr, char *buf) { s32 value = dev_pm_qos_get_user_latency_tolerance(dev); if (value < 0) return sysfs_emit(buf, "%s\n", "auto"); if (value == PM_QOS_LATENCY_ANY) return sysfs_emit(buf, "%s\n", "any"); return sysfs_emit(buf, "%d\n", value); } static ssize_t pm_qos_latency_tolerance_us_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { s32 value; int ret; if (kstrtos32(buf, 0, &value) == 0) { /* Users can't write negative values directly */ if (value < 0) return -EINVAL; } else { if (sysfs_streq(buf, "auto")) value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT; else if (sysfs_streq(buf, "any")) value = PM_QOS_LATENCY_ANY; else return -EINVAL; } ret = dev_pm_qos_update_user_latency_tolerance(dev, value); return ret < 0 ? ret : n; } static DEVICE_ATTR_RW(pm_qos_latency_tolerance_us); static ssize_t pm_qos_no_power_off_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", !!(dev_pm_qos_requested_flags(dev) & PM_QOS_FLAG_NO_POWER_OFF)); } static ssize_t pm_qos_no_power_off_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { int ret; if (kstrtoint(buf, 0, &ret)) return -EINVAL; if (ret != 0 && ret != 1) return -EINVAL; ret = dev_pm_qos_update_flags(dev, PM_QOS_FLAG_NO_POWER_OFF, ret); return ret < 0 ? ret : n; } static DEVICE_ATTR_RW(pm_qos_no_power_off); #ifdef CONFIG_PM_SLEEP static const char _enabled[] = "enabled"; static const char _disabled[] = "disabled"; static ssize_t wakeup_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", device_can_wakeup(dev) ? (device_may_wakeup(dev) ? _enabled : _disabled) : ""); } static ssize_t wakeup_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { if (!device_can_wakeup(dev)) return -EINVAL; if (sysfs_streq(buf, _enabled)) device_set_wakeup_enable(dev, 1); else if (sysfs_streq(buf, _disabled)) device_set_wakeup_enable(dev, 0); else return -EINVAL; return n; } static DEVICE_ATTR_RW(wakeup); static ssize_t wakeup_count_show(struct device *dev, struct device_attribute *attr, char *buf) { unsigned long count; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { count = dev->power.wakeup->wakeup_count; enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lu\n", count); } static DEVICE_ATTR_RO(wakeup_count); static ssize_t wakeup_active_count_show(struct device *dev, struct device_attribute *attr, char *buf) { unsigned long count; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { count = dev->power.wakeup->active_count; enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lu\n", count); } static DEVICE_ATTR_RO(wakeup_active_count); static ssize_t wakeup_abort_count_show(struct device *dev, struct device_attribute *attr, char *buf) { unsigned long count; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { count = dev->power.wakeup->wakeup_count; enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lu\n", count); } static DEVICE_ATTR_RO(wakeup_abort_count); static ssize_t wakeup_expire_count_show(struct device *dev, struct device_attribute *attr, char *buf) { unsigned long count; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { count = dev->power.wakeup->expire_count; enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lu\n", count); } static DEVICE_ATTR_RO(wakeup_expire_count); static ssize_t wakeup_active_show(struct device *dev, struct device_attribute *attr, char *buf) { unsigned int active; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { active = dev->power.wakeup->active; enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%u\n", active); } static DEVICE_ATTR_RO(wakeup_active); static ssize_t wakeup_total_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { s64 msec; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { msec = ktime_to_ms(dev->power.wakeup->total_time); enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lld\n", msec); } static DEVICE_ATTR_RO(wakeup_total_time_ms); static ssize_t wakeup_max_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { s64 msec; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { msec = ktime_to_ms(dev->power.wakeup->max_time); enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lld\n", msec); } static DEVICE_ATTR_RO(wakeup_max_time_ms); static ssize_t wakeup_last_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { s64 msec; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { msec = ktime_to_ms(dev->power.wakeup->last_time); enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lld\n", msec); } static DEVICE_ATTR_RO(wakeup_last_time_ms); #ifdef CONFIG_PM_AUTOSLEEP static ssize_t wakeup_prevent_sleep_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { s64 msec; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { msec = ktime_to_ms(dev->power.wakeup->prevent_sleep_time); enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lld\n", msec); } static DEVICE_ATTR_RO(wakeup_prevent_sleep_time_ms); #endif /* CONFIG_PM_AUTOSLEEP */ static inline int dpm_sysfs_wakeup_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { if (dev->power.wakeup && dev->power.wakeup->dev) return device_change_owner(dev->power.wakeup->dev, kuid, kgid); return 0; } #else /* CONFIG_PM_SLEEP */ static inline int dpm_sysfs_wakeup_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { return 0; } #endif #ifdef CONFIG_PM_ADVANCED_DEBUG static ssize_t runtime_usage_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", atomic_read(&dev->power.usage_count)); } static DEVICE_ATTR_RO(runtime_usage); static ssize_t runtime_active_kids_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", dev->power.ignore_children ? 0 : atomic_read(&dev->power.child_count)); } static DEVICE_ATTR_RO(runtime_active_kids); static ssize_t runtime_enabled_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *output; if (dev->power.disable_depth && !dev->power.runtime_auto) output = "disabled & forbidden"; else if (dev->power.disable_depth) output = "disabled"; else if (!dev->power.runtime_auto) output = "forbidden"; else output = "enabled"; return sysfs_emit(buf, "%s\n", output); } static DEVICE_ATTR_RO(runtime_enabled); #ifdef CONFIG_PM_SLEEP static ssize_t async_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", device_async_suspend_enabled(dev) ? _enabled : _disabled); } static ssize_t async_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { if (sysfs_streq(buf, _enabled)) device_enable_async_suspend(dev); else if (sysfs_streq(buf, _disabled)) device_disable_async_suspend(dev); else return -EINVAL; return n; } static DEVICE_ATTR_RW(async); #endif /* CONFIG_PM_SLEEP */ #endif /* CONFIG_PM_ADVANCED_DEBUG */ static struct attribute *power_attrs[] = { #if defined(CONFIG_PM_ADVANCED_DEBUG) && defined(CONFIG_PM_SLEEP) &dev_attr_async.attr, #endif NULL, }; static const struct attribute_group pm_attr_group = { .name = power_group_name, .attrs = power_attrs, }; static struct attribute *wakeup_attrs[] = { #ifdef CONFIG_PM_SLEEP &dev_attr_wakeup.attr, &dev_attr_wakeup_count.attr, &dev_attr_wakeup_active_count.attr, &dev_attr_wakeup_abort_count.attr, &dev_attr_wakeup_expire_count.attr, &dev_attr_wakeup_active.attr, &dev_attr_wakeup_total_time_ms.attr, &dev_attr_wakeup_max_time_ms.attr, &dev_attr_wakeup_last_time_ms.attr, #ifdef CONFIG_PM_AUTOSLEEP &dev_attr_wakeup_prevent_sleep_time_ms.attr, #endif #endif NULL, }; static const struct attribute_group pm_wakeup_attr_group = { .name = power_group_name, .attrs = wakeup_attrs, }; static struct attribute *runtime_attrs[] = { &dev_attr_runtime_status.attr, &dev_attr_control.attr, &dev_attr_runtime_suspended_time.attr, &dev_attr_runtime_active_time.attr, &dev_attr_autosuspend_delay_ms.attr, #ifdef CONFIG_PM_ADVANCED_DEBUG &dev_attr_runtime_usage.attr, &dev_attr_runtime_active_kids.attr, &dev_attr_runtime_enabled.attr, #endif NULL, }; static const struct attribute_group pm_runtime_attr_group = { .name = power_group_name, .attrs = runtime_attrs, }; static struct attribute *pm_qos_resume_latency_attrs[] = { &dev_attr_pm_qos_resume_latency_us.attr, NULL, }; static const struct attribute_group pm_qos_resume_latency_attr_group = { .name = power_group_name, .attrs = pm_qos_resume_latency_attrs, }; static struct attribute *pm_qos_latency_tolerance_attrs[] = { &dev_attr_pm_qos_latency_tolerance_us.attr, NULL, }; static const struct attribute_group pm_qos_latency_tolerance_attr_group = { .name = power_group_name, .attrs = pm_qos_latency_tolerance_attrs, }; static struct attribute *pm_qos_flags_attrs[] = { &dev_attr_pm_qos_no_power_off.attr, NULL, }; static const struct attribute_group pm_qos_flags_attr_group = { .name = power_group_name, .attrs = pm_qos_flags_attrs, }; int dpm_sysfs_add(struct device *dev) { int rc; /* No need to create PM sysfs if explicitly disabled. */ if (device_pm_not_required(dev)) return 0; rc = sysfs_create_group(&dev->kobj, &pm_attr_group); if (rc) return rc; if (!pm_runtime_has_no_callbacks(dev)) { rc = sysfs_merge_group(&dev->kobj, &pm_runtime_attr_group); if (rc) goto err_out; } if (device_can_wakeup(dev)) { rc = sysfs_merge_group(&dev->kobj, &pm_wakeup_attr_group); if (rc) goto err_runtime; } if (dev->power.set_latency_tolerance) { rc = sysfs_merge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); if (rc) goto err_wakeup; } rc = pm_wakeup_source_sysfs_add(dev); if (rc) goto err_latency; return 0; err_latency: sysfs_unmerge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); err_wakeup: sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); err_runtime: sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group); err_out: sysfs_remove_group(&dev->kobj, &pm_attr_group); return rc; } int dpm_sysfs_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { int rc; if (device_pm_not_required(dev)) return 0; rc = sysfs_group_change_owner(&dev->kobj, &pm_attr_group, kuid, kgid); if (rc) return rc; if (!pm_runtime_has_no_callbacks(dev)) { rc = sysfs_group_change_owner( &dev->kobj, &pm_runtime_attr_group, kuid, kgid); if (rc) return rc; } if (device_can_wakeup(dev)) { rc = sysfs_group_change_owner(&dev->kobj, &pm_wakeup_attr_group, kuid, kgid); if (rc) return rc; rc = dpm_sysfs_wakeup_change_owner(dev, kuid, kgid); if (rc) return rc; } if (dev->power.set_latency_tolerance) { rc = sysfs_group_change_owner( &dev->kobj, &pm_qos_latency_tolerance_attr_group, kuid, kgid); if (rc) return rc; } return 0; } int wakeup_sysfs_add(struct device *dev) { int ret = sysfs_merge_group(&dev->kobj, &pm_wakeup_attr_group); if (!ret) kobject_uevent(&dev->kobj, KOBJ_CHANGE); return ret; } void wakeup_sysfs_remove(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); kobject_uevent(&dev->kobj, KOBJ_CHANGE); } int pm_qos_sysfs_add_resume_latency(struct device *dev) { return sysfs_merge_group(&dev->kobj, &pm_qos_resume_latency_attr_group); } void pm_qos_sysfs_remove_resume_latency(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_qos_resume_latency_attr_group); } int pm_qos_sysfs_add_flags(struct device *dev) { return sysfs_merge_group(&dev->kobj, &pm_qos_flags_attr_group); } void pm_qos_sysfs_remove_flags(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_qos_flags_attr_group); } int pm_qos_sysfs_add_latency_tolerance(struct device *dev) { return sysfs_merge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); } void pm_qos_sysfs_remove_latency_tolerance(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); } void rpm_sysfs_remove(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group); } void dpm_sysfs_remove(struct device *dev) { if (device_pm_not_required(dev)) return; sysfs_unmerge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); dev_pm_qos_constraints_destroy(dev); rpm_sysfs_remove(dev); sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); sysfs_remove_group(&dev->kobj, &pm_attr_group); } |
| 4 3 1 3 1 2 4 8 8 8 8 3 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Virtual NCI device simulation driver * * Copyright (C) 2020 Samsung Electronics * Bongsu Jeon <bongsu.jeon@samsung.com> */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/miscdevice.h> #include <linux/mutex.h> #include <linux/wait.h> #include <net/nfc/nci_core.h> #define IOCTL_GET_NCIDEV_IDX 0 #define VIRTUAL_NFC_PROTOCOLS (NFC_PROTO_JEWEL_MASK | \ NFC_PROTO_MIFARE_MASK | \ NFC_PROTO_FELICA_MASK | \ NFC_PROTO_ISO14443_MASK | \ NFC_PROTO_ISO14443_B_MASK | \ NFC_PROTO_ISO15693_MASK) struct virtual_nci_dev { struct nci_dev *ndev; struct mutex mtx; struct sk_buff *send_buff; struct wait_queue_head wq; bool running; }; static int virtual_nci_open(struct nci_dev *ndev) { struct virtual_nci_dev *vdev = nci_get_drvdata(ndev); vdev->running = true; return 0; } static int virtual_nci_close(struct nci_dev *ndev) { struct virtual_nci_dev *vdev = nci_get_drvdata(ndev); mutex_lock(&vdev->mtx); kfree_skb(vdev->send_buff); vdev->send_buff = NULL; vdev->running = false; mutex_unlock(&vdev->mtx); return 0; } static int virtual_nci_send(struct nci_dev *ndev, struct sk_buff *skb) { struct virtual_nci_dev *vdev = nci_get_drvdata(ndev); mutex_lock(&vdev->mtx); if (vdev->send_buff || !vdev->running) { mutex_unlock(&vdev->mtx); kfree_skb(skb); return -1; } vdev->send_buff = skb_copy(skb, GFP_KERNEL); if (!vdev->send_buff) { mutex_unlock(&vdev->mtx); kfree_skb(skb); return -1; } mutex_unlock(&vdev->mtx); wake_up_interruptible(&vdev->wq); consume_skb(skb); return 0; } static const struct nci_ops virtual_nci_ops = { .open = virtual_nci_open, .close = virtual_nci_close, .send = virtual_nci_send }; static ssize_t virtual_ncidev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct virtual_nci_dev *vdev = file->private_data; size_t actual_len; mutex_lock(&vdev->mtx); while (!vdev->send_buff) { mutex_unlock(&vdev->mtx); if (wait_event_interruptible(vdev->wq, vdev->send_buff)) return -EFAULT; mutex_lock(&vdev->mtx); } actual_len = min_t(size_t, count, vdev->send_buff->len); if (copy_to_user(buf, vdev->send_buff->data, actual_len)) { mutex_unlock(&vdev->mtx); return -EFAULT; } skb_pull(vdev->send_buff, actual_len); if (vdev->send_buff->len == 0) { consume_skb(vdev->send_buff); vdev->send_buff = NULL; } mutex_unlock(&vdev->mtx); return actual_len; } static ssize_t virtual_ncidev_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct virtual_nci_dev *vdev = file->private_data; struct sk_buff *skb; skb = alloc_skb(count, GFP_KERNEL); if (!skb) return -ENOMEM; if (copy_from_user(skb_put(skb, count), buf, count)) { kfree_skb(skb); return -EFAULT; } if (strnlen(skb->data, count) != count) { kfree_skb(skb); return -EINVAL; } nci_recv_frame(vdev->ndev, skb); return count; } static int virtual_ncidev_open(struct inode *inode, struct file *file) { int ret = 0; struct virtual_nci_dev *vdev; vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); if (!vdev) return -ENOMEM; vdev->ndev = nci_allocate_device(&virtual_nci_ops, VIRTUAL_NFC_PROTOCOLS, 0, 0); if (!vdev->ndev) { kfree(vdev); return -ENOMEM; } mutex_init(&vdev->mtx); init_waitqueue_head(&vdev->wq); file->private_data = vdev; nci_set_drvdata(vdev->ndev, vdev); ret = nci_register_device(vdev->ndev); if (ret < 0) { nci_free_device(vdev->ndev); mutex_destroy(&vdev->mtx); kfree(vdev); return ret; } return 0; } static int virtual_ncidev_close(struct inode *inode, struct file *file) { struct virtual_nci_dev *vdev = file->private_data; nci_unregister_device(vdev->ndev); nci_free_device(vdev->ndev); mutex_destroy(&vdev->mtx); kfree(vdev); return 0; } static long virtual_ncidev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct virtual_nci_dev *vdev = file->private_data; const struct nfc_dev *nfc_dev = vdev->ndev->nfc_dev; void __user *p = (void __user *)arg; if (cmd != IOCTL_GET_NCIDEV_IDX) return -ENOTTY; if (copy_to_user(p, &nfc_dev->idx, sizeof(nfc_dev->idx))) return -EFAULT; return 0; } static const struct file_operations virtual_ncidev_fops = { .owner = THIS_MODULE, .read = virtual_ncidev_read, .write = virtual_ncidev_write, .open = virtual_ncidev_open, .release = virtual_ncidev_close, .unlocked_ioctl = virtual_ncidev_ioctl }; static struct miscdevice miscdev = { .minor = MISC_DYNAMIC_MINOR, .name = "virtual_nci", .fops = &virtual_ncidev_fops, .mode = 0600, }; module_misc_device(miscdev); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Virtual NCI device simulation driver"); MODULE_AUTHOR("Bongsu Jeon <bongsu.jeon@samsung.com>"); |
| 7 7 17 6 11 11 11 11 11 11 11 17 11 21 1 21 20 2 19 19 19 1 18 18 1 17 55 55 30 2 32 55 55 54 21 22 32 26 53 26 33 55 1 55 1 54 55 23 25 48 23 23 55 55 55 3 12 38 55 55 55 55 55 55 55 55 55 55 55 59 59 54 53 54 54 55 54 55 55 54 53 3 54 54 59 59 59 28 57 59 55 55 1 1 55 55 2 55 55 55 55 55 55 55 54 55 54 55 55 55 55 32 22 21 17 38 38 19 10 19 16 11 11 59 31 31 30 31 7 7 31 7 31 14 30 13 31 31 31 31 31 31 31 4 4 4 4 4 4 4 4 4 1 4 1 4 4 1 1 1 1 1 1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 | // SPDX-License-Identifier: GPL-2.0-or-later /* * (Tentative) USB Audio Driver for ALSA * * Copyright (c) 2002 by Takashi Iwai <tiwai@suse.de> * * Many codes borrowed from audio.c by * Alan Cox (alan@lxorguk.ukuu.org.uk) * Thomas Sailer (sailer@ife.ee.ethz.ch) * * Audio Class 3.0 support by Ruslan Bilovol <ruslan.bilovol@gmail.com> * * NOTES: * * - the linked URBs would be preferred but not used so far because of * the instability of unlinking. * - type II is not supported properly. there is no device which supports * this type *correctly*. SB extigy looks as if it supports, but it's * indeed an AC3 stream packed in SPDIF frames (i.e. no real AC3 stream). */ #include <linux/bitops.h> #include <linux/init.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/ctype.h> #include <linux/usb.h> #include <linux/moduleparam.h> #include <linux/mutex.h> #include <linux/usb/audio.h> #include <linux/usb/audio-v2.h> #include <linux/usb/audio-v3.h> #include <linux/module.h> #include <sound/control.h> #include <sound/core.h> #include <sound/info.h> #include <sound/pcm.h> #include <sound/pcm_params.h> #include <sound/initval.h> #include "usbaudio.h" #include "card.h" #include "midi.h" #include "midi2.h" #include "mixer.h" #include "proc.h" #include "quirks.h" #include "endpoint.h" #include "helper.h" #include "pcm.h" #include "format.h" #include "power.h" #include "stream.h" #include "media.h" MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>"); MODULE_DESCRIPTION("USB Audio"); MODULE_LICENSE("GPL"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;/* Enable this card */ /* Vendor/product IDs for this card */ static int vid[SNDRV_CARDS] = { [0 ... (SNDRV_CARDS-1)] = -1 }; static int pid[SNDRV_CARDS] = { [0 ... (SNDRV_CARDS-1)] = -1 }; static int device_setup[SNDRV_CARDS]; /* device parameter for this card */ static bool ignore_ctl_error; static bool autoclock = true; static bool lowlatency = true; static char *quirk_alias[SNDRV_CARDS]; static char *delayed_register[SNDRV_CARDS]; static bool implicit_fb[SNDRV_CARDS]; static char *quirk_flags[SNDRV_CARDS]; bool snd_usb_use_vmalloc = true; bool snd_usb_skip_validation; module_param_array(index, int, NULL, 0444); MODULE_PARM_DESC(index, "Index value for the USB audio adapter."); module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for the USB audio adapter."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable USB audio adapter."); module_param_array(vid, int, NULL, 0444); MODULE_PARM_DESC(vid, "Vendor ID for the USB audio device."); module_param_array(pid, int, NULL, 0444); MODULE_PARM_DESC(pid, "Product ID for the USB audio device."); module_param_array(device_setup, int, NULL, 0444); MODULE_PARM_DESC(device_setup, "Specific device setup (if needed)."); module_param(ignore_ctl_error, bool, 0444); MODULE_PARM_DESC(ignore_ctl_error, "Ignore errors from USB controller for mixer interfaces."); module_param(autoclock, bool, 0444); MODULE_PARM_DESC(autoclock, "Enable auto-clock selection for UAC2 devices (default: yes)."); module_param(lowlatency, bool, 0444); MODULE_PARM_DESC(lowlatency, "Enable low latency playback (default: yes)."); module_param_array(quirk_alias, charp, NULL, 0444); MODULE_PARM_DESC(quirk_alias, "Quirk aliases, e.g. 0123abcd:5678beef."); module_param_array(delayed_register, charp, NULL, 0444); MODULE_PARM_DESC(delayed_register, "Quirk for delayed registration, given by id:iface, e.g. 0123abcd:4."); module_param_array(implicit_fb, bool, NULL, 0444); MODULE_PARM_DESC(implicit_fb, "Apply generic implicit feedback sync mode."); module_param_named(use_vmalloc, snd_usb_use_vmalloc, bool, 0444); MODULE_PARM_DESC(use_vmalloc, "Use vmalloc for PCM intermediate buffers (default: yes)."); module_param_named(skip_validation, snd_usb_skip_validation, bool, 0444); MODULE_PARM_DESC(skip_validation, "Skip unit descriptor validation (default: no)."); /* protects quirk_flags */ static DEFINE_MUTEX(quirk_flags_mutex); static int param_set_quirkp(const char *val, const struct kernel_param *kp) { guard(mutex)(&quirk_flags_mutex); return param_set_charp(val, kp); } static const struct kernel_param_ops param_ops_quirkp = { .set = param_set_quirkp, .get = param_get_charp, .free = param_free_charp, }; #define param_check_quirkp param_check_charp module_param_array(quirk_flags, quirkp, NULL, 0644); MODULE_PARM_DESC(quirk_flags, "Add/modify USB audio quirks"); /* * we keep the snd_usb_audio_t instances by ourselves for merging * the all interfaces on the same card as one sound device. */ static DEFINE_MUTEX(register_mutex); static struct snd_usb_audio *usb_chip[SNDRV_CARDS]; static struct usb_driver usb_audio_driver; static struct snd_usb_platform_ops *platform_ops; /* * Register platform specific operations that will be notified on events * which occur in USB SND. The platform driver can utilize this path to * enable features, such as USB audio offloading, which allows for audio data * to be queued by an audio DSP. * * Only one set of platform operations can be registered to USB SND. The * platform register operation is protected by the register_mutex. */ int snd_usb_register_platform_ops(struct snd_usb_platform_ops *ops) { guard(mutex)(®ister_mutex); if (platform_ops) return -EEXIST; platform_ops = ops; return 0; } EXPORT_SYMBOL_GPL(snd_usb_register_platform_ops); /* * Unregisters the current set of platform operations. This allows for * a new set to be registered if required. * * The platform unregister operation is protected by the register_mutex. */ int snd_usb_unregister_platform_ops(void) { guard(mutex)(®ister_mutex); platform_ops = NULL; return 0; } EXPORT_SYMBOL_GPL(snd_usb_unregister_platform_ops); /* * in case the platform driver was not ready at the time of USB SND * device connect, expose an API to discover all connected USB devices * so it can populate any dependent resources/structures. */ void snd_usb_rediscover_devices(void) { int i; guard(mutex)(®ister_mutex); if (!platform_ops || !platform_ops->connect_cb) return; for (i = 0; i < SNDRV_CARDS; i++) { if (usb_chip[i]) platform_ops->connect_cb(usb_chip[i]); } } EXPORT_SYMBOL_GPL(snd_usb_rediscover_devices); /* * Checks to see if requested audio profile, i.e sample rate, # of * channels, etc... is supported by the substream associated to the * USB audio device. */ struct snd_usb_stream * snd_usb_find_suppported_substream(int card_idx, struct snd_pcm_hw_params *params, int direction) { struct snd_usb_audio *chip; struct snd_usb_substream *subs; struct snd_usb_stream *as; /* * Register mutex is held when populating and clearing usb_chip * array. */ guard(mutex)(®ister_mutex); chip = usb_chip[card_idx]; if (chip && enable[card_idx]) { list_for_each_entry(as, &chip->pcm_list, list) { subs = &as->substream[direction]; if (snd_usb_find_substream_format(subs, params)) return as; } } return NULL; } EXPORT_SYMBOL_GPL(snd_usb_find_suppported_substream); /* * disconnect streams * called from usb_audio_disconnect() */ static void snd_usb_stream_disconnect(struct snd_usb_stream *as) { int idx; struct snd_usb_substream *subs; for (idx = 0; idx < 2; idx++) { subs = &as->substream[idx]; if (!subs->num_formats) continue; subs->data_endpoint = NULL; subs->sync_endpoint = NULL; } } static int snd_usb_create_stream(struct snd_usb_audio *chip, int ctrlif, int interface) { struct usb_device *dev = chip->dev; struct usb_host_interface *alts; struct usb_interface_descriptor *altsd; struct usb_interface *iface = usb_ifnum_to_if(dev, interface); if (!iface) { dev_err(&dev->dev, "%u:%d : does not exist\n", ctrlif, interface); return -EINVAL; } alts = &iface->altsetting[0]; altsd = get_iface_desc(alts); /* * Android with both accessory and audio interfaces enabled gets the * interface numbers wrong. */ if ((chip->usb_id == USB_ID(0x18d1, 0x2d04) || chip->usb_id == USB_ID(0x18d1, 0x2d05)) && interface == 0 && altsd->bInterfaceClass == USB_CLASS_VENDOR_SPEC && altsd->bInterfaceSubClass == USB_SUBCLASS_VENDOR_SPEC) { interface = 2; iface = usb_ifnum_to_if(dev, interface); if (!iface) return -EINVAL; alts = &iface->altsetting[0]; altsd = get_iface_desc(alts); } if (usb_interface_claimed(iface)) { dev_dbg(&dev->dev, "%d:%d: skipping, already claimed\n", ctrlif, interface); return -EINVAL; } if ((altsd->bInterfaceClass == USB_CLASS_AUDIO || altsd->bInterfaceClass == USB_CLASS_VENDOR_SPEC) && altsd->bInterfaceSubClass == USB_SUBCLASS_MIDISTREAMING) { int err = snd_usb_midi_v2_create(chip, iface, NULL, chip->usb_id); if (err < 0) { dev_err(&dev->dev, "%u:%d: cannot create sequencer device\n", ctrlif, interface); return -EINVAL; } return usb_driver_claim_interface(&usb_audio_driver, iface, USB_AUDIO_IFACE_UNUSED); } if ((altsd->bInterfaceClass != USB_CLASS_AUDIO && altsd->bInterfaceClass != USB_CLASS_VENDOR_SPEC) || altsd->bInterfaceSubClass != USB_SUBCLASS_AUDIOSTREAMING) { dev_dbg(&dev->dev, "%u:%d: skipping non-supported interface %d\n", ctrlif, interface, altsd->bInterfaceClass); /* skip non-supported classes */ return -EINVAL; } if (snd_usb_get_speed(dev) == USB_SPEED_LOW) { dev_err(&dev->dev, "low speed audio streaming not supported\n"); return -EINVAL; } snd_usb_add_ctrl_interface_link(chip, interface, ctrlif); if (! snd_usb_parse_audio_interface(chip, interface)) { usb_set_interface(dev, interface, 0); /* reset the current interface */ return usb_driver_claim_interface(&usb_audio_driver, iface, USB_AUDIO_IFACE_UNUSED); } return 0; } /* * parse audio control descriptor and create pcm/midi streams */ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif) { struct usb_device *dev = chip->dev; struct usb_host_interface *host_iface; struct usb_interface_descriptor *altsd; int i, protocol; /* find audiocontrol interface */ host_iface = &usb_ifnum_to_if(dev, ctrlif)->altsetting[0]; altsd = get_iface_desc(host_iface); protocol = altsd->bInterfaceProtocol; switch (protocol) { default: dev_warn(&dev->dev, "unknown interface protocol %#02x, assuming v1\n", protocol); fallthrough; case UAC_VERSION_1: { struct uac1_ac_header_descriptor *h1; int rest_bytes; h1 = snd_usb_find_csint_desc(host_iface->extra, host_iface->extralen, NULL, UAC_HEADER); if (!h1 || h1->bLength < sizeof(*h1)) { dev_err(&dev->dev, "cannot find UAC_HEADER\n"); return -EINVAL; } rest_bytes = (void *)(host_iface->extra + host_iface->extralen) - (void *)h1; /* just to be sure -- this shouldn't hit at all */ if (rest_bytes <= 0) { dev_err(&dev->dev, "invalid control header\n"); return -EINVAL; } if (rest_bytes < sizeof(*h1)) { dev_err(&dev->dev, "too short v1 buffer descriptor\n"); return -EINVAL; } if (!h1->bInCollection) { dev_info(&dev->dev, "skipping empty audio interface (v1)\n"); return -EINVAL; } if (rest_bytes < h1->bLength) { dev_err(&dev->dev, "invalid buffer length (v1)\n"); return -EINVAL; } if (h1->bLength < sizeof(*h1) + h1->bInCollection) { dev_err(&dev->dev, "invalid UAC_HEADER (v1)\n"); return -EINVAL; } for (i = 0; i < h1->bInCollection; i++) snd_usb_create_stream(chip, ctrlif, h1->baInterfaceNr[i]); break; } case UAC_VERSION_2: case UAC_VERSION_3: { struct usb_interface_assoc_descriptor *assoc = usb_ifnum_to_if(dev, ctrlif)->intf_assoc; if (!assoc) { /* * Firmware writers cannot count to three. So to find * the IAD on the NuForce UDH-100, also check the next * interface. */ struct usb_interface *iface = usb_ifnum_to_if(dev, ctrlif + 1); if (iface && iface->intf_assoc && iface->intf_assoc->bFunctionClass == USB_CLASS_AUDIO && iface->intf_assoc->bFunctionProtocol == UAC_VERSION_2) assoc = iface->intf_assoc; } if (!assoc) { dev_err(&dev->dev, "Audio class v2/v3 interfaces need an interface association\n"); return -EINVAL; } if (protocol == UAC_VERSION_3) { int badd = assoc->bFunctionSubClass; if (badd != UAC3_FUNCTION_SUBCLASS_FULL_ADC_3_0 && (badd < UAC3_FUNCTION_SUBCLASS_GENERIC_IO || badd > UAC3_FUNCTION_SUBCLASS_SPEAKERPHONE)) { dev_err(&dev->dev, "Unsupported UAC3 BADD profile\n"); return -EINVAL; } chip->badd_profile = badd; } for (i = 0; i < assoc->bInterfaceCount; i++) { int intf = assoc->bFirstInterface + i; if (intf != ctrlif) snd_usb_create_stream(chip, ctrlif, intf); } break; } } return 0; } /* * Profile name preset table */ struct usb_audio_device_name { u32 id; const char *vendor_name; const char *product_name; const char *profile_name; /* override card->longname */ }; #define PROFILE_NAME(vid, pid, vendor, product, profile) \ { .id = USB_ID(vid, pid), .vendor_name = (vendor), \ .product_name = (product), .profile_name = (profile) } #define DEVICE_NAME(vid, pid, vendor, product) \ PROFILE_NAME(vid, pid, vendor, product, NULL) /* vendor/product and profile name presets, sorted in device id order */ static const struct usb_audio_device_name usb_audio_names[] = { /* HP Thunderbolt Dock Audio Headset */ PROFILE_NAME(0x03f0, 0x0269, "HP", "Thunderbolt Dock Audio Headset", "HP-Thunderbolt-Dock-Audio-Headset"), /* HP Thunderbolt Dock Audio Module */ PROFILE_NAME(0x03f0, 0x0567, "HP", "Thunderbolt Dock Audio Module", "HP-Thunderbolt-Dock-Audio-Module"), /* Two entries for Gigabyte TRX40 Aorus Master: * TRX40 Aorus Master has two USB-audio devices, one for the front * headphone with ESS SABRE9218 DAC chip, while another for the rest * I/O (the rear panel and the front mic) with Realtek ALC1220-VB. * Here we provide two distinct names for making UCM profiles easier. */ PROFILE_NAME(0x0414, 0xa000, "Gigabyte", "Aorus Master Front Headphone", "Gigabyte-Aorus-Master-Front-Headphone"), PROFILE_NAME(0x0414, 0xa001, "Gigabyte", "Aorus Master Main Audio", "Gigabyte-Aorus-Master-Main-Audio"), /* Gigabyte TRX40 Aorus Pro WiFi */ PROFILE_NAME(0x0414, 0xa002, "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), /* Creative/E-Mu devices */ DEVICE_NAME(0x041e, 0x3010, "Creative Labs", "Sound Blaster MP3+"), /* Creative/Toshiba Multimedia Center SB-0500 */ DEVICE_NAME(0x041e, 0x3048, "Toshiba", "SB-0500"), /* Logitech Audio Devices */ DEVICE_NAME(0x046d, 0x0867, "Logitech, Inc.", "Logi-MeetUp"), DEVICE_NAME(0x046d, 0x0874, "Logitech, Inc.", "Logi-Tap-Audio"), DEVICE_NAME(0x046d, 0x087c, "Logitech, Inc.", "Logi-Huddle"), DEVICE_NAME(0x046d, 0x0898, "Logitech, Inc.", "Logi-RB-Audio"), DEVICE_NAME(0x046d, 0x08d2, "Logitech, Inc.", "Logi-RBM-Audio"), DEVICE_NAME(0x046d, 0x0990, "Logitech, Inc.", "QuickCam Pro 9000"), DEVICE_NAME(0x05e1, 0x0408, "Syntek", "STK1160"), DEVICE_NAME(0x05e1, 0x0480, "Hauppauge", "Woodbury"), /* ASUS ROG Zenith II: this machine has also two devices, one for * the front headphone and another for the rest */ PROFILE_NAME(0x0b05, 0x1915, "ASUS", "Zenith II Front Headphone", "Zenith-II-Front-Headphone"), PROFILE_NAME(0x0b05, 0x1916, "ASUS", "Zenith II Main Audio", "Zenith-II-Main-Audio"), /* ASUS ROG Strix */ PROFILE_NAME(0x0b05, 0x1917, "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), /* ASUS PRIME TRX40 PRO-S */ PROFILE_NAME(0x0b05, 0x1918, "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), /* Dell WD15 Dock */ PROFILE_NAME(0x0bda, 0x4014, "Dell", "WD15 Dock", "Dell-WD15-Dock"), /* Dell WD19 Dock */ PROFILE_NAME(0x0bda, 0x402e, "Dell", "WD19 Dock", "Dell-WD15-Dock"), DEVICE_NAME(0x0ccd, 0x0028, "TerraTec", "Aureon5.1MkII"), /* * The original product_name is "USB Sound Device", however this name * is also used by the CM106 based cards, so make it unique. */ DEVICE_NAME(0x0d8c, 0x0102, NULL, "ICUSBAUDIO7D"), DEVICE_NAME(0x0d8c, 0x0103, NULL, "Audio Advantage MicroII"), /* MSI TRX40 Creator */ PROFILE_NAME(0x0db0, 0x0d64, "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), /* MSI TRX40 */ PROFILE_NAME(0x0db0, 0x543d, "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), DEVICE_NAME(0x0fd9, 0x0008, "Hauppauge", "HVR-950Q"), /* Dock/Stand for HP Engage Go */ PROFILE_NAME(0x103c, 0x830a, "HP", "HP Engage Go Dock", "HP-Engage-Go-Dock"), /* Stanton/N2IT Final Scratch v1 device ('Scratchamp') */ DEVICE_NAME(0x103d, 0x0100, "Stanton", "ScratchAmp"), DEVICE_NAME(0x103d, 0x0101, "Stanton", "ScratchAmp"), /* aka. Serato Scratch Live DJ Box */ DEVICE_NAME(0x13e5, 0x0001, "Rane", "SL-1"), /* Lenovo ThinkStation P620 Rear Line-in, Line-out and Microphone */ PROFILE_NAME(0x17aa, 0x1046, "Lenovo", "ThinkStation P620 Rear", "Lenovo-ThinkStation-P620-Rear"), /* Lenovo ThinkStation P620 Internal Speaker + Front Headset */ PROFILE_NAME(0x17aa, 0x104d, "Lenovo", "ThinkStation P620 Main", "Lenovo-ThinkStation-P620-Main"), /* Asrock TRX40 Creator */ PROFILE_NAME(0x26ce, 0x0a01, "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"), DEVICE_NAME(0x2040, 0x7200, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x7201, "Hauppauge", "HVR-950Q-MXL"), DEVICE_NAME(0x2040, 0x7210, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x7211, "Hauppauge", "HVR-950Q-MXL"), DEVICE_NAME(0x2040, 0x7213, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x7217, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x721b, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x721e, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x721f, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x7240, "Hauppauge", "HVR-850"), DEVICE_NAME(0x2040, 0x7260, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x7280, "Hauppauge", "HVR-950Q"), DEVICE_NAME(0x2040, 0x7281, "Hauppauge", "HVR-950Q-MXL"), DEVICE_NAME(0x2040, 0x8200, "Hauppauge", "Woodbury"), { } /* terminator */ }; static const struct usb_audio_device_name * lookup_device_name(u32 id) { static const struct usb_audio_device_name *p; for (p = usb_audio_names; p->id; p++) if (p->id == id) return p; return NULL; } /* * free the chip instance * * here we have to do not much, since pcm and controls are already freed * */ static void snd_usb_audio_free(struct snd_card *card) { struct snd_usb_audio *chip = card->private_data; snd_usb_endpoint_free_all(chip); snd_usb_midi_v2_free_all(chip); mutex_destroy(&chip->mutex); if (!atomic_read(&chip->shutdown)) dev_set_drvdata(&chip->dev->dev, NULL); } static void usb_audio_make_shortname(struct usb_device *dev, struct snd_usb_audio *chip, const struct snd_usb_audio_quirk *quirk) { struct snd_card *card = chip->card; const struct usb_audio_device_name *preset; const char *s = NULL; preset = lookup_device_name(chip->usb_id); if (preset && preset->product_name) s = preset->product_name; else if (quirk && quirk->product_name) s = quirk->product_name; if (s && *s) { strscpy(card->shortname, s, sizeof(card->shortname)); return; } /* retrieve the device string as shortname */ if (!dev->descriptor.iProduct || usb_string(dev, dev->descriptor.iProduct, card->shortname, sizeof(card->shortname)) <= 0) { /* no name available from anywhere, so use ID */ scnprintf(card->shortname, sizeof(card->shortname), "USB Device %#04x:%#04x", USB_ID_VENDOR(chip->usb_id), USB_ID_PRODUCT(chip->usb_id)); } strim(card->shortname); } static void usb_audio_make_longname(struct usb_device *dev, struct snd_usb_audio *chip, const struct snd_usb_audio_quirk *quirk) { struct snd_card *card = chip->card; const struct usb_audio_device_name *preset; const char *s = NULL; int len; preset = lookup_device_name(chip->usb_id); /* shortcut - if any pre-defined string is given, use it */ if (preset && preset->profile_name) s = preset->profile_name; if (s && *s) { strscpy(card->longname, s, sizeof(card->longname)); return; } if (preset && preset->vendor_name) s = preset->vendor_name; else if (quirk && quirk->vendor_name) s = quirk->vendor_name; *card->longname = 0; if (s && *s) { strscpy(card->longname, s, sizeof(card->longname)); } else { /* retrieve the vendor and device strings as longname */ if (dev->descriptor.iManufacturer) usb_string(dev, dev->descriptor.iManufacturer, card->longname, sizeof(card->longname)); /* we don't really care if there isn't any vendor string */ } if (*card->longname) { strim(card->longname); if (*card->longname) strlcat(card->longname, " ", sizeof(card->longname)); } strlcat(card->longname, card->shortname, sizeof(card->longname)); len = strlcat(card->longname, " at ", sizeof(card->longname)); if (len < sizeof(card->longname)) usb_make_path(dev, card->longname + len, sizeof(card->longname) - len); switch (snd_usb_get_speed(dev)) { case USB_SPEED_LOW: strlcat(card->longname, ", low speed", sizeof(card->longname)); break; case USB_SPEED_FULL: strlcat(card->longname, ", full speed", sizeof(card->longname)); break; case USB_SPEED_HIGH: strlcat(card->longname, ", high speed", sizeof(card->longname)); break; case USB_SPEED_SUPER: strlcat(card->longname, ", super speed", sizeof(card->longname)); break; case USB_SPEED_SUPER_PLUS: strlcat(card->longname, ", super speed plus", sizeof(card->longname)); break; default: break; } } static void snd_usb_init_quirk_flags(int idx, struct snd_usb_audio *chip) { size_t i; guard(mutex)(&quirk_flags_mutex); /* old style option found: the position-based integer value */ if (quirk_flags[idx] && !kstrtou32(quirk_flags[idx], 0, &chip->quirk_flags)) { snd_usb_apply_flag_dbg("module param", chip, chip->quirk_flags); return; } /* take the default quirk from the quirk table */ snd_usb_init_quirk_flags_table(chip); /* add or correct quirk bits from options */ for (i = 0; i < ARRAY_SIZE(quirk_flags); i++) { if (!quirk_flags[i] || !*quirk_flags[i]) break; snd_usb_init_quirk_flags_parse_string(chip, quirk_flags[i]); } } /* * create a chip instance and set its names. */ static int snd_usb_audio_create(struct usb_interface *intf, struct usb_device *dev, int idx, const struct snd_usb_audio_quirk *quirk, unsigned int usb_id, struct snd_usb_audio **rchip) { struct snd_card *card; struct snd_usb_audio *chip; int err; char component[14]; *rchip = NULL; switch (snd_usb_get_speed(dev)) { case USB_SPEED_LOW: case USB_SPEED_FULL: case USB_SPEED_HIGH: case USB_SPEED_SUPER: case USB_SPEED_SUPER_PLUS: break; default: dev_err(&dev->dev, "unknown device speed %d\n", snd_usb_get_speed(dev)); return -ENXIO; } err = snd_card_new(&intf->dev, index[idx], id[idx], THIS_MODULE, sizeof(*chip), &card); if (err < 0) { dev_err(&dev->dev, "cannot create card instance %d\n", idx); return err; } chip = card->private_data; mutex_init(&chip->mutex); init_waitqueue_head(&chip->shutdown_wait); chip->index = idx; chip->dev = dev; chip->card = card; chip->setup = device_setup[idx]; chip->generic_implicit_fb = implicit_fb[idx]; chip->autoclock = autoclock; chip->lowlatency = lowlatency; atomic_set(&chip->active, 1); /* avoid autopm during probing */ atomic_set(&chip->usage_count, 0); atomic_set(&chip->shutdown, 0); chip->usb_id = usb_id; INIT_LIST_HEAD(&chip->pcm_list); INIT_LIST_HEAD(&chip->ep_list); INIT_LIST_HEAD(&chip->iface_ref_list); INIT_LIST_HEAD(&chip->clock_ref_list); INIT_LIST_HEAD(&chip->midi_list); INIT_LIST_HEAD(&chip->midi_v2_list); INIT_LIST_HEAD(&chip->mixer_list); snd_usb_init_quirk_flags(idx, chip); card->private_free = snd_usb_audio_free; strscpy(card->driver, "USB-Audio"); scnprintf(component, sizeof(component), "USB%04x:%04x", USB_ID_VENDOR(chip->usb_id), USB_ID_PRODUCT(chip->usb_id)); snd_component_add(card, component); usb_audio_make_shortname(dev, chip, quirk); usb_audio_make_longname(dev, chip, quirk); snd_usb_audio_create_proc(chip); *rchip = chip; return 0; } /* look for a matching quirk alias id */ static bool get_alias_id(struct usb_device *dev, unsigned int *id) { int i; unsigned int src, dst; for (i = 0; i < ARRAY_SIZE(quirk_alias); i++) { if (!quirk_alias[i] || sscanf(quirk_alias[i], "%x:%x", &src, &dst) != 2 || src != *id) continue; dev_info(&dev->dev, "device (%04x:%04x): applying quirk alias %04x:%04x\n", USB_ID_VENDOR(*id), USB_ID_PRODUCT(*id), USB_ID_VENDOR(dst), USB_ID_PRODUCT(dst)); *id = dst; return true; } return false; } static int check_delayed_register_option(struct snd_usb_audio *chip) { int i; unsigned int id, inum; for (i = 0; i < ARRAY_SIZE(delayed_register); i++) { if (delayed_register[i] && sscanf(delayed_register[i], "%x:%x", &id, &inum) == 2 && id == chip->usb_id) return inum; } return -1; } static const struct usb_device_id usb_audio_ids[]; /* defined below */ /* look for the last interface that matches with our ids and remember it */ static void find_last_interface(struct snd_usb_audio *chip) { struct usb_host_config *config = chip->dev->actconfig; struct usb_interface *intf; int i; if (!config) return; for (i = 0; i < config->desc.bNumInterfaces; i++) { intf = config->interface[i]; if (usb_match_id(intf, usb_audio_ids)) chip->last_iface = intf->altsetting[0].desc.bInterfaceNumber; } usb_audio_dbg(chip, "Found last interface = %d\n", chip->last_iface); } /* look for the corresponding quirk */ static const struct snd_usb_audio_quirk * get_alias_quirk(struct usb_device *dev, unsigned int id) { const struct usb_device_id *p; for (p = usb_audio_ids; p->match_flags; p++) { /* FIXME: this checks only vendor:product pair in the list */ if ((p->match_flags & USB_DEVICE_ID_MATCH_DEVICE) == USB_DEVICE_ID_MATCH_DEVICE && p->idVendor == USB_ID_VENDOR(id) && p->idProduct == USB_ID_PRODUCT(id)) return (const struct snd_usb_audio_quirk *)p->driver_info; } return NULL; } /* register card if we reach to the last interface or to the specified * one given via option */ static int try_to_register_card(struct snd_usb_audio *chip, int ifnum) { if (check_delayed_register_option(chip) == ifnum || chip->last_iface == ifnum || usb_interface_claimed(usb_ifnum_to_if(chip->dev, chip->last_iface))) return snd_card_register(chip->card); return 0; } /* * probe the active usb device * * note that this can be called multiple times per a device, when it * includes multiple audio control interfaces. * * thus we check the usb device pointer and creates the card instance * only at the first time. the successive calls of this function will * append the pcm interface to the corresponding card. */ static int usb_audio_probe(struct usb_interface *intf, const struct usb_device_id *usb_id) { struct usb_device *dev = interface_to_usbdev(intf); const struct snd_usb_audio_quirk *quirk = (const struct snd_usb_audio_quirk *)usb_id->driver_info; struct snd_usb_audio *chip; int i, err; struct usb_host_interface *alts; int ifnum; u32 id; alts = &intf->altsetting[0]; ifnum = get_iface_desc(alts)->bInterfaceNumber; id = USB_ID(le16_to_cpu(dev->descriptor.idVendor), le16_to_cpu(dev->descriptor.idProduct)); if (get_alias_id(dev, &id)) quirk = get_alias_quirk(dev, id); if (quirk && quirk->ifnum >= 0 && ifnum != quirk->ifnum) return -ENXIO; if (quirk && quirk->ifnum == QUIRK_NODEV_INTERFACE) return -ENODEV; err = snd_usb_apply_boot_quirk(dev, intf, quirk, id); if (err < 0) return err; /* * found a config. now register to ALSA */ /* check whether it's already registered */ chip = NULL; guard(mutex)(®ister_mutex); for (i = 0; i < SNDRV_CARDS; i++) { if (usb_chip[i] && usb_chip[i]->dev == dev) { if (atomic_read(&usb_chip[i]->shutdown)) { dev_err(&dev->dev, "USB device is in the shutdown state, cannot create a card instance\n"); err = -EIO; goto __error; } chip = usb_chip[i]; atomic_inc(&chip->active); /* avoid autopm */ break; } } if (! chip) { err = snd_usb_apply_boot_quirk_once(dev, intf, quirk, id); if (err < 0) goto __error; /* it's a fresh one. * now look for an empty slot and create a new card instance */ for (i = 0; i < SNDRV_CARDS; i++) if (!usb_chip[i] && (vid[i] == -1 || vid[i] == USB_ID_VENDOR(id)) && (pid[i] == -1 || pid[i] == USB_ID_PRODUCT(id))) { if (enable[i]) { err = snd_usb_audio_create(intf, dev, i, quirk, id, &chip); if (err < 0) goto __error; break; } else if (vid[i] != -1 || pid[i] != -1) { dev_info(&dev->dev, "device (%04x:%04x) is disabled\n", USB_ID_VENDOR(id), USB_ID_PRODUCT(id)); err = -ENOENT; goto __error; } } if (!chip) { dev_err(&dev->dev, "no available usb audio device\n"); err = -ENODEV; goto __error; } find_last_interface(chip); } if (chip->num_interfaces >= MAX_CARD_INTERFACES) { dev_info(&dev->dev, "Too many interfaces assigned to the single USB-audio card\n"); err = -EINVAL; goto __error; } dev_set_drvdata(&dev->dev, chip); if (ignore_ctl_error) chip->quirk_flags |= QUIRK_FLAG_IGNORE_CTL_ERROR; if (chip->quirk_flags & QUIRK_FLAG_DISABLE_AUTOSUSPEND) usb_disable_autosuspend(interface_to_usbdev(intf)); /* * For devices with more than one control interface, we assume the * first contains the audio controls. We might need a more specific * check here in the future. */ if (!chip->ctrl_intf) chip->ctrl_intf = alts; err = 1; /* continue */ if (quirk && quirk->ifnum != QUIRK_NO_INTERFACE) { /* need some special handlings */ err = snd_usb_create_quirk(chip, intf, &usb_audio_driver, quirk); if (err < 0) goto __error; } if (err > 0) { /* create normal USB audio interfaces */ err = snd_usb_create_streams(chip, ifnum); if (err < 0) goto __error; err = snd_usb_create_mixer(chip, ifnum); if (err < 0) goto __error; } if (chip->need_delayed_register) { dev_info(&dev->dev, "Found post-registration device assignment: %08x:%02x\n", chip->usb_id, ifnum); chip->need_delayed_register = false; /* clear again */ } err = try_to_register_card(chip, ifnum); if (err < 0) goto __error_no_register; if (chip->quirk_flags & QUIRK_FLAG_SHARE_MEDIA_DEVICE) { /* don't want to fail when snd_media_device_create() fails */ snd_media_device_create(chip, intf); } if (quirk) chip->quirk_type = quirk->type; usb_chip[chip->index] = chip; chip->intf[chip->num_interfaces] = intf; chip->num_interfaces++; usb_set_intfdata(intf, chip); atomic_dec(&chip->active); if (platform_ops && platform_ops->connect_cb) platform_ops->connect_cb(chip); return 0; __error: /* in the case of error in secondary interface, still try to register */ if (chip) try_to_register_card(chip, ifnum); __error_no_register: if (chip) { /* chip->active is inside the chip->card object, * decrement before memory is possibly returned. */ atomic_dec(&chip->active); if (!chip->num_interfaces) snd_card_free(chip->card); } return err; } /* * we need to take care of counter, since disconnection can be called also * many times as well as usb_audio_probe(). */ static bool __usb_audio_disconnect(struct usb_interface *intf, struct snd_usb_audio *chip, struct snd_card *card) { struct list_head *p; guard(mutex)(®ister_mutex); if (platform_ops && platform_ops->disconnect_cb) platform_ops->disconnect_cb(chip); if (atomic_inc_return(&chip->shutdown) == 1) { struct snd_usb_stream *as; struct snd_usb_endpoint *ep; struct usb_mixer_interface *mixer; /* wait until all pending tasks done; * they are protected by snd_usb_lock_shutdown() */ wait_event(chip->shutdown_wait, !atomic_read(&chip->usage_count)); snd_card_disconnect(card); /* release the pcm resources */ list_for_each_entry(as, &chip->pcm_list, list) { snd_usb_stream_disconnect(as); } /* release the endpoint resources */ list_for_each_entry(ep, &chip->ep_list, list) { snd_usb_endpoint_release(ep); } /* release the midi resources */ list_for_each(p, &chip->midi_list) { snd_usbmidi_disconnect(p); } snd_usb_midi_v2_disconnect_all(chip); /* * Nice to check quirk && quirk->shares_media_device and * then call the snd_media_device_delete(). Don't have * access to the quirk here. snd_media_device_delete() * accesses mixer_list */ snd_media_device_delete(chip); /* release mixer resources */ list_for_each_entry(mixer, &chip->mixer_list, list) { snd_usb_mixer_disconnect(mixer); } } if (chip->quirk_flags & QUIRK_FLAG_DISABLE_AUTOSUSPEND) usb_enable_autosuspend(interface_to_usbdev(intf)); chip->num_interfaces--; if (chip->num_interfaces > 0) return false; usb_chip[chip->index] = NULL; return true; } static void usb_audio_disconnect(struct usb_interface *intf) { struct snd_usb_audio *chip = usb_get_intfdata(intf); struct snd_card *card; if (chip == USB_AUDIO_IFACE_UNUSED) return; card = chip->card; if (__usb_audio_disconnect(intf, chip, card)) snd_card_free_when_closed(card); } /* lock the shutdown (disconnect) task and autoresume */ int snd_usb_lock_shutdown(struct snd_usb_audio *chip) { int err; atomic_inc(&chip->usage_count); if (atomic_read(&chip->shutdown)) { err = -EIO; goto error; } err = snd_usb_autoresume(chip); if (err < 0) goto error; return 0; error: if (atomic_dec_and_test(&chip->usage_count)) wake_up(&chip->shutdown_wait); return err; } EXPORT_SYMBOL_GPL(snd_usb_lock_shutdown); /* autosuspend and unlock the shutdown */ void snd_usb_unlock_shutdown(struct snd_usb_audio *chip) { snd_usb_autosuspend(chip); if (atomic_dec_and_test(&chip->usage_count)) wake_up(&chip->shutdown_wait); } EXPORT_SYMBOL_GPL(snd_usb_unlock_shutdown); int snd_usb_autoresume(struct snd_usb_audio *chip) { int i, err; if (atomic_read(&chip->shutdown)) return -EIO; if (atomic_inc_return(&chip->active) != 1) return 0; for (i = 0; i < chip->num_interfaces; i++) { err = usb_autopm_get_interface(chip->intf[i]); if (err < 0) { /* rollback */ while (--i >= 0) usb_autopm_put_interface(chip->intf[i]); atomic_dec(&chip->active); return err; } } return 0; } EXPORT_SYMBOL_GPL(snd_usb_autoresume); void snd_usb_autosuspend(struct snd_usb_audio *chip) { int i; if (atomic_read(&chip->shutdown)) return; if (!atomic_dec_and_test(&chip->active)) return; for (i = 0; i < chip->num_interfaces; i++) usb_autopm_put_interface(chip->intf[i]); } EXPORT_SYMBOL_GPL(snd_usb_autosuspend); static int usb_audio_suspend(struct usb_interface *intf, pm_message_t message) { struct snd_usb_audio *chip = usb_get_intfdata(intf); struct snd_usb_stream *as; struct snd_usb_endpoint *ep; struct usb_mixer_interface *mixer; struct list_head *p; if (chip == USB_AUDIO_IFACE_UNUSED) return 0; if (!chip->num_suspended_intf++) { list_for_each_entry(as, &chip->pcm_list, list) snd_usb_pcm_suspend(as); list_for_each_entry(ep, &chip->ep_list, list) snd_usb_endpoint_suspend(ep); list_for_each(p, &chip->midi_list) snd_usbmidi_suspend(p); list_for_each_entry(mixer, &chip->mixer_list, list) snd_usb_mixer_suspend(mixer); snd_usb_midi_v2_suspend_all(chip); } if (!PMSG_IS_AUTO(message) && !chip->system_suspend) { snd_power_change_state(chip->card, SNDRV_CTL_POWER_D3hot); chip->system_suspend = chip->num_suspended_intf; } if (platform_ops && platform_ops->suspend_cb) platform_ops->suspend_cb(intf, message); return 0; } static int usb_audio_resume(struct usb_interface *intf) { struct snd_usb_audio *chip = usb_get_intfdata(intf); struct snd_usb_stream *as; struct usb_mixer_interface *mixer; struct list_head *p; int err = 0; if (chip == USB_AUDIO_IFACE_UNUSED) return 0; atomic_inc(&chip->active); /* avoid autopm */ if (chip->num_suspended_intf > 1) goto out; list_for_each_entry(as, &chip->pcm_list, list) { err = snd_usb_pcm_resume(as); if (err < 0) goto err_out; } /* * ALSA leaves material resumption to user space * we just notify and restart the mixers */ list_for_each_entry(mixer, &chip->mixer_list, list) { err = snd_usb_mixer_resume(mixer); if (err < 0) goto err_out; } list_for_each(p, &chip->midi_list) { snd_usbmidi_resume(p); } snd_usb_midi_v2_resume_all(chip); if (platform_ops && platform_ops->resume_cb) platform_ops->resume_cb(intf); out: if (chip->num_suspended_intf == chip->system_suspend) { snd_power_change_state(chip->card, SNDRV_CTL_POWER_D0); chip->system_suspend = 0; } chip->num_suspended_intf--; err_out: atomic_dec(&chip->active); /* allow autopm after this point */ return err; } static const struct usb_device_id usb_audio_ids [] = { #include "quirks-table.h" { .match_flags = (USB_DEVICE_ID_MATCH_INT_CLASS | USB_DEVICE_ID_MATCH_INT_SUBCLASS), .bInterfaceClass = USB_CLASS_AUDIO, .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, usb_audio_ids); /* * entry point for linux usb interface */ static struct usb_driver usb_audio_driver = { .name = "snd-usb-audio", .probe = usb_audio_probe, .disconnect = usb_audio_disconnect, .suspend = usb_audio_suspend, .resume = usb_audio_resume, .reset_resume = usb_audio_resume, .id_table = usb_audio_ids, .supports_autosuspend = 1, }; module_usb_driver(usb_audio_driver); |
| 6 6 3 3 3 2 2 3 3 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 | // SPDX-License-Identifier: GPL-2.0 /* * Implement the manual drop-all-pagecache function */ #include <linux/pagemap.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/fs.h> #include <linux/writeback.h> #include <linux/sysctl.h> #include <linux/gfp.h> #include <linux/swap.h> #include "internal.h" /* A global variable is a bit ugly, but it keeps the code simple */ static int sysctl_drop_caches; static void drop_pagecache_sb(struct super_block *sb, void *unused) { struct inode *inode, *toput_inode = NULL; spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { spin_lock(&inode->i_lock); /* * We must skip inodes in unusual state. We may also skip * inodes without pages but we deliberately won't in case * we need to reschedule to avoid softlockups. */ if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || (mapping_empty(inode->i_mapping) && !need_resched())) { spin_unlock(&inode->i_lock); continue; } __iget(inode); spin_unlock(&inode->i_lock); spin_unlock(&sb->s_inode_list_lock); invalidate_mapping_pages(inode->i_mapping, 0, -1); iput(toput_inode); toput_inode = inode; cond_resched(); spin_lock(&sb->s_inode_list_lock); } spin_unlock(&sb->s_inode_list_lock); iput(toput_inode); } static int drop_caches_sysctl_handler(const struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) { int ret; ret = proc_dointvec_minmax(table, write, buffer, length, ppos); if (ret) return ret; if (write) { static int stfu; if (sysctl_drop_caches & 1) { lru_add_drain_all(); iterate_supers(drop_pagecache_sb, NULL); count_vm_event(DROP_PAGECACHE); } if (sysctl_drop_caches & 2) { drop_slab(); count_vm_event(DROP_SLAB); } if (!stfu) { pr_info("%s (%d): drop_caches: %d\n", current->comm, task_pid_nr(current), sysctl_drop_caches); } stfu |= sysctl_drop_caches & 4; } return 0; } static const struct ctl_table drop_caches_table[] = { { .procname = "drop_caches", .data = &sysctl_drop_caches, .maxlen = sizeof(int), .mode = 0200, .proc_handler = drop_caches_sysctl_handler, .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_FOUR, }, }; static int __init init_vm_drop_caches_sysctls(void) { register_sysctl_init("vm", drop_caches_table); return 0; } fs_initcall(init_vm_drop_caches_sysctls); |
| 6 14 14 24 24 24 24 24 24 24 24 24 26 26 26 26 24 24 26 26 24 24 24 24 24 17 17 17 17 17 5 17 17 17 17 17 2 17 26 24 24 24 24 24 24 4 4 4 24 14 14 14 1 14 14 14 14 1 14 14 2 2 2 2 2 2 2 2 2 24 24 24 24 24 24 24 4 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 24 14 14 14 14 13 14 14 14 14 14 14 14 14 14 11 11 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 | // SPDX-License-Identifier: GPL-2.0-or-later /* * zswap.c - zswap driver file * * zswap is a cache that takes pages that are in the process * of being swapped out and attempts to compress and store them in a * RAM-based memory pool. This can result in a significant I/O reduction on * the swap device and, in the case where decompressing from RAM is faster * than reading from the swap device, can also improve workload performance. * * Copyright (C) 2012 Seth Jennings <sjenning@linux.vnet.ibm.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/cpu.h> #include <linux/highmem.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/types.h> #include <linux/atomic.h> #include <linux/swap.h> #include <linux/crypto.h> #include <linux/scatterlist.h> #include <linux/mempolicy.h> #include <linux/mempool.h> #include <crypto/acompress.h> #include <linux/zswap.h> #include <linux/mm_types.h> #include <linux/page-flags.h> #include <linux/swapops.h> #include <linux/writeback.h> #include <linux/pagemap.h> #include <linux/workqueue.h> #include <linux/list_lru.h> #include <linux/zsmalloc.h> #include "swap.h" #include "internal.h" /********************************* * statistics **********************************/ /* The number of pages currently stored in zswap */ atomic_long_t zswap_stored_pages = ATOMIC_LONG_INIT(0); /* The number of incompressible pages currently stored in zswap */ static atomic_long_t zswap_stored_incompressible_pages = ATOMIC_LONG_INIT(0); /* * The statistics below are not protected from concurrent access for * performance reasons so they may not be a 100% accurate. However, * they do provide useful information on roughly how many times a * certain event is occurring. */ /* Pool limit was hit (see zswap_max_pool_percent) */ static u64 zswap_pool_limit_hit; /* Pages written back when pool limit was reached */ static u64 zswap_written_back_pages; /* Store failed due to a reclaim failure after pool limit was reached */ static u64 zswap_reject_reclaim_fail; /* Store failed due to compression algorithm failure */ static u64 zswap_reject_compress_fail; /* Compressed page was too big for the allocator to (optimally) store */ static u64 zswap_reject_compress_poor; /* Load or writeback failed due to decompression failure */ static u64 zswap_decompress_fail; /* Store failed because underlying allocator could not get memory */ static u64 zswap_reject_alloc_fail; /* Store failed because the entry metadata could not be allocated (rare) */ static u64 zswap_reject_kmemcache_fail; /* Shrinker work queue */ static struct workqueue_struct *shrink_wq; /* Pool limit was hit, we need to calm down */ static bool zswap_pool_reached_full; /********************************* * tunables **********************************/ #define ZSWAP_PARAM_UNSET "" static int zswap_setup(void); /* Enable/disable zswap */ static DEFINE_STATIC_KEY_MAYBE(CONFIG_ZSWAP_DEFAULT_ON, zswap_ever_enabled); static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON); static int zswap_enabled_param_set(const char *, const struct kernel_param *); static const struct kernel_param_ops zswap_enabled_param_ops = { .set = zswap_enabled_param_set, .get = param_get_bool, }; module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644); /* Crypto compressor to use */ static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT; static int zswap_compressor_param_set(const char *, const struct kernel_param *); static const struct kernel_param_ops zswap_compressor_param_ops = { .set = zswap_compressor_param_set, .get = param_get_charp, .free = param_free_charp, }; module_param_cb(compressor, &zswap_compressor_param_ops, &zswap_compressor, 0644); /* The maximum percentage of memory that the compressed pool can occupy */ static unsigned int zswap_max_pool_percent = 20; module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644); /* The threshold for accepting new pages after the max_pool_percent was hit */ static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */ module_param_named(accept_threshold_percent, zswap_accept_thr_percent, uint, 0644); /* Enable/disable memory pressure-based shrinker. */ static bool zswap_shrinker_enabled = IS_ENABLED( CONFIG_ZSWAP_SHRINKER_DEFAULT_ON); module_param_named(shrinker_enabled, zswap_shrinker_enabled, bool, 0644); bool zswap_is_enabled(void) { return zswap_enabled; } bool zswap_never_enabled(void) { return !static_branch_maybe(CONFIG_ZSWAP_DEFAULT_ON, &zswap_ever_enabled); } /********************************* * data structures **********************************/ struct crypto_acomp_ctx { struct crypto_acomp *acomp; struct acomp_req *req; struct crypto_wait wait; u8 *buffer; struct mutex mutex; bool is_sleepable; }; /* * The lock ordering is zswap_tree.lock -> zswap_pool.lru_lock. * The only case where lru_lock is not acquired while holding tree.lock is * when a zswap_entry is taken off the lru for writeback, in that case it * needs to be verified that it's still valid in the tree. */ struct zswap_pool { struct zs_pool *zs_pool; struct crypto_acomp_ctx __percpu *acomp_ctx; struct percpu_ref ref; struct list_head list; struct work_struct release_work; struct hlist_node node; char tfm_name[CRYPTO_MAX_ALG_NAME]; }; /* Global LRU lists shared by all zswap pools. */ static struct list_lru zswap_list_lru; /* The lock protects zswap_next_shrink updates. */ static DEFINE_SPINLOCK(zswap_shrink_lock); static struct mem_cgroup *zswap_next_shrink; static struct work_struct zswap_shrink_work; static struct shrinker *zswap_shrinker; /* * struct zswap_entry * * This structure contains the metadata for tracking a single compressed * page within zswap. * * swpentry - associated swap entry, the offset indexes into the red-black tree * length - the length in bytes of the compressed page data. Needed during * decompression. * referenced - true if the entry recently entered the zswap pool. Unset by the * writeback logic. The entry is only reclaimed by the writeback * logic if referenced is unset. See comments in the shrinker * section for context. * pool - the zswap_pool the entry's data is in * handle - zsmalloc allocation handle that stores the compressed page data * objcg - the obj_cgroup that the compressed memory is charged to * lru - handle to the pool's lru used to evict pages. */ struct zswap_entry { swp_entry_t swpentry; unsigned int length; bool referenced; struct zswap_pool *pool; unsigned long handle; struct obj_cgroup *objcg; struct list_head lru; }; static struct xarray *zswap_trees[MAX_SWAPFILES]; static unsigned int nr_zswap_trees[MAX_SWAPFILES]; /* RCU-protected iteration */ static LIST_HEAD(zswap_pools); /* protects zswap_pools list modification */ static DEFINE_SPINLOCK(zswap_pools_lock); /* pool counter to provide unique names to zsmalloc */ static atomic_t zswap_pools_count = ATOMIC_INIT(0); enum zswap_init_type { ZSWAP_UNINIT, ZSWAP_INIT_SUCCEED, ZSWAP_INIT_FAILED }; static enum zswap_init_type zswap_init_state; /* used to ensure the integrity of initialization */ static DEFINE_MUTEX(zswap_init_lock); /* init completed, but couldn't create the initial pool */ static bool zswap_has_pool; /********************************* * helpers and fwd declarations **********************************/ /* One swap address space for each 64M swap space */ #define ZSWAP_ADDRESS_SPACE_SHIFT 14 #define ZSWAP_ADDRESS_SPACE_PAGES (1 << ZSWAP_ADDRESS_SPACE_SHIFT) static inline struct xarray *swap_zswap_tree(swp_entry_t swp) { return &zswap_trees[swp_type(swp)][swp_offset(swp) >> ZSWAP_ADDRESS_SPACE_SHIFT]; } #define zswap_pool_debug(msg, p) \ pr_debug("%s pool %s\n", msg, (p)->tfm_name) /********************************* * pool functions **********************************/ static void __zswap_pool_empty(struct percpu_ref *ref); static struct zswap_pool *zswap_pool_create(char *compressor) { struct zswap_pool *pool; char name[38]; /* 'zswap' + 32 char (max) num + \0 */ int ret, cpu; if (!zswap_has_pool && !strcmp(compressor, ZSWAP_PARAM_UNSET)) return NULL; pool = kzalloc(sizeof(*pool), GFP_KERNEL); if (!pool) return NULL; /* unique name for each pool specifically required by zsmalloc */ snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count)); pool->zs_pool = zs_create_pool(name); if (!pool->zs_pool) goto error; strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name)); pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx); if (!pool->acomp_ctx) { pr_err("percpu alloc failed\n"); goto error; } for_each_possible_cpu(cpu) mutex_init(&per_cpu_ptr(pool->acomp_ctx, cpu)->mutex); ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); if (ret) goto error; /* being the current pool takes 1 ref; this func expects the * caller to always add the new pool as the current pool */ ret = percpu_ref_init(&pool->ref, __zswap_pool_empty, PERCPU_REF_ALLOW_REINIT, GFP_KERNEL); if (ret) goto ref_fail; INIT_LIST_HEAD(&pool->list); zswap_pool_debug("created", pool); return pool; ref_fail: cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); error: if (pool->acomp_ctx) free_percpu(pool->acomp_ctx); if (pool->zs_pool) zs_destroy_pool(pool->zs_pool); kfree(pool); return NULL; } static struct zswap_pool *__zswap_pool_create_fallback(void) { if (!crypto_has_acomp(zswap_compressor, 0, 0) && strcmp(zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) { pr_err("compressor %s not available, using default %s\n", zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT); param_free_charp(&zswap_compressor); zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT; } /* Default compressor should be available. Kconfig bug? */ if (WARN_ON_ONCE(!crypto_has_acomp(zswap_compressor, 0, 0))) { zswap_compressor = ZSWAP_PARAM_UNSET; return NULL; } return zswap_pool_create(zswap_compressor); } static void zswap_pool_destroy(struct zswap_pool *pool) { zswap_pool_debug("destroying", pool); cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); free_percpu(pool->acomp_ctx); zs_destroy_pool(pool->zs_pool); kfree(pool); } static void __zswap_pool_release(struct work_struct *work) { struct zswap_pool *pool = container_of(work, typeof(*pool), release_work); synchronize_rcu(); /* nobody should have been able to get a ref... */ WARN_ON(!percpu_ref_is_zero(&pool->ref)); percpu_ref_exit(&pool->ref); /* pool is now off zswap_pools list and has no references. */ zswap_pool_destroy(pool); } static struct zswap_pool *zswap_pool_current(void); static void __zswap_pool_empty(struct percpu_ref *ref) { struct zswap_pool *pool; pool = container_of(ref, typeof(*pool), ref); spin_lock_bh(&zswap_pools_lock); WARN_ON(pool == zswap_pool_current()); list_del_rcu(&pool->list); INIT_WORK(&pool->release_work, __zswap_pool_release); schedule_work(&pool->release_work); spin_unlock_bh(&zswap_pools_lock); } static int __must_check zswap_pool_tryget(struct zswap_pool *pool) { if (!pool) return 0; return percpu_ref_tryget(&pool->ref); } /* The caller must already have a reference. */ static void zswap_pool_get(struct zswap_pool *pool) { percpu_ref_get(&pool->ref); } static void zswap_pool_put(struct zswap_pool *pool) { percpu_ref_put(&pool->ref); } static struct zswap_pool *__zswap_pool_current(void) { struct zswap_pool *pool; pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list); WARN_ONCE(!pool && zswap_has_pool, "%s: no page storage pool!\n", __func__); return pool; } static struct zswap_pool *zswap_pool_current(void) { assert_spin_locked(&zswap_pools_lock); return __zswap_pool_current(); } static struct zswap_pool *zswap_pool_current_get(void) { struct zswap_pool *pool; rcu_read_lock(); pool = __zswap_pool_current(); if (!zswap_pool_tryget(pool)) pool = NULL; rcu_read_unlock(); return pool; } /* type and compressor must be null-terminated */ static struct zswap_pool *zswap_pool_find_get(char *compressor) { struct zswap_pool *pool; assert_spin_locked(&zswap_pools_lock); list_for_each_entry_rcu(pool, &zswap_pools, list) { if (strcmp(pool->tfm_name, compressor)) continue; /* if we can't get it, it's about to be destroyed */ if (!zswap_pool_tryget(pool)) continue; return pool; } return NULL; } static unsigned long zswap_max_pages(void) { return totalram_pages() * zswap_max_pool_percent / 100; } static unsigned long zswap_accept_thr_pages(void) { return zswap_max_pages() * zswap_accept_thr_percent / 100; } unsigned long zswap_total_pages(void) { struct zswap_pool *pool; unsigned long total = 0; rcu_read_lock(); list_for_each_entry_rcu(pool, &zswap_pools, list) total += zs_get_total_pages(pool->zs_pool); rcu_read_unlock(); return total; } static bool zswap_check_limits(void) { unsigned long cur_pages = zswap_total_pages(); unsigned long max_pages = zswap_max_pages(); if (cur_pages >= max_pages) { zswap_pool_limit_hit++; zswap_pool_reached_full = true; } else if (zswap_pool_reached_full && cur_pages <= zswap_accept_thr_pages()) { zswap_pool_reached_full = false; } return zswap_pool_reached_full; } /********************************* * param callbacks **********************************/ static int zswap_compressor_param_set(const char *val, const struct kernel_param *kp) { struct zswap_pool *pool, *put_pool = NULL; char *s = strstrip((char *)val); bool create_pool = false; int ret = 0; mutex_lock(&zswap_init_lock); switch (zswap_init_state) { case ZSWAP_UNINIT: /* Handled in zswap_setup() */ ret = param_set_charp(s, kp); break; case ZSWAP_INIT_SUCCEED: if (!zswap_has_pool || strcmp(s, *(char **)kp->arg)) create_pool = true; break; case ZSWAP_INIT_FAILED: pr_err("can't set param, initialization failed\n"); ret = -ENODEV; } mutex_unlock(&zswap_init_lock); if (!create_pool) return ret; if (!crypto_has_acomp(s, 0, 0)) { pr_err("compressor %s not available\n", s); return -ENOENT; } spin_lock_bh(&zswap_pools_lock); pool = zswap_pool_find_get(s); if (pool) { zswap_pool_debug("using existing", pool); WARN_ON(pool == zswap_pool_current()); list_del_rcu(&pool->list); } spin_unlock_bh(&zswap_pools_lock); if (!pool) pool = zswap_pool_create(s); else { /* * Restore the initial ref dropped by percpu_ref_kill() * when the pool was decommissioned and switch it again * to percpu mode. */ percpu_ref_resurrect(&pool->ref); /* Drop the ref from zswap_pool_find_get(). */ zswap_pool_put(pool); } if (pool) ret = param_set_charp(s, kp); else ret = -EINVAL; spin_lock_bh(&zswap_pools_lock); if (!ret) { put_pool = zswap_pool_current(); list_add_rcu(&pool->list, &zswap_pools); zswap_has_pool = true; } else if (pool) { /* * Add the possibly pre-existing pool to the end of the pools * list; if it's new (and empty) then it'll be removed and * destroyed by the put after we drop the lock */ list_add_tail_rcu(&pool->list, &zswap_pools); put_pool = pool; } spin_unlock_bh(&zswap_pools_lock); /* * Drop the ref from either the old current pool, * or the new pool we failed to add */ if (put_pool) percpu_ref_kill(&put_pool->ref); return ret; } static int zswap_enabled_param_set(const char *val, const struct kernel_param *kp) { int ret = -ENODEV; /* if this is load-time (pre-init) param setting, only set param. */ if (system_state != SYSTEM_RUNNING) return param_set_bool(val, kp); mutex_lock(&zswap_init_lock); switch (zswap_init_state) { case ZSWAP_UNINIT: if (zswap_setup()) break; fallthrough; case ZSWAP_INIT_SUCCEED: if (!zswap_has_pool) pr_err("can't enable, no pool configured\n"); else ret = param_set_bool(val, kp); break; case ZSWAP_INIT_FAILED: pr_err("can't enable, initialization failed\n"); } mutex_unlock(&zswap_init_lock); return ret; } /********************************* * lru functions **********************************/ /* should be called under RCU */ #ifdef CONFIG_MEMCG static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry) { return entry->objcg ? obj_cgroup_memcg(entry->objcg) : NULL; } #else static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry) { return NULL; } #endif static inline int entry_to_nid(struct zswap_entry *entry) { return page_to_nid(virt_to_page(entry)); } static void zswap_lru_add(struct list_lru *list_lru, struct zswap_entry *entry) { int nid = entry_to_nid(entry); struct mem_cgroup *memcg; /* * Note that it is safe to use rcu_read_lock() here, even in the face of * concurrent memcg offlining: * * 1. list_lru_add() is called before list_lru_one is dead. The * new entry will be reparented to memcg's parent's list_lru. * 2. list_lru_add() is called after list_lru_one is dead. The * new entry will be added directly to memcg's parent's list_lru. * * Similar reasoning holds for list_lru_del(). */ rcu_read_lock(); memcg = mem_cgroup_from_entry(entry); /* will always succeed */ list_lru_add(list_lru, &entry->lru, nid, memcg); rcu_read_unlock(); } static void zswap_lru_del(struct list_lru *list_lru, struct zswap_entry *entry) { int nid = entry_to_nid(entry); struct mem_cgroup *memcg; rcu_read_lock(); memcg = mem_cgroup_from_entry(entry); /* will always succeed */ list_lru_del(list_lru, &entry->lru, nid, memcg); rcu_read_unlock(); } void zswap_lruvec_state_init(struct lruvec *lruvec) { atomic_long_set(&lruvec->zswap_lruvec_state.nr_disk_swapins, 0); } void zswap_folio_swapin(struct folio *folio) { struct lruvec *lruvec; if (folio) { lruvec = folio_lruvec(folio); atomic_long_inc(&lruvec->zswap_lruvec_state.nr_disk_swapins); } } /* * This function should be called when a memcg is being offlined. * * Since the global shrinker shrink_worker() may hold a reference * of the memcg, we must check and release the reference in * zswap_next_shrink. * * shrink_worker() must handle the case where this function releases * the reference of memcg being shrunk. */ void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) { /* lock out zswap shrinker walking memcg tree */ spin_lock(&zswap_shrink_lock); if (zswap_next_shrink == memcg) { do { zswap_next_shrink = mem_cgroup_iter(NULL, zswap_next_shrink, NULL); } while (zswap_next_shrink && !mem_cgroup_online(zswap_next_shrink)); } spin_unlock(&zswap_shrink_lock); } /********************************* * zswap entry functions **********************************/ static struct kmem_cache *zswap_entry_cache; static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid) { struct zswap_entry *entry; entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid); if (!entry) return NULL; return entry; } static void zswap_entry_cache_free(struct zswap_entry *entry) { kmem_cache_free(zswap_entry_cache, entry); } /* * Carries out the common pattern of freeing an entry's zsmalloc allocation, * freeing the entry itself, and decrementing the number of stored pages. */ static void zswap_entry_free(struct zswap_entry *entry) { zswap_lru_del(&zswap_list_lru, entry); zs_free(entry->pool->zs_pool, entry->handle); zswap_pool_put(entry->pool); if (entry->objcg) { obj_cgroup_uncharge_zswap(entry->objcg, entry->length); obj_cgroup_put(entry->objcg); } if (entry->length == PAGE_SIZE) atomic_long_dec(&zswap_stored_incompressible_pages); zswap_entry_cache_free(entry); atomic_long_dec(&zswap_stored_pages); } /********************************* * compressed storage functions **********************************/ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node) { struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node); struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu); struct crypto_acomp *acomp = NULL; struct acomp_req *req = NULL; u8 *buffer = NULL; int ret; buffer = kmalloc_node(PAGE_SIZE, GFP_KERNEL, cpu_to_node(cpu)); if (!buffer) { ret = -ENOMEM; goto fail; } acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu)); if (IS_ERR(acomp)) { pr_err("could not alloc crypto acomp %s : %ld\n", pool->tfm_name, PTR_ERR(acomp)); ret = PTR_ERR(acomp); goto fail; } req = acomp_request_alloc(acomp); if (!req) { pr_err("could not alloc crypto acomp_request %s\n", pool->tfm_name); ret = -ENOMEM; goto fail; } /* * Only hold the mutex after completing allocations, otherwise we may * recurse into zswap through reclaim and attempt to hold the mutex * again resulting in a deadlock. */ mutex_lock(&acomp_ctx->mutex); crypto_init_wait(&acomp_ctx->wait); /* * if the backend of acomp is async zip, crypto_req_done() will wakeup * crypto_wait_req(); if the backend of acomp is scomp, the callback * won't be called, crypto_wait_req() will return without blocking. */ acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, crypto_req_done, &acomp_ctx->wait); acomp_ctx->buffer = buffer; acomp_ctx->acomp = acomp; acomp_ctx->is_sleepable = acomp_is_async(acomp); acomp_ctx->req = req; mutex_unlock(&acomp_ctx->mutex); return 0; fail: if (acomp) crypto_free_acomp(acomp); kfree(buffer); return ret; } static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node) { struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node); struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu); struct acomp_req *req; struct crypto_acomp *acomp; u8 *buffer; if (IS_ERR_OR_NULL(acomp_ctx)) return 0; mutex_lock(&acomp_ctx->mutex); req = acomp_ctx->req; acomp = acomp_ctx->acomp; buffer = acomp_ctx->buffer; acomp_ctx->req = NULL; acomp_ctx->acomp = NULL; acomp_ctx->buffer = NULL; mutex_unlock(&acomp_ctx->mutex); /* * Do the actual freeing after releasing the mutex to avoid subtle * locking dependencies causing deadlocks. */ if (!IS_ERR_OR_NULL(req)) acomp_request_free(req); if (!IS_ERR_OR_NULL(acomp)) crypto_free_acomp(acomp); kfree(buffer); return 0; } static struct crypto_acomp_ctx *acomp_ctx_get_cpu_lock(struct zswap_pool *pool) { struct crypto_acomp_ctx *acomp_ctx; for (;;) { acomp_ctx = raw_cpu_ptr(pool->acomp_ctx); mutex_lock(&acomp_ctx->mutex); if (likely(acomp_ctx->req)) return acomp_ctx; /* * It is possible that we were migrated to a different CPU after * getting the per-CPU ctx but before the mutex was acquired. If * the old CPU got offlined, zswap_cpu_comp_dead() could have * already freed ctx->req (among other things) and set it to * NULL. Just try again on the new CPU that we ended up on. */ mutex_unlock(&acomp_ctx->mutex); } } static void acomp_ctx_put_unlock(struct crypto_acomp_ctx *acomp_ctx) { mutex_unlock(&acomp_ctx->mutex); } static bool zswap_compress(struct page *page, struct zswap_entry *entry, struct zswap_pool *pool) { struct crypto_acomp_ctx *acomp_ctx; struct scatterlist input, output; int comp_ret = 0, alloc_ret = 0; unsigned int dlen = PAGE_SIZE; unsigned long handle; gfp_t gfp; u8 *dst; bool mapped = false; acomp_ctx = acomp_ctx_get_cpu_lock(pool); dst = acomp_ctx->buffer; sg_init_table(&input, 1); sg_set_page(&input, page, PAGE_SIZE, 0); sg_init_one(&output, dst, PAGE_SIZE); acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen); /* * it maybe looks a little bit silly that we send an asynchronous request, * then wait for its completion synchronously. This makes the process look * synchronous in fact. * Theoretically, acomp supports users send multiple acomp requests in one * acomp instance, then get those requests done simultaneously. but in this * case, zswap actually does store and load page by page, there is no * existing method to send the second page before the first page is done * in one thread doing zwap. * but in different threads running on different cpu, we have different * acomp instance, so multiple threads can do (de)compression in parallel. */ comp_ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait); dlen = acomp_ctx->req->dlen; /* * If a page cannot be compressed into a size smaller than PAGE_SIZE, * save the content as is without a compression, to keep the LRU order * of writebacks. If writeback is disabled, reject the page since it * only adds metadata overhead. swap_writeout() will put the page back * to the active LRU list in the case. */ if (comp_ret || !dlen || dlen >= PAGE_SIZE) { dlen = PAGE_SIZE; if (!mem_cgroup_zswap_writeback_enabled( folio_memcg(page_folio(page)))) { comp_ret = comp_ret ? comp_ret : -EINVAL; goto unlock; } comp_ret = 0; dlen = PAGE_SIZE; dst = kmap_local_page(page); mapped = true; } gfp = GFP_NOWAIT | __GFP_NORETRY | __GFP_HIGHMEM | __GFP_MOVABLE; handle = zs_malloc(pool->zs_pool, dlen, gfp, page_to_nid(page)); if (IS_ERR_VALUE(handle)) { alloc_ret = PTR_ERR((void *)handle); goto unlock; } zs_obj_write(pool->zs_pool, handle, dst, dlen); entry->handle = handle; entry->length = dlen; unlock: if (mapped) kunmap_local(dst); if (comp_ret == -ENOSPC || alloc_ret == -ENOSPC) zswap_reject_compress_poor++; else if (comp_ret) zswap_reject_compress_fail++; else if (alloc_ret) zswap_reject_alloc_fail++; acomp_ctx_put_unlock(acomp_ctx); return comp_ret == 0 && alloc_ret == 0; } static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio) { struct zswap_pool *pool = entry->pool; struct scatterlist input, output; struct crypto_acomp_ctx *acomp_ctx; int decomp_ret = 0, dlen = PAGE_SIZE; u8 *src, *obj; acomp_ctx = acomp_ctx_get_cpu_lock(pool); obj = zs_obj_read_begin(pool->zs_pool, entry->handle, acomp_ctx->buffer); /* zswap entries of length PAGE_SIZE are not compressed. */ if (entry->length == PAGE_SIZE) { memcpy_to_folio(folio, 0, obj, entry->length); goto read_done; } /* * zs_obj_read_begin() might return a kmap address of highmem when * acomp_ctx->buffer is not used. However, sg_init_one() does not * handle highmem addresses, so copy the object to acomp_ctx->buffer. */ if (virt_addr_valid(obj)) { src = obj; } else { WARN_ON_ONCE(obj == acomp_ctx->buffer); memcpy(acomp_ctx->buffer, obj, entry->length); src = acomp_ctx->buffer; } sg_init_one(&input, src, entry->length); sg_init_table(&output, 1); sg_set_folio(&output, folio, PAGE_SIZE, 0); acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE); decomp_ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait); dlen = acomp_ctx->req->dlen; read_done: zs_obj_read_end(pool->zs_pool, entry->handle, obj); acomp_ctx_put_unlock(acomp_ctx); if (!decomp_ret && dlen == PAGE_SIZE) return true; zswap_decompress_fail++; pr_alert_ratelimited("Decompression error from zswap (%d:%lu %s %u->%d)\n", swp_type(entry->swpentry), swp_offset(entry->swpentry), entry->pool->tfm_name, entry->length, dlen); return false; } /********************************* * writeback code **********************************/ /* * Attempts to free an entry by adding a folio to the swap cache, * decompressing the entry data into the folio, and issuing a * bio write to write the folio back to the swap device. * * This can be thought of as a "resumed writeback" of the folio * to the swap device. We are basically resuming the same swap * writeback path that was intercepted with the zswap_store() * in the first place. After the folio has been decompressed into * the swap cache, the compressed version stored by zswap can be * freed. */ static int zswap_writeback_entry(struct zswap_entry *entry, swp_entry_t swpentry) { struct xarray *tree; pgoff_t offset = swp_offset(swpentry); struct folio *folio; struct mempolicy *mpol; bool folio_was_allocated; struct swap_info_struct *si; int ret = 0; /* try to allocate swap cache folio */ si = get_swap_device(swpentry); if (!si) return -EEXIST; mpol = get_task_policy(current); folio = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol, NO_INTERLEAVE_INDEX, &folio_was_allocated, true); put_swap_device(si); if (!folio) return -ENOMEM; /* * Found an existing folio, we raced with swapin or concurrent * shrinker. We generally writeback cold folios from zswap, and * swapin means the folio just became hot, so skip this folio. * For unlikely concurrent shrinker case, it will be unlinked * and freed when invalidated by the concurrent shrinker anyway. */ if (!folio_was_allocated) { ret = -EEXIST; goto out; } /* * folio is locked, and the swapcache is now secured against * concurrent swapping to and from the slot, and concurrent * swapoff so we can safely dereference the zswap tree here. * Verify that the swap entry hasn't been invalidated and recycled * behind our backs, to avoid overwriting a new swap folio with * old compressed data. Only when this is successful can the entry * be dereferenced. */ tree = swap_zswap_tree(swpentry); if (entry != xa_load(tree, offset)) { ret = -ENOMEM; goto out; } if (!zswap_decompress(entry, folio)) { ret = -EIO; goto out; } xa_erase(tree, offset); count_vm_event(ZSWPWB); if (entry->objcg) count_objcg_events(entry->objcg, ZSWPWB, 1); zswap_entry_free(entry); /* folio is up to date */ folio_mark_uptodate(folio); /* move it to the tail of the inactive list after end_writeback */ folio_set_reclaim(folio); /* start writeback */ __swap_writepage(folio, NULL); out: if (ret && ret != -EEXIST) { swap_cache_del_folio(folio); folio_unlock(folio); } folio_put(folio); return ret; } /********************************* * shrinker functions **********************************/ /* * The dynamic shrinker is modulated by the following factors: * * 1. Each zswap entry has a referenced bit, which the shrinker unsets (giving * the entry a second chance) before rotating it in the LRU list. If the * entry is considered again by the shrinker, with its referenced bit unset, * it is written back. The writeback rate as a result is dynamically * adjusted by the pool activities - if the pool is dominated by new entries * (i.e lots of recent zswapouts), these entries will be protected and * the writeback rate will slow down. On the other hand, if the pool has a * lot of stagnant entries, these entries will be reclaimed immediately, * effectively increasing the writeback rate. * * 2. Swapins counter: If we observe swapins, it is a sign that we are * overshrinking and should slow down. We maintain a swapins counter, which * is consumed and subtract from the number of eligible objects on the LRU * in zswap_shrinker_count(). * * 3. Compression ratio. The better the workload compresses, the less gains we * can expect from writeback. We scale down the number of objects available * for reclaim by this ratio. */ static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l, void *arg) { struct zswap_entry *entry = container_of(item, struct zswap_entry, lru); bool *encountered_page_in_swapcache = (bool *)arg; swp_entry_t swpentry; enum lru_status ret = LRU_REMOVED_RETRY; int writeback_result; /* * Second chance algorithm: if the entry has its referenced bit set, give it * a second chance. Only clear the referenced bit and rotate it in the * zswap's LRU list. */ if (entry->referenced) { entry->referenced = false; return LRU_ROTATE; } /* * As soon as we drop the LRU lock, the entry can be freed by * a concurrent invalidation. This means the following: * * 1. We extract the swp_entry_t to the stack, allowing * zswap_writeback_entry() to pin the swap entry and * then validate the zwap entry against that swap entry's * tree using pointer value comparison. Only when that * is successful can the entry be dereferenced. * * 2. Usually, objects are taken off the LRU for reclaim. In * this case this isn't possible, because if reclaim fails * for whatever reason, we have no means of knowing if the * entry is alive to put it back on the LRU. * * So rotate it before dropping the lock. If the entry is * written back or invalidated, the free path will unlink * it. For failures, rotation is the right thing as well. * * Temporary failures, where the same entry should be tried * again immediately, almost never happen for this shrinker. * We don't do any trylocking; -ENOMEM comes closest, * but that's extremely rare and doesn't happen spuriously * either. Don't bother distinguishing this case. */ list_move_tail(item, &l->list); /* * Once the lru lock is dropped, the entry might get freed. The * swpentry is copied to the stack, and entry isn't deref'd again * until the entry is verified to still be alive in the tree. */ swpentry = entry->swpentry; /* * It's safe to drop the lock here because we return either * LRU_REMOVED_RETRY, LRU_RETRY or LRU_STOP. */ spin_unlock(&l->lock); writeback_result = zswap_writeback_entry(entry, swpentry); if (writeback_result) { zswap_reject_reclaim_fail++; ret = LRU_RETRY; /* * Encountering a page already in swap cache is a sign that we are shrinking * into the warmer region. We should terminate shrinking (if we're in the dynamic * shrinker context). */ if (writeback_result == -EEXIST && encountered_page_in_swapcache) { ret = LRU_STOP; *encountered_page_in_swapcache = true; } } else { zswap_written_back_pages++; } return ret; } static unsigned long zswap_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { unsigned long shrink_ret; bool encountered_page_in_swapcache = false; if (!zswap_shrinker_enabled || !mem_cgroup_zswap_writeback_enabled(sc->memcg)) { sc->nr_scanned = 0; return SHRINK_STOP; } shrink_ret = list_lru_shrink_walk(&zswap_list_lru, sc, &shrink_memcg_cb, &encountered_page_in_swapcache); if (encountered_page_in_swapcache) return SHRINK_STOP; return shrink_ret ? shrink_ret : SHRINK_STOP; } static unsigned long zswap_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) { struct mem_cgroup *memcg = sc->memcg; struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(sc->nid)); atomic_long_t *nr_disk_swapins = &lruvec->zswap_lruvec_state.nr_disk_swapins; unsigned long nr_backing, nr_stored, nr_freeable, nr_disk_swapins_cur, nr_remain; if (!zswap_shrinker_enabled || !mem_cgroup_zswap_writeback_enabled(memcg)) return 0; /* * The shrinker resumes swap writeback, which will enter block * and may enter fs. XXX: Harmonize with vmscan.c __GFP_FS * rules (may_enter_fs()), which apply on a per-folio basis. */ if (!gfp_has_io_fs(sc->gfp_mask)) return 0; /* * For memcg, use the cgroup-wide ZSWAP stats since we don't * have them per-node and thus per-lruvec. Careful if memcg is * runtime-disabled: we can get sc->memcg == NULL, which is ok * for the lruvec, but not for memcg_page_state(). * * Without memcg, use the zswap pool-wide metrics. */ if (!mem_cgroup_disabled()) { mem_cgroup_flush_stats(memcg); nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT; nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED); } else { nr_backing = zswap_total_pages(); nr_stored = atomic_long_read(&zswap_stored_pages); } if (!nr_stored) return 0; nr_freeable = list_lru_shrink_count(&zswap_list_lru, sc); if (!nr_freeable) return 0; /* * Subtract from the lru size the number of pages that are recently swapped * in from disk. The idea is that had we protect the zswap's LRU by this * amount of pages, these disk swapins would not have happened. */ nr_disk_swapins_cur = atomic_long_read(nr_disk_swapins); do { if (nr_freeable >= nr_disk_swapins_cur) nr_remain = 0; else nr_remain = nr_disk_swapins_cur - nr_freeable; } while (!atomic_long_try_cmpxchg( nr_disk_swapins, &nr_disk_swapins_cur, nr_remain)); nr_freeable -= nr_disk_swapins_cur - nr_remain; if (!nr_freeable) return 0; /* * Scale the number of freeable pages by the memory saving factor. * This ensures that the better zswap compresses memory, the fewer * pages we will evict to swap (as it will otherwise incur IO for * relatively small memory saving). */ return mult_frac(nr_freeable, nr_backing, nr_stored); } static struct shrinker *zswap_alloc_shrinker(void) { struct shrinker *shrinker; shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, "mm-zswap"); if (!shrinker) return NULL; shrinker->scan_objects = zswap_shrinker_scan; shrinker->count_objects = zswap_shrinker_count; shrinker->batch = 0; shrinker->seeks = DEFAULT_SEEKS; return shrinker; } static int shrink_memcg(struct mem_cgroup *memcg) { int nid, shrunk = 0, scanned = 0; if (!mem_cgroup_zswap_writeback_enabled(memcg)) return -ENOENT; /* * Skip zombies because their LRUs are reparented and we would be * reclaiming from the parent instead of the dead memcg. */ if (memcg && !mem_cgroup_online(memcg)) return -ENOENT; for_each_node_state(nid, N_NORMAL_MEMORY) { unsigned long nr_to_walk = 1; shrunk += list_lru_walk_one(&zswap_list_lru, nid, memcg, &shrink_memcg_cb, NULL, &nr_to_walk); scanned += 1 - nr_to_walk; } if (!scanned) return -ENOENT; return shrunk ? 0 : -EAGAIN; } static void shrink_worker(struct work_struct *w) { struct mem_cgroup *memcg; int ret, failures = 0, attempts = 0; unsigned long thr; /* Reclaim down to the accept threshold */ thr = zswap_accept_thr_pages(); /* * Global reclaim will select cgroup in a round-robin fashion from all * online memcgs, but memcgs that have no pages in zswap and * writeback-disabled memcgs (memory.zswap.writeback=0) are not * candidates for shrinking. * * Shrinking will be aborted if we encounter the following * MAX_RECLAIM_RETRIES times: * - No writeback-candidate memcgs found in a memcg tree walk. * - Shrinking a writeback-candidate memcg failed. * * We save iteration cursor memcg into zswap_next_shrink, * which can be modified by the offline memcg cleaner * zswap_memcg_offline_cleanup(). * * Since the offline cleaner is called only once, we cannot leave an * offline memcg reference in zswap_next_shrink. * We can rely on the cleaner only if we get online memcg under lock. * * If we get an offline memcg, we cannot determine if the cleaner has * already been called or will be called later. We must put back the * reference before returning from this function. Otherwise, the * offline memcg left in zswap_next_shrink will hold the reference * until the next run of shrink_worker(). */ do { /* * Start shrinking from the next memcg after zswap_next_shrink. * When the offline cleaner has already advanced the cursor, * advancing the cursor here overlooks one memcg, but this * should be negligibly rare. * * If we get an online memcg, keep the extra reference in case * the original one obtained by mem_cgroup_iter() is dropped by * zswap_memcg_offline_cleanup() while we are shrinking the * memcg. */ spin_lock(&zswap_shrink_lock); do { memcg = mem_cgroup_iter(NULL, zswap_next_shrink, NULL); zswap_next_shrink = memcg; } while (memcg && !mem_cgroup_tryget_online(memcg)); spin_unlock(&zswap_shrink_lock); if (!memcg) { /* * Continue shrinking without incrementing failures if * we found candidate memcgs in the last tree walk. */ if (!attempts && ++failures == MAX_RECLAIM_RETRIES) break; attempts = 0; goto resched; } ret = shrink_memcg(memcg); /* drop the extra reference */ mem_cgroup_put(memcg); /* * There are no writeback-candidate pages in the memcg. * This is not an issue as long as we can find another memcg * with pages in zswap. Skip this without incrementing attempts * and failures. */ if (ret == -ENOENT) continue; ++attempts; if (ret && ++failures == MAX_RECLAIM_RETRIES) break; resched: cond_resched(); } while (zswap_total_pages() > thr); } /********************************* * main API **********************************/ static bool zswap_store_page(struct page *page, struct obj_cgroup *objcg, struct zswap_pool *pool) { swp_entry_t page_swpentry = page_swap_entry(page); struct zswap_entry *entry, *old; /* allocate entry */ entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page)); if (!entry) { zswap_reject_kmemcache_fail++; return false; } if (!zswap_compress(page, entry, pool)) goto compress_failed; old = xa_store(swap_zswap_tree(page_swpentry), swp_offset(page_swpentry), entry, GFP_KERNEL); if (xa_is_err(old)) { int err = xa_err(old); WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err); zswap_reject_alloc_fail++; goto store_failed; } /* * We may have had an existing entry that became stale when * the folio was redirtied and now the new version is being * swapped out. Get rid of the old. */ if (old) zswap_entry_free(old); /* * The entry is successfully compressed and stored in the tree, there is * no further possibility of failure. Grab refs to the pool and objcg, * charge zswap memory, and increment zswap_stored_pages. * The opposite actions will be performed by zswap_entry_free() * when the entry is removed from the tree. */ zswap_pool_get(pool); if (objcg) { obj_cgroup_get(objcg); obj_cgroup_charge_zswap(objcg, entry->length); } atomic_long_inc(&zswap_stored_pages); if (entry->length == PAGE_SIZE) atomic_long_inc(&zswap_stored_incompressible_pages); /* * We finish initializing the entry while it's already in xarray. * This is safe because: * * 1. Concurrent stores and invalidations are excluded by folio lock. * * 2. Writeback is excluded by the entry not being on the LRU yet. * The publishing order matters to prevent writeback from seeing * an incoherent entry. */ entry->pool = pool; entry->swpentry = page_swpentry; entry->objcg = objcg; entry->referenced = true; if (entry->length) { INIT_LIST_HEAD(&entry->lru); zswap_lru_add(&zswap_list_lru, entry); } return true; store_failed: zs_free(pool->zs_pool, entry->handle); compress_failed: zswap_entry_cache_free(entry); return false; } bool zswap_store(struct folio *folio) { long nr_pages = folio_nr_pages(folio); swp_entry_t swp = folio->swap; struct obj_cgroup *objcg = NULL; struct mem_cgroup *memcg = NULL; struct zswap_pool *pool; bool ret = false; long index; VM_WARN_ON_ONCE(!folio_test_locked(folio)); VM_WARN_ON_ONCE(!folio_test_swapcache(folio)); if (!zswap_enabled) goto check_old; objcg = get_obj_cgroup_from_folio(folio); if (objcg && !obj_cgroup_may_zswap(objcg)) { memcg = get_mem_cgroup_from_objcg(objcg); if (shrink_memcg(memcg)) { mem_cgroup_put(memcg); goto put_objcg; } mem_cgroup_put(memcg); } if (zswap_check_limits()) goto put_objcg; pool = zswap_pool_current_get(); if (!pool) goto put_objcg; if (objcg) { memcg = get_mem_cgroup_from_objcg(objcg); if (memcg_list_lru_alloc(memcg, &zswap_list_lru, GFP_KERNEL)) { mem_cgroup_put(memcg); goto put_pool; } mem_cgroup_put(memcg); } for (index = 0; index < nr_pages; ++index) { struct page *page = folio_page(folio, index); if (!zswap_store_page(page, objcg, pool)) goto put_pool; } if (objcg) count_objcg_events(objcg, ZSWPOUT, nr_pages); count_vm_events(ZSWPOUT, nr_pages); ret = true; put_pool: zswap_pool_put(pool); put_objcg: obj_cgroup_put(objcg); if (!ret && zswap_pool_reached_full) queue_work(shrink_wq, &zswap_shrink_work); check_old: /* * If the zswap store fails or zswap is disabled, we must invalidate * the possibly stale entries which were previously stored at the * offsets corresponding to each page of the folio. Otherwise, * writeback could overwrite the new data in the swapfile. */ if (!ret) { unsigned type = swp_type(swp); pgoff_t offset = swp_offset(swp); struct zswap_entry *entry; struct xarray *tree; for (index = 0; index < nr_pages; ++index) { tree = swap_zswap_tree(swp_entry(type, offset + index)); entry = xa_erase(tree, offset + index); if (entry) zswap_entry_free(entry); } } return ret; } /** * zswap_load() - load a folio from zswap * @folio: folio to load * * Return: 0 on success, with the folio unlocked and marked up-to-date, or one * of the following error codes: * * -EIO: if the swapped out content was in zswap, but could not be loaded * into the page due to a decompression failure. The folio is unlocked, but * NOT marked up-to-date, so that an IO error is emitted (e.g. do_swap_page() * will SIGBUS). * * -EINVAL: if the swapped out content was in zswap, but the page belongs * to a large folio, which is not supported by zswap. The folio is unlocked, * but NOT marked up-to-date, so that an IO error is emitted (e.g. * do_swap_page() will SIGBUS). * * -ENOENT: if the swapped out content was not in zswap. The folio remains * locked on return. */ int zswap_load(struct folio *folio) { swp_entry_t swp = folio->swap; pgoff_t offset = swp_offset(swp); bool swapcache = folio_test_swapcache(folio); struct xarray *tree = swap_zswap_tree(swp); struct zswap_entry *entry; VM_WARN_ON_ONCE(!folio_test_locked(folio)); if (zswap_never_enabled()) return -ENOENT; /* * Large folios should not be swapped in while zswap is being used, as * they are not properly handled. Zswap does not properly load large * folios, and a large folio may only be partially in zswap. */ if (WARN_ON_ONCE(folio_test_large(folio))) { folio_unlock(folio); return -EINVAL; } entry = xa_load(tree, offset); if (!entry) return -ENOENT; if (!zswap_decompress(entry, folio)) { folio_unlock(folio); return -EIO; } folio_mark_uptodate(folio); count_vm_event(ZSWPIN); if (entry->objcg) count_objcg_events(entry->objcg, ZSWPIN, 1); /* * When reading into the swapcache, invalidate our entry. The * swapcache can be the authoritative owner of the page and * its mappings, and the pressure that results from having two * in-memory copies outweighs any benefits of caching the * compression work. * * (Most swapins go through the swapcache. The notable * exception is the singleton fault on SWP_SYNCHRONOUS_IO * files, which reads into a private page and may free it if * the fault fails. We remain the primary owner of the entry.) */ if (swapcache) { folio_mark_dirty(folio); xa_erase(tree, offset); zswap_entry_free(entry); } folio_unlock(folio); return 0; } void zswap_invalidate(swp_entry_t swp) { pgoff_t offset = swp_offset(swp); struct xarray *tree = swap_zswap_tree(swp); struct zswap_entry *entry; if (xa_empty(tree)) return; entry = xa_erase(tree, offset); if (entry) zswap_entry_free(entry); } int zswap_swapon(int type, unsigned long nr_pages) { struct xarray *trees, *tree; unsigned int nr, i; nr = DIV_ROUND_UP(nr_pages, ZSWAP_ADDRESS_SPACE_PAGES); trees = kvcalloc(nr, sizeof(*tree), GFP_KERNEL); if (!trees) { pr_err("alloc failed, zswap disabled for swap type %d\n", type); return -ENOMEM; } for (i = 0; i < nr; i++) xa_init(trees + i); nr_zswap_trees[type] = nr; zswap_trees[type] = trees; return 0; } void zswap_swapoff(int type) { struct xarray *trees = zswap_trees[type]; unsigned int i; if (!trees) return; /* try_to_unuse() invalidated all the entries already */ for (i = 0; i < nr_zswap_trees[type]; i++) WARN_ON_ONCE(!xa_empty(trees + i)); kvfree(trees); nr_zswap_trees[type] = 0; zswap_trees[type] = NULL; } /********************************* * debugfs functions **********************************/ #ifdef CONFIG_DEBUG_FS #include <linux/debugfs.h> static struct dentry *zswap_debugfs_root; static int debugfs_get_total_size(void *data, u64 *val) { *val = zswap_total_pages() * PAGE_SIZE; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(total_size_fops, debugfs_get_total_size, NULL, "%llu\n"); static int debugfs_get_stored_pages(void *data, u64 *val) { *val = atomic_long_read(&zswap_stored_pages); return 0; } DEFINE_DEBUGFS_ATTRIBUTE(stored_pages_fops, debugfs_get_stored_pages, NULL, "%llu\n"); static int debugfs_get_stored_incompressible_pages(void *data, u64 *val) { *val = atomic_long_read(&zswap_stored_incompressible_pages); return 0; } DEFINE_DEBUGFS_ATTRIBUTE(stored_incompressible_pages_fops, debugfs_get_stored_incompressible_pages, NULL, "%llu\n"); static int zswap_debugfs_init(void) { if (!debugfs_initialized()) return -ENODEV; zswap_debugfs_root = debugfs_create_dir("zswap", NULL); debugfs_create_u64("pool_limit_hit", 0444, zswap_debugfs_root, &zswap_pool_limit_hit); debugfs_create_u64("reject_reclaim_fail", 0444, zswap_debugfs_root, &zswap_reject_reclaim_fail); debugfs_create_u64("reject_alloc_fail", 0444, zswap_debugfs_root, &zswap_reject_alloc_fail); debugfs_create_u64("reject_kmemcache_fail", 0444, zswap_debugfs_root, &zswap_reject_kmemcache_fail); debugfs_create_u64("reject_compress_fail", 0444, zswap_debugfs_root, &zswap_reject_compress_fail); debugfs_create_u64("reject_compress_poor", 0444, zswap_debugfs_root, &zswap_reject_compress_poor); debugfs_create_u64("decompress_fail", 0444, zswap_debugfs_root, &zswap_decompress_fail); debugfs_create_u64("written_back_pages", 0444, zswap_debugfs_root, &zswap_written_back_pages); debugfs_create_file("pool_total_size", 0444, zswap_debugfs_root, NULL, &total_size_fops); debugfs_create_file("stored_pages", 0444, zswap_debugfs_root, NULL, &stored_pages_fops); debugfs_create_file("stored_incompressible_pages", 0444, zswap_debugfs_root, NULL, &stored_incompressible_pages_fops); return 0; } #else static int zswap_debugfs_init(void) { return 0; } #endif /********************************* * module init and exit **********************************/ static int zswap_setup(void) { struct zswap_pool *pool; int ret; zswap_entry_cache = KMEM_CACHE(zswap_entry, 0); if (!zswap_entry_cache) { pr_err("entry cache creation failed\n"); goto cache_fail; } ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE, "mm/zswap_pool:prepare", zswap_cpu_comp_prepare, zswap_cpu_comp_dead); if (ret) goto hp_fail; shrink_wq = alloc_workqueue("zswap-shrink", WQ_UNBOUND|WQ_MEM_RECLAIM, 1); if (!shrink_wq) goto shrink_wq_fail; zswap_shrinker = zswap_alloc_shrinker(); if (!zswap_shrinker) goto shrinker_fail; if (list_lru_init_memcg(&zswap_list_lru, zswap_shrinker)) goto lru_fail; shrinker_register(zswap_shrinker); INIT_WORK(&zswap_shrink_work, shrink_worker); pool = __zswap_pool_create_fallback(); if (pool) { pr_info("loaded using pool %s\n", pool->tfm_name); list_add(&pool->list, &zswap_pools); zswap_has_pool = true; static_branch_enable(&zswap_ever_enabled); } else { pr_err("pool creation failed\n"); zswap_enabled = false; } if (zswap_debugfs_init()) pr_warn("debugfs initialization failed\n"); zswap_init_state = ZSWAP_INIT_SUCCEED; return 0; lru_fail: shrinker_free(zswap_shrinker); shrinker_fail: destroy_workqueue(shrink_wq); shrink_wq_fail: cpuhp_remove_multi_state(CPUHP_MM_ZSWP_POOL_PREPARE); hp_fail: kmem_cache_destroy(zswap_entry_cache); cache_fail: /* if built-in, we aren't unloaded on failure; don't allow use */ zswap_init_state = ZSWAP_INIT_FAILED; zswap_enabled = false; return -ENOMEM; } static int __init zswap_init(void) { if (!zswap_enabled) return 0; return zswap_setup(); } /* must be late so crypto has time to come up */ late_initcall(zswap_init); MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>"); MODULE_DESCRIPTION("Compressed cache for swap pages"); |
| 27 27 7 20 24 3 5 2 1 937 3 144 164 579 579 574 574 579 369 44 139 464 461 459 146 2 60 42 45 2 178 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ADDRCONF_H #define _ADDRCONF_H #define MAX_RTR_SOLICITATIONS -1 /* unlimited */ #define RTR_SOLICITATION_INTERVAL (4*HZ) #define RTR_SOLICITATION_MAX_INTERVAL (3600*HZ) /* 1 hour */ #define MIN_VALID_LIFETIME (2*3600) /* 2 hours */ #define TEMP_VALID_LIFETIME (7*86400) /* 1 week */ #define TEMP_PREFERRED_LIFETIME (86400) /* 24 hours */ #define REGEN_MIN_ADVANCE (2) /* 2 seconds */ #define REGEN_MAX_RETRY (3) #define MAX_DESYNC_FACTOR (600) #define ADDR_CHECK_FREQUENCY (120*HZ) #define IPV6_MAX_ADDRESSES 16 #define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ / 50 : 1) #define ADDRCONF_TIMER_FUZZ (HZ / 4) #define ADDRCONF_TIMER_FUZZ_MAX (HZ) #define ADDRCONF_NOTIFY_PRIORITY 0 #include <linux/in.h> #include <linux/in6.h> struct prefix_info { __u8 type; __u8 length; __u8 prefix_len; union __packed { __u8 flags; struct __packed { #if defined(__BIG_ENDIAN_BITFIELD) __u8 onlink : 1, autoconf : 1, routeraddr : 1, preferpd : 1, reserved : 4; #elif defined(__LITTLE_ENDIAN_BITFIELD) __u8 reserved : 4, preferpd : 1, routeraddr : 1, autoconf : 1, onlink : 1; #else #error "Please fix <asm/byteorder.h>" #endif }; }; __be32 valid; __be32 prefered; __be32 reserved2; struct in6_addr prefix; }; /* rfc4861 4.6.2: IPv6 PIO is 32 bytes in size */ static_assert(sizeof(struct prefix_info) == 32); #include <linux/ipv6.h> #include <linux/netdevice.h> #include <net/if_inet6.h> #include <net/ipv6.h> struct in6_validator_info { struct in6_addr i6vi_addr; struct inet6_dev *i6vi_dev; struct netlink_ext_ack *extack; }; struct ifa6_config { const struct in6_addr *pfx; unsigned int plen; u8 ifa_proto; const struct in6_addr *peer_pfx; u32 rt_priority; u32 ifa_flags; u32 preferred_lft; u32 valid_lft; u16 scope; }; enum addr_type_t { UNICAST_ADDR, MULTICAST_ADDR, ANYCAST_ADDR, }; struct inet6_fill_args { u32 portid; u32 seq; int event; unsigned int flags; int netnsid; int ifindex; enum addr_type_t type; bool force_rt_scope_universe; }; int addrconf_init(void); void addrconf_cleanup(void); int addrconf_add_ifaddr(struct net *net, void __user *arg); int addrconf_del_ifaddr(struct net *net, void __user *arg); int addrconf_set_dstaddr(struct net *net, void __user *arg); int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, const struct net_device *dev, bool skip_dev_check, int strict, u32 banned_flags); #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr); #endif int ipv6_chk_rpl_srh_loop(struct net *net, const struct in6_addr *segs, unsigned char nsegs); bool ipv6_chk_custom_prefix(const struct in6_addr *addr, const unsigned int prefix_len, struct net_device *dev); int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev); struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr, struct net_device *dev); struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr, struct net_device *dev, int strict); int ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, struct in6_addr *saddr); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, u32 banned_flags); bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, bool match_wildcard); bool inet_rcv_saddr_any(const struct sock *sk); void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr); void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr, u32 flags); int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, const struct prefix_info *pinfo, struct inet6_dev *in6_dev, const struct in6_addr *addr, int addr_type, u32 addr_flags, bool sllao, bool tokenized, __u32 valid_lft, u32 prefered_lft); static inline void addrconf_addr_eui48_base(u8 *eui, const char *const addr) { memcpy(eui, addr, 3); eui[3] = 0xFF; eui[4] = 0xFE; memcpy(eui + 5, addr + 3, 3); } static inline void addrconf_addr_eui48(u8 *eui, const char *const addr) { addrconf_addr_eui48_base(eui, addr); eui[0] ^= 2; } static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) { if (dev->addr_len != ETH_ALEN) return -1; /* * The zSeries OSA network cards can be shared among various * OS instances, but the OSA cards have only one MAC address. * This leads to duplicate address conflicts in conjunction * with IPv6 if more than one instance uses the same card. * * The driver for these cards can deliver a unique 16-bit * identifier for each instance sharing the same card. It is * placed instead of 0xFFFE in the interface identifier. The * "u" bit of the interface identifier is not inverted in this * case. Hence the resulting interface identifier has local * scope according to RFC2373. */ addrconf_addr_eui48_base(eui, dev->dev_addr); if (dev->dev_id) { eui[3] = (dev->dev_id >> 8) & 0xFF; eui[4] = dev->dev_id & 0xFF; } else { eui[0] ^= 2; } return 0; } #define INFINITY_LIFE_TIME 0xFFFFFFFF static inline unsigned long addrconf_timeout_fixup(u32 timeout, unsigned int unit) { if (timeout == INFINITY_LIFE_TIME) return ~0UL; /* * Avoid arithmetic overflow. * Assuming unit is constant and non-zero, this "if" statement * will go away on 64bit archs. */ if (0xfffffffe > LONG_MAX / unit && timeout > LONG_MAX / unit) return LONG_MAX / unit; return timeout; } static inline int addrconf_finite_timeout(unsigned long timeout) { return ~timeout; } /* * IPv6 Address Label subsystem (addrlabel.c) */ int ipv6_addr_label_init(void); void ipv6_addr_label_cleanup(void); int ipv6_addr_label_rtnl_register(void); u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr, int type, int ifindex); /* * multicast prototypes (mcast.c) */ static inline bool ipv6_mc_may_pull(struct sk_buff *skb, unsigned int len) { if (skb_transport_offset(skb) + ipv6_transport_len(skb) < len) return false; return pskb_may_pull(skb, len); } int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr); int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr); void __ipv6_sock_mc_close(struct sock *sk); void ipv6_sock_mc_close(struct sock *sk); bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr, const struct in6_addr *src_addr); int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr); int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr); int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr); void ipv6_mc_up(struct inet6_dev *idev); void ipv6_mc_down(struct inet6_dev *idev); void ipv6_mc_unmap(struct inet6_dev *idev); void ipv6_mc_remap(struct inet6_dev *idev); void ipv6_mc_init_dev(struct inet6_dev *idev); void ipv6_mc_destroy_dev(struct inet6_dev *idev); int ipv6_mc_check_mld(struct sk_buff *skb); void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp); bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, const struct in6_addr *src_addr); void ipv6_mc_dad_complete(struct inet6_dev *idev); /* * identify MLD packets for MLD filter exceptions */ static inline bool ipv6_is_mld(struct sk_buff *skb, int nexthdr, int offset) { struct icmp6hdr *hdr; if (nexthdr != IPPROTO_ICMPV6 || !pskb_network_may_pull(skb, offset + sizeof(struct icmp6hdr))) return false; hdr = (struct icmp6hdr *)(skb_network_header(skb) + offset); switch (hdr->icmp6_type) { case ICMPV6_MGM_QUERY: case ICMPV6_MGM_REPORT: case ICMPV6_MGM_REDUCTION: case ICMPV6_MLD2_REPORT: return true; default: break; } return false; } void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao); /* * anycast prototypes (anycast.c) */ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr); int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr); void __ipv6_sock_ac_close(struct sock *sk); void ipv6_sock_ac_close(struct sock *sk); int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr); int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr); void ipv6_ac_destroy_dev(struct inet6_dev *idev); bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr); bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, const struct in6_addr *addr); int ipv6_anycast_init(void); void ipv6_anycast_cleanup(void); /* Device notifier */ int register_inet6addr_notifier(struct notifier_block *nb); int unregister_inet6addr_notifier(struct notifier_block *nb); int inet6addr_notifier_call_chain(unsigned long val, void *v); int register_inet6addr_validator_notifier(struct notifier_block *nb); int unregister_inet6addr_validator_notifier(struct notifier_block *nb); int inet6addr_validator_notifier_call_chain(unsigned long val, void *v); void inet6_netconf_notify_devconf(struct net *net, int event, int type, int ifindex, struct ipv6_devconf *devconf); /** * __in6_dev_get - get inet6_dev pointer from netdevice * @dev: network device * * Caller must hold rcu_read_lock or RTNL, because this function * does not take a reference on the inet6_dev. */ static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev) { return rcu_dereference_rtnl(dev->ip6_ptr); } static inline struct inet6_dev *__in6_dev_get_rtnl_net(const struct net_device *dev) { return rtnl_net_dereference(dev_net(dev), dev->ip6_ptr); } /** * __in6_dev_stats_get - get inet6_dev pointer for stats * @dev: network device * @skb: skb for original incoming interface if needed * * Caller must hold rcu_read_lock or RTNL, because this function * does not take a reference on the inet6_dev. */ static inline struct inet6_dev *__in6_dev_stats_get(const struct net_device *dev, const struct sk_buff *skb) { if (netif_is_l3_master(dev)) dev = dev_get_by_index_rcu(dev_net(dev), inet6_iif(skb)); return __in6_dev_get(dev); } /** * __in6_dev_get_safely - get inet6_dev pointer from netdevice * @dev: network device * * This is a safer version of __in6_dev_get */ static inline struct inet6_dev *__in6_dev_get_safely(const struct net_device *dev) { if (likely(dev)) return rcu_dereference_rtnl(dev->ip6_ptr); else return NULL; } /** * in6_dev_get - get inet6_dev pointer from netdevice * @dev: network device * * This version can be used in any context, and takes a reference * on the inet6_dev. Callers must use in6_dev_put() later to * release this reference. */ static inline struct inet6_dev *in6_dev_get(const struct net_device *dev) { struct inet6_dev *idev; rcu_read_lock(); idev = rcu_dereference(dev->ip6_ptr); if (idev) refcount_inc(&idev->refcnt); rcu_read_unlock(); return idev; } static inline struct neigh_parms *__in6_dev_nd_parms_get_rcu(const struct net_device *dev) { struct inet6_dev *idev = __in6_dev_get(dev); return idev ? idev->nd_parms : NULL; } void in6_dev_finish_destroy(struct inet6_dev *idev); static inline void in6_dev_put(struct inet6_dev *idev) { if (refcount_dec_and_test(&idev->refcnt)) in6_dev_finish_destroy(idev); } static inline void in6_dev_put_clear(struct inet6_dev **pidev) { struct inet6_dev *idev = *pidev; if (idev) { in6_dev_put(idev); *pidev = NULL; } } static inline void __in6_dev_put(struct inet6_dev *idev) { refcount_dec(&idev->refcnt); } static inline void in6_dev_hold(struct inet6_dev *idev) { refcount_inc(&idev->refcnt); } /* called with rcu_read_lock held */ static inline bool ip6_ignore_linkdown(const struct net_device *dev) { const struct inet6_dev *idev = __in6_dev_get(dev); if (unlikely(!idev)) return true; return !!READ_ONCE(idev->cnf.ignore_routes_with_linkdown); } void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp); static inline void in6_ifa_put(struct inet6_ifaddr *ifp) { if (refcount_dec_and_test(&ifp->refcnt)) inet6_ifa_finish_destroy(ifp); } static inline void __in6_ifa_put(struct inet6_ifaddr *ifp) { refcount_dec(&ifp->refcnt); } static inline void in6_ifa_hold(struct inet6_ifaddr *ifp) { refcount_inc(&ifp->refcnt); } static inline bool in6_ifa_hold_safe(struct inet6_ifaddr *ifp) { return refcount_inc_not_zero(&ifp->refcnt); } /* * compute link-local solicited-node multicast address */ static inline void addrconf_addr_solict_mult(const struct in6_addr *addr, struct in6_addr *solicited) { ipv6_addr_set(solicited, htonl(0xFF020000), 0, htonl(0x1), htonl(0xFF000000) | addr->s6_addr32[3]); } static inline bool ipv6_addr_is_ll_all_nodes(const struct in6_addr *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 __be64 *p = (__force __be64 *)addr; return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | (p[1] ^ cpu_to_be64(1))) == 0UL; #else return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | addr->s6_addr32[1] | addr->s6_addr32[2] | (addr->s6_addr32[3] ^ htonl(0x00000001))) == 0; #endif } static inline bool ipv6_addr_is_ll_all_routers(const struct in6_addr *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 __be64 *p = (__force __be64 *)addr; return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | (p[1] ^ cpu_to_be64(2))) == 0UL; #else return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | addr->s6_addr32[1] | addr->s6_addr32[2] | (addr->s6_addr32[3] ^ htonl(0x00000002))) == 0; #endif } static inline bool ipv6_addr_is_isatap(const struct in6_addr *addr) { return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE); } static inline bool ipv6_addr_is_solict_mult(const struct in6_addr *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 __be64 *p = (__force __be64 *)addr; return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | ((p[1] ^ cpu_to_be64(0x00000001ff000000UL)) & cpu_to_be64(0xffffffffff000000UL))) == 0UL; #else return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | addr->s6_addr32[1] | (addr->s6_addr32[2] ^ htonl(0x00000001)) | (addr->s6_addr[12] ^ 0xff)) == 0; #endif } static inline bool ipv6_addr_is_all_snoopers(const struct in6_addr *addr) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 __be64 *p = (__force __be64 *)addr; return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | (p[1] ^ cpu_to_be64(0x6a))) == 0UL; #else return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | addr->s6_addr32[1] | addr->s6_addr32[2] | (addr->s6_addr32[3] ^ htonl(0x0000006a))) == 0; #endif } #ifdef CONFIG_PROC_FS int if6_proc_init(void); void if6_proc_exit(void); #endif int inet6_fill_ifmcaddr(struct sk_buff *skb, const struct ifmcaddr6 *ifmca, struct inet6_fill_args *args); int inet6_fill_ifacaddr(struct sk_buff *skb, const struct ifacaddr6 *ifaca, struct inet6_fill_args *args); #endif |
| 605 1032 1 226 1894 233 35 18339 31 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 | /* SPDX-License-Identifier: GPL-2.0-only */ #ifndef __ASM_GENERIC_BITOPS_GENERIC_NON_ATOMIC_H #define __ASM_GENERIC_BITOPS_GENERIC_NON_ATOMIC_H #include <linux/bits.h> #include <asm/barrier.h> #ifndef _LINUX_BITOPS_H #error only <linux/bitops.h> can be included directly #endif /* * Generic definitions for bit operations, should not be used in regular code * directly. */ /** * generic___set_bit - Set a bit in memory * @nr: the bit to set * @addr: the address to start counting from * * Unlike set_bit(), this function is non-atomic and may be reordered. * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ static __always_inline void generic___set_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); *p |= mask; } static __always_inline void generic___clear_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); *p &= ~mask; } /** * generic___change_bit - Toggle a bit in memory * @nr: the bit to change * @addr: the address to start counting from * * Unlike change_bit(), this function is non-atomic and may be reordered. * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ static __always_inline void generic___change_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); *p ^= mask; } /** * generic___test_and_set_bit - Set a bit and return its old value * @nr: Bit to set * @addr: Address to count from * * This operation is non-atomic and can be reordered. * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ static __always_inline bool generic___test_and_set_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); unsigned long old = *p; *p = old | mask; return (old & mask) != 0; } /** * generic___test_and_clear_bit - Clear a bit and return its old value * @nr: Bit to clear * @addr: Address to count from * * This operation is non-atomic and can be reordered. * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ static __always_inline bool generic___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); unsigned long old = *p; *p = old & ~mask; return (old & mask) != 0; } /* WARNING: non atomic and it can be reordered! */ static __always_inline bool generic___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); unsigned long old = *p; *p = old ^ mask; return (old & mask) != 0; } /** * generic_test_bit - Determine whether a bit is set * @nr: bit number to test * @addr: Address to start counting from */ static __always_inline bool generic_test_bit(unsigned long nr, const volatile unsigned long *addr) { /* * Unlike the bitops with the '__' prefix above, this one *is* atomic, * so `volatile` must always stay here with no cast-aways. See * `Documentation/atomic_bitops.txt` for the details. */ return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } /** * generic_test_bit_acquire - Determine, with acquire semantics, whether a bit is set * @nr: bit number to test * @addr: Address to start counting from */ static __always_inline bool generic_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr) { unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1))); } /* * const_*() definitions provide good compile-time optimizations when * the passed arguments can be resolved at compile time. */ #define const___set_bit generic___set_bit #define const___clear_bit generic___clear_bit #define const___change_bit generic___change_bit #define const___test_and_set_bit generic___test_and_set_bit #define const___test_and_clear_bit generic___test_and_clear_bit #define const___test_and_change_bit generic___test_and_change_bit #define const_test_bit_acquire generic_test_bit_acquire /** * const_test_bit - Determine whether a bit is set * @nr: bit number to test * @addr: Address to start counting from * * A version of generic_test_bit() which discards the `volatile` qualifier to * allow a compiler to optimize code harder. Non-atomic and to be called only * for testing compile-time constants, e.g. by the corresponding macros, not * directly from "regular" code. */ static __always_inline bool const_test_bit(unsigned long nr, const volatile unsigned long *addr) { const unsigned long *p = (const unsigned long *)addr + BIT_WORD(nr); unsigned long mask = BIT_MASK(nr); unsigned long val = *p; return !!(val & mask); } #endif /* __ASM_GENERIC_BITOPS_GENERIC_NON_ATOMIC_H */ |
| 1223 1222 1223 1223 1222 1223 1223 1223 1223 1223 1222 1219 1223 1221 1223 1222 44 44 1215 609 33 610 583 1214 1126 1214 1213 79 1216 1211 1216 1212 43 2 1168 9 2 1 1 7 9 9 9 9 1 7 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 | // SPDX-License-Identifier: GPL-2.0-or-later /* * SHA-224, SHA-256, HMAC-SHA224, and HMAC-SHA256 library functions * * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com> * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> * Copyright (c) 2014 Red Hat Inc. * Copyright 2025 Google LLC */ #include <crypto/hmac.h> #include <crypto/sha2.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/string.h> #include <linux/unaligned.h> #include <linux/wordpart.h> static const struct sha256_block_state sha224_iv = { .h = { SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, }, }; static const struct sha256_ctx initial_sha256_ctx = { .ctx = { .state = { .h = { SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, }, }, .bytecount = 0, }, }; #define sha256_iv (initial_sha256_ctx.ctx.state) static const u32 sha256_K[64] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, }; #define Ch(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) #define Maj(x, y, z) (((x) & (y)) | ((z) & ((x) | (y)))) #define e0(x) (ror32((x), 2) ^ ror32((x), 13) ^ ror32((x), 22)) #define e1(x) (ror32((x), 6) ^ ror32((x), 11) ^ ror32((x), 25)) #define s0(x) (ror32((x), 7) ^ ror32((x), 18) ^ ((x) >> 3)) #define s1(x) (ror32((x), 17) ^ ror32((x), 19) ^ ((x) >> 10)) static inline void LOAD_OP(int I, u32 *W, const u8 *input) { W[I] = get_unaligned_be32((__u32 *)input + I); } static inline void BLEND_OP(int I, u32 *W) { W[I] = s1(W[I - 2]) + W[I - 7] + s0(W[I - 15]) + W[I - 16]; } #define SHA256_ROUND(i, a, b, c, d, e, f, g, h) \ do { \ u32 t1, t2; \ t1 = h + e1(e) + Ch(e, f, g) + sha256_K[i] + W[i]; \ t2 = e0(a) + Maj(a, b, c); \ d += t1; \ h = t1 + t2; \ } while (0) static void sha256_block_generic(struct sha256_block_state *state, const u8 *input, u32 W[64]) { u32 a, b, c, d, e, f, g, h; int i; /* load the input */ for (i = 0; i < 16; i += 8) { LOAD_OP(i + 0, W, input); LOAD_OP(i + 1, W, input); LOAD_OP(i + 2, W, input); LOAD_OP(i + 3, W, input); LOAD_OP(i + 4, W, input); LOAD_OP(i + 5, W, input); LOAD_OP(i + 6, W, input); LOAD_OP(i + 7, W, input); } /* now blend */ for (i = 16; i < 64; i += 8) { BLEND_OP(i + 0, W); BLEND_OP(i + 1, W); BLEND_OP(i + 2, W); BLEND_OP(i + 3, W); BLEND_OP(i + 4, W); BLEND_OP(i + 5, W); BLEND_OP(i + 6, W); BLEND_OP(i + 7, W); } /* load the state into our registers */ a = state->h[0]; b = state->h[1]; c = state->h[2]; d = state->h[3]; e = state->h[4]; f = state->h[5]; g = state->h[6]; h = state->h[7]; /* now iterate */ for (i = 0; i < 64; i += 8) { SHA256_ROUND(i + 0, a, b, c, d, e, f, g, h); SHA256_ROUND(i + 1, h, a, b, c, d, e, f, g); SHA256_ROUND(i + 2, g, h, a, b, c, d, e, f); SHA256_ROUND(i + 3, f, g, h, a, b, c, d, e); SHA256_ROUND(i + 4, e, f, g, h, a, b, c, d); SHA256_ROUND(i + 5, d, e, f, g, h, a, b, c); SHA256_ROUND(i + 6, c, d, e, f, g, h, a, b); SHA256_ROUND(i + 7, b, c, d, e, f, g, h, a); } state->h[0] += a; state->h[1] += b; state->h[2] += c; state->h[3] += d; state->h[4] += e; state->h[5] += f; state->h[6] += g; state->h[7] += h; } static void __maybe_unused sha256_blocks_generic(struct sha256_block_state *state, const u8 *data, size_t nblocks) { u32 W[64]; do { sha256_block_generic(state, data, W); data += SHA256_BLOCK_SIZE; } while (--nblocks); memzero_explicit(W, sizeof(W)); } #if defined(CONFIG_CRYPTO_LIB_SHA256_ARCH) && !defined(__DISABLE_EXPORTS) #include "sha256.h" /* $(SRCARCH)/sha256.h */ #else #define sha256_blocks sha256_blocks_generic #endif static void __sha256_init(struct __sha256_ctx *ctx, const struct sha256_block_state *iv, u64 initial_bytecount) { ctx->state = *iv; ctx->bytecount = initial_bytecount; } void sha224_init(struct sha224_ctx *ctx) { __sha256_init(&ctx->ctx, &sha224_iv, 0); } EXPORT_SYMBOL_GPL(sha224_init); void sha256_init(struct sha256_ctx *ctx) { __sha256_init(&ctx->ctx, &sha256_iv, 0); } EXPORT_SYMBOL_GPL(sha256_init); void __sha256_update(struct __sha256_ctx *ctx, const u8 *data, size_t len) { size_t partial = ctx->bytecount % SHA256_BLOCK_SIZE; ctx->bytecount += len; if (partial + len >= SHA256_BLOCK_SIZE) { size_t nblocks; if (partial) { size_t l = SHA256_BLOCK_SIZE - partial; memcpy(&ctx->buf[partial], data, l); data += l; len -= l; sha256_blocks(&ctx->state, ctx->buf, 1); } nblocks = len / SHA256_BLOCK_SIZE; len %= SHA256_BLOCK_SIZE; if (nblocks) { sha256_blocks(&ctx->state, data, nblocks); data += nblocks * SHA256_BLOCK_SIZE; } partial = 0; } if (len) memcpy(&ctx->buf[partial], data, len); } EXPORT_SYMBOL(__sha256_update); static void __sha256_final(struct __sha256_ctx *ctx, u8 *out, size_t digest_size) { u64 bitcount = ctx->bytecount << 3; size_t partial = ctx->bytecount % SHA256_BLOCK_SIZE; ctx->buf[partial++] = 0x80; if (partial > SHA256_BLOCK_SIZE - 8) { memset(&ctx->buf[partial], 0, SHA256_BLOCK_SIZE - partial); sha256_blocks(&ctx->state, ctx->buf, 1); partial = 0; } memset(&ctx->buf[partial], 0, SHA256_BLOCK_SIZE - 8 - partial); *(__be64 *)&ctx->buf[SHA256_BLOCK_SIZE - 8] = cpu_to_be64(bitcount); sha256_blocks(&ctx->state, ctx->buf, 1); for (size_t i = 0; i < digest_size; i += 4) put_unaligned_be32(ctx->state.h[i / 4], out + i); } void sha224_final(struct sha224_ctx *ctx, u8 out[SHA224_DIGEST_SIZE]) { __sha256_final(&ctx->ctx, out, SHA224_DIGEST_SIZE); memzero_explicit(ctx, sizeof(*ctx)); } EXPORT_SYMBOL(sha224_final); void sha256_final(struct sha256_ctx *ctx, u8 out[SHA256_DIGEST_SIZE]) { __sha256_final(&ctx->ctx, out, SHA256_DIGEST_SIZE); memzero_explicit(ctx, sizeof(*ctx)); } EXPORT_SYMBOL(sha256_final); void sha224(const u8 *data, size_t len, u8 out[SHA224_DIGEST_SIZE]) { struct sha224_ctx ctx; sha224_init(&ctx); sha224_update(&ctx, data, len); sha224_final(&ctx, out); } EXPORT_SYMBOL(sha224); void sha256(const u8 *data, size_t len, u8 out[SHA256_DIGEST_SIZE]) { struct sha256_ctx ctx; sha256_init(&ctx); sha256_update(&ctx, data, len); sha256_final(&ctx, out); } EXPORT_SYMBOL(sha256); /* * Pre-boot environment (as indicated by __DISABLE_EXPORTS being defined) * doesn't need either HMAC support or interleaved hashing support */ #ifndef __DISABLE_EXPORTS #ifndef sha256_finup_2x_arch static bool sha256_finup_2x_arch(const struct __sha256_ctx *ctx, const u8 *data1, const u8 *data2, size_t len, u8 out1[SHA256_DIGEST_SIZE], u8 out2[SHA256_DIGEST_SIZE]) { return false; } static bool sha256_finup_2x_is_optimized_arch(void) { return false; } #endif /* Sequential fallback implementation of sha256_finup_2x() */ static noinline_for_stack void sha256_finup_2x_sequential( const struct __sha256_ctx *ctx, const u8 *data1, const u8 *data2, size_t len, u8 out1[SHA256_DIGEST_SIZE], u8 out2[SHA256_DIGEST_SIZE]) { struct __sha256_ctx mut_ctx; mut_ctx = *ctx; __sha256_update(&mut_ctx, data1, len); __sha256_final(&mut_ctx, out1, SHA256_DIGEST_SIZE); mut_ctx = *ctx; __sha256_update(&mut_ctx, data2, len); __sha256_final(&mut_ctx, out2, SHA256_DIGEST_SIZE); } void sha256_finup_2x(const struct sha256_ctx *ctx, const u8 *data1, const u8 *data2, size_t len, u8 out1[SHA256_DIGEST_SIZE], u8 out2[SHA256_DIGEST_SIZE]) { if (ctx == NULL) ctx = &initial_sha256_ctx; if (likely(sha256_finup_2x_arch(&ctx->ctx, data1, data2, len, out1, out2))) return; sha256_finup_2x_sequential(&ctx->ctx, data1, data2, len, out1, out2); } EXPORT_SYMBOL_GPL(sha256_finup_2x); bool sha256_finup_2x_is_optimized(void) { return sha256_finup_2x_is_optimized_arch(); } EXPORT_SYMBOL_GPL(sha256_finup_2x_is_optimized); static void __hmac_sha256_preparekey(struct sha256_block_state *istate, struct sha256_block_state *ostate, const u8 *raw_key, size_t raw_key_len, const struct sha256_block_state *iv) { union { u8 b[SHA256_BLOCK_SIZE]; unsigned long w[SHA256_BLOCK_SIZE / sizeof(unsigned long)]; } derived_key = { 0 }; if (unlikely(raw_key_len > SHA256_BLOCK_SIZE)) { if (iv == &sha224_iv) sha224(raw_key, raw_key_len, derived_key.b); else sha256(raw_key, raw_key_len, derived_key.b); } else { memcpy(derived_key.b, raw_key, raw_key_len); } for (size_t i = 0; i < ARRAY_SIZE(derived_key.w); i++) derived_key.w[i] ^= REPEAT_BYTE(HMAC_IPAD_VALUE); *istate = *iv; sha256_blocks(istate, derived_key.b, 1); for (size_t i = 0; i < ARRAY_SIZE(derived_key.w); i++) derived_key.w[i] ^= REPEAT_BYTE(HMAC_OPAD_VALUE ^ HMAC_IPAD_VALUE); *ostate = *iv; sha256_blocks(ostate, derived_key.b, 1); memzero_explicit(&derived_key, sizeof(derived_key)); } void hmac_sha224_preparekey(struct hmac_sha224_key *key, const u8 *raw_key, size_t raw_key_len) { __hmac_sha256_preparekey(&key->key.istate, &key->key.ostate, raw_key, raw_key_len, &sha224_iv); } EXPORT_SYMBOL_GPL(hmac_sha224_preparekey); void hmac_sha256_preparekey(struct hmac_sha256_key *key, const u8 *raw_key, size_t raw_key_len) { __hmac_sha256_preparekey(&key->key.istate, &key->key.ostate, raw_key, raw_key_len, &sha256_iv); } EXPORT_SYMBOL_GPL(hmac_sha256_preparekey); void __hmac_sha256_init(struct __hmac_sha256_ctx *ctx, const struct __hmac_sha256_key *key) { __sha256_init(&ctx->sha_ctx, &key->istate, SHA256_BLOCK_SIZE); ctx->ostate = key->ostate; } EXPORT_SYMBOL_GPL(__hmac_sha256_init); void hmac_sha224_init_usingrawkey(struct hmac_sha224_ctx *ctx, const u8 *raw_key, size_t raw_key_len) { __hmac_sha256_preparekey(&ctx->ctx.sha_ctx.state, &ctx->ctx.ostate, raw_key, raw_key_len, &sha224_iv); ctx->ctx.sha_ctx.bytecount = SHA256_BLOCK_SIZE; } EXPORT_SYMBOL_GPL(hmac_sha224_init_usingrawkey); void hmac_sha256_init_usingrawkey(struct hmac_sha256_ctx *ctx, const u8 *raw_key, size_t raw_key_len) { __hmac_sha256_preparekey(&ctx->ctx.sha_ctx.state, &ctx->ctx.ostate, raw_key, raw_key_len, &sha256_iv); ctx->ctx.sha_ctx.bytecount = SHA256_BLOCK_SIZE; } EXPORT_SYMBOL_GPL(hmac_sha256_init_usingrawkey); static void __hmac_sha256_final(struct __hmac_sha256_ctx *ctx, u8 *out, size_t digest_size) { /* Generate the padded input for the outer hash in ctx->sha_ctx.buf. */ __sha256_final(&ctx->sha_ctx, ctx->sha_ctx.buf, digest_size); memset(&ctx->sha_ctx.buf[digest_size], 0, SHA256_BLOCK_SIZE - digest_size); ctx->sha_ctx.buf[digest_size] = 0x80; *(__be32 *)&ctx->sha_ctx.buf[SHA256_BLOCK_SIZE - 4] = cpu_to_be32(8 * (SHA256_BLOCK_SIZE + digest_size)); /* Compute the outer hash, which gives the HMAC value. */ sha256_blocks(&ctx->ostate, ctx->sha_ctx.buf, 1); for (size_t i = 0; i < digest_size; i += 4) put_unaligned_be32(ctx->ostate.h[i / 4], out + i); memzero_explicit(ctx, sizeof(*ctx)); } void hmac_sha224_final(struct hmac_sha224_ctx *ctx, u8 out[SHA224_DIGEST_SIZE]) { __hmac_sha256_final(&ctx->ctx, out, SHA224_DIGEST_SIZE); } EXPORT_SYMBOL_GPL(hmac_sha224_final); void hmac_sha256_final(struct hmac_sha256_ctx *ctx, u8 out[SHA256_DIGEST_SIZE]) { __hmac_sha256_final(&ctx->ctx, out, SHA256_DIGEST_SIZE); } EXPORT_SYMBOL_GPL(hmac_sha256_final); void hmac_sha224(const struct hmac_sha224_key *key, const u8 *data, size_t data_len, u8 out[SHA224_DIGEST_SIZE]) { struct hmac_sha224_ctx ctx; hmac_sha224_init(&ctx, key); hmac_sha224_update(&ctx, data, data_len); hmac_sha224_final(&ctx, out); } EXPORT_SYMBOL_GPL(hmac_sha224); void hmac_sha256(const struct hmac_sha256_key *key, const u8 *data, size_t data_len, u8 out[SHA256_DIGEST_SIZE]) { struct hmac_sha256_ctx ctx; hmac_sha256_init(&ctx, key); hmac_sha256_update(&ctx, data, data_len); hmac_sha256_final(&ctx, out); } EXPORT_SYMBOL_GPL(hmac_sha256); void hmac_sha224_usingrawkey(const u8 *raw_key, size_t raw_key_len, const u8 *data, size_t data_len, u8 out[SHA224_DIGEST_SIZE]) { struct hmac_sha224_ctx ctx; hmac_sha224_init_usingrawkey(&ctx, raw_key, raw_key_len); hmac_sha224_update(&ctx, data, data_len); hmac_sha224_final(&ctx, out); } EXPORT_SYMBOL_GPL(hmac_sha224_usingrawkey); void hmac_sha256_usingrawkey(const u8 *raw_key, size_t raw_key_len, const u8 *data, size_t data_len, u8 out[SHA256_DIGEST_SIZE]) { struct hmac_sha256_ctx ctx; hmac_sha256_init_usingrawkey(&ctx, raw_key, raw_key_len); hmac_sha256_update(&ctx, data, data_len); hmac_sha256_final(&ctx, out); } EXPORT_SYMBOL_GPL(hmac_sha256_usingrawkey); #endif /* !__DISABLE_EXPORTS */ #ifdef sha256_mod_init_arch static int __init sha256_mod_init(void) { sha256_mod_init_arch(); return 0; } subsys_initcall(sha256_mod_init); static void __exit sha256_mod_exit(void) { } module_exit(sha256_mod_exit); #endif MODULE_DESCRIPTION("SHA-224, SHA-256, HMAC-SHA224, and HMAC-SHA256 library functions"); MODULE_LICENSE("GPL"); |
| 25 268 130 246 243 246 243 242 52 52 3 52 275 149 8 1 31 7 154 6 175 6 6 6 6 69 71 68 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the Forwarding Information Base. * * Authors: A.N.Kuznetsov, <kuznet@ms2.inr.ac.ru> */ #ifndef _NET_IP_FIB_H #define _NET_IP_FIB_H #include <net/flow.h> #include <linux/seq_file.h> #include <linux/rcupdate.h> #include <net/fib_notifier.h> #include <net/fib_rules.h> #include <net/inet_dscp.h> #include <net/inetpeer.h> #include <linux/percpu.h> #include <linux/notifier.h> #include <linux/refcount.h> #include <linux/ip.h> #include <linux/in_route.h> struct fib_config { u8 fc_dst_len; dscp_t fc_dscp; u8 fc_protocol; u8 fc_scope; u8 fc_type; u8 fc_gw_family; /* 2 bytes unused */ u32 fc_table; __be32 fc_dst; union { __be32 fc_gw4; struct in6_addr fc_gw6; }; int fc_oif; u32 fc_flags; u32 fc_priority; __be32 fc_prefsrc; u32 fc_nh_id; struct nlattr *fc_mx; struct rtnexthop *fc_mp; int fc_mx_len; int fc_mp_len; u32 fc_flow; u32 fc_nlflags; struct nl_info fc_nlinfo; struct nlattr *fc_encap; u16 fc_encap_type; }; struct fib_info; struct rtable; struct fib_nh_exception { struct fib_nh_exception __rcu *fnhe_next; int fnhe_genid; __be32 fnhe_daddr; u32 fnhe_pmtu; bool fnhe_mtu_locked; __be32 fnhe_gw; unsigned long fnhe_expires; struct rtable __rcu *fnhe_rth_input; struct rtable __rcu *fnhe_rth_output; unsigned long fnhe_stamp; struct rcu_head rcu; }; struct fnhe_hash_bucket { struct fib_nh_exception __rcu *chain; }; #define FNHE_HASH_SHIFT 11 #define FNHE_HASH_SIZE (1 << FNHE_HASH_SHIFT) #define FNHE_RECLAIM_DEPTH 5 struct fib_nh_common { struct net_device *nhc_dev; netdevice_tracker nhc_dev_tracker; int nhc_oif; unsigned char nhc_scope; u8 nhc_family; u8 nhc_gw_family; unsigned char nhc_flags; struct lwtunnel_state *nhc_lwtstate; union { __be32 ipv4; struct in6_addr ipv6; } nhc_gw; int nhc_weight; atomic_t nhc_upper_bound; /* v4 specific, but allows fib6_nh with v4 routes */ struct rtable __rcu * __percpu *nhc_pcpu_rth_output; struct rtable __rcu *nhc_rth_input; struct fnhe_hash_bucket __rcu *nhc_exceptions; }; struct fib_nh { struct fib_nh_common nh_common; struct hlist_node nh_hash; struct fib_info *nh_parent; #ifdef CONFIG_IP_ROUTE_CLASSID __u32 nh_tclassid; #endif __be32 nh_saddr; int nh_saddr_genid; #define fib_nh_family nh_common.nhc_family #define fib_nh_dev nh_common.nhc_dev #define fib_nh_dev_tracker nh_common.nhc_dev_tracker #define fib_nh_oif nh_common.nhc_oif #define fib_nh_flags nh_common.nhc_flags #define fib_nh_lws nh_common.nhc_lwtstate #define fib_nh_scope nh_common.nhc_scope #define fib_nh_gw_family nh_common.nhc_gw_family #define fib_nh_gw4 nh_common.nhc_gw.ipv4 #define fib_nh_gw6 nh_common.nhc_gw.ipv6 #define fib_nh_weight nh_common.nhc_weight #define fib_nh_upper_bound nh_common.nhc_upper_bound }; /* * This structure contains data shared by many of routes. */ struct nexthop; struct fib_info { struct hlist_node fib_hash; struct hlist_node fib_lhash; struct list_head nh_list; struct net *fib_net; refcount_t fib_treeref; refcount_t fib_clntref; unsigned int fib_flags; unsigned char fib_dead; unsigned char fib_protocol; unsigned char fib_scope; unsigned char fib_type; __be32 fib_prefsrc; u32 fib_tb_id; u32 fib_priority; struct dst_metrics *fib_metrics; #define fib_mtu fib_metrics->metrics[RTAX_MTU-1] #define fib_window fib_metrics->metrics[RTAX_WINDOW-1] #define fib_rtt fib_metrics->metrics[RTAX_RTT-1] #define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1] int fib_nhs; bool fib_nh_is_v6; bool nh_updated; bool pfsrc_removed; struct nexthop *nh; struct rcu_head rcu; struct fib_nh fib_nh[] __counted_by(fib_nhs); }; int __net_init fib4_semantics_init(struct net *net); void __net_exit fib4_semantics_exit(struct net *net); #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rule; #endif struct fib_table; struct fib_result { __be32 prefix; unsigned char prefixlen; unsigned char nh_sel; unsigned char type; unsigned char scope; u32 tclassid; dscp_t dscp; struct fib_nh_common *nhc; struct fib_info *fi; struct fib_table *table; struct hlist_head *fa_head; }; struct fib_result_nl { __be32 fl_addr; /* To be looked up*/ u32 fl_mark; unsigned char fl_tos; unsigned char fl_scope; unsigned char tb_id_in; unsigned char tb_id; /* Results */ unsigned char prefixlen; unsigned char nh_sel; unsigned char type; unsigned char scope; int err; }; #ifdef CONFIG_IP_MULTIPLE_TABLES #define FIB_TABLE_HASHSZ 256 #else #define FIB_TABLE_HASHSZ 2 #endif __be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc, unsigned char scope); __be32 fib_result_prefsrc(struct net *net, struct fib_result *res); #define FIB_RES_NHC(res) ((res).nhc) #define FIB_RES_DEV(res) (FIB_RES_NHC(res)->nhc_dev) #define FIB_RES_OIF(res) (FIB_RES_NHC(res)->nhc_oif) struct fib_rt_info { struct fib_info *fi; u32 tb_id; __be32 dst; int dst_len; dscp_t dscp; u8 type; u8 offload:1, trap:1, offload_failed:1, unused:5; }; struct fib_entry_notifier_info { struct fib_notifier_info info; /* must be first */ u32 dst; int dst_len; struct fib_info *fi; dscp_t dscp; u8 type; u32 tb_id; }; struct fib_nh_notifier_info { struct fib_notifier_info info; /* must be first */ struct fib_nh *fib_nh; }; int call_fib4_notifier(struct notifier_block *nb, enum fib_event_type event_type, struct fib_notifier_info *info); int call_fib4_notifiers(struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info); int __net_init fib4_notifier_init(struct net *net); void __net_exit fib4_notifier_exit(struct net *net); void fib_info_notify_update(struct net *net, struct nl_info *info); int fib_notify(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); struct fib_table { struct hlist_node tb_hlist; u32 tb_id; int tb_num_default; struct rcu_head rcu; unsigned long *tb_data; unsigned long __data[]; }; struct fib_dump_filter { u32 table_id; /* filter_set is an optimization that an entry is set */ bool filter_set; bool dump_routes; bool dump_exceptions; bool rtnl_held; unsigned char protocol; unsigned char rt_type; unsigned int flags; struct net_device *dev; }; int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags); int fib_table_insert(struct net *, struct fib_table *, struct fib_config *, struct netlink_ext_ack *extack); int fib_table_delete(struct net *, struct fib_table *, struct fib_config *, struct netlink_ext_ack *extack); int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb, struct fib_dump_filter *filter); int fib_table_flush(struct net *net, struct fib_table *table, bool flush_all); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); #ifndef CONFIG_IP_MULTIPLE_TABLES #define TABLE_LOCAL_INDEX (RT_TABLE_LOCAL & (FIB_TABLE_HASHSZ - 1)) #define TABLE_MAIN_INDEX (RT_TABLE_MAIN & (FIB_TABLE_HASHSZ - 1)) static inline struct fib_table *fib_get_table(struct net *net, u32 id) { struct hlist_node *tb_hlist; struct hlist_head *ptr; ptr = id == RT_TABLE_LOCAL ? &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX] : &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]; tb_hlist = rcu_dereference_rtnl(hlist_first_rcu(ptr)); return hlist_entry(tb_hlist, struct fib_table, tb_hlist); } static inline struct fib_table *fib_new_table(struct net *net, u32 id) { return fib_get_table(net, id); } static inline int fib_lookup(struct net *net, const struct flowi4 *flp, struct fib_result *res, unsigned int flags) { struct fib_table *tb; int err = -ENETUNREACH; rcu_read_lock(); tb = fib_get_table(net, RT_TABLE_MAIN); if (tb) err = fib_table_lookup(tb, flp, res, flags | FIB_LOOKUP_NOREF); if (err == -EAGAIN) err = -ENETUNREACH; rcu_read_unlock(); return err; } static inline bool fib4_has_custom_rules(const struct net *net) { return false; } static inline bool fib4_rule_default(const struct fib_rule *rule) { return true; } static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { return 0; } static inline unsigned int fib4_rules_seq_read(const struct net *net) { return 0; } static inline bool fib4_rules_early_flow_dissect(struct net *net, struct sk_buff *skb, struct flowi4 *fl4, struct flow_keys *flkeys) { return false; } #else /* CONFIG_IP_MULTIPLE_TABLES */ int __net_init fib4_rules_init(struct net *net); void __net_exit fib4_rules_exit(struct net *net); struct fib_table *fib_new_table(struct net *net, u32 id); struct fib_table *fib_get_table(struct net *net, u32 id); int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res, unsigned int flags); static inline int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res, unsigned int flags) { struct fib_table *tb; int err = -ENETUNREACH; flags |= FIB_LOOKUP_NOREF; if (net->ipv4.fib_has_custom_rules) return __fib_lookup(net, flp, res, flags); rcu_read_lock(); res->tclassid = 0; tb = rcu_dereference_rtnl(net->ipv4.fib_main); if (tb) err = fib_table_lookup(tb, flp, res, flags); if (!err) goto out; tb = rcu_dereference_rtnl(net->ipv4.fib_default); if (tb) err = fib_table_lookup(tb, flp, res, flags); out: if (err == -EAGAIN) err = -ENETUNREACH; rcu_read_unlock(); return err; } static inline bool fib4_has_custom_rules(const struct net *net) { return net->ipv4.fib_has_custom_rules; } bool fib4_rule_default(const struct fib_rule *rule); int fib4_rules_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); unsigned int fib4_rules_seq_read(const struct net *net); static inline bool fib4_rules_early_flow_dissect(struct net *net, struct sk_buff *skb, struct flowi4 *fl4, struct flow_keys *flkeys) { unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; if (!net->ipv4.fib_rules_require_fldissect) return false; memset(flkeys, 0, sizeof(*flkeys)); __skb_flow_dissect(net, skb, &flow_keys_dissector, flkeys, NULL, 0, 0, 0, flag); fl4->fl4_sport = flkeys->ports.src; fl4->fl4_dport = flkeys->ports.dst; fl4->flowi4_proto = flkeys->basic.ip_proto; return true; } #endif /* CONFIG_IP_MULTIPLE_TABLES */ static inline bool fib_dscp_masked_match(dscp_t dscp, const struct flowi4 *fl4) { return dscp == (fl4->flowi4_dscp & INET_DSCP_LEGACY_TOS_MASK); } /* Exported by fib_frontend.c */ extern const struct nla_policy rtm_ipv4_policy[]; void ip_fib_init(void); int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla, struct netlink_ext_ack *extack); __be32 fib_compute_spec_dst(struct sk_buff *skb); bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev); int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, dscp_t dscp, int oif, struct net_device *dev, struct in_device *idev, u32 *itag); static inline enum skb_drop_reason fib_validate_source_reason(struct sk_buff *skb, __be32 src, __be32 dst, dscp_t dscp, int oif, struct net_device *dev, struct in_device *idev, u32 *itag) { int err = fib_validate_source(skb, src, dst, dscp, oif, dev, idev, itag); if (err < 0) return -err; return SKB_NOT_DROPPED_YET; } #ifdef CONFIG_IP_ROUTE_CLASSID static inline int fib_num_tclassid_users(struct net *net) { return atomic_read(&net->ipv4.fib_num_tclassid_users); } #else static inline int fib_num_tclassid_users(struct net *net) { return 0; } #endif int fib_unmerge(struct net *net); static inline bool nhc_l3mdev_matches_dev(const struct fib_nh_common *nhc, const struct net_device *dev) { if (nhc->nhc_dev == dev || l3mdev_master_ifindex_rcu(nhc->nhc_dev) == dev->ifindex) return true; return false; } /* Exported by fib_semantics.c */ int ip_fib_check_default(__be32 gw, struct net_device *dev); int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); int fib_sync_down_addr(struct net_device *dev, __be32 local); int fib_sync_up(struct net_device *dev, unsigned char nh_flags); void fib_sync_mtu(struct net_device *dev, u32 orig_mtu); void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig); /* Fields used for sysctl_fib_multipath_hash_fields. * Common to IPv4 and IPv6. * * Add new fields at the end. This is user API. */ #define FIB_MULTIPATH_HASH_FIELD_SRC_IP BIT(0) #define FIB_MULTIPATH_HASH_FIELD_DST_IP BIT(1) #define FIB_MULTIPATH_HASH_FIELD_IP_PROTO BIT(2) #define FIB_MULTIPATH_HASH_FIELD_FLOWLABEL BIT(3) #define FIB_MULTIPATH_HASH_FIELD_SRC_PORT BIT(4) #define FIB_MULTIPATH_HASH_FIELD_DST_PORT BIT(5) #define FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP BIT(6) #define FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP BIT(7) #define FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO BIT(8) #define FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL BIT(9) #define FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT BIT(10) #define FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT BIT(11) #define FIB_MULTIPATH_HASH_FIELD_OUTER_MASK \ (FIB_MULTIPATH_HASH_FIELD_SRC_IP | \ FIB_MULTIPATH_HASH_FIELD_DST_IP | \ FIB_MULTIPATH_HASH_FIELD_IP_PROTO | \ FIB_MULTIPATH_HASH_FIELD_FLOWLABEL | \ FIB_MULTIPATH_HASH_FIELD_SRC_PORT | \ FIB_MULTIPATH_HASH_FIELD_DST_PORT) #define FIB_MULTIPATH_HASH_FIELD_INNER_MASK \ (FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP | \ FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP | \ FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO | \ FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL | \ FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT | \ FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) #define FIB_MULTIPATH_HASH_FIELD_ALL_MASK \ (FIB_MULTIPATH_HASH_FIELD_OUTER_MASK | \ FIB_MULTIPATH_HASH_FIELD_INNER_MASK) #define FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK \ (FIB_MULTIPATH_HASH_FIELD_SRC_IP | \ FIB_MULTIPATH_HASH_FIELD_DST_IP | \ FIB_MULTIPATH_HASH_FIELD_IP_PROTO) #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys); static void fib_multipath_hash_construct_key(siphash_key_t *key, u32 mp_seed) { u64 mp_seed_64 = mp_seed; key->key[0] = (mp_seed_64 << 32) | mp_seed_64; key->key[1] = key->key[0]; } static inline u32 fib_multipath_hash_from_keys(const struct net *net, struct flow_keys *keys) { siphash_aligned_key_t hash_key; u32 mp_seed; mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed; fib_multipath_hash_construct_key(&hash_key, mp_seed); return flow_hash_from_keys_seed(keys, &hash_key); } #else static inline u32 fib_multipath_hash_from_keys(const struct net *net, struct flow_keys *keys) { return flow_hash_from_keys(keys); } #endif int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, struct netlink_ext_ack *extack); void fib_select_multipath(struct fib_result *res, int hash, const struct flowi4 *fl4); void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, const struct sk_buff *skb); int fib_nh_init(struct net *net, struct fib_nh *fib_nh, struct fib_config *cfg, int nh_weight, struct netlink_ext_ack *extack); void fib_nh_release(struct net *net, struct fib_nh *fib_nh); int fib_nh_common_init(struct net *net, struct fib_nh_common *nhc, struct nlattr *fc_encap, u16 fc_encap_type, void *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void fib_nh_common_release(struct fib_nh_common *nhc); /* Exported by fib_trie.c */ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri); void fib_trie_init(void); struct fib_table *fib_trie_table(u32 id, struct fib_table *alias); bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags, const struct flowi4 *flp); static inline void fib_combine_itag(u32 *itag, const struct fib_result *res) { #ifdef CONFIG_IP_ROUTE_CLASSID struct fib_nh_common *nhc = res->nhc; #ifdef CONFIG_IP_MULTIPLE_TABLES u32 rtag; #endif if (nhc->nhc_family == AF_INET) { struct fib_nh *nh; nh = container_of(nhc, struct fib_nh, nh_common); *itag = nh->nh_tclassid << 16; } else { *itag = 0; } #ifdef CONFIG_IP_MULTIPLE_TABLES rtag = res->tclassid; if (*itag == 0) *itag = (rtag<<16); *itag |= (rtag>>16); #endif #endif } void fib_flush(struct net *net); void free_fib_info(struct fib_info *fi); static inline void fib_info_hold(struct fib_info *fi) { refcount_inc(&fi->fib_clntref); } static inline void fib_info_put(struct fib_info *fi) { if (refcount_dec_and_test(&fi->fib_clntref)) free_fib_info(fi); } #ifdef CONFIG_PROC_FS int __net_init fib_proc_init(struct net *net); void __net_exit fib_proc_exit(struct net *net); #else static inline int fib_proc_init(struct net *net) { return 0; } static inline void fib_proc_exit(struct net *net) { } #endif u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr); int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, struct fib_dump_filter *filter, struct netlink_callback *cb); int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh, u8 rt_family, unsigned char *flags, bool skip_oif); int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh, int nh_weight, u8 rt_family, u32 nh_tclassid); #endif /* _NET_FIB_H */ |
| 395 110 111 25 24 25 42 365 364 365 365 37 36 371 372 372 372 372 36 366 334 334 333 334 332 42 332 2 310 310 308 310 310 27 27 27 27 10 10 10 10 10 2 4 4 2 2 2 2 36 36 36 36 4 4 4 4 2 36 36 34 34 34 28 28 28 28 28 28 34 295 4 293 284 293 289 111 110 25 25 25 14 14 25 3430 3429 3439 3448 3434 291 290 290 293 289 290 289 289 287 288 287 285 111 111 111 111 111 111 111 110 25 25 25 25 25 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved. * Authors: David Chinner and Glauber Costa * * Generic LRU infrastructure */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/mm.h> #include <linux/list_lru.h> #include <linux/slab.h> #include <linux/mutex.h> #include <linux/memcontrol.h> #include "slab.h" #include "internal.h" #ifdef CONFIG_MEMCG static LIST_HEAD(memcg_list_lrus); static DEFINE_MUTEX(list_lrus_mutex); static inline bool list_lru_memcg_aware(struct list_lru *lru) { return lru->memcg_aware; } static void list_lru_register(struct list_lru *lru) { if (!list_lru_memcg_aware(lru)) return; mutex_lock(&list_lrus_mutex); list_add(&lru->list, &memcg_list_lrus); mutex_unlock(&list_lrus_mutex); } static void list_lru_unregister(struct list_lru *lru) { if (!list_lru_memcg_aware(lru)) return; mutex_lock(&list_lrus_mutex); list_del(&lru->list); mutex_unlock(&list_lrus_mutex); } static int lru_shrinker_id(struct list_lru *lru) { return lru->shrinker_id; } static inline struct list_lru_one * list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx) { if (list_lru_memcg_aware(lru) && idx >= 0) { struct list_lru_memcg *mlru = xa_load(&lru->xa, idx); return mlru ? &mlru->node[nid] : NULL; } return &lru->node[nid].lru; } static inline bool lock_list_lru(struct list_lru_one *l, bool irq) { if (irq) spin_lock_irq(&l->lock); else spin_lock(&l->lock); if (unlikely(READ_ONCE(l->nr_items) == LONG_MIN)) { if (irq) spin_unlock_irq(&l->lock); else spin_unlock(&l->lock); return false; } return true; } static inline struct list_lru_one * lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg, bool irq, bool skip_empty) { struct list_lru_one *l; rcu_read_lock(); again: l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg)); if (likely(l) && lock_list_lru(l, irq)) { rcu_read_unlock(); return l; } /* * Caller may simply bail out if raced with reparenting or * may iterate through the list_lru and expect empty slots. */ if (skip_empty) { rcu_read_unlock(); return NULL; } VM_WARN_ON(!css_is_dying(&memcg->css)); memcg = parent_mem_cgroup(memcg); goto again; } static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off) { if (irq_off) spin_unlock_irq(&l->lock); else spin_unlock(&l->lock); } #else static void list_lru_register(struct list_lru *lru) { } static void list_lru_unregister(struct list_lru *lru) { } static int lru_shrinker_id(struct list_lru *lru) { return -1; } static inline bool list_lru_memcg_aware(struct list_lru *lru) { return false; } static inline struct list_lru_one * list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx) { return &lru->node[nid].lru; } static inline struct list_lru_one * lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg, bool irq, bool skip_empty) { struct list_lru_one *l = &lru->node[nid].lru; if (irq) spin_lock_irq(&l->lock); else spin_lock(&l->lock); return l; } static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off) { if (irq_off) spin_unlock_irq(&l->lock); else spin_unlock(&l->lock); } #endif /* CONFIG_MEMCG */ /* The caller must ensure the memcg lifetime. */ bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid, struct mem_cgroup *memcg) { struct list_lru_node *nlru = &lru->node[nid]; struct list_lru_one *l; l = lock_list_lru_of_memcg(lru, nid, memcg, false, false); if (!l) return false; if (list_empty(item)) { list_add_tail(item, &l->list); /* Set shrinker bit if the first element was added */ if (!l->nr_items++) set_shrinker_bit(memcg, nid, lru_shrinker_id(lru)); unlock_list_lru(l, false); atomic_long_inc(&nlru->nr_items); return true; } unlock_list_lru(l, false); return false; } bool list_lru_add_obj(struct list_lru *lru, struct list_head *item) { bool ret; int nid = page_to_nid(virt_to_page(item)); if (list_lru_memcg_aware(lru)) { rcu_read_lock(); ret = list_lru_add(lru, item, nid, mem_cgroup_from_slab_obj(item)); rcu_read_unlock(); } else { ret = list_lru_add(lru, item, nid, NULL); } return ret; } EXPORT_SYMBOL_GPL(list_lru_add_obj); /* The caller must ensure the memcg lifetime. */ bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid, struct mem_cgroup *memcg) { struct list_lru_node *nlru = &lru->node[nid]; struct list_lru_one *l; l = lock_list_lru_of_memcg(lru, nid, memcg, false, false); if (!l) return false; if (!list_empty(item)) { list_del_init(item); l->nr_items--; unlock_list_lru(l, false); atomic_long_dec(&nlru->nr_items); return true; } unlock_list_lru(l, false); return false; } bool list_lru_del_obj(struct list_lru *lru, struct list_head *item) { bool ret; int nid = page_to_nid(virt_to_page(item)); if (list_lru_memcg_aware(lru)) { rcu_read_lock(); ret = list_lru_del(lru, item, nid, mem_cgroup_from_slab_obj(item)); rcu_read_unlock(); } else { ret = list_lru_del(lru, item, nid, NULL); } return ret; } EXPORT_SYMBOL_GPL(list_lru_del_obj); void list_lru_isolate(struct list_lru_one *list, struct list_head *item) { list_del_init(item); list->nr_items--; } EXPORT_SYMBOL_GPL(list_lru_isolate); void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item, struct list_head *head) { list_move(item, head); list->nr_items--; } EXPORT_SYMBOL_GPL(list_lru_isolate_move); unsigned long list_lru_count_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg) { struct list_lru_one *l; long count; rcu_read_lock(); l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg)); count = l ? READ_ONCE(l->nr_items) : 0; rcu_read_unlock(); if (unlikely(count < 0)) count = 0; return count; } EXPORT_SYMBOL_GPL(list_lru_count_one); unsigned long list_lru_count_node(struct list_lru *lru, int nid) { struct list_lru_node *nlru; nlru = &lru->node[nid]; return atomic_long_read(&nlru->nr_items); } EXPORT_SYMBOL_GPL(list_lru_count_node); static unsigned long __list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg, list_lru_walk_cb isolate, void *cb_arg, unsigned long *nr_to_walk, bool irq_off) { struct list_lru_node *nlru = &lru->node[nid]; struct list_lru_one *l = NULL; struct list_head *item, *n; unsigned long isolated = 0; restart: l = lock_list_lru_of_memcg(lru, nid, memcg, irq_off, true); if (!l) return isolated; list_for_each_safe(item, n, &l->list) { enum lru_status ret; /* * decrement nr_to_walk first so that we don't livelock if we * get stuck on large numbers of LRU_RETRY items */ if (!*nr_to_walk) break; --*nr_to_walk; ret = isolate(item, l, cb_arg); switch (ret) { /* * LRU_RETRY, LRU_REMOVED_RETRY and LRU_STOP will drop the lru * lock. List traversal will have to restart from scratch. */ case LRU_RETRY: goto restart; case LRU_REMOVED_RETRY: fallthrough; case LRU_REMOVED: isolated++; atomic_long_dec(&nlru->nr_items); if (ret == LRU_REMOVED_RETRY) goto restart; break; case LRU_ROTATE: list_move_tail(item, &l->list); break; case LRU_SKIP: break; case LRU_STOP: goto out; default: BUG(); } } unlock_list_lru(l, irq_off); out: return isolated; } unsigned long list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg, list_lru_walk_cb isolate, void *cb_arg, unsigned long *nr_to_walk) { return __list_lru_walk_one(lru, nid, memcg, isolate, cb_arg, nr_to_walk, false); } EXPORT_SYMBOL_GPL(list_lru_walk_one); unsigned long list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg, list_lru_walk_cb isolate, void *cb_arg, unsigned long *nr_to_walk) { return __list_lru_walk_one(lru, nid, memcg, isolate, cb_arg, nr_to_walk, true); } unsigned long list_lru_walk_node(struct list_lru *lru, int nid, list_lru_walk_cb isolate, void *cb_arg, unsigned long *nr_to_walk) { long isolated = 0; isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg, nr_to_walk); #ifdef CONFIG_MEMCG if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) { struct list_lru_memcg *mlru; struct mem_cgroup *memcg; unsigned long index; xa_for_each(&lru->xa, index, mlru) { rcu_read_lock(); memcg = mem_cgroup_from_id(index); if (!mem_cgroup_tryget(memcg)) { rcu_read_unlock(); continue; } rcu_read_unlock(); isolated += __list_lru_walk_one(lru, nid, memcg, isolate, cb_arg, nr_to_walk, false); mem_cgroup_put(memcg); if (*nr_to_walk <= 0) break; } } #endif return isolated; } EXPORT_SYMBOL_GPL(list_lru_walk_node); static void init_one_lru(struct list_lru *lru, struct list_lru_one *l) { INIT_LIST_HEAD(&l->list); spin_lock_init(&l->lock); l->nr_items = 0; #ifdef CONFIG_LOCKDEP if (lru->key) lockdep_set_class(&l->lock, lru->key); #endif } #ifdef CONFIG_MEMCG static struct list_lru_memcg *memcg_init_list_lru_one(struct list_lru *lru, gfp_t gfp) { int nid; struct list_lru_memcg *mlru; mlru = kmalloc(struct_size(mlru, node, nr_node_ids), gfp); if (!mlru) return NULL; for_each_node(nid) init_one_lru(lru, &mlru->node[nid]); return mlru; } static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware) { if (memcg_aware) xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ); lru->memcg_aware = memcg_aware; } static void memcg_destroy_list_lru(struct list_lru *lru) { XA_STATE(xas, &lru->xa, 0); struct list_lru_memcg *mlru; if (!list_lru_memcg_aware(lru)) return; xas_lock_irq(&xas); xas_for_each(&xas, mlru, ULONG_MAX) { kfree(mlru); xas_store(&xas, NULL); } xas_unlock_irq(&xas); } static void memcg_reparent_list_lru_one(struct list_lru *lru, int nid, struct list_lru_one *src, struct mem_cgroup *dst_memcg) { int dst_idx = dst_memcg->kmemcg_id; struct list_lru_one *dst; spin_lock_irq(&src->lock); dst = list_lru_from_memcg_idx(lru, nid, dst_idx); spin_lock_nested(&dst->lock, SINGLE_DEPTH_NESTING); list_splice_init(&src->list, &dst->list); if (src->nr_items) { WARN_ON(src->nr_items < 0); dst->nr_items += src->nr_items; set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru)); } /* Mark the list_lru_one dead */ src->nr_items = LONG_MIN; spin_unlock(&dst->lock); spin_unlock_irq(&src->lock); } void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent) { struct list_lru *lru; int i; mutex_lock(&list_lrus_mutex); list_for_each_entry(lru, &memcg_list_lrus, list) { struct list_lru_memcg *mlru; XA_STATE(xas, &lru->xa, memcg->kmemcg_id); /* * Lock the Xarray to ensure no on going list_lru_memcg * allocation and further allocation will see css_is_dying(). */ xas_lock_irq(&xas); mlru = xas_store(&xas, NULL); xas_unlock_irq(&xas); if (!mlru) continue; /* * With Xarray value set to NULL, holding the lru lock below * prevents list_lru_{add,del,isolate} from touching the lru, * safe to reparent. */ for_each_node(i) memcg_reparent_list_lru_one(lru, i, &mlru->node[i], parent); /* * Here all list_lrus corresponding to the cgroup are guaranteed * to remain empty, we can safely free this lru, any further * memcg_list_lru_alloc() call will simply bail out. */ kvfree_rcu(mlru, rcu); } mutex_unlock(&list_lrus_mutex); } static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg, struct list_lru *lru) { int idx = memcg->kmemcg_id; return idx < 0 || xa_load(&lru->xa, idx); } int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru, gfp_t gfp) { unsigned long flags; struct list_lru_memcg *mlru = NULL; struct mem_cgroup *pos, *parent; XA_STATE(xas, &lru->xa, 0); if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru)) return 0; gfp &= GFP_RECLAIM_MASK; /* * Because the list_lru can be reparented to the parent cgroup's * list_lru, we should make sure that this cgroup and all its * ancestors have allocated list_lru_memcg. */ do { /* * Keep finding the farest parent that wasn't populated * until found memcg itself. */ pos = memcg; parent = parent_mem_cgroup(pos); while (!memcg_list_lru_allocated(parent, lru)) { pos = parent; parent = parent_mem_cgroup(pos); } if (!mlru) { mlru = memcg_init_list_lru_one(lru, gfp); if (!mlru) return -ENOMEM; } xas_set(&xas, pos->kmemcg_id); do { xas_lock_irqsave(&xas, flags); if (!xas_load(&xas) && !css_is_dying(&pos->css)) { xas_store(&xas, mlru); if (!xas_error(&xas)) mlru = NULL; } xas_unlock_irqrestore(&xas, flags); } while (xas_nomem(&xas, gfp)); } while (pos != memcg && !css_is_dying(&pos->css)); if (unlikely(mlru)) kfree(mlru); return xas_error(&xas); } #else static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware) { } static void memcg_destroy_list_lru(struct list_lru *lru) { } #endif /* CONFIG_MEMCG */ int __list_lru_init(struct list_lru *lru, bool memcg_aware, struct shrinker *shrinker) { int i; #ifdef CONFIG_MEMCG if (shrinker) lru->shrinker_id = shrinker->id; else lru->shrinker_id = -1; if (mem_cgroup_kmem_disabled()) memcg_aware = false; #endif lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL); if (!lru->node) return -ENOMEM; for_each_node(i) init_one_lru(lru, &lru->node[i].lru); memcg_init_list_lru(lru, memcg_aware); list_lru_register(lru); return 0; } EXPORT_SYMBOL_GPL(__list_lru_init); void list_lru_destroy(struct list_lru *lru) { /* Already destroyed or not yet initialized? */ if (!lru->node) return; list_lru_unregister(lru); memcg_destroy_list_lru(lru); kfree(lru->node); lru->node = NULL; #ifdef CONFIG_MEMCG lru->shrinker_id = -1; #endif } EXPORT_SYMBOL_GPL(list_lru_destroy); |
| 455 455 459 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 | // SPDX-License-Identifier: GPL-2.0 /* * This file contains functions which manage high resolution tick * related events. * * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner */ #include <linux/cpu.h> #include <linux/err.h> #include <linux/hrtimer.h> #include <linux/interrupt.h> #include <linux/percpu.h> #include <linux/profile.h> #include <linux/sched.h> #include "tick-internal.h" /** * tick_program_event - program the CPU local timer device for the next event */ int tick_program_event(ktime_t expires, int force) { struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); if (unlikely(expires == KTIME_MAX)) { /* * We don't need the clock event device any more, stop it. */ clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED); dev->next_event = KTIME_MAX; return 0; } if (unlikely(clockevent_state_oneshot_stopped(dev))) { /* * We need the clock event again, configure it in ONESHOT mode * before using it. */ clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); } return clockevents_program_event(dev, expires, force); } /** * tick_resume_oneshot - resume oneshot mode */ void tick_resume_oneshot(void) { struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); clockevents_program_event(dev, ktime_get(), true); } /** * tick_setup_oneshot - setup the event device for oneshot mode (hres or nohz) */ void tick_setup_oneshot(struct clock_event_device *newdev, void (*handler)(struct clock_event_device *), ktime_t next_event) { newdev->event_handler = handler; clockevents_switch_state(newdev, CLOCK_EVT_STATE_ONESHOT); clockevents_program_event(newdev, next_event, true); } /** * tick_switch_to_oneshot - switch to oneshot mode */ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); struct clock_event_device *dev = td->evtdev; if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT) || !tick_device_is_functional(dev)) { pr_info("Clockevents: could not switch to one-shot mode:"); if (!dev) { pr_cont(" no tick device\n"); } else { if (!tick_device_is_functional(dev)) pr_cont(" %s is not functional.\n", dev->name); else pr_cont(" %s does not support one-shot mode.\n", dev->name); } return -EINVAL; } td->mode = TICKDEV_MODE_ONESHOT; dev->event_handler = handler; clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); tick_broadcast_switch_to_oneshot(); return 0; } /** * tick_oneshot_mode_active - check whether the system is in oneshot mode * * returns 1 when either nohz or highres are enabled. otherwise 0. */ int tick_oneshot_mode_active(void) { unsigned long flags; int ret; local_irq_save(flags); ret = __this_cpu_read(tick_cpu_device.mode) == TICKDEV_MODE_ONESHOT; local_irq_restore(flags); return ret; } #ifdef CONFIG_HIGH_RES_TIMERS /** * tick_init_highres - switch to high resolution mode * * Called with interrupts disabled. */ int tick_init_highres(void) { return tick_switch_to_oneshot(hrtimer_interrupt); } #endif |
| 7 7 7 7 7 7 4 7 1 7 7 42 4 3 4 42 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Force feedback support for Linux input subsystem * * Copyright (c) 2006 Anssi Hannula <anssi.hannula@gmail.com> * Copyright (c) 2006 Dmitry Torokhov <dtor@mail.ru> */ /* #define DEBUG */ #include <linux/export.h> #include <linux/input.h> #include <linux/limits.h> #include <linux/mutex.h> #include <linux/overflow.h> #include <linux/sched.h> #include <linux/slab.h> /* * Check that the effect_id is a valid effect and whether the user * is the owner */ static int check_effect_access(struct ff_device *ff, int effect_id, struct file *file) { if (effect_id < 0 || effect_id >= ff->max_effects || !ff->effect_owners[effect_id]) return -EINVAL; if (file && ff->effect_owners[effect_id] != file) return -EACCES; return 0; } /* * Checks whether 2 effects can be combined together */ static inline int check_effects_compatible(struct ff_effect *e1, struct ff_effect *e2) { return e1->type == e2->type && (e1->type != FF_PERIODIC || e1->u.periodic.waveform == e2->u.periodic.waveform); } /* * Convert an effect into compatible one */ static int compat_effect(struct ff_device *ff, struct ff_effect *effect) { int magnitude; switch (effect->type) { case FF_RUMBLE: if (!test_bit(FF_PERIODIC, ff->ffbit)) return -EINVAL; /* * calculate magnitude of sine wave as average of rumble's * 2/3 of strong magnitude and 1/3 of weak magnitude */ magnitude = effect->u.rumble.strong_magnitude / 3 + effect->u.rumble.weak_magnitude / 6; effect->type = FF_PERIODIC; effect->u.periodic.waveform = FF_SINE; effect->u.periodic.period = 50; effect->u.periodic.magnitude = magnitude; effect->u.periodic.offset = 0; effect->u.periodic.phase = 0; effect->u.periodic.envelope.attack_length = 0; effect->u.periodic.envelope.attack_level = 0; effect->u.periodic.envelope.fade_length = 0; effect->u.periodic.envelope.fade_level = 0; return 0; default: /* Let driver handle conversion */ return 0; } } /** * input_ff_upload() - upload effect into force-feedback device * @dev: input device * @effect: effect to be uploaded * @file: owner of the effect */ int input_ff_upload(struct input_dev *dev, struct ff_effect *effect, struct file *file) { struct ff_device *ff = dev->ff; struct ff_effect *old; int error; int id; if (!test_bit(EV_FF, dev->evbit)) return -ENOSYS; if (effect->type < FF_EFFECT_MIN || effect->type > FF_EFFECT_MAX || !test_bit(effect->type, dev->ffbit)) { dev_dbg(&dev->dev, "invalid or not supported effect type in upload\n"); return -EINVAL; } if (effect->type == FF_PERIODIC && (effect->u.periodic.waveform < FF_WAVEFORM_MIN || effect->u.periodic.waveform > FF_WAVEFORM_MAX || !test_bit(effect->u.periodic.waveform, dev->ffbit))) { dev_dbg(&dev->dev, "invalid or not supported wave form in upload\n"); return -EINVAL; } if (!test_bit(effect->type, ff->ffbit)) { error = compat_effect(ff, effect); if (error) return error; } guard(mutex)(&ff->mutex); if (effect->id == -1) { for (id = 0; id < ff->max_effects; id++) if (!ff->effect_owners[id]) break; if (id >= ff->max_effects) return -ENOSPC; effect->id = id; old = NULL; } else { id = effect->id; error = check_effect_access(ff, id, file); if (error) return error; old = &ff->effects[id]; if (!check_effects_compatible(effect, old)) return -EINVAL; } error = ff->upload(dev, effect, old); if (error) return error; scoped_guard(spinlock_irq, &dev->event_lock) { ff->effects[id] = *effect; ff->effect_owners[id] = file; } return 0; } EXPORT_SYMBOL_GPL(input_ff_upload); /* * Erases the effect if the requester is also the effect owner. The mutex * should already be locked before calling this function. */ static int erase_effect(struct input_dev *dev, int effect_id, struct file *file) { struct ff_device *ff = dev->ff; int error; error = check_effect_access(ff, effect_id, file); if (error) return error; scoped_guard(spinlock_irq, &dev->event_lock) { ff->playback(dev, effect_id, 0); ff->effect_owners[effect_id] = NULL; } if (ff->erase) { error = ff->erase(dev, effect_id); if (error) { scoped_guard(spinlock_irq, &dev->event_lock) ff->effect_owners[effect_id] = file; return error; } } return 0; } /** * input_ff_erase - erase a force-feedback effect from device * @dev: input device to erase effect from * @effect_id: id of the effect to be erased * @file: purported owner of the request * * This function erases a force-feedback effect from specified device. * The effect will only be erased if it was uploaded through the same * file handle that is requesting erase. */ int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file) { struct ff_device *ff = dev->ff; if (!test_bit(EV_FF, dev->evbit)) return -ENOSYS; guard(mutex)(&ff->mutex); return erase_effect(dev, effect_id, file); } EXPORT_SYMBOL_GPL(input_ff_erase); /* * input_ff_flush - erase all effects owned by a file handle * @dev: input device to erase effect from * @file: purported owner of the effects * * This function erases all force-feedback effects associated with * the given owner from specified device. Note that @file may be %NULL, * in which case all effects will be erased. */ int input_ff_flush(struct input_dev *dev, struct file *file) { struct ff_device *ff = dev->ff; int i; dev_dbg(&dev->dev, "flushing now\n"); guard(mutex)(&ff->mutex); for (i = 0; i < ff->max_effects; i++) erase_effect(dev, i, file); return 0; } EXPORT_SYMBOL_GPL(input_ff_flush); /** * input_ff_event() - generic handler for force-feedback events * @dev: input device to send the effect to * @type: event type (anything but EV_FF is ignored) * @code: event code * @value: event value */ int input_ff_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { struct ff_device *ff = dev->ff; if (type != EV_FF) return 0; switch (code) { case FF_GAIN: if (!test_bit(FF_GAIN, dev->ffbit) || value > 0xffffU) break; ff->set_gain(dev, value); break; case FF_AUTOCENTER: if (!test_bit(FF_AUTOCENTER, dev->ffbit) || value > 0xffffU) break; ff->set_autocenter(dev, value); break; default: if (check_effect_access(ff, code, NULL) == 0) ff->playback(dev, code, value); break; } return 0; } EXPORT_SYMBOL_GPL(input_ff_event); /** * input_ff_create() - create force-feedback device * @dev: input device supporting force-feedback * @max_effects: maximum number of effects supported by the device * * This function allocates all necessary memory for a force feedback * portion of an input device and installs all default handlers. * @dev->ffbit should be already set up before calling this function. * Once ff device is created you need to setup its upload, erase, * playback and other handlers before registering input device */ int input_ff_create(struct input_dev *dev, unsigned int max_effects) { int i; if (!max_effects) { dev_err(&dev->dev, "cannot allocate device without any effects\n"); return -EINVAL; } if (max_effects > FF_MAX_EFFECTS) { dev_err(&dev->dev, "cannot allocate more than FF_MAX_EFFECTS effects\n"); return -EINVAL; } struct ff_device *ff __free(kfree) = kzalloc(struct_size(ff, effect_owners, max_effects), GFP_KERNEL); if (!ff) return -ENOMEM; ff->effects = kcalloc(max_effects, sizeof(*ff->effects), GFP_KERNEL); if (!ff->effects) return -ENOMEM; ff->max_effects = max_effects; mutex_init(&ff->mutex); dev->flush = input_ff_flush; dev->event = input_ff_event; __set_bit(EV_FF, dev->evbit); /* Copy "true" bits into ff device bitmap */ for_each_set_bit(i, dev->ffbit, FF_CNT) __set_bit(i, ff->ffbit); /* we can emulate RUMBLE with periodic effects */ if (test_bit(FF_PERIODIC, ff->ffbit)) __set_bit(FF_RUMBLE, dev->ffbit); dev->ff = no_free_ptr(ff); return 0; } EXPORT_SYMBOL_GPL(input_ff_create); /** * input_ff_destroy() - frees force feedback portion of input device * @dev: input device supporting force feedback * * This function is only needed in error path as input core will * automatically free force feedback structures when device is * destroyed. */ void input_ff_destroy(struct input_dev *dev) { struct ff_device *ff = dev->ff; __clear_bit(EV_FF, dev->evbit); if (ff) { if (ff->destroy) ff->destroy(ff); kfree(ff->private); kfree(ff->effects); kfree(ff); dev->ff = NULL; } } EXPORT_SYMBOL_GPL(input_ff_destroy); |
| 14 14 13 14 14 11 11 14 14 14 14 14 14 14 14 8 13 3 11 11 11 11 11 11 11 11 11 11 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 | // SPDX-License-Identifier: GPL-2.0-only /* * AppArmor security module * * This file contains AppArmor auditing functions * * Copyright (C) 1998-2008 Novell/SUSE * Copyright 2009-2010 Canonical Ltd. */ #include <linux/audit.h> #include <linux/socket.h> #include "include/apparmor.h" #include "include/audit.h" #include "include/policy.h" #include "include/policy_ns.h" #include "include/secid.h" const char *const audit_mode_names[] = { "normal", "quiet_denied", "quiet", "noquiet", "all" }; static const char *const aa_audit_type[] = { "AUDIT", "ALLOWED", "DENIED", "HINT", "STATUS", "ERROR", "KILLED", "AUTO" }; static const char *const aa_class_names[] = { "none", "unknown", "file", "cap", "net", "rlimits", "domain", "mount", "unknown", "ptrace", "signal", "xmatch", "unknown", "unknown", "net", "unknown", "label", "posix_mqueue", "io_uring", "module", "lsm", "namespace", "io_uring", "unknown", "unknown", "unknown", "unknown", "unknown", "unknown", "unknown", "unknown", "X", "dbus", }; /* * Currently AppArmor auditing is fed straight into the audit framework. * * TODO: * netlink interface for complain mode * user auditing, - send user auditing to netlink interface * system control of whether user audit messages go to system log */ /** * audit_pre() - core AppArmor function. * @ab: audit buffer to fill (NOT NULL) * @va: audit structure containing data to audit (NOT NULL) * * Record common AppArmor audit data from @va */ static void audit_pre(struct audit_buffer *ab, void *va) { struct apparmor_audit_data *ad = aad_of_va(va); if (aa_g_audit_header) { audit_log_format(ab, "apparmor=\"%s\"", aa_audit_type[ad->type]); } if (ad->op) audit_log_format(ab, " operation=\"%s\"", ad->op); if (ad->class) audit_log_format(ab, " class=\"%s\"", ad->class <= AA_CLASS_LAST ? aa_class_names[ad->class] : "unknown"); if (ad->info) { audit_log_format(ab, " info=\"%s\"", ad->info); if (ad->error) audit_log_format(ab, " error=%d", ad->error); } if (ad->subj_label) { struct aa_label *label = ad->subj_label; if (label_isprofile(label)) { struct aa_profile *profile = labels_profile(label); if (profile->ns != root_ns) { audit_log_format(ab, " namespace="); audit_log_untrustedstring(ab, profile->ns->base.hname); } audit_log_format(ab, " profile="); audit_log_untrustedstring(ab, profile->base.hname); } else { audit_log_format(ab, " label="); aa_label_xaudit(ab, root_ns, label, FLAG_VIEW_SUBNS, GFP_ATOMIC); } } if (ad->name) { audit_log_format(ab, " name="); audit_log_untrustedstring(ab, ad->name); } } /** * aa_audit_msg - Log a message to the audit subsystem * @type: audit type for the message * @ad: audit event structure (NOT NULL) * @cb: optional callback fn for type specific fields (MAYBE NULL) */ void aa_audit_msg(int type, struct apparmor_audit_data *ad, void (*cb) (struct audit_buffer *, void *)) { ad->type = type; common_lsm_audit(&ad->common, audit_pre, cb); } /** * aa_audit - Log a profile based audit event to the audit subsystem * @type: audit type for the message * @profile: profile to check against (NOT NULL) * @ad: audit event (NOT NULL) * @cb: optional callback fn for type specific fields (MAYBE NULL) * * Handle default message switching based off of audit mode flags * * Returns: error on failure */ int aa_audit(int type, struct aa_profile *profile, struct apparmor_audit_data *ad, void (*cb) (struct audit_buffer *, void *)) { AA_BUG(!profile); if (type == AUDIT_APPARMOR_AUTO) { if (likely(!ad->error)) { if (AUDIT_MODE(profile) != AUDIT_ALL) return 0; type = AUDIT_APPARMOR_AUDIT; } else if (COMPLAIN_MODE(profile)) type = AUDIT_APPARMOR_ALLOWED; else type = AUDIT_APPARMOR_DENIED; } if (AUDIT_MODE(profile) == AUDIT_QUIET || (type == AUDIT_APPARMOR_DENIED && AUDIT_MODE(profile) == AUDIT_QUIET_DENIED)) return ad->error; if (KILL_MODE(profile) && type == AUDIT_APPARMOR_DENIED) type = AUDIT_APPARMOR_KILL; ad->subj_label = &profile->label; aa_audit_msg(type, ad, cb); if (ad->type == AUDIT_APPARMOR_KILL) (void)send_sig_info(profile->signal, NULL, ad->common.type == LSM_AUDIT_DATA_TASK && ad->common.u.tsk ? ad->common.u.tsk : current); if (ad->type == AUDIT_APPARMOR_ALLOWED) return complain_error(ad->error); return ad->error; } struct aa_audit_rule { struct aa_label *label; }; void aa_audit_rule_free(void *vrule) { struct aa_audit_rule *rule = vrule; if (rule) { if (!IS_ERR(rule->label)) aa_put_label(rule->label); kfree(rule); } } int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule, gfp_t gfp) { struct aa_audit_rule *rule; switch (field) { case AUDIT_SUBJ_ROLE: if (op != Audit_equal && op != Audit_not_equal) return -EINVAL; break; default: return -EINVAL; } rule = kzalloc(sizeof(struct aa_audit_rule), gfp); if (!rule) return -ENOMEM; /* Currently rules are treated as coming from the root ns */ rule->label = aa_label_parse(&root_ns->unconfined->label, rulestr, gfp, true, false); if (IS_ERR(rule->label)) { int err = PTR_ERR(rule->label); aa_audit_rule_free(rule); return err; } *vrule = rule; return 0; } int aa_audit_rule_known(struct audit_krule *rule) { int i; for (i = 0; i < rule->field_count; i++) { struct audit_field *f = &rule->fields[i]; switch (f->type) { case AUDIT_SUBJ_ROLE: return 1; } } return 0; } int aa_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op, void *vrule) { struct aa_audit_rule *rule = vrule; struct aa_label *label; int found = 0; label = prop->apparmor.label; if (!label) return -ENOENT; if (aa_label_is_subset(label, rule->label)) found = 1; switch (field) { case AUDIT_SUBJ_ROLE: switch (op) { case Audit_equal: return found; case Audit_not_equal: return !found; } } return 0; } |
| 8 8 8 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 | // SPDX-License-Identifier: GPL-2.0-only /* * The NFC Controller Interface is the communication protocol between an * NFC Controller (NFCC) and a Device Host (DH). * This is the HCI over NCI implementation, as specified in the 10.2 * section of the NCI 1.1 specification. * * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. */ #include <linux/skbuff.h> #include "../nfc.h" #include <net/nfc/nci.h> #include <net/nfc/nci_core.h> #include <linux/nfc.h> #include <linux/kcov.h> struct nci_data { u8 conn_id; u8 pipe; u8 cmd; const u8 *data; u32 data_len; } __packed; struct nci_hci_create_pipe_params { u8 src_gate; u8 dest_host; u8 dest_gate; } __packed; struct nci_hci_create_pipe_resp { u8 src_host; u8 src_gate; u8 dest_host; u8 dest_gate; u8 pipe; } __packed; struct nci_hci_delete_pipe_noti { u8 pipe; } __packed; struct nci_hci_all_pipe_cleared_noti { u8 host; } __packed; struct nci_hcp_message { u8 header; /* type -cmd,evt,rsp- + instruction */ u8 data[]; } __packed; struct nci_hcp_packet { u8 header; /* cbit+pipe */ struct nci_hcp_message message; } __packed; #define NCI_HCI_ANY_SET_PARAMETER 0x01 #define NCI_HCI_ANY_GET_PARAMETER 0x02 #define NCI_HCI_ANY_CLOSE_PIPE 0x04 #define NCI_HCI_ADM_CLEAR_ALL_PIPE 0x14 #define NCI_HFP_NO_CHAINING 0x80 #define NCI_NFCEE_ID_HCI 0x80 #define NCI_EVT_HOT_PLUG 0x03 #define NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY 0x01 #define NCI_HCI_ADM_CREATE_PIPE 0x10 #define NCI_HCI_ADM_DELETE_PIPE 0x11 /* HCP headers */ #define NCI_HCI_HCP_PACKET_HEADER_LEN 1 #define NCI_HCI_HCP_MESSAGE_HEADER_LEN 1 #define NCI_HCI_HCP_HEADER_LEN 2 /* HCP types */ #define NCI_HCI_HCP_COMMAND 0x00 #define NCI_HCI_HCP_EVENT 0x01 #define NCI_HCI_HCP_RESPONSE 0x02 #define NCI_HCI_ADM_NOTIFY_PIPE_CREATED 0x12 #define NCI_HCI_ADM_NOTIFY_PIPE_DELETED 0x13 #define NCI_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED 0x15 #define NCI_HCI_FRAGMENT 0x7f #define NCI_HCP_HEADER(type, instr) ((((type) & 0x03) << 6) |\ ((instr) & 0x3f)) #define NCI_HCP_MSG_GET_TYPE(header) ((header & 0xc0) >> 6) #define NCI_HCP_MSG_GET_CMD(header) (header & 0x3f) #define NCI_HCP_MSG_GET_PIPE(header) (header & 0x7f) static int nci_hci_result_to_errno(u8 result) { switch (result) { case NCI_HCI_ANY_OK: return 0; case NCI_HCI_ANY_E_REG_PAR_UNKNOWN: return -EOPNOTSUPP; case NCI_HCI_ANY_E_TIMEOUT: return -ETIME; default: return -1; } } /* HCI core */ static void nci_hci_reset_pipes(struct nci_hci_dev *hdev) { int i; for (i = 0; i < NCI_HCI_MAX_PIPES; i++) { hdev->pipes[i].gate = NCI_HCI_INVALID_GATE; hdev->pipes[i].host = NCI_HCI_INVALID_HOST; } memset(hdev->gate2pipe, NCI_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); } static void nci_hci_reset_pipes_per_host(struct nci_dev *ndev, u8 host) { int i; for (i = 0; i < NCI_HCI_MAX_PIPES; i++) { if (ndev->hci_dev->pipes[i].host == host) { ndev->hci_dev->pipes[i].gate = NCI_HCI_INVALID_GATE; ndev->hci_dev->pipes[i].host = NCI_HCI_INVALID_HOST; } } } /* Fragment HCI data over NCI packet. * NFC Forum NCI 10.2.2 Data Exchange: * The payload of the Data Packets sent on the Logical Connection SHALL be * valid HCP packets, as defined within [ETSI_102622]. Each Data Packet SHALL * contain a single HCP packet. NCI Segmentation and Reassembly SHALL NOT be * applied to Data Messages in either direction. The HCI fragmentation mechanism * is used if required. */ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, const u8 data_type, const u8 *data, size_t data_len) { const struct nci_conn_info *conn_info; struct sk_buff *skb; int len, i, r; u8 cb = pipe; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; i = 0; skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + NCI_DATA_HDR_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; skb_reserve(skb, NCI_DATA_HDR_SIZE + 2); *(u8 *)skb_push(skb, 1) = data_type; do { /* If last packet add NCI_HFP_NO_CHAINING */ if (i + conn_info->max_pkt_payload_len - (skb->len + 1) >= data_len) { cb |= NCI_HFP_NO_CHAINING; len = data_len - i; } else { len = conn_info->max_pkt_payload_len - skb->len - 1; } *(u8 *)skb_push(skb, 1) = cb; if (len > 0) skb_put_data(skb, data + i, len); r = nci_send_data(ndev, conn_info->conn_id, skb); if (r < 0) return r; i += len; if (i < data_len) { skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + NCI_DATA_HDR_SIZE, GFP_ATOMIC); if (!skb) return -ENOMEM; skb_reserve(skb, NCI_DATA_HDR_SIZE + 1); } } while (i < data_len); return i; } static void nci_hci_send_data_req(struct nci_dev *ndev, const void *opt) { const struct nci_data *data = opt; nci_hci_send_data(ndev, data->pipe, data->cmd, data->data, data->data_len); } int nci_hci_send_event(struct nci_dev *ndev, u8 gate, u8 event, const u8 *param, size_t param_len) { u8 pipe = ndev->hci_dev->gate2pipe[gate]; if (pipe == NCI_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; return nci_hci_send_data(ndev, pipe, NCI_HCP_HEADER(NCI_HCI_HCP_EVENT, event), param, param_len); } EXPORT_SYMBOL(nci_hci_send_event); int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, const u8 *param, size_t param_len, struct sk_buff **skb) { const struct nci_hcp_message *message; const struct nci_conn_info *conn_info; struct nci_data data; int r; u8 pipe = ndev->hci_dev->gate2pipe[gate]; if (pipe == NCI_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; data.conn_id = conn_info->conn_id; data.pipe = pipe; data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, cmd); data.data = param; data.data_len = param_len; r = nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK) { message = (struct nci_hcp_message *)conn_info->rx_skb->data; r = nci_hci_result_to_errno( NCI_HCP_MSG_GET_CMD(message->header)); skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); if (!r && skb) *skb = conn_info->rx_skb; } return r; } EXPORT_SYMBOL(nci_hci_send_cmd); int nci_hci_clear_all_pipes(struct nci_dev *ndev) { int r; r = nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADM_CLEAR_ALL_PIPE, NULL, 0, NULL); if (r < 0) return r; nci_hci_reset_pipes(ndev->hci_dev); return r; } EXPORT_SYMBOL(nci_hci_clear_all_pipes); static void nci_hci_event_received(struct nci_dev *ndev, u8 pipe, u8 event, struct sk_buff *skb) { if (ndev->ops->hci_event_received) ndev->ops->hci_event_received(ndev, pipe, event, skb); } static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, u8 cmd, struct sk_buff *skb) { u8 gate = ndev->hci_dev->pipes[pipe].gate; u8 status = NCI_HCI_ANY_OK | ~NCI_HCI_FRAGMENT; u8 dest_gate, new_pipe; struct nci_hci_create_pipe_resp *create_info; struct nci_hci_delete_pipe_noti *delete_info; struct nci_hci_all_pipe_cleared_noti *cleared_info; pr_debug("from gate %x pipe %x cmd %x\n", gate, pipe, cmd); switch (cmd) { case NCI_HCI_ADM_NOTIFY_PIPE_CREATED: if (skb->len != 5) { status = NCI_HCI_ANY_E_NOK; goto exit; } create_info = (struct nci_hci_create_pipe_resp *)skb->data; dest_gate = create_info->dest_gate; new_pipe = create_info->pipe; if (new_pipe >= NCI_HCI_MAX_PIPES) { status = NCI_HCI_ANY_E_NOK; goto exit; } /* Save the new created pipe and bind with local gate, * the description for skb->data[3] is destination gate id * but since we received this cmd from host controller, we * are the destination and it is our local gate */ ndev->hci_dev->gate2pipe[dest_gate] = new_pipe; ndev->hci_dev->pipes[new_pipe].gate = dest_gate; ndev->hci_dev->pipes[new_pipe].host = create_info->src_host; break; case NCI_HCI_ANY_OPEN_PIPE: /* If the pipe is not created report an error */ if (gate == NCI_HCI_INVALID_GATE) { status = NCI_HCI_ANY_E_NOK; goto exit; } break; case NCI_HCI_ADM_NOTIFY_PIPE_DELETED: if (skb->len != 1) { status = NCI_HCI_ANY_E_NOK; goto exit; } delete_info = (struct nci_hci_delete_pipe_noti *)skb->data; if (delete_info->pipe >= NCI_HCI_MAX_PIPES) { status = NCI_HCI_ANY_E_NOK; goto exit; } ndev->hci_dev->pipes[delete_info->pipe].gate = NCI_HCI_INVALID_GATE; ndev->hci_dev->pipes[delete_info->pipe].host = NCI_HCI_INVALID_HOST; break; case NCI_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED: if (skb->len != 1) { status = NCI_HCI_ANY_E_NOK; goto exit; } cleared_info = (struct nci_hci_all_pipe_cleared_noti *)skb->data; nci_hci_reset_pipes_per_host(ndev, cleared_info->host); break; default: pr_debug("Discarded unknown cmd %x to gate %x\n", cmd, gate); break; } if (ndev->ops->hci_cmd_received) ndev->ops->hci_cmd_received(ndev, pipe, cmd, skb); exit: nci_hci_send_data(ndev, pipe, status, NULL, 0); kfree_skb(skb); } static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe, struct sk_buff *skb) { struct nci_conn_info *conn_info; conn_info = ndev->hci_dev->conn_info; if (!conn_info) goto exit; conn_info->rx_skb = skb; exit: nci_req_complete(ndev, NCI_STATUS_OK); } /* Receive hcp message for pipe, with type and cmd. * skb contains optional message data only. */ static void nci_hci_hcp_message_rx(struct nci_dev *ndev, u8 pipe, u8 type, u8 instruction, struct sk_buff *skb) { switch (type) { case NCI_HCI_HCP_RESPONSE: nci_hci_resp_received(ndev, pipe, skb); break; case NCI_HCI_HCP_COMMAND: nci_hci_cmd_received(ndev, pipe, instruction, skb); break; case NCI_HCI_HCP_EVENT: nci_hci_event_received(ndev, pipe, instruction, skb); break; default: pr_err("UNKNOWN MSG Type %d, instruction=%d\n", type, instruction); kfree_skb(skb); break; } nci_req_complete(ndev, NCI_STATUS_OK); } static void nci_hci_msg_rx_work(struct work_struct *work) { struct nci_hci_dev *hdev = container_of(work, struct nci_hci_dev, msg_rx_work); struct sk_buff *skb; const struct nci_hcp_message *message; u8 pipe, type, instruction; for (; (skb = skb_dequeue(&hdev->msg_rx_queue)); kcov_remote_stop()) { kcov_remote_start_common(skb_get_kcov_handle(skb)); pipe = NCI_HCP_MSG_GET_PIPE(skb->data[0]); skb_pull(skb, NCI_HCI_HCP_PACKET_HEADER_LEN); message = (struct nci_hcp_message *)skb->data; type = NCI_HCP_MSG_GET_TYPE(message->header); instruction = NCI_HCP_MSG_GET_CMD(message->header); skb_pull(skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); nci_hci_hcp_message_rx(hdev->ndev, pipe, type, instruction, skb); } } void nci_hci_data_received_cb(void *context, struct sk_buff *skb, int err) { struct nci_dev *ndev = (struct nci_dev *)context; struct nci_hcp_packet *packet; u8 pipe, type; struct sk_buff *hcp_skb; struct sk_buff *frag_skb; int msg_len; if (err) { nci_req_complete(ndev, err); return; } packet = (struct nci_hcp_packet *)skb->data; if ((packet->header & ~NCI_HCI_FRAGMENT) == 0) { skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); return; } /* it's the last fragment. Does it need re-aggregation? */ if (skb_queue_len(&ndev->hci_dev->rx_hcp_frags)) { pipe = NCI_HCP_MSG_GET_PIPE(packet->header); skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); msg_len = 0; skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { msg_len += (frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN); } hcp_skb = nfc_alloc_recv_skb(NCI_HCI_HCP_PACKET_HEADER_LEN + msg_len, GFP_KERNEL); if (!hcp_skb) { nci_req_complete(ndev, -ENOMEM); return; } skb_put_u8(hcp_skb, pipe); skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN; skb_put_data(hcp_skb, frag_skb->data + NCI_HCI_HCP_PACKET_HEADER_LEN, msg_len); } skb_queue_purge(&ndev->hci_dev->rx_hcp_frags); } else { packet->header &= NCI_HCI_FRAGMENT; hcp_skb = skb; } /* if this is a response, dispatch immediately to * unblock waiting cmd context. Otherwise, enqueue to dispatch * in separate context where handler can also execute command. */ packet = (struct nci_hcp_packet *)hcp_skb->data; type = NCI_HCP_MSG_GET_TYPE(packet->message.header); if (type == NCI_HCI_HCP_RESPONSE) { pipe = NCI_HCP_MSG_GET_PIPE(packet->header); skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN); nci_hci_hcp_message_rx(ndev, pipe, type, NCI_STATUS_OK, hcp_skb); } else { skb_queue_tail(&ndev->hci_dev->msg_rx_queue, hcp_skb); schedule_work(&ndev->hci_dev->msg_rx_work); } } int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe) { struct nci_data data; const struct nci_conn_info *conn_info; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; data.conn_id = conn_info->conn_id; data.pipe = pipe; data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, NCI_HCI_ANY_OPEN_PIPE); data.data = NULL; data.data_len = 0; return nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); } EXPORT_SYMBOL(nci_hci_open_pipe); static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host, u8 dest_gate, int *result) { u8 pipe; struct sk_buff *skb; struct nci_hci_create_pipe_params params; const struct nci_hci_create_pipe_resp *resp; pr_debug("gate=%d\n", dest_gate); params.src_gate = NCI_HCI_ADMIN_GATE; params.dest_host = dest_host; params.dest_gate = dest_gate; *result = nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADM_CREATE_PIPE, (u8 *)¶ms, sizeof(params), &skb); if (*result < 0) return NCI_HCI_INVALID_PIPE; resp = (struct nci_hci_create_pipe_resp *)skb->data; pipe = resp->pipe; kfree_skb(skb); pr_debug("pipe created=%d\n", pipe); if (pipe >= NCI_HCI_MAX_PIPES) pipe = NCI_HCI_INVALID_PIPE; return pipe; } static int nci_hci_delete_pipe(struct nci_dev *ndev, u8 pipe) { return nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADM_DELETE_PIPE, &pipe, 1, NULL); } int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, const u8 *param, size_t param_len) { const struct nci_hcp_message *message; const struct nci_conn_info *conn_info; struct nci_data data; int r; u8 *tmp; u8 pipe = ndev->hci_dev->gate2pipe[gate]; pr_debug("idx=%d to gate %d\n", idx, gate); if (pipe == NCI_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; tmp = kmalloc(1 + param_len, GFP_KERNEL); if (!tmp) return -ENOMEM; *tmp = idx; memcpy(tmp + 1, param, param_len); data.conn_id = conn_info->conn_id; data.pipe = pipe; data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, NCI_HCI_ANY_SET_PARAMETER); data.data = tmp; data.data_len = param_len + 1; r = nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK) { message = (struct nci_hcp_message *)conn_info->rx_skb->data; r = nci_hci_result_to_errno( NCI_HCP_MSG_GET_CMD(message->header)); skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); } kfree(tmp); return r; } EXPORT_SYMBOL(nci_hci_set_param); int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, struct sk_buff **skb) { const struct nci_hcp_message *message; const struct nci_conn_info *conn_info; struct nci_data data; int r; u8 pipe = ndev->hci_dev->gate2pipe[gate]; pr_debug("idx=%d to gate %d\n", idx, gate); if (pipe == NCI_HCI_INVALID_PIPE) return -EADDRNOTAVAIL; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; data.conn_id = conn_info->conn_id; data.pipe = pipe; data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, NCI_HCI_ANY_GET_PARAMETER); data.data = &idx; data.data_len = 1; r = nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK) { message = (struct nci_hcp_message *)conn_info->rx_skb->data; r = nci_hci_result_to_errno( NCI_HCP_MSG_GET_CMD(message->header)); skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); if (!r && skb) *skb = conn_info->rx_skb; } return r; } EXPORT_SYMBOL(nci_hci_get_param); int nci_hci_connect_gate(struct nci_dev *ndev, u8 dest_host, u8 dest_gate, u8 pipe) { bool pipe_created = false; int r; if (pipe == NCI_HCI_DO_NOT_OPEN_PIPE) return 0; if (ndev->hci_dev->gate2pipe[dest_gate] != NCI_HCI_INVALID_PIPE) return -EADDRINUSE; if (pipe != NCI_HCI_INVALID_PIPE) goto open_pipe; switch (dest_gate) { case NCI_HCI_LINK_MGMT_GATE: pipe = NCI_HCI_LINK_MGMT_PIPE; break; case NCI_HCI_ADMIN_GATE: pipe = NCI_HCI_ADMIN_PIPE; break; default: pipe = nci_hci_create_pipe(ndev, dest_host, dest_gate, &r); if (pipe == NCI_HCI_INVALID_PIPE) return r; pipe_created = true; break; } open_pipe: r = nci_hci_open_pipe(ndev, pipe); if (r < 0) { if (pipe_created) { if (nci_hci_delete_pipe(ndev, pipe) < 0) { /* TODO: Cannot clean by deleting pipe... * -> inconsistent state */ } } return r; } ndev->hci_dev->pipes[pipe].gate = dest_gate; ndev->hci_dev->pipes[pipe].host = dest_host; ndev->hci_dev->gate2pipe[dest_gate] = pipe; return 0; } EXPORT_SYMBOL(nci_hci_connect_gate); static int nci_hci_dev_connect_gates(struct nci_dev *ndev, u8 gate_count, const struct nci_hci_gate *gates) { int r; while (gate_count--) { r = nci_hci_connect_gate(ndev, gates->dest_host, gates->gate, gates->pipe); if (r < 0) return r; gates++; } return 0; } int nci_hci_dev_session_init(struct nci_dev *ndev) { struct nci_conn_info *conn_info; struct sk_buff *skb; int r; ndev->hci_dev->count_pipes = 0; ndev->hci_dev->expected_pipes = 0; conn_info = ndev->hci_dev->conn_info; if (!conn_info) return -EPROTO; conn_info->data_exchange_cb = nci_hci_data_received_cb; conn_info->data_exchange_cb_context = ndev; nci_hci_reset_pipes(ndev->hci_dev); if (ndev->hci_dev->init_data.gates[0].gate != NCI_HCI_ADMIN_GATE) return -EPROTO; r = nci_hci_connect_gate(ndev, ndev->hci_dev->init_data.gates[0].dest_host, ndev->hci_dev->init_data.gates[0].gate, ndev->hci_dev->init_data.gates[0].pipe); if (r < 0) return r; r = nci_hci_get_param(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY, &skb); if (r < 0) return r; if (skb->len && skb->len == strlen(ndev->hci_dev->init_data.session_id) && !memcmp(ndev->hci_dev->init_data.session_id, skb->data, skb->len) && ndev->ops->hci_load_session) { /* Restore gate<->pipe table from some proprietary location. */ r = ndev->ops->hci_load_session(ndev); } else { r = nci_hci_clear_all_pipes(ndev); if (r < 0) goto exit; r = nci_hci_dev_connect_gates(ndev, ndev->hci_dev->init_data.gate_count, ndev->hci_dev->init_data.gates); if (r < 0) goto exit; r = nci_hci_set_param(ndev, NCI_HCI_ADMIN_GATE, NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY, ndev->hci_dev->init_data.session_id, strlen(ndev->hci_dev->init_data.session_id)); } exit: kfree_skb(skb); return r; } EXPORT_SYMBOL(nci_hci_dev_session_init); struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev) { struct nci_hci_dev *hdev; hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); if (!hdev) return NULL; skb_queue_head_init(&hdev->rx_hcp_frags); INIT_WORK(&hdev->msg_rx_work, nci_hci_msg_rx_work); skb_queue_head_init(&hdev->msg_rx_queue); hdev->ndev = ndev; return hdev; } void nci_hci_deallocate(struct nci_dev *ndev) { kfree(ndev->hci_dev); } |
| 7 7 7 7 7 8 7 7 2 1 3 3 2 2 2 2 2 7 7 2 2 8 7 7 7 7 7 7 9 7 2 2 2 2 2 2 2 2 2 2 2 7 7 7 7 7 1 1 7 7 1 7 3 3 3 3 3 3 3 3 3 2 4 3 1 1 1 3 3 3 7 7 7 8 8 8 8 8 8 8 8 7 7 7 7 7 7 1 1 8 7 8 8 24 7 2 2 2 2 1 1 1 2 1 3 3 3 1 1 1 1 2 2 2 2 253 7 2 7 7 7 2 4 7 7 1 1 1 1 1 1 3 2 2 3 2 1 1 1 258 295 262 263 263 7 7 7 7 7 6 7 6 7 7 7 7 7 7 7 7 7 7 7 263 2 2 2 2 19 19 19 18 2 71 71 70 70 71 254 252 7 7 7 7 7 7 7 7 7 7 8 8 8 8 8 17 17 16 2 2 2 2 2 2 2 1 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 3 1 1 1 1 1 1 2 2 4 4 4 4 4 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 4 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 1 2 2 2 1 1 1 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 | // SPDX-License-Identifier: GPL-2.0 /* * Implementation of the diskquota system for the LINUX operating system. QUOTA * is implemented using the BSD system call interface as the means of * communication with the user level. This file contains the generic routines * called by the different filesystems on allocation of an inode or block. * These routines take care of the administration needed to have a consistent * diskquota tracking system. The ideas of both user and group quotas are based * on the Melbourne quota system as used on BSD derived systems. The internal * implementation is based on one of the several variants of the LINUX * inode-subsystem with added complexity of the diskquota system. * * Author: Marco van Wieringen <mvw@planets.elm.net> * * Fixes: Dmitry Gorodchanin <pgmdsg@ibi.com>, 11 Feb 96 * * Revised list management to avoid races * -- Bill Hawes, <whawes@star.net>, 9/98 * * Fixed races in dquot_transfer(), dqget() and dquot_alloc_...(). * As the consequence the locking was moved from dquot_decr_...(), * dquot_incr_...() to calling functions. * invalidate_dquots() now writes modified dquots. * Serialized quota_off() and quota_on() for mount point. * Fixed a few bugs in grow_dquots(). * Fixed deadlock in write_dquot() - we no longer account quotas on * quota files * remove_dquot_ref() moved to inode.c - it now traverses through inodes * add_dquot_ref() restarts after blocking * Added check for bogus uid and fixed check for group in quotactl. * Jan Kara, <jack@suse.cz>, sponsored by SuSE CR, 10-11/99 * * Used struct list_head instead of own list struct * Invalidation of referenced dquots is no longer possible * Improved free_dquots list management * Quota and i_blocks are now updated in one place to avoid races * Warnings are now delayed so we won't block in critical section * Write updated not to require dquot lock * Jan Kara, <jack@suse.cz>, 9/2000 * * Added dynamic quota structure allocation * Jan Kara <jack@suse.cz> 12/2000 * * Rewritten quota interface. Implemented new quota format and * formats registering. * Jan Kara, <jack@suse.cz>, 2001,2002 * * New SMP locking. * Jan Kara, <jack@suse.cz>, 10/2002 * * Added journalled quota support, fix lock inversion problems * Jan Kara, <jack@suse.cz>, 2003,2004 * * (C) Copyright 1994 - 1997 Marco van Wieringen */ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/fs.h> #include <linux/mount.h> #include <linux/mm.h> #include <linux/time.h> #include <linux/types.h> #include <linux/string.h> #include <linux/fcntl.h> #include <linux/stat.h> #include <linux/tty.h> #include <linux/file.h> #include <linux/slab.h> #include <linux/sysctl.h> #include <linux/init.h> #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/security.h> #include <linux/sched.h> #include <linux/cred.h> #include <linux/kmod.h> #include <linux/namei.h> #include <linux/capability.h> #include <linux/quotaops.h> #include <linux/blkdev.h> #include <linux/sched/mm.h> #include <linux/uaccess.h> /* * There are five quota SMP locks: * * dq_list_lock protects all lists with quotas and quota formats. * * dquot->dq_dqb_lock protects data from dq_dqb * * inode->i_lock protects inode->i_blocks, i_bytes and also guards * consistency of dquot->dq_dqb with inode->i_blocks, i_bytes so that * dquot_transfer() can stabilize amount it transfers * * dq_data_lock protects mem_dqinfo structures and modifications of dquot * pointers in the inode * * dq_state_lock protects modifications of quota state (on quotaon and * quotaoff) and readers who care about latest values take it as well. * * The spinlock ordering is hence: * dq_data_lock > dq_list_lock > i_lock > dquot->dq_dqb_lock, * dq_list_lock > dq_state_lock * * Note that some things (eg. sb pointer, type, id) doesn't change during * the life of the dquot structure and so needn't to be protected by a lock * * Operation accessing dquots via inode pointers are protected by dquot_srcu. * Operation of reading pointer needs srcu_read_lock(&dquot_srcu), and * synchronize_srcu(&dquot_srcu) is called after clearing pointers from * inode and before dropping dquot references to avoid use of dquots after * they are freed. dq_data_lock is used to serialize the pointer setting and * clearing operations. * Special care needs to be taken about S_NOQUOTA inode flag (marking that * inode is a quota file). Functions adding pointers from inode to dquots have * to check this flag under dq_data_lock and then (if S_NOQUOTA is not set) they * have to do all pointer modifications before dropping dq_data_lock. This makes * sure they cannot race with quotaon which first sets S_NOQUOTA flag and * then drops all pointers to dquots from an inode. * * Each dquot has its dq_lock mutex. Dquot is locked when it is being read to * memory (or space for it is being allocated) on the first dqget(), when it is * being written out, and when it is being released on the last dqput(). The * allocation and release operations are serialized by the dq_lock and by * checking the use count in dquot_release(). * * Lock ordering (including related VFS locks) is the following: * s_umount > i_mutex > journal_lock > dquot->dq_lock > dqio_sem */ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock); EXPORT_SYMBOL(dq_data_lock); DEFINE_STATIC_SRCU(dquot_srcu); static DECLARE_WAIT_QUEUE_HEAD(dquot_ref_wq); void __quota_error(struct super_block *sb, const char *func, const char *fmt, ...) { if (printk_ratelimit()) { va_list args; struct va_format vaf; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; printk(KERN_ERR "Quota error (device %s): %s: %pV\n", sb->s_id, func, &vaf); va_end(args); } } EXPORT_SYMBOL(__quota_error); #if defined(CONFIG_QUOTA_DEBUG) || defined(CONFIG_PRINT_QUOTA_WARNING) static char *quotatypes[] = INITQFNAMES; #endif static struct quota_format_type *quota_formats; /* List of registered formats */ static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES; /* SLAB cache for dquot structures */ static struct kmem_cache *dquot_cachep; /* workqueue for work quota_release_work*/ static struct workqueue_struct *quota_unbound_wq; void register_quota_format(struct quota_format_type *fmt) { spin_lock(&dq_list_lock); fmt->qf_next = quota_formats; quota_formats = fmt; spin_unlock(&dq_list_lock); } EXPORT_SYMBOL(register_quota_format); void unregister_quota_format(struct quota_format_type *fmt) { struct quota_format_type **actqf; spin_lock(&dq_list_lock); for (actqf = "a_formats; *actqf && *actqf != fmt; actqf = &(*actqf)->qf_next) ; if (*actqf) *actqf = (*actqf)->qf_next; spin_unlock(&dq_list_lock); } EXPORT_SYMBOL(unregister_quota_format); static struct quota_format_type *find_quota_format(int id) { struct quota_format_type *actqf; spin_lock(&dq_list_lock); for (actqf = quota_formats; actqf && actqf->qf_fmt_id != id; actqf = actqf->qf_next) ; if (!actqf || !try_module_get(actqf->qf_owner)) { int qm; spin_unlock(&dq_list_lock); for (qm = 0; module_names[qm].qm_fmt_id && module_names[qm].qm_fmt_id != id; qm++) ; if (!module_names[qm].qm_fmt_id || request_module(module_names[qm].qm_mod_name)) return NULL; spin_lock(&dq_list_lock); for (actqf = quota_formats; actqf && actqf->qf_fmt_id != id; actqf = actqf->qf_next) ; if (actqf && !try_module_get(actqf->qf_owner)) actqf = NULL; } spin_unlock(&dq_list_lock); return actqf; } static void put_quota_format(struct quota_format_type *fmt) { module_put(fmt->qf_owner); } /* * Dquot List Management: * The quota code uses five lists for dquot management: the inuse_list, * releasing_dquots, free_dquots, dqi_dirty_list, and dquot_hash[] array. * A single dquot structure may be on some of those lists, depending on * its current state. * * All dquots are placed to the end of inuse_list when first created, and this * list is used for invalidate operation, which must look at every dquot. * * When the last reference of a dquot is dropped, the dquot is added to * releasing_dquots. We'll then queue work item which will call * synchronize_srcu() and after that perform the final cleanup of all the * dquots on the list. Each cleaned up dquot is moved to free_dquots list. * Both releasing_dquots and free_dquots use the dq_free list_head in the dquot * struct. * * Unused and cleaned up dquots are in the free_dquots list and this list is * searched whenever we need an available dquot. Dquots are removed from the * list as soon as they are used again and dqstats.free_dquots gives the number * of dquots on the list. When dquot is invalidated it's completely released * from memory. * * Dirty dquots are added to the dqi_dirty_list of quota_info when mark * dirtied, and this list is searched when writing dirty dquots back to * quota file. Note that some filesystems do dirty dquot tracking on their * own (e.g. in a journal) and thus don't use dqi_dirty_list. * * Dquots with a specific identity (device, type and id) are placed on * one of the dquot_hash[] hash chains. The provides an efficient search * mechanism to locate a specific dquot. */ static LIST_HEAD(inuse_list); static LIST_HEAD(free_dquots); static LIST_HEAD(releasing_dquots); static unsigned int dq_hash_bits, dq_hash_mask; static struct hlist_head *dquot_hash; struct dqstats dqstats; EXPORT_SYMBOL(dqstats); static qsize_t inode_get_rsv_space(struct inode *inode); static qsize_t __inode_get_rsv_space(struct inode *inode); static int __dquot_initialize(struct inode *inode, int type); static void quota_release_workfn(struct work_struct *work); static DECLARE_DELAYED_WORK(quota_release_work, quota_release_workfn); static inline unsigned int hashfn(const struct super_block *sb, struct kqid qid) { unsigned int id = from_kqid(&init_user_ns, qid); int type = qid.type; unsigned long tmp; tmp = (((unsigned long)sb>>L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type); return (tmp + (tmp >> dq_hash_bits)) & dq_hash_mask; } /* * Following list functions expect dq_list_lock to be held */ static inline void insert_dquot_hash(struct dquot *dquot) { struct hlist_head *head; head = dquot_hash + hashfn(dquot->dq_sb, dquot->dq_id); hlist_add_head(&dquot->dq_hash, head); } static inline void remove_dquot_hash(struct dquot *dquot) { hlist_del_init(&dquot->dq_hash); } static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb, struct kqid qid) { struct dquot *dquot; hlist_for_each_entry(dquot, dquot_hash+hashent, dq_hash) if (dquot->dq_sb == sb && qid_eq(dquot->dq_id, qid)) return dquot; return NULL; } /* Add a dquot to the tail of the free list */ static inline void put_dquot_last(struct dquot *dquot) { list_add_tail(&dquot->dq_free, &free_dquots); dqstats_inc(DQST_FREE_DQUOTS); } static inline void put_releasing_dquots(struct dquot *dquot) { list_add_tail(&dquot->dq_free, &releasing_dquots); set_bit(DQ_RELEASING_B, &dquot->dq_flags); } static inline void remove_free_dquot(struct dquot *dquot) { if (list_empty(&dquot->dq_free)) return; list_del_init(&dquot->dq_free); if (!test_bit(DQ_RELEASING_B, &dquot->dq_flags)) dqstats_dec(DQST_FREE_DQUOTS); else clear_bit(DQ_RELEASING_B, &dquot->dq_flags); } static inline void put_inuse(struct dquot *dquot) { /* We add to the back of inuse list so we don't have to restart * when traversing this list and we block */ list_add_tail(&dquot->dq_inuse, &inuse_list); dqstats_inc(DQST_ALLOC_DQUOTS); } static inline void remove_inuse(struct dquot *dquot) { dqstats_dec(DQST_ALLOC_DQUOTS); list_del(&dquot->dq_inuse); } /* * End of list functions needing dq_list_lock */ static void wait_on_dquot(struct dquot *dquot) { mutex_lock(&dquot->dq_lock); mutex_unlock(&dquot->dq_lock); } static inline int dquot_active(struct dquot *dquot) { return test_bit(DQ_ACTIVE_B, &dquot->dq_flags); } static inline int dquot_dirty(struct dquot *dquot) { return test_bit(DQ_MOD_B, &dquot->dq_flags); } static inline int mark_dquot_dirty(struct dquot *dquot) { return dquot->dq_sb->dq_op->mark_dirty(dquot); } /* Mark dquot dirty in atomic manner, and return it's old dirty flag state */ int dquot_mark_dquot_dirty(struct dquot *dquot) { int ret = 1; if (!dquot_active(dquot)) return 0; if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NOLIST_DIRTY) return test_and_set_bit(DQ_MOD_B, &dquot->dq_flags); /* If quota is dirty already, we don't have to acquire dq_list_lock */ if (dquot_dirty(dquot)) return 1; spin_lock(&dq_list_lock); if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) { list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)-> info[dquot->dq_id.type].dqi_dirty_list); ret = 0; } spin_unlock(&dq_list_lock); return ret; } EXPORT_SYMBOL(dquot_mark_dquot_dirty); /* Dirtify all the dquots - this can block when journalling */ static inline int mark_all_dquot_dirty(struct dquot __rcu * const *dquots) { int ret, err, cnt; struct dquot *dquot; ret = err = 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (dquot) /* Even in case of error we have to continue */ ret = mark_dquot_dirty(dquot); if (!err && ret < 0) err = ret; } return err; } static inline void dqput_all(struct dquot **dquot) { unsigned int cnt; for (cnt = 0; cnt < MAXQUOTAS; cnt++) dqput(dquot[cnt]); } static inline int clear_dquot_dirty(struct dquot *dquot) { if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NOLIST_DIRTY) return test_and_clear_bit(DQ_MOD_B, &dquot->dq_flags); spin_lock(&dq_list_lock); if (!test_and_clear_bit(DQ_MOD_B, &dquot->dq_flags)) { spin_unlock(&dq_list_lock); return 0; } list_del_init(&dquot->dq_dirty); spin_unlock(&dq_list_lock); return 1; } void mark_info_dirty(struct super_block *sb, int type) { spin_lock(&dq_data_lock); sb_dqopt(sb)->info[type].dqi_flags |= DQF_INFO_DIRTY; spin_unlock(&dq_data_lock); } EXPORT_SYMBOL(mark_info_dirty); /* * Read dquot from disk and alloc space for it */ int dquot_acquire(struct dquot *dquot) { int ret = 0, ret2 = 0; unsigned int memalloc; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); mutex_lock(&dquot->dq_lock); memalloc = memalloc_nofs_save(); if (!test_bit(DQ_READ_B, &dquot->dq_flags)) { ret = dqopt->ops[dquot->dq_id.type]->read_dqblk(dquot); if (ret < 0) goto out_iolock; } /* Make sure flags update is visible after dquot has been filled */ smp_mb__before_atomic(); set_bit(DQ_READ_B, &dquot->dq_flags); /* Instantiate dquot if needed */ if (!dquot_active(dquot) && !dquot->dq_off) { ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot); /* Write the info if needed */ if (info_dirty(&dqopt->info[dquot->dq_id.type])) { ret2 = dqopt->ops[dquot->dq_id.type]->write_file_info( dquot->dq_sb, dquot->dq_id.type); } if (ret < 0) goto out_iolock; if (ret2 < 0) { ret = ret2; goto out_iolock; } } /* * Make sure flags update is visible after on-disk struct has been * allocated. Paired with smp_rmb() in dqget(). */ smp_mb__before_atomic(); set_bit(DQ_ACTIVE_B, &dquot->dq_flags); out_iolock: memalloc_nofs_restore(memalloc); mutex_unlock(&dquot->dq_lock); return ret; } EXPORT_SYMBOL(dquot_acquire); /* * Write dquot to disk */ int dquot_commit(struct dquot *dquot) { int ret = 0; unsigned int memalloc; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); mutex_lock(&dquot->dq_lock); memalloc = memalloc_nofs_save(); if (!clear_dquot_dirty(dquot)) goto out_lock; /* Inactive dquot can be only if there was error during read/init * => we have better not writing it */ if (dquot_active(dquot)) ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot); else ret = -EIO; out_lock: memalloc_nofs_restore(memalloc); mutex_unlock(&dquot->dq_lock); return ret; } EXPORT_SYMBOL(dquot_commit); /* * Release dquot */ int dquot_release(struct dquot *dquot) { int ret = 0, ret2 = 0; unsigned int memalloc; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); mutex_lock(&dquot->dq_lock); memalloc = memalloc_nofs_save(); /* Check whether we are not racing with some other dqget() */ if (dquot_is_busy(dquot)) goto out_dqlock; if (dqopt->ops[dquot->dq_id.type]->release_dqblk) { ret = dqopt->ops[dquot->dq_id.type]->release_dqblk(dquot); /* Write the info */ if (info_dirty(&dqopt->info[dquot->dq_id.type])) { ret2 = dqopt->ops[dquot->dq_id.type]->write_file_info( dquot->dq_sb, dquot->dq_id.type); } if (ret >= 0) ret = ret2; } clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); out_dqlock: memalloc_nofs_restore(memalloc); mutex_unlock(&dquot->dq_lock); return ret; } EXPORT_SYMBOL(dquot_release); void dquot_destroy(struct dquot *dquot) { kmem_cache_free(dquot_cachep, dquot); } EXPORT_SYMBOL(dquot_destroy); static inline void do_destroy_dquot(struct dquot *dquot) { dquot->dq_sb->dq_op->destroy_dquot(dquot); } /* Invalidate all dquots on the list. Note that this function is called after * quota is disabled and pointers from inodes removed so there cannot be new * quota users. There can still be some users of quotas due to inodes being * just deleted or pruned by prune_icache() (those are not attached to any * list) or parallel quotactl call. We have to wait for such users. */ static void invalidate_dquots(struct super_block *sb, int type) { struct dquot *dquot, *tmp; restart: flush_delayed_work("a_release_work); spin_lock(&dq_list_lock); list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) { if (dquot->dq_sb != sb) continue; if (dquot->dq_id.type != type) continue; /* Wait for dquot users */ if (atomic_read(&dquot->dq_count)) { atomic_inc(&dquot->dq_count); spin_unlock(&dq_list_lock); /* * Once dqput() wakes us up, we know it's time to free * the dquot. * IMPORTANT: we rely on the fact that there is always * at most one process waiting for dquot to free. * Otherwise dq_count would be > 1 and we would never * wake up. */ wait_event(dquot_ref_wq, atomic_read(&dquot->dq_count) == 1); dqput(dquot); /* At this moment dquot() need not exist (it could be * reclaimed by prune_dqcache(). Hence we must * restart. */ goto restart; } /* * The last user already dropped its reference but dquot didn't * get fully cleaned up yet. Restart the scan which flushes the * work cleaning up released dquots. */ if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) { spin_unlock(&dq_list_lock); goto restart; } /* * Quota now has no users and it has been written on last * dqput() */ remove_dquot_hash(dquot); remove_free_dquot(dquot); remove_inuse(dquot); do_destroy_dquot(dquot); } spin_unlock(&dq_list_lock); } /* Call callback for every active dquot on given filesystem */ int dquot_scan_active(struct super_block *sb, int (*fn)(struct dquot *dquot, unsigned long priv), unsigned long priv) { struct dquot *dquot, *old_dquot = NULL; int ret = 0; WARN_ON_ONCE(!rwsem_is_locked(&sb->s_umount)); spin_lock(&dq_list_lock); list_for_each_entry(dquot, &inuse_list, dq_inuse) { if (!dquot_active(dquot)) continue; if (dquot->dq_sb != sb) continue; /* Now we have active dquot so we can just increase use count */ atomic_inc(&dquot->dq_count); spin_unlock(&dq_list_lock); dqput(old_dquot); old_dquot = dquot; /* * ->release_dquot() can be racing with us. Our reference * protects us from new calls to it so just wait for any * outstanding call and recheck the DQ_ACTIVE_B after that. */ wait_on_dquot(dquot); if (dquot_active(dquot)) { ret = fn(dquot, priv); if (ret < 0) goto out; } spin_lock(&dq_list_lock); /* We are safe to continue now because our dquot could not * be moved out of the inuse list while we hold the reference */ } spin_unlock(&dq_list_lock); out: dqput(old_dquot); return ret; } EXPORT_SYMBOL(dquot_scan_active); static inline int dquot_write_dquot(struct dquot *dquot) { int ret = dquot->dq_sb->dq_op->write_dquot(dquot); if (ret < 0) { quota_error(dquot->dq_sb, "Can't write quota structure " "(error %d). Quota may get out of sync!", ret); /* Clear dirty bit anyway to avoid infinite loop. */ clear_dquot_dirty(dquot); } return ret; } /* Write all dquot structures to quota files */ int dquot_writeback_dquots(struct super_block *sb, int type) { struct list_head dirty; struct dquot *dquot; struct quota_info *dqopt = sb_dqopt(sb); int cnt; int err, ret = 0; WARN_ON_ONCE(!rwsem_is_locked(&sb->s_umount)); flush_delayed_work("a_release_work); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; if (!sb_has_quota_active(sb, cnt)) continue; spin_lock(&dq_list_lock); /* Move list away to avoid livelock. */ list_replace_init(&dqopt->info[cnt].dqi_dirty_list, &dirty); while (!list_empty(&dirty)) { dquot = list_first_entry(&dirty, struct dquot, dq_dirty); WARN_ON(!dquot_active(dquot)); /* If the dquot is releasing we should not touch it */ if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) { spin_unlock(&dq_list_lock); flush_delayed_work("a_release_work); spin_lock(&dq_list_lock); continue; } /* Now we have active dquot from which someone is * holding reference so we can safely just increase * use count */ dqgrab(dquot); spin_unlock(&dq_list_lock); err = dquot_write_dquot(dquot); if (err && !ret) ret = err; dqput(dquot); spin_lock(&dq_list_lock); } spin_unlock(&dq_list_lock); } for (cnt = 0; cnt < MAXQUOTAS; cnt++) if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt) && info_dirty(&dqopt->info[cnt])) sb->dq_op->write_info(sb, cnt); dqstats_inc(DQST_SYNCS); return ret; } EXPORT_SYMBOL(dquot_writeback_dquots); /* Write all dquot structures to disk and make them visible from userspace */ int dquot_quota_sync(struct super_block *sb, int type) { struct quota_info *dqopt = sb_dqopt(sb); int cnt; int ret; ret = dquot_writeback_dquots(sb, type); if (ret) return ret; if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) return 0; /* This is not very clever (and fast) but currently I don't know about * any other simple way of getting quota data to disk and we must get * them there for userspace to be visible... */ if (sb->s_op->sync_fs) { ret = sb->s_op->sync_fs(sb, 1); if (ret) return ret; } ret = sync_blockdev(sb->s_bdev); if (ret) return ret; /* * Now when everything is written we can discard the pagecache so * that userspace sees the changes. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; if (!sb_has_quota_active(sb, cnt)) continue; inode_lock(dqopt->files[cnt]); truncate_inode_pages(&dqopt->files[cnt]->i_data, 0); inode_unlock(dqopt->files[cnt]); } return 0; } EXPORT_SYMBOL(dquot_quota_sync); static unsigned long dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { struct dquot *dquot; unsigned long freed = 0; spin_lock(&dq_list_lock); while (!list_empty(&free_dquots) && sc->nr_to_scan) { dquot = list_first_entry(&free_dquots, struct dquot, dq_free); remove_dquot_hash(dquot); remove_free_dquot(dquot); remove_inuse(dquot); do_destroy_dquot(dquot); sc->nr_to_scan--; freed++; } spin_unlock(&dq_list_lock); return freed; } static unsigned long dqcache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { return vfs_pressure_ratio( percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS])); } /* * Safely release dquot and put reference to dquot. */ static void quota_release_workfn(struct work_struct *work) { struct dquot *dquot; struct list_head rls_head; spin_lock(&dq_list_lock); /* Exchange the list head to avoid livelock. */ list_replace_init(&releasing_dquots, &rls_head); spin_unlock(&dq_list_lock); synchronize_srcu(&dquot_srcu); restart: spin_lock(&dq_list_lock); while (!list_empty(&rls_head)) { dquot = list_first_entry(&rls_head, struct dquot, dq_free); WARN_ON_ONCE(atomic_read(&dquot->dq_count)); /* * Note that DQ_RELEASING_B protects us from racing with * invalidate_dquots() calls so we are safe to work with the * dquot even after we drop dq_list_lock. */ if (dquot_dirty(dquot)) { spin_unlock(&dq_list_lock); /* Commit dquot before releasing */ dquot_write_dquot(dquot); goto restart; } if (dquot_active(dquot)) { spin_unlock(&dq_list_lock); dquot->dq_sb->dq_op->release_dquot(dquot); goto restart; } /* Dquot is inactive and clean, now move it to free list */ remove_free_dquot(dquot); put_dquot_last(dquot); } spin_unlock(&dq_list_lock); } /* * Put reference to dquot */ void dqput(struct dquot *dquot) { if (!dquot) return; #ifdef CONFIG_QUOTA_DEBUG if (!atomic_read(&dquot->dq_count)) { quota_error(dquot->dq_sb, "trying to free free dquot of %s %d", quotatypes[dquot->dq_id.type], from_kqid(&init_user_ns, dquot->dq_id)); BUG(); } #endif dqstats_inc(DQST_DROPS); spin_lock(&dq_list_lock); if (atomic_read(&dquot->dq_count) > 1) { /* We have more than one user... nothing to do */ atomic_dec(&dquot->dq_count); /* Releasing dquot during quotaoff phase? */ if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_id.type) && atomic_read(&dquot->dq_count) == 1) wake_up(&dquot_ref_wq); spin_unlock(&dq_list_lock); return; } /* Need to release dquot? */ WARN_ON_ONCE(!list_empty(&dquot->dq_free)); put_releasing_dquots(dquot); atomic_dec(&dquot->dq_count); spin_unlock(&dq_list_lock); queue_delayed_work(quota_unbound_wq, "a_release_work, 1); } EXPORT_SYMBOL(dqput); struct dquot *dquot_alloc(struct super_block *sb, int type) { return kmem_cache_zalloc(dquot_cachep, GFP_NOFS); } EXPORT_SYMBOL(dquot_alloc); static struct dquot *get_empty_dquot(struct super_block *sb, int type) { struct dquot *dquot; dquot = sb->dq_op->alloc_dquot(sb, type); if(!dquot) return NULL; mutex_init(&dquot->dq_lock); INIT_LIST_HEAD(&dquot->dq_free); INIT_LIST_HEAD(&dquot->dq_inuse); INIT_HLIST_NODE(&dquot->dq_hash); INIT_LIST_HEAD(&dquot->dq_dirty); dquot->dq_sb = sb; dquot->dq_id = make_kqid_invalid(type); atomic_set(&dquot->dq_count, 1); spin_lock_init(&dquot->dq_dqb_lock); return dquot; } /* * Get reference to dquot * * Locking is slightly tricky here. We are guarded from parallel quotaoff() * destroying our dquot by: * a) checking for quota flags under dq_list_lock and * b) getting a reference to dquot before we release dq_list_lock */ struct dquot *dqget(struct super_block *sb, struct kqid qid) { unsigned int hashent = hashfn(sb, qid); struct dquot *dquot, *empty = NULL; if (!qid_has_mapping(sb->s_user_ns, qid)) return ERR_PTR(-EINVAL); if (!sb_has_quota_active(sb, qid.type)) return ERR_PTR(-ESRCH); we_slept: spin_lock(&dq_list_lock); spin_lock(&dq_state_lock); if (!sb_has_quota_active(sb, qid.type)) { spin_unlock(&dq_state_lock); spin_unlock(&dq_list_lock); dquot = ERR_PTR(-ESRCH); goto out; } spin_unlock(&dq_state_lock); dquot = find_dquot(hashent, sb, qid); if (!dquot) { if (!empty) { spin_unlock(&dq_list_lock); empty = get_empty_dquot(sb, qid.type); if (!empty) schedule(); /* Try to wait for a moment... */ goto we_slept; } dquot = empty; empty = NULL; dquot->dq_id = qid; /* all dquots go on the inuse_list */ put_inuse(dquot); /* hash it first so it can be found */ insert_dquot_hash(dquot); spin_unlock(&dq_list_lock); dqstats_inc(DQST_LOOKUPS); } else { if (!atomic_read(&dquot->dq_count)) remove_free_dquot(dquot); atomic_inc(&dquot->dq_count); spin_unlock(&dq_list_lock); dqstats_inc(DQST_CACHE_HITS); dqstats_inc(DQST_LOOKUPS); } /* Wait for dq_lock - after this we know that either dquot_release() is * already finished or it will be canceled due to dq_count > 0 test */ wait_on_dquot(dquot); /* Read the dquot / allocate space in quota file */ if (!dquot_active(dquot)) { int err; err = sb->dq_op->acquire_dquot(dquot); if (err < 0) { dqput(dquot); dquot = ERR_PTR(err); goto out; } } /* * Make sure following reads see filled structure - paired with * smp_mb__before_atomic() in dquot_acquire(). */ smp_rmb(); /* Has somebody invalidated entry under us? */ WARN_ON_ONCE(hlist_unhashed(&dquot->dq_hash)); out: if (empty) do_destroy_dquot(empty); return dquot; } EXPORT_SYMBOL(dqget); static inline struct dquot __rcu **i_dquot(struct inode *inode) { return inode->i_sb->s_op->get_dquots(inode); } static int dqinit_needed(struct inode *inode, int type) { struct dquot __rcu * const *dquots; int cnt; if (IS_NOQUOTA(inode)) return 0; dquots = i_dquot(inode); if (type != -1) return !dquots[type]; for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (!dquots[cnt]) return 1; return 0; } /* This routine is guarded by s_umount semaphore */ static int add_dquot_ref(struct super_block *sb, int type) { struct inode *inode, *old_inode = NULL; #ifdef CONFIG_QUOTA_DEBUG int reserved = 0; #endif int err = 0; spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { spin_lock(&inode->i_lock); if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || !atomic_read(&inode->i_writecount) || !dqinit_needed(inode, type)) { spin_unlock(&inode->i_lock); continue; } __iget(inode); spin_unlock(&inode->i_lock); spin_unlock(&sb->s_inode_list_lock); #ifdef CONFIG_QUOTA_DEBUG if (unlikely(inode_get_rsv_space(inode) > 0)) reserved = 1; #endif iput(old_inode); err = __dquot_initialize(inode, type); if (err) { iput(inode); goto out; } /* * We hold a reference to 'inode' so it couldn't have been * removed from s_inodes list while we dropped the * s_inode_list_lock. We cannot iput the inode now as we can be * holding the last reference and we cannot iput it under * s_inode_list_lock. So we keep the reference and iput it * later. */ old_inode = inode; cond_resched(); spin_lock(&sb->s_inode_list_lock); } spin_unlock(&sb->s_inode_list_lock); iput(old_inode); out: #ifdef CONFIG_QUOTA_DEBUG if (reserved) { quota_error(sb, "Writes happened before quota was turned on " "thus quota information is probably inconsistent. " "Please run quotacheck(8)"); } #endif return err; } static void remove_dquot_ref(struct super_block *sb, int type) { struct inode *inode; #ifdef CONFIG_QUOTA_DEBUG int reserved = 0; #endif spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { /* * We have to scan also I_NEW inodes because they can already * have quota pointer initialized. Luckily, we need to touch * only quota pointers and these have separate locking * (dq_data_lock). */ spin_lock(&dq_data_lock); if (!IS_NOQUOTA(inode)) { struct dquot __rcu **dquots = i_dquot(inode); struct dquot *dquot = srcu_dereference_check( dquots[type], &dquot_srcu, lockdep_is_held(&dq_data_lock)); #ifdef CONFIG_QUOTA_DEBUG if (unlikely(inode_get_rsv_space(inode) > 0)) reserved = 1; #endif rcu_assign_pointer(dquots[type], NULL); if (dquot) dqput(dquot); } spin_unlock(&dq_data_lock); } spin_unlock(&sb->s_inode_list_lock); #ifdef CONFIG_QUOTA_DEBUG if (reserved) { printk(KERN_WARNING "VFS (%s): Writes happened after quota" " was disabled thus quota information is probably " "inconsistent. Please run quotacheck(8).\n", sb->s_id); } #endif } /* Gather all references from inodes and drop them */ static void drop_dquot_ref(struct super_block *sb, int type) { if (sb->dq_op) remove_dquot_ref(sb, type); } static inline void dquot_free_reserved_space(struct dquot *dquot, qsize_t number) { if (dquot->dq_dqb.dqb_rsvspace >= number) dquot->dq_dqb.dqb_rsvspace -= number; else { WARN_ON_ONCE(1); dquot->dq_dqb.dqb_rsvspace = 0; } if (dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace <= dquot->dq_dqb.dqb_bsoftlimit) dquot->dq_dqb.dqb_btime = (time64_t) 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); } static void dquot_decr_inodes(struct dquot *dquot, qsize_t number) { if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE || dquot->dq_dqb.dqb_curinodes >= number) dquot->dq_dqb.dqb_curinodes -= number; else dquot->dq_dqb.dqb_curinodes = 0; if (dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit) dquot->dq_dqb.dqb_itime = (time64_t) 0; clear_bit(DQ_INODES_B, &dquot->dq_flags); } static void dquot_decr_space(struct dquot *dquot, qsize_t number) { if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE || dquot->dq_dqb.dqb_curspace >= number) dquot->dq_dqb.dqb_curspace -= number; else dquot->dq_dqb.dqb_curspace = 0; if (dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace <= dquot->dq_dqb.dqb_bsoftlimit) dquot->dq_dqb.dqb_btime = (time64_t) 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); } struct dquot_warn { struct super_block *w_sb; struct kqid w_dq_id; short w_type; }; static int warning_issued(struct dquot *dquot, const int warntype) { int flag = (warntype == QUOTA_NL_BHARDWARN || warntype == QUOTA_NL_BSOFTLONGWARN) ? DQ_BLKS_B : ((warntype == QUOTA_NL_IHARDWARN || warntype == QUOTA_NL_ISOFTLONGWARN) ? DQ_INODES_B : 0); if (!flag) return 0; return test_and_set_bit(flag, &dquot->dq_flags); } #ifdef CONFIG_PRINT_QUOTA_WARNING static int flag_print_warnings = 1; static int need_print_warning(struct dquot_warn *warn) { if (!flag_print_warnings) return 0; switch (warn->w_dq_id.type) { case USRQUOTA: return uid_eq(current_fsuid(), warn->w_dq_id.uid); case GRPQUOTA: return in_group_p(warn->w_dq_id.gid); case PRJQUOTA: return 1; } return 0; } /* Print warning to user which exceeded quota */ static void print_warning(struct dquot_warn *warn) { char *msg = NULL; struct tty_struct *tty; int warntype = warn->w_type; if (warntype == QUOTA_NL_IHARDBELOW || warntype == QUOTA_NL_ISOFTBELOW || warntype == QUOTA_NL_BHARDBELOW || warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(warn)) return; tty = get_current_tty(); if (!tty) return; tty_write_message(tty, warn->w_sb->s_id); if (warntype == QUOTA_NL_ISOFTWARN || warntype == QUOTA_NL_BSOFTWARN) tty_write_message(tty, ": warning, "); else tty_write_message(tty, ": write failed, "); tty_write_message(tty, quotatypes[warn->w_dq_id.type]); switch (warntype) { case QUOTA_NL_IHARDWARN: msg = " file limit reached.\r\n"; break; case QUOTA_NL_ISOFTLONGWARN: msg = " file quota exceeded too long.\r\n"; break; case QUOTA_NL_ISOFTWARN: msg = " file quota exceeded.\r\n"; break; case QUOTA_NL_BHARDWARN: msg = " block limit reached.\r\n"; break; case QUOTA_NL_BSOFTLONGWARN: msg = " block quota exceeded too long.\r\n"; break; case QUOTA_NL_BSOFTWARN: msg = " block quota exceeded.\r\n"; break; } tty_write_message(tty, msg); tty_kref_put(tty); } #endif static void prepare_warning(struct dquot_warn *warn, struct dquot *dquot, int warntype) { if (warning_issued(dquot, warntype)) return; warn->w_type = warntype; warn->w_sb = dquot->dq_sb; warn->w_dq_id = dquot->dq_id; } /* * Write warnings to the console and send warning messages over netlink. * * Note that this function can call into tty and networking code. */ static void flush_warnings(struct dquot_warn *warn) { int i; for (i = 0; i < MAXQUOTAS; i++) { if (warn[i].w_type == QUOTA_NL_NOWARN) continue; #ifdef CONFIG_PRINT_QUOTA_WARNING print_warning(&warn[i]); #endif quota_send_warning(warn[i].w_dq_id, warn[i].w_sb->s_dev, warn[i].w_type); } } static int ignore_hardlimit(struct dquot *dquot) { struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type]; return capable(CAP_SYS_RESOURCE) && (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || !(info->dqi_flags & DQF_ROOT_SQUASH)); } static int dquot_add_inodes(struct dquot *dquot, qsize_t inodes, struct dquot_warn *warn) { qsize_t newinodes; int ret = 0; spin_lock(&dquot->dq_dqb_lock); newinodes = dquot->dq_dqb.dqb_curinodes + inodes; if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_id.type) || test_bit(DQ_FAKE_B, &dquot->dq_flags)) goto add; if (dquot->dq_dqb.dqb_ihardlimit && newinodes > dquot->dq_dqb.dqb_ihardlimit && !ignore_hardlimit(dquot)) { prepare_warning(warn, dquot, QUOTA_NL_IHARDWARN); ret = -EDQUOT; goto out; } if (dquot->dq_dqb.dqb_isoftlimit && newinodes > dquot->dq_dqb.dqb_isoftlimit && dquot->dq_dqb.dqb_itime && ktime_get_real_seconds() >= dquot->dq_dqb.dqb_itime && !ignore_hardlimit(dquot)) { prepare_warning(warn, dquot, QUOTA_NL_ISOFTLONGWARN); ret = -EDQUOT; goto out; } if (dquot->dq_dqb.dqb_isoftlimit && newinodes > dquot->dq_dqb.dqb_isoftlimit && dquot->dq_dqb.dqb_itime == 0) { prepare_warning(warn, dquot, QUOTA_NL_ISOFTWARN); dquot->dq_dqb.dqb_itime = ktime_get_real_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type].dqi_igrace; } add: dquot->dq_dqb.dqb_curinodes = newinodes; out: spin_unlock(&dquot->dq_dqb_lock); return ret; } static int dquot_add_space(struct dquot *dquot, qsize_t space, qsize_t rsv_space, unsigned int flags, struct dquot_warn *warn) { qsize_t tspace; struct super_block *sb = dquot->dq_sb; int ret = 0; spin_lock(&dquot->dq_dqb_lock); if (!sb_has_quota_limits_enabled(sb, dquot->dq_id.type) || test_bit(DQ_FAKE_B, &dquot->dq_flags)) goto finish; tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace + space + rsv_space; if (dquot->dq_dqb.dqb_bhardlimit && tspace > dquot->dq_dqb.dqb_bhardlimit && !ignore_hardlimit(dquot)) { if (flags & DQUOT_SPACE_WARN) prepare_warning(warn, dquot, QUOTA_NL_BHARDWARN); ret = -EDQUOT; goto finish; } if (dquot->dq_dqb.dqb_bsoftlimit && tspace > dquot->dq_dqb.dqb_bsoftlimit && dquot->dq_dqb.dqb_btime && ktime_get_real_seconds() >= dquot->dq_dqb.dqb_btime && !ignore_hardlimit(dquot)) { if (flags & DQUOT_SPACE_WARN) prepare_warning(warn, dquot, QUOTA_NL_BSOFTLONGWARN); ret = -EDQUOT; goto finish; } if (dquot->dq_dqb.dqb_bsoftlimit && tspace > dquot->dq_dqb.dqb_bsoftlimit && dquot->dq_dqb.dqb_btime == 0) { if (flags & DQUOT_SPACE_WARN) { prepare_warning(warn, dquot, QUOTA_NL_BSOFTWARN); dquot->dq_dqb.dqb_btime = ktime_get_real_seconds() + sb_dqopt(sb)->info[dquot->dq_id.type].dqi_bgrace; } else { /* * We don't allow preallocation to exceed softlimit so exceeding will * be always printed */ ret = -EDQUOT; goto finish; } } finish: /* * We have to be careful and go through warning generation & grace time * setting even if DQUOT_SPACE_NOFAIL is set. That's why we check it * only here... */ if (flags & DQUOT_SPACE_NOFAIL) ret = 0; if (!ret) { dquot->dq_dqb.dqb_rsvspace += rsv_space; dquot->dq_dqb.dqb_curspace += space; } spin_unlock(&dquot->dq_dqb_lock); return ret; } static int info_idq_free(struct dquot *dquot, qsize_t inodes) { qsize_t newinodes; if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit || !sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_id.type)) return QUOTA_NL_NOWARN; newinodes = dquot->dq_dqb.dqb_curinodes - inodes; if (newinodes <= dquot->dq_dqb.dqb_isoftlimit) return QUOTA_NL_ISOFTBELOW; if (dquot->dq_dqb.dqb_curinodes >= dquot->dq_dqb.dqb_ihardlimit && newinodes < dquot->dq_dqb.dqb_ihardlimit) return QUOTA_NL_IHARDBELOW; return QUOTA_NL_NOWARN; } static int info_bdq_free(struct dquot *dquot, qsize_t space) { qsize_t tspace; tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace; if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || tspace <= dquot->dq_dqb.dqb_bsoftlimit) return QUOTA_NL_NOWARN; if (tspace - space <= dquot->dq_dqb.dqb_bsoftlimit) return QUOTA_NL_BSOFTBELOW; if (tspace >= dquot->dq_dqb.dqb_bhardlimit && tspace - space < dquot->dq_dqb.dqb_bhardlimit) return QUOTA_NL_BHARDBELOW; return QUOTA_NL_NOWARN; } static int inode_quota_active(const struct inode *inode) { struct super_block *sb = inode->i_sb; if (IS_NOQUOTA(inode)) return 0; return sb_any_quota_loaded(sb) & ~sb_any_quota_suspended(sb); } /* * Initialize quota pointers in inode * * It is better to call this function outside of any transaction as it * might need a lot of space in journal for dquot structure allocation. */ static int __dquot_initialize(struct inode *inode, int type) { int cnt, init_needed = 0; struct dquot __rcu **dquots; struct dquot *got[MAXQUOTAS] = {}; struct super_block *sb = inode->i_sb; qsize_t rsv; int ret = 0; if (!inode_quota_active(inode)) return 0; dquots = i_dquot(inode); /* First get references to structures we might need. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { struct kqid qid; kprojid_t projid; int rc; struct dquot *dquot; if (type != -1 && cnt != type) continue; /* * The i_dquot should have been initialized in most cases, * we check it without locking here to avoid unnecessary * dqget()/dqput() calls. */ if (dquots[cnt]) continue; if (!sb_has_quota_active(sb, cnt)) continue; init_needed = 1; switch (cnt) { case USRQUOTA: qid = make_kqid_uid(inode->i_uid); break; case GRPQUOTA: qid = make_kqid_gid(inode->i_gid); break; case PRJQUOTA: rc = inode->i_sb->dq_op->get_projid(inode, &projid); if (rc) continue; qid = make_kqid_projid(projid); break; } dquot = dqget(sb, qid); if (IS_ERR(dquot)) { /* We raced with somebody turning quotas off... */ if (PTR_ERR(dquot) != -ESRCH) { ret = PTR_ERR(dquot); goto out_put; } dquot = NULL; } got[cnt] = dquot; } /* All required i_dquot has been initialized */ if (!init_needed) return 0; spin_lock(&dq_data_lock); if (IS_NOQUOTA(inode)) goto out_lock; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; /* Avoid races with quotaoff() */ if (!sb_has_quota_active(sb, cnt)) continue; /* We could race with quotaon or dqget() could have failed */ if (!got[cnt]) continue; if (!dquots[cnt]) { rcu_assign_pointer(dquots[cnt], got[cnt]); got[cnt] = NULL; /* * Make quota reservation system happy if someone * did a write before quota was turned on */ rsv = inode_get_rsv_space(inode); if (unlikely(rsv)) { struct dquot *dquot = srcu_dereference_check( dquots[cnt], &dquot_srcu, lockdep_is_held(&dq_data_lock)); spin_lock(&inode->i_lock); /* Get reservation again under proper lock */ rsv = __inode_get_rsv_space(inode); spin_lock(&dquot->dq_dqb_lock); dquot->dq_dqb.dqb_rsvspace += rsv; spin_unlock(&dquot->dq_dqb_lock); spin_unlock(&inode->i_lock); } } } out_lock: spin_unlock(&dq_data_lock); out_put: /* Drop unused references */ dqput_all(got); return ret; } int dquot_initialize(struct inode *inode) { return __dquot_initialize(inode, -1); } EXPORT_SYMBOL(dquot_initialize); bool dquot_initialize_needed(struct inode *inode) { struct dquot __rcu **dquots; int i; if (!inode_quota_active(inode)) return false; dquots = i_dquot(inode); for (i = 0; i < MAXQUOTAS; i++) if (!dquots[i] && sb_has_quota_active(inode->i_sb, i)) return true; return false; } EXPORT_SYMBOL(dquot_initialize_needed); /* * Release all quotas referenced by inode. * * This function only be called on inode free or converting * a file to quota file, no other users for the i_dquot in * both cases, so we needn't call synchronize_srcu() after * clearing i_dquot. */ static void __dquot_drop(struct inode *inode) { int cnt; struct dquot __rcu **dquots = i_dquot(inode); struct dquot *put[MAXQUOTAS]; spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { put[cnt] = srcu_dereference_check(dquots[cnt], &dquot_srcu, lockdep_is_held(&dq_data_lock)); rcu_assign_pointer(dquots[cnt], NULL); } spin_unlock(&dq_data_lock); dqput_all(put); } void dquot_drop(struct inode *inode) { struct dquot __rcu * const *dquots; int cnt; if (IS_NOQUOTA(inode)) return; /* * Test before calling to rule out calls from proc and such * where we are not allowed to block. Note that this is * actually reliable test even without the lock - the caller * must assure that nobody can come after the DQUOT_DROP and * add quota pointers back anyway. */ dquots = i_dquot(inode); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (dquots[cnt]) break; } if (cnt < MAXQUOTAS) __dquot_drop(inode); } EXPORT_SYMBOL(dquot_drop); /* * inode_reserved_space is managed internally by quota, and protected by * i_lock similar to i_blocks+i_bytes. */ static qsize_t *inode_reserved_space(struct inode * inode) { /* Filesystem must explicitly define it's own method in order to use * quota reservation interface */ BUG_ON(!inode->i_sb->dq_op->get_reserved_space); return inode->i_sb->dq_op->get_reserved_space(inode); } static qsize_t __inode_get_rsv_space(struct inode *inode) { if (!inode->i_sb->dq_op->get_reserved_space) return 0; return *inode_reserved_space(inode); } static qsize_t inode_get_rsv_space(struct inode *inode) { qsize_t ret; if (!inode->i_sb->dq_op->get_reserved_space) return 0; spin_lock(&inode->i_lock); ret = __inode_get_rsv_space(inode); spin_unlock(&inode->i_lock); return ret; } /* * This functions updates i_blocks+i_bytes fields and quota information * (together with appropriate checks). * * NOTE: We absolutely rely on the fact that caller dirties the inode * (usually helpers in quotaops.h care about this) and holds a handle for * the current transaction so that dquot write and inode write go into the * same transaction. */ /* * This operation can block, but only after everything is updated */ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) { int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; int reserve = flags & DQUOT_SPACE_RESERVE; struct dquot __rcu **dquots; struct dquot *dquot; if (!inode_quota_active(inode)) { if (reserve) { spin_lock(&inode->i_lock); *inode_reserved_space(inode) += number; spin_unlock(&inode->i_lock); } else { inode_add_bytes(inode, number); } goto out; } for (cnt = 0; cnt < MAXQUOTAS; cnt++) warn[cnt].w_type = QUOTA_NL_NOWARN; dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (!dquot) continue; if (reserve) { ret = dquot_add_space(dquot, 0, number, flags, &warn[cnt]); } else { ret = dquot_add_space(dquot, number, 0, flags, &warn[cnt]); } if (ret) { /* Back out changes we already did */ for (cnt--; cnt >= 0; cnt--) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (!dquot) continue; spin_lock(&dquot->dq_dqb_lock); if (reserve) dquot_free_reserved_space(dquot, number); else dquot_decr_space(dquot, number); spin_unlock(&dquot->dq_dqb_lock); } spin_unlock(&inode->i_lock); goto out_flush_warn; } } if (reserve) *inode_reserved_space(inode) += number; else __inode_add_bytes(inode, number); spin_unlock(&inode->i_lock); if (reserve) goto out_flush_warn; ret = mark_all_dquot_dirty(dquots); out_flush_warn: srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); out: return ret; } EXPORT_SYMBOL(__dquot_alloc_space); /* * This operation can block, but only after everything is updated */ int dquot_alloc_inode(struct inode *inode) { int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; struct dquot __rcu * const *dquots; struct dquot *dquot; if (!inode_quota_active(inode)) return 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) warn[cnt].w_type = QUOTA_NL_NOWARN; dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (!dquot) continue; ret = dquot_add_inodes(dquot, 1, &warn[cnt]); if (ret) { for (cnt--; cnt >= 0; cnt--) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (!dquot) continue; /* Back out changes we already did */ spin_lock(&dquot->dq_dqb_lock); dquot_decr_inodes(dquot, 1); spin_unlock(&dquot->dq_dqb_lock); } goto warn_put_all; } } warn_put_all: spin_unlock(&inode->i_lock); if (ret == 0) ret = mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); return ret; } EXPORT_SYMBOL(dquot_alloc_inode); /* * Convert in-memory reserved quotas to real consumed quotas */ void dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { struct dquot __rcu **dquots; struct dquot *dquot; int cnt, index; if (!inode_quota_active(inode)) { spin_lock(&inode->i_lock); *inode_reserved_space(inode) -= number; __inode_add_bytes(inode, number); spin_unlock(&inode->i_lock); return; } dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (dquot) { spin_lock(&dquot->dq_dqb_lock); if (WARN_ON_ONCE(dquot->dq_dqb.dqb_rsvspace < number)) number = dquot->dq_dqb.dqb_rsvspace; dquot->dq_dqb.dqb_curspace += number; dquot->dq_dqb.dqb_rsvspace -= number; spin_unlock(&dquot->dq_dqb_lock); } } /* Update inode bytes */ *inode_reserved_space(inode) -= number; __inode_add_bytes(inode, number); spin_unlock(&inode->i_lock); mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); } EXPORT_SYMBOL(dquot_claim_space_nodirty); /* * Convert allocated space back to in-memory reserved quotas */ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) { struct dquot __rcu **dquots; struct dquot *dquot; int cnt, index; if (!inode_quota_active(inode)) { spin_lock(&inode->i_lock); *inode_reserved_space(inode) += number; __inode_sub_bytes(inode, number); spin_unlock(&inode->i_lock); return; } dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (dquot) { spin_lock(&dquot->dq_dqb_lock); if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number)) number = dquot->dq_dqb.dqb_curspace; dquot->dq_dqb.dqb_rsvspace += number; dquot->dq_dqb.dqb_curspace -= number; spin_unlock(&dquot->dq_dqb_lock); } } /* Update inode bytes */ *inode_reserved_space(inode) += number; __inode_sub_bytes(inode, number); spin_unlock(&inode->i_lock); mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); } EXPORT_SYMBOL(dquot_reclaim_space_nodirty); /* * This operation can block, but only after everything is updated */ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) { unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; struct dquot __rcu **dquots; struct dquot *dquot; int reserve = flags & DQUOT_SPACE_RESERVE, index; if (!inode_quota_active(inode)) { if (reserve) { spin_lock(&inode->i_lock); *inode_reserved_space(inode) -= number; spin_unlock(&inode->i_lock); } else { inode_sub_bytes(inode, number); } return; } dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { int wtype; warn[cnt].w_type = QUOTA_NL_NOWARN; dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (!dquot) continue; spin_lock(&dquot->dq_dqb_lock); wtype = info_bdq_free(dquot, number); if (wtype != QUOTA_NL_NOWARN) prepare_warning(&warn[cnt], dquot, wtype); if (reserve) dquot_free_reserved_space(dquot, number); else dquot_decr_space(dquot, number); spin_unlock(&dquot->dq_dqb_lock); } if (reserve) *inode_reserved_space(inode) -= number; else __inode_sub_bytes(inode, number); spin_unlock(&inode->i_lock); if (reserve) goto out_unlock; mark_all_dquot_dirty(dquots); out_unlock: srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); } EXPORT_SYMBOL(__dquot_free_space); /* * This operation can block, but only after everything is updated */ void dquot_free_inode(struct inode *inode) { unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; struct dquot __rcu * const *dquots; struct dquot *dquot; int index; if (!inode_quota_active(inode)) return; dquots = i_dquot(inode); index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { int wtype; warn[cnt].w_type = QUOTA_NL_NOWARN; dquot = srcu_dereference(dquots[cnt], &dquot_srcu); if (!dquot) continue; spin_lock(&dquot->dq_dqb_lock); wtype = info_idq_free(dquot, 1); if (wtype != QUOTA_NL_NOWARN) prepare_warning(&warn[cnt], dquot, wtype); dquot_decr_inodes(dquot, 1); spin_unlock(&dquot->dq_dqb_lock); } spin_unlock(&inode->i_lock); mark_all_dquot_dirty(dquots); srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn); } EXPORT_SYMBOL(dquot_free_inode); /* * Transfer the number of inode and blocks from one diskquota to an other. * On success, dquot references in transfer_to are consumed and references * to original dquots that need to be released are placed there. On failure, * references are kept untouched. * * This operation can block, but only after everything is updated * A transaction must be started when entering this function. * * We are holding reference on transfer_from & transfer_to, no need to * protect them by srcu_read_lock(). */ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) { qsize_t cur_space; qsize_t rsv_space = 0; qsize_t inode_usage = 1; struct dquot __rcu **dquots; struct dquot *transfer_from[MAXQUOTAS] = {}; int cnt, index, ret = 0, err; char is_valid[MAXQUOTAS] = {}; struct dquot_warn warn_to[MAXQUOTAS]; struct dquot_warn warn_from_inodes[MAXQUOTAS]; struct dquot_warn warn_from_space[MAXQUOTAS]; if (IS_NOQUOTA(inode)) return 0; if (inode->i_sb->dq_op->get_inode_usage) { ret = inode->i_sb->dq_op->get_inode_usage(inode, &inode_usage); if (ret) return ret; } /* Initialize the arrays */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { warn_to[cnt].w_type = QUOTA_NL_NOWARN; warn_from_inodes[cnt].w_type = QUOTA_NL_NOWARN; warn_from_space[cnt].w_type = QUOTA_NL_NOWARN; } spin_lock(&dq_data_lock); spin_lock(&inode->i_lock); if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ spin_unlock(&inode->i_lock); spin_unlock(&dq_data_lock); return 0; } cur_space = __inode_get_bytes(inode); rsv_space = __inode_get_rsv_space(inode); dquots = i_dquot(inode); /* * Build the transfer_from list, check limits, and update usage in * the target structures. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { /* * Skip changes for same uid or gid or for turned off quota-type. */ if (!transfer_to[cnt]) continue; /* Avoid races with quotaoff() */ if (!sb_has_quota_active(inode->i_sb, cnt)) continue; is_valid[cnt] = 1; transfer_from[cnt] = srcu_dereference_check(dquots[cnt], &dquot_srcu, lockdep_is_held(&dq_data_lock)); ret = dquot_add_inodes(transfer_to[cnt], inode_usage, &warn_to[cnt]); if (ret) goto over_quota; ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, DQUOT_SPACE_WARN, &warn_to[cnt]); if (ret) { spin_lock(&transfer_to[cnt]->dq_dqb_lock); dquot_decr_inodes(transfer_to[cnt], inode_usage); spin_unlock(&transfer_to[cnt]->dq_dqb_lock); goto over_quota; } } /* Decrease usage for source structures and update quota pointers */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!is_valid[cnt]) continue; /* Due to IO error we might not have transfer_from[] structure */ if (transfer_from[cnt]) { int wtype; spin_lock(&transfer_from[cnt]->dq_dqb_lock); wtype = info_idq_free(transfer_from[cnt], inode_usage); if (wtype != QUOTA_NL_NOWARN) prepare_warning(&warn_from_inodes[cnt], transfer_from[cnt], wtype); wtype = info_bdq_free(transfer_from[cnt], cur_space + rsv_space); if (wtype != QUOTA_NL_NOWARN) prepare_warning(&warn_from_space[cnt], transfer_from[cnt], wtype); dquot_decr_inodes(transfer_from[cnt], inode_usage); dquot_decr_space(transfer_from[cnt], cur_space); dquot_free_reserved_space(transfer_from[cnt], rsv_space); spin_unlock(&transfer_from[cnt]->dq_dqb_lock); } rcu_assign_pointer(dquots[cnt], transfer_to[cnt]); } spin_unlock(&inode->i_lock); spin_unlock(&dq_data_lock); /* * These arrays are local and we hold dquot references so we don't need * the srcu protection but still take dquot_srcu to avoid warning in * mark_all_dquot_dirty(). */ index = srcu_read_lock(&dquot_srcu); err = mark_all_dquot_dirty((struct dquot __rcu **)transfer_from); if (err < 0) ret = err; err = mark_all_dquot_dirty((struct dquot __rcu **)transfer_to); if (err < 0) ret = err; srcu_read_unlock(&dquot_srcu, index); flush_warnings(warn_to); flush_warnings(warn_from_inodes); flush_warnings(warn_from_space); /* Pass back references to put */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (is_valid[cnt]) transfer_to[cnt] = transfer_from[cnt]; return ret; over_quota: /* Back out changes we already did */ for (cnt--; cnt >= 0; cnt--) { if (!is_valid[cnt]) continue; spin_lock(&transfer_to[cnt]->dq_dqb_lock); dquot_decr_inodes(transfer_to[cnt], inode_usage); dquot_decr_space(transfer_to[cnt], cur_space); dquot_free_reserved_space(transfer_to[cnt], rsv_space); spin_unlock(&transfer_to[cnt]->dq_dqb_lock); } spin_unlock(&inode->i_lock); spin_unlock(&dq_data_lock); flush_warnings(warn_to); return ret; } EXPORT_SYMBOL(__dquot_transfer); /* Wrapper for transferring ownership of an inode for uid/gid only * Called from FSXXX_setattr() */ int dquot_transfer(struct mnt_idmap *idmap, struct inode *inode, struct iattr *iattr) { struct dquot *transfer_to[MAXQUOTAS] = {}; struct dquot *dquot; struct super_block *sb = inode->i_sb; int ret; if (!inode_quota_active(inode)) return 0; if (i_uid_needs_update(idmap, iattr, inode)) { kuid_t kuid = from_vfsuid(idmap, i_user_ns(inode), iattr->ia_vfsuid); dquot = dqget(sb, make_kqid_uid(kuid)); if (IS_ERR(dquot)) { if (PTR_ERR(dquot) != -ESRCH) { ret = PTR_ERR(dquot); goto out_put; } dquot = NULL; } transfer_to[USRQUOTA] = dquot; } if (i_gid_needs_update(idmap, iattr, inode)) { kgid_t kgid = from_vfsgid(idmap, i_user_ns(inode), iattr->ia_vfsgid); dquot = dqget(sb, make_kqid_gid(kgid)); if (IS_ERR(dquot)) { if (PTR_ERR(dquot) != -ESRCH) { ret = PTR_ERR(dquot); goto out_put; } dquot = NULL; } transfer_to[GRPQUOTA] = dquot; } ret = __dquot_transfer(inode, transfer_to); out_put: dqput_all(transfer_to); return ret; } EXPORT_SYMBOL(dquot_transfer); /* * Write info of quota file to disk */ int dquot_commit_info(struct super_block *sb, int type) { struct quota_info *dqopt = sb_dqopt(sb); return dqopt->ops[type]->write_file_info(sb, type); } EXPORT_SYMBOL(dquot_commit_info); int dquot_get_next_id(struct super_block *sb, struct kqid *qid) { struct quota_info *dqopt = sb_dqopt(sb); if (!sb_has_quota_active(sb, qid->type)) return -ESRCH; if (!dqopt->ops[qid->type]->get_next_id) return -ENOSYS; return dqopt->ops[qid->type]->get_next_id(sb, qid); } EXPORT_SYMBOL(dquot_get_next_id); /* * Definitions of diskquota operations. */ const struct dquot_operations dquot_operations = { .write_dquot = dquot_commit, .acquire_dquot = dquot_acquire, .release_dquot = dquot_release, .mark_dirty = dquot_mark_dquot_dirty, .write_info = dquot_commit_info, .alloc_dquot = dquot_alloc, .destroy_dquot = dquot_destroy, .get_next_id = dquot_get_next_id, }; EXPORT_SYMBOL(dquot_operations); /* * Generic helper for ->open on filesystems supporting disk quotas. */ int dquot_file_open(struct inode *inode, struct file *file) { int error; error = generic_file_open(inode, file); if (!error && (file->f_mode & FMODE_WRITE)) error = dquot_initialize(inode); return error; } EXPORT_SYMBOL(dquot_file_open); static void vfs_cleanup_quota_inode(struct super_block *sb, int type) { struct quota_info *dqopt = sb_dqopt(sb); struct inode *inode = dqopt->files[type]; if (!inode) return; if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { inode_lock(inode); inode->i_flags &= ~S_NOQUOTA; inode_unlock(inode); } dqopt->files[type] = NULL; iput(inode); } /* * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount) */ int dquot_disable(struct super_block *sb, int type, unsigned int flags) { int cnt; struct quota_info *dqopt = sb_dqopt(sb); rwsem_assert_held_write(&sb->s_umount); /* Cannot turn off usage accounting without turning off limits, or * suspend quotas and simultaneously turn quotas off. */ if ((flags & DQUOT_USAGE_ENABLED && !(flags & DQUOT_LIMITS_ENABLED)) || (flags & DQUOT_SUSPENDED && flags & (DQUOT_LIMITS_ENABLED | DQUOT_USAGE_ENABLED))) return -EINVAL; /* * Skip everything if there's nothing to do. We have to do this because * sometimes we are called when fill_super() failed and calling * sync_fs() in such cases does no good. */ if (!sb_any_quota_loaded(sb)) return 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; if (!sb_has_quota_loaded(sb, cnt)) continue; if (flags & DQUOT_SUSPENDED) { spin_lock(&dq_state_lock); dqopt->flags |= dquot_state_flag(DQUOT_SUSPENDED, cnt); spin_unlock(&dq_state_lock); } else { spin_lock(&dq_state_lock); dqopt->flags &= ~dquot_state_flag(flags, cnt); /* Turning off suspended quotas? */ if (!sb_has_quota_loaded(sb, cnt) && sb_has_quota_suspended(sb, cnt)) { dqopt->flags &= ~dquot_state_flag( DQUOT_SUSPENDED, cnt); spin_unlock(&dq_state_lock); vfs_cleanup_quota_inode(sb, cnt); continue; } spin_unlock(&dq_state_lock); } /* We still have to keep quota loaded? */ if (sb_has_quota_loaded(sb, cnt) && !(flags & DQUOT_SUSPENDED)) continue; /* Note: these are blocking operations */ drop_dquot_ref(sb, cnt); invalidate_dquots(sb, cnt); /* * Now all dquots should be invalidated, all writes done so we * should be only users of the info. No locks needed. */ if (info_dirty(&dqopt->info[cnt])) sb->dq_op->write_info(sb, cnt); if (dqopt->ops[cnt]->free_file_info) dqopt->ops[cnt]->free_file_info(sb, cnt); put_quota_format(dqopt->info[cnt].dqi_format); dqopt->info[cnt].dqi_flags = 0; dqopt->info[cnt].dqi_igrace = 0; dqopt->info[cnt].dqi_bgrace = 0; dqopt->ops[cnt] = NULL; } /* Skip syncing and setting flags if quota files are hidden */ if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) goto put_inodes; /* Sync the superblock so that buffers with quota data are written to * disk (and so userspace sees correct data afterwards). */ if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, 1); sync_blockdev(sb->s_bdev); /* Now the quota files are just ordinary files and we can set the * inode flags back. Moreover we discard the pagecache so that * userspace sees the writes we did bypassing the pagecache. We * must also discard the blockdev buffers so that we see the * changes done by userspace on the next quotaon() */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (!sb_has_quota_loaded(sb, cnt) && dqopt->files[cnt]) { inode_lock(dqopt->files[cnt]); truncate_inode_pages(&dqopt->files[cnt]->i_data, 0); inode_unlock(dqopt->files[cnt]); } if (sb->s_bdev) invalidate_bdev(sb->s_bdev); put_inodes: /* We are done when suspending quotas */ if (flags & DQUOT_SUSPENDED) return 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (!sb_has_quota_loaded(sb, cnt)) vfs_cleanup_quota_inode(sb, cnt); return 0; } EXPORT_SYMBOL(dquot_disable); int dquot_quota_off(struct super_block *sb, int type) { return dquot_disable(sb, type, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); } EXPORT_SYMBOL(dquot_quota_off); /* * Turn quotas on on a device */ static int vfs_setup_quota_inode(struct inode *inode, int type) { struct super_block *sb = inode->i_sb; struct quota_info *dqopt = sb_dqopt(sb); if (is_bad_inode(inode)) return -EUCLEAN; if (!S_ISREG(inode->i_mode)) return -EACCES; if (IS_RDONLY(inode)) return -EROFS; if (sb_has_quota_loaded(sb, type)) return -EBUSY; /* * Quota files should never be encrypted. They should be thought of as * filesystem metadata, not user data. New-style internal quota files * cannot be encrypted by users anyway, but old-style external quota * files could potentially be incorrectly created in an encrypted * directory, hence this explicit check. Some reasons why encrypted * quota files don't work include: (1) some filesystems that support * encryption don't handle it in their quota_read and quota_write, and * (2) cleaning up encrypted quota files at unmount would need special * consideration, as quota files are cleaned up later than user files. */ if (IS_ENCRYPTED(inode)) return -EINVAL; dqopt->files[type] = igrab(inode); if (!dqopt->files[type]) return -EIO; if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { /* We don't want quota and atime on quota files (deadlocks * possible) Also nobody should write to the file - we use * special IO operations which ignore the immutable bit. */ inode_lock(inode); inode->i_flags |= S_NOQUOTA; inode_unlock(inode); /* * When S_NOQUOTA is set, remove dquot references as no more * references can be added */ __dquot_drop(inode); } return 0; } int dquot_load_quota_sb(struct super_block *sb, int type, int format_id, unsigned int flags) { struct quota_format_type *fmt; struct quota_info *dqopt = sb_dqopt(sb); int error; lockdep_assert_held_write(&sb->s_umount); /* Just unsuspend quotas? */ if (WARN_ON_ONCE(flags & DQUOT_SUSPENDED)) return -EINVAL; fmt = find_quota_format(format_id); if (!fmt) return -ESRCH; if (!sb->dq_op || !sb->s_qcop || (type == PRJQUOTA && sb->dq_op->get_projid == NULL)) { error = -EINVAL; goto out_fmt; } /* Filesystems outside of init_user_ns not yet supported */ if (sb->s_user_ns != &init_user_ns) { error = -EINVAL; goto out_fmt; } /* Usage always has to be set... */ if (!(flags & DQUOT_USAGE_ENABLED)) { error = -EINVAL; goto out_fmt; } if (sb_has_quota_loaded(sb, type)) { error = -EBUSY; goto out_fmt; } if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { /* As we bypass the pagecache we must now flush all the * dirty data and invalidate caches so that kernel sees * changes from userspace. It is not enough to just flush * the quota file since if blocksize < pagesize, invalidation * of the cache could fail because of other unrelated dirty * data */ sync_filesystem(sb); invalidate_bdev(sb->s_bdev); } error = -EINVAL; if (!fmt->qf_ops->check_quota_file(sb, type)) goto out_fmt; dqopt->ops[type] = fmt->qf_ops; dqopt->info[type].dqi_format = fmt; dqopt->info[type].dqi_fmt_id = format_id; INIT_LIST_HEAD(&dqopt->info[type].dqi_dirty_list); error = dqopt->ops[type]->read_file_info(sb, type); if (error < 0) goto out_fmt; if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) { spin_lock(&dq_data_lock); dqopt->info[type].dqi_flags |= DQF_SYS_FILE; spin_unlock(&dq_data_lock); } spin_lock(&dq_state_lock); dqopt->flags |= dquot_state_flag(flags, type); spin_unlock(&dq_state_lock); error = add_dquot_ref(sb, type); if (error) dquot_disable(sb, type, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); return error; out_fmt: put_quota_format(fmt); return error; } EXPORT_SYMBOL(dquot_load_quota_sb); /* * More powerful function for turning on quotas on given quota inode allowing * setting of individual quota flags */ int dquot_load_quota_inode(struct inode *inode, int type, int format_id, unsigned int flags) { int err; err = vfs_setup_quota_inode(inode, type); if (err < 0) return err; err = dquot_load_quota_sb(inode->i_sb, type, format_id, flags); if (err < 0) vfs_cleanup_quota_inode(inode->i_sb, type); return err; } EXPORT_SYMBOL(dquot_load_quota_inode); /* Reenable quotas on remount RW */ int dquot_resume(struct super_block *sb, int type) { struct quota_info *dqopt = sb_dqopt(sb); int ret = 0, cnt; unsigned int flags; rwsem_assert_held_write(&sb->s_umount); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; if (!sb_has_quota_suspended(sb, cnt)) continue; spin_lock(&dq_state_lock); flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED, cnt); dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, cnt); spin_unlock(&dq_state_lock); flags = dquot_generic_flag(flags, cnt); ret = dquot_load_quota_sb(sb, cnt, dqopt->info[cnt].dqi_fmt_id, flags); if (ret < 0) vfs_cleanup_quota_inode(sb, cnt); } return ret; } EXPORT_SYMBOL(dquot_resume); int dquot_quota_on(struct super_block *sb, int type, int format_id, const struct path *path) { int error = security_quota_on(path->dentry); if (error) return error; /* Quota file not on the same filesystem? */ if (path->dentry->d_sb != sb) error = -EXDEV; else error = dquot_load_quota_inode(d_inode(path->dentry), type, format_id, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); return error; } EXPORT_SYMBOL(dquot_quota_on); /* * This function is used when filesystem needs to initialize quotas * during mount time. */ int dquot_quota_on_mount(struct super_block *sb, char *qf_name, int format_id, int type) { struct dentry *dentry; int error; dentry = lookup_noperm_positive_unlocked(&QSTR(qf_name), sb->s_root); if (IS_ERR(dentry)) return PTR_ERR(dentry); error = security_quota_on(dentry); if (!error) error = dquot_load_quota_inode(d_inode(dentry), type, format_id, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); dput(dentry); return error; } EXPORT_SYMBOL(dquot_quota_on_mount); static int dquot_quota_enable(struct super_block *sb, unsigned int flags) { int ret; int type; struct quota_info *dqopt = sb_dqopt(sb); if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) return -ENOSYS; /* Accounting cannot be turned on while fs is mounted */ flags &= ~(FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT | FS_QUOTA_PDQ_ACCT); if (!flags) return -EINVAL; for (type = 0; type < MAXQUOTAS; type++) { if (!(flags & qtype_enforce_flag(type))) continue; /* Can't enforce without accounting */ if (!sb_has_quota_usage_enabled(sb, type)) { ret = -EINVAL; goto out_err; } if (sb_has_quota_limits_enabled(sb, type)) { /* compatible with XFS */ ret = -EEXIST; goto out_err; } spin_lock(&dq_state_lock); dqopt->flags |= dquot_state_flag(DQUOT_LIMITS_ENABLED, type); spin_unlock(&dq_state_lock); } return 0; out_err: /* Backout enforcement enablement we already did */ for (type--; type >= 0; type--) { if (flags & qtype_enforce_flag(type)) dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); } return ret; } static int dquot_quota_disable(struct super_block *sb, unsigned int flags) { int ret; int type; struct quota_info *dqopt = sb_dqopt(sb); if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) return -ENOSYS; /* * We don't support turning off accounting via quotactl. In principle * quota infrastructure can do this but filesystems don't expect * userspace to be able to do it. */ if (flags & (FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT | FS_QUOTA_PDQ_ACCT)) return -EOPNOTSUPP; /* Filter out limits not enabled */ for (type = 0; type < MAXQUOTAS; type++) if (!sb_has_quota_limits_enabled(sb, type)) flags &= ~qtype_enforce_flag(type); /* Nothing left? */ if (!flags) return -EEXIST; for (type = 0; type < MAXQUOTAS; type++) { if (flags & qtype_enforce_flag(type)) { ret = dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); if (ret < 0) goto out_err; } } return 0; out_err: /* Backout enforcement disabling we already did */ for (type--; type >= 0; type--) { if (flags & qtype_enforce_flag(type)) { spin_lock(&dq_state_lock); dqopt->flags |= dquot_state_flag(DQUOT_LIMITS_ENABLED, type); spin_unlock(&dq_state_lock); } } return ret; } /* Generic routine for getting common part of quota structure */ static void do_get_dqblk(struct dquot *dquot, struct qc_dqblk *di) { struct mem_dqblk *dm = &dquot->dq_dqb; memset(di, 0, sizeof(*di)); spin_lock(&dquot->dq_dqb_lock); di->d_spc_hardlimit = dm->dqb_bhardlimit; di->d_spc_softlimit = dm->dqb_bsoftlimit; di->d_ino_hardlimit = dm->dqb_ihardlimit; di->d_ino_softlimit = dm->dqb_isoftlimit; di->d_space = dm->dqb_curspace + dm->dqb_rsvspace; di->d_ino_count = dm->dqb_curinodes; di->d_spc_timer = dm->dqb_btime; di->d_ino_timer = dm->dqb_itime; spin_unlock(&dquot->dq_dqb_lock); } int dquot_get_dqblk(struct super_block *sb, struct kqid qid, struct qc_dqblk *di) { struct dquot *dquot; dquot = dqget(sb, qid); if (IS_ERR(dquot)) return PTR_ERR(dquot); do_get_dqblk(dquot, di); dqput(dquot); return 0; } EXPORT_SYMBOL(dquot_get_dqblk); int dquot_get_next_dqblk(struct super_block *sb, struct kqid *qid, struct qc_dqblk *di) { struct dquot *dquot; int err; if (!sb->dq_op->get_next_id) return -ENOSYS; err = sb->dq_op->get_next_id(sb, qid); if (err < 0) return err; dquot = dqget(sb, *qid); if (IS_ERR(dquot)) return PTR_ERR(dquot); do_get_dqblk(dquot, di); dqput(dquot); return 0; } EXPORT_SYMBOL(dquot_get_next_dqblk); #define VFS_QC_MASK \ (QC_SPACE | QC_SPC_SOFT | QC_SPC_HARD | \ QC_INO_COUNT | QC_INO_SOFT | QC_INO_HARD | \ QC_SPC_TIMER | QC_INO_TIMER) /* Generic routine for setting common part of quota structure */ static int do_set_dqblk(struct dquot *dquot, struct qc_dqblk *di) { struct mem_dqblk *dm = &dquot->dq_dqb; int check_blim = 0, check_ilim = 0; struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type]; int ret; if (di->d_fieldmask & ~VFS_QC_MASK) return -EINVAL; if (((di->d_fieldmask & QC_SPC_SOFT) && di->d_spc_softlimit > dqi->dqi_max_spc_limit) || ((di->d_fieldmask & QC_SPC_HARD) && di->d_spc_hardlimit > dqi->dqi_max_spc_limit) || ((di->d_fieldmask & QC_INO_SOFT) && (di->d_ino_softlimit > dqi->dqi_max_ino_limit)) || ((di->d_fieldmask & QC_INO_HARD) && (di->d_ino_hardlimit > dqi->dqi_max_ino_limit))) return -ERANGE; spin_lock(&dquot->dq_dqb_lock); if (di->d_fieldmask & QC_SPACE) { dm->dqb_curspace = di->d_space - dm->dqb_rsvspace; check_blim = 1; set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags); } if (di->d_fieldmask & QC_SPC_SOFT) dm->dqb_bsoftlimit = di->d_spc_softlimit; if (di->d_fieldmask & QC_SPC_HARD) dm->dqb_bhardlimit = di->d_spc_hardlimit; if (di->d_fieldmask & (QC_SPC_SOFT | QC_SPC_HARD)) { check_blim = 1; set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags); } if (di->d_fieldmask & QC_INO_COUNT) { dm->dqb_curinodes = di->d_ino_count; check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags); } if (di->d_fieldmask & QC_INO_SOFT) dm->dqb_isoftlimit = di->d_ino_softlimit; if (di->d_fieldmask & QC_INO_HARD) dm->dqb_ihardlimit = di->d_ino_hardlimit; if (di->d_fieldmask & (QC_INO_SOFT | QC_INO_HARD)) { check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags); } if (di->d_fieldmask & QC_SPC_TIMER) { dm->dqb_btime = di->d_spc_timer; check_blim = 1; set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags); } if (di->d_fieldmask & QC_INO_TIMER) { dm->dqb_itime = di->d_ino_timer; check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); } if (check_blim) { if (!dm->dqb_bsoftlimit || dm->dqb_curspace + dm->dqb_rsvspace <= dm->dqb_bsoftlimit) { dm->dqb_btime = 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); } else if (!(di->d_fieldmask & QC_SPC_TIMER)) /* Set grace only if user hasn't provided his own... */ dm->dqb_btime = ktime_get_real_seconds() + dqi->dqi_bgrace; } if (check_ilim) { if (!dm->dqb_isoftlimit || dm->dqb_curinodes <= dm->dqb_isoftlimit) { dm->dqb_itime = 0; clear_bit(DQ_INODES_B, &dquot->dq_flags); } else if (!(di->d_fieldmask & QC_INO_TIMER)) /* Set grace only if user hasn't provided his own... */ dm->dqb_itime = ktime_get_real_seconds() + dqi->dqi_igrace; } if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit || dm->dqb_isoftlimit) clear_bit(DQ_FAKE_B, &dquot->dq_flags); else set_bit(DQ_FAKE_B, &dquot->dq_flags); spin_unlock(&dquot->dq_dqb_lock); ret = mark_dquot_dirty(dquot); if (ret < 0) return ret; return 0; } int dquot_set_dqblk(struct super_block *sb, struct kqid qid, struct qc_dqblk *di) { struct dquot *dquot; int rc; dquot = dqget(sb, qid); if (IS_ERR(dquot)) { rc = PTR_ERR(dquot); goto out; } rc = do_set_dqblk(dquot, di); dqput(dquot); out: return rc; } EXPORT_SYMBOL(dquot_set_dqblk); /* Generic routine for getting common part of quota file information */ int dquot_get_state(struct super_block *sb, struct qc_state *state) { struct mem_dqinfo *mi; struct qc_type_state *tstate; struct quota_info *dqopt = sb_dqopt(sb); int type; memset(state, 0, sizeof(*state)); for (type = 0; type < MAXQUOTAS; type++) { if (!sb_has_quota_active(sb, type)) continue; tstate = state->s_state + type; mi = sb_dqopt(sb)->info + type; tstate->flags = QCI_ACCT_ENABLED; spin_lock(&dq_data_lock); if (mi->dqi_flags & DQF_SYS_FILE) tstate->flags |= QCI_SYSFILE; if (mi->dqi_flags & DQF_ROOT_SQUASH) tstate->flags |= QCI_ROOT_SQUASH; if (sb_has_quota_limits_enabled(sb, type)) tstate->flags |= QCI_LIMITS_ENFORCED; tstate->spc_timelimit = mi->dqi_bgrace; tstate->ino_timelimit = mi->dqi_igrace; if (dqopt->files[type]) { tstate->ino = dqopt->files[type]->i_ino; tstate->blocks = dqopt->files[type]->i_blocks; } tstate->nextents = 1; /* We don't know... */ spin_unlock(&dq_data_lock); } return 0; } EXPORT_SYMBOL(dquot_get_state); /* Generic routine for setting common part of quota file information */ int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii) { struct mem_dqinfo *mi; if ((ii->i_fieldmask & QC_WARNS_MASK) || (ii->i_fieldmask & QC_RT_SPC_TIMER)) return -EINVAL; if (!sb_has_quota_active(sb, type)) return -ESRCH; mi = sb_dqopt(sb)->info + type; if (ii->i_fieldmask & QC_FLAGS) { if ((ii->i_flags & QCI_ROOT_SQUASH && mi->dqi_format->qf_fmt_id != QFMT_VFS_OLD)) return -EINVAL; } spin_lock(&dq_data_lock); if (ii->i_fieldmask & QC_SPC_TIMER) mi->dqi_bgrace = ii->i_spc_timelimit; if (ii->i_fieldmask & QC_INO_TIMER) mi->dqi_igrace = ii->i_ino_timelimit; if (ii->i_fieldmask & QC_FLAGS) { if (ii->i_flags & QCI_ROOT_SQUASH) mi->dqi_flags |= DQF_ROOT_SQUASH; else mi->dqi_flags &= ~DQF_ROOT_SQUASH; } spin_unlock(&dq_data_lock); mark_info_dirty(sb, type); /* Force write to disk */ return sb->dq_op->write_info(sb, type); } EXPORT_SYMBOL(dquot_set_dqinfo); const struct quotactl_ops dquot_quotactl_sysfile_ops = { .quota_enable = dquot_quota_enable, .quota_disable = dquot_quota_disable, .quota_sync = dquot_quota_sync, .get_state = dquot_get_state, .set_info = dquot_set_dqinfo, .get_dqblk = dquot_get_dqblk, .get_nextdqblk = dquot_get_next_dqblk, .set_dqblk = dquot_set_dqblk }; EXPORT_SYMBOL(dquot_quotactl_sysfile_ops); static int do_proc_dqstats(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { unsigned int type = (unsigned long *)table->data - dqstats.stat; s64 value = percpu_counter_sum(&dqstats.counter[type]); /* Filter negative values for non-monotonic counters */ if (value < 0 && (type == DQST_ALLOC_DQUOTS || type == DQST_FREE_DQUOTS)) value = 0; /* Update global table */ dqstats.stat[type] = value; return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } static const struct ctl_table fs_dqstats_table[] = { { .procname = "lookups", .data = &dqstats.stat[DQST_LOOKUPS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "drops", .data = &dqstats.stat[DQST_DROPS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "reads", .data = &dqstats.stat[DQST_READS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "writes", .data = &dqstats.stat[DQST_WRITES], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "cache_hits", .data = &dqstats.stat[DQST_CACHE_HITS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "allocated_dquots", .data = &dqstats.stat[DQST_ALLOC_DQUOTS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "free_dquots", .data = &dqstats.stat[DQST_FREE_DQUOTS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "syncs", .data = &dqstats.stat[DQST_SYNCS], .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, #ifdef CONFIG_PRINT_QUOTA_WARNING { .procname = "warnings", .data = &flag_print_warnings, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, #endif }; static int __init dquot_init(void) { int i, ret; unsigned long nr_hash, order; struct shrinker *dqcache_shrinker; printk(KERN_NOTICE "VFS: Disk quotas %s\n", __DQUOT_VERSION__); register_sysctl_init("fs/quota", fs_dqstats_table); dquot_cachep = kmem_cache_create("dquot", sizeof(struct dquot), sizeof(unsigned long) * 4, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_PANIC), NULL); order = 0; dquot_hash = (struct hlist_head *)__get_free_pages(GFP_KERNEL, order); if (!dquot_hash) panic("Cannot create dquot hash table"); ret = percpu_counter_init_many(dqstats.counter, 0, GFP_KERNEL, _DQST_DQSTAT_LAST); if (ret) panic("Cannot create dquot stat counters"); /* Find power-of-two hlist_heads which can fit into allocation */ nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head); dq_hash_bits = ilog2(nr_hash); nr_hash = 1UL << dq_hash_bits; dq_hash_mask = nr_hash - 1; for (i = 0; i < nr_hash; i++) INIT_HLIST_HEAD(dquot_hash + i); pr_info("VFS: Dquot-cache hash table entries: %ld (order %ld," " %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order)); dqcache_shrinker = shrinker_alloc(0, "dquota-cache"); if (!dqcache_shrinker) panic("Cannot allocate dquot shrinker"); dqcache_shrinker->count_objects = dqcache_shrink_count; dqcache_shrinker->scan_objects = dqcache_shrink_scan; shrinker_register(dqcache_shrinker); quota_unbound_wq = alloc_workqueue("quota_events_unbound", WQ_UNBOUND | WQ_MEM_RECLAIM, WQ_MAX_ACTIVE); if (!quota_unbound_wq) panic("Cannot create quota_unbound_wq\n"); return 0; } fs_initcall(dquot_init); |
| 741 653 741 68 230 244 183 289 515 284 105 389 389 247 253 278 27 21 27 266 152 141 141 141 267 161 265 265 68 216 216 473 519 473 126 277 283 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Red Black Trees (C) 1999 Andrea Arcangeli <andrea@suse.de> (C) 2002 David Woodhouse <dwmw2@infradead.org> (C) 2012 Michel Lespinasse <walken@google.com> linux/include/linux/rbtree_augmented.h */ #ifndef _LINUX_RBTREE_AUGMENTED_H #define _LINUX_RBTREE_AUGMENTED_H #include <linux/compiler.h> #include <linux/rbtree.h> #include <linux/rcupdate.h> /* * Please note - only struct rb_augment_callbacks and the prototypes for * rb_insert_augmented() and rb_erase_augmented() are intended to be public. * The rest are implementation details you are not expected to depend on. * * See Documentation/core-api/rbtree.rst for documentation and samples. */ struct rb_augment_callbacks { void (*propagate)(struct rb_node *node, struct rb_node *stop); void (*copy)(struct rb_node *old, struct rb_node *new); void (*rotate)(struct rb_node *old, struct rb_node *new); }; extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); /* * Fixup the rbtree and update the augmented information when rebalancing. * * On insertion, the user must update the augmented information on the path * leading to the inserted node, then call rb_link_node() as usual and * rb_insert_augmented() instead of the usual rb_insert_color() call. * If rb_insert_augmented() rebalances the rbtree, it will callback into * a user provided function to update the augmented information on the * affected subtrees. */ static inline void rb_insert_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { __rb_insert_augmented(node, root, augment->rotate); } static inline void rb_insert_augmented_cached(struct rb_node *node, struct rb_root_cached *root, bool newleft, const struct rb_augment_callbacks *augment) { if (newleft) root->rb_leftmost = node; rb_insert_augmented(node, &root->rb_root, augment); } static __always_inline struct rb_node * rb_add_augmented_cached(struct rb_node *node, struct rb_root_cached *tree, bool (*less)(struct rb_node *, const struct rb_node *), const struct rb_augment_callbacks *augment) { struct rb_node **link = &tree->rb_root.rb_node; struct rb_node *parent = NULL; bool leftmost = true; while (*link) { parent = *link; if (less(node, parent)) { link = &parent->rb_left; } else { link = &parent->rb_right; leftmost = false; } } rb_link_node(node, parent, link); augment->propagate(parent, NULL); /* suboptimal */ rb_insert_augmented_cached(node, tree, leftmost, augment); return leftmost ? node : NULL; } /* * Template for declaring augmented rbtree callbacks (generic case) * * RBSTATIC: 'static' or empty * RBNAME: name of the rb_augment_callbacks structure * RBSTRUCT: struct type of the tree nodes * RBFIELD: name of struct rb_node field within RBSTRUCT * RBAUGMENTED: name of field within RBSTRUCT holding data for subtree * RBCOMPUTE: name of function that recomputes the RBAUGMENTED data */ #define RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, \ RBSTRUCT, RBFIELD, RBAUGMENTED, RBCOMPUTE) \ static inline void \ RBNAME ## _propagate(struct rb_node *rb, struct rb_node *stop) \ { \ while (rb != stop) { \ RBSTRUCT *node = rb_entry(rb, RBSTRUCT, RBFIELD); \ if (RBCOMPUTE(node, true)) \ break; \ rb = rb_parent(&node->RBFIELD); \ } \ } \ static inline void \ RBNAME ## _copy(struct rb_node *rb_old, struct rb_node *rb_new) \ { \ RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD); \ RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD); \ new->RBAUGMENTED = old->RBAUGMENTED; \ } \ static void \ RBNAME ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \ { \ RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD); \ RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD); \ new->RBAUGMENTED = old->RBAUGMENTED; \ RBCOMPUTE(old, false); \ } \ RBSTATIC const struct rb_augment_callbacks RBNAME = { \ .propagate = RBNAME ## _propagate, \ .copy = RBNAME ## _copy, \ .rotate = RBNAME ## _rotate \ }; /* * Template for declaring augmented rbtree callbacks, * computing RBAUGMENTED scalar as max(RBCOMPUTE(node)) for all subtree nodes. * * RBSTATIC: 'static' or empty * RBNAME: name of the rb_augment_callbacks structure * RBSTRUCT: struct type of the tree nodes * RBFIELD: name of struct rb_node field within RBSTRUCT * RBTYPE: type of the RBAUGMENTED field * RBAUGMENTED: name of RBTYPE field within RBSTRUCT holding data for subtree * RBCOMPUTE: name of function that returns the per-node RBTYPE scalar */ #define RB_DECLARE_CALLBACKS_MAX(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD, \ RBTYPE, RBAUGMENTED, RBCOMPUTE) \ static inline bool RBNAME ## _compute_max(RBSTRUCT *node, bool exit) \ { \ RBSTRUCT *child; \ RBTYPE max = RBCOMPUTE(node); \ if (node->RBFIELD.rb_left) { \ child = rb_entry(node->RBFIELD.rb_left, RBSTRUCT, RBFIELD); \ if (child->RBAUGMENTED > max) \ max = child->RBAUGMENTED; \ } \ if (node->RBFIELD.rb_right) { \ child = rb_entry(node->RBFIELD.rb_right, RBSTRUCT, RBFIELD); \ if (child->RBAUGMENTED > max) \ max = child->RBAUGMENTED; \ } \ if (exit && node->RBAUGMENTED == max) \ return true; \ node->RBAUGMENTED = max; \ return false; \ } \ RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, \ RBSTRUCT, RBFIELD, RBAUGMENTED, RBNAME ## _compute_max) #define RB_RED 0 #define RB_BLACK 1 #define __rb_parent(pc) ((struct rb_node *)(pc & ~3)) #define __rb_color(pc) ((pc) & 1) #define __rb_is_black(pc) __rb_color(pc) #define __rb_is_red(pc) (!__rb_color(pc)) #define rb_color(rb) __rb_color((rb)->__rb_parent_color) #define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color) #define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color) static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) { rb->__rb_parent_color = rb_color(rb) + (unsigned long)p; } static inline void rb_set_parent_color(struct rb_node *rb, struct rb_node *p, int color) { rb->__rb_parent_color = (unsigned long)p + color; } static inline void __rb_change_child(struct rb_node *old, struct rb_node *new, struct rb_node *parent, struct rb_root *root) { if (parent) { if (parent->rb_left == old) WRITE_ONCE(parent->rb_left, new); else WRITE_ONCE(parent->rb_right, new); } else WRITE_ONCE(root->rb_node, new); } static inline void __rb_change_child_rcu(struct rb_node *old, struct rb_node *new, struct rb_node *parent, struct rb_root *root) { if (parent) { if (parent->rb_left == old) rcu_assign_pointer(parent->rb_left, new); else rcu_assign_pointer(parent->rb_right, new); } else rcu_assign_pointer(root->rb_node, new); } extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); static __always_inline struct rb_node * __rb_erase_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { struct rb_node *child = node->rb_right; struct rb_node *tmp = node->rb_left; struct rb_node *parent, *rebalance; unsigned long pc; if (!tmp) { /* * Case 1: node to erase has no more than 1 child (easy!) * * Note that if there is one child it must be red due to 5) * and node must be black due to 4). We adjust colors locally * so as to bypass __rb_erase_color() later on. */ pc = node->__rb_parent_color; parent = __rb_parent(pc); __rb_change_child(node, child, parent, root); if (child) { child->__rb_parent_color = pc; rebalance = NULL; } else rebalance = __rb_is_black(pc) ? parent : NULL; tmp = parent; } else if (!child) { /* Still case 1, but this time the child is node->rb_left */ tmp->__rb_parent_color = pc = node->__rb_parent_color; parent = __rb_parent(pc); __rb_change_child(node, tmp, parent, root); rebalance = NULL; tmp = parent; } else { struct rb_node *successor = child, *child2; tmp = child->rb_left; if (!tmp) { /* * Case 2: node's successor is its right child * * (n) (s) * / \ / \ * (x) (s) -> (x) (c) * \ * (c) */ parent = successor; child2 = successor->rb_right; augment->copy(node, successor); } else { /* * Case 3: node's successor is leftmost under * node's right child subtree * * (n) (s) * / \ / \ * (x) (y) -> (x) (y) * / / * (p) (p) * / / * (s) (c) * \ * (c) */ do { parent = successor; successor = tmp; tmp = tmp->rb_left; } while (tmp); child2 = successor->rb_right; WRITE_ONCE(parent->rb_left, child2); WRITE_ONCE(successor->rb_right, child); rb_set_parent(child, successor); augment->copy(node, successor); augment->propagate(parent, successor); } tmp = node->rb_left; WRITE_ONCE(successor->rb_left, tmp); rb_set_parent(tmp, successor); pc = node->__rb_parent_color; tmp = __rb_parent(pc); __rb_change_child(node, successor, tmp, root); if (child2) { rb_set_parent_color(child2, parent, RB_BLACK); rebalance = NULL; } else { rebalance = rb_is_black(successor) ? parent : NULL; } successor->__rb_parent_color = pc; tmp = successor; } augment->propagate(tmp, NULL); return rebalance; } static __always_inline void rb_erase_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { struct rb_node *rebalance = __rb_erase_augmented(node, root, augment); if (rebalance) __rb_erase_color(rebalance, root, augment->rotate); } static __always_inline void rb_erase_augmented_cached(struct rb_node *node, struct rb_root_cached *root, const struct rb_augment_callbacks *augment) { if (root->rb_leftmost == node) root->rb_leftmost = rb_next(node); rb_erase_augmented(node, &root->rb_root, augment); } #endif /* _LINUX_RBTREE_AUGMENTED_H */ |
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 | /* * linux/drivers/video/console/fbcon.h -- Low level frame buffer based console driver * * Copyright (C) 1997 Geert Uytterhoeven * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of this archive * for more details. */ #ifndef _VIDEO_FBCON_H #define _VIDEO_FBCON_H #include <linux/types.h> #include <linux/vt_buffer.h> #include <linux/vt_kern.h> #include <linux/workqueue.h> #include <asm/io.h> /* * This is the interface between the low-level console driver and the * low-level frame buffer device */ struct fbcon_display { /* Filled in by the low-level console driver */ const u_char *fontdata; int userfont; /* != 0 if fontdata kmalloc()ed */ #ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION u_short scrollmode; /* Scroll Method, use fb_scrollmode() */ #endif u_short inverse; /* != 0 text black on white as default */ short yscroll; /* Hardware scrolling */ int vrows; /* number of virtual rows */ int cursor_shape; int con_rotate; u32 xres_virtual; u32 yres_virtual; u32 height; u32 width; u32 bits_per_pixel; u32 grayscale; u32 nonstd; u32 accel_flags; u32 rotate; struct fb_bitfield red; struct fb_bitfield green; struct fb_bitfield blue; struct fb_bitfield transp; const struct fb_videomode *mode; }; struct fbcon_ops { void (*bmove)(struct vc_data *vc, struct fb_info *info, int sy, int sx, int dy, int dx, int height, int width); void (*clear)(struct vc_data *vc, struct fb_info *info, int sy, int sx, int height, int width, int fb, int bg); void (*putcs)(struct vc_data *vc, struct fb_info *info, const unsigned short *s, int count, int yy, int xx, int fg, int bg); void (*clear_margins)(struct vc_data *vc, struct fb_info *info, int color, int bottom_only); void (*cursor)(struct vc_data *vc, struct fb_info *info, bool enable, int fg, int bg); int (*update_start)(struct fb_info *info); int (*rotate_font)(struct fb_info *info, struct vc_data *vc); struct fb_var_screeninfo var; /* copy of the current fb_var_screeninfo */ struct delayed_work cursor_work; /* Cursor timer */ struct fb_cursor cursor_state; struct fbcon_display *p; struct fb_info *info; int currcon; /* Current VC. */ int cur_blink_jiffies; int cursor_flash; int cursor_reset; int blank_state; int graphics; int save_graphics; /* for debug enter/leave */ bool initialized; int rotate; int cur_rotate; char *cursor_data; u8 *fontbuffer; u8 *fontdata; u8 *cursor_src; u32 cursor_size; u32 fd_size; }; /* * Attribute Decoding */ /* Color */ #define attr_fgcol(fgshift,s) \ (((s) >> (fgshift)) & 0x0f) #define attr_bgcol(bgshift,s) \ (((s) >> (bgshift)) & 0x0f) /* Monochrome */ #define attr_bold(s) \ ((s) & 0x200) #define attr_reverse(s) \ ((s) & 0x800) #define attr_underline(s) \ ((s) & 0x400) #define attr_blink(s) \ ((s) & 0x8000) static inline int mono_col(const struct fb_info *info) { __u32 max_len; max_len = max(info->var.green.length, info->var.red.length); max_len = max(info->var.blue.length, max_len); return (~(0xfff << max_len)) & 0xff; } /* * Scroll Method */ /* There are several methods fbcon can use to move text around the screen: * * Operation Pan Wrap *--------------------------------------------- * SCROLL_MOVE copyarea No No * SCROLL_PAN_MOVE copyarea Yes No * SCROLL_WRAP_MOVE copyarea No Yes * SCROLL_REDRAW imageblit No No * SCROLL_PAN_REDRAW imageblit Yes No * SCROLL_WRAP_REDRAW imageblit No Yes * * (SCROLL_WRAP_REDRAW is not implemented yet) * * In general, fbcon will choose the best scrolling * method based on the rule below: * * Pan/Wrap > accel imageblit > accel copyarea > * soft imageblit > (soft copyarea) * * Exception to the rule: Pan + accel copyarea is * preferred over Pan + accel imageblit. * * The above is typical for PCI/AGP cards. Unless * overridden, fbcon will never use soft copyarea. * * If you need to override the above rule, set the * appropriate flags in fb_info->flags. For example, * to prefer copyarea over imageblit, set * FBINFO_READS_FAST. * * Other notes: * + use the hardware engine to move the text * (hw-accelerated copyarea() and fillrect()) * + use hardware-supported panning on a large virtual screen * + amifb can not only pan, but also wrap the display by N lines * (i.e. visible line i = physical line (i+N) % yres). * + read what's already rendered on the screen and * write it in a different place (this is cfb_copyarea()) * + re-render the text to the screen * * Whether to use wrapping or panning can only be figured out at * runtime (when we know whether our font height is a multiple * of the pan/wrap step) * */ #define SCROLL_MOVE 0x001 #define SCROLL_PAN_MOVE 0x002 #define SCROLL_WRAP_MOVE 0x003 #define SCROLL_REDRAW 0x004 #define SCROLL_PAN_REDRAW 0x005 static inline u_short fb_scrollmode(struct fbcon_display *fb) { #ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION return fb->scrollmode; #else /* hardcoded to SCROLL_REDRAW if acceleration was disabled. */ return SCROLL_REDRAW; #endif } #ifdef CONFIG_FB_TILEBLITTING extern void fbcon_set_tileops(struct vc_data *vc, struct fb_info *info); #endif extern void fbcon_set_bitops(struct fbcon_ops *ops); extern int soft_cursor(struct fb_info *info, struct fb_cursor *cursor); #define FBCON_ATTRIBUTE_UNDERLINE 1 #define FBCON_ATTRIBUTE_REVERSE 2 #define FBCON_ATTRIBUTE_BOLD 4 static inline int real_y(struct fbcon_display *p, int ypos) { int rows = p->vrows; ypos += p->yscroll; return ypos < rows ? ypos : ypos - rows; } static inline int get_attribute(struct fb_info *info, u16 c) { int attribute = 0; if (fb_get_color_depth(&info->var, &info->fix) == 1) { if (attr_underline(c)) attribute |= FBCON_ATTRIBUTE_UNDERLINE; if (attr_reverse(c)) attribute |= FBCON_ATTRIBUTE_REVERSE; if (attr_bold(c)) attribute |= FBCON_ATTRIBUTE_BOLD; } return attribute; } #define FBCON_SWAP(i,r,v) ({ \ typeof(r) _r = (r); \ typeof(v) _v = (v); \ (void) (&_r == &_v); \ (i == FB_ROTATE_UR || i == FB_ROTATE_UD) ? _r : _v; }) #ifdef CONFIG_FRAMEBUFFER_CONSOLE_ROTATION extern void fbcon_set_rotate(struct fbcon_ops *ops); #else #define fbcon_set_rotate(x) do {} while(0) #endif /* CONFIG_FRAMEBUFFER_CONSOLE_ROTATION */ #endif /* _VIDEO_FBCON_H */ |
| 93 93 66 63 63 110 108 105 105 89 88 88 110 8 8 7 14 14 14 14 14 14 14 14 8 8 8 8 7 2 2 2 2 2 108 89 88 89 88 22 20 107 106 109 22 20 18 13 89 15 14 14 109 106 110 103 14 13 7 6 8 33 3 26 9 9 9 9 9 8 9 9 4 4 4 79 77 80 80 77 80 77 100 100 100 93 4 77 84 4 4 33 40 40 23 3 33 32 10 10 10 8 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Scatterlist Cryptographic API. * * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> * Copyright (c) 2002 David S. Miller (davem@redhat.com) * Copyright (c) 2005 Herbert Xu <herbert@gondor.apana.org.au> * * Portions derived from Cryptoapi, by Alexander Kjeldaas <astor@fast.no> * and Nettle, by Niels Möller. */ #include <linux/err.h> #include <linux/errno.h> #include <linux/jump_label.h> #include <linux/kernel.h> #include <linux/kmod.h> #include <linux/module.h> #include <linux/param.h> #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/completion.h> #include "internal.h" LIST_HEAD(crypto_alg_list); EXPORT_SYMBOL_GPL(crypto_alg_list); DECLARE_RWSEM(crypto_alg_sem); EXPORT_SYMBOL_GPL(crypto_alg_sem); BLOCKING_NOTIFIER_HEAD(crypto_chain); EXPORT_SYMBOL_GPL(crypto_chain); #if IS_BUILTIN(CONFIG_CRYPTO_ALGAPI) && IS_ENABLED(CONFIG_CRYPTO_SELFTESTS) DEFINE_STATIC_KEY_FALSE(__crypto_boot_test_finished); #endif static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg, u32 type, u32 mask); static struct crypto_alg *crypto_alg_lookup(const char *name, u32 type, u32 mask); struct crypto_alg *crypto_mod_get(struct crypto_alg *alg) { return try_module_get(alg->cra_module) ? crypto_alg_get(alg) : NULL; } EXPORT_SYMBOL_GPL(crypto_mod_get); void crypto_mod_put(struct crypto_alg *alg) { struct module *module = alg->cra_module; crypto_alg_put(alg); module_put(module); } EXPORT_SYMBOL_GPL(crypto_mod_put); static struct crypto_alg *__crypto_alg_lookup(const char *name, u32 type, u32 mask) { struct crypto_alg *q, *alg = NULL; int best = -2; list_for_each_entry(q, &crypto_alg_list, cra_list) { int exact, fuzzy; if (crypto_is_moribund(q)) continue; if ((q->cra_flags ^ type) & mask) continue; exact = !strcmp(q->cra_driver_name, name); fuzzy = !strcmp(q->cra_name, name); if (!exact && !(fuzzy && q->cra_priority > best)) continue; if (unlikely(!crypto_mod_get(q))) continue; best = q->cra_priority; if (alg) crypto_mod_put(alg); alg = q; if (exact) break; } return alg; } static void crypto_larval_destroy(struct crypto_alg *alg) { struct crypto_larval *larval = (void *)alg; BUG_ON(!crypto_is_larval(alg)); if (!IS_ERR_OR_NULL(larval->adult)) crypto_mod_put(larval->adult); kfree(larval); } struct crypto_larval *crypto_larval_alloc(const char *name, u32 type, u32 mask) { struct crypto_larval *larval; larval = kzalloc(sizeof(*larval), GFP_KERNEL); if (!larval) return ERR_PTR(-ENOMEM); type &= ~CRYPTO_ALG_TYPE_MASK | (mask ?: CRYPTO_ALG_TYPE_MASK); larval->mask = mask; larval->alg.cra_flags = CRYPTO_ALG_LARVAL | type; larval->alg.cra_priority = -1; larval->alg.cra_destroy = crypto_larval_destroy; strscpy(larval->alg.cra_name, name, CRYPTO_MAX_ALG_NAME); init_completion(&larval->completion); return larval; } EXPORT_SYMBOL_GPL(crypto_larval_alloc); static struct crypto_alg *crypto_larval_add(const char *name, u32 type, u32 mask) { struct crypto_alg *alg; struct crypto_larval *larval; larval = crypto_larval_alloc(name, type, mask); if (IS_ERR(larval)) return ERR_CAST(larval); refcount_set(&larval->alg.cra_refcnt, 2); down_write(&crypto_alg_sem); alg = __crypto_alg_lookup(name, type, mask); if (!alg) { alg = &larval->alg; list_add(&alg->cra_list, &crypto_alg_list); } up_write(&crypto_alg_sem); if (alg != &larval->alg) { kfree(larval); if (crypto_is_larval(alg)) alg = crypto_larval_wait(alg, type, mask); } return alg; } static void crypto_larval_kill(struct crypto_larval *larval) { bool unlinked; down_write(&crypto_alg_sem); unlinked = list_empty(&larval->alg.cra_list); if (!unlinked) list_del_init(&larval->alg.cra_list); up_write(&crypto_alg_sem); if (unlinked) return; complete_all(&larval->completion); crypto_alg_put(&larval->alg); } void crypto_schedule_test(struct crypto_larval *larval) { int err; err = crypto_probing_notify(CRYPTO_MSG_ALG_REGISTER, larval->adult); WARN_ON_ONCE(err != NOTIFY_STOP); } EXPORT_SYMBOL_GPL(crypto_schedule_test); static void crypto_start_test(struct crypto_larval *larval) { if (!crypto_is_test_larval(larval)) return; if (larval->test_started) return; down_write(&crypto_alg_sem); if (larval->test_started) { up_write(&crypto_alg_sem); return; } larval->test_started = true; up_write(&crypto_alg_sem); crypto_schedule_test(larval); } static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg, u32 type, u32 mask) { struct crypto_larval *larval; long time_left; again: larval = container_of(alg, struct crypto_larval, alg); if (!crypto_boot_test_finished()) crypto_start_test(larval); time_left = wait_for_completion_killable_timeout( &larval->completion, 60 * HZ); alg = larval->adult; if (time_left < 0) alg = ERR_PTR(-EINTR); else if (!time_left) { if (crypto_is_test_larval(larval)) crypto_larval_kill(larval); alg = ERR_PTR(-ETIMEDOUT); } else if (!alg || PTR_ERR(alg) == -EEXIST) { int err = alg ? -EEXIST : -EAGAIN; /* * EEXIST is expected because two probes can be scheduled * at the same time with one using alg_name and the other * using driver_name. Do a re-lookup but do not retry in * case we hit a quirk like gcm_base(ctr(aes),...) which * will never match. */ alg = &larval->alg; alg = crypto_alg_lookup(alg->cra_name, type, mask) ?: ERR_PTR(err); } else if (IS_ERR(alg)) ; else if (crypto_is_test_larval(larval) && !(alg->cra_flags & CRYPTO_ALG_TESTED)) alg = ERR_PTR(-EAGAIN); else if (alg->cra_flags & CRYPTO_ALG_FIPS_INTERNAL) alg = ERR_PTR(-EAGAIN); else if (!crypto_mod_get(alg)) alg = ERR_PTR(-EAGAIN); crypto_mod_put(&larval->alg); if (!IS_ERR(alg) && crypto_is_larval(alg)) goto again; return alg; } static struct crypto_alg *crypto_alg_lookup(const char *name, u32 type, u32 mask) { const u32 fips = CRYPTO_ALG_FIPS_INTERNAL; struct crypto_alg *alg; u32 test = 0; if (!((type | mask) & CRYPTO_ALG_TESTED)) test |= CRYPTO_ALG_TESTED; down_read(&crypto_alg_sem); alg = __crypto_alg_lookup(name, (type | test) & ~fips, (mask | test) & ~fips); if (alg) { if (((type | mask) ^ fips) & fips) mask |= fips; mask &= fips; if (!crypto_is_larval(alg) && ((type ^ alg->cra_flags) & mask)) { /* Algorithm is disallowed in FIPS mode. */ crypto_mod_put(alg); alg = ERR_PTR(-ENOENT); } } else if (test) { alg = __crypto_alg_lookup(name, type, mask); if (alg && !crypto_is_larval(alg)) { /* Test failed */ crypto_mod_put(alg); alg = ERR_PTR(-ELIBBAD); } } up_read(&crypto_alg_sem); return alg; } static struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask) { struct crypto_alg *alg; if (!name) return ERR_PTR(-ENOENT); type &= ~(CRYPTO_ALG_LARVAL | CRYPTO_ALG_DEAD); mask &= ~(CRYPTO_ALG_LARVAL | CRYPTO_ALG_DEAD); alg = crypto_alg_lookup(name, type, mask); if (!alg && !(mask & CRYPTO_NOLOAD)) { request_module("crypto-%s", name); if (!((type ^ CRYPTO_ALG_NEED_FALLBACK) & mask & CRYPTO_ALG_NEED_FALLBACK)) request_module("crypto-%s-all", name); alg = crypto_alg_lookup(name, type, mask); } if (!IS_ERR_OR_NULL(alg) && crypto_is_larval(alg)) alg = crypto_larval_wait(alg, type, mask); else if (alg) ; else if (!(mask & CRYPTO_ALG_TESTED)) alg = crypto_larval_add(name, type, mask); else alg = ERR_PTR(-ENOENT); return alg; } int crypto_probing_notify(unsigned long val, void *v) { int ok; ok = blocking_notifier_call_chain(&crypto_chain, val, v); if (ok == NOTIFY_DONE) { request_module("cryptomgr"); ok = blocking_notifier_call_chain(&crypto_chain, val, v); } return ok; } EXPORT_SYMBOL_GPL(crypto_probing_notify); struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask) { struct crypto_alg *alg; struct crypto_alg *larval; int ok; /* * If the internal flag is set for a cipher, require a caller to * invoke the cipher with the internal flag to use that cipher. * Also, if a caller wants to allocate a cipher that may or may * not be an internal cipher, use type | CRYPTO_ALG_INTERNAL and * !(mask & CRYPTO_ALG_INTERNAL). */ if (!((type | mask) & CRYPTO_ALG_INTERNAL)) mask |= CRYPTO_ALG_INTERNAL; larval = crypto_larval_lookup(name, type, mask); if (IS_ERR(larval) || !crypto_is_larval(larval)) return larval; ok = crypto_probing_notify(CRYPTO_MSG_ALG_REQUEST, larval); if (ok == NOTIFY_STOP) alg = crypto_larval_wait(larval, type, mask); else { crypto_mod_put(larval); alg = ERR_PTR(-ENOENT); } crypto_larval_kill(container_of(larval, struct crypto_larval, alg)); return alg; } EXPORT_SYMBOL_GPL(crypto_alg_mod_lookup); static void crypto_exit_ops(struct crypto_tfm *tfm) { const struct crypto_type *type = tfm->__crt_alg->cra_type; if (type && tfm->exit) tfm->exit(tfm); } static unsigned int crypto_ctxsize(struct crypto_alg *alg, u32 type, u32 mask) { const struct crypto_type *type_obj = alg->cra_type; unsigned int len; len = alg->cra_alignmask & ~(crypto_tfm_ctx_alignment() - 1); if (type_obj) return len + type_obj->ctxsize(alg, type, mask); switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { default: BUG(); case CRYPTO_ALG_TYPE_CIPHER: len += crypto_cipher_ctxsize(alg); break; } return len; } void crypto_shoot_alg(struct crypto_alg *alg) { down_write(&crypto_alg_sem); alg->cra_flags |= CRYPTO_ALG_DYING; up_write(&crypto_alg_sem); } EXPORT_SYMBOL_GPL(crypto_shoot_alg); struct crypto_tfm *__crypto_alloc_tfmgfp(struct crypto_alg *alg, u32 type, u32 mask, gfp_t gfp) { struct crypto_tfm *tfm; unsigned int tfm_size; int err = -ENOMEM; tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, type, mask); tfm = kzalloc(tfm_size, gfp); if (tfm == NULL) goto out_err; tfm->__crt_alg = alg; refcount_set(&tfm->refcnt, 1); if (!tfm->exit && alg->cra_init && (err = alg->cra_init(tfm))) goto cra_init_failed; goto out; cra_init_failed: crypto_exit_ops(tfm); if (err == -EAGAIN) crypto_shoot_alg(alg); kfree(tfm); out_err: tfm = ERR_PTR(err); out: return tfm; } EXPORT_SYMBOL_GPL(__crypto_alloc_tfmgfp); struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type, u32 mask) { return __crypto_alloc_tfmgfp(alg, type, mask, GFP_KERNEL); } EXPORT_SYMBOL_GPL(__crypto_alloc_tfm); /* * crypto_alloc_base - Locate algorithm and allocate transform * @alg_name: Name of algorithm * @type: Type of algorithm * @mask: Mask for type comparison * * This function should not be used by new algorithm types. * Please use crypto_alloc_tfm instead. * * crypto_alloc_base() will first attempt to locate an already loaded * algorithm. If that fails and the kernel supports dynamically loadable * modules, it will then attempt to load a module of the same name or * alias. If that fails it will send a query to any loaded crypto manager * to construct an algorithm on the fly. A refcount is grabbed on the * algorithm which is then associated with the new transform. * * The returned transform is of a non-determinate type. Most people * should use one of the more specific allocation functions such as * crypto_alloc_skcipher(). * * In case of error the return value is an error pointer. */ struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask) { struct crypto_tfm *tfm; int err; for (;;) { struct crypto_alg *alg; alg = crypto_alg_mod_lookup(alg_name, type, mask); if (IS_ERR(alg)) { err = PTR_ERR(alg); goto err; } tfm = __crypto_alloc_tfm(alg, type, mask); if (!IS_ERR(tfm)) return tfm; crypto_mod_put(alg); err = PTR_ERR(tfm); err: if (err != -EAGAIN) break; if (fatal_signal_pending(current)) { err = -EINTR; break; } } return ERR_PTR(err); } EXPORT_SYMBOL_GPL(crypto_alloc_base); static void *crypto_alloc_tfmmem(struct crypto_alg *alg, const struct crypto_type *frontend, int node, gfp_t gfp) { struct crypto_tfm *tfm; unsigned int tfmsize; unsigned int total; char *mem; tfmsize = frontend->tfmsize; total = tfmsize + sizeof(*tfm) + frontend->extsize(alg); mem = kzalloc_node(total, gfp, node); if (mem == NULL) return ERR_PTR(-ENOMEM); tfm = (struct crypto_tfm *)(mem + tfmsize); tfm->__crt_alg = alg; tfm->node = node; refcount_set(&tfm->refcnt, 1); return mem; } void *crypto_create_tfm_node(struct crypto_alg *alg, const struct crypto_type *frontend, int node) { struct crypto_tfm *tfm; char *mem; int err; mem = crypto_alloc_tfmmem(alg, frontend, node, GFP_KERNEL); if (IS_ERR(mem)) goto out; tfm = (struct crypto_tfm *)(mem + frontend->tfmsize); tfm->fb = tfm; err = frontend->init_tfm(tfm); if (err) goto out_free_tfm; if (!tfm->exit && alg->cra_init && (err = alg->cra_init(tfm))) goto cra_init_failed; goto out; cra_init_failed: crypto_exit_ops(tfm); out_free_tfm: if (err == -EAGAIN) crypto_shoot_alg(alg); kfree(mem); mem = ERR_PTR(err); out: return mem; } EXPORT_SYMBOL_GPL(crypto_create_tfm_node); void *crypto_clone_tfm(const struct crypto_type *frontend, struct crypto_tfm *otfm) { struct crypto_alg *alg = otfm->__crt_alg; struct crypto_tfm *tfm; char *mem; mem = ERR_PTR(-ESTALE); if (unlikely(!crypto_mod_get(alg))) goto out; mem = crypto_alloc_tfmmem(alg, frontend, otfm->node, GFP_ATOMIC); if (IS_ERR(mem)) { crypto_mod_put(alg); goto out; } tfm = (struct crypto_tfm *)(mem + frontend->tfmsize); tfm->crt_flags = otfm->crt_flags; tfm->fb = tfm; out: return mem; } EXPORT_SYMBOL_GPL(crypto_clone_tfm); struct crypto_alg *crypto_find_alg(const char *alg_name, const struct crypto_type *frontend, u32 type, u32 mask) { if (frontend) { type &= frontend->maskclear; mask &= frontend->maskclear; type |= frontend->type; mask |= frontend->maskset; } return crypto_alg_mod_lookup(alg_name, type, mask); } EXPORT_SYMBOL_GPL(crypto_find_alg); /* * crypto_alloc_tfm_node - Locate algorithm and allocate transform * @alg_name: Name of algorithm * @frontend: Frontend algorithm type * @type: Type of algorithm * @mask: Mask for type comparison * @node: NUMA node in which users desire to put requests, if node is * NUMA_NO_NODE, it means users have no special requirement. * * crypto_alloc_tfm() will first attempt to locate an already loaded * algorithm. If that fails and the kernel supports dynamically loadable * modules, it will then attempt to load a module of the same name or * alias. If that fails it will send a query to any loaded crypto manager * to construct an algorithm on the fly. A refcount is grabbed on the * algorithm which is then associated with the new transform. * * The returned transform is of a non-determinate type. Most people * should use one of the more specific allocation functions such as * crypto_alloc_skcipher(). * * In case of error the return value is an error pointer. */ void *crypto_alloc_tfm_node(const char *alg_name, const struct crypto_type *frontend, u32 type, u32 mask, int node) { void *tfm; int err; for (;;) { struct crypto_alg *alg; alg = crypto_find_alg(alg_name, frontend, type, mask); if (IS_ERR(alg)) { err = PTR_ERR(alg); goto err; } tfm = crypto_create_tfm_node(alg, frontend, node); if (!IS_ERR(tfm)) return tfm; crypto_mod_put(alg); err = PTR_ERR(tfm); err: if (err != -EAGAIN) break; if (fatal_signal_pending(current)) { err = -EINTR; break; } } return ERR_PTR(err); } EXPORT_SYMBOL_GPL(crypto_alloc_tfm_node); /* * crypto_destroy_tfm - Free crypto transform * @mem: Start of tfm slab * @tfm: Transform to free * * This function frees up the transform and any associated resources, * then drops the refcount on the associated algorithm. */ void crypto_destroy_tfm(void *mem, struct crypto_tfm *tfm) { struct crypto_alg *alg; if (IS_ERR_OR_NULL(mem)) return; if (!refcount_dec_and_test(&tfm->refcnt)) return; alg = tfm->__crt_alg; if (!tfm->exit && alg->cra_exit) alg->cra_exit(tfm); crypto_exit_ops(tfm); crypto_mod_put(alg); kfree_sensitive(mem); } EXPORT_SYMBOL_GPL(crypto_destroy_tfm); int crypto_has_alg(const char *name, u32 type, u32 mask) { int ret = 0; struct crypto_alg *alg = crypto_alg_mod_lookup(name, type, mask); if (!IS_ERR(alg)) { crypto_mod_put(alg); ret = 1; } return ret; } EXPORT_SYMBOL_GPL(crypto_has_alg); void crypto_req_done(void *data, int err) { struct crypto_wait *wait = data; if (err == -EINPROGRESS) return; wait->err = err; complete(&wait->completion); } EXPORT_SYMBOL_GPL(crypto_req_done); void crypto_destroy_alg(struct crypto_alg *alg) { if (alg->cra_type && alg->cra_type->destroy) alg->cra_type->destroy(alg); if (alg->cra_destroy) alg->cra_destroy(alg); } EXPORT_SYMBOL_GPL(crypto_destroy_alg); struct crypto_async_request *crypto_request_clone( struct crypto_async_request *req, size_t total, gfp_t gfp) { struct crypto_tfm *tfm = req->tfm; struct crypto_async_request *nreq; nreq = kmemdup(req, total, gfp); if (!nreq) { req->tfm = tfm->fb; return req; } nreq->flags &= ~CRYPTO_TFM_REQ_ON_STACK; return nreq; } EXPORT_SYMBOL_GPL(crypto_request_clone); MODULE_DESCRIPTION("Cryptographic core API"); MODULE_LICENSE("GPL"); |
| 21 8 14 22 22 22 22 22 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 | // SPDX-License-Identifier: GPL-2.0-only /* * AppArmor security module * * This file contains AppArmor ipc mediation * * Copyright (C) 1998-2008 Novell/SUSE * Copyright 2009-2017 Canonical Ltd. */ #include <linux/gfp.h> #include "include/audit.h" #include "include/capability.h" #include "include/cred.h" #include "include/policy.h" #include "include/ipc.h" #include "include/sig_names.h" static inline int map_signal_num(int sig) { if (sig > SIGRTMAX) return SIGUNKNOWN; else if (sig >= SIGRTMIN) return sig - SIGRTMIN + SIGRT_BASE; else if (sig < MAXMAPPED_SIG) return sig_map[sig]; return SIGUNKNOWN; } /** * audit_signal_mask - convert mask to permission string * @mask: permission mask to convert * * Returns: pointer to static string */ static const char *audit_signal_mask(u32 mask) { if (mask & MAY_READ) return "receive"; if (mask & MAY_WRITE) return "send"; return ""; } /** * audit_signal_cb() - call back for signal specific audit fields * @ab: audit_buffer (NOT NULL) * @va: audit struct to audit values of (NOT NULL) */ static void audit_signal_cb(struct audit_buffer *ab, void *va) { struct common_audit_data *sa = va; struct apparmor_audit_data *ad = aad(sa); if (ad->request & AA_SIGNAL_PERM_MASK) { audit_log_format(ab, " requested_mask=\"%s\"", audit_signal_mask(ad->request)); if (ad->denied & AA_SIGNAL_PERM_MASK) { audit_log_format(ab, " denied_mask=\"%s\"", audit_signal_mask(ad->denied)); } } if (ad->signal == SIGUNKNOWN) audit_log_format(ab, "signal=unknown(%d)", ad->unmappedsig); else if (ad->signal < MAXMAPPED_SIGNAME) audit_log_format(ab, " signal=%s", sig_names[ad->signal]); else audit_log_format(ab, " signal=rtmin+%d", ad->signal - SIGRT_BASE); audit_log_format(ab, " peer="); aa_label_xaudit(ab, labels_ns(ad->subj_label), ad->peer, FLAGS_NONE, GFP_ATOMIC); } static int profile_signal_perm(const struct cred *cred, struct aa_profile *profile, struct aa_label *peer, u32 request, struct apparmor_audit_data *ad) { struct aa_ruleset *rules = profile->label.rules[0]; struct aa_perms perms; aa_state_t state; if (profile_unconfined(profile)) return 0; ad->subj_cred = cred; ad->peer = peer; /* TODO: secondary cache check <profile, profile, perm> */ state = RULE_MEDIATES(rules, AA_CLASS_SIGNAL); if (!state) return 0; state = aa_dfa_next(rules->policy->dfa, state, ad->signal); aa_label_match(profile, rules, peer, state, false, request, &perms); aa_apply_modes_to_perms(profile, &perms); return aa_check_perms(profile, &perms, request, ad, audit_signal_cb); } int aa_may_signal(const struct cred *subj_cred, struct aa_label *sender, const struct cred *target_cred, struct aa_label *target, int sig) { struct aa_profile *profile; DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_SIGNAL, OP_SIGNAL); ad.signal = map_signal_num(sig); ad.unmappedsig = sig; return xcheck_labels(sender, target, profile, profile_signal_perm(subj_cred, profile, target, MAY_WRITE, &ad), profile_signal_perm(target_cred, profile, sender, MAY_READ, &ad)); } |
| 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 4 4 4 5 5 3 5 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 | // SPDX-License-Identifier: GPL-2.0-or-later #include <crypto/hash.h> #include <linux/cpu.h> #include <linux/kref.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/percpu.h> #include <linux/workqueue.h> #include <net/tcp.h> static size_t __scratch_size; struct sigpool_scratch { local_lock_t bh_lock; void __rcu *pad; }; static DEFINE_PER_CPU(struct sigpool_scratch, sigpool_scratch) = { .bh_lock = INIT_LOCAL_LOCK(bh_lock), }; struct sigpool_entry { struct crypto_ahash *hash; const char *alg; struct kref kref; uint16_t needs_key:1, reserved:15; }; #define CPOOL_SIZE (PAGE_SIZE / sizeof(struct sigpool_entry)) static struct sigpool_entry cpool[CPOOL_SIZE]; static unsigned int cpool_populated; static DEFINE_MUTEX(cpool_mutex); /* Slow-path */ struct scratches_to_free { struct rcu_head rcu; unsigned int cnt; void *scratches[]; }; static void free_old_scratches(struct rcu_head *head) { struct scratches_to_free *stf; stf = container_of(head, struct scratches_to_free, rcu); while (stf->cnt--) kfree(stf->scratches[stf->cnt]); kfree(stf); } /** * sigpool_reserve_scratch - re-allocates scratch buffer, slow-path * @size: request size for the scratch/temp buffer */ static int sigpool_reserve_scratch(size_t size) { struct scratches_to_free *stf; size_t stf_sz = struct_size(stf, scratches, num_possible_cpus()); int cpu, err = 0; lockdep_assert_held(&cpool_mutex); if (__scratch_size >= size) return 0; stf = kmalloc(stf_sz, GFP_KERNEL); if (!stf) return -ENOMEM; stf->cnt = 0; size = max(size, __scratch_size); cpus_read_lock(); for_each_possible_cpu(cpu) { void *scratch, *old_scratch; scratch = kmalloc_node(size, GFP_KERNEL, cpu_to_node(cpu)); if (!scratch) { err = -ENOMEM; break; } old_scratch = rcu_replace_pointer(per_cpu(sigpool_scratch.pad, cpu), scratch, lockdep_is_held(&cpool_mutex)); if (!cpu_online(cpu) || !old_scratch) { kfree(old_scratch); continue; } stf->scratches[stf->cnt++] = old_scratch; } cpus_read_unlock(); if (!err) __scratch_size = size; call_rcu(&stf->rcu, free_old_scratches); return err; } static void sigpool_scratch_free(void) { int cpu; for_each_possible_cpu(cpu) kfree(rcu_replace_pointer(per_cpu(sigpool_scratch.pad, cpu), NULL, lockdep_is_held(&cpool_mutex))); __scratch_size = 0; } static int __cpool_try_clone(struct crypto_ahash *hash) { struct crypto_ahash *tmp; tmp = crypto_clone_ahash(hash); if (IS_ERR(tmp)) return PTR_ERR(tmp); crypto_free_ahash(tmp); return 0; } static int __cpool_alloc_ahash(struct sigpool_entry *e, const char *alg) { struct crypto_ahash *cpu0_hash; int ret; e->alg = kstrdup(alg, GFP_KERNEL); if (!e->alg) return -ENOMEM; cpu0_hash = crypto_alloc_ahash(alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(cpu0_hash)) { ret = PTR_ERR(cpu0_hash); goto out_free_alg; } e->needs_key = crypto_ahash_get_flags(cpu0_hash) & CRYPTO_TFM_NEED_KEY; ret = __cpool_try_clone(cpu0_hash); if (ret) goto out_free_cpu0_hash; e->hash = cpu0_hash; kref_init(&e->kref); return 0; out_free_cpu0_hash: crypto_free_ahash(cpu0_hash); out_free_alg: kfree(e->alg); e->alg = NULL; return ret; } /** * tcp_sigpool_alloc_ahash - allocates pool for ahash requests * @alg: name of async hash algorithm * @scratch_size: reserve a tcp_sigpool::scratch buffer of this size */ int tcp_sigpool_alloc_ahash(const char *alg, size_t scratch_size) { int i, ret; /* slow-path */ mutex_lock(&cpool_mutex); ret = sigpool_reserve_scratch(scratch_size); if (ret) goto out; for (i = 0; i < cpool_populated; i++) { if (!cpool[i].alg) continue; if (strcmp(cpool[i].alg, alg)) continue; /* pairs with tcp_sigpool_release() */ if (!kref_get_unless_zero(&cpool[i].kref)) kref_init(&cpool[i].kref); ret = i; goto out; } for (i = 0; i < cpool_populated; i++) { if (!cpool[i].alg) break; } if (i >= CPOOL_SIZE) { ret = -ENOSPC; goto out; } ret = __cpool_alloc_ahash(&cpool[i], alg); if (!ret) { ret = i; if (i == cpool_populated) cpool_populated++; } out: mutex_unlock(&cpool_mutex); return ret; } EXPORT_SYMBOL_GPL(tcp_sigpool_alloc_ahash); static void __cpool_free_entry(struct sigpool_entry *e) { crypto_free_ahash(e->hash); kfree(e->alg); memset(e, 0, sizeof(*e)); } static void cpool_cleanup_work_cb(struct work_struct *work) { bool free_scratch = true; unsigned int i; mutex_lock(&cpool_mutex); for (i = 0; i < cpool_populated; i++) { if (kref_read(&cpool[i].kref) > 0) { free_scratch = false; continue; } if (!cpool[i].alg) continue; __cpool_free_entry(&cpool[i]); } if (free_scratch) sigpool_scratch_free(); mutex_unlock(&cpool_mutex); } static DECLARE_WORK(cpool_cleanup_work, cpool_cleanup_work_cb); static void cpool_schedule_cleanup(struct kref *kref) { schedule_work(&cpool_cleanup_work); } /** * tcp_sigpool_release - decreases number of users for a pool. If it was * the last user of the pool, releases any memory that was consumed. * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash() */ void tcp_sigpool_release(unsigned int id) { if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) return; /* slow-path */ kref_put(&cpool[id].kref, cpool_schedule_cleanup); } EXPORT_SYMBOL_GPL(tcp_sigpool_release); /** * tcp_sigpool_get - increases number of users (refcounter) for a pool * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash() */ void tcp_sigpool_get(unsigned int id) { if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) return; kref_get(&cpool[id].kref); } EXPORT_SYMBOL_GPL(tcp_sigpool_get); int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c) __cond_acquires(RCU_BH) { struct crypto_ahash *hash; rcu_read_lock_bh(); if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) { rcu_read_unlock_bh(); return -EINVAL; } hash = crypto_clone_ahash(cpool[id].hash); if (IS_ERR(hash)) { rcu_read_unlock_bh(); return PTR_ERR(hash); } c->req = ahash_request_alloc(hash, GFP_ATOMIC); if (!c->req) { crypto_free_ahash(hash); rcu_read_unlock_bh(); return -ENOMEM; } ahash_request_set_callback(c->req, 0, NULL, NULL); /* Pairs with tcp_sigpool_reserve_scratch(), scratch area is * valid (allocated) until tcp_sigpool_end(). */ local_lock_nested_bh(&sigpool_scratch.bh_lock); c->scratch = rcu_dereference_bh(*this_cpu_ptr(&sigpool_scratch.pad)); return 0; } EXPORT_SYMBOL_GPL(tcp_sigpool_start); void tcp_sigpool_end(struct tcp_sigpool *c) __releases(RCU_BH) { struct crypto_ahash *hash = crypto_ahash_reqtfm(c->req); local_unlock_nested_bh(&sigpool_scratch.bh_lock); rcu_read_unlock_bh(); ahash_request_free(c->req); crypto_free_ahash(hash); } EXPORT_SYMBOL_GPL(tcp_sigpool_end); /** * tcp_sigpool_algo - return algorithm of tcp_sigpool * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash() * @buf: buffer to return name of algorithm * @buf_len: size of @buf */ size_t tcp_sigpool_algo(unsigned int id, char *buf, size_t buf_len) { if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) return -EINVAL; return strscpy(buf, cpool[id].alg, buf_len); } EXPORT_SYMBOL_GPL(tcp_sigpool_algo); /** * tcp_sigpool_hash_skb_data - hash data in skb with initialized tcp_sigpool * @hp: tcp_sigpool pointer * @skb: buffer to add sign for * @header_len: TCP header length for this segment */ int tcp_sigpool_hash_skb_data(struct tcp_sigpool *hp, const struct sk_buff *skb, unsigned int header_len) { const unsigned int head_data_len = skb_headlen(skb) > header_len ? skb_headlen(skb) - header_len : 0; const struct skb_shared_info *shi = skb_shinfo(skb); const struct tcphdr *tp = tcp_hdr(skb); struct ahash_request *req = hp->req; struct sk_buff *frag_iter; struct scatterlist sg; unsigned int i; sg_init_table(&sg, 1); sg_set_buf(&sg, ((u8 *)tp) + header_len, head_data_len); ahash_request_set_crypt(req, &sg, NULL, head_data_len); if (crypto_ahash_update(req)) return 1; for (i = 0; i < shi->nr_frags; ++i) { const skb_frag_t *f = &shi->frags[i]; unsigned int offset = skb_frag_off(f); struct page *page; page = skb_frag_page(f) + (offset >> PAGE_SHIFT); sg_set_page(&sg, page, skb_frag_size(f), offset_in_page(offset)); ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f)); if (crypto_ahash_update(req)) return 1; } skb_walk_frags(skb, frag_iter) if (tcp_sigpool_hash_skb_data(hp, frag_iter, 0)) return 1; return 0; } EXPORT_SYMBOL(tcp_sigpool_hash_skb_data); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Per-CPU pool of crypto requests"); |
| 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 | // SPDX-License-Identifier: GPL-2.0 /* * Prolific PL2303 USB to serial adaptor driver * * Copyright (C) 2001-2007 Greg Kroah-Hartman (greg@kroah.com) * Copyright (C) 2003 IBM Corp. * * Original driver for 2.2.x by anonymous * * See Documentation/usb/usb-serial.rst for more information on using this * driver */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/serial.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/spinlock.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include <linux/unaligned.h> #include "pl2303.h" #define PL2303_QUIRK_UART_STATE_IDX0 BIT(0) #define PL2303_QUIRK_LEGACY BIT(1) #define PL2303_QUIRK_ENDPOINT_HACK BIT(2) #define PL2303_QUIRK_NO_BREAK_GETLINE BIT(3) static const struct usb_device_id id_table[] = { { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID), .driver_info = PL2303_QUIRK_ENDPOINT_HACK }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_RSAQ2) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_DCU11) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_RSAQ3) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_CHILITAG) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_PHAROS) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_ALDIGA) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_MMX) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_GPRS) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_HCR331) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_MOTOROLA) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_ZTEK) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_TB) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_GC) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_GB) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_GT) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_GL) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_GE) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_GS) }, { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID) }, { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID_RSAQ5) }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID), .driver_info = PL2303_QUIRK_ENDPOINT_HACK }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_UC485), .driver_info = PL2303_QUIRK_ENDPOINT_HACK }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_UC232B), .driver_info = PL2303_QUIRK_ENDPOINT_HACK }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID2) }, { USB_DEVICE(ATEN_VENDOR_ID2, ATEN_PRODUCT_ID) }, { USB_DEVICE(ELCOM_VENDOR_ID, ELCOM_PRODUCT_ID) }, { USB_DEVICE(ELCOM_VENDOR_ID, ELCOM_PRODUCT_ID_UCSGT) }, { USB_DEVICE(ITEGNO_VENDOR_ID, ITEGNO_PRODUCT_ID) }, { USB_DEVICE(ITEGNO_VENDOR_ID, ITEGNO_PRODUCT_ID_2080) }, { USB_DEVICE(MA620_VENDOR_ID, MA620_PRODUCT_ID) }, { USB_DEVICE(RATOC_VENDOR_ID, RATOC_PRODUCT_ID) }, { USB_DEVICE(TRIPP_VENDOR_ID, TRIPP_PRODUCT_ID) }, { USB_DEVICE(RADIOSHACK_VENDOR_ID, RADIOSHACK_PRODUCT_ID) }, { USB_DEVICE(DCU10_VENDOR_ID, DCU10_PRODUCT_ID) }, { USB_DEVICE(SITECOM_VENDOR_ID, SITECOM_PRODUCT_ID) }, { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_ID) }, { USB_DEVICE(SIEMENS_VENDOR_ID, SIEMENS_PRODUCT_ID_SX1), .driver_info = PL2303_QUIRK_UART_STATE_IDX0 }, { USB_DEVICE(SIEMENS_VENDOR_ID, SIEMENS_PRODUCT_ID_X65), .driver_info = PL2303_QUIRK_UART_STATE_IDX0 }, { USB_DEVICE(SIEMENS_VENDOR_ID, SIEMENS_PRODUCT_ID_X75), .driver_info = PL2303_QUIRK_UART_STATE_IDX0 }, { USB_DEVICE(SIEMENS_VENDOR_ID, SIEMENS_PRODUCT_ID_EF81), .driver_info = PL2303_QUIRK_ENDPOINT_HACK }, { USB_DEVICE(BENQ_VENDOR_ID, BENQ_PRODUCT_ID_S81) }, /* Benq/Siemens S81 */ { USB_DEVICE(SYNTECH_VENDOR_ID, SYNTECH_PRODUCT_ID) }, { USB_DEVICE(NOKIA_CA42_VENDOR_ID, NOKIA_CA42_PRODUCT_ID) }, { USB_DEVICE(CA_42_CA42_VENDOR_ID, CA_42_CA42_PRODUCT_ID) }, { USB_DEVICE(SAGEM_VENDOR_ID, SAGEM_PRODUCT_ID) }, { USB_DEVICE(LEADTEK_VENDOR_ID, LEADTEK_9531_PRODUCT_ID) }, { USB_DEVICE(SPEEDDRAGON_VENDOR_ID, SPEEDDRAGON_PRODUCT_ID) }, { USB_DEVICE(DATAPILOT_U2_VENDOR_ID, DATAPILOT_U2_PRODUCT_ID) }, { USB_DEVICE(BELKIN_VENDOR_ID, BELKIN_PRODUCT_ID) }, { USB_DEVICE(ALCOR_VENDOR_ID, ALCOR_PRODUCT_ID), .driver_info = PL2303_QUIRK_ENDPOINT_HACK }, { USB_DEVICE(WS002IN_VENDOR_ID, WS002IN_PRODUCT_ID) }, { USB_DEVICE(COREGA_VENDOR_ID, COREGA_PRODUCT_ID) }, { USB_DEVICE(YCCABLE_VENDOR_ID, YCCABLE_PRODUCT_ID) }, { USB_DEVICE(SUPERIAL_VENDOR_ID, SUPERIAL_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD220_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD220TA_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD381_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD381GC_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD960_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD960TA_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LCM220_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LCM960_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LM920_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LM930_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LM940_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_TD620_PRODUCT_ID) }, { USB_DEVICE(CRESSI_VENDOR_ID, CRESSI_EDY_PRODUCT_ID) }, { USB_DEVICE(ZEAGLE_VENDOR_ID, ZEAGLE_N2ITION3_PRODUCT_ID) }, { USB_DEVICE(SONY_VENDOR_ID, SONY_QN3USB_PRODUCT_ID) }, { USB_DEVICE(SANWA_VENDOR_ID, SANWA_PRODUCT_ID) }, { USB_DEVICE(ADLINK_VENDOR_ID, ADLINK_ND6530_PRODUCT_ID) }, { USB_DEVICE(ADLINK_VENDOR_ID, ADLINK_ND6530GC_PRODUCT_ID) }, { USB_DEVICE(SMART_VENDOR_ID, SMART_PRODUCT_ID) }, { USB_DEVICE(AT_VENDOR_ID, AT_VTKIT3_PRODUCT_ID) }, { USB_DEVICE(IBM_VENDOR_ID, IBM_PRODUCT_ID) }, { USB_DEVICE(MACROSILICON_VENDOR_ID, MACROSILICON_MS3020_PRODUCT_ID) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, id_table); #define SET_LINE_REQUEST_TYPE 0x21 #define SET_LINE_REQUEST 0x20 #define SET_CONTROL_REQUEST_TYPE 0x21 #define SET_CONTROL_REQUEST 0x22 #define CONTROL_DTR 0x01 #define CONTROL_RTS 0x02 #define BREAK_REQUEST_TYPE 0x21 #define BREAK_REQUEST 0x23 #define BREAK_ON 0xffff #define BREAK_OFF 0x0000 #define GET_LINE_REQUEST_TYPE 0xa1 #define GET_LINE_REQUEST 0x21 #define VENDOR_WRITE_REQUEST_TYPE 0x40 #define VENDOR_WRITE_REQUEST 0x01 #define VENDOR_WRITE_NREQUEST 0x80 #define VENDOR_READ_REQUEST_TYPE 0xc0 #define VENDOR_READ_REQUEST 0x01 #define VENDOR_READ_NREQUEST 0x81 #define UART_STATE_INDEX 8 #define UART_STATE_MSR_MASK 0x8b #define UART_STATE_TRANSIENT_MASK 0x74 #define UART_DCD 0x01 #define UART_DSR 0x02 #define UART_BREAK_ERROR 0x04 #define UART_RING 0x08 #define UART_FRAME_ERROR 0x10 #define UART_PARITY_ERROR 0x20 #define UART_OVERRUN_ERROR 0x40 #define UART_CTS 0x80 #define PL2303_FLOWCTRL_MASK 0xf0 #define PL2303_READ_TYPE_HX_STATUS 0x8080 #define PL2303_HXN_RESET_REG 0x07 #define PL2303_HXN_RESET_UPSTREAM_PIPE 0x02 #define PL2303_HXN_RESET_DOWNSTREAM_PIPE 0x01 #define PL2303_HXN_FLOWCTRL_REG 0x0a #define PL2303_HXN_FLOWCTRL_MASK 0x1c #define PL2303_HXN_FLOWCTRL_NONE 0x1c #define PL2303_HXN_FLOWCTRL_RTS_CTS 0x18 #define PL2303_HXN_FLOWCTRL_XON_XOFF 0x0c static int pl2303_set_break(struct usb_serial_port *port, bool enable); enum pl2303_type { TYPE_H, TYPE_HX, TYPE_TA, TYPE_TB, TYPE_HXD, TYPE_HXN, TYPE_COUNT }; struct pl2303_type_data { const char *name; speed_t max_baud_rate; unsigned long quirks; unsigned int no_autoxonxoff:1; unsigned int no_divisors:1; unsigned int alt_divisors:1; }; struct pl2303_serial_private { const struct pl2303_type_data *type; unsigned long quirks; }; struct pl2303_private { spinlock_t lock; u8 line_control; u8 line_status; u8 line_settings[7]; }; static const struct pl2303_type_data pl2303_type_data[TYPE_COUNT] = { [TYPE_H] = { .name = "H", .max_baud_rate = 1228800, .quirks = PL2303_QUIRK_LEGACY, .no_autoxonxoff = true, }, [TYPE_HX] = { .name = "HX", .max_baud_rate = 6000000, }, [TYPE_TA] = { .name = "TA", .max_baud_rate = 6000000, .alt_divisors = true, }, [TYPE_TB] = { .name = "TB", .max_baud_rate = 12000000, .alt_divisors = true, }, [TYPE_HXD] = { .name = "HXD", .max_baud_rate = 12000000, }, [TYPE_HXN] = { .name = "G", .max_baud_rate = 12000000, .no_divisors = true, }, }; static int pl2303_vendor_read(struct usb_serial *serial, u16 value, unsigned char buf[1]) { struct pl2303_serial_private *spriv = usb_get_serial_data(serial); struct device *dev = &serial->interface->dev; u8 request; int res; if (spriv->type == &pl2303_type_data[TYPE_HXN]) request = VENDOR_READ_NREQUEST; else request = VENDOR_READ_REQUEST; res = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), request, VENDOR_READ_REQUEST_TYPE, value, 0, buf, 1, 100); if (res != 1) { dev_err(dev, "%s - failed to read [%04x]: %d\n", __func__, value, res); if (res >= 0) res = -EIO; return res; } dev_dbg(dev, "%s - [%04x] = %02x\n", __func__, value, buf[0]); return 0; } static int pl2303_vendor_write(struct usb_serial *serial, u16 value, u16 index) { struct pl2303_serial_private *spriv = usb_get_serial_data(serial); struct device *dev = &serial->interface->dev; u8 request; int res; dev_dbg(dev, "%s - [%04x] = %02x\n", __func__, value, index); if (spriv->type == &pl2303_type_data[TYPE_HXN]) request = VENDOR_WRITE_NREQUEST; else request = VENDOR_WRITE_REQUEST; res = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), request, VENDOR_WRITE_REQUEST_TYPE, value, index, NULL, 0, 100); if (res) { dev_err(dev, "%s - failed to write [%04x]: %d\n", __func__, value, res); return res; } return 0; } static int pl2303_update_reg(struct usb_serial *serial, u8 reg, u8 mask, u8 val) { struct pl2303_serial_private *spriv = usb_get_serial_data(serial); int ret = 0; u8 *buf; buf = kmalloc(1, GFP_KERNEL); if (!buf) return -ENOMEM; if (spriv->type == &pl2303_type_data[TYPE_HXN]) ret = pl2303_vendor_read(serial, reg, buf); else ret = pl2303_vendor_read(serial, reg | 0x80, buf); if (ret) goto out_free; *buf &= ~mask; *buf |= val & mask; ret = pl2303_vendor_write(serial, reg, *buf); out_free: kfree(buf); return ret; } static int pl2303_probe(struct usb_serial *serial, const struct usb_device_id *id) { usb_set_serial_data(serial, (void *)id->driver_info); return 0; } /* * Use interrupt endpoint from first interface if available. * * This is needed due to the looney way its endpoints are set up. */ static int pl2303_endpoint_hack(struct usb_serial *serial, struct usb_serial_endpoints *epds) { struct usb_interface *interface = serial->interface; struct usb_device *dev = serial->dev; struct device *ddev = &interface->dev; struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *endpoint; unsigned int i; if (interface == dev->actconfig->interface[0]) return 0; /* check out the endpoints of the other interface */ iface_desc = dev->actconfig->interface[0]->cur_altsetting; for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { endpoint = &iface_desc->endpoint[i].desc; if (!usb_endpoint_is_int_in(endpoint)) continue; dev_dbg(ddev, "found interrupt in on separate interface\n"); if (epds->num_interrupt_in < ARRAY_SIZE(epds->interrupt_in)) epds->interrupt_in[epds->num_interrupt_in++] = endpoint; } return 0; } static int pl2303_calc_num_ports(struct usb_serial *serial, struct usb_serial_endpoints *epds) { unsigned long quirks = (unsigned long)usb_get_serial_data(serial); struct device *dev = &serial->interface->dev; int ret; if (quirks & PL2303_QUIRK_ENDPOINT_HACK) { ret = pl2303_endpoint_hack(serial, epds); if (ret) return ret; } if (epds->num_interrupt_in < 1) { dev_err(dev, "required interrupt-in endpoint missing\n"); return -ENODEV; } return 1; } static bool pl2303_supports_hx_status(struct usb_serial *serial) { int ret; u8 buf; ret = usb_control_msg_recv(serial->dev, 0, VENDOR_READ_REQUEST, VENDOR_READ_REQUEST_TYPE, PL2303_READ_TYPE_HX_STATUS, 0, &buf, 1, 100, GFP_KERNEL); return ret == 0; } static int pl2303_detect_type(struct usb_serial *serial) { struct usb_device_descriptor *desc = &serial->dev->descriptor; u16 bcdDevice, bcdUSB; /* * Legacy PL2303H, variants 0 and 1 (difference unknown). */ if (desc->bDeviceClass == 0x02) return TYPE_H; /* variant 0 */ if (desc->bMaxPacketSize0 != 0x40) { if (desc->bDeviceClass == 0x00 || desc->bDeviceClass == 0xff) return TYPE_H; /* variant 1 */ return TYPE_H; /* variant 0 */ } bcdDevice = le16_to_cpu(desc->bcdDevice); bcdUSB = le16_to_cpu(desc->bcdUSB); switch (bcdUSB) { case 0x101: /* USB 1.0.1? Let's assume they meant 1.1... */ fallthrough; case 0x110: switch (bcdDevice) { case 0x300: return TYPE_HX; case 0x400: return TYPE_HXD; default: return TYPE_HX; } break; case 0x200: switch (bcdDevice) { case 0x100: /* GC */ case 0x105: return TYPE_HXN; case 0x300: /* GT / TA */ if (pl2303_supports_hx_status(serial)) return TYPE_TA; fallthrough; case 0x305: case 0x400: /* GL */ case 0x405: return TYPE_HXN; case 0x500: /* GE / TB */ if (pl2303_supports_hx_status(serial)) return TYPE_TB; fallthrough; case 0x505: case 0x600: /* GS */ case 0x605: case 0x700: /* GR */ case 0x705: case 0x905: /* GT-2AB */ case 0x1005: /* GC-Q20 */ return TYPE_HXN; } break; } dev_err(&serial->interface->dev, "unknown device type, please report to linux-usb@vger.kernel.org\n"); return -ENODEV; } static bool pl2303_is_hxd_clone(struct usb_serial *serial) { struct usb_device *udev = serial->dev; unsigned char *buf; int ret; buf = kmalloc(7, GFP_KERNEL); if (!buf) return false; ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), GET_LINE_REQUEST, GET_LINE_REQUEST_TYPE, 0, 0, buf, 7, 100); kfree(buf); return ret == -EPIPE; } static int pl2303_startup(struct usb_serial *serial) { struct pl2303_serial_private *spriv; enum pl2303_type type; unsigned char *buf; int ret; ret = pl2303_detect_type(serial); if (ret < 0) return ret; type = ret; dev_dbg(&serial->interface->dev, "device type: %s\n", pl2303_type_data[type].name); spriv = kzalloc(sizeof(*spriv), GFP_KERNEL); if (!spriv) return -ENOMEM; spriv->type = &pl2303_type_data[type]; spriv->quirks = (unsigned long)usb_get_serial_data(serial); spriv->quirks |= spriv->type->quirks; if (type == TYPE_HXD && pl2303_is_hxd_clone(serial)) spriv->quirks |= PL2303_QUIRK_NO_BREAK_GETLINE; usb_set_serial_data(serial, spriv); if (type != TYPE_HXN) { buf = kmalloc(1, GFP_KERNEL); if (!buf) { kfree(spriv); return -ENOMEM; } pl2303_vendor_read(serial, 0x8484, buf); pl2303_vendor_write(serial, 0x0404, 0); pl2303_vendor_read(serial, 0x8484, buf); pl2303_vendor_read(serial, 0x8383, buf); pl2303_vendor_read(serial, 0x8484, buf); pl2303_vendor_write(serial, 0x0404, 1); pl2303_vendor_read(serial, 0x8484, buf); pl2303_vendor_read(serial, 0x8383, buf); pl2303_vendor_write(serial, 0, 1); pl2303_vendor_write(serial, 1, 0); if (spriv->quirks & PL2303_QUIRK_LEGACY) pl2303_vendor_write(serial, 2, 0x24); else pl2303_vendor_write(serial, 2, 0x44); kfree(buf); } return 0; } static void pl2303_release(struct usb_serial *serial) { struct pl2303_serial_private *spriv = usb_get_serial_data(serial); kfree(spriv); } static int pl2303_port_probe(struct usb_serial_port *port) { struct pl2303_private *priv; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; spin_lock_init(&priv->lock); usb_set_serial_port_data(port, priv); port->port.drain_delay = 256; return 0; } static void pl2303_port_remove(struct usb_serial_port *port) { struct pl2303_private *priv = usb_get_serial_port_data(port); kfree(priv); } static int pl2303_set_control_lines(struct usb_serial_port *port, u8 value) { struct usb_device *dev = port->serial->dev; int retval; dev_dbg(&port->dev, "%s - %02x\n", __func__, value); retval = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), SET_CONTROL_REQUEST, SET_CONTROL_REQUEST_TYPE, value, 0, NULL, 0, 100); if (retval) dev_err(&port->dev, "%s - failed: %d\n", __func__, retval); return retval; } /* * Returns the nearest supported baud rate that can be set directly without * using divisors. */ static speed_t pl2303_get_supported_baud_rate(speed_t baud) { static const speed_t baud_sup[] = { 75, 150, 300, 600, 1200, 1800, 2400, 3600, 4800, 7200, 9600, 14400, 19200, 28800, 38400, 57600, 115200, 230400, 460800, 614400, 921600, 1228800, 2457600, 3000000, 6000000 }; unsigned i; for (i = 0; i < ARRAY_SIZE(baud_sup); ++i) { if (baud_sup[i] > baud) break; } if (i == ARRAY_SIZE(baud_sup)) baud = baud_sup[i - 1]; else if (i > 0 && (baud_sup[i] - baud) > (baud - baud_sup[i - 1])) baud = baud_sup[i - 1]; else baud = baud_sup[i]; return baud; } /* * NOTE: If unsupported baud rates are set directly, the PL2303 seems to * use 9600 baud. */ static speed_t pl2303_encode_baud_rate_direct(unsigned char buf[4], speed_t baud) { put_unaligned_le32(baud, buf); return baud; } static speed_t pl2303_encode_baud_rate_divisor(unsigned char buf[4], speed_t baud) { unsigned int baseline, mantissa, exponent; /* * Apparently the formula is: * baudrate = 12M * 32 / (mantissa * 4^exponent) * where * mantissa = buf[8:0] * exponent = buf[11:9] */ baseline = 12000000 * 32; mantissa = baseline / baud; if (mantissa == 0) mantissa = 1; /* Avoid dividing by zero if baud > 32*12M. */ exponent = 0; while (mantissa >= 512) { if (exponent < 7) { mantissa >>= 2; /* divide by 4 */ exponent++; } else { /* Exponent is maxed. Trim mantissa and leave. */ mantissa = 511; break; } } buf[3] = 0x80; buf[2] = 0; buf[1] = exponent << 1 | mantissa >> 8; buf[0] = mantissa & 0xff; /* Calculate and return the exact baud rate. */ baud = (baseline / mantissa) >> (exponent << 1); return baud; } static speed_t pl2303_encode_baud_rate_divisor_alt(unsigned char buf[4], speed_t baud) { unsigned int baseline, mantissa, exponent; /* * Apparently, for the TA version the formula is: * baudrate = 12M * 32 / (mantissa * 2^exponent) * where * mantissa = buf[10:0] * exponent = buf[15:13 16] */ baseline = 12000000 * 32; mantissa = baseline / baud; if (mantissa == 0) mantissa = 1; /* Avoid dividing by zero if baud > 32*12M. */ exponent = 0; while (mantissa >= 2048) { if (exponent < 15) { mantissa >>= 1; /* divide by 2 */ exponent++; } else { /* Exponent is maxed. Trim mantissa and leave. */ mantissa = 2047; break; } } buf[3] = 0x80; buf[2] = exponent & 0x01; buf[1] = (exponent & ~0x01) << 4 | mantissa >> 8; buf[0] = mantissa & 0xff; /* Calculate and return the exact baud rate. */ baud = (baseline / mantissa) >> exponent; return baud; } static void pl2303_encode_baud_rate(struct tty_struct *tty, struct usb_serial_port *port, u8 buf[4]) { struct usb_serial *serial = port->serial; struct pl2303_serial_private *spriv = usb_get_serial_data(serial); speed_t baud_sup; speed_t baud; baud = tty_get_baud_rate(tty); dev_dbg(&port->dev, "baud requested = %u\n", baud); if (!baud) return; if (spriv->type->max_baud_rate) baud = min_t(speed_t, baud, spriv->type->max_baud_rate); /* * Use direct method for supported baud rates, otherwise use divisors. * Newer chip types do not support divisor encoding. */ if (spriv->type->no_divisors) baud_sup = baud; else baud_sup = pl2303_get_supported_baud_rate(baud); if (baud == baud_sup) baud = pl2303_encode_baud_rate_direct(buf, baud); else if (spriv->type->alt_divisors) baud = pl2303_encode_baud_rate_divisor_alt(buf, baud); else baud = pl2303_encode_baud_rate_divisor(buf, baud); /* Save resulting baud rate */ tty_encode_baud_rate(tty, baud, baud); dev_dbg(&port->dev, "baud set = %u\n", baud); } static int pl2303_get_line_request(struct usb_serial_port *port, unsigned char buf[7]) { struct usb_serial *serial = port->serial; struct pl2303_serial_private *spriv = usb_get_serial_data(serial); struct usb_device *udev = serial->dev; int ret; if (spriv->quirks & PL2303_QUIRK_NO_BREAK_GETLINE) { struct pl2303_private *priv = usb_get_serial_port_data(port); memcpy(buf, priv->line_settings, 7); return 0; } ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), GET_LINE_REQUEST, GET_LINE_REQUEST_TYPE, 0, 0, buf, 7, 100); if (ret != 7) { dev_err(&port->dev, "%s - failed: %d\n", __func__, ret); if (ret >= 0) ret = -EIO; return ret; } dev_dbg(&port->dev, "%s - %7ph\n", __func__, buf); return 0; } static int pl2303_set_line_request(struct usb_serial_port *port, unsigned char buf[7]) { struct usb_device *udev = port->serial->dev; int ret; ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), SET_LINE_REQUEST, SET_LINE_REQUEST_TYPE, 0, 0, buf, 7, 100); if (ret < 0) { dev_err(&port->dev, "%s - failed: %d\n", __func__, ret); return ret; } dev_dbg(&port->dev, "%s - %7ph\n", __func__, buf); return 0; } static bool pl2303_termios_change(const struct ktermios *a, const struct ktermios *b) { bool ixon_change; ixon_change = ((a->c_iflag ^ b->c_iflag) & (IXON | IXANY)) || a->c_cc[VSTART] != b->c_cc[VSTART] || a->c_cc[VSTOP] != b->c_cc[VSTOP]; return tty_termios_hw_change(a, b) || ixon_change; } static bool pl2303_enable_xonxoff(struct tty_struct *tty, const struct pl2303_type_data *type) { if (!I_IXON(tty) || I_IXANY(tty)) return false; if (START_CHAR(tty) != 0x11 || STOP_CHAR(tty) != 0x13) return false; if (type->no_autoxonxoff) return false; return true; } static void pl2303_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { struct usb_serial *serial = port->serial; struct pl2303_serial_private *spriv = usb_get_serial_data(serial); struct pl2303_private *priv = usb_get_serial_port_data(port); unsigned long flags; unsigned char *buf; int ret; u8 control; if (old_termios && !pl2303_termios_change(&tty->termios, old_termios)) return; buf = kzalloc(7, GFP_KERNEL); if (!buf) { /* Report back no change occurred */ if (old_termios) tty->termios = *old_termios; return; } pl2303_get_line_request(port, buf); buf[6] = tty_get_char_size(tty->termios.c_cflag); dev_dbg(&port->dev, "data bits = %d\n", buf[6]); /* For reference buf[0]:buf[3] baud rate value */ pl2303_encode_baud_rate(tty, port, &buf[0]); /* For reference buf[4]=0 is 1 stop bits */ /* For reference buf[4]=1 is 1.5 stop bits */ /* For reference buf[4]=2 is 2 stop bits */ if (C_CSTOPB(tty)) { /* * NOTE: Comply with "real" UARTs / RS232: * use 1.5 instead of 2 stop bits with 5 data bits */ if (C_CSIZE(tty) == CS5) { buf[4] = 1; dev_dbg(&port->dev, "stop bits = 1.5\n"); } else { buf[4] = 2; dev_dbg(&port->dev, "stop bits = 2\n"); } } else { buf[4] = 0; dev_dbg(&port->dev, "stop bits = 1\n"); } if (C_PARENB(tty)) { /* For reference buf[5]=0 is none parity */ /* For reference buf[5]=1 is odd parity */ /* For reference buf[5]=2 is even parity */ /* For reference buf[5]=3 is mark parity */ /* For reference buf[5]=4 is space parity */ if (C_PARODD(tty)) { if (C_CMSPAR(tty)) { buf[5] = 3; dev_dbg(&port->dev, "parity = mark\n"); } else { buf[5] = 1; dev_dbg(&port->dev, "parity = odd\n"); } } else { if (C_CMSPAR(tty)) { buf[5] = 4; dev_dbg(&port->dev, "parity = space\n"); } else { buf[5] = 2; dev_dbg(&port->dev, "parity = even\n"); } } } else { buf[5] = 0; dev_dbg(&port->dev, "parity = none\n"); } /* * Some PL2303 are known to lose bytes if you change serial settings * even to the same values as before. Thus we actually need to filter * in this specific case. * * Note that the tty_termios_hw_change check above is not sufficient * as a previously requested baud rate may differ from the one * actually used (and stored in old_termios). * * NOTE: No additional locking needed for line_settings as it is * only used in set_termios, which is serialised against itself. */ if (!old_termios || memcmp(buf, priv->line_settings, 7)) { ret = pl2303_set_line_request(port, buf); if (!ret) memcpy(priv->line_settings, buf, 7); } /* change control lines if we are switching to or from B0 */ spin_lock_irqsave(&priv->lock, flags); control = priv->line_control; if (C_BAUD(tty) == B0) priv->line_control &= ~(CONTROL_DTR | CONTROL_RTS); else if (old_termios && (old_termios->c_cflag & CBAUD) == B0) priv->line_control |= (CONTROL_DTR | CONTROL_RTS); if (control != priv->line_control) { control = priv->line_control; spin_unlock_irqrestore(&priv->lock, flags); pl2303_set_control_lines(port, control); } else { spin_unlock_irqrestore(&priv->lock, flags); } if (C_CRTSCTS(tty)) { if (spriv->quirks & PL2303_QUIRK_LEGACY) { pl2303_update_reg(serial, 0, PL2303_FLOWCTRL_MASK, 0x40); } else if (spriv->type == &pl2303_type_data[TYPE_HXN]) { pl2303_update_reg(serial, PL2303_HXN_FLOWCTRL_REG, PL2303_HXN_FLOWCTRL_MASK, PL2303_HXN_FLOWCTRL_RTS_CTS); } else { pl2303_update_reg(serial, 0, PL2303_FLOWCTRL_MASK, 0x60); } } else if (pl2303_enable_xonxoff(tty, spriv->type)) { if (spriv->type == &pl2303_type_data[TYPE_HXN]) { pl2303_update_reg(serial, PL2303_HXN_FLOWCTRL_REG, PL2303_HXN_FLOWCTRL_MASK, PL2303_HXN_FLOWCTRL_XON_XOFF); } else { pl2303_update_reg(serial, 0, PL2303_FLOWCTRL_MASK, 0xc0); } } else { if (spriv->type == &pl2303_type_data[TYPE_HXN]) { pl2303_update_reg(serial, PL2303_HXN_FLOWCTRL_REG, PL2303_HXN_FLOWCTRL_MASK, PL2303_HXN_FLOWCTRL_NONE); } else { pl2303_update_reg(serial, 0, PL2303_FLOWCTRL_MASK, 0); } } kfree(buf); } static void pl2303_dtr_rts(struct usb_serial_port *port, int on) { struct pl2303_private *priv = usb_get_serial_port_data(port); unsigned long flags; u8 control; spin_lock_irqsave(&priv->lock, flags); if (on) priv->line_control |= (CONTROL_DTR | CONTROL_RTS); else priv->line_control &= ~(CONTROL_DTR | CONTROL_RTS); control = priv->line_control; spin_unlock_irqrestore(&priv->lock, flags); pl2303_set_control_lines(port, control); } static void pl2303_close(struct usb_serial_port *port) { usb_serial_generic_close(port); usb_kill_urb(port->interrupt_in_urb); pl2303_set_break(port, false); } static int pl2303_open(struct tty_struct *tty, struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct pl2303_serial_private *spriv = usb_get_serial_data(serial); int result; if (spriv->quirks & PL2303_QUIRK_LEGACY) { usb_clear_halt(serial->dev, port->write_urb->pipe); usb_clear_halt(serial->dev, port->read_urb->pipe); } else { /* reset upstream data pipes */ if (spriv->type == &pl2303_type_data[TYPE_HXN]) { pl2303_vendor_write(serial, PL2303_HXN_RESET_REG, PL2303_HXN_RESET_UPSTREAM_PIPE | PL2303_HXN_RESET_DOWNSTREAM_PIPE); } else { pl2303_vendor_write(serial, 8, 0); pl2303_vendor_write(serial, 9, 0); } } /* Setup termios */ if (tty) pl2303_set_termios(tty, port, NULL); result = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); if (result) { dev_err(&port->dev, "failed to submit interrupt urb: %d\n", result); return result; } result = usb_serial_generic_open(tty, port); if (result) { usb_kill_urb(port->interrupt_in_urb); return result; } return 0; } static int pl2303_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; struct pl2303_private *priv = usb_get_serial_port_data(port); unsigned long flags; u8 control; int ret; spin_lock_irqsave(&priv->lock, flags); if (set & TIOCM_RTS) priv->line_control |= CONTROL_RTS; if (set & TIOCM_DTR) priv->line_control |= CONTROL_DTR; if (clear & TIOCM_RTS) priv->line_control &= ~CONTROL_RTS; if (clear & TIOCM_DTR) priv->line_control &= ~CONTROL_DTR; control = priv->line_control; spin_unlock_irqrestore(&priv->lock, flags); ret = pl2303_set_control_lines(port, control); if (ret) return usb_translate_errors(ret); return 0; } static int pl2303_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct pl2303_private *priv = usb_get_serial_port_data(port); unsigned long flags; unsigned int mcr; unsigned int status; unsigned int result; spin_lock_irqsave(&priv->lock, flags); mcr = priv->line_control; status = priv->line_status; spin_unlock_irqrestore(&priv->lock, flags); result = ((mcr & CONTROL_DTR) ? TIOCM_DTR : 0) | ((mcr & CONTROL_RTS) ? TIOCM_RTS : 0) | ((status & UART_CTS) ? TIOCM_CTS : 0) | ((status & UART_DSR) ? TIOCM_DSR : 0) | ((status & UART_RING) ? TIOCM_RI : 0) | ((status & UART_DCD) ? TIOCM_CD : 0); dev_dbg(&port->dev, "%s - result = %x\n", __func__, result); return result; } static int pl2303_carrier_raised(struct usb_serial_port *port) { struct pl2303_private *priv = usb_get_serial_port_data(port); if (priv->line_status & UART_DCD) return 1; return 0; } static int pl2303_set_break(struct usb_serial_port *port, bool enable) { struct usb_serial *serial = port->serial; struct pl2303_serial_private *spriv = usb_get_serial_data(serial); u16 state; int result; if (spriv->quirks & PL2303_QUIRK_NO_BREAK_GETLINE) return -ENOTTY; if (enable) state = BREAK_ON; else state = BREAK_OFF; dev_dbg(&port->dev, "%s - turning break %s\n", __func__, state == BREAK_OFF ? "off" : "on"); result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), BREAK_REQUEST, BREAK_REQUEST_TYPE, state, 0, NULL, 0, 100); if (result) { dev_err(&port->dev, "error sending break = %d\n", result); return result; } return 0; } static int pl2303_break_ctl(struct tty_struct *tty, int state) { struct usb_serial_port *port = tty->driver_data; return pl2303_set_break(port, state); } static void pl2303_update_line_status(struct usb_serial_port *port, unsigned char *data, unsigned int actual_length) { struct usb_serial *serial = port->serial; struct pl2303_serial_private *spriv = usb_get_serial_data(serial); struct pl2303_private *priv = usb_get_serial_port_data(port); struct tty_struct *tty; unsigned long flags; unsigned int status_idx = UART_STATE_INDEX; u8 status; u8 delta; if (spriv->quirks & PL2303_QUIRK_UART_STATE_IDX0) status_idx = 0; if (actual_length < status_idx + 1) return; status = data[status_idx]; /* Save off the uart status for others to look at */ spin_lock_irqsave(&priv->lock, flags); delta = priv->line_status ^ status; priv->line_status = status; spin_unlock_irqrestore(&priv->lock, flags); if (status & UART_BREAK_ERROR) usb_serial_handle_break(port); if (delta & UART_STATE_MSR_MASK) { if (delta & UART_CTS) port->icount.cts++; if (delta & UART_DSR) port->icount.dsr++; if (delta & UART_RING) port->icount.rng++; if (delta & UART_DCD) { port->icount.dcd++; tty = tty_port_tty_get(&port->port); if (tty) { usb_serial_handle_dcd_change(port, tty, status & UART_DCD); tty_kref_put(tty); } } wake_up_interruptible(&port->port.delta_msr_wait); } } static void pl2303_read_int_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; unsigned int actual_length = urb->actual_length; int status = urb->status; int retval; switch (status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(&port->dev, "%s - urb shutting down with status: %d\n", __func__, status); return; default: dev_dbg(&port->dev, "%s - nonzero urb status received: %d\n", __func__, status); goto exit; } usb_serial_debug_data(&port->dev, __func__, urb->actual_length, urb->transfer_buffer); pl2303_update_line_status(port, data, actual_length); exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) { dev_err(&port->dev, "%s - usb_submit_urb failed with result %d\n", __func__, retval); } } static void pl2303_process_read_urb(struct urb *urb) { struct usb_serial_port *port = urb->context; struct pl2303_private *priv = usb_get_serial_port_data(port); unsigned char *data = urb->transfer_buffer; char tty_flag = TTY_NORMAL; unsigned long flags; u8 line_status; int i; /* update line status */ spin_lock_irqsave(&priv->lock, flags); line_status = priv->line_status; priv->line_status &= ~UART_STATE_TRANSIENT_MASK; spin_unlock_irqrestore(&priv->lock, flags); if (!urb->actual_length) return; /* * Break takes precedence over parity, which takes precedence over * framing errors. */ if (line_status & UART_BREAK_ERROR) tty_flag = TTY_BREAK; else if (line_status & UART_PARITY_ERROR) tty_flag = TTY_PARITY; else if (line_status & UART_FRAME_ERROR) tty_flag = TTY_FRAME; if (tty_flag != TTY_NORMAL) dev_dbg(&port->dev, "%s - tty_flag = %d\n", __func__, tty_flag); /* overrun is special, not associated with a char */ if (line_status & UART_OVERRUN_ERROR) tty_insert_flip_char(&port->port, 0, TTY_OVERRUN); if (port->sysrq) { for (i = 0; i < urb->actual_length; ++i) if (!usb_serial_handle_sysrq_char(port, data[i])) tty_insert_flip_char(&port->port, data[i], tty_flag); } else { tty_insert_flip_string_fixed_flag(&port->port, data, tty_flag, urb->actual_length); } tty_flip_buffer_push(&port->port); } static struct usb_serial_driver pl2303_device = { .driver = { .name = "pl2303", }, .id_table = id_table, .num_bulk_in = 1, .num_bulk_out = 1, .num_interrupt_in = 0, /* see pl2303_calc_num_ports */ .bulk_in_size = 256, .bulk_out_size = 256, .open = pl2303_open, .close = pl2303_close, .dtr_rts = pl2303_dtr_rts, .carrier_raised = pl2303_carrier_raised, .break_ctl = pl2303_break_ctl, .set_termios = pl2303_set_termios, .tiocmget = pl2303_tiocmget, .tiocmset = pl2303_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .process_read_urb = pl2303_process_read_urb, .read_int_callback = pl2303_read_int_callback, .probe = pl2303_probe, .calc_num_ports = pl2303_calc_num_ports, .attach = pl2303_startup, .release = pl2303_release, .port_probe = pl2303_port_probe, .port_remove = pl2303_port_remove, }; static struct usb_serial_driver * const serial_drivers[] = { &pl2303_device, NULL }; module_usb_serial_driver(serial_drivers, id_table); MODULE_DESCRIPTION("Prolific PL2303 USB to serial adaptor driver"); MODULE_LICENSE("GPL v2"); |
| 18 17 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 | /* SPDX-License-Identifier: GPL-2.0 */ /* * win_minmax.h: windowed min/max tracker by Kathleen Nichols. * */ #ifndef MINMAX_H #define MINMAX_H #include <linux/types.h> /* A single data point for our parameterized min-max tracker */ struct minmax_sample { u32 t; /* time measurement was taken */ u32 v; /* value measured */ }; /* State for the parameterized min-max tracker */ struct minmax { struct minmax_sample s[3]; }; static inline u32 minmax_get(const struct minmax *m) { return m->s[0].v; } static inline u32 minmax_reset(struct minmax *m, u32 t, u32 meas) { struct minmax_sample val = { .t = t, .v = meas }; m->s[2] = m->s[1] = m->s[0] = val; return m->s[0].v; } u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas); u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas); #endif |
| 4 4 4 3 3 3 3 3 3 4 4 4 4 4 4 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Cryptographic API. * * SHA-3, as specified in * https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf * * SHA-3 code by Jeff Garzik <jeff@garzik.org> * Ard Biesheuvel <ard.biesheuvel@linaro.org> */ #include <crypto/internal/hash.h> #include <crypto/sha3.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/string.h> #include <linux/unaligned.h> /* * On some 32-bit architectures (h8300), GCC ends up using * over 1 KB of stack if we inline the round calculation into the loop * in keccakf(). On the other hand, on 64-bit architectures with plenty * of [64-bit wide] general purpose registers, not inlining it severely * hurts performance. So let's use 64-bitness as a heuristic to decide * whether to inline or not. */ #ifdef CONFIG_64BIT #define SHA3_INLINE inline #else #define SHA3_INLINE noinline #endif #define KECCAK_ROUNDS 24 static const u64 keccakf_rndc[24] = { 0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL, 0x8000000080008000ULL, 0x000000000000808bULL, 0x0000000080000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL, 0x000000000000008aULL, 0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000aULL, 0x000000008000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL, 0x8000000000008002ULL, 0x8000000000000080ULL, 0x000000000000800aULL, 0x800000008000000aULL, 0x8000000080008081ULL, 0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL }; /* update the state with given number of rounds */ static SHA3_INLINE void keccakf_round(u64 st[25]) { u64 t[5], tt, bc[5]; /* Theta */ bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20]; bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21]; bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22]; bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23]; bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24]; t[0] = bc[4] ^ rol64(bc[1], 1); t[1] = bc[0] ^ rol64(bc[2], 1); t[2] = bc[1] ^ rol64(bc[3], 1); t[3] = bc[2] ^ rol64(bc[4], 1); t[4] = bc[3] ^ rol64(bc[0], 1); st[0] ^= t[0]; /* Rho Pi */ tt = st[1]; st[ 1] = rol64(st[ 6] ^ t[1], 44); st[ 6] = rol64(st[ 9] ^ t[4], 20); st[ 9] = rol64(st[22] ^ t[2], 61); st[22] = rol64(st[14] ^ t[4], 39); st[14] = rol64(st[20] ^ t[0], 18); st[20] = rol64(st[ 2] ^ t[2], 62); st[ 2] = rol64(st[12] ^ t[2], 43); st[12] = rol64(st[13] ^ t[3], 25); st[13] = rol64(st[19] ^ t[4], 8); st[19] = rol64(st[23] ^ t[3], 56); st[23] = rol64(st[15] ^ t[0], 41); st[15] = rol64(st[ 4] ^ t[4], 27); st[ 4] = rol64(st[24] ^ t[4], 14); st[24] = rol64(st[21] ^ t[1], 2); st[21] = rol64(st[ 8] ^ t[3], 55); st[ 8] = rol64(st[16] ^ t[1], 45); st[16] = rol64(st[ 5] ^ t[0], 36); st[ 5] = rol64(st[ 3] ^ t[3], 28); st[ 3] = rol64(st[18] ^ t[3], 21); st[18] = rol64(st[17] ^ t[2], 15); st[17] = rol64(st[11] ^ t[1], 10); st[11] = rol64(st[ 7] ^ t[2], 6); st[ 7] = rol64(st[10] ^ t[0], 3); st[10] = rol64( tt ^ t[1], 1); /* Chi */ bc[ 0] = ~st[ 1] & st[ 2]; bc[ 1] = ~st[ 2] & st[ 3]; bc[ 2] = ~st[ 3] & st[ 4]; bc[ 3] = ~st[ 4] & st[ 0]; bc[ 4] = ~st[ 0] & st[ 1]; st[ 0] ^= bc[ 0]; st[ 1] ^= bc[ 1]; st[ 2] ^= bc[ 2]; st[ 3] ^= bc[ 3]; st[ 4] ^= bc[ 4]; bc[ 0] = ~st[ 6] & st[ 7]; bc[ 1] = ~st[ 7] & st[ 8]; bc[ 2] = ~st[ 8] & st[ 9]; bc[ 3] = ~st[ 9] & st[ 5]; bc[ 4] = ~st[ 5] & st[ 6]; st[ 5] ^= bc[ 0]; st[ 6] ^= bc[ 1]; st[ 7] ^= bc[ 2]; st[ 8] ^= bc[ 3]; st[ 9] ^= bc[ 4]; bc[ 0] = ~st[11] & st[12]; bc[ 1] = ~st[12] & st[13]; bc[ 2] = ~st[13] & st[14]; bc[ 3] = ~st[14] & st[10]; bc[ 4] = ~st[10] & st[11]; st[10] ^= bc[ 0]; st[11] ^= bc[ 1]; st[12] ^= bc[ 2]; st[13] ^= bc[ 3]; st[14] ^= bc[ 4]; bc[ 0] = ~st[16] & st[17]; bc[ 1] = ~st[17] & st[18]; bc[ 2] = ~st[18] & st[19]; bc[ 3] = ~st[19] & st[15]; bc[ 4] = ~st[15] & st[16]; st[15] ^= bc[ 0]; st[16] ^= bc[ 1]; st[17] ^= bc[ 2]; st[18] ^= bc[ 3]; st[19] ^= bc[ 4]; bc[ 0] = ~st[21] & st[22]; bc[ 1] = ~st[22] & st[23]; bc[ 2] = ~st[23] & st[24]; bc[ 3] = ~st[24] & st[20]; bc[ 4] = ~st[20] & st[21]; st[20] ^= bc[ 0]; st[21] ^= bc[ 1]; st[22] ^= bc[ 2]; st[23] ^= bc[ 3]; st[24] ^= bc[ 4]; } static void keccakf(u64 st[25]) { int round; for (round = 0; round < KECCAK_ROUNDS; round++) { keccakf_round(st); /* Iota */ st[0] ^= keccakf_rndc[round]; } } int crypto_sha3_init(struct shash_desc *desc) { struct sha3_state *sctx = shash_desc_ctx(desc); memset(sctx->st, 0, sizeof(sctx->st)); return 0; } EXPORT_SYMBOL(crypto_sha3_init); static int crypto_sha3_update(struct shash_desc *desc, const u8 *data, unsigned int len) { unsigned int rsiz = crypto_shash_blocksize(desc->tfm); struct sha3_state *sctx = shash_desc_ctx(desc); unsigned int rsizw = rsiz / 8; do { int i; for (i = 0; i < rsizw; i++) sctx->st[i] ^= get_unaligned_le64(data + 8 * i); keccakf(sctx->st); data += rsiz; len -= rsiz; } while (len >= rsiz); return len; } static int crypto_sha3_finup(struct shash_desc *desc, const u8 *src, unsigned int len, u8 *out) { unsigned int digest_size = crypto_shash_digestsize(desc->tfm); unsigned int rsiz = crypto_shash_blocksize(desc->tfm); struct sha3_state *sctx = shash_desc_ctx(desc); __le64 block[SHA3_224_BLOCK_SIZE / 8] = {}; __le64 *digest = (__le64 *)out; unsigned int rsizw = rsiz / 8; u8 *p; int i; p = memcpy(block, src, len); p[len++] = 0x06; p[rsiz - 1] |= 0x80; for (i = 0; i < rsizw; i++) sctx->st[i] ^= le64_to_cpu(block[i]); memzero_explicit(block, sizeof(block)); keccakf(sctx->st); for (i = 0; i < digest_size / 8; i++) put_unaligned_le64(sctx->st[i], digest++); if (digest_size & 4) put_unaligned_le32(sctx->st[i], (__le32 *)digest); return 0; } static struct shash_alg algs[] = { { .digestsize = SHA3_224_DIGEST_SIZE, .init = crypto_sha3_init, .update = crypto_sha3_update, .finup = crypto_sha3_finup, .descsize = SHA3_STATE_SIZE, .base.cra_name = "sha3-224", .base.cra_driver_name = "sha3-224-generic", .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .base.cra_blocksize = SHA3_224_BLOCK_SIZE, .base.cra_module = THIS_MODULE, }, { .digestsize = SHA3_256_DIGEST_SIZE, .init = crypto_sha3_init, .update = crypto_sha3_update, .finup = crypto_sha3_finup, .descsize = SHA3_STATE_SIZE, .base.cra_name = "sha3-256", .base.cra_driver_name = "sha3-256-generic", .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .base.cra_blocksize = SHA3_256_BLOCK_SIZE, .base.cra_module = THIS_MODULE, }, { .digestsize = SHA3_384_DIGEST_SIZE, .init = crypto_sha3_init, .update = crypto_sha3_update, .finup = crypto_sha3_finup, .descsize = SHA3_STATE_SIZE, .base.cra_name = "sha3-384", .base.cra_driver_name = "sha3-384-generic", .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .base.cra_blocksize = SHA3_384_BLOCK_SIZE, .base.cra_module = THIS_MODULE, }, { .digestsize = SHA3_512_DIGEST_SIZE, .init = crypto_sha3_init, .update = crypto_sha3_update, .finup = crypto_sha3_finup, .descsize = SHA3_STATE_SIZE, .base.cra_name = "sha3-512", .base.cra_driver_name = "sha3-512-generic", .base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .base.cra_blocksize = SHA3_512_BLOCK_SIZE, .base.cra_module = THIS_MODULE, } }; static int __init sha3_generic_mod_init(void) { return crypto_register_shashes(algs, ARRAY_SIZE(algs)); } static void __exit sha3_generic_mod_fini(void) { crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); } module_init(sha3_generic_mod_init); module_exit(sha3_generic_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA-3 Secure Hash Algorithm"); MODULE_ALIAS_CRYPTO("sha3-224"); MODULE_ALIAS_CRYPTO("sha3-224-generic"); MODULE_ALIAS_CRYPTO("sha3-256"); MODULE_ALIAS_CRYPTO("sha3-256-generic"); MODULE_ALIAS_CRYPTO("sha3-384"); MODULE_ALIAS_CRYPTO("sha3-384-generic"); MODULE_ALIAS_CRYPTO("sha3-512"); MODULE_ALIAS_CRYPTO("sha3-512-generic"); |
| 2 1 1 2 3 2 1 2 2 2 2 2 2 2 1 1 1 1 3 2 2 2 1 1 1 1 3 2 3 1 1 1 2 1 1 1 11 11 11 1 11 11 11 1 1 1 5 5 1 3 1 1 1 2 1 1 2 1 4 4 4 4 4 4 4 4 4 5 8 8 8 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 | // SPDX-License-Identifier: GPL-2.0-only /* * Minimal file system backend for holding eBPF maps and programs, * used by bpf(2) object pinning. * * Authors: * * Daniel Borkmann <daniel@iogearbox.net> */ #include <linux/init.h> #include <linux/magic.h> #include <linux/major.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/fs.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/kdev_t.h> #include <linux/filter.h> #include <linux/bpf.h> #include <linux/bpf_trace.h> #include <linux/kstrtox.h> #include "preload/bpf_preload.h" enum bpf_type { BPF_TYPE_UNSPEC = 0, BPF_TYPE_PROG, BPF_TYPE_MAP, BPF_TYPE_LINK, }; static void *bpf_any_get(void *raw, enum bpf_type type) { switch (type) { case BPF_TYPE_PROG: bpf_prog_inc(raw); break; case BPF_TYPE_MAP: bpf_map_inc_with_uref(raw); break; case BPF_TYPE_LINK: bpf_link_inc(raw); break; default: WARN_ON_ONCE(1); break; } return raw; } static void bpf_any_put(void *raw, enum bpf_type type) { switch (type) { case BPF_TYPE_PROG: bpf_prog_put(raw); break; case BPF_TYPE_MAP: bpf_map_put_with_uref(raw); break; case BPF_TYPE_LINK: bpf_link_put(raw); break; default: WARN_ON_ONCE(1); break; } } static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) { void *raw; raw = bpf_map_get_with_uref(ufd); if (!IS_ERR(raw)) { *type = BPF_TYPE_MAP; return raw; } raw = bpf_prog_get(ufd); if (!IS_ERR(raw)) { *type = BPF_TYPE_PROG; return raw; } raw = bpf_link_get_from_fd(ufd); if (!IS_ERR(raw)) { *type = BPF_TYPE_LINK; return raw; } return ERR_PTR(-EINVAL); } static const struct inode_operations bpf_dir_iops; static const struct inode_operations bpf_prog_iops = { }; static const struct inode_operations bpf_map_iops = { }; static const struct inode_operations bpf_link_iops = { }; struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir, umode_t mode) { struct inode *inode; switch (mode & S_IFMT) { case S_IFDIR: case S_IFREG: case S_IFLNK: break; default: return ERR_PTR(-EINVAL); } inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOSPC); inode->i_ino = get_next_ino(); simple_inode_init_ts(inode); inode_init_owner(&nop_mnt_idmap, inode, dir, mode); return inode; } static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) { *type = BPF_TYPE_UNSPEC; if (inode->i_op == &bpf_prog_iops) *type = BPF_TYPE_PROG; else if (inode->i_op == &bpf_map_iops) *type = BPF_TYPE_MAP; else if (inode->i_op == &bpf_link_iops) *type = BPF_TYPE_LINK; else return -EACCES; return 0; } static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode, struct inode *dir) { d_instantiate(dentry, inode); dget(dentry); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); } static struct dentry *bpf_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode *inode; inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR); if (IS_ERR(inode)) return ERR_CAST(inode); inode->i_op = &bpf_dir_iops; inode->i_fop = &simple_dir_operations; inc_nlink(inode); inc_nlink(dir); bpf_dentry_finalize(dentry, inode, dir); return NULL; } struct map_iter { void *key; bool done; }; static struct map_iter *map_iter(struct seq_file *m) { return m->private; } static struct bpf_map *seq_file_to_map(struct seq_file *m) { return file_inode(m->file)->i_private; } static void map_iter_free(struct map_iter *iter) { if (iter) { kfree(iter->key); kfree(iter); } } static struct map_iter *map_iter_alloc(struct bpf_map *map) { struct map_iter *iter; iter = kzalloc(sizeof(*iter), GFP_KERNEL | __GFP_NOWARN); if (!iter) goto error; iter->key = kzalloc(map->key_size, GFP_KERNEL | __GFP_NOWARN); if (!iter->key) goto error; return iter; error: map_iter_free(iter); return NULL; } static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos) { struct bpf_map *map = seq_file_to_map(m); void *key = map_iter(m)->key; void *prev_key; (*pos)++; if (map_iter(m)->done) return NULL; if (unlikely(v == SEQ_START_TOKEN)) prev_key = NULL; else prev_key = key; rcu_read_lock(); if (map->ops->map_get_next_key(map, prev_key, key)) { map_iter(m)->done = true; key = NULL; } rcu_read_unlock(); return key; } static void *map_seq_start(struct seq_file *m, loff_t *pos) { if (map_iter(m)->done) return NULL; return *pos ? map_iter(m)->key : SEQ_START_TOKEN; } static void map_seq_stop(struct seq_file *m, void *v) { } static int map_seq_show(struct seq_file *m, void *v) { struct bpf_map *map = seq_file_to_map(m); void *key = map_iter(m)->key; if (unlikely(v == SEQ_START_TOKEN)) { seq_puts(m, "# WARNING!! The output is for debug purpose only\n"); seq_puts(m, "# WARNING!! The output format will change\n"); } else { map->ops->map_seq_show_elem(map, key, m); } return 0; } static const struct seq_operations bpffs_map_seq_ops = { .start = map_seq_start, .next = map_seq_next, .show = map_seq_show, .stop = map_seq_stop, }; static int bpffs_map_open(struct inode *inode, struct file *file) { struct bpf_map *map = inode->i_private; struct map_iter *iter; struct seq_file *m; int err; iter = map_iter_alloc(map); if (!iter) return -ENOMEM; err = seq_open(file, &bpffs_map_seq_ops); if (err) { map_iter_free(iter); return err; } m = file->private_data; m->private = iter; return 0; } static int bpffs_map_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; map_iter_free(map_iter(m)); return seq_release(inode, file); } /* bpffs_map_fops should only implement the basic * read operation for a BPF map. The purpose is to * provide a simple user intuitive way to do * "cat bpffs/pathto/a-pinned-map". * * Other operations (e.g. write, lookup...) should be realized by * the userspace tools (e.g. bpftool) through the * BPF_OBJ_GET_INFO_BY_FD and the map's lookup/update * interface. */ static const struct file_operations bpffs_map_fops = { .open = bpffs_map_open, .read = seq_read, .release = bpffs_map_release, }; static int bpffs_obj_open(struct inode *inode, struct file *file) { return -EIO; } static const struct file_operations bpffs_obj_fops = { .open = bpffs_obj_open, }; static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw, const struct inode_operations *iops, const struct file_operations *fops) { struct inode *dir = dentry->d_parent->d_inode; struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); inode->i_op = iops; inode->i_fop = fops; inode->i_private = raw; bpf_dentry_finalize(dentry, inode, dir); return 0; } static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg) { return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops, &bpffs_obj_fops); } static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg) { struct bpf_map *map = arg; return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops, bpf_map_support_seq_show(map) ? &bpffs_map_fops : &bpffs_obj_fops); } static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg) { struct bpf_link *link = arg; return bpf_mkobj_ops(dentry, mode, arg, &bpf_link_iops, bpf_link_is_iter(link) ? &bpf_iter_fops : &bpffs_obj_fops); } static struct dentry * bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) { /* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future * extensions. That allows popoulate_bpffs() create special files. */ if ((dir->i_mode & S_IALLUGO) && strchr(dentry->d_name.name, '.')) return ERR_PTR(-EPERM); return simple_lookup(dir, dentry, flags); } static int bpf_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *target) { char *link = kstrdup(target, GFP_USER | __GFP_NOWARN); struct inode *inode; if (!link) return -ENOMEM; inode = bpf_get_inode(dir->i_sb, dir, S_IRWXUGO | S_IFLNK); if (IS_ERR(inode)) { kfree(link); return PTR_ERR(inode); } inode->i_op = &simple_symlink_inode_operations; inode->i_link = link; bpf_dentry_finalize(dentry, inode, dir); return 0; } static const struct inode_operations bpf_dir_iops = { .lookup = bpf_lookup, .mkdir = bpf_mkdir, .symlink = bpf_symlink, .rmdir = simple_rmdir, .rename = simple_rename, .link = simple_link, .unlink = simple_unlink, }; /* pin iterator link into bpffs */ static int bpf_iter_link_pin_kernel(struct dentry *parent, const char *name, struct bpf_link *link) { umode_t mode = S_IFREG | S_IRUSR; struct dentry *dentry; int ret; inode_lock(parent->d_inode); dentry = lookup_noperm(&QSTR(name), parent); if (IS_ERR(dentry)) { inode_unlock(parent->d_inode); return PTR_ERR(dentry); } ret = bpf_mkobj_ops(dentry, mode, link, &bpf_link_iops, &bpf_iter_fops); dput(dentry); inode_unlock(parent->d_inode); return ret; } static int bpf_obj_do_pin(int path_fd, const char __user *pathname, void *raw, enum bpf_type type) { struct dentry *dentry; struct inode *dir; struct path path; umode_t mode; int ret; dentry = start_creating_user_path(path_fd, pathname, &path, 0); if (IS_ERR(dentry)) return PTR_ERR(dentry); dir = d_inode(path.dentry); if (dir->i_op != &bpf_dir_iops) { ret = -EPERM; goto out; } mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); ret = security_path_mknod(&path, dentry, mode, 0); if (ret) goto out; switch (type) { case BPF_TYPE_PROG: ret = vfs_mkobj(dentry, mode, bpf_mkprog, raw); break; case BPF_TYPE_MAP: ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw); break; case BPF_TYPE_LINK: ret = vfs_mkobj(dentry, mode, bpf_mklink, raw); break; default: ret = -EPERM; } out: end_creating_path(&path, dentry); return ret; } int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname) { enum bpf_type type; void *raw; int ret; raw = bpf_fd_probe_obj(ufd, &type); if (IS_ERR(raw)) return PTR_ERR(raw); ret = bpf_obj_do_pin(path_fd, pathname, raw, type); if (ret != 0) bpf_any_put(raw, type); return ret; } static void *bpf_obj_do_get(int path_fd, const char __user *pathname, enum bpf_type *type, int flags) { struct inode *inode; struct path path; void *raw; int ret; ret = user_path_at(path_fd, pathname, LOOKUP_FOLLOW, &path); if (ret) return ERR_PTR(ret); inode = d_backing_inode(path.dentry); ret = path_permission(&path, ACC_MODE(flags)); if (ret) goto out; ret = bpf_inode_type(inode, type); if (ret) goto out; raw = bpf_any_get(inode->i_private, *type); if (!IS_ERR(raw)) touch_atime(&path); path_put(&path); return raw; out: path_put(&path); return ERR_PTR(ret); } int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags) { enum bpf_type type = BPF_TYPE_UNSPEC; int f_flags; void *raw; int ret; f_flags = bpf_get_file_flag(flags); if (f_flags < 0) return f_flags; raw = bpf_obj_do_get(path_fd, pathname, &type, f_flags); if (IS_ERR(raw)) return PTR_ERR(raw); if (type == BPF_TYPE_PROG) ret = bpf_prog_new_fd(raw); else if (type == BPF_TYPE_MAP) ret = bpf_map_new_fd(raw, f_flags); else if (type == BPF_TYPE_LINK) ret = (f_flags != O_RDWR) ? -EINVAL : bpf_link_new_fd(raw); else return -ENOENT; if (ret < 0) bpf_any_put(raw, type); return ret; } static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type) { struct bpf_prog *prog; int ret = inode_permission(&nop_mnt_idmap, inode, MAY_READ); if (ret) return ERR_PTR(ret); if (inode->i_op == &bpf_map_iops) return ERR_PTR(-EINVAL); if (inode->i_op == &bpf_link_iops) return ERR_PTR(-EINVAL); if (inode->i_op != &bpf_prog_iops) return ERR_PTR(-EACCES); prog = inode->i_private; ret = security_bpf_prog(prog); if (ret < 0) return ERR_PTR(ret); if (!bpf_prog_get_ok(prog, &type, false)) return ERR_PTR(-EINVAL); bpf_prog_inc(prog); return prog; } struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type) { struct bpf_prog *prog; struct path path; int ret = kern_path(name, LOOKUP_FOLLOW, &path); if (ret) return ERR_PTR(ret); prog = __get_prog_inode(d_backing_inode(path.dentry), type); if (!IS_ERR(prog)) touch_atime(&path); path_put(&path); return prog; } EXPORT_SYMBOL(bpf_prog_get_type_path); struct bpffs_btf_enums { const struct btf *btf; const struct btf_type *cmd_t; const struct btf_type *map_t; const struct btf_type *prog_t; const struct btf_type *attach_t; }; static int find_bpffs_btf_enums(struct bpffs_btf_enums *info) { const struct btf *btf; const struct btf_type *t; const char *name; int i, n; memset(info, 0, sizeof(*info)); btf = bpf_get_btf_vmlinux(); if (IS_ERR(btf)) return PTR_ERR(btf); if (!btf) return -ENOENT; info->btf = btf; for (i = 1, n = btf_nr_types(btf); i < n; i++) { t = btf_type_by_id(btf, i); if (!btf_type_is_enum(t)) continue; name = btf_name_by_offset(btf, t->name_off); if (!name) continue; if (strcmp(name, "bpf_cmd") == 0) info->cmd_t = t; else if (strcmp(name, "bpf_map_type") == 0) info->map_t = t; else if (strcmp(name, "bpf_prog_type") == 0) info->prog_t = t; else if (strcmp(name, "bpf_attach_type") == 0) info->attach_t = t; else continue; if (info->cmd_t && info->map_t && info->prog_t && info->attach_t) return 0; } return -ESRCH; } static bool find_btf_enum_const(const struct btf *btf, const struct btf_type *enum_t, const char *prefix, const char *str, int *value) { const struct btf_enum *e; const char *name; int i, n, pfx_len = strlen(prefix); *value = 0; if (!btf || !enum_t) return false; for (i = 0, n = btf_vlen(enum_t); i < n; i++) { e = &btf_enum(enum_t)[i]; name = btf_name_by_offset(btf, e->name_off); if (!name || strncasecmp(name, prefix, pfx_len) != 0) continue; /* match symbolic name case insensitive and ignoring prefix */ if (strcasecmp(name + pfx_len, str) == 0) { *value = e->val; return true; } } return false; } static void seq_print_delegate_opts(struct seq_file *m, const char *opt_name, const struct btf *btf, const struct btf_type *enum_t, const char *prefix, u64 delegate_msk, u64 any_msk) { const struct btf_enum *e; bool first = true; const char *name; u64 msk; int i, n, pfx_len = strlen(prefix); delegate_msk &= any_msk; /* clear unknown bits */ if (delegate_msk == 0) return; seq_printf(m, ",%s", opt_name); if (delegate_msk == any_msk) { seq_printf(m, "=any"); return; } if (btf && enum_t) { for (i = 0, n = btf_vlen(enum_t); i < n; i++) { e = &btf_enum(enum_t)[i]; name = btf_name_by_offset(btf, e->name_off); if (!name || strncasecmp(name, prefix, pfx_len) != 0) continue; msk = 1ULL << e->val; if (delegate_msk & msk) { /* emit lower-case name without prefix */ seq_putc(m, first ? '=' : ':'); name += pfx_len; while (*name) { seq_putc(m, tolower(*name)); name++; } delegate_msk &= ~msk; first = false; } } } if (delegate_msk) seq_printf(m, "%c0x%llx", first ? '=' : ':', delegate_msk); } /* * Display the mount options in /proc/mounts. */ static int bpf_show_options(struct seq_file *m, struct dentry *root) { struct inode *inode = d_inode(root); umode_t mode = inode->i_mode & S_IALLUGO & ~S_ISVTX; struct bpf_mount_opts *opts = root->d_sb->s_fs_info; u64 mask; if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID)) seq_printf(m, ",uid=%u", from_kuid_munged(&init_user_ns, inode->i_uid)); if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID)) seq_printf(m, ",gid=%u", from_kgid_munged(&init_user_ns, inode->i_gid)); if (mode != S_IRWXUGO) seq_printf(m, ",mode=%o", mode); if (opts->delegate_cmds || opts->delegate_maps || opts->delegate_progs || opts->delegate_attachs) { struct bpffs_btf_enums info; /* ignore errors, fallback to hex */ (void)find_bpffs_btf_enums(&info); mask = (1ULL << __MAX_BPF_CMD) - 1; seq_print_delegate_opts(m, "delegate_cmds", info.btf, info.cmd_t, "BPF_", opts->delegate_cmds, mask); mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1; seq_print_delegate_opts(m, "delegate_maps", info.btf, info.map_t, "BPF_MAP_TYPE_", opts->delegate_maps, mask); mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1; seq_print_delegate_opts(m, "delegate_progs", info.btf, info.prog_t, "BPF_PROG_TYPE_", opts->delegate_progs, mask); mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1; seq_print_delegate_opts(m, "delegate_attachs", info.btf, info.attach_t, "BPF_", opts->delegate_attachs, mask); } return 0; } static void bpf_destroy_inode(struct inode *inode) { enum bpf_type type; if (S_ISLNK(inode->i_mode)) kfree(inode->i_link); if (!bpf_inode_type(inode, &type)) bpf_any_put(inode->i_private, type); free_inode_nonrcu(inode); } const struct super_operations bpf_super_ops = { .statfs = simple_statfs, .drop_inode = inode_just_drop, .show_options = bpf_show_options, .destroy_inode = bpf_destroy_inode, }; enum { OPT_UID, OPT_GID, OPT_MODE, OPT_DELEGATE_CMDS, OPT_DELEGATE_MAPS, OPT_DELEGATE_PROGS, OPT_DELEGATE_ATTACHS, }; static const struct fs_parameter_spec bpf_fs_parameters[] = { fsparam_u32 ("uid", OPT_UID), fsparam_u32 ("gid", OPT_GID), fsparam_u32oct ("mode", OPT_MODE), fsparam_string ("delegate_cmds", OPT_DELEGATE_CMDS), fsparam_string ("delegate_maps", OPT_DELEGATE_MAPS), fsparam_string ("delegate_progs", OPT_DELEGATE_PROGS), fsparam_string ("delegate_attachs", OPT_DELEGATE_ATTACHS), {} }; static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct bpf_mount_opts *opts = fc->s_fs_info; struct fs_parse_result result; kuid_t uid; kgid_t gid; int opt, err; opt = fs_parse(fc, bpf_fs_parameters, param, &result); if (opt < 0) { /* We might like to report bad mount options here, but * traditionally we've ignored all mount options, so we'd * better continue to ignore non-existing options for bpf. */ if (opt == -ENOPARAM) { opt = vfs_parse_fs_param_source(fc, param); if (opt != -ENOPARAM) return opt; return 0; } if (opt < 0) return opt; } switch (opt) { case OPT_UID: uid = make_kuid(current_user_ns(), result.uint_32); if (!uid_valid(uid)) goto bad_value; /* * The requested uid must be representable in the * filesystem's idmapping. */ if (!kuid_has_mapping(fc->user_ns, uid)) goto bad_value; opts->uid = uid; break; case OPT_GID: gid = make_kgid(current_user_ns(), result.uint_32); if (!gid_valid(gid)) goto bad_value; /* * The requested gid must be representable in the * filesystem's idmapping. */ if (!kgid_has_mapping(fc->user_ns, gid)) goto bad_value; opts->gid = gid; break; case OPT_MODE: opts->mode = result.uint_32 & S_IALLUGO; break; case OPT_DELEGATE_CMDS: case OPT_DELEGATE_MAPS: case OPT_DELEGATE_PROGS: case OPT_DELEGATE_ATTACHS: { struct bpffs_btf_enums info; const struct btf_type *enum_t; const char *enum_pfx; u64 *delegate_msk, msk = 0; char *p, *str; int val; /* ignore errors, fallback to hex */ (void)find_bpffs_btf_enums(&info); switch (opt) { case OPT_DELEGATE_CMDS: delegate_msk = &opts->delegate_cmds; enum_t = info.cmd_t; enum_pfx = "BPF_"; break; case OPT_DELEGATE_MAPS: delegate_msk = &opts->delegate_maps; enum_t = info.map_t; enum_pfx = "BPF_MAP_TYPE_"; break; case OPT_DELEGATE_PROGS: delegate_msk = &opts->delegate_progs; enum_t = info.prog_t; enum_pfx = "BPF_PROG_TYPE_"; break; case OPT_DELEGATE_ATTACHS: delegate_msk = &opts->delegate_attachs; enum_t = info.attach_t; enum_pfx = "BPF_"; break; default: return -EINVAL; } str = param->string; while ((p = strsep(&str, ":"))) { if (strcmp(p, "any") == 0) { msk |= ~0ULL; } else if (find_btf_enum_const(info.btf, enum_t, enum_pfx, p, &val)) { msk |= 1ULL << val; } else { err = kstrtou64(p, 0, &msk); if (err) return err; } } /* Setting delegation mount options requires privileges */ if (msk && !capable(CAP_SYS_ADMIN)) return -EPERM; *delegate_msk |= msk; break; } default: /* ignore unknown mount options */ break; } return 0; bad_value: return invalfc(fc, "Bad value for '%s'", param->key); } struct bpf_preload_ops *bpf_preload_ops; EXPORT_SYMBOL_GPL(bpf_preload_ops); static bool bpf_preload_mod_get(void) { /* If bpf_preload.ko wasn't loaded earlier then load it now. * When bpf_preload is built into vmlinux the module's __init * function will populate it. */ if (!bpf_preload_ops) { request_module("bpf_preload"); if (!bpf_preload_ops) return false; } /* And grab the reference, so the module doesn't disappear while the * kernel is interacting with the kernel module and its UMD. */ if (!try_module_get(bpf_preload_ops->owner)) { pr_err("bpf_preload module get failed.\n"); return false; } return true; } static void bpf_preload_mod_put(void) { if (bpf_preload_ops) /* now user can "rmmod bpf_preload" if necessary */ module_put(bpf_preload_ops->owner); } static DEFINE_MUTEX(bpf_preload_lock); static int populate_bpffs(struct dentry *parent) { struct bpf_preload_info objs[BPF_PRELOAD_LINKS] = {}; int err = 0, i; /* grab the mutex to make sure the kernel interactions with bpf_preload * are serialized */ mutex_lock(&bpf_preload_lock); /* if bpf_preload.ko wasn't built into vmlinux then load it */ if (!bpf_preload_mod_get()) goto out; err = bpf_preload_ops->preload(objs); if (err) goto out_put; for (i = 0; i < BPF_PRELOAD_LINKS; i++) { bpf_link_inc(objs[i].link); err = bpf_iter_link_pin_kernel(parent, objs[i].link_name, objs[i].link); if (err) { bpf_link_put(objs[i].link); goto out_put; } } out_put: bpf_preload_mod_put(); out: mutex_unlock(&bpf_preload_lock); return err; } static int bpf_fill_super(struct super_block *sb, struct fs_context *fc) { static const struct tree_descr bpf_rfiles[] = { { "" } }; struct bpf_mount_opts *opts = sb->s_fs_info; struct inode *inode; int ret; /* Mounting an instance of BPF FS requires privileges */ if (fc->user_ns != &init_user_ns && !capable(CAP_SYS_ADMIN)) return -EPERM; ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); if (ret) return ret; sb->s_op = &bpf_super_ops; inode = sb->s_root->d_inode; inode->i_uid = opts->uid; inode->i_gid = opts->gid; inode->i_op = &bpf_dir_iops; inode->i_mode &= ~S_IALLUGO; populate_bpffs(sb->s_root); inode->i_mode |= S_ISVTX | opts->mode; return 0; } static int bpf_get_tree(struct fs_context *fc) { return get_tree_nodev(fc, bpf_fill_super); } static void bpf_free_fc(struct fs_context *fc) { kfree(fc->s_fs_info); } static const struct fs_context_operations bpf_context_ops = { .free = bpf_free_fc, .parse_param = bpf_parse_param, .get_tree = bpf_get_tree, }; /* * Set up the filesystem mount context. */ static int bpf_init_fs_context(struct fs_context *fc) { struct bpf_mount_opts *opts; opts = kzalloc(sizeof(struct bpf_mount_opts), GFP_KERNEL); if (!opts) return -ENOMEM; opts->mode = S_IRWXUGO; opts->uid = current_fsuid(); opts->gid = current_fsgid(); /* start out with no BPF token delegation enabled */ opts->delegate_cmds = 0; opts->delegate_maps = 0; opts->delegate_progs = 0; opts->delegate_attachs = 0; fc->s_fs_info = opts; fc->ops = &bpf_context_ops; return 0; } static void bpf_kill_super(struct super_block *sb) { struct bpf_mount_opts *opts = sb->s_fs_info; kill_litter_super(sb); kfree(opts); } static struct file_system_type bpf_fs_type = { .owner = THIS_MODULE, .name = "bpf", .init_fs_context = bpf_init_fs_context, .parameters = bpf_fs_parameters, .kill_sb = bpf_kill_super, .fs_flags = FS_USERNS_MOUNT, }; static int __init bpf_init(void) { int ret; ret = sysfs_create_mount_point(fs_kobj, "bpf"); if (ret) return ret; ret = register_filesystem(&bpf_fs_type); if (ret) sysfs_remove_mount_point(fs_kobj, "bpf"); return ret; } fs_initcall(bpf_init); |
| 2 2 2 2 3 1 3 3 3 3 1 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 | // SPDX-License-Identifier: GPL-2.0 #include <linux/jhash.h> #include <linux/netfilter.h> #include <linux/rcupdate.h> #include <linux/rhashtable.h> #include <linux/vmalloc.h> #include <net/genetlink.h> #include <net/netns/generic.h> #include <uapi/linux/genetlink.h> #include "ila.h" struct ila_xlat_params { struct ila_params ip; int ifindex; }; struct ila_map { struct ila_xlat_params xp; struct rhash_head node; struct ila_map __rcu *next; struct rcu_head rcu; }; #define MAX_LOCKS 1024 #define LOCKS_PER_CPU 10 static int alloc_ila_locks(struct ila_net *ilan) { return alloc_bucket_spinlocks(&ilan->xlat.locks, &ilan->xlat.locks_mask, MAX_LOCKS, LOCKS_PER_CPU, GFP_KERNEL); } static u32 hashrnd __read_mostly; static __always_inline void __ila_hash_secret_init(void) { net_get_random_once(&hashrnd, sizeof(hashrnd)); } static inline u32 ila_locator_hash(struct ila_locator loc) { u32 *v = (u32 *)loc.v32; __ila_hash_secret_init(); return jhash_2words(v[0], v[1], hashrnd); } static inline spinlock_t *ila_get_lock(struct ila_net *ilan, struct ila_locator loc) { return &ilan->xlat.locks[ila_locator_hash(loc) & ilan->xlat.locks_mask]; } static inline int ila_cmp_wildcards(struct ila_map *ila, struct ila_addr *iaddr, int ifindex) { return (ila->xp.ifindex && ila->xp.ifindex != ifindex); } static inline int ila_cmp_params(struct ila_map *ila, struct ila_xlat_params *xp) { return (ila->xp.ifindex != xp->ifindex); } static int ila_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) { const struct ila_map *ila = obj; return (ila->xp.ip.locator_match.v64 != *(__be64 *)arg->key); } static inline int ila_order(struct ila_map *ila) { int score = 0; if (ila->xp.ifindex) score += 1 << 1; return score; } static const struct rhashtable_params rht_params = { .nelem_hint = 1024, .head_offset = offsetof(struct ila_map, node), .key_offset = offsetof(struct ila_map, xp.ip.locator_match), .key_len = sizeof(u64), /* identifier */ .max_size = 1048576, .min_size = 256, .automatic_shrinking = true, .obj_cmpfn = ila_cmpfn, }; static int parse_nl_config(struct genl_info *info, struct ila_xlat_params *xp) { memset(xp, 0, sizeof(*xp)); if (info->attrs[ILA_ATTR_LOCATOR]) xp->ip.locator.v64 = (__force __be64)nla_get_u64( info->attrs[ILA_ATTR_LOCATOR]); if (info->attrs[ILA_ATTR_LOCATOR_MATCH]) xp->ip.locator_match.v64 = (__force __be64)nla_get_u64( info->attrs[ILA_ATTR_LOCATOR_MATCH]); xp->ip.csum_mode = nla_get_u8_default(info->attrs[ILA_ATTR_CSUM_MODE], ILA_CSUM_NO_ACTION); xp->ip.ident_type = nla_get_u8_default(info->attrs[ILA_ATTR_IDENT_TYPE], ILA_ATYPE_USE_FORMAT); if (info->attrs[ILA_ATTR_IFINDEX]) xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]); return 0; } /* Must be called with rcu readlock */ static inline struct ila_map *ila_lookup_wildcards(struct ila_addr *iaddr, int ifindex, struct ila_net *ilan) { struct ila_map *ila; ila = rhashtable_lookup_fast(&ilan->xlat.rhash_table, &iaddr->loc, rht_params); while (ila) { if (!ila_cmp_wildcards(ila, iaddr, ifindex)) return ila; ila = rcu_access_pointer(ila->next); } return NULL; } /* Must be called with rcu readlock */ static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *xp, struct ila_net *ilan) { struct ila_map *ila; ila = rhashtable_lookup_fast(&ilan->xlat.rhash_table, &xp->ip.locator_match, rht_params); while (ila) { if (!ila_cmp_params(ila, xp)) return ila; ila = rcu_access_pointer(ila->next); } return NULL; } static inline void ila_release(struct ila_map *ila) { kfree_rcu(ila, rcu); } static void ila_free_node(struct ila_map *ila) { struct ila_map *next; /* Assume rcu_readlock held */ while (ila) { next = rcu_access_pointer(ila->next); ila_release(ila); ila = next; } } static void ila_free_cb(void *ptr, void *arg) { ila_free_node((struct ila_map *)ptr); } static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila); static unsigned int ila_nf_input(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { ila_xlat_addr(skb, false); return NF_ACCEPT; } static const struct nf_hook_ops ila_nf_hook_ops[] = { { .hook = ila_nf_input, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = -1, }, }; static DEFINE_MUTEX(ila_mutex); static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp) { struct ila_net *ilan = net_generic(net, ila_net_id); struct ila_map *ila, *head; spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match); int err = 0, order; if (!READ_ONCE(ilan->xlat.hooks_registered)) { /* We defer registering net hooks in the namespace until the * first mapping is added. */ mutex_lock(&ila_mutex); if (!ilan->xlat.hooks_registered) { err = nf_register_net_hooks(net, ila_nf_hook_ops, ARRAY_SIZE(ila_nf_hook_ops)); if (!err) WRITE_ONCE(ilan->xlat.hooks_registered, true); } mutex_unlock(&ila_mutex); if (err) return err; } ila = kzalloc(sizeof(*ila), GFP_KERNEL); if (!ila) return -ENOMEM; ila_init_saved_csum(&xp->ip); ila->xp = *xp; order = ila_order(ila); spin_lock(lock); head = rhashtable_lookup_fast(&ilan->xlat.rhash_table, &xp->ip.locator_match, rht_params); if (!head) { /* New entry for the rhash_table */ err = rhashtable_lookup_insert_fast(&ilan->xlat.rhash_table, &ila->node, rht_params); } else { struct ila_map *tila = head, *prev = NULL; do { if (!ila_cmp_params(tila, xp)) { err = -EEXIST; goto out; } if (order > ila_order(tila)) break; prev = tila; tila = rcu_dereference_protected(tila->next, lockdep_is_held(lock)); } while (tila); if (prev) { /* Insert in sub list of head */ RCU_INIT_POINTER(ila->next, tila); rcu_assign_pointer(prev->next, ila); } else { /* Make this ila new head */ RCU_INIT_POINTER(ila->next, head); err = rhashtable_replace_fast(&ilan->xlat.rhash_table, &head->node, &ila->node, rht_params); if (err) goto out; } } out: spin_unlock(lock); if (err) kfree(ila); return err; } static int ila_del_mapping(struct net *net, struct ila_xlat_params *xp) { struct ila_net *ilan = net_generic(net, ila_net_id); struct ila_map *ila, *head, *prev; spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match); int err = -ENOENT; spin_lock(lock); head = rhashtable_lookup_fast(&ilan->xlat.rhash_table, &xp->ip.locator_match, rht_params); ila = head; prev = NULL; while (ila) { if (ila_cmp_params(ila, xp)) { prev = ila; ila = rcu_dereference_protected(ila->next, lockdep_is_held(lock)); continue; } err = 0; if (prev) { /* Not head, just delete from list */ rcu_assign_pointer(prev->next, ila->next); } else { /* It is the head. If there is something in the * sublist we need to make a new head. */ head = rcu_dereference_protected(ila->next, lockdep_is_held(lock)); if (head) { /* Put first entry in the sublist into the * table */ err = rhashtable_replace_fast( &ilan->xlat.rhash_table, &ila->node, &head->node, rht_params); if (err) goto out; } else { /* Entry no longer used */ err = rhashtable_remove_fast( &ilan->xlat.rhash_table, &ila->node, rht_params); } } ila_release(ila); break; } out: spin_unlock(lock); return err; } int ila_xlat_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); struct ila_xlat_params p; int err; err = parse_nl_config(info, &p); if (err) return err; return ila_add_mapping(net, &p); } int ila_xlat_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); struct ila_xlat_params xp; int err; err = parse_nl_config(info, &xp); if (err) return err; ila_del_mapping(net, &xp); return 0; } static inline spinlock_t *lock_from_ila_map(struct ila_net *ilan, struct ila_map *ila) { return ila_get_lock(ilan, ila->xp.ip.locator_match); } int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); struct ila_net *ilan = net_generic(net, ila_net_id); struct rhashtable_iter iter; struct ila_map *ila; spinlock_t *lock; int ret = 0; rhashtable_walk_enter(&ilan->xlat.rhash_table, &iter); rhashtable_walk_start(&iter); for (;;) { ila = rhashtable_walk_next(&iter); if (IS_ERR(ila)) { if (PTR_ERR(ila) == -EAGAIN) continue; ret = PTR_ERR(ila); goto done; } else if (!ila) { break; } lock = lock_from_ila_map(ilan, ila); spin_lock(lock); ret = rhashtable_remove_fast(&ilan->xlat.rhash_table, &ila->node, rht_params); if (!ret) ila_free_node(ila); spin_unlock(lock); if (ret) break; } done: rhashtable_walk_stop(&iter); rhashtable_walk_exit(&iter); return ret; } static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg) { if (nla_put_u64_64bit(msg, ILA_ATTR_LOCATOR, (__force u64)ila->xp.ip.locator.v64, ILA_ATTR_PAD) || nla_put_u64_64bit(msg, ILA_ATTR_LOCATOR_MATCH, (__force u64)ila->xp.ip.locator_match.v64, ILA_ATTR_PAD) || nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) || nla_put_u8(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode) || nla_put_u8(msg, ILA_ATTR_IDENT_TYPE, ila->xp.ip.ident_type)) return -1; return 0; } static int ila_dump_info(struct ila_map *ila, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { void *hdr; hdr = genlmsg_put(skb, portid, seq, &ila_nl_family, flags, cmd); if (!hdr) return -ENOMEM; if (ila_fill_info(ila, skb) < 0) goto nla_put_failure; genlmsg_end(skb, hdr); return 0; nla_put_failure: genlmsg_cancel(skb, hdr); return -EMSGSIZE; } int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); struct ila_net *ilan = net_generic(net, ila_net_id); struct sk_buff *msg; struct ila_xlat_params xp; struct ila_map *ila; int ret; ret = parse_nl_config(info, &xp); if (ret) return ret; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; rcu_read_lock(); ret = -ESRCH; ila = ila_lookup_by_params(&xp, ilan); if (ila) { ret = ila_dump_info(ila, info->snd_portid, info->snd_seq, 0, msg, info->genlhdr->cmd); } rcu_read_unlock(); if (ret < 0) goto out_free; return genlmsg_reply(msg, info); out_free: nlmsg_free(msg); return ret; } struct ila_dump_iter { struct rhashtable_iter rhiter; int skip; }; int ila_xlat_nl_dump_start(struct netlink_callback *cb) { struct net *net = sock_net(cb->skb->sk); struct ila_net *ilan = net_generic(net, ila_net_id); struct ila_dump_iter *iter; iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return -ENOMEM; rhashtable_walk_enter(&ilan->xlat.rhash_table, &iter->rhiter); iter->skip = 0; cb->args[0] = (long)iter; return 0; } int ila_xlat_nl_dump_done(struct netlink_callback *cb) { struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0]; rhashtable_walk_exit(&iter->rhiter); kfree(iter); return 0; } int ila_xlat_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0]; struct rhashtable_iter *rhiter = &iter->rhiter; int skip = iter->skip; struct ila_map *ila; int ret; rhashtable_walk_start(rhiter); /* Get first entry */ ila = rhashtable_walk_peek(rhiter); if (ila && !IS_ERR(ila) && skip) { /* Skip over visited entries */ while (ila && skip) { /* Skip over any ila entries in this list that we * have already dumped. */ ila = rcu_access_pointer(ila->next); skip--; } } skip = 0; for (;;) { if (IS_ERR(ila)) { ret = PTR_ERR(ila); if (ret == -EAGAIN) { /* Table has changed and iter has reset. Return * -EAGAIN to the application even if we have * written data to the skb. The application * needs to deal with this. */ goto out_ret; } else { break; } } else if (!ila) { ret = 0; break; } while (ila) { ret = ila_dump_info(ila, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, skb, ILA_CMD_GET); if (ret) goto out; skip++; ila = rcu_access_pointer(ila->next); } skip = 0; ila = rhashtable_walk_next(rhiter); } out: iter->skip = skip; ret = (skb->len ? : ret); out_ret: rhashtable_walk_stop(rhiter); return ret; } int ila_xlat_init_net(struct net *net) { struct ila_net *ilan = net_generic(net, ila_net_id); int err; err = alloc_ila_locks(ilan); if (err) return err; err = rhashtable_init(&ilan->xlat.rhash_table, &rht_params); if (err) { free_bucket_spinlocks(ilan->xlat.locks); return err; } return 0; } void ila_xlat_pre_exit_net(struct net *net) { struct ila_net *ilan = net_generic(net, ila_net_id); if (ilan->xlat.hooks_registered) nf_unregister_net_hooks(net, ila_nf_hook_ops, ARRAY_SIZE(ila_nf_hook_ops)); } void ila_xlat_exit_net(struct net *net) { struct ila_net *ilan = net_generic(net, ila_net_id); rhashtable_free_and_destroy(&ilan->xlat.rhash_table, ila_free_cb, NULL); free_bucket_spinlocks(ilan->xlat.locks); } static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila) { struct ila_map *ila; struct ipv6hdr *ip6h = ipv6_hdr(skb); struct net *net = dev_net(skb->dev); struct ila_net *ilan = net_generic(net, ila_net_id); struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); /* Assumes skb contains a valid IPv6 header that is pulled */ /* No check here that ILA type in the mapping matches what is in the * address. We assume that whatever sender gaves us can be translated. * The checksum mode however is relevant. */ rcu_read_lock(); ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan); if (ila) ila_update_ipv6_locator(skb, &ila->xp.ip, sir2ila); rcu_read_unlock(); return 0; } |
| 57 56 84 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 | /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM page_pool #if !defined(_TRACE_PAGE_POOL_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_PAGE_POOL_H #include <linux/types.h> #include <linux/tracepoint.h> #include <trace/events/mmflags.h> #include <net/page_pool/types.h> TRACE_EVENT(page_pool_release, TP_PROTO(const struct page_pool *pool, s32 inflight, u32 hold, u32 release), TP_ARGS(pool, inflight, hold, release), TP_STRUCT__entry( __field(const struct page_pool *, pool) __field(s32, inflight) __field(u32, hold) __field(u32, release) __field(u64, cnt) ), TP_fast_assign( __entry->pool = pool; __entry->inflight = inflight; __entry->hold = hold; __entry->release = release; __entry->cnt = pool->destroy_cnt; ), TP_printk("page_pool=%p inflight=%d hold=%u release=%u cnt=%llu", __entry->pool, __entry->inflight, __entry->hold, __entry->release, __entry->cnt) ); TRACE_EVENT(page_pool_state_release, TP_PROTO(const struct page_pool *pool, netmem_ref netmem, u32 release), TP_ARGS(pool, netmem, release), TP_STRUCT__entry( __field(const struct page_pool *, pool) __field(unsigned long, netmem) __field(u32, release) __field(unsigned long, pfn) ), TP_fast_assign( __entry->pool = pool; __entry->netmem = (__force unsigned long)netmem; __entry->release = release; __entry->pfn = netmem_pfn_trace(netmem); ), TP_printk("page_pool=%p netmem=%p is_net_iov=%lu pfn=0x%lx release=%u", __entry->pool, (void *)__entry->netmem, __entry->netmem & NET_IOV, __entry->pfn, __entry->release) ); TRACE_EVENT(page_pool_state_hold, TP_PROTO(const struct page_pool *pool, netmem_ref netmem, u32 hold), TP_ARGS(pool, netmem, hold), TP_STRUCT__entry( __field(const struct page_pool *, pool) __field(unsigned long, netmem) __field(u32, hold) __field(unsigned long, pfn) ), TP_fast_assign( __entry->pool = pool; __entry->netmem = (__force unsigned long)netmem; __entry->hold = hold; __entry->pfn = netmem_pfn_trace(netmem); ), TP_printk("page_pool=%p netmem=%p is_net_iov=%lu, pfn=0x%lx hold=%u", __entry->pool, (void *)__entry->netmem, __entry->netmem & NET_IOV, __entry->pfn, __entry->hold) ); TRACE_EVENT(page_pool_update_nid, TP_PROTO(const struct page_pool *pool, int new_nid), TP_ARGS(pool, new_nid), TP_STRUCT__entry( __field(const struct page_pool *, pool) __field(int, pool_nid) __field(int, new_nid) ), TP_fast_assign( __entry->pool = pool; __entry->pool_nid = pool->p.nid; __entry->new_nid = new_nid; ), TP_printk("page_pool=%p pool_nid=%d new_nid=%d", __entry->pool, __entry->pool_nid, __entry->new_nid) ); #endif /* _TRACE_PAGE_POOL_H */ /* This part must be outside protection */ #include <trace/define_trace.h> |
| 13 56 56 13 83 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NETMEM_PRIV_H #define __NETMEM_PRIV_H static inline unsigned long netmem_get_pp_magic(netmem_ref netmem) { return __netmem_clear_lsb(netmem)->pp_magic & ~PP_DMA_INDEX_MASK; } static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic) { __netmem_clear_lsb(netmem)->pp_magic |= pp_magic; } static inline void netmem_clear_pp_magic(netmem_ref netmem) { WARN_ON_ONCE(__netmem_clear_lsb(netmem)->pp_magic & PP_DMA_INDEX_MASK); __netmem_clear_lsb(netmem)->pp_magic = 0; } static inline bool netmem_is_pp(netmem_ref netmem) { return (netmem_get_pp_magic(netmem) & PP_MAGIC_MASK) == PP_SIGNATURE; } static inline void netmem_set_pp(netmem_ref netmem, struct page_pool *pool) { __netmem_clear_lsb(netmem)->pp = pool; } static inline void netmem_set_dma_addr(netmem_ref netmem, unsigned long dma_addr) { __netmem_clear_lsb(netmem)->dma_addr = dma_addr; } static inline unsigned long netmem_get_dma_index(netmem_ref netmem) { unsigned long magic; if (WARN_ON_ONCE(netmem_is_net_iov(netmem))) return 0; magic = __netmem_clear_lsb(netmem)->pp_magic; return (magic & PP_DMA_INDEX_MASK) >> PP_DMA_INDEX_SHIFT; } static inline void netmem_set_dma_index(netmem_ref netmem, unsigned long id) { unsigned long magic; if (WARN_ON_ONCE(netmem_is_net_iov(netmem))) return; magic = netmem_get_pp_magic(netmem) | (id << PP_DMA_INDEX_SHIFT); __netmem_clear_lsb(netmem)->pp_magic = magic; } #endif |
| 1 1 1 1 1 1 1 3 1 2 1 1 3 7 1 6 1 5 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C)2003-2006 Helsinki University of Technology * Copyright (C)2003-2006 USAGI/WIDE Project */ /* * Authors: * Noriaki TAKAMIYA @USAGI * Masahide NAKAMURA @USAGI */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/time.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <net/sock.h> #include <net/ipv6.h> #include <net/ip6_checksum.h> #include <net/rawv6.h> #include <net/xfrm.h> #include <net/mip6.h> static inline unsigned int calc_padlen(unsigned int len, unsigned int n) { return (n - len + 16) & 0x7; } static inline void *mip6_padn(__u8 *data, __u8 padlen) { if (!data) return NULL; if (padlen == 1) { data[0] = IPV6_TLV_PAD1; } else if (padlen > 1) { data[0] = IPV6_TLV_PADN; data[1] = padlen - 2; if (padlen > 2) memset(data+2, 0, data[1]); } return data + padlen; } static inline void mip6_param_prob(struct sk_buff *skb, u8 code, int pos) { icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos); } static int mip6_mh_len(int type) { int len = 0; switch (type) { case IP6_MH_TYPE_BRR: len = 0; break; case IP6_MH_TYPE_HOTI: case IP6_MH_TYPE_COTI: case IP6_MH_TYPE_BU: case IP6_MH_TYPE_BACK: len = 1; break; case IP6_MH_TYPE_HOT: case IP6_MH_TYPE_COT: case IP6_MH_TYPE_BERROR: len = 2; break; } return len; } static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) { struct ip6_mh _hdr; const struct ip6_mh *mh; mh = skb_header_pointer(skb, skb_transport_offset(skb), sizeof(_hdr), &_hdr); if (!mh) return -1; if (((mh->ip6mh_hdrlen + 1) << 3) > skb->len) return -1; if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) { net_dbg_ratelimited("mip6: MH message too short: %d vs >=%d\n", mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) + skb_network_header_len(skb)); return -1; } if (mh->ip6mh_proto != IPPROTO_NONE) { net_dbg_ratelimited("mip6: MH invalid payload proto = %d\n", mh->ip6mh_proto); mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) + skb_network_header_len(skb)); return -1; } return 0; } struct mip6_report_rate_limiter { spinlock_t lock; ktime_t stamp; int iif; struct in6_addr src; struct in6_addr dst; }; static struct mip6_report_rate_limiter mip6_report_rl = { .lock = __SPIN_LOCK_UNLOCKED(mip6_report_rl.lock) }; static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb) { const struct ipv6hdr *iph = ipv6_hdr(skb); struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data; int err = destopt->nexthdr; spin_lock(&x->lock); if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) && !ipv6_addr_any((struct in6_addr *)x->coaddr)) err = -ENOENT; spin_unlock(&x->lock); return err; } /* Destination Option Header is inserted. * IP Header's src address is replaced with Home Address Option in * Destination Option Header. */ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb) { struct ipv6hdr *iph; struct ipv6_destopt_hdr *dstopt; struct ipv6_destopt_hao *hao; u8 nexthdr; int len; skb_push(skb, -skb_network_offset(skb)); iph = ipv6_hdr(skb); nexthdr = *skb_mac_header(skb); *skb_mac_header(skb) = IPPROTO_DSTOPTS; dstopt = (struct ipv6_destopt_hdr *)skb_transport_header(skb); dstopt->nexthdr = nexthdr; hao = mip6_padn((char *)(dstopt + 1), calc_padlen(sizeof(*dstopt), 6)); hao->type = IPV6_TLV_HAO; BUILD_BUG_ON(sizeof(*hao) != 18); hao->length = sizeof(*hao) - 2; len = ((char *)hao - (char *)dstopt) + sizeof(*hao); memcpy(&hao->addr, &iph->saddr, sizeof(hao->addr)); spin_lock_bh(&x->lock); memcpy(&iph->saddr, x->coaddr, sizeof(iph->saddr)); spin_unlock_bh(&x->lock); WARN_ON(len != x->props.header_len); dstopt->hdrlen = (x->props.header_len >> 3) - 1; return 0; } static inline int mip6_report_rl_allow(ktime_t stamp, const struct in6_addr *dst, const struct in6_addr *src, int iif) { int allow = 0; spin_lock_bh(&mip6_report_rl.lock); if (mip6_report_rl.stamp != stamp || mip6_report_rl.iif != iif || !ipv6_addr_equal(&mip6_report_rl.src, src) || !ipv6_addr_equal(&mip6_report_rl.dst, dst)) { mip6_report_rl.stamp = stamp; mip6_report_rl.iif = iif; mip6_report_rl.src = *src; mip6_report_rl.dst = *dst; allow = 1; } spin_unlock_bh(&mip6_report_rl.lock); return allow; } static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, const struct flowi *fl) { struct net *net = xs_net(x); struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb; const struct flowi6 *fl6 = &fl->u.ip6; struct ipv6_destopt_hao *hao = NULL; struct xfrm_selector sel; int offset; ktime_t stamp; int err = 0; if (unlikely(fl6->flowi6_proto == IPPROTO_MH && fl6->fl6_mh_type <= IP6_MH_TYPE_MAX)) goto out; if (likely(opt->dsthao)) { offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO); if (likely(offset >= 0)) hao = (struct ipv6_destopt_hao *) (skb_network_header(skb) + offset); } stamp = skb_get_ktime(skb); if (!mip6_report_rl_allow(stamp, &ipv6_hdr(skb)->daddr, hao ? &hao->addr : &ipv6_hdr(skb)->saddr, opt->iif)) goto out; memset(&sel, 0, sizeof(sel)); memcpy(&sel.daddr, (xfrm_address_t *)&ipv6_hdr(skb)->daddr, sizeof(sel.daddr)); sel.prefixlen_d = 128; memcpy(&sel.saddr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr, sizeof(sel.saddr)); sel.prefixlen_s = 128; sel.family = AF_INET6; sel.proto = fl6->flowi6_proto; sel.dport = xfrm_flowi_dport(fl, &fl6->uli); if (sel.dport) sel.dport_mask = htons(~0); sel.sport = xfrm_flowi_sport(fl, &fl6->uli); if (sel.sport) sel.sport_mask = htons(~0); sel.ifindex = fl6->flowi6_oif; err = km_report(net, IPPROTO_DSTOPTS, &sel, (hao ? (xfrm_address_t *)&hao->addr : NULL)); out: return err; } static int mip6_destopt_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { if (x->id.spi) { NL_SET_ERR_MSG(extack, "SPI must be 0"); return -EINVAL; } if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) { NL_SET_ERR_MSG(extack, "XFRM mode must be XFRM_MODE_ROUTEOPTIMIZATION"); return -EINVAL; } x->props.header_len = sizeof(struct ipv6_destopt_hdr) + calc_padlen(sizeof(struct ipv6_destopt_hdr), 6) + sizeof(struct ipv6_destopt_hao); WARN_ON(x->props.header_len != 24); return 0; } /* * Do nothing about destroying since it has no specific operation for * destination options header unlike IPsec protocols. */ static void mip6_destopt_destroy(struct xfrm_state *x) { } static const struct xfrm_type mip6_destopt_type = { .owner = THIS_MODULE, .proto = IPPROTO_DSTOPTS, .flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_LOCAL_COADDR, .init_state = mip6_destopt_init_state, .destructor = mip6_destopt_destroy, .input = mip6_destopt_input, .output = mip6_destopt_output, .reject = mip6_destopt_reject, }; static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb) { const struct ipv6hdr *iph = ipv6_hdr(skb); struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data; int err = rt2->rt_hdr.nexthdr; spin_lock(&x->lock); if (!ipv6_addr_equal(&iph->daddr, (struct in6_addr *)x->coaddr) && !ipv6_addr_any((struct in6_addr *)x->coaddr)) err = -ENOENT; spin_unlock(&x->lock); return err; } /* Routing Header type 2 is inserted. * IP Header's dst address is replaced with Routing Header's Home Address. */ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb) { struct ipv6hdr *iph; struct rt2_hdr *rt2; u8 nexthdr; skb_push(skb, -skb_network_offset(skb)); iph = ipv6_hdr(skb); nexthdr = *skb_mac_header(skb); *skb_mac_header(skb) = IPPROTO_ROUTING; rt2 = (struct rt2_hdr *)skb_transport_header(skb); rt2->rt_hdr.nexthdr = nexthdr; rt2->rt_hdr.hdrlen = (x->props.header_len >> 3) - 1; rt2->rt_hdr.type = IPV6_SRCRT_TYPE_2; rt2->rt_hdr.segments_left = 1; memset(&rt2->reserved, 0, sizeof(rt2->reserved)); WARN_ON(rt2->rt_hdr.hdrlen != 2); memcpy(&rt2->addr, &iph->daddr, sizeof(rt2->addr)); spin_lock_bh(&x->lock); memcpy(&iph->daddr, x->coaddr, sizeof(iph->daddr)); spin_unlock_bh(&x->lock); return 0; } static int mip6_rthdr_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { if (x->id.spi) { NL_SET_ERR_MSG(extack, "SPI must be 0"); return -EINVAL; } if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) { NL_SET_ERR_MSG(extack, "XFRM mode must be XFRM_MODE_ROUTEOPTIMIZATION"); return -EINVAL; } x->props.header_len = sizeof(struct rt2_hdr); return 0; } /* * Do nothing about destroying since it has no specific operation for routing * header type 2 unlike IPsec protocols. */ static void mip6_rthdr_destroy(struct xfrm_state *x) { } static const struct xfrm_type mip6_rthdr_type = { .owner = THIS_MODULE, .proto = IPPROTO_ROUTING, .flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_REMOTE_COADDR, .init_state = mip6_rthdr_init_state, .destructor = mip6_rthdr_destroy, .input = mip6_rthdr_input, .output = mip6_rthdr_output, }; static int __init mip6_init(void) { pr_info("Mobile IPv6\n"); if (xfrm_register_type(&mip6_destopt_type, AF_INET6) < 0) { pr_info("%s: can't add xfrm type(destopt)\n", __func__); goto mip6_destopt_xfrm_fail; } if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) { pr_info("%s: can't add xfrm type(rthdr)\n", __func__); goto mip6_rthdr_xfrm_fail; } if (rawv6_mh_filter_register(mip6_mh_filter) < 0) { pr_info("%s: can't add rawv6 mh filter\n", __func__); goto mip6_rawv6_mh_fail; } return 0; mip6_rawv6_mh_fail: xfrm_unregister_type(&mip6_rthdr_type, AF_INET6); mip6_rthdr_xfrm_fail: xfrm_unregister_type(&mip6_destopt_type, AF_INET6); mip6_destopt_xfrm_fail: return -EAGAIN; } static void __exit mip6_fini(void) { if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0) pr_info("%s: can't remove rawv6 mh filter\n", __func__); xfrm_unregister_type(&mip6_rthdr_type, AF_INET6); xfrm_unregister_type(&mip6_destopt_type, AF_INET6); } module_init(mip6_init); module_exit(mip6_fini); MODULE_DESCRIPTION("IPv6 Mobility driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_DSTOPTS); MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ROUTING); |
| 8 1 7 7 8 2 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 | // SPDX-License-Identifier: GPL-2.0-only /* * (C) 2013 Astaro GmbH & Co KG */ #include <linux/module.h> #include <linux/skbuff.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_labels.h> #include <linux/netfilter/x_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); MODULE_DESCRIPTION("Xtables: add/match connection tracking labels"); MODULE_ALIAS("ipt_connlabel"); MODULE_ALIAS("ip6t_connlabel"); static bool connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_connlabel_mtinfo *info = par->matchinfo; enum ip_conntrack_info ctinfo; struct nf_conn_labels *labels; struct nf_conn *ct; bool invert = info->options & XT_CONNLABEL_OP_INVERT; ct = nf_ct_get(skb, &ctinfo); if (ct == NULL) return invert; labels = nf_ct_labels_find(ct); if (!labels) return invert; if (test_bit(info->bit, labels->bits)) return !invert; if (info->options & XT_CONNLABEL_OP_SET) { if (!test_and_set_bit(info->bit, labels->bits)) nf_conntrack_event_cache(IPCT_LABEL, ct); return !invert; } return invert; } static int connlabel_mt_check(const struct xt_mtchk_param *par) { const int options = XT_CONNLABEL_OP_INVERT | XT_CONNLABEL_OP_SET; struct xt_connlabel_mtinfo *info = par->matchinfo; int ret; if (info->options & ~options) { pr_info_ratelimited("Unknown options in mask %x\n", info->options); return -EINVAL; } ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { pr_info_ratelimited("cannot load conntrack support for proto=%u\n", par->family); return ret; } ret = nf_connlabels_get(par->net, info->bit); if (ret < 0) nf_ct_netns_put(par->net, par->family); return ret; } static void connlabel_mt_destroy(const struct xt_mtdtor_param *par) { nf_connlabels_put(par->net); nf_ct_netns_put(par->net, par->family); } static struct xt_match connlabels_mt_reg __read_mostly = { .name = "connlabel", .family = NFPROTO_UNSPEC, .checkentry = connlabel_mt_check, .match = connlabel_mt, .matchsize = sizeof(struct xt_connlabel_mtinfo), .destroy = connlabel_mt_destroy, .me = THIS_MODULE, }; static int __init connlabel_mt_init(void) { return xt_register_match(&connlabels_mt_reg); } static void __exit connlabel_mt_exit(void) { xt_unregister_match(&connlabels_mt_reg); } module_init(connlabel_mt_init); module_exit(connlabel_mt_exit); |
| 134 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NET_DST_CACHE_H #define _NET_DST_CACHE_H #include <linux/jiffies.h> #include <net/dst.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ip6_fib.h> #endif struct dst_cache { struct dst_cache_pcpu __percpu *cache; unsigned long reset_ts; }; /** * dst_cache_get - perform cache lookup * @dst_cache: the cache * * The caller should use dst_cache_get_ip4() if it need to retrieve the * source address to be used when xmitting to the cached dst. * local BH must be disabled. */ struct dst_entry *dst_cache_get(struct dst_cache *dst_cache); /** * dst_cache_get_ip4 - perform cache lookup and fetch ipv4 source address * @dst_cache: the cache * @saddr: return value for the retrieved source address * * local BH must be disabled. */ struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr); /** * dst_cache_set_ip4 - store the ipv4 dst into the cache * @dst_cache: the cache * @dst: the entry to be cached * @saddr: the source address to be stored inside the cache * * local BH must be disabled. */ void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst, __be32 saddr); #if IS_ENABLED(CONFIG_IPV6) /** * dst_cache_set_ip6 - store the ipv6 dst into the cache * @dst_cache: the cache * @dst: the entry to be cached * @saddr: the source address to be stored inside the cache * * local BH must be disabled. */ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst, const struct in6_addr *saddr); /** * dst_cache_get_ip6 - perform cache lookup and fetch ipv6 source address * @dst_cache: the cache * @saddr: return value for the retrieved source address * * local BH must be disabled. */ struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache, struct in6_addr *saddr); #endif /** * dst_cache_reset - invalidate the cache contents * @dst_cache: the cache * * This does not free the cached dst to avoid races and contentions. * the dst will be freed on later cache lookup. */ static inline void dst_cache_reset(struct dst_cache *dst_cache) { WRITE_ONCE(dst_cache->reset_ts, jiffies); } /** * dst_cache_reset_now - invalidate the cache contents immediately * @dst_cache: the cache * * The caller must be sure there are no concurrent users, as this frees * all dst_cache users immediately, rather than waiting for the next * per-cpu usage like dst_cache_reset does. Most callers should use the * higher speed lazily-freed dst_cache_reset function instead. */ void dst_cache_reset_now(struct dst_cache *dst_cache); /** * dst_cache_init - initialize the cache, allocating the required storage * @dst_cache: the cache * @gfp: allocation flags */ int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp); /** * dst_cache_destroy - empty the cache and free the allocated storage * @dst_cache: the cache * * No synchronization is enforced: it must be called only when the cache * is unused. */ void dst_cache_destroy(struct dst_cache *dst_cache); #endif |
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 | /* SPDX-License-Identifier: GPL-2.0-only */ #ifndef _LINUX_CRC32_H #define _LINUX_CRC32_H #include <linux/types.h> #include <linux/bitrev.h> /** * crc32_le() - Compute least-significant-bit-first IEEE CRC-32 * @crc: Initial CRC value. ~0 (recommended) or 0 for a new CRC computation, or * the previous CRC value if computing incrementally. * @p: Pointer to the data buffer * @len: Length of data in bytes * * This implements the CRC variant that is often known as the IEEE CRC-32, or * simply CRC-32, and is widely used in Ethernet and other applications: * * - Polynomial: x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + * x^7 + x^5 + x^4 + x^2 + x^1 + x^0 * - Bit order: Least-significant-bit-first * - Polynomial in integer form: 0xedb88320 * * This does *not* invert the CRC at the beginning or end. The caller is * expected to do that if it needs to. Inverting at both ends is recommended. * * For new applications, prefer to use CRC-32C instead. See crc32c(). * * Context: Any context * Return: The new CRC value */ u32 crc32_le(u32 crc, const void *p, size_t len); /* This is just an alias for crc32_le(). */ static inline u32 crc32(u32 crc, const void *p, size_t len) { return crc32_le(crc, p, len); } /** * crc32_be() - Compute most-significant-bit-first IEEE CRC-32 * @crc: Initial CRC value. ~0 (recommended) or 0 for a new CRC computation, or * the previous CRC value if computing incrementally. * @p: Pointer to the data buffer * @len: Length of data in bytes * * crc32_be() is the same as crc32_le() except that crc32_be() computes the * *most-significant-bit-first* variant of the CRC. I.e., within each byte, the * most significant bit is processed first (treated as highest order polynomial * coefficient). The same bit order is also used for the CRC value itself: * * - Polynomial: x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + * x^7 + x^5 + x^4 + x^2 + x^1 + x^0 * - Bit order: Most-significant-bit-first * - Polynomial in integer form: 0x04c11db7 * * Context: Any context * Return: The new CRC value */ u32 crc32_be(u32 crc, const void *p, size_t len); /** * crc32c() - Compute CRC-32C * @crc: Initial CRC value. ~0 (recommended) or 0 for a new CRC computation, or * the previous CRC value if computing incrementally. * @p: Pointer to the data buffer * @len: Length of data in bytes * * This implements CRC-32C, i.e. the Castagnoli CRC. This is the recommended * CRC variant to use in new applications that want a 32-bit CRC. * * - Polynomial: x^32 + x^28 + x^27 + x^26 + x^25 + x^23 + x^22 + x^20 + x^19 + * x^18 + x^14 + x^13 + x^11 + x^10 + x^9 + x^8 + x^6 + x^0 * - Bit order: Least-significant-bit-first * - Polynomial in integer form: 0x82f63b78 * * This does *not* invert the CRC at the beginning or end. The caller is * expected to do that if it needs to. Inverting at both ends is recommended. * * Context: Any context * Return: The new CRC value */ u32 crc32c(u32 crc, const void *p, size_t len); /* * crc32_optimizations() returns flags that indicate which CRC32 library * functions are using architecture-specific optimizations. Unlike * IS_ENABLED(CONFIG_CRC32_ARCH) it takes into account the different CRC32 * variants and also whether any needed CPU features are available at runtime. */ #define CRC32_LE_OPTIMIZATION BIT(0) /* crc32_le() is optimized */ #define CRC32_BE_OPTIMIZATION BIT(1) /* crc32_be() is optimized */ #define CRC32C_OPTIMIZATION BIT(2) /* crc32c() is optimized */ #if IS_ENABLED(CONFIG_CRC32_ARCH) u32 crc32_optimizations(void); #else static inline u32 crc32_optimizations(void) { return 0; } #endif /* * Helpers for hash table generation of ethernet nics: * * Ethernet sends the least significant bit of a byte first, thus crc32_le * is used. The output of crc32_le is bit reversed [most significant bit * is in bit nr 0], thus it must be reversed before use. Except for * nics that bit swap the result internally... */ #define ether_crc(length, data) bitrev32(crc32_le(~0, data, length)) #define ether_crc_le(length, data) crc32_le(~0, data, length) #endif /* _LINUX_CRC32_H */ |
| 18 17 6 1 3 2 3 15 16 16 16 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2006 Ralf Baechle (ralf@linux-mips.org) * Copyright (c) 2018 Jim Wilson (jimw@sifive.com) */ #ifndef _ASM_RISCV_FUTEX_H #define _ASM_RISCV_FUTEX_H #include <linux/futex.h> #include <linux/uaccess.h> #include <linux/errno.h> #include <asm/asm.h> #include <asm/asm-extable.h> /* We don't even really need the extable code, but for now keep it simple */ #ifndef CONFIG_MMU #define __enable_user_access() do { } while (0) #define __disable_user_access() do { } while (0) #endif #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ { \ __enable_user_access(); \ __asm__ __volatile__ ( \ "1: " insn " \n" \ "2: \n" \ _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %[r]) \ : [r] "+r" (ret), [ov] "=&r" (oldval), \ [u] "+m" (*uaddr) \ : [op] "Jr" (oparg) \ : "memory"); \ __disable_user_access(); \ } static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) { int oldval = 0, ret = 0; if (!access_ok(uaddr, sizeof(u32))) return -EFAULT; switch (op) { case FUTEX_OP_SET: __futex_atomic_op("amoswap.w.aqrl %[ov],%z[op],%[u]", ret, oldval, uaddr, oparg); break; case FUTEX_OP_ADD: __futex_atomic_op("amoadd.w.aqrl %[ov],%z[op],%[u]", ret, oldval, uaddr, oparg); break; case FUTEX_OP_OR: __futex_atomic_op("amoor.w.aqrl %[ov],%z[op],%[u]", ret, oldval, uaddr, oparg); break; case FUTEX_OP_ANDN: __futex_atomic_op("amoand.w.aqrl %[ov],%z[op],%[u]", ret, oldval, uaddr, ~oparg); break; case FUTEX_OP_XOR: __futex_atomic_op("amoxor.w.aqrl %[ov],%z[op],%[u]", ret, oldval, uaddr, oparg); break; default: ret = -ENOSYS; } if (!ret) *oval = oldval; return ret; } static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { int ret = 0; u32 val; uintptr_t tmp; if (!access_ok(uaddr, sizeof(u32))) return -EFAULT; __enable_user_access(); __asm__ __volatile__ ( "1: lr.w %[v],%[u] \n" " bne %[v],%z[ov],3f \n" "2: sc.w.aqrl %[t],%z[nv],%[u] \n" " bnez %[t],1b \n" "3: \n" _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %[r]) \ _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %[r]) \ : [r] "+r" (ret), [v] "=&r" (val), [u] "+m" (*uaddr), [t] "=&r" (tmp) : [ov] "Jr" ((long)(int)oldval), [nv] "Jr" (newval) : "memory"); __disable_user_access(); *uval = val; return ret; } #endif /* _ASM_RISCV_FUTEX_H */ |
| 114 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_STRING_CHOICES_H_ #define _LINUX_STRING_CHOICES_H_ /* * Here provide a series of helpers in the str_$TRUE_$FALSE format (you can * also expand some helpers as needed), where $TRUE and $FALSE are their * corresponding literal strings. These helpers can be used in the printing * and also in other places where constant strings are required. Using these * helpers offers the following benefits: * 1) Reducing the hardcoding of strings, which makes the code more elegant * through these simple literal-meaning helpers. * 2) Unifying the output, which prevents the same string from being printed * in various forms, such as enable/disable, enabled/disabled, en/dis. * 3) Deduping by the linker, which results in a smaller binary file. */ #include <linux/types.h> static inline const char *str_assert_deassert(bool v) { return v ? "assert" : "deassert"; } #define str_deassert_assert(v) str_assert_deassert(!(v)) static inline const char *str_enable_disable(bool v) { return v ? "enable" : "disable"; } #define str_disable_enable(v) str_enable_disable(!(v)) static inline const char *str_enabled_disabled(bool v) { return v ? "enabled" : "disabled"; } #define str_disabled_enabled(v) str_enabled_disabled(!(v)) static inline const char *str_hi_lo(bool v) { return v ? "hi" : "lo"; } #define str_lo_hi(v) str_hi_lo(!(v)) static inline const char *str_high_low(bool v) { return v ? "high" : "low"; } #define str_low_high(v) str_high_low(!(v)) static inline const char *str_input_output(bool v) { return v ? "input" : "output"; } #define str_output_input(v) str_input_output(!(v)) static inline const char *str_on_off(bool v) { return v ? "on" : "off"; } #define str_off_on(v) str_on_off(!(v)) static inline const char *str_read_write(bool v) { return v ? "read" : "write"; } #define str_write_read(v) str_read_write(!(v)) static inline const char *str_true_false(bool v) { return v ? "true" : "false"; } #define str_false_true(v) str_true_false(!(v)) static inline const char *str_up_down(bool v) { return v ? "up" : "down"; } #define str_down_up(v) str_up_down(!(v)) static inline const char *str_yes_no(bool v) { return v ? "yes" : "no"; } #define str_no_yes(v) str_yes_no(!(v)) /** * str_plural - Return the simple pluralization based on English counts * @num: Number used for deciding pluralization * * If @num is 1, returns empty string, otherwise returns "s". */ static inline const char *str_plural(size_t num) { return num == 1 ? "" : "s"; } #endif |
| 58 10 10 1 18 27 18 26 7 19 26 2 58 10 62 59 60 46 47 47 65 61 50 50 10 10 10 10 3 7 7 7 7 10 7 7 60 2 2 7 7 38 20 40 40 38 4 3 17 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Synchronous Cryptographic Hash operations. * * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au> */ #include <crypto/scatterwalk.h> #include <linux/cryptouser.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/seq_file.h> #include <linux/string.h> #include <net/netlink.h> #include "hash.h" static inline bool crypto_shash_block_only(struct crypto_shash *tfm) { return crypto_shash_alg(tfm)->base.cra_flags & CRYPTO_AHASH_ALG_BLOCK_ONLY; } static inline bool crypto_shash_final_nonzero(struct crypto_shash *tfm) { return crypto_shash_alg(tfm)->base.cra_flags & CRYPTO_AHASH_ALG_FINAL_NONZERO; } static inline bool crypto_shash_finup_max(struct crypto_shash *tfm) { return crypto_shash_alg(tfm)->base.cra_flags & CRYPTO_AHASH_ALG_FINUP_MAX; } int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { return -ENOSYS; } EXPORT_SYMBOL_GPL(shash_no_setkey); static void shash_set_needkey(struct crypto_shash *tfm, struct shash_alg *alg) { if (crypto_shash_alg_needs_key(alg)) crypto_shash_set_flags(tfm, CRYPTO_TFM_NEED_KEY); } int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { struct shash_alg *shash = crypto_shash_alg(tfm); int err; err = shash->setkey(tfm, key, keylen); if (unlikely(err)) { shash_set_needkey(tfm, shash); return err; } crypto_shash_clear_flags(tfm, CRYPTO_TFM_NEED_KEY); return 0; } EXPORT_SYMBOL_GPL(crypto_shash_setkey); static int __crypto_shash_init(struct shash_desc *desc) { struct crypto_shash *tfm = desc->tfm; if (crypto_shash_block_only(tfm)) { u8 *buf = shash_desc_ctx(desc); buf += crypto_shash_descsize(tfm) - 1; *buf = 0; } return crypto_shash_alg(tfm)->init(desc); } int crypto_shash_init(struct shash_desc *desc) { if (crypto_shash_get_flags(desc->tfm) & CRYPTO_TFM_NEED_KEY) return -ENOKEY; return __crypto_shash_init(desc); } EXPORT_SYMBOL_GPL(crypto_shash_init); static int shash_default_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { struct shash_alg *shash = crypto_shash_alg(desc->tfm); return shash->update(desc, data, len) ?: shash->final(desc, out); } static int crypto_shash_op_and_zero( int (*op)(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out), struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { int err; err = op(desc, data, len, out); memset(shash_desc_ctx(desc), 0, crypto_shash_descsize(desc->tfm)); return err; } int crypto_shash_finup(struct shash_desc *restrict desc, const u8 *data, unsigned int len, u8 *restrict out) { struct crypto_shash *tfm = desc->tfm; u8 *blenp = shash_desc_ctx(desc); bool finup_max, nonzero; unsigned int bs; int err; u8 *buf; if (!crypto_shash_block_only(tfm)) { if (out) goto finup; return crypto_shash_alg(tfm)->update(desc, data, len); } finup_max = out && crypto_shash_finup_max(tfm); /* Retain extra block for final nonzero algorithms. */ nonzero = crypto_shash_final_nonzero(tfm); /* * The partial block buffer follows the algorithm desc context. * The byte following that contains the length. */ blenp += crypto_shash_descsize(tfm) - 1; bs = crypto_shash_blocksize(tfm); buf = blenp - bs; if (likely(!*blenp && finup_max)) goto finup; while ((*blenp + len) >= bs + nonzero) { unsigned int nbytes = len - nonzero; const u8 *src = data; if (*blenp) { memcpy(buf + *blenp, data, bs - *blenp); nbytes = bs; src = buf; } err = crypto_shash_alg(tfm)->update(desc, src, nbytes); if (err < 0) return err; data += nbytes - err - *blenp; len -= nbytes - err - *blenp; *blenp = 0; } if (*blenp || !out) { memcpy(buf + *blenp, data, len); *blenp += len; if (!out) return 0; data = buf; len = *blenp; } finup: return crypto_shash_op_and_zero(crypto_shash_alg(tfm)->finup, desc, data, len, out); } EXPORT_SYMBOL_GPL(crypto_shash_finup); static int shash_default_digest(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { return __crypto_shash_init(desc) ?: crypto_shash_finup(desc, data, len, out); } int crypto_shash_digest(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { struct crypto_shash *tfm = desc->tfm; if (crypto_shash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) return -ENOKEY; return crypto_shash_op_and_zero(crypto_shash_alg(tfm)->digest, desc, data, len, out); } EXPORT_SYMBOL_GPL(crypto_shash_digest); int crypto_shash_tfm_digest(struct crypto_shash *tfm, const u8 *data, unsigned int len, u8 *out) { SHASH_DESC_ON_STACK(desc, tfm); desc->tfm = tfm; return crypto_shash_digest(desc, data, len, out); } EXPORT_SYMBOL_GPL(crypto_shash_tfm_digest); static int __crypto_shash_export(struct shash_desc *desc, void *out, int (*export)(struct shash_desc *desc, void *out)) { struct crypto_shash *tfm = desc->tfm; u8 *buf = shash_desc_ctx(desc); unsigned int plen, ss; plen = crypto_shash_blocksize(tfm) + 1; ss = crypto_shash_statesize(tfm); if (crypto_shash_block_only(tfm)) ss -= plen; if (!export) { memcpy(out, buf, ss); return 0; } return export(desc, out); } int crypto_shash_export_core(struct shash_desc *desc, void *out) { return __crypto_shash_export(desc, out, crypto_shash_alg(desc->tfm)->export_core); } EXPORT_SYMBOL_GPL(crypto_shash_export_core); int crypto_shash_export(struct shash_desc *desc, void *out) { struct crypto_shash *tfm = desc->tfm; if (crypto_shash_block_only(tfm)) { unsigned int plen = crypto_shash_blocksize(tfm) + 1; unsigned int descsize = crypto_shash_descsize(tfm); unsigned int ss = crypto_shash_statesize(tfm); u8 *buf = shash_desc_ctx(desc); memcpy(out + ss - plen, buf + descsize - plen, plen); } return __crypto_shash_export(desc, out, crypto_shash_alg(tfm)->export); } EXPORT_SYMBOL_GPL(crypto_shash_export); static int __crypto_shash_import(struct shash_desc *desc, const void *in, int (*import)(struct shash_desc *desc, const void *in)) { struct crypto_shash *tfm = desc->tfm; unsigned int descsize, plen, ss; u8 *buf = shash_desc_ctx(desc); if (crypto_shash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) return -ENOKEY; ss = crypto_shash_statesize(tfm); if (crypto_shash_block_only(tfm)) { plen = crypto_shash_blocksize(tfm) + 1; ss -= plen; descsize = crypto_shash_descsize(tfm); buf[descsize - 1] = 0; } if (!import) { memcpy(buf, in, ss); return 0; } return import(desc, in); } int crypto_shash_import_core(struct shash_desc *desc, const void *in) { return __crypto_shash_import(desc, in, crypto_shash_alg(desc->tfm)->import_core); } EXPORT_SYMBOL_GPL(crypto_shash_import_core); int crypto_shash_import(struct shash_desc *desc, const void *in) { struct crypto_shash *tfm = desc->tfm; int err; err = __crypto_shash_import(desc, in, crypto_shash_alg(tfm)->import); if (crypto_shash_block_only(tfm)) { unsigned int plen = crypto_shash_blocksize(tfm) + 1; unsigned int descsize = crypto_shash_descsize(tfm); unsigned int ss = crypto_shash_statesize(tfm); u8 *buf = shash_desc_ctx(desc); memcpy(buf + descsize - plen, in + ss - plen, plen); if (buf[descsize - 1] >= plen) err = -EOVERFLOW; } return err; } EXPORT_SYMBOL_GPL(crypto_shash_import); static void crypto_shash_exit_tfm(struct crypto_tfm *tfm) { struct crypto_shash *hash = __crypto_shash_cast(tfm); struct shash_alg *alg = crypto_shash_alg(hash); alg->exit_tfm(hash); } static int crypto_shash_init_tfm(struct crypto_tfm *tfm) { struct crypto_shash *hash = __crypto_shash_cast(tfm); struct shash_alg *alg = crypto_shash_alg(hash); shash_set_needkey(hash, alg); if (alg->exit_tfm) tfm->exit = crypto_shash_exit_tfm; if (!alg->init_tfm) return 0; return alg->init_tfm(hash); } static void crypto_shash_free_instance(struct crypto_instance *inst) { struct shash_instance *shash = shash_instance(inst); shash->free(shash); } static int __maybe_unused crypto_shash_report( struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_hash rhash; struct shash_alg *salg = __crypto_shash_alg(alg); memset(&rhash, 0, sizeof(rhash)); strscpy(rhash.type, "shash", sizeof(rhash.type)); rhash.blocksize = alg->cra_blocksize; rhash.digestsize = salg->digestsize; return nla_put(skb, CRYPTOCFGA_REPORT_HASH, sizeof(rhash), &rhash); } static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg) __maybe_unused; static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg) { struct shash_alg *salg = __crypto_shash_alg(alg); seq_printf(m, "type : shash\n"); seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); seq_printf(m, "digestsize : %u\n", salg->digestsize); } const struct crypto_type crypto_shash_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_shash_init_tfm, .free = crypto_shash_free_instance, #ifdef CONFIG_PROC_FS .show = crypto_shash_show, #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_shash_report, #endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_MASK, .type = CRYPTO_ALG_TYPE_SHASH, .tfmsize = offsetof(struct crypto_shash, base), .algsize = offsetof(struct shash_alg, base), }; int crypto_grab_shash(struct crypto_shash_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask) { spawn->base.frontend = &crypto_shash_type; return crypto_grab_spawn(&spawn->base, inst, name, type, mask); } EXPORT_SYMBOL_GPL(crypto_grab_shash); struct crypto_shash *crypto_alloc_shash(const char *alg_name, u32 type, u32 mask) { return crypto_alloc_tfm(alg_name, &crypto_shash_type, type, mask); } EXPORT_SYMBOL_GPL(crypto_alloc_shash); int crypto_has_shash(const char *alg_name, u32 type, u32 mask) { return crypto_type_has_alg(alg_name, &crypto_shash_type, type, mask); } EXPORT_SYMBOL_GPL(crypto_has_shash); struct crypto_shash *crypto_clone_shash(struct crypto_shash *hash) { struct crypto_tfm *tfm = crypto_shash_tfm(hash); struct shash_alg *alg = crypto_shash_alg(hash); struct crypto_shash *nhash; int err; if (!crypto_shash_alg_has_setkey(alg)) { tfm = crypto_tfm_get(tfm); if (IS_ERR(tfm)) return ERR_CAST(tfm); return hash; } if (!alg->clone_tfm && (alg->init_tfm || alg->base.cra_init)) return ERR_PTR(-ENOSYS); nhash = crypto_clone_tfm(&crypto_shash_type, tfm); if (IS_ERR(nhash)) return nhash; if (alg->clone_tfm) { err = alg->clone_tfm(nhash, hash); if (err) { crypto_free_shash(nhash); return ERR_PTR(err); } } if (alg->exit_tfm) crypto_shash_tfm(nhash)->exit = crypto_shash_exit_tfm; return nhash; } EXPORT_SYMBOL_GPL(crypto_clone_shash); int hash_prepare_alg(struct hash_alg_common *alg) { struct crypto_alg *base = &alg->base; if (alg->digestsize > HASH_MAX_DIGESTSIZE) return -EINVAL; /* alignmask is not useful for hashes, so it is not supported. */ if (base->cra_alignmask) return -EINVAL; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; return 0; } static int shash_default_export_core(struct shash_desc *desc, void *out) { return -ENOSYS; } static int shash_default_import_core(struct shash_desc *desc, const void *in) { return -ENOSYS; } static int shash_prepare_alg(struct shash_alg *alg) { struct crypto_alg *base = &alg->halg.base; int err; if ((alg->export && !alg->import) || (alg->import && !alg->export)) return -EINVAL; err = hash_prepare_alg(&alg->halg); if (err) return err; base->cra_type = &crypto_shash_type; base->cra_flags |= CRYPTO_ALG_TYPE_SHASH; base->cra_flags |= CRYPTO_ALG_REQ_VIRT; /* * Handle missing optional functions. For each one we can either * install a default here, or we can leave the pointer as NULL and check * the pointer for NULL in crypto_shash_*(), avoiding an indirect call * when the default behavior is desired. For ->finup and ->digest we * install defaults, since for optimal performance algorithms should * implement these anyway. On the other hand, for ->import and * ->export the common case and best performance comes from the simple * memcpy of the shash_desc_ctx, so when those pointers are NULL we * leave them NULL and provide the memcpy with no indirect call. */ if (!alg->finup) alg->finup = shash_default_finup; if (!alg->digest) alg->digest = shash_default_digest; if (!alg->export && !alg->halg.statesize) alg->halg.statesize = alg->descsize; if (!alg->setkey) alg->setkey = shash_no_setkey; if (base->cra_flags & CRYPTO_AHASH_ALG_BLOCK_ONLY) { BUILD_BUG_ON(MAX_ALGAPI_BLOCKSIZE >= 256); alg->descsize += base->cra_blocksize + 1; alg->statesize += base->cra_blocksize + 1; alg->export_core = alg->export; alg->import_core = alg->import; } else if (!alg->export_core || !alg->import_core) { alg->export_core = shash_default_export_core; alg->import_core = shash_default_import_core; base->cra_flags |= CRYPTO_AHASH_ALG_NO_EXPORT_CORE; } if (alg->descsize > HASH_MAX_DESCSIZE) return -EINVAL; if (alg->statesize > HASH_MAX_STATESIZE) return -EINVAL; base->cra_reqsize = sizeof(struct shash_desc) + alg->descsize; return 0; } int crypto_register_shash(struct shash_alg *alg) { struct crypto_alg *base = &alg->base; int err; err = shash_prepare_alg(alg); if (err) return err; return crypto_register_alg(base); } EXPORT_SYMBOL_GPL(crypto_register_shash); void crypto_unregister_shash(struct shash_alg *alg) { crypto_unregister_alg(&alg->base); } EXPORT_SYMBOL_GPL(crypto_unregister_shash); int crypto_register_shashes(struct shash_alg *algs, int count) { int i, ret; for (i = 0; i < count; i++) { ret = crypto_register_shash(&algs[i]); if (ret) goto err; } return 0; err: for (--i; i >= 0; --i) crypto_unregister_shash(&algs[i]); return ret; } EXPORT_SYMBOL_GPL(crypto_register_shashes); void crypto_unregister_shashes(struct shash_alg *algs, int count) { int i; for (i = count - 1; i >= 0; --i) crypto_unregister_shash(&algs[i]); } EXPORT_SYMBOL_GPL(crypto_unregister_shashes); int shash_register_instance(struct crypto_template *tmpl, struct shash_instance *inst) { int err; if (WARN_ON(!inst->free)) return -EINVAL; err = shash_prepare_alg(&inst->alg); if (err) return err; return crypto_register_instance(tmpl, shash_crypto_instance(inst)); } EXPORT_SYMBOL_GPL(shash_register_instance); void shash_free_singlespawn_instance(struct shash_instance *inst) { crypto_drop_spawn(shash_instance_ctx(inst)); kfree(inst); } EXPORT_SYMBOL_GPL(shash_free_singlespawn_instance); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Synchronous cryptographic hash type"); |
| 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 | /* SPDX-License-Identifier: GPL-2.0 */ /* * * Generic internet FLOW. * */ #ifndef _NET_FLOW_H #define _NET_FLOW_H #include <linux/in6.h> #include <linux/atomic.h> #include <linux/container_of.h> #include <linux/uidgid.h> #include <net/inet_dscp.h> struct flow_keys; /* * ifindex generation is per-net namespace, and loopback is * always the 1st device in ns (see net_dev_init), thus any * loopback device should get ifindex 1 */ #define LOOPBACK_IFINDEX 1 struct flowi_tunnel { __be64 tun_id; }; struct flowi_common { int flowic_oif; int flowic_iif; int flowic_l3mdev; __u32 flowic_mark; dscp_t flowic_dscp; __u8 flowic_scope; __u8 flowic_proto; __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 #define FLOWI_FLAG_L3MDEV_OIF 0x04 #define FLOWI_FLAG_ANY_SPORT 0x08 __u32 flowic_secid; kuid_t flowic_uid; __u32 flowic_multipath_hash; struct flowi_tunnel flowic_tun_key; }; union flowi_uli { struct { __be16 dport; __be16 sport; } ports; struct { __u8 type; __u8 code; } icmpt; __be32 gre_key; struct { __u8 type; } mht; }; struct flowi4 { struct flowi_common __fl_common; #define flowi4_oif __fl_common.flowic_oif #define flowi4_iif __fl_common.flowic_iif #define flowi4_l3mdev __fl_common.flowic_l3mdev #define flowi4_mark __fl_common.flowic_mark #define flowi4_dscp __fl_common.flowic_dscp #define flowi4_scope __fl_common.flowic_scope #define flowi4_proto __fl_common.flowic_proto #define flowi4_flags __fl_common.flowic_flags #define flowi4_secid __fl_common.flowic_secid #define flowi4_tun_key __fl_common.flowic_tun_key #define flowi4_uid __fl_common.flowic_uid #define flowi4_multipath_hash __fl_common.flowic_multipath_hash /* (saddr,daddr) must be grouped, same order as in IP header */ __be32 saddr; __be32 daddr; union flowi_uli uli; #define fl4_sport uli.ports.sport #define fl4_dport uli.ports.dport #define fl4_icmp_type uli.icmpt.type #define fl4_icmp_code uli.icmpt.code #define fl4_mh_type uli.mht.type #define fl4_gre_key uli.gre_key } __attribute__((__aligned__(BITS_PER_LONG/8))); static inline void flowi4_init_output(struct flowi4 *fl4, int oif, __u32 mark, __u8 tos, __u8 scope, __u8 proto, __u8 flags, __be32 daddr, __be32 saddr, __be16 dport, __be16 sport, kuid_t uid) { fl4->flowi4_oif = oif; fl4->flowi4_iif = LOOPBACK_IFINDEX; fl4->flowi4_l3mdev = 0; fl4->flowi4_mark = mark; fl4->flowi4_dscp = inet_dsfield_to_dscp(tos); fl4->flowi4_scope = scope; fl4->flowi4_proto = proto; fl4->flowi4_flags = flags; fl4->flowi4_secid = 0; fl4->flowi4_tun_key.tun_id = 0; fl4->flowi4_uid = uid; fl4->daddr = daddr; fl4->saddr = saddr; fl4->fl4_dport = dport; fl4->fl4_sport = sport; fl4->flowi4_multipath_hash = 0; } /* Reset some input parameters after previous lookup */ static inline void flowi4_update_output(struct flowi4 *fl4, int oif, __be32 daddr, __be32 saddr) { fl4->flowi4_oif = oif; fl4->daddr = daddr; fl4->saddr = saddr; } struct flowi6 { struct flowi_common __fl_common; #define flowi6_oif __fl_common.flowic_oif #define flowi6_iif __fl_common.flowic_iif #define flowi6_l3mdev __fl_common.flowic_l3mdev #define flowi6_mark __fl_common.flowic_mark #define flowi6_scope __fl_common.flowic_scope #define flowi6_proto __fl_common.flowic_proto #define flowi6_flags __fl_common.flowic_flags #define flowi6_secid __fl_common.flowic_secid #define flowi6_tun_key __fl_common.flowic_tun_key #define flowi6_uid __fl_common.flowic_uid struct in6_addr daddr; struct in6_addr saddr; /* Note: flowi6_dscp is encoded in flowlabel, too. */ __be32 flowlabel; union flowi_uli uli; #define fl6_sport uli.ports.sport #define fl6_dport uli.ports.dport #define fl6_icmp_type uli.icmpt.type #define fl6_icmp_code uli.icmpt.code #define fl6_mh_type uli.mht.type #define fl6_gre_key uli.gre_key __u32 mp_hash; } __attribute__((__aligned__(BITS_PER_LONG/8))); struct flowi { union { struct flowi_common __fl_common; struct flowi4 ip4; struct flowi6 ip6; } u; #define flowi_oif u.__fl_common.flowic_oif #define flowi_iif u.__fl_common.flowic_iif #define flowi_l3mdev u.__fl_common.flowic_l3mdev #define flowi_mark u.__fl_common.flowic_mark #define flowi_dscp u.__fl_common.flowic_dscp #define flowi_scope u.__fl_common.flowic_scope #define flowi_proto u.__fl_common.flowic_proto #define flowi_flags u.__fl_common.flowic_flags #define flowi_secid u.__fl_common.flowic_secid #define flowi_tun_key u.__fl_common.flowic_tun_key #define flowi_uid u.__fl_common.flowic_uid } __attribute__((__aligned__(BITS_PER_LONG/8))); static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) { return container_of(fl4, struct flowi, u.ip4); } static inline struct flowi_common *flowi4_to_flowi_common(struct flowi4 *fl4) { return &(fl4->__fl_common); } static inline struct flowi *flowi6_to_flowi(struct flowi6 *fl6) { return container_of(fl6, struct flowi, u.ip6); } static inline struct flowi_common *flowi6_to_flowi_common(struct flowi6 *fl6) { return &(fl6->__fl_common); } __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys); #endif |
| 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 | // SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * This file implements the various access functions for the * PROC file system. It is mainly used for debugging and * statistics. * * Authors: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Gerald J. Heim, <heim@peanuts.informatik.uni-tuebingen.de> * Fred Baumgarten, <dc6iq@insu1.etec.uni-karlsruhe.de> * Erik Schoenfelder, <schoenfr@ibr.cs.tu-bs.de> * * Fixes: * Alan Cox : UDP sockets show the rxqueue/txqueue * using hint flag for the netinfo. * Pauline Middelink : identd support * Alan Cox : Make /proc safer. * Erik Schoenfelder : /proc/net/snmp * Alan Cox : Handle dead sockets properly. * Gerhard Koerting : Show both timers * Alan Cox : Allow inode to be NULL (kernel socket) * Andi Kleen : Add support for open_requests and * split functions for more readibility. * Andi Kleen : Add support for /proc/net/netstat * Arnaldo C. Melo : Convert to seq_file */ #include <linux/types.h> #include <net/net_namespace.h> #include <net/icmp.h> #include <net/protocol.h> #include <net/tcp.h> #include <net/mptcp.h> #include <net/proto_memory.h> #include <net/udp.h> #include <net/udplite.h> #include <linux/bottom_half.h> #include <linux/inetdevice.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/export.h> #include <net/sock.h> #include <net/raw.h> #define TCPUDP_MIB_MAX MAX_T(u32, UDP_MIB_MAX, TCP_MIB_MAX) /* * Report socket allocation statistics [mea@utu.fi] */ static int sockstat_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; int orphans, sockets; orphans = tcp_orphan_count_sum(); sockets = proto_sockets_allocated_sum_positive(&tcp_prot); socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", sock_prot_inuse_get(net, &tcp_prot), orphans, refcount_read(&net->ipv4.tcp_death_row.tw_refcount) - 1, sockets, proto_memory_allocated(&tcp_prot)); seq_printf(seq, "UDP: inuse %d mem %ld\n", sock_prot_inuse_get(net, &udp_prot), proto_memory_allocated(&udp_prot)); seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(net, &udplite_prot)); seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(net, &raw_prot)); seq_printf(seq, "FRAG: inuse %u memory %lu\n", atomic_read(&net->ipv4.fqdir->rhashtable.nelems), frag_mem_limit(net->ipv4.fqdir)); return 0; } /* snmp items */ static const struct snmp_mib snmp4_ipstats_list[] = { SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INPKTS), SNMP_MIB_ITEM("InHdrErrors", IPSTATS_MIB_INHDRERRORS), SNMP_MIB_ITEM("InAddrErrors", IPSTATS_MIB_INADDRERRORS), SNMP_MIB_ITEM("ForwDatagrams", IPSTATS_MIB_OUTFORWDATAGRAMS), SNMP_MIB_ITEM("InUnknownProtos", IPSTATS_MIB_INUNKNOWNPROTOS), SNMP_MIB_ITEM("InDiscards", IPSTATS_MIB_INDISCARDS), SNMP_MIB_ITEM("InDelivers", IPSTATS_MIB_INDELIVERS), SNMP_MIB_ITEM("OutRequests", IPSTATS_MIB_OUTREQUESTS), SNMP_MIB_ITEM("OutDiscards", IPSTATS_MIB_OUTDISCARDS), SNMP_MIB_ITEM("OutNoRoutes", IPSTATS_MIB_OUTNOROUTES), SNMP_MIB_ITEM("ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT), SNMP_MIB_ITEM("ReasmReqds", IPSTATS_MIB_REASMREQDS), SNMP_MIB_ITEM("ReasmOKs", IPSTATS_MIB_REASMOKS), SNMP_MIB_ITEM("ReasmFails", IPSTATS_MIB_REASMFAILS), SNMP_MIB_ITEM("FragOKs", IPSTATS_MIB_FRAGOKS), SNMP_MIB_ITEM("FragFails", IPSTATS_MIB_FRAGFAILS), SNMP_MIB_ITEM("FragCreates", IPSTATS_MIB_FRAGCREATES), SNMP_MIB_ITEM("OutTransmits", IPSTATS_MIB_OUTPKTS), }; /* Following items are displayed in /proc/net/netstat */ static const struct snmp_mib snmp4_ipextstats_list[] = { SNMP_MIB_ITEM("InNoRoutes", IPSTATS_MIB_INNOROUTES), SNMP_MIB_ITEM("InTruncatedPkts", IPSTATS_MIB_INTRUNCATEDPKTS), SNMP_MIB_ITEM("InMcastPkts", IPSTATS_MIB_INMCASTPKTS), SNMP_MIB_ITEM("OutMcastPkts", IPSTATS_MIB_OUTMCASTPKTS), SNMP_MIB_ITEM("InBcastPkts", IPSTATS_MIB_INBCASTPKTS), SNMP_MIB_ITEM("OutBcastPkts", IPSTATS_MIB_OUTBCASTPKTS), SNMP_MIB_ITEM("InOctets", IPSTATS_MIB_INOCTETS), SNMP_MIB_ITEM("OutOctets", IPSTATS_MIB_OUTOCTETS), SNMP_MIB_ITEM("InMcastOctets", IPSTATS_MIB_INMCASTOCTETS), SNMP_MIB_ITEM("OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS), SNMP_MIB_ITEM("InBcastOctets", IPSTATS_MIB_INBCASTOCTETS), SNMP_MIB_ITEM("OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS), /* Non RFC4293 fields */ SNMP_MIB_ITEM("InCsumErrors", IPSTATS_MIB_CSUMERRORS), SNMP_MIB_ITEM("InNoECTPkts", IPSTATS_MIB_NOECTPKTS), SNMP_MIB_ITEM("InECT1Pkts", IPSTATS_MIB_ECT1PKTS), SNMP_MIB_ITEM("InECT0Pkts", IPSTATS_MIB_ECT0PKTS), SNMP_MIB_ITEM("InCEPkts", IPSTATS_MIB_CEPKTS), SNMP_MIB_ITEM("ReasmOverlaps", IPSTATS_MIB_REASM_OVERLAPS), }; static const struct { const char *name; int index; } icmpmibmap[] = { { "DestUnreachs", ICMP_DEST_UNREACH }, { "TimeExcds", ICMP_TIME_EXCEEDED }, { "ParmProbs", ICMP_PARAMETERPROB }, { "SrcQuenchs", ICMP_SOURCE_QUENCH }, { "Redirects", ICMP_REDIRECT }, { "Echos", ICMP_ECHO }, { "EchoReps", ICMP_ECHOREPLY }, { "Timestamps", ICMP_TIMESTAMP }, { "TimestampReps", ICMP_TIMESTAMPREPLY }, { "AddrMasks", ICMP_ADDRESS }, { "AddrMaskReps", ICMP_ADDRESSREPLY }, { NULL, 0 } }; static const struct snmp_mib snmp4_tcp_list[] = { SNMP_MIB_ITEM("RtoAlgorithm", TCP_MIB_RTOALGORITHM), SNMP_MIB_ITEM("RtoMin", TCP_MIB_RTOMIN), SNMP_MIB_ITEM("RtoMax", TCP_MIB_RTOMAX), SNMP_MIB_ITEM("MaxConn", TCP_MIB_MAXCONN), SNMP_MIB_ITEM("ActiveOpens", TCP_MIB_ACTIVEOPENS), SNMP_MIB_ITEM("PassiveOpens", TCP_MIB_PASSIVEOPENS), SNMP_MIB_ITEM("AttemptFails", TCP_MIB_ATTEMPTFAILS), SNMP_MIB_ITEM("EstabResets", TCP_MIB_ESTABRESETS), SNMP_MIB_ITEM("CurrEstab", TCP_MIB_CURRESTAB), SNMP_MIB_ITEM("InSegs", TCP_MIB_INSEGS), SNMP_MIB_ITEM("OutSegs", TCP_MIB_OUTSEGS), SNMP_MIB_ITEM("RetransSegs", TCP_MIB_RETRANSSEGS), SNMP_MIB_ITEM("InErrs", TCP_MIB_INERRS), SNMP_MIB_ITEM("OutRsts", TCP_MIB_OUTRSTS), SNMP_MIB_ITEM("InCsumErrors", TCP_MIB_CSUMERRORS), }; static const struct snmp_mib snmp4_udp_list[] = { SNMP_MIB_ITEM("InDatagrams", UDP_MIB_INDATAGRAMS), SNMP_MIB_ITEM("NoPorts", UDP_MIB_NOPORTS), SNMP_MIB_ITEM("InErrors", UDP_MIB_INERRORS), SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS), SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS), SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS), SNMP_MIB_ITEM("InCsumErrors", UDP_MIB_CSUMERRORS), SNMP_MIB_ITEM("IgnoredMulti", UDP_MIB_IGNOREDMULTI), SNMP_MIB_ITEM("MemErrors", UDP_MIB_MEMERRORS), }; static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("SyncookiesSent", LINUX_MIB_SYNCOOKIESSENT), SNMP_MIB_ITEM("SyncookiesRecv", LINUX_MIB_SYNCOOKIESRECV), SNMP_MIB_ITEM("SyncookiesFailed", LINUX_MIB_SYNCOOKIESFAILED), SNMP_MIB_ITEM("EmbryonicRsts", LINUX_MIB_EMBRYONICRSTS), SNMP_MIB_ITEM("PruneCalled", LINUX_MIB_PRUNECALLED), SNMP_MIB_ITEM("RcvPruned", LINUX_MIB_RCVPRUNED), SNMP_MIB_ITEM("OfoPruned", LINUX_MIB_OFOPRUNED), SNMP_MIB_ITEM("OutOfWindowIcmps", LINUX_MIB_OUTOFWINDOWICMPS), SNMP_MIB_ITEM("LockDroppedIcmps", LINUX_MIB_LOCKDROPPEDICMPS), SNMP_MIB_ITEM("ArpFilter", LINUX_MIB_ARPFILTER), SNMP_MIB_ITEM("TW", LINUX_MIB_TIMEWAITED), SNMP_MIB_ITEM("TWRecycled", LINUX_MIB_TIMEWAITRECYCLED), SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED), SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED), SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED), SNMP_MIB_ITEM("BeyondWindow", LINUX_MIB_BEYOND_WINDOW), SNMP_MIB_ITEM("TSEcrRejected", LINUX_MIB_TSECRREJECTED), SNMP_MIB_ITEM("PAWSOldAck", LINUX_MIB_PAWS_OLD_ACK), SNMP_MIB_ITEM("PAWSTimewait", LINUX_MIB_PAWS_TW_REJECTED), SNMP_MIB_ITEM("DelayedACKs", LINUX_MIB_DELAYEDACKS), SNMP_MIB_ITEM("DelayedACKLocked", LINUX_MIB_DELAYEDACKLOCKED), SNMP_MIB_ITEM("DelayedACKLost", LINUX_MIB_DELAYEDACKLOST), SNMP_MIB_ITEM("ListenOverflows", LINUX_MIB_LISTENOVERFLOWS), SNMP_MIB_ITEM("ListenDrops", LINUX_MIB_LISTENDROPS), SNMP_MIB_ITEM("TCPHPHits", LINUX_MIB_TCPHPHITS), SNMP_MIB_ITEM("TCPPureAcks", LINUX_MIB_TCPPUREACKS), SNMP_MIB_ITEM("TCPHPAcks", LINUX_MIB_TCPHPACKS), SNMP_MIB_ITEM("TCPRenoRecovery", LINUX_MIB_TCPRENORECOVERY), SNMP_MIB_ITEM("TCPSackRecovery", LINUX_MIB_TCPSACKRECOVERY), SNMP_MIB_ITEM("TCPSACKReneging", LINUX_MIB_TCPSACKRENEGING), SNMP_MIB_ITEM("TCPSACKReorder", LINUX_MIB_TCPSACKREORDER), SNMP_MIB_ITEM("TCPRenoReorder", LINUX_MIB_TCPRENOREORDER), SNMP_MIB_ITEM("TCPTSReorder", LINUX_MIB_TCPTSREORDER), SNMP_MIB_ITEM("TCPFullUndo", LINUX_MIB_TCPFULLUNDO), SNMP_MIB_ITEM("TCPPartialUndo", LINUX_MIB_TCPPARTIALUNDO), SNMP_MIB_ITEM("TCPDSACKUndo", LINUX_MIB_TCPDSACKUNDO), SNMP_MIB_ITEM("TCPLossUndo", LINUX_MIB_TCPLOSSUNDO), SNMP_MIB_ITEM("TCPLostRetransmit", LINUX_MIB_TCPLOSTRETRANSMIT), SNMP_MIB_ITEM("TCPRenoFailures", LINUX_MIB_TCPRENOFAILURES), SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES), SNMP_MIB_ITEM("TCPLossFailures", LINUX_MIB_TCPLOSSFAILURES), SNMP_MIB_ITEM("TCPFastRetrans", LINUX_MIB_TCPFASTRETRANS), SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS), SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS), SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES), SNMP_MIB_ITEM("TCPLossProbeRecovery", LINUX_MIB_TCPLOSSPROBERECOVERY), SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL), SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL), SNMP_MIB_ITEM("TCPRcvCollapsed", LINUX_MIB_TCPRCVCOLLAPSED), SNMP_MIB_ITEM("TCPBacklogCoalesce", LINUX_MIB_TCPBACKLOGCOALESCE), SNMP_MIB_ITEM("TCPDSACKOldSent", LINUX_MIB_TCPDSACKOLDSENT), SNMP_MIB_ITEM("TCPDSACKOfoSent", LINUX_MIB_TCPDSACKOFOSENT), SNMP_MIB_ITEM("TCPDSACKRecv", LINUX_MIB_TCPDSACKRECV), SNMP_MIB_ITEM("TCPDSACKOfoRecv", LINUX_MIB_TCPDSACKOFORECV), SNMP_MIB_ITEM("TCPAbortOnData", LINUX_MIB_TCPABORTONDATA), SNMP_MIB_ITEM("TCPAbortOnClose", LINUX_MIB_TCPABORTONCLOSE), SNMP_MIB_ITEM("TCPAbortOnMemory", LINUX_MIB_TCPABORTONMEMORY), SNMP_MIB_ITEM("TCPAbortOnTimeout", LINUX_MIB_TCPABORTONTIMEOUT), SNMP_MIB_ITEM("TCPAbortOnLinger", LINUX_MIB_TCPABORTONLINGER), SNMP_MIB_ITEM("TCPAbortFailed", LINUX_MIB_TCPABORTFAILED), SNMP_MIB_ITEM("TCPMemoryPressures", LINUX_MIB_TCPMEMORYPRESSURES), SNMP_MIB_ITEM("TCPMemoryPressuresChrono", LINUX_MIB_TCPMEMORYPRESSURESCHRONO), SNMP_MIB_ITEM("TCPSACKDiscard", LINUX_MIB_TCPSACKDISCARD), SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD), SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO), SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS), SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND), SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED), SNMP_MIB_ITEM("TCPMD5Failure", LINUX_MIB_TCPMD5FAILURE), SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED), SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED), SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP), SNMP_MIB_ITEM("PFMemallocDrop", LINUX_MIB_PFMEMALLOCDROP), SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER), SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW), SNMP_MIB_ITEM("TCPReqQFullDoCookies", LINUX_MIB_TCPREQQFULLDOCOOKIES), SNMP_MIB_ITEM("TCPReqQFullDrop", LINUX_MIB_TCPREQQFULLDROP), SNMP_MIB_ITEM("TCPRetransFail", LINUX_MIB_TCPRETRANSFAIL), SNMP_MIB_ITEM("TCPRcvCoalesce", LINUX_MIB_TCPRCVCOALESCE), SNMP_MIB_ITEM("TCPOFOQueue", LINUX_MIB_TCPOFOQUEUE), SNMP_MIB_ITEM("TCPOFODrop", LINUX_MIB_TCPOFODROP), SNMP_MIB_ITEM("TCPOFOMerge", LINUX_MIB_TCPOFOMERGE), SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK), SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE), SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE), SNMP_MIB_ITEM("TCPFastOpenActiveFail", LINUX_MIB_TCPFASTOPENACTIVEFAIL), SNMP_MIB_ITEM("TCPFastOpenPassive", LINUX_MIB_TCPFASTOPENPASSIVE), SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL), SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), SNMP_MIB_ITEM("TCPFastOpenBlackhole", LINUX_MIB_TCPFASTOPENBLACKHOLE), SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS), SNMP_MIB_ITEM("TCPAutoCorking", LINUX_MIB_TCPAUTOCORKING), SNMP_MIB_ITEM("TCPFromZeroWindowAdv", LINUX_MIB_TCPFROMZEROWINDOWADV), SNMP_MIB_ITEM("TCPToZeroWindowAdv", LINUX_MIB_TCPTOZEROWINDOWADV), SNMP_MIB_ITEM("TCPWantZeroWindowAdv", LINUX_MIB_TCPWANTZEROWINDOWADV), SNMP_MIB_ITEM("TCPSynRetrans", LINUX_MIB_TCPSYNRETRANS), SNMP_MIB_ITEM("TCPOrigDataSent", LINUX_MIB_TCPORIGDATASENT), SNMP_MIB_ITEM("TCPHystartTrainDetect", LINUX_MIB_TCPHYSTARTTRAINDETECT), SNMP_MIB_ITEM("TCPHystartTrainCwnd", LINUX_MIB_TCPHYSTARTTRAINCWND), SNMP_MIB_ITEM("TCPHystartDelayDetect", LINUX_MIB_TCPHYSTARTDELAYDETECT), SNMP_MIB_ITEM("TCPHystartDelayCwnd", LINUX_MIB_TCPHYSTARTDELAYCWND), SNMP_MIB_ITEM("TCPACKSkippedSynRecv", LINUX_MIB_TCPACKSKIPPEDSYNRECV), SNMP_MIB_ITEM("TCPACKSkippedPAWS", LINUX_MIB_TCPACKSKIPPEDPAWS), SNMP_MIB_ITEM("TCPACKSkippedSeq", LINUX_MIB_TCPACKSKIPPEDSEQ), SNMP_MIB_ITEM("TCPACKSkippedFinWait2", LINUX_MIB_TCPACKSKIPPEDFINWAIT2), SNMP_MIB_ITEM("TCPACKSkippedTimeWait", LINUX_MIB_TCPACKSKIPPEDTIMEWAIT), SNMP_MIB_ITEM("TCPACKSkippedChallenge", LINUX_MIB_TCPACKSKIPPEDCHALLENGE), SNMP_MIB_ITEM("TCPWinProbe", LINUX_MIB_TCPWINPROBE), SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE), SNMP_MIB_ITEM("TCPMTUPFail", LINUX_MIB_TCPMTUPFAIL), SNMP_MIB_ITEM("TCPMTUPSuccess", LINUX_MIB_TCPMTUPSUCCESS), SNMP_MIB_ITEM("TCPDelivered", LINUX_MIB_TCPDELIVERED), SNMP_MIB_ITEM("TCPDeliveredCE", LINUX_MIB_TCPDELIVEREDCE), SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED), SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP), SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP), SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG), SNMP_MIB_ITEM("TCPFastOpenPassiveAltKey", LINUX_MIB_TCPFASTOPENPASSIVEALTKEY), SNMP_MIB_ITEM("TcpTimeoutRehash", LINUX_MIB_TCPTIMEOUTREHASH), SNMP_MIB_ITEM("TcpDuplicateDataRehash", LINUX_MIB_TCPDUPLICATEDATAREHASH), SNMP_MIB_ITEM("TCPDSACKRecvSegs", LINUX_MIB_TCPDSACKRECVSEGS), SNMP_MIB_ITEM("TCPDSACKIgnoredDubious", LINUX_MIB_TCPDSACKIGNOREDDUBIOUS), SNMP_MIB_ITEM("TCPMigrateReqSuccess", LINUX_MIB_TCPMIGRATEREQSUCCESS), SNMP_MIB_ITEM("TCPMigrateReqFailure", LINUX_MIB_TCPMIGRATEREQFAILURE), SNMP_MIB_ITEM("TCPPLBRehash", LINUX_MIB_TCPPLBREHASH), SNMP_MIB_ITEM("TCPAORequired", LINUX_MIB_TCPAOREQUIRED), SNMP_MIB_ITEM("TCPAOBad", LINUX_MIB_TCPAOBAD), SNMP_MIB_ITEM("TCPAOKeyNotFound", LINUX_MIB_TCPAOKEYNOTFOUND), SNMP_MIB_ITEM("TCPAOGood", LINUX_MIB_TCPAOGOOD), SNMP_MIB_ITEM("TCPAODroppedIcmps", LINUX_MIB_TCPAODROPPEDICMPS), }; static void icmpmsg_put_line(struct seq_file *seq, unsigned long *vals, unsigned short *type, int count) { int j; if (count) { seq_puts(seq, "\nIcmpMsg:"); for (j = 0; j < count; ++j) seq_printf(seq, " %sType%u", type[j] & 0x100 ? "Out" : "In", type[j] & 0xff); seq_puts(seq, "\nIcmpMsg:"); for (j = 0; j < count; ++j) seq_printf(seq, " %lu", vals[j]); } } static void icmpmsg_put(struct seq_file *seq) { #define PERLINE 16 int i, count; unsigned short type[PERLINE]; unsigned long vals[PERLINE], val; struct net *net = seq->private; count = 0; for (i = 0; i < ICMPMSG_MIB_MAX; i++) { val = atomic_long_read(&net->mib.icmpmsg_statistics->mibs[i]); if (val) { type[count] = i; vals[count++] = val; } if (count == PERLINE) { icmpmsg_put_line(seq, vals, type, count); count = 0; } } icmpmsg_put_line(seq, vals, type, count); #undef PERLINE } static void icmp_put(struct seq_file *seq) { int i; struct net *net = seq->private; atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors"); for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " In%s", icmpmibmap[i].name); seq_puts(seq, " OutMsgs OutErrors OutRateLimitGlobal OutRateLimitHost"); for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + icmpmibmap[i].index)); seq_printf(seq, " %lu %lu %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_RATELIMITGLOBAL), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_RATELIMITHOST)); for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); } /* * Called from the PROCfs module. This outputs /proc/net/snmp. */ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) { const int cnt = ARRAY_SIZE(snmp4_ipstats_list); u64 buff64[ARRAY_SIZE(snmp4_ipstats_list)]; struct net *net = seq->private; int i; memset(buff64, 0, sizeof(buff64)); seq_puts(seq, "Ip: Forwarding DefaultTTL"); for (i = 0; i < cnt; i++) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); seq_printf(seq, "\nIp: %d %d", IPV4_DEVCONF_ALL_RO(net, FORWARDING) ? 1 : 2, READ_ONCE(net->ipv4.sysctl_ip_default_ttl)); BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); snmp_get_cpu_field64_batch_cnt(buff64, snmp4_ipstats_list, cnt, net->mib.ip_statistics, offsetof(struct ipstats_mib, syncp)); for (i = 0; i < cnt; i++) seq_printf(seq, " %llu", buff64[i]); return 0; } static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) { const int udp_cnt = ARRAY_SIZE(snmp4_udp_list); const int tcp_cnt = ARRAY_SIZE(snmp4_tcp_list); unsigned long buff[TCPUDP_MIB_MAX]; struct net *net = seq->private; int i; memset(buff, 0, tcp_cnt * sizeof(unsigned long)); seq_puts(seq, "\nTcp:"); for (i = 0; i < tcp_cnt; i++) seq_printf(seq, " %s", snmp4_tcp_list[i].name); seq_puts(seq, "\nTcp:"); snmp_get_cpu_field_batch_cnt(buff, snmp4_tcp_list, tcp_cnt, net->mib.tcp_statistics); for (i = 0; i < tcp_cnt; i++) { /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) seq_printf(seq, " %ld", buff[i]); else seq_printf(seq, " %lu", buff[i]); } memset(buff, 0, udp_cnt * sizeof(unsigned long)); snmp_get_cpu_field_batch_cnt(buff, snmp4_udp_list, udp_cnt, net->mib.udp_statistics); seq_puts(seq, "\nUdp:"); for (i = 0; i < udp_cnt; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); seq_puts(seq, "\nUdp:"); for (i = 0; i < udp_cnt; i++) seq_printf(seq, " %lu", buff[i]); memset(buff, 0, udp_cnt * sizeof(unsigned long)); /* the UDP and UDP-Lite MIBs are the same */ seq_puts(seq, "\nUdpLite:"); snmp_get_cpu_field_batch_cnt(buff, snmp4_udp_list, udp_cnt, net->mib.udplite_statistics); for (i = 0; i < udp_cnt; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); seq_puts(seq, "\nUdpLite:"); for (i = 0; i < udp_cnt; i++) seq_printf(seq, " %lu", buff[i]); seq_putc(seq, '\n'); return 0; } static int snmp_seq_show(struct seq_file *seq, void *v) { snmp_seq_show_ipstats(seq, v); icmp_put(seq); /* RFC 2011 compatibility */ icmpmsg_put(seq); snmp_seq_show_tcp_udp(seq, v); return 0; } /* * Output /proc/net/netstat */ static int netstat_seq_show(struct seq_file *seq, void *v) { const int ip_cnt = ARRAY_SIZE(snmp4_ipextstats_list); const int tcp_cnt = ARRAY_SIZE(snmp4_net_list); struct net *net = seq->private; unsigned long *buff; int i; seq_puts(seq, "TcpExt:"); for (i = 0; i < tcp_cnt; i++) seq_printf(seq, " %s", snmp4_net_list[i].name); seq_puts(seq, "\nTcpExt:"); buff = kzalloc(max(tcp_cnt * sizeof(long), ip_cnt * sizeof(u64)), GFP_KERNEL); if (buff) { snmp_get_cpu_field_batch_cnt(buff, snmp4_net_list, tcp_cnt, net->mib.net_statistics); for (i = 0; i < tcp_cnt; i++) seq_printf(seq, " %lu", buff[i]); } else { for (i = 0; i < tcp_cnt; i++) seq_printf(seq, " %lu", snmp_fold_field(net->mib.net_statistics, snmp4_net_list[i].entry)); } seq_puts(seq, "\nIpExt:"); for (i = 0; i < ip_cnt; i++) seq_printf(seq, " %s", snmp4_ipextstats_list[i].name); seq_puts(seq, "\nIpExt:"); if (buff) { u64 *buff64 = (u64 *)buff; memset(buff64, 0, ip_cnt * sizeof(u64)); snmp_get_cpu_field64_batch_cnt(buff64, snmp4_ipextstats_list, ip_cnt, net->mib.ip_statistics, offsetof(struct ipstats_mib, syncp)); for (i = 0; i < ip_cnt; i++) seq_printf(seq, " %llu", buff64[i]); } else { for (i = 0; i < ip_cnt; i++) seq_printf(seq, " %llu", snmp_fold_field64(net->mib.ip_statistics, snmp4_ipextstats_list[i].entry, offsetof(struct ipstats_mib, syncp))); } kfree(buff); seq_putc(seq, '\n'); mptcp_seq_show(seq); return 0; } static __net_init int ip_proc_init_net(struct net *net) { if (!proc_create_net_single("sockstat", 0444, net->proc_net, sockstat_seq_show, NULL)) goto out_sockstat; if (!proc_create_net_single("netstat", 0444, net->proc_net, netstat_seq_show, NULL)) goto out_netstat; if (!proc_create_net_single("snmp", 0444, net->proc_net, snmp_seq_show, NULL)) goto out_snmp; return 0; out_snmp: remove_proc_entry("netstat", net->proc_net); out_netstat: remove_proc_entry("sockstat", net->proc_net); out_sockstat: return -ENOMEM; } static __net_exit void ip_proc_exit_net(struct net *net) { remove_proc_entry("snmp", net->proc_net); remove_proc_entry("netstat", net->proc_net); remove_proc_entry("sockstat", net->proc_net); } static __net_initdata struct pernet_operations ip_proc_ops = { .init = ip_proc_init_net, .exit = ip_proc_exit_net, }; int __init ip_misc_proc_init(void) { return register_pernet_subsys(&ip_proc_ops); } |
| 2688 1179 116 75 1067 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 | /* SPDX-License-Identifier: GPL-2.0 */ /* * workqueue.h --- work queue handling for Linux. */ #ifndef _LINUX_WORKQUEUE_H #define _LINUX_WORKQUEUE_H #include <linux/alloc_tag.h> #include <linux/timer.h> #include <linux/linkage.h> #include <linux/bitops.h> #include <linux/lockdep.h> #include <linux/threads.h> #include <linux/atomic.h> #include <linux/cpumask_types.h> #include <linux/rcupdate.h> #include <linux/workqueue_types.h> /* * The first word is the work queue pointer and the flags rolled into * one */ #define work_data_bits(work) ((unsigned long *)(&(work)->data)) enum work_bits { WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ WORK_STRUCT_INACTIVE_BIT, /* work item is inactive */ WORK_STRUCT_PWQ_BIT, /* data points to pwq */ WORK_STRUCT_LINKED_BIT, /* next work is linked to this one */ #ifdef CONFIG_DEBUG_OBJECTS_WORK WORK_STRUCT_STATIC_BIT, /* static initializer (debugobjects) */ #endif WORK_STRUCT_FLAG_BITS, /* color for workqueue flushing */ WORK_STRUCT_COLOR_SHIFT = WORK_STRUCT_FLAG_BITS, WORK_STRUCT_COLOR_BITS = 4, /* * When WORK_STRUCT_PWQ is set, reserve 8 bits off of pwq pointer w/ * debugobjects turned off. This makes pwqs aligned to 256 bytes (512 * bytes w/ DEBUG_OBJECTS_WORK) and allows 16 workqueue flush colors. * * MSB * [ pwq pointer ] [ flush color ] [ STRUCT flags ] * 4 bits 4 or 5 bits */ WORK_STRUCT_PWQ_SHIFT = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS, /* * data contains off-queue information when !WORK_STRUCT_PWQ. * * MSB * [ pool ID ] [ disable depth ] [ OFFQ flags ] [ STRUCT flags ] * 16 bits 1 bit 4 or 5 bits */ WORK_OFFQ_FLAG_SHIFT = WORK_STRUCT_FLAG_BITS, WORK_OFFQ_BH_BIT = WORK_OFFQ_FLAG_SHIFT, WORK_OFFQ_FLAG_END, WORK_OFFQ_FLAG_BITS = WORK_OFFQ_FLAG_END - WORK_OFFQ_FLAG_SHIFT, WORK_OFFQ_DISABLE_SHIFT = WORK_OFFQ_FLAG_SHIFT + WORK_OFFQ_FLAG_BITS, WORK_OFFQ_DISABLE_BITS = 16, /* * When a work item is off queue, the high bits encode off-queue flags * and the last pool it was on. Cap pool ID to 31 bits and use the * highest number to indicate that no pool is associated. */ WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_DISABLE_SHIFT + WORK_OFFQ_DISABLE_BITS, WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT, WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31, }; enum work_flags { WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, WORK_STRUCT_INACTIVE = 1 << WORK_STRUCT_INACTIVE_BIT, WORK_STRUCT_PWQ = 1 << WORK_STRUCT_PWQ_BIT, WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT, #ifdef CONFIG_DEBUG_OBJECTS_WORK WORK_STRUCT_STATIC = 1 << WORK_STRUCT_STATIC_BIT, #else WORK_STRUCT_STATIC = 0, #endif }; enum wq_misc_consts { WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS), /* not bound to any CPU, prefer the local CPU */ WORK_CPU_UNBOUND = NR_CPUS, /* bit mask for work_busy() return values */ WORK_BUSY_PENDING = 1 << 0, WORK_BUSY_RUNNING = 1 << 1, /* maximum string length for set_worker_desc() */ WORKER_DESC_LEN = 32, }; /* Convenience constants - of type 'unsigned long', not 'enum'! */ #define WORK_OFFQ_BH (1ul << WORK_OFFQ_BH_BIT) #define WORK_OFFQ_FLAG_MASK (((1ul << WORK_OFFQ_FLAG_BITS) - 1) << WORK_OFFQ_FLAG_SHIFT) #define WORK_OFFQ_DISABLE_MASK (((1ul << WORK_OFFQ_DISABLE_BITS) - 1) << WORK_OFFQ_DISABLE_SHIFT) #define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1) #define WORK_STRUCT_NO_POOL (WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT) #define WORK_STRUCT_PWQ_MASK (~((1ul << WORK_STRUCT_PWQ_SHIFT) - 1)) #define WORK_DATA_INIT() ATOMIC_LONG_INIT((unsigned long)WORK_STRUCT_NO_POOL) #define WORK_DATA_STATIC_INIT() \ ATOMIC_LONG_INIT((unsigned long)(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC)) struct delayed_work { struct work_struct work; struct timer_list timer; /* target workqueue and CPU ->timer uses to queue ->work */ struct workqueue_struct *wq; int cpu; }; struct rcu_work { struct work_struct work; struct rcu_head rcu; /* target workqueue ->rcu uses to queue ->work */ struct workqueue_struct *wq; }; enum wq_affn_scope { WQ_AFFN_DFL, /* use system default */ WQ_AFFN_CPU, /* one pod per CPU */ WQ_AFFN_SMT, /* one pod poer SMT */ WQ_AFFN_CACHE, /* one pod per LLC */ WQ_AFFN_NUMA, /* one pod per NUMA node */ WQ_AFFN_SYSTEM, /* one pod across the whole system */ WQ_AFFN_NR_TYPES, }; /** * struct workqueue_attrs - A struct for workqueue attributes. * * This can be used to change attributes of an unbound workqueue. */ struct workqueue_attrs { /** * @nice: nice level */ int nice; /** * @cpumask: allowed CPUs * * Work items in this workqueue are affine to these CPUs and not allowed * to execute on other CPUs. A pool serving a workqueue must have the * same @cpumask. */ cpumask_var_t cpumask; /** * @__pod_cpumask: internal attribute used to create per-pod pools * * Internal use only. * * Per-pod unbound worker pools are used to improve locality. Always a * subset of ->cpumask. A workqueue can be associated with multiple * worker pools with disjoint @__pod_cpumask's. Whether the enforcement * of a pool's @__pod_cpumask is strict depends on @affn_strict. */ cpumask_var_t __pod_cpumask; /** * @affn_strict: affinity scope is strict * * If clear, workqueue will make a best-effort attempt at starting the * worker inside @__pod_cpumask but the scheduler is free to migrate it * outside. * * If set, workers are only allowed to run inside @__pod_cpumask. */ bool affn_strict; /* * Below fields aren't properties of a worker_pool. They only modify how * :c:func:`apply_workqueue_attrs` select pools and thus don't * participate in pool hash calculations or equality comparisons. * * If @affn_strict is set, @cpumask isn't a property of a worker_pool * either. */ /** * @affn_scope: unbound CPU affinity scope * * CPU pods are used to improve execution locality of unbound work * items. There are multiple pod types, one for each wq_affn_scope, and * every CPU in the system belongs to one pod in every pod type. CPUs * that belong to the same pod share the worker pool. For example, * selecting %WQ_AFFN_NUMA makes the workqueue use a separate worker * pool for each NUMA node. */ enum wq_affn_scope affn_scope; /** * @ordered: work items must be executed one by one in queueing order */ bool ordered; }; static inline struct delayed_work *to_delayed_work(struct work_struct *work) { return container_of(work, struct delayed_work, work); } static inline struct rcu_work *to_rcu_work(struct work_struct *work) { return container_of(work, struct rcu_work, work); } struct execute_work { struct work_struct work; }; #ifdef CONFIG_LOCKDEP /* * NB: because we have to copy the lockdep_map, setting _key * here is required, otherwise it could get initialised to the * copy of the lockdep_map! */ #define __WORK_INIT_LOCKDEP_MAP(n, k) \ .lockdep_map = STATIC_LOCKDEP_MAP_INIT(n, k), #else #define __WORK_INIT_LOCKDEP_MAP(n, k) #endif #define __WORK_INITIALIZER(n, f) { \ .data = WORK_DATA_STATIC_INIT(), \ .entry = { &(n).entry, &(n).entry }, \ .func = (f), \ __WORK_INIT_LOCKDEP_MAP(#n, &(n)) \ } #define __DELAYED_WORK_INITIALIZER(n, f, tflags) { \ .work = __WORK_INITIALIZER((n).work, (f)), \ .timer = __TIMER_INITIALIZER(delayed_work_timer_fn,\ (tflags) | TIMER_IRQSAFE), \ } #define DECLARE_WORK(n, f) \ struct work_struct n = __WORK_INITIALIZER(n, f) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, 0) #define DECLARE_DEFERRABLE_WORK(n, f) \ struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, TIMER_DEFERRABLE) #ifdef CONFIG_DEBUG_OBJECTS_WORK extern void __init_work(struct work_struct *work, int onstack); extern void destroy_work_on_stack(struct work_struct *work); extern void destroy_delayed_work_on_stack(struct delayed_work *work); static inline unsigned int work_static(struct work_struct *work) { return *work_data_bits(work) & WORK_STRUCT_STATIC; } #else static inline void __init_work(struct work_struct *work, int onstack) { } static inline void destroy_work_on_stack(struct work_struct *work) { } static inline void destroy_delayed_work_on_stack(struct delayed_work *work) { } static inline unsigned int work_static(struct work_struct *work) { return 0; } #endif /* * initialize all of a work item in one go * * NOTE! No point in using "atomic_long_set()": using a direct * assignment of the work data initializer allows the compiler * to generate better code. */ #ifdef CONFIG_LOCKDEP #define __INIT_WORK_KEY(_work, _func, _onstack, _key) \ do { \ __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ lockdep_init_map(&(_work)->lockdep_map, "(work_completion)"#_work, (_key), 0); \ INIT_LIST_HEAD(&(_work)->entry); \ (_work)->func = (_func); \ } while (0) #else #define __INIT_WORK_KEY(_work, _func, _onstack, _key) \ do { \ __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ INIT_LIST_HEAD(&(_work)->entry); \ (_work)->func = (_func); \ } while (0) #endif #define __INIT_WORK(_work, _func, _onstack) \ do { \ static __maybe_unused struct lock_class_key __key; \ \ __INIT_WORK_KEY(_work, _func, _onstack, &__key); \ } while (0) #define INIT_WORK(_work, _func) \ __INIT_WORK((_work), (_func), 0) #define INIT_WORK_ONSTACK(_work, _func) \ __INIT_WORK((_work), (_func), 1) #define INIT_WORK_ONSTACK_KEY(_work, _func, _key) \ __INIT_WORK_KEY((_work), (_func), 1, _key) #define __INIT_DELAYED_WORK(_work, _func, _tflags) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ __timer_init(&(_work)->timer, \ delayed_work_timer_fn, \ (_tflags) | TIMER_IRQSAFE); \ } while (0) #define __INIT_DELAYED_WORK_ONSTACK(_work, _func, _tflags) \ do { \ INIT_WORK_ONSTACK(&(_work)->work, (_func)); \ __timer_init_on_stack(&(_work)->timer, \ delayed_work_timer_fn, \ (_tflags) | TIMER_IRQSAFE); \ } while (0) #define INIT_DELAYED_WORK(_work, _func) \ __INIT_DELAYED_WORK(_work, _func, 0) #define INIT_DELAYED_WORK_ONSTACK(_work, _func) \ __INIT_DELAYED_WORK_ONSTACK(_work, _func, 0) #define INIT_DEFERRABLE_WORK(_work, _func) \ __INIT_DELAYED_WORK(_work, _func, TIMER_DEFERRABLE) #define INIT_DEFERRABLE_WORK_ONSTACK(_work, _func) \ __INIT_DELAYED_WORK_ONSTACK(_work, _func, TIMER_DEFERRABLE) #define INIT_RCU_WORK(_work, _func) \ INIT_WORK(&(_work)->work, (_func)) #define INIT_RCU_WORK_ONSTACK(_work, _func) \ INIT_WORK_ONSTACK(&(_work)->work, (_func)) /** * work_pending - Find out whether a work item is currently pending * @work: The work item in question */ #define work_pending(work) \ test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) /** * delayed_work_pending - Find out whether a delayable work item is currently * pending * @w: The work item in question */ #define delayed_work_pending(w) \ work_pending(&(w)->work) /* * Workqueue flags and constants. For details, please refer to * Documentation/core-api/workqueue.rst. */ enum wq_flags { WQ_BH = 1 << 0, /* execute in bottom half (softirq) context */ WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ WQ_FREEZABLE = 1 << 2, /* freeze during suspend */ WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu intensive workqueue */ WQ_SYSFS = 1 << 6, /* visible in sysfs, see workqueue_sysfs_register() */ /* * Per-cpu workqueues are generally preferred because they tend to * show better performance thanks to cache locality. Per-cpu * workqueues exclude the scheduler from choosing the CPU to * execute the worker threads, which has an unfortunate side effect * of increasing power consumption. * * The scheduler considers a CPU idle if it doesn't have any task * to execute and tries to keep idle cores idle to conserve power; * however, for example, a per-cpu work item scheduled from an * interrupt handler on an idle CPU will force the scheduler to * execute the work item on that CPU breaking the idleness, which in * turn may lead to more scheduling choices which are sub-optimal * in terms of power consumption. * * Workqueues marked with WQ_POWER_EFFICIENT are per-cpu by default * but become unbound if workqueue.power_efficient kernel param is * specified. Per-cpu workqueues which are identified to * contribute significantly to power-consumption are identified and * marked with this flag and enabling the power_efficient mode * leads to noticeable power saving at the cost of small * performance disadvantage. * * http://thread.gmane.org/gmane.linux.kernel/1480396 */ WQ_POWER_EFFICIENT = 1 << 7, WQ_PERCPU = 1 << 8, /* bound to a specific cpu */ __WQ_DESTROYING = 1 << 15, /* internal: workqueue is destroying */ __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */ /* BH wq only allows the following flags */ __WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI | WQ_PERCPU, }; enum wq_consts { WQ_MAX_ACTIVE = 2048, /* I like 2048, better ideas? */ WQ_UNBOUND_MAX_ACTIVE = WQ_MAX_ACTIVE, WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, /* * Per-node default cap on min_active. Unless explicitly set, min_active * is set to min(max_active, WQ_DFL_MIN_ACTIVE). For more details, see * workqueue_struct->min_active definition. */ WQ_DFL_MIN_ACTIVE = 8, }; /* * System-wide workqueues which are always present. * * system_percpu_wq is the one used by schedule[_delayed]_work[_on](). * Multi-CPU multi-threaded. There are users which expect relatively * short queue flush time. Don't queue works which can run for too * long. * * system_highpri_wq is similar to system_percpu_wq but for work items which * require WQ_HIGHPRI. * * system_long_wq is similar to system_percpu_wq but may host long running * works. Queue flushing might take relatively long. * * system_dfl_wq is unbound workqueue. Workers are not bound to * any specific CPU, not concurrency managed, and all queued works are * executed immediately as long as max_active limit is not reached and * resources are available. * * system_freezable_wq is equivalent to system_percpu_wq except that it's * freezable. * * *_power_efficient_wq are inclined towards saving power and converted * into WQ_UNBOUND variants if 'wq_power_efficient' is enabled; otherwise, * they are same as their non-power-efficient counterparts - e.g. * system_power_efficient_wq is identical to system_percpu_wq if * 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info. * * system_bh[_highpri]_wq are convenience interface to softirq. BH work items * are executed in the queueing CPU's BH context in the queueing order. */ extern struct workqueue_struct *system_wq; /* use system_percpu_wq, this will be removed */ extern struct workqueue_struct *system_percpu_wq; extern struct workqueue_struct *system_highpri_wq; extern struct workqueue_struct *system_long_wq; extern struct workqueue_struct *system_unbound_wq; extern struct workqueue_struct *system_dfl_wq; extern struct workqueue_struct *system_freezable_wq; extern struct workqueue_struct *system_power_efficient_wq; extern struct workqueue_struct *system_freezable_power_efficient_wq; extern struct workqueue_struct *system_bh_wq; extern struct workqueue_struct *system_bh_highpri_wq; void workqueue_softirq_action(bool highpri); void workqueue_softirq_dead(unsigned int cpu); /** * alloc_workqueue - allocate a workqueue * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags * @max_active: max in-flight work items, 0 for default * @...: args for @fmt * * For a per-cpu workqueue, @max_active limits the number of in-flight work * items for each CPU. e.g. @max_active of 1 indicates that each CPU can be * executing at most one work item for the workqueue. * * For unbound workqueues, @max_active limits the number of in-flight work items * for the whole system. e.g. @max_active of 16 indicates that there can be * at most 16 work items executing for the workqueue in the whole system. * * As sharing the same active counter for an unbound workqueue across multiple * NUMA nodes can be expensive, @max_active is distributed to each NUMA node * according to the proportion of the number of online CPUs and enforced * independently. * * Depending on online CPU distribution, a node may end up with per-node * max_active which is significantly lower than @max_active, which can lead to * deadlocks if the per-node concurrency limit is lower than the maximum number * of interdependent work items for the workqueue. * * To guarantee forward progress regardless of online CPU distribution, the * concurrency limit on every node is guaranteed to be equal to or greater than * min_active which is set to min(@max_active, %WQ_DFL_MIN_ACTIVE). This means * that the sum of per-node max_active's may be larger than @max_active. * * For detailed information on %WQ_\* flags, please refer to * Documentation/core-api/workqueue.rst. * * RETURNS: * Pointer to the allocated workqueue on success, %NULL on failure. */ __printf(1, 4) struct workqueue_struct * alloc_workqueue_noprof(const char *fmt, unsigned int flags, int max_active, ...); #define alloc_workqueue(...) alloc_hooks(alloc_workqueue_noprof(__VA_ARGS__)) #ifdef CONFIG_LOCKDEP /** * alloc_workqueue_lockdep_map - allocate a workqueue with user-defined lockdep_map * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags * @max_active: max in-flight work items, 0 for default * @lockdep_map: user-defined lockdep_map * @...: args for @fmt * * Same as alloc_workqueue but with the a user-define lockdep_map. Useful for * workqueues created with the same purpose and to avoid leaking a lockdep_map * on each workqueue creation. * * RETURNS: * Pointer to the allocated workqueue on success, %NULL on failure. */ __printf(1, 5) struct workqueue_struct * alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, struct lockdep_map *lockdep_map, ...); /** * alloc_ordered_workqueue_lockdep_map - allocate an ordered workqueue with * user-defined lockdep_map * * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful) * @lockdep_map: user-defined lockdep_map * @args: args for @fmt * * Same as alloc_ordered_workqueue but with the a user-define lockdep_map. * Useful for workqueues created with the same purpose and to avoid leaking a * lockdep_map on each workqueue creation. * * RETURNS: * Pointer to the allocated workqueue on success, %NULL on failure. */ #define alloc_ordered_workqueue_lockdep_map(fmt, flags, lockdep_map, args...) \ alloc_hooks(alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags),\ 1, lockdep_map, ##args)) #endif /** * alloc_ordered_workqueue - allocate an ordered workqueue * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful) * @args: args for @fmt * * Allocate an ordered workqueue. An ordered workqueue executes at * most one work item at any given time in the queued order. They are * implemented as unbound workqueues with @max_active of one. * * RETURNS: * Pointer to the allocated workqueue on success, %NULL on failure. */ #define alloc_ordered_workqueue(fmt, flags, args...) \ alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) #define create_workqueue(name) \ alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM | WQ_PERCPU, 1, (name)) #define create_freezable_workqueue(name) \ alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND | \ WQ_MEM_RECLAIM, 1, (name)) #define create_singlethread_workqueue(name) \ alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name) #define from_work(var, callback_work, work_fieldname) \ container_of(callback_work, typeof(*var), work_fieldname) extern void destroy_workqueue(struct workqueue_struct *wq); struct workqueue_attrs *alloc_workqueue_attrs_noprof(void); #define alloc_workqueue_attrs(...) alloc_hooks(alloc_workqueue_attrs_noprof(__VA_ARGS__)) void free_workqueue_attrs(struct workqueue_attrs *attrs); int apply_workqueue_attrs(struct workqueue_struct *wq, const struct workqueue_attrs *attrs); extern int workqueue_unbound_exclude_cpumask(cpumask_var_t cpumask); extern bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work); extern bool queue_work_node(int node, struct workqueue_struct *wq, struct work_struct *work); extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay); extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay); extern bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork); extern void __flush_workqueue(struct workqueue_struct *wq); extern void drain_workqueue(struct workqueue_struct *wq); extern int schedule_on_each_cpu(work_func_t func); int execute_in_process_context(work_func_t fn, struct execute_work *); extern bool flush_work(struct work_struct *work); extern bool cancel_work(struct work_struct *work); extern bool cancel_work_sync(struct work_struct *work); extern bool flush_delayed_work(struct delayed_work *dwork); extern bool cancel_delayed_work(struct delayed_work *dwork); extern bool cancel_delayed_work_sync(struct delayed_work *dwork); extern bool disable_work(struct work_struct *work); extern bool disable_work_sync(struct work_struct *work); extern bool enable_work(struct work_struct *work); extern bool disable_delayed_work(struct delayed_work *dwork); extern bool disable_delayed_work_sync(struct delayed_work *dwork); extern bool enable_delayed_work(struct delayed_work *dwork); extern bool flush_rcu_work(struct rcu_work *rwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, int max_active); extern void workqueue_set_min_active(struct workqueue_struct *wq, int min_active); extern struct work_struct *current_work(void); extern bool current_is_workqueue_rescuer(void); extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); extern unsigned int work_busy(struct work_struct *work); extern __printf(1, 2) void set_worker_desc(const char *fmt, ...); extern void print_worker_info(const char *log_lvl, struct task_struct *task); extern void show_all_workqueues(void); extern void show_freezable_workqueues(void); extern void show_one_workqueue(struct workqueue_struct *wq); extern void wq_worker_comm(char *buf, size_t size, struct task_struct *task); /** * queue_work - queue work on a workqueue * @wq: workqueue to use * @work: work to queue * * Returns %false if @work was already on a queue, %true otherwise. * * We queue the work to the CPU on which it was submitted, but if the CPU dies * it can be processed by another CPU. * * Memory-ordering properties: If it returns %true, guarantees that all stores * preceding the call to queue_work() in the program order will be visible from * the CPU which will execute @work by the time such work executes, e.g., * * { x is initially 0 } * * CPU0 CPU1 * * WRITE_ONCE(x, 1); [ @work is being executed ] * r0 = queue_work(wq, work); r1 = READ_ONCE(x); * * Forbids: r0 == true && r1 == 0 */ static inline bool queue_work(struct workqueue_struct *wq, struct work_struct *work) { return queue_work_on(WORK_CPU_UNBOUND, wq, work); } /** * queue_delayed_work - queue work on a workqueue after delay * @wq: workqueue to use * @dwork: delayable work to queue * @delay: number of jiffies to wait before queueing * * Equivalent to queue_delayed_work_on() but tries to use the local CPU. */ static inline bool queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay) { return queue_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay); } /** * mod_delayed_work - modify delay of or queue a delayed work * @wq: workqueue to use * @dwork: work to queue * @delay: number of jiffies to wait before queueing * * mod_delayed_work_on() on local CPU. */ static inline bool mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay) { return mod_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay); } /** * schedule_work_on - put work task on a specific cpu * @cpu: cpu to put the work task on * @work: job to be done * * This puts a job on a specific cpu */ static inline bool schedule_work_on(int cpu, struct work_struct *work) { return queue_work_on(cpu, system_percpu_wq, work); } /** * schedule_work - put work task in global workqueue * @work: job to be done * * Returns %false if @work was already on the kernel-global workqueue and * %true otherwise. * * This puts a job in the kernel-global workqueue if it was not already * queued and leaves it in the same position on the kernel-global * workqueue otherwise. * * Shares the same memory-ordering properties of queue_work(), cf. the * DocBook header of queue_work(). */ static inline bool schedule_work(struct work_struct *work) { return queue_work(system_percpu_wq, work); } /** * enable_and_queue_work - Enable and queue a work item on a specific workqueue * @wq: The target workqueue * @work: The work item to be enabled and queued * * This function combines the operations of enable_work() and queue_work(), * providing a convenient way to enable and queue a work item in a single call. * It invokes enable_work() on @work and then queues it if the disable depth * reached 0. Returns %true if the disable depth reached 0 and @work is queued, * and %false otherwise. * * Note that @work is always queued when disable depth reaches zero. If the * desired behavior is queueing only if certain events took place while @work is * disabled, the user should implement the necessary state tracking and perform * explicit conditional queueing after enable_work(). */ static inline bool enable_and_queue_work(struct workqueue_struct *wq, struct work_struct *work) { if (enable_work(work)) { queue_work(wq, work); return true; } return false; } /* * Detect attempt to flush system-wide workqueues at compile time when possible. * Warn attempt to flush system-wide workqueues at runtime. * * See https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp * for reasons and steps for converting system-wide workqueues into local workqueues. */ extern void __warn_flushing_systemwide_wq(void) __compiletime_warning("Please avoid flushing system-wide workqueues."); /* Please stop using this function, for this function will be removed in near future. */ #define flush_scheduled_work() \ ({ \ __warn_flushing_systemwide_wq(); \ __flush_workqueue(system_percpu_wq); \ }) #define flush_workqueue(wq) \ ({ \ struct workqueue_struct *_wq = (wq); \ \ if ((__builtin_constant_p(_wq == system_percpu_wq) && \ _wq == system_percpu_wq) || \ (__builtin_constant_p(_wq == system_highpri_wq) && \ _wq == system_highpri_wq) || \ (__builtin_constant_p(_wq == system_long_wq) && \ _wq == system_long_wq) || \ (__builtin_constant_p(_wq == system_dfl_wq) && \ _wq == system_dfl_wq) || \ (__builtin_constant_p(_wq == system_freezable_wq) && \ _wq == system_freezable_wq) || \ (__builtin_constant_p(_wq == system_power_efficient_wq) && \ _wq == system_power_efficient_wq) || \ (__builtin_constant_p(_wq == system_freezable_power_efficient_wq) && \ _wq == system_freezable_power_efficient_wq)) \ __warn_flushing_systemwide_wq(); \ __flush_workqueue(_wq); \ }) /** * schedule_delayed_work_on - queue work in global workqueue on CPU after delay * @cpu: cpu to use * @dwork: job to be done * @delay: number of jiffies to wait * * After waiting for a given time this puts a job in the kernel-global * workqueue on the specified CPU. */ static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay) { return queue_delayed_work_on(cpu, system_percpu_wq, dwork, delay); } /** * schedule_delayed_work - put work task in global workqueue after delay * @dwork: job to be done * @delay: number of jiffies to wait or 0 for immediate execution * * After waiting for a given time this puts a job in the kernel-global * workqueue. */ static inline bool schedule_delayed_work(struct delayed_work *dwork, unsigned long delay) { return queue_delayed_work(system_percpu_wq, dwork, delay); } #ifndef CONFIG_SMP static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg) { return fn(arg); } static inline long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg) { return fn(arg); } #else long work_on_cpu_key(int cpu, long (*fn)(void *), void *arg, struct lock_class_key *key); /* * A new key is defined for each caller to make sure the work * associated with the function doesn't share its locking class. */ #define work_on_cpu(_cpu, _fn, _arg) \ ({ \ static struct lock_class_key __key; \ \ work_on_cpu_key(_cpu, _fn, _arg, &__key); \ }) #endif /* CONFIG_SMP */ #ifdef CONFIG_FREEZER extern void freeze_workqueues_begin(void); extern bool freeze_workqueues_busy(void); extern void thaw_workqueues(void); #endif /* CONFIG_FREEZER */ #ifdef CONFIG_SYSFS int workqueue_sysfs_register(struct workqueue_struct *wq); #else /* CONFIG_SYSFS */ static inline int workqueue_sysfs_register(struct workqueue_struct *wq) { return 0; } #endif /* CONFIG_SYSFS */ #ifdef CONFIG_WQ_WATCHDOG void wq_watchdog_touch(int cpu); #else /* CONFIG_WQ_WATCHDOG */ static inline void wq_watchdog_touch(int cpu) { } #endif /* CONFIG_WQ_WATCHDOG */ #ifdef CONFIG_SMP int workqueue_prepare_cpu(unsigned int cpu); int workqueue_online_cpu(unsigned int cpu); int workqueue_offline_cpu(unsigned int cpu); #endif void __init workqueue_init_early(void); void __init workqueue_init(void); void __init workqueue_init_topology(void); #endif |
| 24 1 41 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_KHUGEPAGED_H #define _LINUX_KHUGEPAGED_H #include <linux/mm.h> extern unsigned int khugepaged_max_ptes_none __read_mostly; #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern struct attribute_group khugepaged_attr_group; extern int khugepaged_init(void); extern void khugepaged_destroy(void); extern int start_stop_khugepaged(void); extern void __khugepaged_enter(struct mm_struct *mm); extern void __khugepaged_exit(struct mm_struct *mm); extern void khugepaged_enter_vma(struct vm_area_struct *vma, vm_flags_t vm_flags); extern void khugepaged_min_free_kbytes_update(void); extern bool current_is_khugepaged(void); extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, bool install_pmd); static inline void khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm) { if (mm_flags_test(MMF_VM_HUGEPAGE, oldmm)) __khugepaged_enter(mm); } static inline void khugepaged_exit(struct mm_struct *mm) { if (mm_flags_test(MMF_VM_HUGEPAGE, mm)) __khugepaged_exit(mm); } #else /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline void khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm) { } static inline void khugepaged_exit(struct mm_struct *mm) { } static inline void khugepaged_enter_vma(struct vm_area_struct *vma, vm_flags_t vm_flags) { } static inline int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, bool install_pmd) { return 0; } static inline void khugepaged_min_free_kbytes_update(void) { } static inline bool current_is_khugepaged(void) { return false; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* _LINUX_KHUGEPAGED_H */ |
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 | // SPDX-License-Identifier: GPL-2.0-only /* * 32-bit compatibility support for ELF format executables and core dumps. * * Copyright (C) 2007 Red Hat, Inc. All rights reserved. * * Red Hat Author: Roland McGrath. * * This file is used in a 64-bit kernel that wants to support 32-bit ELF. * asm/elf.h is responsible for defining the compat_* and COMPAT_* macros * used below, with definitions appropriate for 32-bit ABI compatibility. * * We use macros to rename the ABI types and machine-dependent * functions used in binfmt_elf.c to compat versions. */ #include <linux/elfcore-compat.h> #include <linux/time.h> #define ELF_COMPAT 1 /* * Rename the basic ELF layout types to refer to the 32-bit class of files. */ #undef ELF_CLASS #define ELF_CLASS ELFCLASS32 #undef elfhdr #undef elf_phdr #undef elf_shdr #undef elf_note #undef elf_addr_t #undef ELF_GNU_PROPERTY_ALIGN #define elfhdr elf32_hdr #define elf_phdr elf32_phdr #define elf_shdr elf32_shdr #define elf_note elf32_note #define elf_addr_t Elf32_Addr #define ELF_GNU_PROPERTY_ALIGN ELF32_GNU_PROPERTY_ALIGN /* * Some data types as stored in coredump. */ #define user_long_t compat_long_t #define user_siginfo_t compat_siginfo_t #define copy_siginfo_to_external copy_siginfo_to_external32 /* * The machine-dependent core note format types are defined in elfcore-compat.h, * which requires asm/elf.h to define compat_elf_gregset_t et al. */ #define elf_prstatus compat_elf_prstatus #define elf_prstatus_common compat_elf_prstatus_common #define elf_prpsinfo compat_elf_prpsinfo #undef ns_to_kernel_old_timeval #define ns_to_kernel_old_timeval ns_to_old_timeval32 /* * To use this file, asm/elf.h must define compat_elf_check_arch. * The other following macros can be defined if the compat versions * differ from the native ones, or omitted when they match. */ #undef elf_check_arch #define elf_check_arch compat_elf_check_arch #ifdef COMPAT_ELF_PLATFORM #undef ELF_PLATFORM #define ELF_PLATFORM COMPAT_ELF_PLATFORM #endif #ifdef COMPAT_ELF_HWCAP #undef ELF_HWCAP #define ELF_HWCAP COMPAT_ELF_HWCAP #endif #ifdef COMPAT_ELF_HWCAP2 #undef ELF_HWCAP2 #define ELF_HWCAP2 COMPAT_ELF_HWCAP2 #endif #ifdef COMPAT_ELF_HWCAP3 #undef ELF_HWCAP3 #define ELF_HWCAP3 COMPAT_ELF_HWCAP3 #endif #ifdef COMPAT_ELF_HWCAP4 #undef ELF_HWCAP4 #define ELF_HWCAP4 COMPAT_ELF_HWCAP4 #endif #ifdef COMPAT_ARCH_DLINFO #undef ARCH_DLINFO #define ARCH_DLINFO COMPAT_ARCH_DLINFO #endif #ifdef COMPAT_ELF_ET_DYN_BASE #undef ELF_ET_DYN_BASE #define ELF_ET_DYN_BASE COMPAT_ELF_ET_DYN_BASE #endif #ifdef COMPAT_ELF_PLAT_INIT #undef ELF_PLAT_INIT #define ELF_PLAT_INIT COMPAT_ELF_PLAT_INIT #endif #ifdef COMPAT_SET_PERSONALITY #undef SET_PERSONALITY #define SET_PERSONALITY COMPAT_SET_PERSONALITY #endif #ifdef compat_start_thread #define COMPAT_START_THREAD(ex, regs, new_ip, new_sp) \ compat_start_thread(regs, new_ip, new_sp) #endif #ifdef COMPAT_START_THREAD #undef START_THREAD #define START_THREAD COMPAT_START_THREAD #endif #ifdef compat_arch_setup_additional_pages #define COMPAT_ARCH_SETUP_ADDITIONAL_PAGES(bprm, ex, interpreter) \ compat_arch_setup_additional_pages(bprm, interpreter) #endif #ifdef COMPAT_ARCH_SETUP_ADDITIONAL_PAGES #undef ARCH_HAS_SETUP_ADDITIONAL_PAGES #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 #undef ARCH_SETUP_ADDITIONAL_PAGES #define ARCH_SETUP_ADDITIONAL_PAGES COMPAT_ARCH_SETUP_ADDITIONAL_PAGES #endif #ifdef compat_elf_read_implies_exec #undef elf_read_implies_exec #define elf_read_implies_exec compat_elf_read_implies_exec #endif /* * Rename a few of the symbols that binfmt_elf.c will define. * These are all local so the names don't really matter, but it * might make some debugging less confusing not to duplicate them. */ #define elf_format compat_elf_format #define init_elf_binfmt init_compat_elf_binfmt #define exit_elf_binfmt exit_compat_elf_binfmt #define binfmt_elf_test_cases compat_binfmt_elf_test_cases #define binfmt_elf_test_suite compat_binfmt_elf_test_suite /* * We share all the actual code with the native (64-bit) version. */ #include "binfmt_elf.c" |
| 2 14 1 2 2 2 2 2 1 2 2 1 1 1 2 1 2 2 1 2 2 2 2 2 1 1 1 1 1 2 2 2 2 2 2 1 2 2 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2016 Mellanox Technologies. All rights reserved. * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> */ #include "devl_internal.h" struct devlink_sb { struct list_head list; unsigned int index; u32 size; u16 ingress_pools_count; u16 egress_pools_count; u16 ingress_tc_count; u16 egress_tc_count; }; static u16 devlink_sb_pool_count(struct devlink_sb *devlink_sb) { return devlink_sb->ingress_pools_count + devlink_sb->egress_pools_count; } static struct devlink_sb *devlink_sb_get_by_index(struct devlink *devlink, unsigned int sb_index) { struct devlink_sb *devlink_sb; list_for_each_entry(devlink_sb, &devlink->sb_list, list) { if (devlink_sb->index == sb_index) return devlink_sb; } return NULL; } static bool devlink_sb_index_exists(struct devlink *devlink, unsigned int sb_index) { return devlink_sb_get_by_index(devlink, sb_index); } static struct devlink_sb *devlink_sb_get_from_attrs(struct devlink *devlink, struct nlattr **attrs) { if (attrs[DEVLINK_ATTR_SB_INDEX]) { u32 sb_index = nla_get_u32(attrs[DEVLINK_ATTR_SB_INDEX]); struct devlink_sb *devlink_sb; devlink_sb = devlink_sb_get_by_index(devlink, sb_index); if (!devlink_sb) return ERR_PTR(-ENODEV); return devlink_sb; } return ERR_PTR(-EINVAL); } static struct devlink_sb *devlink_sb_get_from_info(struct devlink *devlink, struct genl_info *info) { return devlink_sb_get_from_attrs(devlink, info->attrs); } static int devlink_sb_pool_index_get_from_attrs(struct devlink_sb *devlink_sb, struct nlattr **attrs, u16 *p_pool_index) { u16 val; if (!attrs[DEVLINK_ATTR_SB_POOL_INDEX]) return -EINVAL; val = nla_get_u16(attrs[DEVLINK_ATTR_SB_POOL_INDEX]); if (val >= devlink_sb_pool_count(devlink_sb)) return -EINVAL; *p_pool_index = val; return 0; } static int devlink_sb_pool_index_get_from_info(struct devlink_sb *devlink_sb, struct genl_info *info, u16 *p_pool_index) { return devlink_sb_pool_index_get_from_attrs(devlink_sb, info->attrs, p_pool_index); } static int devlink_sb_pool_type_get_from_attrs(struct nlattr **attrs, enum devlink_sb_pool_type *p_pool_type) { u8 val; if (!attrs[DEVLINK_ATTR_SB_POOL_TYPE]) return -EINVAL; val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_TYPE]); if (val != DEVLINK_SB_POOL_TYPE_INGRESS && val != DEVLINK_SB_POOL_TYPE_EGRESS) return -EINVAL; *p_pool_type = val; return 0; } static int devlink_sb_pool_type_get_from_info(struct genl_info *info, enum devlink_sb_pool_type *p_pool_type) { return devlink_sb_pool_type_get_from_attrs(info->attrs, p_pool_type); } static int devlink_sb_th_type_get_from_attrs(struct nlattr **attrs, enum devlink_sb_threshold_type *p_th_type) { u8 val; if (!attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE]) return -EINVAL; val = nla_get_u8(attrs[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE]); if (val != DEVLINK_SB_THRESHOLD_TYPE_STATIC && val != DEVLINK_SB_THRESHOLD_TYPE_DYNAMIC) return -EINVAL; *p_th_type = val; return 0; } static int devlink_sb_th_type_get_from_info(struct genl_info *info, enum devlink_sb_threshold_type *p_th_type) { return devlink_sb_th_type_get_from_attrs(info->attrs, p_th_type); } static int devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb, struct nlattr **attrs, enum devlink_sb_pool_type pool_type, u16 *p_tc_index) { u16 val; if (!attrs[DEVLINK_ATTR_SB_TC_INDEX]) return -EINVAL; val = nla_get_u16(attrs[DEVLINK_ATTR_SB_TC_INDEX]); if (pool_type == DEVLINK_SB_POOL_TYPE_INGRESS && val >= devlink_sb->ingress_tc_count) return -EINVAL; if (pool_type == DEVLINK_SB_POOL_TYPE_EGRESS && val >= devlink_sb->egress_tc_count) return -EINVAL; *p_tc_index = val; return 0; } static int devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb, struct genl_info *info, enum devlink_sb_pool_type pool_type, u16 *p_tc_index) { return devlink_sb_tc_index_get_from_attrs(devlink_sb, info->attrs, pool_type, p_tc_index); } static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_sb *devlink_sb, enum devlink_command cmd, u32 portid, u32 seq, int flags) { void *hdr; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) goto nla_put_failure; if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index)) goto nla_put_failure; if (nla_put_u32(msg, DEVLINK_ATTR_SB_SIZE, devlink_sb->size)) goto nla_put_failure; if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_POOL_COUNT, devlink_sb->ingress_pools_count)) goto nla_put_failure; if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_POOL_COUNT, devlink_sb->egress_pools_count)) goto nla_put_failure; if (nla_put_u16(msg, DEVLINK_ATTR_SB_INGRESS_TC_COUNT, devlink_sb->ingress_tc_count)) goto nla_put_failure; if (nla_put_u16(msg, DEVLINK_ATTR_SB_EGRESS_TC_COUNT, devlink_sb->egress_tc_count)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } int devlink_nl_sb_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_sb *devlink_sb; struct sk_buff *msg; int err; devlink_sb = devlink_sb_get_from_info(devlink, info); if (IS_ERR(devlink_sb)) return PTR_ERR(devlink_sb); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_sb_fill(msg, devlink, devlink_sb, DEVLINK_CMD_SB_NEW, info->snd_portid, info->snd_seq, 0); if (err) { nlmsg_free(msg); return err; } return genlmsg_reply(msg, info); } static int devlink_nl_sb_get_dump_one(struct sk_buff *msg, struct devlink *devlink, struct netlink_callback *cb, int flags) { struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_sb *devlink_sb; int idx = 0; int err = 0; list_for_each_entry(devlink_sb, &devlink->sb_list, list) { if (idx < state->idx) { idx++; continue; } err = devlink_nl_sb_fill(msg, devlink, devlink_sb, DEVLINK_CMD_SB_NEW, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags); if (err) { state->idx = idx; break; } idx++; } return err; } int devlink_nl_sb_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { return devlink_nl_dumpit(skb, cb, devlink_nl_sb_get_dump_one); } static int devlink_nl_sb_pool_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_sb *devlink_sb, u16 pool_index, enum devlink_command cmd, u32 portid, u32 seq, int flags) { struct devlink_sb_pool_info pool_info; void *hdr; int err; err = devlink->ops->sb_pool_get(devlink, devlink_sb->index, pool_index, &pool_info); if (err) return err; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) goto nla_put_failure; if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index)) goto nla_put_failure; if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index)) goto nla_put_failure; if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_info.pool_type)) goto nla_put_failure; if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_SIZE, pool_info.size)) goto nla_put_failure; if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE, pool_info.threshold_type)) goto nla_put_failure; if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_CELL_SIZE, pool_info.cell_size)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } int devlink_nl_sb_pool_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_sb *devlink_sb; struct sk_buff *msg; u16 pool_index; int err; devlink_sb = devlink_sb_get_from_info(devlink, info); if (IS_ERR(devlink_sb)) return PTR_ERR(devlink_sb); err = devlink_sb_pool_index_get_from_info(devlink_sb, info, &pool_index); if (err) return err; if (!devlink->ops->sb_pool_get) return -EOPNOTSUPP; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_sb_pool_fill(msg, devlink, devlink_sb, pool_index, DEVLINK_CMD_SB_POOL_NEW, info->snd_portid, info->snd_seq, 0); if (err) { nlmsg_free(msg); return err; } return genlmsg_reply(msg, info); } static int __sb_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx, struct devlink *devlink, struct devlink_sb *devlink_sb, u32 portid, u32 seq, int flags) { u16 pool_count = devlink_sb_pool_count(devlink_sb); u16 pool_index; int err; for (pool_index = 0; pool_index < pool_count; pool_index++) { if (*p_idx < start) { (*p_idx)++; continue; } err = devlink_nl_sb_pool_fill(msg, devlink, devlink_sb, pool_index, DEVLINK_CMD_SB_POOL_NEW, portid, seq, flags); if (err) return err; (*p_idx)++; } return 0; } static int devlink_nl_sb_pool_get_dump_one(struct sk_buff *msg, struct devlink *devlink, struct netlink_callback *cb, int flags) { struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_sb *devlink_sb; int err = 0; int idx = 0; if (!devlink->ops->sb_pool_get) return 0; list_for_each_entry(devlink_sb, &devlink->sb_list, list) { err = __sb_pool_get_dumpit(msg, state->idx, &idx, devlink, devlink_sb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags); if (err == -EOPNOTSUPP) { err = 0; } else if (err) { state->idx = idx; break; } } return err; } int devlink_nl_sb_pool_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { return devlink_nl_dumpit(skb, cb, devlink_nl_sb_pool_get_dump_one); } static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index, u16 pool_index, u32 size, enum devlink_sb_threshold_type threshold_type, struct netlink_ext_ack *extack) { const struct devlink_ops *ops = devlink->ops; if (ops->sb_pool_set) return ops->sb_pool_set(devlink, sb_index, pool_index, size, threshold_type, extack); return -EOPNOTSUPP; } int devlink_nl_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; enum devlink_sb_threshold_type threshold_type; struct devlink_sb *devlink_sb; u16 pool_index; u32 size; int err; devlink_sb = devlink_sb_get_from_info(devlink, info); if (IS_ERR(devlink_sb)) return PTR_ERR(devlink_sb); err = devlink_sb_pool_index_get_from_info(devlink_sb, info, &pool_index); if (err) return err; err = devlink_sb_th_type_get_from_info(info, &threshold_type); if (err) return err; if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_POOL_SIZE)) return -EINVAL; size = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_POOL_SIZE]); return devlink_sb_pool_set(devlink, devlink_sb->index, pool_index, size, threshold_type, info->extack); } static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_port *devlink_port, struct devlink_sb *devlink_sb, u16 pool_index, enum devlink_command cmd, u32 portid, u32 seq, int flags) { const struct devlink_ops *ops = devlink->ops; u32 threshold; void *hdr; int err; err = ops->sb_port_pool_get(devlink_port, devlink_sb->index, pool_index, &threshold); if (err) return err; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) goto nla_put_failure; if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) goto nla_put_failure; if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index)) goto nla_put_failure; if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index)) goto nla_put_failure; if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold)) goto nla_put_failure; if (ops->sb_occ_port_pool_get) { u32 cur; u32 max; err = ops->sb_occ_port_pool_get(devlink_port, devlink_sb->index, pool_index, &cur, &max); if (err && err != -EOPNOTSUPP) goto sb_occ_get_failure; if (!err) { if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur)) goto nla_put_failure; if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max)) goto nla_put_failure; } } genlmsg_end(msg, hdr); return 0; nla_put_failure: err = -EMSGSIZE; sb_occ_get_failure: genlmsg_cancel(msg, hdr); return err; } int devlink_nl_sb_port_pool_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = devlink_port->devlink; struct devlink_sb *devlink_sb; struct sk_buff *msg; u16 pool_index; int err; devlink_sb = devlink_sb_get_from_info(devlink, info); if (IS_ERR(devlink_sb)) return PTR_ERR(devlink_sb); err = devlink_sb_pool_index_get_from_info(devlink_sb, info, &pool_index); if (err) return err; if (!devlink->ops->sb_port_pool_get) return -EOPNOTSUPP; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_sb_port_pool_fill(msg, devlink, devlink_port, devlink_sb, pool_index, DEVLINK_CMD_SB_PORT_POOL_NEW, info->snd_portid, info->snd_seq, 0); if (err) { nlmsg_free(msg); return err; } return genlmsg_reply(msg, info); } static int __sb_port_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx, struct devlink *devlink, struct devlink_sb *devlink_sb, u32 portid, u32 seq, int flags) { struct devlink_port *devlink_port; u16 pool_count = devlink_sb_pool_count(devlink_sb); unsigned long port_index; u16 pool_index; int err; xa_for_each(&devlink->ports, port_index, devlink_port) { for (pool_index = 0; pool_index < pool_count; pool_index++) { if (*p_idx < start) { (*p_idx)++; continue; } err = devlink_nl_sb_port_pool_fill(msg, devlink, devlink_port, devlink_sb, pool_index, DEVLINK_CMD_SB_PORT_POOL_NEW, portid, seq, flags); if (err) return err; (*p_idx)++; } } return 0; } static int devlink_nl_sb_port_pool_get_dump_one(struct sk_buff *msg, struct devlink *devlink, struct netlink_callback *cb, int flags) { struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_sb *devlink_sb; int idx = 0; int err = 0; if (!devlink->ops->sb_port_pool_get) return 0; list_for_each_entry(devlink_sb, &devlink->sb_list, list) { err = __sb_port_pool_get_dumpit(msg, state->idx, &idx, devlink, devlink_sb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags); if (err == -EOPNOTSUPP) { err = 0; } else if (err) { state->idx = idx; break; } } return err; } int devlink_nl_sb_port_pool_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { return devlink_nl_dumpit(skb, cb, devlink_nl_sb_port_pool_get_dump_one); } static int devlink_sb_port_pool_set(struct devlink_port *devlink_port, unsigned int sb_index, u16 pool_index, u32 threshold, struct netlink_ext_ack *extack) { const struct devlink_ops *ops = devlink_port->devlink->ops; if (ops->sb_port_pool_set) return ops->sb_port_pool_set(devlink_port, sb_index, pool_index, threshold, extack); return -EOPNOTSUPP; } int devlink_nl_sb_port_pool_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = info->user_ptr[0]; struct devlink_sb *devlink_sb; u16 pool_index; u32 threshold; int err; devlink_sb = devlink_sb_get_from_info(devlink, info); if (IS_ERR(devlink_sb)) return PTR_ERR(devlink_sb); err = devlink_sb_pool_index_get_from_info(devlink_sb, info, &pool_index); if (err) return err; if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_THRESHOLD)) return -EINVAL; threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]); return devlink_sb_port_pool_set(devlink_port, devlink_sb->index, pool_index, threshold, info->extack); } static int devlink_nl_sb_tc_pool_bind_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_port *devlink_port, struct devlink_sb *devlink_sb, u16 tc_index, enum devlink_sb_pool_type pool_type, enum devlink_command cmd, u32 portid, u32 seq, int flags) { const struct devlink_ops *ops = devlink->ops; u16 pool_index; u32 threshold; void *hdr; int err; err = ops->sb_tc_pool_bind_get(devlink_port, devlink_sb->index, tc_index, pool_type, &pool_index, &threshold); if (err) return err; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; if (devlink_nl_put_handle(msg, devlink)) goto nla_put_failure; if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) goto nla_put_failure; if (nla_put_u32(msg, DEVLINK_ATTR_SB_INDEX, devlink_sb->index)) goto nla_put_failure; if (nla_put_u16(msg, DEVLINK_ATTR_SB_TC_INDEX, tc_index)) goto nla_put_failure; if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_TYPE, pool_type)) goto nla_put_failure; if (nla_put_u16(msg, DEVLINK_ATTR_SB_POOL_INDEX, pool_index)) goto nla_put_failure; if (nla_put_u32(msg, DEVLINK_ATTR_SB_THRESHOLD, threshold)) goto nla_put_failure; if (ops->sb_occ_tc_port_bind_get) { u32 cur; u32 max; err = ops->sb_occ_tc_port_bind_get(devlink_port, devlink_sb->index, tc_index, pool_type, &cur, &max); if (err && err != -EOPNOTSUPP) return err; if (!err) { if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur)) goto nla_put_failure; if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_MAX, max)) goto nla_put_failure; } } genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } int devlink_nl_sb_tc_pool_bind_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = devlink_port->devlink; struct devlink_sb *devlink_sb; struct sk_buff *msg; enum devlink_sb_pool_type pool_type; u16 tc_index; int err; devlink_sb = devlink_sb_get_from_info(devlink, info); if (IS_ERR(devlink_sb)) return PTR_ERR(devlink_sb); err = devlink_sb_pool_type_get_from_info(info, &pool_type); if (err) return err; err = devlink_sb_tc_index_get_from_info(devlink_sb, info, pool_type, &tc_index); if (err) return err; if (!devlink->ops->sb_tc_pool_bind_get) return -EOPNOTSUPP; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink, devlink_port, devlink_sb, tc_index, pool_type, DEVLINK_CMD_SB_TC_POOL_BIND_NEW, info->snd_portid, info->snd_seq, 0); if (err) { nlmsg_free(msg); return err; } return genlmsg_reply(msg, info); } static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, int start, int *p_idx, struct devlink *devlink, struct devlink_sb *devlink_sb, u32 portid, u32 seq, int flags) { struct devlink_port *devlink_port; unsigned long port_index; u16 tc_index; int err; xa_for_each(&devlink->ports, port_index, devlink_port) { for (tc_index = 0; tc_index < devlink_sb->ingress_tc_count; tc_index++) { if (*p_idx < start) { (*p_idx)++; continue; } err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink, devlink_port, devlink_sb, tc_index, DEVLINK_SB_POOL_TYPE_INGRESS, DEVLINK_CMD_SB_TC_POOL_BIND_NEW, portid, seq, flags); if (err) return err; (*p_idx)++; } for (tc_index = 0; tc_index < devlink_sb->egress_tc_count; tc_index++) { if (*p_idx < start) { (*p_idx)++; continue; } err = devlink_nl_sb_tc_pool_bind_fill(msg, devlink, devlink_port, devlink_sb, tc_index, DEVLINK_SB_POOL_TYPE_EGRESS, DEVLINK_CMD_SB_TC_POOL_BIND_NEW, portid, seq, flags); if (err) return err; (*p_idx)++; } } return 0; } static int devlink_nl_sb_tc_pool_bind_get_dump_one(struct sk_buff *msg, struct devlink *devlink, struct netlink_callback *cb, int flags) { struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_sb *devlink_sb; int idx = 0; int err = 0; if (!devlink->ops->sb_tc_pool_bind_get) return 0; list_for_each_entry(devlink_sb, &devlink->sb_list, list) { err = __sb_tc_pool_bind_get_dumpit(msg, state->idx, &idx, devlink, devlink_sb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags); if (err == -EOPNOTSUPP) { err = 0; } else if (err) { state->idx = idx; break; } } return err; } int devlink_nl_sb_tc_pool_bind_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { return devlink_nl_dumpit(skb, cb, devlink_nl_sb_tc_pool_bind_get_dump_one); } static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port, unsigned int sb_index, u16 tc_index, enum devlink_sb_pool_type pool_type, u16 pool_index, u32 threshold, struct netlink_ext_ack *extack) { const struct devlink_ops *ops = devlink_port->devlink->ops; if (ops->sb_tc_pool_bind_set) return ops->sb_tc_pool_bind_set(devlink_port, sb_index, tc_index, pool_type, pool_index, threshold, extack); return -EOPNOTSUPP; } int devlink_nl_sb_tc_pool_bind_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = info->user_ptr[0]; enum devlink_sb_pool_type pool_type; struct devlink_sb *devlink_sb; u16 tc_index; u16 pool_index; u32 threshold; int err; devlink_sb = devlink_sb_get_from_info(devlink, info); if (IS_ERR(devlink_sb)) return PTR_ERR(devlink_sb); err = devlink_sb_pool_type_get_from_info(info, &pool_type); if (err) return err; err = devlink_sb_tc_index_get_from_info(devlink_sb, info, pool_type, &tc_index); if (err) return err; err = devlink_sb_pool_index_get_from_info(devlink_sb, info, &pool_index); if (err) return err; if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SB_THRESHOLD)) return -EINVAL; threshold = nla_get_u32(info->attrs[DEVLINK_ATTR_SB_THRESHOLD]); return devlink_sb_tc_pool_bind_set(devlink_port, devlink_sb->index, tc_index, pool_type, pool_index, threshold, info->extack); } int devlink_nl_sb_occ_snapshot_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; const struct devlink_ops *ops = devlink->ops; struct devlink_sb *devlink_sb; devlink_sb = devlink_sb_get_from_info(devlink, info); if (IS_ERR(devlink_sb)) return PTR_ERR(devlink_sb); if (ops->sb_occ_snapshot) return ops->sb_occ_snapshot(devlink, devlink_sb->index); return -EOPNOTSUPP; } int devlink_nl_sb_occ_max_clear_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; const struct devlink_ops *ops = devlink->ops; struct devlink_sb *devlink_sb; devlink_sb = devlink_sb_get_from_info(devlink, info); if (IS_ERR(devlink_sb)) return PTR_ERR(devlink_sb); if (ops->sb_occ_max_clear) return ops->sb_occ_max_clear(devlink, devlink_sb->index); return -EOPNOTSUPP; } int devl_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, u16 egress_tc_count) { struct devlink_sb *devlink_sb; lockdep_assert_held(&devlink->lock); if (devlink_sb_index_exists(devlink, sb_index)) return -EEXIST; devlink_sb = kzalloc(sizeof(*devlink_sb), GFP_KERNEL); if (!devlink_sb) return -ENOMEM; devlink_sb->index = sb_index; devlink_sb->size = size; devlink_sb->ingress_pools_count = ingress_pools_count; devlink_sb->egress_pools_count = egress_pools_count; devlink_sb->ingress_tc_count = ingress_tc_count; devlink_sb->egress_tc_count = egress_tc_count; list_add_tail(&devlink_sb->list, &devlink->sb_list); return 0; } EXPORT_SYMBOL_GPL(devl_sb_register); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, u16 egress_tc_count) { int err; devl_lock(devlink); err = devl_sb_register(devlink, sb_index, size, ingress_pools_count, egress_pools_count, ingress_tc_count, egress_tc_count); devl_unlock(devlink); return err; } EXPORT_SYMBOL_GPL(devlink_sb_register); void devl_sb_unregister(struct devlink *devlink, unsigned int sb_index) { struct devlink_sb *devlink_sb; lockdep_assert_held(&devlink->lock); devlink_sb = devlink_sb_get_by_index(devlink, sb_index); WARN_ON(!devlink_sb); list_del(&devlink_sb->list); kfree(devlink_sb); } EXPORT_SYMBOL_GPL(devl_sb_unregister); void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index) { devl_lock(devlink); devl_sb_unregister(devlink, sb_index); devl_unlock(devlink); } EXPORT_SYMBOL_GPL(devlink_sb_unregister); |
| 12 11 12 12 23 23 23 23 22 23 12 12 12 12 11 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 | /* * llc_core.c - Minimum needed routines for sap handling and module init/exit * * Copyright (c) 1997 by Procom Technology, Inc. * 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program can be redistributed or modified under the terms of the * GNU General Public License as published by the Free Software Foundation. * This program is distributed without any warranty or implied warranty * of merchantability or fitness for a particular purpose. * * See the GNU General Public License for more details. */ #include <linux/module.h> #include <linux/interrupt.h> #include <linux/if_ether.h> #include <linux/netdevice.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/init.h> #include <net/net_namespace.h> #include <net/llc.h> LIST_HEAD(llc_sap_list); static DEFINE_SPINLOCK(llc_sap_list_lock); /** * llc_sap_alloc - allocates and initializes sap. * * Allocates and initializes sap. */ static struct llc_sap *llc_sap_alloc(void) { struct llc_sap *sap = kzalloc(sizeof(*sap), GFP_ATOMIC); int i; if (sap) { /* sap->laddr.mac - leave as a null, it's filled by bind */ sap->state = LLC_SAP_STATE_ACTIVE; spin_lock_init(&sap->sk_lock); for (i = 0; i < LLC_SK_LADDR_HASH_ENTRIES; i++) INIT_HLIST_NULLS_HEAD(&sap->sk_laddr_hash[i], i); refcount_set(&sap->refcnt, 1); } return sap; } static struct llc_sap *__llc_sap_find(unsigned char sap_value) { struct llc_sap *sap; list_for_each_entry(sap, &llc_sap_list, node) if (sap->laddr.lsap == sap_value) goto out; sap = NULL; out: return sap; } /** * llc_sap_find - searches a SAP in station * @sap_value: sap to be found * * Searches for a sap in the sap list of the LLC's station upon the sap ID. * If the sap is found it will be refcounted and the user will have to do * a llc_sap_put after use. * Returns the sap or %NULL if not found. */ struct llc_sap *llc_sap_find(unsigned char sap_value) { struct llc_sap *sap; rcu_read_lock_bh(); sap = __llc_sap_find(sap_value); if (!sap || !llc_sap_hold_safe(sap)) sap = NULL; rcu_read_unlock_bh(); return sap; } /** * llc_sap_open - open interface to the upper layers. * @lsap: SAP number. * @func: rcv func for datalink protos * * Interface function to upper layer. Each one who wants to get a SAP * (for example NetBEUI) should call this function. Returns the opened * SAP for success, NULL for failure. */ struct llc_sap *llc_sap_open(unsigned char lsap, int (*func)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)) { struct llc_sap *sap = NULL; spin_lock_bh(&llc_sap_list_lock); if (__llc_sap_find(lsap)) /* SAP already exists */ goto out; sap = llc_sap_alloc(); if (!sap) goto out; sap->laddr.lsap = lsap; sap->rcv_func = func; list_add_tail_rcu(&sap->node, &llc_sap_list); out: spin_unlock_bh(&llc_sap_list_lock); return sap; } /** * llc_sap_close - close interface for upper layers. * @sap: SAP to be closed. * * Close interface function to upper layer. Each one who wants to * close an open SAP (for example NetBEUI) should call this function. * Removes this sap from the list of saps in the station and then * frees the memory for this sap. */ void llc_sap_close(struct llc_sap *sap) { WARN_ON(sap->sk_count); spin_lock_bh(&llc_sap_list_lock); list_del_rcu(&sap->node); spin_unlock_bh(&llc_sap_list_lock); kfree_rcu(sap, rcu); } static struct packet_type llc_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_802_2), .func = llc_rcv, }; static int __init llc_init(void) { dev_add_pack(&llc_packet_type); return 0; } static void __exit llc_exit(void) { dev_remove_pack(&llc_packet_type); } module_init(llc_init); module_exit(llc_exit); EXPORT_SYMBOL(llc_sap_list); EXPORT_SYMBOL(llc_sap_find); EXPORT_SYMBOL(llc_sap_open); EXPORT_SYMBOL(llc_sap_close); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Procom 1997, Jay Schullist 2001, Arnaldo C. Melo 2001-2003"); MODULE_DESCRIPTION("LLC IEEE 802.2 core support"); |
| 486 44 482 4 117 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 | // SPDX-License-Identifier: GPL-2.0-or-later /* * * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * IP/TCP/UDP checksumming routines * * Authors: Jorge Cwik, <jorge@laser.satlink.net> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> * Tom May, <ftom@netcom.com> * Andreas Schwab, <schwab@issan.informatik.uni-dortmund.de> * Lots of code moved from tcp.c and ip.c; see those files * for more names. * * 03/02/96 Jes Sorensen, Andreas Schwab, Roman Hodek: * Fixed some nasty bugs, causing some horrible crashes. * A: At some points, the sum (%0) was used as * length-counter instead of the length counter * (%1). Thanks to Roman Hodek for pointing this out. * B: GCC seems to mess up if one uses too many * data-registers to hold input values and one tries to * specify d0 and d1 as scratch registers. Letting gcc * choose these registers itself solves the problem. */ /* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access kills, so most of the assembly has to go. */ #include <linux/export.h> #include <net/checksum.h> #include <asm/byteorder.h> #ifndef do_csum static unsigned int do_csum(const unsigned char *buff, int len) { int odd; unsigned int result = 0; if (len <= 0) goto out; odd = 1 & (unsigned long) buff; if (odd) { #ifdef __LITTLE_ENDIAN result += (*buff << 8); #else result = *buff; #endif len--; buff++; } if (len >= 2) { if (2 & (unsigned long) buff) { result += *(unsigned short *) buff; len -= 2; buff += 2; } if (len >= 4) { const unsigned char *end = buff + ((unsigned)len & ~3); unsigned int carry = 0; do { unsigned int w = *(unsigned int *) buff; buff += 4; result += carry; result += w; carry = (w > result); } while (buff < end); result += carry; result = (result & 0xffff) + (result >> 16); } if (len & 2) { result += *(unsigned short *) buff; buff += 2; } } if (len & 1) #ifdef __LITTLE_ENDIAN result += *buff; #else result += (*buff << 8); #endif result = csum_from32to16(result); if (odd) result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); out: return result; } #endif #ifndef ip_fast_csum /* * This is a version of ip_compute_csum() optimized for IP headers, * which always checksum on 4 octet boundaries. */ __sum16 ip_fast_csum(const void *iph, unsigned int ihl) { return (__force __sum16)~do_csum(iph, ihl*4); } EXPORT_SYMBOL(ip_fast_csum); #endif /* * computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit) * * returns a 32-bit number suitable for feeding into itself * or csum_tcpudp_magic * * this function must be called with even lengths, except * for the last fragment, which may be odd * * it's best to have buff aligned on a 32-bit boundary */ __wsum csum_partial(const void *buff, int len, __wsum wsum) { unsigned int sum = (__force unsigned int)wsum; unsigned int result = do_csum(buff, len); /* add in old sum, and carry.. */ result += sum; if (sum > result) result += 1; return (__force __wsum)result; } EXPORT_SYMBOL(csum_partial); /* * this routine is used for miscellaneous IP-like checksums, mainly * in icmp.c */ __sum16 ip_compute_csum(const void *buff, int len) { return (__force __sum16)~do_csum(buff, len); } EXPORT_SYMBOL(ip_compute_csum); #ifndef csum_tcpudp_nofold static inline u32 from64to32(u64 x) { /* add up 32-bit and 32-bit for 32+c bit */ x = (x & 0xffffffff) + (x >> 32); /* add up carry.. */ x = (x & 0xffffffff) + (x >> 32); return (u32)x; } __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum sum) { unsigned long long s = (__force u32)sum; s += (__force u32)saddr; s += (__force u32)daddr; #ifdef __BIG_ENDIAN s += proto + len; #else s += (proto + len) << 8; #endif return (__force __wsum)from64to32(s); } EXPORT_SYMBOL(csum_tcpudp_nofold); #endif |
| 1096 342 634 631 343 30 30 4 29 5 5 3 685 26 50 4 17 632 9 29 29 9 9 26 21 25 29 83 632 243 243 679 681 75 75 75 893 632 25 23 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 | // SPDX-License-Identifier: GPL-2.0-only /* tnum: tracked (or tristate) numbers * * A tnum tracks knowledge about the bits of a value. Each bit can be either * known (0 or 1), or unknown (x). Arithmetic operations on tnums will * propagate the unknown bits such that the tnum result represents all the * possible results for possible values of the operands. */ #include <linux/kernel.h> #include <linux/tnum.h> #define TNUM(_v, _m) (struct tnum){.value = _v, .mask = _m} /* A completely unknown value */ const struct tnum tnum_unknown = { .value = 0, .mask = -1 }; struct tnum tnum_const(u64 value) { return TNUM(value, 0); } struct tnum tnum_range(u64 min, u64 max) { u64 chi = min ^ max, delta; u8 bits = fls64(chi); /* special case, needed because 1ULL << 64 is undefined */ if (bits > 63) return tnum_unknown; /* e.g. if chi = 4, bits = 3, delta = (1<<3) - 1 = 7. * if chi = 0, bits = 0, delta = (1<<0) - 1 = 0, so we return * constant min (since min == max). */ delta = (1ULL << bits) - 1; return TNUM(min & ~delta, delta); } struct tnum tnum_lshift(struct tnum a, u8 shift) { return TNUM(a.value << shift, a.mask << shift); } struct tnum tnum_rshift(struct tnum a, u8 shift) { return TNUM(a.value >> shift, a.mask >> shift); } struct tnum tnum_arshift(struct tnum a, u8 min_shift, u8 insn_bitness) { /* if a.value is negative, arithmetic shifting by minimum shift * will have larger negative offset compared to more shifting. * If a.value is nonnegative, arithmetic shifting by minimum shift * will have larger positive offset compare to more shifting. */ if (insn_bitness == 32) return TNUM((u32)(((s32)a.value) >> min_shift), (u32)(((s32)a.mask) >> min_shift)); else return TNUM((s64)a.value >> min_shift, (s64)a.mask >> min_shift); } struct tnum tnum_add(struct tnum a, struct tnum b) { u64 sm, sv, sigma, chi, mu; sm = a.mask + b.mask; sv = a.value + b.value; sigma = sm + sv; chi = sigma ^ sv; mu = chi | a.mask | b.mask; return TNUM(sv & ~mu, mu); } struct tnum tnum_sub(struct tnum a, struct tnum b) { u64 dv, alpha, beta, chi, mu; dv = a.value - b.value; alpha = dv + a.mask; beta = dv - b.mask; chi = alpha ^ beta; mu = chi | a.mask | b.mask; return TNUM(dv & ~mu, mu); } struct tnum tnum_neg(struct tnum a) { return tnum_sub(TNUM(0, 0), a); } struct tnum tnum_and(struct tnum a, struct tnum b) { u64 alpha, beta, v; alpha = a.value | a.mask; beta = b.value | b.mask; v = a.value & b.value; return TNUM(v, alpha & beta & ~v); } struct tnum tnum_or(struct tnum a, struct tnum b) { u64 v, mu; v = a.value | b.value; mu = a.mask | b.mask; return TNUM(v, mu & ~v); } struct tnum tnum_xor(struct tnum a, struct tnum b) { u64 v, mu; v = a.value ^ b.value; mu = a.mask | b.mask; return TNUM(v & ~mu, mu); } /* Perform long multiplication, iterating through the bits in a using rshift: * - if LSB(a) is a known 0, keep current accumulator * - if LSB(a) is a known 1, add b to current accumulator * - if LSB(a) is unknown, take a union of the above cases. * * For example: * * acc_0: acc_1: * * 11 * -> 11 * -> 11 * -> union(0011, 1001) == x0x1 * x1 01 11 * ------ ------ ------ * 11 11 11 * xx 00 11 * ------ ------ ------ * ???? 0011 1001 */ struct tnum tnum_mul(struct tnum a, struct tnum b) { struct tnum acc = TNUM(0, 0); while (a.value || a.mask) { /* LSB of tnum a is a certain 1 */ if (a.value & 1) acc = tnum_add(acc, b); /* LSB of tnum a is uncertain */ else if (a.mask & 1) { /* acc = tnum_union(acc_0, acc_1), where acc_0 and * acc_1 are partial accumulators for cases * LSB(a) = certain 0 and LSB(a) = certain 1. * acc_0 = acc + 0 * b = acc. * acc_1 = acc + 1 * b = tnum_add(acc, b). */ acc = tnum_union(acc, tnum_add(acc, b)); } /* Note: no case for LSB is certain 0 */ a = tnum_rshift(a, 1); b = tnum_lshift(b, 1); } return acc; } bool tnum_overlap(struct tnum a, struct tnum b) { u64 mu; mu = ~a.mask & ~b.mask; return (a.value & mu) == (b.value & mu); } /* Note that if a and b disagree - i.e. one has a 'known 1' where the other has * a 'known 0' - this will return a 'known 1' for that bit. */ struct tnum tnum_intersect(struct tnum a, struct tnum b) { u64 v, mu; v = a.value | b.value; mu = a.mask & b.mask; return TNUM(v & ~mu, mu); } /* Returns a tnum with the uncertainty from both a and b, and in addition, new * uncertainty at any position that a and b disagree. This represents a * superset of the union of the concrete sets of both a and b. Despite the * overapproximation, it is optimal. */ struct tnum tnum_union(struct tnum a, struct tnum b) { u64 v = a.value & b.value; u64 mu = (a.value ^ b.value) | a.mask | b.mask; return TNUM(v & ~mu, mu); } struct tnum tnum_cast(struct tnum a, u8 size) { a.value &= (1ULL << (size * 8)) - 1; a.mask &= (1ULL << (size * 8)) - 1; return a; } bool tnum_is_aligned(struct tnum a, u64 size) { if (!size) return true; return !((a.value | a.mask) & (size - 1)); } bool tnum_in(struct tnum a, struct tnum b) { if (b.mask & ~a.mask) return false; b.value &= ~a.mask; return a.value == b.value; } int tnum_sbin(char *str, size_t size, struct tnum a) { size_t n; for (n = 64; n; n--) { if (n < size) { if (a.mask & 1) str[n - 1] = 'x'; else if (a.value & 1) str[n - 1] = '1'; else str[n - 1] = '0'; } a.mask >>= 1; a.value >>= 1; } str[min(size - 1, (size_t)64)] = 0; return 64; } struct tnum tnum_subreg(struct tnum a) { return tnum_cast(a, 4); } struct tnum tnum_clear_subreg(struct tnum a) { return tnum_lshift(tnum_rshift(a, 32), 32); } struct tnum tnum_with_subreg(struct tnum reg, struct tnum subreg) { return tnum_or(tnum_clear_subreg(reg), tnum_subreg(subreg)); } struct tnum tnum_const_subreg(struct tnum a, u32 value) { return tnum_with_subreg(a, tnum_const(value)); } |
| 1814 1817 1825 1814 4 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 | /* SPDX-License-Identifier: GPL-2.0+ */ /* * Driver for 8250/16550-type serial ports * * Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o. * * Copyright (C) 2001 Russell King. */ #include <linux/bits.h> #include <linux/serial_8250.h> #include <linux/serial_core.h> #include <linux/dmaengine.h> #include "../serial_mctrl_gpio.h" struct uart_8250_dma { int (*tx_dma)(struct uart_8250_port *p); int (*rx_dma)(struct uart_8250_port *p); void (*prepare_tx_dma)(struct uart_8250_port *p); void (*prepare_rx_dma)(struct uart_8250_port *p); /* Filter function */ dma_filter_fn fn; /* Parameter to the filter function */ void *rx_param; void *tx_param; struct dma_slave_config rxconf; struct dma_slave_config txconf; struct dma_chan *rxchan; struct dma_chan *txchan; /* Device address base for DMA operations */ phys_addr_t rx_dma_addr; phys_addr_t tx_dma_addr; /* DMA address of the buffer in memory */ dma_addr_t rx_addr; dma_addr_t tx_addr; dma_cookie_t rx_cookie; dma_cookie_t tx_cookie; void *rx_buf; size_t rx_size; size_t tx_size; unsigned char tx_running; unsigned char tx_err; unsigned char rx_running; }; struct old_serial_port { unsigned int uart; unsigned int baud_base; unsigned int port; unsigned int irq; upf_t flags; unsigned char io_type; unsigned char __iomem *iomem_base; unsigned short iomem_reg_shift; }; struct serial8250_config { const char *name; unsigned short fifo_size; unsigned short tx_loadsz; unsigned char fcr; unsigned char rxtrig_bytes[UART_FCR_R_TRIG_MAX_STATE]; unsigned int flags; }; #define UART_CAP_FIFO BIT(8) /* UART has FIFO */ #define UART_CAP_EFR BIT(9) /* UART has EFR */ #define UART_CAP_SLEEP BIT(10) /* UART has IER sleep */ #define UART_CAP_AFE BIT(11) /* MCR-based hw flow control */ #define UART_CAP_UUE BIT(12) /* UART needs IER bit 6 set (Xscale) */ #define UART_CAP_RTOIE BIT(13) /* UART needs IER bit 4 set (Xscale, Tegra) */ #define UART_CAP_HFIFO BIT(14) /* UART has a "hidden" FIFO */ #define UART_CAP_RPM BIT(15) /* Runtime PM is active while idle */ #define UART_CAP_IRDA BIT(16) /* UART supports IrDA line discipline */ #define UART_CAP_MINI BIT(17) /* Mini UART on BCM283X family lacks: * STOP PARITY EPAR SPAR WLEN5 WLEN6 */ #define UART_CAP_NOTEMT BIT(18) /* UART without interrupt on TEMT available */ #define UART_BUG_QUOT BIT(0) /* UART has buggy quot LSB */ #define UART_BUG_TXEN BIT(1) /* UART has buggy TX IIR status */ #define UART_BUG_NOMSR BIT(2) /* UART has buggy MSR status bits (Au1x00) */ #define UART_BUG_THRE BIT(3) /* UART has buggy THRE reassertion */ #define UART_BUG_TXRACE BIT(5) /* UART Tx fails to set remote DR */ /* Module parameters */ #define UART_NR CONFIG_SERIAL_8250_NR_UARTS extern unsigned int nr_uarts; #ifdef CONFIG_SERIAL_8250_SHARE_IRQ #define SERIAL8250_SHARE_IRQS 1 #else #define SERIAL8250_SHARE_IRQS 0 #endif extern unsigned int share_irqs; extern unsigned int skip_txen_test; #define SERIAL8250_PORT_FLAGS(_base, _irq, _flags) \ { \ .iobase = _base, \ .irq = _irq, \ .uartclk = 1843200, \ .iotype = UPIO_PORT, \ .flags = UPF_BOOT_AUTOCONF | (_flags), \ } #define SERIAL8250_PORT(_base, _irq) SERIAL8250_PORT_FLAGS(_base, _irq, 0) extern struct uart_driver serial8250_reg; void serial8250_register_ports(struct uart_driver *drv, struct device *dev); /* Legacy ISA bus related APIs */ typedef void (*serial8250_isa_config_fn)(int, struct uart_port *, u32 *); extern serial8250_isa_config_fn serial8250_isa_config; void serial8250_isa_init_ports(void); extern struct platform_device *serial8250_isa_devs; extern const struct uart_ops *univ8250_port_base_ops; extern struct uart_ops univ8250_port_ops; static inline int serial_in(struct uart_8250_port *up, int offset) { return up->port.serial_in(&up->port, offset); } static inline void serial_out(struct uart_8250_port *up, int offset, int value) { up->port.serial_out(&up->port, offset, value); } /** * serial_lsr_in - Read LSR register and preserve flags across reads * @up: uart 8250 port * * Read LSR register and handle saving non-preserved flags across reads. * The flags that are not preserved across reads are stored into * up->lsr_saved_flags. * * Returns LSR value or'ed with the preserved flags (if any). */ static inline u16 serial_lsr_in(struct uart_8250_port *up) { u16 lsr = up->lsr_saved_flags; lsr |= serial_in(up, UART_LSR); up->lsr_saved_flags = lsr & up->lsr_save_mask; return lsr; } /* * For the 16C950 */ static void serial_icr_write(struct uart_8250_port *up, int offset, int value) { serial_out(up, UART_SCR, offset); serial_out(up, UART_ICR, value); } static unsigned int __maybe_unused serial_icr_read(struct uart_8250_port *up, int offset) { unsigned int value; serial_icr_write(up, UART_ACR, up->acr | UART_ACR_ICRRD); serial_out(up, UART_SCR, offset); value = serial_in(up, UART_ICR); serial_icr_write(up, UART_ACR, up->acr); return value; } void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p); void serial8250_rpm_get(struct uart_8250_port *p); void serial8250_rpm_put(struct uart_8250_port *p); DEFINE_GUARD(serial8250_rpm, struct uart_8250_port *, serial8250_rpm_get(_T), serial8250_rpm_put(_T)); static inline u32 serial_dl_read(struct uart_8250_port *up) { return up->dl_read(up); } static inline void serial_dl_write(struct uart_8250_port *up, u32 value) { up->dl_write(up, value); } static inline bool serial8250_set_THRI(struct uart_8250_port *up) { /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&up->port.lock); if (up->ier & UART_IER_THRI) return false; up->ier |= UART_IER_THRI; serial_out(up, UART_IER, up->ier); return true; } static inline bool serial8250_clear_THRI(struct uart_8250_port *up) { /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&up->port.lock); if (!(up->ier & UART_IER_THRI)) return false; up->ier &= ~UART_IER_THRI; serial_out(up, UART_IER, up->ier); return true; } struct uart_8250_port *serial8250_setup_port(int index); struct uart_8250_port *serial8250_get_port(int line); int serial8250_em485_config(struct uart_port *port, struct ktermios *termios, struct serial_rs485 *rs485); void serial8250_em485_start_tx(struct uart_8250_port *p, bool toggle_ier); void serial8250_em485_stop_tx(struct uart_8250_port *p, bool toggle_ier); void serial8250_em485_destroy(struct uart_8250_port *p); extern struct serial_rs485 serial8250_em485_supported; /* MCR <-> TIOCM conversion */ static inline int serial8250_TIOCM_to_MCR(int tiocm) { int mcr = 0; if (tiocm & TIOCM_RTS) mcr |= UART_MCR_RTS; if (tiocm & TIOCM_DTR) mcr |= UART_MCR_DTR; if (tiocm & TIOCM_OUT1) mcr |= UART_MCR_OUT1; if (tiocm & TIOCM_OUT2) mcr |= UART_MCR_OUT2; if (tiocm & TIOCM_LOOP) mcr |= UART_MCR_LOOP; return mcr; } static inline int serial8250_MCR_to_TIOCM(int mcr) { int tiocm = 0; if (mcr & UART_MCR_RTS) tiocm |= TIOCM_RTS; if (mcr & UART_MCR_DTR) tiocm |= TIOCM_DTR; if (mcr & UART_MCR_OUT1) tiocm |= TIOCM_OUT1; if (mcr & UART_MCR_OUT2) tiocm |= TIOCM_OUT2; if (mcr & UART_MCR_LOOP) tiocm |= TIOCM_LOOP; return tiocm; } /* MSR <-> TIOCM conversion */ static inline int serial8250_MSR_to_TIOCM(int msr) { int tiocm = 0; if (msr & UART_MSR_DCD) tiocm |= TIOCM_CAR; if (msr & UART_MSR_RI) tiocm |= TIOCM_RNG; if (msr & UART_MSR_DSR) tiocm |= TIOCM_DSR; if (msr & UART_MSR_CTS) tiocm |= TIOCM_CTS; return tiocm; } static inline void serial8250_out_MCR(struct uart_8250_port *up, int value) { serial_out(up, UART_MCR, value); if (up->gpios) mctrl_gpio_set(up->gpios, serial8250_MCR_to_TIOCM(value)); } static inline int serial8250_in_MCR(struct uart_8250_port *up) { int mctrl; mctrl = serial_in(up, UART_MCR); if (up->gpios) { unsigned int mctrl_gpio = 0; mctrl_gpio = mctrl_gpio_get_outputs(up->gpios, &mctrl_gpio); mctrl |= serial8250_TIOCM_to_MCR(mctrl_gpio); } return mctrl; } #ifdef CONFIG_SERIAL_8250_PNP int serial8250_pnp_init(void); void serial8250_pnp_exit(void); #else static inline int serial8250_pnp_init(void) { return 0; } static inline void serial8250_pnp_exit(void) { } #endif #ifdef CONFIG_SERIAL_8250_RSA void univ8250_rsa_support(struct uart_ops *ops); void rsa_enable(struct uart_8250_port *up); void rsa_disable(struct uart_8250_port *up); void rsa_autoconfig(struct uart_8250_port *up); void rsa_reset(struct uart_8250_port *up); #else static inline void univ8250_rsa_support(struct uart_ops *ops) { } static inline void rsa_enable(struct uart_8250_port *up) {} static inline void rsa_disable(struct uart_8250_port *up) {} static inline void rsa_autoconfig(struct uart_8250_port *up) {} static inline void rsa_reset(struct uart_8250_port *up) {} #endif #ifdef CONFIG_SERIAL_8250_FINTEK int fintek_8250_probe(struct uart_8250_port *uart); #else static inline int fintek_8250_probe(struct uart_8250_port *uart) { return 0; } #endif #ifdef CONFIG_ARCH_OMAP1 #include <linux/soc/ti/omap1-soc.h> static inline int is_omap1_8250(struct uart_8250_port *pt) { int res; switch (pt->port.mapbase) { case OMAP1_UART1_BASE: case OMAP1_UART2_BASE: case OMAP1_UART3_BASE: res = 1; break; default: res = 0; break; } return res; } static inline int is_omap1510_8250(struct uart_8250_port *pt) { if (!cpu_is_omap1510()) return 0; return is_omap1_8250(pt); } #else static inline int is_omap1_8250(struct uart_8250_port *pt) { return 0; } static inline int is_omap1510_8250(struct uart_8250_port *pt) { return 0; } #endif #ifdef CONFIG_SERIAL_8250_DMA extern int serial8250_tx_dma(struct uart_8250_port *); extern void serial8250_tx_dma_flush(struct uart_8250_port *); extern int serial8250_rx_dma(struct uart_8250_port *); extern void serial8250_rx_dma_flush(struct uart_8250_port *); extern int serial8250_request_dma(struct uart_8250_port *); extern void serial8250_release_dma(struct uart_8250_port *); static inline void serial8250_do_prepare_tx_dma(struct uart_8250_port *p) { struct uart_8250_dma *dma = p->dma; if (dma->prepare_tx_dma) dma->prepare_tx_dma(p); } static inline void serial8250_do_prepare_rx_dma(struct uart_8250_port *p) { struct uart_8250_dma *dma = p->dma; if (dma->prepare_rx_dma) dma->prepare_rx_dma(p); } static inline bool serial8250_tx_dma_running(struct uart_8250_port *p) { struct uart_8250_dma *dma = p->dma; return dma && dma->tx_running; } #else static inline int serial8250_tx_dma(struct uart_8250_port *p) { return -1; } static inline void serial8250_tx_dma_flush(struct uart_8250_port *p) { } static inline int serial8250_rx_dma(struct uart_8250_port *p) { return -1; } static inline void serial8250_rx_dma_flush(struct uart_8250_port *p) { } static inline int serial8250_request_dma(struct uart_8250_port *p) { return -1; } static inline void serial8250_release_dma(struct uart_8250_port *p) { } static inline bool serial8250_tx_dma_running(struct uart_8250_port *p) { return false; } #endif static inline int ns16550a_goto_highspeed(struct uart_8250_port *up) { unsigned char status; status = serial_in(up, 0x04); /* EXCR2 */ #define PRESL(x) ((x) & 0x30) if (PRESL(status) == 0x10) { /* already in high speed mode */ return 0; } else { status &= ~0xB0; /* Disable LOCK, mask out PRESL[01] */ status |= 0x10; /* 1.625 divisor for baud_base --> 921600 */ serial_out(up, 0x04, status); } return 1; } static inline int serial_index(struct uart_port *port) { return port->minor - 64; } |
| 49 51 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007, 2008, 2009 Oracle Corporation * Written by: Martin K. Petersen <martin.petersen@oracle.com> * * Automatically generate and verify integrity data on PI capable devices if the * bio submitter didn't provide PI itself. This ensures that kernel verifies * data integrity even if the file system (or other user of the block device) is * not aware of PI. */ #include <linux/blk-integrity.h> #include <linux/t10-pi.h> #include <linux/workqueue.h> #include "blk.h" struct bio_integrity_data { struct bio *bio; struct bvec_iter saved_bio_iter; struct work_struct work; struct bio_integrity_payload bip; struct bio_vec bvec; }; static struct kmem_cache *bid_slab; static mempool_t bid_pool; static struct workqueue_struct *kintegrityd_wq; static void bio_integrity_finish(struct bio_integrity_data *bid) { bid->bio->bi_integrity = NULL; bid->bio->bi_opf &= ~REQ_INTEGRITY; kfree(bvec_virt(bid->bip.bip_vec)); mempool_free(bid, &bid_pool); } static void bio_integrity_verify_fn(struct work_struct *work) { struct bio_integrity_data *bid = container_of(work, struct bio_integrity_data, work); struct bio *bio = bid->bio; blk_integrity_verify_iter(bio, &bid->saved_bio_iter); bio_integrity_finish(bid); bio_endio(bio); } #define BIP_CHECK_FLAGS (BIP_CHECK_GUARD | BIP_CHECK_REFTAG | BIP_CHECK_APPTAG) static bool bip_should_check(struct bio_integrity_payload *bip) { return bip->bip_flags & BIP_CHECK_FLAGS; } static bool bi_offload_capable(struct blk_integrity *bi) { switch (bi->csum_type) { case BLK_INTEGRITY_CSUM_CRC64: return bi->metadata_size == sizeof(struct crc64_pi_tuple); case BLK_INTEGRITY_CSUM_CRC: case BLK_INTEGRITY_CSUM_IP: return bi->metadata_size == sizeof(struct t10_pi_tuple); default: pr_warn_once("%s: unknown integrity checksum type:%d\n", __func__, bi->csum_type); fallthrough; case BLK_INTEGRITY_CSUM_NONE: return false; } } /** * __bio_integrity_endio - Integrity I/O completion function * @bio: Protected bio * * Normally I/O completion is done in interrupt context. However, verifying I/O * integrity is a time-consuming task which must be run in process context. * * This function postpones completion accordingly. */ bool __bio_integrity_endio(struct bio *bio) { struct bio_integrity_payload *bip = bio_integrity(bio); struct bio_integrity_data *bid = container_of(bip, struct bio_integrity_data, bip); if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && bip_should_check(bip)) { INIT_WORK(&bid->work, bio_integrity_verify_fn); queue_work(kintegrityd_wq, &bid->work); return false; } bio_integrity_finish(bid); return true; } /** * bio_integrity_prep - Prepare bio for integrity I/O * @bio: bio to prepare * * Checks if the bio already has an integrity payload attached. If it does, the * payload has been generated by another kernel subsystem, and we just pass it * through. * Otherwise allocates integrity payload and for writes the integrity metadata * will be generated. For reads, the completion handler will verify the * metadata. */ bool bio_integrity_prep(struct bio *bio) { struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); struct bio_integrity_data *bid; bool set_flags = true; gfp_t gfp = GFP_NOIO; unsigned int len; void *buf; if (!bi) return true; if (!bio_sectors(bio)) return true; /* Already protected? */ if (bio_integrity(bio)) return true; switch (bio_op(bio)) { case REQ_OP_READ: if (bi->flags & BLK_INTEGRITY_NOVERIFY) { if (bi_offload_capable(bi)) return true; set_flags = false; } break; case REQ_OP_WRITE: /* * Zero the memory allocated to not leak uninitialized kernel * memory to disk for non-integrity metadata where nothing else * initializes the memory. */ if (bi->flags & BLK_INTEGRITY_NOGENERATE) { if (bi_offload_capable(bi)) return true; set_flags = false; gfp |= __GFP_ZERO; } else if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE) gfp |= __GFP_ZERO; break; default: return true; } if (WARN_ON_ONCE(bio_has_crypt_ctx(bio))) return true; /* Allocate kernel buffer for protection data */ len = bio_integrity_bytes(bi, bio_sectors(bio)); buf = kmalloc(len, gfp); if (!buf) goto err_end_io; bid = mempool_alloc(&bid_pool, GFP_NOIO); if (!bid) goto err_free_buf; bio_integrity_init(bio, &bid->bip, &bid->bvec, 1); bid->bio = bio; bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY; bip_set_seed(&bid->bip, bio->bi_iter.bi_sector); if (set_flags) { if (bi->csum_type == BLK_INTEGRITY_CSUM_IP) bid->bip.bip_flags |= BIP_IP_CHECKSUM; if (bi->csum_type) bid->bip.bip_flags |= BIP_CHECK_GUARD; if (bi->flags & BLK_INTEGRITY_REF_TAG) bid->bip.bip_flags |= BIP_CHECK_REFTAG; } if (bio_integrity_add_page(bio, virt_to_page(buf), len, offset_in_page(buf)) < len) goto err_end_io; /* Auto-generate integrity metadata if this is a write */ if (bio_data_dir(bio) == WRITE && bip_should_check(&bid->bip)) blk_integrity_generate(bio); else bid->saved_bio_iter = bio->bi_iter; return true; err_free_buf: kfree(buf); err_end_io: bio->bi_status = BLK_STS_RESOURCE; bio_endio(bio); return false; } EXPORT_SYMBOL(bio_integrity_prep); void blk_flush_integrity(void) { flush_workqueue(kintegrityd_wq); } static int __init blk_integrity_auto_init(void) { bid_slab = kmem_cache_create("bio_integrity_data", sizeof(struct bio_integrity_data), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); if (mempool_init_slab_pool(&bid_pool, BIO_POOL_SIZE, bid_slab)) panic("bio: can't create integrity pool\n"); /* * kintegrityd won't block much but may burn a lot of CPU cycles. * Make it highpri CPU intensive wq with max concurrency of 1. */ kintegrityd_wq = alloc_workqueue("kintegrityd", WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_CPU_INTENSIVE, 1); if (!kintegrityd_wq) panic("Failed to create kintegrityd\n"); return 0; } subsys_initcall(blk_integrity_auto_init); |
| 3 3 3 1 3 3 1 1 3 2 3 1 3 1 1 2 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 | // SPDX-License-Identifier: GPL-2.0+ /* * TI 3410/5052 USB Serial Driver * * Copyright (C) 2004 Texas Instruments * * This driver is based on the Linux io_ti driver, which is * Copyright (C) 2000-2002 Inside Out Networks * Copyright (C) 2001-2002 Greg Kroah-Hartman * * For questions or problems with this driver, contact Texas Instruments * technical support, or Al Borchers <alborchers@steinerpoint.com>, or * Peter Berger <pberger@brimson.com>. */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/firmware.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/ioctl.h> #include <linux/serial.h> #include <linux/kfifo.h> #include <linux/mutex.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/serial.h> /* Configuration ids */ #define TI_BOOT_CONFIG 1 #define TI_ACTIVE_CONFIG 2 /* Vendor and product ids */ #define TI_VENDOR_ID 0x0451 #define IBM_VENDOR_ID 0x04b3 #define STARTECH_VENDOR_ID 0x14b0 #define TI_3410_PRODUCT_ID 0x3410 #define IBM_4543_PRODUCT_ID 0x4543 #define IBM_454B_PRODUCT_ID 0x454b #define IBM_454C_PRODUCT_ID 0x454c #define TI_3410_EZ430_ID 0xF430 /* TI ez430 development tool */ #define TI_5052_BOOT_PRODUCT_ID 0x5052 /* no EEPROM, no firmware */ #define TI_5152_BOOT_PRODUCT_ID 0x5152 /* no EEPROM, no firmware */ #define TI_5052_EEPROM_PRODUCT_ID 0x505A /* EEPROM, no firmware */ #define TI_5052_FIRMWARE_PRODUCT_ID 0x505F /* firmware is running */ #define FRI2_PRODUCT_ID 0x5053 /* Fish River Island II */ /* Multi-Tech vendor and product ids */ #define MTS_VENDOR_ID 0x06E0 #define MTS_GSM_NO_FW_PRODUCT_ID 0xF108 #define MTS_CDMA_NO_FW_PRODUCT_ID 0xF109 #define MTS_CDMA_PRODUCT_ID 0xF110 #define MTS_GSM_PRODUCT_ID 0xF111 #define MTS_EDGE_PRODUCT_ID 0xF112 #define MTS_MT9234MU_PRODUCT_ID 0xF114 #define MTS_MT9234ZBA_PRODUCT_ID 0xF115 #define MTS_MT9234ZBAOLD_PRODUCT_ID 0x0319 /* Abbott Diabetics vendor and product ids */ #define ABBOTT_VENDOR_ID 0x1a61 #define ABBOTT_STEREO_PLUG_ID 0x3410 #define ABBOTT_PRODUCT_ID ABBOTT_STEREO_PLUG_ID #define ABBOTT_STRIP_PORT_ID 0x3420 /* Honeywell vendor and product IDs */ #define HONEYWELL_VENDOR_ID 0x10ac #define HONEYWELL_HGI80_PRODUCT_ID 0x0102 /* Honeywell HGI80 */ /* Moxa UPORT 11x0 vendor and product IDs */ #define MXU1_VENDOR_ID 0x110a #define MXU1_1110_PRODUCT_ID 0x1110 #define MXU1_1130_PRODUCT_ID 0x1130 #define MXU1_1150_PRODUCT_ID 0x1150 #define MXU1_1151_PRODUCT_ID 0x1151 #define MXU1_1131_PRODUCT_ID 0x1131 /* Commands */ #define TI_GET_VERSION 0x01 #define TI_GET_PORT_STATUS 0x02 #define TI_GET_PORT_DEV_INFO 0x03 #define TI_GET_CONFIG 0x04 #define TI_SET_CONFIG 0x05 #define TI_OPEN_PORT 0x06 #define TI_CLOSE_PORT 0x07 #define TI_START_PORT 0x08 #define TI_STOP_PORT 0x09 #define TI_TEST_PORT 0x0A #define TI_PURGE_PORT 0x0B #define TI_RESET_EXT_DEVICE 0x0C #define TI_WRITE_DATA 0x80 #define TI_READ_DATA 0x81 #define TI_REQ_TYPE_CLASS 0x82 /* Module identifiers */ #define TI_I2C_PORT 0x01 #define TI_IEEE1284_PORT 0x02 #define TI_UART1_PORT 0x03 #define TI_UART2_PORT 0x04 #define TI_RAM_PORT 0x05 /* Modem status */ #define TI_MSR_DELTA_CTS 0x01 #define TI_MSR_DELTA_DSR 0x02 #define TI_MSR_DELTA_RI 0x04 #define TI_MSR_DELTA_CD 0x08 #define TI_MSR_CTS 0x10 #define TI_MSR_DSR 0x20 #define TI_MSR_RI 0x40 #define TI_MSR_CD 0x80 #define TI_MSR_DELTA_MASK 0x0F #define TI_MSR_MASK 0xF0 /* Line status */ #define TI_LSR_OVERRUN_ERROR 0x01 #define TI_LSR_PARITY_ERROR 0x02 #define TI_LSR_FRAMING_ERROR 0x04 #define TI_LSR_BREAK 0x08 #define TI_LSR_ERROR 0x0F #define TI_LSR_RX_FULL 0x10 #define TI_LSR_TX_EMPTY 0x20 #define TI_LSR_TX_EMPTY_BOTH 0x40 /* Line control */ #define TI_LCR_BREAK 0x40 /* Modem control */ #define TI_MCR_LOOP 0x04 #define TI_MCR_DTR 0x10 #define TI_MCR_RTS 0x20 /* Mask settings */ #define TI_UART_ENABLE_RTS_IN 0x0001 #define TI_UART_DISABLE_RTS 0x0002 #define TI_UART_ENABLE_PARITY_CHECKING 0x0008 #define TI_UART_ENABLE_DSR_OUT 0x0010 #define TI_UART_ENABLE_CTS_OUT 0x0020 #define TI_UART_ENABLE_X_OUT 0x0040 #define TI_UART_ENABLE_XA_OUT 0x0080 #define TI_UART_ENABLE_X_IN 0x0100 #define TI_UART_ENABLE_DTR_IN 0x0800 #define TI_UART_DISABLE_DTR 0x1000 #define TI_UART_ENABLE_MS_INTS 0x2000 #define TI_UART_ENABLE_AUTO_START_DMA 0x4000 /* Parity */ #define TI_UART_NO_PARITY 0x00 #define TI_UART_ODD_PARITY 0x01 #define TI_UART_EVEN_PARITY 0x02 #define TI_UART_MARK_PARITY 0x03 #define TI_UART_SPACE_PARITY 0x04 /* Stop bits */ #define TI_UART_1_STOP_BITS 0x00 #define TI_UART_1_5_STOP_BITS 0x01 #define TI_UART_2_STOP_BITS 0x02 /* Bits per character */ #define TI_UART_5_DATA_BITS 0x00 #define TI_UART_6_DATA_BITS 0x01 #define TI_UART_7_DATA_BITS 0x02 #define TI_UART_8_DATA_BITS 0x03 /* 232/485 modes */ #define TI_UART_232 0x00 #define TI_UART_485_RECEIVER_DISABLED 0x01 #define TI_UART_485_RECEIVER_ENABLED 0x02 /* Pipe transfer mode and timeout */ #define TI_PIPE_MODE_CONTINUOUS 0x01 #define TI_PIPE_MODE_MASK 0x03 #define TI_PIPE_TIMEOUT_MASK 0x7C #define TI_PIPE_TIMEOUT_ENABLE 0x80 /* Config struct */ struct ti_uart_config { __be16 wBaudRate; __be16 wFlags; u8 bDataBits; u8 bParity; u8 bStopBits; char cXon; char cXoff; u8 bUartMode; }; /* Get port status */ struct ti_port_status { u8 bCmdCode; u8 bModuleId; u8 bErrorCode; u8 bMSR; u8 bLSR; }; /* Purge modes */ #define TI_PURGE_OUTPUT 0x00 #define TI_PURGE_INPUT 0x80 /* Read/Write data */ #define TI_RW_DATA_ADDR_SFR 0x10 #define TI_RW_DATA_ADDR_IDATA 0x20 #define TI_RW_DATA_ADDR_XDATA 0x30 #define TI_RW_DATA_ADDR_CODE 0x40 #define TI_RW_DATA_ADDR_GPIO 0x50 #define TI_RW_DATA_ADDR_I2C 0x60 #define TI_RW_DATA_ADDR_FLASH 0x70 #define TI_RW_DATA_ADDR_DSP 0x80 #define TI_RW_DATA_UNSPECIFIED 0x00 #define TI_RW_DATA_BYTE 0x01 #define TI_RW_DATA_WORD 0x02 #define TI_RW_DATA_DOUBLE_WORD 0x04 struct ti_write_data_bytes { u8 bAddrType; u8 bDataType; u8 bDataCounter; __be16 wBaseAddrHi; __be16 wBaseAddrLo; u8 bData[]; } __packed; struct ti_read_data_request { u8 bAddrType; u8 bDataType; u8 bDataCounter; __be16 wBaseAddrHi; __be16 wBaseAddrLo; } __packed; struct ti_read_data_bytes { u8 bCmdCode; u8 bModuleId; u8 bErrorCode; u8 bData[]; }; /* Interrupt struct */ struct ti_interrupt { u8 bICode; u8 bIInfo; }; /* Interrupt codes */ #define TI_CODE_HARDWARE_ERROR 0xFF #define TI_CODE_DATA_ERROR 0x03 #define TI_CODE_MODEM_STATUS 0x04 /* Download firmware max packet size */ #define TI_DOWNLOAD_MAX_PACKET_SIZE 64 /* Firmware image header */ struct ti_firmware_header { __le16 wLength; u8 bCheckSum; } __packed; /* UART addresses */ #define TI_UART1_BASE_ADDR 0xFFA0 /* UART 1 base address */ #define TI_UART2_BASE_ADDR 0xFFB0 /* UART 2 base address */ #define TI_UART_OFFSET_LCR 0x0002 /* UART MCR register offset */ #define TI_UART_OFFSET_MCR 0x0004 /* UART MCR register offset */ #define TI_DRIVER_AUTHOR "Al Borchers <alborchers@steinerpoint.com>" #define TI_DRIVER_DESC "TI USB 3410/5052 Serial Driver" #define TI_FIRMWARE_BUF_SIZE 16284 #define TI_TRANSFER_TIMEOUT 2 /* read urb states */ #define TI_READ_URB_RUNNING 0 #define TI_READ_URB_STOPPING 1 #define TI_READ_URB_STOPPED 2 #define TI_EXTRA_VID_PID_COUNT 5 struct ti_port { int tp_is_open; u8 tp_msr; u8 tp_shadow_mcr; u8 tp_uart_mode; /* 232 or 485 modes */ unsigned int tp_uart_base_addr; struct ti_device *tp_tdev; struct usb_serial_port *tp_port; spinlock_t tp_lock; int tp_read_urb_state; int tp_write_urb_in_use; }; struct ti_device { struct mutex td_open_close_lock; int td_open_port_count; struct usb_serial *td_serial; int td_is_3410; bool td_rs485_only; }; static int ti_startup(struct usb_serial *serial); static void ti_release(struct usb_serial *serial); static int ti_port_probe(struct usb_serial_port *port); static void ti_port_remove(struct usb_serial_port *port); static int ti_open(struct tty_struct *tty, struct usb_serial_port *port); static void ti_close(struct usb_serial_port *port); static int ti_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *data, int count); static unsigned int ti_write_room(struct tty_struct *tty); static unsigned int ti_chars_in_buffer(struct tty_struct *tty); static bool ti_tx_empty(struct usb_serial_port *port); static void ti_throttle(struct tty_struct *tty); static void ti_unthrottle(struct tty_struct *tty); static void ti_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios); static int ti_tiocmget(struct tty_struct *tty); static int ti_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear); static int ti_break(struct tty_struct *tty, int break_state); static void ti_interrupt_callback(struct urb *urb); static void ti_bulk_in_callback(struct urb *urb); static void ti_bulk_out_callback(struct urb *urb); static void ti_recv(struct usb_serial_port *port, unsigned char *data, int length); static void ti_send(struct ti_port *tport); static int ti_set_mcr(struct ti_port *tport, unsigned int mcr); static int ti_get_lsr(struct ti_port *tport, u8 *lsr); static void ti_get_serial_info(struct tty_struct *tty, struct serial_struct *ss); static void ti_handle_new_msr(struct ti_port *tport, u8 msr); static void ti_stop_read(struct ti_port *tport, struct tty_struct *tty); static int ti_restart_read(struct ti_port *tport, struct tty_struct *tty); static int ti_command_out_sync(struct usb_device *udev, u8 command, u16 moduleid, u16 value, void *data, int size); static int ti_command_in_sync(struct usb_device *udev, u8 command, u16 moduleid, u16 value, void *data, int size); static int ti_port_cmd_out(struct usb_serial_port *port, u8 command, u16 value, void *data, int size); static int ti_port_cmd_in(struct usb_serial_port *port, u8 command, u16 value, void *data, int size); static int ti_write_byte(struct usb_serial_port *port, struct ti_device *tdev, unsigned long addr, u8 mask, u8 byte); static int ti_download_firmware(struct ti_device *tdev); static const struct usb_device_id ti_id_table_3410[] = { { USB_DEVICE(TI_VENDOR_ID, TI_3410_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_3410_EZ430_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_GSM_NO_FW_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_CDMA_NO_FW_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_CDMA_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_GSM_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_EDGE_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_MT9234MU_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_MT9234ZBA_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_MT9234ZBAOLD_PRODUCT_ID) }, { USB_DEVICE(IBM_VENDOR_ID, IBM_4543_PRODUCT_ID) }, { USB_DEVICE(IBM_VENDOR_ID, IBM_454B_PRODUCT_ID) }, { USB_DEVICE(IBM_VENDOR_ID, IBM_454C_PRODUCT_ID) }, { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STEREO_PLUG_ID) }, { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STRIP_PORT_ID) }, { USB_DEVICE(TI_VENDOR_ID, FRI2_PRODUCT_ID) }, { USB_DEVICE(HONEYWELL_VENDOR_ID, HONEYWELL_HGI80_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1110_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1130_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1131_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1150_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1151_PRODUCT_ID) }, { USB_DEVICE(STARTECH_VENDOR_ID, TI_3410_PRODUCT_ID) }, { } /* terminator */ }; static const struct usb_device_id ti_id_table_5052[] = { { USB_DEVICE(TI_VENDOR_ID, TI_5052_BOOT_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_5152_BOOT_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_5052_EEPROM_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_5052_FIRMWARE_PRODUCT_ID) }, { } }; static const struct usb_device_id ti_id_table_combined[] = { { USB_DEVICE(TI_VENDOR_ID, TI_3410_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_3410_EZ430_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_GSM_NO_FW_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_CDMA_NO_FW_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_CDMA_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_GSM_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_EDGE_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_MT9234MU_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_MT9234ZBA_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_MT9234ZBAOLD_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_5052_BOOT_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_5152_BOOT_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_5052_EEPROM_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_5052_FIRMWARE_PRODUCT_ID) }, { USB_DEVICE(IBM_VENDOR_ID, IBM_4543_PRODUCT_ID) }, { USB_DEVICE(IBM_VENDOR_ID, IBM_454B_PRODUCT_ID) }, { USB_DEVICE(IBM_VENDOR_ID, IBM_454C_PRODUCT_ID) }, { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_PRODUCT_ID) }, { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STRIP_PORT_ID) }, { USB_DEVICE(TI_VENDOR_ID, FRI2_PRODUCT_ID) }, { USB_DEVICE(HONEYWELL_VENDOR_ID, HONEYWELL_HGI80_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1110_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1130_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1131_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1150_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1151_PRODUCT_ID) }, { USB_DEVICE(STARTECH_VENDOR_ID, TI_3410_PRODUCT_ID) }, { } /* terminator */ }; static struct usb_serial_driver ti_1port_device = { .driver = { .name = "ti_usb_3410_5052_1", }, .description = "TI USB 3410 1 port adapter", .id_table = ti_id_table_3410, .num_ports = 1, .num_bulk_out = 1, .attach = ti_startup, .release = ti_release, .port_probe = ti_port_probe, .port_remove = ti_port_remove, .open = ti_open, .close = ti_close, .write = ti_write, .write_room = ti_write_room, .chars_in_buffer = ti_chars_in_buffer, .tx_empty = ti_tx_empty, .throttle = ti_throttle, .unthrottle = ti_unthrottle, .get_serial = ti_get_serial_info, .set_termios = ti_set_termios, .tiocmget = ti_tiocmget, .tiocmset = ti_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .get_icount = usb_serial_generic_get_icount, .break_ctl = ti_break, .read_int_callback = ti_interrupt_callback, .read_bulk_callback = ti_bulk_in_callback, .write_bulk_callback = ti_bulk_out_callback, }; static struct usb_serial_driver ti_2port_device = { .driver = { .name = "ti_usb_3410_5052_2", }, .description = "TI USB 5052 2 port adapter", .id_table = ti_id_table_5052, .num_ports = 2, .num_bulk_out = 1, .attach = ti_startup, .release = ti_release, .port_probe = ti_port_probe, .port_remove = ti_port_remove, .open = ti_open, .close = ti_close, .write = ti_write, .write_room = ti_write_room, .chars_in_buffer = ti_chars_in_buffer, .tx_empty = ti_tx_empty, .throttle = ti_throttle, .unthrottle = ti_unthrottle, .get_serial = ti_get_serial_info, .set_termios = ti_set_termios, .tiocmget = ti_tiocmget, .tiocmset = ti_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .get_icount = usb_serial_generic_get_icount, .break_ctl = ti_break, .read_int_callback = ti_interrupt_callback, .read_bulk_callback = ti_bulk_in_callback, .write_bulk_callback = ti_bulk_out_callback, }; static struct usb_serial_driver * const serial_drivers[] = { &ti_1port_device, &ti_2port_device, NULL }; MODULE_AUTHOR(TI_DRIVER_AUTHOR); MODULE_DESCRIPTION(TI_DRIVER_DESC); MODULE_LICENSE("GPL"); MODULE_FIRMWARE("ti_3410.fw"); MODULE_FIRMWARE("ti_5052.fw"); MODULE_FIRMWARE("mts_cdma.fw"); MODULE_FIRMWARE("mts_gsm.fw"); MODULE_FIRMWARE("mts_edge.fw"); MODULE_FIRMWARE("mts_mt9234mu.fw"); MODULE_FIRMWARE("mts_mt9234zba.fw"); MODULE_FIRMWARE("moxa/moxa-1110.fw"); MODULE_FIRMWARE("moxa/moxa-1130.fw"); MODULE_FIRMWARE("moxa/moxa-1131.fw"); MODULE_FIRMWARE("moxa/moxa-1150.fw"); MODULE_FIRMWARE("moxa/moxa-1151.fw"); MODULE_DEVICE_TABLE(usb, ti_id_table_combined); module_usb_serial_driver(serial_drivers, ti_id_table_combined); static int ti_startup(struct usb_serial *serial) { struct ti_device *tdev; struct usb_device *dev = serial->dev; struct usb_host_interface *cur_altsetting; int num_endpoints; u16 vid, pid; int status; dev_dbg(&dev->dev, "%s - product 0x%4X, num configurations %d, configuration value %d\n", __func__, le16_to_cpu(dev->descriptor.idProduct), dev->descriptor.bNumConfigurations, dev->actconfig->desc.bConfigurationValue); tdev = kzalloc(sizeof(struct ti_device), GFP_KERNEL); if (!tdev) return -ENOMEM; mutex_init(&tdev->td_open_close_lock); tdev->td_serial = serial; usb_set_serial_data(serial, tdev); /* determine device type */ if (serial->type == &ti_1port_device) tdev->td_is_3410 = 1; dev_dbg(&dev->dev, "%s - device type is %s\n", __func__, tdev->td_is_3410 ? "3410" : "5052"); vid = le16_to_cpu(dev->descriptor.idVendor); pid = le16_to_cpu(dev->descriptor.idProduct); if (vid == MXU1_VENDOR_ID) { switch (pid) { case MXU1_1130_PRODUCT_ID: case MXU1_1131_PRODUCT_ID: tdev->td_rs485_only = true; break; } } cur_altsetting = serial->interface->cur_altsetting; num_endpoints = cur_altsetting->desc.bNumEndpoints; /* if we have only 1 configuration and 1 endpoint, download firmware */ if (dev->descriptor.bNumConfigurations == 1 && num_endpoints == 1) { status = ti_download_firmware(tdev); if (status != 0) goto free_tdev; /* 3410 must be reset, 5052 resets itself */ if (tdev->td_is_3410) { msleep_interruptible(100); usb_reset_device(dev); } status = -ENODEV; goto free_tdev; } /* the second configuration must be set */ if (dev->actconfig->desc.bConfigurationValue == TI_BOOT_CONFIG) { status = usb_driver_set_configuration(dev, TI_ACTIVE_CONFIG); status = status ? status : -ENODEV; goto free_tdev; } if (serial->num_bulk_in < serial->num_ports || serial->num_bulk_out < serial->num_ports) { dev_err(&serial->interface->dev, "missing endpoints\n"); status = -ENODEV; goto free_tdev; } return 0; free_tdev: kfree(tdev); usb_set_serial_data(serial, NULL); return status; } static void ti_release(struct usb_serial *serial) { struct ti_device *tdev = usb_get_serial_data(serial); kfree(tdev); } static int ti_port_probe(struct usb_serial_port *port) { struct ti_port *tport; tport = kzalloc(sizeof(*tport), GFP_KERNEL); if (!tport) return -ENOMEM; spin_lock_init(&tport->tp_lock); if (port == port->serial->port[0]) tport->tp_uart_base_addr = TI_UART1_BASE_ADDR; else tport->tp_uart_base_addr = TI_UART2_BASE_ADDR; tport->tp_port = port; tport->tp_tdev = usb_get_serial_data(port->serial); if (tport->tp_tdev->td_rs485_only) tport->tp_uart_mode = TI_UART_485_RECEIVER_DISABLED; else tport->tp_uart_mode = TI_UART_232; usb_set_serial_port_data(port, tport); /* * The TUSB5052 LSR does not tell when the transmitter shift register * has emptied so add a one-character drain delay. */ if (!tport->tp_tdev->td_is_3410) port->port.drain_delay = 1; return 0; } static void ti_port_remove(struct usb_serial_port *port) { struct ti_port *tport; tport = usb_get_serial_port_data(port); kfree(tport); } static int ti_open(struct tty_struct *tty, struct usb_serial_port *port) { struct ti_port *tport = usb_get_serial_port_data(port); struct ti_device *tdev; struct usb_device *dev; struct urb *urb; int status; u16 open_settings; open_settings = (TI_PIPE_MODE_CONTINUOUS | TI_PIPE_TIMEOUT_ENABLE | (TI_TRANSFER_TIMEOUT << 2)); dev = port->serial->dev; tdev = tport->tp_tdev; /* only one open on any port on a device at a time */ if (mutex_lock_interruptible(&tdev->td_open_close_lock)) return -ERESTARTSYS; tport->tp_msr = 0; tport->tp_shadow_mcr |= (TI_MCR_RTS | TI_MCR_DTR); /* start interrupt urb the first time a port is opened on this device */ if (tdev->td_open_port_count == 0) { dev_dbg(&port->dev, "%s - start interrupt in urb\n", __func__); urb = tdev->td_serial->port[0]->interrupt_in_urb; if (!urb) { dev_err(&port->dev, "%s - no interrupt urb\n", __func__); status = -EINVAL; goto release_lock; } urb->context = tdev; status = usb_submit_urb(urb, GFP_KERNEL); if (status) { dev_err(&port->dev, "%s - submit interrupt urb failed, %d\n", __func__, status); goto release_lock; } } if (tty) ti_set_termios(tty, port, &tty->termios); status = ti_port_cmd_out(port, TI_OPEN_PORT, open_settings, NULL, 0); if (status) { dev_err(&port->dev, "%s - cannot send open command, %d\n", __func__, status); goto unlink_int_urb; } status = ti_port_cmd_out(port, TI_START_PORT, 0, NULL, 0); if (status) { dev_err(&port->dev, "%s - cannot send start command, %d\n", __func__, status); goto unlink_int_urb; } status = ti_port_cmd_out(port, TI_PURGE_PORT, TI_PURGE_INPUT, NULL, 0); if (status) { dev_err(&port->dev, "%s - cannot clear input buffers, %d\n", __func__, status); goto unlink_int_urb; } status = ti_port_cmd_out(port, TI_PURGE_PORT, TI_PURGE_OUTPUT, NULL, 0); if (status) { dev_err(&port->dev, "%s - cannot clear output buffers, %d\n", __func__, status); goto unlink_int_urb; } /* reset the data toggle on the bulk endpoints to work around bug in * host controllers where things get out of sync some times */ usb_clear_halt(dev, port->write_urb->pipe); usb_clear_halt(dev, port->read_urb->pipe); if (tty) ti_set_termios(tty, port, &tty->termios); status = ti_port_cmd_out(port, TI_OPEN_PORT, open_settings, NULL, 0); if (status) { dev_err(&port->dev, "%s - cannot send open command (2), %d\n", __func__, status); goto unlink_int_urb; } status = ti_port_cmd_out(port, TI_START_PORT, 0, NULL, 0); if (status) { dev_err(&port->dev, "%s - cannot send start command (2), %d\n", __func__, status); goto unlink_int_urb; } /* start read urb */ urb = port->read_urb; tport->tp_read_urb_state = TI_READ_URB_RUNNING; urb->context = tport; status = usb_submit_urb(urb, GFP_KERNEL); if (status) { dev_err(&port->dev, "%s - submit read urb failed, %d\n", __func__, status); goto unlink_int_urb; } tport->tp_is_open = 1; ++tdev->td_open_port_count; goto release_lock; unlink_int_urb: if (tdev->td_open_port_count == 0) usb_kill_urb(port->serial->port[0]->interrupt_in_urb); release_lock: mutex_unlock(&tdev->td_open_close_lock); return status; } static void ti_close(struct usb_serial_port *port) { struct ti_device *tdev; struct ti_port *tport; int status; unsigned long flags; tdev = usb_get_serial_data(port->serial); tport = usb_get_serial_port_data(port); tport->tp_is_open = 0; usb_kill_urb(port->read_urb); usb_kill_urb(port->write_urb); tport->tp_write_urb_in_use = 0; spin_lock_irqsave(&tport->tp_lock, flags); kfifo_reset_out(&port->write_fifo); spin_unlock_irqrestore(&tport->tp_lock, flags); status = ti_port_cmd_out(port, TI_CLOSE_PORT, 0, NULL, 0); if (status) dev_err(&port->dev, "%s - cannot send close port command, %d\n" , __func__, status); mutex_lock(&tdev->td_open_close_lock); --tdev->td_open_port_count; if (tdev->td_open_port_count == 0) { /* last port is closed, shut down interrupt urb */ usb_kill_urb(port->serial->port[0]->interrupt_in_urb); } mutex_unlock(&tdev->td_open_close_lock); } static int ti_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *data, int count) { struct ti_port *tport = usb_get_serial_port_data(port); if (count == 0) { return 0; } if (!tport->tp_is_open) return -ENODEV; count = kfifo_in_locked(&port->write_fifo, data, count, &tport->tp_lock); ti_send(tport); return count; } static unsigned int ti_write_room(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct ti_port *tport = usb_get_serial_port_data(port); unsigned int room; unsigned long flags; spin_lock_irqsave(&tport->tp_lock, flags); room = kfifo_avail(&port->write_fifo); spin_unlock_irqrestore(&tport->tp_lock, flags); dev_dbg(&port->dev, "%s - returns %u\n", __func__, room); return room; } static unsigned int ti_chars_in_buffer(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct ti_port *tport = usb_get_serial_port_data(port); unsigned int chars; unsigned long flags; spin_lock_irqsave(&tport->tp_lock, flags); chars = kfifo_len(&port->write_fifo); spin_unlock_irqrestore(&tport->tp_lock, flags); dev_dbg(&port->dev, "%s - returns %u\n", __func__, chars); return chars; } static bool ti_tx_empty(struct usb_serial_port *port) { struct ti_port *tport = usb_get_serial_port_data(port); u8 lsr, mask; int ret; /* * TUSB5052 does not have the TEMT bit to tell if the shift register * is empty. */ if (tport->tp_tdev->td_is_3410) mask = TI_LSR_TX_EMPTY_BOTH; else mask = TI_LSR_TX_EMPTY; ret = ti_get_lsr(tport, &lsr); if (!ret && !(lsr & mask)) return false; return true; } static void ti_throttle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct ti_port *tport = usb_get_serial_port_data(port); if (I_IXOFF(tty) || C_CRTSCTS(tty)) ti_stop_read(tport, tty); } static void ti_unthrottle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct ti_port *tport = usb_get_serial_port_data(port); int status; if (I_IXOFF(tty) || C_CRTSCTS(tty)) { status = ti_restart_read(tport, tty); if (status) dev_err(&port->dev, "%s - cannot restart read, %d\n", __func__, status); } } static void ti_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { struct ti_port *tport = usb_get_serial_port_data(port); struct ti_uart_config *config; int baud; int status; unsigned int mcr; u16 wbaudrate; u16 wflags = 0; config = kmalloc(sizeof(*config), GFP_KERNEL); if (!config) return; /* these flags must be set */ wflags |= TI_UART_ENABLE_MS_INTS; wflags |= TI_UART_ENABLE_AUTO_START_DMA; config->bUartMode = tport->tp_uart_mode; switch (C_CSIZE(tty)) { case CS5: config->bDataBits = TI_UART_5_DATA_BITS; break; case CS6: config->bDataBits = TI_UART_6_DATA_BITS; break; case CS7: config->bDataBits = TI_UART_7_DATA_BITS; break; default: case CS8: config->bDataBits = TI_UART_8_DATA_BITS; break; } /* CMSPAR isn't supported by this driver */ tty->termios.c_cflag &= ~CMSPAR; if (C_PARENB(tty)) { if (C_PARODD(tty)) { wflags |= TI_UART_ENABLE_PARITY_CHECKING; config->bParity = TI_UART_ODD_PARITY; } else { wflags |= TI_UART_ENABLE_PARITY_CHECKING; config->bParity = TI_UART_EVEN_PARITY; } } else { wflags &= ~TI_UART_ENABLE_PARITY_CHECKING; config->bParity = TI_UART_NO_PARITY; } if (C_CSTOPB(tty)) config->bStopBits = TI_UART_2_STOP_BITS; else config->bStopBits = TI_UART_1_STOP_BITS; if (C_CRTSCTS(tty)) { /* RTS flow control must be off to drop RTS for baud rate B0 */ if ((C_BAUD(tty)) != B0) wflags |= TI_UART_ENABLE_RTS_IN; wflags |= TI_UART_ENABLE_CTS_OUT; } else { ti_restart_read(tport, tty); } if (I_IXOFF(tty) || I_IXON(tty)) { config->cXon = START_CHAR(tty); config->cXoff = STOP_CHAR(tty); if (I_IXOFF(tty)) wflags |= TI_UART_ENABLE_X_IN; else ti_restart_read(tport, tty); if (I_IXON(tty)) wflags |= TI_UART_ENABLE_X_OUT; } baud = tty_get_baud_rate(tty); if (!baud) baud = 9600; if (tport->tp_tdev->td_is_3410) wbaudrate = (923077 + baud/2) / baud; else wbaudrate = (461538 + baud/2) / baud; /* FIXME: Should calculate resulting baud here and report it back */ if ((C_BAUD(tty)) != B0) tty_encode_baud_rate(tty, baud, baud); dev_dbg(&port->dev, "%s - BaudRate=%d, wBaudRate=%d, wFlags=0x%04X, bDataBits=%d, bParity=%d, bStopBits=%d, cXon=%d, cXoff=%d, bUartMode=%d\n", __func__, baud, wbaudrate, wflags, config->bDataBits, config->bParity, config->bStopBits, config->cXon, config->cXoff, config->bUartMode); config->wBaudRate = cpu_to_be16(wbaudrate); config->wFlags = cpu_to_be16(wflags); status = ti_port_cmd_out(port, TI_SET_CONFIG, 0, config, sizeof(*config)); if (status) dev_err(&port->dev, "%s - cannot set config on port %d, %d\n", __func__, port->port_number, status); /* SET_CONFIG asserts RTS and DTR, reset them correctly */ mcr = tport->tp_shadow_mcr; /* if baud rate is B0, clear RTS and DTR */ if (C_BAUD(tty) == B0) mcr &= ~(TI_MCR_DTR | TI_MCR_RTS); status = ti_set_mcr(tport, mcr); if (status) dev_err(&port->dev, "%s - cannot set modem control on port %d, %d\n", __func__, port->port_number, status); kfree(config); } static int ti_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct ti_port *tport = usb_get_serial_port_data(port); unsigned int result; unsigned int msr; unsigned int mcr; unsigned long flags; spin_lock_irqsave(&tport->tp_lock, flags); msr = tport->tp_msr; mcr = tport->tp_shadow_mcr; spin_unlock_irqrestore(&tport->tp_lock, flags); result = ((mcr & TI_MCR_DTR) ? TIOCM_DTR : 0) | ((mcr & TI_MCR_RTS) ? TIOCM_RTS : 0) | ((mcr & TI_MCR_LOOP) ? TIOCM_LOOP : 0) | ((msr & TI_MSR_CTS) ? TIOCM_CTS : 0) | ((msr & TI_MSR_CD) ? TIOCM_CAR : 0) | ((msr & TI_MSR_RI) ? TIOCM_RI : 0) | ((msr & TI_MSR_DSR) ? TIOCM_DSR : 0); dev_dbg(&port->dev, "%s - 0x%04X\n", __func__, result); return result; } static int ti_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; struct ti_port *tport = usb_get_serial_port_data(port); unsigned int mcr; unsigned long flags; spin_lock_irqsave(&tport->tp_lock, flags); mcr = tport->tp_shadow_mcr; if (set & TIOCM_RTS) mcr |= TI_MCR_RTS; if (set & TIOCM_DTR) mcr |= TI_MCR_DTR; if (set & TIOCM_LOOP) mcr |= TI_MCR_LOOP; if (clear & TIOCM_RTS) mcr &= ~TI_MCR_RTS; if (clear & TIOCM_DTR) mcr &= ~TI_MCR_DTR; if (clear & TIOCM_LOOP) mcr &= ~TI_MCR_LOOP; spin_unlock_irqrestore(&tport->tp_lock, flags); return ti_set_mcr(tport, mcr); } static int ti_break(struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; struct ti_port *tport = usb_get_serial_port_data(port); int status; dev_dbg(&port->dev, "%s - state = %d\n", __func__, break_state); status = ti_write_byte(port, tport->tp_tdev, tport->tp_uart_base_addr + TI_UART_OFFSET_LCR, TI_LCR_BREAK, break_state == -1 ? TI_LCR_BREAK : 0); if (status) { dev_dbg(&port->dev, "%s - error setting break, %d\n", __func__, status); return status; } return 0; } static int ti_get_port_from_code(unsigned char code) { return (code >> 6) & 0x01; } static int ti_get_func_from_code(unsigned char code) { return code & 0x0f; } static void ti_interrupt_callback(struct urb *urb) { struct ti_device *tdev = urb->context; struct usb_serial_port *port; struct usb_serial *serial = tdev->td_serial; struct ti_port *tport; struct device *dev = &urb->dev->dev; unsigned char *data = urb->transfer_buffer; int length = urb->actual_length; int port_number; int function; int status = urb->status; int retval; u8 msr; switch (status) { case 0: break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: dev_dbg(dev, "%s - urb shutting down, %d\n", __func__, status); return; default: dev_err(dev, "%s - nonzero urb status, %d\n", __func__, status); goto exit; } if (length != 2) { dev_dbg(dev, "%s - bad packet size, %d\n", __func__, length); goto exit; } if (data[0] == TI_CODE_HARDWARE_ERROR) { dev_err(dev, "%s - hardware error, %d\n", __func__, data[1]); goto exit; } port_number = ti_get_port_from_code(data[0]); function = ti_get_func_from_code(data[0]); dev_dbg(dev, "%s - port_number %d, function %d, data 0x%02X\n", __func__, port_number, function, data[1]); if (port_number >= serial->num_ports) { dev_err(dev, "%s - bad port number, %d\n", __func__, port_number); goto exit; } port = serial->port[port_number]; tport = usb_get_serial_port_data(port); if (!tport) goto exit; switch (function) { case TI_CODE_DATA_ERROR: dev_err(dev, "%s - DATA ERROR, port %d, data 0x%02X\n", __func__, port_number, data[1]); break; case TI_CODE_MODEM_STATUS: msr = data[1]; dev_dbg(dev, "%s - port %d, msr 0x%02X\n", __func__, port_number, msr); ti_handle_new_msr(tport, msr); break; default: dev_err(dev, "%s - unknown interrupt code, 0x%02X\n", __func__, data[1]); break; } exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(dev, "%s - resubmit interrupt urb failed, %d\n", __func__, retval); } static void ti_bulk_in_callback(struct urb *urb) { struct ti_port *tport = urb->context; struct usb_serial_port *port = tport->tp_port; struct device *dev = &urb->dev->dev; int status = urb->status; unsigned long flags; int retval = 0; switch (status) { case 0: break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: dev_dbg(dev, "%s - urb shutting down, %d\n", __func__, status); return; default: dev_err(dev, "%s - nonzero urb status, %d\n", __func__, status); } if (status == -EPIPE) goto exit; if (status) { dev_err(dev, "%s - stopping read!\n", __func__); return; } if (urb->actual_length) { usb_serial_debug_data(dev, __func__, urb->actual_length, urb->transfer_buffer); if (!tport->tp_is_open) dev_dbg(dev, "%s - port closed, dropping data\n", __func__); else ti_recv(port, urb->transfer_buffer, urb->actual_length); spin_lock_irqsave(&tport->tp_lock, flags); port->icount.rx += urb->actual_length; spin_unlock_irqrestore(&tport->tp_lock, flags); } exit: /* continue to read unless stopping */ spin_lock_irqsave(&tport->tp_lock, flags); if (tport->tp_read_urb_state == TI_READ_URB_RUNNING) retval = usb_submit_urb(urb, GFP_ATOMIC); else if (tport->tp_read_urb_state == TI_READ_URB_STOPPING) tport->tp_read_urb_state = TI_READ_URB_STOPPED; spin_unlock_irqrestore(&tport->tp_lock, flags); if (retval) dev_err(dev, "%s - resubmit read urb failed, %d\n", __func__, retval); } static void ti_bulk_out_callback(struct urb *urb) { struct ti_port *tport = urb->context; struct usb_serial_port *port = tport->tp_port; int status = urb->status; tport->tp_write_urb_in_use = 0; switch (status) { case 0: break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: dev_dbg(&port->dev, "%s - urb shutting down, %d\n", __func__, status); return; default: dev_err_console(port, "%s - nonzero urb status, %d\n", __func__, status); } /* send any buffered data */ ti_send(tport); } static void ti_recv(struct usb_serial_port *port, unsigned char *data, int length) { int cnt; do { cnt = tty_insert_flip_string(&port->port, data, length); if (cnt < length) { dev_err(&port->dev, "%s - dropping data, %d bytes lost\n", __func__, length - cnt); if (cnt == 0) break; } tty_flip_buffer_push(&port->port); data += cnt; length -= cnt; } while (length > 0); } static void ti_send(struct ti_port *tport) { int count, result; struct usb_serial_port *port = tport->tp_port; unsigned long flags; spin_lock_irqsave(&tport->tp_lock, flags); if (tport->tp_write_urb_in_use) goto unlock; count = kfifo_out(&port->write_fifo, port->write_urb->transfer_buffer, port->bulk_out_size); if (count == 0) goto unlock; tport->tp_write_urb_in_use = 1; spin_unlock_irqrestore(&tport->tp_lock, flags); usb_serial_debug_data(&port->dev, __func__, count, port->write_urb->transfer_buffer); usb_fill_bulk_urb(port->write_urb, port->serial->dev, usb_sndbulkpipe(port->serial->dev, port->bulk_out_endpointAddress), port->write_urb->transfer_buffer, count, ti_bulk_out_callback, tport); result = usb_submit_urb(port->write_urb, GFP_ATOMIC); if (result) { dev_err_console(port, "%s - submit write urb failed, %d\n", __func__, result); tport->tp_write_urb_in_use = 0; /* TODO: reschedule ti_send */ } else { spin_lock_irqsave(&tport->tp_lock, flags); port->icount.tx += count; spin_unlock_irqrestore(&tport->tp_lock, flags); } /* more room in the buffer for new writes, wakeup */ tty_port_tty_wakeup(&port->port); return; unlock: spin_unlock_irqrestore(&tport->tp_lock, flags); return; } static int ti_set_mcr(struct ti_port *tport, unsigned int mcr) { unsigned long flags; int status; status = ti_write_byte(tport->tp_port, tport->tp_tdev, tport->tp_uart_base_addr + TI_UART_OFFSET_MCR, TI_MCR_RTS | TI_MCR_DTR | TI_MCR_LOOP, mcr); spin_lock_irqsave(&tport->tp_lock, flags); if (!status) tport->tp_shadow_mcr = mcr; spin_unlock_irqrestore(&tport->tp_lock, flags); return status; } static int ti_get_lsr(struct ti_port *tport, u8 *lsr) { int size, status; struct usb_serial_port *port = tport->tp_port; struct ti_port_status *data; size = sizeof(struct ti_port_status); data = kmalloc(size, GFP_KERNEL); if (!data) return -ENOMEM; status = ti_port_cmd_in(port, TI_GET_PORT_STATUS, 0, data, size); if (status) { dev_err(&port->dev, "%s - get port status command failed, %d\n", __func__, status); goto free_data; } dev_dbg(&port->dev, "%s - lsr 0x%02X\n", __func__, data->bLSR); *lsr = data->bLSR; free_data: kfree(data); return status; } static void ti_get_serial_info(struct tty_struct *tty, struct serial_struct *ss) { struct usb_serial_port *port = tty->driver_data; struct ti_port *tport = usb_get_serial_port_data(port); ss->baud_base = tport->tp_tdev->td_is_3410 ? 921600 : 460800; } static void ti_handle_new_msr(struct ti_port *tport, u8 msr) { struct async_icount *icount; struct tty_struct *tty; unsigned long flags; dev_dbg(&tport->tp_port->dev, "%s - msr 0x%02X\n", __func__, msr); if (msr & TI_MSR_DELTA_MASK) { spin_lock_irqsave(&tport->tp_lock, flags); icount = &tport->tp_port->icount; if (msr & TI_MSR_DELTA_CTS) icount->cts++; if (msr & TI_MSR_DELTA_DSR) icount->dsr++; if (msr & TI_MSR_DELTA_CD) icount->dcd++; if (msr & TI_MSR_DELTA_RI) icount->rng++; wake_up_interruptible(&tport->tp_port->port.delta_msr_wait); spin_unlock_irqrestore(&tport->tp_lock, flags); } tport->tp_msr = msr & TI_MSR_MASK; /* handle CTS flow control */ tty = tty_port_tty_get(&tport->tp_port->port); if (tty && C_CRTSCTS(tty)) { if (msr & TI_MSR_CTS) tty_wakeup(tty); } tty_kref_put(tty); } static void ti_stop_read(struct ti_port *tport, struct tty_struct *tty) { unsigned long flags; spin_lock_irqsave(&tport->tp_lock, flags); if (tport->tp_read_urb_state == TI_READ_URB_RUNNING) tport->tp_read_urb_state = TI_READ_URB_STOPPING; spin_unlock_irqrestore(&tport->tp_lock, flags); } static int ti_restart_read(struct ti_port *tport, struct tty_struct *tty) { struct urb *urb; int status = 0; unsigned long flags; spin_lock_irqsave(&tport->tp_lock, flags); if (tport->tp_read_urb_state == TI_READ_URB_STOPPED) { tport->tp_read_urb_state = TI_READ_URB_RUNNING; urb = tport->tp_port->read_urb; spin_unlock_irqrestore(&tport->tp_lock, flags); urb->context = tport; status = usb_submit_urb(urb, GFP_KERNEL); } else { tport->tp_read_urb_state = TI_READ_URB_RUNNING; spin_unlock_irqrestore(&tport->tp_lock, flags); } return status; } static int ti_command_out_sync(struct usb_device *udev, u8 command, u16 moduleid, u16 value, void *data, int size) { int status; status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), command, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, value, moduleid, data, size, 1000); if (status < 0) return status; return 0; } static int ti_command_in_sync(struct usb_device *udev, u8 command, u16 moduleid, u16 value, void *data, int size) { int status; status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), command, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, value, moduleid, data, size, 1000); if (status == size) status = 0; else if (status >= 0) status = -ECOMM; return status; } static int ti_port_cmd_out(struct usb_serial_port *port, u8 command, u16 value, void *data, int size) { return ti_command_out_sync(port->serial->dev, command, TI_UART1_PORT + port->port_number, value, data, size); } static int ti_port_cmd_in(struct usb_serial_port *port, u8 command, u16 value, void *data, int size) { return ti_command_in_sync(port->serial->dev, command, TI_UART1_PORT + port->port_number, value, data, size); } static int ti_write_byte(struct usb_serial_port *port, struct ti_device *tdev, unsigned long addr, u8 mask, u8 byte) { int status; unsigned int size; struct ti_write_data_bytes *data; dev_dbg(&port->dev, "%s - addr 0x%08lX, mask 0x%02X, byte 0x%02X\n", __func__, addr, mask, byte); size = sizeof(struct ti_write_data_bytes) + 2; data = kmalloc(size, GFP_KERNEL); if (!data) return -ENOMEM; data->bAddrType = TI_RW_DATA_ADDR_XDATA; data->bDataType = TI_RW_DATA_BYTE; data->bDataCounter = 1; data->wBaseAddrHi = cpu_to_be16(addr>>16); data->wBaseAddrLo = cpu_to_be16(addr); data->bData[0] = mask; data->bData[1] = byte; status = ti_command_out_sync(port->serial->dev, TI_WRITE_DATA, TI_RAM_PORT, 0, data, size); if (status < 0) dev_err(&port->dev, "%s - failed, %d\n", __func__, status); kfree(data); return status; } static int ti_do_download(struct usb_device *dev, int pipe, u8 *buffer, int size) { int pos; u8 cs = 0; int done; struct ti_firmware_header *header; int status = 0; int len; for (pos = sizeof(struct ti_firmware_header); pos < size; pos++) cs = (u8)(cs + buffer[pos]); header = (struct ti_firmware_header *)buffer; header->wLength = cpu_to_le16(size - sizeof(*header)); header->bCheckSum = cs; dev_dbg(&dev->dev, "%s - downloading firmware\n", __func__); for (pos = 0; pos < size; pos += done) { len = min(size - pos, TI_DOWNLOAD_MAX_PACKET_SIZE); status = usb_bulk_msg(dev, pipe, buffer + pos, len, &done, 1000); if (status) break; } return status; } static int ti_download_firmware(struct ti_device *tdev) { int status; int buffer_size; u8 *buffer; struct usb_device *dev = tdev->td_serial->dev; unsigned int pipe = usb_sndbulkpipe(dev, tdev->td_serial->port[0]->bulk_out_endpointAddress); const struct firmware *fw_p; char buf[32]; if (le16_to_cpu(dev->descriptor.idVendor) == MXU1_VENDOR_ID) { snprintf(buf, sizeof(buf), "moxa/moxa-%04x.fw", le16_to_cpu(dev->descriptor.idProduct)); status = request_firmware(&fw_p, buf, &dev->dev); goto check_firmware; } /* try ID specific firmware first, then try generic firmware */ sprintf(buf, "ti_usb-v%04x-p%04x.fw", le16_to_cpu(dev->descriptor.idVendor), le16_to_cpu(dev->descriptor.idProduct)); status = request_firmware(&fw_p, buf, &dev->dev); if (status != 0) { buf[0] = '\0'; if (le16_to_cpu(dev->descriptor.idVendor) == MTS_VENDOR_ID) { switch (le16_to_cpu(dev->descriptor.idProduct)) { case MTS_CDMA_PRODUCT_ID: strcpy(buf, "mts_cdma.fw"); break; case MTS_GSM_PRODUCT_ID: strcpy(buf, "mts_gsm.fw"); break; case MTS_EDGE_PRODUCT_ID: strcpy(buf, "mts_edge.fw"); break; case MTS_MT9234MU_PRODUCT_ID: strcpy(buf, "mts_mt9234mu.fw"); break; case MTS_MT9234ZBA_PRODUCT_ID: strcpy(buf, "mts_mt9234zba.fw"); break; case MTS_MT9234ZBAOLD_PRODUCT_ID: strcpy(buf, "mts_mt9234zba.fw"); break; } } if (buf[0] == '\0') { if (tdev->td_is_3410) strcpy(buf, "ti_3410.fw"); else strcpy(buf, "ti_5052.fw"); } status = request_firmware(&fw_p, buf, &dev->dev); } check_firmware: if (status) { dev_err(&dev->dev, "%s - firmware not found\n", __func__); return -ENOENT; } if (fw_p->size > TI_FIRMWARE_BUF_SIZE) { dev_err(&dev->dev, "%s - firmware too large %zu\n", __func__, fw_p->size); release_firmware(fw_p); return -ENOENT; } buffer_size = TI_FIRMWARE_BUF_SIZE + sizeof(struct ti_firmware_header); buffer = kmalloc(buffer_size, GFP_KERNEL); if (buffer) { memcpy(buffer, fw_p->data, fw_p->size); memset(buffer + fw_p->size, 0xff, buffer_size - fw_p->size); status = ti_do_download(dev, pipe, buffer, fw_p->size); kfree(buffer); } else { status = -ENOMEM; } release_firmware(fw_p); if (status) { dev_err(&dev->dev, "%s - error downloading firmware, %d\n", __func__, status); return status; } dev_dbg(&dev->dev, "%s - download successful\n", __func__); return 0; } |
| 135 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 | // SPDX-License-Identifier: GPL-2.0-or-later /* * tsacct.c - System accounting over taskstats interface * * Copyright (C) Jay Lan, <jlan@sgi.com> */ #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/sched/mm.h> #include <linux/sched/cputime.h> #include <linux/tsacct_kern.h> #include <linux/acct.h> #include <linux/jiffies.h> #include <linux/mm.h> /* * fill in basic accounting fields */ void bacct_add_tsk(struct user_namespace *user_ns, struct pid_namespace *pid_ns, struct taskstats *stats, struct task_struct *tsk) { const struct cred *tcred; u64 utime, stime, utimescaled, stimescaled; u64 now_ns, delta; time64_t btime; BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN); /* calculate task elapsed time in nsec */ now_ns = ktime_get_ns(); /* store whole group time first */ delta = now_ns - tsk->group_leader->start_time; /* Convert to micro seconds */ do_div(delta, NSEC_PER_USEC); stats->ac_tgetime = delta; delta = now_ns - tsk->start_time; do_div(delta, NSEC_PER_USEC); stats->ac_etime = delta; /* Convert to seconds for btime (note y2106 limit) */ btime = ktime_get_real_seconds() - div_u64(delta, USEC_PER_SEC); stats->ac_btime = clamp_t(time64_t, btime, 0, U32_MAX); stats->ac_btime64 = btime; if (tsk->flags & PF_EXITING) stats->ac_exitcode = tsk->exit_code; if (thread_group_leader(tsk) && (tsk->flags & PF_FORKNOEXEC)) stats->ac_flag |= AFORK; if (tsk->flags & PF_SUPERPRIV) stats->ac_flag |= ASU; if (tsk->flags & PF_DUMPCORE) stats->ac_flag |= ACORE; if (tsk->flags & PF_SIGNALED) stats->ac_flag |= AXSIG; stats->ac_nice = task_nice(tsk); stats->ac_sched = tsk->policy; stats->ac_pid = task_pid_nr_ns(tsk, pid_ns); stats->ac_tgid = task_tgid_nr_ns(tsk, pid_ns); stats->ac_ppid = task_ppid_nr_ns(tsk, pid_ns); rcu_read_lock(); tcred = __task_cred(tsk); stats->ac_uid = from_kuid_munged(user_ns, tcred->uid); stats->ac_gid = from_kgid_munged(user_ns, tcred->gid); rcu_read_unlock(); task_cputime(tsk, &utime, &stime); stats->ac_utime = div_u64(utime, NSEC_PER_USEC); stats->ac_stime = div_u64(stime, NSEC_PER_USEC); task_cputime_scaled(tsk, &utimescaled, &stimescaled); stats->ac_utimescaled = div_u64(utimescaled, NSEC_PER_USEC); stats->ac_stimescaled = div_u64(stimescaled, NSEC_PER_USEC); stats->ac_minflt = tsk->min_flt; stats->ac_majflt = tsk->maj_flt; strscpy_pad(stats->ac_comm, tsk->comm); } #ifdef CONFIG_TASK_XACCT #define KB 1024 #define MB (1024*KB) #define KB_MASK (~(KB-1)) /* * fill in extended accounting fields */ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) { struct mm_struct *mm; /* convert pages-nsec/1024 to Mbyte-usec, see __acct_update_integrals */ stats->coremem = p->acct_rss_mem1 * PAGE_SIZE; do_div(stats->coremem, 1000 * KB); stats->virtmem = p->acct_vm_mem1 * PAGE_SIZE; do_div(stats->virtmem, 1000 * KB); mm = get_task_mm(p); if (mm) { /* adjust to KB unit */ stats->hiwater_rss = get_mm_hiwater_rss(mm) * PAGE_SIZE / KB; stats->hiwater_vm = get_mm_hiwater_vm(mm) * PAGE_SIZE / KB; mmput(mm); } stats->read_char = p->ioac.rchar & KB_MASK; stats->write_char = p->ioac.wchar & KB_MASK; stats->read_syscalls = p->ioac.syscr & KB_MASK; stats->write_syscalls = p->ioac.syscw & KB_MASK; #ifdef CONFIG_TASK_IO_ACCOUNTING stats->read_bytes = p->ioac.read_bytes & KB_MASK; stats->write_bytes = p->ioac.write_bytes & KB_MASK; stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes & KB_MASK; #else stats->read_bytes = 0; stats->write_bytes = 0; stats->cancelled_write_bytes = 0; #endif } #undef KB #undef MB static void __acct_update_integrals(struct task_struct *tsk, u64 utime, u64 stime) { u64 time, delta; if (!likely(tsk->mm)) return; time = stime + utime; delta = time - tsk->acct_timexpd; if (delta < TICK_NSEC) return; tsk->acct_timexpd = time; /* * Divide by 1024 to avoid overflow, and to avoid division. * The final unit reported to userspace is Mbyte-usecs, * the rest of the math is done in xacct_add_tsk. */ tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm) >> 10; tsk->acct_vm_mem1 += delta * READ_ONCE(tsk->mm->total_vm) >> 10; } /** * acct_update_integrals - update mm integral fields in task_struct * @tsk: task_struct for accounting */ void acct_update_integrals(struct task_struct *tsk) { u64 utime, stime; unsigned long flags; local_irq_save(flags); task_cputime(tsk, &utime, &stime); __acct_update_integrals(tsk, utime, stime); local_irq_restore(flags); } /** * acct_account_cputime - update mm integral after cputime update * @tsk: task_struct for accounting */ void acct_account_cputime(struct task_struct *tsk) { __acct_update_integrals(tsk, tsk->utime, tsk->stime); } /** * acct_clear_integrals - clear the mm integral fields in task_struct * @tsk: task_struct whose accounting fields are cleared */ void acct_clear_integrals(struct task_struct *tsk) { tsk->acct_timexpd = 0; tsk->acct_rss_mem1 = 0; tsk->acct_vm_mem1 = 0; } #endif |
| 1 3 1 1 4 1 4 1 1 1 6 3 2 6 2 2 1 6 6 4 4 1 1 1 4 4 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 | // SPDX-License-Identifier: GPL-2.0-or-later /* * NetLabel Network Address Lists * * This file contains network address list functions used to manage ordered * lists of network addresses for use by the NetLabel subsystem. The NetLabel * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * * Author: Paul Moore <paul@paul-moore.com> */ /* * (c) Copyright Hewlett-Packard Development Company, L.P., 2008 */ #include <linux/types.h> #include <linux/rcupdate.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <net/ip.h> #include <net/ipv6.h> #include <linux/audit.h> #include "netlabel_addrlist.h" /* * Address List Functions */ /** * netlbl_af4list_search - Search for a matching IPv4 address entry * @addr: IPv4 address * @head: the list head * * Description: * Searches the IPv4 address list given by @head. If a matching address entry * is found it is returned, otherwise NULL is returned. The caller is * responsible for calling the rcu_read_[un]lock() functions. * */ struct netlbl_af4list *netlbl_af4list_search(__be32 addr, struct list_head *head) { struct netlbl_af4list *iter; list_for_each_entry_rcu(iter, head, list) if (iter->valid && (addr & iter->mask) == iter->addr) return iter; return NULL; } /** * netlbl_af4list_search_exact - Search for an exact IPv4 address entry * @addr: IPv4 address * @mask: IPv4 address mask * @head: the list head * * Description: * Searches the IPv4 address list given by @head. If an exact match if found * it is returned, otherwise NULL is returned. The caller is responsible for * calling the rcu_read_[un]lock() functions. * */ struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr, __be32 mask, struct list_head *head) { struct netlbl_af4list *iter; list_for_each_entry_rcu(iter, head, list) if (iter->valid && iter->addr == addr && iter->mask == mask) return iter; return NULL; } #if IS_ENABLED(CONFIG_IPV6) /** * netlbl_af6list_search - Search for a matching IPv6 address entry * @addr: IPv6 address * @head: the list head * * Description: * Searches the IPv6 address list given by @head. If a matching address entry * is found it is returned, otherwise NULL is returned. The caller is * responsible for calling the rcu_read_[un]lock() functions. * */ struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr, struct list_head *head) { struct netlbl_af6list *iter; list_for_each_entry_rcu(iter, head, list) if (iter->valid && ipv6_masked_addr_cmp(&iter->addr, &iter->mask, addr) == 0) return iter; return NULL; } /** * netlbl_af6list_search_exact - Search for an exact IPv6 address entry * @addr: IPv6 address * @mask: IPv6 address mask * @head: the list head * * Description: * Searches the IPv6 address list given by @head. If an exact match if found * it is returned, otherwise NULL is returned. The caller is responsible for * calling the rcu_read_[un]lock() functions. * */ struct netlbl_af6list *netlbl_af6list_search_exact(const struct in6_addr *addr, const struct in6_addr *mask, struct list_head *head) { struct netlbl_af6list *iter; list_for_each_entry_rcu(iter, head, list) if (iter->valid && ipv6_addr_equal(&iter->addr, addr) && ipv6_addr_equal(&iter->mask, mask)) return iter; return NULL; } #endif /* IPv6 */ /** * netlbl_af4list_add - Add a new IPv4 address entry to a list * @entry: address entry * @head: the list head * * Description: * Add a new address entry to the list pointed to by @head. On success zero is * returned, otherwise a negative value is returned. The caller is responsible * for calling the necessary locking functions. * */ int netlbl_af4list_add(struct netlbl_af4list *entry, struct list_head *head) { struct netlbl_af4list *iter; iter = netlbl_af4list_search(entry->addr, head); if (iter != NULL && iter->addr == entry->addr && iter->mask == entry->mask) return -EEXIST; /* in order to speed up address searches through the list (the common * case) we need to keep the list in order based on the size of the * address mask such that the entry with the widest mask (smallest * numerical value) appears first in the list */ list_for_each_entry_rcu(iter, head, list) if (iter->valid && ntohl(entry->mask) > ntohl(iter->mask)) { __list_add_rcu(&entry->list, iter->list.prev, &iter->list); return 0; } list_add_tail_rcu(&entry->list, head); return 0; } #if IS_ENABLED(CONFIG_IPV6) /** * netlbl_af6list_add - Add a new IPv6 address entry to a list * @entry: address entry * @head: the list head * * Description: * Add a new address entry to the list pointed to by @head. On success zero is * returned, otherwise a negative value is returned. The caller is responsible * for calling the necessary locking functions. * */ int netlbl_af6list_add(struct netlbl_af6list *entry, struct list_head *head) { struct netlbl_af6list *iter; iter = netlbl_af6list_search(&entry->addr, head); if (iter != NULL && ipv6_addr_equal(&iter->addr, &entry->addr) && ipv6_addr_equal(&iter->mask, &entry->mask)) return -EEXIST; /* in order to speed up address searches through the list (the common * case) we need to keep the list in order based on the size of the * address mask such that the entry with the widest mask (smallest * numerical value) appears first in the list */ list_for_each_entry_rcu(iter, head, list) if (iter->valid && ipv6_addr_cmp(&entry->mask, &iter->mask) > 0) { __list_add_rcu(&entry->list, iter->list.prev, &iter->list); return 0; } list_add_tail_rcu(&entry->list, head); return 0; } #endif /* IPv6 */ /** * netlbl_af4list_remove_entry - Remove an IPv4 address entry * @entry: address entry * * Description: * Remove the specified IP address entry. The caller is responsible for * calling the necessary locking functions. * */ void netlbl_af4list_remove_entry(struct netlbl_af4list *entry) { entry->valid = 0; list_del_rcu(&entry->list); } /** * netlbl_af4list_remove - Remove an IPv4 address entry * @addr: IP address * @mask: IP address mask * @head: the list head * * Description: * Remove an IP address entry from the list pointed to by @head. Returns the * entry on success, NULL on failure. The caller is responsible for calling * the necessary locking functions. * */ struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask, struct list_head *head) { struct netlbl_af4list *entry; entry = netlbl_af4list_search_exact(addr, mask, head); if (entry == NULL) return NULL; netlbl_af4list_remove_entry(entry); return entry; } #if IS_ENABLED(CONFIG_IPV6) /** * netlbl_af6list_remove_entry - Remove an IPv6 address entry * @entry: address entry * * Description: * Remove the specified IP address entry. The caller is responsible for * calling the necessary locking functions. * */ void netlbl_af6list_remove_entry(struct netlbl_af6list *entry) { entry->valid = 0; list_del_rcu(&entry->list); } /** * netlbl_af6list_remove - Remove an IPv6 address entry * @addr: IP address * @mask: IP address mask * @head: the list head * * Description: * Remove an IP address entry from the list pointed to by @head. Returns the * entry on success, NULL on failure. The caller is responsible for calling * the necessary locking functions. * */ struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr, const struct in6_addr *mask, struct list_head *head) { struct netlbl_af6list *entry; entry = netlbl_af6list_search_exact(addr, mask, head); if (entry == NULL) return NULL; netlbl_af6list_remove_entry(entry); return entry; } #endif /* IPv6 */ /* * Audit Helper Functions */ #ifdef CONFIG_AUDIT /** * netlbl_af4list_audit_addr - Audit an IPv4 address * @audit_buf: audit buffer * @src: true if source address, false if destination * @dev: network interface * @addr: IP address * @mask: IP address mask * * Description: * Write the IPv4 address and address mask, if necessary, to @audit_buf. * */ void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf, int src, const char *dev, __be32 addr, __be32 mask) { u32 mask_val = ntohl(mask); char *dir = (src ? "src" : "dst"); if (dev != NULL) audit_log_format(audit_buf, " netif=%s", dev); audit_log_format(audit_buf, " %s=%pI4", dir, &addr); if (mask_val != 0xffffffff) { u32 mask_len = 0; while (mask_val > 0) { mask_val <<= 1; mask_len++; } audit_log_format(audit_buf, " %s_prefixlen=%d", dir, mask_len); } } #if IS_ENABLED(CONFIG_IPV6) /** * netlbl_af6list_audit_addr - Audit an IPv6 address * @audit_buf: audit buffer * @src: true if source address, false if destination * @dev: network interface * @addr: IP address * @mask: IP address mask * * Description: * Write the IPv6 address and address mask, if necessary, to @audit_buf. * */ void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf, int src, const char *dev, const struct in6_addr *addr, const struct in6_addr *mask) { char *dir = (src ? "src" : "dst"); if (dev != NULL) audit_log_format(audit_buf, " netif=%s", dev); audit_log_format(audit_buf, " %s=%pI6", dir, addr); if (ntohl(mask->s6_addr32[3]) != 0xffffffff) { u32 mask_len = 0; u32 mask_val; int iter = -1; while (ntohl(mask->s6_addr32[++iter]) == 0xffffffff) mask_len += 32; mask_val = ntohl(mask->s6_addr32[iter]); while (mask_val > 0) { mask_val <<= 1; mask_len++; } audit_log_format(audit_buf, " %s_prefixlen=%d", dir, mask_len); } } #endif /* IPv6 */ #endif /* CONFIG_AUDIT */ |
| 151 150 49 2 63 62 4 4 4 75 105 85 95 8 8 71 71 70 86 51 71 74 47 86 105 86 10 10 11 57 57 57 57 3 42 133 34 94 14 104 30 30 30 30 30 30 2 3 1 3 4 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Definitions for the 'struct ptr_ring' datastructure. * * Author: * Michael S. Tsirkin <mst@redhat.com> * * Copyright (C) 2016 Red Hat, Inc. * * This is a limited-size FIFO maintaining pointers in FIFO order, with * one CPU producing entries and another consuming entries from a FIFO. * * This implementation tries to minimize cache-contention when there is a * single producer and a single consumer CPU. */ #ifndef _LINUX_PTR_RING_H #define _LINUX_PTR_RING_H 1 #ifdef __KERNEL__ #include <linux/spinlock.h> #include <linux/cache.h> #include <linux/types.h> #include <linux/compiler.h> #include <linux/slab.h> #include <linux/mm.h> #include <asm/errno.h> #endif struct ptr_ring { int producer ____cacheline_aligned_in_smp; spinlock_t producer_lock; int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */ int consumer_tail; /* next entry to invalidate */ spinlock_t consumer_lock; /* Shared consumer/producer data */ /* Read-only by both the producer and the consumer */ int size ____cacheline_aligned_in_smp; /* max entries in queue */ int batch; /* number of entries to consume in a batch */ void **queue; }; /* Note: callers invoking this in a loop must use a compiler barrier, * for example cpu_relax(). * * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock: * see e.g. ptr_ring_full. */ static inline bool __ptr_ring_full(struct ptr_ring *r) { return r->queue[r->producer]; } static inline bool ptr_ring_full(struct ptr_ring *r) { bool ret; spin_lock(&r->producer_lock); ret = __ptr_ring_full(r); spin_unlock(&r->producer_lock); return ret; } static inline bool ptr_ring_full_irq(struct ptr_ring *r) { bool ret; spin_lock_irq(&r->producer_lock); ret = __ptr_ring_full(r); spin_unlock_irq(&r->producer_lock); return ret; } static inline bool ptr_ring_full_any(struct ptr_ring *r) { unsigned long flags; bool ret; spin_lock_irqsave(&r->producer_lock, flags); ret = __ptr_ring_full(r); spin_unlock_irqrestore(&r->producer_lock, flags); return ret; } static inline bool ptr_ring_full_bh(struct ptr_ring *r) { bool ret; spin_lock_bh(&r->producer_lock); ret = __ptr_ring_full(r); spin_unlock_bh(&r->producer_lock); return ret; } /* Note: callers invoking this in a loop must use a compiler barrier, * for example cpu_relax(). Callers must hold producer_lock. * Callers are responsible for making sure pointer that is being queued * points to a valid data. */ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) { if (unlikely(!r->size) || r->queue[r->producer]) return -ENOSPC; /* Make sure the pointer we are storing points to a valid data. */ /* Pairs with the dependency ordering in __ptr_ring_consume. */ smp_wmb(); WRITE_ONCE(r->queue[r->producer++], ptr); if (unlikely(r->producer >= r->size)) r->producer = 0; return 0; } /* * Note: resize (below) nests producer lock within consumer lock, so if you * consume in interrupt or BH context, you must disable interrupts/BH when * calling this. */ static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr) { int ret; spin_lock(&r->producer_lock); ret = __ptr_ring_produce(r, ptr); spin_unlock(&r->producer_lock); return ret; } static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr) { int ret; spin_lock_irq(&r->producer_lock); ret = __ptr_ring_produce(r, ptr); spin_unlock_irq(&r->producer_lock); return ret; } static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr) { unsigned long flags; int ret; spin_lock_irqsave(&r->producer_lock, flags); ret = __ptr_ring_produce(r, ptr); spin_unlock_irqrestore(&r->producer_lock, flags); return ret; } static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr) { int ret; spin_lock_bh(&r->producer_lock); ret = __ptr_ring_produce(r, ptr); spin_unlock_bh(&r->producer_lock); return ret; } static inline void *__ptr_ring_peek(struct ptr_ring *r) { if (likely(r->size)) return READ_ONCE(r->queue[r->consumer_head]); return NULL; } /* * Test ring empty status without taking any locks. * * NB: This is only safe to call if ring is never resized. * * However, if some other CPU consumes ring entries at the same time, the value * returned is not guaranteed to be correct. * * In this case - to avoid incorrectly detecting the ring * as empty - the CPU consuming the ring entries is responsible * for either consuming all ring entries until the ring is empty, * or synchronizing with some other CPU and causing it to * re-test __ptr_ring_empty and/or consume the ring enteries * after the synchronization point. * * Note: callers invoking this in a loop must use a compiler barrier, * for example cpu_relax(). */ static inline bool __ptr_ring_empty(struct ptr_ring *r) { if (likely(r->size)) return !r->queue[READ_ONCE(r->consumer_head)]; return true; } static inline bool ptr_ring_empty(struct ptr_ring *r) { bool ret; spin_lock(&r->consumer_lock); ret = __ptr_ring_empty(r); spin_unlock(&r->consumer_lock); return ret; } static inline bool ptr_ring_empty_irq(struct ptr_ring *r) { bool ret; spin_lock_irq(&r->consumer_lock); ret = __ptr_ring_empty(r); spin_unlock_irq(&r->consumer_lock); return ret; } static inline bool ptr_ring_empty_any(struct ptr_ring *r) { unsigned long flags; bool ret; spin_lock_irqsave(&r->consumer_lock, flags); ret = __ptr_ring_empty(r); spin_unlock_irqrestore(&r->consumer_lock, flags); return ret; } static inline bool ptr_ring_empty_bh(struct ptr_ring *r) { bool ret; spin_lock_bh(&r->consumer_lock); ret = __ptr_ring_empty(r); spin_unlock_bh(&r->consumer_lock); return ret; } /* Zero entries from tail to specified head. * NB: if consumer_head can be >= r->size need to fixup tail later. */ static inline void __ptr_ring_zero_tail(struct ptr_ring *r, int consumer_head) { int head = consumer_head; /* Zero out entries in the reverse order: this way we touch the * cache line that producer might currently be reading the last; * producer won't make progress and touch other cache lines * besides the first one until we write out all entries. */ while (likely(head > r->consumer_tail)) r->queue[--head] = NULL; r->consumer_tail = consumer_head; } /* Must only be called after __ptr_ring_peek returned !NULL */ static inline void __ptr_ring_discard_one(struct ptr_ring *r) { /* Fundamentally, what we want to do is update consumer * index and zero out the entry so producer can reuse it. * Doing it naively at each consume would be as simple as: * consumer = r->consumer; * r->queue[consumer++] = NULL; * if (unlikely(consumer >= r->size)) * consumer = 0; * r->consumer = consumer; * but that is suboptimal when the ring is full as producer is writing * out new entries in the same cache line. Defer these updates until a * batch of entries has been consumed. */ /* Note: we must keep consumer_head valid at all times for __ptr_ring_empty * to work correctly. */ int consumer_head = r->consumer_head + 1; /* Once we have processed enough entries invalidate them in * the ring all at once so producer can reuse their space in the ring. * We also do this when we reach end of the ring - not mandatory * but helps keep the implementation simple. */ if (unlikely(consumer_head - r->consumer_tail >= r->batch || consumer_head >= r->size)) __ptr_ring_zero_tail(r, consumer_head); if (unlikely(consumer_head >= r->size)) { consumer_head = 0; r->consumer_tail = 0; } /* matching READ_ONCE in __ptr_ring_empty for lockless tests */ WRITE_ONCE(r->consumer_head, consumer_head); } static inline void *__ptr_ring_consume(struct ptr_ring *r) { void *ptr; /* The READ_ONCE in __ptr_ring_peek guarantees that anyone * accessing data through the pointer is up to date. Pairs * with smp_wmb in __ptr_ring_produce. */ ptr = __ptr_ring_peek(r); if (ptr) __ptr_ring_discard_one(r); return ptr; } static inline int __ptr_ring_consume_batched(struct ptr_ring *r, void **array, int n) { void *ptr; int i; for (i = 0; i < n; i++) { ptr = __ptr_ring_consume(r); if (!ptr) break; array[i] = ptr; } return i; } /* * Note: resize (below) nests producer lock within consumer lock, so if you * call this in interrupt or BH context, you must disable interrupts/BH when * producing. */ static inline void *ptr_ring_consume(struct ptr_ring *r) { void *ptr; spin_lock(&r->consumer_lock); ptr = __ptr_ring_consume(r); spin_unlock(&r->consumer_lock); return ptr; } static inline void *ptr_ring_consume_irq(struct ptr_ring *r) { void *ptr; spin_lock_irq(&r->consumer_lock); ptr = __ptr_ring_consume(r); spin_unlock_irq(&r->consumer_lock); return ptr; } static inline void *ptr_ring_consume_any(struct ptr_ring *r) { unsigned long flags; void *ptr; spin_lock_irqsave(&r->consumer_lock, flags); ptr = __ptr_ring_consume(r); spin_unlock_irqrestore(&r->consumer_lock, flags); return ptr; } static inline void *ptr_ring_consume_bh(struct ptr_ring *r) { void *ptr; spin_lock_bh(&r->consumer_lock); ptr = __ptr_ring_consume(r); spin_unlock_bh(&r->consumer_lock); return ptr; } static inline int ptr_ring_consume_batched(struct ptr_ring *r, void **array, int n) { int ret; spin_lock(&r->consumer_lock); ret = __ptr_ring_consume_batched(r, array, n); spin_unlock(&r->consumer_lock); return ret; } static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r, void **array, int n) { int ret; spin_lock_irq(&r->consumer_lock); ret = __ptr_ring_consume_batched(r, array, n); spin_unlock_irq(&r->consumer_lock); return ret; } static inline int ptr_ring_consume_batched_any(struct ptr_ring *r, void **array, int n) { unsigned long flags; int ret; spin_lock_irqsave(&r->consumer_lock, flags); ret = __ptr_ring_consume_batched(r, array, n); spin_unlock_irqrestore(&r->consumer_lock, flags); return ret; } static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r, void **array, int n) { int ret; spin_lock_bh(&r->consumer_lock); ret = __ptr_ring_consume_batched(r, array, n); spin_unlock_bh(&r->consumer_lock); return ret; } /* Cast to structure type and call a function without discarding from FIFO. * Function must return a value. * Callers must take consumer_lock. */ #define __PTR_RING_PEEK_CALL(r, f) ((f)(__ptr_ring_peek(r))) #define PTR_RING_PEEK_CALL(r, f) ({ \ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ \ spin_lock(&(r)->consumer_lock); \ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ spin_unlock(&(r)->consumer_lock); \ __PTR_RING_PEEK_CALL_v; \ }) #define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ \ spin_lock_irq(&(r)->consumer_lock); \ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ spin_unlock_irq(&(r)->consumer_lock); \ __PTR_RING_PEEK_CALL_v; \ }) #define PTR_RING_PEEK_CALL_BH(r, f) ({ \ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ \ spin_lock_bh(&(r)->consumer_lock); \ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ spin_unlock_bh(&(r)->consumer_lock); \ __PTR_RING_PEEK_CALL_v; \ }) #define PTR_RING_PEEK_CALL_ANY(r, f) ({ \ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ unsigned long __PTR_RING_PEEK_CALL_f;\ \ spin_lock_irqsave(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ spin_unlock_irqrestore(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \ __PTR_RING_PEEK_CALL_v; \ }) /* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See * documentation for vmalloc for which of them are legal. */ static inline void **__ptr_ring_init_queue_alloc_noprof(unsigned int size, gfp_t gfp) { if (size > KMALLOC_MAX_SIZE / sizeof(void *)) return NULL; return kvmalloc_array_noprof(size, sizeof(void *), gfp | __GFP_ZERO); } static inline void __ptr_ring_set_size(struct ptr_ring *r, int size) { r->size = size; r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue)); /* We need to set batch at least to 1 to make logic * in __ptr_ring_discard_one work correctly. * Batching too much (because ring is small) would cause a lot of * burstiness. Needs tuning, for now disable batching. */ if (r->batch > r->size / 2 || !r->batch) r->batch = 1; } static inline int ptr_ring_init_noprof(struct ptr_ring *r, int size, gfp_t gfp) { r->queue = __ptr_ring_init_queue_alloc_noprof(size, gfp); if (!r->queue) return -ENOMEM; __ptr_ring_set_size(r, size); r->producer = r->consumer_head = r->consumer_tail = 0; spin_lock_init(&r->producer_lock); spin_lock_init(&r->consumer_lock); return 0; } #define ptr_ring_init(...) alloc_hooks(ptr_ring_init_noprof(__VA_ARGS__)) /* * Return entries into ring. Destroy entries that don't fit. * * Note: this is expected to be a rare slow path operation. * * Note: producer lock is nested within consumer lock, so if you * resize you must make sure all uses nest correctly. * In particular if you consume ring in interrupt or BH context, you must * disable interrupts/BH when doing so. */ static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n, void (*destroy)(void *)) { unsigned long flags; spin_lock_irqsave(&r->consumer_lock, flags); spin_lock(&r->producer_lock); if (!r->size) goto done; /* * Clean out buffered entries (for simplicity). This way following code * can test entries for NULL and if not assume they are valid. */ __ptr_ring_zero_tail(r, r->consumer_head); /* * Go over entries in batch, start moving head back and copy entries. * Stop when we run into previously unconsumed entries. */ while (n) { int head = r->consumer_head - 1; if (head < 0) head = r->size - 1; if (r->queue[head]) { /* This batch entry will have to be destroyed. */ goto done; } r->queue[head] = batch[--n]; r->consumer_tail = head; /* matching READ_ONCE in __ptr_ring_empty for lockless tests */ WRITE_ONCE(r->consumer_head, head); } done: /* Destroy all entries left in the batch. */ while (n) destroy(batch[--n]); spin_unlock(&r->producer_lock); spin_unlock_irqrestore(&r->consumer_lock, flags); } static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue, int size, gfp_t gfp, void (*destroy)(void *)) { int producer = 0; void **old; void *ptr; while ((ptr = __ptr_ring_consume(r))) if (producer < size) queue[producer++] = ptr; else if (destroy) destroy(ptr); if (producer >= size) producer = 0; __ptr_ring_set_size(r, size); r->producer = producer; r->consumer_head = 0; r->consumer_tail = 0; old = r->queue; r->queue = queue; return old; } /* * Note: producer lock is nested within consumer lock, so if you * resize you must make sure all uses nest correctly. * In particular if you consume ring in interrupt or BH context, you must * disable interrupts/BH when doing so. */ static inline int ptr_ring_resize_noprof(struct ptr_ring *r, int size, gfp_t gfp, void (*destroy)(void *)) { unsigned long flags; void **queue = __ptr_ring_init_queue_alloc_noprof(size, gfp); void **old; if (!queue) return -ENOMEM; spin_lock_irqsave(&(r)->consumer_lock, flags); spin_lock(&(r)->producer_lock); old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy); spin_unlock(&(r)->producer_lock); spin_unlock_irqrestore(&(r)->consumer_lock, flags); kvfree(old); return 0; } #define ptr_ring_resize(...) alloc_hooks(ptr_ring_resize_noprof(__VA_ARGS__)) /* * Note: producer lock is nested within consumer lock, so if you * resize you must make sure all uses nest correctly. * In particular if you consume ring in BH context, you must * disable BH when doing so. */ static inline int ptr_ring_resize_multiple_bh_noprof(struct ptr_ring **rings, unsigned int nrings, int size, gfp_t gfp, void (*destroy)(void *)) { void ***queues; int i; queues = kmalloc_array_noprof(nrings, sizeof(*queues), gfp); if (!queues) goto noqueues; for (i = 0; i < nrings; ++i) { queues[i] = __ptr_ring_init_queue_alloc_noprof(size, gfp); if (!queues[i]) goto nomem; } for (i = 0; i < nrings; ++i) { spin_lock_bh(&(rings[i])->consumer_lock); spin_lock(&(rings[i])->producer_lock); queues[i] = __ptr_ring_swap_queue(rings[i], queues[i], size, gfp, destroy); spin_unlock(&(rings[i])->producer_lock); spin_unlock_bh(&(rings[i])->consumer_lock); } for (i = 0; i < nrings; ++i) kvfree(queues[i]); kfree(queues); return 0; nomem: while (--i >= 0) kvfree(queues[i]); kfree(queues); noqueues: return -ENOMEM; } #define ptr_ring_resize_multiple_bh(...) \ alloc_hooks(ptr_ring_resize_multiple_bh_noprof(__VA_ARGS__)) static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *)) { void *ptr; if (destroy) while ((ptr = ptr_ring_consume(r))) destroy(ptr); kvfree(r->queue); } #endif /* _LINUX_PTR_RING_H */ |
| 924 72 10 9 861 468 478 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * kernfs.h - pseudo filesystem decoupled from vfs locking */ #ifndef __LINUX_KERNFS_H #define __LINUX_KERNFS_H #include <linux/err.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/idr.h> #include <linux/lockdep.h> #include <linux/rbtree.h> #include <linux/atomic.h> #include <linux/bug.h> #include <linux/types.h> #include <linux/uidgid.h> #include <linux/wait.h> #include <linux/rwsem.h> #include <linux/cache.h> struct file; struct dentry; struct iattr; struct seq_file; struct vm_area_struct; struct vm_operations_struct; struct super_block; struct file_system_type; struct poll_table_struct; struct fs_context; struct kernfs_fs_context; struct kernfs_open_node; struct kernfs_iattrs; /* * NR_KERNFS_LOCK_BITS determines size (NR_KERNFS_LOCKS) of hash * table of locks. * Having a small hash table would impact scalability, since * more and more kernfs_node objects will end up using same lock * and having a very large hash table would waste memory. * * At the moment size of hash table of locks is being set based on * the number of CPUs as follows: * * NR_CPU NR_KERNFS_LOCK_BITS NR_KERNFS_LOCKS * 1 1 2 * 2-3 2 4 * 4-7 4 16 * 8-15 6 64 * 16-31 8 256 * 32 and more 10 1024 * * The above relation between NR_CPU and number of locks is based * on some internal experimentation which involved booting qemu * with different values of smp, performing some sysfs operations * on all CPUs and observing how increase in number of locks impacts * completion time of these sysfs operations on each CPU. */ #ifdef CONFIG_SMP #define NR_KERNFS_LOCK_BITS (2 * (ilog2(NR_CPUS < 32 ? NR_CPUS : 32))) #else #define NR_KERNFS_LOCK_BITS 1 #endif #define NR_KERNFS_LOCKS (1 << NR_KERNFS_LOCK_BITS) /* * There's one kernfs_open_file for each open file and one kernfs_open_node * for each kernfs_node with one or more open files. * * filp->private_data points to seq_file whose ->private points to * kernfs_open_file. * * kernfs_open_files are chained at kernfs_open_node->files, which is * protected by kernfs_global_locks.open_file_mutex[i]. * * To reduce possible contention in sysfs access, arising due to single * locks, use an array of locks (e.g. open_file_mutex) and use kernfs_node * object address as hash keys to get the index of these locks. * * Hashed mutexes are safe to use here because operations using these don't * rely on global exclusion. * * In future we intend to replace other global locks with hashed ones as well. * kernfs_global_locks acts as a holder for all such hash tables. */ struct kernfs_global_locks { struct mutex open_file_mutex[NR_KERNFS_LOCKS]; }; enum kernfs_node_type { KERNFS_DIR = 0x0001, KERNFS_FILE = 0x0002, KERNFS_LINK = 0x0004, }; #define KERNFS_TYPE_MASK 0x000f #define KERNFS_FLAG_MASK ~KERNFS_TYPE_MASK #define KERNFS_MAX_USER_XATTRS 128 #define KERNFS_USER_XATTR_SIZE_LIMIT (128 << 10) enum kernfs_node_flag { KERNFS_ACTIVATED = 0x0010, KERNFS_NS = 0x0020, KERNFS_HAS_SEQ_SHOW = 0x0040, KERNFS_HAS_MMAP = 0x0080, KERNFS_LOCKDEP = 0x0100, KERNFS_HIDDEN = 0x0200, KERNFS_SUICIDAL = 0x0400, KERNFS_SUICIDED = 0x0800, KERNFS_EMPTY_DIR = 0x1000, KERNFS_HAS_RELEASE = 0x2000, KERNFS_REMOVING = 0x4000, }; /* @flags for kernfs_create_root() */ enum kernfs_root_flag { /* * kernfs_nodes are created in the deactivated state and invisible. * They require explicit kernfs_activate() to become visible. This * can be used to make related nodes become visible atomically * after all nodes are created successfully. */ KERNFS_ROOT_CREATE_DEACTIVATED = 0x0001, /* * For regular files, if the opener has CAP_DAC_OVERRIDE, open(2) * succeeds regardless of the RW permissions. sysfs had an extra * layer of enforcement where open(2) fails with -EACCES regardless * of CAP_DAC_OVERRIDE if the permission doesn't have the * respective read or write access at all (none of S_IRUGO or * S_IWUGO) or the respective operation isn't implemented. The * following flag enables that behavior. */ KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK = 0x0002, /* * The filesystem supports exportfs operation, so userspace can use * fhandle to access nodes of the fs. */ KERNFS_ROOT_SUPPORT_EXPORTOP = 0x0004, /* * Support user xattrs to be written to nodes rooted at this root. */ KERNFS_ROOT_SUPPORT_USER_XATTR = 0x0008, /* * Renames must not change the parent node. */ KERNFS_ROOT_INVARIANT_PARENT = 0x0010, }; /* type-specific structures for kernfs_node union members */ struct kernfs_elem_dir { unsigned long subdirs; /* children rbtree starts here and goes through kn->rb */ struct rb_root children; /* * The kernfs hierarchy this directory belongs to. This fits * better directly in kernfs_node but is here to save space. */ struct kernfs_root *root; /* * Monotonic revision counter, used to identify if a directory * node has changed during negative dentry revalidation. */ unsigned long rev; }; struct kernfs_elem_symlink { struct kernfs_node *target_kn; }; struct kernfs_elem_attr { const struct kernfs_ops *ops; struct kernfs_open_node __rcu *open; loff_t size; struct kernfs_node *notify_next; /* for kernfs_notify() */ }; /* * kernfs_node - the building block of kernfs hierarchy. Each and every * kernfs node is represented by single kernfs_node. Most fields are * private to kernfs and shouldn't be accessed directly by kernfs users. * * As long as count reference is held, the kernfs_node itself is * accessible. Dereferencing elem or any other outer entity requires * active reference. */ struct kernfs_node { atomic_t count; atomic_t active; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif /* * Use kernfs_get_parent() and kernfs_name/path() instead of * accessing the following two fields directly. If the node is * never moved to a different parent, it is safe to access the * parent directly. */ struct kernfs_node __rcu *__parent; const char __rcu *name; struct rb_node rb; const void *ns; /* namespace tag */ unsigned int hash; /* ns + name hash */ unsigned short flags; umode_t mode; union { struct kernfs_elem_dir dir; struct kernfs_elem_symlink symlink; struct kernfs_elem_attr attr; }; /* * 64bit unique ID. On 64bit ino setups, id is the ino. On 32bit, * the low 32bits are ino and upper generation. */ u64 id; void *priv; struct kernfs_iattrs *iattr; struct rcu_head rcu; }; /* * kernfs_syscall_ops may be specified on kernfs_create_root() to support * syscalls. These optional callbacks are invoked on the matching syscalls * and can perform any kernfs operations which don't necessarily have to be * the exact operation requested. An active reference is held for each * kernfs_node parameter. */ struct kernfs_syscall_ops { int (*show_options)(struct seq_file *sf, struct kernfs_root *root); int (*mkdir)(struct kernfs_node *parent, const char *name, umode_t mode); int (*rmdir)(struct kernfs_node *kn); int (*rename)(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name); int (*show_path)(struct seq_file *sf, struct kernfs_node *kn, struct kernfs_root *root); }; struct kernfs_node *kernfs_root_to_node(struct kernfs_root *root); struct kernfs_open_file { /* published fields */ struct kernfs_node *kn; struct file *file; struct seq_file *seq_file; void *priv; /* private fields, do not use outside kernfs proper */ struct mutex mutex; struct mutex prealloc_mutex; int event; struct list_head list; char *prealloc_buf; size_t atomic_write_len; bool mmapped:1; bool released:1; const struct vm_operations_struct *vm_ops; }; struct kernfs_ops { /* * Optional open/release methods. Both are called with * @of->seq_file populated. */ int (*open)(struct kernfs_open_file *of); void (*release)(struct kernfs_open_file *of); /* * Read is handled by either seq_file or raw_read(). * * If seq_show() is present, seq_file path is active. Other seq * operations are optional and if not implemented, the behavior is * equivalent to single_open(). @sf->private points to the * associated kernfs_open_file. * * read() is bounced through kernel buffer and a read larger than * PAGE_SIZE results in partial operation of PAGE_SIZE. */ int (*seq_show)(struct seq_file *sf, void *v); void *(*seq_start)(struct seq_file *sf, loff_t *ppos); void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos); void (*seq_stop)(struct seq_file *sf, void *v); ssize_t (*read)(struct kernfs_open_file *of, char *buf, size_t bytes, loff_t off); /* * write() is bounced through kernel buffer. If atomic_write_len * is not set, a write larger than PAGE_SIZE results in partial * operations of PAGE_SIZE chunks. If atomic_write_len is set, * writes upto the specified size are executed atomically but * larger ones are rejected with -E2BIG. */ size_t atomic_write_len; /* * "prealloc" causes a buffer to be allocated at open for * all read/write requests. As ->seq_show uses seq_read() * which does its own allocation, it is incompatible with * ->prealloc. Provide ->read and ->write with ->prealloc. */ bool prealloc; ssize_t (*write)(struct kernfs_open_file *of, char *buf, size_t bytes, loff_t off); __poll_t (*poll)(struct kernfs_open_file *of, struct poll_table_struct *pt); int (*mmap)(struct kernfs_open_file *of, struct vm_area_struct *vma); loff_t (*llseek)(struct kernfs_open_file *of, loff_t offset, int whence); }; /* * The kernfs superblock creation/mount parameter context. */ struct kernfs_fs_context { struct kernfs_root *root; /* Root of the hierarchy being mounted */ void *ns_tag; /* Namespace tag of the mount (or NULL) */ unsigned long magic; /* File system specific magic number */ /* The following are set/used by kernfs_mount() */ bool new_sb_created; /* Set to T if we allocated a new sb */ }; #ifdef CONFIG_KERNFS static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) { return kn->flags & KERNFS_TYPE_MASK; } static inline ino_t kernfs_id_ino(u64 id) { /* id is ino if ino_t is 64bit; otherwise, low 32bits */ if (sizeof(ino_t) >= sizeof(u64)) return id; else return (u32)id; } static inline u32 kernfs_id_gen(u64 id) { /* gen is fixed at 1 if ino_t is 64bit; otherwise, high 32bits */ if (sizeof(ino_t) >= sizeof(u64)) return 1; else return id >> 32; } static inline ino_t kernfs_ino(struct kernfs_node *kn) { return kernfs_id_ino(kn->id); } static inline ino_t kernfs_gen(struct kernfs_node *kn) { return kernfs_id_gen(kn->id); } /** * kernfs_enable_ns - enable namespace under a directory * @kn: directory of interest, should be empty * * This is to be called right after @kn is created to enable namespace * under it. All children of @kn must have non-NULL namespace tags and * only the ones which match the super_block's tag will be visible. */ static inline void kernfs_enable_ns(struct kernfs_node *kn) { WARN_ON_ONCE(kernfs_type(kn) != KERNFS_DIR); WARN_ON_ONCE(!RB_EMPTY_ROOT(&kn->dir.children)); kn->flags |= KERNFS_NS; } /** * kernfs_ns_enabled - test whether namespace is enabled * @kn: the node to test * * Test whether namespace filtering is enabled for the children of @ns. */ static inline bool kernfs_ns_enabled(struct kernfs_node *kn) { return kn->flags & KERNFS_NS; } int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); int kernfs_path_from_node(struct kernfs_node *kn_to, struct kernfs_node *kn_from, char *buf, size_t buflen); void pr_cont_kernfs_name(struct kernfs_node *kn); void pr_cont_kernfs_path(struct kernfs_node *kn); struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn); struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns); struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path, const void *ns); void kernfs_get(struct kernfs_node *kn); void kernfs_put(struct kernfs_node *kn); struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry); struct kernfs_root *kernfs_root_from_sb(struct super_block *sb); struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn); struct dentry *kernfs_node_dentry(struct kernfs_node *kn, struct super_block *sb); struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv); void kernfs_destroy_root(struct kernfs_root *root); unsigned int kernfs_root_flags(struct kernfs_node *kn); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, void *priv, const void *ns); struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, const char *name); struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns, struct lock_class_key *key); struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, const char *name, struct kernfs_node *target); void kernfs_activate(struct kernfs_node *kn); void kernfs_show(struct kernfs_node *kn, bool show); void kernfs_remove(struct kernfs_node *kn); void kernfs_break_active_protection(struct kernfs_node *kn); void kernfs_unbreak_active_protection(struct kernfs_node *kn); bool kernfs_remove_self(struct kernfs_node *kn); int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, const void *ns); int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name, const void *new_ns); int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr); __poll_t kernfs_generic_poll(struct kernfs_open_file *of, struct poll_table_struct *pt); void kernfs_notify(struct kernfs_node *kn); int kernfs_xattr_get(struct kernfs_node *kn, const char *name, void *value, size_t size); int kernfs_xattr_set(struct kernfs_node *kn, const char *name, const void *value, size_t size, int flags); const void *kernfs_super_ns(struct super_block *sb); int kernfs_get_tree(struct fs_context *fc); void kernfs_free_fs_context(struct fs_context *fc); void kernfs_kill_sb(struct super_block *sb); void kernfs_init(void); struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, u64 id); #else /* CONFIG_KERNFS */ static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) { return 0; } /* whatever */ static inline void kernfs_enable_ns(struct kernfs_node *kn) { } static inline bool kernfs_ns_enabled(struct kernfs_node *kn) { return false; } static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) { return -ENOSYS; } static inline int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn, char *buf, size_t buflen) { return -ENOSYS; } static inline void pr_cont_kernfs_name(struct kernfs_node *kn) { } static inline void pr_cont_kernfs_path(struct kernfs_node *kn) { } static inline struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) { return NULL; } static inline struct kernfs_node * kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns) { return NULL; } static inline struct kernfs_node * kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path, const void *ns) { return NULL; } static inline void kernfs_get(struct kernfs_node *kn) { } static inline void kernfs_put(struct kernfs_node *kn) { } static inline struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry) { return NULL; } static inline struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) { return NULL; } static inline struct inode * kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn) { return NULL; } static inline struct kernfs_root * kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv) { return ERR_PTR(-ENOSYS); } static inline void kernfs_destroy_root(struct kernfs_root *root) { } static inline unsigned int kernfs_root_flags(struct kernfs_node *kn) { return 0; } static inline struct kernfs_node * kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, void *priv, const void *ns) { return ERR_PTR(-ENOSYS); } static inline struct kernfs_node * __kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns, struct lock_class_key *key) { return ERR_PTR(-ENOSYS); } static inline struct kernfs_node * kernfs_create_link(struct kernfs_node *parent, const char *name, struct kernfs_node *target) { return ERR_PTR(-ENOSYS); } static inline void kernfs_activate(struct kernfs_node *kn) { } static inline void kernfs_remove(struct kernfs_node *kn) { } static inline bool kernfs_remove_self(struct kernfs_node *kn) { return false; } static inline int kernfs_remove_by_name_ns(struct kernfs_node *kn, const char *name, const void *ns) { return -ENOSYS; } static inline int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name, const void *new_ns) { return -ENOSYS; } static inline int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr) { return -ENOSYS; } static inline __poll_t kernfs_generic_poll(struct kernfs_open_file *of, struct poll_table_struct *pt) { return -ENOSYS; } static inline void kernfs_notify(struct kernfs_node *kn) { } static inline int kernfs_xattr_get(struct kernfs_node *kn, const char *name, void *value, size_t size) { return -ENOSYS; } static inline int kernfs_xattr_set(struct kernfs_node *kn, const char *name, const void *value, size_t size, int flags) { return -ENOSYS; } static inline const void *kernfs_super_ns(struct super_block *sb) { return NULL; } static inline int kernfs_get_tree(struct fs_context *fc) { return -ENOSYS; } static inline void kernfs_free_fs_context(struct fs_context *fc) { } static inline void kernfs_kill_sb(struct super_block *sb) { } static inline void kernfs_init(void) { } #endif /* CONFIG_KERNFS */ /** * kernfs_path - build full path of a given node * @kn: kernfs_node of interest * @buf: buffer to copy @kn's name into * @buflen: size of @buf * * If @kn is NULL result will be "(null)". * * Returns the length of the full path. If the full length is equal to or * greater than @buflen, @buf contains the truncated path with the trailing * '\0'. On error, -errno is returned. */ static inline int kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen) { return kernfs_path_from_node(kn, NULL, buf, buflen); } static inline struct kernfs_node * kernfs_find_and_get(struct kernfs_node *kn, const char *name) { return kernfs_find_and_get_ns(kn, name, NULL); } static inline struct kernfs_node * kernfs_walk_and_get(struct kernfs_node *kn, const char *path) { return kernfs_walk_and_get_ns(kn, path, NULL); } static inline struct kernfs_node * kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode, void *priv) { return kernfs_create_dir_ns(parent, name, mode, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, priv, NULL); } static inline int kernfs_remove_by_name(struct kernfs_node *parent, const char *name) { return kernfs_remove_by_name_ns(parent, name, NULL); } static inline int kernfs_rename(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name) { return kernfs_rename_ns(kn, new_parent, new_name, NULL); } #endif /* __LINUX_KERNFS_H */ |
| 7 7 7 6 6 7 7 7 18 18 8 7 8 21 21 21 21 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> */ #include <linux/list.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/srcu.h> #include <linux/rculist.h> #include <linux/wait.h> #include <linux/memcontrol.h> #include <linux/fsnotify_backend.h> #include "fsnotify.h" #include <linux/atomic.h> /* * Final freeing of a group */ static void fsnotify_final_destroy_group(struct fsnotify_group *group) { if (group->ops->free_group_priv) group->ops->free_group_priv(group); mem_cgroup_put(group->memcg); mutex_destroy(&group->mark_mutex); kfree(group); } /* * Stop queueing new events for this group. Once this function returns * fsnotify_add_event() will not add any new events to the group's queue. */ void fsnotify_group_stop_queueing(struct fsnotify_group *group) { spin_lock(&group->notification_lock); group->shutdown = true; spin_unlock(&group->notification_lock); } /* * Trying to get rid of a group. Remove all marks, flush all events and release * the group reference. * Note that another thread calling fsnotify_clear_marks_by_group() may still * hold a ref to the group. */ void fsnotify_destroy_group(struct fsnotify_group *group) { /* * Stop queueing new events. The code below is careful enough to not * require this but fanotify needs to stop queuing events even before * fsnotify_destroy_group() is called and this makes the other callers * of fsnotify_destroy_group() to see the same behavior. */ fsnotify_group_stop_queueing(group); /* Clear all marks for this group and queue them for destruction */ fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_ANY); /* * Some marks can still be pinned when waiting for response from * userspace. Wait for those now. fsnotify_prepare_user_wait() will * not succeed now so this wait is race-free. */ wait_event(group->notification_waitq, !atomic_read(&group->user_waits)); /* * Wait until all marks get really destroyed. We could actually destroy * them ourselves instead of waiting for worker to do it, however that * would be racy as worker can already be processing some marks before * we even entered fsnotify_destroy_group(). */ fsnotify_wait_marks_destroyed(); /* * Since we have waited for fsnotify_mark_srcu in * fsnotify_mark_destroy_list() there can be no outstanding event * notification against this group. So clearing the notification queue * of all events is reliable now. */ fsnotify_flush_notify(group); /* * Destroy overflow event (we cannot use fsnotify_destroy_event() as * that deliberately ignores overflow events. */ if (group->overflow_event) group->ops->free_event(group, group->overflow_event); fsnotify_put_group(group); } /* * Get reference to a group. */ void fsnotify_get_group(struct fsnotify_group *group) { refcount_inc(&group->refcnt); } /* * Drop a reference to a group. Free it if it's through. */ void fsnotify_put_group(struct fsnotify_group *group) { if (refcount_dec_and_test(&group->refcnt)) fsnotify_final_destroy_group(group); } EXPORT_SYMBOL_GPL(fsnotify_put_group); static struct fsnotify_group *__fsnotify_alloc_group( const struct fsnotify_ops *ops, int flags, gfp_t gfp) { struct fsnotify_group *group; group = kzalloc(sizeof(struct fsnotify_group), gfp); if (!group) return ERR_PTR(-ENOMEM); /* set to 0 when there a no external references to this group */ refcount_set(&group->refcnt, 1); atomic_set(&group->user_waits, 0); spin_lock_init(&group->notification_lock); INIT_LIST_HEAD(&group->notification_list); init_waitqueue_head(&group->notification_waitq); group->max_events = UINT_MAX; mutex_init(&group->mark_mutex); INIT_LIST_HEAD(&group->marks_list); group->ops = ops; group->flags = flags; return group; } /* * Create a new fsnotify_group and hold a reference for the group returned. */ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops, int flags) { gfp_t gfp = (flags & FSNOTIFY_GROUP_USER) ? GFP_KERNEL_ACCOUNT : GFP_KERNEL; return __fsnotify_alloc_group(ops, flags, gfp); } EXPORT_SYMBOL_GPL(fsnotify_alloc_group); int fsnotify_fasync(int fd, struct file *file, int on) { struct fsnotify_group *group = file->private_data; return fasync_helper(fd, file, on, &group->fsn_fa) >= 0 ? 0 : -EIO; } |
| 1 1 1 1 1 1 1 1 1 1 1 1 15 15 1 1 110 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 | // SPDX-License-Identifier: GPL-2.0-only /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Implementation of the Transmission Control Protocol(TCP). * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> * Corey Minyard <wf-rch!minyard@relay.EU.net> * Florian La Roche, <flla@stud.uni-sb.de> * Charles Hedrick, <hedrick@klinzhai.rutgers.edu> * Linus Torvalds, <torvalds@cs.helsinki.fi> * Alan Cox, <gw4pts@gw4pts.ampr.org> * Matthew Dillon, <dillon@apollo.west.oic.com> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> * Jorge Cwik, <jorge@laser.satlink.net> */ #include <linux/module.h> #include <linux/gfp.h> #include <net/tcp.h> #include <net/rstreason.h> static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); u32 elapsed, user_timeout; s32 remaining; user_timeout = READ_ONCE(icsk->icsk_user_timeout); if (!user_timeout) return icsk->icsk_rto; elapsed = tcp_time_stamp_ts(tp) - tp->retrans_stamp; if (tp->tcp_usec_ts) elapsed /= USEC_PER_MSEC; remaining = user_timeout - elapsed; if (remaining <= 0) return 1; /* user timeout has passed; fire ASAP */ return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining)); } u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when) { const struct inet_connection_sock *icsk = inet_csk(sk); u32 remaining, user_timeout; s32 elapsed; user_timeout = READ_ONCE(icsk->icsk_user_timeout); if (!user_timeout || !icsk->icsk_probes_tstamp) return when; elapsed = tcp_jiffies32 - icsk->icsk_probes_tstamp; if (unlikely(elapsed < 0)) elapsed = 0; remaining = msecs_to_jiffies(user_timeout) - elapsed; remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN); return min_t(u32, remaining, when); } /** * tcp_write_err() - close socket and save error info * @sk: The socket the error has appeared on. * * Returns: Nothing (void) */ static void tcp_write_err(struct sock *sk) { tcp_done_with_error(sk, READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT); } /** * tcp_out_of_resources() - Close socket if out of resources * @sk: pointer to current socket * @do_reset: send a last packet with reset flag * * Do not allow orphaned sockets to eat all our resources. * This is direct violation of TCP specs, but it is required * to prevent DoS attacks. It is called when a retransmission timeout * or zero probe timeout occurs on orphaned socket. * * Also close if our net namespace is exiting; in that case there is no * hope of ever communicating again since all netns interfaces are already * down (or about to be down), and we need to release our dst references, * which have been moved to the netns loopback interface, so the namespace * can finish exiting. This condition is only possible if we are a kernel * socket, as those do not hold references to the namespace. * * Criteria is still not confirmed experimentally and may change. * We kill the socket, if: * 1. If number of orphaned sockets exceeds an administratively configured * limit. * 2. If we have strong memory pressure. * 3. If our net namespace is exiting. */ static int tcp_out_of_resources(struct sock *sk, bool do_reset) { struct tcp_sock *tp = tcp_sk(sk); int shift = 0; /* If peer does not open window for long time, or did not transmit * anything for long time, penalize it. */ if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*tcp_rto_max(sk) || !do_reset) shift++; /* If some dubious ICMP arrived, penalize even more. */ if (READ_ONCE(sk->sk_err_soft)) shift++; if (tcp_check_oom(sk, shift)) { /* Catch exceptional cases, when connection requires reset. * 1. Last segment was sent recently. */ if ((s32)(tcp_jiffies32 - tp->lsndtime) <= TCP_TIMEWAIT_LEN || /* 2. Window is closed. */ (!tp->snd_wnd && !tp->packets_out)) do_reset = true; if (do_reset) tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_TCP_ABORT_ON_MEMORY); tcp_done(sk); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); return 1; } if (!check_net(sock_net(sk))) { /* Not possible to send reset; just close */ tcp_done(sk); return 1; } return 0; } /** * tcp_orphan_retries() - Returns maximal number of retries on an orphaned socket * @sk: Pointer to the current socket. * @alive: bool, socket alive state */ static int tcp_orphan_retries(struct sock *sk, bool alive) { int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */ /* We know from an ICMP that something is wrong. */ if (READ_ONCE(sk->sk_err_soft) && !alive) retries = 0; /* However, if socket sent something recently, select some safe * number of retries. 8 corresponds to >100 seconds with minimal * RTO of 200msec. */ if (retries == 0 && alive) retries = 8; return retries; } static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) { const struct net *net = sock_net(sk); int mss; /* Black hole detection */ if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing)) return; if (!icsk->icsk_mtup.enabled) { icsk->icsk_mtup.enabled = 1; icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; } else { mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss); mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor)); mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss)); icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); } tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } static unsigned int tcp_model_timeout(struct sock *sk, unsigned int boundary, unsigned int rto_base) { unsigned int linear_backoff_thresh, timeout; linear_backoff_thresh = ilog2(tcp_rto_max(sk) / rto_base); if (boundary <= linear_backoff_thresh) timeout = ((2 << boundary) - 1) * rto_base; else timeout = ((2 << linear_backoff_thresh) - 1) * rto_base + (boundary - linear_backoff_thresh) * tcp_rto_max(sk); return jiffies_to_msecs(timeout); } /** * retransmits_timed_out() - returns true if this connection has timed out * @sk: The current socket * @boundary: max number of retransmissions * @timeout: A custom timeout value. * If set to 0 the default timeout is calculated and used. * Using TCP_RTO_MIN and the number of unsuccessful retransmits. * * The default "timeout" value this function can calculate and use * is equivalent to the timeout of a TCP Connection * after "boundary" unsuccessful, exponentially backed-off * retransmissions with an initial RTO of TCP_RTO_MIN. */ static bool retransmits_timed_out(struct sock *sk, unsigned int boundary, unsigned int timeout) { struct tcp_sock *tp = tcp_sk(sk); unsigned int start_ts, delta; if (!inet_csk(sk)->icsk_retransmits) return false; start_ts = tp->retrans_stamp; if (likely(timeout == 0)) { unsigned int rto_base = TCP_RTO_MIN; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) rto_base = tcp_timeout_init(sk); timeout = tcp_model_timeout(sk, boundary, rto_base); } if (tp->tcp_usec_ts) { /* delta maybe off up to a jiffy due to timer granularity. */ delta = tp->tcp_mstamp - start_ts + jiffies_to_usecs(1); return (s32)(delta - timeout * USEC_PER_MSEC) >= 0; } return (s32)(tcp_time_stamp_ts(tp) - start_ts - timeout) >= 0; } /* A write timeout has occurred. Process the after effects. */ static int tcp_write_timeout(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); bool expired = false, do_reset; int retry_until, max_retransmits; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) __dst_negative_advice(sk); /* Paired with WRITE_ONCE() in tcp_sock_set_syncnt() */ retry_until = READ_ONCE(icsk->icsk_syn_retries) ? : READ_ONCE(net->ipv4.sysctl_tcp_syn_retries); max_retransmits = retry_until; if (sk->sk_state == TCP_SYN_SENT) max_retransmits += READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts); expired = icsk->icsk_retransmits >= max_retransmits; } else { if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) { /* Black hole detection */ tcp_mtu_probing(icsk, sk); __dst_negative_advice(sk); } retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { const bool alive = icsk->icsk_rto < tcp_rto_max(sk); retry_until = tcp_orphan_retries(sk, alive); do_reset = alive || !retransmits_timed_out(sk, retry_until, 0); if (tcp_out_of_resources(sk, do_reset)) return 1; } } if (!expired) expired = retransmits_timed_out(sk, retry_until, READ_ONCE(icsk->icsk_user_timeout)); tcp_fastopen_active_detect_blackhole(sk, expired); mptcp_active_detect_blackhole(sk, expired); if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG)) tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RTO_CB, icsk->icsk_retransmits, icsk->icsk_rto, (int)expired); if (expired) { /* Has it gone just too far? */ tcp_write_err(sk); return 1; } if (sk_rethink_txhash(sk)) { tp->timeout_rehash++; __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTREHASH); } return 0; } /* Called with BH disabled */ void tcp_delack_timer_handler(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) return; /* Handling the sack compression case */ if (tp->compressed_ack) { tcp_mstamp_refresh(tp); tcp_sack_compress_send_ack(sk); return; } if (!(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) return; if (time_after(icsk_delack_timeout(icsk), jiffies)) { sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk_delack_timeout(icsk)); return; } icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; if (inet_csk_ack_scheduled(sk)) { if (!inet_csk_in_pingpong_mode(sk)) { /* Delayed ACK missed: inflate ATO. */ icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1, icsk->icsk_rto); } else { /* Delayed ACK missed: leave pingpong mode and * deflate ATO. */ inet_csk_exit_pingpong_mode(sk); icsk->icsk_ack.ato = TCP_ATO_MIN; } tcp_mstamp_refresh(tp); tcp_send_ack(sk); __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS); } } /** * tcp_delack_timer() - The TCP delayed ACK timeout handler * @t: Pointer to the timer. (gets casted to struct sock *) * * This function gets (indirectly) called when the kernel timer for a TCP packet * of this socket expires. Calls tcp_delack_timer_handler() to do the actual work. * * Returns: Nothing (void) */ static void tcp_delack_timer(struct timer_list *t) { struct inet_connection_sock *icsk = timer_container_of(icsk, t, icsk_delack_timer); struct sock *sk = &icsk->icsk_inet.sk; /* Avoid taking socket spinlock if there is no ACK to send. * The compressed_ack check is racy, but a separate hrtimer * will take care of it eventually. */ if (!(smp_load_acquire(&icsk->icsk_ack.pending) & ICSK_ACK_TIMER) && !READ_ONCE(tcp_sk(sk)->compressed_ack)) goto out; bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { tcp_delack_timer_handler(sk); } else { __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); /* deleguate our work to tcp_release_cb() */ if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags)) sock_hold(sk); } bh_unlock_sock(sk); out: sock_put(sk); } static void tcp_probe_timer(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct sk_buff *skb = tcp_send_head(sk); struct tcp_sock *tp = tcp_sk(sk); int max_probes; if (tp->packets_out || !skb) { WRITE_ONCE(icsk->icsk_probes_out, 0); icsk->icsk_probes_tstamp = 0; return; } /* RFC 1122 4.2.2.17 requires the sender to stay open indefinitely as * long as the receiver continues to respond probes. We support this by * default and reset icsk_probes_out with incoming ACKs. But if the * socket is orphaned or the user specifies TCP_USER_TIMEOUT, we * kill the socket when the retry count and the time exceeds the * corresponding system limit. We also implement similar policy when * we use RTO to probe window in tcp_retransmit_timer(). */ if (!icsk->icsk_probes_tstamp) { icsk->icsk_probes_tstamp = tcp_jiffies32; } else { u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout); if (user_timeout && (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >= msecs_to_jiffies(user_timeout)) goto abort; } max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { unsigned int rto_max = tcp_rto_max(sk); const bool alive = inet_csk_rto_backoff(icsk, rto_max) < rto_max; max_probes = tcp_orphan_retries(sk, alive); if (!alive && icsk->icsk_backoff >= max_probes) goto abort; if (tcp_out_of_resources(sk, true)) return; } if (icsk->icsk_probes_out >= max_probes) { abort: tcp_write_err(sk); } else { /* Only send another probe if we didn't close things up. */ tcp_send_probe0(sk); } } static void tcp_update_rto_stats(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); if (!icsk->icsk_retransmits) { tp->total_rto_recoveries++; tp->rto_stamp = tcp_time_stamp_ms(tp); } WRITE_ONCE(icsk->icsk_retransmits, icsk->icsk_retransmits + 1); tp->total_rto++; } /* * Timer for Fast Open socket to retransmit SYNACK. Note that the * sk here is the child socket, not the parent (listener) socket. */ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int max_retries; req->rsk_ops->syn_ack_timeout(req); /* Add one more retry for fastopen. * Paired with WRITE_ONCE() in tcp_sock_set_syncnt() */ max_retries = READ_ONCE(icsk->icsk_syn_retries) ? : READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1; if (req->num_timeout >= max_retries) { tcp_write_err(sk); return; } /* Lower cwnd after certain SYNACK timeout like tcp_init_transfer() */ if (icsk->icsk_retransmits == 1) tcp_enter_loss(sk); /* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error * returned from rtx_syn_ack() to make it more persistent like * regular retransmit because if the child socket has been accepted * it's not good to give up too easily. */ tcp_rtx_synack(sk, req); req->num_timeout++; tcp_update_rto_stats(sk); if (!tp->retrans_stamp) tp->retrans_stamp = tcp_time_stamp_ts(tp); tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, req->timeout << req->num_timeout, false); } static bool tcp_rtx_probe0_timed_out(const struct sock *sk, const struct sk_buff *skb, u32 rtx_delta) { const struct inet_connection_sock *icsk = inet_csk(sk); u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout); const struct tcp_sock *tp = tcp_sk(sk); int timeout = tcp_rto_max(sk) * 2; s32 rcv_delta; if (user_timeout) { /* If user application specified a TCP_USER_TIMEOUT, * it does not want win 0 packets to 'reset the timer' * while retransmits are not making progress. */ if (rtx_delta > user_timeout) return true; timeout = min_t(u32, timeout, msecs_to_jiffies(user_timeout)); } /* Note: timer interrupt might have been delayed by at least one jiffy, * and tp->rcv_tstamp might very well have been written recently. * rcv_delta can thus be negative. */ rcv_delta = icsk_timeout(icsk) - tp->rcv_tstamp; if (rcv_delta <= timeout) return false; return msecs_to_jiffies(rtx_delta) > timeout; } /** * tcp_retransmit_timer() - The TCP retransmit timeout handler * @sk: Pointer to the current socket. * * This function gets called when the kernel timer for a TCP packet * of this socket expires. * * It handles retransmission, timer adjustment and other necessary measures. * * Returns: Nothing (void) */ void tcp_retransmit_timer(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock *req; struct sk_buff *skb; req = rcu_dereference_protected(tp->fastopen_rsk, lockdep_sock_is_held(sk)); if (req) { WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && sk->sk_state != TCP_FIN_WAIT1); tcp_fastopen_synack_timer(sk, req); /* Before we receive ACK to our SYN-ACK don't retransmit * anything else (e.g., data or FIN segments). */ return; } if (!tp->packets_out) return; skb = tcp_rtx_queue_head(sk); if (WARN_ON_ONCE(!skb)) return; if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { /* Receiver dastardly shrinks window. Our retransmits * become zero probes, but we should not timeout this * connection. If the socket is an orphan, time it out, * we cannot allow such beasts to hang infinitely. */ struct inet_sock *inet = inet_sk(sk); u32 rtx_delta; rtx_delta = tcp_time_stamp_ts(tp) - (tp->retrans_stamp ?: tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb)); if (tp->tcp_usec_ts) rtx_delta /= USEC_PER_MSEC; if (sk->sk_family == AF_INET) { net_dbg_ratelimited("Probing zero-window on %pI4:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n", &inet->inet_daddr, ntohs(inet->inet_dport), inet->inet_num, tp->snd_una, tp->snd_nxt, jiffies_to_msecs(jiffies - tp->rcv_tstamp), rtx_delta); } #if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { net_dbg_ratelimited("Probing zero-window on %pI6:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n", &sk->sk_v6_daddr, ntohs(inet->inet_dport), inet->inet_num, tp->snd_una, tp->snd_nxt, jiffies_to_msecs(jiffies - tp->rcv_tstamp), rtx_delta); } #endif if (tcp_rtx_probe0_timed_out(sk, skb, rtx_delta)) { tcp_write_err(sk); goto out; } tcp_enter_loss(sk); tcp_retransmit_skb(sk, skb, 1); __sk_dst_reset(sk); goto out_reset_timer; } __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTS); if (tcp_write_timeout(sk)) goto out; if (icsk->icsk_retransmits == 0) { int mib_idx = 0; if (icsk->icsk_ca_state == TCP_CA_Recovery) { if (tcp_is_sack(tp)) mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL; else mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL; } else if (icsk->icsk_ca_state == TCP_CA_Loss) { mib_idx = LINUX_MIB_TCPLOSSFAILURES; } else if ((icsk->icsk_ca_state == TCP_CA_Disorder) || tp->sacked_out) { if (tcp_is_sack(tp)) mib_idx = LINUX_MIB_TCPSACKFAILURES; else mib_idx = LINUX_MIB_TCPRENOFAILURES; } if (mib_idx) __NET_INC_STATS(sock_net(sk), mib_idx); } tcp_enter_loss(sk); tcp_update_rto_stats(sk); if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) { /* Retransmission failed because of local congestion, * Let senders fight for local resources conservatively. */ tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, TCP_RESOURCE_PROBE_INTERVAL, false); goto out; } /* Increase the timeout each time we retransmit. Note that * we do not increase the rtt estimate. rto is initialized * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests * that doubling rto each time is the least we can get away with. * In KA9Q, Karn uses this for the first few times, and then * goes to quadratic. netBSD doubles, but only goes up to *64, * and clamps at 1 to 64 sec afterwards. Note that 120 sec is * defined in the protocol as the maximum possible RTT. I guess * we'll have to use something other than TCP to talk to the * University of Mars. * * PAWS allows us longer timeouts and large windows, so once * implemented ftp to mars will work nicely. We will have to fix * the 120 second clamps though! */ out_reset_timer: /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is * used to reset timer, set to 0. Recalculate 'icsk_rto' as this * might be increased if the stream oscillates between thin and thick, * thus the old value might already be too high compared to the value * set by 'tcp_set_rto' in tcp_input.c which resets the rto without * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating * exponential backoff behaviour to avoid continue hammering * linear-timeout retransmissions into a black hole */ if (sk->sk_state == TCP_ESTABLISHED && (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) && tcp_stream_is_thin(tp) && icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { icsk->icsk_backoff = 0; icsk->icsk_rto = clamp(__tcp_set_rto(tp), tcp_rto_min(sk), tcp_rto_max(sk)); } else if (sk->sk_state != TCP_SYN_SENT || tp->total_rto > READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) { /* Use normal (exponential) backoff unless linear timeouts are * activated. */ icsk->icsk_backoff++; icsk->icsk_rto = min(icsk->icsk_rto << 1, tcp_rto_max(sk)); } tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, tcp_clamp_rto_to_user_timeout(sk), false); if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0)) __sk_dst_reset(sk); out:; } /* Called with bottom-half processing disabled. * Called by tcp_write_timer() and tcp_release_cb(). */ void tcp_write_timer_handler(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); int event; if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || !icsk->icsk_pending) return; if (time_after(icsk_timeout(icsk), jiffies)) { sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk_timeout(icsk)); return; } tcp_mstamp_refresh(tcp_sk(sk)); event = icsk->icsk_pending; switch (event) { case ICSK_TIME_REO_TIMEOUT: tcp_rack_reo_timeout(sk); break; case ICSK_TIME_LOSS_PROBE: tcp_send_loss_probe(sk); break; case ICSK_TIME_RETRANS: smp_store_release(&icsk->icsk_pending, 0); tcp_retransmit_timer(sk); break; case ICSK_TIME_PROBE0: smp_store_release(&icsk->icsk_pending, 0); tcp_probe_timer(sk); break; } } static void tcp_write_timer(struct timer_list *t) { struct inet_connection_sock *icsk = timer_container_of(icsk, t, icsk_retransmit_timer); struct sock *sk = &icsk->icsk_inet.sk; /* Avoid locking the socket when there is no pending event. */ if (!smp_load_acquire(&icsk->icsk_pending)) goto out; bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { tcp_write_timer_handler(sk); } else { /* delegate our work to tcp_release_cb() */ if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags)) sock_hold(sk); } bh_unlock_sock(sk); out: sock_put(sk); } void tcp_syn_ack_timeout(const struct request_sock *req) { struct net *net = read_pnet(&inet_rsk(req)->ireq_net); __NET_INC_STATS(net, LINUX_MIB_TCPTIMEOUTS); } EXPORT_IPV6_MOD(tcp_syn_ack_timeout); void tcp_reset_keepalive_timer(struct sock *sk, unsigned long len) { sk_reset_timer(sk, &sk->sk_timer, jiffies + len); } static void tcp_delete_keepalive_timer(struct sock *sk) { sk_stop_timer(sk, &sk->sk_timer); } void tcp_set_keepalive(struct sock *sk, int val) { if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) return; if (val && !sock_flag(sk, SOCK_KEEPOPEN)) tcp_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk))); else if (!val) tcp_delete_keepalive_timer(sk); } EXPORT_IPV6_MOD_GPL(tcp_set_keepalive); static void tcp_keepalive_timer(struct timer_list *t) { struct sock *sk = timer_container_of(sk, t, sk_timer); struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); u32 elapsed; /* Only process if socket is not in use. */ bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* Try again later. */ tcp_reset_keepalive_timer(sk, HZ/20); goto out; } if (sk->sk_state == TCP_LISTEN) { pr_err("Hmm... keepalive on a LISTEN ???\n"); goto out; } tcp_mstamp_refresh(tp); if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) { if (READ_ONCE(tp->linger2) >= 0) { const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN; if (tmo > 0) { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); goto out; } } tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_TCP_STATE); goto death; } if (!sock_flag(sk, SOCK_KEEPOPEN) || ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT))) goto out; elapsed = keepalive_time_when(tp); /* It is alive without keepalive 8) */ if (tp->packets_out || !tcp_write_queue_empty(sk)) goto resched; elapsed = keepalive_time_elapsed(tp); if (elapsed >= keepalive_time_when(tp)) { u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout); /* If the TCP_USER_TIMEOUT option is enabled, use that * to determine when to timeout instead. */ if ((user_timeout != 0 && elapsed >= msecs_to_jiffies(user_timeout) && icsk->icsk_probes_out > 0) || (user_timeout == 0 && icsk->icsk_probes_out >= keepalive_probes(tp))) { tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT); tcp_write_err(sk); goto out; } if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) { WRITE_ONCE(icsk->icsk_probes_out, icsk->icsk_probes_out + 1); elapsed = keepalive_intvl_when(tp); } else { /* If keepalive was lost due to local congestion, * try harder. */ elapsed = TCP_RESOURCE_PROBE_INTERVAL; } } else { /* It is tp->rcv_tstamp + keepalive_time_when(tp) */ elapsed = keepalive_time_when(tp) - elapsed; } resched: tcp_reset_keepalive_timer(sk, elapsed); goto out; death: tcp_done(sk); out: bh_unlock_sock(sk); sock_put(sk); } static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer) { struct tcp_sock *tp = container_of(timer, struct tcp_sock, compressed_ack_timer); struct sock *sk = (struct sock *)tp; bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { if (tp->compressed_ack) { /* Since we have to send one ack finally, * subtract one from tp->compressed_ack to keep * LINUX_MIB_TCPACKCOMPRESSED accurate. */ tp->compressed_ack--; tcp_mstamp_refresh(tp); tcp_send_ack(sk); } } else { if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags)) sock_hold(sk); } bh_unlock_sock(sk); sock_put(sk); return HRTIMER_NORESTART; } void tcp_init_xmit_timers(struct sock *sk) { inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, &tcp_keepalive_timer); hrtimer_setup(&tcp_sk(sk)->pacing_timer, tcp_pace_kick, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_SOFT); hrtimer_setup(&tcp_sk(sk)->compressed_ack_timer, tcp_compressed_ack_kick, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED_SOFT); } |
| 4 4 1 5 5 2 1 1 1 5 5 4 4 4 3 2 1 1 5 1 1 1 1 1 1 5 1 5 5 1 1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 | // SPDX-License-Identifier: GPL-2.0 /* net/sched/sch_etf.c Earliest TxTime First queueing discipline. * * Authors: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com> * Vinicius Costa Gomes <vinicius.gomes@intel.com> */ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/errqueue.h> #include <linux/rbtree.h> #include <linux/skbuff.h> #include <linux/posix-timers.h> #include <net/netlink.h> #include <net/sch_generic.h> #include <net/pkt_sched.h> #include <net/sock.h> #define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON) #define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON) #define SKIP_SOCK_CHECK_IS_SET(x) ((x)->flags & TC_ETF_SKIP_SOCK_CHECK) struct etf_sched_data { bool offload; bool deadline_mode; bool skip_sock_check; int clockid; int queue; s32 delta; /* in ns */ ktime_t last; /* The txtime of the last skb sent to the netdevice. */ struct rb_root_cached head; struct qdisc_watchdog watchdog; ktime_t (*get_time)(void); }; static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = { [TCA_ETF_PARMS] = { .len = sizeof(struct tc_etf_qopt) }, }; static inline int validate_input_params(struct tc_etf_qopt *qopt, struct netlink_ext_ack *extack) { /* Check if params comply to the following rules: * * Clockid and delta must be valid. * * * Dynamic clockids are not supported. * * * Delta must be a positive integer. * * Also note that for the HW offload case, we must * expect that system clocks have been synchronized to PHC. */ if (qopt->clockid < 0) { NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported"); return -ENOTSUPP; } if (qopt->clockid != CLOCK_TAI) { NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used"); return -EINVAL; } if (qopt->delta < 0) { NL_SET_ERR_MSG(extack, "Delta must be positive"); return -EINVAL; } return 0; } static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb) { struct etf_sched_data *q = qdisc_priv(sch); ktime_t txtime = nskb->tstamp; struct sock *sk = nskb->sk; ktime_t now; if (q->skip_sock_check) goto skip; if (!sk || !sk_fullsock(sk)) return false; if (!sock_flag(sk, SOCK_TXTIME)) return false; /* We don't perform crosstimestamping. * Drop if packet's clockid differs from qdisc's. */ if (sk->sk_clockid != q->clockid) return false; if (sk->sk_txtime_deadline_mode != q->deadline_mode) return false; skip: now = q->get_time(); if (ktime_before(txtime, now) || ktime_before(txtime, q->last)) return false; return true; } static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch) { struct etf_sched_data *q = qdisc_priv(sch); struct rb_node *p; p = rb_first_cached(&q->head); if (!p) return NULL; return rb_to_skb(p); } static void reset_watchdog(struct Qdisc *sch) { struct etf_sched_data *q = qdisc_priv(sch); struct sk_buff *skb = etf_peek_timesortedlist(sch); ktime_t next; if (!skb) { qdisc_watchdog_cancel(&q->watchdog); return; } next = ktime_sub_ns(skb->tstamp, q->delta); qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next)); } static void report_sock_error(struct sk_buff *skb, u32 err, u8 code) { struct sock_exterr_skb *serr; struct sk_buff *clone; ktime_t txtime = skb->tstamp; struct sock *sk = skb->sk; if (!sk || !sk_fullsock(sk) || !(sk->sk_txtime_report_errors)) return; clone = skb_clone(skb, GFP_ATOMIC); if (!clone) return; serr = SKB_EXT_ERR(clone); serr->ee.ee_errno = err; serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME; serr->ee.ee_type = 0; serr->ee.ee_code = code; serr->ee.ee_pad = 0; serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */ serr->ee.ee_info = txtime; /* low part of tstamp */ if (sock_queue_err_skb(sk, clone)) kfree_skb(clone); } static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch, struct sk_buff **to_free) { struct etf_sched_data *q = qdisc_priv(sch); struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL; ktime_t txtime = nskb->tstamp; bool leftmost = true; if (!is_packet_valid(sch, nskb)) { report_sock_error(nskb, EINVAL, SO_EE_CODE_TXTIME_INVALID_PARAM); return qdisc_drop(nskb, sch, to_free); } while (*p) { struct sk_buff *skb; parent = *p; skb = rb_to_skb(parent); if (ktime_compare(txtime, skb->tstamp) >= 0) { p = &parent->rb_right; leftmost = false; } else { p = &parent->rb_left; } } rb_link_node(&nskb->rbnode, parent, p); rb_insert_color_cached(&nskb->rbnode, &q->head, leftmost); qdisc_qstats_backlog_inc(sch, nskb); sch->q.qlen++; /* Now we may need to re-arm the qdisc watchdog for the next packet. */ reset_watchdog(sch); return NET_XMIT_SUCCESS; } static void timesortedlist_drop(struct Qdisc *sch, struct sk_buff *skb, ktime_t now) { struct etf_sched_data *q = qdisc_priv(sch); struct sk_buff *to_free = NULL; struct sk_buff *tmp = NULL; skb_rbtree_walk_from_safe(skb, tmp) { if (ktime_after(skb->tstamp, now)) break; rb_erase_cached(&skb->rbnode, &q->head); /* The rbnode field in the skb re-uses these fields, now that * we are done with the rbnode, reset them. */ skb->next = NULL; skb->prev = NULL; skb->dev = qdisc_dev(sch); report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED); qdisc_qstats_backlog_dec(sch, skb); qdisc_drop(skb, sch, &to_free); qdisc_qstats_overlimit(sch); sch->q.qlen--; } kfree_skb_list(to_free); } static void timesortedlist_remove(struct Qdisc *sch, struct sk_buff *skb) { struct etf_sched_data *q = qdisc_priv(sch); rb_erase_cached(&skb->rbnode, &q->head); /* The rbnode field in the skb re-uses these fields, now that * we are done with the rbnode, reset them. */ skb->next = NULL; skb->prev = NULL; skb->dev = qdisc_dev(sch); qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); q->last = skb->tstamp; sch->q.qlen--; } static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch) { struct etf_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; ktime_t now, next; skb = etf_peek_timesortedlist(sch); if (!skb) return NULL; now = q->get_time(); /* Drop if packet has expired while in queue. */ if (ktime_before(skb->tstamp, now)) { timesortedlist_drop(sch, skb, now); skb = NULL; goto out; } /* When in deadline mode, dequeue as soon as possible and change the * txtime from deadline to (now + delta). */ if (q->deadline_mode) { timesortedlist_remove(sch, skb); skb->tstamp = now; goto out; } next = ktime_sub_ns(skb->tstamp, q->delta); /* Dequeue only if now is within the [txtime - delta, txtime] range. */ if (ktime_after(now, next)) timesortedlist_remove(sch, skb); else skb = NULL; out: /* Now we may need to re-arm the qdisc watchdog for the next packet. */ reset_watchdog(sch); return skb; } static void etf_disable_offload(struct net_device *dev, struct etf_sched_data *q) { struct tc_etf_qopt_offload etf = { }; const struct net_device_ops *ops; int err; if (!q->offload) return; ops = dev->netdev_ops; if (!ops->ndo_setup_tc) return; etf.queue = q->queue; etf.enable = 0; err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf); if (err < 0) pr_warn("Couldn't disable ETF offload for queue %d\n", etf.queue); } static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q, struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; struct tc_etf_qopt_offload etf = { }; int err; if (!ops->ndo_setup_tc) { NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload"); return -EOPNOTSUPP; } etf.queue = q->queue; etf.enable = 1; err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf); if (err < 0) { NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload"); return err; } return 0; } static int etf_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct etf_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); struct nlattr *tb[TCA_ETF_MAX + 1]; struct tc_etf_qopt *qopt; int err; if (!opt) { NL_SET_ERR_MSG(extack, "Missing ETF qdisc options which are mandatory"); return -EINVAL; } err = nla_parse_nested_deprecated(tb, TCA_ETF_MAX, opt, etf_policy, extack); if (err < 0) return err; if (!tb[TCA_ETF_PARMS]) { NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters"); return -EINVAL; } qopt = nla_data(tb[TCA_ETF_PARMS]); pr_debug("delta %d clockid %d offload %s deadline %s\n", qopt->delta, qopt->clockid, OFFLOAD_IS_ON(qopt) ? "on" : "off", DEADLINE_MODE_IS_ON(qopt) ? "on" : "off"); err = validate_input_params(qopt, extack); if (err < 0) return err; q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0); if (OFFLOAD_IS_ON(qopt)) { err = etf_enable_offload(dev, q, extack); if (err < 0) return err; } /* Everything went OK, save the parameters used. */ q->delta = qopt->delta; q->clockid = qopt->clockid; q->offload = OFFLOAD_IS_ON(qopt); q->deadline_mode = DEADLINE_MODE_IS_ON(qopt); q->skip_sock_check = SKIP_SOCK_CHECK_IS_SET(qopt); switch (q->clockid) { case CLOCK_REALTIME: q->get_time = ktime_get_real; break; case CLOCK_MONOTONIC: q->get_time = ktime_get; break; case CLOCK_BOOTTIME: q->get_time = ktime_get_boottime; break; case CLOCK_TAI: q->get_time = ktime_get_clocktai; break; default: NL_SET_ERR_MSG(extack, "Clockid is not supported"); return -ENOTSUPP; } qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid); return 0; } static void timesortedlist_clear(struct Qdisc *sch) { struct etf_sched_data *q = qdisc_priv(sch); struct rb_node *p = rb_first_cached(&q->head); while (p) { struct sk_buff *skb = rb_to_skb(p); p = rb_next(p); rb_erase_cached(&skb->rbnode, &q->head); rtnl_kfree_skbs(skb, skb); sch->q.qlen--; } } static void etf_reset(struct Qdisc *sch) { struct etf_sched_data *q = qdisc_priv(sch); /* Only cancel watchdog if it's been initialized. */ if (q->watchdog.qdisc == sch) qdisc_watchdog_cancel(&q->watchdog); /* No matter which mode we are on, it's safe to clear both lists. */ timesortedlist_clear(sch); __qdisc_reset_queue(&sch->q); q->last = 0; } static void etf_destroy(struct Qdisc *sch) { struct etf_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); /* Only cancel watchdog if it's been initialized. */ if (q->watchdog.qdisc == sch) qdisc_watchdog_cancel(&q->watchdog); etf_disable_offload(dev, q); } static int etf_dump(struct Qdisc *sch, struct sk_buff *skb) { struct etf_sched_data *q = qdisc_priv(sch); struct tc_etf_qopt opt = { }; struct nlattr *nest; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!nest) goto nla_put_failure; opt.delta = READ_ONCE(q->delta); opt.clockid = READ_ONCE(q->clockid); if (READ_ONCE(q->offload)) opt.flags |= TC_ETF_OFFLOAD_ON; if (READ_ONCE(q->deadline_mode)) opt.flags |= TC_ETF_DEADLINE_MODE_ON; if (READ_ONCE(q->skip_sock_check)) opt.flags |= TC_ETF_SKIP_SOCK_CHECK; if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt)) goto nla_put_failure; return nla_nest_end(skb, nest); nla_put_failure: nla_nest_cancel(skb, nest); return -1; } static struct Qdisc_ops etf_qdisc_ops __read_mostly = { .id = "etf", .priv_size = sizeof(struct etf_sched_data), .enqueue = etf_enqueue_timesortedlist, .dequeue = etf_dequeue_timesortedlist, .peek = etf_peek_timesortedlist, .init = etf_init, .reset = etf_reset, .destroy = etf_destroy, .dump = etf_dump, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_SCH("etf"); static int __init etf_module_init(void) { return register_qdisc(&etf_qdisc_ops); } static void __exit etf_module_exit(void) { unregister_qdisc(&etf_qdisc_ops); } module_init(etf_module_init) module_exit(etf_module_exit) MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Earliest TxTime First (ETF) qdisc"); |
| 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 | // SPDX-License-Identifier: GPL-2.0+ /* * vmk80xx.c * Velleman USB Board Low-Level Driver * * Copyright (C) 2009 Manuel Gebele <forensixs@gmx.de>, Germany * * COMEDI - Linux Control and Measurement Device Interface * Copyright (C) 2000 David A. Schleef <ds@schleef.org> */ /* * Driver: vmk80xx * Description: Velleman USB Board Low-Level Driver * Devices: [Velleman] K8055 (K8055/VM110), K8061 (K8061/VM140), * VM110 (K8055/VM110), VM140 (K8061/VM140) * Author: Manuel Gebele <forensixs@gmx.de> * Updated: Sun, 10 May 2009 11:14:59 +0200 * Status: works * * Supports: * - analog input * - analog output * - digital input * - digital output * - counter * - pwm */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/errno.h> #include <linux/input.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/uaccess.h> #include <linux/comedi/comedi_usb.h> enum { DEVICE_VMK8055, DEVICE_VMK8061 }; #define VMK8055_DI_REG 0x00 #define VMK8055_DO_REG 0x01 #define VMK8055_AO1_REG 0x02 #define VMK8055_AO2_REG 0x03 #define VMK8055_AI1_REG 0x02 #define VMK8055_AI2_REG 0x03 #define VMK8055_CNT1_REG 0x04 #define VMK8055_CNT2_REG 0x06 #define VMK8061_CH_REG 0x01 #define VMK8061_DI_REG 0x01 #define VMK8061_DO_REG 0x01 #define VMK8061_PWM_REG1 0x01 #define VMK8061_PWM_REG2 0x02 #define VMK8061_CNT_REG 0x02 #define VMK8061_AO_REG 0x02 #define VMK8061_AI_REG1 0x02 #define VMK8061_AI_REG2 0x03 #define VMK8055_CMD_RST 0x00 #define VMK8055_CMD_DEB1_TIME 0x01 #define VMK8055_CMD_DEB2_TIME 0x02 #define VMK8055_CMD_RST_CNT1 0x03 #define VMK8055_CMD_RST_CNT2 0x04 #define VMK8055_CMD_WRT_AD 0x05 #define VMK8061_CMD_RD_AI 0x00 #define VMK8061_CMR_RD_ALL_AI 0x01 /* !non-active! */ #define VMK8061_CMD_SET_AO 0x02 #define VMK8061_CMD_SET_ALL_AO 0x03 /* !non-active! */ #define VMK8061_CMD_OUT_PWM 0x04 #define VMK8061_CMD_RD_DI 0x05 #define VMK8061_CMD_DO 0x06 /* !non-active! */ #define VMK8061_CMD_CLR_DO 0x07 #define VMK8061_CMD_SET_DO 0x08 #define VMK8061_CMD_RD_CNT 0x09 /* TODO: completely pointless? */ #define VMK8061_CMD_RST_CNT 0x0a /* TODO: completely pointless? */ #define VMK8061_CMD_RD_VERSION 0x0b /* internal usage */ #define VMK8061_CMD_RD_JMP_STAT 0x0c /* TODO: not implemented yet */ #define VMK8061_CMD_RD_PWR_STAT 0x0d /* internal usage */ #define VMK8061_CMD_RD_DO 0x0e #define VMK8061_CMD_RD_AO 0x0f #define VMK8061_CMD_RD_PWM 0x10 #define IC3_VERSION BIT(0) #define IC6_VERSION BIT(1) #define MIN_BUF_SIZE 64 #define PACKET_TIMEOUT 10000 /* ms */ enum vmk80xx_model { VMK8055_MODEL, VMK8061_MODEL }; static const struct comedi_lrange vmk8061_range = { 2, { UNI_RANGE(5), UNI_RANGE(10) } }; struct vmk80xx_board { const char *name; enum vmk80xx_model model; const struct comedi_lrange *range; int ai_nchans; unsigned int ai_maxdata; int ao_nchans; int di_nchans; unsigned int cnt_maxdata; int pwm_nchans; unsigned int pwm_maxdata; }; static const struct vmk80xx_board vmk80xx_boardinfo[] = { [DEVICE_VMK8055] = { .name = "K8055 (VM110)", .model = VMK8055_MODEL, .range = &range_unipolar5, .ai_nchans = 2, .ai_maxdata = 0x00ff, .ao_nchans = 2, .di_nchans = 6, .cnt_maxdata = 0xffff, }, [DEVICE_VMK8061] = { .name = "K8061 (VM140)", .model = VMK8061_MODEL, .range = &vmk8061_range, .ai_nchans = 8, .ai_maxdata = 0x03ff, .ao_nchans = 8, .di_nchans = 8, .cnt_maxdata = 0, /* unknown, device is not writeable */ .pwm_nchans = 1, .pwm_maxdata = 0x03ff, }, }; struct vmk80xx_private { struct usb_endpoint_descriptor *ep_rx; struct usb_endpoint_descriptor *ep_tx; struct semaphore limit_sem; unsigned char *usb_rx_buf; unsigned char *usb_tx_buf; enum vmk80xx_model model; }; static void vmk80xx_do_bulk_msg(struct comedi_device *dev) { struct vmk80xx_private *devpriv = dev->private; struct usb_device *usb = comedi_to_usb_dev(dev); __u8 tx_addr; __u8 rx_addr; unsigned int tx_pipe; unsigned int rx_pipe; size_t tx_size; size_t rx_size; tx_addr = devpriv->ep_tx->bEndpointAddress; rx_addr = devpriv->ep_rx->bEndpointAddress; tx_pipe = usb_sndbulkpipe(usb, tx_addr); rx_pipe = usb_rcvbulkpipe(usb, rx_addr); tx_size = usb_endpoint_maxp(devpriv->ep_tx); rx_size = usb_endpoint_maxp(devpriv->ep_rx); usb_bulk_msg(usb, tx_pipe, devpriv->usb_tx_buf, tx_size, NULL, PACKET_TIMEOUT); usb_bulk_msg(usb, rx_pipe, devpriv->usb_rx_buf, rx_size, NULL, PACKET_TIMEOUT); } static int vmk80xx_read_packet(struct comedi_device *dev) { struct vmk80xx_private *devpriv = dev->private; struct usb_device *usb = comedi_to_usb_dev(dev); struct usb_endpoint_descriptor *ep; unsigned int pipe; if (devpriv->model == VMK8061_MODEL) { vmk80xx_do_bulk_msg(dev); return 0; } ep = devpriv->ep_rx; pipe = usb_rcvintpipe(usb, ep->bEndpointAddress); return usb_interrupt_msg(usb, pipe, devpriv->usb_rx_buf, usb_endpoint_maxp(ep), NULL, PACKET_TIMEOUT); } static int vmk80xx_write_packet(struct comedi_device *dev, int cmd) { struct vmk80xx_private *devpriv = dev->private; struct usb_device *usb = comedi_to_usb_dev(dev); struct usb_endpoint_descriptor *ep; unsigned int pipe; devpriv->usb_tx_buf[0] = cmd; if (devpriv->model == VMK8061_MODEL) { vmk80xx_do_bulk_msg(dev); return 0; } ep = devpriv->ep_tx; pipe = usb_sndintpipe(usb, ep->bEndpointAddress); return usb_interrupt_msg(usb, pipe, devpriv->usb_tx_buf, usb_endpoint_maxp(ep), NULL, PACKET_TIMEOUT); } static int vmk80xx_reset_device(struct comedi_device *dev) { struct vmk80xx_private *devpriv = dev->private; size_t size; int retval; size = usb_endpoint_maxp(devpriv->ep_tx); memset(devpriv->usb_tx_buf, 0, size); retval = vmk80xx_write_packet(dev, VMK8055_CMD_RST); if (retval) return retval; /* set outputs to known state as we cannot read them */ return vmk80xx_write_packet(dev, VMK8055_CMD_WRT_AD); } static int vmk80xx_ai_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct vmk80xx_private *devpriv = dev->private; int chan; int reg[2]; int n; down(&devpriv->limit_sem); chan = CR_CHAN(insn->chanspec); switch (devpriv->model) { case VMK8055_MODEL: if (!chan) reg[0] = VMK8055_AI1_REG; else reg[0] = VMK8055_AI2_REG; break; case VMK8061_MODEL: default: reg[0] = VMK8061_AI_REG1; reg[1] = VMK8061_AI_REG2; devpriv->usb_tx_buf[0] = VMK8061_CMD_RD_AI; devpriv->usb_tx_buf[VMK8061_CH_REG] = chan; break; } for (n = 0; n < insn->n; n++) { if (vmk80xx_read_packet(dev)) break; if (devpriv->model == VMK8055_MODEL) { data[n] = devpriv->usb_rx_buf[reg[0]]; continue; } /* VMK8061_MODEL */ data[n] = devpriv->usb_rx_buf[reg[0]] + 256 * devpriv->usb_rx_buf[reg[1]]; } up(&devpriv->limit_sem); return n; } static int vmk80xx_ao_insn_write(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct vmk80xx_private *devpriv = dev->private; int chan; int cmd; int reg; int n; down(&devpriv->limit_sem); chan = CR_CHAN(insn->chanspec); switch (devpriv->model) { case VMK8055_MODEL: cmd = VMK8055_CMD_WRT_AD; if (!chan) reg = VMK8055_AO1_REG; else reg = VMK8055_AO2_REG; break; default: /* NOTE: avoid compiler warnings */ cmd = VMK8061_CMD_SET_AO; reg = VMK8061_AO_REG; devpriv->usb_tx_buf[VMK8061_CH_REG] = chan; break; } for (n = 0; n < insn->n; n++) { devpriv->usb_tx_buf[reg] = data[n]; if (vmk80xx_write_packet(dev, cmd)) break; } up(&devpriv->limit_sem); return n; } static int vmk80xx_ao_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct vmk80xx_private *devpriv = dev->private; int chan; int reg; int n; down(&devpriv->limit_sem); chan = CR_CHAN(insn->chanspec); reg = VMK8061_AO_REG - 1; devpriv->usb_tx_buf[0] = VMK8061_CMD_RD_AO; for (n = 0; n < insn->n; n++) { if (vmk80xx_read_packet(dev)) break; data[n] = devpriv->usb_rx_buf[reg + chan]; } up(&devpriv->limit_sem); return n; } static int vmk80xx_di_insn_bits(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct vmk80xx_private *devpriv = dev->private; unsigned char *rx_buf; int reg; int retval; down(&devpriv->limit_sem); rx_buf = devpriv->usb_rx_buf; if (devpriv->model == VMK8061_MODEL) { reg = VMK8061_DI_REG; devpriv->usb_tx_buf[0] = VMK8061_CMD_RD_DI; } else { reg = VMK8055_DI_REG; } retval = vmk80xx_read_packet(dev); if (!retval) { if (devpriv->model == VMK8055_MODEL) data[1] = (((rx_buf[reg] >> 4) & 0x03) | ((rx_buf[reg] << 2) & 0x04) | ((rx_buf[reg] >> 3) & 0x18)); else data[1] = rx_buf[reg]; retval = 2; } up(&devpriv->limit_sem); return retval; } static int vmk80xx_do_insn_bits(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct vmk80xx_private *devpriv = dev->private; unsigned char *rx_buf = devpriv->usb_rx_buf; unsigned char *tx_buf = devpriv->usb_tx_buf; int reg, cmd; int ret = 0; if (devpriv->model == VMK8061_MODEL) { reg = VMK8061_DO_REG; cmd = VMK8061_CMD_DO; } else { /* VMK8055_MODEL */ reg = VMK8055_DO_REG; cmd = VMK8055_CMD_WRT_AD; } down(&devpriv->limit_sem); if (comedi_dio_update_state(s, data)) { tx_buf[reg] = s->state; ret = vmk80xx_write_packet(dev, cmd); if (ret) goto out; } if (devpriv->model == VMK8061_MODEL) { tx_buf[0] = VMK8061_CMD_RD_DO; ret = vmk80xx_read_packet(dev); if (ret) goto out; data[1] = rx_buf[reg]; } else { data[1] = s->state; } out: up(&devpriv->limit_sem); return ret ? ret : insn->n; } static int vmk80xx_cnt_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct vmk80xx_private *devpriv = dev->private; int chan; int reg[2]; int n; down(&devpriv->limit_sem); chan = CR_CHAN(insn->chanspec); switch (devpriv->model) { case VMK8055_MODEL: if (!chan) reg[0] = VMK8055_CNT1_REG; else reg[0] = VMK8055_CNT2_REG; break; case VMK8061_MODEL: default: reg[0] = VMK8061_CNT_REG; reg[1] = VMK8061_CNT_REG; devpriv->usb_tx_buf[0] = VMK8061_CMD_RD_CNT; break; } for (n = 0; n < insn->n; n++) { if (vmk80xx_read_packet(dev)) break; if (devpriv->model == VMK8055_MODEL) data[n] = devpriv->usb_rx_buf[reg[0]]; else /* VMK8061_MODEL */ data[n] = devpriv->usb_rx_buf[reg[0] * (chan + 1) + 1] + 256 * devpriv->usb_rx_buf[reg[1] * 2 + 2]; } up(&devpriv->limit_sem); return n; } static int vmk80xx_cnt_insn_config(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct vmk80xx_private *devpriv = dev->private; unsigned int chan = CR_CHAN(insn->chanspec); int cmd; int reg; int ret; down(&devpriv->limit_sem); switch (data[0]) { case INSN_CONFIG_RESET: if (devpriv->model == VMK8055_MODEL) { if (!chan) { cmd = VMK8055_CMD_RST_CNT1; reg = VMK8055_CNT1_REG; } else { cmd = VMK8055_CMD_RST_CNT2; reg = VMK8055_CNT2_REG; } devpriv->usb_tx_buf[reg] = 0x00; } else { cmd = VMK8061_CMD_RST_CNT; } ret = vmk80xx_write_packet(dev, cmd); break; default: ret = -EINVAL; break; } up(&devpriv->limit_sem); return ret ? ret : insn->n; } static int vmk80xx_cnt_insn_write(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct vmk80xx_private *devpriv = dev->private; unsigned long debtime; unsigned long val; int chan; int cmd; int n; down(&devpriv->limit_sem); chan = CR_CHAN(insn->chanspec); if (!chan) cmd = VMK8055_CMD_DEB1_TIME; else cmd = VMK8055_CMD_DEB2_TIME; for (n = 0; n < insn->n; n++) { debtime = data[n]; if (debtime == 0) debtime = 1; /* TODO: Prevent overflows */ if (debtime > 7450) debtime = 7450; val = int_sqrt(debtime * 1000 / 115); if (((val + 1) * val) < debtime * 1000 / 115) val += 1; devpriv->usb_tx_buf[6 + chan] = val; if (vmk80xx_write_packet(dev, cmd)) break; } up(&devpriv->limit_sem); return n; } static int vmk80xx_pwm_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct vmk80xx_private *devpriv = dev->private; unsigned char *tx_buf; unsigned char *rx_buf; int reg[2]; int n; down(&devpriv->limit_sem); tx_buf = devpriv->usb_tx_buf; rx_buf = devpriv->usb_rx_buf; reg[0] = VMK8061_PWM_REG1; reg[1] = VMK8061_PWM_REG2; tx_buf[0] = VMK8061_CMD_RD_PWM; for (n = 0; n < insn->n; n++) { if (vmk80xx_read_packet(dev)) break; data[n] = rx_buf[reg[0]] + 4 * rx_buf[reg[1]]; } up(&devpriv->limit_sem); return n; } static int vmk80xx_pwm_insn_write(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct vmk80xx_private *devpriv = dev->private; unsigned char *tx_buf; int reg[2]; int cmd; int n; down(&devpriv->limit_sem); tx_buf = devpriv->usb_tx_buf; reg[0] = VMK8061_PWM_REG1; reg[1] = VMK8061_PWM_REG2; cmd = VMK8061_CMD_OUT_PWM; /* * The followin piece of code was translated from the inline * assembler code in the DLL source code. * * asm * mov eax, k ; k is the value (data[n]) * and al, 03h ; al are the lower 8 bits of eax * mov lo, al ; lo is the low part (tx_buf[reg[0]]) * mov eax, k * shr eax, 2 ; right shift eax register by 2 * mov hi, al ; hi is the high part (tx_buf[reg[1]]) * end; */ for (n = 0; n < insn->n; n++) { tx_buf[reg[0]] = (unsigned char)(data[n] & 0x03); tx_buf[reg[1]] = (unsigned char)(data[n] >> 2) & 0xff; if (vmk80xx_write_packet(dev, cmd)) break; } up(&devpriv->limit_sem); return n; } static int vmk80xx_find_usb_endpoints(struct comedi_device *dev) { struct vmk80xx_private *devpriv = dev->private; struct usb_interface *intf = comedi_to_usb_interface(dev); struct usb_host_interface *iface_desc = intf->cur_altsetting; struct usb_endpoint_descriptor *ep_rx_desc, *ep_tx_desc; int ret; if (devpriv->model == VMK8061_MODEL) ret = usb_find_common_endpoints(iface_desc, &ep_rx_desc, &ep_tx_desc, NULL, NULL); else ret = usb_find_common_endpoints(iface_desc, NULL, NULL, &ep_rx_desc, &ep_tx_desc); if (ret) return -ENODEV; devpriv->ep_rx = ep_rx_desc; devpriv->ep_tx = ep_tx_desc; if (!usb_endpoint_maxp(devpriv->ep_rx) || !usb_endpoint_maxp(devpriv->ep_tx)) return -EINVAL; return 0; } static int vmk80xx_alloc_usb_buffers(struct comedi_device *dev) { struct vmk80xx_private *devpriv = dev->private; size_t size; size = max(usb_endpoint_maxp(devpriv->ep_rx), MIN_BUF_SIZE); devpriv->usb_rx_buf = kzalloc(size, GFP_KERNEL); if (!devpriv->usb_rx_buf) return -ENOMEM; size = max(usb_endpoint_maxp(devpriv->ep_tx), MIN_BUF_SIZE); devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL); if (!devpriv->usb_tx_buf) return -ENOMEM; return 0; } static int vmk80xx_init_subdevices(struct comedi_device *dev) { const struct vmk80xx_board *board = dev->board_ptr; struct vmk80xx_private *devpriv = dev->private; struct comedi_subdevice *s; int n_subd; int ret; down(&devpriv->limit_sem); if (devpriv->model == VMK8055_MODEL) n_subd = 5; else n_subd = 6; ret = comedi_alloc_subdevices(dev, n_subd); if (ret) { up(&devpriv->limit_sem); return ret; } /* Analog input subdevice */ s = &dev->subdevices[0]; s->type = COMEDI_SUBD_AI; s->subdev_flags = SDF_READABLE | SDF_GROUND; s->n_chan = board->ai_nchans; s->maxdata = board->ai_maxdata; s->range_table = board->range; s->insn_read = vmk80xx_ai_insn_read; /* Analog output subdevice */ s = &dev->subdevices[1]; s->type = COMEDI_SUBD_AO; s->subdev_flags = SDF_WRITABLE | SDF_GROUND; s->n_chan = board->ao_nchans; s->maxdata = 0x00ff; s->range_table = board->range; s->insn_write = vmk80xx_ao_insn_write; if (devpriv->model == VMK8061_MODEL) { s->subdev_flags |= SDF_READABLE; s->insn_read = vmk80xx_ao_insn_read; } /* Digital input subdevice */ s = &dev->subdevices[2]; s->type = COMEDI_SUBD_DI; s->subdev_flags = SDF_READABLE; s->n_chan = board->di_nchans; s->maxdata = 1; s->range_table = &range_digital; s->insn_bits = vmk80xx_di_insn_bits; /* Digital output subdevice */ s = &dev->subdevices[3]; s->type = COMEDI_SUBD_DO; s->subdev_flags = SDF_WRITABLE; s->n_chan = 8; s->maxdata = 1; s->range_table = &range_digital; s->insn_bits = vmk80xx_do_insn_bits; /* Counter subdevice */ s = &dev->subdevices[4]; s->type = COMEDI_SUBD_COUNTER; s->subdev_flags = SDF_READABLE; s->n_chan = 2; s->maxdata = board->cnt_maxdata; s->insn_read = vmk80xx_cnt_insn_read; s->insn_config = vmk80xx_cnt_insn_config; if (devpriv->model == VMK8055_MODEL) { s->subdev_flags |= SDF_WRITABLE; s->insn_write = vmk80xx_cnt_insn_write; } /* PWM subdevice */ if (devpriv->model == VMK8061_MODEL) { s = &dev->subdevices[5]; s->type = COMEDI_SUBD_PWM; s->subdev_flags = SDF_READABLE | SDF_WRITABLE; s->n_chan = board->pwm_nchans; s->maxdata = board->pwm_maxdata; s->insn_read = vmk80xx_pwm_insn_read; s->insn_write = vmk80xx_pwm_insn_write; } up(&devpriv->limit_sem); return 0; } static int vmk80xx_auto_attach(struct comedi_device *dev, unsigned long context) { struct usb_interface *intf = comedi_to_usb_interface(dev); const struct vmk80xx_board *board = NULL; struct vmk80xx_private *devpriv; int ret; if (context < ARRAY_SIZE(vmk80xx_boardinfo)) board = &vmk80xx_boardinfo[context]; if (!board) return -ENODEV; dev->board_ptr = board; dev->board_name = board->name; devpriv = comedi_alloc_devpriv(dev, sizeof(*devpriv)); if (!devpriv) return -ENOMEM; devpriv->model = board->model; sema_init(&devpriv->limit_sem, 8); ret = vmk80xx_find_usb_endpoints(dev); if (ret) return ret; ret = vmk80xx_alloc_usb_buffers(dev); if (ret) return ret; usb_set_intfdata(intf, devpriv); if (devpriv->model == VMK8055_MODEL) vmk80xx_reset_device(dev); return vmk80xx_init_subdevices(dev); } static void vmk80xx_detach(struct comedi_device *dev) { struct usb_interface *intf = comedi_to_usb_interface(dev); struct vmk80xx_private *devpriv = dev->private; if (!devpriv) return; down(&devpriv->limit_sem); usb_set_intfdata(intf, NULL); kfree(devpriv->usb_rx_buf); kfree(devpriv->usb_tx_buf); up(&devpriv->limit_sem); } static struct comedi_driver vmk80xx_driver = { .module = THIS_MODULE, .driver_name = "vmk80xx", .auto_attach = vmk80xx_auto_attach, .detach = vmk80xx_detach, }; static int vmk80xx_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { return comedi_usb_auto_config(intf, &vmk80xx_driver, id->driver_info); } static const struct usb_device_id vmk80xx_usb_id_table[] = { { USB_DEVICE(0x10cf, 0x5500), .driver_info = DEVICE_VMK8055 }, { USB_DEVICE(0x10cf, 0x5501), .driver_info = DEVICE_VMK8055 }, { USB_DEVICE(0x10cf, 0x5502), .driver_info = DEVICE_VMK8055 }, { USB_DEVICE(0x10cf, 0x5503), .driver_info = DEVICE_VMK8055 }, { USB_DEVICE(0x10cf, 0x8061), .driver_info = DEVICE_VMK8061 }, { USB_DEVICE(0x10cf, 0x8062), .driver_info = DEVICE_VMK8061 }, { USB_DEVICE(0x10cf, 0x8063), .driver_info = DEVICE_VMK8061 }, { USB_DEVICE(0x10cf, 0x8064), .driver_info = DEVICE_VMK8061 }, { USB_DEVICE(0x10cf, 0x8065), .driver_info = DEVICE_VMK8061 }, { USB_DEVICE(0x10cf, 0x8066), .driver_info = DEVICE_VMK8061 }, { USB_DEVICE(0x10cf, 0x8067), .driver_info = DEVICE_VMK8061 }, { USB_DEVICE(0x10cf, 0x8068), .driver_info = DEVICE_VMK8061 }, { } }; MODULE_DEVICE_TABLE(usb, vmk80xx_usb_id_table); static struct usb_driver vmk80xx_usb_driver = { .name = "vmk80xx", .id_table = vmk80xx_usb_id_table, .probe = vmk80xx_usb_probe, .disconnect = comedi_usb_auto_unconfig, }; module_comedi_usb_driver(vmk80xx_driver, vmk80xx_usb_driver); MODULE_AUTHOR("Manuel Gebele <forensixs@gmx.de>"); MODULE_DESCRIPTION("Velleman USB Board Low-Level Driver"); MODULE_LICENSE("GPL"); |
| 62 62 62 8 8 8 8 2 5 3 2 1 3 29 29 29 8 8 4 28 29 5 1 29 1 29 29 28 29 29 29 8 29 29 29 26 27 27 32 2 2 2 2 2 2 2 1 2 2 31 1 1 24 1 1 1 1 1 1 1 1 1 1 1 1 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 | // SPDX-License-Identifier: GPL-2.0-or-later /* * ALSA sequencer Priority Queue * Copyright (c) 1998-1999 by Frank van de Pol <fvdpol@coil.demon.nl> */ #include <linux/time.h> #include <linux/slab.h> #include <sound/core.h> #include "seq_timer.h" #include "seq_prioq.h" /* Implementation is a simple linked list for now... This priority queue orders the events on timestamp. For events with an equeal timestamp the queue behaves as a FIFO. * * +-------+ * Head --> | first | * +-------+ * |next * +-----v-+ * | | * +-------+ * | * +-----v-+ * | | * +-------+ * | * +-----v-+ * Tail --> | last | * +-------+ * */ /* create new prioq (constructor) */ struct snd_seq_prioq *snd_seq_prioq_new(void) { struct snd_seq_prioq *f; f = kzalloc(sizeof(*f), GFP_KERNEL); if (!f) return NULL; spin_lock_init(&f->lock); f->head = NULL; f->tail = NULL; f->cells = 0; return f; } /* delete prioq (destructor) */ void snd_seq_prioq_delete(struct snd_seq_prioq **fifo) { struct snd_seq_prioq *f = *fifo; *fifo = NULL; if (f == NULL) { pr_debug("ALSA: seq: snd_seq_prioq_delete() called with NULL prioq\n"); return; } /* release resources...*/ /*....................*/ if (f->cells > 0) { /* drain prioQ */ while (f->cells > 0) snd_seq_cell_free(snd_seq_prioq_cell_out(f, NULL)); } kfree(f); } /* compare timestamp between events */ /* return 1 if a >= b; 0 */ static inline int compare_timestamp(struct snd_seq_event *a, struct snd_seq_event *b) { if ((a->flags & SNDRV_SEQ_TIME_STAMP_MASK) == SNDRV_SEQ_TIME_STAMP_TICK) { /* compare ticks */ return (snd_seq_compare_tick_time(&a->time.tick, &b->time.tick)); } else { /* compare real time */ return (snd_seq_compare_real_time(&a->time.time, &b->time.time)); } } /* compare timestamp between events */ /* return negative if a < b; * zero if a = b; * positive if a > b; */ static inline int compare_timestamp_rel(struct snd_seq_event *a, struct snd_seq_event *b) { if ((a->flags & SNDRV_SEQ_TIME_STAMP_MASK) == SNDRV_SEQ_TIME_STAMP_TICK) { /* compare ticks */ if (a->time.tick > b->time.tick) return 1; else if (a->time.tick == b->time.tick) return 0; else return -1; } else { /* compare real time */ if (a->time.time.tv_sec > b->time.time.tv_sec) return 1; else if (a->time.time.tv_sec == b->time.time.tv_sec) { if (a->time.time.tv_nsec > b->time.time.tv_nsec) return 1; else if (a->time.time.tv_nsec == b->time.time.tv_nsec) return 0; else return -1; } else return -1; } } /* enqueue cell to prioq */ int snd_seq_prioq_cell_in(struct snd_seq_prioq * f, struct snd_seq_event_cell * cell) { struct snd_seq_event_cell *cur, *prev; int count; int prior; if (snd_BUG_ON(!f || !cell)) return -EINVAL; /* check flags */ prior = (cell->event.flags & SNDRV_SEQ_PRIORITY_MASK); guard(spinlock_irqsave)(&f->lock); /* check if this element needs to inserted at the end (ie. ordered data is inserted) This will be very likeley if a sequencer application or midi file player is feeding us (sequential) data */ if (f->tail && !prior) { if (compare_timestamp(&cell->event, &f->tail->event)) { /* add new cell to tail of the fifo */ f->tail->next = cell; f->tail = cell; cell->next = NULL; f->cells++; return 0; } } /* traverse list of elements to find the place where the new cell is to be inserted... Note that this is a order n process ! */ prev = NULL; /* previous cell */ cur = f->head; /* cursor */ count = 10000; /* FIXME: enough big, isn't it? */ while (cur != NULL) { /* compare timestamps */ int rel = compare_timestamp_rel(&cell->event, &cur->event); if (rel < 0) /* new cell has earlier schedule time, */ break; else if (rel == 0 && prior) /* equal schedule time and prior to others */ break; /* new cell has equal or larger schedule time, */ /* move cursor to next cell */ prev = cur; cur = cur->next; if (! --count) { pr_err("ALSA: seq: cannot find a pointer.. infinite loop?\n"); return -EINVAL; } } /* insert it before cursor */ if (prev != NULL) prev->next = cell; cell->next = cur; if (f->head == cur) /* this is the first cell, set head to it */ f->head = cell; if (cur == NULL) /* reached end of the list */ f->tail = cell; f->cells++; return 0; } /* return 1 if the current time >= event timestamp */ static int event_is_ready(struct snd_seq_event *ev, void *current_time) { if ((ev->flags & SNDRV_SEQ_TIME_STAMP_MASK) == SNDRV_SEQ_TIME_STAMP_TICK) return snd_seq_compare_tick_time(current_time, &ev->time.tick); else return snd_seq_compare_real_time(current_time, &ev->time.time); } /* dequeue cell from prioq */ struct snd_seq_event_cell *snd_seq_prioq_cell_out(struct snd_seq_prioq *f, void *current_time) { struct snd_seq_event_cell *cell; if (f == NULL) { pr_debug("ALSA: seq: snd_seq_prioq_cell_in() called with NULL prioq\n"); return NULL; } guard(spinlock_irqsave)(&f->lock); cell = f->head; if (cell && current_time && !event_is_ready(&cell->event, current_time)) cell = NULL; if (cell) { f->head = cell->next; /* reset tail if this was the last element */ if (f->tail == cell) f->tail = NULL; cell->next = NULL; f->cells--; } return cell; } /* return number of events available in prioq */ int snd_seq_prioq_avail(struct snd_seq_prioq * f) { if (f == NULL) { pr_debug("ALSA: seq: snd_seq_prioq_cell_in() called with NULL prioq\n"); return 0; } return f->cells; } /* remove cells matching with the condition */ static void prioq_remove_cells(struct snd_seq_prioq *f, bool (*match)(struct snd_seq_event_cell *cell, void *arg), void *arg) { register struct snd_seq_event_cell *cell, *next; struct snd_seq_event_cell *prev = NULL; struct snd_seq_event_cell *freefirst = NULL, *freeprev = NULL, *freenext; /* collect all removed cells */ scoped_guard(spinlock_irqsave, &f->lock) { for (cell = f->head; cell; cell = next) { next = cell->next; if (!match(cell, arg)) { prev = cell; continue; } /* remove cell from prioq */ if (cell == f->head) f->head = cell->next; else prev->next = cell->next; if (cell == f->tail) f->tail = cell->next; f->cells--; /* add cell to free list */ cell->next = NULL; if (freefirst == NULL) freefirst = cell; else freeprev->next = cell; freeprev = cell; } } /* remove selected cells */ while (freefirst) { freenext = freefirst->next; snd_seq_cell_free(freefirst); freefirst = freenext; } } struct prioq_match_arg { int client; int timestamp; }; static inline bool prioq_match(struct snd_seq_event_cell *cell, void *arg) { struct prioq_match_arg *v = arg; if (cell->event.source.client == v->client || cell->event.dest.client == v->client) return true; if (!v->timestamp) return false; switch (cell->event.flags & SNDRV_SEQ_TIME_STAMP_MASK) { case SNDRV_SEQ_TIME_STAMP_TICK: if (cell->event.time.tick) return true; break; case SNDRV_SEQ_TIME_STAMP_REAL: if (cell->event.time.time.tv_sec || cell->event.time.time.tv_nsec) return true; break; } return false; } /* remove cells for left client */ void snd_seq_prioq_leave(struct snd_seq_prioq *f, int client, int timestamp) { struct prioq_match_arg arg = { client, timestamp }; return prioq_remove_cells(f, prioq_match, &arg); } struct prioq_remove_match_arg { int client; struct snd_seq_remove_events *info; }; static bool prioq_remove_match(struct snd_seq_event_cell *cell, void *arg) { struct prioq_remove_match_arg *v = arg; struct snd_seq_event *ev = &cell->event; struct snd_seq_remove_events *info = v->info; int res; if (ev->source.client != v->client) return false; if (info->remove_mode & SNDRV_SEQ_REMOVE_DEST) { if (ev->dest.client != info->dest.client || ev->dest.port != info->dest.port) return false; } if (info->remove_mode & SNDRV_SEQ_REMOVE_DEST_CHANNEL) { if (! snd_seq_ev_is_channel_type(ev)) return false; /* data.note.channel and data.control.channel are identical */ if (ev->data.note.channel != info->channel) return false; } if (info->remove_mode & SNDRV_SEQ_REMOVE_TIME_AFTER) { if (info->remove_mode & SNDRV_SEQ_REMOVE_TIME_TICK) res = snd_seq_compare_tick_time(&ev->time.tick, &info->time.tick); else res = snd_seq_compare_real_time(&ev->time.time, &info->time.time); if (!res) return false; } if (info->remove_mode & SNDRV_SEQ_REMOVE_TIME_BEFORE) { if (info->remove_mode & SNDRV_SEQ_REMOVE_TIME_TICK) res = snd_seq_compare_tick_time(&ev->time.tick, &info->time.tick); else res = snd_seq_compare_real_time(&ev->time.time, &info->time.time); if (res) return false; } if (info->remove_mode & SNDRV_SEQ_REMOVE_EVENT_TYPE) { if (ev->type != info->type) return false; } if (info->remove_mode & SNDRV_SEQ_REMOVE_IGNORE_OFF) { /* Do not remove off events */ switch (ev->type) { case SNDRV_SEQ_EVENT_NOTEOFF: /* case SNDRV_SEQ_EVENT_SAMPLE_STOP: */ return false; default: break; } } if (info->remove_mode & SNDRV_SEQ_REMOVE_TAG_MATCH) { if (info->tag != ev->tag) return false; } return true; } /* remove cells matching remove criteria */ void snd_seq_prioq_remove_events(struct snd_seq_prioq * f, int client, struct snd_seq_remove_events *info) { struct prioq_remove_match_arg arg = { client, info }; return prioq_remove_cells(f, prioq_remove_match, &arg); } |
| 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 | // SPDX-License-Identifier: GPL-2.0-only #include "netlink.h" #include "common.h" #include "bitset.h" struct wol_req_info { struct ethnl_req_info base; }; struct wol_reply_data { struct ethnl_reply_data base; struct ethtool_wolinfo wol; bool show_sopass; }; #define WOL_REPDATA(__reply_base) \ container_of(__reply_base, struct wol_reply_data, base) const struct nla_policy ethnl_wol_get_policy[] = { [ETHTOOL_A_WOL_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), }; static int wol_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct wol_reply_data *data = WOL_REPDATA(reply_base); struct net_device *dev = reply_base->dev; int ret; if (!dev->ethtool_ops->get_wol) return -EOPNOTSUPP; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; dev->ethtool_ops->get_wol(dev, &data->wol); ethnl_ops_complete(dev); /* do not include password in notifications */ data->show_sopass = !genl_info_is_ntf(info) && (data->wol.supported & WAKE_MAGICSECURE); return 0; } static int wol_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; const struct wol_reply_data *data = WOL_REPDATA(reply_base); int len; len = ethnl_bitset32_size(&data->wol.wolopts, &data->wol.supported, WOL_MODE_COUNT, wol_mode_names, compact); if (len < 0) return len; if (data->show_sopass) len += nla_total_size(sizeof(data->wol.sopass)); return len; } static int wol_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; const struct wol_reply_data *data = WOL_REPDATA(reply_base); int ret; ret = ethnl_put_bitset32(skb, ETHTOOL_A_WOL_MODES, &data->wol.wolopts, &data->wol.supported, WOL_MODE_COUNT, wol_mode_names, compact); if (ret < 0) return ret; if (data->show_sopass && nla_put(skb, ETHTOOL_A_WOL_SOPASS, sizeof(data->wol.sopass), data->wol.sopass)) return -EMSGSIZE; return 0; } /* WOL_SET */ const struct nla_policy ethnl_wol_set_policy[] = { [ETHTOOL_A_WOL_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), [ETHTOOL_A_WOL_MODES] = { .type = NLA_NESTED }, [ETHTOOL_A_WOL_SOPASS] = { .type = NLA_BINARY, .len = SOPASS_MAX }, }; static int ethnl_set_wol_validate(struct ethnl_req_info *req_info, struct genl_info *info) { const struct ethtool_ops *ops = req_info->dev->ethtool_ops; return ops->get_wol && ops->set_wol ? 1 : -EOPNOTSUPP; } static int ethnl_set_wol(struct ethnl_req_info *req_info, struct genl_info *info) { struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; struct net_device *dev = req_info->dev; struct nlattr **tb = info->attrs; bool mod = false; int ret; dev->ethtool_ops->get_wol(dev, &wol); ret = ethnl_update_bitset32(&wol.wolopts, WOL_MODE_COUNT, tb[ETHTOOL_A_WOL_MODES], wol_mode_names, info->extack, &mod); if (ret < 0) return ret; if (wol.wolopts & ~wol.supported) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_WOL_MODES], "cannot enable unsupported WoL mode"); return -EINVAL; } if (tb[ETHTOOL_A_WOL_SOPASS]) { if (!(wol.supported & WAKE_MAGICSECURE)) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_WOL_SOPASS], "magicsecure not supported, cannot set password"); return -EINVAL; } ethnl_update_binary(wol.sopass, sizeof(wol.sopass), tb[ETHTOOL_A_WOL_SOPASS], &mod); } if (!mod) return 0; ret = dev->ethtool_ops->set_wol(dev, &wol); if (ret) return ret; dev->ethtool->wol_enabled = !!wol.wolopts; return 1; } const struct ethnl_request_ops ethnl_wol_request_ops = { .request_cmd = ETHTOOL_MSG_WOL_GET, .reply_cmd = ETHTOOL_MSG_WOL_GET_REPLY, .hdr_attr = ETHTOOL_A_WOL_HEADER, .req_info_size = sizeof(struct wol_req_info), .reply_data_size = sizeof(struct wol_reply_data), .prepare_data = wol_prepare_data, .reply_size = wol_reply_size, .fill_reply = wol_fill_reply, .set_validate = ethnl_set_wol_validate, .set = ethnl_set_wol, .set_ntf_cmd = ETHTOOL_MSG_WOL_NTF, }; |
| 15 1188 1211 14 248 1312 13 8 106 2766 3 2766 978 110 125 4 1207 1205 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 | /* SPDX-License-Identifier: GPL-2.0-only */ /* A pointer that can point to either kernel or userspace memory. */ #ifndef _LINUX_BPFPTR_H #define _LINUX_BPFPTR_H #include <linux/mm.h> #include <linux/sockptr.h> typedef sockptr_t bpfptr_t; static inline bool bpfptr_is_kernel(bpfptr_t bpfptr) { return bpfptr.is_kernel; } static inline bpfptr_t KERNEL_BPFPTR(void *p) { return (bpfptr_t) { .kernel = p, .is_kernel = true }; } static inline bpfptr_t USER_BPFPTR(void __user *p) { return (bpfptr_t) { .user = p }; } static inline bpfptr_t make_bpfptr(u64 addr, bool is_kernel) { if (is_kernel) return KERNEL_BPFPTR((void*) (uintptr_t) addr); else return USER_BPFPTR(u64_to_user_ptr(addr)); } static inline bool bpfptr_is_null(bpfptr_t bpfptr) { if (bpfptr_is_kernel(bpfptr)) return !bpfptr.kernel; return !bpfptr.user; } static inline void bpfptr_add(bpfptr_t *bpfptr, size_t val) { if (bpfptr_is_kernel(*bpfptr)) bpfptr->kernel += val; else bpfptr->user += val; } static inline int copy_from_bpfptr_offset(void *dst, bpfptr_t src, size_t offset, size_t size) { if (!bpfptr_is_kernel(src)) return copy_from_user(dst, src.user + offset, size); return copy_from_kernel_nofault(dst, src.kernel + offset, size); } static inline int copy_from_bpfptr(void *dst, bpfptr_t src, size_t size) { return copy_from_bpfptr_offset(dst, src, 0, size); } static inline int copy_to_bpfptr_offset(bpfptr_t dst, size_t offset, const void *src, size_t size) { return copy_to_sockptr_offset((sockptr_t) dst, offset, src, size); } static inline void *kvmemdup_bpfptr_noprof(bpfptr_t src, size_t len) { void *p = kvmalloc_node_align_noprof(len, 1, GFP_USER | __GFP_NOWARN, NUMA_NO_NODE); if (!p) return ERR_PTR(-ENOMEM); if (copy_from_bpfptr(p, src, len)) { kvfree(p); return ERR_PTR(-EFAULT); } return p; } #define kvmemdup_bpfptr(...) alloc_hooks(kvmemdup_bpfptr_noprof(__VA_ARGS__)) static inline long strncpy_from_bpfptr(char *dst, bpfptr_t src, size_t count) { if (bpfptr_is_kernel(src)) return strncpy_from_kernel_nofault(dst, src.kernel, count); return strncpy_from_user(dst, src.user, count); } #endif /* _LINUX_BPFPTR_H */ |
| 4 13 13 13 5 4 3 3 22 11 22 10 9 8 10 6 6 6 2 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_UDP_TUNNEL_H #define __NET_UDP_TUNNEL_H #include <net/ip_tunnels.h> #include <net/udp.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> #include <net/ipv6_stubs.h> #endif struct udp_port_cfg { u8 family; /* Used only for kernel-created sockets */ union { struct in_addr local_ip; #if IS_ENABLED(CONFIG_IPV6) struct in6_addr local_ip6; #endif }; union { struct in_addr peer_ip; #if IS_ENABLED(CONFIG_IPV6) struct in6_addr peer_ip6; #endif }; __be16 local_udp_port; __be16 peer_udp_port; int bind_ifindex; unsigned int use_udp_checksums:1, use_udp6_tx_checksums:1, use_udp6_rx_checksums:1, ipv6_v6only:1; }; int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp); #if IS_ENABLED(CONFIG_IPV6) int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp); #else static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp) { return 0; } #endif static inline int udp_sock_create(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp) { if (cfg->family == AF_INET) return udp_sock_create4(net, cfg, sockp); if (cfg->family == AF_INET6) return udp_sock_create6(net, cfg, sockp); return -EPFNOSUPPORT; } typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk, struct sk_buff *skb); typedef void (*udp_tunnel_encap_err_rcv_t)(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk); typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk, struct list_head *head, struct sk_buff *skb); typedef int (*udp_tunnel_gro_complete_t)(struct sock *sk, struct sk_buff *skb, int nhoff); struct udp_tunnel_sock_cfg { void *sk_user_data; /* user data used by encap_rcv call back */ /* Used for setting up udp_sock fields, see udp.h for details */ __u8 encap_type; udp_tunnel_encap_rcv_t encap_rcv; udp_tunnel_encap_err_lookup_t encap_err_lookup; udp_tunnel_encap_err_rcv_t encap_err_rcv; udp_tunnel_encap_destroy_t encap_destroy; udp_tunnel_gro_receive_t gro_receive; udp_tunnel_gro_complete_t gro_complete; }; /* Setup the given (UDP) sock to receive UDP encapsulated packets */ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, struct udp_tunnel_sock_cfg *sock_cfg); /* -- List of parsable UDP tunnel types -- * * Adding to this list will result in serious debate. The main issue is * that this list is essentially a list of workarounds for either poorly * designed tunnels, or poorly designed device offloads. * * The parsing supported via these types should really be used for Rx * traffic only as the network stack will have already inserted offsets for * the location of the headers in the skb. In addition any ports that are * pushed should be kept within the namespace without leaking to other * devices such as VFs or other ports on the same device. * * It is strongly encouraged to use CHECKSUM_COMPLETE for Rx to avoid the * need to use this for Rx checksum offload. It should not be necessary to * call this function to perform Tx offloads on outgoing traffic. */ enum udp_parsable_tunnel_type { UDP_TUNNEL_TYPE_VXLAN = BIT(0), /* RFC 7348 */ UDP_TUNNEL_TYPE_GENEVE = BIT(1), /* draft-ietf-nvo3-geneve */ UDP_TUNNEL_TYPE_VXLAN_GPE = BIT(2), /* draft-ietf-nvo3-vxlan-gpe */ }; struct udp_tunnel_info { unsigned short type; sa_family_t sa_family; __be16 port; u8 hw_priv; }; /* Notify network devices of offloadable types */ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, unsigned short type); void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock, unsigned short type); void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type); void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type); /* Transmit the skb using UDP encapsulation. */ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, bool xnet, bool nocheck, u16 ipcb_flags); void udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr, __u8 prio, __u8 ttl, __be32 label, __be16 src_port, __be16 dst_port, bool nocheck, u16 ip6cb_flags); void udp_tunnel_sock_release(struct socket *sock); struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, struct net_device *dev, struct net *net, int oif, __be32 *saddr, const struct ip_tunnel_key *key, __be16 sport, __be16 dport, u8 tos, struct dst_cache *dst_cache); struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, struct net_device *dev, struct net *net, struct socket *sock, int oif, struct in6_addr *saddr, const struct ip_tunnel_key *key, __be16 sport, __be16 dport, u8 dsfield, struct dst_cache *dst_cache); struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, const unsigned long *flags, __be64 tunnel_id, int md_size); #ifdef CONFIG_INET static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) { int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; return iptunnel_handle_offloads(skb, type); } #endif #if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) void udp_tunnel_update_gro_lookup(struct net *net, struct sock *sk, bool add); void udp_tunnel_update_gro_rcv(struct sock *sk, bool add); #else static inline void udp_tunnel_update_gro_lookup(struct net *net, struct sock *sk, bool add) {} static inline void udp_tunnel_update_gro_rcv(struct sock *sk, bool add) {} #endif static inline void udp_tunnel_cleanup_gro(struct sock *sk) { udp_tunnel_update_gro_rcv(sk, false); udp_tunnel_update_gro_lookup(sock_net(sk), sk, false); } static inline void udp_tunnel_encap_enable(struct sock *sk) { if (udp_test_and_set_bit(ENCAP_ENABLED, sk)) return; #if IS_ENABLED(CONFIG_IPV6) if (READ_ONCE(sk->sk_family) == PF_INET6) ipv6_stub->udpv6_encap_enable(); #endif udp_encap_enable(); } #define UDP_TUNNEL_NIC_MAX_TABLES 4 enum udp_tunnel_nic_info_flags { /* Device only supports offloads when it's open, all ports * will be removed before close and re-added after open. */ UDP_TUNNEL_NIC_INFO_OPEN_ONLY = BIT(0), /* Device supports only IPv4 tunnels */ UDP_TUNNEL_NIC_INFO_IPV4_ONLY = BIT(1), /* Device has hard-coded the IANA VXLAN port (4789) as VXLAN. * This port must not be counted towards n_entries of any table. * Driver will not receive any callback associated with port 4789. */ UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(2), }; struct udp_tunnel_nic; #define UDP_TUNNEL_NIC_MAX_SHARING_DEVICES (U16_MAX / 2) struct udp_tunnel_nic_shared { struct udp_tunnel_nic *udp_tunnel_nic_info; struct list_head devices; }; struct udp_tunnel_nic_shared_node { struct net_device *dev; struct list_head list; }; /** * struct udp_tunnel_nic_info - driver UDP tunnel offload information * @set_port: callback for adding a new port * @unset_port: callback for removing a port * @sync_table: callback for syncing the entire port table at once * @shared: reference to device global state (optional) * @flags: device flags from enum udp_tunnel_nic_info_flags * @tables: UDP port tables this device has * @tables.n_entries: number of entries in this table * @tables.tunnel_types: types of tunnels this table accepts * * Drivers are expected to provide either @set_port and @unset_port callbacks * or the @sync_table callback. Callbacks are invoked with rtnl lock held. * * Devices which (misguidedly) share the UDP tunnel port table across multiple * netdevs should allocate an instance of struct udp_tunnel_nic_shared and * point @shared at it. * There must never be more than %UDP_TUNNEL_NIC_MAX_SHARING_DEVICES devices * sharing a table. * * Known limitations: * - UDP tunnel port notifications are fundamentally best-effort - * it is likely the driver will both see skbs which use a UDP tunnel port, * while not being a tunneled skb, and tunnel skbs from other ports - * drivers should only use these ports for non-critical RX-side offloads, * e.g. the checksum offload; * - none of the devices care about the socket family at present, so we don't * track it. Please extend this code if you care. */ struct udp_tunnel_nic_info { /* one-by-one */ int (*set_port)(struct net_device *dev, unsigned int table, unsigned int entry, struct udp_tunnel_info *ti); int (*unset_port)(struct net_device *dev, unsigned int table, unsigned int entry, struct udp_tunnel_info *ti); /* all at once */ int (*sync_table)(struct net_device *dev, unsigned int table); struct udp_tunnel_nic_shared *shared; unsigned int flags; struct udp_tunnel_nic_table_info { unsigned int n_entries; unsigned int tunnel_types; } tables[UDP_TUNNEL_NIC_MAX_TABLES]; }; /* UDP tunnel module dependencies * * Tunnel drivers are expected to have a hard dependency on the udp_tunnel * module. NIC drivers are not, they just attach their * struct udp_tunnel_nic_info to the netdev and wait for callbacks to come. * Loading a tunnel driver will cause the udp_tunnel module to be loaded * and only then will all the required state structures be allocated. * Since we want a weak dependency from the drivers and the core to udp_tunnel * we call things through the following stubs. */ struct udp_tunnel_nic_ops { void (*get_port)(struct net_device *dev, unsigned int table, unsigned int idx, struct udp_tunnel_info *ti); void (*set_port_priv)(struct net_device *dev, unsigned int table, unsigned int idx, u8 priv); void (*add_port)(struct net_device *dev, struct udp_tunnel_info *ti); void (*del_port)(struct net_device *dev, struct udp_tunnel_info *ti); void (*reset_ntf)(struct net_device *dev); size_t (*dump_size)(struct net_device *dev, unsigned int table); int (*dump_write)(struct net_device *dev, unsigned int table, struct sk_buff *skb); void (*assert_locked)(struct net_device *dev); void (*lock)(struct net_device *dev); void (*unlock)(struct net_device *dev); }; #ifdef CONFIG_INET extern const struct udp_tunnel_nic_ops *udp_tunnel_nic_ops; #else #define udp_tunnel_nic_ops ((struct udp_tunnel_nic_ops *)NULL) #endif static inline void udp_tunnel_nic_get_port(struct net_device *dev, unsigned int table, unsigned int idx, struct udp_tunnel_info *ti) { /* This helper is used from .sync_table, we indicate empty entries * by zero'ed @ti. Drivers which need to know the details of a port * when it gets deleted should use the .set_port / .unset_port * callbacks. * Zero out here, otherwise !CONFIG_INET causes uninitilized warnings. */ memset(ti, 0, sizeof(*ti)); if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->get_port(dev, table, idx, ti); } static inline void udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table, unsigned int idx, u8 priv) { if (udp_tunnel_nic_ops) { udp_tunnel_nic_ops->assert_locked(dev); udp_tunnel_nic_ops->set_port_priv(dev, table, idx, priv); } } static inline void udp_tunnel_nic_assert_locked(struct net_device *dev) { if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->assert_locked(dev); } static inline void udp_tunnel_nic_lock(struct net_device *dev) { if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->lock(dev); } static inline void udp_tunnel_nic_unlock(struct net_device *dev) { if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->unlock(dev); } static inline void udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti) { if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) return; if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->add_port(dev, ti); } static inline void udp_tunnel_nic_del_port(struct net_device *dev, struct udp_tunnel_info *ti) { if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) return; if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->del_port(dev, ti); } /** * udp_tunnel_nic_reset_ntf() - device-originating reset notification * @dev: network interface device structure * * Called by the driver to inform the core that the entire UDP tunnel port * state has been lost, usually due to device reset. Core will assume device * forgot all the ports and issue .set_port and .sync_table callbacks as * necessary. * * This function must be called with rtnl lock held, and will issue all * the callbacks before returning. */ static inline void udp_tunnel_nic_reset_ntf(struct net_device *dev) { if (udp_tunnel_nic_ops) udp_tunnel_nic_ops->reset_ntf(dev); } static inline size_t udp_tunnel_nic_dump_size(struct net_device *dev, unsigned int table) { size_t ret; if (!udp_tunnel_nic_ops) return 0; udp_tunnel_nic_ops->lock(dev); ret = udp_tunnel_nic_ops->dump_size(dev, table); udp_tunnel_nic_ops->unlock(dev); return ret; } static inline int udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table, struct sk_buff *skb) { int ret; if (!udp_tunnel_nic_ops) return 0; udp_tunnel_nic_ops->lock(dev); ret = udp_tunnel_nic_ops->dump_write(dev, table, skb); udp_tunnel_nic_ops->unlock(dev); return ret; } static inline void udp_tunnel_get_rx_info(struct net_device *dev) { ASSERT_RTNL(); if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) return; udp_tunnel_nic_assert_locked(dev); call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev); } static inline void udp_tunnel_drop_rx_info(struct net_device *dev) { ASSERT_RTNL(); if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) return; udp_tunnel_nic_assert_locked(dev); call_netdevice_notifiers(NETDEV_UDP_TUNNEL_DROP_INFO, dev); } #endif |
| 71 1 25 71 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_IVERSION_H #define _LINUX_IVERSION_H #include <linux/fs.h> /* * The inode->i_version field: * --------------------------- * The change attribute (i_version) is mandated by NFSv4 and is mostly for * knfsd, but is also used for other purposes (e.g. IMA). The i_version must * appear larger to observers if there was an explicit change to the inode's * data or metadata since it was last queried. * * An explicit change is one that would ordinarily result in a change to the * inode status change time (aka ctime). i_version must appear to change, even * if the ctime does not (since the whole point is to avoid missing updates due * to timestamp granularity). If POSIX or other relevant spec mandates that the * ctime must change due to an operation, then the i_version counter must be * incremented as well. * * Making the i_version update completely atomic with the operation itself would * be prohibitively expensive. Traditionally the kernel has updated the times on * directories after an operation that changes its contents. For regular files, * the ctime is usually updated before the data is copied into the cache for a * write. This means that there is a window of time when an observer can * associate a new timestamp with old file contents. Since the purpose of the * i_version is to allow for better cache coherency, the i_version must always * be updated after the results of the operation are visible. Updating it before * and after a change is also permitted. (Note that no filesystems currently do * this. Fixing that is a work-in-progress). * * Observers see the i_version as a 64-bit number that never decreases. If it * remains the same since it was last checked, then nothing has changed in the * inode. If it's different then something has changed. Observers cannot infer * anything about the nature or magnitude of the changes from the value, only * that the inode has changed in some fashion. * * Not all filesystems properly implement the i_version counter. Subsystems that * want to use i_version field on an inode should first check whether the * filesystem sets the SB_I_VERSION flag (usually via the IS_I_VERSION macro). * * Those that set SB_I_VERSION will automatically have their i_version counter * incremented on writes to normal files. If the SB_I_VERSION is not set, then * the VFS will not touch it on writes, and the filesystem can use it how it * wishes. Note that the filesystem is always responsible for updating the * i_version on namespace changes in directories (mkdir, rmdir, unlink, etc.). * We consider these sorts of filesystems to have a kernel-managed i_version. * * It may be impractical for filesystems to keep i_version updates atomic with * respect to the changes that cause them. They should, however, guarantee * that i_version updates are never visible before the changes that caused * them. Also, i_version updates should never be delayed longer than it takes * the original change to reach disk. * * This implementation uses the low bit in the i_version field as a flag to * track when the value has been queried. If it has not been queried since it * was last incremented, we can skip the increment in most cases. * * In the event that we're updating the ctime, we will usually go ahead and * bump the i_version anyway. Since that has to go to stable storage in some * fashion, we might as well increment it as well. * * With this implementation, the value should always appear to observers to * increase over time if the file has changed. It's recommended to use * inode_eq_iversion() helper to compare values. * * Note that some filesystems (e.g. NFS and AFS) just use the field to store * a server-provided value (for the most part). For that reason, those * filesystems do not set SB_I_VERSION. These filesystems are considered to * have a self-managed i_version. * * Persistently storing the i_version * ---------------------------------- * Queries of the i_version field are not gated on them hitting the backing * store. It's always possible that the host could crash after allowing * a query of the value but before it has made it to disk. * * To mitigate this problem, filesystems should always use * inode_set_iversion_queried when loading an existing inode from disk. This * ensures that the next attempted inode increment will result in the value * changing. * * Storing the value to disk therefore does not count as a query, so those * filesystems should use inode_peek_iversion to grab the value to be stored. * There is no need to flag the value as having been queried in that case. */ /* * We borrow the lowest bit in the i_version to use as a flag to tell whether * it has been queried since we last incremented it. If it has, then we must * increment it on the next change. After that, we can clear the flag and * avoid incrementing it again until it has again been queried. */ #define I_VERSION_QUERIED_SHIFT (1) #define I_VERSION_QUERIED (1ULL << (I_VERSION_QUERIED_SHIFT - 1)) #define I_VERSION_INCREMENT (1ULL << I_VERSION_QUERIED_SHIFT) /** * inode_set_iversion_raw - set i_version to the specified raw value * @inode: inode to set * @val: new i_version value to set * * Set @inode's i_version field to @val. This function is for use by * filesystems that self-manage the i_version. * * For example, the NFS client stores its NFSv4 change attribute in this way, * and the AFS client stores the data_version from the server here. */ static inline void inode_set_iversion_raw(struct inode *inode, u64 val) { atomic64_set(&inode->i_version, val); } /** * inode_peek_iversion_raw - grab a "raw" iversion value * @inode: inode from which i_version should be read * * Grab a "raw" inode->i_version value and return it. The i_version is not * flagged or converted in any way. This is mostly used to access a self-managed * i_version. * * With those filesystems, we want to treat the i_version as an entirely * opaque value. */ static inline u64 inode_peek_iversion_raw(const struct inode *inode) { return atomic64_read(&inode->i_version); } /** * inode_set_max_iversion_raw - update i_version new value is larger * @inode: inode to set * @val: new i_version to set * * Some self-managed filesystems (e.g Ceph) will only update the i_version * value if the new value is larger than the one we already have. */ static inline void inode_set_max_iversion_raw(struct inode *inode, u64 val) { u64 cur = inode_peek_iversion_raw(inode); do { if (cur > val) break; } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, val)); } /** * inode_set_iversion - set i_version to a particular value * @inode: inode to set * @val: new i_version value to set * * Set @inode's i_version field to @val. This function is for filesystems with * a kernel-managed i_version, for initializing a newly-created inode from * scratch. * * In this case, we do not set the QUERIED flag since we know that this value * has never been queried. */ static inline void inode_set_iversion(struct inode *inode, u64 val) { inode_set_iversion_raw(inode, val << I_VERSION_QUERIED_SHIFT); } /** * inode_set_iversion_queried - set i_version to a particular value as quereied * @inode: inode to set * @val: new i_version value to set * * Set @inode's i_version field to @val, and flag it for increment on the next * change. * * Filesystems that persistently store the i_version on disk should use this * when loading an existing inode from disk. * * When loading in an i_version value from a backing store, we can't be certain * that it wasn't previously viewed before being stored. Thus, we must assume * that it was, to ensure that we don't end up handing out the same value for * different versions of the same inode. */ static inline void inode_set_iversion_queried(struct inode *inode, u64 val) { inode_set_iversion_raw(inode, (val << I_VERSION_QUERIED_SHIFT) | I_VERSION_QUERIED); } bool inode_maybe_inc_iversion(struct inode *inode, bool force); /** * inode_inc_iversion - forcibly increment i_version * @inode: inode that needs to be updated * * Forcbily increment the i_version field. This always results in a change to * the observable value. */ static inline void inode_inc_iversion(struct inode *inode) { inode_maybe_inc_iversion(inode, true); } /** * inode_iversion_need_inc - is the i_version in need of being incremented? * @inode: inode to check * * Returns whether the inode->i_version counter needs incrementing on the next * change. Just fetch the value and check the QUERIED flag. */ static inline bool inode_iversion_need_inc(struct inode *inode) { return inode_peek_iversion_raw(inode) & I_VERSION_QUERIED; } /** * inode_inc_iversion_raw - forcibly increment raw i_version * @inode: inode that needs to be updated * * Forcbily increment the raw i_version field. This always results in a change * to the raw value. * * NFS will use the i_version field to store the value from the server. It * mostly treats it as opaque, but in the case where it holds a write * delegation, it must increment the value itself. This function does that. */ static inline void inode_inc_iversion_raw(struct inode *inode) { atomic64_inc(&inode->i_version); } /** * inode_peek_iversion - read i_version without flagging it to be incremented * @inode: inode from which i_version should be read * * Read the inode i_version counter for an inode without registering it as a * query. * * This is typically used by local filesystems that need to store an i_version * on disk. In that situation, it's not necessary to flag it as having been * viewed, as the result won't be used to gauge changes from that point. */ static inline u64 inode_peek_iversion(const struct inode *inode) { return inode_peek_iversion_raw(inode) >> I_VERSION_QUERIED_SHIFT; } /* * For filesystems without any sort of change attribute, the best we can * do is fake one up from the ctime: */ static inline u64 time_to_chattr(const struct timespec64 *t) { u64 chattr = t->tv_sec; chattr <<= 32; chattr += t->tv_nsec; return chattr; } u64 inode_query_iversion(struct inode *inode); /** * inode_eq_iversion_raw - check whether the raw i_version counter has changed * @inode: inode to check * @old: old value to check against its i_version * * Compare the current raw i_version counter with a previous one. Returns true * if they are the same or false if they are different. */ static inline bool inode_eq_iversion_raw(const struct inode *inode, u64 old) { return inode_peek_iversion_raw(inode) == old; } /** * inode_eq_iversion - check whether the i_version counter has changed * @inode: inode to check * @old: old value to check against its i_version * * Compare an i_version counter with a previous one. Returns true if they are * the same, and false if they are different. * * Note that we don't need to set the QUERIED flag in this case, as the value * in the inode is not being recorded for later use. */ static inline bool inode_eq_iversion(const struct inode *inode, u64 old) { return inode_peek_iversion(inode) == old; } #endif |
| 40 40 40 39 39 40 33 34 34 1 1 1 1 1 59 1 59 34 59 9 59 39 40 59 59 1 34 40 39 65 9 66 64 9 66 65 66 66 28 15 7 7 7 12 12 12 28 9 9 9 7 7 7 28 5 5 5 5 9 2 7 9 9 9 9 9 6 6 5 5 65 66 65 1 64 64 65 33 10 55 55 55 1 55 54 55 53 3 52 15 9 40 45 45 46 45 45 46 5 46 9 46 22 6 24 2 46 5 5 41 2 46 17 11 29 40 40 39 39 39 40 39 40 15 25 25 25 66 34 34 34 34 34 34 34 34 4 34 34 2 2 2 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 | // SPDX-License-Identifier: GPL-2.0 /* * USB Serial Converter driver * * Copyright (C) 2009 - 2013 Johan Hovold (jhovold@gmail.com) * Copyright (C) 1999 - 2012 Greg Kroah-Hartman (greg@kroah.com) * Copyright (C) 2000 Peter Berger (pberger@brimson.com) * Copyright (C) 2000 Al Borchers (borchers@steinerpoint.com) * * This driver was originally based on the ACM driver by Armin Fuerst (which was * based on a driver by Brad Keryan) * * See Documentation/usb/usb-serial.rst for more information on using this * driver */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/seq_file.h> #include <linux/spinlock.h> #include <linux/mutex.h> #include <linux/list.h> #include <linux/uaccess.h> #include <linux/serial.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include <linux/kfifo.h> #include <linux/idr.h> #define DRIVER_AUTHOR "Greg Kroah-Hartman <gregkh@linuxfoundation.org>" #define DRIVER_DESC "USB Serial Driver core" #define USB_SERIAL_TTY_MAJOR 188 #define USB_SERIAL_TTY_MINORS 512 /* should be enough for a while */ /* There is no MODULE_DEVICE_TABLE for usbserial.c. Instead the MODULE_DEVICE_TABLE declarations in each serial driver cause the "hotplug" program to pull in whatever module is necessary via modprobe, and modprobe will load usbserial because the serial drivers depend on it. */ static DEFINE_IDR(serial_minors); static DEFINE_MUTEX(table_lock); static LIST_HEAD(usb_serial_driver_list); /* * Look up the serial port structure. If it is found and it hasn't been * disconnected, return with the parent usb_serial structure's disc_mutex held * and its refcount incremented. Otherwise return NULL. */ struct usb_serial_port *usb_serial_port_get_by_minor(unsigned minor) { struct usb_serial *serial; struct usb_serial_port *port; mutex_lock(&table_lock); port = idr_find(&serial_minors, minor); if (!port) goto exit; serial = port->serial; mutex_lock(&serial->disc_mutex); if (serial->disconnected) { mutex_unlock(&serial->disc_mutex); port = NULL; } else { kref_get(&serial->kref); } exit: mutex_unlock(&table_lock); return port; } static int allocate_minors(struct usb_serial *serial, int num_ports) { struct usb_serial_port *port; unsigned int i, j; int minor; dev_dbg(&serial->interface->dev, "%s %d\n", __func__, num_ports); mutex_lock(&table_lock); for (i = 0; i < num_ports; ++i) { port = serial->port[i]; minor = idr_alloc(&serial_minors, port, 0, USB_SERIAL_TTY_MINORS, GFP_KERNEL); if (minor < 0) goto error; port->minor = minor; port->port_number = i; } serial->minors_reserved = 1; mutex_unlock(&table_lock); return 0; error: /* unwind the already allocated minors */ for (j = 0; j < i; ++j) idr_remove(&serial_minors, serial->port[j]->minor); mutex_unlock(&table_lock); return minor; } static void release_minors(struct usb_serial *serial) { int i; mutex_lock(&table_lock); for (i = 0; i < serial->num_ports; ++i) idr_remove(&serial_minors, serial->port[i]->minor); mutex_unlock(&table_lock); serial->minors_reserved = 0; } int usb_serial_claim_interface(struct usb_serial *serial, struct usb_interface *intf) { struct usb_driver *driver = serial->type->usb_driver; int ret; if (serial->sibling) return -EBUSY; ret = usb_driver_claim_interface(driver, intf, serial); if (ret) { dev_err(&serial->interface->dev, "failed to claim sibling interface: %d\n", ret); return ret; } serial->sibling = intf; return 0; } EXPORT_SYMBOL_GPL(usb_serial_claim_interface); static void release_sibling(struct usb_serial *serial, struct usb_interface *intf) { struct usb_driver *driver = serial->type->usb_driver; struct usb_interface *sibling; if (!serial->sibling) return; if (intf == serial->sibling) sibling = serial->interface; else sibling = serial->sibling; usb_set_intfdata(sibling, NULL); usb_driver_release_interface(driver, sibling); } static void destroy_serial(struct kref *kref) { struct usb_serial *serial; struct usb_serial_port *port; int i; serial = to_usb_serial(kref); /* return the minor range that this device had */ if (serial->minors_reserved) release_minors(serial); if (serial->attached && serial->type->release) serial->type->release(serial); /* Now that nothing is using the ports, they can be freed */ for (i = 0; i < serial->num_port_pointers; ++i) { port = serial->port[i]; if (port) { port->serial = NULL; put_device(&port->dev); } } usb_put_intf(serial->interface); usb_put_dev(serial->dev); kfree(serial); } void usb_serial_put(struct usb_serial *serial) { kref_put(&serial->kref, destroy_serial); } /***************************************************************************** * Driver tty interface functions *****************************************************************************/ /** * serial_install - install tty * @driver: the driver (USB in our case) * @tty: the tty being created * * Initialise the termios structure for this tty. We use the default * USB serial settings but permit them to be overridden by * serial->type->init_termios on first open. * * This is the first place a new tty gets used. Hence this is where we * acquire references to the usb_serial structure and the driver module, * where we store a pointer to the port. All these actions are reversed * in serial_cleanup(). */ static int serial_install(struct tty_driver *driver, struct tty_struct *tty) { int idx = tty->index; struct usb_serial *serial; struct usb_serial_port *port; bool init_termios; int retval = -ENODEV; port = usb_serial_port_get_by_minor(idx); if (!port) return retval; serial = port->serial; if (!try_module_get(serial->type->driver.owner)) goto err_put_serial; init_termios = (driver->termios[idx] == NULL); retval = tty_standard_install(driver, tty); if (retval) goto err_put_module; mutex_unlock(&serial->disc_mutex); /* allow the driver to update the initial settings */ if (init_termios && serial->type->init_termios) serial->type->init_termios(tty); tty->driver_data = port; return retval; err_put_module: module_put(serial->type->driver.owner); err_put_serial: usb_serial_put(serial); mutex_unlock(&serial->disc_mutex); return retval; } static int serial_port_activate(struct tty_port *tport, struct tty_struct *tty) { struct usb_serial_port *port = container_of(tport, struct usb_serial_port, port); struct usb_serial *serial = port->serial; int retval; mutex_lock(&serial->disc_mutex); if (serial->disconnected) { retval = -ENODEV; goto out_unlock; } retval = usb_autopm_get_interface(serial->interface); if (retval) goto out_unlock; retval = port->serial->type->open(tty, port); if (retval) usb_autopm_put_interface(serial->interface); out_unlock: mutex_unlock(&serial->disc_mutex); if (retval < 0) retval = usb_translate_errors(retval); return retval; } static int serial_open(struct tty_struct *tty, struct file *filp) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); return tty_port_open(&port->port, tty, filp); } /** * serial_port_shutdown - shut down hardware * @tport: tty port to shut down * * Shut down a USB serial port. Serialized against activate by the * tport mutex and kept to matching open/close pairs * of calls by the tty-port initialized flag. * * Not called if tty is console. */ static void serial_port_shutdown(struct tty_port *tport) { struct usb_serial_port *port = container_of(tport, struct usb_serial_port, port); struct usb_serial_driver *drv = port->serial->type; if (drv->close) drv->close(port); usb_autopm_put_interface(port->serial->interface); } static void serial_hangup(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); tty_port_hangup(&port->port); } static void serial_close(struct tty_struct *tty, struct file *filp) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); tty_port_close(&port->port, tty, filp); } /** * serial_cleanup - free resources post close/hangup * @tty: tty to clean up * * Do the resource freeing and refcount dropping for the port. * Avoid freeing the console. * * Called asynchronously after the last tty kref is dropped. */ static void serial_cleanup(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct usb_serial *serial; struct module *owner; dev_dbg(&port->dev, "%s\n", __func__); /* The console is magical. Do not hang up the console hardware * or there will be tears. */ if (port->port.console) return; tty->driver_data = NULL; serial = port->serial; owner = serial->type->driver.owner; usb_serial_put(serial); module_put(owner); } static ssize_t serial_write(struct tty_struct *tty, const u8 *buf, size_t count) { struct usb_serial_port *port = tty->driver_data; int retval = -ENODEV; if (port->serial->dev->state == USB_STATE_NOTATTACHED) goto exit; dev_dbg(&port->dev, "%s - %zu byte(s)\n", __func__, count); retval = port->serial->type->write(tty, port, buf, count); if (retval < 0) retval = usb_translate_errors(retval); exit: return retval; } static unsigned int serial_write_room(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); return port->serial->type->write_room(tty); } static unsigned int serial_chars_in_buffer(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct usb_serial *serial = port->serial; dev_dbg(&port->dev, "%s\n", __func__); if (serial->disconnected) return 0; return serial->type->chars_in_buffer(tty); } static void serial_wait_until_sent(struct tty_struct *tty, int timeout) { struct usb_serial_port *port = tty->driver_data; struct usb_serial *serial = port->serial; dev_dbg(&port->dev, "%s\n", __func__); if (!port->serial->type->wait_until_sent) return; mutex_lock(&serial->disc_mutex); if (!serial->disconnected) port->serial->type->wait_until_sent(tty, timeout); mutex_unlock(&serial->disc_mutex); } static void serial_throttle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); if (port->serial->type->throttle) port->serial->type->throttle(tty); } static void serial_unthrottle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); if (port->serial->type->unthrottle) port->serial->type->unthrottle(tty); } static int serial_get_serial(struct tty_struct *tty, struct serial_struct *ss) { struct usb_serial_port *port = tty->driver_data; struct tty_port *tport = &port->port; unsigned int close_delay, closing_wait; mutex_lock(&tport->mutex); close_delay = jiffies_to_msecs(tport->close_delay) / 10; closing_wait = tport->closing_wait; if (closing_wait != ASYNC_CLOSING_WAIT_NONE) closing_wait = jiffies_to_msecs(closing_wait) / 10; ss->line = port->minor; ss->close_delay = close_delay; ss->closing_wait = closing_wait; if (port->serial->type->get_serial) port->serial->type->get_serial(tty, ss); mutex_unlock(&tport->mutex); return 0; } static int serial_set_serial(struct tty_struct *tty, struct serial_struct *ss) { struct usb_serial_port *port = tty->driver_data; struct tty_port *tport = &port->port; unsigned int close_delay, closing_wait; int ret = 0; close_delay = msecs_to_jiffies(ss->close_delay * 10); closing_wait = ss->closing_wait; if (closing_wait != ASYNC_CLOSING_WAIT_NONE) closing_wait = msecs_to_jiffies(closing_wait * 10); mutex_lock(&tport->mutex); if (!capable(CAP_SYS_ADMIN)) { if (close_delay != tport->close_delay || closing_wait != tport->closing_wait) { ret = -EPERM; goto out_unlock; } } if (port->serial->type->set_serial) { ret = port->serial->type->set_serial(tty, ss); if (ret) goto out_unlock; } tport->close_delay = close_delay; tport->closing_wait = closing_wait; out_unlock: mutex_unlock(&tport->mutex); return ret; } static int serial_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct usb_serial_port *port = tty->driver_data; int retval = -ENOIOCTLCMD; dev_dbg(&port->dev, "%s - cmd 0x%04x\n", __func__, cmd); switch (cmd) { case TIOCMIWAIT: if (port->serial->type->tiocmiwait) retval = port->serial->type->tiocmiwait(tty, arg); break; default: if (port->serial->type->ioctl) retval = port->serial->type->ioctl(tty, cmd, arg); } return retval; } static void serial_set_termios(struct tty_struct *tty, const struct ktermios *old) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); if (port->serial->type->set_termios) port->serial->type->set_termios(tty, port, old); else tty_termios_copy_hw(&tty->termios, old); } static int serial_break(struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); if (port->serial->type->break_ctl) return port->serial->type->break_ctl(tty, break_state); return -ENOTTY; } static int serial_proc_show(struct seq_file *m, void *v) { struct usb_serial *serial; struct usb_serial_port *port; int i; char tmp[40]; seq_puts(m, "usbserinfo:1.0 driver:2.0\n"); for (i = 0; i < USB_SERIAL_TTY_MINORS; ++i) { port = usb_serial_port_get_by_minor(i); if (port == NULL) continue; serial = port->serial; seq_printf(m, "%d:", i); if (serial->type->driver.owner) seq_printf(m, " module:%s", module_name(serial->type->driver.owner)); seq_printf(m, " name:\"%s\"", serial->type->description); seq_printf(m, " vendor:%04x product:%04x", le16_to_cpu(serial->dev->descriptor.idVendor), le16_to_cpu(serial->dev->descriptor.idProduct)); seq_printf(m, " num_ports:%d", serial->num_ports); seq_printf(m, " port:%d", port->port_number); usb_make_path(serial->dev, tmp, sizeof(tmp)); seq_printf(m, " path:%s", tmp); seq_putc(m, '\n'); usb_serial_put(serial); mutex_unlock(&serial->disc_mutex); } return 0; } static int serial_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); if (port->serial->type->tiocmget) return port->serial->type->tiocmget(tty); return -ENOTTY; } static int serial_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); if (port->serial->type->tiocmset) return port->serial->type->tiocmset(tty, set, clear); return -ENOTTY; } static int serial_get_icount(struct tty_struct *tty, struct serial_icounter_struct *icount) { struct usb_serial_port *port = tty->driver_data; dev_dbg(&port->dev, "%s\n", __func__); if (port->serial->type->get_icount) return port->serial->type->get_icount(tty, icount); return -ENOTTY; } /* * We would be calling tty_wakeup here, but unfortunately some line * disciplines have an annoying habit of calling tty->write from * the write wakeup callback (e.g. n_hdlc.c). */ void usb_serial_port_softint(struct usb_serial_port *port) { schedule_work(&port->work); } EXPORT_SYMBOL_GPL(usb_serial_port_softint); static void usb_serial_port_work(struct work_struct *work) { struct usb_serial_port *port = container_of(work, struct usb_serial_port, work); tty_port_tty_wakeup(&port->port); } static void usb_serial_port_poison_urbs(struct usb_serial_port *port) { int i; for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) usb_poison_urb(port->read_urbs[i]); for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) usb_poison_urb(port->write_urbs[i]); usb_poison_urb(port->interrupt_in_urb); usb_poison_urb(port->interrupt_out_urb); } static void usb_serial_port_unpoison_urbs(struct usb_serial_port *port) { int i; for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) usb_unpoison_urb(port->read_urbs[i]); for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) usb_unpoison_urb(port->write_urbs[i]); usb_unpoison_urb(port->interrupt_in_urb); usb_unpoison_urb(port->interrupt_out_urb); } static void usb_serial_port_release(struct device *dev) { struct usb_serial_port *port = to_usb_serial_port(dev); int i; dev_dbg(dev, "%s\n", __func__); usb_free_urb(port->interrupt_in_urb); usb_free_urb(port->interrupt_out_urb); for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) { usb_free_urb(port->read_urbs[i]); kfree(port->bulk_in_buffers[i]); } for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) { usb_free_urb(port->write_urbs[i]); kfree(port->bulk_out_buffers[i]); } kfifo_free(&port->write_fifo); kfree(port->interrupt_in_buffer); kfree(port->interrupt_out_buffer); tty_port_destroy(&port->port); kfree(port); } static struct usb_serial *create_serial(struct usb_device *dev, struct usb_interface *interface, struct usb_serial_driver *driver) { struct usb_serial *serial; serial = kzalloc(sizeof(*serial), GFP_KERNEL); if (!serial) return NULL; serial->dev = usb_get_dev(dev); serial->type = driver; serial->interface = usb_get_intf(interface); kref_init(&serial->kref); mutex_init(&serial->disc_mutex); serial->minors_reserved = 0; return serial; } static const struct usb_device_id *match_dynamic_id(struct usb_interface *intf, struct usb_serial_driver *drv) { struct usb_dynid *dynid; guard(mutex)(&usb_dynids_lock); list_for_each_entry(dynid, &drv->dynids.list, node) { if (usb_match_one_id(intf, &dynid->id)) { return &dynid->id; } } return NULL; } static const struct usb_device_id *get_iface_id(struct usb_serial_driver *drv, struct usb_interface *intf) { const struct usb_device_id *id; id = usb_match_id(intf, drv->id_table); if (id) { dev_dbg(&intf->dev, "static descriptor matches\n"); goto exit; } id = match_dynamic_id(intf, drv); if (id) dev_dbg(&intf->dev, "dynamic descriptor matches\n"); exit: return id; } /* Caller must hold table_lock */ static struct usb_serial_driver *search_serial_device( struct usb_interface *iface) { const struct usb_device_id *id = NULL; struct usb_serial_driver *drv; struct usb_driver *driver = to_usb_driver(iface->dev.driver); /* Check if the usb id matches a known device */ list_for_each_entry(drv, &usb_serial_driver_list, driver_list) { if (drv->usb_driver == driver) id = get_iface_id(drv, iface); if (id) return drv; } return NULL; } static bool serial_port_carrier_raised(struct tty_port *port) { struct usb_serial_port *p = container_of(port, struct usb_serial_port, port); struct usb_serial_driver *drv = p->serial->type; if (drv->carrier_raised) return drv->carrier_raised(p); /* No carrier control - don't block */ return true; } static void serial_port_dtr_rts(struct tty_port *port, bool on) { struct usb_serial_port *p = container_of(port, struct usb_serial_port, port); struct usb_serial_driver *drv = p->serial->type; if (drv->dtr_rts) drv->dtr_rts(p, on); } static ssize_t port_number_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_serial_port *port = to_usb_serial_port(dev); return sprintf(buf, "%u\n", port->port_number); } static DEVICE_ATTR_RO(port_number); static struct attribute *usb_serial_port_attrs[] = { &dev_attr_port_number.attr, NULL }; ATTRIBUTE_GROUPS(usb_serial_port); static const struct tty_port_operations serial_port_ops = { .carrier_raised = serial_port_carrier_raised, .dtr_rts = serial_port_dtr_rts, .activate = serial_port_activate, .shutdown = serial_port_shutdown, }; static void store_endpoint(struct usb_serial *serial, struct usb_serial_endpoints *epds, struct usb_endpoint_descriptor *epd) { struct device *dev = &serial->interface->dev; u8 addr = epd->bEndpointAddress; if (usb_endpoint_is_bulk_in(epd)) { if (epds->num_bulk_in == ARRAY_SIZE(epds->bulk_in)) return; dev_dbg(dev, "found bulk in endpoint %02x\n", addr); epds->bulk_in[epds->num_bulk_in++] = epd; } else if (usb_endpoint_is_bulk_out(epd)) { if (epds->num_bulk_out == ARRAY_SIZE(epds->bulk_out)) return; dev_dbg(dev, "found bulk out endpoint %02x\n", addr); epds->bulk_out[epds->num_bulk_out++] = epd; } else if (usb_endpoint_is_int_in(epd)) { if (epds->num_interrupt_in == ARRAY_SIZE(epds->interrupt_in)) return; dev_dbg(dev, "found interrupt in endpoint %02x\n", addr); epds->interrupt_in[epds->num_interrupt_in++] = epd; } else if (usb_endpoint_is_int_out(epd)) { if (epds->num_interrupt_out == ARRAY_SIZE(epds->interrupt_out)) return; dev_dbg(dev, "found interrupt out endpoint %02x\n", addr); epds->interrupt_out[epds->num_interrupt_out++] = epd; } } static void find_endpoints(struct usb_serial *serial, struct usb_serial_endpoints *epds, struct usb_interface *intf) { struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *epd; unsigned int i; iface_desc = intf->cur_altsetting; for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { epd = &iface_desc->endpoint[i].desc; store_endpoint(serial, epds, epd); } } static int setup_port_bulk_in(struct usb_serial_port *port, struct usb_endpoint_descriptor *epd) { struct usb_serial_driver *type = port->serial->type; struct usb_device *udev = port->serial->dev; int buffer_size; int i; buffer_size = max_t(int, type->bulk_in_size, usb_endpoint_maxp(epd)); port->bulk_in_size = buffer_size; port->bulk_in_endpointAddress = epd->bEndpointAddress; for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) { set_bit(i, &port->read_urbs_free); port->read_urbs[i] = usb_alloc_urb(0, GFP_KERNEL); if (!port->read_urbs[i]) return -ENOMEM; port->bulk_in_buffers[i] = kmalloc(buffer_size, GFP_KERNEL); if (!port->bulk_in_buffers[i]) return -ENOMEM; usb_fill_bulk_urb(port->read_urbs[i], udev, usb_rcvbulkpipe(udev, epd->bEndpointAddress), port->bulk_in_buffers[i], buffer_size, type->read_bulk_callback, port); } port->read_urb = port->read_urbs[0]; port->bulk_in_buffer = port->bulk_in_buffers[0]; return 0; } static int setup_port_bulk_out(struct usb_serial_port *port, struct usb_endpoint_descriptor *epd) { struct usb_serial_driver *type = port->serial->type; struct usb_device *udev = port->serial->dev; int buffer_size; int i; if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL)) return -ENOMEM; if (type->bulk_out_size) buffer_size = type->bulk_out_size; else buffer_size = usb_endpoint_maxp(epd); port->bulk_out_size = buffer_size; port->bulk_out_endpointAddress = epd->bEndpointAddress; for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) { set_bit(i, &port->write_urbs_free); port->write_urbs[i] = usb_alloc_urb(0, GFP_KERNEL); if (!port->write_urbs[i]) return -ENOMEM; port->bulk_out_buffers[i] = kmalloc(buffer_size, GFP_KERNEL); if (!port->bulk_out_buffers[i]) return -ENOMEM; usb_fill_bulk_urb(port->write_urbs[i], udev, usb_sndbulkpipe(udev, epd->bEndpointAddress), port->bulk_out_buffers[i], buffer_size, type->write_bulk_callback, port); } port->write_urb = port->write_urbs[0]; port->bulk_out_buffer = port->bulk_out_buffers[0]; return 0; } static int setup_port_interrupt_in(struct usb_serial_port *port, struct usb_endpoint_descriptor *epd) { struct usb_serial_driver *type = port->serial->type; struct usb_device *udev = port->serial->dev; int buffer_size; port->interrupt_in_urb = usb_alloc_urb(0, GFP_KERNEL); if (!port->interrupt_in_urb) return -ENOMEM; buffer_size = usb_endpoint_maxp(epd); port->interrupt_in_endpointAddress = epd->bEndpointAddress; port->interrupt_in_buffer = kmalloc(buffer_size, GFP_KERNEL); if (!port->interrupt_in_buffer) return -ENOMEM; usb_fill_int_urb(port->interrupt_in_urb, udev, usb_rcvintpipe(udev, epd->bEndpointAddress), port->interrupt_in_buffer, buffer_size, type->read_int_callback, port, epd->bInterval); return 0; } static int setup_port_interrupt_out(struct usb_serial_port *port, struct usb_endpoint_descriptor *epd) { struct usb_serial_driver *type = port->serial->type; struct usb_device *udev = port->serial->dev; int buffer_size; port->interrupt_out_urb = usb_alloc_urb(0, GFP_KERNEL); if (!port->interrupt_out_urb) return -ENOMEM; buffer_size = usb_endpoint_maxp(epd); port->interrupt_out_size = buffer_size; port->interrupt_out_endpointAddress = epd->bEndpointAddress; port->interrupt_out_buffer = kmalloc(buffer_size, GFP_KERNEL); if (!port->interrupt_out_buffer) return -ENOMEM; usb_fill_int_urb(port->interrupt_out_urb, udev, usb_sndintpipe(udev, epd->bEndpointAddress), port->interrupt_out_buffer, buffer_size, type->write_int_callback, port, epd->bInterval); return 0; } static int usb_serial_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct device *ddev = &interface->dev; struct usb_device *dev = interface_to_usbdev(interface); struct usb_serial *serial = NULL; struct usb_serial_port *port; struct usb_serial_endpoints *epds; struct usb_serial_driver *type = NULL; int retval; int i; int num_ports = 0; unsigned char max_endpoints; mutex_lock(&table_lock); type = search_serial_device(interface); if (!type) { mutex_unlock(&table_lock); dev_dbg(ddev, "none matched\n"); return -ENODEV; } if (!try_module_get(type->driver.owner)) { mutex_unlock(&table_lock); dev_err(ddev, "module get failed, exiting\n"); return -EIO; } mutex_unlock(&table_lock); serial = create_serial(dev, interface, type); if (!serial) { retval = -ENOMEM; goto err_put_module; } /* if this device type has a probe function, call it */ if (type->probe) { const struct usb_device_id *id; id = get_iface_id(type, interface); retval = type->probe(serial, id); if (retval) { dev_dbg(ddev, "sub driver rejected device\n"); goto err_release_sibling; } } /* descriptor matches, let's find the endpoints needed */ epds = kzalloc(sizeof(*epds), GFP_KERNEL); if (!epds) { retval = -ENOMEM; goto err_release_sibling; } find_endpoints(serial, epds, interface); if (serial->sibling) find_endpoints(serial, epds, serial->sibling); if (epds->num_bulk_in < type->num_bulk_in || epds->num_bulk_out < type->num_bulk_out || epds->num_interrupt_in < type->num_interrupt_in || epds->num_interrupt_out < type->num_interrupt_out) { dev_err(ddev, "required endpoints missing\n"); retval = -ENODEV; goto err_free_epds; } if (type->calc_num_ports) { retval = type->calc_num_ports(serial, epds); if (retval < 0) goto err_free_epds; num_ports = retval; } if (!num_ports) num_ports = type->num_ports; if (num_ports > MAX_NUM_PORTS) { dev_warn(ddev, "too many ports requested: %d\n", num_ports); num_ports = MAX_NUM_PORTS; } serial->num_ports = (unsigned char)num_ports; serial->num_bulk_in = epds->num_bulk_in; serial->num_bulk_out = epds->num_bulk_out; serial->num_interrupt_in = epds->num_interrupt_in; serial->num_interrupt_out = epds->num_interrupt_out; /* found all that we need */ dev_info(ddev, "%s converter detected\n", type->description); /* create our ports, we need as many as the max endpoints */ /* we don't use num_ports here because some devices have more endpoint pairs than ports */ max_endpoints = max(epds->num_bulk_in, epds->num_bulk_out); max_endpoints = max(max_endpoints, epds->num_interrupt_in); max_endpoints = max(max_endpoints, epds->num_interrupt_out); max_endpoints = max(max_endpoints, serial->num_ports); serial->num_port_pointers = max_endpoints; dev_dbg(ddev, "setting up %d port structure(s)\n", max_endpoints); for (i = 0; i < max_endpoints; ++i) { port = kzalloc(sizeof(struct usb_serial_port), GFP_KERNEL); if (!port) { retval = -ENOMEM; goto err_free_epds; } tty_port_init(&port->port); port->port.ops = &serial_port_ops; port->serial = serial; spin_lock_init(&port->lock); /* Keep this for private driver use for the moment but should probably go away */ INIT_WORK(&port->work, usb_serial_port_work); serial->port[i] = port; port->dev.parent = &interface->dev; port->dev.driver = NULL; port->dev.bus = &usb_serial_bus_type; port->dev.release = &usb_serial_port_release; port->dev.groups = usb_serial_port_groups; device_initialize(&port->dev); } /* set up the endpoint information */ for (i = 0; i < epds->num_bulk_in; ++i) { retval = setup_port_bulk_in(serial->port[i], epds->bulk_in[i]); if (retval) goto err_free_epds; } for (i = 0; i < epds->num_bulk_out; ++i) { retval = setup_port_bulk_out(serial->port[i], epds->bulk_out[i]); if (retval) goto err_free_epds; } if (serial->type->read_int_callback) { for (i = 0; i < epds->num_interrupt_in; ++i) { retval = setup_port_interrupt_in(serial->port[i], epds->interrupt_in[i]); if (retval) goto err_free_epds; } } else if (epds->num_interrupt_in) { dev_dbg(ddev, "The device claims to support interrupt in transfers, but read_int_callback is not defined\n"); } if (serial->type->write_int_callback) { for (i = 0; i < epds->num_interrupt_out; ++i) { retval = setup_port_interrupt_out(serial->port[i], epds->interrupt_out[i]); if (retval) goto err_free_epds; } } else if (epds->num_interrupt_out) { dev_dbg(ddev, "The device claims to support interrupt out transfers, but write_int_callback is not defined\n"); } usb_set_intfdata(interface, serial); /* if this device type has an attach function, call it */ if (type->attach) { retval = type->attach(serial); if (retval < 0) goto err_free_epds; serial->attached = 1; if (retval > 0) { /* quietly accept this device, but don't bind to a serial port as it's about to disappear */ serial->num_ports = 0; goto exit; } } else { serial->attached = 1; } retval = allocate_minors(serial, num_ports); if (retval) { dev_err(ddev, "No more free serial minor numbers\n"); goto err_free_epds; } /* register all of the individual ports with the driver core */ for (i = 0; i < num_ports; ++i) { port = serial->port[i]; dev_set_name(&port->dev, "ttyUSB%d", port->minor); dev_dbg(ddev, "registering %s\n", dev_name(&port->dev)); device_enable_async_suspend(&port->dev); retval = device_add(&port->dev); if (retval) dev_err(ddev, "Error registering port device, continuing\n"); } if (num_ports > 0) usb_serial_console_init(serial->port[0]->minor); exit: kfree(epds); module_put(type->driver.owner); return 0; err_free_epds: kfree(epds); err_release_sibling: release_sibling(serial, interface); usb_serial_put(serial); err_put_module: module_put(type->driver.owner); return retval; } static void usb_serial_disconnect(struct usb_interface *interface) { int i; struct usb_serial *serial = usb_get_intfdata(interface); struct device *dev = &interface->dev; struct usb_serial_port *port; /* sibling interface is cleaning up */ if (!serial) return; usb_serial_console_disconnect(serial); mutex_lock(&serial->disc_mutex); /* must set a flag, to signal subdrivers */ serial->disconnected = 1; mutex_unlock(&serial->disc_mutex); for (i = 0; i < serial->num_ports; ++i) { port = serial->port[i]; tty_port_tty_vhangup(&port->port); usb_serial_port_poison_urbs(port); wake_up_interruptible(&port->port.delta_msr_wait); cancel_work_sync(&port->work); if (device_is_registered(&port->dev)) device_del(&port->dev); } if (serial->type->disconnect) serial->type->disconnect(serial); release_sibling(serial, interface); /* let the last holder of this object cause it to be cleaned up */ usb_serial_put(serial); dev_info(dev, "device disconnected\n"); } int usb_serial_suspend(struct usb_interface *intf, pm_message_t message) { struct usb_serial *serial = usb_get_intfdata(intf); int i, r; /* suspend when called for first sibling interface */ if (serial->suspend_count++) return 0; /* * serial->type->suspend() MUST return 0 in system sleep context, * otherwise, the resume callback has to recover device from * previous suspend failure. */ if (serial->type->suspend) { r = serial->type->suspend(serial, message); if (r < 0) { serial->suspend_count--; return r; } } for (i = 0; i < serial->num_ports; ++i) usb_serial_port_poison_urbs(serial->port[i]); return 0; } EXPORT_SYMBOL(usb_serial_suspend); static void usb_serial_unpoison_port_urbs(struct usb_serial *serial) { int i; for (i = 0; i < serial->num_ports; ++i) usb_serial_port_unpoison_urbs(serial->port[i]); } int usb_serial_resume(struct usb_interface *intf) { struct usb_serial *serial = usb_get_intfdata(intf); int rv; /* resume when called for last sibling interface */ if (--serial->suspend_count) return 0; usb_serial_unpoison_port_urbs(serial); if (serial->type->resume) rv = serial->type->resume(serial); else rv = usb_serial_generic_resume(serial); return rv; } EXPORT_SYMBOL(usb_serial_resume); static int usb_serial_reset_resume(struct usb_interface *intf) { struct usb_serial *serial = usb_get_intfdata(intf); int rv; /* resume when called for last sibling interface */ if (--serial->suspend_count) return 0; usb_serial_unpoison_port_urbs(serial); if (serial->type->reset_resume) { rv = serial->type->reset_resume(serial); } else { rv = -EOPNOTSUPP; intf->needs_binding = 1; } return rv; } static const struct tty_operations serial_ops = { .open = serial_open, .close = serial_close, .write = serial_write, .hangup = serial_hangup, .write_room = serial_write_room, .ioctl = serial_ioctl, .set_termios = serial_set_termios, .throttle = serial_throttle, .unthrottle = serial_unthrottle, .break_ctl = serial_break, .chars_in_buffer = serial_chars_in_buffer, .wait_until_sent = serial_wait_until_sent, .tiocmget = serial_tiocmget, .tiocmset = serial_tiocmset, .get_icount = serial_get_icount, .set_serial = serial_set_serial, .get_serial = serial_get_serial, .cleanup = serial_cleanup, .install = serial_install, .proc_show = serial_proc_show, }; struct tty_driver *usb_serial_tty_driver; static int __init usb_serial_init(void) { int result; usb_serial_tty_driver = tty_alloc_driver(USB_SERIAL_TTY_MINORS, TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV); if (IS_ERR(usb_serial_tty_driver)) return PTR_ERR(usb_serial_tty_driver); /* Initialize our global data */ result = bus_register(&usb_serial_bus_type); if (result) { pr_err("%s - registering bus driver failed\n", __func__); goto err_put_driver; } usb_serial_tty_driver->driver_name = "usbserial"; usb_serial_tty_driver->name = "ttyUSB"; usb_serial_tty_driver->major = USB_SERIAL_TTY_MAJOR; usb_serial_tty_driver->minor_start = 0; usb_serial_tty_driver->type = TTY_DRIVER_TYPE_SERIAL; usb_serial_tty_driver->subtype = SERIAL_TYPE_NORMAL; usb_serial_tty_driver->init_termios = tty_std_termios; usb_serial_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; usb_serial_tty_driver->init_termios.c_ispeed = 9600; usb_serial_tty_driver->init_termios.c_ospeed = 9600; tty_set_operations(usb_serial_tty_driver, &serial_ops); result = tty_register_driver(usb_serial_tty_driver); if (result) { pr_err("%s - tty_register_driver failed\n", __func__); goto err_unregister_bus; } /* register the generic driver, if we should */ result = usb_serial_generic_register(); if (result < 0) { pr_err("%s - registering generic driver failed\n", __func__); goto err_unregister_driver; } return result; err_unregister_driver: tty_unregister_driver(usb_serial_tty_driver); err_unregister_bus: bus_unregister(&usb_serial_bus_type); err_put_driver: pr_err("%s - returning with error %d\n", __func__, result); tty_driver_kref_put(usb_serial_tty_driver); return result; } static void __exit usb_serial_exit(void) { usb_serial_console_exit(); usb_serial_generic_deregister(); tty_unregister_driver(usb_serial_tty_driver); tty_driver_kref_put(usb_serial_tty_driver); bus_unregister(&usb_serial_bus_type); idr_destroy(&serial_minors); } module_init(usb_serial_init); module_exit(usb_serial_exit); #define set_to_generic_if_null(type, function) \ do { \ if (!type->function) { \ type->function = usb_serial_generic_##function; \ pr_debug("%s: using generic " #function "\n", \ type->driver.name); \ } \ } while (0) static void usb_serial_operations_init(struct usb_serial_driver *device) { set_to_generic_if_null(device, open); set_to_generic_if_null(device, write); set_to_generic_if_null(device, close); set_to_generic_if_null(device, write_room); set_to_generic_if_null(device, chars_in_buffer); if (device->tx_empty) set_to_generic_if_null(device, wait_until_sent); set_to_generic_if_null(device, read_bulk_callback); set_to_generic_if_null(device, write_bulk_callback); set_to_generic_if_null(device, process_read_urb); set_to_generic_if_null(device, prepare_write_buffer); } static int usb_serial_register(struct usb_serial_driver *driver) { int retval; if (usb_disabled()) return -ENODEV; if (!driver->description) driver->description = driver->driver.name; if (!driver->usb_driver) { WARN(1, "Serial driver %s has no usb_driver\n", driver->description); return -EINVAL; } /* Prevent individual ports from being unbound. */ driver->driver.suppress_bind_attrs = true; usb_serial_operations_init(driver); /* Add this device to our list of devices */ mutex_lock(&table_lock); list_add(&driver->driver_list, &usb_serial_driver_list); retval = usb_serial_bus_register(driver); if (retval) { pr_err("problem %d when registering driver %s\n", retval, driver->description); list_del(&driver->driver_list); } else { pr_info("USB Serial support registered for %s\n", driver->description); } mutex_unlock(&table_lock); return retval; } static void usb_serial_deregister(struct usb_serial_driver *device) { pr_info("USB Serial deregistering driver %s\n", device->description); mutex_lock(&table_lock); list_del(&device->driver_list); mutex_unlock(&table_lock); usb_serial_bus_deregister(device); } /** * __usb_serial_register_drivers - register drivers for a usb-serial module * @serial_drivers: NULL-terminated array of pointers to drivers to be registered * @owner: owning module * @name: name of the usb_driver for this set of @serial_drivers * @id_table: list of all devices this @serial_drivers set binds to * * Registers all the drivers in the @serial_drivers array, and dynamically * creates a struct usb_driver with the name @name and id_table of @id_table. */ int __usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[], struct module *owner, const char *name, const struct usb_device_id *id_table) { int rc; struct usb_driver *udriver; struct usb_serial_driver * const *sd; /* * udriver must be registered before any of the serial drivers, * because the store_new_id() routine for the serial drivers (in * bus.c) probes udriver. * * Performance hack: We don't want udriver to be probed until * the serial drivers are registered, because the probe would * simply fail for lack of a matching serial driver. * So we leave udriver's id_table set to NULL until we are all set. * * Suspend/resume support is implemented in the usb-serial core, * so fill in the PM-related fields in udriver. */ udriver = kzalloc(sizeof(*udriver), GFP_KERNEL); if (!udriver) return -ENOMEM; udriver->name = name; udriver->no_dynamic_id = 1; udriver->supports_autosuspend = 1; udriver->suspend = usb_serial_suspend; udriver->resume = usb_serial_resume; udriver->probe = usb_serial_probe; udriver->disconnect = usb_serial_disconnect; /* we only set the reset_resume field if the serial_driver has one */ for (sd = serial_drivers; *sd; ++sd) { if ((*sd)->reset_resume) { udriver->reset_resume = usb_serial_reset_resume; break; } } rc = usb_register(udriver); if (rc) goto err_free_driver; for (sd = serial_drivers; *sd; ++sd) { (*sd)->usb_driver = udriver; (*sd)->driver.owner = owner; rc = usb_serial_register(*sd); if (rc) goto err_deregister_drivers; } /* Now set udriver's id_table and look for matches */ udriver->id_table = id_table; rc = driver_attach(&udriver->driver); return 0; err_deregister_drivers: while (sd-- > serial_drivers) usb_serial_deregister(*sd); usb_deregister(udriver); err_free_driver: kfree(udriver); return rc; } EXPORT_SYMBOL_GPL(__usb_serial_register_drivers); /** * usb_serial_deregister_drivers - deregister drivers for a usb-serial module * @serial_drivers: NULL-terminated array of pointers to drivers to be deregistered * * Deregisters all the drivers in the @serial_drivers array and deregisters and * frees the struct usb_driver that was created by the call to * usb_serial_register_drivers(). */ void usb_serial_deregister_drivers(struct usb_serial_driver *const serial_drivers[]) { struct usb_driver *udriver = (*serial_drivers)->usb_driver; for (; *serial_drivers; ++serial_drivers) usb_serial_deregister(*serial_drivers); usb_deregister(udriver); kfree(udriver); } EXPORT_SYMBOL_GPL(usb_serial_deregister_drivers); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL v2"); |
| 9 107 108 107 100 34 107 107 34 39 39 39 39 39 39 74 1 77 3 3 3 3 3 107 107 107 41 41 41 86 88 11 86 75 68 85 85 2 85 77 74 84 83 84 84 83 84 84 84 79 76 75 75 78 75 73 69 73 73 67 5 70 73 70 66 2 1 74 72 68 73 74 67 67 74 73 74 75 88 88 72 87 68 88 86 83 75 88 7 15 14 7 7 62 64 71 70 71 66 26 11 77 7 7 7 77 64 64 62 64 64 63 69 74 69 61 60 19 61 68 68 67 68 67 68 4 68 64 63 64 23 21 64 42 64 64 63 64 63 23 23 22 63 12 64 64 4 3 4 3 67 84 1 1 1 5 2 5 2 1 1 1 1 1 64 61 64 64 64 63 64 64 62 64 64 63 63 64 1 1 3 8 11 4 4 4 4 4 4 3 2 4 4 3 2 2 4 1 1 1 1 1 1 31 31 32 32 32 53 48 48 53 8 47 47 47 47 46 47 11 11 41 41 41 41 29 28 16 26 29 26 26 26 4 4 26 14 14 26 25 20 9 22 22 22 22 22 22 22 11 11 22 22 1 1 1 1 1 1 1 1 1 1 38 39 39 39 39 39 1 40 39 37 40 39 39 39 39 39 39 1 39 40 40 40 9 9 9 9 9 4 1 1 4 4 9 5 4 9 9 10 9 9 9 9 9 9 9 8 7 8 8 8 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 8 69 69 68 21 21 21 5 4 1 3 4 5 5 5 5 1 1 1 1 1 71 74 72 72 71 70 70 72 72 5 72 71 70 71 71 71 71 71 70 71 64 68 68 68 69 68 69 69 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 51 19 51 51 51 51 19 51 51 51 51 51 51 51 51 51 51 51 51 51 3 3 5 8 8 7 5 7 7 4 7 6 7 54 51 54 51 51 51 51 20 51 20 17 17 17 17 8 8 8 8 12 12 12 12 20 20 20 20 20 3 3 3 20 3 3 3 3 3 3 3 17 4 17 2 1 1 1 2 1 1 1 15 14 12 11 15 15 3 6 6 6 1 4 4 2 2 4 2 2 4 4 2 5 4 4 7 7 7 1 4 4 4 2 2 3 1 3 3 1 4 3 1 1 1 1 4 2 2 2 3 2 2 4 4 83 84 84 84 84 1 3 3 15 84 1 1 1 3 1 3 1 1 1 1 5 1 1 1 1 1 4 7 6 22 4 1 1 1 1 4 4 5 1 85 85 67 67 63 63 39 8 66 64 11 1 1 1 1 2 2 2 2 2 3 3 3 3 3 3 3 3 3 1 2 2 2 2 2 2 2 2 3 3 3 7 3 3 5 4 4 3 3 3 3 7 3 5 5 5 5 5 4 4 4 4 9 3 22 6 7 7 1 6 4 3 4 3 4 4 1 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Digital Audio (PCM) abstract layer * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/compat.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/file.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/time.h> #include <linux/pm_qos.h> #include <linux/io.h> #include <linux/dma-mapping.h> #include <linux/vmalloc.h> #include <sound/core.h> #include <sound/control.h> #include <sound/info.h> #include <sound/pcm.h> #include <sound/pcm_params.h> #include <sound/timer.h> #include <sound/minors.h> #include <linux/uio.h> #include <linux/delay.h> #include <linux/bitops.h> #include "pcm_local.h" #ifdef CONFIG_SND_DEBUG #define CREATE_TRACE_POINTS #include "pcm_param_trace.h" #else #define trace_hw_mask_param_enabled() 0 #define trace_hw_interval_param_enabled() 0 #define trace_hw_mask_param(substream, type, index, prev, curr) #define trace_hw_interval_param(substream, type, index, prev, curr) #endif /* * Compatibility */ struct snd_pcm_hw_params_old { unsigned int flags; unsigned int masks[SNDRV_PCM_HW_PARAM_SUBFORMAT - SNDRV_PCM_HW_PARAM_ACCESS + 1]; struct snd_interval intervals[SNDRV_PCM_HW_PARAM_TICK_TIME - SNDRV_PCM_HW_PARAM_SAMPLE_BITS + 1]; unsigned int rmask; unsigned int cmask; unsigned int info; unsigned int msbits; unsigned int rate_num; unsigned int rate_den; snd_pcm_uframes_t fifo_size; unsigned char reserved[64]; }; #ifdef CONFIG_SND_SUPPORT_OLD_API #define SNDRV_PCM_IOCTL_HW_REFINE_OLD _IOWR('A', 0x10, struct snd_pcm_hw_params_old) #define SNDRV_PCM_IOCTL_HW_PARAMS_OLD _IOWR('A', 0x11, struct snd_pcm_hw_params_old) static int snd_pcm_hw_refine_old_user(struct snd_pcm_substream *substream, struct snd_pcm_hw_params_old __user * _oparams); static int snd_pcm_hw_params_old_user(struct snd_pcm_substream *substream, struct snd_pcm_hw_params_old __user * _oparams); #endif static int snd_pcm_open(struct file *file, struct snd_pcm *pcm, int stream); /* * */ static DECLARE_RWSEM(snd_pcm_link_rwsem); void snd_pcm_group_init(struct snd_pcm_group *group) { spin_lock_init(&group->lock); mutex_init(&group->mutex); INIT_LIST_HEAD(&group->substreams); refcount_set(&group->refs, 1); } /* define group lock helpers */ #define DEFINE_PCM_GROUP_LOCK(action, bh_lock, bh_unlock, mutex_action) \ static void snd_pcm_group_ ## action(struct snd_pcm_group *group, bool nonatomic) \ { \ if (nonatomic) { \ mutex_ ## mutex_action(&group->mutex); \ } else { \ if (IS_ENABLED(CONFIG_PREEMPT_RT) && bh_lock) \ local_bh_disable(); \ spin_ ## action(&group->lock); \ if (IS_ENABLED(CONFIG_PREEMPT_RT) && bh_unlock) \ local_bh_enable(); \ } \ } DEFINE_PCM_GROUP_LOCK(lock, false, false, lock); DEFINE_PCM_GROUP_LOCK(unlock, false, false, unlock); DEFINE_PCM_GROUP_LOCK(lock_irq, true, false, lock); DEFINE_PCM_GROUP_LOCK(unlock_irq, false, true, unlock); /** * snd_pcm_stream_lock - Lock the PCM stream * @substream: PCM substream * * This locks the PCM stream's spinlock or mutex depending on the nonatomic * flag of the given substream. This also takes the global link rw lock * (or rw sem), too, for avoiding the race with linked streams. */ void snd_pcm_stream_lock(struct snd_pcm_substream *substream) { snd_pcm_group_lock(&substream->self_group, substream->pcm->nonatomic); } EXPORT_SYMBOL_GPL(snd_pcm_stream_lock); /** * snd_pcm_stream_unlock - Unlock the PCM stream * @substream: PCM substream * * This unlocks the PCM stream that has been locked via snd_pcm_stream_lock(). */ void snd_pcm_stream_unlock(struct snd_pcm_substream *substream) { snd_pcm_group_unlock(&substream->self_group, substream->pcm->nonatomic); } EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock); /** * snd_pcm_stream_lock_irq - Lock the PCM stream * @substream: PCM substream * * This locks the PCM stream like snd_pcm_stream_lock() and disables the local * IRQ (only when nonatomic is false). In nonatomic case, this is identical * as snd_pcm_stream_lock(). */ void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream) { snd_pcm_group_lock_irq(&substream->self_group, substream->pcm->nonatomic); } EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq); static void snd_pcm_stream_lock_nested(struct snd_pcm_substream *substream) { struct snd_pcm_group *group = &substream->self_group; if (substream->pcm->nonatomic) mutex_lock_nested(&group->mutex, SINGLE_DEPTH_NESTING); else spin_lock_nested(&group->lock, SINGLE_DEPTH_NESTING); } /** * snd_pcm_stream_unlock_irq - Unlock the PCM stream * @substream: PCM substream * * This is a counter-part of snd_pcm_stream_lock_irq(). */ void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream) { snd_pcm_group_unlock_irq(&substream->self_group, substream->pcm->nonatomic); } EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irq); unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream) { unsigned long flags = 0; if (substream->pcm->nonatomic) mutex_lock(&substream->self_group.mutex); else spin_lock_irqsave(&substream->self_group.lock, flags); return flags; } EXPORT_SYMBOL_GPL(_snd_pcm_stream_lock_irqsave); unsigned long _snd_pcm_stream_lock_irqsave_nested(struct snd_pcm_substream *substream) { unsigned long flags = 0; if (substream->pcm->nonatomic) mutex_lock_nested(&substream->self_group.mutex, SINGLE_DEPTH_NESTING); else spin_lock_irqsave_nested(&substream->self_group.lock, flags, SINGLE_DEPTH_NESTING); return flags; } EXPORT_SYMBOL_GPL(_snd_pcm_stream_lock_irqsave_nested); /** * snd_pcm_stream_unlock_irqrestore - Unlock the PCM stream * @substream: PCM substream * @flags: irq flags * * This is a counter-part of snd_pcm_stream_lock_irqsave(). */ void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream, unsigned long flags) { if (substream->pcm->nonatomic) mutex_unlock(&substream->self_group.mutex); else spin_unlock_irqrestore(&substream->self_group.lock, flags); } EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irqrestore); /* Run PCM ioctl ops */ static int snd_pcm_ops_ioctl(struct snd_pcm_substream *substream, unsigned cmd, void *arg) { if (substream->ops->ioctl) return substream->ops->ioctl(substream, cmd, arg); else return snd_pcm_lib_ioctl(substream, cmd, arg); } int snd_pcm_info(struct snd_pcm_substream *substream, struct snd_pcm_info *info) { struct snd_pcm *pcm = substream->pcm; struct snd_pcm_str *pstr = substream->pstr; memset(info, 0, sizeof(*info)); info->card = pcm->card->number; info->device = pcm->device; info->stream = substream->stream; info->subdevice = substream->number; strscpy(info->id, pcm->id, sizeof(info->id)); strscpy(info->name, pcm->name, sizeof(info->name)); info->dev_class = pcm->dev_class; info->dev_subclass = pcm->dev_subclass; info->subdevices_count = pstr->substream_count; info->subdevices_avail = pstr->substream_count - pstr->substream_opened; strscpy(info->subname, substream->name, sizeof(info->subname)); return 0; } int snd_pcm_info_user(struct snd_pcm_substream *substream, struct snd_pcm_info __user * _info) { struct snd_pcm_info *info __free(kfree) = NULL; int err; info = kmalloc(sizeof(*info), GFP_KERNEL); if (! info) return -ENOMEM; err = snd_pcm_info(substream, info); if (err >= 0) { if (copy_to_user(_info, info, sizeof(*info))) err = -EFAULT; } return err; } /* macro for simplified cast */ #define PARAM_MASK_BIT(b) (1U << (__force int)(b)) static bool hw_support_mmap(struct snd_pcm_substream *substream) { struct snd_dma_buffer *dmabuf; if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_MMAP)) return false; if (substream->ops->mmap || substream->ops->page) return true; dmabuf = snd_pcm_get_dma_buf(substream); if (!dmabuf) dmabuf = &substream->dma_buffer; switch (dmabuf->dev.type) { case SNDRV_DMA_TYPE_UNKNOWN: /* we can't know the device, so just assume that the driver does * everything right */ return true; case SNDRV_DMA_TYPE_CONTINUOUS: case SNDRV_DMA_TYPE_VMALLOC: return true; default: return dma_can_mmap(dmabuf->dev.dev); } } static int constrain_mask_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { struct snd_pcm_hw_constraints *constrs = &substream->runtime->hw_constraints; struct snd_mask *m; unsigned int k; struct snd_mask old_mask __maybe_unused; int changed; for (k = SNDRV_PCM_HW_PARAM_FIRST_MASK; k <= SNDRV_PCM_HW_PARAM_LAST_MASK; k++) { m = hw_param_mask(params, k); if (snd_mask_empty(m)) return -EINVAL; /* This parameter is not requested to change by a caller. */ if (!(params->rmask & PARAM_MASK_BIT(k))) continue; if (trace_hw_mask_param_enabled()) old_mask = *m; changed = snd_mask_refine(m, constrs_mask(constrs, k)); if (changed < 0) return changed; if (changed == 0) continue; /* Set corresponding flag so that the caller gets it. */ trace_hw_mask_param(substream, k, 0, &old_mask, m); params->cmask |= PARAM_MASK_BIT(k); } return 0; } static int constrain_interval_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { struct snd_pcm_hw_constraints *constrs = &substream->runtime->hw_constraints; struct snd_interval *i; unsigned int k; struct snd_interval old_interval __maybe_unused; int changed; for (k = SNDRV_PCM_HW_PARAM_FIRST_INTERVAL; k <= SNDRV_PCM_HW_PARAM_LAST_INTERVAL; k++) { i = hw_param_interval(params, k); if (snd_interval_empty(i)) return -EINVAL; /* This parameter is not requested to change by a caller. */ if (!(params->rmask & PARAM_MASK_BIT(k))) continue; if (trace_hw_interval_param_enabled()) old_interval = *i; changed = snd_interval_refine(i, constrs_interval(constrs, k)); if (changed < 0) return changed; if (changed == 0) continue; /* Set corresponding flag so that the caller gets it. */ trace_hw_interval_param(substream, k, 0, &old_interval, i); params->cmask |= PARAM_MASK_BIT(k); } return 0; } static int constrain_params_by_rules(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { struct snd_pcm_hw_constraints *constrs = &substream->runtime->hw_constraints; unsigned int k; unsigned int *rstamps __free(kfree) = NULL; unsigned int vstamps[SNDRV_PCM_HW_PARAM_LAST_INTERVAL + 1]; unsigned int stamp; struct snd_pcm_hw_rule *r; unsigned int d; struct snd_mask old_mask __maybe_unused; struct snd_interval old_interval __maybe_unused; bool again; int changed, err = 0; /* * Each application of rule has own sequence number. * * Each member of 'rstamps' array represents the sequence number of * recent application of corresponding rule. */ rstamps = kcalloc(constrs->rules_num, sizeof(unsigned int), GFP_KERNEL); if (!rstamps) return -ENOMEM; /* * Each member of 'vstamps' array represents the sequence number of * recent application of rule in which corresponding parameters were * changed. * * In initial state, elements corresponding to parameters requested by * a caller is 1. For unrequested parameters, corresponding members * have 0 so that the parameters are never changed anymore. */ for (k = 0; k <= SNDRV_PCM_HW_PARAM_LAST_INTERVAL; k++) vstamps[k] = (params->rmask & PARAM_MASK_BIT(k)) ? 1 : 0; /* Due to the above design, actual sequence number starts at 2. */ stamp = 2; retry: /* Apply all rules in order. */ again = false; for (k = 0; k < constrs->rules_num; k++) { r = &constrs->rules[k]; /* * Check condition bits of this rule. When the rule has * some condition bits, parameter without the bits is * never processed. SNDRV_PCM_HW_PARAMS_NO_PERIOD_WAKEUP * is an example of the condition bits. */ if (r->cond && !(r->cond & params->flags)) continue; /* * The 'deps' array includes maximum four dependencies * to SNDRV_PCM_HW_PARAM_XXXs for this rule. The fifth * member of this array is a sentinel and should be * negative value. * * This rule should be processed in this time when dependent * parameters were changed at former applications of the other * rules. */ for (d = 0; r->deps[d] >= 0; d++) { if (vstamps[r->deps[d]] > rstamps[k]) break; } if (r->deps[d] < 0) continue; if (trace_hw_mask_param_enabled()) { if (hw_is_mask(r->var)) old_mask = *hw_param_mask(params, r->var); } if (trace_hw_interval_param_enabled()) { if (hw_is_interval(r->var)) old_interval = *hw_param_interval(params, r->var); } changed = r->func(params, r); if (changed < 0) return changed; /* * When the parameter is changed, notify it to the caller * by corresponding returned bit, then preparing for next * iteration. */ if (changed && r->var >= 0) { if (hw_is_mask(r->var)) { trace_hw_mask_param(substream, r->var, k + 1, &old_mask, hw_param_mask(params, r->var)); } if (hw_is_interval(r->var)) { trace_hw_interval_param(substream, r->var, k + 1, &old_interval, hw_param_interval(params, r->var)); } params->cmask |= PARAM_MASK_BIT(r->var); vstamps[r->var] = stamp; again = true; } rstamps[k] = stamp++; } /* Iterate to evaluate all rules till no parameters are changed. */ if (again) goto retry; return err; } static int fixup_unreferenced_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { const struct snd_interval *i; const struct snd_mask *m; struct snd_mask *m_rw; int err; if (!params->msbits) { i = hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_SAMPLE_BITS); if (snd_interval_single(i)) params->msbits = snd_interval_value(i); m = hw_param_mask_c(params, SNDRV_PCM_HW_PARAM_FORMAT); if (snd_mask_single(m)) { snd_pcm_format_t format = (__force snd_pcm_format_t)snd_mask_min(m); params->msbits = snd_pcm_format_width(format); } } if (params->msbits) { m = hw_param_mask_c(params, SNDRV_PCM_HW_PARAM_FORMAT); if (snd_mask_single(m)) { snd_pcm_format_t format = (__force snd_pcm_format_t)snd_mask_min(m); if (snd_pcm_format_linear(format) && snd_pcm_format_width(format) != params->msbits) { m_rw = hw_param_mask(params, SNDRV_PCM_HW_PARAM_SUBFORMAT); snd_mask_reset(m_rw, (__force unsigned)SNDRV_PCM_SUBFORMAT_MSBITS_MAX); if (snd_mask_empty(m_rw)) return -EINVAL; } } } if (!params->rate_den) { i = hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_RATE); if (snd_interval_single(i)) { params->rate_num = snd_interval_value(i); params->rate_den = 1; } } if (!params->fifo_size) { m = hw_param_mask_c(params, SNDRV_PCM_HW_PARAM_FORMAT); i = hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_CHANNELS); if (snd_mask_single(m) && snd_interval_single(i)) { err = snd_pcm_ops_ioctl(substream, SNDRV_PCM_IOCTL1_FIFO_SIZE, params); if (err < 0) return err; } } if (!params->info) { params->info = substream->runtime->hw.info; params->info &= ~(SNDRV_PCM_INFO_FIFO_IN_FRAMES | SNDRV_PCM_INFO_DRAIN_TRIGGER); if (!hw_support_mmap(substream)) params->info &= ~(SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID); } err = snd_pcm_ops_ioctl(substream, SNDRV_PCM_IOCTL1_SYNC_ID, params); if (err < 0) return err; return 0; } int snd_pcm_hw_refine(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { int err; params->info = 0; params->fifo_size = 0; if (params->rmask & PARAM_MASK_BIT(SNDRV_PCM_HW_PARAM_SAMPLE_BITS)) params->msbits = 0; if (params->rmask & PARAM_MASK_BIT(SNDRV_PCM_HW_PARAM_RATE)) { params->rate_num = 0; params->rate_den = 0; } err = constrain_mask_params(substream, params); if (err < 0) return err; err = constrain_interval_params(substream, params); if (err < 0) return err; err = constrain_params_by_rules(substream, params); if (err < 0) return err; params->rmask = 0; return 0; } EXPORT_SYMBOL(snd_pcm_hw_refine); static int snd_pcm_hw_refine_user(struct snd_pcm_substream *substream, struct snd_pcm_hw_params __user * _params) { struct snd_pcm_hw_params *params __free(kfree) = NULL; int err; params = memdup_user(_params, sizeof(*params)); if (IS_ERR(params)) return PTR_ERR(params); err = snd_pcm_hw_refine(substream, params); if (err < 0) return err; err = fixup_unreferenced_params(substream, params); if (err < 0) return err; if (copy_to_user(_params, params, sizeof(*params))) return -EFAULT; return 0; } static int period_to_usecs(struct snd_pcm_runtime *runtime) { int usecs; if (! runtime->rate) return -1; /* invalid */ /* take 75% of period time as the deadline */ usecs = (750000 / runtime->rate) * runtime->period_size; usecs += ((750000 % runtime->rate) * runtime->period_size) / runtime->rate; return usecs; } static void snd_pcm_set_state(struct snd_pcm_substream *substream, snd_pcm_state_t state) { guard(pcm_stream_lock_irq)(substream); if (substream->runtime->state != SNDRV_PCM_STATE_DISCONNECTED) __snd_pcm_set_state(substream->runtime, state); } static inline void snd_pcm_timer_notify(struct snd_pcm_substream *substream, int event) { #ifdef CONFIG_SND_PCM_TIMER if (substream->timer) snd_timer_notify(substream->timer, event, &substream->runtime->trigger_tstamp); #endif } void snd_pcm_sync_stop(struct snd_pcm_substream *substream, bool sync_irq) { if (substream->runtime && substream->runtime->stop_operating) { substream->runtime->stop_operating = false; if (substream->ops && substream->ops->sync_stop) substream->ops->sync_stop(substream); else if (sync_irq && substream->pcm->card->sync_irq > 0) synchronize_irq(substream->pcm->card->sync_irq); } } /** * snd_pcm_hw_params_choose - choose a configuration defined by @params * @pcm: PCM instance * @params: the hw_params instance * * Choose one configuration from configuration space defined by @params. * The configuration chosen is that obtained fixing in this order: * first access, first format, first subformat, min channels, * min rate, min period time, max buffer size, min tick time * * Return: Zero if successful, or a negative error code on failure. */ static int snd_pcm_hw_params_choose(struct snd_pcm_substream *pcm, struct snd_pcm_hw_params *params) { static const int vars[] = { SNDRV_PCM_HW_PARAM_ACCESS, SNDRV_PCM_HW_PARAM_FORMAT, SNDRV_PCM_HW_PARAM_SUBFORMAT, SNDRV_PCM_HW_PARAM_CHANNELS, SNDRV_PCM_HW_PARAM_RATE, SNDRV_PCM_HW_PARAM_PERIOD_TIME, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, SNDRV_PCM_HW_PARAM_TICK_TIME, -1 }; const int *v; struct snd_mask old_mask __maybe_unused; struct snd_interval old_interval __maybe_unused; int changed; for (v = vars; *v != -1; v++) { /* Keep old parameter to trace. */ if (trace_hw_mask_param_enabled()) { if (hw_is_mask(*v)) old_mask = *hw_param_mask(params, *v); } if (trace_hw_interval_param_enabled()) { if (hw_is_interval(*v)) old_interval = *hw_param_interval(params, *v); } if (*v != SNDRV_PCM_HW_PARAM_BUFFER_SIZE) changed = snd_pcm_hw_param_first(pcm, params, *v, NULL); else changed = snd_pcm_hw_param_last(pcm, params, *v, NULL); if (changed < 0) return changed; if (changed == 0) continue; /* Trace the changed parameter. */ if (hw_is_mask(*v)) { trace_hw_mask_param(pcm, *v, 0, &old_mask, hw_param_mask(params, *v)); } if (hw_is_interval(*v)) { trace_hw_interval_param(pcm, *v, 0, &old_interval, hw_param_interval(params, *v)); } } return 0; } /* acquire buffer_mutex; if it's in r/w operation, return -EBUSY, otherwise * block the further r/w operations */ static int snd_pcm_buffer_access_lock(struct snd_pcm_runtime *runtime) { if (!atomic_dec_unless_positive(&runtime->buffer_accessing)) return -EBUSY; mutex_lock(&runtime->buffer_mutex); return 0; /* keep buffer_mutex, unlocked by below */ } /* release buffer_mutex and clear r/w access flag */ static void snd_pcm_buffer_access_unlock(struct snd_pcm_runtime *runtime) { mutex_unlock(&runtime->buffer_mutex); atomic_inc(&runtime->buffer_accessing); } /* fill the PCM buffer with the current silence format; called from pcm_oss.c */ void snd_pcm_runtime_buffer_set_silence(struct snd_pcm_runtime *runtime) { snd_pcm_buffer_access_lock(runtime); if (runtime->dma_area) snd_pcm_format_set_silence(runtime->format, runtime->dma_area, bytes_to_samples(runtime, runtime->dma_bytes)); snd_pcm_buffer_access_unlock(runtime); } EXPORT_SYMBOL_GPL(snd_pcm_runtime_buffer_set_silence); #if IS_ENABLED(CONFIG_SND_PCM_OSS) #define is_oss_stream(substream) ((substream)->oss.oss) #else #define is_oss_stream(substream) false #endif static int snd_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { struct snd_pcm_runtime *runtime; int err, usecs; unsigned int bits; snd_pcm_uframes_t frames; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; err = snd_pcm_buffer_access_lock(runtime); if (err < 0) return err; scoped_guard(pcm_stream_lock_irq, substream) { switch (runtime->state) { case SNDRV_PCM_STATE_OPEN: case SNDRV_PCM_STATE_SETUP: case SNDRV_PCM_STATE_PREPARED: if (!is_oss_stream(substream) && atomic_read(&substream->mmap_count)) err = -EBADFD; break; default: err = -EBADFD; break; } } if (err) goto unlock; snd_pcm_sync_stop(substream, true); params->rmask = ~0U; err = snd_pcm_hw_refine(substream, params); if (err < 0) goto _error; err = snd_pcm_hw_params_choose(substream, params); if (err < 0) goto _error; err = fixup_unreferenced_params(substream, params); if (err < 0) goto _error; if (substream->managed_buffer_alloc) { err = snd_pcm_lib_malloc_pages(substream, params_buffer_bytes(params)); if (err < 0) goto _error; runtime->buffer_changed = err > 0; } if (substream->ops->hw_params != NULL) { err = substream->ops->hw_params(substream, params); if (err < 0) goto _error; } runtime->access = params_access(params); runtime->format = params_format(params); runtime->subformat = params_subformat(params); runtime->channels = params_channels(params); runtime->rate = params_rate(params); runtime->period_size = params_period_size(params); runtime->periods = params_periods(params); runtime->buffer_size = params_buffer_size(params); runtime->info = params->info; runtime->rate_num = params->rate_num; runtime->rate_den = params->rate_den; runtime->no_period_wakeup = (params->info & SNDRV_PCM_INFO_NO_PERIOD_WAKEUP) && (params->flags & SNDRV_PCM_HW_PARAMS_NO_PERIOD_WAKEUP); bits = snd_pcm_format_physical_width(runtime->format); runtime->sample_bits = bits; bits *= runtime->channels; runtime->frame_bits = bits; frames = 1; while (bits % 8 != 0) { bits *= 2; frames *= 2; } runtime->byte_align = bits / 8; runtime->min_align = frames; /* Default sw params */ runtime->tstamp_mode = SNDRV_PCM_TSTAMP_NONE; runtime->period_step = 1; runtime->control->avail_min = runtime->period_size; runtime->start_threshold = 1; runtime->stop_threshold = runtime->buffer_size; runtime->silence_threshold = 0; runtime->silence_size = 0; runtime->boundary = runtime->buffer_size; while (runtime->boundary * 2 <= LONG_MAX - runtime->buffer_size) runtime->boundary *= 2; /* clear the buffer for avoiding possible kernel info leaks */ if (runtime->dma_area && !substream->ops->copy) { size_t size = runtime->dma_bytes; if (runtime->info & SNDRV_PCM_INFO_MMAP) size = PAGE_ALIGN(size); memset(runtime->dma_area, 0, size); } snd_pcm_timer_resolution_change(substream); snd_pcm_set_state(substream, SNDRV_PCM_STATE_SETUP); if (cpu_latency_qos_request_active(&substream->latency_pm_qos_req)) cpu_latency_qos_remove_request(&substream->latency_pm_qos_req); usecs = period_to_usecs(runtime); if (usecs >= 0) cpu_latency_qos_add_request(&substream->latency_pm_qos_req, usecs); err = 0; _error: if (err) { /* hardware might be unusable from this time, * so we force application to retry to set * the correct hardware parameter settings */ snd_pcm_set_state(substream, SNDRV_PCM_STATE_OPEN); if (substream->ops->hw_free != NULL) substream->ops->hw_free(substream); if (substream->managed_buffer_alloc) snd_pcm_lib_free_pages(substream); } unlock: snd_pcm_buffer_access_unlock(runtime); return err; } static int snd_pcm_hw_params_user(struct snd_pcm_substream *substream, struct snd_pcm_hw_params __user * _params) { struct snd_pcm_hw_params *params __free(kfree) = NULL; int err; params = memdup_user(_params, sizeof(*params)); if (IS_ERR(params)) return PTR_ERR(params); err = snd_pcm_hw_params(substream, params); if (err < 0) return err; if (copy_to_user(_params, params, sizeof(*params))) return -EFAULT; return err; } static int do_hw_free(struct snd_pcm_substream *substream) { int result = 0; snd_pcm_sync_stop(substream, true); if (substream->ops->hw_free) result = substream->ops->hw_free(substream); if (substream->managed_buffer_alloc) snd_pcm_lib_free_pages(substream); return result; } static int snd_pcm_hw_free(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime; int result = 0; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; result = snd_pcm_buffer_access_lock(runtime); if (result < 0) return result; scoped_guard(pcm_stream_lock_irq, substream) { switch (runtime->state) { case SNDRV_PCM_STATE_SETUP: case SNDRV_PCM_STATE_PREPARED: if (atomic_read(&substream->mmap_count)) result = -EBADFD; break; default: result = -EBADFD; break; } } if (result) goto unlock; result = do_hw_free(substream); snd_pcm_set_state(substream, SNDRV_PCM_STATE_OPEN); cpu_latency_qos_remove_request(&substream->latency_pm_qos_req); unlock: snd_pcm_buffer_access_unlock(runtime); return result; } static int snd_pcm_sw_params(struct snd_pcm_substream *substream, struct snd_pcm_sw_params *params) { struct snd_pcm_runtime *runtime; int err; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; scoped_guard(pcm_stream_lock_irq, substream) { if (runtime->state == SNDRV_PCM_STATE_OPEN) return -EBADFD; } if (params->tstamp_mode < 0 || params->tstamp_mode > SNDRV_PCM_TSTAMP_LAST) return -EINVAL; if (params->proto >= SNDRV_PROTOCOL_VERSION(2, 0, 12) && params->tstamp_type > SNDRV_PCM_TSTAMP_TYPE_LAST) return -EINVAL; if (params->avail_min == 0) return -EINVAL; if (params->silence_size >= runtime->boundary) { if (params->silence_threshold != 0) return -EINVAL; } else { if (params->silence_size > params->silence_threshold) return -EINVAL; if (params->silence_threshold > runtime->buffer_size) return -EINVAL; } err = 0; scoped_guard(pcm_stream_lock_irq, substream) { runtime->tstamp_mode = params->tstamp_mode; if (params->proto >= SNDRV_PROTOCOL_VERSION(2, 0, 12)) runtime->tstamp_type = params->tstamp_type; runtime->period_step = params->period_step; runtime->control->avail_min = params->avail_min; runtime->start_threshold = params->start_threshold; runtime->stop_threshold = params->stop_threshold; runtime->silence_threshold = params->silence_threshold; runtime->silence_size = params->silence_size; params->boundary = runtime->boundary; if (snd_pcm_running(substream)) { if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && runtime->silence_size > 0) snd_pcm_playback_silence(substream, ULONG_MAX); err = snd_pcm_update_state(substream, runtime); } } return err; } static int snd_pcm_sw_params_user(struct snd_pcm_substream *substream, struct snd_pcm_sw_params __user * _params) { struct snd_pcm_sw_params params; int err; if (copy_from_user(¶ms, _params, sizeof(params))) return -EFAULT; err = snd_pcm_sw_params(substream, ¶ms); if (copy_to_user(_params, ¶ms, sizeof(params))) return -EFAULT; return err; } static inline snd_pcm_uframes_t snd_pcm_calc_delay(struct snd_pcm_substream *substream) { snd_pcm_uframes_t delay; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) delay = snd_pcm_playback_hw_avail(substream->runtime); else delay = snd_pcm_capture_avail(substream->runtime); return delay + substream->runtime->delay; } int snd_pcm_status64(struct snd_pcm_substream *substream, struct snd_pcm_status64 *status) { struct snd_pcm_runtime *runtime = substream->runtime; guard(pcm_stream_lock_irq)(substream); snd_pcm_unpack_audio_tstamp_config(status->audio_tstamp_data, &runtime->audio_tstamp_config); /* backwards compatible behavior */ if (runtime->audio_tstamp_config.type_requested == SNDRV_PCM_AUDIO_TSTAMP_TYPE_COMPAT) { if (runtime->hw.info & SNDRV_PCM_INFO_HAS_WALL_CLOCK) runtime->audio_tstamp_config.type_requested = SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK; else runtime->audio_tstamp_config.type_requested = SNDRV_PCM_AUDIO_TSTAMP_TYPE_DEFAULT; runtime->audio_tstamp_report.valid = 0; } else runtime->audio_tstamp_report.valid = 1; status->state = runtime->state; status->suspended_state = runtime->suspended_state; if (status->state == SNDRV_PCM_STATE_OPEN) return 0; status->trigger_tstamp_sec = runtime->trigger_tstamp.tv_sec; status->trigger_tstamp_nsec = runtime->trigger_tstamp.tv_nsec; if (snd_pcm_running(substream)) { snd_pcm_update_hw_ptr(substream); if (runtime->tstamp_mode == SNDRV_PCM_TSTAMP_ENABLE) { status->tstamp_sec = runtime->status->tstamp.tv_sec; status->tstamp_nsec = runtime->status->tstamp.tv_nsec; status->driver_tstamp_sec = runtime->driver_tstamp.tv_sec; status->driver_tstamp_nsec = runtime->driver_tstamp.tv_nsec; status->audio_tstamp_sec = runtime->status->audio_tstamp.tv_sec; status->audio_tstamp_nsec = runtime->status->audio_tstamp.tv_nsec; if (runtime->audio_tstamp_report.valid == 1) /* backwards compatibility, no report provided in COMPAT mode */ snd_pcm_pack_audio_tstamp_report(&status->audio_tstamp_data, &status->audio_tstamp_accuracy, &runtime->audio_tstamp_report); goto _tstamp_end; } } else { /* get tstamp only in fallback mode and only if enabled */ if (runtime->tstamp_mode == SNDRV_PCM_TSTAMP_ENABLE) { struct timespec64 tstamp; snd_pcm_gettime(runtime, &tstamp); status->tstamp_sec = tstamp.tv_sec; status->tstamp_nsec = tstamp.tv_nsec; } } _tstamp_end: status->appl_ptr = runtime->control->appl_ptr; status->hw_ptr = runtime->status->hw_ptr; status->avail = snd_pcm_avail(substream); status->delay = snd_pcm_running(substream) ? snd_pcm_calc_delay(substream) : 0; status->avail_max = runtime->avail_max; status->overrange = runtime->overrange; runtime->avail_max = 0; runtime->overrange = 0; return 0; } static int snd_pcm_status_user64(struct snd_pcm_substream *substream, struct snd_pcm_status64 __user * _status, bool ext) { struct snd_pcm_status64 status; int res; memset(&status, 0, sizeof(status)); /* * with extension, parameters are read/write, * get audio_tstamp_data from user, * ignore rest of status structure */ if (ext && get_user(status.audio_tstamp_data, (u32 __user *)(&_status->audio_tstamp_data))) return -EFAULT; res = snd_pcm_status64(substream, &status); if (res < 0) return res; if (copy_to_user(_status, &status, sizeof(status))) return -EFAULT; return 0; } static int snd_pcm_status_user32(struct snd_pcm_substream *substream, struct snd_pcm_status32 __user * _status, bool ext) { struct snd_pcm_status64 status64; struct snd_pcm_status32 status32; int res; memset(&status64, 0, sizeof(status64)); memset(&status32, 0, sizeof(status32)); /* * with extension, parameters are read/write, * get audio_tstamp_data from user, * ignore rest of status structure */ if (ext && get_user(status64.audio_tstamp_data, (u32 __user *)(&_status->audio_tstamp_data))) return -EFAULT; res = snd_pcm_status64(substream, &status64); if (res < 0) return res; status32 = (struct snd_pcm_status32) { .state = status64.state, .trigger_tstamp_sec = status64.trigger_tstamp_sec, .trigger_tstamp_nsec = status64.trigger_tstamp_nsec, .tstamp_sec = status64.tstamp_sec, .tstamp_nsec = status64.tstamp_nsec, .appl_ptr = status64.appl_ptr, .hw_ptr = status64.hw_ptr, .delay = status64.delay, .avail = status64.avail, .avail_max = status64.avail_max, .overrange = status64.overrange, .suspended_state = status64.suspended_state, .audio_tstamp_data = status64.audio_tstamp_data, .audio_tstamp_sec = status64.audio_tstamp_sec, .audio_tstamp_nsec = status64.audio_tstamp_nsec, .driver_tstamp_sec = status64.audio_tstamp_sec, .driver_tstamp_nsec = status64.audio_tstamp_nsec, .audio_tstamp_accuracy = status64.audio_tstamp_accuracy, }; if (copy_to_user(_status, &status32, sizeof(status32))) return -EFAULT; return 0; } static int snd_pcm_channel_info(struct snd_pcm_substream *substream, struct snd_pcm_channel_info * info) { struct snd_pcm_runtime *runtime; unsigned int channel; channel = info->channel; runtime = substream->runtime; scoped_guard(pcm_stream_lock_irq, substream) { if (runtime->state == SNDRV_PCM_STATE_OPEN) return -EBADFD; } if (channel >= runtime->channels) return -EINVAL; memset(info, 0, sizeof(*info)); info->channel = channel; return snd_pcm_ops_ioctl(substream, SNDRV_PCM_IOCTL1_CHANNEL_INFO, info); } static int snd_pcm_channel_info_user(struct snd_pcm_substream *substream, struct snd_pcm_channel_info __user * _info) { struct snd_pcm_channel_info info; int res; if (copy_from_user(&info, _info, sizeof(info))) return -EFAULT; res = snd_pcm_channel_info(substream, &info); if (res < 0) return res; if (copy_to_user(_info, &info, sizeof(info))) return -EFAULT; return 0; } static void snd_pcm_trigger_tstamp(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->trigger_master == NULL) return; if (runtime->trigger_master == substream) { if (!runtime->trigger_tstamp_latched) snd_pcm_gettime(runtime, &runtime->trigger_tstamp); } else { snd_pcm_trigger_tstamp(runtime->trigger_master); runtime->trigger_tstamp = runtime->trigger_master->runtime->trigger_tstamp; } runtime->trigger_master = NULL; } #define ACTION_ARG_IGNORE (__force snd_pcm_state_t)0 struct action_ops { int (*pre_action)(struct snd_pcm_substream *substream, snd_pcm_state_t state); int (*do_action)(struct snd_pcm_substream *substream, snd_pcm_state_t state); void (*undo_action)(struct snd_pcm_substream *substream, snd_pcm_state_t state); void (*post_action)(struct snd_pcm_substream *substream, snd_pcm_state_t state); }; /* * this functions is core for handling of linked stream * Note: the stream state might be changed also on failure * Note2: call with calling stream lock + link lock */ static int snd_pcm_action_group(const struct action_ops *ops, struct snd_pcm_substream *substream, snd_pcm_state_t state, bool stream_lock) { struct snd_pcm_substream *s = NULL; struct snd_pcm_substream *s1; int res = 0, depth = 1; snd_pcm_group_for_each_entry(s, substream) { if (s != substream) { if (!stream_lock) mutex_lock_nested(&s->runtime->buffer_mutex, depth); else if (s->pcm->nonatomic) mutex_lock_nested(&s->self_group.mutex, depth); else spin_lock_nested(&s->self_group.lock, depth); depth++; } res = ops->pre_action(s, state); if (res < 0) goto _unlock; } snd_pcm_group_for_each_entry(s, substream) { res = ops->do_action(s, state); if (res < 0) { if (ops->undo_action) { snd_pcm_group_for_each_entry(s1, substream) { if (s1 == s) /* failed stream */ break; ops->undo_action(s1, state); } } s = NULL; /* unlock all */ goto _unlock; } } snd_pcm_group_for_each_entry(s, substream) { ops->post_action(s, state); } _unlock: /* unlock streams */ snd_pcm_group_for_each_entry(s1, substream) { if (s1 != substream) { if (!stream_lock) mutex_unlock(&s1->runtime->buffer_mutex); else if (s1->pcm->nonatomic) mutex_unlock(&s1->self_group.mutex); else spin_unlock(&s1->self_group.lock); } if (s1 == s) /* end */ break; } return res; } /* * Note: call with stream lock */ static int snd_pcm_action_single(const struct action_ops *ops, struct snd_pcm_substream *substream, snd_pcm_state_t state) { int res; res = ops->pre_action(substream, state); if (res < 0) return res; res = ops->do_action(substream, state); if (res == 0) ops->post_action(substream, state); else if (ops->undo_action) ops->undo_action(substream, state); return res; } static void snd_pcm_group_assign(struct snd_pcm_substream *substream, struct snd_pcm_group *new_group) { substream->group = new_group; list_move(&substream->link_list, &new_group->substreams); } /* * Unref and unlock the group, but keep the stream lock; * when the group becomes empty and no longer referred, destroy itself */ static void snd_pcm_group_unref(struct snd_pcm_group *group, struct snd_pcm_substream *substream) { bool do_free; if (!group) return; do_free = refcount_dec_and_test(&group->refs); snd_pcm_group_unlock(group, substream->pcm->nonatomic); if (do_free) kfree(group); } /* * Lock the group inside a stream lock and reference it; * return the locked group object, or NULL if not linked */ static struct snd_pcm_group * snd_pcm_stream_group_ref(struct snd_pcm_substream *substream) { bool nonatomic = substream->pcm->nonatomic; struct snd_pcm_group *group; bool trylock; for (;;) { if (!snd_pcm_stream_linked(substream)) return NULL; group = substream->group; /* block freeing the group object */ refcount_inc(&group->refs); trylock = nonatomic ? mutex_trylock(&group->mutex) : spin_trylock(&group->lock); if (trylock) break; /* OK */ /* re-lock for avoiding ABBA deadlock */ snd_pcm_stream_unlock(substream); snd_pcm_group_lock(group, nonatomic); snd_pcm_stream_lock(substream); /* check the group again; the above opens a small race window */ if (substream->group == group) break; /* OK */ /* group changed, try again */ snd_pcm_group_unref(group, substream); } return group; } /* * Note: call with stream lock */ static int snd_pcm_action(const struct action_ops *ops, struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_group *group; int res; group = snd_pcm_stream_group_ref(substream); if (group) res = snd_pcm_action_group(ops, substream, state, true); else res = snd_pcm_action_single(ops, substream, state); snd_pcm_group_unref(group, substream); return res; } /* * Note: don't use any locks before */ static int snd_pcm_action_lock_irq(const struct action_ops *ops, struct snd_pcm_substream *substream, snd_pcm_state_t state) { guard(pcm_stream_lock_irq)(substream); return snd_pcm_action(ops, substream, state); } /* */ static int snd_pcm_action_nonatomic(const struct action_ops *ops, struct snd_pcm_substream *substream, snd_pcm_state_t state) { int res; /* Guarantee the group members won't change during non-atomic action */ guard(rwsem_read)(&snd_pcm_link_rwsem); res = snd_pcm_buffer_access_lock(substream->runtime); if (res < 0) return res; if (snd_pcm_stream_linked(substream)) res = snd_pcm_action_group(ops, substream, state, false); else res = snd_pcm_action_single(ops, substream, state); snd_pcm_buffer_access_unlock(substream->runtime); return res; } /* * start callbacks */ static int snd_pcm_pre_start(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->state != SNDRV_PCM_STATE_PREPARED) return -EBADFD; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && !snd_pcm_playback_data(substream)) return -EPIPE; runtime->trigger_tstamp_latched = false; runtime->trigger_master = substream; return 0; } static int snd_pcm_do_start(struct snd_pcm_substream *substream, snd_pcm_state_t state) { int err; if (substream->runtime->trigger_master != substream) return 0; err = substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_START); /* XRUN happened during the start */ if (err == -EPIPE) __snd_pcm_set_state(substream->runtime, SNDRV_PCM_STATE_XRUN); return err; } static void snd_pcm_undo_start(struct snd_pcm_substream *substream, snd_pcm_state_t state) { if (substream->runtime->trigger_master == substream) { substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_STOP); substream->runtime->stop_operating = true; } } static void snd_pcm_post_start(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_trigger_tstamp(substream); runtime->hw_ptr_jiffies = jiffies; runtime->hw_ptr_buffer_jiffies = (runtime->buffer_size * HZ) / runtime->rate; __snd_pcm_set_state(runtime, state); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && runtime->silence_size > 0) snd_pcm_playback_silence(substream, ULONG_MAX); snd_pcm_timer_notify(substream, SNDRV_TIMER_EVENT_MSTART); } static const struct action_ops snd_pcm_action_start = { .pre_action = snd_pcm_pre_start, .do_action = snd_pcm_do_start, .undo_action = snd_pcm_undo_start, .post_action = snd_pcm_post_start }; /** * snd_pcm_start - start all linked streams * @substream: the PCM substream instance * * Return: Zero if successful, or a negative error code. * The stream lock must be acquired before calling this function. */ int snd_pcm_start(struct snd_pcm_substream *substream) { return snd_pcm_action(&snd_pcm_action_start, substream, SNDRV_PCM_STATE_RUNNING); } /* take the stream lock and start the streams */ static int snd_pcm_start_lock_irq(struct snd_pcm_substream *substream) { return snd_pcm_action_lock_irq(&snd_pcm_action_start, substream, SNDRV_PCM_STATE_RUNNING); } /* * stop callbacks */ static int snd_pcm_pre_stop(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->state == SNDRV_PCM_STATE_OPEN) return -EBADFD; runtime->trigger_master = substream; return 0; } static int snd_pcm_do_stop(struct snd_pcm_substream *substream, snd_pcm_state_t state) { if (substream->runtime->trigger_master == substream && snd_pcm_running(substream)) { substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_STOP); substream->runtime->stop_operating = true; } return 0; /* unconditionally stop all substreams */ } static void snd_pcm_post_stop(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->state != state) { snd_pcm_trigger_tstamp(substream); __snd_pcm_set_state(runtime, state); snd_pcm_timer_notify(substream, SNDRV_TIMER_EVENT_MSTOP); } wake_up(&runtime->sleep); wake_up(&runtime->tsleep); } static const struct action_ops snd_pcm_action_stop = { .pre_action = snd_pcm_pre_stop, .do_action = snd_pcm_do_stop, .post_action = snd_pcm_post_stop }; /** * snd_pcm_stop - try to stop all running streams in the substream group * @substream: the PCM substream instance * @state: PCM state after stopping the stream * * The state of each stream is then changed to the given state unconditionally. * * Return: Zero if successful, or a negative error code. */ int snd_pcm_stop(struct snd_pcm_substream *substream, snd_pcm_state_t state) { return snd_pcm_action(&snd_pcm_action_stop, substream, state); } EXPORT_SYMBOL(snd_pcm_stop); /** * snd_pcm_drain_done - stop the DMA only when the given stream is playback * @substream: the PCM substream * * After stopping, the state is changed to SETUP. * Unlike snd_pcm_stop(), this affects only the given stream. * * Return: Zero if successful, or a negative error code. */ int snd_pcm_drain_done(struct snd_pcm_substream *substream) { return snd_pcm_action_single(&snd_pcm_action_stop, substream, SNDRV_PCM_STATE_SETUP); } /** * snd_pcm_stop_xrun - stop the running streams as XRUN * @substream: the PCM substream instance * * This stops the given running substream (and all linked substreams) as XRUN. * Unlike snd_pcm_stop(), this function takes the substream lock by itself. * * Return: Zero if successful, or a negative error code. */ int snd_pcm_stop_xrun(struct snd_pcm_substream *substream) { guard(pcm_stream_lock_irqsave)(substream); if (substream->runtime && snd_pcm_running(substream)) __snd_pcm_xrun(substream); return 0; } EXPORT_SYMBOL_GPL(snd_pcm_stop_xrun); /* * pause callbacks: pass boolean (to start pause or resume) as state argument */ #define pause_pushed(state) (__force bool)(state) static int snd_pcm_pre_pause(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (!(runtime->info & SNDRV_PCM_INFO_PAUSE)) return -ENOSYS; if (pause_pushed(state)) { if (runtime->state != SNDRV_PCM_STATE_RUNNING) return -EBADFD; } else if (runtime->state != SNDRV_PCM_STATE_PAUSED) return -EBADFD; runtime->trigger_master = substream; return 0; } static int snd_pcm_do_pause(struct snd_pcm_substream *substream, snd_pcm_state_t state) { if (substream->runtime->trigger_master != substream) return 0; /* The jiffies check in snd_pcm_update_hw_ptr*() is done by * a delta between the current jiffies, this gives a large enough * delta, effectively to skip the check once. */ substream->runtime->hw_ptr_jiffies = jiffies - HZ * 1000; return substream->ops->trigger(substream, pause_pushed(state) ? SNDRV_PCM_TRIGGER_PAUSE_PUSH : SNDRV_PCM_TRIGGER_PAUSE_RELEASE); } static void snd_pcm_undo_pause(struct snd_pcm_substream *substream, snd_pcm_state_t state) { if (substream->runtime->trigger_master == substream) substream->ops->trigger(substream, pause_pushed(state) ? SNDRV_PCM_TRIGGER_PAUSE_RELEASE : SNDRV_PCM_TRIGGER_PAUSE_PUSH); } static void snd_pcm_post_pause(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_trigger_tstamp(substream); if (pause_pushed(state)) { __snd_pcm_set_state(runtime, SNDRV_PCM_STATE_PAUSED); snd_pcm_timer_notify(substream, SNDRV_TIMER_EVENT_MPAUSE); wake_up(&runtime->sleep); wake_up(&runtime->tsleep); } else { __snd_pcm_set_state(runtime, SNDRV_PCM_STATE_RUNNING); snd_pcm_timer_notify(substream, SNDRV_TIMER_EVENT_MCONTINUE); } } static const struct action_ops snd_pcm_action_pause = { .pre_action = snd_pcm_pre_pause, .do_action = snd_pcm_do_pause, .undo_action = snd_pcm_undo_pause, .post_action = snd_pcm_post_pause }; /* * Push/release the pause for all linked streams. */ static int snd_pcm_pause(struct snd_pcm_substream *substream, bool push) { return snd_pcm_action(&snd_pcm_action_pause, substream, (__force snd_pcm_state_t)push); } static int snd_pcm_pause_lock_irq(struct snd_pcm_substream *substream, bool push) { return snd_pcm_action_lock_irq(&snd_pcm_action_pause, substream, (__force snd_pcm_state_t)push); } #ifdef CONFIG_PM /* suspend callback: state argument ignored */ static int snd_pcm_pre_suspend(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; switch (runtime->state) { case SNDRV_PCM_STATE_SUSPENDED: return -EBUSY; /* unresumable PCM state; return -EBUSY for skipping suspend */ case SNDRV_PCM_STATE_OPEN: case SNDRV_PCM_STATE_SETUP: case SNDRV_PCM_STATE_DISCONNECTED: return -EBUSY; } runtime->trigger_master = substream; return 0; } static int snd_pcm_do_suspend(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->trigger_master != substream) return 0; if (! snd_pcm_running(substream)) return 0; substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_SUSPEND); runtime->stop_operating = true; return 0; /* suspend unconditionally */ } static void snd_pcm_post_suspend(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_trigger_tstamp(substream); runtime->suspended_state = runtime->state; runtime->status->suspended_state = runtime->suspended_state; __snd_pcm_set_state(runtime, SNDRV_PCM_STATE_SUSPENDED); snd_pcm_timer_notify(substream, SNDRV_TIMER_EVENT_MSUSPEND); wake_up(&runtime->sleep); wake_up(&runtime->tsleep); } static const struct action_ops snd_pcm_action_suspend = { .pre_action = snd_pcm_pre_suspend, .do_action = snd_pcm_do_suspend, .post_action = snd_pcm_post_suspend }; /* * snd_pcm_suspend - trigger SUSPEND to all linked streams * @substream: the PCM substream * * After this call, all streams are changed to SUSPENDED state. * * Return: Zero if successful, or a negative error code. */ static int snd_pcm_suspend(struct snd_pcm_substream *substream) { guard(pcm_stream_lock_irqsave)(substream); return snd_pcm_action(&snd_pcm_action_suspend, substream, ACTION_ARG_IGNORE); } /** * snd_pcm_suspend_all - trigger SUSPEND to all substreams in the given pcm * @pcm: the PCM instance * * After this call, all streams are changed to SUSPENDED state. * * Return: Zero if successful (or @pcm is %NULL), or a negative error code. */ int snd_pcm_suspend_all(struct snd_pcm *pcm) { struct snd_pcm_substream *substream; int stream, err = 0; if (! pcm) return 0; for_each_pcm_substream(pcm, stream, substream) { /* FIXME: the open/close code should lock this as well */ if (!substream->runtime) continue; /* * Skip BE dai link PCM's that are internal and may * not have their substream ops set. */ if (!substream->ops) continue; err = snd_pcm_suspend(substream); if (err < 0 && err != -EBUSY) return err; } for_each_pcm_substream(pcm, stream, substream) snd_pcm_sync_stop(substream, false); return 0; } EXPORT_SYMBOL(snd_pcm_suspend_all); /* resume callbacks: state argument ignored */ static int snd_pcm_pre_resume(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->state != SNDRV_PCM_STATE_SUSPENDED) return -EBADFD; if (!(runtime->info & SNDRV_PCM_INFO_RESUME)) return -ENOSYS; runtime->trigger_master = substream; return 0; } static int snd_pcm_do_resume(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->trigger_master != substream) return 0; /* DMA not running previously? */ if (runtime->suspended_state != SNDRV_PCM_STATE_RUNNING && (runtime->suspended_state != SNDRV_PCM_STATE_DRAINING || substream->stream != SNDRV_PCM_STREAM_PLAYBACK)) return 0; return substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_RESUME); } static void snd_pcm_undo_resume(struct snd_pcm_substream *substream, snd_pcm_state_t state) { if (substream->runtime->trigger_master == substream && snd_pcm_running(substream)) substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_SUSPEND); } static void snd_pcm_post_resume(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_trigger_tstamp(substream); __snd_pcm_set_state(runtime, runtime->suspended_state); snd_pcm_timer_notify(substream, SNDRV_TIMER_EVENT_MRESUME); } static const struct action_ops snd_pcm_action_resume = { .pre_action = snd_pcm_pre_resume, .do_action = snd_pcm_do_resume, .undo_action = snd_pcm_undo_resume, .post_action = snd_pcm_post_resume }; static int snd_pcm_resume(struct snd_pcm_substream *substream) { return snd_pcm_action_lock_irq(&snd_pcm_action_resume, substream, ACTION_ARG_IGNORE); } #else static int snd_pcm_resume(struct snd_pcm_substream *substream) { return -ENOSYS; } #endif /* CONFIG_PM */ /* * xrun ioctl * * Change the RUNNING stream(s) to XRUN state. */ static int snd_pcm_xrun(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; guard(pcm_stream_lock_irq)(substream); switch (runtime->state) { case SNDRV_PCM_STATE_XRUN: return 0; /* already there */ case SNDRV_PCM_STATE_RUNNING: __snd_pcm_xrun(substream); return 0; default: return -EBADFD; } } /* * reset ioctl */ /* reset callbacks: state argument ignored */ static int snd_pcm_pre_reset(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; switch (runtime->state) { case SNDRV_PCM_STATE_RUNNING: case SNDRV_PCM_STATE_PREPARED: case SNDRV_PCM_STATE_PAUSED: case SNDRV_PCM_STATE_SUSPENDED: return 0; default: return -EBADFD; } } static int snd_pcm_do_reset(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; int err = snd_pcm_ops_ioctl(substream, SNDRV_PCM_IOCTL1_RESET, NULL); if (err < 0) return err; guard(pcm_stream_lock_irq)(substream); runtime->hw_ptr_base = 0; runtime->hw_ptr_interrupt = runtime->status->hw_ptr - runtime->status->hw_ptr % runtime->period_size; runtime->silence_start = runtime->status->hw_ptr; runtime->silence_filled = 0; return 0; } static void snd_pcm_post_reset(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; guard(pcm_stream_lock_irq)(substream); runtime->control->appl_ptr = runtime->status->hw_ptr; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && runtime->silence_size > 0) snd_pcm_playback_silence(substream, ULONG_MAX); } static const struct action_ops snd_pcm_action_reset = { .pre_action = snd_pcm_pre_reset, .do_action = snd_pcm_do_reset, .post_action = snd_pcm_post_reset }; static int snd_pcm_reset(struct snd_pcm_substream *substream) { return snd_pcm_action_nonatomic(&snd_pcm_action_reset, substream, ACTION_ARG_IGNORE); } /* * prepare ioctl */ /* pass f_flags as state argument */ static int snd_pcm_pre_prepare(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; int f_flags = (__force int)state; if (runtime->state == SNDRV_PCM_STATE_OPEN || runtime->state == SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; if (snd_pcm_running(substream)) return -EBUSY; substream->f_flags = f_flags; return 0; } static int snd_pcm_do_prepare(struct snd_pcm_substream *substream, snd_pcm_state_t state) { int err; snd_pcm_sync_stop(substream, true); err = substream->ops->prepare(substream); if (err < 0) return err; return snd_pcm_do_reset(substream, state); } static void snd_pcm_post_prepare(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; runtime->control->appl_ptr = runtime->status->hw_ptr; snd_pcm_set_state(substream, SNDRV_PCM_STATE_PREPARED); } static const struct action_ops snd_pcm_action_prepare = { .pre_action = snd_pcm_pre_prepare, .do_action = snd_pcm_do_prepare, .post_action = snd_pcm_post_prepare }; /** * snd_pcm_prepare - prepare the PCM substream to be triggerable * @substream: the PCM substream instance * @file: file to refer f_flags * * Return: Zero if successful, or a negative error code. */ static int snd_pcm_prepare(struct snd_pcm_substream *substream, struct file *file) { int f_flags; if (file) f_flags = file->f_flags; else f_flags = substream->f_flags; scoped_guard(pcm_stream_lock_irq, substream) { switch (substream->runtime->state) { case SNDRV_PCM_STATE_PAUSED: snd_pcm_pause(substream, false); fallthrough; case SNDRV_PCM_STATE_SUSPENDED: snd_pcm_stop(substream, SNDRV_PCM_STATE_SETUP); break; } } return snd_pcm_action_nonatomic(&snd_pcm_action_prepare, substream, (__force snd_pcm_state_t)f_flags); } /* * drain ioctl */ /* drain init callbacks: state argument ignored */ static int snd_pcm_pre_drain_init(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; switch (runtime->state) { case SNDRV_PCM_STATE_OPEN: case SNDRV_PCM_STATE_DISCONNECTED: case SNDRV_PCM_STATE_SUSPENDED: return -EBADFD; } runtime->trigger_master = substream; return 0; } static int snd_pcm_do_drain_init(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { switch (runtime->state) { case SNDRV_PCM_STATE_PREPARED: /* start playback stream if possible */ if (! snd_pcm_playback_empty(substream)) { snd_pcm_do_start(substream, SNDRV_PCM_STATE_DRAINING); snd_pcm_post_start(substream, SNDRV_PCM_STATE_DRAINING); } else { __snd_pcm_set_state(runtime, SNDRV_PCM_STATE_SETUP); } break; case SNDRV_PCM_STATE_RUNNING: __snd_pcm_set_state(runtime, SNDRV_PCM_STATE_DRAINING); break; case SNDRV_PCM_STATE_XRUN: __snd_pcm_set_state(runtime, SNDRV_PCM_STATE_SETUP); break; default: break; } } else { /* stop running stream */ if (runtime->state == SNDRV_PCM_STATE_RUNNING) { snd_pcm_state_t new_state; new_state = snd_pcm_capture_avail(runtime) > 0 ? SNDRV_PCM_STATE_DRAINING : SNDRV_PCM_STATE_SETUP; snd_pcm_do_stop(substream, new_state); snd_pcm_post_stop(substream, new_state); } } if (runtime->state == SNDRV_PCM_STATE_DRAINING && runtime->trigger_master == substream && (runtime->hw.info & SNDRV_PCM_INFO_DRAIN_TRIGGER)) return substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_DRAIN); return 0; } static void snd_pcm_post_drain_init(struct snd_pcm_substream *substream, snd_pcm_state_t state) { } static const struct action_ops snd_pcm_action_drain_init = { .pre_action = snd_pcm_pre_drain_init, .do_action = snd_pcm_do_drain_init, .post_action = snd_pcm_post_drain_init }; /* * Drain the stream(s). * When the substream is linked, sync until the draining of all playback streams * is finished. * After this call, all streams are supposed to be either SETUP or DRAINING * (capture only) state. */ static int snd_pcm_drain(struct snd_pcm_substream *substream, struct file *file) { struct snd_card *card; struct snd_pcm_runtime *runtime; struct snd_pcm_substream *s; struct snd_pcm_group *group; wait_queue_entry_t wait; int result = 0; int nonblock = 0; card = substream->pcm->card; runtime = substream->runtime; if (runtime->state == SNDRV_PCM_STATE_OPEN) return -EBADFD; if (file) { if (file->f_flags & O_NONBLOCK) nonblock = 1; } else if (substream->f_flags & O_NONBLOCK) nonblock = 1; snd_pcm_stream_lock_irq(substream); /* resume pause */ if (runtime->state == SNDRV_PCM_STATE_PAUSED) snd_pcm_pause(substream, false); /* pre-start/stop - all running streams are changed to DRAINING state */ result = snd_pcm_action(&snd_pcm_action_drain_init, substream, ACTION_ARG_IGNORE); if (result < 0) goto unlock; /* in non-blocking, we don't wait in ioctl but let caller poll */ if (nonblock) { result = -EAGAIN; goto unlock; } for (;;) { long tout; struct snd_pcm_runtime *to_check; if (signal_pending(current)) { result = -ERESTARTSYS; break; } /* find a substream to drain */ to_check = NULL; group = snd_pcm_stream_group_ref(substream); snd_pcm_group_for_each_entry(s, substream) { if (s->stream != SNDRV_PCM_STREAM_PLAYBACK) continue; runtime = s->runtime; if (runtime->state == SNDRV_PCM_STATE_DRAINING) { to_check = runtime; break; } } snd_pcm_group_unref(group, substream); if (!to_check) break; /* all drained */ init_waitqueue_entry(&wait, current); set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&to_check->sleep, &wait); snd_pcm_stream_unlock_irq(substream); if (runtime->no_period_wakeup) tout = MAX_SCHEDULE_TIMEOUT; else { tout = 100; if (runtime->rate) { long t = runtime->buffer_size * 1100 / runtime->rate; tout = max(t, tout); } tout = msecs_to_jiffies(tout); } tout = schedule_timeout(tout); snd_pcm_stream_lock_irq(substream); group = snd_pcm_stream_group_ref(substream); snd_pcm_group_for_each_entry(s, substream) { if (s->runtime == to_check) { remove_wait_queue(&to_check->sleep, &wait); break; } } snd_pcm_group_unref(group, substream); if (card->shutdown) { result = -ENODEV; break; } if (tout == 0) { if (substream->runtime->state == SNDRV_PCM_STATE_SUSPENDED) result = -ESTRPIPE; else { dev_dbg(substream->pcm->card->dev, "playback drain timeout (DMA or IRQ trouble?)\n"); snd_pcm_stop(substream, SNDRV_PCM_STATE_SETUP); result = -EIO; } break; } } unlock: snd_pcm_stream_unlock_irq(substream); return result; } /* * drop ioctl * * Immediately put all linked substreams into SETUP state. */ static int snd_pcm_drop(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime; int result = 0; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; if (runtime->state == SNDRV_PCM_STATE_OPEN || runtime->state == SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; guard(pcm_stream_lock_irq)(substream); /* resume pause */ if (runtime->state == SNDRV_PCM_STATE_PAUSED) snd_pcm_pause(substream, false); snd_pcm_stop(substream, SNDRV_PCM_STATE_SETUP); /* runtime->control->appl_ptr = runtime->status->hw_ptr; */ return result; } static bool is_pcm_file(struct file *file) { struct inode *inode = file_inode(file); struct snd_pcm *pcm; unsigned int minor; if (!S_ISCHR(inode->i_mode) || imajor(inode) != snd_major) return false; minor = iminor(inode); pcm = snd_lookup_minor_data(minor, SNDRV_DEVICE_TYPE_PCM_PLAYBACK); if (!pcm) pcm = snd_lookup_minor_data(minor, SNDRV_DEVICE_TYPE_PCM_CAPTURE); if (!pcm) return false; snd_card_unref(pcm->card); return true; } /* * PCM link handling */ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd) { struct snd_pcm_file *pcm_file; struct snd_pcm_substream *substream1; struct snd_pcm_group *group __free(kfree) = NULL; struct snd_pcm_group *target_group; bool nonatomic = substream->pcm->nonatomic; CLASS(fd, f)(fd); if (fd_empty(f)) return -EBADFD; if (!is_pcm_file(fd_file(f))) return -EBADFD; pcm_file = fd_file(f)->private_data; substream1 = pcm_file->substream; if (substream == substream1) return -EINVAL; group = kzalloc(sizeof(*group), GFP_KERNEL); if (!group) return -ENOMEM; snd_pcm_group_init(group); guard(rwsem_write)(&snd_pcm_link_rwsem); if (substream->runtime->state == SNDRV_PCM_STATE_OPEN || substream->runtime->state != substream1->runtime->state || substream->pcm->nonatomic != substream1->pcm->nonatomic) return -EBADFD; if (snd_pcm_stream_linked(substream1)) return -EALREADY; scoped_guard(pcm_stream_lock_irq, substream) { if (!snd_pcm_stream_linked(substream)) { snd_pcm_group_assign(substream, group); group = NULL; /* assigned, don't free this one below */ } target_group = substream->group; } snd_pcm_group_lock_irq(target_group, nonatomic); snd_pcm_stream_lock_nested(substream1); snd_pcm_group_assign(substream1, target_group); refcount_inc(&target_group->refs); snd_pcm_stream_unlock(substream1); snd_pcm_group_unlock_irq(target_group, nonatomic); return 0; } static void relink_to_local(struct snd_pcm_substream *substream) { snd_pcm_stream_lock_nested(substream); snd_pcm_group_assign(substream, &substream->self_group); snd_pcm_stream_unlock(substream); } static int snd_pcm_unlink(struct snd_pcm_substream *substream) { struct snd_pcm_group *group; bool nonatomic = substream->pcm->nonatomic; bool do_free = false; guard(rwsem_write)(&snd_pcm_link_rwsem); if (!snd_pcm_stream_linked(substream)) return -EALREADY; group = substream->group; snd_pcm_group_lock_irq(group, nonatomic); relink_to_local(substream); refcount_dec(&group->refs); /* detach the last stream, too */ if (list_is_singular(&group->substreams)) { relink_to_local(list_first_entry(&group->substreams, struct snd_pcm_substream, link_list)); do_free = refcount_dec_and_test(&group->refs); } snd_pcm_group_unlock_irq(group, nonatomic); if (do_free) kfree(group); return 0; } /* * hw configurator */ static int snd_pcm_hw_rule_mul(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_interval t; snd_interval_mul(hw_param_interval_c(params, rule->deps[0]), hw_param_interval_c(params, rule->deps[1]), &t); return snd_interval_refine(hw_param_interval(params, rule->var), &t); } static int snd_pcm_hw_rule_div(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_interval t; snd_interval_div(hw_param_interval_c(params, rule->deps[0]), hw_param_interval_c(params, rule->deps[1]), &t); return snd_interval_refine(hw_param_interval(params, rule->var), &t); } static int snd_pcm_hw_rule_muldivk(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_interval t; snd_interval_muldivk(hw_param_interval_c(params, rule->deps[0]), hw_param_interval_c(params, rule->deps[1]), (unsigned long) rule->private, &t); return snd_interval_refine(hw_param_interval(params, rule->var), &t); } static int snd_pcm_hw_rule_mulkdiv(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_interval t; snd_interval_mulkdiv(hw_param_interval_c(params, rule->deps[0]), (unsigned long) rule->private, hw_param_interval_c(params, rule->deps[1]), &t); return snd_interval_refine(hw_param_interval(params, rule->var), &t); } static int snd_pcm_hw_rule_format(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { snd_pcm_format_t k; const struct snd_interval *i = hw_param_interval_c(params, rule->deps[0]); struct snd_mask m; struct snd_mask *mask = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); snd_mask_any(&m); pcm_for_each_format(k) { int bits; if (!snd_mask_test_format(mask, k)) continue; bits = snd_pcm_format_physical_width(k); if (bits <= 0) continue; /* ignore invalid formats */ if ((unsigned)bits < i->min || (unsigned)bits > i->max) snd_mask_reset(&m, (__force unsigned)k); } return snd_mask_refine(mask, &m); } static int snd_pcm_hw_rule_sample_bits(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_interval t; snd_pcm_format_t k; t.min = UINT_MAX; t.max = 0; t.openmin = 0; t.openmax = 0; pcm_for_each_format(k) { int bits; if (!snd_mask_test_format(hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT), k)) continue; bits = snd_pcm_format_physical_width(k); if (bits <= 0) continue; /* ignore invalid formats */ if (t.min > (unsigned)bits) t.min = bits; if (t.max < (unsigned)bits) t.max = bits; } t.integer = 1; return snd_interval_refine(hw_param_interval(params, rule->var), &t); } #if SNDRV_PCM_RATE_5512 != 1 << 0 || SNDRV_PCM_RATE_192000 != 1 << 12 ||\ SNDRV_PCM_RATE_128000 != 1 << 19 #error "Change this table" #endif /* NOTE: the list is unsorted! */ static const unsigned int rates[] = { 5512, 8000, 11025, 16000, 22050, 32000, 44100, 48000, 64000, 88200, 96000, 176400, 192000, 352800, 384000, 705600, 768000, /* extended */ 12000, 24000, 128000 }; const struct snd_pcm_hw_constraint_list snd_pcm_known_rates = { .count = ARRAY_SIZE(rates), .list = rates, }; static int snd_pcm_hw_rule_rate(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_pcm_hardware *hw = rule->private; return snd_interval_list(hw_param_interval(params, rule->var), snd_pcm_known_rates.count, snd_pcm_known_rates.list, hw->rates); } static int snd_pcm_hw_rule_buffer_bytes_max(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_interval t; struct snd_pcm_substream *substream = rule->private; t.min = 0; t.max = substream->buffer_bytes_max; t.openmin = 0; t.openmax = 0; t.integer = 1; return snd_interval_refine(hw_param_interval(params, rule->var), &t); } static int snd_pcm_hw_rule_subformats(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_mask *sfmask = hw_param_mask(params, SNDRV_PCM_HW_PARAM_SUBFORMAT); struct snd_mask *fmask = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); u32 *subformats = rule->private; snd_pcm_format_t f; struct snd_mask m; snd_mask_none(&m); /* All PCMs support at least the default STD subformat. */ snd_mask_set(&m, (__force unsigned)SNDRV_PCM_SUBFORMAT_STD); pcm_for_each_format(f) { if (!snd_mask_test(fmask, (__force unsigned)f)) continue; if (f == SNDRV_PCM_FORMAT_S32_LE && *subformats) m.bits[0] |= *subformats; else if (snd_pcm_format_linear(f)) snd_mask_set(&m, (__force unsigned)SNDRV_PCM_SUBFORMAT_MSBITS_MAX); } return snd_mask_refine(sfmask, &m); } static int snd_pcm_hw_constraint_subformats(struct snd_pcm_runtime *runtime, unsigned int cond, u32 *subformats) { return snd_pcm_hw_rule_add(runtime, cond, -1, snd_pcm_hw_rule_subformats, (void *)subformats, SNDRV_PCM_HW_PARAM_SUBFORMAT, SNDRV_PCM_HW_PARAM_FORMAT, -1); } static int snd_pcm_hw_constraints_init(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_pcm_hw_constraints *constrs = &runtime->hw_constraints; int k, err; for (k = SNDRV_PCM_HW_PARAM_FIRST_MASK; k <= SNDRV_PCM_HW_PARAM_LAST_MASK; k++) { snd_mask_any(constrs_mask(constrs, k)); } for (k = SNDRV_PCM_HW_PARAM_FIRST_INTERVAL; k <= SNDRV_PCM_HW_PARAM_LAST_INTERVAL; k++) { snd_interval_any(constrs_interval(constrs, k)); } snd_interval_setinteger(constrs_interval(constrs, SNDRV_PCM_HW_PARAM_CHANNELS)); snd_interval_setinteger(constrs_interval(constrs, SNDRV_PCM_HW_PARAM_BUFFER_SIZE)); snd_interval_setinteger(constrs_interval(constrs, SNDRV_PCM_HW_PARAM_BUFFER_BYTES)); snd_interval_setinteger(constrs_interval(constrs, SNDRV_PCM_HW_PARAM_SAMPLE_BITS)); snd_interval_setinteger(constrs_interval(constrs, SNDRV_PCM_HW_PARAM_FRAME_BITS)); err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FORMAT, snd_pcm_hw_rule_format, NULL, SNDRV_PCM_HW_PARAM_SAMPLE_BITS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_SAMPLE_BITS, snd_pcm_hw_rule_sample_bits, NULL, SNDRV_PCM_HW_PARAM_FORMAT, SNDRV_PCM_HW_PARAM_SAMPLE_BITS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_SAMPLE_BITS, snd_pcm_hw_rule_div, NULL, SNDRV_PCM_HW_PARAM_FRAME_BITS, SNDRV_PCM_HW_PARAM_CHANNELS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FRAME_BITS, snd_pcm_hw_rule_mul, NULL, SNDRV_PCM_HW_PARAM_SAMPLE_BITS, SNDRV_PCM_HW_PARAM_CHANNELS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FRAME_BITS, snd_pcm_hw_rule_mulkdiv, (void*) 8, SNDRV_PCM_HW_PARAM_PERIOD_BYTES, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FRAME_BITS, snd_pcm_hw_rule_mulkdiv, (void*) 8, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS, snd_pcm_hw_rule_div, NULL, SNDRV_PCM_HW_PARAM_FRAME_BITS, SNDRV_PCM_HW_PARAM_SAMPLE_BITS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, snd_pcm_hw_rule_mulkdiv, (void*) 1000000, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, SNDRV_PCM_HW_PARAM_PERIOD_TIME, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, snd_pcm_hw_rule_mulkdiv, (void*) 1000000, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, SNDRV_PCM_HW_PARAM_BUFFER_TIME, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIODS, snd_pcm_hw_rule_div, NULL, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, snd_pcm_hw_rule_div, NULL, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, SNDRV_PCM_HW_PARAM_PERIODS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, snd_pcm_hw_rule_mulkdiv, (void*) 8, SNDRV_PCM_HW_PARAM_PERIOD_BYTES, SNDRV_PCM_HW_PARAM_FRAME_BITS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, snd_pcm_hw_rule_muldivk, (void*) 1000000, SNDRV_PCM_HW_PARAM_PERIOD_TIME, SNDRV_PCM_HW_PARAM_RATE, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, snd_pcm_hw_rule_mul, NULL, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, SNDRV_PCM_HW_PARAM_PERIODS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, snd_pcm_hw_rule_mulkdiv, (void*) 8, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, SNDRV_PCM_HW_PARAM_FRAME_BITS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, snd_pcm_hw_rule_muldivk, (void*) 1000000, SNDRV_PCM_HW_PARAM_BUFFER_TIME, SNDRV_PCM_HW_PARAM_RATE, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_BYTES, snd_pcm_hw_rule_muldivk, (void*) 8, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, SNDRV_PCM_HW_PARAM_FRAME_BITS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, snd_pcm_hw_rule_muldivk, (void*) 8, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, SNDRV_PCM_HW_PARAM_FRAME_BITS, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_TIME, snd_pcm_hw_rule_mulkdiv, (void*) 1000000, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, SNDRV_PCM_HW_PARAM_RATE, -1); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_TIME, snd_pcm_hw_rule_mulkdiv, (void*) 1000000, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, SNDRV_PCM_HW_PARAM_RATE, -1); if (err < 0) return err; return 0; } static int snd_pcm_hw_constraints_complete(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_pcm_hardware *hw = &runtime->hw; int err; unsigned int mask = 0; if (hw->info & SNDRV_PCM_INFO_INTERLEAVED) mask |= PARAM_MASK_BIT(SNDRV_PCM_ACCESS_RW_INTERLEAVED); if (hw->info & SNDRV_PCM_INFO_NONINTERLEAVED) mask |= PARAM_MASK_BIT(SNDRV_PCM_ACCESS_RW_NONINTERLEAVED); if (hw_support_mmap(substream)) { if (hw->info & SNDRV_PCM_INFO_INTERLEAVED) mask |= PARAM_MASK_BIT(SNDRV_PCM_ACCESS_MMAP_INTERLEAVED); if (hw->info & SNDRV_PCM_INFO_NONINTERLEAVED) mask |= PARAM_MASK_BIT(SNDRV_PCM_ACCESS_MMAP_NONINTERLEAVED); if (hw->info & SNDRV_PCM_INFO_COMPLEX) mask |= PARAM_MASK_BIT(SNDRV_PCM_ACCESS_MMAP_COMPLEX); } err = snd_pcm_hw_constraint_mask(runtime, SNDRV_PCM_HW_PARAM_ACCESS, mask); if (err < 0) return err; err = snd_pcm_hw_constraint_mask64(runtime, SNDRV_PCM_HW_PARAM_FORMAT, hw->formats); if (err < 0) return err; err = snd_pcm_hw_constraint_subformats(runtime, 0, &hw->subformats); if (err < 0) return err; err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_CHANNELS, hw->channels_min, hw->channels_max); if (err < 0) return err; err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_RATE, hw->rate_min, hw->rate_max); if (err < 0) return err; err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_PERIOD_BYTES, hw->period_bytes_min, hw->period_bytes_max); if (err < 0) return err; err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_PERIODS, hw->periods_min, hw->periods_max); if (err < 0) return err; err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, hw->period_bytes_min, hw->buffer_bytes_max); if (err < 0) return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, snd_pcm_hw_rule_buffer_bytes_max, substream, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, -1); if (err < 0) return err; /* FIXME: remove */ if (runtime->dma_bytes) { err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_BUFFER_BYTES, 0, runtime->dma_bytes); if (err < 0) return err; } if (!(hw->rates & (SNDRV_PCM_RATE_KNOT | SNDRV_PCM_RATE_CONTINUOUS))) { err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, snd_pcm_hw_rule_rate, hw, SNDRV_PCM_HW_PARAM_RATE, -1); if (err < 0) return err; } /* FIXME: this belong to lowlevel */ snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIOD_SIZE); return 0; } static void pcm_release_private(struct snd_pcm_substream *substream) { if (snd_pcm_stream_linked(substream)) snd_pcm_unlink(substream); } void snd_pcm_release_substream(struct snd_pcm_substream *substream) { substream->ref_count--; if (substream->ref_count > 0) return; snd_pcm_drop(substream); if (substream->hw_opened) { if (substream->runtime->state != SNDRV_PCM_STATE_OPEN) do_hw_free(substream); substream->ops->close(substream); substream->hw_opened = 0; } if (cpu_latency_qos_request_active(&substream->latency_pm_qos_req)) cpu_latency_qos_remove_request(&substream->latency_pm_qos_req); if (substream->pcm_release) { substream->pcm_release(substream); substream->pcm_release = NULL; } snd_pcm_detach_substream(substream); } EXPORT_SYMBOL(snd_pcm_release_substream); int snd_pcm_open_substream(struct snd_pcm *pcm, int stream, struct file *file, struct snd_pcm_substream **rsubstream) { struct snd_pcm_substream *substream; int err; err = snd_pcm_attach_substream(pcm, stream, file, &substream); if (err < 0) return err; if (substream->ref_count > 1) { *rsubstream = substream; return 0; } err = snd_pcm_hw_constraints_init(substream); if (err < 0) { pcm_dbg(pcm, "snd_pcm_hw_constraints_init failed\n"); goto error; } err = substream->ops->open(substream); if (err < 0) goto error; substream->hw_opened = 1; err = snd_pcm_hw_constraints_complete(substream); if (err < 0) { pcm_dbg(pcm, "snd_pcm_hw_constraints_complete failed\n"); goto error; } /* automatically set EXPLICIT_SYNC flag in the managed mode whenever * the DMA buffer requires it */ if (substream->managed_buffer_alloc && substream->dma_buffer.dev.need_sync) substream->runtime->hw.info |= SNDRV_PCM_INFO_EXPLICIT_SYNC; *rsubstream = substream; return 0; error: snd_pcm_release_substream(substream); return err; } EXPORT_SYMBOL(snd_pcm_open_substream); static int snd_pcm_open_file(struct file *file, struct snd_pcm *pcm, int stream) { struct snd_pcm_file *pcm_file; struct snd_pcm_substream *substream; int err; err = snd_pcm_open_substream(pcm, stream, file, &substream); if (err < 0) return err; pcm_file = kzalloc(sizeof(*pcm_file), GFP_KERNEL); if (pcm_file == NULL) { snd_pcm_release_substream(substream); return -ENOMEM; } pcm_file->substream = substream; if (substream->ref_count == 1) substream->pcm_release = pcm_release_private; file->private_data = pcm_file; return 0; } static int snd_pcm_playback_open(struct inode *inode, struct file *file) { struct snd_pcm *pcm; int err = nonseekable_open(inode, file); if (err < 0) return err; pcm = snd_lookup_minor_data(iminor(inode), SNDRV_DEVICE_TYPE_PCM_PLAYBACK); err = snd_pcm_open(file, pcm, SNDRV_PCM_STREAM_PLAYBACK); if (pcm) snd_card_unref(pcm->card); return err; } static int snd_pcm_capture_open(struct inode *inode, struct file *file) { struct snd_pcm *pcm; int err = nonseekable_open(inode, file); if (err < 0) return err; pcm = snd_lookup_minor_data(iminor(inode), SNDRV_DEVICE_TYPE_PCM_CAPTURE); err = snd_pcm_open(file, pcm, SNDRV_PCM_STREAM_CAPTURE); if (pcm) snd_card_unref(pcm->card); return err; } static int snd_pcm_open(struct file *file, struct snd_pcm *pcm, int stream) { int err; wait_queue_entry_t wait; if (pcm == NULL) { err = -ENODEV; goto __error1; } err = snd_card_file_add(pcm->card, file); if (err < 0) goto __error1; if (!try_module_get(pcm->card->module)) { err = -EFAULT; goto __error2; } init_waitqueue_entry(&wait, current); add_wait_queue(&pcm->open_wait, &wait); mutex_lock(&pcm->open_mutex); while (1) { err = snd_pcm_open_file(file, pcm, stream); if (err >= 0) break; if (err == -EAGAIN) { if (file->f_flags & O_NONBLOCK) { err = -EBUSY; break; } } else break; set_current_state(TASK_INTERRUPTIBLE); mutex_unlock(&pcm->open_mutex); schedule(); mutex_lock(&pcm->open_mutex); if (pcm->card->shutdown) { err = -ENODEV; break; } if (signal_pending(current)) { err = -ERESTARTSYS; break; } } remove_wait_queue(&pcm->open_wait, &wait); mutex_unlock(&pcm->open_mutex); if (err < 0) goto __error; return err; __error: module_put(pcm->card->module); __error2: snd_card_file_remove(pcm->card, file); __error1: return err; } static int snd_pcm_release(struct inode *inode, struct file *file) { struct snd_pcm *pcm; struct snd_pcm_substream *substream; struct snd_pcm_file *pcm_file; pcm_file = file->private_data; substream = pcm_file->substream; if (snd_BUG_ON(!substream)) return -ENXIO; pcm = substream->pcm; /* block until the device gets woken up as it may touch the hardware */ snd_power_wait(pcm->card); scoped_guard(mutex, &pcm->open_mutex) { snd_pcm_release_substream(substream); kfree(pcm_file); } wake_up(&pcm->open_wait); module_put(pcm->card->module); snd_card_file_remove(pcm->card, file); return 0; } /* check and update PCM state; return 0 or a negative error * call this inside PCM lock */ static int do_pcm_hwsync(struct snd_pcm_substream *substream) { switch (substream->runtime->state) { case SNDRV_PCM_STATE_DRAINING: if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) return -EBADFD; fallthrough; case SNDRV_PCM_STATE_RUNNING: return snd_pcm_update_hw_ptr(substream); case SNDRV_PCM_STATE_PREPARED: case SNDRV_PCM_STATE_PAUSED: return 0; case SNDRV_PCM_STATE_SUSPENDED: return -ESTRPIPE; case SNDRV_PCM_STATE_XRUN: return -EPIPE; default: return -EBADFD; } } /* increase the appl_ptr; returns the processed frames or a negative error */ static snd_pcm_sframes_t forward_appl_ptr(struct snd_pcm_substream *substream, snd_pcm_uframes_t frames, snd_pcm_sframes_t avail) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_sframes_t appl_ptr; int ret; if (avail <= 0) return 0; if (frames > (snd_pcm_uframes_t)avail) frames = avail; appl_ptr = runtime->control->appl_ptr + frames; if (appl_ptr >= (snd_pcm_sframes_t)runtime->boundary) appl_ptr -= runtime->boundary; ret = pcm_lib_apply_appl_ptr(substream, appl_ptr); return ret < 0 ? ret : frames; } /* decrease the appl_ptr; returns the processed frames or zero for error */ static snd_pcm_sframes_t rewind_appl_ptr(struct snd_pcm_substream *substream, snd_pcm_uframes_t frames, snd_pcm_sframes_t avail) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_sframes_t appl_ptr; int ret; if (avail <= 0) return 0; if (frames > (snd_pcm_uframes_t)avail) frames = avail; appl_ptr = runtime->control->appl_ptr - frames; if (appl_ptr < 0) appl_ptr += runtime->boundary; ret = pcm_lib_apply_appl_ptr(substream, appl_ptr); /* NOTE: we return zero for errors because PulseAudio gets depressed * upon receiving an error from rewind ioctl and stops processing * any longer. Returning zero means that no rewind is done, so * it's not absolutely wrong to answer like that. */ return ret < 0 ? 0 : frames; } static snd_pcm_sframes_t snd_pcm_rewind(struct snd_pcm_substream *substream, snd_pcm_uframes_t frames) { snd_pcm_sframes_t ret; if (frames == 0) return 0; scoped_guard(pcm_stream_lock_irq, substream) { ret = do_pcm_hwsync(substream); if (!ret) ret = rewind_appl_ptr(substream, frames, snd_pcm_hw_avail(substream)); } if (ret >= 0) snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); return ret; } static snd_pcm_sframes_t snd_pcm_forward(struct snd_pcm_substream *substream, snd_pcm_uframes_t frames) { snd_pcm_sframes_t ret; if (frames == 0) return 0; scoped_guard(pcm_stream_lock_irq, substream) { ret = do_pcm_hwsync(substream); if (!ret) ret = forward_appl_ptr(substream, frames, snd_pcm_avail(substream)); } if (ret >= 0) snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); return ret; } static int snd_pcm_delay(struct snd_pcm_substream *substream, snd_pcm_sframes_t *delay) { int err; scoped_guard(pcm_stream_lock_irq, substream) { err = do_pcm_hwsync(substream); if (delay && !err) *delay = snd_pcm_calc_delay(substream); } snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_CPU); return err; } static inline int snd_pcm_hwsync(struct snd_pcm_substream *substream) { return snd_pcm_delay(substream, NULL); } #define snd_pcm_sync_ptr_get_user(__f, __c, __ptr) ({ \ __label__ failed, failed_begin; \ int __err = -EFAULT; \ typeof(*(__ptr)) __user *__src = (__ptr); \ \ if (!user_read_access_begin(__src, sizeof(*__src))) \ goto failed_begin; \ unsafe_get_user(__f, &__src->flags, failed); \ unsafe_get_user(__c.appl_ptr, &__src->c.control.appl_ptr, failed); \ unsafe_get_user(__c.avail_min, &__src->c.control.avail_min, failed); \ __err = 0; \ failed: \ user_read_access_end(); \ failed_begin: \ __err; \ }) #define snd_pcm_sync_ptr_put_user(__s, __c, __ptr) ({ \ __label__ failed, failed_begin; \ int __err = -EFAULT; \ typeof(*(__ptr)) __user *__src = (__ptr); \ \ if (!user_write_access_begin(__src, sizeof(*__src))) \ goto failed_begin; \ unsafe_put_user(__s.state, &__src->s.status.state, failed); \ unsafe_put_user(__s.hw_ptr, &__src->s.status.hw_ptr, failed); \ unsafe_put_user(__s.tstamp.tv_sec, &__src->s.status.tstamp.tv_sec, failed); \ unsafe_put_user(__s.tstamp.tv_nsec, &__src->s.status.tstamp.tv_nsec, failed); \ unsafe_put_user(__s.suspended_state, &__src->s.status.suspended_state, failed); \ unsafe_put_user(__s.audio_tstamp.tv_sec, &__src->s.status.audio_tstamp.tv_sec, failed); \ unsafe_put_user(__s.audio_tstamp.tv_nsec, &__src->s.status.audio_tstamp.tv_nsec, failed);\ unsafe_put_user(__c.appl_ptr, &__src->c.control.appl_ptr, failed); \ unsafe_put_user(__c.avail_min, &__src->c.control.avail_min, failed); \ __err = 0; \ failed: \ user_write_access_end(); \ failed_begin: \ __err; \ }) static int snd_pcm_sync_ptr(struct snd_pcm_substream *substream, struct snd_pcm_sync_ptr __user *_sync_ptr) { struct snd_pcm_runtime *runtime = substream->runtime; volatile struct snd_pcm_mmap_status *status; volatile struct snd_pcm_mmap_control *control; u32 sflags; struct snd_pcm_mmap_control scontrol; struct snd_pcm_mmap_status sstatus; int err; if (snd_pcm_sync_ptr_get_user(sflags, scontrol, _sync_ptr)) return -EFAULT; status = runtime->status; control = runtime->control; if (sflags & SNDRV_PCM_SYNC_PTR_HWSYNC) { err = snd_pcm_hwsync(substream); if (err < 0) return err; } scoped_guard(pcm_stream_lock_irq, substream) { if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) { err = pcm_lib_apply_appl_ptr(substream, scontrol.appl_ptr); if (err < 0) return err; } else { scontrol.appl_ptr = control->appl_ptr; } if (!(sflags & SNDRV_PCM_SYNC_PTR_AVAIL_MIN)) control->avail_min = scontrol.avail_min; else scontrol.avail_min = control->avail_min; sstatus.state = status->state; sstatus.hw_ptr = status->hw_ptr; sstatus.tstamp = status->tstamp; sstatus.suspended_state = status->suspended_state; sstatus.audio_tstamp = status->audio_tstamp; } if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); if (snd_pcm_sync_ptr_put_user(sstatus, scontrol, _sync_ptr)) return -EFAULT; return 0; } struct snd_pcm_mmap_status32 { snd_pcm_state_t state; s32 pad1; u32 hw_ptr; struct __snd_timespec tstamp; snd_pcm_state_t suspended_state; struct __snd_timespec audio_tstamp; } __packed; struct snd_pcm_mmap_control32 { u32 appl_ptr; u32 avail_min; }; struct snd_pcm_sync_ptr32 { u32 flags; union { struct snd_pcm_mmap_status32 status; unsigned char reserved[64]; } s; union { struct snd_pcm_mmap_control32 control; unsigned char reserved[64]; } c; } __packed; /* recalculate the boundary within 32bit */ static snd_pcm_uframes_t recalculate_boundary(struct snd_pcm_runtime *runtime) { snd_pcm_uframes_t boundary; snd_pcm_uframes_t border; int order; if (! runtime->buffer_size) return 0; border = 0x7fffffffUL - runtime->buffer_size; if (runtime->buffer_size > border) return runtime->buffer_size; order = __fls(border) - __fls(runtime->buffer_size); boundary = runtime->buffer_size << order; if (boundary <= border) return boundary; else return boundary / 2; } static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream, struct snd_pcm_sync_ptr32 __user *src) { struct snd_pcm_runtime *runtime = substream->runtime; volatile struct snd_pcm_mmap_status *status; volatile struct snd_pcm_mmap_control *control; u32 sflags; struct snd_pcm_mmap_control scontrol; struct snd_pcm_mmap_status sstatus; snd_pcm_uframes_t boundary; int err; if (snd_BUG_ON(!runtime)) return -EINVAL; if (snd_pcm_sync_ptr_get_user(sflags, scontrol, src)) return -EFAULT; if (sflags & SNDRV_PCM_SYNC_PTR_HWSYNC) { err = snd_pcm_hwsync(substream); if (err < 0) return err; } status = runtime->status; control = runtime->control; boundary = recalculate_boundary(runtime); if (! boundary) boundary = 0x7fffffff; scoped_guard(pcm_stream_lock_irq, substream) { /* FIXME: we should consider the boundary for the sync from app */ if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) { err = pcm_lib_apply_appl_ptr(substream, scontrol.appl_ptr); if (err < 0) return err; } else scontrol.appl_ptr = control->appl_ptr % boundary; if (!(sflags & SNDRV_PCM_SYNC_PTR_AVAIL_MIN)) control->avail_min = scontrol.avail_min; else scontrol.avail_min = control->avail_min; sstatus.state = status->state; sstatus.hw_ptr = status->hw_ptr % boundary; sstatus.tstamp = status->tstamp; sstatus.suspended_state = status->suspended_state; sstatus.audio_tstamp = status->audio_tstamp; } if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) snd_pcm_dma_buffer_sync(substream, SNDRV_DMA_SYNC_DEVICE); if (snd_pcm_sync_ptr_put_user(sstatus, scontrol, src)) return -EFAULT; return 0; } #define __SNDRV_PCM_IOCTL_SYNC_PTR32 _IOWR('A', 0x23, struct snd_pcm_sync_ptr32) static int snd_pcm_tstamp(struct snd_pcm_substream *substream, int __user *_arg) { struct snd_pcm_runtime *runtime = substream->runtime; int arg; if (get_user(arg, _arg)) return -EFAULT; if (arg < 0 || arg > SNDRV_PCM_TSTAMP_TYPE_LAST) return -EINVAL; runtime->tstamp_type = arg; return 0; } static int snd_pcm_xferi_frames_ioctl(struct snd_pcm_substream *substream, struct snd_xferi __user *_xferi) { struct snd_xferi xferi; struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_sframes_t result; if (runtime->state == SNDRV_PCM_STATE_OPEN) return -EBADFD; if (put_user(0, &_xferi->result)) return -EFAULT; if (copy_from_user(&xferi, _xferi, sizeof(xferi))) return -EFAULT; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) result = snd_pcm_lib_write(substream, xferi.buf, xferi.frames); else result = snd_pcm_lib_read(substream, xferi.buf, xferi.frames); if (put_user(result, &_xferi->result)) return -EFAULT; return result < 0 ? result : 0; } static int snd_pcm_xfern_frames_ioctl(struct snd_pcm_substream *substream, struct snd_xfern __user *_xfern) { struct snd_xfern xfern; struct snd_pcm_runtime *runtime = substream->runtime; void *bufs __free(kfree) = NULL; snd_pcm_sframes_t result; if (runtime->state == SNDRV_PCM_STATE_OPEN) return -EBADFD; if (runtime->channels > 128) return -EINVAL; if (put_user(0, &_xfern->result)) return -EFAULT; if (copy_from_user(&xfern, _xfern, sizeof(xfern))) return -EFAULT; bufs = memdup_array_user(xfern.bufs, runtime->channels, sizeof(void *)); if (IS_ERR(bufs)) return PTR_ERR(bufs); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) result = snd_pcm_lib_writev(substream, bufs, xfern.frames); else result = snd_pcm_lib_readv(substream, bufs, xfern.frames); if (put_user(result, &_xfern->result)) return -EFAULT; return result < 0 ? result : 0; } static int snd_pcm_rewind_ioctl(struct snd_pcm_substream *substream, snd_pcm_uframes_t __user *_frames) { snd_pcm_uframes_t frames; snd_pcm_sframes_t result; if (get_user(frames, _frames)) return -EFAULT; if (put_user(0, _frames)) return -EFAULT; result = snd_pcm_rewind(substream, frames); if (put_user(result, _frames)) return -EFAULT; return result < 0 ? result : 0; } static int snd_pcm_forward_ioctl(struct snd_pcm_substream *substream, snd_pcm_uframes_t __user *_frames) { snd_pcm_uframes_t frames; snd_pcm_sframes_t result; if (get_user(frames, _frames)) return -EFAULT; if (put_user(0, _frames)) return -EFAULT; result = snd_pcm_forward(substream, frames); if (put_user(result, _frames)) return -EFAULT; return result < 0 ? result : 0; } static int snd_pcm_common_ioctl(struct file *file, struct snd_pcm_substream *substream, unsigned int cmd, void __user *arg) { struct snd_pcm_file *pcm_file = file->private_data; int res; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; if (substream->runtime->state == SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; res = snd_power_wait(substream->pcm->card); if (res < 0) return res; switch (cmd) { case SNDRV_PCM_IOCTL_PVERSION: return put_user(SNDRV_PCM_VERSION, (int __user *)arg) ? -EFAULT : 0; case SNDRV_PCM_IOCTL_INFO: return snd_pcm_info_user(substream, arg); case SNDRV_PCM_IOCTL_TSTAMP: /* just for compatibility */ return 0; case SNDRV_PCM_IOCTL_TTSTAMP: return snd_pcm_tstamp(substream, arg); case SNDRV_PCM_IOCTL_USER_PVERSION: if (get_user(pcm_file->user_pversion, (unsigned int __user *)arg)) return -EFAULT; return 0; case SNDRV_PCM_IOCTL_HW_REFINE: return snd_pcm_hw_refine_user(substream, arg); case SNDRV_PCM_IOCTL_HW_PARAMS: return snd_pcm_hw_params_user(substream, arg); case SNDRV_PCM_IOCTL_HW_FREE: return snd_pcm_hw_free(substream); case SNDRV_PCM_IOCTL_SW_PARAMS: return snd_pcm_sw_params_user(substream, arg); case SNDRV_PCM_IOCTL_STATUS32: return snd_pcm_status_user32(substream, arg, false); case SNDRV_PCM_IOCTL_STATUS_EXT32: return snd_pcm_status_user32(substream, arg, true); case SNDRV_PCM_IOCTL_STATUS64: return snd_pcm_status_user64(substream, arg, false); case SNDRV_PCM_IOCTL_STATUS_EXT64: return snd_pcm_status_user64(substream, arg, true); case SNDRV_PCM_IOCTL_CHANNEL_INFO: return snd_pcm_channel_info_user(substream, arg); case SNDRV_PCM_IOCTL_PREPARE: return snd_pcm_prepare(substream, file); case SNDRV_PCM_IOCTL_RESET: return snd_pcm_reset(substream); case SNDRV_PCM_IOCTL_START: return snd_pcm_start_lock_irq(substream); case SNDRV_PCM_IOCTL_LINK: return snd_pcm_link(substream, (int)(unsigned long) arg); case SNDRV_PCM_IOCTL_UNLINK: return snd_pcm_unlink(substream); case SNDRV_PCM_IOCTL_RESUME: return snd_pcm_resume(substream); case SNDRV_PCM_IOCTL_XRUN: return snd_pcm_xrun(substream); case SNDRV_PCM_IOCTL_HWSYNC: return snd_pcm_hwsync(substream); case SNDRV_PCM_IOCTL_DELAY: { snd_pcm_sframes_t delay = 0; snd_pcm_sframes_t __user *res = arg; int err; err = snd_pcm_delay(substream, &delay); if (err) return err; if (put_user(delay, res)) return -EFAULT; return 0; } case __SNDRV_PCM_IOCTL_SYNC_PTR32: return snd_pcm_ioctl_sync_ptr_compat(substream, arg); case __SNDRV_PCM_IOCTL_SYNC_PTR64: return snd_pcm_sync_ptr(substream, arg); #ifdef CONFIG_SND_SUPPORT_OLD_API case SNDRV_PCM_IOCTL_HW_REFINE_OLD: return snd_pcm_hw_refine_old_user(substream, arg); case SNDRV_PCM_IOCTL_HW_PARAMS_OLD: return snd_pcm_hw_params_old_user(substream, arg); #endif case SNDRV_PCM_IOCTL_DRAIN: return snd_pcm_drain(substream, file); case SNDRV_PCM_IOCTL_DROP: return snd_pcm_drop(substream); case SNDRV_PCM_IOCTL_PAUSE: return snd_pcm_pause_lock_irq(substream, (unsigned long)arg); case SNDRV_PCM_IOCTL_WRITEI_FRAMES: case SNDRV_PCM_IOCTL_READI_FRAMES: return snd_pcm_xferi_frames_ioctl(substream, arg); case SNDRV_PCM_IOCTL_WRITEN_FRAMES: case SNDRV_PCM_IOCTL_READN_FRAMES: return snd_pcm_xfern_frames_ioctl(substream, arg); case SNDRV_PCM_IOCTL_REWIND: return snd_pcm_rewind_ioctl(substream, arg); case SNDRV_PCM_IOCTL_FORWARD: return snd_pcm_forward_ioctl(substream, arg); } pcm_dbg(substream->pcm, "unknown ioctl = 0x%x\n", cmd); return -ENOTTY; } static long snd_pcm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct snd_pcm_file *pcm_file; pcm_file = file->private_data; if (((cmd >> 8) & 0xff) != 'A') return -ENOTTY; return snd_pcm_common_ioctl(file, pcm_file->substream, cmd, (void __user *)arg); } /** * snd_pcm_kernel_ioctl - Execute PCM ioctl in the kernel-space * @substream: PCM substream * @cmd: IOCTL cmd * @arg: IOCTL argument * * The function is provided primarily for OSS layer and USB gadget drivers, * and it allows only the limited set of ioctls (hw_params, sw_params, * prepare, start, drain, drop, forward). * * Return: zero if successful, or a negative error code */ int snd_pcm_kernel_ioctl(struct snd_pcm_substream *substream, unsigned int cmd, void *arg) { snd_pcm_uframes_t *frames = arg; snd_pcm_sframes_t result; if (substream->runtime->state == SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; switch (cmd) { case SNDRV_PCM_IOCTL_FORWARD: { /* provided only for OSS; capture-only and no value returned */ if (substream->stream != SNDRV_PCM_STREAM_CAPTURE) return -EINVAL; result = snd_pcm_forward(substream, *frames); return result < 0 ? result : 0; } case SNDRV_PCM_IOCTL_HW_PARAMS: return snd_pcm_hw_params(substream, arg); case SNDRV_PCM_IOCTL_SW_PARAMS: return snd_pcm_sw_params(substream, arg); case SNDRV_PCM_IOCTL_PREPARE: return snd_pcm_prepare(substream, NULL); case SNDRV_PCM_IOCTL_START: return snd_pcm_start_lock_irq(substream); case SNDRV_PCM_IOCTL_DRAIN: return snd_pcm_drain(substream, NULL); case SNDRV_PCM_IOCTL_DROP: return snd_pcm_drop(substream); case SNDRV_PCM_IOCTL_DELAY: return snd_pcm_delay(substream, frames); default: return -EINVAL; } } EXPORT_SYMBOL(snd_pcm_kernel_ioctl); static ssize_t snd_pcm_read(struct file *file, char __user *buf, size_t count, loff_t * offset) { struct snd_pcm_file *pcm_file; struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; snd_pcm_sframes_t result; pcm_file = file->private_data; substream = pcm_file->substream; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; if (runtime->state == SNDRV_PCM_STATE_OPEN || runtime->state == SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; if (!frame_aligned(runtime, count)) return -EINVAL; count = bytes_to_frames(runtime, count); result = snd_pcm_lib_read(substream, buf, count); if (result > 0) result = frames_to_bytes(runtime, result); return result; } static ssize_t snd_pcm_write(struct file *file, const char __user *buf, size_t count, loff_t * offset) { struct snd_pcm_file *pcm_file; struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; snd_pcm_sframes_t result; pcm_file = file->private_data; substream = pcm_file->substream; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; if (runtime->state == SNDRV_PCM_STATE_OPEN || runtime->state == SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; if (!frame_aligned(runtime, count)) return -EINVAL; count = bytes_to_frames(runtime, count); result = snd_pcm_lib_write(substream, buf, count); if (result > 0) result = frames_to_bytes(runtime, result); return result; } static ssize_t snd_pcm_readv(struct kiocb *iocb, struct iov_iter *to) { struct snd_pcm_file *pcm_file; struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; snd_pcm_sframes_t result; unsigned long i; void __user **bufs __free(kfree) = NULL; snd_pcm_uframes_t frames; const struct iovec *iov = iter_iov(to); pcm_file = iocb->ki_filp->private_data; substream = pcm_file->substream; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; if (runtime->state == SNDRV_PCM_STATE_OPEN || runtime->state == SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; if (!user_backed_iter(to)) return -EINVAL; if (to->nr_segs > 1024 || to->nr_segs != runtime->channels) return -EINVAL; if (!frame_aligned(runtime, iov->iov_len)) return -EINVAL; frames = bytes_to_samples(runtime, iov->iov_len); bufs = kmalloc_array(to->nr_segs, sizeof(void *), GFP_KERNEL); if (bufs == NULL) return -ENOMEM; for (i = 0; i < to->nr_segs; ++i) { bufs[i] = iov->iov_base; iov++; } result = snd_pcm_lib_readv(substream, bufs, frames); if (result > 0) result = frames_to_bytes(runtime, result); return result; } static ssize_t snd_pcm_writev(struct kiocb *iocb, struct iov_iter *from) { struct snd_pcm_file *pcm_file; struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; snd_pcm_sframes_t result; unsigned long i; void __user **bufs __free(kfree) = NULL; snd_pcm_uframes_t frames; const struct iovec *iov = iter_iov(from); pcm_file = iocb->ki_filp->private_data; substream = pcm_file->substream; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; if (runtime->state == SNDRV_PCM_STATE_OPEN || runtime->state == SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; if (!user_backed_iter(from)) return -EINVAL; if (from->nr_segs > 128 || from->nr_segs != runtime->channels || !frame_aligned(runtime, iov->iov_len)) return -EINVAL; frames = bytes_to_samples(runtime, iov->iov_len); bufs = kmalloc_array(from->nr_segs, sizeof(void *), GFP_KERNEL); if (bufs == NULL) return -ENOMEM; for (i = 0; i < from->nr_segs; ++i) { bufs[i] = iov->iov_base; iov++; } result = snd_pcm_lib_writev(substream, bufs, frames); if (result > 0) result = frames_to_bytes(runtime, result); return result; } static __poll_t snd_pcm_poll(struct file *file, poll_table *wait) { struct snd_pcm_file *pcm_file; struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; __poll_t mask, ok; snd_pcm_uframes_t avail; pcm_file = file->private_data; substream = pcm_file->substream; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ok = EPOLLOUT | EPOLLWRNORM; else ok = EPOLLIN | EPOLLRDNORM; if (PCM_RUNTIME_CHECK(substream)) return ok | EPOLLERR; runtime = substream->runtime; if (runtime->state == SNDRV_PCM_STATE_DISCONNECTED) return ok | EPOLLERR; poll_wait(file, &runtime->sleep, wait); mask = 0; guard(pcm_stream_lock_irq)(substream); avail = snd_pcm_avail(substream); switch (runtime->state) { case SNDRV_PCM_STATE_RUNNING: case SNDRV_PCM_STATE_PREPARED: case SNDRV_PCM_STATE_PAUSED: if (avail >= runtime->control->avail_min) mask = ok; break; case SNDRV_PCM_STATE_DRAINING: if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) { mask = ok; if (!avail) mask |= EPOLLERR; } break; default: mask = ok | EPOLLERR; break; } return mask; } /* * mmap support */ /* * Only on coherent architectures, we can mmap the status and the control records * for effcient data transfer. On others, we have to use HWSYNC ioctl... */ #if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_ALPHA) /* * mmap status record */ static vm_fault_t snd_pcm_mmap_status_fault(struct vm_fault *vmf) { struct snd_pcm_substream *substream = vmf->vma->vm_private_data; struct snd_pcm_runtime *runtime; if (substream == NULL) return VM_FAULT_SIGBUS; runtime = substream->runtime; vmf->page = virt_to_page(runtime->status); get_page(vmf->page); return 0; } static const struct vm_operations_struct snd_pcm_vm_ops_status = { .fault = snd_pcm_mmap_status_fault, }; static int snd_pcm_mmap_status(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area) { long size; if (!(area->vm_flags & VM_READ)) return -EINVAL; size = area->vm_end - area->vm_start; if (size != PAGE_ALIGN(sizeof(struct snd_pcm_mmap_status))) return -EINVAL; area->vm_ops = &snd_pcm_vm_ops_status; area->vm_private_data = substream; vm_flags_mod(area, VM_DONTEXPAND | VM_DONTDUMP, VM_WRITE | VM_MAYWRITE); return 0; } /* * mmap control record */ static vm_fault_t snd_pcm_mmap_control_fault(struct vm_fault *vmf) { struct snd_pcm_substream *substream = vmf->vma->vm_private_data; struct snd_pcm_runtime *runtime; if (substream == NULL) return VM_FAULT_SIGBUS; runtime = substream->runtime; vmf->page = virt_to_page(runtime->control); get_page(vmf->page); return 0; } static const struct vm_operations_struct snd_pcm_vm_ops_control = { .fault = snd_pcm_mmap_control_fault, }; static int snd_pcm_mmap_control(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area) { long size; if (!(area->vm_flags & VM_READ)) return -EINVAL; size = area->vm_end - area->vm_start; if (size != PAGE_ALIGN(sizeof(struct snd_pcm_mmap_control))) return -EINVAL; area->vm_ops = &snd_pcm_vm_ops_control; area->vm_private_data = substream; vm_flags_set(area, VM_DONTEXPAND | VM_DONTDUMP); return 0; } static bool pcm_status_mmap_allowed(struct snd_pcm_file *pcm_file) { /* If drivers require the explicit sync (typically for non-coherent * pages), we have to disable the mmap of status and control data * to enforce the control via SYNC_PTR ioctl. */ if (pcm_file->substream->runtime->hw.info & SNDRV_PCM_INFO_EXPLICIT_SYNC) return false; /* See pcm_control_mmap_allowed() below. * Since older alsa-lib requires both status and control mmaps to be * coupled, we have to disable the status mmap for old alsa-lib, too. */ if (pcm_file->user_pversion < SNDRV_PROTOCOL_VERSION(2, 0, 14) && (pcm_file->substream->runtime->hw.info & SNDRV_PCM_INFO_SYNC_APPLPTR)) return false; return true; } static bool pcm_control_mmap_allowed(struct snd_pcm_file *pcm_file) { if (pcm_file->no_compat_mmap) return false; /* see above */ if (pcm_file->substream->runtime->hw.info & SNDRV_PCM_INFO_EXPLICIT_SYNC) return false; /* Disallow the control mmap when SYNC_APPLPTR flag is set; * it enforces the user-space to fall back to snd_pcm_sync_ptr(), * thus it effectively assures the manual update of appl_ptr. */ if (pcm_file->substream->runtime->hw.info & SNDRV_PCM_INFO_SYNC_APPLPTR) return false; return true; } #else /* ! coherent mmap */ /* * don't support mmap for status and control records. */ #define pcm_status_mmap_allowed(pcm_file) false #define pcm_control_mmap_allowed(pcm_file) false static int snd_pcm_mmap_status(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area) { return -ENXIO; } static int snd_pcm_mmap_control(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area) { return -ENXIO; } #endif /* coherent mmap */ /* * snd_pcm_mmap_data_open - increase the mmap counter */ static void snd_pcm_mmap_data_open(struct vm_area_struct *area) { struct snd_pcm_substream *substream = area->vm_private_data; atomic_inc(&substream->mmap_count); } /* * snd_pcm_mmap_data_close - decrease the mmap counter */ static void snd_pcm_mmap_data_close(struct vm_area_struct *area) { struct snd_pcm_substream *substream = area->vm_private_data; atomic_dec(&substream->mmap_count); } /* * fault callback for mmapping a RAM page */ static vm_fault_t snd_pcm_mmap_data_fault(struct vm_fault *vmf) { struct snd_pcm_substream *substream = vmf->vma->vm_private_data; struct snd_pcm_runtime *runtime; unsigned long offset; struct page * page; size_t dma_bytes; if (substream == NULL) return VM_FAULT_SIGBUS; runtime = substream->runtime; offset = vmf->pgoff << PAGE_SHIFT; dma_bytes = PAGE_ALIGN(runtime->dma_bytes); if (offset > dma_bytes - PAGE_SIZE) return VM_FAULT_SIGBUS; if (substream->ops->page) page = substream->ops->page(substream, offset); else if (!snd_pcm_get_dma_buf(substream)) { if (WARN_ON_ONCE(!runtime->dma_area)) return VM_FAULT_SIGBUS; page = virt_to_page(runtime->dma_area + offset); } else page = snd_sgbuf_get_page(snd_pcm_get_dma_buf(substream), offset); if (!page) return VM_FAULT_SIGBUS; get_page(page); vmf->page = page; return 0; } static const struct vm_operations_struct snd_pcm_vm_ops_data = { .open = snd_pcm_mmap_data_open, .close = snd_pcm_mmap_data_close, }; static const struct vm_operations_struct snd_pcm_vm_ops_data_fault = { .open = snd_pcm_mmap_data_open, .close = snd_pcm_mmap_data_close, .fault = snd_pcm_mmap_data_fault, }; /* * mmap the DMA buffer on RAM */ /** * snd_pcm_lib_default_mmap - Default PCM data mmap function * @substream: PCM substream * @area: VMA * * This is the default mmap handler for PCM data. When mmap pcm_ops is NULL, * this function is invoked implicitly. * * Return: zero if successful, or a negative error code */ int snd_pcm_lib_default_mmap(struct snd_pcm_substream *substream, struct vm_area_struct *area) { vm_flags_set(area, VM_DONTEXPAND | VM_DONTDUMP); if (!substream->ops->page && !snd_dma_buffer_mmap(snd_pcm_get_dma_buf(substream), area)) return 0; /* mmap with fault handler */ area->vm_ops = &snd_pcm_vm_ops_data_fault; return 0; } EXPORT_SYMBOL_GPL(snd_pcm_lib_default_mmap); /* * mmap the DMA buffer on I/O memory area */ #if SNDRV_PCM_INFO_MMAP_IOMEM /** * snd_pcm_lib_mmap_iomem - Default PCM data mmap function for I/O mem * @substream: PCM substream * @area: VMA * * When your hardware uses the iomapped pages as the hardware buffer and * wants to mmap it, pass this function as mmap pcm_ops. Note that this * is supposed to work only on limited architectures. * * Return: zero if successful, or a negative error code */ int snd_pcm_lib_mmap_iomem(struct snd_pcm_substream *substream, struct vm_area_struct *area) { struct snd_pcm_runtime *runtime = substream->runtime; area->vm_page_prot = pgprot_noncached(area->vm_page_prot); return vm_iomap_memory(area, runtime->dma_addr, runtime->dma_bytes); } EXPORT_SYMBOL(snd_pcm_lib_mmap_iomem); #endif /* SNDRV_PCM_INFO_MMAP */ /* * mmap DMA buffer */ int snd_pcm_mmap_data(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area) { struct snd_pcm_runtime *runtime; long size; unsigned long offset; size_t dma_bytes; int err; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { if (!(area->vm_flags & (VM_WRITE|VM_READ))) return -EINVAL; } else { if (!(area->vm_flags & VM_READ)) return -EINVAL; } runtime = substream->runtime; if (runtime->state == SNDRV_PCM_STATE_OPEN) return -EBADFD; if (!(runtime->info & SNDRV_PCM_INFO_MMAP)) return -ENXIO; if (runtime->access == SNDRV_PCM_ACCESS_RW_INTERLEAVED || runtime->access == SNDRV_PCM_ACCESS_RW_NONINTERLEAVED) return -EINVAL; size = area->vm_end - area->vm_start; offset = area->vm_pgoff << PAGE_SHIFT; dma_bytes = PAGE_ALIGN(runtime->dma_bytes); if ((size_t)size > dma_bytes) return -EINVAL; if (offset > dma_bytes - size) return -EINVAL; area->vm_ops = &snd_pcm_vm_ops_data; area->vm_private_data = substream; if (substream->ops->mmap) err = substream->ops->mmap(substream, area); else err = snd_pcm_lib_default_mmap(substream, area); if (!err) atomic_inc(&substream->mmap_count); return err; } EXPORT_SYMBOL(snd_pcm_mmap_data); static int snd_pcm_mmap(struct file *file, struct vm_area_struct *area) { struct snd_pcm_file * pcm_file; struct snd_pcm_substream *substream; unsigned long offset; pcm_file = file->private_data; substream = pcm_file->substream; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; if (substream->runtime->state == SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; offset = area->vm_pgoff << PAGE_SHIFT; switch (offset) { case SNDRV_PCM_MMAP_OFFSET_STATUS_OLD: if (pcm_file->no_compat_mmap || !IS_ENABLED(CONFIG_64BIT)) return -ENXIO; fallthrough; case SNDRV_PCM_MMAP_OFFSET_STATUS_NEW: if (!pcm_status_mmap_allowed(pcm_file)) return -ENXIO; return snd_pcm_mmap_status(substream, file, area); case SNDRV_PCM_MMAP_OFFSET_CONTROL_OLD: if (pcm_file->no_compat_mmap || !IS_ENABLED(CONFIG_64BIT)) return -ENXIO; fallthrough; case SNDRV_PCM_MMAP_OFFSET_CONTROL_NEW: if (!pcm_control_mmap_allowed(pcm_file)) return -ENXIO; return snd_pcm_mmap_control(substream, file, area); default: return snd_pcm_mmap_data(substream, file, area); } return 0; } static int snd_pcm_fasync(int fd, struct file * file, int on) { struct snd_pcm_file * pcm_file; struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; pcm_file = file->private_data; substream = pcm_file->substream; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; runtime = substream->runtime; if (runtime->state == SNDRV_PCM_STATE_DISCONNECTED) return -EBADFD; return snd_fasync_helper(fd, file, on, &runtime->fasync); } /* * ioctl32 compat */ #ifdef CONFIG_COMPAT #include "pcm_compat.c" #else #define snd_pcm_ioctl_compat NULL #endif /* * To be removed helpers to keep binary compatibility */ #ifdef CONFIG_SND_SUPPORT_OLD_API #define __OLD_TO_NEW_MASK(x) ((x&7)|((x&0x07fffff8)<<5)) #define __NEW_TO_OLD_MASK(x) ((x&7)|((x&0xffffff00)>>5)) static void snd_pcm_hw_convert_from_old_params(struct snd_pcm_hw_params *params, struct snd_pcm_hw_params_old *oparams) { unsigned int i; memset(params, 0, sizeof(*params)); params->flags = oparams->flags; for (i = 0; i < ARRAY_SIZE(oparams->masks); i++) params->masks[i].bits[0] = oparams->masks[i]; memcpy(params->intervals, oparams->intervals, sizeof(oparams->intervals)); params->rmask = __OLD_TO_NEW_MASK(oparams->rmask); params->cmask = __OLD_TO_NEW_MASK(oparams->cmask); params->info = oparams->info; params->msbits = oparams->msbits; params->rate_num = oparams->rate_num; params->rate_den = oparams->rate_den; params->fifo_size = oparams->fifo_size; } static void snd_pcm_hw_convert_to_old_params(struct snd_pcm_hw_params_old *oparams, struct snd_pcm_hw_params *params) { unsigned int i; memset(oparams, 0, sizeof(*oparams)); oparams->flags = params->flags; for (i = 0; i < ARRAY_SIZE(oparams->masks); i++) oparams->masks[i] = params->masks[i].bits[0]; memcpy(oparams->intervals, params->intervals, sizeof(oparams->intervals)); oparams->rmask = __NEW_TO_OLD_MASK(params->rmask); oparams->cmask = __NEW_TO_OLD_MASK(params->cmask); oparams->info = params->info; oparams->msbits = params->msbits; oparams->rate_num = params->rate_num; oparams->rate_den = params->rate_den; oparams->fifo_size = params->fifo_size; } static int snd_pcm_hw_refine_old_user(struct snd_pcm_substream *substream, struct snd_pcm_hw_params_old __user * _oparams) { struct snd_pcm_hw_params *params __free(kfree) = NULL; struct snd_pcm_hw_params_old *oparams __free(kfree) = NULL; int err; params = kmalloc(sizeof(*params), GFP_KERNEL); if (!params) return -ENOMEM; oparams = memdup_user(_oparams, sizeof(*oparams)); if (IS_ERR(oparams)) return PTR_ERR(oparams); snd_pcm_hw_convert_from_old_params(params, oparams); err = snd_pcm_hw_refine(substream, params); if (err < 0) return err; err = fixup_unreferenced_params(substream, params); if (err < 0) return err; snd_pcm_hw_convert_to_old_params(oparams, params); if (copy_to_user(_oparams, oparams, sizeof(*oparams))) return -EFAULT; return 0; } static int snd_pcm_hw_params_old_user(struct snd_pcm_substream *substream, struct snd_pcm_hw_params_old __user * _oparams) { struct snd_pcm_hw_params *params __free(kfree) = NULL; struct snd_pcm_hw_params_old *oparams __free(kfree) = NULL; int err; params = kmalloc(sizeof(*params), GFP_KERNEL); if (!params) return -ENOMEM; oparams = memdup_user(_oparams, sizeof(*oparams)); if (IS_ERR(oparams)) return PTR_ERR(oparams); snd_pcm_hw_convert_from_old_params(params, oparams); err = snd_pcm_hw_params(substream, params); if (err < 0) return err; snd_pcm_hw_convert_to_old_params(oparams, params); if (copy_to_user(_oparams, oparams, sizeof(*oparams))) return -EFAULT; return 0; } #endif /* CONFIG_SND_SUPPORT_OLD_API */ #ifndef CONFIG_MMU static unsigned long snd_pcm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct snd_pcm_file *pcm_file = file->private_data; struct snd_pcm_substream *substream = pcm_file->substream; struct snd_pcm_runtime *runtime = substream->runtime; unsigned long offset = pgoff << PAGE_SHIFT; switch (offset) { case SNDRV_PCM_MMAP_OFFSET_STATUS_NEW: return (unsigned long)runtime->status; case SNDRV_PCM_MMAP_OFFSET_CONTROL_NEW: return (unsigned long)runtime->control; default: return (unsigned long)runtime->dma_area + offset; } } #else # define snd_pcm_get_unmapped_area NULL #endif /* * Register section */ const struct file_operations snd_pcm_f_ops[2] = { { .owner = THIS_MODULE, .write = snd_pcm_write, .write_iter = snd_pcm_writev, .open = snd_pcm_playback_open, .release = snd_pcm_release, .poll = snd_pcm_poll, .unlocked_ioctl = snd_pcm_ioctl, .compat_ioctl = snd_pcm_ioctl_compat, .mmap = snd_pcm_mmap, .fasync = snd_pcm_fasync, .get_unmapped_area = snd_pcm_get_unmapped_area, }, { .owner = THIS_MODULE, .read = snd_pcm_read, .read_iter = snd_pcm_readv, .open = snd_pcm_capture_open, .release = snd_pcm_release, .poll = snd_pcm_poll, .unlocked_ioctl = snd_pcm_ioctl, .compat_ioctl = snd_pcm_ioctl_compat, .mmap = snd_pcm_mmap, .fasync = snd_pcm_fasync, .get_unmapped_area = snd_pcm_get_unmapped_area, } }; |
| 6 6 6 6 6 6 6 6 6 6 6 6 6 6 5 5 5 5 1 1 1 1 4 5 5 21 21 2 2 1 1 21 1 21 19 171 7 2 5 2 3 3 3 168 14 14 14 14 14 14 8 14 14 14 6 6 6 6 1 1 6 26 14 14 14 14 6 6 14 6 6 25 4 1 3 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 | // SPDX-License-Identifier: GPL-2.0-only /* * Yama Linux Security Module * * Author: Kees Cook <keescook@chromium.org> * * Copyright (C) 2010 Canonical, Ltd. * Copyright (C) 2011 The Chromium OS Authors. */ #include <linux/lsm_hooks.h> #include <linux/sysctl.h> #include <linux/ptrace.h> #include <linux/prctl.h> #include <linux/ratelimit.h> #include <linux/workqueue.h> #include <linux/string_helpers.h> #include <linux/task_work.h> #include <linux/sched.h> #include <linux/spinlock.h> #include <uapi/linux/lsm.h> #define YAMA_SCOPE_DISABLED 0 #define YAMA_SCOPE_RELATIONAL 1 #define YAMA_SCOPE_CAPABILITY 2 #define YAMA_SCOPE_NO_ATTACH 3 static int ptrace_scope = YAMA_SCOPE_RELATIONAL; /* describe a ptrace relationship for potential exception */ struct ptrace_relation { struct task_struct *tracer; struct task_struct *tracee; bool invalid; struct list_head node; struct rcu_head rcu; }; static LIST_HEAD(ptracer_relations); static DEFINE_SPINLOCK(ptracer_relations_lock); static void yama_relation_cleanup(struct work_struct *work); static DECLARE_WORK(yama_relation_work, yama_relation_cleanup); struct access_report_info { struct callback_head work; const char *access; struct task_struct *target; struct task_struct *agent; }; static void __report_access(struct callback_head *work) { struct access_report_info *info = container_of(work, struct access_report_info, work); char *target_cmd, *agent_cmd; target_cmd = kstrdup_quotable_cmdline(info->target, GFP_KERNEL); agent_cmd = kstrdup_quotable_cmdline(info->agent, GFP_KERNEL); pr_notice_ratelimited( "ptrace %s of \"%s\"[%d] was attempted by \"%s\"[%d]\n", info->access, target_cmd, info->target->pid, agent_cmd, info->agent->pid); kfree(agent_cmd); kfree(target_cmd); put_task_struct(info->agent); put_task_struct(info->target); kfree(info); } /* defers execution because cmdline access can sleep */ static void report_access(const char *access, struct task_struct *target, struct task_struct *agent) { struct access_report_info *info; assert_spin_locked(&target->alloc_lock); /* for target->comm */ if (current->flags & PF_KTHREAD) { /* I don't think kthreads call task_work_run() before exiting. * Imagine angry ranting about procfs here. */ pr_notice_ratelimited( "ptrace %s of \"%s\"[%d] was attempted by \"%s\"[%d]\n", access, target->comm, target->pid, agent->comm, agent->pid); return; } info = kmalloc(sizeof(*info), GFP_ATOMIC); if (!info) return; init_task_work(&info->work, __report_access); get_task_struct(target); get_task_struct(agent); info->access = access; info->target = target; info->agent = agent; if (task_work_add(current, &info->work, TWA_RESUME) == 0) return; /* success */ WARN(1, "report_access called from exiting task"); put_task_struct(target); put_task_struct(agent); kfree(info); } /** * yama_relation_cleanup - remove invalid entries from the relation list * @work: unused * */ static void yama_relation_cleanup(struct work_struct *work) { struct ptrace_relation *relation; spin_lock(&ptracer_relations_lock); rcu_read_lock(); list_for_each_entry_rcu(relation, &ptracer_relations, node) { if (relation->invalid) { list_del_rcu(&relation->node); kfree_rcu(relation, rcu); } } rcu_read_unlock(); spin_unlock(&ptracer_relations_lock); } /** * yama_ptracer_add - add/replace an exception for this tracer/tracee pair * @tracer: the task_struct of the process doing the ptrace * @tracee: the task_struct of the process to be ptraced * * Each tracee can have, at most, one tracer registered. Each time this * is called, the prior registered tracer will be replaced for the tracee. * * Returns 0 if relationship was added, -ve on error. */ static int yama_ptracer_add(struct task_struct *tracer, struct task_struct *tracee) { struct ptrace_relation *relation, *added; added = kmalloc(sizeof(*added), GFP_KERNEL); if (!added) return -ENOMEM; added->tracee = tracee; added->tracer = tracer; added->invalid = false; spin_lock(&ptracer_relations_lock); rcu_read_lock(); list_for_each_entry_rcu(relation, &ptracer_relations, node) { if (relation->invalid) continue; if (relation->tracee == tracee) { list_replace_rcu(&relation->node, &added->node); kfree_rcu(relation, rcu); goto out; } } list_add_rcu(&added->node, &ptracer_relations); out: rcu_read_unlock(); spin_unlock(&ptracer_relations_lock); return 0; } /** * yama_ptracer_del - remove exceptions related to the given tasks * @tracer: remove any relation where tracer task matches * @tracee: remove any relation where tracee task matches */ static void yama_ptracer_del(struct task_struct *tracer, struct task_struct *tracee) { struct ptrace_relation *relation; bool marked = false; rcu_read_lock(); list_for_each_entry_rcu(relation, &ptracer_relations, node) { if (relation->invalid) continue; if (relation->tracee == tracee || (tracer && relation->tracer == tracer)) { relation->invalid = true; marked = true; } } rcu_read_unlock(); if (marked) schedule_work(&yama_relation_work); } /** * yama_task_free - check for task_pid to remove from exception list * @task: task being removed */ static void yama_task_free(struct task_struct *task) { yama_ptracer_del(task, task); } /** * yama_task_prctl - check for Yama-specific prctl operations * @option: operation * @arg2: argument * @arg3: argument * @arg4: argument * @arg5: argument * * Return 0 on success, -ve on error. -ENOSYS is returned when Yama * does not handle the given option. */ static int yama_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { int rc = -ENOSYS; struct task_struct *myself; switch (option) { case PR_SET_PTRACER: /* Since a thread can call prctl(), find the group leader * before calling _add() or _del() on it, since we want * process-level granularity of control. The tracer group * leader checking is handled later when walking the ancestry * at the time of PTRACE_ATTACH check. */ myself = current->group_leader; if (arg2 == 0) { yama_ptracer_del(NULL, myself); rc = 0; } else if (arg2 == PR_SET_PTRACER_ANY || (int)arg2 == -1) { rc = yama_ptracer_add(NULL, myself); } else { struct task_struct *tracer; tracer = find_get_task_by_vpid(arg2); if (!tracer) { rc = -EINVAL; } else { rc = yama_ptracer_add(tracer, myself); put_task_struct(tracer); } } break; } return rc; } /** * task_is_descendant - walk up a process family tree looking for a match * @parent: the process to compare against while walking up from child * @child: the process to start from while looking upwards for parent * * Returns 1 if child is a descendant of parent, 0 if not. */ static int task_is_descendant(struct task_struct *parent, struct task_struct *child) { int rc = 0; struct task_struct *walker = child; if (!parent || !child) return 0; rcu_read_lock(); if (!thread_group_leader(parent)) parent = rcu_dereference(parent->group_leader); while (walker->pid > 0) { if (!thread_group_leader(walker)) walker = rcu_dereference(walker->group_leader); if (walker == parent) { rc = 1; break; } walker = rcu_dereference(walker->real_parent); } rcu_read_unlock(); return rc; } /** * ptracer_exception_found - tracer registered as exception for this tracee * @tracer: the task_struct of the process attempting ptrace * @tracee: the task_struct of the process to be ptraced * * Returns 1 if tracer has a ptracer exception ancestor for tracee. */ static int ptracer_exception_found(struct task_struct *tracer, struct task_struct *tracee) { int rc = 0; struct ptrace_relation *relation; struct task_struct *parent = NULL; bool found = false; rcu_read_lock(); /* * If there's already an active tracing relationship, then make an * exception for the sake of other accesses, like process_vm_rw(). */ parent = ptrace_parent(tracee); if (parent != NULL && same_thread_group(parent, tracer)) { rc = 1; goto unlock; } /* Look for a PR_SET_PTRACER relationship. */ if (!thread_group_leader(tracee)) tracee = rcu_dereference(tracee->group_leader); list_for_each_entry_rcu(relation, &ptracer_relations, node) { if (relation->invalid) continue; if (relation->tracee == tracee) { parent = relation->tracer; found = true; break; } } if (found && (parent == NULL || task_is_descendant(parent, tracer))) rc = 1; unlock: rcu_read_unlock(); return rc; } /** * yama_ptrace_access_check - validate PTRACE_ATTACH calls * @child: task that current task is attempting to ptrace * @mode: ptrace attach mode * * Returns 0 if following the ptrace is allowed, -ve on error. */ static int yama_ptrace_access_check(struct task_struct *child, unsigned int mode) { int rc = 0; /* require ptrace target be a child of ptracer on attach */ if (mode & PTRACE_MODE_ATTACH) { switch (ptrace_scope) { case YAMA_SCOPE_DISABLED: /* No additional restrictions. */ break; case YAMA_SCOPE_RELATIONAL: rcu_read_lock(); if (!pid_alive(child)) rc = -EPERM; if (!rc && !task_is_descendant(current, child) && !ptracer_exception_found(current, child) && !ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE)) rc = -EPERM; rcu_read_unlock(); break; case YAMA_SCOPE_CAPABILITY: rcu_read_lock(); if (!ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE)) rc = -EPERM; rcu_read_unlock(); break; case YAMA_SCOPE_NO_ATTACH: default: rc = -EPERM; break; } } if (rc && (mode & PTRACE_MODE_NOAUDIT) == 0) report_access("attach", child, current); return rc; } /** * yama_ptrace_traceme - validate PTRACE_TRACEME calls * @parent: task that will become the ptracer of the current task * * Returns 0 if following the ptrace is allowed, -ve on error. */ static int yama_ptrace_traceme(struct task_struct *parent) { int rc = 0; /* Only disallow PTRACE_TRACEME on more aggressive settings. */ switch (ptrace_scope) { case YAMA_SCOPE_CAPABILITY: if (!has_ns_capability(parent, current_user_ns(), CAP_SYS_PTRACE)) rc = -EPERM; break; case YAMA_SCOPE_NO_ATTACH: rc = -EPERM; break; } if (rc) { task_lock(current); report_access("traceme", current, parent); task_unlock(current); } return rc; } static const struct lsm_id yama_lsmid = { .name = "yama", .id = LSM_ID_YAMA, }; static struct security_hook_list yama_hooks[] __ro_after_init = { LSM_HOOK_INIT(ptrace_access_check, yama_ptrace_access_check), LSM_HOOK_INIT(ptrace_traceme, yama_ptrace_traceme), LSM_HOOK_INIT(task_prctl, yama_task_prctl), LSM_HOOK_INIT(task_free, yama_task_free), }; #ifdef CONFIG_SYSCTL static int yama_dointvec_minmax(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table table_copy; if (write && !capable(CAP_SYS_PTRACE)) return -EPERM; /* Lock the max value if it ever gets set. */ table_copy = *table; if (*(int *)table_copy.data == *(int *)table_copy.extra2) table_copy.extra1 = table_copy.extra2; return proc_dointvec_minmax(&table_copy, write, buffer, lenp, ppos); } static int max_scope = YAMA_SCOPE_NO_ATTACH; static const struct ctl_table yama_sysctl_table[] = { { .procname = "ptrace_scope", .data = &ptrace_scope, .maxlen = sizeof(int), .mode = 0644, .proc_handler = yama_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &max_scope, }, }; static void __init yama_init_sysctl(void) { if (!register_sysctl("kernel/yama", yama_sysctl_table)) panic("Yama: sysctl registration failed.\n"); } #else static inline void yama_init_sysctl(void) { } #endif /* CONFIG_SYSCTL */ static int __init yama_init(void) { pr_info("Yama: becoming mindful.\n"); security_add_hooks(yama_hooks, ARRAY_SIZE(yama_hooks), &yama_lsmid); yama_init_sysctl(); return 0; } DEFINE_LSM(yama) = { .name = "yama", .init = yama_init, }; |
| 22 16 10 13 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 | // SPDX-License-Identifier: GPL-2.0 /* * USB Serial Converter stuff * * Copyright (C) 1999 - 2012 * Greg Kroah-Hartman (greg@kroah.com) */ #ifndef __LINUX_USB_SERIAL_H #define __LINUX_USB_SERIAL_H #include <linux/kref.h> #include <linux/mutex.h> #include <linux/serial.h> #include <linux/kfifo.h> /* The maximum number of ports one device can grab at once */ #define MAX_NUM_PORTS 16 /* USB serial flags */ #define USB_SERIAL_WRITE_BUSY 0 #define USB_SERIAL_THROTTLED 1 /** * usb_serial_port: structure for the specific ports of a device. * @serial: pointer back to the struct usb_serial owner of this port. * @port: pointer to the corresponding tty_port for this port. * @lock: spinlock to grab when updating portions of this structure. * @minor: the minor number of the port * @port_number: the struct usb_serial port number of this port (starts at 0) * @interrupt_in_buffer: pointer to the interrupt in buffer for this port. * @interrupt_in_urb: pointer to the interrupt in struct urb for this port. * @interrupt_in_endpointAddress: endpoint address for the interrupt in pipe * for this port. * @interrupt_out_buffer: pointer to the interrupt out buffer for this port. * @interrupt_out_size: the size of the interrupt_out_buffer, in bytes. * @interrupt_out_urb: pointer to the interrupt out struct urb for this port. * @interrupt_out_endpointAddress: endpoint address for the interrupt out pipe * for this port. * @bulk_in_buffer: pointer to the bulk in buffer for this port. * @bulk_in_size: the size of the bulk_in_buffer, in bytes. * @read_urb: pointer to the bulk in struct urb for this port. * @bulk_in_endpointAddress: endpoint address for the bulk in pipe for this * port. * @bulk_in_buffers: pointers to the bulk in buffers for this port * @read_urbs: pointers to the bulk in urbs for this port * @read_urbs_free: status bitmap the for bulk in urbs * @bulk_out_buffer: pointer to the bulk out buffer for this port. * @bulk_out_size: the size of the bulk_out_buffer, in bytes. * @write_urb: pointer to the bulk out struct urb for this port. * @write_fifo: kfifo used to buffer outgoing data * @bulk_out_buffers: pointers to the bulk out buffers for this port * @write_urbs: pointers to the bulk out urbs for this port * @write_urbs_free: status bitmap the for bulk out urbs * @icount: interrupt counters * @tx_bytes: number of bytes currently in host stack queues * @bulk_out_endpointAddress: endpoint address for the bulk out pipe for this * port. * @flags: usb serial port flags * @work: work queue entry for the line discipline waking up. * @dev: pointer to the serial device * * This structure is used by the usb-serial core and drivers for the specific * ports of a device. */ struct usb_serial_port { struct usb_serial *serial; struct tty_port port; spinlock_t lock; u32 minor; u8 port_number; unsigned char *interrupt_in_buffer; struct urb *interrupt_in_urb; __u8 interrupt_in_endpointAddress; unsigned char *interrupt_out_buffer; int interrupt_out_size; struct urb *interrupt_out_urb; __u8 interrupt_out_endpointAddress; unsigned char *bulk_in_buffer; int bulk_in_size; struct urb *read_urb; __u8 bulk_in_endpointAddress; unsigned char *bulk_in_buffers[2]; struct urb *read_urbs[2]; unsigned long read_urbs_free; unsigned char *bulk_out_buffer; int bulk_out_size; struct urb *write_urb; struct kfifo write_fifo; unsigned char *bulk_out_buffers[2]; struct urb *write_urbs[2]; unsigned long write_urbs_free; __u8 bulk_out_endpointAddress; struct async_icount icount; int tx_bytes; unsigned long flags; struct work_struct work; unsigned long sysrq; /* sysrq timeout */ struct device dev; }; #define to_usb_serial_port(d) container_of(d, struct usb_serial_port, dev) /* get and set the port private data pointer helper functions */ static inline void *usb_get_serial_port_data(struct usb_serial_port *port) { return dev_get_drvdata(&port->dev); } static inline void usb_set_serial_port_data(struct usb_serial_port *port, void *data) { dev_set_drvdata(&port->dev, data); } /** * usb_serial - structure used by the usb-serial core for a device * @dev: pointer to the struct usb_device for this device * @type: pointer to the struct usb_serial_driver for this device * @interface: pointer to the struct usb_interface for this device * @sibling: pointer to the struct usb_interface of any sibling interface * @suspend_count: number of suspended (sibling) interfaces * @num_ports: the number of ports this device has * @num_interrupt_in: number of interrupt in endpoints we have * @num_interrupt_out: number of interrupt out endpoints we have * @num_bulk_in: number of bulk in endpoints we have * @num_bulk_out: number of bulk out endpoints we have * @port: array of struct usb_serial_port structures for the different ports. * @private: place to put any driver specific information that is needed. The * usb-serial driver is required to manage this data, the usb-serial core * will not touch this. Use usb_get_serial_data() and * usb_set_serial_data() to access this. */ struct usb_serial { struct usb_device *dev; struct usb_serial_driver *type; struct usb_interface *interface; struct usb_interface *sibling; unsigned int suspend_count; unsigned char disconnected:1; unsigned char attached:1; unsigned char minors_reserved:1; unsigned char num_ports; unsigned char num_port_pointers; unsigned char num_interrupt_in; unsigned char num_interrupt_out; unsigned char num_bulk_in; unsigned char num_bulk_out; struct usb_serial_port *port[MAX_NUM_PORTS]; struct kref kref; struct mutex disc_mutex; void *private; }; #define to_usb_serial(d) container_of(d, struct usb_serial, kref) /* get and set the serial private data pointer helper functions */ static inline void *usb_get_serial_data(struct usb_serial *serial) { return serial->private; } static inline void usb_set_serial_data(struct usb_serial *serial, void *data) { serial->private = data; } struct usb_serial_endpoints { unsigned char num_bulk_in; unsigned char num_bulk_out; unsigned char num_interrupt_in; unsigned char num_interrupt_out; struct usb_endpoint_descriptor *bulk_in[MAX_NUM_PORTS]; struct usb_endpoint_descriptor *bulk_out[MAX_NUM_PORTS]; struct usb_endpoint_descriptor *interrupt_in[MAX_NUM_PORTS]; struct usb_endpoint_descriptor *interrupt_out[MAX_NUM_PORTS]; }; /** * usb_serial_driver - describes a usb serial driver * @description: pointer to a string that describes this driver. This string * used in the syslog messages when a device is inserted or removed. * @id_table: pointer to a list of usb_device_id structures that define all * of the devices this structure can support. * @num_ports: the number of different ports this device will have. * @num_bulk_in: minimum number of bulk-in endpoints * @num_bulk_out: minimum number of bulk-out endpoints * @num_interrupt_in: minimum number of interrupt-in endpoints * @num_interrupt_out: minimum number of interrupt-out endpoints * @bulk_in_size: minimum number of bytes to allocate for bulk-in buffer * (0 = end-point size) * @bulk_out_size: bytes to allocate for bulk-out buffer (0 = end-point size) * @calc_num_ports: pointer to a function to determine how many ports this * device has dynamically. It can also be used to verify the number of * endpoints or to modify the port-endpoint mapping. It will be called * after the probe() callback is called, but before attach(). * @probe: pointer to the driver's probe function. * This will be called when the device is inserted into the system, * but before the device has been fully initialized by the usb_serial * subsystem. Use this function to download any firmware to the device, * or any other early initialization that might be needed. * Return 0 to continue on with the initialization sequence. Anything * else will abort it. * @attach: pointer to the driver's attach function. * This will be called when the struct usb_serial structure is fully * set up. Do any local initialization of the device, or any private * memory structure allocation at this point in time. * @disconnect: pointer to the driver's disconnect function. This will be * called when the device is unplugged or unbound from the driver. * @release: pointer to the driver's release function. This will be called * when the usb_serial data structure is about to be destroyed. * @usb_driver: pointer to the struct usb_driver that controls this * device. This is necessary to allow dynamic ids to be added to * the driver from sysfs. * * This structure is defines a USB Serial driver. It provides all of * the information that the USB serial core code needs. If the function * pointers are defined, then the USB serial core code will call them when * the corresponding tty port functions are called. If they are not * called, the generic serial function will be used instead. * * The driver.owner field should be set to the module owner of this driver. * The driver.name field should be set to the name of this driver (remember * it will show up in sysfs, so it needs to be short and to the point. * Using the module name is a good idea.) */ struct usb_serial_driver { const char *description; const struct usb_device_id *id_table; struct list_head driver_list; struct device_driver driver; struct usb_driver *usb_driver; struct usb_dynids dynids; unsigned char num_ports; unsigned char num_bulk_in; unsigned char num_bulk_out; unsigned char num_interrupt_in; unsigned char num_interrupt_out; size_t bulk_in_size; size_t bulk_out_size; int (*probe)(struct usb_serial *serial, const struct usb_device_id *id); int (*attach)(struct usb_serial *serial); int (*calc_num_ports)(struct usb_serial *serial, struct usb_serial_endpoints *epds); void (*disconnect)(struct usb_serial *serial); void (*release)(struct usb_serial *serial); int (*port_probe)(struct usb_serial_port *port); void (*port_remove)(struct usb_serial_port *port); int (*suspend)(struct usb_serial *serial, pm_message_t message); int (*resume)(struct usb_serial *serial); int (*reset_resume)(struct usb_serial *serial); /* serial function calls */ /* Called by console and by the tty layer */ int (*open)(struct tty_struct *tty, struct usb_serial_port *port); void (*close)(struct usb_serial_port *port); int (*write)(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); /* Called only by the tty layer */ unsigned int (*write_room)(struct tty_struct *tty); int (*ioctl)(struct tty_struct *tty, unsigned int cmd, unsigned long arg); void (*get_serial)(struct tty_struct *tty, struct serial_struct *ss); int (*set_serial)(struct tty_struct *tty, struct serial_struct *ss); void (*set_termios)(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old); int (*break_ctl)(struct tty_struct *tty, int break_state); unsigned int (*chars_in_buffer)(struct tty_struct *tty); void (*wait_until_sent)(struct tty_struct *tty, long timeout); bool (*tx_empty)(struct usb_serial_port *port); void (*throttle)(struct tty_struct *tty); void (*unthrottle)(struct tty_struct *tty); int (*tiocmget)(struct tty_struct *tty); int (*tiocmset)(struct tty_struct *tty, unsigned int set, unsigned int clear); int (*tiocmiwait)(struct tty_struct *tty, unsigned long arg); int (*get_icount)(struct tty_struct *tty, struct serial_icounter_struct *icount); /* Called by the tty layer for port level work. There may or may not be an attached tty at this point */ void (*dtr_rts)(struct usb_serial_port *port, int on); int (*carrier_raised)(struct usb_serial_port *port); /* Called by the usb serial hooks to allow the user to rework the termios state */ void (*init_termios)(struct tty_struct *tty); /* USB events */ void (*read_int_callback)(struct urb *urb); void (*write_int_callback)(struct urb *urb); void (*read_bulk_callback)(struct urb *urb); void (*write_bulk_callback)(struct urb *urb); /* Called by the generic read bulk callback */ void (*process_read_urb)(struct urb *urb); /* Called by the generic write implementation */ int (*prepare_write_buffer)(struct usb_serial_port *port, void *dest, size_t size); }; #define to_usb_serial_driver(d) \ container_of(d, struct usb_serial_driver, driver) #define usb_serial_register_drivers(serial_drivers, name, id_table) \ __usb_serial_register_drivers(serial_drivers, THIS_MODULE, name, id_table) int __usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[], struct module *owner, const char *name, const struct usb_device_id *id_table); void usb_serial_deregister_drivers(struct usb_serial_driver *const serial_drivers[]); void usb_serial_port_softint(struct usb_serial_port *port); int usb_serial_suspend(struct usb_interface *intf, pm_message_t message); int usb_serial_resume(struct usb_interface *intf); /* USB Serial console functions */ #ifdef CONFIG_USB_SERIAL_CONSOLE void usb_serial_console_init(int minor); void usb_serial_console_exit(void); void usb_serial_console_disconnect(struct usb_serial *serial); #else static inline void usb_serial_console_init(int minor) { } static inline void usb_serial_console_exit(void) { } static inline void usb_serial_console_disconnect(struct usb_serial *serial) {} #endif /* Functions needed by other parts of the usbserial core */ struct usb_serial_port *usb_serial_port_get_by_minor(unsigned int minor); void usb_serial_put(struct usb_serial *serial); int usb_serial_claim_interface(struct usb_serial *serial, struct usb_interface *intf); int usb_serial_generic_open(struct tty_struct *tty, struct usb_serial_port *port); int usb_serial_generic_write_start(struct usb_serial_port *port, gfp_t mem_flags); int usb_serial_generic_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); void usb_serial_generic_close(struct usb_serial_port *port); int usb_serial_generic_resume(struct usb_serial *serial); unsigned int usb_serial_generic_write_room(struct tty_struct *tty); unsigned int usb_serial_generic_chars_in_buffer(struct tty_struct *tty); void usb_serial_generic_wait_until_sent(struct tty_struct *tty, long timeout); void usb_serial_generic_read_bulk_callback(struct urb *urb); void usb_serial_generic_write_bulk_callback(struct urb *urb); void usb_serial_generic_throttle(struct tty_struct *tty); void usb_serial_generic_unthrottle(struct tty_struct *tty); int usb_serial_generic_tiocmiwait(struct tty_struct *tty, unsigned long arg); int usb_serial_generic_get_icount(struct tty_struct *tty, struct serial_icounter_struct *icount); int usb_serial_generic_register(void); void usb_serial_generic_deregister(void); int usb_serial_generic_submit_read_urbs(struct usb_serial_port *port, gfp_t mem_flags); void usb_serial_generic_process_read_urb(struct urb *urb); int usb_serial_generic_prepare_write_buffer(struct usb_serial_port *port, void *dest, size_t size); #if defined(CONFIG_USB_SERIAL_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) int usb_serial_handle_sysrq_char(struct usb_serial_port *port, unsigned int ch); int usb_serial_handle_break(struct usb_serial_port *port); #else static inline int usb_serial_handle_sysrq_char(struct usb_serial_port *port, unsigned int ch) { return 0; } static inline int usb_serial_handle_break(struct usb_serial_port *port) { return 0; } #endif void usb_serial_handle_dcd_change(struct usb_serial_port *usb_port, struct tty_struct *tty, unsigned int status); int usb_serial_bus_register(struct usb_serial_driver *device); void usb_serial_bus_deregister(struct usb_serial_driver *device); extern const struct bus_type usb_serial_bus_type; extern struct tty_driver *usb_serial_tty_driver; static inline void usb_serial_debug_data(struct device *dev, const char *function, int size, const unsigned char *data) { dev_dbg(dev, "%s - length = %d, data = %*ph\n", function, size, size, data); } /* * Macro for reporting errors in write path to avoid infinite loop * when port is used as a console. */ #define dev_err_console(usport, fmt, ...) \ do { \ static bool __print_once; \ struct usb_serial_port *__port = (usport); \ \ if (!__port->port.console || !__print_once) { \ __print_once = true; \ dev_err(&__port->dev, fmt, ##__VA_ARGS__); \ } \ } while (0) /* * module_usb_serial_driver() - Helper macro for registering a USB Serial driver * @__serial_drivers: list of usb_serial drivers to register * @__ids: all device ids that @__serial_drivers bind to * * Helper macro for USB serial drivers which do not do anything special * in module init/exit. This eliminates a lot of boilerplate. Each * module may only use this macro once, and calling it replaces * module_init() and module_exit() * */ #define usb_serial_module_driver(__name, __serial_drivers, __ids) \ static int __init usb_serial_module_init(void) \ { \ return usb_serial_register_drivers(__serial_drivers, \ __name, __ids); \ } \ module_init(usb_serial_module_init); \ static void __exit usb_serial_module_exit(void) \ { \ usb_serial_deregister_drivers(__serial_drivers); \ } \ module_exit(usb_serial_module_exit); #define module_usb_serial_driver(__serial_drivers, __ids) \ usb_serial_module_driver(KBUILD_MODNAME, __serial_drivers, __ids) #endif /* __LINUX_USB_SERIAL_H */ |
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * File: af_phonet.h * * Phonet sockets kernel definitions * * Copyright (C) 2008 Nokia Corporation. */ #ifndef AF_PHONET_H #define AF_PHONET_H #include <linux/phonet.h> #include <linux/skbuff.h> #include <net/sock.h> /* * The lower layers may not require more space, ever. Make sure it's * enough. */ #define MAX_PHONET_HEADER (8 + MAX_HEADER) /* * Every Phonet* socket has this structure first in its * protocol-specific structure under name c. */ struct pn_sock { struct sock sk; u16 sobject; u16 dobject; u8 resource; }; static inline struct pn_sock *pn_sk(struct sock *sk) { return (struct pn_sock *)sk; } extern const struct proto_ops phonet_dgram_ops; void pn_sock_init(void); struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *sa); void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb); void phonet_get_local_port_range(int *min, int *max); int pn_sock_hash(struct sock *sk); void pn_sock_unhash(struct sock *sk); int pn_sock_get_port(struct sock *sk, unsigned short sport); struct sock *pn_find_sock_by_res(struct net *net, u8 res); int pn_sock_bind_res(struct sock *sock, u8 res); int pn_sock_unbind_res(struct sock *sk, u8 res); void pn_sock_unbind_all_res(struct sock *sk); int pn_skb_send(struct sock *sk, struct sk_buff *skb, const struct sockaddr_pn *target); static inline struct phonethdr *pn_hdr(struct sk_buff *skb) { return (struct phonethdr *)skb_network_header(skb); } static inline struct phonetmsg *pn_msg(struct sk_buff *skb) { return (struct phonetmsg *)skb_transport_header(skb); } /* * Get the other party's sockaddr from received skb. The skb begins * with a Phonet header. */ static inline void pn_skb_get_src_sockaddr(struct sk_buff *skb, struct sockaddr_pn *sa) { struct phonethdr *ph = pn_hdr(skb); u16 obj = pn_object(ph->pn_sdev, ph->pn_sobj); sa->spn_family = AF_PHONET; pn_sockaddr_set_object(sa, obj); pn_sockaddr_set_resource(sa, ph->pn_res); memset(sa->spn_zero, 0, sizeof(sa->spn_zero)); } static inline void pn_skb_get_dst_sockaddr(struct sk_buff *skb, struct sockaddr_pn *sa) { struct phonethdr *ph = pn_hdr(skb); u16 obj = pn_object(ph->pn_rdev, ph->pn_robj); sa->spn_family = AF_PHONET; pn_sockaddr_set_object(sa, obj); pn_sockaddr_set_resource(sa, ph->pn_res); memset(sa->spn_zero, 0, sizeof(sa->spn_zero)); } /* Protocols in Phonet protocol family. */ struct phonet_protocol { const struct proto_ops *ops; struct proto *prot; int sock_type; }; int phonet_proto_register(unsigned int protocol, const struct phonet_protocol *pp); void phonet_proto_unregister(unsigned int protocol, const struct phonet_protocol *pp); int phonet_sysctl_init(void); void phonet_sysctl_exit(void); int isi_register(void); void isi_unregister(void); static inline bool sk_is_phonet(struct sock *sk) { return sk->sk_family == PF_PHONET; } static inline int phonet_sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) { int karg; switch (cmd) { case SIOCPNADDRESOURCE: case SIOCPNDELRESOURCE: if (get_user(karg, (int __user *)arg)) return -EFAULT; return sk->sk_prot->ioctl(sk, cmd, &karg); } /* A positive return value means that the ioctl was not processed */ return 1; } #endif |
| 14 13 6 14 10 4 10 18 10 14 26 15 14 26 11 11 21 21 21 21 8 8 8 7 8 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ #include "noise.h" #include "device.h" #include "peer.h" #include "messages.h" #include "queueing.h" #include "peerlookup.h" #include <linux/rcupdate.h> #include <linux/slab.h> #include <linux/bitmap.h> #include <linux/scatterlist.h> #include <linux/highmem.h> #include <crypto/utils.h> /* This implements Noise_IKpsk2: * * <- s * ****** * -> e, es, s, ss, {t} * <- e, ee, se, psk, {} */ static const u8 handshake_name[37] __nonstring = "Noise_IKpsk2_25519_ChaChaPoly_BLAKE2s"; static const u8 identifier_name[34] __nonstring = "WireGuard v1 zx2c4 Jason@zx2c4.com"; static u8 handshake_init_hash[NOISE_HASH_LEN] __ro_after_init; static u8 handshake_init_chaining_key[NOISE_HASH_LEN] __ro_after_init; static atomic64_t keypair_counter = ATOMIC64_INIT(0); void __init wg_noise_init(void) { struct blake2s_state blake; blake2s(handshake_init_chaining_key, handshake_name, NULL, NOISE_HASH_LEN, sizeof(handshake_name), 0); blake2s_init(&blake, NOISE_HASH_LEN); blake2s_update(&blake, handshake_init_chaining_key, NOISE_HASH_LEN); blake2s_update(&blake, identifier_name, sizeof(identifier_name)); blake2s_final(&blake, handshake_init_hash); } /* Must hold peer->handshake.static_identity->lock */ void wg_noise_precompute_static_static(struct wg_peer *peer) { down_write(&peer->handshake.lock); if (!peer->handshake.static_identity->has_identity || !curve25519(peer->handshake.precomputed_static_static, peer->handshake.static_identity->static_private, peer->handshake.remote_static)) memset(peer->handshake.precomputed_static_static, 0, NOISE_PUBLIC_KEY_LEN); up_write(&peer->handshake.lock); } void wg_noise_handshake_init(struct noise_handshake *handshake, struct noise_static_identity *static_identity, const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], struct wg_peer *peer) { memset(handshake, 0, sizeof(*handshake)); init_rwsem(&handshake->lock); handshake->entry.type = INDEX_HASHTABLE_HANDSHAKE; handshake->entry.peer = peer; memcpy(handshake->remote_static, peer_public_key, NOISE_PUBLIC_KEY_LEN); if (peer_preshared_key) memcpy(handshake->preshared_key, peer_preshared_key, NOISE_SYMMETRIC_KEY_LEN); handshake->static_identity = static_identity; handshake->state = HANDSHAKE_ZEROED; wg_noise_precompute_static_static(peer); } static void handshake_zero(struct noise_handshake *handshake) { memset(&handshake->ephemeral_private, 0, NOISE_PUBLIC_KEY_LEN); memset(&handshake->remote_ephemeral, 0, NOISE_PUBLIC_KEY_LEN); memset(&handshake->hash, 0, NOISE_HASH_LEN); memset(&handshake->chaining_key, 0, NOISE_HASH_LEN); handshake->remote_index = 0; handshake->state = HANDSHAKE_ZEROED; } void wg_noise_handshake_clear(struct noise_handshake *handshake) { down_write(&handshake->lock); wg_index_hashtable_remove( handshake->entry.peer->device->index_hashtable, &handshake->entry); handshake_zero(handshake); up_write(&handshake->lock); } static struct noise_keypair *keypair_create(struct wg_peer *peer) { struct noise_keypair *keypair = kzalloc(sizeof(*keypair), GFP_KERNEL); if (unlikely(!keypair)) return NULL; spin_lock_init(&keypair->receiving_counter.lock); keypair->internal_id = atomic64_inc_return(&keypair_counter); keypair->entry.type = INDEX_HASHTABLE_KEYPAIR; keypair->entry.peer = peer; kref_init(&keypair->refcount); return keypair; } static void keypair_free_rcu(struct rcu_head *rcu) { kfree_sensitive(container_of(rcu, struct noise_keypair, rcu)); } static void keypair_free_kref(struct kref *kref) { struct noise_keypair *keypair = container_of(kref, struct noise_keypair, refcount); net_dbg_ratelimited("%s: Keypair %llu destroyed for peer %llu\n", keypair->entry.peer->device->dev->name, keypair->internal_id, keypair->entry.peer->internal_id); wg_index_hashtable_remove(keypair->entry.peer->device->index_hashtable, &keypair->entry); call_rcu(&keypair->rcu, keypair_free_rcu); } void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now) { if (unlikely(!keypair)) return; if (unlikely(unreference_now)) wg_index_hashtable_remove( keypair->entry.peer->device->index_hashtable, &keypair->entry); kref_put(&keypair->refcount, keypair_free_kref); } struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair) { RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(), "Taking noise keypair reference without holding the RCU BH read lock"); if (unlikely(!keypair || !kref_get_unless_zero(&keypair->refcount))) return NULL; return keypair; } void wg_noise_keypairs_clear(struct noise_keypairs *keypairs) { struct noise_keypair *old; spin_lock_bh(&keypairs->keypair_update_lock); /* We zero the next_keypair before zeroing the others, so that * wg_noise_received_with_keypair returns early before subsequent ones * are zeroed. */ old = rcu_dereference_protected(keypairs->next_keypair, lockdep_is_held(&keypairs->keypair_update_lock)); RCU_INIT_POINTER(keypairs->next_keypair, NULL); wg_noise_keypair_put(old, true); old = rcu_dereference_protected(keypairs->previous_keypair, lockdep_is_held(&keypairs->keypair_update_lock)); RCU_INIT_POINTER(keypairs->previous_keypair, NULL); wg_noise_keypair_put(old, true); old = rcu_dereference_protected(keypairs->current_keypair, lockdep_is_held(&keypairs->keypair_update_lock)); RCU_INIT_POINTER(keypairs->current_keypair, NULL); wg_noise_keypair_put(old, true); spin_unlock_bh(&keypairs->keypair_update_lock); } void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer) { struct noise_keypair *keypair; wg_noise_handshake_clear(&peer->handshake); wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); spin_lock_bh(&peer->keypairs.keypair_update_lock); keypair = rcu_dereference_protected(peer->keypairs.next_keypair, lockdep_is_held(&peer->keypairs.keypair_update_lock)); if (keypair) keypair->sending.is_valid = false; keypair = rcu_dereference_protected(peer->keypairs.current_keypair, lockdep_is_held(&peer->keypairs.keypair_update_lock)); if (keypair) keypair->sending.is_valid = false; spin_unlock_bh(&peer->keypairs.keypair_update_lock); } static void add_new_keypair(struct noise_keypairs *keypairs, struct noise_keypair *new_keypair) { struct noise_keypair *previous_keypair, *next_keypair, *current_keypair; spin_lock_bh(&keypairs->keypair_update_lock); previous_keypair = rcu_dereference_protected(keypairs->previous_keypair, lockdep_is_held(&keypairs->keypair_update_lock)); next_keypair = rcu_dereference_protected(keypairs->next_keypair, lockdep_is_held(&keypairs->keypair_update_lock)); current_keypair = rcu_dereference_protected(keypairs->current_keypair, lockdep_is_held(&keypairs->keypair_update_lock)); if (new_keypair->i_am_the_initiator) { /* If we're the initiator, it means we've sent a handshake, and * received a confirmation response, which means this new * keypair can now be used. */ if (next_keypair) { /* If there already was a next keypair pending, we * demote it to be the previous keypair, and free the * existing current. Note that this means KCI can result * in this transition. It would perhaps be more sound to * always just get rid of the unused next keypair * instead of putting it in the previous slot, but this * might be a bit less robust. Something to think about * for the future. */ RCU_INIT_POINTER(keypairs->next_keypair, NULL); rcu_assign_pointer(keypairs->previous_keypair, next_keypair); wg_noise_keypair_put(current_keypair, true); } else /* If there wasn't an existing next keypair, we replace * the previous with the current one. */ rcu_assign_pointer(keypairs->previous_keypair, current_keypair); /* At this point we can get rid of the old previous keypair, and * set up the new keypair. */ wg_noise_keypair_put(previous_keypair, true); rcu_assign_pointer(keypairs->current_keypair, new_keypair); } else { /* If we're the responder, it means we can't use the new keypair * until we receive confirmation via the first data packet, so * we get rid of the existing previous one, the possibly * existing next one, and slide in the new next one. */ rcu_assign_pointer(keypairs->next_keypair, new_keypair); wg_noise_keypair_put(next_keypair, true); RCU_INIT_POINTER(keypairs->previous_keypair, NULL); wg_noise_keypair_put(previous_keypair, true); } spin_unlock_bh(&keypairs->keypair_update_lock); } bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs, struct noise_keypair *received_keypair) { struct noise_keypair *old_keypair; bool key_is_new; /* We first check without taking the spinlock. */ key_is_new = received_keypair == rcu_access_pointer(keypairs->next_keypair); if (likely(!key_is_new)) return false; spin_lock_bh(&keypairs->keypair_update_lock); /* After locking, we double check that things didn't change from * beneath us. */ if (unlikely(received_keypair != rcu_dereference_protected(keypairs->next_keypair, lockdep_is_held(&keypairs->keypair_update_lock)))) { spin_unlock_bh(&keypairs->keypair_update_lock); return false; } /* When we've finally received the confirmation, we slide the next * into the current, the current into the previous, and get rid of * the old previous. */ old_keypair = rcu_dereference_protected(keypairs->previous_keypair, lockdep_is_held(&keypairs->keypair_update_lock)); rcu_assign_pointer(keypairs->previous_keypair, rcu_dereference_protected(keypairs->current_keypair, lockdep_is_held(&keypairs->keypair_update_lock))); wg_noise_keypair_put(old_keypair, true); rcu_assign_pointer(keypairs->current_keypair, received_keypair); RCU_INIT_POINTER(keypairs->next_keypair, NULL); spin_unlock_bh(&keypairs->keypair_update_lock); return true; } /* Must hold static_identity->lock */ void wg_noise_set_static_identity_private_key( struct noise_static_identity *static_identity, const u8 private_key[NOISE_PUBLIC_KEY_LEN]) { memcpy(static_identity->static_private, private_key, NOISE_PUBLIC_KEY_LEN); curve25519_clamp_secret(static_identity->static_private); static_identity->has_identity = curve25519_generate_public( static_identity->static_public, private_key); } static void hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, const size_t keylen) { struct blake2s_state state; u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 }; u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32)); int i; if (keylen > BLAKE2S_BLOCK_SIZE) { blake2s_init(&state, BLAKE2S_HASH_SIZE); blake2s_update(&state, key, keylen); blake2s_final(&state, x_key); } else memcpy(x_key, key, keylen); for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) x_key[i] ^= 0x36; blake2s_init(&state, BLAKE2S_HASH_SIZE); blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); blake2s_update(&state, in, inlen); blake2s_final(&state, i_hash); for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) x_key[i] ^= 0x5c ^ 0x36; blake2s_init(&state, BLAKE2S_HASH_SIZE); blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE); blake2s_final(&state, i_hash); memcpy(out, i_hash, BLAKE2S_HASH_SIZE); memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE); memzero_explicit(i_hash, BLAKE2S_HASH_SIZE); } /* This is Hugo Krawczyk's HKDF: * - https://eprint.iacr.org/2010/264.pdf * - https://tools.ietf.org/html/rfc5869 */ static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data, size_t first_len, size_t second_len, size_t third_len, size_t data_len, const u8 chaining_key[NOISE_HASH_LEN]) { u8 output[BLAKE2S_HASH_SIZE + 1]; u8 secret[BLAKE2S_HASH_SIZE]; WARN_ON(IS_ENABLED(DEBUG) && (first_len > BLAKE2S_HASH_SIZE || second_len > BLAKE2S_HASH_SIZE || third_len > BLAKE2S_HASH_SIZE || ((second_len || second_dst || third_len || third_dst) && (!first_len || !first_dst)) || ((third_len || third_dst) && (!second_len || !second_dst)))); /* Extract entropy from data into secret */ hmac(secret, data, chaining_key, data_len, NOISE_HASH_LEN); if (!first_dst || !first_len) goto out; /* Expand first key: key = secret, data = 0x1 */ output[0] = 1; hmac(output, output, secret, 1, BLAKE2S_HASH_SIZE); memcpy(first_dst, output, first_len); if (!second_dst || !second_len) goto out; /* Expand second key: key = secret, data = first-key || 0x2 */ output[BLAKE2S_HASH_SIZE] = 2; hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE); memcpy(second_dst, output, second_len); if (!third_dst || !third_len) goto out; /* Expand third key: key = secret, data = second-key || 0x3 */ output[BLAKE2S_HASH_SIZE] = 3; hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE); memcpy(third_dst, output, third_len); out: /* Clear sensitive data from stack */ memzero_explicit(secret, BLAKE2S_HASH_SIZE); memzero_explicit(output, BLAKE2S_HASH_SIZE + 1); } static void derive_keys(struct noise_symmetric_key *first_dst, struct noise_symmetric_key *second_dst, const u8 chaining_key[NOISE_HASH_LEN]) { u64 birthdate = ktime_get_coarse_boottime_ns(); kdf(first_dst->key, second_dst->key, NULL, NULL, NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0, chaining_key); first_dst->birthdate = second_dst->birthdate = birthdate; first_dst->is_valid = second_dst->is_valid = true; } static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN], u8 key[NOISE_SYMMETRIC_KEY_LEN], const u8 private[NOISE_PUBLIC_KEY_LEN], const u8 public[NOISE_PUBLIC_KEY_LEN]) { u8 dh_calculation[NOISE_PUBLIC_KEY_LEN]; if (unlikely(!curve25519(dh_calculation, private, public))) return false; kdf(chaining_key, key, NULL, dh_calculation, NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, chaining_key); memzero_explicit(dh_calculation, NOISE_PUBLIC_KEY_LEN); return true; } static bool __must_check mix_precomputed_dh(u8 chaining_key[NOISE_HASH_LEN], u8 key[NOISE_SYMMETRIC_KEY_LEN], const u8 precomputed[NOISE_PUBLIC_KEY_LEN]) { static u8 zero_point[NOISE_PUBLIC_KEY_LEN]; if (unlikely(!crypto_memneq(precomputed, zero_point, NOISE_PUBLIC_KEY_LEN))) return false; kdf(chaining_key, key, NULL, precomputed, NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, chaining_key); return true; } static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len) { struct blake2s_state blake; blake2s_init(&blake, NOISE_HASH_LEN); blake2s_update(&blake, hash, NOISE_HASH_LEN); blake2s_update(&blake, src, src_len); blake2s_final(&blake, hash); } static void mix_psk(u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN], u8 key[NOISE_SYMMETRIC_KEY_LEN], const u8 psk[NOISE_SYMMETRIC_KEY_LEN]) { u8 temp_hash[NOISE_HASH_LEN]; kdf(chaining_key, temp_hash, key, psk, NOISE_HASH_LEN, NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, chaining_key); mix_hash(hash, temp_hash, NOISE_HASH_LEN); memzero_explicit(temp_hash, NOISE_HASH_LEN); } static void handshake_init(u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN], const u8 remote_static[NOISE_PUBLIC_KEY_LEN]) { memcpy(hash, handshake_init_hash, NOISE_HASH_LEN); memcpy(chaining_key, handshake_init_chaining_key, NOISE_HASH_LEN); mix_hash(hash, remote_static, NOISE_PUBLIC_KEY_LEN); } static void message_encrypt(u8 *dst_ciphertext, const u8 *src_plaintext, size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN], u8 hash[NOISE_HASH_LEN]) { chacha20poly1305_encrypt(dst_ciphertext, src_plaintext, src_len, hash, NOISE_HASH_LEN, 0 /* Always zero for Noise_IK */, key); mix_hash(hash, dst_ciphertext, noise_encrypted_len(src_len)); } static bool message_decrypt(u8 *dst_plaintext, const u8 *src_ciphertext, size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN], u8 hash[NOISE_HASH_LEN]) { if (!chacha20poly1305_decrypt(dst_plaintext, src_ciphertext, src_len, hash, NOISE_HASH_LEN, 0 /* Always zero for Noise_IK */, key)) return false; mix_hash(hash, src_ciphertext, src_len); return true; } static void message_ephemeral(u8 ephemeral_dst[NOISE_PUBLIC_KEY_LEN], const u8 ephemeral_src[NOISE_PUBLIC_KEY_LEN], u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN]) { if (ephemeral_dst != ephemeral_src) memcpy(ephemeral_dst, ephemeral_src, NOISE_PUBLIC_KEY_LEN); mix_hash(hash, ephemeral_src, NOISE_PUBLIC_KEY_LEN); kdf(chaining_key, NULL, NULL, ephemeral_src, NOISE_HASH_LEN, 0, 0, NOISE_PUBLIC_KEY_LEN, chaining_key); } static void tai64n_now(u8 output[NOISE_TIMESTAMP_LEN]) { struct timespec64 now; ktime_get_real_ts64(&now); /* In order to prevent some sort of infoleak from precise timers, we * round down the nanoseconds part to the closest rounded-down power of * two to the maximum initiations per second allowed anyway by the * implementation. */ now.tv_nsec = ALIGN_DOWN(now.tv_nsec, rounddown_pow_of_two(NSEC_PER_SEC / INITIATIONS_PER_SECOND)); /* https://cr.yp.to/libtai/tai64.html */ *(__be64 *)output = cpu_to_be64(0x400000000000000aULL + now.tv_sec); *(__be32 *)(output + sizeof(__be64)) = cpu_to_be32(now.tv_nsec); } bool wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst, struct noise_handshake *handshake) { u8 timestamp[NOISE_TIMESTAMP_LEN]; u8 key[NOISE_SYMMETRIC_KEY_LEN]; bool ret = false; /* We need to wait for crng _before_ taking any locks, since * curve25519_generate_secret uses get_random_bytes_wait. */ wait_for_random_bytes(); down_read(&handshake->static_identity->lock); down_write(&handshake->lock); if (unlikely(!handshake->static_identity->has_identity)) goto out; dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION); handshake_init(handshake->chaining_key, handshake->hash, handshake->remote_static); /* e */ curve25519_generate_secret(handshake->ephemeral_private); if (!curve25519_generate_public(dst->unencrypted_ephemeral, handshake->ephemeral_private)) goto out; message_ephemeral(dst->unencrypted_ephemeral, dst->unencrypted_ephemeral, handshake->chaining_key, handshake->hash); /* es */ if (!mix_dh(handshake->chaining_key, key, handshake->ephemeral_private, handshake->remote_static)) goto out; /* s */ message_encrypt(dst->encrypted_static, handshake->static_identity->static_public, NOISE_PUBLIC_KEY_LEN, key, handshake->hash); /* ss */ if (!mix_precomputed_dh(handshake->chaining_key, key, handshake->precomputed_static_static)) goto out; /* {t} */ tai64n_now(timestamp); message_encrypt(dst->encrypted_timestamp, timestamp, NOISE_TIMESTAMP_LEN, key, handshake->hash); dst->sender_index = wg_index_hashtable_insert( handshake->entry.peer->device->index_hashtable, &handshake->entry); handshake->state = HANDSHAKE_CREATED_INITIATION; ret = true; out: up_write(&handshake->lock); up_read(&handshake->static_identity->lock); memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); return ret; } struct wg_peer * wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src, struct wg_device *wg) { struct wg_peer *peer = NULL, *ret_peer = NULL; struct noise_handshake *handshake; bool replay_attack, flood_attack; u8 key[NOISE_SYMMETRIC_KEY_LEN]; u8 chaining_key[NOISE_HASH_LEN]; u8 hash[NOISE_HASH_LEN]; u8 s[NOISE_PUBLIC_KEY_LEN]; u8 e[NOISE_PUBLIC_KEY_LEN]; u8 t[NOISE_TIMESTAMP_LEN]; u64 initiation_consumption; down_read(&wg->static_identity.lock); if (unlikely(!wg->static_identity.has_identity)) goto out; handshake_init(chaining_key, hash, wg->static_identity.static_public); /* e */ message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash); /* es */ if (!mix_dh(chaining_key, key, wg->static_identity.static_private, e)) goto out; /* s */ if (!message_decrypt(s, src->encrypted_static, sizeof(src->encrypted_static), key, hash)) goto out; /* Lookup which peer we're actually talking to */ peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, s); if (!peer) goto out; handshake = &peer->handshake; /* ss */ if (!mix_precomputed_dh(chaining_key, key, handshake->precomputed_static_static)) goto out; /* {t} */ if (!message_decrypt(t, src->encrypted_timestamp, sizeof(src->encrypted_timestamp), key, hash)) goto out; down_read(&handshake->lock); replay_attack = memcmp(t, handshake->latest_timestamp, NOISE_TIMESTAMP_LEN) <= 0; flood_attack = (s64)handshake->last_initiation_consumption + NSEC_PER_SEC / INITIATIONS_PER_SECOND > (s64)ktime_get_coarse_boottime_ns(); up_read(&handshake->lock); if (replay_attack || flood_attack) goto out; /* Success! Copy everything to peer */ down_write(&handshake->lock); memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN); if (memcmp(t, handshake->latest_timestamp, NOISE_TIMESTAMP_LEN) > 0) memcpy(handshake->latest_timestamp, t, NOISE_TIMESTAMP_LEN); memcpy(handshake->hash, hash, NOISE_HASH_LEN); memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN); handshake->remote_index = src->sender_index; initiation_consumption = ktime_get_coarse_boottime_ns(); if ((s64)(handshake->last_initiation_consumption - initiation_consumption) < 0) handshake->last_initiation_consumption = initiation_consumption; handshake->state = HANDSHAKE_CONSUMED_INITIATION; up_write(&handshake->lock); ret_peer = peer; out: memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); memzero_explicit(hash, NOISE_HASH_LEN); memzero_explicit(chaining_key, NOISE_HASH_LEN); up_read(&wg->static_identity.lock); if (!ret_peer) wg_peer_put(peer); return ret_peer; } bool wg_noise_handshake_create_response(struct message_handshake_response *dst, struct noise_handshake *handshake) { u8 key[NOISE_SYMMETRIC_KEY_LEN]; bool ret = false; /* We need to wait for crng _before_ taking any locks, since * curve25519_generate_secret uses get_random_bytes_wait. */ wait_for_random_bytes(); down_read(&handshake->static_identity->lock); down_write(&handshake->lock); if (handshake->state != HANDSHAKE_CONSUMED_INITIATION) goto out; dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE); dst->receiver_index = handshake->remote_index; /* e */ curve25519_generate_secret(handshake->ephemeral_private); if (!curve25519_generate_public(dst->unencrypted_ephemeral, handshake->ephemeral_private)) goto out; message_ephemeral(dst->unencrypted_ephemeral, dst->unencrypted_ephemeral, handshake->chaining_key, handshake->hash); /* ee */ if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private, handshake->remote_ephemeral)) goto out; /* se */ if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private, handshake->remote_static)) goto out; /* psk */ mix_psk(handshake->chaining_key, handshake->hash, key, handshake->preshared_key); /* {} */ message_encrypt(dst->encrypted_nothing, NULL, 0, key, handshake->hash); dst->sender_index = wg_index_hashtable_insert( handshake->entry.peer->device->index_hashtable, &handshake->entry); handshake->state = HANDSHAKE_CREATED_RESPONSE; ret = true; out: up_write(&handshake->lock); up_read(&handshake->static_identity->lock); memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); return ret; } struct wg_peer * wg_noise_handshake_consume_response(struct message_handshake_response *src, struct wg_device *wg) { enum noise_handshake_state state = HANDSHAKE_ZEROED; struct wg_peer *peer = NULL, *ret_peer = NULL; struct noise_handshake *handshake; u8 key[NOISE_SYMMETRIC_KEY_LEN]; u8 hash[NOISE_HASH_LEN]; u8 chaining_key[NOISE_HASH_LEN]; u8 e[NOISE_PUBLIC_KEY_LEN]; u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN]; u8 static_private[NOISE_PUBLIC_KEY_LEN]; u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]; down_read(&wg->static_identity.lock); if (unlikely(!wg->static_identity.has_identity)) goto out; handshake = (struct noise_handshake *)wg_index_hashtable_lookup( wg->index_hashtable, INDEX_HASHTABLE_HANDSHAKE, src->receiver_index, &peer); if (unlikely(!handshake)) goto out; down_read(&handshake->lock); state = handshake->state; memcpy(hash, handshake->hash, NOISE_HASH_LEN); memcpy(chaining_key, handshake->chaining_key, NOISE_HASH_LEN); memcpy(ephemeral_private, handshake->ephemeral_private, NOISE_PUBLIC_KEY_LEN); memcpy(preshared_key, handshake->preshared_key, NOISE_SYMMETRIC_KEY_LEN); up_read(&handshake->lock); if (state != HANDSHAKE_CREATED_INITIATION) goto fail; /* e */ message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash); /* ee */ if (!mix_dh(chaining_key, NULL, ephemeral_private, e)) goto fail; /* se */ if (!mix_dh(chaining_key, NULL, wg->static_identity.static_private, e)) goto fail; /* psk */ mix_psk(chaining_key, hash, key, preshared_key); /* {} */ if (!message_decrypt(NULL, src->encrypted_nothing, sizeof(src->encrypted_nothing), key, hash)) goto fail; /* Success! Copy everything to peer */ down_write(&handshake->lock); /* It's important to check that the state is still the same, while we * have an exclusive lock. */ if (handshake->state != state) { up_write(&handshake->lock); goto fail; } memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN); memcpy(handshake->hash, hash, NOISE_HASH_LEN); memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN); handshake->remote_index = src->sender_index; handshake->state = HANDSHAKE_CONSUMED_RESPONSE; up_write(&handshake->lock); ret_peer = peer; goto out; fail: wg_peer_put(peer); out: memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); memzero_explicit(hash, NOISE_HASH_LEN); memzero_explicit(chaining_key, NOISE_HASH_LEN); memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN); memzero_explicit(static_private, NOISE_PUBLIC_KEY_LEN); memzero_explicit(preshared_key, NOISE_SYMMETRIC_KEY_LEN); up_read(&wg->static_identity.lock); return ret_peer; } bool wg_noise_handshake_begin_session(struct noise_handshake *handshake, struct noise_keypairs *keypairs) { struct noise_keypair *new_keypair; bool ret = false; down_write(&handshake->lock); if (handshake->state != HANDSHAKE_CREATED_RESPONSE && handshake->state != HANDSHAKE_CONSUMED_RESPONSE) goto out; new_keypair = keypair_create(handshake->entry.peer); if (!new_keypair) goto out; new_keypair->i_am_the_initiator = handshake->state == HANDSHAKE_CONSUMED_RESPONSE; new_keypair->remote_index = handshake->remote_index; if (new_keypair->i_am_the_initiator) derive_keys(&new_keypair->sending, &new_keypair->receiving, handshake->chaining_key); else derive_keys(&new_keypair->receiving, &new_keypair->sending, handshake->chaining_key); handshake_zero(handshake); rcu_read_lock_bh(); if (likely(!READ_ONCE(container_of(handshake, struct wg_peer, handshake)->is_dead))) { add_new_keypair(keypairs, new_keypair); net_dbg_ratelimited("%s: Keypair %llu created for peer %llu\n", handshake->entry.peer->device->dev->name, new_keypair->internal_id, handshake->entry.peer->internal_id); ret = wg_index_hashtable_replace( handshake->entry.peer->device->index_hashtable, &handshake->entry, &new_keypair->entry); } else { kfree_sensitive(new_keypair); } rcu_read_unlock_bh(); out: up_write(&handshake->lock); return ret; } |
| 3 2 11 11 11 11 11 11 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 | // SPDX-License-Identifier: GPL-2.0-or-later /* PKCS#8 Private Key parser [RFC 5208]. * * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) "PKCS8: "fmt #include <linux/module.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/oid_registry.h> #include <keys/asymmetric-subtype.h> #include <keys/asymmetric-parser.h> #include <crypto/public_key.h> #include "pkcs8.asn1.h" struct pkcs8_parse_context { struct public_key *pub; unsigned long data; /* Start of data */ enum OID last_oid; /* Last OID encountered */ enum OID algo_oid; /* Algorithm OID */ u32 key_size; const void *key; }; /* * Note an OID when we find one for later processing when we know how to * interpret it. */ int pkcs8_note_OID(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs8_parse_context *ctx = context; ctx->last_oid = look_up_OID(value, vlen); if (ctx->last_oid == OID__NR) { char buffer[50]; sprint_oid(value, vlen, buffer, sizeof(buffer)); pr_info("Unknown OID: [%lu] %s\n", (unsigned long)value - ctx->data, buffer); } return 0; } /* * Note the version number of the ASN.1 blob. */ int pkcs8_note_version(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { if (vlen != 1 || ((const u8 *)value)[0] != 0) { pr_warn("Unsupported PKCS#8 version\n"); return -EBADMSG; } return 0; } /* * Note the public algorithm. */ int pkcs8_note_algo(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs8_parse_context *ctx = context; if (ctx->last_oid != OID_rsaEncryption) return -ENOPKG; ctx->pub->pkey_algo = "rsa"; return 0; } /* * Note the key data of the ASN.1 blob. */ int pkcs8_note_key(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs8_parse_context *ctx = context; ctx->key = value; ctx->key_size = vlen; return 0; } /* * Parse a PKCS#8 private key blob. */ static struct public_key *pkcs8_parse(const void *data, size_t datalen) { struct pkcs8_parse_context ctx; struct public_key *pub; long ret; memset(&ctx, 0, sizeof(ctx)); ret = -ENOMEM; ctx.pub = kzalloc(sizeof(struct public_key), GFP_KERNEL); if (!ctx.pub) goto error; ctx.data = (unsigned long)data; /* Attempt to decode the private key */ ret = asn1_ber_decoder(&pkcs8_decoder, &ctx, data, datalen); if (ret < 0) goto error_decode; ret = -ENOMEM; pub = ctx.pub; pub->key = kmemdup(ctx.key, ctx.key_size, GFP_KERNEL); if (!pub->key) goto error_decode; pub->keylen = ctx.key_size; pub->key_is_private = true; return pub; error_decode: kfree(ctx.pub); error: return ERR_PTR(ret); } /* * Attempt to parse a data blob for a key as a PKCS#8 private key. */ static int pkcs8_key_preparse(struct key_preparsed_payload *prep) { struct public_key *pub; pub = pkcs8_parse(prep->data, prep->datalen); if (IS_ERR(pub)) return PTR_ERR(pub); pr_devel("Cert Key Algo: %s\n", pub->pkey_algo); pub->id_type = "PKCS8"; /* We're pinning the module by being linked against it */ __module_get(public_key_subtype.owner); prep->payload.data[asym_subtype] = &public_key_subtype; prep->payload.data[asym_key_ids] = NULL; prep->payload.data[asym_crypto] = pub; prep->payload.data[asym_auth] = NULL; prep->quotalen = 100; return 0; } static struct asymmetric_key_parser pkcs8_key_parser = { .owner = THIS_MODULE, .name = "pkcs8", .parse = pkcs8_key_preparse, }; /* * Module stuff */ static int __init pkcs8_key_init(void) { return register_asymmetric_key_parser(&pkcs8_key_parser); } static void __exit pkcs8_key_exit(void) { unregister_asymmetric_key_parser(&pkcs8_key_parser); } module_init(pkcs8_key_init); module_exit(pkcs8_key_exit); MODULE_DESCRIPTION("PKCS#8 certificate parser"); MODULE_LICENSE("GPL"); |
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PAGE_FRAG_CACHE_H #define _LINUX_PAGE_FRAG_CACHE_H #include <linux/bits.h> #include <linux/log2.h> #include <linux/mm_types_task.h> #include <linux/types.h> #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) /* Use a full byte here to enable assembler optimization as the shift * operation is usually expecting a byte. */ #define PAGE_FRAG_CACHE_ORDER_MASK GENMASK(7, 0) #else /* Compiler should be able to figure out we don't read things as any value * ANDed with 0 is 0. */ #define PAGE_FRAG_CACHE_ORDER_MASK 0 #endif #define PAGE_FRAG_CACHE_PFMEMALLOC_BIT (PAGE_FRAG_CACHE_ORDER_MASK + 1) static inline bool encoded_page_decode_pfmemalloc(unsigned long encoded_page) { return !!(encoded_page & PAGE_FRAG_CACHE_PFMEMALLOC_BIT); } static inline void page_frag_cache_init(struct page_frag_cache *nc) { nc->encoded_page = 0; } static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc) { return encoded_page_decode_pfmemalloc(nc->encoded_page); } void page_frag_cache_drain(struct page_frag_cache *nc); void __page_frag_cache_drain(struct page *page, unsigned int count); void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, gfp_t gfp_mask, unsigned int align_mask); static inline void *page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, gfp_t gfp_mask, unsigned int align) { WARN_ON_ONCE(!is_power_of_2(align)); return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align); } static inline void *page_frag_alloc(struct page_frag_cache *nc, unsigned int fragsz, gfp_t gfp_mask) { return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); } void page_frag_free(void *addr); #endif |
| 5 5 6 6 6 6 6 6 6 6 5 6 6 6 6 6 6 5 6 6 5 6 6 6 6 6 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2016 Thomas Gleixner. * Copyright (C) 2016-2017 Christoph Hellwig. */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/cpu.h> #include <linux/sort.h> #include <linux/group_cpus.h> #ifdef CONFIG_SMP static void grp_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk, unsigned int cpus_per_grp) { const struct cpumask *siblmsk; int cpu, sibl; for ( ; cpus_per_grp > 0; ) { cpu = cpumask_first(nmsk); /* Should not happen, but I'm too lazy to think about it */ if (cpu >= nr_cpu_ids) return; cpumask_clear_cpu(cpu, nmsk); cpumask_set_cpu(cpu, irqmsk); cpus_per_grp--; /* If the cpu has siblings, use them first */ siblmsk = topology_sibling_cpumask(cpu); for (sibl = -1; cpus_per_grp > 0; ) { sibl = cpumask_next(sibl, siblmsk); if (sibl >= nr_cpu_ids) break; if (!cpumask_test_and_clear_cpu(sibl, nmsk)) continue; cpumask_set_cpu(sibl, irqmsk); cpus_per_grp--; } } } static cpumask_var_t *alloc_node_to_cpumask(void) { cpumask_var_t *masks; int node; masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL); if (!masks) return NULL; for (node = 0; node < nr_node_ids; node++) { if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL)) goto out_unwind; } return masks; out_unwind: while (--node >= 0) free_cpumask_var(masks[node]); kfree(masks); return NULL; } static void free_node_to_cpumask(cpumask_var_t *masks) { int node; for (node = 0; node < nr_node_ids; node++) free_cpumask_var(masks[node]); kfree(masks); } static void build_node_to_cpumask(cpumask_var_t *masks) { int cpu; for_each_possible_cpu(cpu) cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]); } static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask, const struct cpumask *mask, nodemask_t *nodemsk) { int n, nodes = 0; /* Calculate the number of nodes in the supplied affinity mask */ for_each_node(n) { if (cpumask_intersects(mask, node_to_cpumask[n])) { node_set(n, *nodemsk); nodes++; } } return nodes; } struct node_groups { unsigned id; union { unsigned ngroups; unsigned ncpus; }; }; static int ncpus_cmp_func(const void *l, const void *r) { const struct node_groups *ln = l; const struct node_groups *rn = r; return ln->ncpus - rn->ncpus; } /* * Allocate group number for each node, so that for each node: * * 1) the allocated number is >= 1 * * 2) the allocated number is <= active CPU number of this node * * The actual allocated total groups may be less than @numgrps when * active total CPU number is less than @numgrps. * * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]' * for each node. */ static void alloc_nodes_groups(unsigned int numgrps, cpumask_var_t *node_to_cpumask, const struct cpumask *cpu_mask, const nodemask_t nodemsk, struct cpumask *nmsk, struct node_groups *node_groups) { unsigned n, remaining_ncpus = 0; for (n = 0; n < nr_node_ids; n++) { node_groups[n].id = n; node_groups[n].ncpus = UINT_MAX; } for_each_node_mask(n, nodemsk) { unsigned ncpus; cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); ncpus = cpumask_weight(nmsk); if (!ncpus) continue; remaining_ncpus += ncpus; node_groups[n].ncpus = ncpus; } numgrps = min_t(unsigned, remaining_ncpus, numgrps); sort(node_groups, nr_node_ids, sizeof(node_groups[0]), ncpus_cmp_func, NULL); /* * Allocate groups for each node according to the ratio of this * node's nr_cpus to remaining un-assigned ncpus. 'numgrps' is * bigger than number of active numa nodes. Always start the * allocation from the node with minimized nr_cpus. * * This way guarantees that each active node gets allocated at * least one group, and the theory is simple: over-allocation * is only done when this node is assigned by one group, so * other nodes will be allocated >= 1 groups, since 'numgrps' is * bigger than number of numa nodes. * * One perfect invariant is that number of allocated groups for * each node is <= CPU count of this node: * * 1) suppose there are two nodes: A and B * ncpu(X) is CPU count of node X * grps(X) is the group count allocated to node X via this * algorithm * * ncpu(A) <= ncpu(B) * ncpu(A) + ncpu(B) = N * grps(A) + grps(B) = G * * grps(A) = max(1, round_down(G * ncpu(A) / N)) * grps(B) = G - grps(A) * * both N and G are integer, and 2 <= G <= N, suppose * G = N - delta, and 0 <= delta <= N - 2 * * 2) obviously grps(A) <= ncpu(A) because: * * if grps(A) is 1, then grps(A) <= ncpu(A) given * ncpu(A) >= 1 * * otherwise, * grps(A) <= G * ncpu(A) / N <= ncpu(A), given G <= N * * 3) prove how grps(B) <= ncpu(B): * * if round_down(G * ncpu(A) / N) == 0, vecs(B) won't be * over-allocated, so grps(B) <= ncpu(B), * * otherwise: * * grps(A) = * round_down(G * ncpu(A) / N) = * round_down((N - delta) * ncpu(A) / N) = * round_down((N * ncpu(A) - delta * ncpu(A)) / N) >= * round_down((N * ncpu(A) - delta * N) / N) = * cpu(A) - delta * * then: * * grps(A) - G >= ncpu(A) - delta - G * => * G - grps(A) <= G + delta - ncpu(A) * => * grps(B) <= N - ncpu(A) * => * grps(B) <= cpu(B) * * For nodes >= 3, it can be thought as one node and another big * node given that is exactly what this algorithm is implemented, * and we always re-calculate 'remaining_ncpus' & 'numgrps', and * finally for each node X: grps(X) <= ncpu(X). * */ for (n = 0; n < nr_node_ids; n++) { unsigned ngroups, ncpus; if (node_groups[n].ncpus == UINT_MAX) continue; WARN_ON_ONCE(numgrps == 0); ncpus = node_groups[n].ncpus; ngroups = max_t(unsigned, 1, numgrps * ncpus / remaining_ncpus); WARN_ON_ONCE(ngroups > ncpus); node_groups[n].ngroups = ngroups; remaining_ncpus -= ncpus; numgrps -= ngroups; } } static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps, cpumask_var_t *node_to_cpumask, const struct cpumask *cpu_mask, struct cpumask *nmsk, struct cpumask *masks) { unsigned int i, n, nodes, cpus_per_grp, extra_grps, done = 0; unsigned int last_grp = numgrps; unsigned int curgrp = startgrp; nodemask_t nodemsk = NODE_MASK_NONE; struct node_groups *node_groups; if (cpumask_empty(cpu_mask)) return 0; nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk); /* * If the number of nodes in the mask is greater than or equal the * number of groups we just spread the groups across the nodes. */ if (numgrps <= nodes) { for_each_node_mask(n, nodemsk) { /* Ensure that only CPUs which are in both masks are set */ cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); cpumask_or(&masks[curgrp], &masks[curgrp], nmsk); if (++curgrp == last_grp) curgrp = 0; } return numgrps; } node_groups = kcalloc(nr_node_ids, sizeof(struct node_groups), GFP_KERNEL); if (!node_groups) return -ENOMEM; /* allocate group number for each node */ alloc_nodes_groups(numgrps, node_to_cpumask, cpu_mask, nodemsk, nmsk, node_groups); for (i = 0; i < nr_node_ids; i++) { unsigned int ncpus, v; struct node_groups *nv = &node_groups[i]; if (nv->ngroups == UINT_MAX) continue; /* Get the cpus on this node which are in the mask */ cpumask_and(nmsk, cpu_mask, node_to_cpumask[nv->id]); ncpus = cpumask_weight(nmsk); if (!ncpus) continue; WARN_ON_ONCE(nv->ngroups > ncpus); /* Account for rounding errors */ extra_grps = ncpus - nv->ngroups * (ncpus / nv->ngroups); /* Spread allocated groups on CPUs of the current node */ for (v = 0; v < nv->ngroups; v++, curgrp++) { cpus_per_grp = ncpus / nv->ngroups; /* Account for extra groups to compensate rounding errors */ if (extra_grps) { cpus_per_grp++; --extra_grps; } /* * wrapping has to be considered given 'startgrp' * may start anywhere */ if (curgrp >= last_grp) curgrp = 0; grp_spread_init_one(&masks[curgrp], nmsk, cpus_per_grp); } done += nv->ngroups; } kfree(node_groups); return done; } /** * group_cpus_evenly - Group all CPUs evenly per NUMA/CPU locality * @numgrps: number of groups * @nummasks: number of initialized cpumasks * * Return: cpumask array if successful, NULL otherwise. And each element * includes CPUs assigned to this group. nummasks contains the number * of initialized masks which can be less than numgrps. * * Try to put close CPUs from viewpoint of CPU and NUMA locality into * same group, and run two-stage grouping: * 1) allocate present CPUs on these groups evenly first * 2) allocate other possible CPUs on these groups evenly * * We guarantee in the resulted grouping that all CPUs are covered, and * no same CPU is assigned to multiple groups */ struct cpumask *group_cpus_evenly(unsigned int numgrps, unsigned int *nummasks) { unsigned int curgrp = 0, nr_present = 0, nr_others = 0; cpumask_var_t *node_to_cpumask; cpumask_var_t nmsk, npresmsk; int ret = -ENOMEM; struct cpumask *masks = NULL; if (numgrps == 0) return NULL; if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) return NULL; if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL)) goto fail_nmsk; node_to_cpumask = alloc_node_to_cpumask(); if (!node_to_cpumask) goto fail_npresmsk; masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL); if (!masks) goto fail_node_to_cpumask; build_node_to_cpumask(node_to_cpumask); /* * Make a local cache of 'cpu_present_mask', so the two stages * spread can observe consistent 'cpu_present_mask' without holding * cpu hotplug lock, then we can reduce deadlock risk with cpu * hotplug code. * * Here CPU hotplug may happen when reading `cpu_present_mask`, and * we can live with the case because it only affects that hotplug * CPU is handled in the 1st or 2nd stage, and either way is correct * from API user viewpoint since 2-stage spread is sort of * optimization. */ cpumask_copy(npresmsk, data_race(cpu_present_mask)); /* grouping present CPUs first */ ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask, npresmsk, nmsk, masks); if (ret < 0) goto fail_node_to_cpumask; nr_present = ret; /* * Allocate non present CPUs starting from the next group to be * handled. If the grouping of present CPUs already exhausted the * group space, assign the non present CPUs to the already * allocated out groups. */ if (nr_present >= numgrps) curgrp = 0; else curgrp = nr_present; cpumask_andnot(npresmsk, cpu_possible_mask, npresmsk); ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask, npresmsk, nmsk, masks); if (ret >= 0) nr_others = ret; fail_node_to_cpumask: free_node_to_cpumask(node_to_cpumask); fail_npresmsk: free_cpumask_var(npresmsk); fail_nmsk: free_cpumask_var(nmsk); if (ret < 0) { kfree(masks); return NULL; } *nummasks = min(nr_present + nr_others, numgrps); return masks; } #else /* CONFIG_SMP */ struct cpumask *group_cpus_evenly(unsigned int numgrps, unsigned int *nummasks) { struct cpumask *masks; if (numgrps == 0) return NULL; masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL); if (!masks) return NULL; /* assign all CPUs(cpu 0) to the 1st group only */ cpumask_copy(&masks[0], cpu_possible_mask); *nummasks = 1; return masks; } #endif /* CONFIG_SMP */ EXPORT_SYMBOL_GPL(group_cpus_evenly); |
| 4421 4426 10 10 10 10 29 2 8 19 2 16 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 | /* SPDX-License-Identifier: GPL-2.0 */ /* * security/tomoyo/common.h * * Header file for TOMOYO. * * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #ifndef _SECURITY_TOMOYO_COMMON_H #define _SECURITY_TOMOYO_COMMON_H #define pr_fmt(fmt) fmt #include <linux/ctype.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/file.h> #include <linux/kmod.h> #include <linux/fs.h> #include <linux/sched.h> #include <linux/namei.h> #include <linux/mount.h> #include <linux/list.h> #include <linux/cred.h> #include <linux/poll.h> #include <linux/binfmts.h> #include <linux/highmem.h> #include <linux/net.h> #include <linux/inet.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/un.h> #include <linux/lsm_hooks.h> #include <net/sock.h> #include <net/af_unix.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/udp.h> /********** Constants definitions. **********/ /* * TOMOYO uses this hash only when appending a string into the string * table. Frequency of appending strings is very low. So we don't need * large (e.g. 64k) hash size. 256 will be sufficient. */ #define TOMOYO_HASH_BITS 8 #define TOMOYO_MAX_HASH (1u<<TOMOYO_HASH_BITS) /* * TOMOYO checks only SOCK_STREAM, SOCK_DGRAM, SOCK_RAW, SOCK_SEQPACKET. * Therefore, we don't need SOCK_MAX. */ #define TOMOYO_SOCK_MAX 6 #define TOMOYO_EXEC_TMPSIZE 4096 /* Garbage collector is trying to kfree() this element. */ #define TOMOYO_GC_IN_PROGRESS -1 /* Profile number is an integer between 0 and 255. */ #define TOMOYO_MAX_PROFILES 256 /* Group number is an integer between 0 and 255. */ #define TOMOYO_MAX_ACL_GROUPS 256 /* Index numbers for "struct tomoyo_condition". */ enum tomoyo_conditions_index { TOMOYO_TASK_UID, /* current_uid() */ TOMOYO_TASK_EUID, /* current_euid() */ TOMOYO_TASK_SUID, /* current_suid() */ TOMOYO_TASK_FSUID, /* current_fsuid() */ TOMOYO_TASK_GID, /* current_gid() */ TOMOYO_TASK_EGID, /* current_egid() */ TOMOYO_TASK_SGID, /* current_sgid() */ TOMOYO_TASK_FSGID, /* current_fsgid() */ TOMOYO_TASK_PID, /* sys_getpid() */ TOMOYO_TASK_PPID, /* sys_getppid() */ TOMOYO_EXEC_ARGC, /* "struct linux_binprm *"->argc */ TOMOYO_EXEC_ENVC, /* "struct linux_binprm *"->envc */ TOMOYO_TYPE_IS_SOCKET, /* S_IFSOCK */ TOMOYO_TYPE_IS_SYMLINK, /* S_IFLNK */ TOMOYO_TYPE_IS_FILE, /* S_IFREG */ TOMOYO_TYPE_IS_BLOCK_DEV, /* S_IFBLK */ TOMOYO_TYPE_IS_DIRECTORY, /* S_IFDIR */ TOMOYO_TYPE_IS_CHAR_DEV, /* S_IFCHR */ TOMOYO_TYPE_IS_FIFO, /* S_IFIFO */ TOMOYO_MODE_SETUID, /* S_ISUID */ TOMOYO_MODE_SETGID, /* S_ISGID */ TOMOYO_MODE_STICKY, /* S_ISVTX */ TOMOYO_MODE_OWNER_READ, /* S_IRUSR */ TOMOYO_MODE_OWNER_WRITE, /* S_IWUSR */ TOMOYO_MODE_OWNER_EXECUTE, /* S_IXUSR */ TOMOYO_MODE_GROUP_READ, /* S_IRGRP */ TOMOYO_MODE_GROUP_WRITE, /* S_IWGRP */ TOMOYO_MODE_GROUP_EXECUTE, /* S_IXGRP */ TOMOYO_MODE_OTHERS_READ, /* S_IROTH */ TOMOYO_MODE_OTHERS_WRITE, /* S_IWOTH */ TOMOYO_MODE_OTHERS_EXECUTE, /* S_IXOTH */ TOMOYO_EXEC_REALPATH, TOMOYO_SYMLINK_TARGET, TOMOYO_PATH1_UID, TOMOYO_PATH1_GID, TOMOYO_PATH1_INO, TOMOYO_PATH1_MAJOR, TOMOYO_PATH1_MINOR, TOMOYO_PATH1_PERM, TOMOYO_PATH1_TYPE, TOMOYO_PATH1_DEV_MAJOR, TOMOYO_PATH1_DEV_MINOR, TOMOYO_PATH2_UID, TOMOYO_PATH2_GID, TOMOYO_PATH2_INO, TOMOYO_PATH2_MAJOR, TOMOYO_PATH2_MINOR, TOMOYO_PATH2_PERM, TOMOYO_PATH2_TYPE, TOMOYO_PATH2_DEV_MAJOR, TOMOYO_PATH2_DEV_MINOR, TOMOYO_PATH1_PARENT_UID, TOMOYO_PATH1_PARENT_GID, TOMOYO_PATH1_PARENT_INO, TOMOYO_PATH1_PARENT_PERM, TOMOYO_PATH2_PARENT_UID, TOMOYO_PATH2_PARENT_GID, TOMOYO_PATH2_PARENT_INO, TOMOYO_PATH2_PARENT_PERM, TOMOYO_MAX_CONDITION_KEYWORD, TOMOYO_NUMBER_UNION, TOMOYO_NAME_UNION, TOMOYO_ARGV_ENTRY, TOMOYO_ENVP_ENTRY, }; /* Index numbers for stat(). */ enum tomoyo_path_stat_index { /* Do not change this order. */ TOMOYO_PATH1, TOMOYO_PATH1_PARENT, TOMOYO_PATH2, TOMOYO_PATH2_PARENT, TOMOYO_MAX_PATH_STAT }; /* Index numbers for operation mode. */ enum tomoyo_mode_index { TOMOYO_CONFIG_DISABLED, TOMOYO_CONFIG_LEARNING, TOMOYO_CONFIG_PERMISSIVE, TOMOYO_CONFIG_ENFORCING, TOMOYO_CONFIG_MAX_MODE, TOMOYO_CONFIG_WANT_REJECT_LOG = 64, TOMOYO_CONFIG_WANT_GRANT_LOG = 128, TOMOYO_CONFIG_USE_DEFAULT = 255, }; /* Index numbers for entry type. */ enum tomoyo_policy_id { TOMOYO_ID_GROUP, TOMOYO_ID_ADDRESS_GROUP, TOMOYO_ID_PATH_GROUP, TOMOYO_ID_NUMBER_GROUP, TOMOYO_ID_TRANSITION_CONTROL, TOMOYO_ID_AGGREGATOR, TOMOYO_ID_MANAGER, TOMOYO_ID_CONDITION, TOMOYO_ID_NAME, TOMOYO_ID_ACL, TOMOYO_ID_DOMAIN, TOMOYO_MAX_POLICY }; /* Index numbers for domain's attributes. */ enum tomoyo_domain_info_flags_index { /* Quota warnning flag. */ TOMOYO_DIF_QUOTA_WARNED, /* * This domain was unable to create a new domain at * tomoyo_find_next_domain() because the name of the domain to be * created was too long or it could not allocate memory. * More than one process continued execve() without domain transition. */ TOMOYO_DIF_TRANSITION_FAILED, TOMOYO_MAX_DOMAIN_INFO_FLAGS }; /* Index numbers for audit type. */ enum tomoyo_grant_log { /* Follow profile's configuration. */ TOMOYO_GRANTLOG_AUTO, /* Do not generate grant log. */ TOMOYO_GRANTLOG_NO, /* Generate grant_log. */ TOMOYO_GRANTLOG_YES, }; /* Index numbers for group entries. */ enum tomoyo_group_id { TOMOYO_PATH_GROUP, TOMOYO_NUMBER_GROUP, TOMOYO_ADDRESS_GROUP, TOMOYO_MAX_GROUP }; /* Index numbers for type of numeric values. */ enum tomoyo_value_type { TOMOYO_VALUE_TYPE_INVALID, TOMOYO_VALUE_TYPE_DECIMAL, TOMOYO_VALUE_TYPE_OCTAL, TOMOYO_VALUE_TYPE_HEXADECIMAL, }; /* Index numbers for domain transition control keywords. */ enum tomoyo_transition_type { /* Do not change this order, */ TOMOYO_TRANSITION_CONTROL_NO_RESET, TOMOYO_TRANSITION_CONTROL_RESET, TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE, TOMOYO_TRANSITION_CONTROL_INITIALIZE, TOMOYO_TRANSITION_CONTROL_NO_KEEP, TOMOYO_TRANSITION_CONTROL_KEEP, TOMOYO_MAX_TRANSITION_TYPE }; /* Index numbers for Access Controls. */ enum tomoyo_acl_entry_type_index { TOMOYO_TYPE_PATH_ACL, TOMOYO_TYPE_PATH2_ACL, TOMOYO_TYPE_PATH_NUMBER_ACL, TOMOYO_TYPE_MKDEV_ACL, TOMOYO_TYPE_MOUNT_ACL, TOMOYO_TYPE_INET_ACL, TOMOYO_TYPE_UNIX_ACL, TOMOYO_TYPE_ENV_ACL, TOMOYO_TYPE_MANUAL_TASK_ACL, }; /* Index numbers for access controls with one pathname. */ enum tomoyo_path_acl_index { TOMOYO_TYPE_EXECUTE, TOMOYO_TYPE_READ, TOMOYO_TYPE_WRITE, TOMOYO_TYPE_APPEND, TOMOYO_TYPE_UNLINK, TOMOYO_TYPE_GETATTR, TOMOYO_TYPE_RMDIR, TOMOYO_TYPE_TRUNCATE, TOMOYO_TYPE_SYMLINK, TOMOYO_TYPE_CHROOT, TOMOYO_TYPE_UMOUNT, TOMOYO_MAX_PATH_OPERATION }; /* Index numbers for /sys/kernel/security/tomoyo/stat interface. */ enum tomoyo_memory_stat_type { TOMOYO_MEMORY_POLICY, TOMOYO_MEMORY_AUDIT, TOMOYO_MEMORY_QUERY, TOMOYO_MAX_MEMORY_STAT }; enum tomoyo_mkdev_acl_index { TOMOYO_TYPE_MKBLOCK, TOMOYO_TYPE_MKCHAR, TOMOYO_MAX_MKDEV_OPERATION }; /* Index numbers for socket operations. */ enum tomoyo_network_acl_index { TOMOYO_NETWORK_BIND, /* bind() operation. */ TOMOYO_NETWORK_LISTEN, /* listen() operation. */ TOMOYO_NETWORK_CONNECT, /* connect() operation. */ TOMOYO_NETWORK_SEND, /* send() operation. */ TOMOYO_MAX_NETWORK_OPERATION }; /* Index numbers for access controls with two pathnames. */ enum tomoyo_path2_acl_index { TOMOYO_TYPE_LINK, TOMOYO_TYPE_RENAME, TOMOYO_TYPE_PIVOT_ROOT, TOMOYO_MAX_PATH2_OPERATION }; /* Index numbers for access controls with one pathname and one number. */ enum tomoyo_path_number_acl_index { TOMOYO_TYPE_CREATE, TOMOYO_TYPE_MKDIR, TOMOYO_TYPE_MKFIFO, TOMOYO_TYPE_MKSOCK, TOMOYO_TYPE_IOCTL, TOMOYO_TYPE_CHMOD, TOMOYO_TYPE_CHOWN, TOMOYO_TYPE_CHGRP, TOMOYO_MAX_PATH_NUMBER_OPERATION }; /* Index numbers for /sys/kernel/security/tomoyo/ interfaces. */ enum tomoyo_securityfs_interface_index { TOMOYO_DOMAINPOLICY, TOMOYO_EXCEPTIONPOLICY, TOMOYO_PROCESS_STATUS, TOMOYO_STAT, TOMOYO_AUDIT, TOMOYO_VERSION, TOMOYO_PROFILE, TOMOYO_QUERY, TOMOYO_MANAGER }; /* Index numbers for special mount operations. */ enum tomoyo_special_mount { TOMOYO_MOUNT_BIND, /* mount --bind /source /dest */ TOMOYO_MOUNT_MOVE, /* mount --move /old /new */ TOMOYO_MOUNT_REMOUNT, /* mount -o remount /dir */ TOMOYO_MOUNT_MAKE_UNBINDABLE, /* mount --make-unbindable /dir */ TOMOYO_MOUNT_MAKE_PRIVATE, /* mount --make-private /dir */ TOMOYO_MOUNT_MAKE_SLAVE, /* mount --make-slave /dir */ TOMOYO_MOUNT_MAKE_SHARED, /* mount --make-shared /dir */ TOMOYO_MAX_SPECIAL_MOUNT }; /* Index numbers for functionality. */ enum tomoyo_mac_index { TOMOYO_MAC_FILE_EXECUTE, TOMOYO_MAC_FILE_OPEN, TOMOYO_MAC_FILE_CREATE, TOMOYO_MAC_FILE_UNLINK, TOMOYO_MAC_FILE_GETATTR, TOMOYO_MAC_FILE_MKDIR, TOMOYO_MAC_FILE_RMDIR, TOMOYO_MAC_FILE_MKFIFO, TOMOYO_MAC_FILE_MKSOCK, TOMOYO_MAC_FILE_TRUNCATE, TOMOYO_MAC_FILE_SYMLINK, TOMOYO_MAC_FILE_MKBLOCK, TOMOYO_MAC_FILE_MKCHAR, TOMOYO_MAC_FILE_LINK, TOMOYO_MAC_FILE_RENAME, TOMOYO_MAC_FILE_CHMOD, TOMOYO_MAC_FILE_CHOWN, TOMOYO_MAC_FILE_CHGRP, TOMOYO_MAC_FILE_IOCTL, TOMOYO_MAC_FILE_CHROOT, TOMOYO_MAC_FILE_MOUNT, TOMOYO_MAC_FILE_UMOUNT, TOMOYO_MAC_FILE_PIVOT_ROOT, TOMOYO_MAC_NETWORK_INET_STREAM_BIND, TOMOYO_MAC_NETWORK_INET_STREAM_LISTEN, TOMOYO_MAC_NETWORK_INET_STREAM_CONNECT, TOMOYO_MAC_NETWORK_INET_DGRAM_BIND, TOMOYO_MAC_NETWORK_INET_DGRAM_SEND, TOMOYO_MAC_NETWORK_INET_RAW_BIND, TOMOYO_MAC_NETWORK_INET_RAW_SEND, TOMOYO_MAC_NETWORK_UNIX_STREAM_BIND, TOMOYO_MAC_NETWORK_UNIX_STREAM_LISTEN, TOMOYO_MAC_NETWORK_UNIX_STREAM_CONNECT, TOMOYO_MAC_NETWORK_UNIX_DGRAM_BIND, TOMOYO_MAC_NETWORK_UNIX_DGRAM_SEND, TOMOYO_MAC_NETWORK_UNIX_SEQPACKET_BIND, TOMOYO_MAC_NETWORK_UNIX_SEQPACKET_LISTEN, TOMOYO_MAC_NETWORK_UNIX_SEQPACKET_CONNECT, TOMOYO_MAC_ENVIRON, TOMOYO_MAX_MAC_INDEX }; /* Index numbers for category of functionality. */ enum tomoyo_mac_category_index { TOMOYO_MAC_CATEGORY_FILE, TOMOYO_MAC_CATEGORY_NETWORK, TOMOYO_MAC_CATEGORY_MISC, TOMOYO_MAX_MAC_CATEGORY_INDEX }; /* * Retry this request. Returned by tomoyo_supervisor() if policy violation has * occurred in enforcing mode and the userspace daemon decided to retry. * * We must choose a positive value in order to distinguish "granted" (which is * 0) and "rejected" (which is a negative value) and "retry". */ #define TOMOYO_RETRY_REQUEST 1 /* Index numbers for /sys/kernel/security/tomoyo/stat interface. */ enum tomoyo_policy_stat_type { /* Do not change this order. */ TOMOYO_STAT_POLICY_UPDATES, TOMOYO_STAT_POLICY_LEARNING, /* == TOMOYO_CONFIG_LEARNING */ TOMOYO_STAT_POLICY_PERMISSIVE, /* == TOMOYO_CONFIG_PERMISSIVE */ TOMOYO_STAT_POLICY_ENFORCING, /* == TOMOYO_CONFIG_ENFORCING */ TOMOYO_MAX_POLICY_STAT }; /* Index numbers for profile's PREFERENCE values. */ enum tomoyo_pref_index { TOMOYO_PREF_MAX_AUDIT_LOG, TOMOYO_PREF_MAX_LEARNING_ENTRY, TOMOYO_MAX_PREF }; /********** Structure definitions. **********/ /* Common header for holding ACL entries. */ struct tomoyo_acl_head { struct list_head list; s8 is_deleted; /* true or false or TOMOYO_GC_IN_PROGRESS */ } __packed; /* Common header for shared entries. */ struct tomoyo_shared_acl_head { struct list_head list; atomic_t users; } __packed; struct tomoyo_policy_namespace; /* Structure for request info. */ struct tomoyo_request_info { /* * For holding parameters specific to operations which deal files. * NULL if not dealing files. */ struct tomoyo_obj_info *obj; /* * For holding parameters specific to execve() request. * NULL if not dealing execve(). */ struct tomoyo_execve *ee; struct tomoyo_domain_info *domain; /* For holding parameters. */ union { struct { const struct tomoyo_path_info *filename; /* For using wildcards at tomoyo_find_next_domain(). */ const struct tomoyo_path_info *matched_path; /* One of values in "enum tomoyo_path_acl_index". */ u8 operation; } path; struct { const struct tomoyo_path_info *filename1; const struct tomoyo_path_info *filename2; /* One of values in "enum tomoyo_path2_acl_index". */ u8 operation; } path2; struct { const struct tomoyo_path_info *filename; unsigned int mode; unsigned int major; unsigned int minor; /* One of values in "enum tomoyo_mkdev_acl_index". */ u8 operation; } mkdev; struct { const struct tomoyo_path_info *filename; unsigned long number; /* * One of values in * "enum tomoyo_path_number_acl_index". */ u8 operation; } path_number; struct { const struct tomoyo_path_info *name; } environ; struct { const __be32 *address; u16 port; /* One of values smaller than TOMOYO_SOCK_MAX. */ u8 protocol; /* One of values in "enum tomoyo_network_acl_index". */ u8 operation; bool is_ipv6; } inet_network; struct { const struct tomoyo_path_info *address; /* One of values smaller than TOMOYO_SOCK_MAX. */ u8 protocol; /* One of values in "enum tomoyo_network_acl_index". */ u8 operation; } unix_network; struct { const struct tomoyo_path_info *type; const struct tomoyo_path_info *dir; const struct tomoyo_path_info *dev; unsigned long flags; int need_dev; } mount; struct { const struct tomoyo_path_info *domainname; } task; } param; struct tomoyo_acl_info *matched_acl; u8 param_type; bool granted; u8 retry; u8 profile; u8 mode; /* One of tomoyo_mode_index . */ u8 type; }; /* Structure for holding a token. */ struct tomoyo_path_info { const char *name; u32 hash; /* = full_name_hash(name, strlen(name)) */ u16 const_len; /* = tomoyo_const_part_length(name) */ bool is_dir; /* = tomoyo_strendswith(name, "/") */ bool is_patterned; /* = tomoyo_path_contains_pattern(name) */ }; /* Structure for holding string data. */ struct tomoyo_name { struct tomoyo_shared_acl_head head; struct tomoyo_path_info entry; }; /* Structure for holding a word. */ struct tomoyo_name_union { /* Either @filename or @group is NULL. */ const struct tomoyo_path_info *filename; struct tomoyo_group *group; }; /* Structure for holding a number. */ struct tomoyo_number_union { unsigned long values[2]; struct tomoyo_group *group; /* Maybe NULL. */ /* One of values in "enum tomoyo_value_type". */ u8 value_type[2]; }; /* Structure for holding an IP address. */ struct tomoyo_ipaddr_union { struct in6_addr ip[2]; /* Big endian. */ struct tomoyo_group *group; /* Pointer to address group. */ bool is_ipv6; /* Valid only if @group == NULL. */ }; /* Structure for "path_group"/"number_group"/"address_group" directive. */ struct tomoyo_group { struct tomoyo_shared_acl_head head; const struct tomoyo_path_info *group_name; struct list_head member_list; }; /* Structure for "path_group" directive. */ struct tomoyo_path_group { struct tomoyo_acl_head head; const struct tomoyo_path_info *member_name; }; /* Structure for "number_group" directive. */ struct tomoyo_number_group { struct tomoyo_acl_head head; struct tomoyo_number_union number; }; /* Structure for "address_group" directive. */ struct tomoyo_address_group { struct tomoyo_acl_head head; /* Structure for holding an IP address. */ struct tomoyo_ipaddr_union address; }; /* Subset of "struct stat". Used by conditional ACL and audit logs. */ struct tomoyo_mini_stat { kuid_t uid; kgid_t gid; ino_t ino; umode_t mode; dev_t dev; dev_t rdev; }; /* Structure for dumping argv[] and envp[] of "struct linux_binprm". */ struct tomoyo_page_dump { struct page *page; /* Previously dumped page. */ char *data; /* Contents of "page". Size is PAGE_SIZE. */ }; /* Structure for attribute checks in addition to pathname checks. */ struct tomoyo_obj_info { /* * True if tomoyo_get_attributes() was already called, false otherwise. */ bool validate_done; /* True if @stat[] is valid. */ bool stat_valid[TOMOYO_MAX_PATH_STAT]; /* First pathname. Initialized with { NULL, NULL } if no path. */ struct path path1; /* Second pathname. Initialized with { NULL, NULL } if no path. */ struct path path2; /* * Information on @path1, @path1's parent directory, @path2, @path2's * parent directory. */ struct tomoyo_mini_stat stat[TOMOYO_MAX_PATH_STAT]; /* * Content of symbolic link to be created. NULL for operations other * than symlink(). */ struct tomoyo_path_info *symlink_target; }; /* Structure for argv[]. */ struct tomoyo_argv { unsigned long index; const struct tomoyo_path_info *value; bool is_not; }; /* Structure for envp[]. */ struct tomoyo_envp { const struct tomoyo_path_info *name; const struct tomoyo_path_info *value; bool is_not; }; /* Structure for execve() operation. */ struct tomoyo_execve { struct tomoyo_request_info r; struct tomoyo_obj_info obj; struct linux_binprm *bprm; const struct tomoyo_path_info *transition; /* For dumping argv[] and envp[]. */ struct tomoyo_page_dump dump; /* For temporary use. */ char *tmp; /* Size is TOMOYO_EXEC_TMPSIZE bytes */ }; /* Structure for entries which follows "struct tomoyo_condition". */ struct tomoyo_condition_element { /* * Left hand operand. A "struct tomoyo_argv" for TOMOYO_ARGV_ENTRY, a * "struct tomoyo_envp" for TOMOYO_ENVP_ENTRY is attached to the tail * of the array of this struct. */ u8 left; /* * Right hand operand. A "struct tomoyo_number_union" for * TOMOYO_NUMBER_UNION, a "struct tomoyo_name_union" for * TOMOYO_NAME_UNION is attached to the tail of the array of this * struct. */ u8 right; /* Equation operator. True if equals or overlaps, false otherwise. */ bool equals; }; /* Structure for optional arguments. */ struct tomoyo_condition { struct tomoyo_shared_acl_head head; u32 size; /* Memory size allocated for this entry. */ u16 condc; /* Number of conditions in this struct. */ u16 numbers_count; /* Number of "struct tomoyo_number_union values". */ u16 names_count; /* Number of "struct tomoyo_name_union names". */ u16 argc; /* Number of "struct tomoyo_argv". */ u16 envc; /* Number of "struct tomoyo_envp". */ u8 grant_log; /* One of values in "enum tomoyo_grant_log". */ const struct tomoyo_path_info *transit; /* Maybe NULL. */ /* * struct tomoyo_condition_element condition[condc]; * struct tomoyo_number_union values[numbers_count]; * struct tomoyo_name_union names[names_count]; * struct tomoyo_argv argv[argc]; * struct tomoyo_envp envp[envc]; */ }; /* Common header for individual entries. */ struct tomoyo_acl_info { struct list_head list; struct tomoyo_condition *cond; /* Maybe NULL. */ s8 is_deleted; /* true or false or TOMOYO_GC_IN_PROGRESS */ u8 type; /* One of values in "enum tomoyo_acl_entry_type_index". */ } __packed; /* Structure for domain information. */ struct tomoyo_domain_info { struct list_head list; struct list_head acl_info_list; /* Name of this domain. Never NULL. */ const struct tomoyo_path_info *domainname; /* Namespace for this domain. Never NULL. */ struct tomoyo_policy_namespace *ns; /* Group numbers to use. */ unsigned long group[TOMOYO_MAX_ACL_GROUPS / BITS_PER_LONG]; u8 profile; /* Profile number to use. */ bool is_deleted; /* Delete flag. */ bool flags[TOMOYO_MAX_DOMAIN_INFO_FLAGS]; atomic_t users; /* Number of referring tasks. */ }; /* * Structure for "task manual_domain_transition" directive. */ struct tomoyo_task_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_MANUAL_TASK_ACL */ /* Pointer to domainname. */ const struct tomoyo_path_info *domainname; }; /* * Structure for "file execute", "file read", "file write", "file append", * "file unlink", "file getattr", "file rmdir", "file truncate", * "file symlink", "file chroot" and "file unmount" directive. */ struct tomoyo_path_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_PATH_ACL */ u16 perm; /* Bitmask of values in "enum tomoyo_path_acl_index". */ struct tomoyo_name_union name; }; /* * Structure for "file create", "file mkdir", "file mkfifo", "file mksock", * "file ioctl", "file chmod", "file chown" and "file chgrp" directive. */ struct tomoyo_path_number_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_PATH_NUMBER_ACL */ /* Bitmask of values in "enum tomoyo_path_number_acl_index". */ u8 perm; struct tomoyo_name_union name; struct tomoyo_number_union number; }; /* Structure for "file mkblock" and "file mkchar" directive. */ struct tomoyo_mkdev_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_MKDEV_ACL */ u8 perm; /* Bitmask of values in "enum tomoyo_mkdev_acl_index". */ struct tomoyo_name_union name; struct tomoyo_number_union mode; struct tomoyo_number_union major; struct tomoyo_number_union minor; }; /* * Structure for "file rename", "file link" and "file pivot_root" directive. */ struct tomoyo_path2_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_PATH2_ACL */ u8 perm; /* Bitmask of values in "enum tomoyo_path2_acl_index". */ struct tomoyo_name_union name1; struct tomoyo_name_union name2; }; /* Structure for "file mount" directive. */ struct tomoyo_mount_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_MOUNT_ACL */ struct tomoyo_name_union dev_name; struct tomoyo_name_union dir_name; struct tomoyo_name_union fs_type; struct tomoyo_number_union flags; }; /* Structure for "misc env" directive in domain policy. */ struct tomoyo_env_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_ENV_ACL */ const struct tomoyo_path_info *env; /* environment variable */ }; /* Structure for "network inet" directive. */ struct tomoyo_inet_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_INET_ACL */ u8 protocol; u8 perm; /* Bitmask of values in "enum tomoyo_network_acl_index" */ struct tomoyo_ipaddr_union address; struct tomoyo_number_union port; }; /* Structure for "network unix" directive. */ struct tomoyo_unix_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_UNIX_ACL */ u8 protocol; u8 perm; /* Bitmask of values in "enum tomoyo_network_acl_index" */ struct tomoyo_name_union name; }; /* Structure for holding a line from /sys/kernel/security/tomoyo/ interface. */ struct tomoyo_acl_param { char *data; struct list_head *list; struct tomoyo_policy_namespace *ns; bool is_delete; }; #define TOMOYO_MAX_IO_READ_QUEUE 64 /* * Structure for reading/writing policy via /sys/kernel/security/tomoyo * interfaces. */ struct tomoyo_io_buffer { void (*read)(struct tomoyo_io_buffer *head); int (*write)(struct tomoyo_io_buffer *head); __poll_t (*poll)(struct file *file, poll_table *wait); /* Exclusive lock for this structure. */ struct mutex io_sem; char __user *read_user_buf; size_t read_user_buf_avail; struct { struct list_head *ns; struct list_head *domain; struct list_head *group; struct list_head *acl; size_t avail; unsigned int step; unsigned int query_index; u16 index; u16 cond_index; u8 acl_group_index; u8 cond_step; u8 bit; u8 w_pos; bool eof; bool print_this_domain_only; bool print_transition_related_only; bool print_cond_part; const char *w[TOMOYO_MAX_IO_READ_QUEUE]; } r; struct { struct tomoyo_policy_namespace *ns; /* The position currently writing to. */ struct tomoyo_domain_info *domain; /* Bytes available for writing. */ size_t avail; bool is_delete; } w; /* Buffer for reading. */ char *read_buf; /* Size of read buffer. */ size_t readbuf_size; /* Buffer for writing. */ char *write_buf; /* Size of write buffer. */ size_t writebuf_size; /* Type of this interface. */ enum tomoyo_securityfs_interface_index type; /* Users counter protected by tomoyo_io_buffer_list_lock. */ u8 users; /* List for telling GC not to kfree() elements. */ struct list_head list; }; /* * Structure for "initialize_domain"/"no_initialize_domain"/"keep_domain"/ * "no_keep_domain" keyword. */ struct tomoyo_transition_control { struct tomoyo_acl_head head; u8 type; /* One of values in "enum tomoyo_transition_type". */ /* True if the domainname is tomoyo_get_last_name(). */ bool is_last_name; const struct tomoyo_path_info *domainname; /* Maybe NULL */ const struct tomoyo_path_info *program; /* Maybe NULL */ }; /* Structure for "aggregator" keyword. */ struct tomoyo_aggregator { struct tomoyo_acl_head head; const struct tomoyo_path_info *original_name; const struct tomoyo_path_info *aggregated_name; }; /* Structure for policy manager. */ struct tomoyo_manager { struct tomoyo_acl_head head; /* A path to program or a domainname. */ const struct tomoyo_path_info *manager; }; struct tomoyo_preference { unsigned int learning_max_entry; bool enforcing_verbose; bool learning_verbose; bool permissive_verbose; }; /* Structure for /sys/kernel/security/tomnoyo/profile interface. */ struct tomoyo_profile { const struct tomoyo_path_info *comment; struct tomoyo_preference *learning; struct tomoyo_preference *permissive; struct tomoyo_preference *enforcing; struct tomoyo_preference preference; u8 default_config; u8 config[TOMOYO_MAX_MAC_INDEX + TOMOYO_MAX_MAC_CATEGORY_INDEX]; unsigned int pref[TOMOYO_MAX_PREF]; }; /* Structure for representing YYYY/MM/DD hh/mm/ss. */ struct tomoyo_time { u16 year; u8 month; u8 day; u8 hour; u8 min; u8 sec; }; /* Structure for policy namespace. */ struct tomoyo_policy_namespace { /* Profile table. Memory is allocated as needed. */ struct tomoyo_profile *profile_ptr[TOMOYO_MAX_PROFILES]; /* List of "struct tomoyo_group". */ struct list_head group_list[TOMOYO_MAX_GROUP]; /* List of policy. */ struct list_head policy_list[TOMOYO_MAX_POLICY]; /* The global ACL referred by "use_group" keyword. */ struct list_head acl_group[TOMOYO_MAX_ACL_GROUPS]; /* List for connecting to tomoyo_namespace_list list. */ struct list_head namespace_list; /* Profile version. Currently only 20150505 is defined. */ unsigned int profile_version; /* Name of this namespace (e.g. "<kernel>", "</usr/sbin/httpd>" ). */ const char *name; }; /* Structure for "struct task_struct"->security. */ struct tomoyo_task { struct tomoyo_domain_info *domain_info; struct tomoyo_domain_info *old_domain_info; }; /********** Function prototypes. **********/ bool tomoyo_address_matches_group(const bool is_ipv6, const __be32 *address, const struct tomoyo_group *group); bool tomoyo_compare_number_union(const unsigned long value, const struct tomoyo_number_union *ptr); bool tomoyo_condition(struct tomoyo_request_info *r, const struct tomoyo_condition *cond); bool tomoyo_correct_domain(const unsigned char *domainname); bool tomoyo_correct_path(const char *filename); bool tomoyo_correct_word(const char *string); bool tomoyo_domain_def(const unsigned char *buffer); bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r); bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos, struct tomoyo_page_dump *dump); bool tomoyo_memory_ok(void *ptr); bool tomoyo_number_matches_group(const unsigned long min, const unsigned long max, const struct tomoyo_group *group); bool tomoyo_parse_ipaddr_union(struct tomoyo_acl_param *param, struct tomoyo_ipaddr_union *ptr); bool tomoyo_parse_name_union(struct tomoyo_acl_param *param, struct tomoyo_name_union *ptr); bool tomoyo_parse_number_union(struct tomoyo_acl_param *param, struct tomoyo_number_union *ptr); bool tomoyo_path_matches_pattern(const struct tomoyo_path_info *filename, const struct tomoyo_path_info *pattern); bool tomoyo_permstr(const char *string, const char *keyword); bool tomoyo_str_starts(char **src, const char *find); char *tomoyo_encode(const char *str); char *tomoyo_encode2(const char *str, int str_len); char *tomoyo_init_log(struct tomoyo_request_info *r, int len, const char *fmt, va_list args) __printf(3, 0); char *tomoyo_read_token(struct tomoyo_acl_param *param); char *tomoyo_realpath_from_path(const struct path *path); char *tomoyo_realpath_nofollow(const char *pathname); const char *tomoyo_get_exe(void); const struct tomoyo_path_info *tomoyo_compare_name_union (const struct tomoyo_path_info *name, const struct tomoyo_name_union *ptr); const struct tomoyo_path_info *tomoyo_get_domainname (struct tomoyo_acl_param *param); const struct tomoyo_path_info *tomoyo_get_name(const char *name); const struct tomoyo_path_info *tomoyo_path_matches_group (const struct tomoyo_path_info *pathname, const struct tomoyo_group *group); int tomoyo_check_open_permission(struct tomoyo_domain_info *domain, const struct path *path, const int flag); void tomoyo_close_control(struct tomoyo_io_buffer *head); int tomoyo_env_perm(struct tomoyo_request_info *r, const char *env); int tomoyo_execute_permission(struct tomoyo_request_info *r, const struct tomoyo_path_info *filename); int tomoyo_find_next_domain(struct linux_binprm *bprm); int tomoyo_get_mode(const struct tomoyo_policy_namespace *ns, const u8 profile, const u8 index); int tomoyo_init_request_info(struct tomoyo_request_info *r, struct tomoyo_domain_info *domain, const u8 index); int tomoyo_mkdev_perm(const u8 operation, const struct path *path, const unsigned int mode, unsigned int dev); int tomoyo_mount_permission(const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data_page); int tomoyo_open_control(const u8 type, struct file *file); int tomoyo_path2_perm(const u8 operation, const struct path *path1, const struct path *path2); int tomoyo_path_number_perm(const u8 operation, const struct path *path, unsigned long number); int tomoyo_path_perm(const u8 operation, const struct path *path, const char *target); __poll_t tomoyo_poll_control(struct file *file, poll_table *wait); __poll_t tomoyo_poll_log(struct file *file, poll_table *wait); int tomoyo_socket_bind_permission(struct socket *sock, struct sockaddr *addr, int addr_len); int tomoyo_socket_connect_permission(struct socket *sock, struct sockaddr *addr, int addr_len); int tomoyo_socket_listen_permission(struct socket *sock); int tomoyo_socket_sendmsg_permission(struct socket *sock, struct msghdr *msg, int size); int tomoyo_supervisor(struct tomoyo_request_info *r, const char *fmt, ...) __printf(2, 3); int tomoyo_update_domain(struct tomoyo_acl_info *new_entry, const int size, struct tomoyo_acl_param *param, bool (*check_duplicate) (const struct tomoyo_acl_info *, const struct tomoyo_acl_info *), bool (*merge_duplicate) (struct tomoyo_acl_info *, struct tomoyo_acl_info *, const bool)); int tomoyo_update_policy(struct tomoyo_acl_head *new_entry, const int size, struct tomoyo_acl_param *param, bool (*check_duplicate) (const struct tomoyo_acl_head *, const struct tomoyo_acl_head *)); int tomoyo_write_aggregator(struct tomoyo_acl_param *param); int tomoyo_write_file(struct tomoyo_acl_param *param); int tomoyo_write_group(struct tomoyo_acl_param *param, const u8 type); int tomoyo_write_misc(struct tomoyo_acl_param *param); int tomoyo_write_inet_network(struct tomoyo_acl_param *param); int tomoyo_write_transition_control(struct tomoyo_acl_param *param, const u8 type); int tomoyo_write_unix_network(struct tomoyo_acl_param *param); ssize_t tomoyo_read_control(struct tomoyo_io_buffer *head, char __user *buffer, const int buffer_len); ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head, const char __user *buffer, const int buffer_len); struct tomoyo_condition *tomoyo_get_condition(struct tomoyo_acl_param *param); struct tomoyo_domain_info *tomoyo_assign_domain(const char *domainname, const bool transit); struct tomoyo_domain_info *tomoyo_domain(void); struct tomoyo_domain_info *tomoyo_find_domain(const char *domainname); struct tomoyo_group *tomoyo_get_group(struct tomoyo_acl_param *param, const u8 idx); struct tomoyo_policy_namespace *tomoyo_assign_namespace (const char *domainname); struct tomoyo_profile *tomoyo_profile(const struct tomoyo_policy_namespace *ns, const u8 profile); u8 tomoyo_parse_ulong(unsigned long *result, char **str); void *tomoyo_commit_ok(void *data, const unsigned int size); void __init tomoyo_load_builtin_policy(void); void __init tomoyo_mm_init(void); void tomoyo_check_acl(struct tomoyo_request_info *r, bool (*check_entry)(struct tomoyo_request_info *, const struct tomoyo_acl_info *)); void tomoyo_check_profile(void); void tomoyo_convert_time(time64_t time, struct tomoyo_time *stamp); void tomoyo_del_condition(struct list_head *element); void tomoyo_fill_path_info(struct tomoyo_path_info *ptr); void tomoyo_get_attributes(struct tomoyo_obj_info *obj); void tomoyo_init_policy_namespace(struct tomoyo_policy_namespace *ns); void tomoyo_load_policy(const char *filename); void tomoyo_normalize_line(unsigned char *buffer); void tomoyo_notify_gc(struct tomoyo_io_buffer *head, const bool is_register); void tomoyo_print_ip(char *buf, const unsigned int size, const struct tomoyo_ipaddr_union *ptr); void tomoyo_print_ulong(char *buffer, const int buffer_len, const unsigned long value, const u8 type); void tomoyo_put_name_union(struct tomoyo_name_union *ptr); void tomoyo_put_number_union(struct tomoyo_number_union *ptr); void tomoyo_read_log(struct tomoyo_io_buffer *head); void tomoyo_update_stat(const u8 index); void tomoyo_warn_oom(const char *function); void tomoyo_write_log(struct tomoyo_request_info *r, const char *fmt, ...) __printf(2, 3); void tomoyo_write_log2(struct tomoyo_request_info *r, int len, const char *fmt, va_list args) __printf(3, 0); /********** External variable definitions. **********/ extern bool tomoyo_policy_loaded; extern int tomoyo_enabled; extern const char * const tomoyo_condition_keyword [TOMOYO_MAX_CONDITION_KEYWORD]; extern const char * const tomoyo_dif[TOMOYO_MAX_DOMAIN_INFO_FLAGS]; extern const char * const tomoyo_mac_keywords[TOMOYO_MAX_MAC_INDEX + TOMOYO_MAX_MAC_CATEGORY_INDEX]; extern const char * const tomoyo_mode[TOMOYO_CONFIG_MAX_MODE]; extern const char * const tomoyo_path_keyword[TOMOYO_MAX_PATH_OPERATION]; extern const char * const tomoyo_proto_keyword[TOMOYO_SOCK_MAX]; extern const char * const tomoyo_socket_keyword[TOMOYO_MAX_NETWORK_OPERATION]; extern const u8 tomoyo_index2category[TOMOYO_MAX_MAC_INDEX]; extern const u8 tomoyo_pn2mac[TOMOYO_MAX_PATH_NUMBER_OPERATION]; extern const u8 tomoyo_pnnn2mac[TOMOYO_MAX_MKDEV_OPERATION]; extern const u8 tomoyo_pp2mac[TOMOYO_MAX_PATH2_OPERATION]; extern struct list_head tomoyo_condition_list; extern struct list_head tomoyo_domain_list; extern struct list_head tomoyo_name_list[TOMOYO_MAX_HASH]; extern struct list_head tomoyo_namespace_list; extern struct mutex tomoyo_policy_lock; extern struct srcu_struct tomoyo_ss; extern struct tomoyo_domain_info tomoyo_kernel_domain; extern struct tomoyo_policy_namespace tomoyo_kernel_namespace; extern unsigned int tomoyo_memory_quota[TOMOYO_MAX_MEMORY_STAT]; extern unsigned int tomoyo_memory_used[TOMOYO_MAX_MEMORY_STAT]; extern struct lsm_blob_sizes tomoyo_blob_sizes; /********** Inlined functions. **********/ /** * tomoyo_read_lock - Take lock for protecting policy. * * Returns index number for tomoyo_read_unlock(). */ static inline int tomoyo_read_lock(void) { return srcu_read_lock(&tomoyo_ss); } /** * tomoyo_read_unlock - Release lock for protecting policy. * * @idx: Index number returned by tomoyo_read_lock(). * * Returns nothing. */ static inline void tomoyo_read_unlock(int idx) { srcu_read_unlock(&tomoyo_ss, idx); } /** * tomoyo_sys_getppid - Copy of getppid(). * * Returns parent process's PID. * * Alpha does not have getppid() defined. To be able to build this module on * Alpha, I have to copy getppid() from kernel/timer.c. */ static inline pid_t tomoyo_sys_getppid(void) { pid_t pid; rcu_read_lock(); pid = task_tgid_vnr(rcu_dereference(current->real_parent)); rcu_read_unlock(); return pid; } /** * tomoyo_sys_getpid - Copy of getpid(). * * Returns current thread's PID. * * Alpha does not have getpid() defined. To be able to build this module on * Alpha, I have to copy getpid() from kernel/timer.c. */ static inline pid_t tomoyo_sys_getpid(void) { return task_tgid_vnr(current); } /** * tomoyo_pathcmp - strcmp() for "struct tomoyo_path_info" structure. * * @a: Pointer to "struct tomoyo_path_info". * @b: Pointer to "struct tomoyo_path_info". * * Returns true if @a == @b, false otherwise. */ static inline bool tomoyo_pathcmp(const struct tomoyo_path_info *a, const struct tomoyo_path_info *b) { return a->hash != b->hash || strcmp(a->name, b->name); } /** * tomoyo_put_name - Drop reference on "struct tomoyo_name". * * @name: Pointer to "struct tomoyo_path_info". Maybe NULL. * * Returns nothing. */ static inline void tomoyo_put_name(const struct tomoyo_path_info *name) { if (name) { struct tomoyo_name *ptr = container_of(name, typeof(*ptr), entry); atomic_dec(&ptr->head.users); } } /** * tomoyo_put_condition - Drop reference on "struct tomoyo_condition". * * @cond: Pointer to "struct tomoyo_condition". Maybe NULL. * * Returns nothing. */ static inline void tomoyo_put_condition(struct tomoyo_condition *cond) { if (cond) atomic_dec(&cond->head.users); } /** * tomoyo_put_group - Drop reference on "struct tomoyo_group". * * @group: Pointer to "struct tomoyo_group". Maybe NULL. * * Returns nothing. */ static inline void tomoyo_put_group(struct tomoyo_group *group) { if (group) atomic_dec(&group->head.users); } /** * tomoyo_task - Get "struct tomoyo_task" for specified thread. * * @task - Pointer to "struct task_struct". * * Returns pointer to "struct tomoyo_task" for specified thread. */ static inline struct tomoyo_task *tomoyo_task(struct task_struct *task) { return task->security + tomoyo_blob_sizes.lbs_task; } /** * tomoyo_same_name_union - Check for duplicated "struct tomoyo_name_union" entry. * * @a: Pointer to "struct tomoyo_name_union". * @b: Pointer to "struct tomoyo_name_union". * * Returns true if @a == @b, false otherwise. */ static inline bool tomoyo_same_name_union (const struct tomoyo_name_union *a, const struct tomoyo_name_union *b) { return a->filename == b->filename && a->group == b->group; } /** * tomoyo_same_number_union - Check for duplicated "struct tomoyo_number_union" entry. * * @a: Pointer to "struct tomoyo_number_union". * @b: Pointer to "struct tomoyo_number_union". * * Returns true if @a == @b, false otherwise. */ static inline bool tomoyo_same_number_union (const struct tomoyo_number_union *a, const struct tomoyo_number_union *b) { return a->values[0] == b->values[0] && a->values[1] == b->values[1] && a->group == b->group && a->value_type[0] == b->value_type[0] && a->value_type[1] == b->value_type[1]; } /** * tomoyo_same_ipaddr_union - Check for duplicated "struct tomoyo_ipaddr_union" entry. * * @a: Pointer to "struct tomoyo_ipaddr_union". * @b: Pointer to "struct tomoyo_ipaddr_union". * * Returns true if @a == @b, false otherwise. */ static inline bool tomoyo_same_ipaddr_union (const struct tomoyo_ipaddr_union *a, const struct tomoyo_ipaddr_union *b) { return !memcmp(a->ip, b->ip, sizeof(a->ip)) && a->group == b->group && a->is_ipv6 == b->is_ipv6; } /** * tomoyo_current_namespace - Get "struct tomoyo_policy_namespace" for current thread. * * Returns pointer to "struct tomoyo_policy_namespace" for current thread. */ static inline struct tomoyo_policy_namespace *tomoyo_current_namespace(void) { return tomoyo_domain()->ns; } /** * list_for_each_cookie - iterate over a list with cookie. * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. */ #define list_for_each_cookie(pos, head) \ if (!pos) \ pos = srcu_dereference((head)->next, &tomoyo_ss); \ for ( ; pos != (head); pos = srcu_dereference(pos->next, &tomoyo_ss)) #endif /* !defined(_SECURITY_TOMOYO_COMMON_H) */ |
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * Header file for dma buffer sharing framework. * * Copyright(C) 2011 Linaro Limited. All rights reserved. * Author: Sumit Semwal <sumit.semwal@ti.com> * * Many thanks to linaro-mm-sig list, and specially * Arnd Bergmann <arnd@arndb.de>, Rob Clark <rob@ti.com> and * Daniel Vetter <daniel@ffwll.ch> for their support in creation and * refining of this idea. */ #ifndef __DMA_BUF_H__ #define __DMA_BUF_H__ #include <linux/iosys-map.h> #include <linux/file.h> #include <linux/err.h> #include <linux/scatterlist.h> #include <linux/list.h> #include <linux/dma-mapping.h> #include <linux/fs.h> #include <linux/dma-fence.h> #include <linux/wait.h> struct device; struct dma_buf; struct dma_buf_attachment; /** * struct dma_buf_ops - operations possible on struct dma_buf * @vmap: [optional] creates a virtual mapping for the buffer into kernel * address space. Same restrictions as for vmap and friends apply. * @vunmap: [optional] unmaps a vmap from the buffer */ struct dma_buf_ops { /** * @attach: * * This is called from dma_buf_attach() to make sure that a given * &dma_buf_attachment.dev can access the provided &dma_buf. Exporters * which support buffer objects in special locations like VRAM or * device-specific carveout areas should check whether the buffer could * be move to system memory (or directly accessed by the provided * device), and otherwise need to fail the attach operation. * * The exporter should also in general check whether the current * allocation fulfills the DMA constraints of the new device. If this * is not the case, and the allocation cannot be moved, it should also * fail the attach operation. * * Any exporter-private housekeeping data can be stored in the * &dma_buf_attachment.priv pointer. * * This callback is optional. * * Returns: * * 0 on success, negative error code on failure. It might return -EBUSY * to signal that backing storage is already allocated and incompatible * with the requirements of requesting device. */ int (*attach)(struct dma_buf *, struct dma_buf_attachment *); /** * @detach: * * This is called by dma_buf_detach() to release a &dma_buf_attachment. * Provided so that exporters can clean up any housekeeping for an * &dma_buf_attachment. * * This callback is optional. */ void (*detach)(struct dma_buf *, struct dma_buf_attachment *); /** * @pin: * * This is called by dma_buf_pin() and lets the exporter know that the * DMA-buf can't be moved any more. Ideally, the exporter should * pin the buffer so that it is generally accessible by all * devices. * * This is called with the &dmabuf.resv object locked and is mutual * exclusive with @cache_sgt_mapping. * * This is called automatically for non-dynamic importers from * dma_buf_attach(). * * Note that similar to non-dynamic exporters in their @map_dma_buf * callback the driver must guarantee that the memory is available for * use and cleared of any old data by the time this function returns. * Drivers which pipeline their buffer moves internally must wait for * all moves and clears to complete. * * Returns: * * 0 on success, negative error code on failure. */ int (*pin)(struct dma_buf_attachment *attach); /** * @unpin: * * This is called by dma_buf_unpin() and lets the exporter know that the * DMA-buf can be moved again. * * This is called with the dmabuf->resv object locked and is mutual * exclusive with @cache_sgt_mapping. * * This callback is optional. */ void (*unpin)(struct dma_buf_attachment *attach); /** * @map_dma_buf: * * This is called by dma_buf_map_attachment() and is used to map a * shared &dma_buf into device address space, and it is mandatory. It * can only be called if @attach has been called successfully. * * This call may sleep, e.g. when the backing storage first needs to be * allocated, or moved to a location suitable for all currently attached * devices. * * Note that any specific buffer attributes required for this function * should get added to device_dma_parameters accessible via * &device.dma_params from the &dma_buf_attachment. The @attach callback * should also check these constraints. * * If this is being called for the first time, the exporter can now * choose to scan through the list of attachments for this buffer, * collate the requirements of the attached devices, and choose an * appropriate backing storage for the buffer. * * Based on enum dma_data_direction, it might be possible to have * multiple users accessing at the same time (for reading, maybe), or * any other kind of sharing that the exporter might wish to make * available to buffer-users. * * This is always called with the dmabuf->resv object locked when * the dynamic_mapping flag is true. * * Note that for non-dynamic exporters the driver must guarantee that * that the memory is available for use and cleared of any old data by * the time this function returns. Drivers which pipeline their buffer * moves internally must wait for all moves and clears to complete. * Dynamic exporters do not need to follow this rule: For non-dynamic * importers the buffer is already pinned through @pin, which has the * same requirements. Dynamic importers otoh are required to obey the * dma_resv fences. * * Returns: * * A &sg_table scatter list of the backing storage of the DMA buffer, * already mapped into the device address space of the &device attached * with the provided &dma_buf_attachment. The addresses and lengths in * the scatter list are PAGE_SIZE aligned. * * On failure, returns a negative error value wrapped into a pointer. * May also return -EINTR when a signal was received while being * blocked. * * Note that exporters should not try to cache the scatter list, or * return the same one for multiple calls. Caching is done either by the * DMA-BUF code (for non-dynamic importers) or the importer. Ownership * of the scatter list is transferred to the caller, and returned by * @unmap_dma_buf. */ struct sg_table * (*map_dma_buf)(struct dma_buf_attachment *, enum dma_data_direction); /** * @unmap_dma_buf: * * This is called by dma_buf_unmap_attachment() and should unmap and * release the &sg_table allocated in @map_dma_buf, and it is mandatory. * For static dma_buf handling this might also unpin the backing * storage if this is the last mapping of the DMA buffer. */ void (*unmap_dma_buf)(struct dma_buf_attachment *, struct sg_table *, enum dma_data_direction); /* TODO: Add try_map_dma_buf version, to return immed with -EBUSY * if the call would block. */ /** * @release: * * Called after the last dma_buf_put to release the &dma_buf, and * mandatory. */ void (*release)(struct dma_buf *); /** * @begin_cpu_access: * * This is called from dma_buf_begin_cpu_access() and allows the * exporter to ensure that the memory is actually coherent for cpu * access. The exporter also needs to ensure that cpu access is coherent * for the access direction. The direction can be used by the exporter * to optimize the cache flushing, i.e. access with a different * direction (read instead of write) might return stale or even bogus * data (e.g. when the exporter needs to copy the data to temporary * storage). * * Note that this is both called through the DMA_BUF_IOCTL_SYNC IOCTL * command for userspace mappings established through @mmap, and also * for kernel mappings established with @vmap. * * This callback is optional. * * Returns: * * 0 on success or a negative error code on failure. This can for * example fail when the backing storage can't be allocated. Can also * return -ERESTARTSYS or -EINTR when the call has been interrupted and * needs to be restarted. */ int (*begin_cpu_access)(struct dma_buf *, enum dma_data_direction); /** * @end_cpu_access: * * This is called from dma_buf_end_cpu_access() when the importer is * done accessing the CPU. The exporter can use this to flush caches and * undo anything else done in @begin_cpu_access. * * This callback is optional. * * Returns: * * 0 on success or a negative error code on failure. Can return * -ERESTARTSYS or -EINTR when the call has been interrupted and needs * to be restarted. */ int (*end_cpu_access)(struct dma_buf *, enum dma_data_direction); /** * @mmap: * * This callback is used by the dma_buf_mmap() function * * Note that the mapping needs to be incoherent, userspace is expected * to bracket CPU access using the DMA_BUF_IOCTL_SYNC interface. * * Because dma-buf buffers have invariant size over their lifetime, the * dma-buf core checks whether a vma is too large and rejects such * mappings. The exporter hence does not need to duplicate this check. * Drivers do not need to check this themselves. * * If an exporter needs to manually flush caches and hence needs to fake * coherency for mmap support, it needs to be able to zap all the ptes * pointing at the backing storage. Now linux mm needs a struct * address_space associated with the struct file stored in vma->vm_file * to do that with the function unmap_mapping_range. But the dma_buf * framework only backs every dma_buf fd with the anon_file struct file, * i.e. all dma_bufs share the same file. * * Hence exporters need to setup their own file (and address_space) * association by setting vma->vm_file and adjusting vma->vm_pgoff in * the dma_buf mmap callback. In the specific case of a gem driver the * exporter could use the shmem file already provided by gem (and set * vm_pgoff = 0). Exporters can then zap ptes by unmapping the * corresponding range of the struct address_space associated with their * own file. * * This callback is optional. * * Returns: * * 0 on success or a negative error code on failure. */ int (*mmap)(struct dma_buf *, struct vm_area_struct *vma); int (*vmap)(struct dma_buf *dmabuf, struct iosys_map *map); void (*vunmap)(struct dma_buf *dmabuf, struct iosys_map *map); }; /** * struct dma_buf - shared buffer object * * This represents a shared buffer, created by calling dma_buf_export(). The * userspace representation is a normal file descriptor, which can be created by * calling dma_buf_fd(). * * Shared dma buffers are reference counted using dma_buf_put() and * get_dma_buf(). * * Device DMA access is handled by the separate &struct dma_buf_attachment. */ struct dma_buf { /** * @size: * * Size of the buffer; invariant over the lifetime of the buffer. */ size_t size; /** * @file: * * File pointer used for sharing buffers across, and for refcounting. * See dma_buf_get() and dma_buf_put(). */ struct file *file; /** * @attachments: * * List of dma_buf_attachment that denotes all devices attached, * protected by &dma_resv lock @resv. */ struct list_head attachments; /** @ops: dma_buf_ops associated with this buffer object. */ const struct dma_buf_ops *ops; /** * @vmapping_counter: * * Used internally to refcnt the vmaps returned by dma_buf_vmap(). * Protected by @lock. */ unsigned vmapping_counter; /** * @vmap_ptr: * The current vmap ptr if @vmapping_counter > 0. Protected by @lock. */ struct iosys_map vmap_ptr; /** * @exp_name: * * Name of the exporter; useful for debugging. Must not be NULL */ const char *exp_name; /** * @name: * * Userspace-provided name. Default value is NULL. If not NULL, * length cannot be longer than DMA_BUF_NAME_LEN, including NIL * char. Useful for accounting and debugging. Read/Write accesses * are protected by @name_lock * * See the IOCTLs DMA_BUF_SET_NAME or DMA_BUF_SET_NAME_A/B */ const char *name; /** @name_lock: Spinlock to protect name access for read access. */ spinlock_t name_lock; /** * @owner: * * Pointer to exporter module; used for refcounting when exporter is a * kernel module. */ struct module *owner; /** @list_node: node for dma_buf accounting and debugging. */ struct list_head list_node; /** @priv: exporter specific private data for this buffer object. */ void *priv; /** * @resv: * * Reservation object linked to this dma-buf. * * IMPLICIT SYNCHRONIZATION RULES: * * Drivers which support implicit synchronization of buffer access as * e.g. exposed in `Implicit Fence Poll Support`_ must follow the * below rules. * * - Drivers must add a read fence through dma_resv_add_fence() with the * DMA_RESV_USAGE_READ flag for anything the userspace API considers a * read access. This highly depends upon the API and window system. * * - Similarly drivers must add a write fence through * dma_resv_add_fence() with the DMA_RESV_USAGE_WRITE flag for * anything the userspace API considers write access. * * - Drivers may just always add a write fence, since that only * causes unnecessary synchronization, but no correctness issues. * * - Some drivers only expose a synchronous userspace API with no * pipelining across drivers. These do not set any fences for their * access. An example here is v4l. * * - Driver should use dma_resv_usage_rw() when retrieving fences as * dependency for implicit synchronization. * * DYNAMIC IMPORTER RULES: * * Dynamic importers, see dma_buf_attachment_is_dynamic(), have * additional constraints on how they set up fences: * * - Dynamic importers must obey the write fences and wait for them to * signal before allowing access to the buffer's underlying storage * through the device. * * - Dynamic importers should set fences for any access that they can't * disable immediately from their &dma_buf_attach_ops.move_notify * callback. * * IMPORTANT: * * All drivers and memory management related functions must obey the * struct dma_resv rules, specifically the rules for updating and * obeying fences. See enum dma_resv_usage for further descriptions. */ struct dma_resv *resv; /** @poll: for userspace poll support */ wait_queue_head_t poll; /** @cb_in: for userspace poll support */ /** @cb_out: for userspace poll support */ struct dma_buf_poll_cb_t { struct dma_fence_cb cb; wait_queue_head_t *poll; __poll_t active; } cb_in, cb_out; #ifdef CONFIG_DMABUF_SYSFS_STATS /** * @sysfs_entry: * * For exposing information about this buffer in sysfs. See also * `DMA-BUF statistics`_ for the uapi this enables. */ struct dma_buf_sysfs_entry { struct kobject kobj; struct dma_buf *dmabuf; } *sysfs_entry; #endif }; /** * struct dma_buf_attach_ops - importer operations for an attachment * * Attachment operations implemented by the importer. */ struct dma_buf_attach_ops { /** * @allow_peer2peer: * * If this is set to true the importer must be able to handle peer * resources without struct pages. */ bool allow_peer2peer; /** * @move_notify: [optional] notification that the DMA-buf is moving * * If this callback is provided the framework can avoid pinning the * backing store while mappings exists. * * This callback is called with the lock of the reservation object * associated with the dma_buf held and the mapping function must be * called with this lock held as well. This makes sure that no mapping * is created concurrently with an ongoing move operation. * * Mappings stay valid and are not directly affected by this callback. * But the DMA-buf can now be in a different physical location, so all * mappings should be destroyed and re-created as soon as possible. * * New mappings can be created after this callback returns, and will * point to the new location of the DMA-buf. */ void (*move_notify)(struct dma_buf_attachment *attach); }; /** * struct dma_buf_attachment - holds device-buffer attachment data * @dmabuf: buffer for this attachment. * @dev: device attached to the buffer. * @node: list of dma_buf_attachment, protected by dma_resv lock of the dmabuf. * @peer2peer: true if the importer can handle peer resources without pages. * @priv: exporter specific attachment data. * @importer_ops: importer operations for this attachment, if provided * dma_buf_map/unmap_attachment() must be called with the dma_resv lock held. * @importer_priv: importer specific attachment data. * * This structure holds the attachment information between the dma_buf buffer * and its user device(s). The list contains one attachment struct per device * attached to the buffer. * * An attachment is created by calling dma_buf_attach(), and released again by * calling dma_buf_detach(). The DMA mapping itself needed to initiate a * transfer is created by dma_buf_map_attachment() and freed again by calling * dma_buf_unmap_attachment(). */ struct dma_buf_attachment { struct dma_buf *dmabuf; struct device *dev; struct list_head node; bool peer2peer; const struct dma_buf_attach_ops *importer_ops; void *importer_priv; void *priv; }; /** * struct dma_buf_export_info - holds information needed to export a dma_buf * @exp_name: name of the exporter - useful for debugging. * @owner: pointer to exporter module - used for refcounting kernel module * @ops: Attach allocator-defined dma buf ops to the new buffer * @size: Size of the buffer - invariant over the lifetime of the buffer * @flags: mode flags for the file * @resv: reservation-object, NULL to allocate default one * @priv: Attach private data of allocator to this buffer * * This structure holds the information required to export the buffer. Used * with dma_buf_export() only. */ struct dma_buf_export_info { const char *exp_name; struct module *owner; const struct dma_buf_ops *ops; size_t size; int flags; struct dma_resv *resv; void *priv; }; /** * DEFINE_DMA_BUF_EXPORT_INFO - helper macro for exporters * @name: export-info name * * DEFINE_DMA_BUF_EXPORT_INFO macro defines the &struct dma_buf_export_info, * zeroes it out and pre-populates exp_name in it. */ #define DEFINE_DMA_BUF_EXPORT_INFO(name) \ struct dma_buf_export_info name = { .exp_name = KBUILD_MODNAME, \ .owner = THIS_MODULE } /** * get_dma_buf - convenience wrapper for get_file. * @dmabuf: [in] pointer to dma_buf * * Increments the reference count on the dma-buf, needed in case of drivers * that either need to create additional references to the dmabuf on the * kernel side. For example, an exporter that needs to keep a dmabuf ptr * so that subsequent exports don't create a new dmabuf. */ static inline void get_dma_buf(struct dma_buf *dmabuf) { get_file(dmabuf->file); } /** * dma_buf_is_dynamic - check if a DMA-buf uses dynamic mappings. * @dmabuf: the DMA-buf to check * * Returns true if a DMA-buf exporter wants to be called with the dma_resv * locked for the map/unmap callbacks, false if it doesn't wants to be called * with the lock held. */ static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf) { return !!dmabuf->ops->pin; } struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, struct device *dev); struct dma_buf_attachment * dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev, const struct dma_buf_attach_ops *importer_ops, void *importer_priv); void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach); int dma_buf_pin(struct dma_buf_attachment *attach); void dma_buf_unpin(struct dma_buf_attachment *attach); struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info); int dma_buf_fd(struct dma_buf *dmabuf, int flags); struct dma_buf *dma_buf_get(int fd); void dma_buf_put(struct dma_buf *dmabuf); struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *, enum dma_data_direction); void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *, enum dma_data_direction); void dma_buf_move_notify(struct dma_buf *dma_buf); int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction dir); int dma_buf_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction dir); struct sg_table * dma_buf_map_attachment_unlocked(struct dma_buf_attachment *attach, enum dma_data_direction direction); void dma_buf_unmap_attachment_unlocked(struct dma_buf_attachment *attach, struct sg_table *sg_table, enum dma_data_direction direction); int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *, unsigned long); int dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map *map); void dma_buf_vunmap(struct dma_buf *dmabuf, struct iosys_map *map); int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map); void dma_buf_vunmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map); struct dma_buf *dma_buf_iter_begin(void); struct dma_buf *dma_buf_iter_next(struct dma_buf *dmbuf); #endif /* __DMA_BUF_H__ */ |
| 415 418 417 416 418 194 2 1 2 193 193 2 1 2 1 1 2 1 1 1 2 418 191 191 191 191 191 191 191 191 191 1 1 1 1 1 1 12 12 12 414 415 413 415 415 417 193 193 193 2 1 1 1 1 2 2 193 193 88 191 192 192 192 192 192 192 192 193 192 192 192 118 191 191 192 194 194 194 194 194 194 194 194 194 194 199 199 199 199 199 199 199 190 194 194 194 194 189 192 189 194 194 194 194 194 192 192 189 189 1 1 1 1 1 1 1 1 187 99 99 45 45 71 71 71 71 66 71 58 58 29 35 35 20 20 45 71 1 1 1 1 1 1 1 1 1 1 1 1 6 6 6 6 1 5 5 5 1 4 4 4 4 6 5 5 198 1 1 190 190 190 190 190 187 187 187 187 187 187 187 201 201 193 192 194 192 189 99 1 5 199 201 187 187 187 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 | // SPDX-License-Identifier: GPL-2.0 /* * USB Raw Gadget driver. * See Documentation/usb/raw-gadget.rst for more details. * * Copyright (c) 2020 Google, Inc. * Author: Andrey Konovalov <andreyknvl@gmail.com> */ #include <linux/compiler.h> #include <linux/ctype.h> #include <linux/debugfs.h> #include <linux/delay.h> #include <linux/idr.h> #include <linux/kref.h> #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/semaphore.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/wait.h> #include <linux/usb.h> #include <linux/usb/ch9.h> #include <linux/usb/ch11.h> #include <linux/usb/gadget.h> #include <linux/usb/composite.h> #include <uapi/linux/usb/raw_gadget.h> #define DRIVER_DESC "USB Raw Gadget" #define DRIVER_NAME "raw-gadget" MODULE_DESCRIPTION(DRIVER_DESC); MODULE_AUTHOR("Andrey Konovalov"); MODULE_LICENSE("GPL"); /*----------------------------------------------------------------------*/ static DEFINE_IDA(driver_id_numbers); #define DRIVER_DRIVER_NAME_LENGTH_MAX 32 #define RAW_EVENT_QUEUE_SIZE 16 struct raw_event_queue { /* See the comment in raw_event_queue_fetch() for locking details. */ spinlock_t lock; struct semaphore sema; struct usb_raw_event *events[RAW_EVENT_QUEUE_SIZE]; int size; }; static void raw_event_queue_init(struct raw_event_queue *queue) { spin_lock_init(&queue->lock); sema_init(&queue->sema, 0); queue->size = 0; } static int raw_event_queue_add(struct raw_event_queue *queue, enum usb_raw_event_type type, size_t length, const void *data) { unsigned long flags; struct usb_raw_event *event; spin_lock_irqsave(&queue->lock, flags); if (queue->size >= RAW_EVENT_QUEUE_SIZE) { spin_unlock_irqrestore(&queue->lock, flags); return -ENOMEM; } event = kmalloc(sizeof(*event) + length, GFP_ATOMIC); if (!event) { spin_unlock_irqrestore(&queue->lock, flags); return -ENOMEM; } event->type = type; event->length = length; if (event->length) memcpy(&event->data[0], data, length); queue->events[queue->size] = event; queue->size++; up(&queue->sema); spin_unlock_irqrestore(&queue->lock, flags); return 0; } static struct usb_raw_event *raw_event_queue_fetch( struct raw_event_queue *queue) { int ret; unsigned long flags; struct usb_raw_event *event; /* * This function can be called concurrently. We first check that * there's at least one event queued by decrementing the semaphore, * and then take the lock to protect queue struct fields. */ ret = down_interruptible(&queue->sema); if (ret) return ERR_PTR(ret); spin_lock_irqsave(&queue->lock, flags); /* * queue->size must have the same value as queue->sema counter (before * the down_interruptible() call above), so this check is a fail-safe. */ if (WARN_ON(!queue->size)) { spin_unlock_irqrestore(&queue->lock, flags); return ERR_PTR(-ENODEV); } event = queue->events[0]; queue->size--; memmove(&queue->events[0], &queue->events[1], queue->size * sizeof(queue->events[0])); spin_unlock_irqrestore(&queue->lock, flags); return event; } static void raw_event_queue_destroy(struct raw_event_queue *queue) { int i; for (i = 0; i < queue->size; i++) kfree(queue->events[i]); queue->size = 0; } /*----------------------------------------------------------------------*/ struct raw_dev; enum ep_state { STATE_EP_DISABLED, STATE_EP_ENABLED, }; struct raw_ep { struct raw_dev *dev; enum ep_state state; struct usb_ep *ep; u8 addr; struct usb_request *req; bool urb_queued; bool disabling; ssize_t status; }; enum dev_state { STATE_DEV_INVALID = 0, STATE_DEV_OPENED, STATE_DEV_INITIALIZED, STATE_DEV_REGISTERING, STATE_DEV_RUNNING, STATE_DEV_CLOSED, STATE_DEV_FAILED }; struct raw_dev { struct kref count; spinlock_t lock; const char *udc_name; struct usb_gadget_driver driver; /* Reference to misc device: */ struct device *dev; /* Make driver names unique */ int driver_id_number; /* Protected by lock: */ enum dev_state state; bool gadget_registered; struct usb_gadget *gadget; struct usb_request *req; bool ep0_in_pending; bool ep0_out_pending; bool ep0_urb_queued; ssize_t ep0_status; struct raw_ep eps[USB_RAW_EPS_NUM_MAX]; int eps_num; struct completion ep0_done; struct raw_event_queue queue; }; static struct raw_dev *dev_new(void) { struct raw_dev *dev; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return NULL; /* Matches kref_put() in raw_release(). */ kref_init(&dev->count); spin_lock_init(&dev->lock); init_completion(&dev->ep0_done); raw_event_queue_init(&dev->queue); dev->driver_id_number = -1; return dev; } static void dev_free(struct kref *kref) { struct raw_dev *dev = container_of(kref, struct raw_dev, count); int i; kfree(dev->udc_name); kfree(dev->driver.udc_name); kfree(dev->driver.driver.name); if (dev->driver_id_number >= 0) ida_free(&driver_id_numbers, dev->driver_id_number); if (dev->req) { if (dev->ep0_urb_queued) usb_ep_dequeue(dev->gadget->ep0, dev->req); usb_ep_free_request(dev->gadget->ep0, dev->req); } raw_event_queue_destroy(&dev->queue); for (i = 0; i < dev->eps_num; i++) { if (dev->eps[i].state == STATE_EP_DISABLED) continue; usb_ep_disable(dev->eps[i].ep); usb_ep_free_request(dev->eps[i].ep, dev->eps[i].req); kfree(dev->eps[i].ep->desc); dev->eps[i].state = STATE_EP_DISABLED; } kfree(dev); } /*----------------------------------------------------------------------*/ static int raw_queue_event(struct raw_dev *dev, enum usb_raw_event_type type, size_t length, const void *data) { int ret = 0; unsigned long flags; ret = raw_event_queue_add(&dev->queue, type, length, data); if (ret < 0) { spin_lock_irqsave(&dev->lock, flags); dev->state = STATE_DEV_FAILED; spin_unlock_irqrestore(&dev->lock, flags); } return ret; } static void gadget_ep0_complete(struct usb_ep *ep, struct usb_request *req) { struct raw_dev *dev = req->context; unsigned long flags; spin_lock_irqsave(&dev->lock, flags); if (req->status) dev->ep0_status = req->status; else dev->ep0_status = req->actual; if (dev->ep0_in_pending) dev->ep0_in_pending = false; else dev->ep0_out_pending = false; spin_unlock_irqrestore(&dev->lock, flags); complete(&dev->ep0_done); } static u8 get_ep_addr(const char *name) { /* If the endpoint has fixed function (named as e.g. "ep12out-bulk"), * parse the endpoint address from its name. We deliberately use * deprecated simple_strtoul() function here, as the number isn't * followed by '\0' nor '\n'. */ if (isdigit(name[2])) return simple_strtoul(&name[2], NULL, 10); /* Otherwise the endpoint is configurable (named as e.g. "ep-a"). */ return USB_RAW_EP_ADDR_ANY; } static int gadget_bind(struct usb_gadget *gadget, struct usb_gadget_driver *driver) { int ret = 0, i = 0; struct raw_dev *dev = container_of(driver, struct raw_dev, driver); struct usb_request *req; struct usb_ep *ep; unsigned long flags; if (strcmp(gadget->name, dev->udc_name) != 0) return -ENODEV; set_gadget_data(gadget, dev); req = usb_ep_alloc_request(gadget->ep0, GFP_KERNEL); if (!req) { dev_err(&gadget->dev, "usb_ep_alloc_request failed\n"); set_gadget_data(gadget, NULL); return -ENOMEM; } spin_lock_irqsave(&dev->lock, flags); dev->req = req; dev->req->context = dev; dev->req->complete = gadget_ep0_complete; dev->gadget = gadget; gadget_for_each_ep(ep, dev->gadget) { dev->eps[i].ep = ep; dev->eps[i].addr = get_ep_addr(ep->name); dev->eps[i].state = STATE_EP_DISABLED; i++; } dev->eps_num = i; spin_unlock_irqrestore(&dev->lock, flags); dev_dbg(&gadget->dev, "gadget connected\n"); ret = raw_queue_event(dev, USB_RAW_EVENT_CONNECT, 0, NULL); if (ret < 0) { dev_err(&gadget->dev, "failed to queue connect event\n"); set_gadget_data(gadget, NULL); return ret; } /* Matches kref_put() in gadget_unbind(). */ kref_get(&dev->count); return ret; } static void gadget_unbind(struct usb_gadget *gadget) { struct raw_dev *dev = get_gadget_data(gadget); set_gadget_data(gadget, NULL); /* Matches kref_get() in gadget_bind(). */ kref_put(&dev->count, dev_free); } static int gadget_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) { int ret = 0; struct raw_dev *dev = get_gadget_data(gadget); unsigned long flags; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_err(&gadget->dev, "ignoring, device is not running\n"); ret = -ENODEV; goto out_unlock; } if (dev->ep0_in_pending || dev->ep0_out_pending) { dev_dbg(&gadget->dev, "stalling, request already pending\n"); ret = -EBUSY; goto out_unlock; } if ((ctrl->bRequestType & USB_DIR_IN) && ctrl->wLength) dev->ep0_in_pending = true; else dev->ep0_out_pending = true; spin_unlock_irqrestore(&dev->lock, flags); ret = raw_queue_event(dev, USB_RAW_EVENT_CONTROL, sizeof(*ctrl), ctrl); if (ret < 0) dev_err(&gadget->dev, "failed to queue control event\n"); goto out; out_unlock: spin_unlock_irqrestore(&dev->lock, flags); out: if (ret == 0 && ctrl->wLength == 0) { /* * Return USB_GADGET_DELAYED_STATUS as a workaround to stop * some UDC drivers (e.g. dwc3) from automatically proceeding * with the status stage for 0-length transfers. * Should be removed once all UDC drivers are fixed to always * delay the status stage until a response is queued to EP0. */ return USB_GADGET_DELAYED_STATUS; } return ret; } static void gadget_disconnect(struct usb_gadget *gadget) { struct raw_dev *dev = get_gadget_data(gadget); int ret; dev_dbg(&gadget->dev, "gadget disconnected\n"); ret = raw_queue_event(dev, USB_RAW_EVENT_DISCONNECT, 0, NULL); if (ret < 0) dev_err(&gadget->dev, "failed to queue disconnect event\n"); } static void gadget_suspend(struct usb_gadget *gadget) { struct raw_dev *dev = get_gadget_data(gadget); int ret; dev_dbg(&gadget->dev, "gadget suspended\n"); ret = raw_queue_event(dev, USB_RAW_EVENT_SUSPEND, 0, NULL); if (ret < 0) dev_err(&gadget->dev, "failed to queue suspend event\n"); } static void gadget_resume(struct usb_gadget *gadget) { struct raw_dev *dev = get_gadget_data(gadget); int ret; dev_dbg(&gadget->dev, "gadget resumed\n"); ret = raw_queue_event(dev, USB_RAW_EVENT_RESUME, 0, NULL); if (ret < 0) dev_err(&gadget->dev, "failed to queue resume event\n"); } static void gadget_reset(struct usb_gadget *gadget) { struct raw_dev *dev = get_gadget_data(gadget); int ret; dev_dbg(&gadget->dev, "gadget reset\n"); ret = raw_queue_event(dev, USB_RAW_EVENT_RESET, 0, NULL); if (ret < 0) dev_err(&gadget->dev, "failed to queue reset event\n"); } /*----------------------------------------------------------------------*/ static struct miscdevice raw_misc_device; static int raw_open(struct inode *inode, struct file *fd) { struct raw_dev *dev; /* Nonblocking I/O is not supported yet. */ if (fd->f_flags & O_NONBLOCK) return -EINVAL; dev = dev_new(); if (!dev) return -ENOMEM; fd->private_data = dev; dev->state = STATE_DEV_OPENED; dev->dev = raw_misc_device.this_device; return 0; } static int raw_release(struct inode *inode, struct file *fd) { int ret = 0; struct raw_dev *dev = fd->private_data; unsigned long flags; bool unregister = false; spin_lock_irqsave(&dev->lock, flags); dev->state = STATE_DEV_CLOSED; if (!dev->gadget) { spin_unlock_irqrestore(&dev->lock, flags); goto out_put; } if (dev->gadget_registered) unregister = true; dev->gadget_registered = false; spin_unlock_irqrestore(&dev->lock, flags); if (unregister) { ret = usb_gadget_unregister_driver(&dev->driver); if (ret != 0) dev_err(dev->dev, "usb_gadget_unregister_driver() failed with %d\n", ret); /* Matches kref_get() in raw_ioctl_run(). */ kref_put(&dev->count, dev_free); } out_put: /* Matches dev_new() in raw_open(). */ kref_put(&dev->count, dev_free); return ret; } /*----------------------------------------------------------------------*/ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value) { int ret = 0; int driver_id_number; struct usb_raw_init arg; char *udc_driver_name; char *udc_device_name; char *driver_driver_name; unsigned long flags; if (copy_from_user(&arg, (void __user *)value, sizeof(arg))) return -EFAULT; switch (arg.speed) { case USB_SPEED_UNKNOWN: arg.speed = USB_SPEED_HIGH; break; case USB_SPEED_LOW: case USB_SPEED_FULL: case USB_SPEED_HIGH: case USB_SPEED_SUPER: break; default: return -EINVAL; } driver_id_number = ida_alloc(&driver_id_numbers, GFP_KERNEL); if (driver_id_number < 0) return driver_id_number; driver_driver_name = kmalloc(DRIVER_DRIVER_NAME_LENGTH_MAX, GFP_KERNEL); if (!driver_driver_name) { ret = -ENOMEM; goto out_free_driver_id_number; } snprintf(driver_driver_name, DRIVER_DRIVER_NAME_LENGTH_MAX, DRIVER_NAME ".%d", driver_id_number); udc_driver_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL); if (!udc_driver_name) { ret = -ENOMEM; goto out_free_driver_driver_name; } ret = strscpy(udc_driver_name, &arg.driver_name[0], UDC_NAME_LENGTH_MAX); if (ret < 0) goto out_free_udc_driver_name; ret = 0; udc_device_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL); if (!udc_device_name) { ret = -ENOMEM; goto out_free_udc_driver_name; } ret = strscpy(udc_device_name, &arg.device_name[0], UDC_NAME_LENGTH_MAX); if (ret < 0) goto out_free_udc_device_name; ret = 0; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_OPENED) { dev_dbg(dev->dev, "fail, device is not opened\n"); ret = -EINVAL; goto out_unlock; } dev->udc_name = udc_driver_name; dev->driver.function = DRIVER_DESC; dev->driver.max_speed = arg.speed; dev->driver.setup = gadget_setup; dev->driver.disconnect = gadget_disconnect; dev->driver.bind = gadget_bind; dev->driver.unbind = gadget_unbind; dev->driver.suspend = gadget_suspend; dev->driver.resume = gadget_resume; dev->driver.reset = gadget_reset; dev->driver.driver.name = driver_driver_name; dev->driver.udc_name = udc_device_name; dev->driver.match_existing_only = 1; dev->driver_id_number = driver_id_number; dev->state = STATE_DEV_INITIALIZED; spin_unlock_irqrestore(&dev->lock, flags); return ret; out_unlock: spin_unlock_irqrestore(&dev->lock, flags); out_free_udc_device_name: kfree(udc_device_name); out_free_udc_driver_name: kfree(udc_driver_name); out_free_driver_driver_name: kfree(driver_driver_name); out_free_driver_id_number: ida_free(&driver_id_numbers, driver_id_number); return ret; } static int raw_ioctl_run(struct raw_dev *dev, unsigned long value) { int ret = 0; unsigned long flags; if (value) return -EINVAL; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_INITIALIZED) { dev_dbg(dev->dev, "fail, device is not initialized\n"); ret = -EINVAL; goto out_unlock; } dev->state = STATE_DEV_REGISTERING; spin_unlock_irqrestore(&dev->lock, flags); ret = usb_gadget_register_driver(&dev->driver); spin_lock_irqsave(&dev->lock, flags); if (ret) { dev_err(dev->dev, "fail, usb_gadget_register_driver returned %d\n", ret); dev->state = STATE_DEV_FAILED; goto out_unlock; } dev->gadget_registered = true; dev->state = STATE_DEV_RUNNING; /* Matches kref_put() in raw_release(). */ kref_get(&dev->count); out_unlock: spin_unlock_irqrestore(&dev->lock, flags); return ret; } static int raw_ioctl_event_fetch(struct raw_dev *dev, unsigned long value) { struct usb_raw_event arg; unsigned long flags; struct usb_raw_event *event; uint32_t length; if (copy_from_user(&arg, (void __user *)value, sizeof(arg))) return -EFAULT; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_dbg(dev->dev, "fail, device is not running\n"); spin_unlock_irqrestore(&dev->lock, flags); return -EINVAL; } if (!dev->gadget) { dev_dbg(dev->dev, "fail, gadget is not bound\n"); spin_unlock_irqrestore(&dev->lock, flags); return -EBUSY; } spin_unlock_irqrestore(&dev->lock, flags); event = raw_event_queue_fetch(&dev->queue); if (PTR_ERR(event) == -EINTR) { dev_dbg(&dev->gadget->dev, "event fetching interrupted\n"); return -EINTR; } if (IS_ERR(event)) { dev_err(&dev->gadget->dev, "failed to fetch event\n"); spin_lock_irqsave(&dev->lock, flags); dev->state = STATE_DEV_FAILED; spin_unlock_irqrestore(&dev->lock, flags); return -ENODEV; } length = min(arg.length, event->length); if (copy_to_user((void __user *)value, event, sizeof(*event) + length)) { kfree(event); return -EFAULT; } kfree(event); return 0; } static void *raw_alloc_io_data(struct usb_raw_ep_io *io, void __user *ptr, bool get_from_user) { void *data; if (copy_from_user(io, ptr, sizeof(*io))) return ERR_PTR(-EFAULT); if (io->ep >= USB_RAW_EPS_NUM_MAX) return ERR_PTR(-EINVAL); if (!usb_raw_io_flags_valid(io->flags)) return ERR_PTR(-EINVAL); if (io->length > PAGE_SIZE) return ERR_PTR(-EINVAL); if (get_from_user) data = memdup_user(ptr + sizeof(*io), io->length); else { data = kmalloc(io->length, GFP_KERNEL); if (!data) data = ERR_PTR(-ENOMEM); } return data; } static int raw_process_ep0_io(struct raw_dev *dev, struct usb_raw_ep_io *io, void *data, bool in) { int ret = 0; unsigned long flags; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_dbg(dev->dev, "fail, device is not running\n"); ret = -EINVAL; goto out_unlock; } if (!dev->gadget) { dev_dbg(dev->dev, "fail, gadget is not bound\n"); ret = -EBUSY; goto out_unlock; } if (dev->ep0_urb_queued) { dev_dbg(&dev->gadget->dev, "fail, urb already queued\n"); ret = -EBUSY; goto out_unlock; } if ((in && !dev->ep0_in_pending) || (!in && !dev->ep0_out_pending)) { dev_dbg(&dev->gadget->dev, "fail, wrong direction\n"); ret = -EBUSY; goto out_unlock; } if (WARN_ON(in && dev->ep0_out_pending)) { ret = -ENODEV; dev->state = STATE_DEV_FAILED; goto out_unlock; } if (WARN_ON(!in && dev->ep0_in_pending)) { ret = -ENODEV; dev->state = STATE_DEV_FAILED; goto out_unlock; } dev->req->buf = data; dev->req->length = io->length; dev->req->zero = usb_raw_io_flags_zero(io->flags); dev->ep0_urb_queued = true; spin_unlock_irqrestore(&dev->lock, flags); ret = usb_ep_queue(dev->gadget->ep0, dev->req, GFP_KERNEL); if (ret) { dev_err(&dev->gadget->dev, "fail, usb_ep_queue returned %d\n", ret); spin_lock_irqsave(&dev->lock, flags); goto out_queue_failed; } ret = wait_for_completion_interruptible(&dev->ep0_done); if (ret) { dev_dbg(&dev->gadget->dev, "wait interrupted\n"); usb_ep_dequeue(dev->gadget->ep0, dev->req); wait_for_completion(&dev->ep0_done); spin_lock_irqsave(&dev->lock, flags); if (dev->ep0_status == -ECONNRESET) dev->ep0_status = -EINTR; goto out_interrupted; } spin_lock_irqsave(&dev->lock, flags); out_interrupted: ret = dev->ep0_status; out_queue_failed: dev->ep0_urb_queued = false; out_unlock: spin_unlock_irqrestore(&dev->lock, flags); return ret; } static int raw_ioctl_ep0_write(struct raw_dev *dev, unsigned long value) { int ret = 0; void *data; struct usb_raw_ep_io io; data = raw_alloc_io_data(&io, (void __user *)value, true); if (IS_ERR(data)) return PTR_ERR(data); ret = raw_process_ep0_io(dev, &io, data, true); kfree(data); return ret; } static int raw_ioctl_ep0_read(struct raw_dev *dev, unsigned long value) { int ret = 0; void *data; struct usb_raw_ep_io io; unsigned int length; data = raw_alloc_io_data(&io, (void __user *)value, false); if (IS_ERR(data)) return PTR_ERR(data); ret = raw_process_ep0_io(dev, &io, data, false); if (ret < 0) goto free; length = min_t(unsigned int, io.length, ret); if (copy_to_user((void __user *)(value + sizeof(io)), data, length)) ret = -EFAULT; else ret = length; free: kfree(data); return ret; } static int raw_ioctl_ep0_stall(struct raw_dev *dev, unsigned long value) { int ret = 0; unsigned long flags; if (value) return -EINVAL; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_dbg(dev->dev, "fail, device is not running\n"); ret = -EINVAL; goto out_unlock; } if (!dev->gadget) { dev_dbg(dev->dev, "fail, gadget is not bound\n"); ret = -EBUSY; goto out_unlock; } if (dev->ep0_urb_queued) { dev_dbg(&dev->gadget->dev, "fail, urb already queued\n"); ret = -EBUSY; goto out_unlock; } if (!dev->ep0_in_pending && !dev->ep0_out_pending) { dev_dbg(&dev->gadget->dev, "fail, no request pending\n"); ret = -EBUSY; goto out_unlock; } ret = usb_ep_set_halt(dev->gadget->ep0); if (ret < 0) dev_err(&dev->gadget->dev, "fail, usb_ep_set_halt returned %d\n", ret); if (dev->ep0_in_pending) dev->ep0_in_pending = false; else dev->ep0_out_pending = false; out_unlock: spin_unlock_irqrestore(&dev->lock, flags); return ret; } static int raw_ioctl_ep_enable(struct raw_dev *dev, unsigned long value) { int ret = 0, i; unsigned long flags; struct usb_endpoint_descriptor *desc; struct raw_ep *ep; bool ep_props_matched = false; desc = memdup_user((void __user *)value, sizeof(*desc)); if (IS_ERR(desc)) return PTR_ERR(desc); /* * Endpoints with a maxpacket length of 0 can cause crashes in UDC * drivers. */ if (usb_endpoint_maxp(desc) == 0) { dev_dbg(dev->dev, "fail, bad endpoint maxpacket\n"); kfree(desc); return -EINVAL; } spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_dbg(dev->dev, "fail, device is not running\n"); ret = -EINVAL; goto out_free; } if (!dev->gadget) { dev_dbg(dev->dev, "fail, gadget is not bound\n"); ret = -EBUSY; goto out_free; } for (i = 0; i < dev->eps_num; i++) { ep = &dev->eps[i]; if (ep->addr != usb_endpoint_num(desc) && ep->addr != USB_RAW_EP_ADDR_ANY) continue; if (!usb_gadget_ep_match_desc(dev->gadget, ep->ep, desc, NULL)) continue; ep_props_matched = true; if (ep->state != STATE_EP_DISABLED) continue; ep->ep->desc = desc; ret = usb_ep_enable(ep->ep); if (ret < 0) { dev_err(&dev->gadget->dev, "fail, usb_ep_enable returned %d\n", ret); goto out_free; } ep->req = usb_ep_alloc_request(ep->ep, GFP_ATOMIC); if (!ep->req) { dev_err(&dev->gadget->dev, "fail, usb_ep_alloc_request failed\n"); usb_ep_disable(ep->ep); ret = -ENOMEM; goto out_free; } ep->state = STATE_EP_ENABLED; ep->ep->driver_data = ep; ret = i; goto out_unlock; } if (!ep_props_matched) { dev_dbg(&dev->gadget->dev, "fail, bad endpoint descriptor\n"); ret = -EINVAL; } else { dev_dbg(&dev->gadget->dev, "fail, no endpoints available\n"); ret = -EBUSY; } out_free: kfree(desc); out_unlock: spin_unlock_irqrestore(&dev->lock, flags); return ret; } static int raw_ioctl_ep_disable(struct raw_dev *dev, unsigned long value) { int ret = 0, i = value; unsigned long flags; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_dbg(dev->dev, "fail, device is not running\n"); ret = -EINVAL; goto out_unlock; } if (!dev->gadget) { dev_dbg(dev->dev, "fail, gadget is not bound\n"); ret = -EBUSY; goto out_unlock; } if (i < 0 || i >= dev->eps_num) { dev_dbg(dev->dev, "fail, invalid endpoint\n"); ret = -EBUSY; goto out_unlock; } if (dev->eps[i].state == STATE_EP_DISABLED) { dev_dbg(&dev->gadget->dev, "fail, endpoint is not enabled\n"); ret = -EINVAL; goto out_unlock; } if (dev->eps[i].disabling) { dev_dbg(&dev->gadget->dev, "fail, disable already in progress\n"); ret = -EINVAL; goto out_unlock; } if (dev->eps[i].urb_queued) { dev_dbg(&dev->gadget->dev, "fail, waiting for urb completion\n"); ret = -EINVAL; goto out_unlock; } dev->eps[i].disabling = true; spin_unlock_irqrestore(&dev->lock, flags); usb_ep_disable(dev->eps[i].ep); spin_lock_irqsave(&dev->lock, flags); usb_ep_free_request(dev->eps[i].ep, dev->eps[i].req); kfree(dev->eps[i].ep->desc); dev->eps[i].state = STATE_EP_DISABLED; dev->eps[i].disabling = false; out_unlock: spin_unlock_irqrestore(&dev->lock, flags); return ret; } static int raw_ioctl_ep_set_clear_halt_wedge(struct raw_dev *dev, unsigned long value, bool set, bool halt) { int ret = 0, i = value; unsigned long flags; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_dbg(dev->dev, "fail, device is not running\n"); ret = -EINVAL; goto out_unlock; } if (!dev->gadget) { dev_dbg(dev->dev, "fail, gadget is not bound\n"); ret = -EBUSY; goto out_unlock; } if (i < 0 || i >= dev->eps_num) { dev_dbg(dev->dev, "fail, invalid endpoint\n"); ret = -EBUSY; goto out_unlock; } if (dev->eps[i].state == STATE_EP_DISABLED) { dev_dbg(&dev->gadget->dev, "fail, endpoint is not enabled\n"); ret = -EINVAL; goto out_unlock; } if (dev->eps[i].disabling) { dev_dbg(&dev->gadget->dev, "fail, disable is in progress\n"); ret = -EINVAL; goto out_unlock; } if (dev->eps[i].urb_queued) { dev_dbg(&dev->gadget->dev, "fail, waiting for urb completion\n"); ret = -EINVAL; goto out_unlock; } if (usb_endpoint_xfer_isoc(dev->eps[i].ep->desc)) { dev_dbg(&dev->gadget->dev, "fail, can't halt/wedge ISO endpoint\n"); ret = -EINVAL; goto out_unlock; } if (set && halt) { ret = usb_ep_set_halt(dev->eps[i].ep); if (ret < 0) dev_err(&dev->gadget->dev, "fail, usb_ep_set_halt returned %d\n", ret); } else if (!set && halt) { ret = usb_ep_clear_halt(dev->eps[i].ep); if (ret < 0) dev_err(&dev->gadget->dev, "fail, usb_ep_clear_halt returned %d\n", ret); } else if (set && !halt) { ret = usb_ep_set_wedge(dev->eps[i].ep); if (ret < 0) dev_err(&dev->gadget->dev, "fail, usb_ep_set_wedge returned %d\n", ret); } out_unlock: spin_unlock_irqrestore(&dev->lock, flags); return ret; } static void gadget_ep_complete(struct usb_ep *ep, struct usb_request *req) { struct raw_ep *r_ep = (struct raw_ep *)ep->driver_data; struct raw_dev *dev = r_ep->dev; unsigned long flags; spin_lock_irqsave(&dev->lock, flags); if (req->status) r_ep->status = req->status; else r_ep->status = req->actual; spin_unlock_irqrestore(&dev->lock, flags); complete((struct completion *)req->context); } static int raw_process_ep_io(struct raw_dev *dev, struct usb_raw_ep_io *io, void *data, bool in) { int ret = 0; unsigned long flags; struct raw_ep *ep; DECLARE_COMPLETION_ONSTACK(done); spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_dbg(dev->dev, "fail, device is not running\n"); ret = -EINVAL; goto out_unlock; } if (!dev->gadget) { dev_dbg(dev->dev, "fail, gadget is not bound\n"); ret = -EBUSY; goto out_unlock; } if (io->ep >= dev->eps_num) { dev_dbg(&dev->gadget->dev, "fail, invalid endpoint\n"); ret = -EINVAL; goto out_unlock; } ep = &dev->eps[io->ep]; if (ep->state != STATE_EP_ENABLED) { dev_dbg(&dev->gadget->dev, "fail, endpoint is not enabled\n"); ret = -EBUSY; goto out_unlock; } if (ep->disabling) { dev_dbg(&dev->gadget->dev, "fail, endpoint is already being disabled\n"); ret = -EBUSY; goto out_unlock; } if (ep->urb_queued) { dev_dbg(&dev->gadget->dev, "fail, urb already queued\n"); ret = -EBUSY; goto out_unlock; } if (in != usb_endpoint_dir_in(ep->ep->desc)) { dev_dbg(&dev->gadget->dev, "fail, wrong direction\n"); ret = -EINVAL; goto out_unlock; } ep->dev = dev; ep->req->context = &done; ep->req->complete = gadget_ep_complete; ep->req->buf = data; ep->req->length = io->length; ep->req->zero = usb_raw_io_flags_zero(io->flags); ep->urb_queued = true; spin_unlock_irqrestore(&dev->lock, flags); ret = usb_ep_queue(ep->ep, ep->req, GFP_KERNEL); if (ret) { dev_err(&dev->gadget->dev, "fail, usb_ep_queue returned %d\n", ret); spin_lock_irqsave(&dev->lock, flags); goto out_queue_failed; } ret = wait_for_completion_interruptible(&done); if (ret) { dev_dbg(&dev->gadget->dev, "wait interrupted\n"); usb_ep_dequeue(ep->ep, ep->req); wait_for_completion(&done); spin_lock_irqsave(&dev->lock, flags); if (ep->status == -ECONNRESET) ep->status = -EINTR; goto out_interrupted; } spin_lock_irqsave(&dev->lock, flags); out_interrupted: ret = ep->status; out_queue_failed: ep->urb_queued = false; out_unlock: spin_unlock_irqrestore(&dev->lock, flags); return ret; } static int raw_ioctl_ep_write(struct raw_dev *dev, unsigned long value) { int ret = 0; char *data; struct usb_raw_ep_io io; data = raw_alloc_io_data(&io, (void __user *)value, true); if (IS_ERR(data)) return PTR_ERR(data); ret = raw_process_ep_io(dev, &io, data, true); kfree(data); return ret; } static int raw_ioctl_ep_read(struct raw_dev *dev, unsigned long value) { int ret = 0; char *data; struct usb_raw_ep_io io; unsigned int length; data = raw_alloc_io_data(&io, (void __user *)value, false); if (IS_ERR(data)) return PTR_ERR(data); ret = raw_process_ep_io(dev, &io, data, false); if (ret < 0) goto free; length = min_t(unsigned int, io.length, ret); if (copy_to_user((void __user *)(value + sizeof(io)), data, length)) ret = -EFAULT; else ret = length; free: kfree(data); return ret; } static int raw_ioctl_configure(struct raw_dev *dev, unsigned long value) { int ret = 0; unsigned long flags; if (value) return -EINVAL; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_dbg(dev->dev, "fail, device is not running\n"); ret = -EINVAL; goto out_unlock; } if (!dev->gadget) { dev_dbg(dev->dev, "fail, gadget is not bound\n"); ret = -EBUSY; goto out_unlock; } usb_gadget_set_state(dev->gadget, USB_STATE_CONFIGURED); out_unlock: spin_unlock_irqrestore(&dev->lock, flags); return ret; } static int raw_ioctl_vbus_draw(struct raw_dev *dev, unsigned long value) { int ret = 0; unsigned long flags; spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_dbg(dev->dev, "fail, device is not running\n"); ret = -EINVAL; goto out_unlock; } if (!dev->gadget) { dev_dbg(dev->dev, "fail, gadget is not bound\n"); ret = -EBUSY; goto out_unlock; } usb_gadget_vbus_draw(dev->gadget, 2 * value); out_unlock: spin_unlock_irqrestore(&dev->lock, flags); return ret; } static void fill_ep_caps(struct usb_ep_caps *caps, struct usb_raw_ep_caps *raw_caps) { raw_caps->type_control = caps->type_control; raw_caps->type_iso = caps->type_iso; raw_caps->type_bulk = caps->type_bulk; raw_caps->type_int = caps->type_int; raw_caps->dir_in = caps->dir_in; raw_caps->dir_out = caps->dir_out; } static void fill_ep_limits(struct usb_ep *ep, struct usb_raw_ep_limits *limits) { limits->maxpacket_limit = ep->maxpacket_limit; limits->max_streams = ep->max_streams; } static int raw_ioctl_eps_info(struct raw_dev *dev, unsigned long value) { int ret = 0, i; unsigned long flags; struct usb_raw_eps_info *info; struct raw_ep *ep; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) { ret = -ENOMEM; goto out; } spin_lock_irqsave(&dev->lock, flags); if (dev->state != STATE_DEV_RUNNING) { dev_dbg(dev->dev, "fail, device is not running\n"); ret = -EINVAL; spin_unlock_irqrestore(&dev->lock, flags); goto out_free; } if (!dev->gadget) { dev_dbg(dev->dev, "fail, gadget is not bound\n"); ret = -EBUSY; spin_unlock_irqrestore(&dev->lock, flags); goto out_free; } for (i = 0; i < dev->eps_num; i++) { ep = &dev->eps[i]; strscpy(&info->eps[i].name[0], ep->ep->name, USB_RAW_EP_NAME_MAX); info->eps[i].addr = ep->addr; fill_ep_caps(&ep->ep->caps, &info->eps[i].caps); fill_ep_limits(ep->ep, &info->eps[i].limits); } ret = dev->eps_num; spin_unlock_irqrestore(&dev->lock, flags); if (copy_to_user((void __user *)value, info, sizeof(*info))) ret = -EFAULT; out_free: kfree(info); out: return ret; } static long raw_ioctl(struct file *fd, unsigned int cmd, unsigned long value) { struct raw_dev *dev = fd->private_data; int ret = 0; if (!dev) return -EBUSY; switch (cmd) { case USB_RAW_IOCTL_INIT: ret = raw_ioctl_init(dev, value); break; case USB_RAW_IOCTL_RUN: ret = raw_ioctl_run(dev, value); break; case USB_RAW_IOCTL_EVENT_FETCH: ret = raw_ioctl_event_fetch(dev, value); break; case USB_RAW_IOCTL_EP0_WRITE: ret = raw_ioctl_ep0_write(dev, value); break; case USB_RAW_IOCTL_EP0_READ: ret = raw_ioctl_ep0_read(dev, value); break; case USB_RAW_IOCTL_EP_ENABLE: ret = raw_ioctl_ep_enable(dev, value); break; case USB_RAW_IOCTL_EP_DISABLE: ret = raw_ioctl_ep_disable(dev, value); break; case USB_RAW_IOCTL_EP_WRITE: ret = raw_ioctl_ep_write(dev, value); break; case USB_RAW_IOCTL_EP_READ: ret = raw_ioctl_ep_read(dev, value); break; case USB_RAW_IOCTL_CONFIGURE: ret = raw_ioctl_configure(dev, value); break; case USB_RAW_IOCTL_VBUS_DRAW: ret = raw_ioctl_vbus_draw(dev, value); break; case USB_RAW_IOCTL_EPS_INFO: ret = raw_ioctl_eps_info(dev, value); break; case USB_RAW_IOCTL_EP0_STALL: ret = raw_ioctl_ep0_stall(dev, value); break; case USB_RAW_IOCTL_EP_SET_HALT: ret = raw_ioctl_ep_set_clear_halt_wedge( dev, value, true, true); break; case USB_RAW_IOCTL_EP_CLEAR_HALT: ret = raw_ioctl_ep_set_clear_halt_wedge( dev, value, false, true); break; case USB_RAW_IOCTL_EP_SET_WEDGE: ret = raw_ioctl_ep_set_clear_halt_wedge( dev, value, true, false); break; default: ret = -EINVAL; } return ret; } /*----------------------------------------------------------------------*/ static const struct file_operations raw_fops = { .open = raw_open, .unlocked_ioctl = raw_ioctl, .compat_ioctl = raw_ioctl, .release = raw_release, }; static struct miscdevice raw_misc_device = { .minor = MISC_DYNAMIC_MINOR, .name = DRIVER_NAME, .fops = &raw_fops, }; module_misc_device(raw_misc_device); |
| 55 54 11 1 1 1 1 54 54 54 54 54 54 1 1 1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 | // SPDX-License-Identifier: GPL-2.0-only /* * Process number limiting controller for cgroups. * * Used to allow a cgroup hierarchy to stop any new processes from fork()ing * after a certain limit is reached. * * Since it is trivial to hit the task limit without hitting any kmemcg limits * in place, PIDs are a fundamental resource. As such, PID exhaustion must be * preventable in the scope of a cgroup hierarchy by allowing resource limiting * of the number of tasks in a cgroup. * * In order to use the `pids` controller, set the maximum number of tasks in * pids.max (this is not available in the root cgroup for obvious reasons). The * number of processes currently in the cgroup is given by pids.current. * Organisational operations are not blocked by cgroup policies, so it is * possible to have pids.current > pids.max. However, it is not possible to * violate a cgroup policy through fork(). fork() will return -EAGAIN if forking * would cause a cgroup policy to be violated. * * To set a cgroup to have no limit, set pids.max to "max". This is the default * for all new cgroups (N.B. that PID limits are hierarchical, so the most * stringent limit in the hierarchy is followed). * * pids.current tracks all child cgroup hierarchies, so parent/pids.current is * a superset of parent/child/pids.current. * * Copyright (C) 2015 Aleksa Sarai <cyphar@cyphar.com> */ #include <linux/kernel.h> #include <linux/threads.h> #include <linux/atomic.h> #include <linux/cgroup.h> #include <linux/slab.h> #include <linux/sched/task.h> #define PIDS_MAX (PID_MAX_LIMIT + 1ULL) #define PIDS_MAX_STR "max" enum pidcg_event { /* Fork failed in subtree because this pids_cgroup limit was hit. */ PIDCG_MAX, /* Fork failed in this pids_cgroup because ancestor limit was hit. */ PIDCG_FORKFAIL, NR_PIDCG_EVENTS, }; struct pids_cgroup { struct cgroup_subsys_state css; /* * Use 64-bit types so that we can safely represent "max" as * %PIDS_MAX = (%PID_MAX_LIMIT + 1). */ atomic64_t counter; atomic64_t limit; int64_t watermark; /* Handles for pids.events[.local] */ struct cgroup_file events_file; struct cgroup_file events_local_file; atomic64_t events[NR_PIDCG_EVENTS]; atomic64_t events_local[NR_PIDCG_EVENTS]; }; static struct pids_cgroup *css_pids(struct cgroup_subsys_state *css) { return container_of(css, struct pids_cgroup, css); } static struct pids_cgroup *parent_pids(struct pids_cgroup *pids) { return css_pids(pids->css.parent); } static struct cgroup_subsys_state * pids_css_alloc(struct cgroup_subsys_state *parent) { struct pids_cgroup *pids; pids = kzalloc(sizeof(struct pids_cgroup), GFP_KERNEL); if (!pids) return ERR_PTR(-ENOMEM); atomic64_set(&pids->limit, PIDS_MAX); return &pids->css; } static void pids_css_free(struct cgroup_subsys_state *css) { kfree(css_pids(css)); } static void pids_update_watermark(struct pids_cgroup *p, int64_t nr_pids) { /* * This is racy, but we don't need perfectly accurate tallying of * the watermark, and this lets us avoid extra atomic overhead. */ if (nr_pids > READ_ONCE(p->watermark)) WRITE_ONCE(p->watermark, nr_pids); } /** * pids_cancel - uncharge the local pid count * @pids: the pid cgroup state * @num: the number of pids to cancel * * This function will WARN if the pid count goes under 0, because such a case is * a bug in the pids controller proper. */ static void pids_cancel(struct pids_cgroup *pids, int num) { /* * A negative count (or overflow for that matter) is invalid, * and indicates a bug in the `pids` controller proper. */ WARN_ON_ONCE(atomic64_add_negative(-num, &pids->counter)); } /** * pids_uncharge - hierarchically uncharge the pid count * @pids: the pid cgroup state * @num: the number of pids to uncharge */ static void pids_uncharge(struct pids_cgroup *pids, int num) { struct pids_cgroup *p; for (p = pids; parent_pids(p); p = parent_pids(p)) pids_cancel(p, num); } /** * pids_charge - hierarchically charge the pid count * @pids: the pid cgroup state * @num: the number of pids to charge * * This function does *not* follow the pid limit set. It cannot fail and the new * pid count may exceed the limit. This is only used for reverting failed * attaches, where there is no other way out than violating the limit. */ static void pids_charge(struct pids_cgroup *pids, int num) { struct pids_cgroup *p; for (p = pids; parent_pids(p); p = parent_pids(p)) { int64_t new = atomic64_add_return(num, &p->counter); pids_update_watermark(p, new); } } /** * pids_try_charge - hierarchically try to charge the pid count * @pids: the pid cgroup state * @num: the number of pids to charge * @fail: storage of pid cgroup causing the fail * * This function follows the set limit. It will fail if the charge would cause * the new value to exceed the hierarchical limit. Returns 0 if the charge * succeeded, otherwise -EAGAIN. */ static int pids_try_charge(struct pids_cgroup *pids, int num, struct pids_cgroup **fail) { struct pids_cgroup *p, *q; for (p = pids; parent_pids(p); p = parent_pids(p)) { int64_t new = atomic64_add_return(num, &p->counter); int64_t limit = atomic64_read(&p->limit); /* * Since new is capped to the maximum number of pid_t, if * p->limit is %PIDS_MAX then we know that this test will never * fail. */ if (new > limit) { *fail = p; goto revert; } /* * Not technically accurate if we go over limit somewhere up * the hierarchy, but that's tolerable for the watermark. */ pids_update_watermark(p, new); } return 0; revert: for (q = pids; q != p; q = parent_pids(q)) pids_cancel(q, num); pids_cancel(p, num); return -EAGAIN; } static int pids_can_attach(struct cgroup_taskset *tset) { struct task_struct *task; struct cgroup_subsys_state *dst_css; cgroup_taskset_for_each(task, dst_css, tset) { struct pids_cgroup *pids = css_pids(dst_css); struct cgroup_subsys_state *old_css; struct pids_cgroup *old_pids; /* * No need to pin @old_css between here and cancel_attach() * because cgroup core protects it from being freed before * the migration completes or fails. */ old_css = task_css(task, pids_cgrp_id); old_pids = css_pids(old_css); pids_charge(pids, 1); pids_uncharge(old_pids, 1); } return 0; } static void pids_cancel_attach(struct cgroup_taskset *tset) { struct task_struct *task; struct cgroup_subsys_state *dst_css; cgroup_taskset_for_each(task, dst_css, tset) { struct pids_cgroup *pids = css_pids(dst_css); struct cgroup_subsys_state *old_css; struct pids_cgroup *old_pids; old_css = task_css(task, pids_cgrp_id); old_pids = css_pids(old_css); pids_charge(old_pids, 1); pids_uncharge(pids, 1); } } static void pids_event(struct pids_cgroup *pids_forking, struct pids_cgroup *pids_over_limit) { struct pids_cgroup *p = pids_forking; /* Only log the first time limit is hit. */ if (atomic64_inc_return(&p->events_local[PIDCG_FORKFAIL]) == 1) { pr_info("cgroup: fork rejected by pids controller in "); pr_cont_cgroup_path(p->css.cgroup); pr_cont("\n"); } if (!cgroup_subsys_on_dfl(pids_cgrp_subsys) || cgrp_dfl_root.flags & CGRP_ROOT_PIDS_LOCAL_EVENTS) { cgroup_file_notify(&p->events_local_file); return; } atomic64_inc(&pids_over_limit->events_local[PIDCG_MAX]); cgroup_file_notify(&pids_over_limit->events_local_file); for (p = pids_over_limit; parent_pids(p); p = parent_pids(p)) { atomic64_inc(&p->events[PIDCG_MAX]); cgroup_file_notify(&p->events_file); } } /* * task_css_check(true) in pids_can_fork() and pids_cancel_fork() relies * on cgroup_threadgroup_change_begin() held by the copy_process(). */ static int pids_can_fork(struct task_struct *task, struct css_set *cset) { struct pids_cgroup *pids, *pids_over_limit; int err; pids = css_pids(cset->subsys[pids_cgrp_id]); err = pids_try_charge(pids, 1, &pids_over_limit); if (err) pids_event(pids, pids_over_limit); return err; } static void pids_cancel_fork(struct task_struct *task, struct css_set *cset) { struct pids_cgroup *pids; pids = css_pids(cset->subsys[pids_cgrp_id]); pids_uncharge(pids, 1); } static void pids_release(struct task_struct *task) { struct pids_cgroup *pids = css_pids(task_css(task, pids_cgrp_id)); pids_uncharge(pids, 1); } static ssize_t pids_max_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct cgroup_subsys_state *css = of_css(of); struct pids_cgroup *pids = css_pids(css); int64_t limit; int err; buf = strstrip(buf); if (!strcmp(buf, PIDS_MAX_STR)) { limit = PIDS_MAX; goto set_limit; } err = kstrtoll(buf, 0, &limit); if (err) return err; if (limit < 0 || limit >= PIDS_MAX) return -EINVAL; set_limit: /* * Limit updates don't need to be mutex'd, since it isn't * critical that any racing fork()s follow the new limit. */ atomic64_set(&pids->limit, limit); return nbytes; } static int pids_max_show(struct seq_file *sf, void *v) { struct cgroup_subsys_state *css = seq_css(sf); struct pids_cgroup *pids = css_pids(css); int64_t limit = atomic64_read(&pids->limit); if (limit >= PIDS_MAX) seq_printf(sf, "%s\n", PIDS_MAX_STR); else seq_printf(sf, "%lld\n", limit); return 0; } static s64 pids_current_read(struct cgroup_subsys_state *css, struct cftype *cft) { struct pids_cgroup *pids = css_pids(css); return atomic64_read(&pids->counter); } static s64 pids_peak_read(struct cgroup_subsys_state *css, struct cftype *cft) { struct pids_cgroup *pids = css_pids(css); return READ_ONCE(pids->watermark); } static int __pids_events_show(struct seq_file *sf, bool local) { struct pids_cgroup *pids = css_pids(seq_css(sf)); enum pidcg_event pe = PIDCG_MAX; atomic64_t *events; if (!cgroup_subsys_on_dfl(pids_cgrp_subsys) || cgrp_dfl_root.flags & CGRP_ROOT_PIDS_LOCAL_EVENTS) { pe = PIDCG_FORKFAIL; local = true; } events = local ? pids->events_local : pids->events; seq_printf(sf, "max %lld\n", (s64)atomic64_read(&events[pe])); return 0; } static int pids_events_show(struct seq_file *sf, void *v) { __pids_events_show(sf, false); return 0; } static int pids_events_local_show(struct seq_file *sf, void *v) { __pids_events_show(sf, true); return 0; } static struct cftype pids_files[] = { { .name = "max", .write = pids_max_write, .seq_show = pids_max_show, .flags = CFTYPE_NOT_ON_ROOT, }, { .name = "current", .read_s64 = pids_current_read, .flags = CFTYPE_NOT_ON_ROOT, }, { .name = "peak", .flags = CFTYPE_NOT_ON_ROOT, .read_s64 = pids_peak_read, }, { .name = "events", .seq_show = pids_events_show, .file_offset = offsetof(struct pids_cgroup, events_file), .flags = CFTYPE_NOT_ON_ROOT, }, { .name = "events.local", .seq_show = pids_events_local_show, .file_offset = offsetof(struct pids_cgroup, events_local_file), .flags = CFTYPE_NOT_ON_ROOT, }, { } /* terminate */ }; static struct cftype pids_files_legacy[] = { { .name = "max", .write = pids_max_write, .seq_show = pids_max_show, .flags = CFTYPE_NOT_ON_ROOT, }, { .name = "current", .read_s64 = pids_current_read, .flags = CFTYPE_NOT_ON_ROOT, }, { .name = "peak", .flags = CFTYPE_NOT_ON_ROOT, .read_s64 = pids_peak_read, }, { .name = "events", .seq_show = pids_events_show, .file_offset = offsetof(struct pids_cgroup, events_file), .flags = CFTYPE_NOT_ON_ROOT, }, { } /* terminate */ }; struct cgroup_subsys pids_cgrp_subsys = { .css_alloc = pids_css_alloc, .css_free = pids_css_free, .can_attach = pids_can_attach, .cancel_attach = pids_cancel_attach, .can_fork = pids_can_fork, .cancel_fork = pids_cancel_fork, .release = pids_release, .legacy_cftypes = pids_files_legacy, .dfl_cftypes = pids_files, .threaded = true, }; |
| 2 1 5 5 5 3 3 2 4 2 3 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 | // SPDX-License-Identifier: GPL-2.0 /* * Encryption policy functions for per-file encryption support. * * Copyright (C) 2015, Google, Inc. * Copyright (C) 2015, Motorola Mobility. * * Originally written by Michael Halcrow, 2015. * Modified by Jaegeuk Kim, 2015. * Modified by Eric Biggers, 2019 for v2 policy support. */ #include <linux/export.h> #include <linux/fs_context.h> #include <linux/mount.h> #include <linux/random.h> #include <linux/seq_file.h> #include <linux/string.h> #include "fscrypt_private.h" /** * fscrypt_policies_equal() - check whether two encryption policies are the same * @policy1: the first policy * @policy2: the second policy * * Return: %true if equal, else %false */ bool fscrypt_policies_equal(const union fscrypt_policy *policy1, const union fscrypt_policy *policy2) { if (policy1->version != policy2->version) return false; return !memcmp(policy1, policy2, fscrypt_policy_size(policy1)); } int fscrypt_policy_to_key_spec(const union fscrypt_policy *policy, struct fscrypt_key_specifier *key_spec) { switch (policy->version) { case FSCRYPT_POLICY_V1: key_spec->type = FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR; memcpy(key_spec->u.descriptor, policy->v1.master_key_descriptor, FSCRYPT_KEY_DESCRIPTOR_SIZE); return 0; case FSCRYPT_POLICY_V2: key_spec->type = FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER; memcpy(key_spec->u.identifier, policy->v2.master_key_identifier, FSCRYPT_KEY_IDENTIFIER_SIZE); return 0; default: WARN_ON_ONCE(1); return -EINVAL; } } const union fscrypt_policy *fscrypt_get_dummy_policy(struct super_block *sb) { if (!sb->s_cop->get_dummy_policy) return NULL; return sb->s_cop->get_dummy_policy(sb); } /* * Return %true if the given combination of encryption modes is supported for v1 * (and later) encryption policies. * * Do *not* add anything new here, since v1 encryption policies are deprecated. * New combinations of modes should go in fscrypt_valid_enc_modes_v2() only. */ static bool fscrypt_valid_enc_modes_v1(u32 contents_mode, u32 filenames_mode) { if (contents_mode == FSCRYPT_MODE_AES_256_XTS && filenames_mode == FSCRYPT_MODE_AES_256_CTS) return true; if (contents_mode == FSCRYPT_MODE_AES_128_CBC && filenames_mode == FSCRYPT_MODE_AES_128_CTS) return true; if (contents_mode == FSCRYPT_MODE_ADIANTUM && filenames_mode == FSCRYPT_MODE_ADIANTUM) return true; return false; } static bool fscrypt_valid_enc_modes_v2(u32 contents_mode, u32 filenames_mode) { if (contents_mode == FSCRYPT_MODE_AES_256_XTS && filenames_mode == FSCRYPT_MODE_AES_256_HCTR2) return true; if (contents_mode == FSCRYPT_MODE_SM4_XTS && filenames_mode == FSCRYPT_MODE_SM4_CTS) return true; return fscrypt_valid_enc_modes_v1(contents_mode, filenames_mode); } static bool supported_direct_key_modes(const struct inode *inode, u32 contents_mode, u32 filenames_mode) { const struct fscrypt_mode *mode; if (contents_mode != filenames_mode) { fscrypt_warn(inode, "Direct key flag not allowed with different contents and filenames modes"); return false; } mode = &fscrypt_modes[contents_mode]; if (mode->ivsize < offsetofend(union fscrypt_iv, nonce)) { fscrypt_warn(inode, "Direct key flag not allowed with %s", mode->friendly_name); return false; } return true; } static bool supported_iv_ino_lblk_policy(const struct fscrypt_policy_v2 *policy, const struct inode *inode) { const char *type = (policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) ? "IV_INO_LBLK_64" : "IV_INO_LBLK_32"; struct super_block *sb = inode->i_sb; /* * IV_INO_LBLK_* exist only because of hardware limitations, and * currently the only known use case for them involves AES-256-XTS. * That's also all we test currently. For these reasons, for now only * allow AES-256-XTS here. This can be relaxed later if a use case for * IV_INO_LBLK_* with other encryption modes arises. */ if (policy->contents_encryption_mode != FSCRYPT_MODE_AES_256_XTS) { fscrypt_warn(inode, "Can't use %s policy with contents mode other than AES-256-XTS", type); return false; } /* * It's unsafe to include inode numbers in the IVs if the filesystem can * potentially renumber inodes, e.g. via filesystem shrinking. */ if (!sb->s_cop->has_stable_inodes || !sb->s_cop->has_stable_inodes(sb)) { fscrypt_warn(inode, "Can't use %s policy on filesystem '%s' because it doesn't have stable inode numbers", type, sb->s_id); return false; } /* * IV_INO_LBLK_64 and IV_INO_LBLK_32 both require that inode numbers fit * in 32 bits. In principle, IV_INO_LBLK_32 could support longer inode * numbers because it hashes the inode number; however, currently the * inode number is gotten from inode::i_ino which is 'unsigned long'. * So for now the implementation limit is 32 bits. */ if (!sb->s_cop->has_32bit_inodes) { fscrypt_warn(inode, "Can't use %s policy on filesystem '%s' because its inode numbers are too long", type, sb->s_id); return false; } /* * IV_INO_LBLK_64 and IV_INO_LBLK_32 both require that file data unit * indices fit in 32 bits. */ if (fscrypt_max_file_dun_bits(sb, fscrypt_policy_v2_du_bits(policy, inode)) > 32) { fscrypt_warn(inode, "Can't use %s policy on filesystem '%s' because its maximum file size is too large", type, sb->s_id); return false; } return true; } static bool fscrypt_supported_v1_policy(const struct fscrypt_policy_v1 *policy, const struct inode *inode) { if (!fscrypt_valid_enc_modes_v1(policy->contents_encryption_mode, policy->filenames_encryption_mode)) { fscrypt_warn(inode, "Unsupported encryption modes (contents %d, filenames %d)", policy->contents_encryption_mode, policy->filenames_encryption_mode); return false; } if (policy->flags & ~(FSCRYPT_POLICY_FLAGS_PAD_MASK | FSCRYPT_POLICY_FLAG_DIRECT_KEY)) { fscrypt_warn(inode, "Unsupported encryption flags (0x%02x)", policy->flags); return false; } if ((policy->flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) && !supported_direct_key_modes(inode, policy->contents_encryption_mode, policy->filenames_encryption_mode)) return false; if (IS_CASEFOLDED(inode)) { /* With v1, there's no way to derive dirhash keys. */ fscrypt_warn(inode, "v1 policies can't be used on casefolded directories"); return false; } return true; } static bool fscrypt_supported_v2_policy(const struct fscrypt_policy_v2 *policy, const struct inode *inode) { int count = 0; if (!fscrypt_valid_enc_modes_v2(policy->contents_encryption_mode, policy->filenames_encryption_mode)) { fscrypt_warn(inode, "Unsupported encryption modes (contents %d, filenames %d)", policy->contents_encryption_mode, policy->filenames_encryption_mode); return false; } if (policy->flags & ~(FSCRYPT_POLICY_FLAGS_PAD_MASK | FSCRYPT_POLICY_FLAG_DIRECT_KEY | FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 | FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) { fscrypt_warn(inode, "Unsupported encryption flags (0x%02x)", policy->flags); return false; } count += !!(policy->flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY); count += !!(policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64); count += !!(policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32); if (count > 1) { fscrypt_warn(inode, "Mutually exclusive encryption flags (0x%02x)", policy->flags); return false; } if (policy->log2_data_unit_size) { if (!inode->i_sb->s_cop->supports_subblock_data_units) { fscrypt_warn(inode, "Filesystem does not support configuring crypto data unit size"); return false; } if (policy->log2_data_unit_size > inode->i_blkbits || policy->log2_data_unit_size < SECTOR_SHIFT /* 9 */) { fscrypt_warn(inode, "Unsupported log2_data_unit_size in encryption policy: %d", policy->log2_data_unit_size); return false; } if (policy->log2_data_unit_size != inode->i_blkbits && (policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) { /* * Not safe to enable yet, as we need to ensure that DUN * wraparound can only occur on a FS block boundary. */ fscrypt_warn(inode, "Sub-block data units not yet supported with IV_INO_LBLK_32"); return false; } } if ((policy->flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) && !supported_direct_key_modes(inode, policy->contents_encryption_mode, policy->filenames_encryption_mode)) return false; if ((policy->flags & (FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 | FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) && !supported_iv_ino_lblk_policy(policy, inode)) return false; if (memchr_inv(policy->__reserved, 0, sizeof(policy->__reserved))) { fscrypt_warn(inode, "Reserved bits set in encryption policy"); return false; } return true; } /** * fscrypt_supported_policy() - check whether an encryption policy is supported * @policy_u: the encryption policy * @inode: the inode on which the policy will be used * * Given an encryption policy, check whether all its encryption modes and other * settings are supported by this kernel on the given inode. (But we don't * currently don't check for crypto API support here, so attempting to use an * algorithm not configured into the crypto API will still fail later.) * * Return: %true if supported, else %false */ bool fscrypt_supported_policy(const union fscrypt_policy *policy_u, const struct inode *inode) { switch (policy_u->version) { case FSCRYPT_POLICY_V1: return fscrypt_supported_v1_policy(&policy_u->v1, inode); case FSCRYPT_POLICY_V2: return fscrypt_supported_v2_policy(&policy_u->v2, inode); } return false; } /** * fscrypt_new_context() - create a new fscrypt_context * @ctx_u: output context * @policy_u: input policy * @nonce: nonce to use * * Create an fscrypt_context for an inode that is being assigned the given * encryption policy. @nonce must be a new random nonce. * * Return: the size of the new context in bytes. */ static int fscrypt_new_context(union fscrypt_context *ctx_u, const union fscrypt_policy *policy_u, const u8 nonce[FSCRYPT_FILE_NONCE_SIZE]) { memset(ctx_u, 0, sizeof(*ctx_u)); switch (policy_u->version) { case FSCRYPT_POLICY_V1: { const struct fscrypt_policy_v1 *policy = &policy_u->v1; struct fscrypt_context_v1 *ctx = &ctx_u->v1; ctx->version = FSCRYPT_CONTEXT_V1; ctx->contents_encryption_mode = policy->contents_encryption_mode; ctx->filenames_encryption_mode = policy->filenames_encryption_mode; ctx->flags = policy->flags; memcpy(ctx->master_key_descriptor, policy->master_key_descriptor, sizeof(ctx->master_key_descriptor)); memcpy(ctx->nonce, nonce, FSCRYPT_FILE_NONCE_SIZE); return sizeof(*ctx); } case FSCRYPT_POLICY_V2: { const struct fscrypt_policy_v2 *policy = &policy_u->v2; struct fscrypt_context_v2 *ctx = &ctx_u->v2; ctx->version = FSCRYPT_CONTEXT_V2; ctx->contents_encryption_mode = policy->contents_encryption_mode; ctx->filenames_encryption_mode = policy->filenames_encryption_mode; ctx->flags = policy->flags; ctx->log2_data_unit_size = policy->log2_data_unit_size; memcpy(ctx->master_key_identifier, policy->master_key_identifier, sizeof(ctx->master_key_identifier)); memcpy(ctx->nonce, nonce, FSCRYPT_FILE_NONCE_SIZE); return sizeof(*ctx); } } BUG(); } /** * fscrypt_policy_from_context() - convert an fscrypt_context to * an fscrypt_policy * @policy_u: output policy * @ctx_u: input context * @ctx_size: size of input context in bytes * * Given an fscrypt_context, build the corresponding fscrypt_policy. * * Return: 0 on success, or -EINVAL if the fscrypt_context has an unrecognized * version number or size. * * This does *not* validate the settings within the policy itself, e.g. the * modes, flags, and reserved bits. Use fscrypt_supported_policy() for that. */ int fscrypt_policy_from_context(union fscrypt_policy *policy_u, const union fscrypt_context *ctx_u, int ctx_size) { memset(policy_u, 0, sizeof(*policy_u)); if (!fscrypt_context_is_valid(ctx_u, ctx_size)) return -EINVAL; switch (ctx_u->version) { case FSCRYPT_CONTEXT_V1: { const struct fscrypt_context_v1 *ctx = &ctx_u->v1; struct fscrypt_policy_v1 *policy = &policy_u->v1; policy->version = FSCRYPT_POLICY_V1; policy->contents_encryption_mode = ctx->contents_encryption_mode; policy->filenames_encryption_mode = ctx->filenames_encryption_mode; policy->flags = ctx->flags; memcpy(policy->master_key_descriptor, ctx->master_key_descriptor, sizeof(policy->master_key_descriptor)); return 0; } case FSCRYPT_CONTEXT_V2: { const struct fscrypt_context_v2 *ctx = &ctx_u->v2; struct fscrypt_policy_v2 *policy = &policy_u->v2; policy->version = FSCRYPT_POLICY_V2; policy->contents_encryption_mode = ctx->contents_encryption_mode; policy->filenames_encryption_mode = ctx->filenames_encryption_mode; policy->flags = ctx->flags; policy->log2_data_unit_size = ctx->log2_data_unit_size; memcpy(policy->__reserved, ctx->__reserved, sizeof(policy->__reserved)); memcpy(policy->master_key_identifier, ctx->master_key_identifier, sizeof(policy->master_key_identifier)); return 0; } } /* unreachable */ return -EINVAL; } /* Retrieve an inode's encryption policy */ static int fscrypt_get_policy(struct inode *inode, union fscrypt_policy *policy) { const struct fscrypt_inode_info *ci; union fscrypt_context ctx; int ret; ci = fscrypt_get_inode_info(inode); if (ci) { /* key available, use the cached policy */ *policy = ci->ci_policy; return 0; } if (!IS_ENCRYPTED(inode)) return -ENODATA; ret = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); if (ret < 0) return (ret == -ERANGE) ? -EINVAL : ret; return fscrypt_policy_from_context(policy, &ctx, ret); } static int set_encryption_policy(struct inode *inode, const union fscrypt_policy *policy) { u8 nonce[FSCRYPT_FILE_NONCE_SIZE]; union fscrypt_context ctx; int ctxsize; int err; if (!fscrypt_supported_policy(policy, inode)) return -EINVAL; switch (policy->version) { case FSCRYPT_POLICY_V1: /* * The original encryption policy version provided no way of * verifying that the correct master key was supplied, which was * insecure in scenarios where multiple users have access to the * same encrypted files (even just read-only access). The new * encryption policy version fixes this and also implies use of * an improved key derivation function and allows non-root users * to securely remove keys. So as long as compatibility with * old kernels isn't required, it is recommended to use the new * policy version for all new encrypted directories. */ pr_warn_once("%s (pid %d) is setting deprecated v1 encryption policy; recommend upgrading to v2.\n", current->comm, current->pid); break; case FSCRYPT_POLICY_V2: err = fscrypt_verify_key_added(inode->i_sb, policy->v2.master_key_identifier); if (err) return err; if (policy->v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32) pr_warn_once("%s (pid %d) is setting an IV_INO_LBLK_32 encryption policy. This should only be used if there are certain hardware limitations.\n", current->comm, current->pid); break; default: WARN_ON_ONCE(1); return -EINVAL; } get_random_bytes(nonce, FSCRYPT_FILE_NONCE_SIZE); ctxsize = fscrypt_new_context(&ctx, policy, nonce); return inode->i_sb->s_cop->set_context(inode, &ctx, ctxsize, NULL); } int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg) { union fscrypt_policy policy; union fscrypt_policy existing_policy; struct inode *inode = file_inode(filp); u8 version; int size; int ret; if (get_user(policy.version, (const u8 __user *)arg)) return -EFAULT; size = fscrypt_policy_size(&policy); if (size <= 0) return -EINVAL; /* * We should just copy the remaining 'size - 1' bytes here, but a * bizarre bug in gcc 7 and earlier (fixed by gcc r255731) causes gcc to * think that size can be 0 here (despite the check above!) *and* that * it's a compile-time constant. Thus it would think copy_from_user() * is passed compile-time constant ULONG_MAX, causing the compile-time * buffer overflow check to fail, breaking the build. This only occurred * when building an i386 kernel with -Os and branch profiling enabled. * * Work around it by just copying the first byte again... */ version = policy.version; if (copy_from_user(&policy, arg, size)) return -EFAULT; policy.version = version; if (!inode_owner_or_capable(&nop_mnt_idmap, inode)) return -EACCES; ret = mnt_want_write_file(filp); if (ret) return ret; inode_lock(inode); ret = fscrypt_get_policy(inode, &existing_policy); if (ret == -ENODATA) { if (!S_ISDIR(inode->i_mode)) ret = -ENOTDIR; else if (IS_DEADDIR(inode)) ret = -ENOENT; else if (!inode->i_sb->s_cop->empty_dir(inode)) ret = -ENOTEMPTY; else ret = set_encryption_policy(inode, &policy); } else if (ret == -EINVAL || (ret == 0 && !fscrypt_policies_equal(&policy, &existing_policy))) { /* The file already uses a different encryption policy. */ ret = -EEXIST; } inode_unlock(inode); mnt_drop_write_file(filp); return ret; } EXPORT_SYMBOL(fscrypt_ioctl_set_policy); /* Original ioctl version; can only get the original policy version */ int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg) { union fscrypt_policy policy; int err; err = fscrypt_get_policy(file_inode(filp), &policy); if (err) return err; if (policy.version != FSCRYPT_POLICY_V1) return -EINVAL; if (copy_to_user(arg, &policy, sizeof(policy.v1))) return -EFAULT; return 0; } EXPORT_SYMBOL(fscrypt_ioctl_get_policy); /* Extended ioctl version; can get policies of any version */ int fscrypt_ioctl_get_policy_ex(struct file *filp, void __user *uarg) { struct fscrypt_get_policy_ex_arg arg; union fscrypt_policy *policy = (union fscrypt_policy *)&arg.policy; size_t policy_size; int err; /* arg is policy_size, then policy */ BUILD_BUG_ON(offsetof(typeof(arg), policy_size) != 0); BUILD_BUG_ON(offsetofend(typeof(arg), policy_size) != offsetof(typeof(arg), policy)); BUILD_BUG_ON(sizeof(arg.policy) != sizeof(*policy)); err = fscrypt_get_policy(file_inode(filp), policy); if (err) return err; policy_size = fscrypt_policy_size(policy); if (copy_from_user(&arg, uarg, sizeof(arg.policy_size))) return -EFAULT; if (policy_size > arg.policy_size) return -EOVERFLOW; arg.policy_size = policy_size; if (copy_to_user(uarg, &arg, sizeof(arg.policy_size) + policy_size)) return -EFAULT; return 0; } EXPORT_SYMBOL_GPL(fscrypt_ioctl_get_policy_ex); /* FS_IOC_GET_ENCRYPTION_NONCE: retrieve file's encryption nonce for testing */ int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg) { struct inode *inode = file_inode(filp); union fscrypt_context ctx; int ret; ret = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); if (ret < 0) return ret; if (!fscrypt_context_is_valid(&ctx, ret)) return -EINVAL; if (copy_to_user(arg, fscrypt_context_nonce(&ctx), FSCRYPT_FILE_NONCE_SIZE)) return -EFAULT; return 0; } EXPORT_SYMBOL_GPL(fscrypt_ioctl_get_nonce); /** * fscrypt_has_permitted_context() - is a file's encryption policy permitted * within its directory? * * @parent: inode for parent directory * @child: inode for file being looked up, opened, or linked into @parent * * Filesystems must call this before permitting access to an inode in a * situation where the parent directory is encrypted (either before allowing * ->lookup() to succeed, or for a regular file before allowing it to be opened) * and before any operation that involves linking an inode into an encrypted * directory, including link, rename, and cross rename. It enforces the * constraint that within a given encrypted directory tree, all files use the * same encryption policy. The pre-access check is needed to detect potentially * malicious offline violations of this constraint, while the link and rename * checks are needed to prevent online violations of this constraint. * * Return: 1 if permitted, 0 if forbidden. */ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) { union fscrypt_policy parent_policy, child_policy; int err, err1, err2; /* No restrictions on file types which are never encrypted */ if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) && !S_ISLNK(child->i_mode)) return 1; /* No restrictions if the parent directory is unencrypted */ if (!IS_ENCRYPTED(parent)) return 1; /* Encrypted directories must not contain unencrypted files */ if (!IS_ENCRYPTED(child)) return 0; /* * Both parent and child are encrypted, so verify they use the same * encryption policy. Compare the cached policies if the keys are * available, otherwise retrieve and compare the fscrypt_contexts. * * Note that the fscrypt_context retrieval will be required frequently * when accessing an encrypted directory tree without the key. * Performance-wise this is not a big deal because we already don't * really optimize for file access without the key (to the extent that * such access is even possible), given that any attempted access * already causes a fscrypt_context retrieval and keyring search. * * In any case, if an unexpected error occurs, fall back to "forbidden". */ err = fscrypt_get_encryption_info(parent, true); if (err) return 0; err = fscrypt_get_encryption_info(child, true); if (err) return 0; err1 = fscrypt_get_policy(parent, &parent_policy); err2 = fscrypt_get_policy(child, &child_policy); /* * Allow the case where the parent and child both have an unrecognized * encryption policy, so that files with an unrecognized encryption * policy can be deleted. */ if (err1 == -EINVAL && err2 == -EINVAL) return 1; if (err1 || err2) return 0; return fscrypt_policies_equal(&parent_policy, &child_policy); } EXPORT_SYMBOL(fscrypt_has_permitted_context); /* * Return the encryption policy that new files in the directory will inherit, or * NULL if none, or an ERR_PTR() on error. If the directory is encrypted, also * ensure that its key is set up, so that the new filename can be encrypted. */ const union fscrypt_policy *fscrypt_policy_to_inherit(struct inode *dir) { int err; if (IS_ENCRYPTED(dir)) { err = fscrypt_require_key(dir); if (err) return ERR_PTR(err); return &fscrypt_get_inode_info_raw(dir)->ci_policy; } return fscrypt_get_dummy_policy(dir->i_sb); } /** * fscrypt_context_for_new_inode() - create an encryption context for a new inode * @ctx: where context should be written * @inode: inode from which to fetch policy and nonce * * Given an in-core "prepared" (via fscrypt_prepare_new_inode) inode, * generate a new context and write it to ctx. ctx _must_ be at least * FSCRYPT_SET_CONTEXT_MAX_SIZE bytes. * * Return: size of the resulting context or a negative error code. */ int fscrypt_context_for_new_inode(void *ctx, struct inode *inode) { struct fscrypt_inode_info *ci = fscrypt_get_inode_info_raw(inode); BUILD_BUG_ON(sizeof(union fscrypt_context) != FSCRYPT_SET_CONTEXT_MAX_SIZE); /* fscrypt_prepare_new_inode() should have set up the key already. */ if (WARN_ON_ONCE(!ci)) return -ENOKEY; return fscrypt_new_context(ctx, &ci->ci_policy, ci->ci_nonce); } EXPORT_SYMBOL_GPL(fscrypt_context_for_new_inode); /** * fscrypt_set_context() - Set the fscrypt context of a new inode * @inode: a new inode * @fs_data: private data given by FS and passed to ->set_context() * * This should be called after fscrypt_prepare_new_inode(), generally during a * filesystem transaction. Everything here must be %GFP_NOFS-safe. * * Return: 0 on success, -errno on failure */ int fscrypt_set_context(struct inode *inode, void *fs_data) { struct fscrypt_inode_info *ci; union fscrypt_context ctx; int ctxsize; ctxsize = fscrypt_context_for_new_inode(&ctx, inode); if (ctxsize < 0) return ctxsize; /* * This may be the first time the inode number is available, so do any * delayed key setup that requires the inode number. */ ci = fscrypt_get_inode_info_raw(inode); if (ci->ci_policy.version == FSCRYPT_POLICY_V2 && (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) fscrypt_hash_inode_number(ci, ci->ci_master_key); return inode->i_sb->s_cop->set_context(inode, &ctx, ctxsize, fs_data); } EXPORT_SYMBOL_GPL(fscrypt_set_context); /** * fscrypt_parse_test_dummy_encryption() - parse the test_dummy_encryption mount option * @param: the mount option * @dummy_policy: (input/output) the place to write the dummy policy that will * result from parsing the option. Zero-initialize this. If a policy is * already set here (due to test_dummy_encryption being given multiple * times), then this function will verify that the policies are the same. * * Return: 0 on success; -EINVAL if the argument is invalid; -EEXIST if the * argument conflicts with one already specified; or -ENOMEM. */ int fscrypt_parse_test_dummy_encryption(const struct fs_parameter *param, struct fscrypt_dummy_policy *dummy_policy) { const char *arg = "v2"; union fscrypt_policy *policy; int err; if (param->type == fs_value_is_string && *param->string) arg = param->string; policy = kzalloc(sizeof(*policy), GFP_KERNEL); if (!policy) return -ENOMEM; if (!strcmp(arg, "v1")) { policy->version = FSCRYPT_POLICY_V1; policy->v1.contents_encryption_mode = FSCRYPT_MODE_AES_256_XTS; policy->v1.filenames_encryption_mode = FSCRYPT_MODE_AES_256_CTS; memset(policy->v1.master_key_descriptor, 0x42, FSCRYPT_KEY_DESCRIPTOR_SIZE); } else if (!strcmp(arg, "v2")) { policy->version = FSCRYPT_POLICY_V2; policy->v2.contents_encryption_mode = FSCRYPT_MODE_AES_256_XTS; policy->v2.filenames_encryption_mode = FSCRYPT_MODE_AES_256_CTS; fscrypt_get_test_dummy_key_identifier( policy->v2.master_key_identifier); } else { err = -EINVAL; goto out; } if (dummy_policy->policy) { if (fscrypt_policies_equal(policy, dummy_policy->policy)) err = 0; else err = -EEXIST; goto out; } dummy_policy->policy = policy; policy = NULL; err = 0; out: kfree(policy); return err; } EXPORT_SYMBOL_GPL(fscrypt_parse_test_dummy_encryption); /** * fscrypt_dummy_policies_equal() - check whether two dummy policies are equal * @p1: the first test dummy policy (may be unset) * @p2: the second test dummy policy (may be unset) * * Return: %true if the dummy policies are both set and equal, or both unset. */ bool fscrypt_dummy_policies_equal(const struct fscrypt_dummy_policy *p1, const struct fscrypt_dummy_policy *p2) { if (!p1->policy && !p2->policy) return true; if (!p1->policy || !p2->policy) return false; return fscrypt_policies_equal(p1->policy, p2->policy); } EXPORT_SYMBOL_GPL(fscrypt_dummy_policies_equal); /** * fscrypt_show_test_dummy_encryption() - show '-o test_dummy_encryption' * @seq: the seq_file to print the option to * @sep: the separator character to use * @sb: the filesystem whose options are being shown * * Show the test_dummy_encryption mount option, if it was specified. * This is mainly used for /proc/mounts. */ void fscrypt_show_test_dummy_encryption(struct seq_file *seq, char sep, struct super_block *sb) { const union fscrypt_policy *policy = fscrypt_get_dummy_policy(sb); int vers; if (!policy) return; vers = policy->version; if (vers == FSCRYPT_POLICY_V1) /* Handle numbering quirk */ vers = 1; seq_printf(seq, "%ctest_dummy_encryption=v%d", sep, vers); } EXPORT_SYMBOL_GPL(fscrypt_show_test_dummy_encryption); |
| 3 2 2 2 1 2 2 2 2 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 | // SPDX-License-Identifier: GPL-2.0-or-later /* * MD5 and HMAC-MD5 library functions * * md5_block_generic() is derived from cryptoapi implementation, originally * based on the public domain implementation written by Colin Plumb in 1993. * * Copyright (c) Cryptoapi developers. * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> * Copyright 2025 Google LLC */ #include <crypto/hmac.h> #include <crypto/md5.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/string.h> #include <linux/unaligned.h> #include <linux/wordpart.h> static const struct md5_block_state md5_iv = { .h = { MD5_H0, MD5_H1, MD5_H2, MD5_H3 }, }; #define F1(x, y, z) (z ^ (x & (y ^ z))) #define F2(x, y, z) F1(z, x, y) #define F3(x, y, z) (x ^ y ^ z) #define F4(x, y, z) (y ^ (x | ~z)) #define MD5STEP(f, w, x, y, z, in, s) \ (w += f(x, y, z) + in, w = (w << s | w >> (32 - s)) + x) static void md5_block_generic(struct md5_block_state *state, const u8 data[MD5_BLOCK_SIZE]) { u32 in[MD5_BLOCK_WORDS]; u32 a, b, c, d; memcpy(in, data, MD5_BLOCK_SIZE); le32_to_cpu_array(in, ARRAY_SIZE(in)); a = state->h[0]; b = state->h[1]; c = state->h[2]; d = state->h[3]; MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12); MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4); MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4); MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); state->h[0] += a; state->h[1] += b; state->h[2] += c; state->h[3] += d; } static void __maybe_unused md5_blocks_generic(struct md5_block_state *state, const u8 *data, size_t nblocks) { do { md5_block_generic(state, data); data += MD5_BLOCK_SIZE; } while (--nblocks); } #ifdef CONFIG_CRYPTO_LIB_MD5_ARCH #include "md5.h" /* $(SRCARCH)/md5.h */ #else #define md5_blocks md5_blocks_generic #endif void md5_init(struct md5_ctx *ctx) { ctx->state = md5_iv; ctx->bytecount = 0; } EXPORT_SYMBOL_GPL(md5_init); void md5_update(struct md5_ctx *ctx, const u8 *data, size_t len) { size_t partial = ctx->bytecount % MD5_BLOCK_SIZE; ctx->bytecount += len; if (partial + len >= MD5_BLOCK_SIZE) { size_t nblocks; if (partial) { size_t l = MD5_BLOCK_SIZE - partial; memcpy(&ctx->buf[partial], data, l); data += l; len -= l; md5_blocks(&ctx->state, ctx->buf, 1); } nblocks = len / MD5_BLOCK_SIZE; len %= MD5_BLOCK_SIZE; if (nblocks) { md5_blocks(&ctx->state, data, nblocks); data += nblocks * MD5_BLOCK_SIZE; } partial = 0; } if (len) memcpy(&ctx->buf[partial], data, len); } EXPORT_SYMBOL_GPL(md5_update); static void __md5_final(struct md5_ctx *ctx, u8 out[MD5_DIGEST_SIZE]) { u64 bitcount = ctx->bytecount << 3; size_t partial = ctx->bytecount % MD5_BLOCK_SIZE; ctx->buf[partial++] = 0x80; if (partial > MD5_BLOCK_SIZE - 8) { memset(&ctx->buf[partial], 0, MD5_BLOCK_SIZE - partial); md5_blocks(&ctx->state, ctx->buf, 1); partial = 0; } memset(&ctx->buf[partial], 0, MD5_BLOCK_SIZE - 8 - partial); *(__le64 *)&ctx->buf[MD5_BLOCK_SIZE - 8] = cpu_to_le64(bitcount); md5_blocks(&ctx->state, ctx->buf, 1); cpu_to_le32_array(ctx->state.h, ARRAY_SIZE(ctx->state.h)); memcpy(out, ctx->state.h, MD5_DIGEST_SIZE); } void md5_final(struct md5_ctx *ctx, u8 out[MD5_DIGEST_SIZE]) { __md5_final(ctx, out); memzero_explicit(ctx, sizeof(*ctx)); } EXPORT_SYMBOL_GPL(md5_final); void md5(const u8 *data, size_t len, u8 out[MD5_DIGEST_SIZE]) { struct md5_ctx ctx; md5_init(&ctx); md5_update(&ctx, data, len); md5_final(&ctx, out); } EXPORT_SYMBOL_GPL(md5); static void __hmac_md5_preparekey(struct md5_block_state *istate, struct md5_block_state *ostate, const u8 *raw_key, size_t raw_key_len) { union { u8 b[MD5_BLOCK_SIZE]; unsigned long w[MD5_BLOCK_SIZE / sizeof(unsigned long)]; } derived_key = { 0 }; if (unlikely(raw_key_len > MD5_BLOCK_SIZE)) md5(raw_key, raw_key_len, derived_key.b); else memcpy(derived_key.b, raw_key, raw_key_len); for (size_t i = 0; i < ARRAY_SIZE(derived_key.w); i++) derived_key.w[i] ^= REPEAT_BYTE(HMAC_IPAD_VALUE); *istate = md5_iv; md5_blocks(istate, derived_key.b, 1); for (size_t i = 0; i < ARRAY_SIZE(derived_key.w); i++) derived_key.w[i] ^= REPEAT_BYTE(HMAC_OPAD_VALUE ^ HMAC_IPAD_VALUE); *ostate = md5_iv; md5_blocks(ostate, derived_key.b, 1); memzero_explicit(&derived_key, sizeof(derived_key)); } void hmac_md5_preparekey(struct hmac_md5_key *key, const u8 *raw_key, size_t raw_key_len) { __hmac_md5_preparekey(&key->istate, &key->ostate, raw_key, raw_key_len); } EXPORT_SYMBOL_GPL(hmac_md5_preparekey); void hmac_md5_init(struct hmac_md5_ctx *ctx, const struct hmac_md5_key *key) { ctx->hash_ctx.state = key->istate; ctx->hash_ctx.bytecount = MD5_BLOCK_SIZE; ctx->ostate = key->ostate; } EXPORT_SYMBOL_GPL(hmac_md5_init); void hmac_md5_init_usingrawkey(struct hmac_md5_ctx *ctx, const u8 *raw_key, size_t raw_key_len) { __hmac_md5_preparekey(&ctx->hash_ctx.state, &ctx->ostate, raw_key, raw_key_len); ctx->hash_ctx.bytecount = MD5_BLOCK_SIZE; } EXPORT_SYMBOL_GPL(hmac_md5_init_usingrawkey); void hmac_md5_final(struct hmac_md5_ctx *ctx, u8 out[MD5_DIGEST_SIZE]) { /* Generate the padded input for the outer hash in ctx->hash_ctx.buf. */ __md5_final(&ctx->hash_ctx, ctx->hash_ctx.buf); memset(&ctx->hash_ctx.buf[MD5_DIGEST_SIZE], 0, MD5_BLOCK_SIZE - MD5_DIGEST_SIZE); ctx->hash_ctx.buf[MD5_DIGEST_SIZE] = 0x80; *(__le64 *)&ctx->hash_ctx.buf[MD5_BLOCK_SIZE - 8] = cpu_to_le64(8 * (MD5_BLOCK_SIZE + MD5_DIGEST_SIZE)); /* Compute the outer hash, which gives the HMAC value. */ md5_blocks(&ctx->ostate, ctx->hash_ctx.buf, 1); cpu_to_le32_array(ctx->ostate.h, ARRAY_SIZE(ctx->ostate.h)); memcpy(out, ctx->ostate.h, MD5_DIGEST_SIZE); memzero_explicit(ctx, sizeof(*ctx)); } EXPORT_SYMBOL_GPL(hmac_md5_final); void hmac_md5(const struct hmac_md5_key *key, const u8 *data, size_t data_len, u8 out[MD5_DIGEST_SIZE]) { struct hmac_md5_ctx ctx; hmac_md5_init(&ctx, key); hmac_md5_update(&ctx, data, data_len); hmac_md5_final(&ctx, out); } EXPORT_SYMBOL_GPL(hmac_md5); void hmac_md5_usingrawkey(const u8 *raw_key, size_t raw_key_len, const u8 *data, size_t data_len, u8 out[MD5_DIGEST_SIZE]) { struct hmac_md5_ctx ctx; hmac_md5_init_usingrawkey(&ctx, raw_key, raw_key_len); hmac_md5_update(&ctx, data, data_len); hmac_md5_final(&ctx, out); } EXPORT_SYMBOL_GPL(hmac_md5_usingrawkey); #ifdef md5_mod_init_arch static int __init md5_mod_init(void) { md5_mod_init_arch(); return 0; } subsys_initcall(md5_mod_init); static void __exit md5_mod_exit(void) { } module_exit(md5_mod_exit); #endif MODULE_DESCRIPTION("MD5 and HMAC-MD5 library functions"); MODULE_LICENSE("GPL"); |
| 1 1 1 1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 | // SPDX-License-Identifier: GPL-2.0 /* * Provide a default dump_stack() function for architectures * which don't implement their own. */ #include <linux/kernel.h> #include <linux/buildid.h> #include <linux/export.h> #include <linux/sched.h> #include <linux/sched/debug.h> #include <linux/smp.h> #include <linux/atomic.h> #include <linux/kexec.h> #include <linux/utsname.h> #include <linux/stop_machine.h> static char dump_stack_arch_desc_str[128]; /** * dump_stack_set_arch_desc - set arch-specific str to show with task dumps * @fmt: printf-style format string * @...: arguments for the format string * * The configured string will be printed right after utsname during task * dumps. Usually used to add arch-specific system identifiers. If an * arch wants to make use of such an ID string, it should initialize this * as soon as possible during boot. */ void __init dump_stack_set_arch_desc(const char *fmt, ...) { va_list args; va_start(args, fmt); vsnprintf(dump_stack_arch_desc_str, sizeof(dump_stack_arch_desc_str), fmt, args); va_end(args); } #if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) #define BUILD_ID_FMT " %20phN" #define BUILD_ID_VAL vmlinux_build_id #else #define BUILD_ID_FMT "%s" #define BUILD_ID_VAL "" #endif /** * dump_stack_print_info - print generic debug info for dump_stack() * @log_lvl: log level * * Arch-specific dump_stack() implementations can use this function to * print out the same debug information as the generic dump_stack(). */ void dump_stack_print_info(const char *log_lvl) { printk("%sCPU: %d UID: %u PID: %d Comm: %.20s %s%s %s %.*s %s " BUILD_ID_FMT "\n", log_lvl, raw_smp_processor_id(), __kuid_val(current_real_cred()->euid), current->pid, current->comm, kexec_crash_loaded() ? "Kdump: loaded " : "", print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version, preempt_model_str(), BUILD_ID_VAL); if (get_taint()) printk("%s%s\n", log_lvl, print_tainted_verbose()); if (dump_stack_arch_desc_str[0] != '\0') printk("%sHardware name: %s\n", log_lvl, dump_stack_arch_desc_str); print_worker_info(log_lvl, current); print_stop_info(log_lvl, current); print_scx_info(log_lvl, current); } /** * show_regs_print_info - print generic debug info for show_regs() * @log_lvl: log level * * show_regs() implementations can use this function to print out generic * debug information. */ void show_regs_print_info(const char *log_lvl) { dump_stack_print_info(log_lvl); } static void __dump_stack(const char *log_lvl) { dump_stack_print_info(log_lvl); show_stack(NULL, NULL, log_lvl); } /** * dump_stack_lvl - dump the current task information and its stack trace * @log_lvl: log level * * Architectures can override this implementation by implementing its own. */ asmlinkage __visible void dump_stack_lvl(const char *log_lvl) { bool in_panic = panic_on_this_cpu(); unsigned long flags; /* * Permit this cpu to perform nested stack dumps while serialising * against other CPUs, unless this CPU is in panic. * * When in panic, non-panic CPUs are not permitted to store new * printk messages so there is no need to synchronize the output. * This avoids potential deadlock in panic() if another CPU is * holding and unable to release the printk_cpu_sync. */ if (!in_panic) printk_cpu_sync_get_irqsave(flags); __dump_stack(log_lvl); if (!in_panic) printk_cpu_sync_put_irqrestore(flags); } EXPORT_SYMBOL(dump_stack_lvl); asmlinkage __visible void dump_stack(void) { dump_stack_lvl(KERN_DEFAULT); } EXPORT_SYMBOL(dump_stack); |
| 293 277 24 418 47 47 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_RTNETLINK_H #define __LINUX_RTNETLINK_H #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/wait.h> #include <linux/refcount.h> #include <uapi/linux/rtnetlink.h> extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); static inline int rtnetlink_maybe_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo) { return !skb ? 0 : rtnetlink_send(skb, net, pid, group, echo); } extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid); extern void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, const struct nlmsghdr *nlh, gfp_t flags); extern void rtnl_set_sk_err(struct net *net, u32 group, int error); extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error); void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, gfp_t flags, u32 portid, const struct nlmsghdr *nlh); void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, gfp_t flags, int *new_nsid, int new_ifindex); struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned change, u32 event, gfp_t flags, int *new_nsid, int new_ifindex, u32 portid, const struct nlmsghdr *nlh); void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags, u32 portid, const struct nlmsghdr *nlh); /* RTNL is used as a global lock for all changes to network configuration */ extern void rtnl_lock(void); extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); extern int rtnl_lock_interruptible(void); extern int rtnl_lock_killable(void); extern bool refcount_dec_and_rtnl_lock(refcount_t *r); extern wait_queue_head_t netdev_unregistering_wq; extern atomic_t dev_unreg_count; extern struct rw_semaphore pernet_ops_rwsem; extern struct rw_semaphore net_rwsem; #define ASSERT_RTNL() \ WARN_ONCE(!rtnl_is_locked(), \ "RTNL: assertion failed at %s (%d)\n", __FILE__, __LINE__) #ifdef CONFIG_PROVE_LOCKING extern bool lockdep_rtnl_is_held(void); #else static inline bool lockdep_rtnl_is_held(void) { return true; } #endif /* #ifdef CONFIG_PROVE_LOCKING */ /** * rcu_dereference_rtnl - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing * * Do an rcu_dereference(p), but check caller either holds rcu_read_lock() * or RTNL. Note : Please prefer rtnl_dereference() or rcu_dereference() */ #define rcu_dereference_rtnl(p) \ rcu_dereference_check(p, lockdep_rtnl_is_held()) /** * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL * @p: The pointer to read, prior to dereferencing * * Return: the value of the specified RCU-protected pointer, but omit * the READ_ONCE(), because caller holds RTNL. */ #define rtnl_dereference(p) \ rcu_dereference_protected(p, lockdep_rtnl_is_held()) /** * rcu_replace_pointer_rtnl - replace an RCU pointer under rtnl_lock, returning * its old value * @rp: RCU pointer, whose value is returned * @p: regular pointer * * Perform a replacement under rtnl_lock, where @rp is an RCU-annotated * pointer. The old value of @rp is returned, and @rp is set to @p */ #define rcu_replace_pointer_rtnl(rp, p) \ rcu_replace_pointer(rp, p, lockdep_rtnl_is_held()) #ifdef CONFIG_DEBUG_NET_SMALL_RTNL void __rtnl_net_lock(struct net *net); void __rtnl_net_unlock(struct net *net); void rtnl_net_lock(struct net *net); void rtnl_net_unlock(struct net *net); int rtnl_net_trylock(struct net *net); int rtnl_net_lock_killable(struct net *net); int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b); bool rtnl_net_is_locked(struct net *net); #define ASSERT_RTNL_NET(net) \ WARN_ONCE(!rtnl_net_is_locked(net), \ "RTNL_NET: assertion failed at %s (%d)\n", \ __FILE__, __LINE__) bool lockdep_rtnl_net_is_held(struct net *net); #define rcu_dereference_rtnl_net(net, p) \ rcu_dereference_check(p, lockdep_rtnl_net_is_held(net)) #define rtnl_net_dereference(net, p) \ rcu_dereference_protected(p, lockdep_rtnl_net_is_held(net)) #define rcu_replace_pointer_rtnl_net(net, rp, p) \ rcu_replace_pointer(rp, p, lockdep_rtnl_net_is_held(net)) #else static inline void __rtnl_net_lock(struct net *net) {} static inline void __rtnl_net_unlock(struct net *net) {} static inline void rtnl_net_lock(struct net *net) { rtnl_lock(); } static inline void rtnl_net_unlock(struct net *net) { rtnl_unlock(); } static inline int rtnl_net_trylock(struct net *net) { return rtnl_trylock(); } static inline int rtnl_net_lock_killable(struct net *net) { return rtnl_lock_killable(); } static inline void ASSERT_RTNL_NET(struct net *net) { ASSERT_RTNL(); } #define rcu_dereference_rtnl_net(net, p) \ rcu_dereference_rtnl(p) #define rtnl_net_dereference(net, p) \ rtnl_dereference(p) #define rcu_replace_pointer_rtnl_net(net, rp, p) \ rcu_replace_pointer_rtnl(rp, p) #endif static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) { return rtnl_dereference(dev->ingress_queue); } static inline struct netdev_queue *dev_ingress_queue_rcu(struct net_device *dev) { return rcu_dereference(dev->ingress_queue); } struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); #ifdef CONFIG_NET_INGRESS void net_inc_ingress_queue(void); void net_dec_ingress_queue(void); #endif #ifdef CONFIG_NET_EGRESS void net_inc_egress_queue(void); void net_dec_egress_queue(void); void netdev_xmit_skip_txqueue(bool skip); #endif void rtnetlink_init(void); void __rtnl_unlock(void); void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail); /* Shared by rtnl_fdb_dump() and various ndo_fdb_dump() helpers. */ struct ndo_fdb_dump_context { unsigned long ifindex; unsigned long fdb_idx; }; extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, int *idx); extern int ndo_dflt_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 flags); extern int ndo_dflt_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid); extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode, u32 flags, u32 mask, int nlflags, u32 filter_mask, int (*vlan_fill)(struct sk_buff *skb, struct net_device *dev, u32 filter_mask)); extern void rtnl_offload_xstats_notify(struct net_device *dev); static inline int rtnl_has_listeners(const struct net *net, u32 group) { struct sock *rtnl = net->rtnl; return netlink_has_listeners(rtnl, group); } /** * rtnl_notify_needed - check if notification is needed * @net: Pointer to the net namespace * @nlflags: netlink ingress message flags * @group: rtnl group * * Based on the ingress message flags and rtnl group, returns true * if a notification is needed, false otherwise. */ static inline bool rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group) { return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group); } void netif_set_operstate(struct net_device *dev, int newstate); #endif /* __LINUX_RTNETLINK_H */ |
| 6 14 14 14 5 31 12 12 12 29 2 31 2 31 40 40 12 64 61 44 31 31 31 30 30 30 30 31 15 4 15 15 15 30 12 24 21 34 9 17 12 2 12 12 2 2 2 12 12 12 12 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2001 Jens Axboe <axboe@suse.de> */ #ifndef __LINUX_BIO_H #define __LINUX_BIO_H #include <linux/mempool.h> /* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */ #include <linux/blk_types.h> #include <linux/uio.h> #define BIO_MAX_VECS 256U #define BIO_MAX_INLINE_VECS UIO_MAXIOV struct queue_limits; static inline unsigned int bio_max_segs(unsigned int nr_segs) { return min(nr_segs, BIO_MAX_VECS); } #define bio_iter_iovec(bio, iter) \ bvec_iter_bvec((bio)->bi_io_vec, (iter)) #define bio_iter_page(bio, iter) \ bvec_iter_page((bio)->bi_io_vec, (iter)) #define bio_iter_len(bio, iter) \ bvec_iter_len((bio)->bi_io_vec, (iter)) #define bio_iter_offset(bio, iter) \ bvec_iter_offset((bio)->bi_io_vec, (iter)) #define bio_page(bio) bio_iter_page((bio), (bio)->bi_iter) #define bio_offset(bio) bio_iter_offset((bio), (bio)->bi_iter) #define bio_iovec(bio) bio_iter_iovec((bio), (bio)->bi_iter) #define bvec_iter_sectors(iter) ((iter).bi_size >> 9) #define bvec_iter_end_sector(iter) ((iter).bi_sector + bvec_iter_sectors((iter))) #define bio_sectors(bio) bvec_iter_sectors((bio)->bi_iter) #define bio_end_sector(bio) bvec_iter_end_sector((bio)->bi_iter) /* * Return the data direction, READ or WRITE. */ #define bio_data_dir(bio) \ (op_is_write(bio_op(bio)) ? WRITE : READ) /* * Check whether this bio carries any data or not. A NULL bio is allowed. */ static inline bool bio_has_data(struct bio *bio) { if (bio && bio->bi_iter.bi_size && bio_op(bio) != REQ_OP_DISCARD && bio_op(bio) != REQ_OP_SECURE_ERASE && bio_op(bio) != REQ_OP_WRITE_ZEROES) return true; return false; } static inline bool bio_no_advance_iter(const struct bio *bio) { return bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE || bio_op(bio) == REQ_OP_WRITE_ZEROES; } static inline void *bio_data(struct bio *bio) { if (bio_has_data(bio)) return page_address(bio_page(bio)) + bio_offset(bio); return NULL; } static inline bool bio_next_segment(const struct bio *bio, struct bvec_iter_all *iter) { if (iter->idx >= bio->bi_vcnt) return false; bvec_advance(&bio->bi_io_vec[iter->idx], iter); return true; } /* * drivers should _never_ use the all version - the bio may have been split * before it got to the driver and the driver won't own all of it */ #define bio_for_each_segment_all(bvl, bio, iter) \ for (bvl = bvec_init_iter_all(&iter); bio_next_segment((bio), &iter); ) static inline void bio_advance_iter(const struct bio *bio, struct bvec_iter *iter, unsigned int bytes) { iter->bi_sector += bytes >> 9; if (bio_no_advance_iter(bio)) iter->bi_size -= bytes; else bvec_iter_advance(bio->bi_io_vec, iter, bytes); /* TODO: It is reasonable to complete bio with error here. */ } /* @bytes should be less or equal to bvec[i->bi_idx].bv_len */ static inline void bio_advance_iter_single(const struct bio *bio, struct bvec_iter *iter, unsigned int bytes) { iter->bi_sector += bytes >> 9; if (bio_no_advance_iter(bio)) iter->bi_size -= bytes; else bvec_iter_advance_single(bio->bi_io_vec, iter, bytes); } void __bio_advance(struct bio *, unsigned bytes); /** * bio_advance - increment/complete a bio by some number of bytes * @bio: bio to advance * @nbytes: number of bytes to complete * * This updates bi_sector, bi_size and bi_idx; if the number of bytes to * complete doesn't align with a bvec boundary, then bv_len and bv_offset will * be updated on the last bvec as well. * * @bio will then represent the remaining, uncompleted portion of the io. */ static inline void bio_advance(struct bio *bio, unsigned int nbytes) { if (nbytes == bio->bi_iter.bi_size) { bio->bi_iter.bi_size = 0; return; } __bio_advance(bio, nbytes); } #define __bio_for_each_segment(bvl, bio, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = bio_iter_iovec((bio), (iter))), 1); \ bio_advance_iter_single((bio), &(iter), (bvl).bv_len)) #define bio_for_each_segment(bvl, bio, iter) \ __bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter) #define __bio_for_each_bvec(bvl, bio, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = mp_bvec_iter_bvec((bio)->bi_io_vec, (iter))), 1); \ bio_advance_iter_single((bio), &(iter), (bvl).bv_len)) /* iterate over multi-page bvec */ #define bio_for_each_bvec(bvl, bio, iter) \ __bio_for_each_bvec(bvl, bio, iter, (bio)->bi_iter) /* * Iterate over all multi-page bvecs. Drivers shouldn't use this version for the * same reasons as bio_for_each_segment_all(). */ #define bio_for_each_bvec_all(bvl, bio, i) \ for (i = 0, bvl = bio_first_bvec_all(bio); \ i < (bio)->bi_vcnt; i++, bvl++) #define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len) static inline unsigned bio_segments(struct bio *bio) { unsigned segs = 0; struct bio_vec bv; struct bvec_iter iter; /* * We special case discard/write same/write zeroes, because they * interpret bi_size differently: */ switch (bio_op(bio)) { case REQ_OP_DISCARD: case REQ_OP_SECURE_ERASE: case REQ_OP_WRITE_ZEROES: return 0; default: break; } bio_for_each_segment(bv, bio, iter) segs++; return segs; } /* * get a reference to a bio, so it won't disappear. the intended use is * something like: * * bio_get(bio); * submit_bio(rw, bio); * if (bio->bi_flags ...) * do_something * bio_put(bio); * * without the bio_get(), it could potentially complete I/O before submit_bio * returns. and then bio would be freed memory when if (bio->bi_flags ...) * runs */ static inline void bio_get(struct bio *bio) { bio->bi_flags |= (1 << BIO_REFFED); smp_mb__before_atomic(); atomic_inc(&bio->__bi_cnt); } static inline void bio_cnt_set(struct bio *bio, unsigned int count) { if (count != 1) { bio->bi_flags |= (1 << BIO_REFFED); smp_mb(); } atomic_set(&bio->__bi_cnt, count); } static inline bool bio_flagged(struct bio *bio, unsigned int bit) { return bio->bi_flags & (1U << bit); } static inline void bio_set_flag(struct bio *bio, unsigned int bit) { bio->bi_flags |= (1U << bit); } static inline void bio_clear_flag(struct bio *bio, unsigned int bit) { bio->bi_flags &= ~(1U << bit); } static inline struct bio_vec *bio_first_bvec_all(struct bio *bio) { WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); return bio->bi_io_vec; } static inline struct page *bio_first_page_all(struct bio *bio) { return bio_first_bvec_all(bio)->bv_page; } static inline struct folio *bio_first_folio_all(struct bio *bio) { return page_folio(bio_first_page_all(bio)); } static inline struct bio_vec *bio_last_bvec_all(struct bio *bio) { WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); return &bio->bi_io_vec[bio->bi_vcnt - 1]; } /** * struct folio_iter - State for iterating all folios in a bio. * @folio: The current folio we're iterating. NULL after the last folio. * @offset: The byte offset within the current folio. * @length: The number of bytes in this iteration (will not cross folio * boundary). */ struct folio_iter { struct folio *folio; size_t offset; size_t length; /* private: for use by the iterator */ struct folio *_next; size_t _seg_count; int _i; }; static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio, int i) { struct bio_vec *bvec = bio_first_bvec_all(bio) + i; if (unlikely(i >= bio->bi_vcnt)) { fi->folio = NULL; return; } fi->folio = page_folio(bvec->bv_page); fi->offset = bvec->bv_offset + PAGE_SIZE * folio_page_idx(fi->folio, bvec->bv_page); fi->_seg_count = bvec->bv_len; fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count); fi->_next = folio_next(fi->folio); fi->_i = i; } static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio) { fi->_seg_count -= fi->length; if (fi->_seg_count) { fi->folio = fi->_next; fi->offset = 0; fi->length = min(folio_size(fi->folio), fi->_seg_count); fi->_next = folio_next(fi->folio); } else { bio_first_folio(fi, bio, fi->_i + 1); } } /** * bio_for_each_folio_all - Iterate over each folio in a bio. * @fi: struct folio_iter which is updated for each folio. * @bio: struct bio to iterate over. */ #define bio_for_each_folio_all(fi, bio) \ for (bio_first_folio(&fi, bio, 0); fi.folio; bio_next_folio(&fi, bio)) void bio_trim(struct bio *bio, sector_t offset, sector_t size); extern struct bio *bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs); int bio_split_io_at(struct bio *bio, const struct queue_limits *lim, unsigned *segs, unsigned max_bytes, unsigned len_align); /** * bio_next_split - get next @sectors from a bio, splitting if necessary * @bio: bio to split * @sectors: number of sectors to split from the front of @bio * @gfp: gfp mask * @bs: bio set to allocate from * * Return: a bio representing the next @sectors of @bio - if the bio is smaller * than @sectors, returns the original bio unchanged. */ static inline struct bio *bio_next_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs) { if (sectors >= bio_sectors(bio)) return bio; return bio_split(bio, sectors, gfp, bs); } enum { BIOSET_NEED_BVECS = BIT(0), BIOSET_NEED_RESCUER = BIT(1), BIOSET_PERCPU_CACHE = BIT(2), }; extern int bioset_init(struct bio_set *, unsigned int, unsigned int, int flags); extern void bioset_exit(struct bio_set *); extern int biovec_init_pool(mempool_t *pool, int pool_entries); struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp_mask, struct bio_set *bs); struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask); extern void bio_put(struct bio *); struct bio *bio_alloc_clone(struct block_device *bdev, struct bio *bio_src, gfp_t gfp, struct bio_set *bs); int bio_init_clone(struct block_device *bdev, struct bio *bio, struct bio *bio_src, gfp_t gfp); extern struct bio_set fs_bio_set; static inline struct bio *bio_alloc(struct block_device *bdev, unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp_mask) { return bio_alloc_bioset(bdev, nr_vecs, opf, gfp_mask, &fs_bio_set); } void submit_bio(struct bio *bio); extern void bio_endio(struct bio *); static inline void bio_io_error(struct bio *bio) { bio->bi_status = BLK_STS_IOERR; bio_endio(bio); } static inline void bio_wouldblock_error(struct bio *bio) { bio_set_flag(bio, BIO_QUIET); bio->bi_status = BLK_STS_AGAIN; bio_endio(bio); } /* * Calculate number of bvec segments that should be allocated to fit data * pointed by @iter. If @iter is backed by bvec it's going to be reused * instead of allocating a new one. */ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs) { if (iov_iter_is_bvec(iter)) return 0; return iov_iter_npages(iter, max_segs); } struct request_queue; void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, unsigned short max_vecs, blk_opf_t opf); static inline void bio_init_inline(struct bio *bio, struct block_device *bdev, unsigned short max_vecs, blk_opf_t opf) { bio_init(bio, bdev, bio_inline_vecs(bio), max_vecs, opf); } extern void bio_uninit(struct bio *); void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf); void bio_chain(struct bio *, struct bio *); int __must_check bio_add_page(struct bio *bio, struct page *page, unsigned len, unsigned off); bool __must_check bio_add_folio(struct bio *bio, struct folio *folio, size_t len, size_t off); void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len, size_t off); void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len); /** * bio_add_max_vecs - number of bio_vecs needed to add data to a bio * @kaddr: kernel virtual address to add * @len: length in bytes to add * * Calculate how many bio_vecs need to be allocated to add the kernel virtual * address range in [@kaddr:@len] in the worse case. */ static inline unsigned int bio_add_max_vecs(void *kaddr, unsigned int len) { if (is_vmalloc_addr(kaddr)) return DIV_ROUND_UP(offset_in_page(kaddr) + len, PAGE_SIZE); return 1; } unsigned int bio_add_vmalloc_chunk(struct bio *bio, void *vaddr, unsigned len); bool bio_add_vmalloc(struct bio *bio, void *vaddr, unsigned int len); int submit_bio_wait(struct bio *bio); int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data, size_t len, enum req_op op); int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter, unsigned len_align_mask); void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter); void __bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, struct bio *src, struct bvec_iter *src_iter); extern void bio_copy_data(struct bio *dst, struct bio *src); extern void bio_free_pages(struct bio *bio); void guard_bio_eod(struct bio *bio); void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter); static inline void zero_fill_bio(struct bio *bio) { zero_fill_bio_iter(bio, bio->bi_iter); } static inline void bio_release_pages(struct bio *bio, bool mark_dirty) { if (bio_flagged(bio, BIO_PAGE_PINNED)) __bio_release_pages(bio, mark_dirty); } #define bio_dev(bio) \ disk_devt((bio)->bi_bdev->bd_disk) #ifdef CONFIG_BLK_CGROUP void bio_associate_blkg(struct bio *bio); void bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css); void bio_clone_blkg_association(struct bio *dst, struct bio *src); void blkcg_punt_bio_submit(struct bio *bio); #else /* CONFIG_BLK_CGROUP */ static inline void bio_associate_blkg(struct bio *bio) { } static inline void bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css) { } static inline void bio_clone_blkg_association(struct bio *dst, struct bio *src) { } static inline void blkcg_punt_bio_submit(struct bio *bio) { submit_bio(bio); } #endif /* CONFIG_BLK_CGROUP */ static inline void bio_set_dev(struct bio *bio, struct block_device *bdev) { bio_clear_flag(bio, BIO_REMAPPED); if (bio->bi_bdev != bdev) bio_clear_flag(bio, BIO_BPS_THROTTLED); bio->bi_bdev = bdev; bio_associate_blkg(bio); } /* * BIO list management for use by remapping drivers (e.g. DM or MD) and loop. * * A bio_list anchors a singly-linked list of bios chained through the bi_next * member of the bio. The bio_list also caches the last list member to allow * fast access to the tail. */ struct bio_list { struct bio *head; struct bio *tail; }; static inline int bio_list_empty(const struct bio_list *bl) { return bl->head == NULL; } static inline void bio_list_init(struct bio_list *bl) { bl->head = bl->tail = NULL; } #define BIO_EMPTY_LIST { NULL, NULL } #define bio_list_for_each(bio, bl) \ for (bio = (bl)->head; bio; bio = bio->bi_next) static inline unsigned bio_list_size(const struct bio_list *bl) { unsigned sz = 0; struct bio *bio; bio_list_for_each(bio, bl) sz++; return sz; } static inline void bio_list_add(struct bio_list *bl, struct bio *bio) { bio->bi_next = NULL; if (bl->tail) bl->tail->bi_next = bio; else bl->head = bio; bl->tail = bio; } static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio) { bio->bi_next = bl->head; bl->head = bio; if (!bl->tail) bl->tail = bio; } static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2) { if (!bl2->head) return; if (bl->tail) bl->tail->bi_next = bl2->head; else bl->head = bl2->head; bl->tail = bl2->tail; } static inline void bio_list_merge_init(struct bio_list *bl, struct bio_list *bl2) { bio_list_merge(bl, bl2); bio_list_init(bl2); } static inline void bio_list_merge_head(struct bio_list *bl, struct bio_list *bl2) { if (!bl2->head) return; if (bl->head) bl2->tail->bi_next = bl->head; else bl->tail = bl2->tail; bl->head = bl2->head; } static inline struct bio *bio_list_peek(struct bio_list *bl) { return bl->head; } static inline struct bio *bio_list_pop(struct bio_list *bl) { struct bio *bio = bl->head; if (bio) { bl->head = bl->head->bi_next; if (!bl->head) bl->tail = NULL; bio->bi_next = NULL; } return bio; } static inline struct bio *bio_list_get(struct bio_list *bl) { struct bio *bio = bl->head; bl->head = bl->tail = NULL; return bio; } /* * Increment chain count for the bio. Make sure the CHAIN flag update * is visible before the raised count. */ static inline void bio_inc_remaining(struct bio *bio) { bio_set_flag(bio, BIO_CHAIN); smp_mb__before_atomic(); atomic_inc(&bio->__bi_remaining); } /* * bio_set is used to allow other portions of the IO system to * allocate their own private memory pools for bio and iovec structures. * These memory pools in turn all allocate from the bio_slab * and the bvec_slabs[]. */ #define BIO_POOL_SIZE 2 struct bio_set { struct kmem_cache *bio_slab; unsigned int front_pad; /* * per-cpu bio alloc cache */ struct bio_alloc_cache __percpu *cache; mempool_t bio_pool; mempool_t bvec_pool; unsigned int back_pad; /* * Deadlock avoidance for stacking block drivers: see comments in * bio_alloc_bioset() for details */ spinlock_t rescue_lock; struct bio_list rescue_list; struct work_struct rescue_work; struct workqueue_struct *rescue_workqueue; /* * Hot un-plug notifier for the per-cpu cache, if used */ struct hlist_node cpuhp_dead; }; static inline bool bioset_initialized(struct bio_set *bs) { return bs->bio_slab != NULL; } /* * Mark a bio as polled. Note that for async polled IO, the caller must * expect -EWOULDBLOCK if we cannot allocate a request (or other resources). * We cannot block waiting for requests on polled IO, as those completions * must be found by the caller. This is different than IRQ driven IO, where * it's safe to wait for IO to complete. */ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb) { bio->bi_opf |= REQ_POLLED; if (kiocb->ki_flags & IOCB_NOWAIT) bio->bi_opf |= REQ_NOWAIT; } static inline void bio_clear_polled(struct bio *bio) { bio->bi_opf &= ~REQ_POLLED; } /** * bio_is_zone_append - is this a zone append bio? * @bio: bio to check * * Check if @bio is a zone append operation. Core block layer code and end_io * handlers must use this instead of an open coded REQ_OP_ZONE_APPEND check * because the block layer can rewrite REQ_OP_ZONE_APPEND to REQ_OP_WRITE if * it is not natively supported. */ static inline bool bio_is_zone_append(struct bio *bio) { if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) return false; return bio_op(bio) == REQ_OP_ZONE_APPEND || bio_flagged(bio, BIO_EMULATES_ZONE_APPEND); } struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev, unsigned int nr_pages, blk_opf_t opf, gfp_t gfp); struct bio *bio_chain_and_submit(struct bio *prev, struct bio *new); struct bio *blk_alloc_discard_bio(struct block_device *bdev, sector_t *sector, sector_t *nr_sects, gfp_t gfp_mask); #endif /* __LINUX_BIO_H */ |
| 1 1 1 3 1 3 3 3 3 1 2 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Resilient Queued Spin Lock * * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P. * (C) Copyright 2013-2014,2018 Red Hat, Inc. * (C) Copyright 2015 Intel Corp. * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP * (C) Copyright 2024-2025 Meta Platforms, Inc. and affiliates. * * Authors: Waiman Long <longman@redhat.com> * Peter Zijlstra <peterz@infradead.org> * Kumar Kartikeya Dwivedi <memxor@gmail.com> */ #include <linux/smp.h> #include <linux/bug.h> #include <linux/bpf.h> #include <linux/err.h> #include <linux/cpumask.h> #include <linux/percpu.h> #include <linux/hardirq.h> #include <linux/mutex.h> #include <linux/prefetch.h> #include <asm/byteorder.h> #ifdef CONFIG_QUEUED_SPINLOCKS #include <asm/qspinlock.h> #endif #include <trace/events/lock.h> #include <asm/rqspinlock.h> #include <linux/timekeeping.h> /* * Include queued spinlock definitions and statistics code */ #ifdef CONFIG_QUEUED_SPINLOCKS #include "../locking/qspinlock.h" #include "../locking/lock_events.h" #include "rqspinlock.h" #include "../locking/mcs_spinlock.h" #endif /* * The basic principle of a queue-based spinlock can best be understood * by studying a classic queue-based spinlock implementation called the * MCS lock. A copy of the original MCS lock paper ("Algorithms for Scalable * Synchronization on Shared-Memory Multiprocessors by Mellor-Crummey and * Scott") is available at * * https://bugzilla.kernel.org/show_bug.cgi?id=206115 * * This queued spinlock implementation is based on the MCS lock, however to * make it fit the 4 bytes we assume spinlock_t to be, and preserve its * existing API, we must modify it somehow. * * In particular; where the traditional MCS lock consists of a tail pointer * (8 bytes) and needs the next pointer (another 8 bytes) of its own node to * unlock the next pending (next->locked), we compress both these: {tail, * next->locked} into a single u32 value. * * Since a spinlock disables recursion of its own context and there is a limit * to the contexts that can nest; namely: task, softirq, hardirq, nmi. As there * are at most 4 nesting levels, it can be encoded by a 2-bit number. Now * we can encode the tail by combining the 2-bit nesting level with the cpu * number. With one byte for the lock value and 3 bytes for the tail, only a * 32-bit word is now needed. Even though we only need 1 bit for the lock, * we extend it to a full byte to achieve better performance for architectures * that support atomic byte write. * * We also change the first spinner to spin on the lock bit instead of its * node; whereby avoiding the need to carry a node from lock to unlock, and * preserving existing lock API. This also makes the unlock code simpler and * faster. * * N.B. The current implementation only supports architectures that allow * atomic operations on smaller 8-bit and 16-bit data types. * */ struct rqspinlock_timeout { u64 timeout_end; u64 duration; u64 cur; u16 spin; }; #define RES_TIMEOUT_VAL 2 DEFINE_PER_CPU_ALIGNED(struct rqspinlock_held, rqspinlock_held_locks); EXPORT_SYMBOL_GPL(rqspinlock_held_locks); static bool is_lock_released(rqspinlock_t *lock, u32 mask, struct rqspinlock_timeout *ts) { if (!(atomic_read_acquire(&lock->val) & (mask))) return true; return false; } static noinline int check_deadlock_AA(rqspinlock_t *lock, u32 mask, struct rqspinlock_timeout *ts) { struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks); int cnt = min(RES_NR_HELD, rqh->cnt); /* * Return an error if we hold the lock we are attempting to acquire. * We'll iterate over max 32 locks; no need to do is_lock_released. */ for (int i = 0; i < cnt - 1; i++) { if (rqh->locks[i] == lock) return -EDEADLK; } return 0; } /* * This focuses on the most common case of ABBA deadlocks (or ABBA involving * more locks, which reduce to ABBA). This is not exhaustive, and we rely on * timeouts as the final line of defense. */ static noinline int check_deadlock_ABBA(rqspinlock_t *lock, u32 mask, struct rqspinlock_timeout *ts) { struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks); int rqh_cnt = min(RES_NR_HELD, rqh->cnt); void *remote_lock; int cpu; /* * Find the CPU holding the lock that we want to acquire. If there is a * deadlock scenario, we will read a stable set on the remote CPU and * find the target. This would be a constant time operation instead of * O(NR_CPUS) if we could determine the owning CPU from a lock value, but * that requires increasing the size of the lock word. */ for_each_possible_cpu(cpu) { struct rqspinlock_held *rqh_cpu = per_cpu_ptr(&rqspinlock_held_locks, cpu); int real_cnt = READ_ONCE(rqh_cpu->cnt); int cnt = min(RES_NR_HELD, real_cnt); /* * Let's ensure to break out of this loop if the lock is available for * us to potentially acquire. */ if (is_lock_released(lock, mask, ts)) return 0; /* * Skip ourselves, and CPUs whose count is less than 2, as they need at * least one held lock and one acquisition attempt (reflected as top * most entry) to participate in an ABBA deadlock. * * If cnt is more than RES_NR_HELD, it means the current lock being * acquired won't appear in the table, and other locks in the table are * already held, so we can't determine ABBA. */ if (cpu == smp_processor_id() || real_cnt < 2 || real_cnt > RES_NR_HELD) continue; /* * Obtain the entry at the top, this corresponds to the lock the * remote CPU is attempting to acquire in a deadlock situation, * and would be one of the locks we hold on the current CPU. */ remote_lock = READ_ONCE(rqh_cpu->locks[cnt - 1]); /* * If it is NULL, we've raced and cannot determine a deadlock * conclusively, skip this CPU. */ if (!remote_lock) continue; /* * Find if the lock we're attempting to acquire is held by this CPU. * Don't consider the topmost entry, as that must be the latest lock * being held or acquired. For a deadlock, the target CPU must also * attempt to acquire a lock we hold, so for this search only 'cnt - 1' * entries are important. */ for (int i = 0; i < cnt - 1; i++) { if (READ_ONCE(rqh_cpu->locks[i]) != lock) continue; /* * We found our lock as held on the remote CPU. Is the * acquisition attempt on the remote CPU for a lock held * by us? If so, we have a deadlock situation, and need * to recover. */ for (int i = 0; i < rqh_cnt - 1; i++) { if (rqh->locks[i] == remote_lock) return -EDEADLK; } /* * Inconclusive; retry again later. */ return 0; } } return 0; } static noinline int check_deadlock(rqspinlock_t *lock, u32 mask, struct rqspinlock_timeout *ts) { int ret; ret = check_deadlock_AA(lock, mask, ts); if (ret) return ret; ret = check_deadlock_ABBA(lock, mask, ts); if (ret) return ret; return 0; } static noinline int check_timeout(rqspinlock_t *lock, u32 mask, struct rqspinlock_timeout *ts) { u64 time = ktime_get_mono_fast_ns(); u64 prev = ts->cur; if (!ts->timeout_end) { ts->cur = time; ts->timeout_end = time + ts->duration; return 0; } if (time > ts->timeout_end) return -ETIMEDOUT; /* * A millisecond interval passed from last time? Trigger deadlock * checks. */ if (prev + NSEC_PER_MSEC < time) { ts->cur = time; return check_deadlock(lock, mask, ts); } return 0; } /* * Do not amortize with spins when res_smp_cond_load_acquire is defined, * as the macro does internal amortization for us. */ #ifndef res_smp_cond_load_acquire #define RES_CHECK_TIMEOUT(ts, ret, mask) \ ({ \ if (!(ts).spin++) \ (ret) = check_timeout((lock), (mask), &(ts)); \ (ret); \ }) #else #define RES_CHECK_TIMEOUT(ts, ret, mask) \ ({ (ret) = check_timeout((lock), (mask), &(ts)); }) #endif /* * Initialize the 'spin' member. * Set spin member to 0 to trigger AA/ABBA checks immediately. */ #define RES_INIT_TIMEOUT(ts) ({ (ts).spin = 0; }) /* * We only need to reset 'timeout_end', 'spin' will just wrap around as necessary. * Duration is defined for each spin attempt, so set it here. */ #define RES_RESET_TIMEOUT(ts, _duration) ({ (ts).timeout_end = 0; (ts).duration = _duration; }) /* * Provide a test-and-set fallback for cases when queued spin lock support is * absent from the architecture. */ int __lockfunc resilient_tas_spin_lock(rqspinlock_t *lock) { struct rqspinlock_timeout ts; int val, ret = 0; RES_INIT_TIMEOUT(ts); grab_held_lock_entry(lock); /* * Since the waiting loop's time is dependent on the amount of * contention, a short timeout unlike rqspinlock waiting loops * isn't enough. Choose a second as the timeout value. */ RES_RESET_TIMEOUT(ts, NSEC_PER_SEC); retry: val = atomic_read(&lock->val); if (val || !atomic_try_cmpxchg(&lock->val, &val, 1)) { if (RES_CHECK_TIMEOUT(ts, ret, ~0u)) goto out; cpu_relax(); goto retry; } return 0; out: release_held_lock_entry(); return ret; } EXPORT_SYMBOL_GPL(resilient_tas_spin_lock); #ifdef CONFIG_QUEUED_SPINLOCKS /* * Per-CPU queue node structures; we can never have more than 4 nested * contexts: task, softirq, hardirq, nmi. * * Exactly fits one 64-byte cacheline on a 64-bit architecture. */ static DEFINE_PER_CPU_ALIGNED(struct qnode, rqnodes[_Q_MAX_NODES]); #ifndef res_smp_cond_load_acquire #define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire(v, c) #endif #define res_atomic_cond_read_acquire(v, c) res_smp_cond_load_acquire(&(v)->counter, (c)) /** * resilient_queued_spin_lock_slowpath - acquire the queued spinlock * @lock: Pointer to queued spinlock structure * @val: Current value of the queued spinlock 32-bit word * * Return: * * 0 - Lock was acquired successfully. * * -EDEADLK - Lock acquisition failed because of AA/ABBA deadlock. * * -ETIMEDOUT - Lock acquisition failed because of timeout. * * (queue tail, pending bit, lock value) * * fast : slow : unlock * : : * uncontended (0,0,0) -:--> (0,0,1) ------------------------------:--> (*,*,0) * : | ^--------.------. / : * : v \ \ | : * pending : (0,1,1) +--> (0,1,0) \ | : * : | ^--' | | : * : v | | : * uncontended : (n,x,y) +--> (n,0,0) --' | : * queue : | ^--' | : * : v | : * contended : (*,x,y) +--> (*,0,0) ---> (*,0,1) -' : * queue : ^--' : */ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val) { struct mcs_spinlock *prev, *next, *node; struct rqspinlock_timeout ts; int idx, ret = 0; u32 old, tail; BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); if (resilient_virt_spin_lock_enabled()) return resilient_virt_spin_lock(lock); RES_INIT_TIMEOUT(ts); /* * Wait for in-progress pending->locked hand-overs with a bounded * number of spins so that we guarantee forward progress. * * 0,1,0 -> 0,0,1 */ if (val == _Q_PENDING_VAL) { int cnt = _Q_PENDING_LOOPS; val = atomic_cond_read_relaxed(&lock->val, (VAL != _Q_PENDING_VAL) || !cnt--); } /* * If we observe any contention; queue. */ if (val & ~_Q_LOCKED_MASK) goto queue; /* * trylock || pending * * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock */ val = queued_fetch_set_pending_acquire(lock); /* * If we observe contention, there is a concurrent locker. * * Undo and queue; our setting of PENDING might have made the * n,0,0 -> 0,0,0 transition fail and it will now be waiting * on @next to become !NULL. */ if (unlikely(val & ~_Q_LOCKED_MASK)) { /* Undo PENDING if we set it. */ if (!(val & _Q_PENDING_MASK)) clear_pending(lock); goto queue; } /* * Grab an entry in the held locks array, to enable deadlock detection. */ grab_held_lock_entry(lock); /* * We're pending, wait for the owner to go away. * * 0,1,1 -> *,1,0 * * this wait loop must be a load-acquire such that we match the * store-release that clears the locked bit and create lock * sequentiality; this is because not all * clear_pending_set_locked() implementations imply full * barriers. */ if (val & _Q_LOCKED_MASK) { RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT); res_smp_cond_load_acquire(&lock->locked, !VAL || RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_MASK)); } if (ret) { /* * We waited for the locked bit to go back to 0, as the pending * waiter, but timed out. We need to clear the pending bit since * we own it. Once a stuck owner has been recovered, the lock * must be restored to a valid state, hence removing the pending * bit is necessary. * * *,1,* -> *,0,* */ clear_pending(lock); lockevent_inc(rqspinlock_lock_timeout); goto err_release_entry; } /* * take ownership and clear the pending bit. * * 0,1,0 -> 0,0,1 */ clear_pending_set_locked(lock); lockevent_inc(lock_pending); return 0; /* * End of pending bit optimistic spinning and beginning of MCS * queuing. */ queue: lockevent_inc(lock_slowpath); /* * Grab deadlock detection entry for the queue path. */ grab_held_lock_entry(lock); node = this_cpu_ptr(&rqnodes[0].mcs); idx = node->count++; tail = encode_tail(smp_processor_id(), idx); trace_contention_begin(lock, LCB_F_SPIN); /* * 4 nodes are allocated based on the assumption that there will * not be nested NMIs taking spinlocks. That may not be true in * some architectures even though the chance of needing more than * 4 nodes will still be extremely unlikely. When that happens, * we fall back to spinning on the lock directly without using * any MCS node. This is not the most elegant solution, but is * simple enough. */ if (unlikely(idx >= _Q_MAX_NODES || in_nmi())) { lockevent_inc(lock_no_node); RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT); while (!queued_spin_trylock(lock)) { if (RES_CHECK_TIMEOUT(ts, ret, ~0u)) { lockevent_inc(rqspinlock_lock_timeout); goto err_release_node; } cpu_relax(); } goto release; } node = grab_mcs_node(node, idx); /* * Keep counts of non-zero index values: */ lockevent_cond_inc(lock_use_node2 + idx - 1, idx); /* * Ensure that we increment the head node->count before initialising * the actual node. If the compiler is kind enough to reorder these * stores, then an IRQ could overwrite our assignments. */ barrier(); node->locked = 0; node->next = NULL; /* * We touched a (possibly) cold cacheline in the per-cpu queue node; * attempt the trylock once more in the hope someone let go while we * weren't watching. */ if (queued_spin_trylock(lock)) goto release; /* * Ensure that the initialisation of @node is complete before we * publish the updated tail via xchg_tail() and potentially link * @node into the waitqueue via WRITE_ONCE(prev->next, node) below. */ smp_wmb(); /* * Publish the updated tail. * We have already touched the queueing cacheline; don't bother with * pending stuff. * * p,*,* -> n,*,* */ old = xchg_tail(lock, tail); next = NULL; /* * if there was a previous node; link it and wait until reaching the * head of the waitqueue. */ if (old & _Q_TAIL_MASK) { int val; prev = decode_tail(old, rqnodes); /* Link @node into the waitqueue. */ WRITE_ONCE(prev->next, node); val = arch_mcs_spin_lock_contended(&node->locked); if (val == RES_TIMEOUT_VAL) { ret = -EDEADLK; goto waitq_timeout; } /* * While waiting for the MCS lock, the next pointer may have * been set by another lock waiter. We optimistically load * the next pointer & prefetch the cacheline for writing * to reduce latency in the upcoming MCS unlock operation. */ next = READ_ONCE(node->next); if (next) prefetchw(next); } /* * we're at the head of the waitqueue, wait for the owner & pending to * go away. * * *,x,y -> *,0,0 * * this wait loop must use a load-acquire such that we match the * store-release that clears the locked bit and create lock * sequentiality; this is because the set_locked() function below * does not imply a full barrier. * * We use RES_DEF_TIMEOUT * 2 as the duration, as RES_DEF_TIMEOUT is * meant to span maximum allowed time per critical section, and we may * have both the owner of the lock and the pending bit waiter ahead of * us. */ RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT * 2); val = res_atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK) || RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_PENDING_MASK)); waitq_timeout: if (ret) { /* * If the tail is still pointing to us, then we are the final waiter, * and are responsible for resetting the tail back to 0. Otherwise, if * the cmpxchg operation fails, we signal the next waiter to take exit * and try the same. For a waiter with tail node 'n': * * n,*,* -> 0,*,* * * When performing cmpxchg for the whole word (NR_CPUS > 16k), it is * possible locked/pending bits keep changing and we see failures even * when we remain the head of wait queue. However, eventually, * pending bit owner will unset the pending bit, and new waiters * will queue behind us. This will leave the lock owner in * charge, and it will eventually either set locked bit to 0, or * leave it as 1, allowing us to make progress. * * We terminate the whole wait queue for two reasons. Firstly, * we eschew per-waiter timeouts with one applied at the head of * the wait queue. This allows everyone to break out faster * once we've seen the owner / pending waiter not responding for * the timeout duration from the head. Secondly, it avoids * complicated synchronization, because when not leaving in FIFO * order, prev's next pointer needs to be fixed up etc. */ if (!try_cmpxchg_tail(lock, tail, 0)) { next = smp_cond_load_relaxed(&node->next, VAL); WRITE_ONCE(next->locked, RES_TIMEOUT_VAL); } lockevent_inc(rqspinlock_lock_timeout); goto err_release_node; } /* * claim the lock: * * n,0,0 -> 0,0,1 : lock, uncontended * *,*,0 -> *,*,1 : lock, contended * * If the queue head is the only one in the queue (lock value == tail) * and nobody is pending, clear the tail code and grab the lock. * Otherwise, we only need to grab the lock. */ /* * Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the * above wait condition, therefore any concurrent setting of * PENDING will make the uncontended transition fail. */ if ((val & _Q_TAIL_MASK) == tail) { if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL)) goto release; /* No contention */ } /* * Either somebody is queued behind us or _Q_PENDING_VAL got set * which will then detect the remaining tail and queue behind us * ensuring we'll see a @next. */ set_locked(lock); /* * contended path; wait for next if not observed yet, release. */ if (!next) next = smp_cond_load_relaxed(&node->next, (VAL)); arch_mcs_spin_unlock_contended(&next->locked); release: trace_contention_end(lock, 0); /* * release the node */ __this_cpu_dec(rqnodes[0].mcs.count); return ret; err_release_node: trace_contention_end(lock, ret); __this_cpu_dec(rqnodes[0].mcs.count); err_release_entry: release_held_lock_entry(); return ret; } EXPORT_SYMBOL_GPL(resilient_queued_spin_lock_slowpath); #endif /* CONFIG_QUEUED_SPINLOCKS */ __bpf_kfunc_start_defs(); static void bpf_prog_report_rqspinlock_violation(const char *str, void *lock, bool irqsave) { struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks); struct bpf_stream_stage ss; struct bpf_prog *prog; prog = bpf_prog_find_from_stack(); if (!prog) return; bpf_stream_stage(ss, prog, BPF_STDERR, ({ bpf_stream_printk(ss, "ERROR: %s for bpf_res_spin_lock%s\n", str, irqsave ? "_irqsave" : ""); bpf_stream_printk(ss, "Attempted lock = 0x%px\n", lock); bpf_stream_printk(ss, "Total held locks = %d\n", rqh->cnt); for (int i = 0; i < min(RES_NR_HELD, rqh->cnt); i++) bpf_stream_printk(ss, "Held lock[%2d] = 0x%px\n", i, rqh->locks[i]); bpf_stream_dump_stack(ss); })); } #define REPORT_STR(ret) ({ (ret) == -ETIMEDOUT ? "Timeout detected" : "AA or ABBA deadlock detected"; }) __bpf_kfunc int bpf_res_spin_lock(struct bpf_res_spin_lock *lock) { int ret; BUILD_BUG_ON(sizeof(rqspinlock_t) != sizeof(struct bpf_res_spin_lock)); BUILD_BUG_ON(__alignof__(rqspinlock_t) != __alignof__(struct bpf_res_spin_lock)); preempt_disable(); ret = res_spin_lock((rqspinlock_t *)lock); if (unlikely(ret)) { bpf_prog_report_rqspinlock_violation(REPORT_STR(ret), lock, false); preempt_enable(); return ret; } return 0; } __bpf_kfunc void bpf_res_spin_unlock(struct bpf_res_spin_lock *lock) { res_spin_unlock((rqspinlock_t *)lock); preempt_enable(); } __bpf_kfunc int bpf_res_spin_lock_irqsave(struct bpf_res_spin_lock *lock, unsigned long *flags__irq_flag) { u64 *ptr = (u64 *)flags__irq_flag; unsigned long flags; int ret; preempt_disable(); local_irq_save(flags); ret = res_spin_lock((rqspinlock_t *)lock); if (unlikely(ret)) { bpf_prog_report_rqspinlock_violation(REPORT_STR(ret), lock, true); local_irq_restore(flags); preempt_enable(); return ret; } *ptr = flags; return 0; } __bpf_kfunc void bpf_res_spin_unlock_irqrestore(struct bpf_res_spin_lock *lock, unsigned long *flags__irq_flag) { u64 *ptr = (u64 *)flags__irq_flag; unsigned long flags = *ptr; res_spin_unlock((rqspinlock_t *)lock); local_irq_restore(flags); preempt_enable(); } __bpf_kfunc_end_defs(); BTF_KFUNCS_START(rqspinlock_kfunc_ids) BTF_ID_FLAGS(func, bpf_res_spin_lock, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_res_spin_unlock) BTF_ID_FLAGS(func, bpf_res_spin_lock_irqsave, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_res_spin_unlock_irqrestore) BTF_KFUNCS_END(rqspinlock_kfunc_ids) static const struct btf_kfunc_id_set rqspinlock_kfunc_set = { .owner = THIS_MODULE, .set = &rqspinlock_kfunc_ids, }; static __init int rqspinlock_register_kfuncs(void) { return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &rqspinlock_kfunc_set); } late_initcall(rqspinlock_register_kfuncs); |
| 59 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_BIO_INTEGRITY_H #define _LINUX_BIO_INTEGRITY_H #include <linux/bio.h> enum bip_flags { BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ BIP_DISK_NOCHECK = 1 << 2, /* disable disk integrity checking */ BIP_IP_CHECKSUM = 1 << 3, /* IP checksum */ BIP_COPY_USER = 1 << 4, /* Kernel bounce buffer in use */ BIP_CHECK_GUARD = 1 << 5, /* guard check */ BIP_CHECK_REFTAG = 1 << 6, /* reftag check */ BIP_CHECK_APPTAG = 1 << 7, /* apptag check */ BIP_P2P_DMA = 1 << 8, /* using P2P address */ }; struct bio_integrity_payload { struct bvec_iter bip_iter; unsigned short bip_vcnt; /* # of integrity bio_vecs */ unsigned short bip_max_vcnt; /* integrity bio_vec slots */ unsigned short bip_flags; /* control flags */ u16 app_tag; /* application tag value */ struct bio_vec *bip_vec; }; #define BIP_CLONE_FLAGS (BIP_MAPPED_INTEGRITY | BIP_IP_CHECKSUM | \ BIP_CHECK_GUARD | BIP_CHECK_REFTAG | BIP_CHECK_APPTAG) #ifdef CONFIG_BLK_DEV_INTEGRITY #define bip_for_each_vec(bvl, bip, iter) \ for_each_bvec(bvl, (bip)->bip_vec, iter, (bip)->bip_iter) #define bio_for_each_integrity_vec(_bvl, _bio, _iter) \ for_each_bio(_bio) \ bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) static inline struct bio_integrity_payload *bio_integrity(struct bio *bio) { if (bio->bi_opf & REQ_INTEGRITY) return bio->bi_integrity; return NULL; } static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag) { struct bio_integrity_payload *bip = bio_integrity(bio); if (bip) return bip->bip_flags & flag; return false; } static inline sector_t bip_get_seed(struct bio_integrity_payload *bip) { return bip->bip_iter.bi_sector; } static inline void bip_set_seed(struct bio_integrity_payload *bip, sector_t seed) { bip->bip_iter.bi_sector = seed; } void bio_integrity_init(struct bio *bio, struct bio_integrity_payload *bip, struct bio_vec *bvecs, unsigned int nr_vecs); struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, gfp_t gfp, unsigned int nr); int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset); int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter); int bio_integrity_map_iter(struct bio *bio, struct uio_meta *meta); void bio_integrity_unmap_user(struct bio *bio); bool bio_integrity_prep(struct bio *bio); void bio_integrity_advance(struct bio *bio, unsigned int bytes_done); void bio_integrity_trim(struct bio *bio); int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask); #else /* CONFIG_BLK_DEV_INTEGRITY */ static inline struct bio_integrity_payload *bio_integrity(struct bio *bio) { return NULL; } static inline int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter) { return -EINVAL; } static inline int bio_integrity_map_iter(struct bio *bio, struct uio_meta *meta) { return -EINVAL; } static inline void bio_integrity_unmap_user(struct bio *bio) { } static inline bool bio_integrity_prep(struct bio *bio) { return true; } static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask) { return 0; } static inline void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) { } static inline void bio_integrity_trim(struct bio *bio) { } static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag) { return false; } static inline struct bio_integrity_payload * bio_integrity_alloc(struct bio *bio, gfp_t gfp, unsigned int nr) { return ERR_PTR(-EINVAL); } static inline int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset) { return 0; } #endif /* CONFIG_BLK_DEV_INTEGRITY */ #endif /* _LINUX_BIO_INTEGRITY_H */ |
| 3 3 2 2 2 3 1 1 1 1 1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 | // SPDX-License-Identifier: GPL-2.0 /* * xfrm6_policy.c: based on xfrm4_policy.c * * Authors: * Mitsuru KANDA @USAGI * Kazunori MIYAZAWA @USAGI * Kunihiro Ishiguro <kunihiro@ipinfusion.com> * IPv6 support * YOSHIFUJI Hideaki * Split up af-specific portion * */ #include <linux/err.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <net/addrconf.h> #include <net/dst.h> #include <net/xfrm.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/l3mdev.h> static struct dst_entry *xfrm6_dst_lookup(const struct xfrm_dst_lookup_params *params) { struct flowi6 fl6; struct dst_entry *dst; int err; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(params->net, params->oif); fl6.flowi6_mark = params->mark; memcpy(&fl6.daddr, params->daddr, sizeof(fl6.daddr)); if (params->saddr) memcpy(&fl6.saddr, params->saddr, sizeof(fl6.saddr)); fl6.flowi4_proto = params->ipproto; fl6.uli = params->uli; dst = ip6_route_output(params->net, NULL, &fl6); err = dst->error; if (dst->error) { dst_release(dst); dst = ERR_PTR(err); } return dst; } static int xfrm6_get_saddr(xfrm_address_t *saddr, const struct xfrm_dst_lookup_params *params) { struct dst_entry *dst; struct net_device *dev; struct inet6_dev *idev; dst = xfrm6_dst_lookup(params); if (IS_ERR(dst)) return -EHOSTUNREACH; idev = ip6_dst_idev(dst); if (!idev) { dst_release(dst); return -EHOSTUNREACH; } dev = idev->dev; ipv6_dev_get_saddr(dev_net(dev), dev, ¶ms->daddr->in6, 0, &saddr->in6); dst_release(dst); return 0; } static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { struct rt6_info *rt = dst_rt6_info(xdst->route); xdst->u.dst.dev = dev; netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC); xdst->u.rt6.rt6i_idev = in6_dev_get(dev); if (!xdst->u.rt6.rt6i_idev) { netdev_put(dev, &xdst->u.dst.dev_tracker); return -ENODEV; } /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | RTF_LOCAL); xdst->route_cookie = rt6_get_cookie(rt); xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; xdst->u.rt6.rt6i_dst = rt->rt6i_dst; xdst->u.rt6.rt6i_src = rt->rt6i_src; rt6_uncached_list_add(&xdst->u.rt6); return 0; } static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, bool confirm_neigh) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh); } static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; path->ops->redirect(path, sk, skb); } static void xfrm6_dst_destroy(struct dst_entry *dst) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; dst_destroy_metrics_generic(dst); rt6_uncached_list_del(&xdst->u.rt6); if (likely(xdst->u.rt6.rt6i_idev)) in6_dev_put(xdst->u.rt6.rt6i_idev); xfrm_dst_destroy(xdst); } static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { struct xfrm_dst *xdst; xdst = (struct xfrm_dst *)dst; if (xdst->u.rt6.rt6i_idev->dev == dev) { struct inet6_dev *loopback_idev = in6_dev_get(dev_net(dev)->loopback_dev); do { in6_dev_put(xdst->u.rt6.rt6i_idev); xdst->u.rt6.rt6i_idev = loopback_idev; in6_dev_hold(loopback_idev); xdst = (struct xfrm_dst *)xfrm_dst_child(&xdst->u.dst); } while (xdst->u.dst.xfrm); __in6_dev_put(loopback_idev); } xfrm_dst_ifdown(dst, dev); } static struct dst_ops xfrm6_dst_ops_template = { .family = AF_INET6, .update_pmtu = xfrm6_update_pmtu, .redirect = xfrm6_redirect, .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, .gc_thresh = 32768, }; static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .dst_ops = &xfrm6_dst_ops_template, .dst_lookup = xfrm6_dst_lookup, .get_saddr = xfrm6_get_saddr, .fill_dst = xfrm6_fill_dst, .blackhole_route = ip6_blackhole_route, }; static int __init xfrm6_policy_init(void) { return xfrm_policy_register_afinfo(&xfrm6_policy_afinfo, AF_INET6); } static void xfrm6_policy_fini(void) { xfrm_policy_unregister_afinfo(&xfrm6_policy_afinfo); } #ifdef CONFIG_SYSCTL static struct ctl_table xfrm6_policy_table[] = { { .procname = "xfrm6_gc_thresh", .data = &init_net.xfrm.xfrm6_dst_ops.gc_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, }; static int __net_init xfrm6_net_sysctl_init(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; table = xfrm6_policy_table; if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(xfrm6_policy_table), GFP_KERNEL); if (!table) goto err_alloc; table[0].data = &net->xfrm.xfrm6_dst_ops.gc_thresh; } hdr = register_net_sysctl_sz(net, "net/ipv6", table, ARRAY_SIZE(xfrm6_policy_table)); if (!hdr) goto err_reg; net->ipv6.sysctl.xfrm6_hdr = hdr; return 0; err_reg: if (!net_eq(net, &init_net)) kfree(table); err_alloc: return -ENOMEM; } static void __net_exit xfrm6_net_sysctl_exit(struct net *net) { const struct ctl_table *table; if (!net->ipv6.sysctl.xfrm6_hdr) return; table = net->ipv6.sysctl.xfrm6_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv6.sysctl.xfrm6_hdr); if (!net_eq(net, &init_net)) kfree(table); } #else /* CONFIG_SYSCTL */ static inline int xfrm6_net_sysctl_init(struct net *net) { return 0; } static inline void xfrm6_net_sysctl_exit(struct net *net) { } #endif static int __net_init xfrm6_net_init(struct net *net) { int ret; memcpy(&net->xfrm.xfrm6_dst_ops, &xfrm6_dst_ops_template, sizeof(xfrm6_dst_ops_template)); ret = dst_entries_init(&net->xfrm.xfrm6_dst_ops); if (ret) return ret; ret = xfrm6_net_sysctl_init(net); if (ret) dst_entries_destroy(&net->xfrm.xfrm6_dst_ops); return ret; } static void __net_exit xfrm6_net_exit(struct net *net) { xfrm6_net_sysctl_exit(net); dst_entries_destroy(&net->xfrm.xfrm6_dst_ops); } static struct pernet_operations xfrm6_net_ops = { .init = xfrm6_net_init, .exit = xfrm6_net_exit, }; int __init xfrm6_init(void) { int ret; ret = xfrm6_policy_init(); if (ret) goto out; ret = xfrm6_state_init(); if (ret) goto out_policy; ret = xfrm6_protocol_init(); if (ret) goto out_state; ret = register_pernet_subsys(&xfrm6_net_ops); if (ret) goto out_protocol; ret = xfrm_nat_keepalive_init(AF_INET6); if (ret) goto out_nat_keepalive; out: return ret; out_nat_keepalive: unregister_pernet_subsys(&xfrm6_net_ops); out_protocol: xfrm6_protocol_fini(); out_state: xfrm6_state_fini(); out_policy: xfrm6_policy_fini(); goto out; } void xfrm6_fini(void) { xfrm_nat_keepalive_fini(AF_INET6); unregister_pernet_subsys(&xfrm6_net_ops); xfrm6_protocol_fini(); xfrm6_policy_fini(); xfrm6_state_fini(); } |
| 20 20 1 1 1 1 1 2 1 1 2 3 3 2 3 3 3 3 2 2 3 3 3 2 1 2 1 9 9 8 4 4 1 3 3 4 3 1 11 10 11 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 | // SPDX-License-Identifier: GPL-2.0 /* * fs/signalfd.c * * Copyright (C) 2003 Linus Torvalds * * Mon Mar 5, 2007: Davide Libenzi <davidel@xmailserver.org> * Changed ->read() to return a siginfo strcture instead of signal number. * Fixed locking in ->poll(). * Added sighand-detach notification. * Added fd re-use in sys_signalfd() syscall. * Now using anonymous inode source. * Thanks to Oleg Nesterov for useful code review and suggestions. * More comments and suggestions from Arnd Bergmann. * Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br> * Retrieve multiple signals with one read() call * Sun Jul 15, 2007: Davide Libenzi <davidel@xmailserver.org> * Attach to the sighand only during read() and poll(). */ #include <linux/file.h> #include <linux/poll.h> #include <linux/init.h> #include <linux/fs.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/kernel.h> #include <linux/signal.h> #include <linux/list.h> #include <linux/anon_inodes.h> #include <linux/signalfd.h> #include <linux/syscalls.h> #include <linux/proc_fs.h> #include <linux/compat.h> void signalfd_cleanup(struct sighand_struct *sighand) { wake_up_pollfree(&sighand->signalfd_wqh); } struct signalfd_ctx { sigset_t sigmask; }; static int signalfd_release(struct inode *inode, struct file *file) { kfree(file->private_data); return 0; } static __poll_t signalfd_poll(struct file *file, poll_table *wait) { struct signalfd_ctx *ctx = file->private_data; __poll_t events = 0; poll_wait(file, ¤t->sighand->signalfd_wqh, wait); spin_lock_irq(¤t->sighand->siglock); if (next_signal(¤t->pending, &ctx->sigmask) || next_signal(¤t->signal->shared_pending, &ctx->sigmask)) events |= EPOLLIN; spin_unlock_irq(¤t->sighand->siglock); return events; } /* * Copied from copy_siginfo_to_user() in kernel/signal.c */ static int signalfd_copyinfo(struct iov_iter *to, kernel_siginfo_t const *kinfo) { struct signalfd_siginfo new; BUILD_BUG_ON(sizeof(struct signalfd_siginfo) != 128); /* * Unused members should be zero ... */ memset(&new, 0, sizeof(new)); /* * If you change siginfo_t structure, please be sure * this code is fixed accordingly. */ new.ssi_signo = kinfo->si_signo; new.ssi_errno = kinfo->si_errno; new.ssi_code = kinfo->si_code; switch (siginfo_layout(kinfo->si_signo, kinfo->si_code)) { case SIL_KILL: new.ssi_pid = kinfo->si_pid; new.ssi_uid = kinfo->si_uid; break; case SIL_TIMER: new.ssi_tid = kinfo->si_tid; new.ssi_overrun = kinfo->si_overrun; new.ssi_ptr = (long) kinfo->si_ptr; new.ssi_int = kinfo->si_int; break; case SIL_POLL: new.ssi_band = kinfo->si_band; new.ssi_fd = kinfo->si_fd; break; case SIL_FAULT_BNDERR: case SIL_FAULT_PKUERR: case SIL_FAULT_PERF_EVENT: /* * Fall through to the SIL_FAULT case. SIL_FAULT_BNDERR, * SIL_FAULT_PKUERR, and SIL_FAULT_PERF_EVENT are only * generated by faults that deliver them synchronously to * userspace. In case someone injects one of these signals * and signalfd catches it treat it as SIL_FAULT. */ case SIL_FAULT: new.ssi_addr = (long) kinfo->si_addr; break; case SIL_FAULT_TRAPNO: new.ssi_addr = (long) kinfo->si_addr; new.ssi_trapno = kinfo->si_trapno; break; case SIL_FAULT_MCEERR: new.ssi_addr = (long) kinfo->si_addr; new.ssi_addr_lsb = (short) kinfo->si_addr_lsb; break; case SIL_CHLD: new.ssi_pid = kinfo->si_pid; new.ssi_uid = kinfo->si_uid; new.ssi_status = kinfo->si_status; new.ssi_utime = kinfo->si_utime; new.ssi_stime = kinfo->si_stime; break; case SIL_RT: /* * This case catches also the signals queued by sigqueue(). */ new.ssi_pid = kinfo->si_pid; new.ssi_uid = kinfo->si_uid; new.ssi_ptr = (long) kinfo->si_ptr; new.ssi_int = kinfo->si_int; break; case SIL_SYS: new.ssi_call_addr = (long) kinfo->si_call_addr; new.ssi_syscall = kinfo->si_syscall; new.ssi_arch = kinfo->si_arch; break; } if (!copy_to_iter_full(&new, sizeof(struct signalfd_siginfo), to)) return -EFAULT; return sizeof(struct signalfd_siginfo); } static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info, int nonblock) { enum pid_type type; ssize_t ret; DECLARE_WAITQUEUE(wait, current); spin_lock_irq(¤t->sighand->siglock); ret = dequeue_signal(&ctx->sigmask, info, &type); switch (ret) { case 0: if (!nonblock) break; ret = -EAGAIN; fallthrough; default: spin_unlock_irq(¤t->sighand->siglock); return ret; } add_wait_queue(¤t->sighand->signalfd_wqh, &wait); for (;;) { set_current_state(TASK_INTERRUPTIBLE); ret = dequeue_signal(&ctx->sigmask, info, &type); if (ret != 0) break; if (signal_pending(current)) { ret = -ERESTARTSYS; break; } spin_unlock_irq(¤t->sighand->siglock); schedule(); spin_lock_irq(¤t->sighand->siglock); } spin_unlock_irq(¤t->sighand->siglock); remove_wait_queue(¤t->sighand->signalfd_wqh, &wait); __set_current_state(TASK_RUNNING); return ret; } /* * Returns a multiple of the size of a "struct signalfd_siginfo", or a negative * error code. The "count" parameter must be at least the size of a * "struct signalfd_siginfo". */ static ssize_t signalfd_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct signalfd_ctx *ctx = file->private_data; size_t count = iov_iter_count(to); ssize_t ret, total = 0; kernel_siginfo_t info; bool nonblock; count /= sizeof(struct signalfd_siginfo); if (!count) return -EINVAL; nonblock = file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT; do { ret = signalfd_dequeue(ctx, &info, nonblock); if (unlikely(ret <= 0)) break; ret = signalfd_copyinfo(to, &info); if (ret < 0) break; total += ret; nonblock = 1; } while (--count); return total ? total: ret; } #ifdef CONFIG_PROC_FS static void signalfd_show_fdinfo(struct seq_file *m, struct file *f) { struct signalfd_ctx *ctx = f->private_data; sigset_t sigmask; sigmask = ctx->sigmask; signotset(&sigmask); render_sigset_t(m, "sigmask:\t", &sigmask); } #endif static const struct file_operations signalfd_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = signalfd_show_fdinfo, #endif .release = signalfd_release, .poll = signalfd_poll, .read_iter = signalfd_read_iter, .llseek = noop_llseek, }; static int do_signalfd4(int ufd, sigset_t *mask, int flags) { struct signalfd_ctx *ctx; /* Check the SFD_* constants for consistency. */ BUILD_BUG_ON(SFD_CLOEXEC != O_CLOEXEC); BUILD_BUG_ON(SFD_NONBLOCK != O_NONBLOCK); if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK)) return -EINVAL; sigdelsetmask(mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); signotset(mask); if (ufd == -1) { struct file *file; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->sigmask = *mask; ufd = get_unused_fd_flags(flags & O_CLOEXEC); if (ufd < 0) { kfree(ctx); return ufd; } file = anon_inode_getfile_fmode("[signalfd]", &signalfd_fops, ctx, O_RDWR | (flags & O_NONBLOCK), FMODE_NOWAIT); if (IS_ERR(file)) { put_unused_fd(ufd); kfree(ctx); return PTR_ERR(file); } fd_install(ufd, file); } else { CLASS(fd, f)(ufd); if (fd_empty(f)) return -EBADF; ctx = fd_file(f)->private_data; if (fd_file(f)->f_op != &signalfd_fops) return -EINVAL; spin_lock_irq(¤t->sighand->siglock); ctx->sigmask = *mask; spin_unlock_irq(¤t->sighand->siglock); wake_up(¤t->sighand->signalfd_wqh); } return ufd; } SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, size_t, sizemask, int, flags) { sigset_t mask; if (sizemask != sizeof(sigset_t)) return -EINVAL; if (copy_from_user(&mask, user_mask, sizeof(mask))) return -EFAULT; return do_signalfd4(ufd, &mask, flags); } SYSCALL_DEFINE3(signalfd, int, ufd, sigset_t __user *, user_mask, size_t, sizemask) { sigset_t mask; if (sizemask != sizeof(sigset_t)) return -EINVAL; if (copy_from_user(&mask, user_mask, sizeof(mask))) return -EFAULT; return do_signalfd4(ufd, &mask, 0); } #ifdef CONFIG_COMPAT static long do_compat_signalfd4(int ufd, const compat_sigset_t __user *user_mask, compat_size_t sigsetsize, int flags) { sigset_t mask; if (sigsetsize != sizeof(compat_sigset_t)) return -EINVAL; if (get_compat_sigset(&mask, user_mask)) return -EFAULT; return do_signalfd4(ufd, &mask, flags); } COMPAT_SYSCALL_DEFINE4(signalfd4, int, ufd, const compat_sigset_t __user *, user_mask, compat_size_t, sigsetsize, int, flags) { return do_compat_signalfd4(ufd, user_mask, sigsetsize, flags); } COMPAT_SYSCALL_DEFINE3(signalfd, int, ufd, const compat_sigset_t __user *, user_mask, compat_size_t, sigsetsize) { return do_compat_signalfd4(ufd, user_mask, sigsetsize, 0); } #endif |
| 31 31 31 28 14 14 4 1 3 2 1 3 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 1 3 2 1 3 3 3 3 3 3 3 3 4 1 3 2 1 3 2 2 12 1 3 2 1 3 1 1 1 2 1 2 1 3 2 1 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 | // SPDX-License-Identifier: GPL-2.0-only /* * kernel/power/main.c - PM subsystem core functionality. * * Copyright (c) 2003 Patrick Mochel * Copyright (c) 2003 Open Source Development Lab */ #include <linux/acpi.h> #include <linux/export.h> #include <linux/init.h> #include <linux/kobject.h> #include <linux/string.h> #include <linux/pm-trace.h> #include <linux/workqueue.h> #include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/suspend.h> #include <linux/syscalls.h> #include <linux/pm_runtime.h> #include "power.h" #ifdef CONFIG_PM_SLEEP /* * The following functions are used by the suspend/hibernate code to temporarily * change gfp_allowed_mask in order to avoid using I/O during memory allocations * while devices are suspended. To avoid races with the suspend/hibernate code, * they should always be called with system_transition_mutex held * (gfp_allowed_mask also should only be modified with system_transition_mutex * held, unless the suspend/hibernate code is guaranteed not to run in parallel * with that modification). */ static gfp_t saved_gfp_mask; void pm_restore_gfp_mask(void) { WARN_ON(!mutex_is_locked(&system_transition_mutex)); if (saved_gfp_mask) { gfp_allowed_mask = saved_gfp_mask; saved_gfp_mask = 0; } } void pm_restrict_gfp_mask(void) { WARN_ON(!mutex_is_locked(&system_transition_mutex)); WARN_ON(saved_gfp_mask); saved_gfp_mask = gfp_allowed_mask; gfp_allowed_mask &= ~(__GFP_IO | __GFP_FS); } unsigned int lock_system_sleep(void) { unsigned int flags = current->flags; current->flags |= PF_NOFREEZE; mutex_lock(&system_transition_mutex); return flags; } EXPORT_SYMBOL_GPL(lock_system_sleep); void unlock_system_sleep(unsigned int flags) { if (!(flags & PF_NOFREEZE)) current->flags &= ~PF_NOFREEZE; mutex_unlock(&system_transition_mutex); } EXPORT_SYMBOL_GPL(unlock_system_sleep); void ksys_sync_helper(void) { ktime_t start; long elapsed_msecs; start = ktime_get(); ksys_sync(); elapsed_msecs = ktime_to_ms(ktime_sub(ktime_get(), start)); pr_info("Filesystems sync: %ld.%03ld seconds\n", elapsed_msecs / MSEC_PER_SEC, elapsed_msecs % MSEC_PER_SEC); } EXPORT_SYMBOL_GPL(ksys_sync_helper); /* Routines for PM-transition notifications */ static BLOCKING_NOTIFIER_HEAD(pm_chain_head); int register_pm_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&pm_chain_head, nb); } EXPORT_SYMBOL_GPL(register_pm_notifier); int unregister_pm_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&pm_chain_head, nb); } EXPORT_SYMBOL_GPL(unregister_pm_notifier); int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down) { int ret; ret = blocking_notifier_call_chain_robust(&pm_chain_head, val_up, val_down, NULL); return notifier_to_errno(ret); } int pm_notifier_call_chain(unsigned long val) { return blocking_notifier_call_chain(&pm_chain_head, val, NULL); } /* If set, devices may be suspended and resumed asynchronously. */ int pm_async_enabled = 1; static int __init pm_async_setup(char *str) { if (!strcmp(str, "off")) pm_async_enabled = 0; return 1; } __setup("pm_async=", pm_async_setup); static ssize_t pm_async_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", pm_async_enabled); } static ssize_t pm_async_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned long val; if (kstrtoul(buf, 10, &val)) return -EINVAL; if (val > 1) return -EINVAL; pm_async_enabled = val; return n; } power_attr(pm_async); #ifdef CONFIG_SUSPEND static ssize_t mem_sleep_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { ssize_t count = 0; suspend_state_t i; for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) { if (i >= PM_SUSPEND_MEM && cxl_mem_active()) continue; if (mem_sleep_states[i]) { const char *label = mem_sleep_states[i]; if (mem_sleep_current == i) count += sysfs_emit_at(buf, count, "[%s] ", label); else count += sysfs_emit_at(buf, count, "%s ", label); } } /* Convert the last space to a newline if needed. */ if (count > 0) buf[count - 1] = '\n'; return count; } static suspend_state_t decode_suspend_state(const char *buf, size_t n) { suspend_state_t state; char *p; int len; p = memchr(buf, '\n', n); len = p ? p - buf : n; for (state = PM_SUSPEND_MIN; state < PM_SUSPEND_MAX; state++) { const char *label = mem_sleep_states[state]; if (label && len == strlen(label) && !strncmp(buf, label, len)) return state; } return PM_SUSPEND_ON; } static ssize_t mem_sleep_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { suspend_state_t state; int error; error = pm_autosleep_lock(); if (error) return error; if (pm_autosleep_state() > PM_SUSPEND_ON) { error = -EBUSY; goto out; } state = decode_suspend_state(buf, n); if (state < PM_SUSPEND_MAX && state > PM_SUSPEND_ON) mem_sleep_current = state; else error = -EINVAL; out: pm_autosleep_unlock(); return error ? error : n; } power_attr(mem_sleep); /* * sync_on_suspend: invoke ksys_sync_helper() before suspend. * * show() returns whether ksys_sync_helper() is invoked before suspend. * store() accepts 0 or 1. 0 disables ksys_sync_helper() and 1 enables it. */ bool sync_on_suspend_enabled = !IS_ENABLED(CONFIG_SUSPEND_SKIP_SYNC); static ssize_t sync_on_suspend_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", sync_on_suspend_enabled); } static ssize_t sync_on_suspend_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned long val; if (kstrtoul(buf, 10, &val)) return -EINVAL; if (val > 1) return -EINVAL; sync_on_suspend_enabled = !!val; return n; } power_attr(sync_on_suspend); #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_PM_SLEEP_DEBUG int pm_test_level = TEST_NONE; static const char * const pm_tests[__TEST_AFTER_LAST] = { [TEST_NONE] = "none", [TEST_CORE] = "core", [TEST_CPUS] = "processors", [TEST_PLATFORM] = "platform", [TEST_DEVICES] = "devices", [TEST_FREEZER] = "freezer", }; static ssize_t pm_test_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { ssize_t count = 0; int level; for (level = TEST_FIRST; level <= TEST_MAX; level++) if (pm_tests[level]) { if (level == pm_test_level) count += sysfs_emit_at(buf, count, "[%s] ", pm_tests[level]); else count += sysfs_emit_at(buf, count, "%s ", pm_tests[level]); } /* Convert the last space to a newline if needed. */ if (count > 0) buf[count - 1] = '\n'; return count; } static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned int sleep_flags; const char * const *s; int error = -EINVAL; int level; char *p; int len; p = memchr(buf, '\n', n); len = p ? p - buf : n; sleep_flags = lock_system_sleep(); level = TEST_FIRST; for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++) if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) { pm_test_level = level; error = 0; break; } unlock_system_sleep(sleep_flags); return error ? error : n; } power_attr(pm_test); #endif /* CONFIG_PM_SLEEP_DEBUG */ #define SUSPEND_NR_STEPS SUSPEND_RESUME #define REC_FAILED_NUM 2 struct suspend_stats { unsigned int step_failures[SUSPEND_NR_STEPS]; unsigned int success; unsigned int fail; int last_failed_dev; char failed_devs[REC_FAILED_NUM][40]; int last_failed_errno; int errno[REC_FAILED_NUM]; int last_failed_step; u64 last_hw_sleep; u64 total_hw_sleep; u64 max_hw_sleep; enum suspend_stat_step failed_steps[REC_FAILED_NUM]; }; static struct suspend_stats suspend_stats; static DEFINE_MUTEX(suspend_stats_lock); void dpm_save_failed_dev(const char *name) { mutex_lock(&suspend_stats_lock); strscpy(suspend_stats.failed_devs[suspend_stats.last_failed_dev], name, sizeof(suspend_stats.failed_devs[0])); suspend_stats.last_failed_dev++; suspend_stats.last_failed_dev %= REC_FAILED_NUM; mutex_unlock(&suspend_stats_lock); } void dpm_save_failed_step(enum suspend_stat_step step) { suspend_stats.step_failures[step-1]++; suspend_stats.failed_steps[suspend_stats.last_failed_step] = step; suspend_stats.last_failed_step++; suspend_stats.last_failed_step %= REC_FAILED_NUM; } void dpm_save_errno(int err) { if (!err) { suspend_stats.success++; return; } suspend_stats.fail++; suspend_stats.errno[suspend_stats.last_failed_errno] = err; suspend_stats.last_failed_errno++; suspend_stats.last_failed_errno %= REC_FAILED_NUM; } void pm_report_hw_sleep_time(u64 t) { suspend_stats.last_hw_sleep = t; suspend_stats.total_hw_sleep += t; } EXPORT_SYMBOL_GPL(pm_report_hw_sleep_time); void pm_report_max_hw_sleep(u64 t) { suspend_stats.max_hw_sleep = t; } EXPORT_SYMBOL_GPL(pm_report_max_hw_sleep); static const char * const suspend_step_names[] = { [SUSPEND_WORKING] = "", [SUSPEND_FREEZE] = "freeze", [SUSPEND_PREPARE] = "prepare", [SUSPEND_SUSPEND] = "suspend", [SUSPEND_SUSPEND_LATE] = "suspend_late", [SUSPEND_SUSPEND_NOIRQ] = "suspend_noirq", [SUSPEND_RESUME_NOIRQ] = "resume_noirq", [SUSPEND_RESUME_EARLY] = "resume_early", [SUSPEND_RESUME] = "resume", }; #define suspend_attr(_name, format_str) \ static ssize_t _name##_show(struct kobject *kobj, \ struct kobj_attribute *attr, char *buf) \ { \ return sysfs_emit(buf, format_str, suspend_stats._name);\ } \ static struct kobj_attribute _name = __ATTR_RO(_name) suspend_attr(success, "%u\n"); suspend_attr(fail, "%u\n"); suspend_attr(last_hw_sleep, "%llu\n"); suspend_attr(total_hw_sleep, "%llu\n"); suspend_attr(max_hw_sleep, "%llu\n"); #define suspend_step_attr(_name, step) \ static ssize_t _name##_show(struct kobject *kobj, \ struct kobj_attribute *attr, char *buf) \ { \ return sysfs_emit(buf, "%u\n", \ suspend_stats.step_failures[step-1]); \ } \ static struct kobj_attribute _name = __ATTR_RO(_name) suspend_step_attr(failed_freeze, SUSPEND_FREEZE); suspend_step_attr(failed_prepare, SUSPEND_PREPARE); suspend_step_attr(failed_suspend, SUSPEND_SUSPEND); suspend_step_attr(failed_suspend_late, SUSPEND_SUSPEND_LATE); suspend_step_attr(failed_suspend_noirq, SUSPEND_SUSPEND_NOIRQ); suspend_step_attr(failed_resume, SUSPEND_RESUME); suspend_step_attr(failed_resume_early, SUSPEND_RESUME_EARLY); suspend_step_attr(failed_resume_noirq, SUSPEND_RESUME_NOIRQ); static ssize_t last_failed_dev_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { int index; char *last_failed_dev = NULL; index = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; index %= REC_FAILED_NUM; last_failed_dev = suspend_stats.failed_devs[index]; return sysfs_emit(buf, "%s\n", last_failed_dev); } static struct kobj_attribute last_failed_dev = __ATTR_RO(last_failed_dev); static ssize_t last_failed_errno_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { int index; int last_failed_errno; index = suspend_stats.last_failed_errno + REC_FAILED_NUM - 1; index %= REC_FAILED_NUM; last_failed_errno = suspend_stats.errno[index]; return sysfs_emit(buf, "%d\n", last_failed_errno); } static struct kobj_attribute last_failed_errno = __ATTR_RO(last_failed_errno); static ssize_t last_failed_step_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { enum suspend_stat_step step; int index; index = suspend_stats.last_failed_step + REC_FAILED_NUM - 1; index %= REC_FAILED_NUM; step = suspend_stats.failed_steps[index]; return sysfs_emit(buf, "%s\n", suspend_step_names[step]); } static struct kobj_attribute last_failed_step = __ATTR_RO(last_failed_step); static struct attribute *suspend_attrs[] = { &success.attr, &fail.attr, &failed_freeze.attr, &failed_prepare.attr, &failed_suspend.attr, &failed_suspend_late.attr, &failed_suspend_noirq.attr, &failed_resume.attr, &failed_resume_early.attr, &failed_resume_noirq.attr, &last_failed_dev.attr, &last_failed_errno.attr, &last_failed_step.attr, &last_hw_sleep.attr, &total_hw_sleep.attr, &max_hw_sleep.attr, NULL, }; static umode_t suspend_attr_is_visible(struct kobject *kobj, struct attribute *attr, int idx) { if (attr != &last_hw_sleep.attr && attr != &total_hw_sleep.attr && attr != &max_hw_sleep.attr) return 0444; #ifdef CONFIG_ACPI if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) return 0444; #endif return 0; } static const struct attribute_group suspend_attr_group = { .name = "suspend_stats", .attrs = suspend_attrs, .is_visible = suspend_attr_is_visible, }; #ifdef CONFIG_DEBUG_FS static int suspend_stats_show(struct seq_file *s, void *unused) { int i, index, last_dev, last_errno, last_step; enum suspend_stat_step step; last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1; last_dev %= REC_FAILED_NUM; last_errno = suspend_stats.last_failed_errno + REC_FAILED_NUM - 1; last_errno %= REC_FAILED_NUM; last_step = suspend_stats.last_failed_step + REC_FAILED_NUM - 1; last_step %= REC_FAILED_NUM; seq_printf(s, "success: %u\nfail: %u\n", suspend_stats.success, suspend_stats.fail); for (step = SUSPEND_FREEZE; step <= SUSPEND_NR_STEPS; step++) seq_printf(s, "failed_%s: %u\n", suspend_step_names[step], suspend_stats.step_failures[step-1]); seq_printf(s, "failures:\n last_failed_dev:\t%-s\n", suspend_stats.failed_devs[last_dev]); for (i = 1; i < REC_FAILED_NUM; i++) { index = last_dev + REC_FAILED_NUM - i; index %= REC_FAILED_NUM; seq_printf(s, "\t\t\t%-s\n", suspend_stats.failed_devs[index]); } seq_printf(s, " last_failed_errno:\t%-d\n", suspend_stats.errno[last_errno]); for (i = 1; i < REC_FAILED_NUM; i++) { index = last_errno + REC_FAILED_NUM - i; index %= REC_FAILED_NUM; seq_printf(s, "\t\t\t%-d\n", suspend_stats.errno[index]); } seq_printf(s, " last_failed_step:\t%-s\n", suspend_step_names[suspend_stats.failed_steps[last_step]]); for (i = 1; i < REC_FAILED_NUM; i++) { index = last_step + REC_FAILED_NUM - i; index %= REC_FAILED_NUM; seq_printf(s, "\t\t\t%-s\n", suspend_step_names[suspend_stats.failed_steps[index]]); } return 0; } DEFINE_SHOW_ATTRIBUTE(suspend_stats); static int __init pm_debugfs_init(void) { debugfs_create_file("suspend_stats", S_IFREG | S_IRUGO, NULL, NULL, &suspend_stats_fops); return 0; } late_initcall(pm_debugfs_init); #endif /* CONFIG_DEBUG_FS */ bool pm_sleep_transition_in_progress(void) { return pm_suspend_in_progress() || hibernation_in_progress(); } #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_SLEEP_DEBUG /* * pm_print_times: print time taken by devices to suspend and resume. * * show() returns whether printing of suspend and resume times is enabled. * store() accepts 0 or 1. 0 disables printing and 1 enables it. */ bool pm_print_times_enabled; static ssize_t pm_print_times_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", pm_print_times_enabled); } static ssize_t pm_print_times_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned long val; if (kstrtoul(buf, 10, &val)) return -EINVAL; if (val > 1) return -EINVAL; pm_print_times_enabled = !!val; return n; } power_attr(pm_print_times); static inline void pm_print_times_init(void) { pm_print_times_enabled = initcall_debug; } static ssize_t pm_wakeup_irq_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { if (!pm_wakeup_irq()) return -ENODATA; return sysfs_emit(buf, "%u\n", pm_wakeup_irq()); } power_attr_ro(pm_wakeup_irq); bool pm_debug_messages_on __read_mostly; bool pm_debug_messages_should_print(void) { return pm_debug_messages_on && pm_sleep_transition_in_progress(); } EXPORT_SYMBOL_GPL(pm_debug_messages_should_print); static ssize_t pm_debug_messages_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", pm_debug_messages_on); } static ssize_t pm_debug_messages_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned long val; if (kstrtoul(buf, 10, &val)) return -EINVAL; if (val > 1) return -EINVAL; pm_debug_messages_on = !!val; return n; } power_attr(pm_debug_messages); static int __init pm_debug_messages_setup(char *str) { pm_debug_messages_on = true; return 1; } __setup("pm_debug_messages", pm_debug_messages_setup); #else /* !CONFIG_PM_SLEEP_DEBUG */ static inline void pm_print_times_init(void) {} #endif /* CONFIG_PM_SLEEP_DEBUG */ struct kobject *power_kobj; /* * state - control system sleep states. * * show() returns available sleep state labels, which may be "mem", "standby", * "freeze" and "disk" (hibernation). * See Documentation/admin-guide/pm/sleep-states.rst for a description of * what they mean. * * store() accepts one of those strings, translates it into the proper * enumerated value, and initiates a suspend transition. */ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { ssize_t count = 0; #ifdef CONFIG_SUSPEND suspend_state_t i; for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) if (pm_states[i]) count += sysfs_emit_at(buf, count, "%s ", pm_states[i]); #endif if (hibernation_available()) count += sysfs_emit_at(buf, count, "disk "); /* Convert the last space to a newline if needed. */ if (count > 0) buf[count - 1] = '\n'; return count; } static suspend_state_t decode_state(const char *buf, size_t n) { #ifdef CONFIG_SUSPEND suspend_state_t state; #endif char *p; int len; p = memchr(buf, '\n', n); len = p ? p - buf : n; /* Check hibernation first. */ if (len == 4 && str_has_prefix(buf, "disk")) return PM_SUSPEND_MAX; #ifdef CONFIG_SUSPEND for (state = PM_SUSPEND_MIN; state < PM_SUSPEND_MAX; state++) { const char *label = pm_states[state]; if (label && len == strlen(label) && !strncmp(buf, label, len)) return state; } #endif return PM_SUSPEND_ON; } static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { suspend_state_t state; int error; error = pm_autosleep_lock(); if (error) return error; if (pm_autosleep_state() > PM_SUSPEND_ON) { error = -EBUSY; goto out; } state = decode_state(buf, n); if (state < PM_SUSPEND_MAX) { if (state == PM_SUSPEND_MEM) state = mem_sleep_current; error = pm_suspend(state); } else if (state == PM_SUSPEND_MAX) { error = hibernate(); } else { error = -EINVAL; } out: pm_autosleep_unlock(); return error ? error : n; } power_attr(state); #ifdef CONFIG_PM_SLEEP /* * The 'wakeup_count' attribute, along with the functions defined in * drivers/base/power/wakeup.c, provides a means by which wakeup events can be * handled in a non-racy way. * * If a wakeup event occurs when the system is in a sleep state, it simply is * woken up. In turn, if an event that would wake the system up from a sleep * state occurs when it is undergoing a transition to that sleep state, the * transition should be aborted. Moreover, if such an event occurs when the * system is in the working state, an attempt to start a transition to the * given sleep state should fail during certain period after the detection of * the event. Using the 'state' attribute alone is not sufficient to satisfy * these requirements, because a wakeup event may occur exactly when 'state' * is being written to and may be delivered to user space right before it is * frozen, so the event will remain only partially processed until the system is * woken up by another event. In particular, it won't cause the transition to * a sleep state to be aborted. * * This difficulty may be overcome if user space uses 'wakeup_count' before * writing to 'state'. It first should read from 'wakeup_count' and store * the read value. Then, after carrying out its own preparations for the system * transition to a sleep state, it should write the stored value to * 'wakeup_count'. If that fails, at least one wakeup event has occurred since * 'wakeup_count' was read and 'state' should not be written to. Otherwise, it * is allowed to write to 'state', but the transition will be aborted if there * are any wakeup events detected after 'wakeup_count' was written to. */ static ssize_t wakeup_count_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { unsigned int val; return pm_get_wakeup_count(&val, true) ? sysfs_emit(buf, "%u\n", val) : -EINTR; } static ssize_t wakeup_count_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned int val; int error; error = pm_autosleep_lock(); if (error) return error; if (pm_autosleep_state() > PM_SUSPEND_ON) { error = -EBUSY; goto out; } error = -EINVAL; if (sscanf(buf, "%u", &val) == 1) { if (pm_save_wakeup_count(val)) error = n; else pm_print_active_wakeup_sources(); } out: pm_autosleep_unlock(); return error; } power_attr(wakeup_count); #ifdef CONFIG_PM_AUTOSLEEP static ssize_t autosleep_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { suspend_state_t state = pm_autosleep_state(); if (state == PM_SUSPEND_ON) return sysfs_emit(buf, "off\n"); #ifdef CONFIG_SUSPEND if (state < PM_SUSPEND_MAX) return sysfs_emit(buf, "%s\n", pm_states[state] ? pm_states[state] : "error"); #endif #ifdef CONFIG_HIBERNATION return sysfs_emit(buf, "disk\n"); #else return sysfs_emit(buf, "error\n"); #endif } static ssize_t autosleep_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { suspend_state_t state = decode_state(buf, n); int error; if (state == PM_SUSPEND_ON && strcmp(buf, "off") && strcmp(buf, "off\n")) return -EINVAL; if (state == PM_SUSPEND_MEM) state = mem_sleep_current; error = pm_autosleep_set_state(state); return error ? error : n; } power_attr(autosleep); #endif /* CONFIG_PM_AUTOSLEEP */ #ifdef CONFIG_PM_WAKELOCKS static ssize_t wake_lock_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return pm_show_wakelocks(buf, true); } static ssize_t wake_lock_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { int error = pm_wake_lock(buf); return error ? error : n; } power_attr(wake_lock); static ssize_t wake_unlock_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return pm_show_wakelocks(buf, false); } static ssize_t wake_unlock_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { int error = pm_wake_unlock(buf); return error ? error : n; } power_attr(wake_unlock); #endif /* CONFIG_PM_WAKELOCKS */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_TRACE int pm_trace_enabled; static ssize_t pm_trace_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", pm_trace_enabled); } static ssize_t pm_trace_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { int val; if (sscanf(buf, "%d", &val) == 1) { pm_trace_enabled = !!val; if (pm_trace_enabled) { pr_warn("PM: Enabling pm_trace changes system date and time during resume.\n" "PM: Correct system time has to be restored manually after resume.\n"); } return n; } return -EINVAL; } power_attr(pm_trace); static ssize_t pm_trace_dev_match_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return show_trace_dev_match(buf, PAGE_SIZE); } power_attr_ro(pm_trace_dev_match); #endif /* CONFIG_PM_TRACE */ #ifdef CONFIG_FREEZER static ssize_t pm_freeze_timeout_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%u\n", freeze_timeout_msecs); } static ssize_t pm_freeze_timeout_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned long val; if (kstrtoul(buf, 10, &val)) return -EINVAL; freeze_timeout_msecs = val; return n; } power_attr(pm_freeze_timeout); #endif /* CONFIG_FREEZER*/ #if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION) bool filesystem_freeze_enabled = false; static ssize_t freeze_filesystems_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", filesystem_freeze_enabled); } static ssize_t freeze_filesystems_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { unsigned long val; if (kstrtoul(buf, 10, &val)) return -EINVAL; if (val > 1) return -EINVAL; filesystem_freeze_enabled = !!val; return n; } power_attr(freeze_filesystems); #endif /* CONFIG_SUSPEND || CONFIG_HIBERNATION */ static struct attribute * g[] = { &state_attr.attr, #ifdef CONFIG_PM_TRACE &pm_trace_attr.attr, &pm_trace_dev_match_attr.attr, #endif #ifdef CONFIG_PM_SLEEP &pm_async_attr.attr, &wakeup_count_attr.attr, #ifdef CONFIG_SUSPEND &mem_sleep_attr.attr, &sync_on_suspend_attr.attr, #endif #ifdef CONFIG_PM_AUTOSLEEP &autosleep_attr.attr, #endif #ifdef CONFIG_PM_WAKELOCKS &wake_lock_attr.attr, &wake_unlock_attr.attr, #endif #ifdef CONFIG_PM_SLEEP_DEBUG &pm_test_attr.attr, &pm_print_times_attr.attr, &pm_wakeup_irq_attr.attr, &pm_debug_messages_attr.attr, #endif #endif #ifdef CONFIG_FREEZER &pm_freeze_timeout_attr.attr, #endif #if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION) &freeze_filesystems_attr.attr, #endif NULL, }; static const struct attribute_group attr_group = { .attrs = g, }; static const struct attribute_group *attr_groups[] = { &attr_group, #ifdef CONFIG_PM_SLEEP &suspend_attr_group, #endif NULL, }; struct workqueue_struct *pm_wq; EXPORT_SYMBOL_GPL(pm_wq); static int __init pm_start_workqueue(void) { pm_wq = alloc_workqueue("pm", WQ_FREEZABLE, 0); return pm_wq ? 0 : -ENOMEM; } static int __init pm_init(void) { int error = pm_start_workqueue(); if (error) return error; hibernate_image_size_init(); hibernate_reserved_size_init(); pm_states_init(); power_kobj = kobject_create_and_add("power", NULL); if (!power_kobj) return -ENOMEM; error = sysfs_create_groups(power_kobj, attr_groups); if (error) return error; pm_print_times_init(); return pm_autosleep_init(); } core_initcall(pm_init); |
| 2 1 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 | // SPDX-License-Identifier: GPL-2.0 /* * USB Serial Converter Generic functions * * Copyright (C) 2010 - 2013 Johan Hovold (jhovold@gmail.com) * Copyright (C) 1999 - 2002 Greg Kroah-Hartman (greg@kroah.com) */ #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/sysrq.h> #include <linux/tty.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include <linux/uaccess.h> #include <linux/kfifo.h> #include <linux/serial.h> #ifdef CONFIG_USB_SERIAL_GENERIC static __u16 vendor = 0x05f9; static __u16 product = 0xffff; module_param(vendor, ushort, 0); MODULE_PARM_DESC(vendor, "User specified USB idVendor"); module_param(product, ushort, 0); MODULE_PARM_DESC(product, "User specified USB idProduct"); static struct usb_device_id generic_device_ids[2]; /* Initially all zeroes. */ static int usb_serial_generic_probe(struct usb_serial *serial, const struct usb_device_id *id) { struct device *dev = &serial->interface->dev; dev_info(dev, "The \"generic\" usb-serial driver is only for testing and one-off prototypes.\n"); dev_info(dev, "Tell linux-usb@vger.kernel.org to add your device to a proper driver.\n"); return 0; } static int usb_serial_generic_calc_num_ports(struct usb_serial *serial, struct usb_serial_endpoints *epds) { struct device *dev = &serial->interface->dev; int num_ports; num_ports = max(epds->num_bulk_in, epds->num_bulk_out); if (num_ports == 0) { dev_err(dev, "device has no bulk endpoints\n"); return -ENODEV; } return num_ports; } static struct usb_serial_driver usb_serial_generic_device = { .driver = { .name = "generic", }, .id_table = generic_device_ids, .probe = usb_serial_generic_probe, .calc_num_ports = usb_serial_generic_calc_num_ports, .throttle = usb_serial_generic_throttle, .unthrottle = usb_serial_generic_unthrottle, .resume = usb_serial_generic_resume, }; static struct usb_serial_driver * const serial_drivers[] = { &usb_serial_generic_device, NULL }; #endif int usb_serial_generic_register(void) { int retval = 0; #ifdef CONFIG_USB_SERIAL_GENERIC generic_device_ids[0].idVendor = vendor; generic_device_ids[0].idProduct = product; generic_device_ids[0].match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_PRODUCT; retval = usb_serial_register_drivers(serial_drivers, "usbserial_generic", generic_device_ids); #endif return retval; } void usb_serial_generic_deregister(void) { #ifdef CONFIG_USB_SERIAL_GENERIC usb_serial_deregister_drivers(serial_drivers); #endif } int usb_serial_generic_open(struct tty_struct *tty, struct usb_serial_port *port) { int result = 0; clear_bit(USB_SERIAL_THROTTLED, &port->flags); if (port->bulk_in_size) result = usb_serial_generic_submit_read_urbs(port, GFP_KERNEL); return result; } EXPORT_SYMBOL_GPL(usb_serial_generic_open); void usb_serial_generic_close(struct usb_serial_port *port) { unsigned long flags; int i; if (port->bulk_out_size) { for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) usb_kill_urb(port->write_urbs[i]); spin_lock_irqsave(&port->lock, flags); kfifo_reset_out(&port->write_fifo); spin_unlock_irqrestore(&port->lock, flags); } if (port->bulk_in_size) { for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) usb_kill_urb(port->read_urbs[i]); } } EXPORT_SYMBOL_GPL(usb_serial_generic_close); int usb_serial_generic_prepare_write_buffer(struct usb_serial_port *port, void *dest, size_t size) { return kfifo_out_locked(&port->write_fifo, dest, size, &port->lock); } /** * usb_serial_generic_write_start - start writing buffered data * @port: usb-serial port * @mem_flags: flags to use for memory allocations * * Serialised using USB_SERIAL_WRITE_BUSY flag. * * Return: Zero on success or if busy, otherwise a negative errno value. */ int usb_serial_generic_write_start(struct usb_serial_port *port, gfp_t mem_flags) { struct urb *urb; int count, result; unsigned long flags; int i; if (test_and_set_bit_lock(USB_SERIAL_WRITE_BUSY, &port->flags)) return 0; retry: spin_lock_irqsave(&port->lock, flags); if (!port->write_urbs_free || !kfifo_len(&port->write_fifo)) { clear_bit_unlock(USB_SERIAL_WRITE_BUSY, &port->flags); spin_unlock_irqrestore(&port->lock, flags); return 0; } i = (int)find_first_bit(&port->write_urbs_free, ARRAY_SIZE(port->write_urbs)); spin_unlock_irqrestore(&port->lock, flags); urb = port->write_urbs[i]; count = port->serial->type->prepare_write_buffer(port, urb->transfer_buffer, port->bulk_out_size); urb->transfer_buffer_length = count; usb_serial_debug_data(&port->dev, __func__, count, urb->transfer_buffer); spin_lock_irqsave(&port->lock, flags); port->tx_bytes += count; spin_unlock_irqrestore(&port->lock, flags); clear_bit(i, &port->write_urbs_free); result = usb_submit_urb(urb, mem_flags); if (result) { dev_err_console(port, "%s - error submitting urb: %d\n", __func__, result); set_bit(i, &port->write_urbs_free); spin_lock_irqsave(&port->lock, flags); port->tx_bytes -= count; spin_unlock_irqrestore(&port->lock, flags); clear_bit_unlock(USB_SERIAL_WRITE_BUSY, &port->flags); return result; } goto retry; /* try sending off another urb */ } EXPORT_SYMBOL_GPL(usb_serial_generic_write_start); /** * usb_serial_generic_write - generic write function * @tty: tty for the port * @port: usb-serial port * @buf: data to write * @count: number of bytes to write * * Return: The number of characters buffered, which may be anything from * zero to @count, or a negative errno value. */ int usb_serial_generic_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count) { int result; if (!port->bulk_out_size) return -ENODEV; if (!count) return 0; count = kfifo_in_locked(&port->write_fifo, buf, count, &port->lock); result = usb_serial_generic_write_start(port, GFP_ATOMIC); if (result) return result; return count; } EXPORT_SYMBOL_GPL(usb_serial_generic_write); unsigned int usb_serial_generic_write_room(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; unsigned long flags; unsigned int room; if (!port->bulk_out_size) return 0; spin_lock_irqsave(&port->lock, flags); room = kfifo_avail(&port->write_fifo); spin_unlock_irqrestore(&port->lock, flags); dev_dbg(&port->dev, "%s - returns %u\n", __func__, room); return room; } unsigned int usb_serial_generic_chars_in_buffer(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; unsigned long flags; unsigned int chars; if (!port->bulk_out_size) return 0; spin_lock_irqsave(&port->lock, flags); chars = kfifo_len(&port->write_fifo) + port->tx_bytes; spin_unlock_irqrestore(&port->lock, flags); dev_dbg(&port->dev, "%s - returns %u\n", __func__, chars); return chars; } EXPORT_SYMBOL_GPL(usb_serial_generic_chars_in_buffer); void usb_serial_generic_wait_until_sent(struct tty_struct *tty, long timeout) { struct usb_serial_port *port = tty->driver_data; unsigned int bps; unsigned long period; unsigned long expire; bps = tty_get_baud_rate(tty); if (!bps) bps = 9600; /* B0 */ /* * Use a poll-period of roughly the time it takes to send one * character or at least one jiffy. */ period = max_t(unsigned long, (10 * HZ / bps), 1); if (timeout) period = min_t(unsigned long, period, timeout); dev_dbg(&port->dev, "%s - timeout = %u ms, period = %u ms\n", __func__, jiffies_to_msecs(timeout), jiffies_to_msecs(period)); expire = jiffies + timeout; while (!port->serial->type->tx_empty(port)) { schedule_timeout_interruptible(period); if (signal_pending(current)) break; if (timeout && time_after(jiffies, expire)) break; } } EXPORT_SYMBOL_GPL(usb_serial_generic_wait_until_sent); static int usb_serial_generic_submit_read_urb(struct usb_serial_port *port, int index, gfp_t mem_flags) { int res; if (!test_and_clear_bit(index, &port->read_urbs_free)) return 0; dev_dbg(&port->dev, "%s - urb %d\n", __func__, index); res = usb_submit_urb(port->read_urbs[index], mem_flags); if (res) { if (res != -EPERM && res != -ENODEV) { dev_err(&port->dev, "%s - usb_submit_urb failed: %d\n", __func__, res); } set_bit(index, &port->read_urbs_free); return res; } return 0; } int usb_serial_generic_submit_read_urbs(struct usb_serial_port *port, gfp_t mem_flags) { int res; int i; for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) { res = usb_serial_generic_submit_read_urb(port, i, mem_flags); if (res) goto err; } return 0; err: for (; i >= 0; --i) usb_kill_urb(port->read_urbs[i]); return res; } EXPORT_SYMBOL_GPL(usb_serial_generic_submit_read_urbs); void usb_serial_generic_process_read_urb(struct urb *urb) { struct usb_serial_port *port = urb->context; char *ch = urb->transfer_buffer; int i; if (!urb->actual_length) return; /* * The per character mucking around with sysrq path it too slow for * stuff like 3G modems, so shortcircuit it in the 99.9999999% of * cases where the USB serial is not a console anyway. */ if (port->sysrq) { for (i = 0; i < urb->actual_length; i++, ch++) { if (!usb_serial_handle_sysrq_char(port, *ch)) tty_insert_flip_char(&port->port, *ch, TTY_NORMAL); } } else { tty_insert_flip_string(&port->port, ch, urb->actual_length); } tty_flip_buffer_push(&port->port); } EXPORT_SYMBOL_GPL(usb_serial_generic_process_read_urb); void usb_serial_generic_read_bulk_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; bool stopped = false; int status = urb->status; int i; for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) { if (urb == port->read_urbs[i]) break; } dev_dbg(&port->dev, "%s - urb %d, len %d\n", __func__, i, urb->actual_length); switch (status) { case 0: usb_serial_debug_data(&port->dev, __func__, urb->actual_length, data); port->serial->type->process_read_urb(urb); break; case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: dev_dbg(&port->dev, "%s - urb stopped: %d\n", __func__, status); stopped = true; break; case -EPIPE: dev_err(&port->dev, "%s - urb stopped: %d\n", __func__, status); stopped = true; break; default: dev_dbg(&port->dev, "%s - nonzero urb status: %d\n", __func__, status); break; } /* * Make sure URB processing is done before marking as free to avoid * racing with unthrottle() on another CPU. Matches the barriers * implied by the test_and_clear_bit() in * usb_serial_generic_submit_read_urb(). */ smp_mb__before_atomic(); set_bit(i, &port->read_urbs_free); /* * Make sure URB is marked as free before checking the throttled flag * to avoid racing with unthrottle() on another CPU. Matches the * smp_mb__after_atomic() in unthrottle(). */ smp_mb__after_atomic(); if (stopped) return; if (test_bit(USB_SERIAL_THROTTLED, &port->flags)) return; usb_serial_generic_submit_read_urb(port, i, GFP_ATOMIC); } EXPORT_SYMBOL_GPL(usb_serial_generic_read_bulk_callback); void usb_serial_generic_write_bulk_callback(struct urb *urb) { unsigned long flags; struct usb_serial_port *port = urb->context; int status = urb->status; int i; for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) { if (port->write_urbs[i] == urb) break; } spin_lock_irqsave(&port->lock, flags); port->tx_bytes -= urb->transfer_buffer_length; set_bit(i, &port->write_urbs_free); spin_unlock_irqrestore(&port->lock, flags); switch (status) { case 0: break; case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: dev_dbg(&port->dev, "%s - urb stopped: %d\n", __func__, status); return; case -EPIPE: dev_err_console(port, "%s - urb stopped: %d\n", __func__, status); return; default: dev_err_console(port, "%s - nonzero urb status: %d\n", __func__, status); break; } usb_serial_generic_write_start(port, GFP_ATOMIC); usb_serial_port_softint(port); } EXPORT_SYMBOL_GPL(usb_serial_generic_write_bulk_callback); void usb_serial_generic_throttle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; set_bit(USB_SERIAL_THROTTLED, &port->flags); } EXPORT_SYMBOL_GPL(usb_serial_generic_throttle); void usb_serial_generic_unthrottle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; clear_bit(USB_SERIAL_THROTTLED, &port->flags); /* * Matches the smp_mb__after_atomic() in * usb_serial_generic_read_bulk_callback(). */ smp_mb__after_atomic(); usb_serial_generic_submit_read_urbs(port, GFP_KERNEL); } EXPORT_SYMBOL_GPL(usb_serial_generic_unthrottle); static bool usb_serial_generic_msr_changed(struct tty_struct *tty, unsigned long arg, struct async_icount *cprev) { struct usb_serial_port *port = tty->driver_data; struct async_icount cnow; unsigned long flags; bool ret; /* * Use tty-port initialised flag to detect all hangups including the * one generated at USB-device disconnect. */ if (!tty_port_initialized(&port->port)) return true; spin_lock_irqsave(&port->lock, flags); cnow = port->icount; /* atomic copy*/ spin_unlock_irqrestore(&port->lock, flags); ret = ((arg & TIOCM_RNG) && (cnow.rng != cprev->rng)) || ((arg & TIOCM_DSR) && (cnow.dsr != cprev->dsr)) || ((arg & TIOCM_CD) && (cnow.dcd != cprev->dcd)) || ((arg & TIOCM_CTS) && (cnow.cts != cprev->cts)); *cprev = cnow; return ret; } int usb_serial_generic_tiocmiwait(struct tty_struct *tty, unsigned long arg) { struct usb_serial_port *port = tty->driver_data; struct async_icount cnow; unsigned long flags; int ret; spin_lock_irqsave(&port->lock, flags); cnow = port->icount; /* atomic copy */ spin_unlock_irqrestore(&port->lock, flags); ret = wait_event_interruptible(port->port.delta_msr_wait, usb_serial_generic_msr_changed(tty, arg, &cnow)); if (!ret && !tty_port_initialized(&port->port)) ret = -EIO; return ret; } EXPORT_SYMBOL_GPL(usb_serial_generic_tiocmiwait); int usb_serial_generic_get_icount(struct tty_struct *tty, struct serial_icounter_struct *icount) { struct usb_serial_port *port = tty->driver_data; struct async_icount cnow; unsigned long flags; spin_lock_irqsave(&port->lock, flags); cnow = port->icount; /* atomic copy */ spin_unlock_irqrestore(&port->lock, flags); icount->cts = cnow.cts; icount->dsr = cnow.dsr; icount->rng = cnow.rng; icount->dcd = cnow.dcd; icount->tx = cnow.tx; icount->rx = cnow.rx; icount->frame = cnow.frame; icount->parity = cnow.parity; icount->overrun = cnow.overrun; icount->brk = cnow.brk; icount->buf_overrun = cnow.buf_overrun; return 0; } EXPORT_SYMBOL_GPL(usb_serial_generic_get_icount); #if defined(CONFIG_USB_SERIAL_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) int usb_serial_handle_sysrq_char(struct usb_serial_port *port, unsigned int ch) { if (port->sysrq) { if (ch && time_before(jiffies, port->sysrq)) { handle_sysrq(ch); port->sysrq = 0; return 1; } port->sysrq = 0; } return 0; } EXPORT_SYMBOL_GPL(usb_serial_handle_sysrq_char); int usb_serial_handle_break(struct usb_serial_port *port) { if (!port->port.console) return 0; if (!port->sysrq) { port->sysrq = jiffies + HZ*5; return 1; } port->sysrq = 0; return 0; } EXPORT_SYMBOL_GPL(usb_serial_handle_break); #endif /** * usb_serial_handle_dcd_change - handle a change of carrier detect state * @port: usb-serial port * @tty: tty for the port * @status: new carrier detect status, nonzero if active */ void usb_serial_handle_dcd_change(struct usb_serial_port *port, struct tty_struct *tty, unsigned int status) { dev_dbg(&port->dev, "%s - status %d\n", __func__, status); if (tty) { struct tty_ldisc *ld = tty_ldisc_ref(tty); if (ld) { if (ld->ops->dcd_change) ld->ops->dcd_change(tty, status); tty_ldisc_deref(ld); } } if (status) wake_up_interruptible(&port->port.open_wait); else if (tty && !C_CLOCAL(tty)) tty_hangup(tty); } EXPORT_SYMBOL_GPL(usb_serial_handle_dcd_change); int usb_serial_generic_resume(struct usb_serial *serial) { struct usb_serial_port *port; int i, c = 0, r; for (i = 0; i < serial->num_ports; i++) { port = serial->port[i]; if (!tty_port_initialized(&port->port)) continue; if (port->bulk_in_size) { r = usb_serial_generic_submit_read_urbs(port, GFP_NOIO); if (r < 0) c++; } if (port->bulk_out_size) { r = usb_serial_generic_write_start(port, GFP_NOIO); if (r < 0) c++; } } return c ? -EIO : 0; } EXPORT_SYMBOL_GPL(usb_serial_generic_resume); |
| 253 243 253 252 4328 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_JUMP_LABEL_H #define _LINUX_JUMP_LABEL_H /* * Jump label support * * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com> * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra * * DEPRECATED API: * * The use of 'struct static_key' directly, is now DEPRECATED. In addition * static_key_{true,false}() is also DEPRECATED. IE DO NOT use the following: * * struct static_key false = STATIC_KEY_INIT_FALSE; * struct static_key true = STATIC_KEY_INIT_TRUE; * static_key_true() * static_key_false() * * The updated API replacements are: * * DEFINE_STATIC_KEY_TRUE(key); * DEFINE_STATIC_KEY_FALSE(key); * DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count); * DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count); * static_branch_likely() * static_branch_unlikely() * * Jump labels provide an interface to generate dynamic branches using * self-modifying code. Assuming toolchain and architecture support, if we * define a "key" that is initially false via "DEFINE_STATIC_KEY_FALSE(key)", * an "if (static_branch_unlikely(&key))" statement is an unconditional branch * (which defaults to false - and the true block is placed out of line). * Similarly, we can define an initially true key via * "DEFINE_STATIC_KEY_TRUE(key)", and use it in the same * "if (static_branch_unlikely(&key))", in which case we will generate an * unconditional branch to the out-of-line true branch. Keys that are * initially true or false can be using in both static_branch_unlikely() * and static_branch_likely() statements. * * At runtime we can change the branch target by setting the key * to true via a call to static_branch_enable(), or false using * static_branch_disable(). If the direction of the branch is switched by * these calls then we run-time modify the branch target via a * no-op -> jump or jump -> no-op conversion. For example, for an * initially false key that is used in an "if (static_branch_unlikely(&key))" * statement, setting the key to true requires us to patch in a jump * to the out-of-line of true branch. * * In addition to static_branch_{enable,disable}, we can also reference count * the key or branch direction via static_branch_{inc,dec}. Thus, * static_branch_inc() can be thought of as a 'make more true' and * static_branch_dec() as a 'make more false'. * * Since this relies on modifying code, the branch modifying functions * must be considered absolute slow paths (machine wide synchronization etc.). * OTOH, since the affected branches are unconditional, their runtime overhead * will be absolutely minimal, esp. in the default (off) case where the total * effect is a single NOP of appropriate size. The on case will patch in a jump * to the out-of-line block. * * When the control is directly exposed to userspace, it is prudent to delay the * decrement to avoid high frequency code modifications which can (and do) * cause significant performance degradation. Struct static_key_deferred and * static_key_slow_dec_deferred() provide for this. * * Lacking toolchain and or architecture support, static keys fall back to a * simple conditional branch. * * Additional babbling in: Documentation/staging/static-keys.rst */ #ifndef __ASSEMBLY__ #include <linux/types.h> #include <linux/compiler.h> #include <linux/cleanup.h> extern bool static_key_initialized; #define STATIC_KEY_CHECK_USE(key) WARN(!static_key_initialized, \ "%s(): static key '%pS' used before call to jump_label_init()", \ __func__, (key)) struct static_key { atomic_t enabled; #ifdef CONFIG_JUMP_LABEL /* * Note: * To make anonymous unions work with old compilers, the static * initialization of them requires brackets. This creates a dependency * on the order of the struct with the initializers. If any fields * are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need * to be modified. * * bit 0 => 1 if key is initially true * 0 if initially false * bit 1 => 1 if points to struct static_key_mod * 0 if points to struct jump_entry */ union { unsigned long type; struct jump_entry *entries; struct static_key_mod *next; }; #endif /* CONFIG_JUMP_LABEL */ }; #endif /* __ASSEMBLY__ */ #ifdef CONFIG_JUMP_LABEL #include <asm/jump_label.h> #ifndef __ASSEMBLY__ #ifdef CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE struct jump_entry { s32 code; s32 target; long key; // key may be far away from the core kernel under KASLR }; static inline unsigned long jump_entry_code(const struct jump_entry *entry) { return (unsigned long)&entry->code + entry->code; } static inline unsigned long jump_entry_target(const struct jump_entry *entry) { return (unsigned long)&entry->target + entry->target; } static inline struct static_key *jump_entry_key(const struct jump_entry *entry) { long offset = entry->key & ~3L; return (struct static_key *)((unsigned long)&entry->key + offset); } #else static inline unsigned long jump_entry_code(const struct jump_entry *entry) { return entry->code; } static inline unsigned long jump_entry_target(const struct jump_entry *entry) { return entry->target; } static inline struct static_key *jump_entry_key(const struct jump_entry *entry) { return (struct static_key *)((unsigned long)entry->key & ~3UL); } #endif static inline bool jump_entry_is_branch(const struct jump_entry *entry) { return (unsigned long)entry->key & 1UL; } static inline bool jump_entry_is_init(const struct jump_entry *entry) { return (unsigned long)entry->key & 2UL; } static inline void jump_entry_set_init(struct jump_entry *entry, bool set) { if (set) entry->key |= 2; else entry->key &= ~2; } static inline int jump_entry_size(struct jump_entry *entry) { #ifdef JUMP_LABEL_NOP_SIZE return JUMP_LABEL_NOP_SIZE; #else return arch_jump_entry_size(entry); #endif } #endif #endif #ifndef __ASSEMBLY__ enum jump_label_type { JUMP_LABEL_NOP = 0, JUMP_LABEL_JMP, }; struct module; #ifdef CONFIG_JUMP_LABEL #define JUMP_TYPE_FALSE 0UL #define JUMP_TYPE_TRUE 1UL #define JUMP_TYPE_LINKED 2UL #define JUMP_TYPE_MASK 3UL static __always_inline bool static_key_false(struct static_key *key) { return arch_static_branch(key, false); } static __always_inline bool static_key_true(struct static_key *key) { return !arch_static_branch(key, true); } extern struct jump_entry __start___jump_table[]; extern struct jump_entry __stop___jump_table[]; extern void jump_label_init(void); extern void jump_label_init_ro(void); extern void jump_label_lock(void); extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type); extern bool arch_jump_label_transform_queue(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_transform_apply(void); extern int jump_label_text_reserved(void *start, void *end); extern bool static_key_slow_inc(struct static_key *key); extern bool static_key_fast_inc_not_disabled(struct static_key *key); extern void static_key_slow_dec(struct static_key *key); extern bool static_key_slow_inc_cpuslocked(struct static_key *key); extern void static_key_slow_dec_cpuslocked(struct static_key *key); extern int static_key_count(struct static_key *key); extern void static_key_enable(struct static_key *key); extern void static_key_disable(struct static_key *key); extern void static_key_enable_cpuslocked(struct static_key *key); extern void static_key_disable_cpuslocked(struct static_key *key); extern enum jump_label_type jump_label_init_type(struct jump_entry *entry); /* * We should be using ATOMIC_INIT() for initializing .enabled, but * the inclusion of atomic.h is problematic for inclusion of jump_label.h * in 'low-level' headers. Thus, we are initializing .enabled with a * raw value, but have added a BUILD_BUG_ON() to catch any issues in * jump_label_init() see: kernel/jump_label.c. */ #define STATIC_KEY_INIT_TRUE \ { .enabled = { 1 }, \ { .type = JUMP_TYPE_TRUE } } #define STATIC_KEY_INIT_FALSE \ { .enabled = { 0 }, \ { .type = JUMP_TYPE_FALSE } } #else /* !CONFIG_JUMP_LABEL */ #include <linux/atomic.h> #include <linux/bug.h> static __always_inline int static_key_count(struct static_key *key) { return raw_atomic_read(&key->enabled); } static __always_inline void jump_label_init(void) { static_key_initialized = true; } static __always_inline void jump_label_init_ro(void) { } static __always_inline bool static_key_false(struct static_key *key) { if (unlikely_notrace(static_key_count(key) > 0)) return true; return false; } static __always_inline bool static_key_true(struct static_key *key) { if (likely_notrace(static_key_count(key) > 0)) return true; return false; } static inline bool static_key_fast_inc_not_disabled(struct static_key *key) { int v; STATIC_KEY_CHECK_USE(key); /* * Prevent key->enabled getting negative to follow the same semantics * as for CONFIG_JUMP_LABEL=y, see kernel/jump_label.c comment. */ v = atomic_read(&key->enabled); do { if (v < 0 || (v + 1) < 0) return false; } while (!likely(atomic_try_cmpxchg(&key->enabled, &v, v + 1))); return true; } #define static_key_slow_inc(key) static_key_fast_inc_not_disabled(key) static inline void static_key_slow_dec(struct static_key *key) { STATIC_KEY_CHECK_USE(key); atomic_dec(&key->enabled); } #define static_key_slow_inc_cpuslocked(key) static_key_slow_inc(key) #define static_key_slow_dec_cpuslocked(key) static_key_slow_dec(key) static inline int jump_label_text_reserved(void *start, void *end) { return 0; } static inline void jump_label_lock(void) {} static inline void jump_label_unlock(void) {} static inline void static_key_enable(struct static_key *key) { STATIC_KEY_CHECK_USE(key); if (atomic_read(&key->enabled) != 0) { WARN_ON_ONCE(atomic_read(&key->enabled) != 1); return; } atomic_set(&key->enabled, 1); } static inline void static_key_disable(struct static_key *key) { STATIC_KEY_CHECK_USE(key); if (atomic_read(&key->enabled) != 1) { WARN_ON_ONCE(atomic_read(&key->enabled) != 0); return; } atomic_set(&key->enabled, 0); } #define static_key_enable_cpuslocked(k) static_key_enable((k)) #define static_key_disable_cpuslocked(k) static_key_disable((k)) #define STATIC_KEY_INIT_TRUE { .enabled = ATOMIC_INIT(1) } #define STATIC_KEY_INIT_FALSE { .enabled = ATOMIC_INIT(0) } #endif /* CONFIG_JUMP_LABEL */ DEFINE_LOCK_GUARD_0(jump_label_lock, jump_label_lock(), jump_label_unlock()) #define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE #define jump_label_enabled static_key_enabled /* -------------------------------------------------------------------------- */ /* * Two type wrappers around static_key, such that we can use compile time * type differentiation to emit the right code. * * All the below code is macros in order to play type games. */ struct static_key_true { struct static_key key; }; struct static_key_false { struct static_key key; }; #define STATIC_KEY_TRUE_INIT (struct static_key_true) { .key = STATIC_KEY_INIT_TRUE, } #define STATIC_KEY_FALSE_INIT (struct static_key_false){ .key = STATIC_KEY_INIT_FALSE, } #define DEFINE_STATIC_KEY_TRUE(name) \ struct static_key_true name = STATIC_KEY_TRUE_INIT #define DEFINE_STATIC_KEY_TRUE_RO(name) \ struct static_key_true name __ro_after_init = STATIC_KEY_TRUE_INIT #define DECLARE_STATIC_KEY_TRUE(name) \ extern struct static_key_true name #define DEFINE_STATIC_KEY_FALSE(name) \ struct static_key_false name = STATIC_KEY_FALSE_INIT #define DEFINE_STATIC_KEY_FALSE_RO(name) \ struct static_key_false name __ro_after_init = STATIC_KEY_FALSE_INIT #define DECLARE_STATIC_KEY_FALSE(name) \ extern struct static_key_false name #define DEFINE_STATIC_KEY_ARRAY_TRUE(name, count) \ struct static_key_true name[count] = { \ [0 ... (count) - 1] = STATIC_KEY_TRUE_INIT, \ } #define DEFINE_STATIC_KEY_ARRAY_FALSE(name, count) \ struct static_key_false name[count] = { \ [0 ... (count) - 1] = STATIC_KEY_FALSE_INIT, \ } #define _DEFINE_STATIC_KEY_1(name) DEFINE_STATIC_KEY_TRUE(name) #define _DEFINE_STATIC_KEY_0(name) DEFINE_STATIC_KEY_FALSE(name) #define DEFINE_STATIC_KEY_MAYBE(cfg, name) \ __PASTE(_DEFINE_STATIC_KEY_, IS_ENABLED(cfg))(name) #define _DEFINE_STATIC_KEY_RO_1(name) DEFINE_STATIC_KEY_TRUE_RO(name) #define _DEFINE_STATIC_KEY_RO_0(name) DEFINE_STATIC_KEY_FALSE_RO(name) #define DEFINE_STATIC_KEY_MAYBE_RO(cfg, name) \ __PASTE(_DEFINE_STATIC_KEY_RO_, IS_ENABLED(cfg))(name) #define _DECLARE_STATIC_KEY_1(name) DECLARE_STATIC_KEY_TRUE(name) #define _DECLARE_STATIC_KEY_0(name) DECLARE_STATIC_KEY_FALSE(name) #define DECLARE_STATIC_KEY_MAYBE(cfg, name) \ __PASTE(_DECLARE_STATIC_KEY_, IS_ENABLED(cfg))(name) extern bool ____wrong_branch_error(void); #define static_key_enabled(x) \ ({ \ if (!__builtin_types_compatible_p(typeof(*x), struct static_key) && \ !__builtin_types_compatible_p(typeof(*x), struct static_key_true) &&\ !__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ ____wrong_branch_error(); \ static_key_count((struct static_key *)x) > 0; \ }) #ifdef CONFIG_JUMP_LABEL /* * Combine the right initial value (type) with the right branch order * to generate the desired result. * * * type\branch| likely (1) | unlikely (0) * -----------+-----------------------+------------------ * | | * true (1) | ... | ... * | NOP | JMP L * | <br-stmts> | 1: ... * | L: ... | * | | * | | L: <br-stmts> * | | jmp 1b * | | * -----------+-----------------------+------------------ * | | * false (0) | ... | ... * | JMP L | NOP * | <br-stmts> | 1: ... * | L: ... | * | | * | | L: <br-stmts> * | | jmp 1b * | | * -----------+-----------------------+------------------ * * The initial value is encoded in the LSB of static_key::entries, * type: 0 = false, 1 = true. * * The branch type is encoded in the LSB of jump_entry::key, * branch: 0 = unlikely, 1 = likely. * * This gives the following logic table: * * enabled type branch instuction * -----------------------------+----------- * 0 0 0 | NOP * 0 0 1 | JMP * 0 1 0 | NOP * 0 1 1 | JMP * * 1 0 0 | JMP * 1 0 1 | NOP * 1 1 0 | JMP * 1 1 1 | NOP * * Which gives the following functions: * * dynamic: instruction = enabled ^ branch * static: instruction = type ^ branch * * See jump_label_type() / jump_label_init_type(). */ #define static_branch_likely(x) \ ({ \ bool branch; \ if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \ branch = !arch_static_branch(&(x)->key, true); \ else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ branch = !arch_static_branch_jump(&(x)->key, true); \ else \ branch = ____wrong_branch_error(); \ likely_notrace(branch); \ }) #define static_branch_unlikely(x) \ ({ \ bool branch; \ if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \ branch = arch_static_branch_jump(&(x)->key, false); \ else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ branch = arch_static_branch(&(x)->key, false); \ else \ branch = ____wrong_branch_error(); \ unlikely_notrace(branch); \ }) #else /* !CONFIG_JUMP_LABEL */ #define static_branch_likely(x) likely_notrace(static_key_enabled(&(x)->key)) #define static_branch_unlikely(x) unlikely_notrace(static_key_enabled(&(x)->key)) #endif /* CONFIG_JUMP_LABEL */ #define static_branch_maybe(config, x) \ (IS_ENABLED(config) ? static_branch_likely(x) \ : static_branch_unlikely(x)) /* * Advanced usage; refcount, branch is enabled when: count != 0 */ #define static_branch_inc(x) static_key_slow_inc(&(x)->key) #define static_branch_dec(x) static_key_slow_dec(&(x)->key) #define static_branch_inc_cpuslocked(x) static_key_slow_inc_cpuslocked(&(x)->key) #define static_branch_dec_cpuslocked(x) static_key_slow_dec_cpuslocked(&(x)->key) /* * Normal usage; boolean enable/disable. */ #define static_branch_enable(x) static_key_enable(&(x)->key) #define static_branch_disable(x) static_key_disable(&(x)->key) #define static_branch_enable_cpuslocked(x) static_key_enable_cpuslocked(&(x)->key) #define static_branch_disable_cpuslocked(x) static_key_disable_cpuslocked(&(x)->key) #endif /* __ASSEMBLY__ */ #endif /* _LINUX_JUMP_LABEL_H */ |
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_MEMORY_HOTPLUG_H #define __LINUX_MEMORY_HOTPLUG_H #include <linux/mmzone.h> #include <linux/spinlock.h> #include <linux/notifier.h> #include <linux/bug.h> struct page; struct zone; struct pglist_data; struct mem_section; struct memory_group; struct resource; struct vmem_altmap; struct dev_pagemap; #ifdef CONFIG_MEMORY_HOTPLUG struct page *pfn_to_online_page(unsigned long pfn); /* Types for control the zone type of onlined and offlined memory */ enum { /* Offline the memory. */ MMOP_OFFLINE = 0, /* Online the memory. Zone depends, see default_zone_for_pfn(). */ MMOP_ONLINE, /* Online the memory to ZONE_NORMAL. */ MMOP_ONLINE_KERNEL, /* Online the memory to ZONE_MOVABLE. */ MMOP_ONLINE_MOVABLE, }; /* Flags for add_memory() and friends to specify memory hotplug details. */ typedef int __bitwise mhp_t; /* No special request */ #define MHP_NONE ((__force mhp_t)0) /* * Allow merging of the added System RAM resource with adjacent, * mergeable resources. After a successful call to add_memory_resource() * with this flag set, the resource pointer must no longer be used as it * might be stale, or the resource might have changed. */ #define MHP_MERGE_RESOURCE ((__force mhp_t)BIT(0)) /* * We want memmap (struct page array) to be self contained. * To do so, we will use the beginning of the hot-added range to build * the page tables for the memmap array that describes the entire range. * Only selected architectures support it with SPARSE_VMEMMAP. * This is only a hint, the core kernel can decide to not do this based on * different alignment checks. */ #define MHP_MEMMAP_ON_MEMORY ((__force mhp_t)BIT(1)) /* * The nid field specifies a memory group id (mgid) instead. The memory group * implies the node id (nid). */ #define MHP_NID_IS_MGID ((__force mhp_t)BIT(2)) /* * The hotplugged memory is completely inaccessible while the memory is * offline. The memory provider will handle MEM_PREPARE_ONLINE / * MEM_FINISH_OFFLINE notifications and make the memory accessible. * * This flag is only relevant when used along with MHP_MEMMAP_ON_MEMORY, * because the altmap cannot be written (e.g., poisoned) when adding * memory -- before it is set online. * * This allows for adding memory with an altmap that is not currently * made available by a hypervisor. When onlining that memory, the * hypervisor can be instructed to make that memory available, and * the onlining phase will not require any memory allocations, which is * helpful in low-memory situations. */ #define MHP_OFFLINE_INACCESSIBLE ((__force mhp_t)BIT(3)) /* * Extended parameters for memory hotplug: * altmap: alternative allocator for memmap array (optional) * pgprot: page protection flags to apply to newly created page tables * (required) */ struct mhp_params { struct vmem_altmap *altmap; pgprot_t pgprot; struct dev_pagemap *pgmap; }; bool mhp_range_allowed(u64 start, u64 size, bool need_mapping); struct range mhp_get_pluggable_range(bool need_mapping); bool mhp_supports_memmap_on_memory(void); /* * Zone resizing functions * * Note: any attempt to resize a zone should has pgdat_resize_lock() * zone_span_writelock() both held. This ensure the size of a zone * can't be changed while pgdat_resize_lock() held. */ static inline unsigned zone_span_seqbegin(struct zone *zone) { return read_seqbegin(&zone->span_seqlock); } static inline int zone_span_seqretry(struct zone *zone, unsigned iv) { return read_seqretry(&zone->span_seqlock, iv); } static inline void zone_span_writelock(struct zone *zone) { write_seqlock(&zone->span_seqlock); } static inline void zone_span_writeunlock(struct zone *zone) { write_sequnlock(&zone->span_seqlock); } static inline void zone_seqlock_init(struct zone *zone) { seqlock_init(&zone->span_seqlock); } extern void adjust_present_page_count(struct page *page, struct memory_group *group, long nr_pages); /* VM interface that may be used by firmware interface */ extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages, struct zone *zone, bool mhp_off_inaccessible); extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages); extern int online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *zone, struct memory_group *group); extern unsigned long __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn); typedef void (*online_page_callback_t)(struct page *page, unsigned int order); extern void generic_online_page(struct page *page, unsigned int order); extern int set_online_page_callback(online_page_callback_t callback); extern int restore_online_page_callback(online_page_callback_t callback); extern int try_online_node(int nid); extern int arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params); extern u64 max_mem_size; extern int mhp_online_type_from_str(const char *str); /* If movable_node boot option specified */ extern bool movable_node_enabled; static inline bool movable_node_is_enabled(void) { return movable_node_enabled; } extern void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap); extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap); /* reasonably generic interface to expand the physical pages */ extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, struct mhp_params *params); #ifndef CONFIG_ARCH_HAS_ADD_PAGES static inline int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, struct mhp_params *params) { return __add_pages(nid, start_pfn, nr_pages, params); } #else /* ARCH_HAS_ADD_PAGES */ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, struct mhp_params *params); #endif /* ARCH_HAS_ADD_PAGES */ void get_online_mems(void); void put_online_mems(void); void mem_hotplug_begin(void); void mem_hotplug_done(void); /* See kswapd_is_running() */ static inline void pgdat_kswapd_lock(pg_data_t *pgdat) { mutex_lock(&pgdat->kswapd_lock); } static inline void pgdat_kswapd_unlock(pg_data_t *pgdat) { mutex_unlock(&pgdat->kswapd_lock); } static inline void pgdat_kswapd_lock_init(pg_data_t *pgdat) { mutex_init(&pgdat->kswapd_lock); } #else /* ! CONFIG_MEMORY_HOTPLUG */ #define pfn_to_online_page(pfn) \ ({ \ struct page *___page = NULL; \ if (pfn_valid(pfn)) \ ___page = pfn_to_page(pfn); \ ___page; \ }) static inline unsigned zone_span_seqbegin(struct zone *zone) { return 0; } static inline int zone_span_seqretry(struct zone *zone, unsigned iv) { return 0; } static inline void zone_span_writelock(struct zone *zone) {} static inline void zone_span_writeunlock(struct zone *zone) {} static inline void zone_seqlock_init(struct zone *zone) {} static inline int try_online_node(int nid) { return 0; } static inline void get_online_mems(void) {} static inline void put_online_mems(void) {} static inline void mem_hotplug_begin(void) {} static inline void mem_hotplug_done(void) {} static inline bool movable_node_is_enabled(void) { return false; } static inline bool mhp_supports_memmap_on_memory(void) { return false; } static inline void pgdat_kswapd_lock(pg_data_t *pgdat) {} static inline void pgdat_kswapd_unlock(pg_data_t *pgdat) {} static inline void pgdat_kswapd_lock_init(pg_data_t *pgdat) {} #endif /* ! CONFIG_MEMORY_HOTPLUG */ /* * Keep this declaration outside CONFIG_MEMORY_HOTPLUG as some * platforms might override and use arch_get_mappable_range() * for internal non memory hotplug purposes. */ struct range arch_get_mappable_range(void); #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT) /* * pgdat resizing functions */ static inline void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags) { spin_lock_irqsave(&pgdat->node_size_lock, *flags); } static inline void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags) { spin_unlock_irqrestore(&pgdat->node_size_lock, *flags); } static inline void pgdat_resize_init(struct pglist_data *pgdat) { spin_lock_init(&pgdat->node_size_lock); } #else /* !(CONFIG_MEMORY_HOTPLUG || CONFIG_DEFERRED_STRUCT_PAGE_INIT) */ /* * Stub functions for when hotplug is off */ static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {} static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {} static inline void pgdat_resize_init(struct pglist_data *pgdat) {} #endif /* !(CONFIG_MEMORY_HOTPLUG || CONFIG_DEFERRED_STRUCT_PAGE_INIT) */ #ifdef CONFIG_MEMORY_HOTREMOVE extern void try_offline_node(int nid); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages, struct zone *zone, struct memory_group *group); extern int remove_memory(u64 start, u64 size); extern void __remove_memory(u64 start, u64 size); extern int offline_and_remove_memory(u64 start, u64 size); #else static inline void try_offline_node(int nid) {} static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages, struct zone *zone, struct memory_group *group) { return -EINVAL; } static inline int remove_memory(u64 start, u64 size) { return -EBUSY; } static inline void __remove_memory(u64 start, u64 size) {} #endif /* CONFIG_MEMORY_HOTREMOVE */ #ifdef CONFIG_MEMORY_HOTPLUG /* Default online_type (MMOP_*) when new memory blocks are added. */ extern int mhp_get_default_online_type(void); extern void mhp_set_default_online_type(int online_type); extern void __ref free_area_init_core_hotplug(struct pglist_data *pgdat); extern int __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags); extern int add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags); extern int add_memory_resource(int nid, struct resource *resource, mhp_t mhp_flags); extern int add_memory_driver_managed(int nid, u64 start, u64 size, const char *resource_name, mhp_t mhp_flags); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap, int migratetype, bool isolate_pageblock); extern void remove_pfn_range_from_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages); extern int sparse_add_section(int nid, unsigned long pfn, unsigned long nr_pages, struct vmem_altmap *altmap, struct dev_pagemap *pgmap); extern void sparse_remove_section(unsigned long pfn, unsigned long nr_pages, struct vmem_altmap *altmap); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); extern struct zone *zone_for_pfn_range(int online_type, int nid, struct memory_group *group, unsigned long start_pfn, unsigned long nr_pages); extern int arch_create_linear_mapping(int nid, u64 start, u64 size, struct mhp_params *params); void arch_remove_linear_mapping(u64 start, u64 size); #endif /* CONFIG_MEMORY_HOTPLUG */ #endif /* __LINUX_MEMORY_HOTPLUG_H */ |
| 27 19 27 27 27 27 27 27 9 9 9 9 9 9 9 9 9 9 9 9 27 27 19 9 9 9 9 9 9 9 9 9 6 6 6 8 9 9 9 9 9 9 9 9 9 9 19 19 19 18 19 19 19 27 27 19 27 27 26 9 9 9 9 27 27 27 27 9 27 27 27 27 27 27 27 25 23 23 27 26 27 27 27 27 27 27 27 27 15 26 26 19 27 9 27 27 27 15 27 27 15 27 27 27 27 9 1 27 27 27 27 27 27 27 27 27 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 | // SPDX-License-Identifier: GPL-2.0 /* * Generic ring buffer * * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> */ #include <linux/trace_recursion.h> #include <linux/trace_events.h> #include <linux/ring_buffer.h> #include <linux/trace_clock.h> #include <linux/sched/clock.h> #include <linux/cacheflush.h> #include <linux/trace_seq.h> #include <linux/spinlock.h> #include <linux/irq_work.h> #include <linux/security.h> #include <linux/uaccess.h> #include <linux/hardirq.h> #include <linux/kthread.h> /* for self test */ #include <linux/module.h> #include <linux/percpu.h> #include <linux/mutex.h> #include <linux/delay.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/hash.h> #include <linux/list.h> #include <linux/cpu.h> #include <linux/oom.h> #include <linux/mm.h> #include <asm/local64.h> #include <asm/local.h> #include <asm/setup.h> #include "trace.h" /* * The "absolute" timestamp in the buffer is only 59 bits. * If a clock has the 5 MSBs set, it needs to be saved and * reinserted. */ #define TS_MSB (0xf8ULL << 56) #define ABS_TS_MASK (~TS_MSB) static void update_pages_handler(struct work_struct *work); #define RING_BUFFER_META_MAGIC 0xBADFEED struct ring_buffer_meta { int magic; int struct_sizes; unsigned long total_size; unsigned long buffers_offset; }; struct ring_buffer_cpu_meta { unsigned long first_buffer; unsigned long head_buffer; unsigned long commit_buffer; __u32 subbuf_size; __u32 nr_subbufs; int buffers[]; }; /* * The ring buffer header is special. We must manually up keep it. */ int ring_buffer_print_entry_header(struct trace_seq *s) { trace_seq_puts(s, "# compressed entry header\n"); trace_seq_puts(s, "\ttype_len : 5 bits\n"); trace_seq_puts(s, "\ttime_delta : 27 bits\n"); trace_seq_puts(s, "\tarray : 32 bits\n"); trace_seq_putc(s, '\n'); trace_seq_printf(s, "\tpadding : type == %d\n", RINGBUF_TYPE_PADDING); trace_seq_printf(s, "\ttime_extend : type == %d\n", RINGBUF_TYPE_TIME_EXTEND); trace_seq_printf(s, "\ttime_stamp : type == %d\n", RINGBUF_TYPE_TIME_STAMP); trace_seq_printf(s, "\tdata max type_len == %d\n", RINGBUF_TYPE_DATA_TYPE_LEN_MAX); return !trace_seq_has_overflowed(s); } /* * The ring buffer is made up of a list of pages. A separate list of pages is * allocated for each CPU. A writer may only write to a buffer that is * associated with the CPU it is currently executing on. A reader may read * from any per cpu buffer. * * The reader is special. For each per cpu buffer, the reader has its own * reader page. When a reader has read the entire reader page, this reader * page is swapped with another page in the ring buffer. * * Now, as long as the writer is off the reader page, the reader can do what * ever it wants with that page. The writer will never write to that page * again (as long as it is out of the ring buffer). * * Here's some silly ASCII art. * * +------+ * |reader| RING BUFFER * |page | * +------+ +---+ +---+ +---+ * | |-->| |-->| | * +---+ +---+ +---+ * ^ | * | | * +---------------+ * * * +------+ * |reader| RING BUFFER * |page |------------------v * +------+ +---+ +---+ +---+ * | |-->| |-->| | * +---+ +---+ +---+ * ^ | * | | * +---------------+ * * * +------+ * |reader| RING BUFFER * |page |------------------v * +------+ +---+ +---+ +---+ * ^ | |-->| |-->| | * | +---+ +---+ +---+ * | | * | | * +------------------------------+ * * * +------+ * |buffer| RING BUFFER * |page |------------------v * +------+ +---+ +---+ +---+ * ^ | | | |-->| | * | New +---+ +---+ +---+ * | Reader------^ | * | page | * +------------------------------+ * * * After we make this swap, the reader can hand this page off to the splice * code and be done with it. It can even allocate a new page if it needs to * and swap that into the ring buffer. * * We will be using cmpxchg soon to make all this lockless. * */ /* Used for individual buffers (after the counter) */ #define RB_BUFFER_OFF (1 << 20) #define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) #define RB_ALIGNMENT 4U #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ #ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS # define RB_FORCE_8BYTE_ALIGNMENT 0 # define RB_ARCH_ALIGNMENT RB_ALIGNMENT #else # define RB_FORCE_8BYTE_ALIGNMENT 1 # define RB_ARCH_ALIGNMENT 8U #endif #define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT) /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX enum { RB_LEN_TIME_EXTEND = 8, RB_LEN_TIME_STAMP = 8, }; #define skip_time_extend(event) \ ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND)) #define extended_time(event) \ (event->type_len >= RINGBUF_TYPE_TIME_EXTEND) static inline bool rb_null_event(struct ring_buffer_event *event) { return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; } static void rb_event_set_padding(struct ring_buffer_event *event) { /* padding has a NULL time_delta */ event->type_len = RINGBUF_TYPE_PADDING; event->time_delta = 0; } static unsigned rb_event_data_length(struct ring_buffer_event *event) { unsigned length; if (event->type_len) length = event->type_len * RB_ALIGNMENT; else length = event->array[0]; return length + RB_EVNT_HDR_SIZE; } /* * Return the length of the given event. Will return * the length of the time extend if the event is a * time extend. */ static inline unsigned rb_event_length(struct ring_buffer_event *event) { switch (event->type_len) { case RINGBUF_TYPE_PADDING: if (rb_null_event(event)) /* undefined */ return -1; return event->array[0] + RB_EVNT_HDR_SIZE; case RINGBUF_TYPE_TIME_EXTEND: return RB_LEN_TIME_EXTEND; case RINGBUF_TYPE_TIME_STAMP: return RB_LEN_TIME_STAMP; case RINGBUF_TYPE_DATA: return rb_event_data_length(event); default: WARN_ON_ONCE(1); } /* not hit */ return 0; } /* * Return total length of time extend and data, * or just the event length for all other events. */ static inline unsigned rb_event_ts_length(struct ring_buffer_event *event) { unsigned len = 0; if (extended_time(event)) { /* time extends include the data event after it */ len = RB_LEN_TIME_EXTEND; event = skip_time_extend(event); } return len + rb_event_length(event); } /** * ring_buffer_event_length - return the length of the event * @event: the event to get the length of * * Returns the size of the data load of a data event. * If the event is something other than a data event, it * returns the size of the event itself. With the exception * of a TIME EXTEND, where it still returns the size of the * data load of the data event after it. */ unsigned ring_buffer_event_length(struct ring_buffer_event *event) { unsigned length; if (extended_time(event)) event = skip_time_extend(event); length = rb_event_length(event); if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) return length; length -= RB_EVNT_HDR_SIZE; if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0])) length -= sizeof(event->array[0]); return length; } EXPORT_SYMBOL_GPL(ring_buffer_event_length); /* inline for ring buffer fast paths */ static __always_inline void * rb_event_data(struct ring_buffer_event *event) { if (extended_time(event)) event = skip_time_extend(event); WARN_ON_ONCE(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); /* If length is in len field, then array[0] has the data */ if (event->type_len) return (void *)&event->array[0]; /* Otherwise length is in array[0] and array[1] has the data */ return (void *)&event->array[1]; } /** * ring_buffer_event_data - return the data of the event * @event: the event to get the data from */ void *ring_buffer_event_data(struct ring_buffer_event *event) { return rb_event_data(event); } EXPORT_SYMBOL_GPL(ring_buffer_event_data); #define for_each_buffer_cpu(buffer, cpu) \ for_each_cpu(cpu, buffer->cpumask) #define for_each_online_buffer_cpu(buffer, cpu) \ for_each_cpu_and(cpu, buffer->cpumask, cpu_online_mask) #define TS_SHIFT 27 #define TS_MASK ((1ULL << TS_SHIFT) - 1) #define TS_DELTA_TEST (~TS_MASK) static u64 rb_event_time_stamp(struct ring_buffer_event *event) { u64 ts; ts = event->array[0]; ts <<= TS_SHIFT; ts += event->time_delta; return ts; } /* Flag when events were overwritten */ #define RB_MISSED_EVENTS (1 << 31) /* Missed count stored at end */ #define RB_MISSED_STORED (1 << 30) #define RB_MISSED_MASK (3 << 30) struct buffer_data_page { u64 time_stamp; /* page time stamp */ local_t commit; /* write committed index */ unsigned char data[] RB_ALIGN_DATA; /* data of buffer page */ }; struct buffer_data_read_page { unsigned order; /* order of the page */ struct buffer_data_page *data; /* actual data, stored in this page */ }; /* * Note, the buffer_page list must be first. The buffer pages * are allocated in cache lines, which means that each buffer * page will be at the beginning of a cache line, and thus * the least significant bits will be zero. We use this to * add flags in the list struct pointers, to make the ring buffer * lockless. */ struct buffer_page { struct list_head list; /* list of buffer pages */ local_t write; /* index for next write */ unsigned read; /* index for next read */ local_t entries; /* entries on this page */ unsigned long real_end; /* real end of data */ unsigned order; /* order of the page */ u32 id:30; /* ID for external mapping */ u32 range:1; /* Mapped via a range */ struct buffer_data_page *page; /* Actual data page */ }; /* * The buffer page counters, write and entries, must be reset * atomically when crossing page boundaries. To synchronize this * update, two counters are inserted into the number. One is * the actual counter for the write position or count on the page. * * The other is a counter of updaters. Before an update happens * the update partition of the counter is incremented. This will * allow the updater to update the counter atomically. * * The counter is 20 bits, and the state data is 12. */ #define RB_WRITE_MASK 0xfffff #define RB_WRITE_INTCNT (1 << 20) static void rb_init_page(struct buffer_data_page *bpage) { local_set(&bpage->commit, 0); } static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage) { return local_read(&bpage->page->commit); } static void free_buffer_page(struct buffer_page *bpage) { /* Range pages are not to be freed */ if (!bpage->range) free_pages((unsigned long)bpage->page, bpage->order); kfree(bpage); } /* * We need to fit the time_stamp delta into 27 bits. */ static inline bool test_time_stamp(u64 delta) { return !!(delta & TS_DELTA_TEST); } struct rb_irq_work { struct irq_work work; wait_queue_head_t waiters; wait_queue_head_t full_waiters; atomic_t seq; bool waiters_pending; bool full_waiters_pending; bool wakeup_full; }; /* * Structure to hold event state and handle nested events. */ struct rb_event_info { u64 ts; u64 delta; u64 before; u64 after; unsigned long length; struct buffer_page *tail_page; int add_timestamp; }; /* * Used for the add_timestamp * NONE * EXTEND - wants a time extend * ABSOLUTE - the buffer requests all events to have absolute time stamps * FORCE - force a full time stamp. */ enum { RB_ADD_STAMP_NONE = 0, RB_ADD_STAMP_EXTEND = BIT(1), RB_ADD_STAMP_ABSOLUTE = BIT(2), RB_ADD_STAMP_FORCE = BIT(3) }; /* * Used for which event context the event is in. * TRANSITION = 0 * NMI = 1 * IRQ = 2 * SOFTIRQ = 3 * NORMAL = 4 * * See trace_recursive_lock() comment below for more details. */ enum { RB_CTX_TRANSITION, RB_CTX_NMI, RB_CTX_IRQ, RB_CTX_SOFTIRQ, RB_CTX_NORMAL, RB_CTX_MAX }; struct rb_time_struct { local64_t time; }; typedef struct rb_time_struct rb_time_t; #define MAX_NEST 5 /* * head_page == tail_page && head == tail then buffer is empty. */ struct ring_buffer_per_cpu { int cpu; atomic_t record_disabled; atomic_t resize_disabled; struct trace_buffer *buffer; raw_spinlock_t reader_lock; /* serialize readers */ arch_spinlock_t lock; struct lock_class_key lock_key; struct buffer_data_page *free_page; unsigned long nr_pages; unsigned int current_context; struct list_head *pages; /* pages generation counter, incremented when the list changes */ unsigned long cnt; struct buffer_page *head_page; /* read from head */ struct buffer_page *tail_page; /* write to tail */ struct buffer_page *commit_page; /* committed pages */ struct buffer_page *reader_page; unsigned long lost_events; unsigned long last_overrun; unsigned long nest; local_t entries_bytes; local_t entries; local_t overrun; local_t commit_overrun; local_t dropped_events; local_t committing; local_t commits; local_t pages_touched; local_t pages_lost; local_t pages_read; long last_pages_touch; size_t shortest_full; unsigned long read; unsigned long read_bytes; rb_time_t write_stamp; rb_time_t before_stamp; u64 event_stamp[MAX_NEST]; u64 read_stamp; /* pages removed since last reset */ unsigned long pages_removed; unsigned int mapped; unsigned int user_mapped; /* user space mapping */ struct mutex mapping_lock; unsigned long *subbuf_ids; /* ID to subbuf VA */ struct trace_buffer_meta *meta_page; struct ring_buffer_cpu_meta *ring_meta; /* ring buffer pages to update, > 0 to add, < 0 to remove */ long nr_pages_to_update; struct list_head new_pages; /* new pages to add */ struct work_struct update_pages_work; struct completion update_done; struct rb_irq_work irq_work; }; struct trace_buffer { unsigned flags; int cpus; atomic_t record_disabled; atomic_t resizing; cpumask_var_t cpumask; struct lock_class_key *reader_lock_key; struct mutex mutex; struct ring_buffer_per_cpu **buffers; struct hlist_node node; u64 (*clock)(void); struct rb_irq_work irq_work; bool time_stamp_abs; unsigned long range_addr_start; unsigned long range_addr_end; struct ring_buffer_meta *meta; unsigned int subbuf_size; unsigned int subbuf_order; unsigned int max_data_size; }; struct ring_buffer_iter { struct ring_buffer_per_cpu *cpu_buffer; unsigned long head; unsigned long next_event; struct buffer_page *head_page; struct buffer_page *cache_reader_page; unsigned long cache_read; unsigned long cache_pages_removed; u64 read_stamp; u64 page_stamp; struct ring_buffer_event *event; size_t event_size; int missed_events; }; int ring_buffer_print_page_header(struct trace_buffer *buffer, struct trace_seq *s) { struct buffer_data_page field; trace_seq_printf(s, "\tfield: u64 timestamp;\t" "offset:0;\tsize:%u;\tsigned:%u;\n", (unsigned int)sizeof(field.time_stamp), (unsigned int)is_signed_type(u64)); trace_seq_printf(s, "\tfield: local_t commit;\t" "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), commit), (unsigned int)sizeof(field.commit), (unsigned int)is_signed_type(long)); trace_seq_printf(s, "\tfield: int overwrite;\t" "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), commit), 1, (unsigned int)is_signed_type(long)); trace_seq_printf(s, "\tfield: char data;\t" "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), data), (unsigned int)buffer->subbuf_size, (unsigned int)is_signed_type(char)); return !trace_seq_has_overflowed(s); } static inline void rb_time_read(rb_time_t *t, u64 *ret) { *ret = local64_read(&t->time); } static void rb_time_set(rb_time_t *t, u64 val) { local64_set(&t->time, val); } /* * Enable this to make sure that the event passed to * ring_buffer_event_time_stamp() is not committed and also * is on the buffer that it passed in. */ //#define RB_VERIFY_EVENT #ifdef RB_VERIFY_EVENT static struct list_head *rb_list_head(struct list_head *list); static void verify_event(struct ring_buffer_per_cpu *cpu_buffer, void *event) { struct buffer_page *page = cpu_buffer->commit_page; struct buffer_page *tail_page = READ_ONCE(cpu_buffer->tail_page); struct list_head *next; long commit, write; unsigned long addr = (unsigned long)event; bool done = false; int stop = 0; /* Make sure the event exists and is not committed yet */ do { if (page == tail_page || WARN_ON_ONCE(stop++ > 100)) done = true; commit = local_read(&page->page->commit); write = local_read(&page->write); if (addr >= (unsigned long)&page->page->data[commit] && addr < (unsigned long)&page->page->data[write]) return; next = rb_list_head(page->list.next); page = list_entry(next, struct buffer_page, list); } while (!done); WARN_ON_ONCE(1); } #else static inline void verify_event(struct ring_buffer_per_cpu *cpu_buffer, void *event) { } #endif /* * The absolute time stamp drops the 5 MSBs and some clocks may * require them. The rb_fix_abs_ts() will take a previous full * time stamp, and add the 5 MSB of that time stamp on to the * saved absolute time stamp. Then they are compared in case of * the unlikely event that the latest time stamp incremented * the 5 MSB. */ static inline u64 rb_fix_abs_ts(u64 abs, u64 save_ts) { if (save_ts & TS_MSB) { abs |= save_ts & TS_MSB; /* Check for overflow */ if (unlikely(abs < save_ts)) abs += 1ULL << 59; } return abs; } static inline u64 rb_time_stamp(struct trace_buffer *buffer); /** * ring_buffer_event_time_stamp - return the event's current time stamp * @buffer: The buffer that the event is on * @event: the event to get the time stamp of * * Note, this must be called after @event is reserved, and before it is * committed to the ring buffer. And must be called from the same * context where the event was reserved (normal, softirq, irq, etc). * * Returns the time stamp associated with the current event. * If the event has an extended time stamp, then that is used as * the time stamp to return. * In the highly unlikely case that the event was nested more than * the max nesting, then the write_stamp of the buffer is returned, * otherwise current time is returned, but that really neither of * the last two cases should ever happen. */ u64 ring_buffer_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *event) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[smp_processor_id()]; unsigned int nest; u64 ts; /* If the event includes an absolute time, then just use that */ if (event->type_len == RINGBUF_TYPE_TIME_STAMP) { ts = rb_event_time_stamp(event); return rb_fix_abs_ts(ts, cpu_buffer->tail_page->page->time_stamp); } nest = local_read(&cpu_buffer->committing); verify_event(cpu_buffer, event); if (WARN_ON_ONCE(!nest)) goto fail; /* Read the current saved nesting level time stamp */ if (likely(--nest < MAX_NEST)) return cpu_buffer->event_stamp[nest]; /* Shouldn't happen, warn if it does */ WARN_ONCE(1, "nest (%d) greater than max", nest); fail: rb_time_read(&cpu_buffer->write_stamp, &ts); return ts; } /** * ring_buffer_nr_dirty_pages - get the number of used pages in the ring buffer * @buffer: The ring_buffer to get the number of pages from * @cpu: The cpu of the ring_buffer to get the number of pages from * * Returns the number of pages that have content in the ring buffer. */ size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu) { size_t read; size_t lost; size_t cnt; read = local_read(&buffer->buffers[cpu]->pages_read); lost = local_read(&buffer->buffers[cpu]->pages_lost); cnt = local_read(&buffer->buffers[cpu]->pages_touched); if (WARN_ON_ONCE(cnt < lost)) return 0; cnt -= lost; /* The reader can read an empty page, but not more than that */ if (cnt < read) { WARN_ON_ONCE(read > cnt + 1); return 0; } return cnt - read; } static __always_inline bool full_hit(struct trace_buffer *buffer, int cpu, int full) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; size_t nr_pages; size_t dirty; nr_pages = cpu_buffer->nr_pages; if (!nr_pages || !full) return true; /* * Add one as dirty will never equal nr_pages, as the sub-buffer * that the writer is on is not counted as dirty. * This is needed if "buffer_percent" is set to 100. */ dirty = ring_buffer_nr_dirty_pages(buffer, cpu) + 1; return (dirty * 100) >= (full * nr_pages); } /* * rb_wake_up_waiters - wake up tasks waiting for ring buffer input * * Schedules a delayed work to wake up any task that is blocked on the * ring buffer waiters queue. */ static void rb_wake_up_waiters(struct irq_work *work) { struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work); /* For waiters waiting for the first wake up */ (void)atomic_fetch_inc_release(&rbwork->seq); wake_up_all(&rbwork->waiters); if (rbwork->full_waiters_pending || rbwork->wakeup_full) { /* Only cpu_buffer sets the above flags */ struct ring_buffer_per_cpu *cpu_buffer = container_of(rbwork, struct ring_buffer_per_cpu, irq_work); /* Called from interrupt context */ raw_spin_lock(&cpu_buffer->reader_lock); rbwork->wakeup_full = false; rbwork->full_waiters_pending = false; /* Waking up all waiters, they will reset the shortest full */ cpu_buffer->shortest_full = 0; raw_spin_unlock(&cpu_buffer->reader_lock); wake_up_all(&rbwork->full_waiters); } } /** * ring_buffer_wake_waiters - wake up any waiters on this ring buffer * @buffer: The ring buffer to wake waiters on * @cpu: The CPU buffer to wake waiters on * * In the case of a file that represents a ring buffer is closing, * it is prudent to wake up any waiters that are on this. */ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; struct rb_irq_work *rbwork; if (!buffer) return; if (cpu == RING_BUFFER_ALL_CPUS) { /* Wake up individual ones too. One level recursion */ for_each_buffer_cpu(buffer, cpu) ring_buffer_wake_waiters(buffer, cpu); rbwork = &buffer->irq_work; } else { if (WARN_ON_ONCE(!buffer->buffers)) return; if (WARN_ON_ONCE(cpu >= nr_cpu_ids)) return; cpu_buffer = buffer->buffers[cpu]; /* The CPU buffer may not have been initialized yet */ if (!cpu_buffer) return; rbwork = &cpu_buffer->irq_work; } /* This can be called in any context */ irq_work_queue(&rbwork->work); } static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full) { struct ring_buffer_per_cpu *cpu_buffer; bool ret = false; /* Reads of all CPUs always waits for any data */ if (cpu == RING_BUFFER_ALL_CPUS) return !ring_buffer_empty(buffer); cpu_buffer = buffer->buffers[cpu]; if (!ring_buffer_empty_cpu(buffer, cpu)) { unsigned long flags; bool pagebusy; if (!full) return true; raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; ret = !pagebusy && full_hit(buffer, cpu, full); if (!ret && (!cpu_buffer->shortest_full || cpu_buffer->shortest_full > full)) { cpu_buffer->shortest_full = full; } raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); } return ret; } static inline bool rb_wait_cond(struct rb_irq_work *rbwork, struct trace_buffer *buffer, int cpu, int full, ring_buffer_cond_fn cond, void *data) { if (rb_watermark_hit(buffer, cpu, full)) return true; if (cond(data)) return true; /* * The events can happen in critical sections where * checking a work queue can cause deadlocks. * After adding a task to the queue, this flag is set * only to notify events to try to wake up the queue * using irq_work. * * We don't clear it even if the buffer is no longer * empty. The flag only causes the next event to run * irq_work to do the work queue wake up. The worse * that can happen if we race with !trace_empty() is that * an event will cause an irq_work to try to wake up * an empty queue. * * There's no reason to protect this flag either, as * the work queue and irq_work logic will do the necessary * synchronization for the wake ups. The only thing * that is necessary is that the wake up happens after * a task has been queued. It's OK for spurious wake ups. */ if (full) rbwork->full_waiters_pending = true; else rbwork->waiters_pending = true; return false; } struct rb_wait_data { struct rb_irq_work *irq_work; int seq; }; /* * The default wait condition for ring_buffer_wait() is to just to exit the * wait loop the first time it is woken up. */ static bool rb_wait_once(void *data) { struct rb_wait_data *rdata = data; struct rb_irq_work *rbwork = rdata->irq_work; return atomic_read_acquire(&rbwork->seq) != rdata->seq; } /** * ring_buffer_wait - wait for input to the ring buffer * @buffer: buffer to wait on * @cpu: the cpu buffer to wait on * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS * @cond: condition function to break out of wait (NULL to run once) * @data: the data to pass to @cond. * * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon * as data is added to any of the @buffer's cpu buffers. Otherwise * it will wait for data to be added to a specific cpu buffer. */ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full, ring_buffer_cond_fn cond, void *data) { struct ring_buffer_per_cpu *cpu_buffer; struct wait_queue_head *waitq; struct rb_irq_work *rbwork; struct rb_wait_data rdata; int ret = 0; /* * Depending on what the caller is waiting for, either any * data in any cpu buffer, or a specific buffer, put the * caller on the appropriate wait queue. */ if (cpu == RING_BUFFER_ALL_CPUS) { rbwork = &buffer->irq_work; /* Full only makes sense on per cpu reads */ full = 0; } else { if (!cpumask_test_cpu(cpu, buffer->cpumask)) return -ENODEV; cpu_buffer = buffer->buffers[cpu]; rbwork = &cpu_buffer->irq_work; } if (full) waitq = &rbwork->full_waiters; else waitq = &rbwork->waiters; /* Set up to exit loop as soon as it is woken */ if (!cond) { cond = rb_wait_once; rdata.irq_work = rbwork; rdata.seq = atomic_read_acquire(&rbwork->seq); data = &rdata; } ret = wait_event_interruptible((*waitq), rb_wait_cond(rbwork, buffer, cpu, full, cond, data)); return ret; } /** * ring_buffer_poll_wait - poll on buffer input * @buffer: buffer to wait on * @cpu: the cpu buffer to wait on * @filp: the file descriptor * @poll_table: The poll descriptor * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS * * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon * as data is added to any of the @buffer's cpu buffers. Otherwise * it will wait for data to be added to a specific cpu buffer. * * Returns EPOLLIN | EPOLLRDNORM if data exists in the buffers, * zero otherwise. */ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table, int full) { struct ring_buffer_per_cpu *cpu_buffer; struct rb_irq_work *rbwork; if (cpu == RING_BUFFER_ALL_CPUS) { rbwork = &buffer->irq_work; full = 0; } else { if (!cpumask_test_cpu(cpu, buffer->cpumask)) return EPOLLERR; cpu_buffer = buffer->buffers[cpu]; rbwork = &cpu_buffer->irq_work; } if (full) { poll_wait(filp, &rbwork->full_waiters, poll_table); if (rb_watermark_hit(buffer, cpu, full)) return EPOLLIN | EPOLLRDNORM; /* * Only allow full_waiters_pending update to be seen after * the shortest_full is set (in rb_watermark_hit). If the * writer sees the full_waiters_pending flag set, it will * compare the amount in the ring buffer to shortest_full. * If the amount in the ring buffer is greater than the * shortest_full percent, it will call the irq_work handler * to wake up this list. The irq_handler will reset shortest_full * back to zero. That's done under the reader_lock, but * the below smp_mb() makes sure that the update to * full_waiters_pending doesn't leak up into the above. */ smp_mb(); rbwork->full_waiters_pending = true; return 0; } poll_wait(filp, &rbwork->waiters, poll_table); rbwork->waiters_pending = true; /* * There's a tight race between setting the waiters_pending and * checking if the ring buffer is empty. Once the waiters_pending bit * is set, the next event will wake the task up, but we can get stuck * if there's only a single event in. * * FIXME: Ideally, we need a memory barrier on the writer side as well, * but adding a memory barrier to all events will cause too much of a * performance hit in the fast path. We only need a memory barrier when * the buffer goes from empty to having content. But as this race is * extremely small, and it's not a problem if another event comes in, we * will fix it later. */ smp_mb(); if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) return EPOLLIN | EPOLLRDNORM; return 0; } /* buffer may be either ring_buffer or ring_buffer_per_cpu */ #define RB_WARN_ON(b, cond) \ ({ \ int _____ret = unlikely(cond); \ if (_____ret) { \ if (__same_type(*(b), struct ring_buffer_per_cpu)) { \ struct ring_buffer_per_cpu *__b = \ (void *)b; \ atomic_inc(&__b->buffer->record_disabled); \ } else \ atomic_inc(&b->record_disabled); \ WARN_ON(1); \ } \ _____ret; \ }) /* Up this if you want to test the TIME_EXTENTS and normalization */ #define DEBUG_SHIFT 0 static inline u64 rb_time_stamp(struct trace_buffer *buffer) { u64 ts; /* Skip retpolines :-( */ if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) && likely(buffer->clock == trace_clock_local)) ts = trace_clock_local(); else ts = buffer->clock(); /* shift to debug/test normalization and TIME_EXTENTS */ return ts << DEBUG_SHIFT; } u64 ring_buffer_time_stamp(struct trace_buffer *buffer) { u64 time; preempt_disable_notrace(); time = rb_time_stamp(buffer); preempt_enable_notrace(); return time; } EXPORT_SYMBOL_GPL(ring_buffer_time_stamp); void ring_buffer_normalize_time_stamp(struct trace_buffer *buffer, int cpu, u64 *ts) { /* Just stupid testing the normalize function and deltas */ *ts >>= DEBUG_SHIFT; } EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); /* * Making the ring buffer lockless makes things tricky. * Although writes only happen on the CPU that they are on, * and they only need to worry about interrupts. Reads can * happen on any CPU. * * The reader page is always off the ring buffer, but when the * reader finishes with a page, it needs to swap its page with * a new one from the buffer. The reader needs to take from * the head (writes go to the tail). But if a writer is in overwrite * mode and wraps, it must push the head page forward. * * Here lies the problem. * * The reader must be careful to replace only the head page, and * not another one. As described at the top of the file in the * ASCII art, the reader sets its old page to point to the next * page after head. It then sets the page after head to point to * the old reader page. But if the writer moves the head page * during this operation, the reader could end up with the tail. * * We use cmpxchg to help prevent this race. We also do something * special with the page before head. We set the LSB to 1. * * When the writer must push the page forward, it will clear the * bit that points to the head page, move the head, and then set * the bit that points to the new head page. * * We also don't want an interrupt coming in and moving the head * page on another writer. Thus we use the second LSB to catch * that too. Thus: * * head->list->prev->next bit 1 bit 0 * ------- ------- * Normal page 0 0 * Points to head page 0 1 * New head page 1 0 * * Note we can not trust the prev pointer of the head page, because: * * +----+ +-----+ +-----+ * | |------>| T |---X--->| N | * | |<------| | | | * +----+ +-----+ +-----+ * ^ ^ | * | +-----+ | | * +----------| R |----------+ | * | |<-----------+ * +-----+ * * Key: ---X--> HEAD flag set in pointer * T Tail page * R Reader page * N Next page * * (see __rb_reserve_next() to see where this happens) * * What the above shows is that the reader just swapped out * the reader page with a page in the buffer, but before it * could make the new header point back to the new page added * it was preempted by a writer. The writer moved forward onto * the new page added by the reader and is about to move forward * again. * * You can see, it is legitimate for the previous pointer of * the head (or any page) not to point back to itself. But only * temporarily. */ #define RB_PAGE_NORMAL 0UL #define RB_PAGE_HEAD 1UL #define RB_PAGE_UPDATE 2UL #define RB_FLAG_MASK 3UL /* PAGE_MOVED is not part of the mask */ #define RB_PAGE_MOVED 4UL /* * rb_list_head - remove any bit */ static struct list_head *rb_list_head(struct list_head *list) { unsigned long val = (unsigned long)list; return (struct list_head *)(val & ~RB_FLAG_MASK); } /* * rb_is_head_page - test if the given page is the head page * * Because the reader may move the head_page pointer, we can * not trust what the head page is (it may be pointing to * the reader page). But if the next page is a header page, * its flags will be non zero. */ static inline int rb_is_head_page(struct buffer_page *page, struct list_head *list) { unsigned long val; val = (unsigned long)list->next; if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list) return RB_PAGE_MOVED; return val & RB_FLAG_MASK; } /* * rb_is_reader_page * * The unique thing about the reader page, is that, if the * writer is ever on it, the previous pointer never points * back to the reader page. */ static bool rb_is_reader_page(struct buffer_page *page) { struct list_head *list = page->list.prev; return rb_list_head(list->next) != &page->list; } /* * rb_set_list_to_head - set a list_head to be pointing to head. */ static void rb_set_list_to_head(struct list_head *list) { unsigned long *ptr; ptr = (unsigned long *)&list->next; *ptr |= RB_PAGE_HEAD; *ptr &= ~RB_PAGE_UPDATE; } /* * rb_head_page_activate - sets up head page */ static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer) { struct buffer_page *head; head = cpu_buffer->head_page; if (!head) return; /* * Set the previous list pointer to have the HEAD flag. */ rb_set_list_to_head(head->list.prev); if (cpu_buffer->ring_meta) { struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; meta->head_buffer = (unsigned long)head->page; } } static void rb_list_head_clear(struct list_head *list) { unsigned long *ptr = (unsigned long *)&list->next; *ptr &= ~RB_FLAG_MASK; } /* * rb_head_page_deactivate - clears head page ptr (for free list) */ static void rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer) { struct list_head *hd; /* Go through the whole list and clear any pointers found. */ rb_list_head_clear(cpu_buffer->pages); list_for_each(hd, cpu_buffer->pages) rb_list_head_clear(hd); } static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *head, struct buffer_page *prev, int old_flag, int new_flag) { struct list_head *list; unsigned long val = (unsigned long)&head->list; unsigned long ret; list = &prev->list; val &= ~RB_FLAG_MASK; ret = cmpxchg((unsigned long *)&list->next, val | old_flag, val | new_flag); /* check if the reader took the page */ if ((ret & ~RB_FLAG_MASK) != val) return RB_PAGE_MOVED; return ret & RB_FLAG_MASK; } static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *head, struct buffer_page *prev, int old_flag) { return rb_head_page_set(cpu_buffer, head, prev, old_flag, RB_PAGE_UPDATE); } static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *head, struct buffer_page *prev, int old_flag) { return rb_head_page_set(cpu_buffer, head, prev, old_flag, RB_PAGE_HEAD); } static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *head, struct buffer_page *prev, int old_flag) { return rb_head_page_set(cpu_buffer, head, prev, old_flag, RB_PAGE_NORMAL); } static inline void rb_inc_page(struct buffer_page **bpage) { struct list_head *p = rb_list_head((*bpage)->list.next); *bpage = list_entry(p, struct buffer_page, list); } static inline void rb_dec_page(struct buffer_page **bpage) { struct list_head *p = rb_list_head((*bpage)->list.prev); *bpage = list_entry(p, struct buffer_page, list); } static struct buffer_page * rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer) { struct buffer_page *head; struct buffer_page *page; struct list_head *list; int i; if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page)) return NULL; /* sanity check */ list = cpu_buffer->pages; if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list)) return NULL; page = head = cpu_buffer->head_page; /* * It is possible that the writer moves the header behind * where we started, and we miss in one loop. * A second loop should grab the header, but we'll do * three loops just because I'm paranoid. */ for (i = 0; i < 3; i++) { do { if (rb_is_head_page(page, page->list.prev)) { cpu_buffer->head_page = page; return page; } rb_inc_page(&page); } while (page != head); } RB_WARN_ON(cpu_buffer, 1); return NULL; } static bool rb_head_page_replace(struct buffer_page *old, struct buffer_page *new) { unsigned long *ptr = (unsigned long *)&old->list.prev->next; unsigned long val; val = *ptr & ~RB_FLAG_MASK; val |= RB_PAGE_HEAD; return try_cmpxchg(ptr, &val, (unsigned long)&new->list); } /* * rb_tail_page_update - move the tail page forward */ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *tail_page, struct buffer_page *next_page) { unsigned long old_entries; unsigned long old_write; /* * The tail page now needs to be moved forward. * * We need to reset the tail page, but without messing * with possible erasing of data brought in by interrupts * that have moved the tail page and are currently on it. * * We add a counter to the write field to denote this. */ old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write); old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries); /* * Just make sure we have seen our old_write and synchronize * with any interrupts that come in. */ barrier(); /* * If the tail page is still the same as what we think * it is, then it is up to us to update the tail * pointer. */ if (tail_page == READ_ONCE(cpu_buffer->tail_page)) { /* Zero the write counter */ unsigned long val = old_write & ~RB_WRITE_MASK; unsigned long eval = old_entries & ~RB_WRITE_MASK; /* * This will only succeed if an interrupt did * not come in and change it. In which case, we * do not want to modify it. * * We add (void) to let the compiler know that we do not care * about the return value of these functions. We use the * cmpxchg to only update if an interrupt did not already * do it for us. If the cmpxchg fails, we don't care. */ (void)local_cmpxchg(&next_page->write, old_write, val); (void)local_cmpxchg(&next_page->entries, old_entries, eval); /* * No need to worry about races with clearing out the commit. * it only can increment when a commit takes place. But that * only happens in the outer most nested commit. */ local_set(&next_page->page->commit, 0); /* Either we update tail_page or an interrupt does */ if (try_cmpxchg(&cpu_buffer->tail_page, &tail_page, next_page)) local_inc(&cpu_buffer->pages_touched); } } static void rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *bpage) { unsigned long val = (unsigned long)bpage; RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK); } static bool rb_check_links(struct ring_buffer_per_cpu *cpu_buffer, struct list_head *list) { if (RB_WARN_ON(cpu_buffer, rb_list_head(rb_list_head(list->next)->prev) != list)) return false; if (RB_WARN_ON(cpu_buffer, rb_list_head(rb_list_head(list->prev)->next) != list)) return false; return true; } /** * rb_check_pages - integrity check of buffer pages * @cpu_buffer: CPU buffer with pages to test * * As a safety measure we check to make sure the data pages have not * been corrupted. */ static void rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) { struct list_head *head, *tmp; unsigned long buffer_cnt; unsigned long flags; int nr_loops = 0; /* * Walk the linked list underpinning the ring buffer and validate all * its next and prev links. * * The check acquires the reader_lock to avoid concurrent processing * with code that could be modifying the list. However, the lock cannot * be held for the entire duration of the walk, as this would make the * time when interrupts are disabled non-deterministic, dependent on the * ring buffer size. Therefore, the code releases and re-acquires the * lock after checking each page. The ring_buffer_per_cpu.cnt variable * is then used to detect if the list was modified while the lock was * not held, in which case the check needs to be restarted. * * The code attempts to perform the check at most three times before * giving up. This is acceptable because this is only a self-validation * to detect problems early on. In practice, the list modification * operations are fairly spaced, and so this check typically succeeds at * most on the second try. */ again: if (++nr_loops > 3) return; raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); head = rb_list_head(cpu_buffer->pages); if (!rb_check_links(cpu_buffer, head)) goto out_locked; buffer_cnt = cpu_buffer->cnt; tmp = head; raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); while (true) { raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); if (buffer_cnt != cpu_buffer->cnt) { /* The list was updated, try again. */ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); goto again; } tmp = rb_list_head(tmp->next); if (tmp == head) /* The iteration circled back, all is done. */ goto out_locked; if (!rb_check_links(cpu_buffer, tmp)) goto out_locked; raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); } out_locked: raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); } /* * Take an address, add the meta data size as well as the array of * array subbuffer indexes, then align it to a subbuffer size. * * This is used to help find the next per cpu subbuffer within a mapped range. */ static unsigned long rb_range_align_subbuf(unsigned long addr, int subbuf_size, int nr_subbufs) { addr += sizeof(struct ring_buffer_cpu_meta) + sizeof(int) * nr_subbufs; return ALIGN(addr, subbuf_size); } /* * Return the ring_buffer_meta for a given @cpu. */ static void *rb_range_meta(struct trace_buffer *buffer, int nr_pages, int cpu) { int subbuf_size = buffer->subbuf_size + BUF_PAGE_HDR_SIZE; struct ring_buffer_cpu_meta *meta; struct ring_buffer_meta *bmeta; unsigned long ptr; int nr_subbufs; bmeta = buffer->meta; if (!bmeta) return NULL; ptr = (unsigned long)bmeta + bmeta->buffers_offset; meta = (struct ring_buffer_cpu_meta *)ptr; /* When nr_pages passed in is zero, the first meta has already been initialized */ if (!nr_pages) { nr_subbufs = meta->nr_subbufs; } else { /* Include the reader page */ nr_subbufs = nr_pages + 1; } /* * The first chunk may not be subbuffer aligned, where as * the rest of the chunks are. */ if (cpu) { ptr = rb_range_align_subbuf(ptr, subbuf_size, nr_subbufs); ptr += subbuf_size * nr_subbufs; /* We can use multiplication to find chunks greater than 1 */ if (cpu > 1) { unsigned long size; unsigned long p; /* Save the beginning of this CPU chunk */ p = ptr; ptr = rb_range_align_subbuf(ptr, subbuf_size, nr_subbufs); ptr += subbuf_size * nr_subbufs; /* Now all chunks after this are the same size */ size = ptr - p; ptr += size * (cpu - 2); } } return (void *)ptr; } /* Return the start of subbufs given the meta pointer */ static void *rb_subbufs_from_meta(struct ring_buffer_cpu_meta *meta) { int subbuf_size = meta->subbuf_size; unsigned long ptr; ptr = (unsigned long)meta; ptr = rb_range_align_subbuf(ptr, subbuf_size, meta->nr_subbufs); return (void *)ptr; } /* * Return a specific sub-buffer for a given @cpu defined by @idx. */ static void *rb_range_buffer(struct ring_buffer_per_cpu *cpu_buffer, int idx) { struct ring_buffer_cpu_meta *meta; unsigned long ptr; int subbuf_size; meta = rb_range_meta(cpu_buffer->buffer, 0, cpu_buffer->cpu); if (!meta) return NULL; if (WARN_ON_ONCE(idx >= meta->nr_subbufs)) return NULL; subbuf_size = meta->subbuf_size; /* Map this buffer to the order that's in meta->buffers[] */ idx = meta->buffers[idx]; ptr = (unsigned long)rb_subbufs_from_meta(meta); ptr += subbuf_size * idx; if (ptr + subbuf_size > cpu_buffer->buffer->range_addr_end) return NULL; return (void *)ptr; } /* * See if the existing memory contains a valid meta section. * if so, use that, otherwise initialize it. */ static bool rb_meta_init(struct trace_buffer *buffer, int scratch_size) { unsigned long ptr = buffer->range_addr_start; struct ring_buffer_meta *bmeta; unsigned long total_size; int struct_sizes; bmeta = (struct ring_buffer_meta *)ptr; buffer->meta = bmeta; total_size = buffer->range_addr_end - buffer->range_addr_start; struct_sizes = sizeof(struct ring_buffer_cpu_meta); struct_sizes |= sizeof(*bmeta) << 16; /* The first buffer will start word size after the meta page */ ptr += sizeof(*bmeta); ptr = ALIGN(ptr, sizeof(long)); ptr += scratch_size; if (bmeta->magic != RING_BUFFER_META_MAGIC) { pr_info("Ring buffer boot meta mismatch of magic\n"); goto init; } if (bmeta->struct_sizes != struct_sizes) { pr_info("Ring buffer boot meta mismatch of struct size\n"); goto init; } if (bmeta->total_size != total_size) { pr_info("Ring buffer boot meta mismatch of total size\n"); goto init; } if (bmeta->buffers_offset > bmeta->total_size) { pr_info("Ring buffer boot meta mismatch of offset outside of total size\n"); goto init; } if (bmeta->buffers_offset != (void *)ptr - (void *)bmeta) { pr_info("Ring buffer boot meta mismatch of first buffer offset\n"); goto init; } return true; init: bmeta->magic = RING_BUFFER_META_MAGIC; bmeta->struct_sizes = struct_sizes; bmeta->total_size = total_size; bmeta->buffers_offset = (void *)ptr - (void *)bmeta; /* Zero out the scatch pad */ memset((void *)bmeta + sizeof(*bmeta), 0, bmeta->buffers_offset - sizeof(*bmeta)); return false; } /* * See if the existing memory contains valid ring buffer data. * As the previous kernel must be the same as this kernel, all * the calculations (size of buffers and number of buffers) * must be the same. */ static bool rb_cpu_meta_valid(struct ring_buffer_cpu_meta *meta, int cpu, struct trace_buffer *buffer, int nr_pages, unsigned long *subbuf_mask) { int subbuf_size = PAGE_SIZE; struct buffer_data_page *subbuf; unsigned long buffers_start; unsigned long buffers_end; int i; if (!subbuf_mask) return false; buffers_start = meta->first_buffer; buffers_end = meta->first_buffer + (subbuf_size * meta->nr_subbufs); /* Is the head and commit buffers within the range of buffers? */ if (meta->head_buffer < buffers_start || meta->head_buffer >= buffers_end) { pr_info("Ring buffer boot meta [%d] head buffer out of range\n", cpu); return false; } if (meta->commit_buffer < buffers_start || meta->commit_buffer >= buffers_end) { pr_info("Ring buffer boot meta [%d] commit buffer out of range\n", cpu); return false; } subbuf = rb_subbufs_from_meta(meta); bitmap_clear(subbuf_mask, 0, meta->nr_subbufs); /* Is the meta buffers and the subbufs themselves have correct data? */ for (i = 0; i < meta->nr_subbufs; i++) { if (meta->buffers[i] < 0 || meta->buffers[i] >= meta->nr_subbufs) { pr_info("Ring buffer boot meta [%d] array out of range\n", cpu); return false; } if ((unsigned)local_read(&subbuf->commit) > subbuf_size) { pr_info("Ring buffer boot meta [%d] buffer invalid commit\n", cpu); return false; } if (test_bit(meta->buffers[i], subbuf_mask)) { pr_info("Ring buffer boot meta [%d] array has duplicates\n", cpu); return false; } set_bit(meta->buffers[i], subbuf_mask); subbuf = (void *)subbuf + subbuf_size; } return true; } static int rb_meta_subbuf_idx(struct ring_buffer_cpu_meta *meta, void *subbuf); static int rb_read_data_buffer(struct buffer_data_page *dpage, int tail, int cpu, unsigned long long *timestamp, u64 *delta_ptr) { struct ring_buffer_event *event; u64 ts, delta; int events = 0; int e; *delta_ptr = 0; *timestamp = 0; ts = dpage->time_stamp; for (e = 0; e < tail; e += rb_event_length(event)) { event = (struct ring_buffer_event *)(dpage->data + e); switch (event->type_len) { case RINGBUF_TYPE_TIME_EXTEND: delta = rb_event_time_stamp(event); ts += delta; break; case RINGBUF_TYPE_TIME_STAMP: delta = rb_event_time_stamp(event); delta = rb_fix_abs_ts(delta, ts); if (delta < ts) { *delta_ptr = delta; *timestamp = ts; return -1; } ts = delta; break; case RINGBUF_TYPE_PADDING: if (event->time_delta == 1) break; fallthrough; case RINGBUF_TYPE_DATA: events++; ts += event->time_delta; break; default: return -1; } } *timestamp = ts; return events; } static int rb_validate_buffer(struct buffer_data_page *dpage, int cpu) { unsigned long long ts; u64 delta; int tail; tail = local_read(&dpage->commit); return rb_read_data_buffer(dpage, tail, cpu, &ts, &delta); } /* If the meta data has been validated, now validate the events */ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer) { struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; struct buffer_page *head_page, *orig_head; unsigned long entry_bytes = 0; unsigned long entries = 0; int ret; u64 ts; int i; if (!meta || !meta->head_buffer) return; /* Do the reader page first */ ret = rb_validate_buffer(cpu_buffer->reader_page->page, cpu_buffer->cpu); if (ret < 0) { pr_info("Ring buffer reader page is invalid\n"); goto invalid; } entries += ret; entry_bytes += local_read(&cpu_buffer->reader_page->page->commit); local_set(&cpu_buffer->reader_page->entries, ret); orig_head = head_page = cpu_buffer->head_page; ts = head_page->page->time_stamp; /* * Try to rewind the head so that we can read the pages which already * read in the previous boot. */ if (head_page == cpu_buffer->tail_page) goto skip_rewind; rb_dec_page(&head_page); for (i = 0; i < meta->nr_subbufs + 1; i++, rb_dec_page(&head_page)) { /* Rewind until tail (writer) page. */ if (head_page == cpu_buffer->tail_page) break; /* Ensure the page has older data than head. */ if (ts < head_page->page->time_stamp) break; ts = head_page->page->time_stamp; /* Ensure the page has correct timestamp and some data. */ if (!ts || rb_page_commit(head_page) == 0) break; /* Stop rewind if the page is invalid. */ ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu); if (ret < 0) break; /* Recover the number of entries and update stats. */ local_set(&head_page->entries, ret); if (ret) local_inc(&cpu_buffer->pages_touched); entries += ret; entry_bytes += rb_page_commit(head_page); } if (i) pr_info("Ring buffer [%d] rewound %d pages\n", cpu_buffer->cpu, i); /* The last rewound page must be skipped. */ if (head_page != orig_head) rb_inc_page(&head_page); /* * If the ring buffer was rewound, then inject the reader page * into the location just before the original head page. */ if (head_page != orig_head) { struct buffer_page *bpage = orig_head; rb_dec_page(&bpage); /* * Insert the reader_page before the original head page. * Since the list encode RB_PAGE flags, general list * operations should be avoided. */ cpu_buffer->reader_page->list.next = &orig_head->list; cpu_buffer->reader_page->list.prev = orig_head->list.prev; orig_head->list.prev = &cpu_buffer->reader_page->list; bpage->list.next = &cpu_buffer->reader_page->list; /* Make the head_page the reader page */ cpu_buffer->reader_page = head_page; bpage = head_page; rb_inc_page(&head_page); head_page->list.prev = bpage->list.prev; rb_dec_page(&bpage); bpage->list.next = &head_page->list; rb_set_list_to_head(&bpage->list); cpu_buffer->pages = &head_page->list; cpu_buffer->head_page = head_page; meta->head_buffer = (unsigned long)head_page->page; /* Reset all the indexes */ bpage = cpu_buffer->reader_page; meta->buffers[0] = rb_meta_subbuf_idx(meta, bpage->page); bpage->id = 0; for (i = 1, bpage = head_page; i < meta->nr_subbufs; i++, rb_inc_page(&bpage)) { meta->buffers[i] = rb_meta_subbuf_idx(meta, bpage->page); bpage->id = i; } /* We'll restart verifying from orig_head */ head_page = orig_head; } skip_rewind: /* If the commit_buffer is the reader page, update the commit page */ if (meta->commit_buffer == (unsigned long)cpu_buffer->reader_page->page) { cpu_buffer->commit_page = cpu_buffer->reader_page; /* Nothing more to do, the only page is the reader page */ goto done; } /* Iterate until finding the commit page */ for (i = 0; i < meta->nr_subbufs + 1; i++, rb_inc_page(&head_page)) { /* Reader page has already been done */ if (head_page == cpu_buffer->reader_page) continue; ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu); if (ret < 0) { pr_info("Ring buffer meta [%d] invalid buffer page\n", cpu_buffer->cpu); goto invalid; } /* If the buffer has content, update pages_touched */ if (ret) local_inc(&cpu_buffer->pages_touched); entries += ret; entry_bytes += local_read(&head_page->page->commit); local_set(&cpu_buffer->head_page->entries, ret); if (head_page == cpu_buffer->commit_page) break; } if (head_page != cpu_buffer->commit_page) { pr_info("Ring buffer meta [%d] commit page not found\n", cpu_buffer->cpu); goto invalid; } done: local_set(&cpu_buffer->entries, entries); local_set(&cpu_buffer->entries_bytes, entry_bytes); pr_info("Ring buffer meta [%d] is from previous boot!\n", cpu_buffer->cpu); return; invalid: /* The content of the buffers are invalid, reset the meta data */ meta->head_buffer = 0; meta->commit_buffer = 0; /* Reset the reader page */ local_set(&cpu_buffer->reader_page->entries, 0); local_set(&cpu_buffer->reader_page->page->commit, 0); /* Reset all the subbuffers */ for (i = 0; i < meta->nr_subbufs - 1; i++, rb_inc_page(&head_page)) { local_set(&head_page->entries, 0); local_set(&head_page->page->commit, 0); } } static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages, int scratch_size) { struct ring_buffer_cpu_meta *meta; unsigned long *subbuf_mask; unsigned long delta; void *subbuf; bool valid = false; int cpu; int i; /* Create a mask to test the subbuf array */ subbuf_mask = bitmap_alloc(nr_pages + 1, GFP_KERNEL); /* If subbuf_mask fails to allocate, then rb_meta_valid() will return false */ if (rb_meta_init(buffer, scratch_size)) valid = true; for (cpu = 0; cpu < nr_cpu_ids; cpu++) { void *next_meta; meta = rb_range_meta(buffer, nr_pages, cpu); if (valid && rb_cpu_meta_valid(meta, cpu, buffer, nr_pages, subbuf_mask)) { /* Make the mappings match the current address */ subbuf = rb_subbufs_from_meta(meta); delta = (unsigned long)subbuf - meta->first_buffer; meta->first_buffer += delta; meta->head_buffer += delta; meta->commit_buffer += delta; continue; } if (cpu < nr_cpu_ids - 1) next_meta = rb_range_meta(buffer, nr_pages, cpu + 1); else next_meta = (void *)buffer->range_addr_end; memset(meta, 0, next_meta - (void *)meta); meta->nr_subbufs = nr_pages + 1; meta->subbuf_size = PAGE_SIZE; subbuf = rb_subbufs_from_meta(meta); meta->first_buffer = (unsigned long)subbuf; /* * The buffers[] array holds the order of the sub-buffers * that are after the meta data. The sub-buffers may * be swapped out when read and inserted into a different * location of the ring buffer. Although their addresses * remain the same, the buffers[] array contains the * index into the sub-buffers holding their actual order. */ for (i = 0; i < meta->nr_subbufs; i++) { meta->buffers[i] = i; rb_init_page(subbuf); subbuf += meta->subbuf_size; } } bitmap_free(subbuf_mask); } static void *rbm_start(struct seq_file *m, loff_t *pos) { struct ring_buffer_per_cpu *cpu_buffer = m->private; struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; unsigned long val; if (!meta) return NULL; if (*pos > meta->nr_subbufs) return NULL; val = *pos; val++; return (void *)val; } static void *rbm_next(struct seq_file *m, void *v, loff_t *pos) { (*pos)++; return rbm_start(m, pos); } static int rbm_show(struct seq_file *m, void *v) { struct ring_buffer_per_cpu *cpu_buffer = m->private; struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; unsigned long val = (unsigned long)v; if (val == 1) { seq_printf(m, "head_buffer: %d\n", rb_meta_subbuf_idx(meta, (void *)meta->head_buffer)); seq_printf(m, "commit_buffer: %d\n", rb_meta_subbuf_idx(meta, (void *)meta->commit_buffer)); seq_printf(m, "subbuf_size: %d\n", meta->subbuf_size); seq_printf(m, "nr_subbufs: %d\n", meta->nr_subbufs); return 0; } val -= 2; seq_printf(m, "buffer[%ld]: %d\n", val, meta->buffers[val]); return 0; } static void rbm_stop(struct seq_file *m, void *p) { } static const struct seq_operations rb_meta_seq_ops = { .start = rbm_start, .next = rbm_next, .show = rbm_show, .stop = rbm_stop, }; int ring_buffer_meta_seq_init(struct file *file, struct trace_buffer *buffer, int cpu) { struct seq_file *m; int ret; ret = seq_open(file, &rb_meta_seq_ops); if (ret) return ret; m = file->private_data; m->private = buffer->buffers[cpu]; return 0; } /* Map the buffer_pages to the previous head and commit pages */ static void rb_meta_buffer_update(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *bpage) { struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; if (meta->head_buffer == (unsigned long)bpage->page) cpu_buffer->head_page = bpage; if (meta->commit_buffer == (unsigned long)bpage->page) { cpu_buffer->commit_page = bpage; cpu_buffer->tail_page = bpage; } } static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, long nr_pages, struct list_head *pages) { struct trace_buffer *buffer = cpu_buffer->buffer; struct ring_buffer_cpu_meta *meta = NULL; struct buffer_page *bpage, *tmp; bool user_thread = current->mm != NULL; gfp_t mflags; long i; /* * Check if the available memory is there first. * Note, si_mem_available() only gives us a rough estimate of available * memory. It may not be accurate. But we don't care, we just want * to prevent doing any allocation when it is obvious that it is * not going to succeed. */ i = si_mem_available(); if (i < nr_pages) return -ENOMEM; /* * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails * gracefully without invoking oom-killer and the system is not * destabilized. */ mflags = GFP_KERNEL | __GFP_RETRY_MAYFAIL; /* * If a user thread allocates too much, and si_mem_available() * reports there's enough memory, even though there is not. * Make sure the OOM killer kills this thread. This can happen * even with RETRY_MAYFAIL because another task may be doing * an allocation after this task has taken all memory. * This is the task the OOM killer needs to take out during this * loop, even if it was triggered by an allocation somewhere else. */ if (user_thread) set_current_oom_origin(); if (buffer->range_addr_start) meta = rb_range_meta(buffer, nr_pages, cpu_buffer->cpu); for (i = 0; i < nr_pages; i++) { struct page *page; bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), mflags, cpu_to_node(cpu_buffer->cpu)); if (!bpage) goto free_pages; rb_check_bpage(cpu_buffer, bpage); /* * Append the pages as for mapped buffers we want to keep * the order */ list_add_tail(&bpage->list, pages); if (meta) { /* A range was given. Use that for the buffer page */ bpage->page = rb_range_buffer(cpu_buffer, i + 1); if (!bpage->page) goto free_pages; /* If this is valid from a previous boot */ if (meta->head_buffer) rb_meta_buffer_update(cpu_buffer, bpage); bpage->range = 1; bpage->id = i + 1; } else { page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), mflags | __GFP_COMP | __GFP_ZERO, cpu_buffer->buffer->subbuf_order); if (!page) goto free_pages; bpage->page = page_address(page); rb_init_page(bpage->page); } bpage->order = cpu_buffer->buffer->subbuf_order; if (user_thread && fatal_signal_pending(current)) goto free_pages; } if (user_thread) clear_current_oom_origin(); return 0; free_pages: list_for_each_entry_safe(bpage, tmp, pages, list) { list_del_init(&bpage->list); free_buffer_page(bpage); } if (user_thread) clear_current_oom_origin(); return -ENOMEM; } static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) { LIST_HEAD(pages); WARN_ON(!nr_pages); if (__rb_allocate_pages(cpu_buffer, nr_pages, &pages)) return -ENOMEM; /* * The ring buffer page list is a circular list that does not * start and end with a list head. All page list items point to * other pages. */ cpu_buffer->pages = pages.next; list_del(&pages); cpu_buffer->nr_pages = nr_pages; rb_check_pages(cpu_buffer); return 0; } static struct ring_buffer_per_cpu * rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu) { struct ring_buffer_per_cpu *cpu_buffer __free(kfree) = NULL; struct ring_buffer_cpu_meta *meta; struct buffer_page *bpage; struct page *page; int ret; cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()), GFP_KERNEL, cpu_to_node(cpu)); if (!cpu_buffer) return NULL; cpu_buffer->cpu = cpu; cpu_buffer->buffer = buffer; raw_spin_lock_init(&cpu_buffer->reader_lock); lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler); init_completion(&cpu_buffer->update_done); init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters); init_waitqueue_head(&cpu_buffer->irq_work.waiters); init_waitqueue_head(&cpu_buffer->irq_work.full_waiters); mutex_init(&cpu_buffer->mapping_lock); bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), GFP_KERNEL, cpu_to_node(cpu)); if (!bpage) return NULL; rb_check_bpage(cpu_buffer, bpage); cpu_buffer->reader_page = bpage; if (buffer->range_addr_start) { /* * Range mapped buffers have the same restrictions as memory * mapped ones do. */ cpu_buffer->mapped = 1; cpu_buffer->ring_meta = rb_range_meta(buffer, nr_pages, cpu); bpage->page = rb_range_buffer(cpu_buffer, 0); if (!bpage->page) goto fail_free_reader; if (cpu_buffer->ring_meta->head_buffer) rb_meta_buffer_update(cpu_buffer, bpage); bpage->range = 1; } else { page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_COMP | __GFP_ZERO, cpu_buffer->buffer->subbuf_order); if (!page) goto fail_free_reader; bpage->page = page_address(page); rb_init_page(bpage->page); } INIT_LIST_HEAD(&cpu_buffer->reader_page->list); INIT_LIST_HEAD(&cpu_buffer->new_pages); ret = rb_allocate_pages(cpu_buffer, nr_pages); if (ret < 0) goto fail_free_reader; rb_meta_validate_events(cpu_buffer); /* If the boot meta was valid then this has already been updated */ meta = cpu_buffer->ring_meta; if (!meta || !meta->head_buffer || !cpu_buffer->head_page || !cpu_buffer->commit_page || !cpu_buffer->tail_page) { if (meta && meta->head_buffer && (cpu_buffer->head_page || cpu_buffer->commit_page || cpu_buffer->tail_page)) { pr_warn("Ring buffer meta buffers not all mapped\n"); if (!cpu_buffer->head_page) pr_warn(" Missing head_page\n"); if (!cpu_buffer->commit_page) pr_warn(" Missing commit_page\n"); if (!cpu_buffer->tail_page) pr_warn(" Missing tail_page\n"); } cpu_buffer->head_page = list_entry(cpu_buffer->pages, struct buffer_page, list); cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; rb_head_page_activate(cpu_buffer); if (cpu_buffer->ring_meta) meta->commit_buffer = meta->head_buffer; } else { /* The valid meta buffer still needs to activate the head page */ rb_head_page_activate(cpu_buffer); } return_ptr(cpu_buffer); fail_free_reader: free_buffer_page(cpu_buffer->reader_page); return NULL; } static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) { struct list_head *head = cpu_buffer->pages; struct buffer_page *bpage, *tmp; irq_work_sync(&cpu_buffer->irq_work.work); free_buffer_page(cpu_buffer->reader_page); if (head) { rb_head_page_deactivate(cpu_buffer); list_for_each_entry_safe(bpage, tmp, head, list) { list_del_init(&bpage->list); free_buffer_page(bpage); } bpage = list_entry(head, struct buffer_page, list); free_buffer_page(bpage); } free_page((unsigned long)cpu_buffer->free_page); kfree(cpu_buffer); } static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags, int order, unsigned long start, unsigned long end, unsigned long scratch_size, struct lock_class_key *key) { struct trace_buffer *buffer __free(kfree) = NULL; long nr_pages; int subbuf_size; int bsize; int cpu; int ret; /* keep it in its own cache line */ buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), GFP_KERNEL); if (!buffer) return NULL; if (!zalloc_cpumask_var(&buffer->cpumask, GFP_KERNEL)) return NULL; buffer->subbuf_order = order; subbuf_size = (PAGE_SIZE << order); buffer->subbuf_size = subbuf_size - BUF_PAGE_HDR_SIZE; /* Max payload is buffer page size - header (8bytes) */ buffer->max_data_size = buffer->subbuf_size - (sizeof(u32) * 2); buffer->flags = flags; buffer->clock = trace_clock_local; buffer->reader_lock_key = key; init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters); init_waitqueue_head(&buffer->irq_work.waiters); buffer->cpus = nr_cpu_ids; bsize = sizeof(void *) * nr_cpu_ids; buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()), GFP_KERNEL); if (!buffer->buffers) goto fail_free_cpumask; /* If start/end are specified, then that overrides size */ if (start && end) { unsigned long buffers_start; unsigned long ptr; int n; /* Make sure that start is word aligned */ start = ALIGN(start, sizeof(long)); /* scratch_size needs to be aligned too */ scratch_size = ALIGN(scratch_size, sizeof(long)); /* Subtract the buffer meta data and word aligned */ buffers_start = start + sizeof(struct ring_buffer_cpu_meta); buffers_start = ALIGN(buffers_start, sizeof(long)); buffers_start += scratch_size; /* Calculate the size for the per CPU data */ size = end - buffers_start; size = size / nr_cpu_ids; /* * The number of sub-buffers (nr_pages) is determined by the * total size allocated minus the meta data size. * Then that is divided by the number of per CPU buffers * needed, plus account for the integer array index that * will be appended to the meta data. */ nr_pages = (size - sizeof(struct ring_buffer_cpu_meta)) / (subbuf_size + sizeof(int)); /* Need at least two pages plus the reader page */ if (nr_pages < 3) goto fail_free_buffers; again: /* Make sure that the size fits aligned */ for (n = 0, ptr = buffers_start; n < nr_cpu_ids; n++) { ptr += sizeof(struct ring_buffer_cpu_meta) + sizeof(int) * nr_pages; ptr = ALIGN(ptr, subbuf_size); ptr += subbuf_size * nr_pages; } if (ptr > end) { if (nr_pages <= 3) goto fail_free_buffers; nr_pages--; goto again; } /* nr_pages should not count the reader page */ nr_pages--; buffer->range_addr_start = start; buffer->range_addr_end = end; rb_range_meta_init(buffer, nr_pages, scratch_size); } else { /* need at least two pages */ nr_pages = DIV_ROUND_UP(size, buffer->subbuf_size); if (nr_pages < 2) nr_pages = 2; } cpu = raw_smp_processor_id(); cpumask_set_cpu(cpu, buffer->cpumask); buffer->buffers[cpu] = rb_allocate_cpu_buffer(buffer, nr_pages, cpu); if (!buffer->buffers[cpu]) goto fail_free_buffers; ret = cpuhp_state_add_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node); if (ret < 0) goto fail_free_buffers; mutex_init(&buffer->mutex); return_ptr(buffer); fail_free_buffers: for_each_buffer_cpu(buffer, cpu) { if (buffer->buffers[cpu]) rb_free_cpu_buffer(buffer->buffers[cpu]); } kfree(buffer->buffers); fail_free_cpumask: free_cpumask_var(buffer->cpumask); return NULL; } /** * __ring_buffer_alloc - allocate a new ring_buffer * @size: the size in bytes per cpu that is needed. * @flags: attributes to set for the ring buffer. * @key: ring buffer reader_lock_key. * * Currently the only flag that is available is the RB_FL_OVERWRITE * flag. This flag means that the buffer will overwrite old data * when the buffer wraps. If this flag is not set, the buffer will * drop data when the tail hits the head. */ struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key) { /* Default buffer page size - one system page */ return alloc_buffer(size, flags, 0, 0, 0, 0, key); } EXPORT_SYMBOL_GPL(__ring_buffer_alloc); /** * __ring_buffer_alloc_range - allocate a new ring_buffer from existing memory * @size: the size in bytes per cpu that is needed. * @flags: attributes to set for the ring buffer. * @order: sub-buffer order * @start: start of allocated range * @range_size: size of allocated range * @scratch_size: size of scratch area (for preallocated memory buffers) * @key: ring buffer reader_lock_key. * * Currently the only flag that is available is the RB_FL_OVERWRITE * flag. This flag means that the buffer will overwrite old data * when the buffer wraps. If this flag is not set, the buffer will * drop data when the tail hits the head. */ struct trace_buffer *__ring_buffer_alloc_range(unsigned long size, unsigned flags, int order, unsigned long start, unsigned long range_size, unsigned long scratch_size, struct lock_class_key *key) { return alloc_buffer(size, flags, order, start, start + range_size, scratch_size, key); } void *ring_buffer_meta_scratch(struct trace_buffer *buffer, unsigned int *size) { struct ring_buffer_meta *meta; void *ptr; if (!buffer || !buffer->meta) return NULL; meta = buffer->meta; ptr = (void *)ALIGN((unsigned long)meta + sizeof(*meta), sizeof(long)); if (size) *size = (void *)meta + meta->buffers_offset - ptr; return ptr; } /** * ring_buffer_free - free a ring buffer. * @buffer: the buffer to free. */ void ring_buffer_free(struct trace_buffer *buffer) { int cpu; cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node); irq_work_sync(&buffer->irq_work.work); for_each_buffer_cpu(buffer, cpu) rb_free_cpu_buffer(buffer->buffers[cpu]); kfree(buffer->buffers); free_cpumask_var(buffer->cpumask); kfree(buffer); } EXPORT_SYMBOL_GPL(ring_buffer_free); void ring_buffer_set_clock(struct trace_buffer *buffer, u64 (*clock)(void)) { buffer->clock = clock; } void ring_buffer_set_time_stamp_abs(struct trace_buffer *buffer, bool abs) { buffer->time_stamp_abs = abs; } bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer) { return buffer->time_stamp_abs; } static inline unsigned long rb_page_entries(struct buffer_page *bpage) { return local_read(&bpage->entries) & RB_WRITE_MASK; } static inline unsigned long rb_page_write(struct buffer_page *bpage) { return local_read(&bpage->write) & RB_WRITE_MASK; } static bool rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) { struct list_head *tail_page, *to_remove, *next_page; struct buffer_page *to_remove_page, *tmp_iter_page; struct buffer_page *last_page, *first_page; unsigned long nr_removed; unsigned long head_bit; int page_entries; head_bit = 0; raw_spin_lock_irq(&cpu_buffer->reader_lock); atomic_inc(&cpu_buffer->record_disabled); /* * We don't race with the readers since we have acquired the reader * lock. We also don't race with writers after disabling recording. * This makes it easy to figure out the first and the last page to be * removed from the list. We unlink all the pages in between including * the first and last pages. This is done in a busy loop so that we * lose the least number of traces. * The pages are freed after we restart recording and unlock readers. */ tail_page = &cpu_buffer->tail_page->list; /* * tail page might be on reader page, we remove the next page * from the ring buffer */ if (cpu_buffer->tail_page == cpu_buffer->reader_page) tail_page = rb_list_head(tail_page->next); to_remove = tail_page; /* start of pages to remove */ first_page = list_entry(rb_list_head(to_remove->next), struct buffer_page, list); for (nr_removed = 0; nr_removed < nr_pages; nr_removed++) { to_remove = rb_list_head(to_remove)->next; head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD; } /* Read iterators need to reset themselves when some pages removed */ cpu_buffer->pages_removed += nr_removed; next_page = rb_list_head(to_remove)->next; /* * Now we remove all pages between tail_page and next_page. * Make sure that we have head_bit value preserved for the * next page */ tail_page->next = (struct list_head *)((unsigned long)next_page | head_bit); next_page = rb_list_head(next_page); next_page->prev = tail_page; /* make sure pages points to a valid page in the ring buffer */ cpu_buffer->pages = next_page; cpu_buffer->cnt++; /* update head page */ if (head_bit) cpu_buffer->head_page = list_entry(next_page, struct buffer_page, list); /* pages are removed, resume tracing and then free the pages */ atomic_dec(&cpu_buffer->record_disabled); raw_spin_unlock_irq(&cpu_buffer->reader_lock); RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)); /* last buffer page to remove */ last_page = list_entry(rb_list_head(to_remove), struct buffer_page, list); tmp_iter_page = first_page; do { cond_resched(); to_remove_page = tmp_iter_page; rb_inc_page(&tmp_iter_page); /* update the counters */ page_entries = rb_page_entries(to_remove_page); if (page_entries) { /* * If something was added to this page, it was full * since it is not the tail page. So we deduct the * bytes consumed in ring buffer from here. * Increment overrun to account for the lost events. */ local_add(page_entries, &cpu_buffer->overrun); local_sub(rb_page_commit(to_remove_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); } /* * We have already removed references to this list item, just * free up the buffer_page and its page */ free_buffer_page(to_remove_page); nr_removed--; } while (to_remove_page != last_page); RB_WARN_ON(cpu_buffer, nr_removed); return nr_removed == 0; } static bool rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer) { struct list_head *pages = &cpu_buffer->new_pages; unsigned long flags; bool success; int retries; /* Can be called at early boot up, where interrupts must not been enabled */ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); /* * We are holding the reader lock, so the reader page won't be swapped * in the ring buffer. Now we are racing with the writer trying to * move head page and the tail page. * We are going to adapt the reader page update process where: * 1. We first splice the start and end of list of new pages between * the head page and its previous page. * 2. We cmpxchg the prev_page->next to point from head page to the * start of new pages list. * 3. Finally, we update the head->prev to the end of new list. * * We will try this process 10 times, to make sure that we don't keep * spinning. */ retries = 10; success = false; while (retries--) { struct list_head *head_page, *prev_page; struct list_head *last_page, *first_page; struct list_head *head_page_with_bit; struct buffer_page *hpage = rb_set_head_page(cpu_buffer); if (!hpage) break; head_page = &hpage->list; prev_page = head_page->prev; first_page = pages->next; last_page = pages->prev; head_page_with_bit = (struct list_head *) ((unsigned long)head_page | RB_PAGE_HEAD); last_page->next = head_page_with_bit; first_page->prev = prev_page; /* caution: head_page_with_bit gets updated on cmpxchg failure */ if (try_cmpxchg(&prev_page->next, &head_page_with_bit, first_page)) { /* * yay, we replaced the page pointer to our new list, * now, we just have to update to head page's prev * pointer to point to end of list */ head_page->prev = last_page; cpu_buffer->cnt++; success = true; break; } } if (success) INIT_LIST_HEAD(pages); /* * If we weren't successful in adding in new pages, warn and stop * tracing */ RB_WARN_ON(cpu_buffer, !success); raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); /* free pages if they weren't inserted */ if (!success) { struct buffer_page *bpage, *tmp; list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages, list) { list_del_init(&bpage->list); free_buffer_page(bpage); } } return success; } static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer) { bool success; if (cpu_buffer->nr_pages_to_update > 0) success = rb_insert_pages(cpu_buffer); else success = rb_remove_pages(cpu_buffer, -cpu_buffer->nr_pages_to_update); if (success) cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update; } static void update_pages_handler(struct work_struct *work) { struct ring_buffer_per_cpu *cpu_buffer = container_of(work, struct ring_buffer_per_cpu, update_pages_work); rb_update_pages(cpu_buffer); complete(&cpu_buffer->update_done); } /** * ring_buffer_resize - resize the ring buffer * @buffer: the buffer to resize. * @size: the new size. * @cpu_id: the cpu buffer to resize * * Minimum size is 2 * buffer->subbuf_size. * * Returns 0 on success and < 0 on failure. */ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, int cpu_id) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long nr_pages; int cpu, err; /* * Always succeed at resizing a non-existent buffer: */ if (!buffer) return 0; /* Make sure the requested buffer exists */ if (cpu_id != RING_BUFFER_ALL_CPUS && !cpumask_test_cpu(cpu_id, buffer->cpumask)) return 0; nr_pages = DIV_ROUND_UP(size, buffer->subbuf_size); /* we need a minimum of two pages */ if (nr_pages < 2) nr_pages = 2; /* * Keep CPUs from coming online while resizing to synchronize * with new per CPU buffers being created. */ guard(cpus_read_lock)(); /* prevent another thread from changing buffer sizes */ mutex_lock(&buffer->mutex); atomic_inc(&buffer->resizing); if (cpu_id == RING_BUFFER_ALL_CPUS) { /* * Don't succeed if resizing is disabled, as a reader might be * manipulating the ring buffer and is expecting a sane state while * this is true. */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; if (atomic_read(&cpu_buffer->resize_disabled)) { err = -EBUSY; goto out_err_unlock; } } /* calculate the pages to update */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; cpu_buffer->nr_pages_to_update = nr_pages - cpu_buffer->nr_pages; /* * nothing more to do for removing pages or no update */ if (cpu_buffer->nr_pages_to_update <= 0) continue; /* * to add pages, make sure all new pages can be * allocated without receiving ENOMEM */ INIT_LIST_HEAD(&cpu_buffer->new_pages); if (__rb_allocate_pages(cpu_buffer, cpu_buffer->nr_pages_to_update, &cpu_buffer->new_pages)) { /* not enough memory for new pages */ err = -ENOMEM; goto out_err; } cond_resched(); } /* * Fire off all the required work handlers * We can't schedule on offline CPUs, but it's not necessary * since we can change their buffer sizes without any race. */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; if (!cpu_buffer->nr_pages_to_update) continue; /* Can't run something on an offline CPU. */ if (!cpu_online(cpu)) { rb_update_pages(cpu_buffer); cpu_buffer->nr_pages_to_update = 0; } else { /* Run directly if possible. */ migrate_disable(); if (cpu != smp_processor_id()) { migrate_enable(); schedule_work_on(cpu, &cpu_buffer->update_pages_work); } else { update_pages_handler(&cpu_buffer->update_pages_work); migrate_enable(); } } } /* wait for all the updates to complete */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; if (!cpu_buffer->nr_pages_to_update) continue; if (cpu_online(cpu)) wait_for_completion(&cpu_buffer->update_done); cpu_buffer->nr_pages_to_update = 0; } } else { cpu_buffer = buffer->buffers[cpu_id]; if (nr_pages == cpu_buffer->nr_pages) goto out; /* * Don't succeed if resizing is disabled, as a reader might be * manipulating the ring buffer and is expecting a sane state while * this is true. */ if (atomic_read(&cpu_buffer->resize_disabled)) { err = -EBUSY; goto out_err_unlock; } cpu_buffer->nr_pages_to_update = nr_pages - cpu_buffer->nr_pages; INIT_LIST_HEAD(&cpu_buffer->new_pages); if (cpu_buffer->nr_pages_to_update > 0 && __rb_allocate_pages(cpu_buffer, cpu_buffer->nr_pages_to_update, &cpu_buffer->new_pages)) { err = -ENOMEM; goto out_err; } /* Can't run something on an offline CPU. */ if (!cpu_online(cpu_id)) rb_update_pages(cpu_buffer); else { /* Run directly if possible. */ migrate_disable(); if (cpu_id == smp_processor_id()) { rb_update_pages(cpu_buffer); migrate_enable(); } else { migrate_enable(); schedule_work_on(cpu_id, &cpu_buffer->update_pages_work); wait_for_completion(&cpu_buffer->update_done); } } cpu_buffer->nr_pages_to_update = 0; } out: /* * The ring buffer resize can happen with the ring buffer * enabled, so that the update disturbs the tracing as little * as possible. But if the buffer is disabled, we do not need * to worry about that, and we can take the time to verify * that the buffer is not corrupt. */ if (atomic_read(&buffer->record_disabled)) { atomic_inc(&buffer->record_disabled); /* * Even though the buffer was disabled, we must make sure * that it is truly disabled before calling rb_check_pages. * There could have been a race between checking * record_disable and incrementing it. */ synchronize_rcu(); for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; rb_check_pages(cpu_buffer); } atomic_dec(&buffer->record_disabled); } atomic_dec(&buffer->resizing); mutex_unlock(&buffer->mutex); return 0; out_err: for_each_buffer_cpu(buffer, cpu) { struct buffer_page *bpage, *tmp; cpu_buffer = buffer->buffers[cpu]; cpu_buffer->nr_pages_to_update = 0; if (list_empty(&cpu_buffer->new_pages)) continue; list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages, list) { list_del_init(&bpage->list); free_buffer_page(bpage); } } out_err_unlock: atomic_dec(&buffer->resizing); mutex_unlock(&buffer->mutex); return err; } EXPORT_SYMBOL_GPL(ring_buffer_resize); void ring_buffer_change_overwrite(struct trace_buffer *buffer, int val) { mutex_lock(&buffer->mutex); if (val) buffer->flags |= RB_FL_OVERWRITE; else buffer->flags &= ~RB_FL_OVERWRITE; mutex_unlock(&buffer->mutex); } EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite); static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index) { return bpage->page->data + index; } static __always_inline struct ring_buffer_event * rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) { return __rb_page_index(cpu_buffer->reader_page, cpu_buffer->reader_page->read); } static struct ring_buffer_event * rb_iter_head_event(struct ring_buffer_iter *iter) { struct ring_buffer_event *event; struct buffer_page *iter_head_page = iter->head_page; unsigned long commit; unsigned length; if (iter->head != iter->next_event) return iter->event; /* * When the writer goes across pages, it issues a cmpxchg which * is a mb(), which will synchronize with the rmb here. * (see rb_tail_page_update() and __rb_reserve_next()) */ commit = rb_page_commit(iter_head_page); smp_rmb(); /* An event needs to be at least 8 bytes in size */ if (iter->head > commit - 8) goto reset; event = __rb_page_index(iter_head_page, iter->head); length = rb_event_length(event); /* * READ_ONCE() doesn't work on functions and we don't want the * compiler doing any crazy optimizations with length. */ barrier(); if ((iter->head + length) > commit || length > iter->event_size) /* Writer corrupted the read? */ goto reset; memcpy(iter->event, event, length); /* * If the page stamp is still the same after this rmb() then the * event was safely copied without the writer entering the page. */ smp_rmb(); /* Make sure the page didn't change since we read this */ if (iter->page_stamp != iter_head_page->page->time_stamp || commit > rb_page_commit(iter_head_page)) goto reset; iter->next_event = iter->head + length; return iter->event; reset: /* Reset to the beginning */ iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp; iter->head = 0; iter->next_event = 0; iter->missed_events = 1; return NULL; } /* Size is determined by what has been committed */ static __always_inline unsigned rb_page_size(struct buffer_page *bpage) { return rb_page_commit(bpage) & ~RB_MISSED_MASK; } static __always_inline unsigned rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer) { return rb_page_commit(cpu_buffer->commit_page); } static __always_inline unsigned rb_event_index(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event) { unsigned long addr = (unsigned long)event; addr &= (PAGE_SIZE << cpu_buffer->buffer->subbuf_order) - 1; return addr - BUF_PAGE_HDR_SIZE; } static void rb_inc_iter(struct ring_buffer_iter *iter) { struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; /* * The iterator could be on the reader page (it starts there). * But the head could have moved, since the reader was * found. Check for this case and assign the iterator * to the head page instead of next. */ if (iter->head_page == cpu_buffer->reader_page) iter->head_page = rb_set_head_page(cpu_buffer); else rb_inc_page(&iter->head_page); iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp; iter->head = 0; iter->next_event = 0; } /* Return the index into the sub-buffers for a given sub-buffer */ static int rb_meta_subbuf_idx(struct ring_buffer_cpu_meta *meta, void *subbuf) { void *subbuf_array; subbuf_array = (void *)meta + sizeof(int) * meta->nr_subbufs; subbuf_array = (void *)ALIGN((unsigned long)subbuf_array, meta->subbuf_size); return (subbuf - subbuf_array) / meta->subbuf_size; } static void rb_update_meta_head(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *next_page) { struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; unsigned long old_head = (unsigned long)next_page->page; unsigned long new_head; rb_inc_page(&next_page); new_head = (unsigned long)next_page->page; /* * Only move it forward once, if something else came in and * moved it forward, then we don't want to touch it. */ (void)cmpxchg(&meta->head_buffer, old_head, new_head); } static void rb_update_meta_reader(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *reader) { struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta; void *old_reader = cpu_buffer->reader_page->page; void *new_reader = reader->page; int id; id = reader->id; cpu_buffer->reader_page->id = id; reader->id = 0; meta->buffers[0] = rb_meta_subbuf_idx(meta, new_reader); meta->buffers[id] = rb_meta_subbuf_idx(meta, old_reader); /* The head pointer is the one after the reader */ rb_update_meta_head(cpu_buffer, reader); } /* * rb_handle_head_page - writer hit the head page * * Returns: +1 to retry page * 0 to continue * -1 on error */ static int rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *tail_page, struct buffer_page *next_page) { struct buffer_page *new_head; int entries; int type; int ret; entries = rb_page_entries(next_page); /* * The hard part is here. We need to move the head * forward, and protect against both readers on * other CPUs and writers coming in via interrupts. */ type = rb_head_page_set_update(cpu_buffer, next_page, tail_page, RB_PAGE_HEAD); /* * type can be one of four: * NORMAL - an interrupt already moved it for us * HEAD - we are the first to get here. * UPDATE - we are the interrupt interrupting * a current move. * MOVED - a reader on another CPU moved the next * pointer to its reader page. Give up * and try again. */ switch (type) { case RB_PAGE_HEAD: /* * We changed the head to UPDATE, thus * it is our responsibility to update * the counters. */ local_add(entries, &cpu_buffer->overrun); local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); if (cpu_buffer->ring_meta) rb_update_meta_head(cpu_buffer, next_page); /* * The entries will be zeroed out when we move the * tail page. */ /* still more to do */ break; case RB_PAGE_UPDATE: /* * This is an interrupt that interrupt the * previous update. Still more to do. */ break; case RB_PAGE_NORMAL: /* * An interrupt came in before the update * and processed this for us. * Nothing left to do. */ return 1; case RB_PAGE_MOVED: /* * The reader is on another CPU and just did * a swap with our next_page. * Try again. */ return 1; default: RB_WARN_ON(cpu_buffer, 1); /* WTF??? */ return -1; } /* * Now that we are here, the old head pointer is * set to UPDATE. This will keep the reader from * swapping the head page with the reader page. * The reader (on another CPU) will spin till * we are finished. * * We just need to protect against interrupts * doing the job. We will set the next pointer * to HEAD. After that, we set the old pointer * to NORMAL, but only if it was HEAD before. * otherwise we are an interrupt, and only * want the outer most commit to reset it. */ new_head = next_page; rb_inc_page(&new_head); ret = rb_head_page_set_head(cpu_buffer, new_head, next_page, RB_PAGE_NORMAL); /* * Valid returns are: * HEAD - an interrupt came in and already set it. * NORMAL - One of two things: * 1) We really set it. * 2) A bunch of interrupts came in and moved * the page forward again. */ switch (ret) { case RB_PAGE_HEAD: case RB_PAGE_NORMAL: /* OK */ break; default: RB_WARN_ON(cpu_buffer, 1); return -1; } /* * It is possible that an interrupt came in, * set the head up, then more interrupts came in * and moved it again. When we get back here, * the page would have been set to NORMAL but we * just set it back to HEAD. * * How do you detect this? Well, if that happened * the tail page would have moved. */ if (ret == RB_PAGE_NORMAL) { struct buffer_page *buffer_tail_page; buffer_tail_page = READ_ONCE(cpu_buffer->tail_page); /* * If the tail had moved passed next, then we need * to reset the pointer. */ if (buffer_tail_page != tail_page && buffer_tail_page != next_page) rb_head_page_set_normal(cpu_buffer, new_head, next_page, RB_PAGE_HEAD); } /* * If this was the outer most commit (the one that * changed the original pointer from HEAD to UPDATE), * then it is up to us to reset it to NORMAL. */ if (type == RB_PAGE_HEAD) { ret = rb_head_page_set_normal(cpu_buffer, next_page, tail_page, RB_PAGE_UPDATE); if (RB_WARN_ON(cpu_buffer, ret != RB_PAGE_UPDATE)) return -1; } return 0; } static inline void rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, unsigned long tail, struct rb_event_info *info) { unsigned long bsize = READ_ONCE(cpu_buffer->buffer->subbuf_size); struct buffer_page *tail_page = info->tail_page; struct ring_buffer_event *event; unsigned long length = info->length; /* * Only the event that crossed the page boundary * must fill the old tail_page with padding. */ if (tail >= bsize) { /* * If the page was filled, then we still need * to update the real_end. Reset it to zero * and the reader will ignore it. */ if (tail == bsize) tail_page->real_end = 0; local_sub(length, &tail_page->write); return; } event = __rb_page_index(tail_page, tail); /* * Save the original length to the meta data. * This will be used by the reader to add lost event * counter. */ tail_page->real_end = tail; /* * If this event is bigger than the minimum size, then * we need to be careful that we don't subtract the * write counter enough to allow another writer to slip * in on this page. * We put in a discarded commit instead, to make sure * that this space is not used again, and this space will * not be accounted into 'entries_bytes'. * * If we are less than the minimum size, we don't need to * worry about it. */ if (tail > (bsize - RB_EVNT_MIN_SIZE)) { /* No room for any events */ /* Mark the rest of the page with padding */ rb_event_set_padding(event); /* Make sure the padding is visible before the write update */ smp_wmb(); /* Set the write back to the previous setting */ local_sub(length, &tail_page->write); return; } /* Put in a discarded event */ event->array[0] = (bsize - tail) - RB_EVNT_HDR_SIZE; event->type_len = RINGBUF_TYPE_PADDING; /* time delta must be non zero */ event->time_delta = 1; /* account for padding bytes */ local_add(bsize - tail, &cpu_buffer->entries_bytes); /* Make sure the padding is visible before the tail_page->write update */ smp_wmb(); /* Set write to end of buffer */ length = (tail + length) - bsize; local_sub(length, &tail_page->write); } static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer); /* * This is the slow path, force gcc not to inline it. */ static noinline struct ring_buffer_event * rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, unsigned long tail, struct rb_event_info *info) { struct buffer_page *tail_page = info->tail_page; struct buffer_page *commit_page = cpu_buffer->commit_page; struct trace_buffer *buffer = cpu_buffer->buffer; struct buffer_page *next_page; int ret; next_page = tail_page; rb_inc_page(&next_page); /* * If for some reason, we had an interrupt storm that made * it all the way around the buffer, bail, and warn * about it. */ if (unlikely(next_page == commit_page)) { local_inc(&cpu_buffer->commit_overrun); goto out_reset; } /* * This is where the fun begins! * * We are fighting against races between a reader that * could be on another CPU trying to swap its reader * page with the buffer head. * * We are also fighting against interrupts coming in and * moving the head or tail on us as well. * * If the next page is the head page then we have filled * the buffer, unless the commit page is still on the * reader page. */ if (rb_is_head_page(next_page, &tail_page->list)) { /* * If the commit is not on the reader page, then * move the header page. */ if (!rb_is_reader_page(cpu_buffer->commit_page)) { /* * If we are not in overwrite mode, * this is easy, just stop here. */ if (!(buffer->flags & RB_FL_OVERWRITE)) { local_inc(&cpu_buffer->dropped_events); goto out_reset; } ret = rb_handle_head_page(cpu_buffer, tail_page, next_page); if (ret < 0) goto out_reset; if (ret) goto out_again; } else { /* * We need to be careful here too. The * commit page could still be on the reader * page. We could have a small buffer, and * have filled up the buffer with events * from interrupts and such, and wrapped. * * Note, if the tail page is also on the * reader_page, we let it move out. */ if (unlikely((cpu_buffer->commit_page != cpu_buffer->tail_page) && (cpu_buffer->commit_page == cpu_buffer->reader_page))) { local_inc(&cpu_buffer->commit_overrun); goto out_reset; } } } rb_tail_page_update(cpu_buffer, tail_page, next_page); out_again: rb_reset_tail(cpu_buffer, tail, info); /* Commit what we have for now. */ rb_end_commit(cpu_buffer); /* rb_end_commit() decs committing */ local_inc(&cpu_buffer->committing); /* fail and let the caller try again */ return ERR_PTR(-EAGAIN); out_reset: /* reset write */ rb_reset_tail(cpu_buffer, tail, info); return NULL; } /* Slow path */ static struct ring_buffer_event * rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event, u64 delta, bool abs) { if (abs) event->type_len = RINGBUF_TYPE_TIME_STAMP; else event->type_len = RINGBUF_TYPE_TIME_EXTEND; /* Not the first event on the page, or not delta? */ if (abs || rb_event_index(cpu_buffer, event)) { event->time_delta = delta & TS_MASK; event->array[0] = delta >> TS_SHIFT; } else { /* nope, just zero it */ event->time_delta = 0; event->array[0] = 0; } return skip_time_extend(event); } #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK static inline bool sched_clock_stable(void) { return true; } #endif static void rb_check_timestamp(struct ring_buffer_per_cpu *cpu_buffer, struct rb_event_info *info) { u64 write_stamp; WARN_ONCE(1, "Delta way too big! %llu ts=%llu before=%llu after=%llu write stamp=%llu\n%s", (unsigned long long)info->delta, (unsigned long long)info->ts, (unsigned long long)info->before, (unsigned long long)info->after, (unsigned long long)({rb_time_read(&cpu_buffer->write_stamp, &write_stamp); write_stamp;}), sched_clock_stable() ? "" : "If you just came from a suspend/resume,\n" "please switch to the trace global clock:\n" " echo global > /sys/kernel/tracing/trace_clock\n" "or add trace_clock=global to the kernel command line\n"); } static void rb_add_timestamp(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event **event, struct rb_event_info *info, u64 *delta, unsigned int *length) { bool abs = info-& |