1 2 3 3 3 2 1 1 1 1 1 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Syntek DV4000 (STK014) subdriver * * Copyright (C) 2008 Jean-Francois Moine (http://moinejf.free.fr) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "stk014" #include "gspca.h" #include "jpeg.h" MODULE_AUTHOR("Jean-Francois Moine <http://moinejf.free.fr>"); MODULE_DESCRIPTION("Syntek DV4000 (STK014) USB Camera Driver"); MODULE_LICENSE("GPL"); #define QUALITY 50 /* specific webcam descriptor */ struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ u8 jpeg_hdr[JPEG_HDR_SZ]; }; static const struct v4l2_pix_format vga_mode[] = { {320, 240, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 320 * 240 * 3 / 8 + 590, .colorspace = V4L2_COLORSPACE_JPEG, .priv = 1}, {640, 480, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, .bytesperline = 640, .sizeimage = 640 * 480 * 3 / 8 + 590, .colorspace = V4L2_COLORSPACE_JPEG, .priv = 0}, }; /* -- read a register -- */ static u8 reg_r(struct gspca_dev *gspca_dev, __u16 index) { struct usb_device *dev = gspca_dev->dev; int ret; if (gspca_dev->usb_err < 0) return 0; ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), 0x00, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x00, index, gspca_dev->usb_buf, 1, 500); if (ret < 0) { pr_err("reg_r err %d\n", ret); gspca_dev->usb_err = ret; return 0; } return gspca_dev->usb_buf[0]; } /* -- write a register -- */ static void reg_w(struct gspca_dev *gspca_dev, __u16 index, __u16 value) { struct usb_device *dev = gspca_dev->dev; int ret; if (gspca_dev->usb_err < 0) return; ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0x01, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, NULL, 0, 500); if (ret < 0) { pr_err("reg_w err %d\n", ret); gspca_dev->usb_err = ret; } } /* -- get a bulk value (4 bytes) -- */ static void rcv_val(struct gspca_dev *gspca_dev, int ads) { struct usb_device *dev = gspca_dev->dev; int alen, ret; reg_w(gspca_dev, 0x634, (ads >> 16) & 0xff); reg_w(gspca_dev, 0x635, (ads >> 8) & 0xff); reg_w(gspca_dev, 0x636, ads & 0xff); reg_w(gspca_dev, 0x637, 0); reg_w(gspca_dev, 0x638, 4); /* len & 0xff */ reg_w(gspca_dev, 0x639, 0); /* len >> 8 */ reg_w(gspca_dev, 0x63a, 0); reg_w(gspca_dev, 0x63b, 0); reg_w(gspca_dev, 0x630, 5); if (gspca_dev->usb_err < 0) return; ret = usb_bulk_msg(dev, usb_rcvbulkpipe(dev, 0x05), gspca_dev->usb_buf, 4, /* length */ &alen, 500); /* timeout in milliseconds */ if (ret < 0) { pr_err("rcv_val err %d\n", ret); gspca_dev->usb_err = ret; } } /* -- send a bulk value -- */ static void snd_val(struct gspca_dev *gspca_dev, int ads, unsigned int val) { struct usb_device *dev = gspca_dev->dev; int alen, ret; __u8 seq = 0; if (ads == 0x003f08) { reg_r(gspca_dev, 0x0704); seq = reg_r(gspca_dev, 0x0705); reg_r(gspca_dev, 0x0650); reg_w(gspca_dev, 0x654, seq); } else { reg_w(gspca_dev, 0x654, (ads >> 16) & 0xff); } reg_w(gspca_dev, 0x655, (ads >> 8) & 0xff); reg_w(gspca_dev, 0x656, ads & 0xff); reg_w(gspca_dev, 0x657, 0); reg_w(gspca_dev, 0x658, 0x04); /* size */ reg_w(gspca_dev, 0x659, 0); reg_w(gspca_dev, 0x65a, 0); reg_w(gspca_dev, 0x65b, 0); reg_w(gspca_dev, 0x650, 5); if (gspca_dev->usb_err < 0) return; gspca_dev->usb_buf[0] = val >> 24; gspca_dev->usb_buf[1] = val >> 16; gspca_dev->usb_buf[2] = val >> 8; gspca_dev->usb_buf[3] = val; ret = usb_bulk_msg(dev, usb_sndbulkpipe(dev, 6), gspca_dev->usb_buf, 4, &alen, 500); /* timeout in milliseconds */ if (ret < 0) { pr_err("snd_val err %d\n", ret); gspca_dev->usb_err = ret; } else { if (ads == 0x003f08) { seq += 4; seq &= 0x3f; reg_w(gspca_dev, 0x705, seq); } } } /* set a camera parameter */ static void set_par(struct gspca_dev *gspca_dev, int parval) { snd_val(gspca_dev, 0x003f08, parval); } static void setbrightness(struct gspca_dev *gspca_dev, s32 val) { int parval; parval = 0x06000000 /* whiteness */ + (val << 16); set_par(gspca_dev, parval); } static void setcontrast(struct gspca_dev *gspca_dev, s32 val) { int parval; parval = 0x07000000 /* contrast */ + (val << 16); set_par(gspca_dev, parval); } static void setcolors(struct gspca_dev *gspca_dev, s32 val) { int parval; parval = 0x08000000 /* saturation */ + (val << 16); set_par(gspca_dev, parval); } static void setlightfreq(struct gspca_dev *gspca_dev, s32 val) { set_par(gspca_dev, val == 1 ? 0x33640000 /* 50 Hz */ : 0x33780000); /* 60 Hz */ } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { gspca_dev->cam.cam_mode = vga_mode; gspca_dev->cam.nmodes = ARRAY_SIZE(vga_mode); return 0; } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { u8 ret; /* check if the device responds */ usb_set_interface(gspca_dev->dev, gspca_dev->iface, 1); ret = reg_r(gspca_dev, 0x0740); if (gspca_dev->usb_err >= 0) { if (ret != 0xff) { pr_err("init reg: 0x%02x\n", ret); gspca_dev->usb_err = -EIO; } } return gspca_dev->usb_err; } /* -- start the camera -- */ static int sd_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int ret, value; /* create the JPEG header */ jpeg_define(sd->jpeg_hdr, gspca_dev->pixfmt.height, gspca_dev->pixfmt.width, 0x22); /* JPEG 411 */ jpeg_set_qual(sd->jpeg_hdr, QUALITY); /* work on alternate 1 */ usb_set_interface(gspca_dev->dev, gspca_dev->iface, 1); set_par(gspca_dev, 0x10000000); set_par(gspca_dev, 0x00000000); set_par(gspca_dev, 0x8002e001); set_par(gspca_dev, 0x14000000); if (gspca_dev->pixfmt.width > 320) value = 0x8002e001; /* 640x480 */ else value = 0x4001f000; /* 320x240 */ set_par(gspca_dev, value); ret = usb_set_interface(gspca_dev->dev, gspca_dev->iface, gspca_dev->alt); if (ret < 0) { pr_err("set intf %d %d failed\n", gspca_dev->iface, gspca_dev->alt); gspca_dev->usb_err = ret; goto out; } reg_r(gspca_dev, 0x0630); rcv_val(gspca_dev, 0x000020); /* << (value ff ff ff ff) */ reg_r(gspca_dev, 0x0650); snd_val(gspca_dev, 0x000020, 0xffffffff); reg_w(gspca_dev, 0x0620, 0); reg_w(gspca_dev, 0x0630, 0); reg_w(gspca_dev, 0x0640, 0); reg_w(gspca_dev, 0x0650, 0); reg_w(gspca_dev, 0x0660, 0); set_par(gspca_dev, 0x09800000); /* Red ? */ set_par(gspca_dev, 0x0a800000); /* Green ? */ set_par(gspca_dev, 0x0b800000); /* Blue ? */ set_par(gspca_dev, 0x0d030000); /* Gamma ? */ /* start the video flow */ set_par(gspca_dev, 0x01000000); set_par(gspca_dev, 0x01000000); if (gspca_dev->usb_err >= 0) gspca_dbg(gspca_dev, D_STREAM, "camera started alt: 0x%02x\n", gspca_dev->alt); out: return gspca_dev->usb_err; } static void sd_stopN(struct gspca_dev *gspca_dev) { struct usb_device *dev = gspca_dev->dev; set_par(gspca_dev, 0x02000000); set_par(gspca_dev, 0x02000000); usb_set_interface(dev, gspca_dev->iface, 1); reg_r(gspca_dev, 0x0630); rcv_val(gspca_dev, 0x000020); /* << (value ff ff ff ff) */ reg_r(gspca_dev, 0x0650); snd_val(gspca_dev, 0x000020, 0xffffffff); reg_w(gspca_dev, 0x0620, 0); reg_w(gspca_dev, 0x0630, 0); reg_w(gspca_dev, 0x0640, 0); reg_w(gspca_dev, 0x0650, 0); reg_w(gspca_dev, 0x0660, 0); gspca_dbg(gspca_dev, D_STREAM, "camera stopped\n"); } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { struct sd *sd = (struct sd *) gspca_dev; static unsigned char ffd9[] = {0xff, 0xd9}; /* a frame starts with: * - 0xff 0xfe * - 0x08 0x00 - length (little endian ?!) * - 4 bytes = size of whole frame (BE - including header) * - 0x00 0x0c * - 0xff 0xd8 * - .. JPEG image with escape sequences (ff 00) * (without ending - ff d9) */ if (data[0] == 0xff && data[1] == 0xfe) { gspca_frame_add(gspca_dev, LAST_PACKET, ffd9, 2); /* put the JPEG 411 header */ gspca_frame_add(gspca_dev, FIRST_PACKET, sd->jpeg_hdr, JPEG_HDR_SZ); /* beginning of the frame */ #define STKHDRSZ 12 data += STKHDRSZ; len -= STKHDRSZ; } gspca_frame_add(gspca_dev, INTER_PACKET, data, len); } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: setbrightness(gspca_dev, ctrl->val); break; case V4L2_CID_CONTRAST: setcontrast(gspca_dev, ctrl->val); break; case V4L2_CID_SATURATION: setcolors(gspca_dev, ctrl->val); break; case V4L2_CID_POWER_LINE_FREQUENCY: setlightfreq(gspca_dev, ctrl->val); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 4); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 255, 1, 127); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_CONTRAST, 0, 255, 1, 127); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_SATURATION, 0, 255, 1, 127); v4l2_ctrl_new_std_menu(hdl, &sd_ctrl_ops, V4L2_CID_POWER_LINE_FREQUENCY, V4L2_CID_POWER_LINE_FREQUENCY_60HZ, 1, V4L2_CID_POWER_LINE_FREQUENCY_50HZ); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } return 0; } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .start = sd_start, .stopN = sd_stopN, .pkt_scan = sd_pkt_scan, }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x05e1, 0x0893)}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver); |
50 41 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _linux_POSIX_TIMERS_H #define _linux_POSIX_TIMERS_H #include <linux/spinlock.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/alarmtimer.h> #include <linux/timerqueue.h> #include <linux/task_work.h> struct kernel_siginfo; struct task_struct; /* * Bit fields within a clockid: * * The most significant 29 bits hold either a pid or a file descriptor. * * Bit 2 indicates whether a cpu clock refers to a thread or a process. * * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3. * * A clockid is invalid if bits 2, 1, and 0 are all set. */ #define CPUCLOCK_PID(clock) ((pid_t) ~((clock) >> 3)) #define CPUCLOCK_PERTHREAD(clock) \ (((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0) #define CPUCLOCK_PERTHREAD_MASK 4 #define CPUCLOCK_WHICH(clock) ((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK) #define CPUCLOCK_CLOCK_MASK 3 #define CPUCLOCK_PROF 0 #define CPUCLOCK_VIRT 1 #define CPUCLOCK_SCHED 2 #define CPUCLOCK_MAX 3 #define CLOCKFD CPUCLOCK_MAX #define CLOCKFD_MASK (CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK) static inline clockid_t make_process_cpuclock(const unsigned int pid, const clockid_t clock) { return ((~pid) << 3) | clock; } static inline clockid_t make_thread_cpuclock(const unsigned int tid, const clockid_t clock) { return make_process_cpuclock(tid, clock | CPUCLOCK_PERTHREAD_MASK); } static inline clockid_t fd_to_clockid(const int fd) { return make_process_cpuclock((unsigned int) fd, CLOCKFD); } static inline int clockid_to_fd(const clockid_t clk) { return ~(clk >> 3); } #ifdef CONFIG_POSIX_TIMERS /** * cpu_timer - Posix CPU timer representation for k_itimer * @node: timerqueue node to queue in the task/sig * @head: timerqueue head on which this timer is queued * @pid: Pointer to target task PID * @elist: List head for the expiry list * @firing: Timer is currently firing * @handling: Pointer to the task which handles expiry */ struct cpu_timer { struct timerqueue_node node; struct timerqueue_head *head; struct pid *pid; struct list_head elist; int firing; struct task_struct __rcu *handling; }; static inline bool cpu_timer_enqueue(struct timerqueue_head *head, struct cpu_timer *ctmr) { ctmr->head = head; return timerqueue_add(head, &ctmr->node); } static inline bool cpu_timer_queued(struct cpu_timer *ctmr) { return !!ctmr->head; } static inline bool cpu_timer_dequeue(struct cpu_timer *ctmr) { if (cpu_timer_queued(ctmr)) { timerqueue_del(ctmr->head, &ctmr->node); ctmr->head = NULL; return true; } return false; } static inline u64 cpu_timer_getexpires(struct cpu_timer *ctmr) { return ctmr->node.expires; } static inline void cpu_timer_setexpires(struct cpu_timer *ctmr, u64 exp) { ctmr->node.expires = exp; } /** * posix_cputimer_base - Container per posix CPU clock * @nextevt: Earliest-expiration cache * @tqhead: timerqueue head for cpu_timers */ struct posix_cputimer_base { u64 nextevt; struct timerqueue_head tqhead; }; /** * posix_cputimers - Container for posix CPU timer related data * @bases: Base container for posix CPU clocks * @timers_active: Timers are queued. * @expiry_active: Timer expiry is active. Used for * process wide timers to avoid multiple * task trying to handle expiry concurrently * * Used in task_struct and signal_struct */ struct posix_cputimers { struct posix_cputimer_base bases[CPUCLOCK_MAX]; unsigned int timers_active; unsigned int expiry_active; }; /** * posix_cputimers_work - Container for task work based posix CPU timer expiry * @work: The task work to be scheduled * @mutex: Mutex held around expiry in context of this task work * @scheduled: @work has been scheduled already, no further processing */ struct posix_cputimers_work { struct callback_head work; struct mutex mutex; unsigned int scheduled; }; static inline void posix_cputimers_init(struct posix_cputimers *pct) { memset(pct, 0, sizeof(*pct)); pct->bases[0].nextevt = U64_MAX; pct->bases[1].nextevt = U64_MAX; pct->bases[2].nextevt = U64_MAX; } void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit); static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct, u64 runtime) { pct->bases[CPUCLOCK_SCHED].nextevt = runtime; } /* Init task static initializer */ #define INIT_CPU_TIMERBASE(b) { \ .nextevt = U64_MAX, \ } #define INIT_CPU_TIMERBASES(b) { \ INIT_CPU_TIMERBASE(b[0]), \ INIT_CPU_TIMERBASE(b[1]), \ INIT_CPU_TIMERBASE(b[2]), \ } #define INIT_CPU_TIMERS(s) \ .posix_cputimers = { \ .bases = INIT_CPU_TIMERBASES(s.posix_cputimers.bases), \ }, #else struct posix_cputimers { }; struct cpu_timer { }; #define INIT_CPU_TIMERS(s) static inline void posix_cputimers_init(struct posix_cputimers *pct) { } static inline void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit) { } #endif #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK void clear_posix_cputimers_work(struct task_struct *p); void posix_cputimers_init_work(void); #else static inline void clear_posix_cputimers_work(struct task_struct *p) { } static inline void posix_cputimers_init_work(void) { } #endif #define REQUEUE_PENDING 1 /** * struct k_itimer - POSIX.1b interval timer structure. * @list: List head for binding the timer to signals->posix_timers * @t_hash: Entry in the posix timer hash table * @it_lock: Lock protecting the timer * @kclock: Pointer to the k_clock struct handling this timer * @it_clock: The posix timer clock id * @it_id: The posix timer id for identifying the timer * @it_active: Marker that timer is active * @it_overrun: The overrun counter for pending signals * @it_overrun_last: The overrun at the time of the last delivered signal * @it_requeue_pending: Indicator that timer waits for being requeued on * signal delivery * @it_sigev_notify: The notify word of sigevent struct for signal delivery * @it_interval: The interval for periodic timers * @it_signal: Pointer to the creators signal struct * @it_pid: The pid of the process/task targeted by the signal * @it_process: The task to wakeup on clock_nanosleep (CPU timers) * @sigq: Pointer to preallocated sigqueue * @it: Union representing the various posix timer type * internals. * @rcu: RCU head for freeing the timer. */ struct k_itimer { struct list_head list; struct hlist_node t_hash; spinlock_t it_lock; const struct k_clock *kclock; clockid_t it_clock; timer_t it_id; int it_active; s64 it_overrun; s64 it_overrun_last; int it_requeue_pending; int it_sigev_notify; ktime_t it_interval; struct signal_struct *it_signal; union { struct pid *it_pid; struct task_struct *it_process; }; struct sigqueue *sigq; union { struct { struct hrtimer timer; } real; struct cpu_timer cpu; struct { struct alarm alarmtimer; } alarm; } it; struct rcu_head rcu; }; void run_posix_cpu_timers(void); void posix_cpu_timers_exit(struct task_struct *task); void posix_cpu_timers_exit_group(struct task_struct *task); void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, u64 *newval, u64 *oldval); void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new); void posixtimer_rearm(struct kernel_siginfo *info); #endif |
74 254 265 265 179 87 87 87 453 78 375 35 13 34 436 436 435 453 453 253 210 198 259 510 20 507 315 197 197 315 9 9 9 468 469 281 468 2 5 456 454 8 1 1 1 450 1 3 3 265 263 1 157 110 263 265 265 264 3 10 5 3 1 1 3 2 7 2 1 3 1 459 18 454 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 | // SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/9p/trans_fd.c * * Fd transport layer. Includes deprecated socket layer. * * Copyright (C) 2006 by Russ Cox <rsc@swtch.com> * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/in.h> #include <linux/module.h> #include <linux/net.h> #include <linux/ipv6.h> #include <linux/kthread.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/un.h> #include <linux/uaccess.h> #include <linux/inet.h> #include <linux/idr.h> #include <linux/file.h> #include <linux/parser.h> #include <linux/slab.h> #include <linux/seq_file.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include <net/9p/transport.h> #include <linux/syscalls.h> /* killme */ #define P9_PORT 564 #define MAX_SOCK_BUF (1024*1024) #define MAXPOLLWADDR 2 static struct p9_trans_module p9_tcp_trans; static struct p9_trans_module p9_fd_trans; /** * struct p9_fd_opts - per-transport options * @rfd: file descriptor for reading (trans=fd) * @wfd: file descriptor for writing (trans=fd) * @port: port to connect to (trans=tcp) * @privport: port is privileged */ struct p9_fd_opts { int rfd; int wfd; u16 port; bool privport; }; /* * Option Parsing (code inspired by NFS code) * - a little lazy - parse all fd-transport options */ enum { /* Options that take integer arguments */ Opt_port, Opt_rfdno, Opt_wfdno, Opt_err, /* Options that take no arguments */ Opt_privport, }; static const match_table_t tokens = { {Opt_port, "port=%u"}, {Opt_rfdno, "rfdno=%u"}, {Opt_wfdno, "wfdno=%u"}, {Opt_privport, "privport"}, {Opt_err, NULL}, }; enum { Rworksched = 1, /* read work scheduled or running */ Rpending = 2, /* can read */ Wworksched = 4, /* write work scheduled or running */ Wpending = 8, /* can write */ }; struct p9_poll_wait { struct p9_conn *conn; wait_queue_entry_t wait; wait_queue_head_t *wait_addr; }; /** * struct p9_conn - fd mux connection state information * @mux_list: list link for mux to manage multiple connections (?) * @client: reference to client instance for this connection * @err: error state * @req_lock: lock protecting req_list and requests statuses * @req_list: accounting for requests which have been sent * @unsent_req_list: accounting for requests that haven't been sent * @rreq: read request * @wreq: write request * @req: current request being processed (if any) * @tmp_buf: temporary buffer to read in header * @rc: temporary fcall for reading current frame * @wpos: write position for current frame * @wsize: amount of data to write for current frame * @wbuf: current write buffer * @poll_pending_link: pending links to be polled per conn * @poll_wait: array of wait_q's for various worker threads * @pt: poll state * @rq: current read work * @wq: current write work * @wsched: ???? * */ struct p9_conn { struct list_head mux_list; struct p9_client *client; int err; spinlock_t req_lock; struct list_head req_list; struct list_head unsent_req_list; struct p9_req_t *rreq; struct p9_req_t *wreq; char tmp_buf[P9_HDRSZ]; struct p9_fcall rc; int wpos; int wsize; char *wbuf; struct list_head poll_pending_link; struct p9_poll_wait poll_wait[MAXPOLLWADDR]; poll_table pt; struct work_struct rq; struct work_struct wq; unsigned long wsched; }; /** * struct p9_trans_fd - transport state * @rd: reference to file to read from * @wr: reference of file to write to * @conn: connection state reference * */ struct p9_trans_fd { struct file *rd; struct file *wr; struct p9_conn conn; }; static void p9_poll_workfn(struct work_struct *work); static DEFINE_SPINLOCK(p9_poll_lock); static LIST_HEAD(p9_poll_pending_list); static DECLARE_WORK(p9_poll_work, p9_poll_workfn); static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT; static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT; static void p9_mux_poll_stop(struct p9_conn *m) { unsigned long flags; int i; for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) { struct p9_poll_wait *pwait = &m->poll_wait[i]; if (pwait->wait_addr) { remove_wait_queue(pwait->wait_addr, &pwait->wait); pwait->wait_addr = NULL; } } spin_lock_irqsave(&p9_poll_lock, flags); list_del_init(&m->poll_pending_link); spin_unlock_irqrestore(&p9_poll_lock, flags); flush_work(&p9_poll_work); } /** * p9_conn_cancel - cancel all pending requests with error * @m: mux data * @err: error code * */ static void p9_conn_cancel(struct p9_conn *m, int err) { struct p9_req_t *req, *rtmp; LIST_HEAD(cancel_list); p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); spin_lock(&m->req_lock); if (m->err) { spin_unlock(&m->req_lock); return; } m->err = err; list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { list_move(&req->req_list, &cancel_list); req->status = REQ_STATUS_ERROR; } list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { list_move(&req->req_list, &cancel_list); req->status = REQ_STATUS_ERROR; } spin_unlock(&m->req_lock); list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); list_del(&req->req_list); if (!req->t_err) req->t_err = err; p9_client_cb(m->client, req, REQ_STATUS_ERROR); } } static __poll_t p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err) { __poll_t ret; struct p9_trans_fd *ts = NULL; if (client && client->status == Connected) ts = client->trans; if (!ts) { if (err) *err = -EREMOTEIO; return EPOLLERR; } ret = vfs_poll(ts->rd, pt); if (ts->rd != ts->wr) ret = (ret & ~EPOLLOUT) | (vfs_poll(ts->wr, pt) & ~EPOLLIN); return ret; } /** * p9_fd_read- read from a fd * @client: client instance * @v: buffer to receive data into * @len: size of receive buffer * */ static int p9_fd_read(struct p9_client *client, void *v, int len) { int ret; struct p9_trans_fd *ts = NULL; loff_t pos; if (client && client->status != Disconnected) ts = client->trans; if (!ts) return -EREMOTEIO; if (!(ts->rd->f_flags & O_NONBLOCK)) p9_debug(P9_DEBUG_ERROR, "blocking read ...\n"); pos = ts->rd->f_pos; ret = kernel_read(ts->rd, v, len, &pos); if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) client->status = Disconnected; return ret; } /** * p9_read_work - called when there is some data to be read from a transport * @work: container of work to be done * */ static void p9_read_work(struct work_struct *work) { __poll_t n; int err; struct p9_conn *m; m = container_of(work, struct p9_conn, rq); if (m->err < 0) return; p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n", m, m->rc.offset); if (!m->rc.sdata) { m->rc.sdata = m->tmp_buf; m->rc.offset = 0; m->rc.capacity = P9_HDRSZ; /* start by reading header */ } clear_bit(Rpending, &m->wsched); p9_debug(P9_DEBUG_TRANS, "read mux %p pos %zd size: %zd = %zd\n", m, m->rc.offset, m->rc.capacity, m->rc.capacity - m->rc.offset); err = p9_fd_read(m->client, m->rc.sdata + m->rc.offset, m->rc.capacity - m->rc.offset); p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err); if (err == -EAGAIN) goto end_clear; if (err <= 0) goto error; m->rc.offset += err; /* header read in */ if ((!m->rreq) && (m->rc.offset == m->rc.capacity)) { p9_debug(P9_DEBUG_TRANS, "got new header\n"); /* Header size */ m->rc.size = P9_HDRSZ; err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0); if (err) { p9_debug(P9_DEBUG_ERROR, "error parsing header: %d\n", err); goto error; } if (m->rc.size >= m->client->msize) { p9_debug(P9_DEBUG_ERROR, "requested packet size too big: %d\n", m->rc.size); err = -EIO; goto error; } p9_debug(P9_DEBUG_TRANS, "mux %p pkt: size: %d bytes tag: %d\n", m, m->rc.size, m->rc.tag); m->rreq = p9_tag_lookup(m->client, m->rc.tag); if (!m->rreq || (m->rreq->status != REQ_STATUS_SENT)) { p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", m->rc.tag); err = -EIO; goto error; } if (!m->rreq->rc.sdata) { p9_debug(P9_DEBUG_ERROR, "No recv fcall for tag %d (req %p), disconnecting!\n", m->rc.tag, m->rreq); p9_req_put(m->client, m->rreq); m->rreq = NULL; err = -EIO; goto error; } m->rc.sdata = m->rreq->rc.sdata; memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity); m->rc.capacity = m->rc.size; } /* packet is read in * not an else because some packets (like clunk) have no payload */ if ((m->rreq) && (m->rc.offset == m->rc.capacity)) { p9_debug(P9_DEBUG_TRANS, "got new packet\n"); m->rreq->rc.size = m->rc.offset; spin_lock(&m->req_lock); if (m->rreq->status == REQ_STATUS_SENT) { list_del(&m->rreq->req_list); p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD); } else if (m->rreq->status == REQ_STATUS_FLSHD) { /* Ignore replies associated with a cancelled request. */ p9_debug(P9_DEBUG_TRANS, "Ignore replies associated with a cancelled request\n"); } else { spin_unlock(&m->req_lock); p9_debug(P9_DEBUG_ERROR, "Request tag %d errored out while we were reading the reply\n", m->rc.tag); err = -EIO; goto error; } spin_unlock(&m->req_lock); m->rc.sdata = NULL; m->rc.offset = 0; m->rc.capacity = 0; p9_req_put(m->client, m->rreq); m->rreq = NULL; } end_clear: clear_bit(Rworksched, &m->wsched); if (!list_empty(&m->req_list)) { if (test_and_clear_bit(Rpending, &m->wsched)) n = EPOLLIN; else n = p9_fd_poll(m->client, NULL, NULL); if ((n & EPOLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); schedule_work(&m->rq); } } return; error: p9_conn_cancel(m, err); clear_bit(Rworksched, &m->wsched); } /** * p9_fd_write - write to a socket * @client: client instance * @v: buffer to send data from * @len: size of send buffer * */ static int p9_fd_write(struct p9_client *client, void *v, int len) { ssize_t ret; struct p9_trans_fd *ts = NULL; if (client && client->status != Disconnected) ts = client->trans; if (!ts) return -EREMOTEIO; if (!(ts->wr->f_flags & O_NONBLOCK)) p9_debug(P9_DEBUG_ERROR, "blocking write ...\n"); ret = kernel_write(ts->wr, v, len, &ts->wr->f_pos); if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) client->status = Disconnected; return ret; } /** * p9_write_work - called when a transport can send some data * @work: container for work to be done * */ static void p9_write_work(struct work_struct *work) { __poll_t n; int err; struct p9_conn *m; struct p9_req_t *req; m = container_of(work, struct p9_conn, wq); if (m->err < 0) { clear_bit(Wworksched, &m->wsched); return; } if (!m->wsize) { spin_lock(&m->req_lock); if (list_empty(&m->unsent_req_list)) { clear_bit(Wworksched, &m->wsched); spin_unlock(&m->req_lock); return; } req = list_entry(m->unsent_req_list.next, struct p9_req_t, req_list); req->status = REQ_STATUS_SENT; p9_debug(P9_DEBUG_TRANS, "move req %p\n", req); list_move_tail(&req->req_list, &m->req_list); m->wbuf = req->tc.sdata; m->wsize = req->tc.size; m->wpos = 0; p9_req_get(req); m->wreq = req; spin_unlock(&m->req_lock); } p9_debug(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", m, m->wpos, m->wsize); clear_bit(Wpending, &m->wsched); err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos); p9_debug(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err); if (err == -EAGAIN) goto end_clear; if (err < 0) goto error; else if (err == 0) { err = -EREMOTEIO; goto error; } m->wpos += err; if (m->wpos == m->wsize) { m->wpos = m->wsize = 0; p9_req_put(m->client, m->wreq); m->wreq = NULL; } end_clear: clear_bit(Wworksched, &m->wsched); if (m->wsize || !list_empty(&m->unsent_req_list)) { if (test_and_clear_bit(Wpending, &m->wsched)) n = EPOLLOUT; else n = p9_fd_poll(m->client, NULL, NULL); if ((n & EPOLLOUT) && !test_and_set_bit(Wworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); schedule_work(&m->wq); } } return; error: p9_conn_cancel(m, err); clear_bit(Wworksched, &m->wsched); } static int p9_pollwake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key) { struct p9_poll_wait *pwait = container_of(wait, struct p9_poll_wait, wait); struct p9_conn *m = pwait->conn; unsigned long flags; spin_lock_irqsave(&p9_poll_lock, flags); if (list_empty(&m->poll_pending_link)) list_add_tail(&m->poll_pending_link, &p9_poll_pending_list); spin_unlock_irqrestore(&p9_poll_lock, flags); schedule_work(&p9_poll_work); return 1; } /** * p9_pollwait - add poll task to the wait queue * @filp: file pointer being polled * @wait_address: wait_q to block on * @p: poll state * * called by files poll operation to add v9fs-poll task to files wait queue */ static void p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) { struct p9_conn *m = container_of(p, struct p9_conn, pt); struct p9_poll_wait *pwait = NULL; int i; for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) { if (m->poll_wait[i].wait_addr == NULL) { pwait = &m->poll_wait[i]; break; } } if (!pwait) { p9_debug(P9_DEBUG_ERROR, "not enough wait_address slots\n"); return; } pwait->conn = m; pwait->wait_addr = wait_address; init_waitqueue_func_entry(&pwait->wait, p9_pollwake); add_wait_queue(wait_address, &pwait->wait); } /** * p9_conn_create - initialize the per-session mux data * @client: client instance * * Note: Creates the polling task if this is the first session. */ static void p9_conn_create(struct p9_client *client) { __poll_t n; struct p9_trans_fd *ts = client->trans; struct p9_conn *m = &ts->conn; p9_debug(P9_DEBUG_TRANS, "client %p msize %d\n", client, client->msize); INIT_LIST_HEAD(&m->mux_list); m->client = client; spin_lock_init(&m->req_lock); INIT_LIST_HEAD(&m->req_list); INIT_LIST_HEAD(&m->unsent_req_list); INIT_WORK(&m->rq, p9_read_work); INIT_WORK(&m->wq, p9_write_work); INIT_LIST_HEAD(&m->poll_pending_link); init_poll_funcptr(&m->pt, p9_pollwait); n = p9_fd_poll(client, &m->pt, NULL); if (n & EPOLLIN) { p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m); set_bit(Rpending, &m->wsched); } if (n & EPOLLOUT) { p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); set_bit(Wpending, &m->wsched); } } /** * p9_poll_mux - polls a mux and schedules read or write works if necessary * @m: connection to poll * */ static void p9_poll_mux(struct p9_conn *m) { __poll_t n; int err = -ECONNRESET; if (m->err < 0) return; n = p9_fd_poll(m->client, NULL, &err); if (n & (EPOLLERR | EPOLLHUP | EPOLLNVAL)) { p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n); p9_conn_cancel(m, err); } if (n & EPOLLIN) { set_bit(Rpending, &m->wsched); p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m); if (!test_and_set_bit(Rworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); schedule_work(&m->rq); } } if (n & EPOLLOUT) { set_bit(Wpending, &m->wsched); p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); if ((m->wsize || !list_empty(&m->unsent_req_list)) && !test_and_set_bit(Wworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); schedule_work(&m->wq); } } } /** * p9_fd_request - send 9P request * The function can sleep until the request is scheduled for sending. * The function can be interrupted. Return from the function is not * a guarantee that the request is sent successfully. * * @client: client instance * @req: request to be sent * */ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) { __poll_t n; struct p9_trans_fd *ts = client->trans; struct p9_conn *m = &ts->conn; p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", m, current, &req->tc, req->tc.id); if (m->err < 0) return m->err; spin_lock(&m->req_lock); req->status = REQ_STATUS_UNSENT; list_add_tail(&req->req_list, &m->unsent_req_list); spin_unlock(&m->req_lock); if (test_and_clear_bit(Wpending, &m->wsched)) n = EPOLLOUT; else n = p9_fd_poll(m->client, NULL, NULL); if (n & EPOLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) schedule_work(&m->wq); return 0; } static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) { struct p9_trans_fd *ts = client->trans; struct p9_conn *m = &ts->conn; int ret = 1; p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); spin_lock(&m->req_lock); if (req->status == REQ_STATUS_UNSENT) { list_del(&req->req_list); req->status = REQ_STATUS_FLSHD; p9_req_put(client, req); ret = 0; } spin_unlock(&m->req_lock); return ret; } static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) { struct p9_trans_fd *ts = client->trans; struct p9_conn *m = &ts->conn; p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); spin_lock(&m->req_lock); /* Ignore cancelled request if message has been received * before lock. */ if (req->status == REQ_STATUS_RCVD) { spin_unlock(&m->req_lock); return 0; } /* we haven't received a response for oldreq, * remove it from the list. */ list_del(&req->req_list); req->status = REQ_STATUS_FLSHD; spin_unlock(&m->req_lock); p9_req_put(client, req); return 0; } static int p9_fd_show_options(struct seq_file *m, struct p9_client *clnt) { if (clnt->trans_mod == &p9_tcp_trans) { if (clnt->trans_opts.tcp.port != P9_PORT) seq_printf(m, ",port=%u", clnt->trans_opts.tcp.port); } else if (clnt->trans_mod == &p9_fd_trans) { if (clnt->trans_opts.fd.rfd != ~0) seq_printf(m, ",rfd=%u", clnt->trans_opts.fd.rfd); if (clnt->trans_opts.fd.wfd != ~0) seq_printf(m, ",wfd=%u", clnt->trans_opts.fd.wfd); } return 0; } /** * parse_opts - parse mount options into p9_fd_opts structure * @params: options string passed from mount * @opts: fd transport-specific structure to parse options into * * Returns 0 upon success, -ERRNO upon failure */ static int parse_opts(char *params, struct p9_fd_opts *opts) { char *p; substring_t args[MAX_OPT_ARGS]; int option; char *options, *tmp_options; opts->port = P9_PORT; opts->rfd = ~0; opts->wfd = ~0; opts->privport = false; if (!params) return 0; tmp_options = kstrdup(params, GFP_KERNEL); if (!tmp_options) { p9_debug(P9_DEBUG_ERROR, "failed to allocate copy of option string\n"); return -ENOMEM; } options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { int token; int r; if (!*p) continue; token = match_token(p, tokens, args); if ((token != Opt_err) && (token != Opt_privport)) { r = match_int(&args[0], &option); if (r < 0) { p9_debug(P9_DEBUG_ERROR, "integer field, but no integer?\n"); continue; } } switch (token) { case Opt_port: opts->port = option; break; case Opt_rfdno: opts->rfd = option; break; case Opt_wfdno: opts->wfd = option; break; case Opt_privport: opts->privport = true; break; default: continue; } } kfree(tmp_options); return 0; } static int p9_fd_open(struct p9_client *client, int rfd, int wfd) { struct p9_trans_fd *ts = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); if (!ts) return -ENOMEM; ts->rd = fget(rfd); if (!ts->rd) goto out_free_ts; if (!(ts->rd->f_mode & FMODE_READ)) goto out_put_rd; /* Prevent workers from hanging on IO when fd is a pipe. * It's technically possible for userspace or concurrent mounts to * modify this flag concurrently, which will likely result in a * broken filesystem. However, just having bad flags here should * not crash the kernel or cause any other sort of bug, so mark this * particular data race as intentional so that tooling (like KCSAN) * can allow it and detect further problems. */ data_race(ts->rd->f_flags |= O_NONBLOCK); ts->wr = fget(wfd); if (!ts->wr) goto out_put_rd; if (!(ts->wr->f_mode & FMODE_WRITE)) goto out_put_wr; data_race(ts->wr->f_flags |= O_NONBLOCK); client->trans = ts; client->status = Connected; return 0; out_put_wr: fput(ts->wr); out_put_rd: fput(ts->rd); out_free_ts: kfree(ts); return -EIO; } static int p9_socket_open(struct p9_client *client, struct socket *csocket) { struct p9_trans_fd *p; struct file *file; p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); if (!p) { sock_release(csocket); return -ENOMEM; } csocket->sk->sk_allocation = GFP_NOIO; file = sock_alloc_file(csocket, 0, NULL); if (IS_ERR(file)) { pr_err("%s (%d): failed to map fd\n", __func__, task_pid_nr(current)); kfree(p); return PTR_ERR(file); } get_file(file); p->wr = p->rd = file; client->trans = p; client->status = Connected; p->rd->f_flags |= O_NONBLOCK; p9_conn_create(client); return 0; } /** * p9_conn_destroy - cancels all pending requests of mux * @m: mux to destroy * */ static void p9_conn_destroy(struct p9_conn *m) { p9_debug(P9_DEBUG_TRANS, "mux %p prev %p next %p\n", m, m->mux_list.prev, m->mux_list.next); p9_mux_poll_stop(m); cancel_work_sync(&m->rq); if (m->rreq) { p9_req_put(m->client, m->rreq); m->rreq = NULL; } cancel_work_sync(&m->wq); if (m->wreq) { p9_req_put(m->client, m->wreq); m->wreq = NULL; } p9_conn_cancel(m, -ECONNRESET); m->client = NULL; } /** * p9_fd_close - shutdown file descriptor transport * @client: client instance * */ static void p9_fd_close(struct p9_client *client) { struct p9_trans_fd *ts; if (!client) return; ts = client->trans; if (!ts) return; client->status = Disconnected; p9_conn_destroy(&ts->conn); if (ts->rd) fput(ts->rd); if (ts->wr) fput(ts->wr); kfree(ts); } /* * stolen from NFS - maybe should be made a generic function? */ static inline int valid_ipaddr4(const char *buf) { int rc, count, in[4]; rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); if (rc != 4) return -EINVAL; for (count = 0; count < 4; count++) { if (in[count] > 255) return -EINVAL; } return 0; } static int p9_bind_privport(struct socket *sock) { struct sockaddr_in cl; int port, err = -EINVAL; memset(&cl, 0, sizeof(cl)); cl.sin_family = AF_INET; cl.sin_addr.s_addr = htonl(INADDR_ANY); for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) { cl.sin_port = htons((ushort)port); err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl)); if (err != -EADDRINUSE) break; } return err; } static int p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) { int err; struct socket *csocket; struct sockaddr_in sin_server; struct p9_fd_opts opts; err = parse_opts(args, &opts); if (err < 0) return err; if (addr == NULL || valid_ipaddr4(addr) < 0) return -EINVAL; csocket = NULL; client->trans_opts.tcp.port = opts.port; client->trans_opts.tcp.privport = opts.privport; sin_server.sin_family = AF_INET; sin_server.sin_addr.s_addr = in_aton(addr); sin_server.sin_port = htons(opts.port); err = __sock_create(current->nsproxy->net_ns, PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket, 1); if (err) { pr_err("%s (%d): problem creating socket\n", __func__, task_pid_nr(current)); return err; } if (opts.privport) { err = p9_bind_privport(csocket); if (err < 0) { pr_err("%s (%d): problem binding to privport\n", __func__, task_pid_nr(current)); sock_release(csocket); return err; } } err = csocket->ops->connect(csocket, (struct sockaddr *)&sin_server, sizeof(struct sockaddr_in), 0); if (err < 0) { pr_err("%s (%d): problem connecting socket to %s\n", __func__, task_pid_nr(current), addr); sock_release(csocket); return err; } return p9_socket_open(client, csocket); } static int p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) { int err; struct socket *csocket; struct sockaddr_un sun_server; csocket = NULL; if (!addr || !strlen(addr)) return -EINVAL; if (strlen(addr) >= UNIX_PATH_MAX) { pr_err("%s (%d): address too long: %s\n", __func__, task_pid_nr(current), addr); return -ENAMETOOLONG; } sun_server.sun_family = PF_UNIX; strcpy(sun_server.sun_path, addr); err = __sock_create(current->nsproxy->net_ns, PF_UNIX, SOCK_STREAM, 0, &csocket, 1); if (err < 0) { pr_err("%s (%d): problem creating socket\n", __func__, task_pid_nr(current)); return err; } err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, sizeof(struct sockaddr_un) - 1, 0); if (err < 0) { pr_err("%s (%d): problem connecting socket: %s: %d\n", __func__, task_pid_nr(current), addr, err); sock_release(csocket); return err; } return p9_socket_open(client, csocket); } static int p9_fd_create(struct p9_client *client, const char *addr, char *args) { int err; struct p9_fd_opts opts; parse_opts(args, &opts); client->trans_opts.fd.rfd = opts.rfd; client->trans_opts.fd.wfd = opts.wfd; if (opts.rfd == ~0 || opts.wfd == ~0) { pr_err("Insufficient options for proto=fd\n"); return -ENOPROTOOPT; } err = p9_fd_open(client, opts.rfd, opts.wfd); if (err < 0) return err; p9_conn_create(client); return 0; } static struct p9_trans_module p9_tcp_trans = { .name = "tcp", .maxsize = MAX_SOCK_BUF, .def = 0, .create = p9_fd_create_tcp, .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, .cancelled = p9_fd_cancelled, .show_options = p9_fd_show_options, .owner = THIS_MODULE, }; static struct p9_trans_module p9_unix_trans = { .name = "unix", .maxsize = MAX_SOCK_BUF, .def = 0, .create = p9_fd_create_unix, .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, .cancelled = p9_fd_cancelled, .show_options = p9_fd_show_options, .owner = THIS_MODULE, }; static struct p9_trans_module p9_fd_trans = { .name = "fd", .maxsize = MAX_SOCK_BUF, .def = 0, .create = p9_fd_create, .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, .cancelled = p9_fd_cancelled, .show_options = p9_fd_show_options, .owner = THIS_MODULE, }; /** * p9_poll_workfn - poll worker thread * @work: work queue * * polls all v9fs transports for new events and queues the appropriate * work to the work queue * */ static void p9_poll_workfn(struct work_struct *work) { unsigned long flags; p9_debug(P9_DEBUG_TRANS, "start %p\n", current); spin_lock_irqsave(&p9_poll_lock, flags); while (!list_empty(&p9_poll_pending_list)) { struct p9_conn *conn = list_first_entry(&p9_poll_pending_list, struct p9_conn, poll_pending_link); list_del_init(&conn->poll_pending_link); spin_unlock_irqrestore(&p9_poll_lock, flags); p9_poll_mux(conn); spin_lock_irqsave(&p9_poll_lock, flags); } spin_unlock_irqrestore(&p9_poll_lock, flags); p9_debug(P9_DEBUG_TRANS, "finish\n"); } int p9_trans_fd_init(void) { v9fs_register_trans(&p9_tcp_trans); v9fs_register_trans(&p9_unix_trans); v9fs_register_trans(&p9_fd_trans); return 0; } void p9_trans_fd_exit(void) { flush_work(&p9_poll_work); v9fs_unregister_trans(&p9_tcp_trans); v9fs_unregister_trans(&p9_unix_trans); v9fs_unregister_trans(&p9_fd_trans); } |
1 2 2 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Force feedback support for Betop based devices * * The devices are distributed under various names and the same USB device ID * can be used in both adapters and actual game controllers. * * 0x11c2:0x2208 "BTP2185 BFM mode Joystick" * - tested with BTP2185 BFM Mode. * * 0x11C0:0x5506 "BTP2185 PC mode Joystick" * - tested with BTP2185 PC Mode. * * 0x8380:0x1850 "BTP2185 V2 PC mode USB Gamepad" * - tested with BTP2185 PC Mode with another version. * * 0x20bc:0x5500 "BTP2185 V2 BFM mode Joystick" * - tested with BTP2171s. * Copyright (c) 2014 Huang Bo <huangbobupt@163.com> */ /* */ #include <linux/input.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/hid.h> #include "hid-ids.h" struct betopff_device { struct hid_report *report; }; static int hid_betopff_play(struct input_dev *dev, void *data, struct ff_effect *effect) { struct hid_device *hid = input_get_drvdata(dev); struct betopff_device *betopff = data; __u16 left, right; left = effect->u.rumble.strong_magnitude; right = effect->u.rumble.weak_magnitude; betopff->report->field[2]->value[0] = left / 256; betopff->report->field[3]->value[0] = right / 256; hid_hw_request(hid, betopff->report, HID_REQ_SET_REPORT); return 0; } static int betopff_init(struct hid_device *hid) { struct betopff_device *betopff; struct hid_report *report; struct hid_input *hidinput; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct input_dev *dev; int error; int i, j; if (list_empty(&hid->inputs)) { hid_err(hid, "no inputs found\n"); return -ENODEV; } hidinput = list_first_entry(&hid->inputs, struct hid_input, list); dev = hidinput->input; if (list_empty(report_list)) { hid_err(hid, "no output reports found\n"); return -ENODEV; } report = list_first_entry(report_list, struct hid_report, list); /* * Actually there are 4 fields for 4 Bytes as below: * ----------------------------------------- * Byte0 Byte1 Byte2 Byte3 * 0x00 0x00 left_motor right_motor * ----------------------------------------- * Do init them with default value. */ if (report->maxfield < 4) { hid_err(hid, "not enough fields in the report: %d\n", report->maxfield); return -ENODEV; } for (i = 0; i < report->maxfield; i++) { if (report->field[i]->report_count < 1) { hid_err(hid, "no values in the field\n"); return -ENODEV; } for (j = 0; j < report->field[i]->report_count; j++) { report->field[i]->value[j] = 0x00; } } betopff = kzalloc(sizeof(*betopff), GFP_KERNEL); if (!betopff) return -ENOMEM; set_bit(FF_RUMBLE, dev->ffbit); error = input_ff_create_memless(dev, betopff, hid_betopff_play); if (error) { kfree(betopff); return error; } betopff->report = report; hid_hw_request(hid, betopff->report, HID_REQ_SET_REPORT); hid_info(hid, "Force feedback for betop devices by huangbo <huangbobupt@163.com>\n"); return 0; } static int betop_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; if (id->driver_data) hdev->quirks |= HID_QUIRK_MULTI_INPUT; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); goto err; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF); if (ret) { hid_err(hdev, "hw start failed\n"); goto err; } betopff_init(hdev); return 0; err: return ret; } static const struct hid_device_id betop_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185BFM, 0x2208) }, { HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185PC, 0x5506) }, { HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185V2PC, 0x1850) }, { HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185V2BFM, 0x5500) }, { } }; MODULE_DEVICE_TABLE(hid, betop_devices); static struct hid_driver betop_driver = { .name = "betop", .id_table = betop_devices, .probe = betop_probe, }; module_hid_driver(betop_driver); MODULE_LICENSE("GPL"); |
22 11 24 23 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 | // SPDX-License-Identifier: GPL-2.0-or-later /* * geniv: Shared IV generator code * * This file provides common code to IV generators such as seqiv. * * Copyright (c) 2007-2019 Herbert Xu <herbert@gondor.apana.org.au> */ #include <crypto/internal/geniv.h> #include <crypto/internal/rng.h> #include <crypto/null.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/rtnetlink.h> #include <linux/slab.h> static int aead_geniv_setkey(struct crypto_aead *tfm, const u8 *key, unsigned int keylen) { struct aead_geniv_ctx *ctx = crypto_aead_ctx(tfm); return crypto_aead_setkey(ctx->child, key, keylen); } static int aead_geniv_setauthsize(struct crypto_aead *tfm, unsigned int authsize) { struct aead_geniv_ctx *ctx = crypto_aead_ctx(tfm); return crypto_aead_setauthsize(ctx->child, authsize); } static void aead_geniv_free(struct aead_instance *inst) { crypto_drop_aead(aead_instance_ctx(inst)); kfree(inst); } struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl, struct rtattr **tb) { struct crypto_aead_spawn *spawn; struct aead_instance *inst; struct aead_alg *alg; unsigned int ivsize; unsigned int maxauthsize; u32 mask; int err; err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_AEAD, &mask); if (err) return ERR_PTR(err); inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) return ERR_PTR(-ENOMEM); spawn = aead_instance_ctx(inst); err = crypto_grab_aead(spawn, aead_crypto_instance(inst), crypto_attr_alg_name(tb[1]), 0, mask); if (err) goto err_free_inst; alg = crypto_spawn_aead_alg(spawn); ivsize = crypto_aead_alg_ivsize(alg); maxauthsize = crypto_aead_alg_maxauthsize(alg); err = -EINVAL; if (ivsize < sizeof(u64)) goto err_free_inst; err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", tmpl->name, alg->base.cra_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", tmpl->name, alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; inst->alg.base.cra_priority = alg->base.cra_priority; inst->alg.base.cra_blocksize = alg->base.cra_blocksize; inst->alg.base.cra_alignmask = alg->base.cra_alignmask; inst->alg.base.cra_ctxsize = sizeof(struct aead_geniv_ctx); inst->alg.setkey = aead_geniv_setkey; inst->alg.setauthsize = aead_geniv_setauthsize; inst->alg.ivsize = ivsize; inst->alg.maxauthsize = maxauthsize; inst->free = aead_geniv_free; out: return inst; err_free_inst: aead_geniv_free(inst); inst = ERR_PTR(err); goto out; } EXPORT_SYMBOL_GPL(aead_geniv_alloc); int aead_init_geniv(struct crypto_aead *aead) { struct aead_geniv_ctx *ctx = crypto_aead_ctx(aead); struct aead_instance *inst = aead_alg_instance(aead); struct crypto_aead *child; int err; spin_lock_init(&ctx->lock); err = crypto_get_default_rng(); if (err) goto out; err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt, crypto_aead_ivsize(aead)); crypto_put_default_rng(); if (err) goto out; ctx->sknull = crypto_get_default_null_skcipher(); err = PTR_ERR(ctx->sknull); if (IS_ERR(ctx->sknull)) goto out; child = crypto_spawn_aead(aead_instance_ctx(inst)); err = PTR_ERR(child); if (IS_ERR(child)) goto drop_null; ctx->child = child; crypto_aead_set_reqsize(aead, crypto_aead_reqsize(child) + sizeof(struct aead_request)); err = 0; out: return err; drop_null: crypto_put_default_null_skcipher(); goto out; } EXPORT_SYMBOL_GPL(aead_init_geniv); void aead_exit_geniv(struct crypto_aead *tfm) { struct aead_geniv_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_aead(ctx->child); crypto_put_default_null_skcipher(); } EXPORT_SYMBOL_GPL(aead_exit_geniv); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Shared IV generator code"); |
9 104 34 87 5 17 33 2 66 8 37 32 92 86 10 53 38 35 35 32 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 | /* SPDX-License-Identifier: GPL-2.0 */ /* Multipath TCP * * Copyright (c) 2017 - 2019, Intel Corporation. */ #ifndef __MPTCP_PROTOCOL_H #define __MPTCP_PROTOCOL_H #include <linux/random.h> #include <net/tcp.h> #include <net/inet_connection_sock.h> #include <uapi/linux/mptcp.h> #define MPTCP_SUPPORTED_VERSION 1 /* MPTCP option bits */ #define OPTION_MPTCP_MPC_SYN BIT(0) #define OPTION_MPTCP_MPC_SYNACK BIT(1) #define OPTION_MPTCP_MPC_ACK BIT(2) #define OPTION_MPTCP_MPJ_SYN BIT(3) #define OPTION_MPTCP_MPJ_SYNACK BIT(4) #define OPTION_MPTCP_MPJ_ACK BIT(5) #define OPTION_MPTCP_ADD_ADDR BIT(6) #define OPTION_MPTCP_RM_ADDR BIT(7) #define OPTION_MPTCP_FASTCLOSE BIT(8) #define OPTION_MPTCP_PRIO BIT(9) #define OPTION_MPTCP_RST BIT(10) #define OPTION_MPTCP_DSS BIT(11) #define OPTION_MPTCP_FAIL BIT(12) #define OPTION_MPTCP_CSUMREQD BIT(13) #define OPTIONS_MPTCP_MPC (OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | \ OPTION_MPTCP_MPC_ACK) #define OPTIONS_MPTCP_MPJ (OPTION_MPTCP_MPJ_SYN | OPTION_MPTCP_MPJ_SYNACK | \ OPTION_MPTCP_MPJ_ACK) /* MPTCP option subtypes */ #define MPTCPOPT_MP_CAPABLE 0 #define MPTCPOPT_MP_JOIN 1 #define MPTCPOPT_DSS 2 #define MPTCPOPT_ADD_ADDR 3 #define MPTCPOPT_RM_ADDR 4 #define MPTCPOPT_MP_PRIO 5 #define MPTCPOPT_MP_FAIL 6 #define MPTCPOPT_MP_FASTCLOSE 7 #define MPTCPOPT_RST 8 /* MPTCP suboption lengths */ #define TCPOLEN_MPTCP_MPC_SYN 4 #define TCPOLEN_MPTCP_MPC_SYNACK 12 #define TCPOLEN_MPTCP_MPC_ACK 20 #define TCPOLEN_MPTCP_MPC_ACK_DATA 22 #define TCPOLEN_MPTCP_MPJ_SYN 12 #define TCPOLEN_MPTCP_MPJ_SYNACK 16 #define TCPOLEN_MPTCP_MPJ_ACK 24 #define TCPOLEN_MPTCP_DSS_BASE 4 #define TCPOLEN_MPTCP_DSS_ACK32 4 #define TCPOLEN_MPTCP_DSS_ACK64 8 #define TCPOLEN_MPTCP_DSS_MAP32 10 #define TCPOLEN_MPTCP_DSS_MAP64 14 #define TCPOLEN_MPTCP_DSS_CHECKSUM 2 #define TCPOLEN_MPTCP_ADD_ADDR 16 #define TCPOLEN_MPTCP_ADD_ADDR_PORT 18 #define TCPOLEN_MPTCP_ADD_ADDR_BASE 8 #define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT 10 #define TCPOLEN_MPTCP_ADD_ADDR6 28 #define TCPOLEN_MPTCP_ADD_ADDR6_PORT 30 #define TCPOLEN_MPTCP_ADD_ADDR6_BASE 20 #define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 22 #define TCPOLEN_MPTCP_PORT_LEN 2 #define TCPOLEN_MPTCP_PORT_ALIGN 2 #define TCPOLEN_MPTCP_RM_ADDR_BASE 3 #define TCPOLEN_MPTCP_PRIO 3 #define TCPOLEN_MPTCP_PRIO_ALIGN 4 #define TCPOLEN_MPTCP_FASTCLOSE 12 #define TCPOLEN_MPTCP_RST 4 #define TCPOLEN_MPTCP_FAIL 12 #define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA) /* MPTCP MP_JOIN flags */ #define MPTCPOPT_BACKUP BIT(0) #define MPTCPOPT_HMAC_LEN 20 #define MPTCPOPT_THMAC_LEN 8 /* MPTCP MP_CAPABLE flags */ #define MPTCP_VERSION_MASK (0x0F) #define MPTCP_CAP_CHECKSUM_REQD BIT(7) #define MPTCP_CAP_EXTENSIBILITY BIT(6) #define MPTCP_CAP_DENY_JOIN_ID0 BIT(5) #define MPTCP_CAP_HMAC_SHA256 BIT(0) #define MPTCP_CAP_FLAG_MASK (0x1F) /* MPTCP DSS flags */ #define MPTCP_DSS_DATA_FIN BIT(4) #define MPTCP_DSS_DSN64 BIT(3) #define MPTCP_DSS_HAS_MAP BIT(2) #define MPTCP_DSS_ACK64 BIT(1) #define MPTCP_DSS_HAS_ACK BIT(0) #define MPTCP_DSS_FLAG_MASK (0x1F) /* MPTCP ADD_ADDR flags */ #define MPTCP_ADDR_ECHO BIT(0) /* MPTCP MP_PRIO flags */ #define MPTCP_PRIO_BKUP BIT(0) /* MPTCP TCPRST flags */ #define MPTCP_RST_TRANSIENT BIT(0) /* MPTCP socket flags */ #define MPTCP_DATA_READY 0 #define MPTCP_NOSPACE 1 #define MPTCP_WORK_RTX 2 #define MPTCP_WORK_EOF 3 #define MPTCP_FALLBACK_DONE 4 #define MPTCP_WORK_CLOSE_SUBFLOW 5 #define MPTCP_PUSH_PENDING 6 #define MPTCP_CLEAN_UNA 7 #define MPTCP_ERROR_REPORT 8 #define MPTCP_RETRANSMIT 9 #define MPTCP_WORK_SYNC_SETSOCKOPT 10 #define MPTCP_CONNECTED 11 static inline bool before64(__u64 seq1, __u64 seq2) { return (__s64)(seq1 - seq2) < 0; } #define after64(seq2, seq1) before64(seq1, seq2) struct mptcp_options_received { u64 sndr_key; u64 rcvr_key; u64 data_ack; u64 data_seq; u32 subflow_seq; u16 data_len; __sum16 csum; struct_group(status, u16 suboptions; u16 use_map:1, dsn64:1, data_fin:1, use_ack:1, ack64:1, mpc_map:1, reset_reason:4, reset_transient:1, echo:1, backup:1, deny_join_id0:1, __unused:2; ); u8 join_id; u32 token; u32 nonce; u64 thmac; u8 hmac[MPTCPOPT_HMAC_LEN]; struct mptcp_addr_info addr; struct mptcp_rm_list rm_list; u64 ahmac; u64 fail_seq; }; static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field) { return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) | ((nib & 0xF) << 8) | field); } enum mptcp_pm_status { MPTCP_PM_ADD_ADDR_RECEIVED, MPTCP_PM_ADD_ADDR_SEND_ACK, MPTCP_PM_RM_ADDR_RECEIVED, MPTCP_PM_ESTABLISHED, MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */ MPTCP_PM_SUBFLOW_ESTABLISHED, }; enum mptcp_addr_signal_status { MPTCP_ADD_ADDR_SIGNAL, MPTCP_ADD_ADDR_ECHO, MPTCP_RM_ADDR_SIGNAL, }; struct mptcp_pm_data { struct mptcp_addr_info local; struct mptcp_addr_info remote; struct list_head anno_list; spinlock_t lock; /*protects the whole PM data */ u8 addr_signal; bool server_side; bool work_pending; bool accept_addr; bool accept_subflow; bool remote_deny_join_id0; u8 add_addr_signaled; u8 add_addr_accepted; u8 local_addr_used; u8 subflows; u8 status; struct mptcp_rm_list rm_list_tx; struct mptcp_rm_list rm_list_rx; }; struct mptcp_data_frag { struct list_head list; u64 data_seq; u16 data_len; u16 offset; u16 overhead; u16 already_sent; struct page *page; }; /* MPTCP connection sock */ struct mptcp_sock { /* inet_connection_sock must be the first member */ struct inet_connection_sock sk; u64 local_key; u64 remote_key; u64 write_seq; u64 snd_nxt; u64 ack_seq; u64 rcv_wnd_sent; u64 rcv_data_fin_seq; int wmem_reserved; struct sock *last_snd; int snd_burst; int old_wspace; u64 recovery_snd_nxt; /* in recovery mode accept up to this seq; * recovery related fields are under data_lock * protection */ u64 snd_una; u64 wnd_end; unsigned long timer_ival; u32 token; int rmem_released; unsigned long flags; bool recovery; /* closing subflow write queue reinjected */ bool can_ack; bool fully_established; bool rcv_data_fin; bool snd_data_fin_enable; bool rcv_fastclose; bool use_64bit_ack; /* Set when we received a 64-bit DSN */ bool csum_enabled; bool allow_infinite_fallback; spinlock_t join_list_lock; int keepalive_cnt; int keepalive_idle; int keepalive_intvl; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; struct sk_buff_head receive_queue; int tx_pending_data; struct list_head conn_list; struct list_head rtx_queue; struct mptcp_data_frag *first_pending; struct list_head join_list; struct socket *subflow; /* outgoing connect/listener/!mp_capable */ struct sock *first; struct mptcp_pm_data pm; struct { u32 space; /* bytes copied in last measurement window */ u32 copied; /* bytes copied in this measurement window */ u64 time; /* start time of measurement window */ u64 rtt_us; /* last maximum rtt of subflows */ } rcvq_space; u32 setsockopt_seq; char ca_name[TCP_CA_NAME_MAX]; }; #define mptcp_lock_sock(___sk, cb) do { \ struct sock *__sk = (___sk); /* silence macro reuse warning */ \ might_sleep(); \ spin_lock_bh(&__sk->sk_lock.slock); \ if (__sk->sk_lock.owned) \ __lock_sock(__sk); \ cb; \ __sk->sk_lock.owned = 1; \ spin_unlock(&__sk->sk_lock.slock); \ mutex_acquire(&__sk->sk_lock.dep_map, 0, 0, _RET_IP_); \ local_bh_enable(); \ } while (0) #define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock) #define mptcp_data_unlock(sk) spin_unlock_bh(&(sk)->sk_lock.slock) #define mptcp_for_each_subflow(__msk, __subflow) \ list_for_each_entry(__subflow, &((__msk)->conn_list), node) static inline void msk_owned_by_me(const struct mptcp_sock *msk) { sock_owned_by_me((const struct sock *)msk); } static inline struct mptcp_sock *mptcp_sk(const struct sock *sk) { return (struct mptcp_sock *)sk; } /* the msk socket don't use the backlog, also account for the bulk * free memory */ static inline int __mptcp_rmem(const struct sock *sk) { return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released); } static inline int __mptcp_space(const struct sock *sk) { return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk)); } static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) { const struct mptcp_sock *msk = mptcp_sk(sk); return READ_ONCE(msk->first_pending); } static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_data_frag *cur; cur = msk->first_pending; return list_is_last(&cur->list, &msk->rtx_queue) ? NULL : list_next_entry(cur, list); } static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); if (!msk->first_pending) return NULL; if (WARN_ON_ONCE(list_empty(&msk->rtx_queue))) return NULL; return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list); } static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); if (msk->snd_una == READ_ONCE(msk->snd_nxt)) return NULL; return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list); } struct csum_pseudo_header { __be64 data_seq; __be32 subflow_seq; __be16 data_len; __sum16 csum; }; struct mptcp_subflow_request_sock { struct tcp_request_sock sk; u16 mp_capable : 1, mp_join : 1, backup : 1, request_bkup : 1, csum_reqd : 1, allow_join_id0 : 1; u8 local_id; u8 remote_id; u64 local_key; u64 idsn; u32 token; u32 ssn_offset; u64 thmac; u32 local_nonce; u32 remote_nonce; struct mptcp_sock *msk; struct hlist_nulls_node token_node; }; static inline struct mptcp_subflow_request_sock * mptcp_subflow_rsk(const struct request_sock *rsk) { return (struct mptcp_subflow_request_sock *)rsk; } enum mptcp_data_avail { MPTCP_SUBFLOW_NODATA, MPTCP_SUBFLOW_DATA_AVAIL, }; struct mptcp_delegated_action { struct napi_struct napi; struct list_head head; }; DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); #define MPTCP_DELEGATE_SEND 0 #define MPTCP_DELEGATE_ACK 1 /* MPTCP subflow context */ struct mptcp_subflow_context { struct list_head node;/* conn_list of subflows */ u64 local_key; u64 remote_key; u64 idsn; u64 map_seq; u32 snd_isn; u32 token; u32 rel_write_seq; u32 map_subflow_seq; u32 ssn_offset; u32 map_data_len; __wsum map_data_csum; u32 map_csum_len; u32 request_mptcp : 1, /* send MP_CAPABLE */ request_join : 1, /* send MP_JOIN */ request_bkup : 1, mp_capable : 1, /* remote is MPTCP capable */ mp_join : 1, /* remote is JOINing */ fully_established : 1, /* path validated */ pm_notified : 1, /* PM hook called for established status */ conn_finished : 1, map_valid : 1, map_csum_reqd : 1, map_data_fin : 1, mpc_map : 1, backup : 1, send_mp_prio : 1, send_mp_fail : 1, rx_eof : 1, can_ack : 1, /* only after processing the remote a key */ disposable : 1, /* ctx can be free at ulp release time */ stale : 1, /* unable to snd/rcv data, do not use for xmit */ valid_csum_seen : 1, /* at least one csum validated */ close_event_done : 1, /* has done the post-closed part */ __unused : 11; enum mptcp_data_avail data_avail; bool pm_listener; /* a listener managed by the kernel PM? */ u32 remote_nonce; u64 thmac; u32 local_nonce; u32 remote_token; u8 hmac[MPTCPOPT_HMAC_LEN]; u8 local_id; u8 remote_id; u8 reset_seen:1; u8 reset_transient:1; u8 reset_reason:4; u8 stale_count; long delegated_status; struct list_head delegated_node; /* link into delegated_action, protected by local BH */ u32 setsockopt_seq; u32 stale_rcv_tstamp; struct sock *tcp_sock; /* tcp sk backpointer */ struct sock *conn; /* parent mptcp_sock */ const struct inet_connection_sock_af_ops *icsk_af_ops; void (*tcp_data_ready)(struct sock *sk); void (*tcp_state_change)(struct sock *sk); void (*tcp_write_space)(struct sock *sk); void (*tcp_error_report)(struct sock *sk); struct rcu_head rcu; }; static inline struct mptcp_subflow_context * mptcp_subflow_ctx(const struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); /* Use RCU on icsk_ulp_data only for sock diag code */ return (__force struct mptcp_subflow_context *)icsk->icsk_ulp_data; } static inline struct sock * mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) { return subflow->tcp_sock; } static inline u64 mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow) { return tcp_sk(mptcp_subflow_tcp_sock(subflow))->copied_seq - subflow->ssn_offset - subflow->map_subflow_seq; } static inline u64 mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow) { return subflow->map_seq + mptcp_subflow_get_map_offset(subflow); } static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow) { sock_hold(mptcp_subflow_tcp_sock(subflow)); spin_lock_bh(&msk->join_list_lock); list_add_tail(&subflow->node, &msk->join_list); spin_unlock_bh(&msk->join_list_lock); } void mptcp_subflow_process_delegated(struct sock *ssk); static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action) { struct mptcp_delegated_action *delegated; bool schedule; /* the caller held the subflow bh socket lock */ lockdep_assert_in_softirq(); /* The implied barrier pairs with mptcp_subflow_delegated_done(), and * ensures the below list check sees list updates done prior to status * bit changes */ if (!test_and_set_bit(action, &subflow->delegated_status)) { /* still on delegated list from previous scheduling */ if (!list_empty(&subflow->delegated_node)) return; delegated = this_cpu_ptr(&mptcp_delegated_actions); schedule = list_empty(&delegated->head); list_add_tail(&subflow->delegated_node, &delegated->head); sock_hold(mptcp_subflow_tcp_sock(subflow)); if (schedule) napi_schedule(&delegated->napi); } } static inline struct mptcp_subflow_context * mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated) { struct mptcp_subflow_context *ret; if (list_empty(&delegated->head)) return NULL; ret = list_first_entry(&delegated->head, struct mptcp_subflow_context, delegated_node); list_del_init(&ret->delegated_node); return ret; } static inline bool mptcp_subflow_has_delegated_action(const struct mptcp_subflow_context *subflow) { return !!READ_ONCE(subflow->delegated_status); } static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow, int action) { /* pairs with mptcp_subflow_delegate, ensures delegate_node is updated before * touching the status bit */ smp_wmb(); clear_bit(action, &subflow->delegated_status); } int mptcp_is_enabled(const struct net *net); unsigned int mptcp_get_add_addr_timeout(const struct net *net); int mptcp_is_checksum_enabled(const struct net *net); int mptcp_allow_join_id0(const struct net *net); unsigned int mptcp_stale_loss_cnt(const struct net *net); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, struct mptcp_options_received *mp_opt); bool __mptcp_retransmit_pending_data(struct sock *sk); void __mptcp_push_pending(struct sock *sk, unsigned int flags); bool mptcp_subflow_data_available(struct sock *sk); void __init mptcp_subflow_init(void); void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how); void mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); void mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr); /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, const struct mptcp_addr_info *remote); int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock); void mptcp_info2sockaddr(const struct mptcp_addr_info *info, struct sockaddr_storage *addr, unsigned short family); static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); /* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */ if (subflow->request_join && !subflow->fully_established) return false; /* only send if our side has not closed yet */ return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)); } void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow); bool mptcp_subflow_active(struct mptcp_subflow_context *subflow); static inline void mptcp_subflow_tcp_fallback(struct sock *sk, struct mptcp_subflow_context *ctx) { sk->sk_data_ready = ctx->tcp_data_ready; sk->sk_state_change = ctx->tcp_state_change; sk->sk_write_space = ctx->tcp_write_space; sk->sk_error_report = ctx->tcp_error_report; inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops; } static inline bool mptcp_has_another_subflow(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk), *tmp; struct mptcp_sock *msk = mptcp_sk(subflow->conn); mptcp_for_each_subflow(msk, tmp) { if (tmp != subflow) return true; } return false; } void __init mptcp_proto_init(void); #if IS_ENABLED(CONFIG_MPTCP_IPV6) int __init mptcp_proto_v6_init(void); #endif struct sock *mptcp_sk_clone(const struct sock *sk, const struct mptcp_options_received *mp_opt, struct request_sock *req); void mptcp_get_options(const struct sk_buff *skb, struct mptcp_options_received *mp_opt); void mptcp_finish_connect(struct sock *sk); void __mptcp_set_connected(struct sock *sk); static inline bool mptcp_is_fully_established(struct sock *sk) { return inet_sk_state_load(sk) == TCP_ESTABLISHED && READ_ONCE(mptcp_sk(sk)->fully_established); } void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk); void mptcp_data_ready(struct sock *sk, struct sock *ssk); bool mptcp_finish_join(struct sock *sk); bool mptcp_schedule_work(struct sock *sk); int mptcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int mptcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *option); u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq); static inline u64 mptcp_expand_seq(u64 old_seq, u64 cur_seq, bool use_64bit) { if (use_64bit) return cur_seq; return __mptcp_expand_seq(old_seq, cur_seq); } void __mptcp_check_push(struct sock *sk, struct sock *ssk); void __mptcp_data_acked(struct sock *sk); void __mptcp_error_report(struct sock *sk); void mptcp_subflow_eof(struct sock *sk); bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit); void __mptcp_flush_join_list(struct mptcp_sock *msk); static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) { return READ_ONCE(msk->snd_data_fin_enable) && READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt); } static inline bool mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk) { if ((sk->sk_userlocks & SOCK_SNDBUF_LOCK) || ssk->sk_sndbuf <= READ_ONCE(sk->sk_sndbuf)) return false; WRITE_ONCE(sk->sk_sndbuf, ssk->sk_sndbuf); return true; } static inline void mptcp_write_space(struct sock *sk) { if (sk_stream_is_writeable(sk)) { /* pairs with memory barrier in mptcp_poll */ smp_mb(); if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags)) sk_stream_write_space(sk); } } void mptcp_destroy_common(struct mptcp_sock *msk); #define MPTCP_TOKEN_MAX_RETRIES 4 void __init mptcp_token_init(void); static inline void mptcp_token_init_request(struct request_sock *req) { mptcp_subflow_rsk(req)->token_node.pprev = NULL; } int mptcp_token_new_request(struct request_sock *req); void mptcp_token_destroy_request(struct request_sock *req); int mptcp_token_new_connect(struct sock *sk); void mptcp_token_accept(struct mptcp_subflow_request_sock *r, struct mptcp_sock *msk); bool mptcp_token_exists(u32 token); struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token); struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot, long *s_num); void mptcp_token_destroy(struct mptcp_sock *msk); void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn); void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac); __sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum); void __init mptcp_pm_init(void); void mptcp_pm_data_init(struct mptcp_sock *msk); void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); void mptcp_pm_connection_closed(struct mptcp_sock *msk); void mptcp_pm_subflow_established(struct mptcp_sock *msk); void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id); void mptcp_pm_add_addr_received(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk); bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *remote); void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk); void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, struct mptcp_addr_info *addr, u8 bkup); void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq); void mptcp_pm_free_anno_list(struct mptcp_sock *msk); bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); struct mptcp_pm_add_entry * mptcp_pm_del_add_timer(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool check_id); struct mptcp_pm_add_entry * mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, const struct mptcp_addr_info *addr); int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id, u8 *flags, int *ifindex); int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool echo); int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); void mptcp_event_addr_announced(const struct mptcp_sock *msk, const struct mptcp_addr_info *info); void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { return READ_ONCE(msk->pm.addr_signal) & (BIT(MPTCP_ADD_ADDR_SIGNAL) | BIT(MPTCP_ADD_ADDR_ECHO)); } static inline bool mptcp_pm_should_add_signal_addr(struct mptcp_sock *msk) { return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL); } static inline bool mptcp_pm_should_add_signal_echo(struct mptcp_sock *msk) { return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO); } static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk) { return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_RM_ADDR_SIGNAL); } static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port) { u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE; if (family == AF_INET6) len = TCPOLEN_MPTCP_ADD_ADDR6_BASE; if (!echo) len += MPTCPOPT_THMAC_LEN; /* account for 2 trailing 'nop' options */ if (port) len += TCPOLEN_MPTCP_PORT_LEN + TCPOLEN_MPTCP_PORT_ALIGN; return len; } static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list) { if (rm_list->nr == 0 || rm_list->nr > MPTCP_RM_IDS_MAX) return -EINVAL; return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1; } bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, unsigned int opt_size, unsigned int remaining, struct mptcp_addr_info *addr, bool *echo, bool *drop_other_suboptions); bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, struct mptcp_rm_list *rm_list); int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc); void __init mptcp_pm_nl_init(void); void mptcp_pm_nl_data_init(struct mptcp_sock *msk); void mptcp_pm_nl_work(struct mptcp_sock *msk); void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk); void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk); void mptcp_sockopt_sync_all(struct mptcp_sock *msk); static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb) { return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP); } void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops); static inline bool __mptcp_check_fallback(const struct mptcp_sock *msk) { return test_bit(MPTCP_FALLBACK_DONE, &msk->flags); } static inline bool mptcp_check_fallback(const struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); return __mptcp_check_fallback(msk); } static inline void __mptcp_do_fallback(struct mptcp_sock *msk) { if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) { pr_debug("TCP fallback already done (msk=%p)\n", msk); return; } if (WARN_ON_ONCE(!READ_ONCE(msk->allow_infinite_fallback))) return; set_bit(MPTCP_FALLBACK_DONE, &msk->flags); } static inline void mptcp_do_fallback(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); __mptcp_do_fallback(msk); } #define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a) static inline bool subflow_simultaneous_connect(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct sock *parent = subflow->conn; return sk->sk_state == TCP_ESTABLISHED && !mptcp_sk(parent)->pm.server_side && !subflow->conn_finished; } #ifdef CONFIG_SYN_COOKIES void subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req, struct sk_buff *skb); bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req, struct sk_buff *skb); void __init mptcp_join_cookie_init(void); #else static inline void subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req, struct sk_buff *skb) {} static inline bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req, struct sk_buff *skb) { return false; } static inline void mptcp_join_cookie_init(void) {} #endif #endif /* __MPTCP_PROTOCOL_H */ |
76 76 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar * Copyright (C) 2005-2006, Thomas Gleixner * * This file contains the IRQ-resend code * * If the interrupt is waiting to be processed, we try to re-run it. * We can't directly run it from here since the caller might be in an * interrupt-protected region. Not all irq controller chips can * retrigger interrupts at the hardware level, so in those cases * we allow the resending of IRQs via a tasklet. */ #include <linux/irq.h> #include <linux/module.h> #include <linux/random.h> #include <linux/interrupt.h> #include "internals.h" #ifdef CONFIG_HARDIRQS_SW_RESEND /* Bitmap to handle software resend of interrupts: */ static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS); /* * Run software resends of IRQ's */ static void resend_irqs(struct tasklet_struct *unused) { struct irq_desc *desc; int irq; while (!bitmap_empty(irqs_resend, nr_irqs)) { irq = find_first_bit(irqs_resend, nr_irqs); clear_bit(irq, irqs_resend); desc = irq_to_desc(irq); if (!desc) continue; local_irq_disable(); desc->handle_irq(desc); local_irq_enable(); } } /* Tasklet to handle resend: */ static DECLARE_TASKLET(resend_tasklet, resend_irqs); static int irq_sw_resend(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); /* * Validate whether this interrupt can be safely injected from * non interrupt context */ if (handle_enforce_irqctx(&desc->irq_data)) return -EINVAL; /* * If the interrupt is running in the thread context of the parent * irq we need to be careful, because we cannot trigger it * directly. */ if (irq_settings_is_nested_thread(desc)) { /* * If the parent_irq is valid, we retrigger the parent, * otherwise we do nothing. */ if (!desc->parent_irq) return -EINVAL; irq = desc->parent_irq; } /* Set it pending and activate the softirq: */ set_bit(irq, irqs_resend); tasklet_schedule(&resend_tasklet); return 0; } #else static int irq_sw_resend(struct irq_desc *desc) { return -EINVAL; } #endif static int try_retrigger(struct irq_desc *desc) { if (desc->irq_data.chip->irq_retrigger) return desc->irq_data.chip->irq_retrigger(&desc->irq_data); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY return irq_chip_retrigger_hierarchy(&desc->irq_data); #else return 0; #endif } /* * IRQ resend * * Is called with interrupts disabled and desc->lock held. */ int check_irq_resend(struct irq_desc *desc, bool inject) { int err = 0; /* * We do not resend level type interrupts. Level type interrupts * are resent by hardware when they are still active. Clear the * pending bit so suspend/resume does not get confused. */ if (irq_settings_is_level(desc)) { desc->istate &= ~IRQS_PENDING; return -EINVAL; } if (desc->istate & IRQS_REPLAY) return -EBUSY; if (!(desc->istate & IRQS_PENDING) && !inject) return 0; desc->istate &= ~IRQS_PENDING; if (!try_retrigger(desc)) err = irq_sw_resend(desc); /* If the retrigger was successful, mark it with the REPLAY bit */ if (!err) desc->istate |= IRQS_REPLAY; return err; } #ifdef CONFIG_GENERIC_IRQ_INJECTION /** * irq_inject_interrupt - Inject an interrupt for testing/error injection * @irq: The interrupt number * * This function must only be used for debug and testing purposes! * * Especially on x86 this can cause a premature completion of an interrupt * affinity change causing the interrupt line to become stale. Very * unlikely, but possible. * * The injection can fail for various reasons: * - Interrupt is not activated * - Interrupt is NMI type or currently replaying * - Interrupt is level type * - Interrupt does not support hardware retrigger and software resend is * either not enabled or not possible for the interrupt. */ int irq_inject_interrupt(unsigned int irq) { struct irq_desc *desc; unsigned long flags; int err; /* Try the state injection hardware interface first */ if (!irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, true)) return 0; /* That failed, try via the resend mechanism */ desc = irq_get_desc_buslock(irq, &flags, 0); if (!desc) return -EINVAL; /* * Only try to inject when the interrupt is: * - not NMI type * - activated */ if ((desc->istate & IRQS_NMI) || !irqd_is_activated(&desc->irq_data)) err = -EINVAL; else err = check_irq_resend(desc, true); irq_put_desc_busunlock(desc, flags); return err; } EXPORT_SYMBOL_GPL(irq_inject_interrupt); #endif |
35 67 25 2 114 113 113 93 113 33 113 57 3 7 46 112 83 88 70 109 106 99 67 67 8 115 116 100 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 | // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/isofs/namei.c * * (C) 1992 Eric Youngdale Modified for ISO 9660 filesystem. * * (C) 1991 Linus Torvalds - minix filesystem */ #include <linux/gfp.h> #include "isofs.h" /* * ok, we cannot use strncmp, as the name is not in our data space. * Thus we'll have to use isofs_match. No big problem. Match also makes * some sanity tests. */ static int isofs_cmp(struct dentry *dentry, const char *compare, int dlen) { struct qstr qstr; qstr.name = compare; qstr.len = dlen; if (likely(!dentry->d_op)) return dentry->d_name.len != dlen || memcmp(dentry->d_name.name, compare, dlen); return dentry->d_op->d_compare(NULL, dentry->d_name.len, dentry->d_name.name, &qstr); } /* * isofs_find_entry() * * finds an entry in the specified directory with the wanted name. It * returns the inode number of the found entry, or 0 on error. */ static unsigned long isofs_find_entry(struct inode *dir, struct dentry *dentry, unsigned long *block_rv, unsigned long *offset_rv, char *tmpname, struct iso_directory_record *tmpde) { unsigned long bufsize = ISOFS_BUFFER_SIZE(dir); unsigned char bufbits = ISOFS_BUFFER_BITS(dir); unsigned long block, f_pos, offset, block_saved, offset_saved; struct buffer_head *bh = NULL; struct isofs_sb_info *sbi = ISOFS_SB(dir->i_sb); if (!ISOFS_I(dir)->i_first_extent) return 0; f_pos = 0; offset = 0; block = 0; while (f_pos < dir->i_size) { struct iso_directory_record *de; int de_len, match, i, dlen; char *dpnt; if (!bh) { bh = isofs_bread(dir, block); if (!bh) return 0; } de = (struct iso_directory_record *) (bh->b_data + offset); de_len = *(unsigned char *) de; if (!de_len) { brelse(bh); bh = NULL; f_pos = (f_pos + ISOFS_BLOCK_SIZE) & ~(ISOFS_BLOCK_SIZE - 1); block = f_pos >> bufbits; offset = 0; continue; } block_saved = bh->b_blocknr; offset_saved = offset; offset += de_len; f_pos += de_len; /* Make sure we have a full directory entry */ if (offset >= bufsize) { int slop = bufsize - offset + de_len; memcpy(tmpde, de, slop); offset &= bufsize - 1; block++; brelse(bh); bh = NULL; if (offset) { bh = isofs_bread(dir, block); if (!bh) return 0; memcpy((void *) tmpde + slop, bh->b_data, offset); } de = tmpde; } dlen = de->name_len[0]; dpnt = de->name; /* Basic sanity check, whether name doesn't exceed dir entry */ if (de_len < dlen + sizeof(struct iso_directory_record)) { printk(KERN_NOTICE "iso9660: Corrupted directory entry" " in block %lu of inode %lu\n", block, dir->i_ino); brelse(bh); return 0; } if (sbi->s_rock && ((i = get_rock_ridge_filename(de, tmpname, dir)))) { dlen = i; /* possibly -1 */ dpnt = tmpname; #ifdef CONFIG_JOLIET } else if (sbi->s_joliet_level) { dlen = get_joliet_filename(de, tmpname, dir); dpnt = tmpname; #endif } else if (sbi->s_mapping == 'a') { dlen = get_acorn_filename(de, tmpname, dir); dpnt = tmpname; } else if (sbi->s_mapping == 'n') { dlen = isofs_name_translate(de, tmpname, dir); dpnt = tmpname; } /* * Skip hidden or associated files unless hide or showassoc, * respectively, is set */ match = 0; if (dlen > 0 && (!sbi->s_hide || (!(de->flags[-sbi->s_high_sierra] & 1))) && (sbi->s_showassoc || (!(de->flags[-sbi->s_high_sierra] & 4)))) { if (dpnt && (dlen > 1 || dpnt[0] > 1)) match = (isofs_cmp(dentry, dpnt, dlen) == 0); } if (match) { isofs_normalize_block_and_offset(de, &block_saved, &offset_saved); *block_rv = block_saved; *offset_rv = offset_saved; brelse(bh); return 1; } } brelse(bh); return 0; } struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { int found; unsigned long block; unsigned long offset; struct inode *inode; struct page *page; page = alloc_page(GFP_USER); if (!page) return ERR_PTR(-ENOMEM); found = isofs_find_entry(dir, dentry, &block, &offset, page_address(page), 1024 + page_address(page)); __free_page(page); inode = found ? isofs_iget(dir->i_sb, block, offset) : NULL; return d_splice_alias(inode, dentry); } |
1645 1 1 78 78 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 | // SPDX-License-Identifier: GPL-2.0-only /* * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x. * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org> */ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/slab.h> #include <net/ip.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("iptables filter table"); #define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \ (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT)) static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_FILTER, }; static unsigned int iptable_filter_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { return ipt_do_table(skb, state, priv); } static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ static bool forward __read_mostly = true; module_param(forward, bool, 0000); static int iptable_filter_table_init(struct net *net) { struct ipt_replace *repl; int err; repl = ipt_alloc_initial_table(&packet_filter); if (repl == NULL) return -ENOMEM; /* Entry 1 is the FORWARD hook */ ((struct ipt_standard *)repl->entries)[1].target.verdict = forward ? -NF_ACCEPT - 1 : -NF_DROP - 1; err = ipt_register_table(net, &packet_filter, repl, filter_ops); kfree(repl); return err; } static int __net_init iptable_filter_net_init(struct net *net) { if (!forward) return iptable_filter_table_init(net); return 0; } static void __net_exit iptable_filter_net_pre_exit(struct net *net) { ipt_unregister_table_pre_exit(net, "filter"); } static void __net_exit iptable_filter_net_exit(struct net *net) { ipt_unregister_table_exit(net, "filter"); } static struct pernet_operations iptable_filter_net_ops = { .init = iptable_filter_net_init, .pre_exit = iptable_filter_net_pre_exit, .exit = iptable_filter_net_exit, }; static int __init iptable_filter_init(void) { int ret = xt_register_template(&packet_filter, iptable_filter_table_init); if (ret < 0) return ret; filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook); if (IS_ERR(filter_ops)) { xt_unregister_template(&packet_filter); return PTR_ERR(filter_ops); } ret = register_pernet_subsys(&iptable_filter_net_ops); if (ret < 0) { xt_unregister_template(&packet_filter); kfree(filter_ops); return ret; } return 0; } static void __exit iptable_filter_fini(void) { unregister_pernet_subsys(&iptable_filter_net_ops); xt_unregister_template(&packet_filter); kfree(filter_ops); } module_init(iptable_filter_init); module_exit(iptable_filter_fini); |
3 3 3 3 1 2 2 8 5 4 2 2 3 1 3 1 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2007-2008 BalaBit IT Ltd. * Author: Krisztian Kovacs */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <net/tcp.h> #include <net/udp.h> #include <net/icmp.h> #include <net/sock.h> #include <net/inet_sock.h> #include <net/netfilter/nf_socket.h> #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack.h> #endif static int extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol, __be32 *raddr, __be32 *laddr, __be16 *rport, __be16 *lport) { unsigned int outside_hdrlen = ip_hdrlen(skb); struct iphdr *inside_iph, _inside_iph; struct icmphdr *icmph, _icmph; __be16 *ports, _ports[2]; icmph = skb_header_pointer(skb, outside_hdrlen, sizeof(_icmph), &_icmph); if (icmph == NULL) return 1; if (!icmp_is_err(icmph->type)) return 1; inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(struct icmphdr), sizeof(_inside_iph), &_inside_iph); if (inside_iph == NULL) return 1; if (inside_iph->protocol != IPPROTO_TCP && inside_iph->protocol != IPPROTO_UDP) return 1; ports = skb_header_pointer(skb, outside_hdrlen + sizeof(struct icmphdr) + (inside_iph->ihl << 2), sizeof(_ports), &_ports); if (ports == NULL) return 1; /* the inside IP packet is the one quoted from our side, thus * its saddr is the local address */ *protocol = inside_iph->protocol; *laddr = inside_iph->saddr; *lport = ports[0]; *raddr = inside_iph->daddr; *rport = ports[1]; return 0; } static struct sock * nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff, const u8 protocol, const __be32 saddr, const __be32 daddr, const __be16 sport, const __be16 dport, const struct net_device *in) { switch (protocol) { case IPPROTO_TCP: return inet_lookup(net, &tcp_hashinfo, skb, doff, saddr, sport, daddr, dport, in->ifindex); case IPPROTO_UDP: return udp4_lib_lookup(net, saddr, sport, daddr, dport, in->ifindex); } return NULL; } struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, const struct net_device *indev) { __be32 daddr, saddr; __be16 dport, sport; const struct iphdr *iph = ip_hdr(skb); struct sk_buff *data_skb = NULL; u8 protocol; #if IS_ENABLED(CONFIG_NF_CONNTRACK) enum ip_conntrack_info ctinfo; struct nf_conn const *ct; #endif int doff = 0; if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { struct tcphdr _hdr; struct udphdr *hp; hp = skb_header_pointer(skb, ip_hdrlen(skb), iph->protocol == IPPROTO_UDP ? sizeof(*hp) : sizeof(_hdr), &_hdr); if (hp == NULL) return NULL; protocol = iph->protocol; saddr = iph->saddr; sport = hp->source; daddr = iph->daddr; dport = hp->dest; data_skb = (struct sk_buff *)skb; doff = iph->protocol == IPPROTO_TCP ? ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) : ip_hdrlen(skb) + sizeof(*hp); } else if (iph->protocol == IPPROTO_ICMP) { if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, &sport, &dport)) return NULL; } else { return NULL; } #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Do the lookup with the original socket address in * case this is a reply packet of an established * SNAT-ted connection. */ ct = nf_ct_get(skb, &ctinfo); if (ct && ((iph->protocol != IPPROTO_ICMP && ctinfo == IP_CT_ESTABLISHED_REPLY) || (iph->protocol == IPPROTO_ICMP && ctinfo == IP_CT_RELATED_REPLY)) && (ct->status & IPS_SRC_NAT_DONE)) { daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; dport = (iph->protocol == IPPROTO_TCP) ? ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port : ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; } #endif return nf_socket_get_sock_v4(net, data_skb, doff, protocol, saddr, daddr, sport, dport, indev); } EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v4); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler"); MODULE_DESCRIPTION("Netfilter IPv4 socket lookup infrastructure"); |
165 372 1504 2098 336 1933 1099 436 2096 2098 1499 895 1099 895 15 57 15 44 44 44 9 104 103 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_HIGHMEM_H #define _LINUX_HIGHMEM_H #include <linux/fs.h> #include <linux/kernel.h> #include <linux/bug.h> #include <linux/mm.h> #include <linux/uaccess.h> #include <linux/hardirq.h> #include <asm/cacheflush.h> #include "highmem-internal.h" /** * kmap - Map a page for long term usage * @page: Pointer to the page to be mapped * * Returns: The virtual address of the mapping * * Can only be invoked from preemptible task context because on 32bit * systems with CONFIG_HIGHMEM enabled this function might sleep. * * For systems with CONFIG_HIGHMEM=n and for pages in the low memory area * this returns the virtual address of the direct kernel mapping. * * The returned virtual address is globally visible and valid up to the * point where it is unmapped via kunmap(). The pointer can be handed to * other contexts. * * For highmem pages on 32bit systems this can be slow as the mapping space * is limited and protected by a global lock. In case that there is no * mapping slot available the function blocks until a slot is released via * kunmap(). */ static inline void *kmap(struct page *page); /** * kunmap - Unmap the virtual address mapped by kmap() * @addr: Virtual address to be unmapped * * Counterpart to kmap(). A NOOP for CONFIG_HIGHMEM=n and for mappings of * pages in the low memory area. */ static inline void kunmap(struct page *page); /** * kmap_to_page - Get the page for a kmap'ed address * @addr: The address to look up * * Returns: The page which is mapped to @addr. */ static inline struct page *kmap_to_page(void *addr); /** * kmap_flush_unused - Flush all unused kmap mappings in order to * remove stray mappings */ static inline void kmap_flush_unused(void); /** * kmap_local_page - Map a page for temporary usage * @page: Pointer to the page to be mapped * * Returns: The virtual address of the mapping * * Can be invoked from any context. * * Requires careful handling when nesting multiple mappings because the map * management is stack based. The unmap has to be in the reverse order of * the map operation: * * addr1 = kmap_local_page(page1); * addr2 = kmap_local_page(page2); * ... * kunmap_local(addr2); * kunmap_local(addr1); * * Unmapping addr1 before addr2 is invalid and causes malfunction. * * Contrary to kmap() mappings the mapping is only valid in the context of * the caller and cannot be handed to other contexts. * * On CONFIG_HIGHMEM=n kernels and for low memory pages this returns the * virtual address of the direct mapping. Only real highmem pages are * temporarily mapped. * * While it is significantly faster than kmap() for the higmem case it * comes with restrictions about the pointer validity. Only use when really * necessary. * * On HIGHMEM enabled systems mapping a highmem page has the side effect of * disabling migration in order to keep the virtual address stable across * preemption. No caller of kmap_local_page() can rely on this side effect. */ static inline void *kmap_local_page(struct page *page); /** * kmap_atomic - Atomically map a page for temporary usage - Deprecated! * @page: Pointer to the page to be mapped * * Returns: The virtual address of the mapping * * Effectively a wrapper around kmap_local_page() which disables pagefaults * and preemption. * * Do not use in new code. Use kmap_local_page() instead. */ static inline void *kmap_atomic(struct page *page); /** * kunmap_atomic - Unmap the virtual address mapped by kmap_atomic() * @addr: Virtual address to be unmapped * * Counterpart to kmap_atomic(). * * Effectively a wrapper around kunmap_local() which additionally undoes * the side effects of kmap_atomic(), i.e. reenabling pagefaults and * preemption. */ /* Highmem related interfaces for management code */ static inline unsigned int nr_free_highpages(void); static inline unsigned long totalhigh_pages(void); #ifndef ARCH_HAS_FLUSH_ANON_PAGE static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) { } #endif #ifndef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE static inline void flush_kernel_vmap_range(void *vaddr, int size) { } static inline void invalidate_kernel_vmap_range(void *vaddr, int size) { } #endif /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */ #ifndef clear_user_highpage static inline void clear_user_highpage(struct page *page, unsigned long vaddr) { void *addr = kmap_atomic(page); clear_user_page(addr, vaddr, page); kunmap_atomic(addr); } #endif #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE /** * alloc_zeroed_user_highpage_movable - Allocate a zeroed HIGHMEM page for a VMA that the caller knows can move * @vma: The VMA the page is to be allocated for * @vaddr: The virtual address the page will be inserted into * * This function will allocate a page for a VMA that the caller knows will * be able to migrate in the future using move_pages() or reclaimed * * An architecture may override this function by defining * __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE and providing their own * implementation. */ static inline struct page * alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, unsigned long vaddr) { struct page *page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); if (page) clear_user_highpage(page, vaddr); return page; } #endif static inline void clear_highpage(struct page *page) { void *kaddr = kmap_atomic(page); clear_page(kaddr); kunmap_atomic(kaddr); } #ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE static inline void tag_clear_highpage(struct page *page) { } #endif /* * If we pass in a base or tail page, we can zero up to PAGE_SIZE. * If we pass in a head page, we can zero up to the size of the compound page. */ #if defined(CONFIG_HIGHMEM) && defined(CONFIG_TRANSPARENT_HUGEPAGE) void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2); #else /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */ static inline void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2) { void *kaddr = kmap_atomic(page); unsigned int i; BUG_ON(end1 > page_size(page) || end2 > page_size(page)); if (end1 > start1) memset(kaddr + start1, 0, end1 - start1); if (end2 > start2) memset(kaddr + start2, 0, end2 - start2); kunmap_atomic(kaddr); for (i = 0; i < compound_nr(page); i++) flush_dcache_page(page + i); } #endif /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */ static inline void zero_user_segment(struct page *page, unsigned start, unsigned end) { zero_user_segments(page, start, end, 0, 0); } static inline void zero_user(struct page *page, unsigned start, unsigned size) { zero_user_segments(page, start, start + size, 0, 0); } #ifndef __HAVE_ARCH_COPY_USER_HIGHPAGE static inline void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { char *vfrom, *vto; vfrom = kmap_atomic(from); vto = kmap_atomic(to); copy_user_page(vto, vfrom, vaddr, to); kunmap_atomic(vto); kunmap_atomic(vfrom); } #endif #ifdef copy_mc_to_kernel static inline int copy_mc_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { unsigned long ret; char *vfrom, *vto; vfrom = kmap_local_page(from); vto = kmap_local_page(to); ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE); kunmap_local(vto); kunmap_local(vfrom); return ret; } #else static inline int copy_mc_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { copy_user_highpage(to, from, vaddr, vma); return 0; } #endif #ifndef __HAVE_ARCH_COPY_HIGHPAGE static inline void copy_highpage(struct page *to, struct page *from) { char *vfrom, *vto; vfrom = kmap_atomic(from); vto = kmap_atomic(to); copy_page(vto, vfrom); kunmap_atomic(vto); kunmap_atomic(vfrom); } #endif static inline void memcpy_page(struct page *dst_page, size_t dst_off, struct page *src_page, size_t src_off, size_t len) { char *dst = kmap_local_page(dst_page); char *src = kmap_local_page(src_page); VM_BUG_ON(dst_off + len > PAGE_SIZE || src_off + len > PAGE_SIZE); memcpy(dst + dst_off, src + src_off, len); kunmap_local(src); kunmap_local(dst); } static inline void memmove_page(struct page *dst_page, size_t dst_off, struct page *src_page, size_t src_off, size_t len) { char *dst = kmap_local_page(dst_page); char *src = kmap_local_page(src_page); VM_BUG_ON(dst_off + len > PAGE_SIZE || src_off + len > PAGE_SIZE); memmove(dst + dst_off, src + src_off, len); kunmap_local(src); kunmap_local(dst); } static inline void memset_page(struct page *page, size_t offset, int val, size_t len) { char *addr = kmap_local_page(page); VM_BUG_ON(offset + len > PAGE_SIZE); memset(addr + offset, val, len); kunmap_local(addr); } static inline void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len) { char *from = kmap_local_page(page); VM_BUG_ON(offset + len > PAGE_SIZE); memcpy(to, from + offset, len); kunmap_local(from); } static inline void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len) { char *to = kmap_local_page(page); VM_BUG_ON(offset + len > PAGE_SIZE); memcpy(to + offset, from, len); flush_dcache_page(page); kunmap_local(to); } static inline void memzero_page(struct page *page, size_t offset, size_t len) { char *addr = kmap_local_page(page); memset(addr + offset, 0, len); flush_dcache_page(page); kunmap_local(addr); } #endif /* _LINUX_HIGHMEM_H */ |
161 142 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 | /* * Copyright (c) 2016 Intel Corporation * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. */ #ifndef __DRM_ENCODER_H__ #define __DRM_ENCODER_H__ #include <linux/list.h> #include <linux/ctype.h> #include <drm/drm_crtc.h> #include <drm/drm_mode.h> #include <drm/drm_mode_object.h> #include <drm/drm_util.h> struct drm_encoder; /** * struct drm_encoder_funcs - encoder controls * * Encoders sit between CRTCs and connectors. */ struct drm_encoder_funcs { /** * @reset: * * Reset encoder hardware and software state to off. This function isn't * called by the core directly, only through drm_mode_config_reset(). * It's not a helper hook only for historical reasons. */ void (*reset)(struct drm_encoder *encoder); /** * @destroy: * * Clean up encoder resources. This is only called at driver unload time * through drm_mode_config_cleanup() since an encoder cannot be * hotplugged in DRM. */ void (*destroy)(struct drm_encoder *encoder); /** * @late_register: * * This optional hook can be used to register additional userspace * interfaces attached to the encoder like debugfs interfaces. * It is called late in the driver load sequence from drm_dev_register(). * Everything added from this callback should be unregistered in * the early_unregister callback. * * Returns: * * 0 on success, or a negative error code on failure. */ int (*late_register)(struct drm_encoder *encoder); /** * @early_unregister: * * This optional hook should be used to unregister the additional * userspace interfaces attached to the encoder from * @late_register. It is called from drm_dev_unregister(), * early in the driver unload sequence to disable userspace access * before data structures are torndown. */ void (*early_unregister)(struct drm_encoder *encoder); }; /** * struct drm_encoder - central DRM encoder structure * @dev: parent DRM device * @head: list management * @base: base KMS object * @name: human readable name, can be overwritten by the driver * @funcs: control functions, can be NULL for simple managed encoders * @helper_private: mid-layer private data * * CRTCs drive pixels to encoders, which convert them into signals * appropriate for a given connector or set of connectors. */ struct drm_encoder { struct drm_device *dev; struct list_head head; struct drm_mode_object base; char *name; /** * @encoder_type: * * One of the DRM_MODE_ENCODER_<foo> types in drm_mode.h. The following * encoder types are defined thus far: * * - DRM_MODE_ENCODER_DAC for VGA and analog on DVI-I/DVI-A. * * - DRM_MODE_ENCODER_TMDS for DVI, HDMI and (embedded) DisplayPort. * * - DRM_MODE_ENCODER_LVDS for display panels, or in general any panel * with a proprietary parallel connector. * * - DRM_MODE_ENCODER_TVDAC for TV output (Composite, S-Video, * Component, SCART). * * - DRM_MODE_ENCODER_VIRTUAL for virtual machine displays * * - DRM_MODE_ENCODER_DSI for panels connected using the DSI serial bus. * * - DRM_MODE_ENCODER_DPI for panels connected using the DPI parallel * bus. * * - DRM_MODE_ENCODER_DPMST for special fake encoders used to allow * mutliple DP MST streams to share one physical encoder. */ int encoder_type; /** * @index: Position inside the mode_config.list, can be used as an array * index. It is invariant over the lifetime of the encoder. */ unsigned index; /** * @possible_crtcs: Bitmask of potential CRTC bindings, using * drm_crtc_index() as the index into the bitfield. The driver must set * the bits for all &drm_crtc objects this encoder can be connected to * before calling drm_dev_register(). * * You will get a WARN if you get this wrong in the driver. * * Note that since CRTC objects can't be hotplugged the assigned indices * are stable and hence known before registering all objects. */ uint32_t possible_crtcs; /** * @possible_clones: Bitmask of potential sibling encoders for cloning, * using drm_encoder_index() as the index into the bitfield. The driver * must set the bits for all &drm_encoder objects which can clone a * &drm_crtc together with this encoder before calling * drm_dev_register(). Drivers should set the bit representing the * encoder itself, too. Cloning bits should be set such that when two * encoders can be used in a cloned configuration, they both should have * each another bits set. * * As an exception to the above rule if the driver doesn't implement * any cloning it can leave @possible_clones set to 0. The core will * automagically fix this up by setting the bit for the encoder itself. * * You will get a WARN if you get this wrong in the driver. * * Note that since encoder objects can't be hotplugged the assigned indices * are stable and hence known before registering all objects. */ uint32_t possible_clones; /** * @crtc: Currently bound CRTC, only really meaningful for non-atomic * drivers. Atomic drivers should instead check * &drm_connector_state.crtc. */ struct drm_crtc *crtc; /** * @bridge_chain: Bridges attached to this encoder. Drivers shall not * access this field directly. */ struct list_head bridge_chain; const struct drm_encoder_funcs *funcs; const struct drm_encoder_helper_funcs *helper_private; }; #define obj_to_encoder(x) container_of(x, struct drm_encoder, base) __printf(5, 6) int drm_encoder_init(struct drm_device *dev, struct drm_encoder *encoder, const struct drm_encoder_funcs *funcs, int encoder_type, const char *name, ...); __printf(6, 7) void *__drmm_encoder_alloc(struct drm_device *dev, size_t size, size_t offset, const struct drm_encoder_funcs *funcs, int encoder_type, const char *name, ...); /** * drmm_encoder_alloc - Allocate and initialize an encoder * @dev: drm device * @type: the type of the struct which contains struct &drm_encoder * @member: the name of the &drm_encoder within @type * @funcs: callbacks for this encoder (optional) * @encoder_type: user visible type of the encoder * @name: printf style format string for the encoder name, or NULL for default name * * Allocates and initializes an encoder. Encoder should be subclassed as part of * driver encoder objects. Cleanup is automatically handled through registering * drm_encoder_cleanup() with drmm_add_action(). * * The @drm_encoder_funcs.destroy hook must be NULL. * * Returns: * Pointer to new encoder, or ERR_PTR on failure. */ #define drmm_encoder_alloc(dev, type, member, funcs, encoder_type, name, ...) \ ((type *)__drmm_encoder_alloc(dev, sizeof(type), \ offsetof(type, member), funcs, \ encoder_type, name, ##__VA_ARGS__)) /** * drmm_plain_encoder_alloc - Allocate and initialize an encoder * @dev: drm device * @funcs: callbacks for this encoder (optional) * @encoder_type: user visible type of the encoder * @name: printf style format string for the encoder name, or NULL for default name * * This is a simplified version of drmm_encoder_alloc(), which only allocates * and returns a struct drm_encoder instance, with no subclassing. * * Returns: * Pointer to the new drm_encoder struct, or ERR_PTR on failure. */ #define drmm_plain_encoder_alloc(dev, funcs, encoder_type, name, ...) \ ((struct drm_encoder *) \ __drmm_encoder_alloc(dev, sizeof(struct drm_encoder), \ 0, funcs, encoder_type, name, ##__VA_ARGS__)) /** * drm_encoder_index - find the index of a registered encoder * @encoder: encoder to find index for * * Given a registered encoder, return the index of that encoder within a DRM * device's list of encoders. */ static inline unsigned int drm_encoder_index(const struct drm_encoder *encoder) { return encoder->index; } /** * drm_encoder_mask - find the mask of a registered encoder * @encoder: encoder to find mask for * * Given a registered encoder, return the mask bit of that encoder for an * encoder's possible_clones field. */ static inline u32 drm_encoder_mask(const struct drm_encoder *encoder) { return 1 << drm_encoder_index(encoder); } /** * drm_encoder_crtc_ok - can a given crtc drive a given encoder? * @encoder: encoder to test * @crtc: crtc to test * * Returns false if @encoder can't be driven by @crtc, true otherwise. */ static inline bool drm_encoder_crtc_ok(struct drm_encoder *encoder, struct drm_crtc *crtc) { return !!(encoder->possible_crtcs & drm_crtc_mask(crtc)); } /** * drm_encoder_find - find a &drm_encoder * @dev: DRM device * @file_priv: drm file to check for lease against. * @id: encoder id * * Returns the encoder with @id, NULL if it doesn't exist. Simple wrapper around * drm_mode_object_find(). */ static inline struct drm_encoder *drm_encoder_find(struct drm_device *dev, struct drm_file *file_priv, uint32_t id) { struct drm_mode_object *mo; mo = drm_mode_object_find(dev, file_priv, id, DRM_MODE_OBJECT_ENCODER); return mo ? obj_to_encoder(mo) : NULL; } void drm_encoder_cleanup(struct drm_encoder *encoder); /** * drm_for_each_encoder_mask - iterate over encoders specified by bitmask * @encoder: the loop cursor * @dev: the DRM device * @encoder_mask: bitmask of encoder indices * * Iterate over all encoders specified by bitmask. */ #define drm_for_each_encoder_mask(encoder, dev, encoder_mask) \ list_for_each_entry((encoder), &(dev)->mode_config.encoder_list, head) \ for_each_if ((encoder_mask) & drm_encoder_mask(encoder)) /** * drm_for_each_encoder - iterate over all encoders * @encoder: the loop cursor * @dev: the DRM device * * Iterate over all encoders of @dev. */ #define drm_for_each_encoder(encoder, dev) \ list_for_each_entry(encoder, &(dev)->mode_config.encoder_list, head) #endif |
1 15 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 | #ifndef __LZ4DEFS_H__ #define __LZ4DEFS_H__ /* * lz4defs.h -- common and architecture specific defines for the kernel usage * LZ4 - Fast LZ compression algorithm * Copyright (C) 2011-2016, Yann Collet. * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * You can contact the author at : * - LZ4 homepage : http://www.lz4.org * - LZ4 source repository : https://github.com/lz4/lz4 * * Changed for kernel usage by: * Sven Schmidt <4sschmid@informatik.uni-hamburg.de> */ #include <asm/unaligned.h> #include <linux/string.h> /* memset, memcpy */ #define FORCE_INLINE __always_inline /*-************************************ * Basic Types **************************************/ #include <linux/types.h> typedef uint8_t BYTE; typedef uint16_t U16; typedef uint32_t U32; typedef int32_t S32; typedef uint64_t U64; typedef uintptr_t uptrval; /*-************************************ * Architecture specifics **************************************/ #if defined(CONFIG_64BIT) #define LZ4_ARCH64 1 #else #define LZ4_ARCH64 0 #endif #if defined(__LITTLE_ENDIAN) #define LZ4_LITTLE_ENDIAN 1 #else #define LZ4_LITTLE_ENDIAN 0 #endif /*-************************************ * Constants **************************************/ #define MINMATCH 4 #define WILDCOPYLENGTH 8 #define LASTLITERALS 5 #define MFLIMIT (WILDCOPYLENGTH + MINMATCH) /* * ensure it's possible to write 2 x wildcopyLength * without overflowing output buffer */ #define MATCH_SAFEGUARD_DISTANCE ((2 * WILDCOPYLENGTH) - MINMATCH) /* Increase this value ==> compression run slower on incompressible data */ #define LZ4_SKIPTRIGGER 6 #define HASH_UNIT sizeof(size_t) #define KB (1 << 10) #define MB (1 << 20) #define GB (1U << 30) #define MAXD_LOG 16 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1) #define STEPSIZE sizeof(size_t) #define ML_BITS 4 #define ML_MASK ((1U << ML_BITS) - 1) #define RUN_BITS (8 - ML_BITS) #define RUN_MASK ((1U << RUN_BITS) - 1) /*-************************************ * Reading and writing into memory **************************************/ static FORCE_INLINE U16 LZ4_read16(const void *ptr) { return get_unaligned((const U16 *)ptr); } static FORCE_INLINE U32 LZ4_read32(const void *ptr) { return get_unaligned((const U32 *)ptr); } static FORCE_INLINE size_t LZ4_read_ARCH(const void *ptr) { return get_unaligned((const size_t *)ptr); } static FORCE_INLINE void LZ4_write16(void *memPtr, U16 value) { put_unaligned(value, (U16 *)memPtr); } static FORCE_INLINE void LZ4_write32(void *memPtr, U32 value) { put_unaligned(value, (U32 *)memPtr); } static FORCE_INLINE U16 LZ4_readLE16(const void *memPtr) { return get_unaligned_le16(memPtr); } static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value) { return put_unaligned_le16(value, memPtr); } /* * LZ4 relies on memcpy with a constant size being inlined. In freestanding * environments, the compiler can't assume the implementation of memcpy() is * standard compliant, so apply its specialized memcpy() inlining logic. When * possible, use __builtin_memcpy() to tell the compiler to analyze memcpy() * as-if it were standard compliant, so it can inline it in freestanding * environments. This is needed when decompressing the Linux Kernel, for example. */ #define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) #define LZ4_memmove(dst, src, size) __builtin_memmove(dst, src, size) static FORCE_INLINE void LZ4_copy8(void *dst, const void *src) { #if LZ4_ARCH64 U64 a = get_unaligned((const U64 *)src); put_unaligned(a, (U64 *)dst); #else U32 a = get_unaligned((const U32 *)src); U32 b = get_unaligned((const U32 *)src + 1); put_unaligned(a, (U32 *)dst); put_unaligned(b, (U32 *)dst + 1); #endif } /* * customized variant of memcpy, * which can overwrite up to 7 bytes beyond dstEnd */ static FORCE_INLINE void LZ4_wildCopy(void *dstPtr, const void *srcPtr, void *dstEnd) { BYTE *d = (BYTE *)dstPtr; const BYTE *s = (const BYTE *)srcPtr; BYTE *const e = (BYTE *)dstEnd; do { LZ4_copy8(d, s); d += 8; s += 8; } while (d < e); } static FORCE_INLINE unsigned int LZ4_NbCommonBytes(register size_t val) { #if LZ4_LITTLE_ENDIAN return __ffs(val) >> 3; #else return (BITS_PER_LONG - 1 - __fls(val)) >> 3; #endif } static FORCE_INLINE unsigned int LZ4_count( const BYTE *pIn, const BYTE *pMatch, const BYTE *pInLimit) { const BYTE *const pStart = pIn; while (likely(pIn < pInLimit - (STEPSIZE - 1))) { size_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); if (!diff) { pIn += STEPSIZE; pMatch += STEPSIZE; continue; } pIn += LZ4_NbCommonBytes(diff); return (unsigned int)(pIn - pStart); } #if LZ4_ARCH64 if ((pIn < (pInLimit - 3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn += 4; pMatch += 4; } #endif if ((pIn < (pInLimit - 1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn += 2; pMatch += 2; } if ((pIn < pInLimit) && (*pMatch == *pIn)) pIn++; return (unsigned int)(pIn - pStart); } typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive; typedef enum { byPtr, byU32, byU16 } tableType_t; typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive; typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; #define LZ4_STATIC_ASSERT(c) BUILD_BUG_ON(!(c)) #endif |
1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 | // SPDX-License-Identifier: GPL-2.0+ /* * Driver for Alauda-based card readers * * Current development and maintenance by: * (c) 2005 Daniel Drake <dsd@gentoo.org> * * The 'Alauda' is a chip manufacturered by RATOC for OEM use. * * Alauda implements a vendor-specific command set to access two media reader * ports (XD, SmartMedia). This driver converts SCSI commands to the commands * which are accepted by these devices. * * The driver was developed through reverse-engineering, with the help of the * sddr09 driver which has many similarities, and with some help from the * (very old) vendor-supplied GPL sma03 driver. * * For protocol info, see http://alauda.sourceforge.net */ #include <linux/module.h> #include <linux/slab.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_device.h> #include "usb.h" #include "transport.h" #include "protocol.h" #include "debug.h" #include "scsiglue.h" #define DRV_NAME "ums-alauda" MODULE_DESCRIPTION("Driver for Alauda-based card readers"); MODULE_AUTHOR("Daniel Drake <dsd@gentoo.org>"); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS(USB_STORAGE); /* * Status bytes */ #define ALAUDA_STATUS_ERROR 0x01 #define ALAUDA_STATUS_READY 0x40 /* * Control opcodes (for request field) */ #define ALAUDA_GET_XD_MEDIA_STATUS 0x08 #define ALAUDA_GET_SM_MEDIA_STATUS 0x98 #define ALAUDA_ACK_XD_MEDIA_CHANGE 0x0a #define ALAUDA_ACK_SM_MEDIA_CHANGE 0x9a #define ALAUDA_GET_XD_MEDIA_SIG 0x86 #define ALAUDA_GET_SM_MEDIA_SIG 0x96 /* * Bulk command identity (byte 0) */ #define ALAUDA_BULK_CMD 0x40 /* * Bulk opcodes (byte 1) */ #define ALAUDA_BULK_GET_REDU_DATA 0x85 #define ALAUDA_BULK_READ_BLOCK 0x94 #define ALAUDA_BULK_ERASE_BLOCK 0xa3 #define ALAUDA_BULK_WRITE_BLOCK 0xb4 #define ALAUDA_BULK_GET_STATUS2 0xb7 #define ALAUDA_BULK_RESET_MEDIA 0xe0 /* * Port to operate on (byte 8) */ #define ALAUDA_PORT_XD 0x00 #define ALAUDA_PORT_SM 0x01 /* * LBA and PBA are unsigned ints. Special values. */ #define UNDEF 0xffff #define SPARE 0xfffe #define UNUSABLE 0xfffd struct alauda_media_info { unsigned long capacity; /* total media size in bytes */ unsigned int pagesize; /* page size in bytes */ unsigned int blocksize; /* number of pages per block */ unsigned int uzonesize; /* number of usable blocks per zone */ unsigned int zonesize; /* number of blocks per zone */ unsigned int blockmask; /* mask to get page from address */ unsigned char pageshift; unsigned char blockshift; unsigned char zoneshift; u16 **lba_to_pba; /* logical to physical block map */ u16 **pba_to_lba; /* physical to logical block map */ }; struct alauda_info { struct alauda_media_info port[2]; int wr_ep; /* endpoint to write data out of */ unsigned char sense_key; unsigned long sense_asc; /* additional sense code */ unsigned long sense_ascq; /* additional sense code qualifier */ bool media_initialized; }; #define short_pack(lsb,msb) ( ((u16)(lsb)) | ( ((u16)(msb))<<8 ) ) #define LSB_of(s) ((s)&0xFF) #define MSB_of(s) ((s)>>8) #define MEDIA_PORT(us) us->srb->device->lun #define MEDIA_INFO(us) ((struct alauda_info *)us->extra)->port[MEDIA_PORT(us)] #define PBA_LO(pba) ((pba & 0xF) << 5) #define PBA_HI(pba) (pba >> 3) #define PBA_ZONE(pba) (pba >> 11) static int init_alauda(struct us_data *us); /* * The table of devices */ #define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \ vendorName, productName, useProtocol, useTransport, \ initFunction, flags) \ { USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax), \ .driver_info = (flags) } static struct usb_device_id alauda_usb_ids[] = { # include "unusual_alauda.h" { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, alauda_usb_ids); #undef UNUSUAL_DEV /* * The flags table */ #define UNUSUAL_DEV(idVendor, idProduct, bcdDeviceMin, bcdDeviceMax, \ vendor_name, product_name, use_protocol, use_transport, \ init_function, Flags) \ { \ .vendorName = vendor_name, \ .productName = product_name, \ .useProtocol = use_protocol, \ .useTransport = use_transport, \ .initFunction = init_function, \ } static struct us_unusual_dev alauda_unusual_dev_list[] = { # include "unusual_alauda.h" { } /* Terminating entry */ }; #undef UNUSUAL_DEV /* * Media handling */ struct alauda_card_info { unsigned char id; /* id byte */ unsigned char chipshift; /* 1<<cs bytes total capacity */ unsigned char pageshift; /* 1<<ps bytes in a page */ unsigned char blockshift; /* 1<<bs pages per block */ unsigned char zoneshift; /* 1<<zs blocks per zone */ }; static struct alauda_card_info alauda_card_ids[] = { /* NAND flash */ { 0x6e, 20, 8, 4, 8}, /* 1 MB */ { 0xe8, 20, 8, 4, 8}, /* 1 MB */ { 0xec, 20, 8, 4, 8}, /* 1 MB */ { 0x64, 21, 8, 4, 9}, /* 2 MB */ { 0xea, 21, 8, 4, 9}, /* 2 MB */ { 0x6b, 22, 9, 4, 9}, /* 4 MB */ { 0xe3, 22, 9, 4, 9}, /* 4 MB */ { 0xe5, 22, 9, 4, 9}, /* 4 MB */ { 0xe6, 23, 9, 4, 10}, /* 8 MB */ { 0x73, 24, 9, 5, 10}, /* 16 MB */ { 0x75, 25, 9, 5, 10}, /* 32 MB */ { 0x76, 26, 9, 5, 10}, /* 64 MB */ { 0x79, 27, 9, 5, 10}, /* 128 MB */ { 0x71, 28, 9, 5, 10}, /* 256 MB */ /* MASK ROM */ { 0x5d, 21, 9, 4, 8}, /* 2 MB */ { 0xd5, 22, 9, 4, 9}, /* 4 MB */ { 0xd6, 23, 9, 4, 10}, /* 8 MB */ { 0x57, 24, 9, 4, 11}, /* 16 MB */ { 0x58, 25, 9, 4, 12}, /* 32 MB */ { 0,} }; static struct alauda_card_info *alauda_card_find_id(unsigned char id) { int i; for (i = 0; alauda_card_ids[i].id != 0; i++) if (alauda_card_ids[i].id == id) return &(alauda_card_ids[i]); return NULL; } /* * ECC computation. */ static unsigned char parity[256]; static unsigned char ecc2[256]; static void nand_init_ecc(void) { int i, j, a; parity[0] = 0; for (i = 1; i < 256; i++) parity[i] = (parity[i&(i-1)] ^ 1); for (i = 0; i < 256; i++) { a = 0; for (j = 0; j < 8; j++) { if (i & (1<<j)) { if ((j & 1) == 0) a ^= 0x04; if ((j & 2) == 0) a ^= 0x10; if ((j & 4) == 0) a ^= 0x40; } } ecc2[i] = ~(a ^ (a<<1) ^ (parity[i] ? 0xa8 : 0)); } } /* compute 3-byte ecc on 256 bytes */ static void nand_compute_ecc(unsigned char *data, unsigned char *ecc) { int i, j, a; unsigned char par = 0, bit, bits[8] = {0}; /* collect 16 checksum bits */ for (i = 0; i < 256; i++) { par ^= data[i]; bit = parity[data[i]]; for (j = 0; j < 8; j++) if ((i & (1<<j)) == 0) bits[j] ^= bit; } /* put 4+4+4 = 12 bits in the ecc */ a = (bits[3] << 6) + (bits[2] << 4) + (bits[1] << 2) + bits[0]; ecc[0] = ~(a ^ (a<<1) ^ (parity[par] ? 0xaa : 0)); a = (bits[7] << 6) + (bits[6] << 4) + (bits[5] << 2) + bits[4]; ecc[1] = ~(a ^ (a<<1) ^ (parity[par] ? 0xaa : 0)); ecc[2] = ecc2[par]; } static int nand_compare_ecc(unsigned char *data, unsigned char *ecc) { return (data[0] == ecc[0] && data[1] == ecc[1] && data[2] == ecc[2]); } static void nand_store_ecc(unsigned char *data, unsigned char *ecc) { memcpy(data, ecc, 3); } /* * Alauda driver */ /* * Forget our PBA <---> LBA mappings for a particular port */ static void alauda_free_maps (struct alauda_media_info *media_info) { unsigned int shift = media_info->zoneshift + media_info->blockshift + media_info->pageshift; unsigned int num_zones = media_info->capacity >> shift; unsigned int i; if (media_info->lba_to_pba != NULL) for (i = 0; i < num_zones; i++) { kfree(media_info->lba_to_pba[i]); media_info->lba_to_pba[i] = NULL; } if (media_info->pba_to_lba != NULL) for (i = 0; i < num_zones; i++) { kfree(media_info->pba_to_lba[i]); media_info->pba_to_lba[i] = NULL; } } /* * Returns 2 bytes of status data * The first byte describes media status, and second byte describes door status */ static int alauda_get_media_status(struct us_data *us, unsigned char *data) { int rc; unsigned char command; if (MEDIA_PORT(us) == ALAUDA_PORT_XD) command = ALAUDA_GET_XD_MEDIA_STATUS; else command = ALAUDA_GET_SM_MEDIA_STATUS; rc = usb_stor_ctrl_transfer(us, us->recv_ctrl_pipe, command, 0xc0, 0, 1, data, 2); if (rc == USB_STOR_XFER_GOOD) usb_stor_dbg(us, "Media status %02X %02X\n", data[0], data[1]); return rc; } /* * Clears the "media was changed" bit so that we know when it changes again * in the future. */ static int alauda_ack_media(struct us_data *us) { unsigned char command; if (MEDIA_PORT(us) == ALAUDA_PORT_XD) command = ALAUDA_ACK_XD_MEDIA_CHANGE; else command = ALAUDA_ACK_SM_MEDIA_CHANGE; return usb_stor_ctrl_transfer(us, us->send_ctrl_pipe, command, 0x40, 0, 1, NULL, 0); } /* * Retrieves a 4-byte media signature, which indicates manufacturer, capacity, * and some other details. */ static int alauda_get_media_signature(struct us_data *us, unsigned char *data) { unsigned char command; if (MEDIA_PORT(us) == ALAUDA_PORT_XD) command = ALAUDA_GET_XD_MEDIA_SIG; else command = ALAUDA_GET_SM_MEDIA_SIG; return usb_stor_ctrl_transfer(us, us->recv_ctrl_pipe, command, 0xc0, 0, 0, data, 4); } /* * Resets the media status (but not the whole device?) */ static int alauda_reset_media(struct us_data *us) { unsigned char *command = us->iobuf; memset(command, 0, 9); command[0] = ALAUDA_BULK_CMD; command[1] = ALAUDA_BULK_RESET_MEDIA; command[8] = MEDIA_PORT(us); return usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, command, 9, NULL); } /* * Examines the media and deduces capacity, etc. */ static int alauda_init_media(struct us_data *us) { unsigned char *data = us->iobuf; int ready = 0; struct alauda_card_info *media_info; unsigned int num_zones; while (ready == 0) { msleep(20); if (alauda_get_media_status(us, data) != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; if (data[0] & 0x10) ready = 1; } usb_stor_dbg(us, "We are ready for action!\n"); if (alauda_ack_media(us) != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; msleep(10); if (alauda_get_media_status(us, data) != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; if (data[0] != 0x14) { usb_stor_dbg(us, "Media not ready after ack\n"); return USB_STOR_TRANSPORT_ERROR; } if (alauda_get_media_signature(us, data) != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; usb_stor_dbg(us, "Media signature: %4ph\n", data); media_info = alauda_card_find_id(data[1]); if (media_info == NULL) { pr_warn("alauda_init_media: Unrecognised media signature: %4ph\n", data); return USB_STOR_TRANSPORT_ERROR; } MEDIA_INFO(us).capacity = 1 << media_info->chipshift; usb_stor_dbg(us, "Found media with capacity: %ldMB\n", MEDIA_INFO(us).capacity >> 20); MEDIA_INFO(us).pageshift = media_info->pageshift; MEDIA_INFO(us).blockshift = media_info->blockshift; MEDIA_INFO(us).zoneshift = media_info->zoneshift; MEDIA_INFO(us).pagesize = 1 << media_info->pageshift; MEDIA_INFO(us).blocksize = 1 << media_info->blockshift; MEDIA_INFO(us).zonesize = 1 << media_info->zoneshift; MEDIA_INFO(us).uzonesize = ((1 << media_info->zoneshift) / 128) * 125; MEDIA_INFO(us).blockmask = MEDIA_INFO(us).blocksize - 1; num_zones = MEDIA_INFO(us).capacity >> (MEDIA_INFO(us).zoneshift + MEDIA_INFO(us).blockshift + MEDIA_INFO(us).pageshift); MEDIA_INFO(us).pba_to_lba = kcalloc(num_zones, sizeof(u16*), GFP_NOIO); MEDIA_INFO(us).lba_to_pba = kcalloc(num_zones, sizeof(u16*), GFP_NOIO); if (MEDIA_INFO(us).pba_to_lba == NULL || MEDIA_INFO(us).lba_to_pba == NULL) return USB_STOR_TRANSPORT_ERROR; if (alauda_reset_media(us) != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; return USB_STOR_TRANSPORT_GOOD; } /* * Examines the media status and does the right thing when the media has gone, * appeared, or changed. */ static int alauda_check_media(struct us_data *us) { struct alauda_info *info = (struct alauda_info *) us->extra; unsigned char *status = us->iobuf; int rc; rc = alauda_get_media_status(us, status); if (rc != USB_STOR_XFER_GOOD) { status[0] = 0xF0; /* Pretend there's no media */ status[1] = 0; } /* Check for no media or door open */ if ((status[0] & 0x80) || ((status[0] & 0x1F) == 0x10) || ((status[1] & 0x01) == 0)) { usb_stor_dbg(us, "No media, or door open\n"); alauda_free_maps(&MEDIA_INFO(us)); info->sense_key = 0x02; info->sense_asc = 0x3A; info->sense_ascq = 0x00; return USB_STOR_TRANSPORT_FAILED; } /* Check for media change */ if (status[0] & 0x08 || !info->media_initialized) { usb_stor_dbg(us, "Media change detected\n"); alauda_free_maps(&MEDIA_INFO(us)); rc = alauda_init_media(us); if (rc == USB_STOR_TRANSPORT_GOOD) info->media_initialized = true; info->sense_key = UNIT_ATTENTION; info->sense_asc = 0x28; info->sense_ascq = 0x00; return USB_STOR_TRANSPORT_FAILED; } return USB_STOR_TRANSPORT_GOOD; } /* * Checks the status from the 2nd status register * Returns 3 bytes of status data, only the first is known */ static int alauda_check_status2(struct us_data *us) { int rc; unsigned char command[] = { ALAUDA_BULK_CMD, ALAUDA_BULK_GET_STATUS2, 0, 0, 0, 0, 3, 0, MEDIA_PORT(us) }; unsigned char data[3]; rc = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, command, 9, NULL); if (rc != USB_STOR_XFER_GOOD) return rc; rc = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, data, 3, NULL); if (rc != USB_STOR_XFER_GOOD) return rc; usb_stor_dbg(us, "%3ph\n", data); if (data[0] & ALAUDA_STATUS_ERROR) return USB_STOR_XFER_ERROR; return USB_STOR_XFER_GOOD; } /* * Gets the redundancy data for the first page of a PBA * Returns 16 bytes. */ static int alauda_get_redu_data(struct us_data *us, u16 pba, unsigned char *data) { int rc; unsigned char command[] = { ALAUDA_BULK_CMD, ALAUDA_BULK_GET_REDU_DATA, PBA_HI(pba), PBA_ZONE(pba), 0, PBA_LO(pba), 0, 0, MEDIA_PORT(us) }; rc = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, command, 9, NULL); if (rc != USB_STOR_XFER_GOOD) return rc; return usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, data, 16, NULL); } /* * Finds the first unused PBA in a zone * Returns the absolute PBA of an unused PBA, or 0 if none found. */ static u16 alauda_find_unused_pba(struct alauda_media_info *info, unsigned int zone) { u16 *pba_to_lba = info->pba_to_lba[zone]; unsigned int i; for (i = 0; i < info->zonesize; i++) if (pba_to_lba[i] == UNDEF) return (zone << info->zoneshift) + i; return 0; } /* * Reads the redundancy data for all PBA's in a zone * Produces lba <--> pba mappings */ static int alauda_read_map(struct us_data *us, unsigned int zone) { unsigned char *data = us->iobuf; int result; int i, j; unsigned int zonesize = MEDIA_INFO(us).zonesize; unsigned int uzonesize = MEDIA_INFO(us).uzonesize; unsigned int lba_offset, lba_real, blocknum; unsigned int zone_base_lba = zone * uzonesize; unsigned int zone_base_pba = zone * zonesize; u16 *lba_to_pba = kcalloc(zonesize, sizeof(u16), GFP_NOIO); u16 *pba_to_lba = kcalloc(zonesize, sizeof(u16), GFP_NOIO); if (lba_to_pba == NULL || pba_to_lba == NULL) { result = USB_STOR_TRANSPORT_ERROR; goto error; } usb_stor_dbg(us, "Mapping blocks for zone %d\n", zone); /* 1024 PBA's per zone */ for (i = 0; i < zonesize; i++) lba_to_pba[i] = pba_to_lba[i] = UNDEF; for (i = 0; i < zonesize; i++) { blocknum = zone_base_pba + i; result = alauda_get_redu_data(us, blocknum, data); if (result != USB_STOR_XFER_GOOD) { result = USB_STOR_TRANSPORT_ERROR; goto error; } /* special PBAs have control field 0^16 */ for (j = 0; j < 16; j++) if (data[j] != 0) goto nonz; pba_to_lba[i] = UNUSABLE; usb_stor_dbg(us, "PBA %d has no logical mapping\n", blocknum); continue; nonz: /* unwritten PBAs have control field FF^16 */ for (j = 0; j < 16; j++) if (data[j] != 0xff) goto nonff; continue; nonff: /* normal PBAs start with six FFs */ if (j < 6) { usb_stor_dbg(us, "PBA %d has no logical mapping: reserved area = %02X%02X%02X%02X data status %02X block status %02X\n", blocknum, data[0], data[1], data[2], data[3], data[4], data[5]); pba_to_lba[i] = UNUSABLE; continue; } if ((data[6] >> 4) != 0x01) { usb_stor_dbg(us, "PBA %d has invalid address field %02X%02X/%02X%02X\n", blocknum, data[6], data[7], data[11], data[12]); pba_to_lba[i] = UNUSABLE; continue; } /* check even parity */ if (parity[data[6] ^ data[7]]) { printk(KERN_WARNING "alauda_read_map: Bad parity in LBA for block %d" " (%02X %02X)\n", i, data[6], data[7]); pba_to_lba[i] = UNUSABLE; continue; } lba_offset = short_pack(data[7], data[6]); lba_offset = (lba_offset & 0x07FF) >> 1; lba_real = lba_offset + zone_base_lba; /* * Every 1024 physical blocks ("zone"), the LBA numbers * go back to zero, but are within a higher block of LBA's. * Also, there is a maximum of 1000 LBA's per zone. * In other words, in PBA 1024-2047 you will find LBA 0-999 * which are really LBA 1000-1999. This allows for 24 bad * or special physical blocks per zone. */ if (lba_offset >= uzonesize) { printk(KERN_WARNING "alauda_read_map: Bad low LBA %d for block %d\n", lba_real, blocknum); continue; } if (lba_to_pba[lba_offset] != UNDEF) { printk(KERN_WARNING "alauda_read_map: " "LBA %d seen for PBA %d and %d\n", lba_real, lba_to_pba[lba_offset], blocknum); continue; } pba_to_lba[i] = lba_real; lba_to_pba[lba_offset] = blocknum; continue; } MEDIA_INFO(us).lba_to_pba[zone] = lba_to_pba; MEDIA_INFO(us).pba_to_lba[zone] = pba_to_lba; result = 0; goto out; error: kfree(lba_to_pba); kfree(pba_to_lba); out: return result; } /* * Checks to see whether we have already mapped a certain zone * If we haven't, the map is generated */ static void alauda_ensure_map_for_zone(struct us_data *us, unsigned int zone) { if (MEDIA_INFO(us).lba_to_pba[zone] == NULL || MEDIA_INFO(us).pba_to_lba[zone] == NULL) alauda_read_map(us, zone); } /* * Erases an entire block */ static int alauda_erase_block(struct us_data *us, u16 pba) { int rc; unsigned char command[] = { ALAUDA_BULK_CMD, ALAUDA_BULK_ERASE_BLOCK, PBA_HI(pba), PBA_ZONE(pba), 0, PBA_LO(pba), 0x02, 0, MEDIA_PORT(us) }; unsigned char buf[2]; usb_stor_dbg(us, "Erasing PBA %d\n", pba); rc = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, command, 9, NULL); if (rc != USB_STOR_XFER_GOOD) return rc; rc = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, buf, 2, NULL); if (rc != USB_STOR_XFER_GOOD) return rc; usb_stor_dbg(us, "Erase result: %02X %02X\n", buf[0], buf[1]); return rc; } /* * Reads data from a certain offset page inside a PBA, including interleaved * redundancy data. Returns (pagesize+64)*pages bytes in data. */ static int alauda_read_block_raw(struct us_data *us, u16 pba, unsigned int page, unsigned int pages, unsigned char *data) { int rc; unsigned char command[] = { ALAUDA_BULK_CMD, ALAUDA_BULK_READ_BLOCK, PBA_HI(pba), PBA_ZONE(pba), 0, PBA_LO(pba) + page, pages, 0, MEDIA_PORT(us) }; usb_stor_dbg(us, "pba %d page %d count %d\n", pba, page, pages); rc = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, command, 9, NULL); if (rc != USB_STOR_XFER_GOOD) return rc; return usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, data, (MEDIA_INFO(us).pagesize + 64) * pages, NULL); } /* * Reads data from a certain offset page inside a PBA, excluding redundancy * data. Returns pagesize*pages bytes in data. Note that data must be big enough * to hold (pagesize+64)*pages bytes of data, but you can ignore those 'extra' * trailing bytes outside this function. */ static int alauda_read_block(struct us_data *us, u16 pba, unsigned int page, unsigned int pages, unsigned char *data) { int i, rc; unsigned int pagesize = MEDIA_INFO(us).pagesize; rc = alauda_read_block_raw(us, pba, page, pages, data); if (rc != USB_STOR_XFER_GOOD) return rc; /* Cut out the redundancy data */ for (i = 0; i < pages; i++) { int dest_offset = i * pagesize; int src_offset = i * (pagesize + 64); memmove(data + dest_offset, data + src_offset, pagesize); } return rc; } /* * Writes an entire block of data and checks status after write. * Redundancy data must be already included in data. Data should be * (pagesize+64)*blocksize bytes in length. */ static int alauda_write_block(struct us_data *us, u16 pba, unsigned char *data) { int rc; struct alauda_info *info = (struct alauda_info *) us->extra; unsigned char command[] = { ALAUDA_BULK_CMD, ALAUDA_BULK_WRITE_BLOCK, PBA_HI(pba), PBA_ZONE(pba), 0, PBA_LO(pba), 32, 0, MEDIA_PORT(us) }; usb_stor_dbg(us, "pba %d\n", pba); rc = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, command, 9, NULL); if (rc != USB_STOR_XFER_GOOD) return rc; rc = usb_stor_bulk_transfer_buf(us, info->wr_ep, data, (MEDIA_INFO(us).pagesize + 64) * MEDIA_INFO(us).blocksize, NULL); if (rc != USB_STOR_XFER_GOOD) return rc; return alauda_check_status2(us); } /* * Write some data to a specific LBA. */ static int alauda_write_lba(struct us_data *us, u16 lba, unsigned int page, unsigned int pages, unsigned char *ptr, unsigned char *blockbuffer) { u16 pba, lbap, new_pba; unsigned char *bptr, *cptr, *xptr; unsigned char ecc[3]; int i, result; unsigned int uzonesize = MEDIA_INFO(us).uzonesize; unsigned int zonesize = MEDIA_INFO(us).zonesize; unsigned int pagesize = MEDIA_INFO(us).pagesize; unsigned int blocksize = MEDIA_INFO(us).blocksize; unsigned int lba_offset = lba % uzonesize; unsigned int new_pba_offset; unsigned int zone = lba / uzonesize; alauda_ensure_map_for_zone(us, zone); pba = MEDIA_INFO(us).lba_to_pba[zone][lba_offset]; if (pba == 1) { /* * Maybe it is impossible to write to PBA 1. * Fake success, but don't do anything. */ printk(KERN_WARNING "alauda_write_lba: avoid writing to pba 1\n"); return USB_STOR_TRANSPORT_GOOD; } new_pba = alauda_find_unused_pba(&MEDIA_INFO(us), zone); if (!new_pba) { printk(KERN_WARNING "alauda_write_lba: Out of unused blocks\n"); return USB_STOR_TRANSPORT_ERROR; } /* read old contents */ if (pba != UNDEF) { result = alauda_read_block_raw(us, pba, 0, blocksize, blockbuffer); if (result != USB_STOR_XFER_GOOD) return result; } else { memset(blockbuffer, 0, blocksize * (pagesize + 64)); } lbap = (lba_offset << 1) | 0x1000; if (parity[MSB_of(lbap) ^ LSB_of(lbap)]) lbap ^= 1; /* check old contents and fill lba */ for (i = 0; i < blocksize; i++) { bptr = blockbuffer + (i * (pagesize + 64)); cptr = bptr + pagesize; nand_compute_ecc(bptr, ecc); if (!nand_compare_ecc(cptr+13, ecc)) { usb_stor_dbg(us, "Warning: bad ecc in page %d- of pba %d\n", i, pba); nand_store_ecc(cptr+13, ecc); } nand_compute_ecc(bptr + (pagesize / 2), ecc); if (!nand_compare_ecc(cptr+8, ecc)) { usb_stor_dbg(us, "Warning: bad ecc in page %d+ of pba %d\n", i, pba); nand_store_ecc(cptr+8, ecc); } cptr[6] = cptr[11] = MSB_of(lbap); cptr[7] = cptr[12] = LSB_of(lbap); } /* copy in new stuff and compute ECC */ xptr = ptr; for (i = page; i < page+pages; i++) { bptr = blockbuffer + (i * (pagesize + 64)); cptr = bptr + pagesize; memcpy(bptr, xptr, pagesize); xptr += pagesize; nand_compute_ecc(bptr, ecc); nand_store_ecc(cptr+13, ecc); nand_compute_ecc(bptr + (pagesize / 2), ecc); nand_store_ecc(cptr+8, ecc); } result = alauda_write_block(us, new_pba, blockbuffer); if (result != USB_STOR_XFER_GOOD) return result; new_pba_offset = new_pba - (zone * zonesize); MEDIA_INFO(us).pba_to_lba[zone][new_pba_offset] = lba; MEDIA_INFO(us).lba_to_pba[zone][lba_offset] = new_pba; usb_stor_dbg(us, "Remapped LBA %d to PBA %d\n", lba, new_pba); if (pba != UNDEF) { unsigned int pba_offset = pba - (zone * zonesize); result = alauda_erase_block(us, pba); if (result != USB_STOR_XFER_GOOD) return result; MEDIA_INFO(us).pba_to_lba[zone][pba_offset] = UNDEF; } return USB_STOR_TRANSPORT_GOOD; } /* * Read data from a specific sector address */ static int alauda_read_data(struct us_data *us, unsigned long address, unsigned int sectors) { unsigned char *buffer; u16 lba, max_lba; unsigned int page, len, offset; unsigned int blockshift = MEDIA_INFO(us).blockshift; unsigned int pageshift = MEDIA_INFO(us).pageshift; unsigned int blocksize = MEDIA_INFO(us).blocksize; unsigned int pagesize = MEDIA_INFO(us).pagesize; unsigned int uzonesize = MEDIA_INFO(us).uzonesize; struct scatterlist *sg; int result; /* * Since we only read in one block at a time, we have to create * a bounce buffer and move the data a piece at a time between the * bounce buffer and the actual transfer buffer. * We make this buffer big enough to hold temporary redundancy data, * which we use when reading the data blocks. */ len = min(sectors, blocksize) * (pagesize + 64); buffer = kmalloc(len, GFP_NOIO); if (!buffer) return USB_STOR_TRANSPORT_ERROR; /* Figure out the initial LBA and page */ lba = address >> blockshift; page = (address & MEDIA_INFO(us).blockmask); max_lba = MEDIA_INFO(us).capacity >> (blockshift + pageshift); result = USB_STOR_TRANSPORT_GOOD; offset = 0; sg = NULL; while (sectors > 0) { unsigned int zone = lba / uzonesize; /* integer division */ unsigned int lba_offset = lba - (zone * uzonesize); unsigned int pages; u16 pba; alauda_ensure_map_for_zone(us, zone); /* Not overflowing capacity? */ if (lba >= max_lba) { usb_stor_dbg(us, "Error: Requested lba %u exceeds maximum %u\n", lba, max_lba); result = USB_STOR_TRANSPORT_ERROR; break; } /* Find number of pages we can read in this block */ pages = min(sectors, blocksize - page); len = pages << pageshift; /* Find where this lba lives on disk */ pba = MEDIA_INFO(us).lba_to_pba[zone][lba_offset]; if (pba == UNDEF) { /* this lba was never written */ usb_stor_dbg(us, "Read %d zero pages (LBA %d) page %d\n", pages, lba, page); /* * This is not really an error. It just means * that the block has never been written. * Instead of returning USB_STOR_TRANSPORT_ERROR * it is better to return all zero data. */ memset(buffer, 0, len); } else { usb_stor_dbg(us, "Read %d pages, from PBA %d (LBA %d) page %d\n", pages, pba, lba, page); result = alauda_read_block(us, pba, page, pages, buffer); if (result != USB_STOR_TRANSPORT_GOOD) break; } /* Store the data in the transfer buffer */ usb_stor_access_xfer_buf(buffer, len, us->srb, &sg, &offset, TO_XFER_BUF); page = 0; lba++; sectors -= pages; } kfree(buffer); return result; } /* * Write data to a specific sector address */ static int alauda_write_data(struct us_data *us, unsigned long address, unsigned int sectors) { unsigned char *buffer, *blockbuffer; unsigned int page, len, offset; unsigned int blockshift = MEDIA_INFO(us).blockshift; unsigned int pageshift = MEDIA_INFO(us).pageshift; unsigned int blocksize = MEDIA_INFO(us).blocksize; unsigned int pagesize = MEDIA_INFO(us).pagesize; struct scatterlist *sg; u16 lba, max_lba; int result; /* * Since we don't write the user data directly to the device, * we have to create a bounce buffer and move the data a piece * at a time between the bounce buffer and the actual transfer buffer. */ len = min(sectors, blocksize) * pagesize; buffer = kmalloc(len, GFP_NOIO); if (!buffer) return USB_STOR_TRANSPORT_ERROR; /* * We also need a temporary block buffer, where we read in the old data, * overwrite parts with the new data, and manipulate the redundancy data */ blockbuffer = kmalloc_array(pagesize + 64, blocksize, GFP_NOIO); if (!blockbuffer) { kfree(buffer); return USB_STOR_TRANSPORT_ERROR; } /* Figure out the initial LBA and page */ lba = address >> blockshift; page = (address & MEDIA_INFO(us).blockmask); max_lba = MEDIA_INFO(us).capacity >> (pageshift + blockshift); result = USB_STOR_TRANSPORT_GOOD; offset = 0; sg = NULL; while (sectors > 0) { /* Write as many sectors as possible in this block */ unsigned int pages = min(sectors, blocksize - page); len = pages << pageshift; /* Not overflowing capacity? */ if (lba >= max_lba) { usb_stor_dbg(us, "Requested lba %u exceeds maximum %u\n", lba, max_lba); result = USB_STOR_TRANSPORT_ERROR; break; } /* Get the data from the transfer buffer */ usb_stor_access_xfer_buf(buffer, len, us->srb, &sg, &offset, FROM_XFER_BUF); result = alauda_write_lba(us, lba, page, pages, buffer, blockbuffer); if (result != USB_STOR_TRANSPORT_GOOD) break; page = 0; lba++; sectors -= pages; } kfree(buffer); kfree(blockbuffer); return result; } /* * Our interface with the rest of the world */ static void alauda_info_destructor(void *extra) { struct alauda_info *info = (struct alauda_info *) extra; int port; if (!info) return; for (port = 0; port < 2; port++) { struct alauda_media_info *media_info = &info->port[port]; alauda_free_maps(media_info); kfree(media_info->lba_to_pba); kfree(media_info->pba_to_lba); } } /* * Initialize alauda_info struct and find the data-write endpoint */ static int init_alauda(struct us_data *us) { struct alauda_info *info; struct usb_host_interface *altsetting = us->pusb_intf->cur_altsetting; nand_init_ecc(); us->extra = kzalloc(sizeof(struct alauda_info), GFP_NOIO); if (!us->extra) return USB_STOR_TRANSPORT_ERROR; info = (struct alauda_info *) us->extra; us->extra_destructor = alauda_info_destructor; info->wr_ep = usb_sndbulkpipe(us->pusb_dev, altsetting->endpoint[0].desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); return USB_STOR_TRANSPORT_GOOD; } static int alauda_transport(struct scsi_cmnd *srb, struct us_data *us) { int rc; struct alauda_info *info = (struct alauda_info *) us->extra; unsigned char *ptr = us->iobuf; static unsigned char inquiry_response[36] = { 0x00, 0x80, 0x00, 0x01, 0x1F, 0x00, 0x00, 0x00 }; if (srb->cmnd[0] == INQUIRY) { usb_stor_dbg(us, "INQUIRY - Returning bogus response\n"); memcpy(ptr, inquiry_response, sizeof(inquiry_response)); fill_inquiry_response(us, ptr, 36); return USB_STOR_TRANSPORT_GOOD; } if (srb->cmnd[0] == TEST_UNIT_READY) { usb_stor_dbg(us, "TEST_UNIT_READY\n"); return alauda_check_media(us); } if (srb->cmnd[0] == READ_CAPACITY) { unsigned int num_zones; unsigned long capacity; rc = alauda_check_media(us); if (rc != USB_STOR_TRANSPORT_GOOD) return rc; num_zones = MEDIA_INFO(us).capacity >> (MEDIA_INFO(us).zoneshift + MEDIA_INFO(us).blockshift + MEDIA_INFO(us).pageshift); capacity = num_zones * MEDIA_INFO(us).uzonesize * MEDIA_INFO(us).blocksize; /* Report capacity and page size */ ((__be32 *) ptr)[0] = cpu_to_be32(capacity - 1); ((__be32 *) ptr)[1] = cpu_to_be32(512); usb_stor_set_xfer_buf(ptr, 8, srb); return USB_STOR_TRANSPORT_GOOD; } if (srb->cmnd[0] == READ_10) { unsigned int page, pages; rc = alauda_check_media(us); if (rc != USB_STOR_TRANSPORT_GOOD) return rc; page = short_pack(srb->cmnd[3], srb->cmnd[2]); page <<= 16; page |= short_pack(srb->cmnd[5], srb->cmnd[4]); pages = short_pack(srb->cmnd[8], srb->cmnd[7]); usb_stor_dbg(us, "READ_10: page %d pagect %d\n", page, pages); return alauda_read_data(us, page, pages); } if (srb->cmnd[0] == WRITE_10) { unsigned int page, pages; rc = alauda_check_media(us); if (rc != USB_STOR_TRANSPORT_GOOD) return rc; page = short_pack(srb->cmnd[3], srb->cmnd[2]); page <<= 16; page |= short_pack(srb->cmnd[5], srb->cmnd[4]); pages = short_pack(srb->cmnd[8], srb->cmnd[7]); usb_stor_dbg(us, "WRITE_10: page %d pagect %d\n", page, pages); return alauda_write_data(us, page, pages); } if (srb->cmnd[0] == REQUEST_SENSE) { usb_stor_dbg(us, "REQUEST_SENSE\n"); memset(ptr, 0, 18); ptr[0] = 0xF0; ptr[2] = info->sense_key; ptr[7] = 11; ptr[12] = info->sense_asc; ptr[13] = info->sense_ascq; usb_stor_set_xfer_buf(ptr, 18, srb); return USB_STOR_TRANSPORT_GOOD; } if (srb->cmnd[0] == ALLOW_MEDIUM_REMOVAL) { /* * sure. whatever. not like we can stop the user from popping * the media out of the device (no locking doors, etc) */ return USB_STOR_TRANSPORT_GOOD; } usb_stor_dbg(us, "Gah! Unknown command: %d (0x%x)\n", srb->cmnd[0], srb->cmnd[0]); info->sense_key = 0x05; info->sense_asc = 0x20; info->sense_ascq = 0x00; return USB_STOR_TRANSPORT_FAILED; } static struct scsi_host_template alauda_host_template; static int alauda_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct us_data *us; int result; result = usb_stor_probe1(&us, intf, id, (id - alauda_usb_ids) + alauda_unusual_dev_list, &alauda_host_template); if (result) return result; us->transport_name = "Alauda Control/Bulk"; us->transport = alauda_transport; us->transport_reset = usb_stor_Bulk_reset; us->max_lun = 1; result = usb_stor_probe2(us); return result; } static struct usb_driver alauda_driver = { .name = DRV_NAME, .probe = alauda_probe, .disconnect = usb_stor_disconnect, .suspend = usb_stor_suspend, .resume = usb_stor_resume, .reset_resume = usb_stor_reset_resume, .pre_reset = usb_stor_pre_reset, .post_reset = usb_stor_post_reset, .id_table = alauda_usb_ids, .soft_unbind = 1, .no_dynamic_id = 1, }; module_usb_stor_driver(alauda_driver, alauda_host_template, DRV_NAME); |
3 3 3 2 4 3 3 1 2 1 2 10 2 4 5 2 3 2 1 2 4 1 4 1 2 3 1 4 1 4 3 2 4 4 3 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 | // SPDX-License-Identifier: GPL-2.0-or-later /* * USB Synaptics device driver * * Copyright (c) 2002 Rob Miller (rob@inpharmatica . co . uk) * Copyright (c) 2003 Ron Lee (ron@debian.org) * cPad driver for kernel 2.4 * * Copyright (c) 2004 Jan Steinhoff (cpad@jan-steinhoff . de) * Copyright (c) 2004 Ron Lee (ron@debian.org) * rewritten for kernel 2.6 * * cPad display character device part is not included. It can be found at * http://jan-steinhoff.de/linux/synaptics-usb.html * * Bases on: usb_skeleton.c v2.2 by Greg Kroah-Hartman * drivers/hid/usbhid/usbmouse.c by Vojtech Pavlik * drivers/input/mouse/synaptics.c by Peter Osterlund * * Trademarks are the property of their respective owners. */ /* * There are three different types of Synaptics USB devices: Touchpads, * touchsticks (or trackpoints), and touchscreens. Touchpads are well supported * by this driver, touchstick support has not been tested much yet, and * touchscreens have not been tested at all. * * Up to three alternate settings are possible: * setting 0: one int endpoint for relative movement (used by usbhid.ko) * setting 1: one int endpoint for absolute finger position * setting 2 (cPad only): one int endpoint for absolute finger position and * two bulk endpoints for the display (in/out) * This driver uses setting 1. */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/usb.h> #include <linux/input.h> #include <linux/usb/input.h> #define USB_VENDOR_ID_SYNAPTICS 0x06cb #define USB_DEVICE_ID_SYNAPTICS_TP 0x0001 /* Synaptics USB TouchPad */ #define USB_DEVICE_ID_SYNAPTICS_INT_TP 0x0002 /* Integrated USB TouchPad */ #define USB_DEVICE_ID_SYNAPTICS_CPAD 0x0003 /* Synaptics cPad */ #define USB_DEVICE_ID_SYNAPTICS_TS 0x0006 /* Synaptics TouchScreen */ #define USB_DEVICE_ID_SYNAPTICS_STICK 0x0007 /* Synaptics USB Styk */ #define USB_DEVICE_ID_SYNAPTICS_WP 0x0008 /* Synaptics USB WheelPad */ #define USB_DEVICE_ID_SYNAPTICS_COMP_TP 0x0009 /* Composite USB TouchPad */ #define USB_DEVICE_ID_SYNAPTICS_WTP 0x0010 /* Wireless TouchPad */ #define USB_DEVICE_ID_SYNAPTICS_DPAD 0x0013 /* DisplayPad */ #define SYNUSB_TOUCHPAD (1 << 0) #define SYNUSB_STICK (1 << 1) #define SYNUSB_TOUCHSCREEN (1 << 2) #define SYNUSB_AUXDISPLAY (1 << 3) /* For cPad */ #define SYNUSB_COMBO (1 << 4) /* Composite device (TP + stick) */ #define SYNUSB_IO_ALWAYS (1 << 5) #define USB_DEVICE_SYNAPTICS(prod, kind) \ USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, \ USB_DEVICE_ID_SYNAPTICS_##prod), \ .driver_info = (kind), #define SYNUSB_RECV_SIZE 8 #define XMIN_NOMINAL 1472 #define XMAX_NOMINAL 5472 #define YMIN_NOMINAL 1408 #define YMAX_NOMINAL 4448 struct synusb { struct usb_device *udev; struct usb_interface *intf; struct urb *urb; unsigned char *data; /* serialize access to open/suspend */ struct mutex pm_mutex; bool is_open; /* input device related data structures */ struct input_dev *input; char name[128]; char phys[64]; /* characteristics of the device */ unsigned long flags; }; static void synusb_report_buttons(struct synusb *synusb) { struct input_dev *input_dev = synusb->input; input_report_key(input_dev, BTN_LEFT, synusb->data[1] & 0x04); input_report_key(input_dev, BTN_RIGHT, synusb->data[1] & 0x01); input_report_key(input_dev, BTN_MIDDLE, synusb->data[1] & 0x02); } static void synusb_report_stick(struct synusb *synusb) { struct input_dev *input_dev = synusb->input; int x, y; unsigned int pressure; pressure = synusb->data[6]; x = (s16)(be16_to_cpup((__be16 *)&synusb->data[2]) << 3) >> 7; y = (s16)(be16_to_cpup((__be16 *)&synusb->data[4]) << 3) >> 7; if (pressure > 0) { input_report_rel(input_dev, REL_X, x); input_report_rel(input_dev, REL_Y, -y); } input_report_abs(input_dev, ABS_PRESSURE, pressure); synusb_report_buttons(synusb); input_sync(input_dev); } static void synusb_report_touchpad(struct synusb *synusb) { struct input_dev *input_dev = synusb->input; unsigned int num_fingers, tool_width; unsigned int x, y; unsigned int pressure, w; pressure = synusb->data[6]; x = be16_to_cpup((__be16 *)&synusb->data[2]); y = be16_to_cpup((__be16 *)&synusb->data[4]); w = synusb->data[0] & 0x0f; if (pressure > 0) { num_fingers = 1; tool_width = 5; switch (w) { case 0 ... 1: num_fingers = 2 + w; break; case 2: /* pen, pretend its a finger */ break; case 4 ... 15: tool_width = w; break; } } else { num_fingers = 0; tool_width = 0; } /* * Post events * BTN_TOUCH has to be first as mousedev relies on it when doing * absolute -> relative conversion */ if (pressure > 30) input_report_key(input_dev, BTN_TOUCH, 1); if (pressure < 25) input_report_key(input_dev, BTN_TOUCH, 0); if (num_fingers > 0) { input_report_abs(input_dev, ABS_X, x); input_report_abs(input_dev, ABS_Y, YMAX_NOMINAL + YMIN_NOMINAL - y); } input_report_abs(input_dev, ABS_PRESSURE, pressure); input_report_abs(input_dev, ABS_TOOL_WIDTH, tool_width); input_report_key(input_dev, BTN_TOOL_FINGER, num_fingers == 1); input_report_key(input_dev, BTN_TOOL_DOUBLETAP, num_fingers == 2); input_report_key(input_dev, BTN_TOOL_TRIPLETAP, num_fingers == 3); synusb_report_buttons(synusb); if (synusb->flags & SYNUSB_AUXDISPLAY) input_report_key(input_dev, BTN_MIDDLE, synusb->data[1] & 0x08); input_sync(input_dev); } static void synusb_irq(struct urb *urb) { struct synusb *synusb = urb->context; int error; /* Check our status in case we need to bail out early. */ switch (urb->status) { case 0: usb_mark_last_busy(synusb->udev); break; /* Device went away so don't keep trying to read from it. */ case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: return; default: goto resubmit; break; } if (synusb->flags & SYNUSB_STICK) synusb_report_stick(synusb); else synusb_report_touchpad(synusb); resubmit: error = usb_submit_urb(urb, GFP_ATOMIC); if (error && error != -EPERM) dev_err(&synusb->intf->dev, "%s - usb_submit_urb failed with result: %d", __func__, error); } static struct usb_endpoint_descriptor * synusb_get_in_endpoint(struct usb_host_interface *iface) { struct usb_endpoint_descriptor *endpoint; int i; for (i = 0; i < iface->desc.bNumEndpoints; ++i) { endpoint = &iface->endpoint[i].desc; if (usb_endpoint_is_int_in(endpoint)) { /* we found our interrupt in endpoint */ return endpoint; } } return NULL; } static int synusb_open(struct input_dev *dev) { struct synusb *synusb = input_get_drvdata(dev); int retval; retval = usb_autopm_get_interface(synusb->intf); if (retval) { dev_err(&synusb->intf->dev, "%s - usb_autopm_get_interface failed, error: %d\n", __func__, retval); return retval; } mutex_lock(&synusb->pm_mutex); retval = usb_submit_urb(synusb->urb, GFP_KERNEL); if (retval) { dev_err(&synusb->intf->dev, "%s - usb_submit_urb failed, error: %d\n", __func__, retval); retval = -EIO; goto out; } synusb->intf->needs_remote_wakeup = 1; synusb->is_open = true; out: mutex_unlock(&synusb->pm_mutex); usb_autopm_put_interface(synusb->intf); return retval; } static void synusb_close(struct input_dev *dev) { struct synusb *synusb = input_get_drvdata(dev); int autopm_error; autopm_error = usb_autopm_get_interface(synusb->intf); mutex_lock(&synusb->pm_mutex); usb_kill_urb(synusb->urb); synusb->intf->needs_remote_wakeup = 0; synusb->is_open = false; mutex_unlock(&synusb->pm_mutex); if (!autopm_error) usb_autopm_put_interface(synusb->intf); } static int synusb_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); struct usb_endpoint_descriptor *ep; struct synusb *synusb; struct input_dev *input_dev; unsigned int intf_num = intf->cur_altsetting->desc.bInterfaceNumber; unsigned int altsetting = min(intf->num_altsetting, 1U); int error; error = usb_set_interface(udev, intf_num, altsetting); if (error) { dev_err(&udev->dev, "Can not set alternate setting to %i, error: %i", altsetting, error); return error; } ep = synusb_get_in_endpoint(intf->cur_altsetting); if (!ep) return -ENODEV; synusb = kzalloc(sizeof(*synusb), GFP_KERNEL); input_dev = input_allocate_device(); if (!synusb || !input_dev) { error = -ENOMEM; goto err_free_mem; } synusb->udev = udev; synusb->intf = intf; synusb->input = input_dev; mutex_init(&synusb->pm_mutex); synusb->flags = id->driver_info; if (synusb->flags & SYNUSB_COMBO) { /* * This is a combo device, we need to set proper * capability, depending on the interface. */ synusb->flags |= intf_num == 1 ? SYNUSB_STICK : SYNUSB_TOUCHPAD; } synusb->urb = usb_alloc_urb(0, GFP_KERNEL); if (!synusb->urb) { error = -ENOMEM; goto err_free_mem; } synusb->data = usb_alloc_coherent(udev, SYNUSB_RECV_SIZE, GFP_KERNEL, &synusb->urb->transfer_dma); if (!synusb->data) { error = -ENOMEM; goto err_free_urb; } usb_fill_int_urb(synusb->urb, udev, usb_rcvintpipe(udev, ep->bEndpointAddress), synusb->data, SYNUSB_RECV_SIZE, synusb_irq, synusb, ep->bInterval); synusb->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; if (udev->manufacturer) strlcpy(synusb->name, udev->manufacturer, sizeof(synusb->name)); if (udev->product) { if (udev->manufacturer) strlcat(synusb->name, " ", sizeof(synusb->name)); strlcat(synusb->name, udev->product, sizeof(synusb->name)); } if (!strlen(synusb->name)) snprintf(synusb->name, sizeof(synusb->name), "USB Synaptics Device %04x:%04x", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct)); if (synusb->flags & SYNUSB_STICK) strlcat(synusb->name, " (Stick)", sizeof(synusb->name)); usb_make_path(udev, synusb->phys, sizeof(synusb->phys)); strlcat(synusb->phys, "/input0", sizeof(synusb->phys)); input_dev->name = synusb->name; input_dev->phys = synusb->phys; usb_to_input_id(udev, &input_dev->id); input_dev->dev.parent = &synusb->intf->dev; if (!(synusb->flags & SYNUSB_IO_ALWAYS)) { input_dev->open = synusb_open; input_dev->close = synusb_close; } input_set_drvdata(input_dev, synusb); __set_bit(EV_ABS, input_dev->evbit); __set_bit(EV_KEY, input_dev->evbit); if (synusb->flags & SYNUSB_STICK) { __set_bit(EV_REL, input_dev->evbit); __set_bit(REL_X, input_dev->relbit); __set_bit(REL_Y, input_dev->relbit); __set_bit(INPUT_PROP_POINTING_STICK, input_dev->propbit); input_set_abs_params(input_dev, ABS_PRESSURE, 0, 127, 0, 0); } else { input_set_abs_params(input_dev, ABS_X, XMIN_NOMINAL, XMAX_NOMINAL, 0, 0); input_set_abs_params(input_dev, ABS_Y, YMIN_NOMINAL, YMAX_NOMINAL, 0, 0); input_set_abs_params(input_dev, ABS_PRESSURE, 0, 255, 0, 0); input_set_abs_params(input_dev, ABS_TOOL_WIDTH, 0, 15, 0, 0); __set_bit(BTN_TOUCH, input_dev->keybit); __set_bit(BTN_TOOL_FINGER, input_dev->keybit); __set_bit(BTN_TOOL_DOUBLETAP, input_dev->keybit); __set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit); } if (synusb->flags & SYNUSB_TOUCHSCREEN) __set_bit(INPUT_PROP_DIRECT, input_dev->propbit); else __set_bit(INPUT_PROP_POINTER, input_dev->propbit); __set_bit(BTN_LEFT, input_dev->keybit); __set_bit(BTN_RIGHT, input_dev->keybit); __set_bit(BTN_MIDDLE, input_dev->keybit); usb_set_intfdata(intf, synusb); if (synusb->flags & SYNUSB_IO_ALWAYS) { error = synusb_open(input_dev); if (error) goto err_free_dma; } error = input_register_device(input_dev); if (error) { dev_err(&udev->dev, "Failed to register input device, error %d\n", error); goto err_stop_io; } return 0; err_stop_io: if (synusb->flags & SYNUSB_IO_ALWAYS) synusb_close(synusb->input); err_free_dma: usb_free_coherent(udev, SYNUSB_RECV_SIZE, synusb->data, synusb->urb->transfer_dma); err_free_urb: usb_free_urb(synusb->urb); err_free_mem: input_free_device(input_dev); kfree(synusb); usb_set_intfdata(intf, NULL); return error; } static void synusb_disconnect(struct usb_interface *intf) { struct synusb *synusb = usb_get_intfdata(intf); struct usb_device *udev = interface_to_usbdev(intf); if (synusb->flags & SYNUSB_IO_ALWAYS) synusb_close(synusb->input); input_unregister_device(synusb->input); usb_free_coherent(udev, SYNUSB_RECV_SIZE, synusb->data, synusb->urb->transfer_dma); usb_free_urb(synusb->urb); kfree(synusb); usb_set_intfdata(intf, NULL); } static int synusb_suspend(struct usb_interface *intf, pm_message_t message) { struct synusb *synusb = usb_get_intfdata(intf); mutex_lock(&synusb->pm_mutex); usb_kill_urb(synusb->urb); mutex_unlock(&synusb->pm_mutex); return 0; } static int synusb_resume(struct usb_interface *intf) { struct synusb *synusb = usb_get_intfdata(intf); int retval = 0; mutex_lock(&synusb->pm_mutex); if ((synusb->is_open || (synusb->flags & SYNUSB_IO_ALWAYS)) && usb_submit_urb(synusb->urb, GFP_NOIO) < 0) { retval = -EIO; } mutex_unlock(&synusb->pm_mutex); return retval; } static int synusb_pre_reset(struct usb_interface *intf) { struct synusb *synusb = usb_get_intfdata(intf); mutex_lock(&synusb->pm_mutex); usb_kill_urb(synusb->urb); return 0; } static int synusb_post_reset(struct usb_interface *intf) { struct synusb *synusb = usb_get_intfdata(intf); int retval = 0; if ((synusb->is_open || (synusb->flags & SYNUSB_IO_ALWAYS)) && usb_submit_urb(synusb->urb, GFP_NOIO) < 0) { retval = -EIO; } mutex_unlock(&synusb->pm_mutex); return retval; } static int synusb_reset_resume(struct usb_interface *intf) { return synusb_resume(intf); } static const struct usb_device_id synusb_idtable[] = { { USB_DEVICE_SYNAPTICS(TP, SYNUSB_TOUCHPAD) }, { USB_DEVICE_SYNAPTICS(INT_TP, SYNUSB_TOUCHPAD) }, { USB_DEVICE_SYNAPTICS(CPAD, SYNUSB_TOUCHPAD | SYNUSB_AUXDISPLAY | SYNUSB_IO_ALWAYS) }, { USB_DEVICE_SYNAPTICS(TS, SYNUSB_TOUCHSCREEN) }, { USB_DEVICE_SYNAPTICS(STICK, SYNUSB_STICK) }, { USB_DEVICE_SYNAPTICS(WP, SYNUSB_TOUCHPAD) }, { USB_DEVICE_SYNAPTICS(COMP_TP, SYNUSB_COMBO) }, { USB_DEVICE_SYNAPTICS(WTP, SYNUSB_TOUCHPAD) }, { USB_DEVICE_SYNAPTICS(DPAD, SYNUSB_TOUCHPAD) }, { } }; MODULE_DEVICE_TABLE(usb, synusb_idtable); static struct usb_driver synusb_driver = { .name = "synaptics_usb", .probe = synusb_probe, .disconnect = synusb_disconnect, .id_table = synusb_idtable, .suspend = synusb_suspend, .resume = synusb_resume, .pre_reset = synusb_pre_reset, .post_reset = synusb_post_reset, .reset_resume = synusb_reset_resume, .supports_autosuspend = 1, }; module_usb_driver(synusb_driver); MODULE_AUTHOR("Rob Miller <rob@inpharmatica.co.uk>, " "Ron Lee <ron@debian.org>, " "Jan Steinhoff <cpad@jan-steinhoff.de>"); MODULE_DESCRIPTION("Synaptics USB device driver"); MODULE_LICENSE("GPL"); |
10 1 2 1 3 2 1 2 1 20 20 12 9 22 21 20 14 22 24 24 24 24 24 24 1 23 23 23 24 30 30 30 10 23 30 30 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ #include "device.h" #include "peer.h" #include "socket.h" #include "queueing.h" #include "messages.h" #include <linux/ctype.h> #include <linux/net.h> #include <linux/if_vlan.h> #include <linux/if_ether.h> #include <linux/inetdevice.h> #include <net/udp_tunnel.h> #include <net/ipv6.h> static int send4(struct wg_device *wg, struct sk_buff *skb, struct endpoint *endpoint, u8 ds, struct dst_cache *cache) { struct flowi4 fl = { .saddr = endpoint->src4.s_addr, .daddr = endpoint->addr4.sin_addr.s_addr, .fl4_dport = endpoint->addr4.sin_port, .flowi4_mark = wg->fwmark, .flowi4_proto = IPPROTO_UDP }; struct rtable *rt = NULL; struct sock *sock; int ret = 0; skb_mark_not_on_list(skb); skb->dev = wg->dev; skb->mark = wg->fwmark; rcu_read_lock_bh(); sock = rcu_dereference_bh(wg->sock4); if (unlikely(!sock)) { ret = -ENONET; goto err; } fl.fl4_sport = inet_sk(sock)->inet_sport; if (cache) rt = dst_cache_get_ip4(cache, &fl.saddr); if (!rt) { security_sk_classify_flow(sock, flowi4_to_flowi_common(&fl)); if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0, fl.saddr, RT_SCOPE_HOST))) { endpoint->src4.s_addr = 0; endpoint->src_if4 = 0; fl.saddr = 0; if (cache) dst_cache_reset(cache); } rt = ip_route_output_flow(sock_net(sock), &fl, sock); if (unlikely(endpoint->src_if4 && ((IS_ERR(rt) && PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) && rt->dst.dev->ifindex != endpoint->src_if4)))) { endpoint->src4.s_addr = 0; endpoint->src_if4 = 0; fl.saddr = 0; if (cache) dst_cache_reset(cache); if (!IS_ERR(rt)) ip_rt_put(rt); rt = ip_route_output_flow(sock_net(sock), &fl, sock); } if (IS_ERR(rt)) { ret = PTR_ERR(rt); net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", wg->dev->name, &endpoint->addr, ret); goto err; } if (cache) dst_cache_set_ip4(cache, &rt->dst, fl.saddr); } skb->ignore_df = 1; udp_tunnel_xmit_skb(rt, sock, skb, fl.saddr, fl.daddr, ds, ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport, fl.fl4_dport, false, false); goto out; err: kfree_skb(skb); out: rcu_read_unlock_bh(); return ret; } static int send6(struct wg_device *wg, struct sk_buff *skb, struct endpoint *endpoint, u8 ds, struct dst_cache *cache) { #if IS_ENABLED(CONFIG_IPV6) struct flowi6 fl = { .saddr = endpoint->src6, .daddr = endpoint->addr6.sin6_addr, .fl6_dport = endpoint->addr6.sin6_port, .flowi6_mark = wg->fwmark, .flowi6_oif = endpoint->addr6.sin6_scope_id, .flowi6_proto = IPPROTO_UDP /* TODO: addr->sin6_flowinfo */ }; struct dst_entry *dst = NULL; struct sock *sock; int ret = 0; skb_mark_not_on_list(skb); skb->dev = wg->dev; skb->mark = wg->fwmark; rcu_read_lock_bh(); sock = rcu_dereference_bh(wg->sock6); if (unlikely(!sock)) { ret = -ENONET; goto err; } fl.fl6_sport = inet_sk(sock)->inet_sport; if (cache) dst = dst_cache_get_ip6(cache, &fl.saddr); if (!dst) { security_sk_classify_flow(sock, flowi6_to_flowi_common(&fl)); if (unlikely(!ipv6_addr_any(&fl.saddr) && !ipv6_chk_addr(sock_net(sock), &fl.saddr, NULL, 0))) { endpoint->src6 = fl.saddr = in6addr_any; if (cache) dst_cache_reset(cache); } dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl, NULL); if (IS_ERR(dst)) { ret = PTR_ERR(dst); net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", wg->dev->name, &endpoint->addr, ret); goto err; } if (cache) dst_cache_set_ip6(cache, dst, &fl.saddr); } skb->ignore_df = 1; udp_tunnel6_xmit_skb(dst, sock, skb, skb->dev, &fl.saddr, &fl.daddr, ds, ip6_dst_hoplimit(dst), 0, fl.fl6_sport, fl.fl6_dport, false); goto out; err: kfree_skb(skb); out: rcu_read_unlock_bh(); return ret; #else kfree_skb(skb); return -EAFNOSUPPORT; #endif } int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, u8 ds) { size_t skb_len = skb->len; int ret = -EAFNOSUPPORT; read_lock_bh(&peer->endpoint_lock); if (peer->endpoint.addr.sa_family == AF_INET) ret = send4(peer->device, skb, &peer->endpoint, ds, &peer->endpoint_cache); else if (peer->endpoint.addr.sa_family == AF_INET6) ret = send6(peer->device, skb, &peer->endpoint, ds, &peer->endpoint_cache); else dev_kfree_skb(skb); if (likely(!ret)) peer->tx_bytes += skb_len; read_unlock_bh(&peer->endpoint_lock); return ret; } int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *buffer, size_t len, u8 ds) { struct sk_buff *skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); if (unlikely(!skb)) return -ENOMEM; skb_reserve(skb, SKB_HEADER_LEN); skb_set_inner_network_header(skb, 0); skb_put_data(skb, buffer, len); return wg_socket_send_skb_to_peer(peer, skb, ds); } int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg, struct sk_buff *in_skb, void *buffer, size_t len) { int ret = 0; struct sk_buff *skb; struct endpoint endpoint; if (unlikely(!in_skb)) return -EINVAL; ret = wg_socket_endpoint_from_skb(&endpoint, in_skb); if (unlikely(ret < 0)) return ret; skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); if (unlikely(!skb)) return -ENOMEM; skb_reserve(skb, SKB_HEADER_LEN); skb_set_inner_network_header(skb, 0); skb_put_data(skb, buffer, len); if (endpoint.addr.sa_family == AF_INET) ret = send4(wg, skb, &endpoint, 0, NULL); else if (endpoint.addr.sa_family == AF_INET6) ret = send6(wg, skb, &endpoint, 0, NULL); /* No other possibilities if the endpoint is valid, which it is, * as we checked above. */ return ret; } int wg_socket_endpoint_from_skb(struct endpoint *endpoint, const struct sk_buff *skb) { memset(endpoint, 0, sizeof(*endpoint)); if (skb->protocol == htons(ETH_P_IP)) { endpoint->addr4.sin_family = AF_INET; endpoint->addr4.sin_port = udp_hdr(skb)->source; endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr; endpoint->src4.s_addr = ip_hdr(skb)->daddr; endpoint->src_if4 = skb->skb_iif; } else if (IS_ENABLED(CONFIG_IPV6) && skb->protocol == htons(ETH_P_IPV6)) { endpoint->addr6.sin6_family = AF_INET6; endpoint->addr6.sin6_port = udp_hdr(skb)->source; endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr; endpoint->addr6.sin6_scope_id = ipv6_iface_scope_id( &ipv6_hdr(skb)->saddr, skb->skb_iif); endpoint->src6 = ipv6_hdr(skb)->daddr; } else { return -EINVAL; } return 0; } static bool endpoint_eq(const struct endpoint *a, const struct endpoint *b) { return (a->addr.sa_family == AF_INET && b->addr.sa_family == AF_INET && a->addr4.sin_port == b->addr4.sin_port && a->addr4.sin_addr.s_addr == b->addr4.sin_addr.s_addr && a->src4.s_addr == b->src4.s_addr && a->src_if4 == b->src_if4) || (a->addr.sa_family == AF_INET6 && b->addr.sa_family == AF_INET6 && a->addr6.sin6_port == b->addr6.sin6_port && ipv6_addr_equal(&a->addr6.sin6_addr, &b->addr6.sin6_addr) && a->addr6.sin6_scope_id == b->addr6.sin6_scope_id && ipv6_addr_equal(&a->src6, &b->src6)) || unlikely(!a->addr.sa_family && !b->addr.sa_family); } void wg_socket_set_peer_endpoint(struct wg_peer *peer, const struct endpoint *endpoint) { /* First we check unlocked, in order to optimize, since it's pretty rare * that an endpoint will change. If we happen to be mid-write, and two * CPUs wind up writing the same thing or something slightly different, * it doesn't really matter much either. */ if (endpoint_eq(endpoint, &peer->endpoint)) return; write_lock_bh(&peer->endpoint_lock); if (endpoint->addr.sa_family == AF_INET) { peer->endpoint.addr4 = endpoint->addr4; peer->endpoint.src4 = endpoint->src4; peer->endpoint.src_if4 = endpoint->src_if4; } else if (IS_ENABLED(CONFIG_IPV6) && endpoint->addr.sa_family == AF_INET6) { peer->endpoint.addr6 = endpoint->addr6; peer->endpoint.src6 = endpoint->src6; } else { goto out; } dst_cache_reset(&peer->endpoint_cache); out: write_unlock_bh(&peer->endpoint_lock); } void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer, const struct sk_buff *skb) { struct endpoint endpoint; if (!wg_socket_endpoint_from_skb(&endpoint, skb)) wg_socket_set_peer_endpoint(peer, &endpoint); } void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer) { write_lock_bh(&peer->endpoint_lock); memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6)); dst_cache_reset_now(&peer->endpoint_cache); write_unlock_bh(&peer->endpoint_lock); } static int wg_receive(struct sock *sk, struct sk_buff *skb) { struct wg_device *wg; if (unlikely(!sk)) goto err; wg = sk->sk_user_data; if (unlikely(!wg)) goto err; skb_mark_not_on_list(skb); wg_packet_receive(wg, skb); return 0; err: kfree_skb(skb); return 0; } static void sock_free(struct sock *sock) { if (unlikely(!sock)) return; sk_clear_memalloc(sock); udp_tunnel_sock_release(sock->sk_socket); } static void set_sock_opts(struct socket *sock) { sock->sk->sk_allocation = GFP_ATOMIC; sock->sk->sk_sndbuf = INT_MAX; sk_set_memalloc(sock->sk); } int wg_socket_init(struct wg_device *wg, u16 port) { struct net *net; int ret; struct udp_tunnel_sock_cfg cfg = { .sk_user_data = wg, .encap_type = 1, .encap_rcv = wg_receive }; struct socket *new4 = NULL, *new6 = NULL; struct udp_port_cfg port4 = { .family = AF_INET, .local_ip.s_addr = htonl(INADDR_ANY), .local_udp_port = htons(port), .use_udp_checksums = true }; #if IS_ENABLED(CONFIG_IPV6) int retries = 0; struct udp_port_cfg port6 = { .family = AF_INET6, .local_ip6 = IN6ADDR_ANY_INIT, .use_udp6_tx_checksums = true, .use_udp6_rx_checksums = true, .ipv6_v6only = true }; #endif rcu_read_lock(); net = rcu_dereference(wg->creating_net); net = net ? maybe_get_net(net) : NULL; rcu_read_unlock(); if (unlikely(!net)) return -ENONET; #if IS_ENABLED(CONFIG_IPV6) retry: #endif ret = udp_sock_create(net, &port4, &new4); if (ret < 0) { pr_err("%s: Could not create IPv4 socket\n", wg->dev->name); goto out; } set_sock_opts(new4); setup_udp_tunnel_sock(net, new4, &cfg); #if IS_ENABLED(CONFIG_IPV6) if (ipv6_mod_enabled()) { port6.local_udp_port = inet_sk(new4->sk)->inet_sport; ret = udp_sock_create(net, &port6, &new6); if (ret < 0) { udp_tunnel_sock_release(new4); if (ret == -EADDRINUSE && !port && retries++ < 100) goto retry; pr_err("%s: Could not create IPv6 socket\n", wg->dev->name); goto out; } set_sock_opts(new6); setup_udp_tunnel_sock(net, new6, &cfg); } #endif wg_socket_reinit(wg, new4->sk, new6 ? new6->sk : NULL); ret = 0; out: put_net(net); return ret; } void wg_socket_reinit(struct wg_device *wg, struct sock *new4, struct sock *new6) { struct sock *old4, *old6; mutex_lock(&wg->socket_update_lock); old4 = rcu_dereference_protected(wg->sock4, lockdep_is_held(&wg->socket_update_lock)); old6 = rcu_dereference_protected(wg->sock6, lockdep_is_held(&wg->socket_update_lock)); rcu_assign_pointer(wg->sock4, new4); rcu_assign_pointer(wg->sock6, new6); if (new4) wg->incoming_port = ntohs(inet_sk(new4)->inet_sport); mutex_unlock(&wg->socket_update_lock); synchronize_net(); sock_free(old4); sock_free(old6); } |
4 4 4 4 4 3 7 7 23 22 7 16 3 20 15 5 17 3 35 4 32 5 1 23 4 14 14 14 2 1 2 2 2 1 11 11 4 4 4 3 11 4 11 11 11 2 2 2 2 11 16 16 5 11 11 11 11 4 4 3 11 11 10 10 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 | // SPDX-License-Identifier: GPL-2.0-only #include <linux/ceph/ceph_debug.h> #include <linux/backing-dev.h> #include <linux/ctype.h> #include <linux/fs.h> #include <linux/inet.h> #include <linux/in6.h> #include <linux/module.h> #include <linux/mount.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/statfs.h> #include <linux/string.h> #include "super.h" #include "mds_client.h" #include "cache.h" #include <linux/ceph/ceph_features.h> #include <linux/ceph/decode.h> #include <linux/ceph/mon_client.h> #include <linux/ceph/auth.h> #include <linux/ceph/debugfs.h> static DEFINE_SPINLOCK(ceph_fsc_lock); static LIST_HEAD(ceph_fsc_list); /* * Ceph superblock operations * * Handle the basics of mounting, unmounting. */ /* * super ops */ static void ceph_put_super(struct super_block *s) { struct ceph_fs_client *fsc = ceph_sb_to_client(s); dout("put_super\n"); ceph_mdsc_close_sessions(fsc->mdsc); } static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) { struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry)); struct ceph_mon_client *monc = &fsc->client->monc; struct ceph_statfs st; int i, err; u64 data_pool; if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; } else { data_pool = CEPH_NOPOOL; } dout("statfs\n"); err = ceph_monc_do_statfs(monc, data_pool, &st); if (err < 0) return err; /* fill in kstatfs */ buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ /* * express utilization in terms of large blocks to avoid * overflow on 32-bit machines. * * NOTE: for the time being, we make bsize == frsize to humor * not-yet-ancient versions of glibc that are broken. * Someday, we will probably want to report a real block * size... whatever that may mean for a network file system! */ buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; /* * By default use root quota for stats; fallback to overall filesystem * usage if using 'noquotadf' mount option or if the root dir doesn't * have max_bytes quota set. */ if (ceph_test_mount_opt(fsc, NOQUOTADF) || !ceph_quota_update_statfs(fsc, buf)) { buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); } buf->f_files = le64_to_cpu(st.num_objects); buf->f_ffree = -1; buf->f_namelen = NAME_MAX; /* Must convert the fsid, for consistent values across arches */ buf->f_fsid.val[0] = 0; mutex_lock(&monc->mutex); for (i = 0 ; i < sizeof(monc->monmap->fsid) / sizeof(__le32) ; ++i) buf->f_fsid.val[0] ^= le32_to_cpu(((__le32 *)&monc->monmap->fsid)[i]); mutex_unlock(&monc->mutex); /* fold the fs_cluster_id into the upper bits */ buf->f_fsid.val[1] = monc->fs_cluster_id; return 0; } static int ceph_sync_fs(struct super_block *sb, int wait) { struct ceph_fs_client *fsc = ceph_sb_to_client(sb); if (!wait) { dout("sync_fs (non-blocking)\n"); ceph_flush_dirty_caps(fsc->mdsc); dout("sync_fs (non-blocking) done\n"); return 0; } dout("sync_fs (blocking)\n"); ceph_osdc_sync(&fsc->client->osdc); ceph_mdsc_sync(fsc->mdsc); dout("sync_fs (blocking) done\n"); return 0; } /* * mount options */ enum { Opt_wsize, Opt_rsize, Opt_rasize, Opt_caps_wanted_delay_min, Opt_caps_wanted_delay_max, Opt_caps_max, Opt_readdir_max_entries, Opt_readdir_max_bytes, Opt_congestion_kb, /* int args above */ Opt_snapdirname, Opt_mds_namespace, Opt_recover_session, Opt_source, /* string args above */ Opt_dirstat, Opt_rbytes, Opt_asyncreaddir, Opt_dcache, Opt_ino32, Opt_fscache, Opt_poolperm, Opt_require_active_mds, Opt_acl, Opt_quotadf, Opt_copyfrom, Opt_wsync, }; enum ceph_recover_session_mode { ceph_recover_session_no, ceph_recover_session_clean }; static const struct constant_table ceph_param_recover[] = { { "no", ceph_recover_session_no }, { "clean", ceph_recover_session_clean }, {} }; static const struct fs_parameter_spec ceph_mount_parameters[] = { fsparam_flag_no ("acl", Opt_acl), fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir), fsparam_s32 ("caps_max", Opt_caps_max), fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max), fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min), fsparam_u32 ("write_congestion_kb", Opt_congestion_kb), fsparam_flag_no ("copyfrom", Opt_copyfrom), fsparam_flag_no ("dcache", Opt_dcache), fsparam_flag_no ("dirstat", Opt_dirstat), fsparam_flag_no ("fsc", Opt_fscache), // fsc|nofsc fsparam_string ("fsc", Opt_fscache), // fsc=... fsparam_flag_no ("ino32", Opt_ino32), fsparam_string ("mds_namespace", Opt_mds_namespace), fsparam_flag_no ("poolperm", Opt_poolperm), fsparam_flag_no ("quotadf", Opt_quotadf), fsparam_u32 ("rasize", Opt_rasize), fsparam_flag_no ("rbytes", Opt_rbytes), fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes), fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries), fsparam_enum ("recover_session", Opt_recover_session, ceph_param_recover), fsparam_flag_no ("require_active_mds", Opt_require_active_mds), fsparam_u32 ("rsize", Opt_rsize), fsparam_string ("snapdirname", Opt_snapdirname), fsparam_string ("source", Opt_source), fsparam_u32 ("wsize", Opt_wsize), fsparam_flag_no ("wsync", Opt_wsync), {} }; struct ceph_parse_opts_ctx { struct ceph_options *copts; struct ceph_mount_options *opts; }; /* * Remove adjacent slashes and then the trailing slash, unless it is * the only remaining character. * * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/". */ static void canonicalize_path(char *path) { int i, j = 0; for (i = 0; path[i] != '\0'; i++) { if (path[i] != '/' || j < 1 || path[j - 1] != '/') path[j++] = path[i]; } if (j > 1 && path[j - 1] == '/') j--; path[j] = '\0'; } /* * Parse the source parameter. Distinguish the server list from the path. * * The source will look like: * <server_spec>[,<server_spec>...]:[<path>] * where * <server_spec> is <ip>[:<port>] * <path> is optional, but if present must begin with '/' */ static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc) { struct ceph_parse_opts_ctx *pctx = fc->fs_private; struct ceph_mount_options *fsopt = pctx->opts; char *dev_name = param->string, *dev_name_end; int ret; dout("%s '%s'\n", __func__, dev_name); if (!dev_name || !*dev_name) return invalfc(fc, "Empty source"); dev_name_end = strchr(dev_name, '/'); if (dev_name_end) { /* * The server_path will include the whole chars from userland * including the leading '/'. */ kfree(fsopt->server_path); fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); if (!fsopt->server_path) return -ENOMEM; canonicalize_path(fsopt->server_path); } else { dev_name_end = dev_name + strlen(dev_name); } dev_name_end--; /* back up to ':' separator */ if (dev_name_end < dev_name || *dev_name_end != ':') return invalfc(fc, "No path or : separator in source"); dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); if (fsopt->server_path) dout("server path '%s'\n", fsopt->server_path); ret = ceph_parse_mon_ips(param->string, dev_name_end - dev_name, pctx->copts, fc->log.log); if (ret) return ret; fc->source = param->string; param->string = NULL; return 0; } static int ceph_parse_mount_param(struct fs_context *fc, struct fs_parameter *param) { struct ceph_parse_opts_ctx *pctx = fc->fs_private; struct ceph_mount_options *fsopt = pctx->opts; struct fs_parse_result result; unsigned int mode; int token, ret; ret = ceph_parse_param(param, pctx->copts, fc->log.log); if (ret != -ENOPARAM) return ret; token = fs_parse(fc, ceph_mount_parameters, param, &result); dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); if (token < 0) return token; switch (token) { case Opt_snapdirname: if (strlen(param->string) > NAME_MAX) return invalfc(fc, "snapdirname too long"); kfree(fsopt->snapdir_name); fsopt->snapdir_name = param->string; param->string = NULL; break; case Opt_mds_namespace: kfree(fsopt->mds_namespace); fsopt->mds_namespace = param->string; param->string = NULL; break; case Opt_recover_session: mode = result.uint_32; if (mode == ceph_recover_session_no) fsopt->flags &= ~CEPH_MOUNT_OPT_CLEANRECOVER; else if (mode == ceph_recover_session_clean) fsopt->flags |= CEPH_MOUNT_OPT_CLEANRECOVER; else BUG(); break; case Opt_source: if (fc->source) return invalfc(fc, "Multiple sources specified"); return ceph_parse_source(param, fc); case Opt_wsize: if (result.uint_32 < PAGE_SIZE || result.uint_32 > CEPH_MAX_WRITE_SIZE) goto out_of_range; fsopt->wsize = ALIGN(result.uint_32, PAGE_SIZE); break; case Opt_rsize: if (result.uint_32 < PAGE_SIZE || result.uint_32 > CEPH_MAX_READ_SIZE) goto out_of_range; fsopt->rsize = ALIGN(result.uint_32, PAGE_SIZE); break; case Opt_rasize: fsopt->rasize = ALIGN(result.uint_32, PAGE_SIZE); break; case Opt_caps_wanted_delay_min: if (result.uint_32 < 1) goto out_of_range; fsopt->caps_wanted_delay_min = result.uint_32; break; case Opt_caps_wanted_delay_max: if (result.uint_32 < 1) goto out_of_range; fsopt->caps_wanted_delay_max = result.uint_32; break; case Opt_caps_max: if (result.int_32 < 0) goto out_of_range; fsopt->caps_max = result.int_32; break; case Opt_readdir_max_entries: if (result.uint_32 < 1) goto out_of_range; fsopt->max_readdir = result.uint_32; break; case Opt_readdir_max_bytes: if (result.uint_32 < PAGE_SIZE && result.uint_32 != 0) goto out_of_range; fsopt->max_readdir_bytes = result.uint_32; break; case Opt_congestion_kb: if (result.uint_32 < 1024) /* at least 1M */ goto out_of_range; fsopt->congestion_kb = result.uint_32; break; case Opt_dirstat: if (!result.negated) fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; else fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; break; case Opt_rbytes: if (!result.negated) fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; else fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; break; case Opt_asyncreaddir: if (!result.negated) fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; else fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; break; case Opt_dcache: if (!result.negated) fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; else fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; break; case Opt_ino32: if (!result.negated) fsopt->flags |= CEPH_MOUNT_OPT_INO32; else fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; break; case Opt_fscache: #ifdef CONFIG_CEPH_FSCACHE kfree(fsopt->fscache_uniq); fsopt->fscache_uniq = NULL; if (result.negated) { fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; } else { fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; fsopt->fscache_uniq = param->string; param->string = NULL; } break; #else return invalfc(fc, "fscache support is disabled"); #endif case Opt_poolperm: if (!result.negated) fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; else fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; break; case Opt_require_active_mds: if (!result.negated) fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; else fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; break; case Opt_quotadf: if (!result.negated) fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF; else fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF; break; case Opt_copyfrom: if (!result.negated) fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM; else fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM; break; case Opt_acl: if (!result.negated) { #ifdef CONFIG_CEPH_FS_POSIX_ACL fc->sb_flags |= SB_POSIXACL; #else return invalfc(fc, "POSIX ACL support is disabled"); #endif } else { fc->sb_flags &= ~SB_POSIXACL; } break; case Opt_wsync: if (!result.negated) fsopt->flags &= ~CEPH_MOUNT_OPT_ASYNC_DIROPS; else fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS; break; default: BUG(); } return 0; out_of_range: return invalfc(fc, "%s out of range", param->key); } static void destroy_mount_options(struct ceph_mount_options *args) { dout("destroy_mount_options %p\n", args); if (!args) return; kfree(args->snapdir_name); kfree(args->mds_namespace); kfree(args->server_path); kfree(args->fscache_uniq); kfree(args); } static int strcmp_null(const char *s1, const char *s2) { if (!s1 && !s2) return 0; if (s1 && !s2) return -1; if (!s1 && s2) return 1; return strcmp(s1, s2); } static int compare_mount_options(struct ceph_mount_options *new_fsopt, struct ceph_options *new_opt, struct ceph_fs_client *fsc) { struct ceph_mount_options *fsopt1 = new_fsopt; struct ceph_mount_options *fsopt2 = fsc->mount_options; int ofs = offsetof(struct ceph_mount_options, snapdir_name); int ret; ret = memcmp(fsopt1, fsopt2, ofs); if (ret) return ret; ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); if (ret) return ret; ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); if (ret) return ret; ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); if (ret) return ret; ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); if (ret) return ret; return ceph_compare_options(new_opt, fsc->client); } /** * ceph_show_options - Show mount options in /proc/mounts * @m: seq_file to write to * @root: root of that (sub)tree */ static int ceph_show_options(struct seq_file *m, struct dentry *root) { struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); struct ceph_mount_options *fsopt = fsc->mount_options; size_t pos; int ret; /* a comma between MNT/MS and client options */ seq_putc(m, ','); pos = m->count; ret = ceph_print_client_options(m, fsc->client, false); if (ret) return ret; /* retract our comma if no client options */ if (m->count == pos) m->count--; if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) seq_puts(m, ",dirstat"); if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES)) seq_puts(m, ",rbytes"); if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) seq_puts(m, ",noasyncreaddir"); if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) seq_puts(m, ",nodcache"); if (fsopt->flags & CEPH_MOUNT_OPT_INO32) seq_puts(m, ",ino32"); if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { seq_show_option(m, "fsc", fsopt->fscache_uniq); } if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) seq_puts(m, ",nopoolperm"); if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF) seq_puts(m, ",noquotadf"); #ifdef CONFIG_CEPH_FS_POSIX_ACL if (root->d_sb->s_flags & SB_POSIXACL) seq_puts(m, ",acl"); else seq_puts(m, ",noacl"); #endif if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0) seq_puts(m, ",copyfrom"); if (fsopt->mds_namespace) seq_show_option(m, "mds_namespace", fsopt->mds_namespace); if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER) seq_show_option(m, "recover_session", "clean"); if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS) seq_puts(m, ",nowsync"); if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) seq_printf(m, ",wsize=%u", fsopt->wsize); if (fsopt->rsize != CEPH_MAX_READ_SIZE) seq_printf(m, ",rsize=%u", fsopt->rsize); if (fsopt->rasize != CEPH_RASIZE_DEFAULT) seq_printf(m, ",rasize=%u", fsopt->rasize); if (fsopt->congestion_kb != default_congestion_kb()) seq_printf(m, ",write_congestion_kb=%u", fsopt->congestion_kb); if (fsopt->caps_max) seq_printf(m, ",caps_max=%d", fsopt->caps_max); if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) seq_printf(m, ",caps_wanted_delay_min=%u", fsopt->caps_wanted_delay_min); if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) seq_printf(m, ",caps_wanted_delay_max=%u", fsopt->caps_wanted_delay_max); if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) seq_printf(m, ",readdir_max_entries=%u", fsopt->max_readdir); if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) seq_printf(m, ",readdir_max_bytes=%u", fsopt->max_readdir_bytes); if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) seq_show_option(m, "snapdirname", fsopt->snapdir_name); return 0; } /* * handle any mon messages the standard library doesn't understand. * return error if we don't either. */ static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) { struct ceph_fs_client *fsc = client->private; int type = le16_to_cpu(msg->hdr.type); switch (type) { case CEPH_MSG_MDS_MAP: ceph_mdsc_handle_mdsmap(fsc->mdsc, msg); return 0; case CEPH_MSG_FS_MAP_USER: ceph_mdsc_handle_fsmap(fsc->mdsc, msg); return 0; default: return -1; } } /* * create a new fs client * * Success or not, this function consumes @fsopt and @opt. */ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, struct ceph_options *opt) { struct ceph_fs_client *fsc; int err; fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); if (!fsc) { err = -ENOMEM; goto fail; } fsc->client = ceph_create_client(opt, fsc); if (IS_ERR(fsc->client)) { err = PTR_ERR(fsc->client); goto fail; } opt = NULL; /* fsc->client now owns this */ fsc->client->extra_mon_dispatch = extra_mon_dispatch; ceph_set_opt(fsc->client, ABORT_ON_FULL); if (!fsopt->mds_namespace) { ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, 0, true); } else { ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP, 0, false); } fsc->mount_options = fsopt; fsc->sb = NULL; fsc->mount_state = CEPH_MOUNT_MOUNTING; fsc->filp_gen = 1; fsc->have_copy_from2 = true; atomic_long_set(&fsc->writeback_count, 0); err = -ENOMEM; /* * The number of concurrent works can be high but they don't need * to be processed in parallel, limit concurrency. */ fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0); if (!fsc->inode_wq) goto fail_client; fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1); if (!fsc->cap_wq) goto fail_inode_wq; spin_lock(&ceph_fsc_lock); list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list); spin_unlock(&ceph_fsc_lock); return fsc; fail_inode_wq: destroy_workqueue(fsc->inode_wq); fail_client: ceph_destroy_client(fsc->client); fail: kfree(fsc); if (opt) ceph_destroy_options(opt); destroy_mount_options(fsopt); return ERR_PTR(err); } static void flush_fs_workqueues(struct ceph_fs_client *fsc) { flush_workqueue(fsc->inode_wq); flush_workqueue(fsc->cap_wq); } static void destroy_fs_client(struct ceph_fs_client *fsc) { dout("destroy_fs_client %p\n", fsc); spin_lock(&ceph_fsc_lock); list_del(&fsc->metric_wakeup); spin_unlock(&ceph_fsc_lock); ceph_mdsc_destroy(fsc); destroy_workqueue(fsc->inode_wq); destroy_workqueue(fsc->cap_wq); destroy_mount_options(fsc->mount_options); ceph_destroy_client(fsc->client); kfree(fsc); dout("destroy_fs_client %p done\n", fsc); } /* * caches */ struct kmem_cache *ceph_inode_cachep; struct kmem_cache *ceph_cap_cachep; struct kmem_cache *ceph_cap_flush_cachep; struct kmem_cache *ceph_dentry_cachep; struct kmem_cache *ceph_file_cachep; struct kmem_cache *ceph_dir_file_cachep; struct kmem_cache *ceph_mds_request_cachep; mempool_t *ceph_wb_pagevec_pool; static void ceph_inode_init_once(void *foo) { struct ceph_inode_info *ci = foo; inode_init_once(&ci->vfs_inode); } static int __init init_caches(void) { int error = -ENOMEM; ceph_inode_cachep = kmem_cache_create("ceph_inode_info", sizeof(struct ceph_inode_info), __alignof__(struct ceph_inode_info), SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| SLAB_ACCOUNT, ceph_inode_init_once); if (!ceph_inode_cachep) return -ENOMEM; ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD); if (!ceph_cap_cachep) goto bad_cap; ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); if (!ceph_cap_flush_cachep) goto bad_cap_flush; ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); if (!ceph_dentry_cachep) goto bad_dentry; ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD); if (!ceph_file_cachep) goto bad_file; ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD); if (!ceph_dir_file_cachep) goto bad_dir_file; ceph_mds_request_cachep = KMEM_CACHE(ceph_mds_request, SLAB_MEM_SPREAD); if (!ceph_mds_request_cachep) goto bad_mds_req; ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, (CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT) * sizeof(struct page *)); if (!ceph_wb_pagevec_pool) goto bad_pagevec_pool; error = ceph_fscache_register(); if (error) goto bad_fscache; return 0; bad_fscache: kmem_cache_destroy(ceph_mds_request_cachep); bad_pagevec_pool: mempool_destroy(ceph_wb_pagevec_pool); bad_mds_req: kmem_cache_destroy(ceph_dir_file_cachep); bad_dir_file: kmem_cache_destroy(ceph_file_cachep); bad_file: kmem_cache_destroy(ceph_dentry_cachep); bad_dentry: kmem_cache_destroy(ceph_cap_flush_cachep); bad_cap_flush: kmem_cache_destroy(ceph_cap_cachep); bad_cap: kmem_cache_destroy(ceph_inode_cachep); return error; } static void destroy_caches(void) { /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(ceph_inode_cachep); kmem_cache_destroy(ceph_cap_cachep); kmem_cache_destroy(ceph_cap_flush_cachep); kmem_cache_destroy(ceph_dentry_cachep); kmem_cache_destroy(ceph_file_cachep); kmem_cache_destroy(ceph_dir_file_cachep); kmem_cache_destroy(ceph_mds_request_cachep); mempool_destroy(ceph_wb_pagevec_pool); ceph_fscache_unregister(); } static void __ceph_umount_begin(struct ceph_fs_client *fsc) { ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); ceph_mdsc_force_umount(fsc->mdsc); fsc->filp_gen++; // invalidate open files } /* * ceph_umount_begin - initiate forced umount. Tear down the * mount, skipping steps that may hang while waiting for server(s). */ static void ceph_umount_begin(struct super_block *sb) { struct ceph_fs_client *fsc = ceph_sb_to_client(sb); dout("ceph_umount_begin - starting forced umount\n"); if (!fsc) return; fsc->mount_state = CEPH_MOUNT_SHUTDOWN; __ceph_umount_begin(fsc); } static const struct super_operations ceph_super_ops = { .alloc_inode = ceph_alloc_inode, .free_inode = ceph_free_inode, .write_inode = ceph_write_inode, .drop_inode = generic_delete_inode, .evict_inode = ceph_evict_inode, .sync_fs = ceph_sync_fs, .put_super = ceph_put_super, .show_options = ceph_show_options, .statfs = ceph_statfs, .umount_begin = ceph_umount_begin, }; /* * Bootstrap mount by opening the root directory. Note the mount * @started time from caller, and time out if this takes too long. */ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, const char *path, unsigned long started) { struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req = NULL; int err; struct dentry *root; /* open dir */ dout("open_root_inode opening '%s'\n", path); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); if (IS_ERR(req)) return ERR_CAST(req); req->r_path1 = kstrdup(path, GFP_NOFS); if (!req->r_path1) { root = ERR_PTR(-ENOMEM); goto out; } req->r_ino1.ino = CEPH_INO_ROOT; req->r_ino1.snap = CEPH_NOSNAP; req->r_started = started; req->r_timeout = fsc->client->options->mount_timeout; req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); req->r_num_caps = 2; err = ceph_mdsc_do_request(mdsc, NULL, req); if (err == 0) { struct inode *inode = req->r_target_inode; req->r_target_inode = NULL; dout("open_root_inode success\n"); root = d_make_root(inode); if (!root) { root = ERR_PTR(-ENOMEM); goto out; } dout("open_root_inode success, root dentry is %p\n", root); } else { root = ERR_PTR(err); } out: ceph_mdsc_put_request(req); return root; } /* * mount: join the ceph cluster, and open root directory. */ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, struct fs_context *fc) { int err; unsigned long started = jiffies; /* note the start time */ struct dentry *root; dout("mount start %p\n", fsc); mutex_lock(&fsc->client->mount_mutex); if (!fsc->sb->s_root) { const char *path = fsc->mount_options->server_path ? fsc->mount_options->server_path + 1 : ""; err = __ceph_open_session(fsc->client, started); if (err < 0) goto out; /* setup fscache */ if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) { err = ceph_fscache_register_fs(fsc, fc); if (err < 0) goto out; } dout("mount opening path '%s'\n", path); ceph_fs_debugfs_init(fsc); root = open_root_dentry(fsc, path, started); if (IS_ERR(root)) { err = PTR_ERR(root); goto out; } fsc->sb->s_root = dget(root); } else { root = dget(fsc->sb->s_root); } fsc->mount_state = CEPH_MOUNT_MOUNTED; dout("mount success\n"); mutex_unlock(&fsc->client->mount_mutex); return root; out: mutex_unlock(&fsc->client->mount_mutex); return ERR_PTR(err); } static int ceph_set_super(struct super_block *s, struct fs_context *fc) { struct ceph_fs_client *fsc = s->s_fs_info; int ret; dout("set_super %p\n", s); s->s_maxbytes = MAX_LFS_FILESIZE; s->s_xattr = ceph_xattr_handlers; fsc->sb = s; fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */ s->s_op = &ceph_super_ops; s->s_d_op = &ceph_dentry_ops; s->s_export_op = &ceph_export_ops; s->s_time_gran = 1; s->s_time_min = 0; s->s_time_max = U32_MAX; ret = set_anon_super_fc(s, fc); if (ret != 0) fsc->sb = NULL; return ret; } /* * share superblock if same fs AND options */ static int ceph_compare_super(struct super_block *sb, struct fs_context *fc) { struct ceph_fs_client *new = fc->s_fs_info; struct ceph_mount_options *fsopt = new->mount_options; struct ceph_options *opt = new->client->options; struct ceph_fs_client *fsc = ceph_sb_to_client(sb); dout("ceph_compare_super %p\n", sb); if (compare_mount_options(fsopt, opt, fsc)) { dout("monitor(s)/mount options don't match\n"); return 0; } if ((opt->flags & CEPH_OPT_FSID) && ceph_fsid_compare(&opt->fsid, &fsc->client->fsid)) { dout("fsid doesn't match\n"); return 0; } if (fc->sb_flags != (sb->s_flags & ~SB_BORN)) { dout("flags differ\n"); return 0; } if (fsc->blocklisted && !ceph_test_mount_opt(fsc, CLEANRECOVER)) { dout("client is blocklisted (and CLEANRECOVER is not set)\n"); return 0; } if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { dout("client has been forcibly unmounted\n"); return 0; } return 1; } /* * construct our own bdi so we can control readahead, etc. */ static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) { int err; err = super_setup_bdi_name(sb, "ceph-%ld", atomic_long_inc_return(&bdi_seq)); if (err) return err; /* set ra_pages based on rasize mount option? */ sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT; /* set io_pages based on max osd read size */ sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT; return 0; } static int ceph_get_tree(struct fs_context *fc) { struct ceph_parse_opts_ctx *pctx = fc->fs_private; struct super_block *sb; struct ceph_fs_client *fsc; struct dentry *res; int (*compare_super)(struct super_block *, struct fs_context *) = ceph_compare_super; int err; dout("ceph_get_tree\n"); if (!fc->source) return invalfc(fc, "No source"); /* create client (which we may/may not use) */ fsc = create_fs_client(pctx->opts, pctx->copts); pctx->opts = NULL; pctx->copts = NULL; if (IS_ERR(fsc)) { err = PTR_ERR(fsc); goto out_final; } err = ceph_mdsc_init(fsc); if (err < 0) goto out; if (ceph_test_opt(fsc->client, NOSHARE)) compare_super = NULL; fc->s_fs_info = fsc; sb = sget_fc(fc, compare_super, ceph_set_super); fc->s_fs_info = NULL; if (IS_ERR(sb)) { err = PTR_ERR(sb); goto out; } if (ceph_sb_to_client(sb) != fsc) { destroy_fs_client(fsc); fsc = ceph_sb_to_client(sb); dout("get_sb got existing client %p\n", fsc); } else { dout("get_sb using new client %p\n", fsc); err = ceph_setup_bdi(sb, fsc); if (err < 0) goto out_splat; } res = ceph_real_mount(fsc, fc); if (IS_ERR(res)) { err = PTR_ERR(res); goto out_splat; } dout("root %p inode %p ino %llx.%llx\n", res, d_inode(res), ceph_vinop(d_inode(res))); fc->root = fsc->sb->s_root; return 0; out_splat: if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) { pr_info("No mds server is up or the cluster is laggy\n"); err = -EHOSTUNREACH; } ceph_mdsc_close_sessions(fsc->mdsc); deactivate_locked_super(sb); goto out_final; out: destroy_fs_client(fsc); out_final: dout("ceph_get_tree fail %d\n", err); return err; } static void ceph_free_fc(struct fs_context *fc) { struct ceph_parse_opts_ctx *pctx = fc->fs_private; if (pctx) { destroy_mount_options(pctx->opts); ceph_destroy_options(pctx->copts); kfree(pctx); } } static int ceph_reconfigure_fc(struct fs_context *fc) { struct ceph_parse_opts_ctx *pctx = fc->fs_private; struct ceph_mount_options *fsopt = pctx->opts; struct ceph_fs_client *fsc = ceph_sb_to_client(fc->root->d_sb); if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS) ceph_set_mount_opt(fsc, ASYNC_DIROPS); else ceph_clear_mount_opt(fsc, ASYNC_DIROPS); sync_filesystem(fc->root->d_sb); return 0; } static const struct fs_context_operations ceph_context_ops = { .free = ceph_free_fc, .parse_param = ceph_parse_mount_param, .get_tree = ceph_get_tree, .reconfigure = ceph_reconfigure_fc, }; /* * Set up the filesystem mount context. */ static int ceph_init_fs_context(struct fs_context *fc) { struct ceph_parse_opts_ctx *pctx; struct ceph_mount_options *fsopt; pctx = kzalloc(sizeof(*pctx), GFP_KERNEL); if (!pctx) return -ENOMEM; pctx->copts = ceph_alloc_options(); if (!pctx->copts) goto nomem; pctx->opts = kzalloc(sizeof(*pctx->opts), GFP_KERNEL); if (!pctx->opts) goto nomem; fsopt = pctx->opts; fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; fsopt->wsize = CEPH_MAX_WRITE_SIZE; fsopt->rsize = CEPH_MAX_READ_SIZE; fsopt->rasize = CEPH_RASIZE_DEFAULT; fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); if (!fsopt->snapdir_name) goto nomem; fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; fsopt->congestion_kb = default_congestion_kb(); #ifdef CONFIG_CEPH_FS_POSIX_ACL fc->sb_flags |= SB_POSIXACL; #endif fc->fs_private = pctx; fc->ops = &ceph_context_ops; return 0; nomem: destroy_mount_options(pctx->opts); ceph_destroy_options(pctx->copts); kfree(pctx); return -ENOMEM; } static void ceph_kill_sb(struct super_block *s) { struct ceph_fs_client *fsc = ceph_sb_to_client(s); dout("kill_sb %p\n", s); ceph_mdsc_pre_umount(fsc->mdsc); flush_fs_workqueues(fsc); /* * Though the kill_anon_super() will finally trigger the * sync_filesystem() anyway, we still need to do it here * and then bump the stage of shutdown to stop the work * queue as earlier as possible. */ sync_filesystem(s); fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED; kill_anon_super(s); fsc->client->extra_mon_dispatch = NULL; ceph_fs_debugfs_cleanup(fsc); ceph_fscache_unregister_fs(fsc); destroy_fs_client(fsc); } static struct file_system_type ceph_fs_type = { .owner = THIS_MODULE, .name = "ceph", .init_fs_context = ceph_init_fs_context, .kill_sb = ceph_kill_sb, .fs_flags = FS_RENAME_DOES_D_MOVE, }; MODULE_ALIAS_FS("ceph"); int ceph_force_reconnect(struct super_block *sb) { struct ceph_fs_client *fsc = ceph_sb_to_client(sb); int err = 0; fsc->mount_state = CEPH_MOUNT_RECOVER; __ceph_umount_begin(fsc); /* Make sure all page caches get invalidated. * see remove_session_caps_cb() */ flush_workqueue(fsc->inode_wq); /* In case that we were blocklisted. This also reset * all mon/osd connections */ ceph_reset_client_addr(fsc->client); ceph_osdc_clear_abort_err(&fsc->client->osdc); fsc->blocklisted = false; fsc->mount_state = CEPH_MOUNT_MOUNTED; if (sb->s_root) { err = __ceph_do_getattr(d_inode(sb->s_root), NULL, CEPH_STAT_CAP_INODE, true); } return err; } static int __init init_ceph(void) { int ret = init_caches(); if (ret) goto out; ceph_flock_init(); ret = register_filesystem(&ceph_fs_type); if (ret) goto out_caches; pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); return 0; out_caches: destroy_caches(); out: return ret; } static void __exit exit_ceph(void) { dout("exit_ceph\n"); unregister_filesystem(&ceph_fs_type); destroy_caches(); } static int param_set_metrics(const char *val, const struct kernel_param *kp) { struct ceph_fs_client *fsc; int ret; ret = param_set_bool(val, kp); if (ret) { pr_err("Failed to parse sending metrics switch value '%s'\n", val); return ret; } else if (!disable_send_metrics) { // wake up all the mds clients spin_lock(&ceph_fsc_lock); list_for_each_entry(fsc, &ceph_fsc_list, metric_wakeup) { metric_schedule_delayed(&fsc->mdsc->metric); } spin_unlock(&ceph_fsc_lock); } return 0; } static const struct kernel_param_ops param_ops_metrics = { .set = param_set_metrics, .get = param_get_bool, }; bool disable_send_metrics = false; module_param_cb(disable_send_metrics, ¶m_ops_metrics, &disable_send_metrics, 0644); MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)"); module_init(init_ceph); module_exit(exit_ceph); MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); MODULE_DESCRIPTION("Ceph filesystem for Linux"); MODULE_LICENSE("GPL"); |
7 7 7 6 6 6 7 7 7 7 7 7 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 | // SPDX-License-Identifier: GPL-2.0-or-later /* * usbusy2y.c - ALSA USB US-428 Driver * 2005-04-14 Karsten Wiese Version 0.8.7.2: Call snd_card_free() instead of snd_card_free_in_thread() to prevent oops with dead keyboard symptom. Tested ok with kernel 2.6.12-rc2. 2004-12-14 Karsten Wiese Version 0.8.7.1: snd_pcm_open for rawusb pcm-devices now returns -EBUSY if called without rawusb's hwdep device being open. 2004-12-02 Karsten Wiese Version 0.8.7: Use macro usb_maxpacket() for portability. 2004-10-26 Karsten Wiese Version 0.8.6: wake_up() process waiting in usx2y_urbs_start() on error. 2004-10-21 Karsten Wiese Version 0.8.5: nrpacks is runtime or compiletime configurable now with tested values from 1 to 4. 2004-10-03 Karsten Wiese Version 0.8.2: Avoid any possible racing while in prepare callback. 2004-09-30 Karsten Wiese Version 0.8.0: Simplified things and made ohci work again. 2004-09-20 Karsten Wiese Version 0.7.3: Use usb_kill_urb() instead of deprecated (kernel 2.6.9) usb_unlink_urb(). 2004-07-13 Karsten Wiese Version 0.7.1: Don't sleep in START/STOP callbacks anymore. us428 channels C/D not handled just for this version, sorry. 2004-06-21 Karsten Wiese Version 0.6.4: Temporarely suspend midi input to sanely call usb_set_interface() when setting format. 2004-06-12 Karsten Wiese Version 0.6.3: Made it thus the following rule is enforced: "All pcm substreams of one usx2y have to operate at the same rate & format." 2004-04-06 Karsten Wiese Version 0.6.0: Runs on 2.6.5 kernel without any "--with-debug=" things. us224 reported running. 2004-01-14 Karsten Wiese Version 0.5.1: Runs with 2.6.1 kernel. 2003-12-30 Karsten Wiese Version 0.4.1: Fix 24Bit 4Channel capturing for the us428. 2003-11-27 Karsten Wiese, Martin Langer Version 0.4: us122 support. us224 could be tested by uncommenting the sections containing USB_ID_US224 2003-11-03 Karsten Wiese Version 0.3: 24Bit support. "arecord -D hw:1 -c 2 -r 48000 -M -f S24_3LE|aplay -D hw:1 -c 2 -r 48000 -M -f S24_3LE" works. 2003-08-22 Karsten Wiese Version 0.0.8: Removed EZUSB Firmware. First Stage Firmwaredownload is now done by tascam-firmware downloader. See: http://usb-midi-fw.sourceforge.net/tascam-firmware.tar.gz 2003-06-18 Karsten Wiese Version 0.0.5: changed to compile with kernel 2.4.21 and alsa 0.9.4 2002-10-16 Karsten Wiese Version 0.0.4: compiles again with alsa-current. USB_ISO_ASAP not used anymore (most of the time), instead urb->start_frame is calculated here now, some calls inside usb-driver don't need to happen anymore. To get the best out of this: Disable APM-support in the kernel as APM-BIOS calls (once each second) hard disable interrupt for many precious milliseconds. This helped me much on my slowish PII 400 & PIII 500. ACPI yet untested but might cause the same bad behaviour. Use a kernel with lowlatency and preemptiv patches applied. To autoload snd-usb-midi append a line post-install snd-usb-us428 modprobe snd-usb-midi to /etc/modules.conf. known problems: sliders, knobs, lights not yet handled except MASTER Volume slider. "pcm -c 2" doesn't work. "pcm -c 2 -m direct_interleaved" does. KDE3: "Enable full duplex operation" deadlocks. 2002-08-31 Karsten Wiese Version 0.0.3: audio also simplex; simplifying: iso urbs only 1 packet, melted structs. ASYNC_UNLINK not used anymore: no more crashes so far..... for alsa 0.9 rc3. 2002-08-09 Karsten Wiese Version 0.0.2: midi works with snd-usb-midi, audio (only fullduplex now) with i.e. bristol. The firmware has been sniffed from win2k us-428 driver 3.09. * Copyright (c) 2002 - 2004 Karsten Wiese */ #include <linux/init.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/usb.h> #include <sound/core.h> #include <sound/initval.h> #include <sound/pcm.h> #include <sound/rawmidi.h> #include "usx2y.h" #include "usbusx2y.h" #include "usX2Yhwdep.h" MODULE_AUTHOR("Karsten Wiese <annabellesgarden@yahoo.de>"); MODULE_DESCRIPTION("TASCAM "NAME_ALLCAPS" Version 0.8.7.2"); MODULE_LICENSE("GPL"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-max */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* Id for this card */ static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; /* Enable this card */ module_param_array(index, int, NULL, 0444); MODULE_PARM_DESC(index, "Index value for "NAME_ALLCAPS"."); module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for "NAME_ALLCAPS"."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable "NAME_ALLCAPS"."); static int snd_usx2y_card_used[SNDRV_CARDS]; static void snd_usx2y_card_private_free(struct snd_card *card); static void usx2y_unlinkseq(struct snd_usx2y_async_seq *s); #ifdef USX2Y_NRPACKS_VARIABLE int nrpacks = USX2Y_NRPACKS; /* number of packets per urb */ module_param(nrpacks, int, 0444); MODULE_PARM_DESC(nrpacks, "Number of packets per URB."); #endif /* * pipe 4 is used for switching the lamps, setting samplerate, volumes .... */ static void i_usx2y_out04_int(struct urb *urb) { #ifdef CONFIG_SND_DEBUG if (urb->status) { int i; struct usx2ydev *usx2y = urb->context; for (i = 0; i < 10 && usx2y->as04.urb[i] != urb; i++) ; snd_printdd("%s urb %i status=%i\n", __func__, i, urb->status); } #endif } static void i_usx2y_in04_int(struct urb *urb) { int err = 0; struct usx2ydev *usx2y = urb->context; struct us428ctls_sharedmem *us428ctls = usx2y->us428ctls_sharedmem; struct us428_p4out *p4out; int i, j, n, diff, send; usx2y->in04_int_calls++; if (urb->status) { snd_printdd("Interrupt Pipe 4 came back with status=%i\n", urb->status); return; } // printk("%i:0x%02X ", 8, (int)((unsigned char*)usx2y->in04_buf)[8]); Master volume shows 0 here if fader is at max during boot ?!? if (us428ctls) { diff = -1; if (us428ctls->ctl_snapshot_last == -2) { diff = 0; memcpy(usx2y->in04_last, usx2y->in04_buf, sizeof(usx2y->in04_last)); us428ctls->ctl_snapshot_last = -1; } else { for (i = 0; i < 21; i++) { if (usx2y->in04_last[i] != ((char *)usx2y->in04_buf)[i]) { if (diff < 0) diff = i; usx2y->in04_last[i] = ((char *)usx2y->in04_buf)[i]; } } } if (diff >= 0) { n = us428ctls->ctl_snapshot_last + 1; if (n >= N_US428_CTL_BUFS || n < 0) n = 0; memcpy(us428ctls->ctl_snapshot + n, usx2y->in04_buf, sizeof(us428ctls->ctl_snapshot[0])); us428ctls->ctl_snapshot_differs_at[n] = diff; us428ctls->ctl_snapshot_last = n; wake_up(&usx2y->us428ctls_wait_queue_head); } } if (usx2y->us04) { if (!usx2y->us04->submitted) { do { err = usb_submit_urb(usx2y->us04->urb[usx2y->us04->submitted++], GFP_ATOMIC); } while (!err && usx2y->us04->submitted < usx2y->us04->len); } } else { if (us428ctls && us428ctls->p4out_last >= 0 && us428ctls->p4out_last < N_US428_P4OUT_BUFS) { if (us428ctls->p4out_last != us428ctls->p4out_sent) { send = us428ctls->p4out_sent + 1; if (send >= N_US428_P4OUT_BUFS) send = 0; for (j = 0; j < URBS_ASYNC_SEQ && !err; ++j) { if (!usx2y->as04.urb[j]->status) { p4out = us428ctls->p4out + send; // FIXME if more than 1 p4out is new, 1 gets lost. usb_fill_bulk_urb(usx2y->as04.urb[j], usx2y->dev, usb_sndbulkpipe(usx2y->dev, 0x04), &p4out->val.vol, p4out->type == ELT_LIGHT ? sizeof(struct us428_lights) : 5, i_usx2y_out04_int, usx2y); err = usb_submit_urb(usx2y->as04.urb[j], GFP_ATOMIC); us428ctls->p4out_sent = send; break; } } } } } if (err) snd_printk(KERN_ERR "in04_int() usb_submit_urb err=%i\n", err); urb->dev = usx2y->dev; usb_submit_urb(urb, GFP_ATOMIC); } /* * Prepare some urbs */ int usx2y_async_seq04_init(struct usx2ydev *usx2y) { int err = 0, i; if (WARN_ON(usx2y->as04.buffer)) return -EBUSY; usx2y->as04.buffer = kmalloc_array(URBS_ASYNC_SEQ, URB_DATA_LEN_ASYNC_SEQ, GFP_KERNEL); if (!usx2y->as04.buffer) { err = -ENOMEM; } else { for (i = 0; i < URBS_ASYNC_SEQ; ++i) { usx2y->as04.urb[i] = usb_alloc_urb(0, GFP_KERNEL); if (!usx2y->as04.urb[i]) { err = -ENOMEM; break; } usb_fill_bulk_urb(usx2y->as04.urb[i], usx2y->dev, usb_sndbulkpipe(usx2y->dev, 0x04), usx2y->as04.buffer + URB_DATA_LEN_ASYNC_SEQ * i, 0, i_usx2y_out04_int, usx2y); err = usb_urb_ep_type_check(usx2y->as04.urb[i]); if (err < 0) break; } } if (err) usx2y_unlinkseq(&usx2y->as04); return err; } int usx2y_in04_init(struct usx2ydev *usx2y) { int err; if (WARN_ON(usx2y->in04_urb)) return -EBUSY; usx2y->in04_urb = usb_alloc_urb(0, GFP_KERNEL); if (!usx2y->in04_urb) { err = -ENOMEM; goto error; } usx2y->in04_buf = kmalloc(21, GFP_KERNEL); if (!usx2y->in04_buf) { err = -ENOMEM; goto error; } init_waitqueue_head(&usx2y->in04_wait_queue); usb_fill_int_urb(usx2y->in04_urb, usx2y->dev, usb_rcvintpipe(usx2y->dev, 0x4), usx2y->in04_buf, 21, i_usx2y_in04_int, usx2y, 10); if (usb_urb_ep_type_check(usx2y->in04_urb)) { err = -EINVAL; goto error; } return usb_submit_urb(usx2y->in04_urb, GFP_KERNEL); error: kfree(usx2y->in04_buf); usb_free_urb(usx2y->in04_urb); usx2y->in04_buf = NULL; usx2y->in04_urb = NULL; return err; } static void usx2y_unlinkseq(struct snd_usx2y_async_seq *s) { int i; for (i = 0; i < URBS_ASYNC_SEQ; ++i) { if (!s->urb[i]) continue; usb_kill_urb(s->urb[i]); usb_free_urb(s->urb[i]); s->urb[i] = NULL; } kfree(s->buffer); s->buffer = NULL; } static const struct usb_device_id snd_usx2y_usb_id_table[] = { { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x1604, .idProduct = USB_ID_US428 }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x1604, .idProduct = USB_ID_US122 }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x1604, .idProduct = USB_ID_US224 }, { /* terminator */ } }; MODULE_DEVICE_TABLE(usb, snd_usx2y_usb_id_table); static int usx2y_create_card(struct usb_device *device, struct usb_interface *intf, struct snd_card **cardp) { int dev; struct snd_card *card; int err; for (dev = 0; dev < SNDRV_CARDS; ++dev) if (enable[dev] && !snd_usx2y_card_used[dev]) break; if (dev >= SNDRV_CARDS) return -ENODEV; err = snd_card_new(&intf->dev, index[dev], id[dev], THIS_MODULE, sizeof(struct usx2ydev), &card); if (err < 0) return err; snd_usx2y_card_used[usx2y(card)->card_index = dev] = 1; card->private_free = snd_usx2y_card_private_free; usx2y(card)->dev = device; init_waitqueue_head(&usx2y(card)->prepare_wait_queue); init_waitqueue_head(&usx2y(card)->us428ctls_wait_queue_head); mutex_init(&usx2y(card)->pcm_mutex); INIT_LIST_HEAD(&usx2y(card)->midi_list); strcpy(card->driver, "USB "NAME_ALLCAPS""); sprintf(card->shortname, "TASCAM "NAME_ALLCAPS""); sprintf(card->longname, "%s (%x:%x if %d at %03d/%03d)", card->shortname, le16_to_cpu(device->descriptor.idVendor), le16_to_cpu(device->descriptor.idProduct), 0,//us428(card)->usbmidi.ifnum, usx2y(card)->dev->bus->busnum, usx2y(card)->dev->devnum); *cardp = card; return 0; } static void snd_usx2y_card_private_free(struct snd_card *card) { struct usx2ydev *usx2y = usx2y(card); kfree(usx2y->in04_buf); usb_free_urb(usx2y->in04_urb); if (usx2y->us428ctls_sharedmem) free_pages_exact(usx2y->us428ctls_sharedmem, US428_SHAREDMEM_PAGES); if (usx2y->card_index >= 0 && usx2y->card_index < SNDRV_CARDS) snd_usx2y_card_used[usx2y->card_index] = 0; } static void snd_usx2y_disconnect(struct usb_interface *intf) { struct snd_card *card; struct usx2ydev *usx2y; struct list_head *p; card = usb_get_intfdata(intf); if (!card) return; usx2y = usx2y(card); usx2y->chip_status = USX2Y_STAT_CHIP_HUP; usx2y_unlinkseq(&usx2y->as04); usb_kill_urb(usx2y->in04_urb); snd_card_disconnect(card); /* release the midi resources */ list_for_each(p, &usx2y->midi_list) { snd_usbmidi_disconnect(p); } if (usx2y->us428ctls_sharedmem) wake_up(&usx2y->us428ctls_wait_queue_head); snd_card_free_when_closed(card); } static int snd_usx2y_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *device = interface_to_usbdev(intf); struct snd_card *card; int err; #ifdef USX2Y_NRPACKS_VARIABLE if (nrpacks < 0 || nrpacks > USX2Y_NRPACKS_MAX) return -EINVAL; #endif if (le16_to_cpu(device->descriptor.idVendor) != 0x1604 || (le16_to_cpu(device->descriptor.idProduct) != USB_ID_US122 && le16_to_cpu(device->descriptor.idProduct) != USB_ID_US224 && le16_to_cpu(device->descriptor.idProduct) != USB_ID_US428)) return -EINVAL; err = usx2y_create_card(device, intf, &card); if (err < 0) return err; err = usx2y_hwdep_new(card, device); if (err < 0) goto error; err = snd_card_register(card); if (err < 0) goto error; dev_set_drvdata(&intf->dev, card); return 0; error: snd_card_free(card); return err; } static struct usb_driver snd_usx2y_usb_driver = { .name = "snd-usb-usx2y", .probe = snd_usx2y_probe, .disconnect = snd_usx2y_disconnect, .id_table = snd_usx2y_usb_id_table, }; module_usb_driver(snd_usx2y_usb_driver); |
64 69 69 66 69 65 64 64 64 61 60 59 59 64 64 64 64 64 64 64 167 61 4 4 3 3 4 3 4 4 4 4 4 4 4 4 4 4 3 3 59 59 1 1 3 1 1 1 1 1 1 6 5 3 1 1 60 60 60 55 55 59 60 60 55 55 55 55 55 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 | // SPDX-License-Identifier: GPL-2.0-only /* * Media entity * * Copyright (C) 2010 Nokia Corporation * * Contacts: Laurent Pinchart <laurent.pinchart@ideasonboard.com> * Sakari Ailus <sakari.ailus@iki.fi> */ #include <linux/bitmap.h> #include <linux/property.h> #include <linux/slab.h> #include <media/media-entity.h> #include <media/media-device.h> static inline const char *gobj_type(enum media_gobj_type type) { switch (type) { case MEDIA_GRAPH_ENTITY: return "entity"; case MEDIA_GRAPH_PAD: return "pad"; case MEDIA_GRAPH_LINK: return "link"; case MEDIA_GRAPH_INTF_DEVNODE: return "intf-devnode"; default: return "unknown"; } } static inline const char *intf_type(struct media_interface *intf) { switch (intf->type) { case MEDIA_INTF_T_DVB_FE: return "dvb-frontend"; case MEDIA_INTF_T_DVB_DEMUX: return "dvb-demux"; case MEDIA_INTF_T_DVB_DVR: return "dvb-dvr"; case MEDIA_INTF_T_DVB_CA: return "dvb-ca"; case MEDIA_INTF_T_DVB_NET: return "dvb-net"; case MEDIA_INTF_T_V4L_VIDEO: return "v4l-video"; case MEDIA_INTF_T_V4L_VBI: return "v4l-vbi"; case MEDIA_INTF_T_V4L_RADIO: return "v4l-radio"; case MEDIA_INTF_T_V4L_SUBDEV: return "v4l-subdev"; case MEDIA_INTF_T_V4L_SWRADIO: return "v4l-swradio"; case MEDIA_INTF_T_V4L_TOUCH: return "v4l-touch"; default: return "unknown-intf"; } }; __must_check int __media_entity_enum_init(struct media_entity_enum *ent_enum, int idx_max) { idx_max = ALIGN(idx_max, BITS_PER_LONG); ent_enum->bmap = kcalloc(idx_max / BITS_PER_LONG, sizeof(long), GFP_KERNEL); if (!ent_enum->bmap) return -ENOMEM; bitmap_zero(ent_enum->bmap, idx_max); ent_enum->idx_max = idx_max; return 0; } EXPORT_SYMBOL_GPL(__media_entity_enum_init); void media_entity_enum_cleanup(struct media_entity_enum *ent_enum) { kfree(ent_enum->bmap); } EXPORT_SYMBOL_GPL(media_entity_enum_cleanup); /** * dev_dbg_obj - Prints in debug mode a change on some object * * @event_name: Name of the event to report. Could be __func__ * @gobj: Pointer to the object * * Enabled only if DEBUG or CONFIG_DYNAMIC_DEBUG. Otherwise, it * won't produce any code. */ static void dev_dbg_obj(const char *event_name, struct media_gobj *gobj) { #if defined(DEBUG) || defined (CONFIG_DYNAMIC_DEBUG) switch (media_type(gobj)) { case MEDIA_GRAPH_ENTITY: dev_dbg(gobj->mdev->dev, "%s id %u: entity '%s'\n", event_name, media_id(gobj), gobj_to_entity(gobj)->name); break; case MEDIA_GRAPH_LINK: { struct media_link *link = gobj_to_link(gobj); dev_dbg(gobj->mdev->dev, "%s id %u: %s link id %u ==> id %u\n", event_name, media_id(gobj), media_type(link->gobj0) == MEDIA_GRAPH_PAD ? "data" : "interface", media_id(link->gobj0), media_id(link->gobj1)); break; } case MEDIA_GRAPH_PAD: { struct media_pad *pad = gobj_to_pad(gobj); dev_dbg(gobj->mdev->dev, "%s id %u: %s%spad '%s':%d\n", event_name, media_id(gobj), pad->flags & MEDIA_PAD_FL_SINK ? "sink " : "", pad->flags & MEDIA_PAD_FL_SOURCE ? "source " : "", pad->entity->name, pad->index); break; } case MEDIA_GRAPH_INTF_DEVNODE: { struct media_interface *intf = gobj_to_intf(gobj); struct media_intf_devnode *devnode = intf_to_devnode(intf); dev_dbg(gobj->mdev->dev, "%s id %u: intf_devnode %s - major: %d, minor: %d\n", event_name, media_id(gobj), intf_type(intf), devnode->major, devnode->minor); break; } } #endif } void media_gobj_create(struct media_device *mdev, enum media_gobj_type type, struct media_gobj *gobj) { BUG_ON(!mdev); gobj->mdev = mdev; /* Create a per-type unique object ID */ gobj->id = media_gobj_gen_id(type, ++mdev->id); switch (type) { case MEDIA_GRAPH_ENTITY: list_add_tail(&gobj->list, &mdev->entities); break; case MEDIA_GRAPH_PAD: list_add_tail(&gobj->list, &mdev->pads); break; case MEDIA_GRAPH_LINK: list_add_tail(&gobj->list, &mdev->links); break; case MEDIA_GRAPH_INTF_DEVNODE: list_add_tail(&gobj->list, &mdev->interfaces); break; } mdev->topology_version++; dev_dbg_obj(__func__, gobj); } void media_gobj_destroy(struct media_gobj *gobj) { /* Do nothing if the object is not linked. */ if (gobj->mdev == NULL) return; dev_dbg_obj(__func__, gobj); gobj->mdev->topology_version++; /* Remove the object from mdev list */ list_del(&gobj->list); gobj->mdev = NULL; } /* * TODO: Get rid of this. */ #define MEDIA_ENTITY_MAX_PADS 512 int media_entity_pads_init(struct media_entity *entity, u16 num_pads, struct media_pad *pads) { struct media_device *mdev = entity->graph_obj.mdev; unsigned int i; if (num_pads >= MEDIA_ENTITY_MAX_PADS) return -E2BIG; entity->num_pads = num_pads; entity->pads = pads; if (mdev) mutex_lock(&mdev->graph_mutex); for (i = 0; i < num_pads; i++) { pads[i].entity = entity; pads[i].index = i; if (mdev) media_gobj_create(mdev, MEDIA_GRAPH_PAD, &entity->pads[i].graph_obj); } if (mdev) mutex_unlock(&mdev->graph_mutex); return 0; } EXPORT_SYMBOL_GPL(media_entity_pads_init); /* ----------------------------------------------------------------------------- * Graph traversal */ static struct media_entity * media_entity_other(struct media_entity *entity, struct media_link *link) { if (link->source->entity == entity) return link->sink->entity; else return link->source->entity; } /* push an entity to traversal stack */ static void stack_push(struct media_graph *graph, struct media_entity *entity) { if (graph->top == MEDIA_ENTITY_ENUM_MAX_DEPTH - 1) { WARN_ON(1); return; } graph->top++; graph->stack[graph->top].link = entity->links.next; graph->stack[graph->top].entity = entity; } static struct media_entity *stack_pop(struct media_graph *graph) { struct media_entity *entity; entity = graph->stack[graph->top].entity; graph->top--; return entity; } #define link_top(en) ((en)->stack[(en)->top].link) #define stack_top(en) ((en)->stack[(en)->top].entity) /** * media_graph_walk_init - Allocate resources for graph walk * @graph: Media graph structure that will be used to walk the graph * @mdev: Media device * * Reserve resources for graph walk in media device's current * state. The memory must be released using * media_graph_walk_free(). * * Returns error on failure, zero on success. */ __must_check int media_graph_walk_init( struct media_graph *graph, struct media_device *mdev) { return media_entity_enum_init(&graph->ent_enum, mdev); } EXPORT_SYMBOL_GPL(media_graph_walk_init); /** * media_graph_walk_cleanup - Release resources related to graph walking * @graph: Media graph structure that was used to walk the graph */ void media_graph_walk_cleanup(struct media_graph *graph) { media_entity_enum_cleanup(&graph->ent_enum); } EXPORT_SYMBOL_GPL(media_graph_walk_cleanup); void media_graph_walk_start(struct media_graph *graph, struct media_entity *entity) { media_entity_enum_zero(&graph->ent_enum); media_entity_enum_set(&graph->ent_enum, entity); graph->top = 0; graph->stack[graph->top].entity = NULL; stack_push(graph, entity); dev_dbg(entity->graph_obj.mdev->dev, "begin graph walk at '%s'\n", entity->name); } EXPORT_SYMBOL_GPL(media_graph_walk_start); static void media_graph_walk_iter(struct media_graph *graph) { struct media_entity *entity = stack_top(graph); struct media_link *link; struct media_entity *next; link = list_entry(link_top(graph), typeof(*link), list); /* The link is not enabled so we do not follow. */ if (!(link->flags & MEDIA_LNK_FL_ENABLED)) { link_top(graph) = link_top(graph)->next; dev_dbg(entity->graph_obj.mdev->dev, "walk: skipping disabled link '%s':%u -> '%s':%u\n", link->source->entity->name, link->source->index, link->sink->entity->name, link->sink->index); return; } /* Get the entity at the other end of the link. */ next = media_entity_other(entity, link); /* Has the entity already been visited? */ if (media_entity_enum_test_and_set(&graph->ent_enum, next)) { link_top(graph) = link_top(graph)->next; dev_dbg(entity->graph_obj.mdev->dev, "walk: skipping entity '%s' (already seen)\n", next->name); return; } /* Push the new entity to stack and start over. */ link_top(graph) = link_top(graph)->next; stack_push(graph, next); dev_dbg(entity->graph_obj.mdev->dev, "walk: pushing '%s' on stack\n", next->name); lockdep_assert_held(&entity->graph_obj.mdev->graph_mutex); } struct media_entity *media_graph_walk_next(struct media_graph *graph) { struct media_entity *entity; if (stack_top(graph) == NULL) return NULL; /* * Depth first search. Push entity to stack and continue from * top of the stack until no more entities on the level can be * found. */ while (link_top(graph) != &stack_top(graph)->links) media_graph_walk_iter(graph); entity = stack_pop(graph); dev_dbg(entity->graph_obj.mdev->dev, "walk: returning entity '%s'\n", entity->name); return entity; } EXPORT_SYMBOL_GPL(media_graph_walk_next); int media_entity_get_fwnode_pad(struct media_entity *entity, struct fwnode_handle *fwnode, unsigned long direction_flags) { struct fwnode_endpoint endpoint; unsigned int i; int ret; if (!entity->ops || !entity->ops->get_fwnode_pad) { for (i = 0; i < entity->num_pads; i++) { if (entity->pads[i].flags & direction_flags) return i; } return -ENXIO; } ret = fwnode_graph_parse_endpoint(fwnode, &endpoint); if (ret) return ret; ret = entity->ops->get_fwnode_pad(entity, &endpoint); if (ret < 0) return ret; if (ret >= entity->num_pads) return -ENXIO; if (!(entity->pads[ret].flags & direction_flags)) return -ENXIO; return ret; } EXPORT_SYMBOL_GPL(media_entity_get_fwnode_pad); /* ----------------------------------------------------------------------------- * Pipeline management */ __must_check int __media_pipeline_start(struct media_entity *entity, struct media_pipeline *pipe) { struct media_device *mdev = entity->graph_obj.mdev; struct media_graph *graph = &pipe->graph; struct media_entity *entity_err = entity; struct media_link *link; int ret; if (!pipe->streaming_count++) { ret = media_graph_walk_init(&pipe->graph, mdev); if (ret) goto error_graph_walk_start; } media_graph_walk_start(&pipe->graph, entity); while ((entity = media_graph_walk_next(graph))) { DECLARE_BITMAP(active, MEDIA_ENTITY_MAX_PADS); DECLARE_BITMAP(has_no_links, MEDIA_ENTITY_MAX_PADS); entity->stream_count++; if (entity->pipe && entity->pipe != pipe) { pr_err("Pipe active for %s. Can't start for %s\n", entity->name, entity_err->name); ret = -EBUSY; goto error; } entity->pipe = pipe; /* Already streaming --- no need to check. */ if (entity->stream_count > 1) continue; if (!entity->ops || !entity->ops->link_validate) continue; bitmap_zero(active, entity->num_pads); bitmap_fill(has_no_links, entity->num_pads); list_for_each_entry(link, &entity->links, list) { struct media_pad *pad = link->sink->entity == entity ? link->sink : link->source; /* Mark that a pad is connected by a link. */ bitmap_clear(has_no_links, pad->index, 1); /* * Pads that either do not need to connect or * are connected through an enabled link are * fine. */ if (!(pad->flags & MEDIA_PAD_FL_MUST_CONNECT) || link->flags & MEDIA_LNK_FL_ENABLED) bitmap_set(active, pad->index, 1); /* * Link validation will only take place for * sink ends of the link that are enabled. */ if (link->sink != pad || !(link->flags & MEDIA_LNK_FL_ENABLED)) continue; ret = entity->ops->link_validate(link); if (ret < 0 && ret != -ENOIOCTLCMD) { dev_dbg(entity->graph_obj.mdev->dev, "link validation failed for '%s':%u -> '%s':%u, error %d\n", link->source->entity->name, link->source->index, entity->name, link->sink->index, ret); goto error; } } /* Either no links or validated links are fine. */ bitmap_or(active, active, has_no_links, entity->num_pads); if (!bitmap_full(active, entity->num_pads)) { ret = -ENOLINK; dev_dbg(entity->graph_obj.mdev->dev, "'%s':%u must be connected by an enabled link\n", entity->name, (unsigned)find_first_zero_bit( active, entity->num_pads)); goto error; } } return 0; error: /* * Link validation on graph failed. We revert what we did and * return the error. */ media_graph_walk_start(graph, entity_err); while ((entity_err = media_graph_walk_next(graph))) { /* Sanity check for negative stream_count */ if (!WARN_ON_ONCE(entity_err->stream_count <= 0)) { entity_err->stream_count--; if (entity_err->stream_count == 0) entity_err->pipe = NULL; } /* * We haven't increased stream_count further than this * so we quit here. */ if (entity_err == entity) break; } error_graph_walk_start: if (!--pipe->streaming_count) media_graph_walk_cleanup(graph); return ret; } EXPORT_SYMBOL_GPL(__media_pipeline_start); __must_check int media_pipeline_start(struct media_entity *entity, struct media_pipeline *pipe) { struct media_device *mdev = entity->graph_obj.mdev; int ret; mutex_lock(&mdev->graph_mutex); ret = __media_pipeline_start(entity, pipe); mutex_unlock(&mdev->graph_mutex); return ret; } EXPORT_SYMBOL_GPL(media_pipeline_start); void __media_pipeline_stop(struct media_entity *entity) { struct media_graph *graph = &entity->pipe->graph; struct media_pipeline *pipe = entity->pipe; /* * If the following check fails, the driver has performed an * unbalanced call to media_pipeline_stop() */ if (WARN_ON(!pipe)) return; media_graph_walk_start(graph, entity); while ((entity = media_graph_walk_next(graph))) { /* Sanity check for negative stream_count */ if (!WARN_ON_ONCE(entity->stream_count <= 0)) { entity->stream_count--; if (entity->stream_count == 0) entity->pipe = NULL; } } if (!--pipe->streaming_count) media_graph_walk_cleanup(graph); } EXPORT_SYMBOL_GPL(__media_pipeline_stop); void media_pipeline_stop(struct media_entity *entity) { struct media_device *mdev = entity->graph_obj.mdev; mutex_lock(&mdev->graph_mutex); __media_pipeline_stop(entity); mutex_unlock(&mdev->graph_mutex); } EXPORT_SYMBOL_GPL(media_pipeline_stop); /* ----------------------------------------------------------------------------- * Links management */ static struct media_link *media_add_link(struct list_head *head) { struct media_link *link; link = kzalloc(sizeof(*link), GFP_KERNEL); if (link == NULL) return NULL; list_add_tail(&link->list, head); return link; } static void __media_entity_remove_link(struct media_entity *entity, struct media_link *link) { struct media_link *rlink, *tmp; struct media_entity *remote; if (link->source->entity == entity) remote = link->sink->entity; else remote = link->source->entity; list_for_each_entry_safe(rlink, tmp, &remote->links, list) { if (rlink != link->reverse) continue; if (link->source->entity == entity) remote->num_backlinks--; /* Remove the remote link */ list_del(&rlink->list); media_gobj_destroy(&rlink->graph_obj); kfree(rlink); if (--remote->num_links == 0) break; } list_del(&link->list); media_gobj_destroy(&link->graph_obj); kfree(link); } int media_get_pad_index(struct media_entity *entity, bool is_sink, enum media_pad_signal_type sig_type) { int i; bool pad_is_sink; if (!entity) return -EINVAL; for (i = 0; i < entity->num_pads; i++) { if (entity->pads[i].flags & MEDIA_PAD_FL_SINK) pad_is_sink = true; else if (entity->pads[i].flags & MEDIA_PAD_FL_SOURCE) pad_is_sink = false; else continue; /* This is an error! */ if (pad_is_sink != is_sink) continue; if (entity->pads[i].sig_type == sig_type) return i; } return -EINVAL; } EXPORT_SYMBOL_GPL(media_get_pad_index); int media_create_pad_link(struct media_entity *source, u16 source_pad, struct media_entity *sink, u16 sink_pad, u32 flags) { struct media_link *link; struct media_link *backlink; if (WARN_ON(!source || !sink) || WARN_ON(source_pad >= source->num_pads) || WARN_ON(sink_pad >= sink->num_pads)) return -EINVAL; if (WARN_ON(!(source->pads[source_pad].flags & MEDIA_PAD_FL_SOURCE))) return -EINVAL; if (WARN_ON(!(sink->pads[sink_pad].flags & MEDIA_PAD_FL_SINK))) return -EINVAL; link = media_add_link(&source->links); if (link == NULL) return -ENOMEM; link->source = &source->pads[source_pad]; link->sink = &sink->pads[sink_pad]; link->flags = flags & ~MEDIA_LNK_FL_INTERFACE_LINK; /* Initialize graph object embedded at the new link */ media_gobj_create(source->graph_obj.mdev, MEDIA_GRAPH_LINK, &link->graph_obj); /* Create the backlink. Backlinks are used to help graph traversal and * are not reported to userspace. */ backlink = media_add_link(&sink->links); if (backlink == NULL) { __media_entity_remove_link(source, link); return -ENOMEM; } backlink->source = &source->pads[source_pad]; backlink->sink = &sink->pads[sink_pad]; backlink->flags = flags; backlink->is_backlink = true; /* Initialize graph object embedded at the new link */ media_gobj_create(sink->graph_obj.mdev, MEDIA_GRAPH_LINK, &backlink->graph_obj); link->reverse = backlink; backlink->reverse = link; sink->num_backlinks++; sink->num_links++; source->num_links++; return 0; } EXPORT_SYMBOL_GPL(media_create_pad_link); int media_create_pad_links(const struct media_device *mdev, const u32 source_function, struct media_entity *source, const u16 source_pad, const u32 sink_function, struct media_entity *sink, const u16 sink_pad, u32 flags, const bool allow_both_undefined) { struct media_entity *entity; unsigned function; int ret; /* Trivial case: 1:1 relation */ if (source && sink) return media_create_pad_link(source, source_pad, sink, sink_pad, flags); /* Worse case scenario: n:n relation */ if (!source && !sink) { if (!allow_both_undefined) return 0; media_device_for_each_entity(source, mdev) { if (source->function != source_function) continue; media_device_for_each_entity(sink, mdev) { if (sink->function != sink_function) continue; ret = media_create_pad_link(source, source_pad, sink, sink_pad, flags); if (ret) return ret; flags &= ~(MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE); } } return 0; } /* Handle 1:n and n:1 cases */ if (source) function = sink_function; else function = source_function; media_device_for_each_entity(entity, mdev) { if (entity->function != function) continue; if (source) ret = media_create_pad_link(source, source_pad, entity, sink_pad, flags); else ret = media_create_pad_link(entity, source_pad, sink, sink_pad, flags); if (ret) return ret; flags &= ~(MEDIA_LNK_FL_ENABLED | MEDIA_LNK_FL_IMMUTABLE); } return 0; } EXPORT_SYMBOL_GPL(media_create_pad_links); void __media_entity_remove_links(struct media_entity *entity) { struct media_link *link, *tmp; list_for_each_entry_safe(link, tmp, &entity->links, list) __media_entity_remove_link(entity, link); entity->num_links = 0; entity->num_backlinks = 0; } EXPORT_SYMBOL_GPL(__media_entity_remove_links); void media_entity_remove_links(struct media_entity *entity) { struct media_device *mdev = entity->graph_obj.mdev; /* Do nothing if the entity is not registered. */ if (mdev == NULL) return; mutex_lock(&mdev->graph_mutex); __media_entity_remove_links(entity); mutex_unlock(&mdev->graph_mutex); } EXPORT_SYMBOL_GPL(media_entity_remove_links); static int __media_entity_setup_link_notify(struct media_link *link, u32 flags) { int ret; /* Notify both entities. */ ret = media_entity_call(link->source->entity, link_setup, link->source, link->sink, flags); if (ret < 0 && ret != -ENOIOCTLCMD) return ret; ret = media_entity_call(link->sink->entity, link_setup, link->sink, link->source, flags); if (ret < 0 && ret != -ENOIOCTLCMD) { media_entity_call(link->source->entity, link_setup, link->source, link->sink, link->flags); return ret; } link->flags = flags; link->reverse->flags = link->flags; return 0; } int __media_entity_setup_link(struct media_link *link, u32 flags) { const u32 mask = MEDIA_LNK_FL_ENABLED; struct media_device *mdev; struct media_entity *source, *sink; int ret = -EBUSY; if (link == NULL) return -EINVAL; /* The non-modifiable link flags must not be modified. */ if ((link->flags & ~mask) != (flags & ~mask)) return -EINVAL; if (link->flags & MEDIA_LNK_FL_IMMUTABLE) return link->flags == flags ? 0 : -EINVAL; if (link->flags == flags) return 0; source = link->source->entity; sink = link->sink->entity; if (!(link->flags & MEDIA_LNK_FL_DYNAMIC) && (source->stream_count || sink->stream_count)) return -EBUSY; mdev = source->graph_obj.mdev; if (mdev->ops && mdev->ops->link_notify) { ret = mdev->ops->link_notify(link, flags, MEDIA_DEV_NOTIFY_PRE_LINK_CH); if (ret < 0) return ret; } ret = __media_entity_setup_link_notify(link, flags); if (mdev->ops && mdev->ops->link_notify) mdev->ops->link_notify(link, flags, MEDIA_DEV_NOTIFY_POST_LINK_CH); return ret; } EXPORT_SYMBOL_GPL(__media_entity_setup_link); int media_entity_setup_link(struct media_link *link, u32 flags) { int ret; mutex_lock(&link->graph_obj.mdev->graph_mutex); ret = __media_entity_setup_link(link, flags); mutex_unlock(&link->graph_obj.mdev->graph_mutex); return ret; } EXPORT_SYMBOL_GPL(media_entity_setup_link); struct media_link * media_entity_find_link(struct media_pad *source, struct media_pad *sink) { struct media_link *link; list_for_each_entry(link, &source->entity->links, list) { if (link->source->entity == source->entity && link->source->index == source->index && link->sink->entity == sink->entity && link->sink->index == sink->index) return link; } return NULL; } EXPORT_SYMBOL_GPL(media_entity_find_link); struct media_pad *media_entity_remote_pad(const struct media_pad *pad) { struct media_link *link; list_for_each_entry(link, &pad->entity->links, list) { if (!(link->flags & MEDIA_LNK_FL_ENABLED)) continue; if (link->source == pad) return link->sink; if (link->sink == pad) return link->source; } return NULL; } EXPORT_SYMBOL_GPL(media_entity_remote_pad); static void media_interface_init(struct media_device *mdev, struct media_interface *intf, u32 gobj_type, u32 intf_type, u32 flags) { intf->type = intf_type; intf->flags = flags; INIT_LIST_HEAD(&intf->links); media_gobj_create(mdev, gobj_type, &intf->graph_obj); } /* Functions related to the media interface via device nodes */ struct media_intf_devnode *media_devnode_create(struct media_device *mdev, u32 type, u32 flags, u32 major, u32 minor) { struct media_intf_devnode *devnode; devnode = kzalloc(sizeof(*devnode), GFP_KERNEL); if (!devnode) return NULL; devnode->major = major; devnode->minor = minor; media_interface_init(mdev, &devnode->intf, MEDIA_GRAPH_INTF_DEVNODE, type, flags); return devnode; } EXPORT_SYMBOL_GPL(media_devnode_create); void media_devnode_remove(struct media_intf_devnode *devnode) { media_remove_intf_links(&devnode->intf); media_gobj_destroy(&devnode->intf.graph_obj); kfree(devnode); } EXPORT_SYMBOL_GPL(media_devnode_remove); struct media_link *media_create_intf_link(struct media_entity *entity, struct media_interface *intf, u32 flags) { struct media_link *link; link = media_add_link(&intf->links); if (link == NULL) return NULL; link->intf = intf; link->entity = entity; link->flags = flags | MEDIA_LNK_FL_INTERFACE_LINK; /* Initialize graph object embedded at the new link */ media_gobj_create(intf->graph_obj.mdev, MEDIA_GRAPH_LINK, &link->graph_obj); return link; } EXPORT_SYMBOL_GPL(media_create_intf_link); void __media_remove_intf_link(struct media_link *link) { list_del(&link->list); media_gobj_destroy(&link->graph_obj); kfree(link); } EXPORT_SYMBOL_GPL(__media_remove_intf_link); void media_remove_intf_link(struct media_link *link) { struct media_device *mdev = link->graph_obj.mdev; /* Do nothing if the intf is not registered. */ if (mdev == NULL) return; mutex_lock(&mdev->graph_mutex); __media_remove_intf_link(link); mutex_unlock(&mdev->graph_mutex); } EXPORT_SYMBOL_GPL(media_remove_intf_link); void __media_remove_intf_links(struct media_interface *intf) { struct media_link *link, *tmp; list_for_each_entry_safe(link, tmp, &intf->links, list) __media_remove_intf_link(link); } EXPORT_SYMBOL_GPL(__media_remove_intf_links); void media_remove_intf_links(struct media_interface *intf) { struct media_device *mdev = intf->graph_obj.mdev; /* Do nothing if the intf is not registered. */ if (mdev == NULL) return; mutex_lock(&mdev->graph_mutex); __media_remove_intf_links(intf); mutex_unlock(&mdev->graph_mutex); } EXPORT_SYMBOL_GPL(media_remove_intf_links); |
5 5 10 1 3 1 5 1 1 2 5 5 1 4 4 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 | // SPDX-License-Identifier: GPL-2.0+ /* * MaxLinear/Exar USB to Serial driver * * Copyright (c) 2020 Manivannan Sadhasivam <mani@kernel.org> * Copyright (c) 2021 Johan Hovold <johan@kernel.org> * * Based on the initial driver written by Patong Yang: * * https://lore.kernel.org/r/20180404070634.nhspvmxcjwfgjkcv@advantechmxl-desktop * * Copyright (c) 2018 Patong Yang <patong.mxl@gmail.com> */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/usb.h> #include <linux/usb/cdc.h> #include <linux/usb/serial.h> struct xr_txrx_clk_mask { u16 tx; u16 rx0; u16 rx1; }; #define XR_INT_OSC_HZ 48000000U #define XR21V141X_MIN_SPEED 46U #define XR21V141X_MAX_SPEED XR_INT_OSC_HZ /* XR21V141X register blocks */ #define XR21V141X_UART_REG_BLOCK 0 #define XR21V141X_UM_REG_BLOCK 4 #define XR21V141X_UART_CUSTOM_BLOCK 0x66 /* XR21V141X UART registers */ #define XR21V141X_CLOCK_DIVISOR_0 0x04 #define XR21V141X_CLOCK_DIVISOR_1 0x05 #define XR21V141X_CLOCK_DIVISOR_2 0x06 #define XR21V141X_TX_CLOCK_MASK_0 0x07 #define XR21V141X_TX_CLOCK_MASK_1 0x08 #define XR21V141X_RX_CLOCK_MASK_0 0x09 #define XR21V141X_RX_CLOCK_MASK_1 0x0a #define XR21V141X_REG_FORMAT 0x0b /* XR21V141X UART Manager registers */ #define XR21V141X_UM_FIFO_ENABLE_REG 0x10 #define XR21V141X_UM_ENABLE_TX_FIFO 0x01 #define XR21V141X_UM_ENABLE_RX_FIFO 0x02 #define XR21V141X_UM_RX_FIFO_RESET 0x18 #define XR21V141X_UM_TX_FIFO_RESET 0x1c #define XR_UART_ENABLE_TX 0x1 #define XR_UART_ENABLE_RX 0x2 #define XR_GPIO_RI BIT(0) #define XR_GPIO_CD BIT(1) #define XR_GPIO_DSR BIT(2) #define XR_GPIO_DTR BIT(3) #define XR_GPIO_CTS BIT(4) #define XR_GPIO_RTS BIT(5) #define XR_GPIO_CLK BIT(6) #define XR_GPIO_XEN BIT(7) #define XR_GPIO_TXT BIT(8) #define XR_GPIO_RXT BIT(9) #define XR_UART_DATA_MASK GENMASK(3, 0) #define XR_UART_DATA_7 0x7 #define XR_UART_DATA_8 0x8 #define XR_UART_PARITY_MASK GENMASK(6, 4) #define XR_UART_PARITY_SHIFT 4 #define XR_UART_PARITY_NONE (0x0 << XR_UART_PARITY_SHIFT) #define XR_UART_PARITY_ODD (0x1 << XR_UART_PARITY_SHIFT) #define XR_UART_PARITY_EVEN (0x2 << XR_UART_PARITY_SHIFT) #define XR_UART_PARITY_MARK (0x3 << XR_UART_PARITY_SHIFT) #define XR_UART_PARITY_SPACE (0x4 << XR_UART_PARITY_SHIFT) #define XR_UART_STOP_MASK BIT(7) #define XR_UART_STOP_SHIFT 7 #define XR_UART_STOP_1 (0x0 << XR_UART_STOP_SHIFT) #define XR_UART_STOP_2 (0x1 << XR_UART_STOP_SHIFT) #define XR_UART_FLOW_MODE_NONE 0x0 #define XR_UART_FLOW_MODE_HW 0x1 #define XR_UART_FLOW_MODE_SW 0x2 #define XR_GPIO_MODE_SEL_MASK GENMASK(2, 0) #define XR_GPIO_MODE_SEL_RTS_CTS 0x1 #define XR_GPIO_MODE_SEL_DTR_DSR 0x2 #define XR_GPIO_MODE_SEL_RS485 0x3 #define XR_GPIO_MODE_SEL_RS485_ADDR 0x4 #define XR_GPIO_MODE_TX_TOGGLE 0x100 #define XR_GPIO_MODE_RX_TOGGLE 0x200 #define XR_FIFO_RESET 0x1 #define XR_CUSTOM_DRIVER_ACTIVE 0x1 static int xr21v141x_uart_enable(struct usb_serial_port *port); static int xr21v141x_uart_disable(struct usb_serial_port *port); static int xr21v141x_fifo_reset(struct usb_serial_port *port); static void xr21v141x_set_line_settings(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old_termios); struct xr_type { int reg_width; u8 reg_recipient; u8 set_reg; u8 get_reg; u16 uart_enable; u16 flow_control; u16 xon_char; u16 xoff_char; u16 tx_break; u16 gpio_mode; u16 gpio_direction; u16 gpio_set; u16 gpio_clear; u16 gpio_status; u16 tx_fifo_reset; u16 rx_fifo_reset; u16 custom_driver; bool have_5_6_bit_mode; bool have_xmit_toggle; int (*enable)(struct usb_serial_port *port); int (*disable)(struct usb_serial_port *port); int (*fifo_reset)(struct usb_serial_port *port); void (*set_line_settings)(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old_termios); }; enum xr_type_id { XR21V141X, XR21B142X, XR21B1411, XR2280X, XR_TYPE_COUNT, }; static const struct xr_type xr_types[] = { [XR21V141X] = { .reg_width = 8, .reg_recipient = USB_RECIP_DEVICE, .set_reg = 0x00, .get_reg = 0x01, .uart_enable = 0x03, .flow_control = 0x0c, .xon_char = 0x10, .xoff_char = 0x11, .tx_break = 0x14, .gpio_mode = 0x1a, .gpio_direction = 0x1b, .gpio_set = 0x1d, .gpio_clear = 0x1e, .gpio_status = 0x1f, .enable = xr21v141x_uart_enable, .disable = xr21v141x_uart_disable, .fifo_reset = xr21v141x_fifo_reset, .set_line_settings = xr21v141x_set_line_settings, }, [XR21B142X] = { .reg_width = 16, .reg_recipient = USB_RECIP_INTERFACE, .set_reg = 0x00, .get_reg = 0x00, .uart_enable = 0x00, .flow_control = 0x06, .xon_char = 0x07, .xoff_char = 0x08, .tx_break = 0x0a, .gpio_mode = 0x0c, .gpio_direction = 0x0d, .gpio_set = 0x0e, .gpio_clear = 0x0f, .gpio_status = 0x10, .tx_fifo_reset = 0x40, .rx_fifo_reset = 0x43, .custom_driver = 0x60, .have_5_6_bit_mode = true, .have_xmit_toggle = true, }, [XR21B1411] = { .reg_width = 12, .reg_recipient = USB_RECIP_DEVICE, .set_reg = 0x00, .get_reg = 0x01, .uart_enable = 0xc00, .flow_control = 0xc06, .xon_char = 0xc07, .xoff_char = 0xc08, .tx_break = 0xc0a, .gpio_mode = 0xc0c, .gpio_direction = 0xc0d, .gpio_set = 0xc0e, .gpio_clear = 0xc0f, .gpio_status = 0xc10, .tx_fifo_reset = 0xc80, .rx_fifo_reset = 0xcc0, .custom_driver = 0x20d, }, [XR2280X] = { .reg_width = 16, .reg_recipient = USB_RECIP_DEVICE, .set_reg = 0x05, .get_reg = 0x05, .uart_enable = 0x40, .flow_control = 0x46, .xon_char = 0x47, .xoff_char = 0x48, .tx_break = 0x4a, .gpio_mode = 0x4c, .gpio_direction = 0x4d, .gpio_set = 0x4e, .gpio_clear = 0x4f, .gpio_status = 0x50, .tx_fifo_reset = 0x60, .rx_fifo_reset = 0x63, .custom_driver = 0x81, }, }; struct xr_data { const struct xr_type *type; u8 channel; /* zero-based index or interface number */ }; static int xr_set_reg(struct usb_serial_port *port, u8 channel, u16 reg, u16 val) { struct xr_data *data = usb_get_serial_port_data(port); const struct xr_type *type = data->type; struct usb_serial *serial = port->serial; int ret; ret = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), type->set_reg, USB_DIR_OUT | USB_TYPE_VENDOR | type->reg_recipient, val, (channel << 8) | reg, NULL, 0, USB_CTRL_SET_TIMEOUT); if (ret < 0) { dev_err(&port->dev, "Failed to set reg 0x%02x: %d\n", reg, ret); return ret; } return 0; } static int xr_get_reg(struct usb_serial_port *port, u8 channel, u16 reg, u16 *val) { struct xr_data *data = usb_get_serial_port_data(port); const struct xr_type *type = data->type; struct usb_serial *serial = port->serial; u8 *dmabuf; int ret, len; if (type->reg_width == 8) len = 1; else len = 2; dmabuf = kmalloc(len, GFP_KERNEL); if (!dmabuf) return -ENOMEM; ret = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), type->get_reg, USB_DIR_IN | USB_TYPE_VENDOR | type->reg_recipient, 0, (channel << 8) | reg, dmabuf, len, USB_CTRL_GET_TIMEOUT); if (ret == len) { if (len == 2) *val = le16_to_cpup((__le16 *)dmabuf); else *val = *dmabuf; ret = 0; } else { dev_err(&port->dev, "Failed to get reg 0x%02x: %d\n", reg, ret); if (ret >= 0) ret = -EIO; } kfree(dmabuf); return ret; } static int xr_set_reg_uart(struct usb_serial_port *port, u16 reg, u16 val) { struct xr_data *data = usb_get_serial_port_data(port); return xr_set_reg(port, data->channel, reg, val); } static int xr_get_reg_uart(struct usb_serial_port *port, u16 reg, u16 *val) { struct xr_data *data = usb_get_serial_port_data(port); return xr_get_reg(port, data->channel, reg, val); } static int xr_set_reg_um(struct usb_serial_port *port, u8 reg_base, u8 val) { struct xr_data *data = usb_get_serial_port_data(port); u8 reg; reg = reg_base + data->channel; return xr_set_reg(port, XR21V141X_UM_REG_BLOCK, reg, val); } static int __xr_uart_enable(struct usb_serial_port *port) { struct xr_data *data = usb_get_serial_port_data(port); return xr_set_reg_uart(port, data->type->uart_enable, XR_UART_ENABLE_TX | XR_UART_ENABLE_RX); } static int __xr_uart_disable(struct usb_serial_port *port) { struct xr_data *data = usb_get_serial_port_data(port); return xr_set_reg_uart(port, data->type->uart_enable, 0); } /* * According to datasheet, below is the recommended sequence for enabling UART * module in XR21V141X: * * Enable Tx FIFO * Enable Tx and Rx * Enable Rx FIFO */ static int xr21v141x_uart_enable(struct usb_serial_port *port) { int ret; ret = xr_set_reg_um(port, XR21V141X_UM_FIFO_ENABLE_REG, XR21V141X_UM_ENABLE_TX_FIFO); if (ret) return ret; ret = __xr_uart_enable(port); if (ret) return ret; ret = xr_set_reg_um(port, XR21V141X_UM_FIFO_ENABLE_REG, XR21V141X_UM_ENABLE_TX_FIFO | XR21V141X_UM_ENABLE_RX_FIFO); if (ret) __xr_uart_disable(port); return ret; } static int xr21v141x_uart_disable(struct usb_serial_port *port) { int ret; ret = __xr_uart_disable(port); if (ret) return ret; ret = xr_set_reg_um(port, XR21V141X_UM_FIFO_ENABLE_REG, 0); return ret; } static int xr_uart_enable(struct usb_serial_port *port) { struct xr_data *data = usb_get_serial_port_data(port); if (data->type->enable) return data->type->enable(port); return __xr_uart_enable(port); } static int xr_uart_disable(struct usb_serial_port *port) { struct xr_data *data = usb_get_serial_port_data(port); if (data->type->disable) return data->type->disable(port); return __xr_uart_disable(port); } static int xr21v141x_fifo_reset(struct usb_serial_port *port) { int ret; ret = xr_set_reg_um(port, XR21V141X_UM_TX_FIFO_RESET, XR_FIFO_RESET); if (ret) return ret; ret = xr_set_reg_um(port, XR21V141X_UM_RX_FIFO_RESET, XR_FIFO_RESET); if (ret) return ret; return 0; } static int xr_fifo_reset(struct usb_serial_port *port) { struct xr_data *data = usb_get_serial_port_data(port); int ret; if (data->type->fifo_reset) return data->type->fifo_reset(port); ret = xr_set_reg_uart(port, data->type->tx_fifo_reset, XR_FIFO_RESET); if (ret) return ret; ret = xr_set_reg_uart(port, data->type->rx_fifo_reset, XR_FIFO_RESET); if (ret) return ret; return 0; } static int xr_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct xr_data *data = usb_get_serial_port_data(port); u16 status; int ret; ret = xr_get_reg_uart(port, data->type->gpio_status, &status); if (ret) return ret; /* * Modem control pins are active low, so reading '0' means it is active * and '1' means not active. */ ret = ((status & XR_GPIO_DTR) ? 0 : TIOCM_DTR) | ((status & XR_GPIO_RTS) ? 0 : TIOCM_RTS) | ((status & XR_GPIO_CTS) ? 0 : TIOCM_CTS) | ((status & XR_GPIO_DSR) ? 0 : TIOCM_DSR) | ((status & XR_GPIO_RI) ? 0 : TIOCM_RI) | ((status & XR_GPIO_CD) ? 0 : TIOCM_CD); return ret; } static int xr_tiocmset_port(struct usb_serial_port *port, unsigned int set, unsigned int clear) { struct xr_data *data = usb_get_serial_port_data(port); const struct xr_type *type = data->type; u16 gpio_set = 0; u16 gpio_clr = 0; int ret = 0; /* Modem control pins are active low, so set & clr are swapped */ if (set & TIOCM_RTS) gpio_clr |= XR_GPIO_RTS; if (set & TIOCM_DTR) gpio_clr |= XR_GPIO_DTR; if (clear & TIOCM_RTS) gpio_set |= XR_GPIO_RTS; if (clear & TIOCM_DTR) gpio_set |= XR_GPIO_DTR; /* Writing '0' to gpio_{set/clr} bits has no effect, so no need to do */ if (gpio_clr) ret = xr_set_reg_uart(port, type->gpio_clear, gpio_clr); if (gpio_set) ret = xr_set_reg_uart(port, type->gpio_set, gpio_set); return ret; } static int xr_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; return xr_tiocmset_port(port, set, clear); } static void xr_dtr_rts(struct usb_serial_port *port, int on) { if (on) xr_tiocmset_port(port, TIOCM_DTR | TIOCM_RTS, 0); else xr_tiocmset_port(port, 0, TIOCM_DTR | TIOCM_RTS); } static void xr_break_ctl(struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; struct xr_data *data = usb_get_serial_port_data(port); const struct xr_type *type = data->type; u16 state; if (break_state == 0) state = 0; else state = GENMASK(type->reg_width - 1, 0); dev_dbg(&port->dev, "Turning break %s\n", state == 0 ? "off" : "on"); xr_set_reg_uart(port, type->tx_break, state); } /* Tx and Rx clock mask values obtained from section 3.3.4 of datasheet */ static const struct xr_txrx_clk_mask xr21v141x_txrx_clk_masks[] = { { 0x000, 0x000, 0x000 }, { 0x000, 0x000, 0x000 }, { 0x100, 0x000, 0x100 }, { 0x020, 0x400, 0x020 }, { 0x010, 0x100, 0x010 }, { 0x208, 0x040, 0x208 }, { 0x104, 0x820, 0x108 }, { 0x844, 0x210, 0x884 }, { 0x444, 0x110, 0x444 }, { 0x122, 0x888, 0x224 }, { 0x912, 0x448, 0x924 }, { 0x492, 0x248, 0x492 }, { 0x252, 0x928, 0x292 }, { 0x94a, 0x4a4, 0xa52 }, { 0x52a, 0xaa4, 0x54a }, { 0xaaa, 0x954, 0x4aa }, { 0xaaa, 0x554, 0xaaa }, { 0x555, 0xad4, 0x5aa }, { 0xb55, 0xab4, 0x55a }, { 0x6b5, 0x5ac, 0xb56 }, { 0x5b5, 0xd6c, 0x6d6 }, { 0xb6d, 0xb6a, 0xdb6 }, { 0x76d, 0x6da, 0xbb6 }, { 0xedd, 0xdda, 0x76e }, { 0xddd, 0xbba, 0xeee }, { 0x7bb, 0xf7a, 0xdde }, { 0xf7b, 0xef6, 0x7de }, { 0xdf7, 0xbf6, 0xf7e }, { 0x7f7, 0xfee, 0xefe }, { 0xfdf, 0xfbe, 0x7fe }, { 0xf7f, 0xefe, 0xffe }, { 0xfff, 0xffe, 0xffd }, }; static int xr21v141x_set_baudrate(struct tty_struct *tty, struct usb_serial_port *port) { u32 divisor, baud, idx; u16 tx_mask, rx_mask; int ret; baud = tty->termios.c_ospeed; if (!baud) return 0; baud = clamp(baud, XR21V141X_MIN_SPEED, XR21V141X_MAX_SPEED); divisor = XR_INT_OSC_HZ / baud; idx = ((32 * XR_INT_OSC_HZ) / baud) & 0x1f; tx_mask = xr21v141x_txrx_clk_masks[idx].tx; if (divisor & 0x01) rx_mask = xr21v141x_txrx_clk_masks[idx].rx1; else rx_mask = xr21v141x_txrx_clk_masks[idx].rx0; dev_dbg(&port->dev, "Setting baud rate: %u\n", baud); /* * XR21V141X uses fractional baud rate generator with 48MHz internal * oscillator and 19-bit programmable divisor. So theoretically it can * generate most commonly used baud rates with high accuracy. */ ret = xr_set_reg_uart(port, XR21V141X_CLOCK_DIVISOR_0, divisor & 0xff); if (ret) return ret; ret = xr_set_reg_uart(port, XR21V141X_CLOCK_DIVISOR_1, (divisor >> 8) & 0xff); if (ret) return ret; ret = xr_set_reg_uart(port, XR21V141X_CLOCK_DIVISOR_2, (divisor >> 16) & 0xff); if (ret) return ret; ret = xr_set_reg_uart(port, XR21V141X_TX_CLOCK_MASK_0, tx_mask & 0xff); if (ret) return ret; ret = xr_set_reg_uart(port, XR21V141X_TX_CLOCK_MASK_1, (tx_mask >> 8) & 0xff); if (ret) return ret; ret = xr_set_reg_uart(port, XR21V141X_RX_CLOCK_MASK_0, rx_mask & 0xff); if (ret) return ret; ret = xr_set_reg_uart(port, XR21V141X_RX_CLOCK_MASK_1, (rx_mask >> 8) & 0xff); if (ret) return ret; tty_encode_baud_rate(tty, baud, baud); return 0; } static void xr_set_flow_mode(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old_termios) { struct xr_data *data = usb_get_serial_port_data(port); const struct xr_type *type = data->type; u16 flow, gpio_mode; int ret; ret = xr_get_reg_uart(port, type->gpio_mode, &gpio_mode); if (ret) return; /* * According to the datasheets, the UART needs to be disabled while * writing to the FLOW_CONTROL register (XR21V141X), or any register * but GPIO_SET, GPIO_CLEAR, TX_BREAK and ERROR_STATUS (XR21B142X). */ xr_uart_disable(port); /* Set GPIO mode for controlling the pins manually by default. */ gpio_mode &= ~XR_GPIO_MODE_SEL_MASK; if (C_CRTSCTS(tty) && C_BAUD(tty) != B0) { dev_dbg(&port->dev, "Enabling hardware flow ctrl\n"); gpio_mode |= XR_GPIO_MODE_SEL_RTS_CTS; flow = XR_UART_FLOW_MODE_HW; } else if (I_IXON(tty)) { u8 start_char = START_CHAR(tty); u8 stop_char = STOP_CHAR(tty); dev_dbg(&port->dev, "Enabling sw flow ctrl\n"); flow = XR_UART_FLOW_MODE_SW; xr_set_reg_uart(port, type->xon_char, start_char); xr_set_reg_uart(port, type->xoff_char, stop_char); } else { dev_dbg(&port->dev, "Disabling flow ctrl\n"); flow = XR_UART_FLOW_MODE_NONE; } xr_set_reg_uart(port, type->flow_control, flow); xr_set_reg_uart(port, type->gpio_mode, gpio_mode); xr_uart_enable(port); if (C_BAUD(tty) == B0) xr_dtr_rts(port, 0); else if (old_termios && (old_termios->c_cflag & CBAUD) == B0) xr_dtr_rts(port, 1); } static void xr21v141x_set_line_settings(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old_termios) { struct ktermios *termios = &tty->termios; u8 bits = 0; int ret; if (!old_termios || (tty->termios.c_ospeed != old_termios->c_ospeed)) xr21v141x_set_baudrate(tty, port); switch (C_CSIZE(tty)) { case CS5: case CS6: /* CS5 and CS6 are not supported, so just restore old setting */ termios->c_cflag &= ~CSIZE; if (old_termios) termios->c_cflag |= old_termios->c_cflag & CSIZE; else termios->c_cflag |= CS8; if (C_CSIZE(tty) == CS7) bits |= XR_UART_DATA_7; else bits |= XR_UART_DATA_8; break; case CS7: bits |= XR_UART_DATA_7; break; case CS8: default: bits |= XR_UART_DATA_8; break; } if (C_PARENB(tty)) { if (C_CMSPAR(tty)) { if (C_PARODD(tty)) bits |= XR_UART_PARITY_MARK; else bits |= XR_UART_PARITY_SPACE; } else { if (C_PARODD(tty)) bits |= XR_UART_PARITY_ODD; else bits |= XR_UART_PARITY_EVEN; } } if (C_CSTOPB(tty)) bits |= XR_UART_STOP_2; else bits |= XR_UART_STOP_1; ret = xr_set_reg_uart(port, XR21V141X_REG_FORMAT, bits); if (ret) return; } static void xr_cdc_set_line_coding(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old_termios) { struct xr_data *data = usb_get_serial_port_data(port); struct usb_host_interface *alt = port->serial->interface->cur_altsetting; struct usb_device *udev = port->serial->dev; struct usb_cdc_line_coding *lc; int ret; lc = kzalloc(sizeof(*lc), GFP_KERNEL); if (!lc) return; if (tty->termios.c_ospeed) lc->dwDTERate = cpu_to_le32(tty->termios.c_ospeed); else if (old_termios) lc->dwDTERate = cpu_to_le32(old_termios->c_ospeed); else lc->dwDTERate = cpu_to_le32(9600); if (C_CSTOPB(tty)) lc->bCharFormat = USB_CDC_2_STOP_BITS; else lc->bCharFormat = USB_CDC_1_STOP_BITS; if (C_PARENB(tty)) { if (C_CMSPAR(tty)) { if (C_PARODD(tty)) lc->bParityType = USB_CDC_MARK_PARITY; else lc->bParityType = USB_CDC_SPACE_PARITY; } else { if (C_PARODD(tty)) lc->bParityType = USB_CDC_ODD_PARITY; else lc->bParityType = USB_CDC_EVEN_PARITY; } } else { lc->bParityType = USB_CDC_NO_PARITY; } if (!data->type->have_5_6_bit_mode && (C_CSIZE(tty) == CS5 || C_CSIZE(tty) == CS6)) { tty->termios.c_cflag &= ~CSIZE; if (old_termios) tty->termios.c_cflag |= old_termios->c_cflag & CSIZE; else tty->termios.c_cflag |= CS8; } switch (C_CSIZE(tty)) { case CS5: lc->bDataBits = 5; break; case CS6: lc->bDataBits = 6; break; case CS7: lc->bDataBits = 7; break; case CS8: default: lc->bDataBits = 8; break; } ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_CDC_REQ_SET_LINE_CODING, USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, alt->desc.bInterfaceNumber, lc, sizeof(*lc), USB_CTRL_SET_TIMEOUT); if (ret < 0) dev_err(&port->dev, "Failed to set line coding: %d\n", ret); kfree(lc); } static void xr_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old_termios) { struct xr_data *data = usb_get_serial_port_data(port); /* * XR21V141X does not have a CUSTOM_DRIVER flag and always enters CDC * mode upon receiving CDC requests. */ if (data->type->set_line_settings) data->type->set_line_settings(tty, port, old_termios); else xr_cdc_set_line_coding(tty, port, old_termios); xr_set_flow_mode(tty, port, old_termios); } static int xr_open(struct tty_struct *tty, struct usb_serial_port *port) { int ret; ret = xr_fifo_reset(port); if (ret) return ret; ret = xr_uart_enable(port); if (ret) { dev_err(&port->dev, "Failed to enable UART\n"); return ret; } /* Setup termios */ if (tty) xr_set_termios(tty, port, NULL); ret = usb_serial_generic_open(tty, port); if (ret) { xr_uart_disable(port); return ret; } return 0; } static void xr_close(struct usb_serial_port *port) { usb_serial_generic_close(port); xr_uart_disable(port); } static int xr_probe(struct usb_serial *serial, const struct usb_device_id *id) { struct usb_interface *control = serial->interface; struct usb_host_interface *alt = control->cur_altsetting; struct usb_cdc_parsed_header hdrs; struct usb_cdc_union_desc *desc; struct usb_interface *data; int ret; ret = cdc_parse_cdc_header(&hdrs, control, alt->extra, alt->extralen); if (ret < 0) return -ENODEV; desc = hdrs.usb_cdc_union_desc; if (!desc) return -ENODEV; data = usb_ifnum_to_if(serial->dev, desc->bSlaveInterface0); if (!data) return -ENODEV; ret = usb_serial_claim_interface(serial, data); if (ret) return ret; usb_set_serial_data(serial, (void *)id->driver_info); return 0; } static int xr_gpio_init(struct usb_serial_port *port, const struct xr_type *type) { u16 mask, mode; int ret; /* * Configure all pins as GPIO except for Receive and Transmit Toggle. */ mode = 0; if (type->have_xmit_toggle) mode |= XR_GPIO_MODE_RX_TOGGLE | XR_GPIO_MODE_TX_TOGGLE; ret = xr_set_reg_uart(port, type->gpio_mode, mode); if (ret) return ret; /* * Configure DTR and RTS as outputs and make sure they are deasserted * (active low), and configure RI, CD, DSR and CTS as inputs. */ mask = XR_GPIO_DTR | XR_GPIO_RTS; ret = xr_set_reg_uart(port, type->gpio_direction, mask); if (ret) return ret; ret = xr_set_reg_uart(port, type->gpio_set, mask); if (ret) return ret; return 0; } static int xr_port_probe(struct usb_serial_port *port) { struct usb_interface_descriptor *desc; const struct xr_type *type; struct xr_data *data; enum xr_type_id type_id; int ret; type_id = (int)(unsigned long)usb_get_serial_data(port->serial); type = &xr_types[type_id]; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; data->type = type; desc = &port->serial->interface->cur_altsetting->desc; if (type_id == XR21V141X) data->channel = desc->bInterfaceNumber / 2; else data->channel = desc->bInterfaceNumber; usb_set_serial_port_data(port, data); if (type->custom_driver) { ret = xr_set_reg_uart(port, type->custom_driver, XR_CUSTOM_DRIVER_ACTIVE); if (ret) goto err_free; } ret = xr_gpio_init(port, type); if (ret) goto err_free; return 0; err_free: kfree(data); return ret; } static void xr_port_remove(struct usb_serial_port *port) { struct xr_data *data = usb_get_serial_port_data(port); kfree(data); } #define XR_DEVICE(vid, pid, type) \ USB_DEVICE_INTERFACE_CLASS((vid), (pid), USB_CLASS_COMM), \ .driver_info = (type) static const struct usb_device_id id_table[] = { { XR_DEVICE(0x04e2, 0x1400, XR2280X) }, { XR_DEVICE(0x04e2, 0x1401, XR2280X) }, { XR_DEVICE(0x04e2, 0x1402, XR2280X) }, { XR_DEVICE(0x04e2, 0x1403, XR2280X) }, { XR_DEVICE(0x04e2, 0x1410, XR21V141X) }, { XR_DEVICE(0x04e2, 0x1411, XR21B1411) }, { XR_DEVICE(0x04e2, 0x1412, XR21V141X) }, { XR_DEVICE(0x04e2, 0x1414, XR21V141X) }, { XR_DEVICE(0x04e2, 0x1420, XR21B142X) }, { XR_DEVICE(0x04e2, 0x1422, XR21B142X) }, { XR_DEVICE(0x04e2, 0x1424, XR21B142X) }, { } }; MODULE_DEVICE_TABLE(usb, id_table); static struct usb_serial_driver xr_device = { .driver = { .owner = THIS_MODULE, .name = "xr_serial", }, .id_table = id_table, .num_ports = 1, .probe = xr_probe, .port_probe = xr_port_probe, .port_remove = xr_port_remove, .open = xr_open, .close = xr_close, .break_ctl = xr_break_ctl, .set_termios = xr_set_termios, .tiocmget = xr_tiocmget, .tiocmset = xr_tiocmset, .dtr_rts = xr_dtr_rts }; static struct usb_serial_driver * const serial_drivers[] = { &xr_device, NULL }; module_usb_serial_driver(serial_drivers, id_table); MODULE_AUTHOR("Manivannan Sadhasivam <mani@kernel.org>"); MODULE_DESCRIPTION("MaxLinear/Exar USB to Serial driver"); MODULE_LICENSE("GPL"); |
9 1 8 1 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 | // SPDX-License-Identifier: GPL-2.0-or-later /* * HID Driver for ELAN Touchpad * * Currently only supports touchpad found on HP Pavilion X2 10 * * Copyright (c) 2016 Alexandrov Stanislav <neko@nya.ai> */ #include <linux/hid.h> #include <linux/input/mt.h> #include <linux/leds.h> #include <linux/module.h> #include <linux/usb.h> #include "hid-ids.h" #define ELAN_MT_I2C 0x5d #define ELAN_SINGLE_FINGER 0x81 #define ELAN_MT_FIRST_FINGER 0x82 #define ELAN_MT_SECOND_FINGER 0x83 #define ELAN_INPUT_REPORT_SIZE 8 #define ELAN_I2C_REPORT_SIZE 32 #define ELAN_FINGER_DATA_LEN 5 #define ELAN_MAX_FINGERS 5 #define ELAN_MAX_PRESSURE 255 #define ELAN_TP_USB_INTF 1 #define ELAN_FEATURE_REPORT 0x0d #define ELAN_FEATURE_SIZE 5 #define ELAN_PARAM_MAX_X 6 #define ELAN_PARAM_MAX_Y 7 #define ELAN_PARAM_RES 8 #define ELAN_MUTE_LED_REPORT 0xBC #define ELAN_LED_REPORT_SIZE 8 #define ELAN_HAS_LED BIT(0) struct elan_drvdata { struct input_dev *input; u8 prev_report[ELAN_INPUT_REPORT_SIZE]; struct led_classdev mute_led; u8 mute_led_state; u16 max_x; u16 max_y; u16 res_x; u16 res_y; }; static int is_not_elan_touchpad(struct hid_device *hdev) { if (hid_is_usb(hdev)) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); return (intf->altsetting->desc.bInterfaceNumber != ELAN_TP_USB_INTF); } return 0; } static int elan_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { if (is_not_elan_touchpad(hdev)) return 0; if (field->report->id == ELAN_SINGLE_FINGER || field->report->id == ELAN_MT_FIRST_FINGER || field->report->id == ELAN_MT_SECOND_FINGER || field->report->id == ELAN_MT_I2C) return -1; return 0; } static int elan_get_device_param(struct hid_device *hdev, unsigned char *dmabuf, unsigned char param) { int ret; dmabuf[0] = ELAN_FEATURE_REPORT; dmabuf[1] = 0x05; dmabuf[2] = 0x03; dmabuf[3] = param; dmabuf[4] = 0x01; ret = hid_hw_raw_request(hdev, ELAN_FEATURE_REPORT, dmabuf, ELAN_FEATURE_SIZE, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); if (ret != ELAN_FEATURE_SIZE) { hid_err(hdev, "Set report error for parm %d: %d\n", param, ret); return ret; } ret = hid_hw_raw_request(hdev, ELAN_FEATURE_REPORT, dmabuf, ELAN_FEATURE_SIZE, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); if (ret != ELAN_FEATURE_SIZE) { hid_err(hdev, "Get report error for parm %d: %d\n", param, ret); return ret; } return 0; } static unsigned int elan_convert_res(char val) { /* * (value from firmware) * 10 + 790 = dpi * dpi * 10 / 254 = dots/mm */ return (val * 10 + 790) * 10 / 254; } static int elan_get_device_params(struct hid_device *hdev) { struct elan_drvdata *drvdata = hid_get_drvdata(hdev); unsigned char *dmabuf; int ret; dmabuf = kmalloc(ELAN_FEATURE_SIZE, GFP_KERNEL); if (!dmabuf) return -ENOMEM; ret = elan_get_device_param(hdev, dmabuf, ELAN_PARAM_MAX_X); if (ret) goto err; drvdata->max_x = (dmabuf[4] << 8) | dmabuf[3]; ret = elan_get_device_param(hdev, dmabuf, ELAN_PARAM_MAX_Y); if (ret) goto err; drvdata->max_y = (dmabuf[4] << 8) | dmabuf[3]; ret = elan_get_device_param(hdev, dmabuf, ELAN_PARAM_RES); if (ret) goto err; drvdata->res_x = elan_convert_res(dmabuf[3]); drvdata->res_y = elan_convert_res(dmabuf[4]); err: kfree(dmabuf); return ret; } static int elan_input_configured(struct hid_device *hdev, struct hid_input *hi) { int ret; struct input_dev *input; struct elan_drvdata *drvdata = hid_get_drvdata(hdev); if (is_not_elan_touchpad(hdev)) return 0; ret = elan_get_device_params(hdev); if (ret) return ret; input = devm_input_allocate_device(&hdev->dev); if (!input) return -ENOMEM; input->name = "Elan Touchpad"; input->phys = hdev->phys; input->uniq = hdev->uniq; input->id.bustype = hdev->bus; input->id.vendor = hdev->vendor; input->id.product = hdev->product; input->id.version = hdev->version; input->dev.parent = &hdev->dev; input_set_abs_params(input, ABS_MT_POSITION_X, 0, drvdata->max_x, 0, 0); input_set_abs_params(input, ABS_MT_POSITION_Y, 0, drvdata->max_y, 0, 0); input_set_abs_params(input, ABS_MT_PRESSURE, 0, ELAN_MAX_PRESSURE, 0, 0); __set_bit(BTN_LEFT, input->keybit); __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); ret = input_mt_init_slots(input, ELAN_MAX_FINGERS, INPUT_MT_POINTER); if (ret) { hid_err(hdev, "Failed to init elan MT slots: %d\n", ret); return ret; } input_abs_set_res(input, ABS_X, drvdata->res_x); input_abs_set_res(input, ABS_Y, drvdata->res_y); ret = input_register_device(input); if (ret) { hid_err(hdev, "Failed to register elan input device: %d\n", ret); input_mt_destroy_slots(input); return ret; } drvdata->input = input; return 0; } static void elan_report_mt_slot(struct elan_drvdata *drvdata, u8 *data, unsigned int slot_num) { struct input_dev *input = drvdata->input; int x, y, p; bool active = !!data; input_mt_slot(input, slot_num); input_mt_report_slot_state(input, MT_TOOL_FINGER, active); if (active) { x = ((data[0] & 0xF0) << 4) | data[1]; y = drvdata->max_y - (((data[0] & 0x07) << 8) | data[2]); p = data[4]; input_report_abs(input, ABS_MT_POSITION_X, x); input_report_abs(input, ABS_MT_POSITION_Y, y); input_report_abs(input, ABS_MT_PRESSURE, p); } } static void elan_usb_report_input(struct elan_drvdata *drvdata, u8 *data) { int i; struct input_dev *input = drvdata->input; /* * There is 3 types of reports: for single touch, * for multitouch - first finger and for multitouch - second finger * * packet structure for ELAN_SINGLE_FINGER and ELAN_MT_FIRST_FINGER: * * byte 1: 1 0 0 0 0 0 0 1 // 0x81 or 0x82 * byte 2: 0 0 0 0 0 0 0 0 // looks like unused * byte 3: f5 f4 f3 f2 f1 0 0 L * byte 4: x12 x11 x10 x9 0? y11 y10 y9 * byte 5: x8 x7 x6 x5 x4 x3 x2 x1 * byte 6: y8 y7 y6 y5 y4 y3 y2 y1 * byte 7: sy4 sy3 sy2 sy1 sx4 sx3 sx2 sx1 * byte 8: p8 p7 p6 p5 p4 p3 p2 p1 * * packet structure for ELAN_MT_SECOND_FINGER: * * byte 1: 1 0 0 0 0 0 1 1 // 0x83 * byte 2: x12 x11 x10 x9 0 y11 y10 y9 * byte 3: x8 x7 x6 x5 x4 x3 x2 x1 * byte 4: y8 y7 y6 y5 y4 y3 y2 y1 * byte 5: sy4 sy3 sy2 sy1 sx4 sx3 sx2 sx1 * byte 6: p8 p7 p6 p5 p4 p3 p2 p1 * byte 7: 0 0 0 0 0 0 0 0 * byte 8: 0 0 0 0 0 0 0 0 * * f5-f1: finger touch bits * L: clickpad button * sy / sx: finger width / height expressed in traces, the total number * of traces can be queried by doing a HID_REQ_SET_REPORT * { 0x0d, 0x05, 0x03, 0x05, 0x01 } followed by a GET, in the * returned buf, buf[3]=no-x-traces, buf[4]=no-y-traces. * p: pressure */ if (data[0] == ELAN_SINGLE_FINGER) { for (i = 0; i < ELAN_MAX_FINGERS; i++) { if (data[2] & BIT(i + 3)) elan_report_mt_slot(drvdata, data + 3, i); else elan_report_mt_slot(drvdata, NULL, i); } input_report_key(input, BTN_LEFT, data[2] & 0x01); } /* * When touched with two fingers Elan touchpad will emit two HID reports * first is ELAN_MT_FIRST_FINGER and second is ELAN_MT_SECOND_FINGER * we will save ELAN_MT_FIRST_FINGER report and wait for * ELAN_MT_SECOND_FINGER to finish multitouch */ if (data[0] == ELAN_MT_FIRST_FINGER) { memcpy(drvdata->prev_report, data, sizeof(drvdata->prev_report)); return; } if (data[0] == ELAN_MT_SECOND_FINGER) { int first = 0; u8 *prev_report = drvdata->prev_report; if (prev_report[0] != ELAN_MT_FIRST_FINGER) return; for (i = 0; i < ELAN_MAX_FINGERS; i++) { if (prev_report[2] & BIT(i + 3)) { if (!first) { first = 1; elan_report_mt_slot(drvdata, prev_report + 3, i); } else { elan_report_mt_slot(drvdata, data + 1, i); } } else { elan_report_mt_slot(drvdata, NULL, i); } } input_report_key(input, BTN_LEFT, prev_report[2] & 0x01); } input_mt_sync_frame(input); input_sync(input); } static void elan_i2c_report_input(struct elan_drvdata *drvdata, u8 *data) { struct input_dev *input = drvdata->input; u8 *finger_data; int i; /* * Elan MT touchpads in i2c mode send finger data in the same format * as in USB mode, but then with all fingers in a single packet. * * packet structure for ELAN_MT_I2C: * * byte 1: 1 0 0 1 1 1 0 1 // 0x5d * byte 2: f5 f4 f3 f2 f1 0 0 L * byte 3: x12 x11 x10 x9 0? y11 y10 y9 * byte 4: x8 x7 x6 x5 x4 x3 x2 x1 * byte 5: y8 y7 y6 y5 y4 y3 y2 y1 * byte 6: sy4 sy3 sy2 sy1 sx4 sx3 sx2 sx1 * byte 7: p8 p7 p6 p5 p4 p3 p2 p1 * byte 8-12: Same as byte 3-7 for second finger down * byte 13-17: Same as byte 3-7 for third finger down * byte 18-22: Same as byte 3-7 for fourth finger down * byte 23-27: Same as byte 3-7 for fifth finger down */ finger_data = data + 2; for (i = 0; i < ELAN_MAX_FINGERS; i++) { if (data[1] & BIT(i + 3)) { elan_report_mt_slot(drvdata, finger_data, i); finger_data += ELAN_FINGER_DATA_LEN; } else { elan_report_mt_slot(drvdata, NULL, i); } } input_report_key(input, BTN_LEFT, data[1] & 0x01); input_mt_sync_frame(input); input_sync(input); } static int elan_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct elan_drvdata *drvdata = hid_get_drvdata(hdev); if (is_not_elan_touchpad(hdev)) return 0; if (data[0] == ELAN_SINGLE_FINGER || data[0] == ELAN_MT_FIRST_FINGER || data[0] == ELAN_MT_SECOND_FINGER) { if (size == ELAN_INPUT_REPORT_SIZE) { elan_usb_report_input(drvdata, data); return 1; } } if (data[0] == ELAN_MT_I2C && size == ELAN_I2C_REPORT_SIZE) { elan_i2c_report_input(drvdata, data); return 1; } return 0; } static int elan_start_multitouch(struct hid_device *hdev) { int ret; /* * This byte sequence will enable multitouch mode and disable * mouse emulation */ static const unsigned char buf[] = { 0x0D, 0x00, 0x03, 0x21, 0x00 }; unsigned char *dmabuf = kmemdup(buf, sizeof(buf), GFP_KERNEL); if (!dmabuf) return -ENOMEM; ret = hid_hw_raw_request(hdev, dmabuf[0], dmabuf, sizeof(buf), HID_FEATURE_REPORT, HID_REQ_SET_REPORT); kfree(dmabuf); if (ret != sizeof(buf)) { hid_err(hdev, "Failed to start multitouch: %d\n", ret); return ret; } return 0; } static int elan_mute_led_set_brigtness(struct led_classdev *led_cdev, enum led_brightness value) { int ret; u8 led_state; struct device *dev = led_cdev->dev->parent; struct hid_device *hdev = to_hid_device(dev); struct elan_drvdata *drvdata = hid_get_drvdata(hdev); unsigned char *dmabuf = kzalloc(ELAN_LED_REPORT_SIZE, GFP_KERNEL); if (!dmabuf) return -ENOMEM; led_state = !!value; dmabuf[0] = ELAN_MUTE_LED_REPORT; dmabuf[1] = 0x02; dmabuf[2] = led_state; ret = hid_hw_raw_request(hdev, dmabuf[0], dmabuf, ELAN_LED_REPORT_SIZE, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); kfree(dmabuf); if (ret != ELAN_LED_REPORT_SIZE) { if (ret != -ENODEV) hid_err(hdev, "Failed to set mute led brightness: %d\n", ret); return ret < 0 ? ret : -EIO; } drvdata->mute_led_state = led_state; return 0; } static int elan_init_mute_led(struct hid_device *hdev) { struct elan_drvdata *drvdata = hid_get_drvdata(hdev); struct led_classdev *mute_led = &drvdata->mute_led; mute_led->name = "elan:red:mute"; mute_led->default_trigger = "audio-mute"; mute_led->brightness_set_blocking = elan_mute_led_set_brigtness; mute_led->max_brightness = LED_ON; mute_led->flags = LED_HW_PLUGGABLE; mute_led->dev = &hdev->dev; return devm_led_classdev_register(&hdev->dev, mute_led); } static int elan_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; struct elan_drvdata *drvdata; drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) return -ENOMEM; hid_set_drvdata(hdev, drvdata); ret = hid_parse(hdev); if (ret) { hid_err(hdev, "Hid Parse failed\n"); return ret; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) { hid_err(hdev, "Hid hw start failed\n"); return ret; } if (is_not_elan_touchpad(hdev)) return 0; if (!drvdata->input) { hid_err(hdev, "Input device is not registered\n"); ret = -ENAVAIL; goto err; } ret = elan_start_multitouch(hdev); if (ret) goto err; if (id->driver_data & ELAN_HAS_LED) { ret = elan_init_mute_led(hdev); if (ret) goto err; } return 0; err: hid_hw_stop(hdev); return ret; } static void elan_remove(struct hid_device *hdev) { hid_hw_stop(hdev); } static const struct hid_device_id elan_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_HP_X2), .driver_data = ELAN_HAS_LED }, { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_HP_X2_10_COVER), .driver_data = ELAN_HAS_LED }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_TOSHIBA_CLICK_L9W) }, { } }; MODULE_DEVICE_TABLE(hid, elan_devices); static struct hid_driver elan_driver = { .name = "elan", .id_table = elan_devices, .input_mapping = elan_input_mapping, .input_configured = elan_input_configured, .raw_event = elan_raw_event, .probe = elan_probe, .remove = elan_remove, }; module_hid_driver(elan_driver); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Alexandrov Stanislav"); MODULE_DESCRIPTION("Driver for HID ELAN Touchpads"); |
3 3 3 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 | // SPDX-License-Identifier: GPL-2.0-or-later /* * caiaq.c: ALSA driver for caiaq/NativeInstruments devices * * Copyright (c) 2007 Daniel Mack <daniel@caiaq.de> * Karsten Wiese <fzu@wemgehoertderstaat.de> */ #include <linux/moduleparam.h> #include <linux/device.h> #include <linux/interrupt.h> #include <linux/module.h> #include <linux/init.h> #include <linux/gfp.h> #include <linux/usb.h> #include <sound/initval.h> #include <sound/core.h> #include <sound/pcm.h> #include "device.h" #include "audio.h" #include "midi.h" #include "control.h" #include "input.h" MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>"); MODULE_DESCRIPTION("caiaq USB audio"); MODULE_LICENSE("GPL"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-max */ static char* id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* Id for this card */ static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; /* Enable this card */ module_param_array(index, int, NULL, 0444); MODULE_PARM_DESC(index, "Index value for the caiaq sound device"); module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for the caiaq soundcard."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable the caiaq soundcard."); enum { SAMPLERATE_44100 = 0, SAMPLERATE_48000 = 1, SAMPLERATE_96000 = 2, SAMPLERATE_192000 = 3, SAMPLERATE_88200 = 4, SAMPLERATE_INVALID = 0xff }; enum { DEPTH_NONE = 0, DEPTH_16 = 1, DEPTH_24 = 2, DEPTH_32 = 3 }; static const struct usb_device_id snd_usb_id_table[] = { { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = USB_VID_NATIVEINSTRUMENTS, .idProduct = USB_PID_RIGKONTROL2 }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = USB_VID_NATIVEINSTRUMENTS, .idProduct = USB_PID_RIGKONTROL3 }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = USB_VID_NATIVEINSTRUMENTS, .idProduct = USB_PID_KORECONTROLLER }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = USB_VID_NATIVEINSTRUMENTS, .idProduct = USB_PID_KORECONTROLLER2 }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = USB_VID_NATIVEINSTRUMENTS, .idProduct = USB_PID_AK1 }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = USB_VID_NATIVEINSTRUMENTS, .idProduct = USB_PID_AUDIO8DJ }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = USB_VID_NATIVEINSTRUMENTS, .idProduct = USB_PID_SESSIONIO }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = USB_VID_NATIVEINSTRUMENTS, .idProduct = USB_PID_GUITARRIGMOBILE }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = USB_VID_NATIVEINSTRUMENTS, .idProduct = USB_PID_AUDIO4DJ }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = USB_VID_NATIVEINSTRUMENTS, .idProduct = USB_PID_AUDIO2DJ }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = USB_VID_NATIVEINSTRUMENTS, .idProduct = USB_PID_TRAKTORKONTROLX1 }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = USB_VID_NATIVEINSTRUMENTS, .idProduct = USB_PID_TRAKTORKONTROLS4 }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = USB_VID_NATIVEINSTRUMENTS, .idProduct = USB_PID_TRAKTORAUDIO2 }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = USB_VID_NATIVEINSTRUMENTS, .idProduct = USB_PID_MASCHINECONTROLLER }, { /* terminator */ } }; static void usb_ep1_command_reply_dispatch (struct urb* urb) { int ret; struct device *dev = &urb->dev->dev; struct snd_usb_caiaqdev *cdev = urb->context; unsigned char *buf = urb->transfer_buffer; if (urb->status || !cdev) { dev_warn(dev, "received EP1 urb->status = %i\n", urb->status); return; } switch(buf[0]) { case EP1_CMD_GET_DEVICE_INFO: memcpy(&cdev->spec, buf+1, sizeof(struct caiaq_device_spec)); cdev->spec.fw_version = le16_to_cpu(cdev->spec.fw_version); dev_dbg(dev, "device spec (firmware %d): audio: %d in, %d out, " "MIDI: %d in, %d out, data alignment %d\n", cdev->spec.fw_version, cdev->spec.num_analog_audio_in, cdev->spec.num_analog_audio_out, cdev->spec.num_midi_in, cdev->spec.num_midi_out, cdev->spec.data_alignment); cdev->spec_received++; wake_up(&cdev->ep1_wait_queue); break; case EP1_CMD_AUDIO_PARAMS: cdev->audio_parm_answer = buf[1]; wake_up(&cdev->ep1_wait_queue); break; case EP1_CMD_MIDI_READ: snd_usb_caiaq_midi_handle_input(cdev, buf[1], buf + 3, buf[2]); break; case EP1_CMD_READ_IO: if (cdev->chip.usb_id == USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_AUDIO8DJ)) { if (urb->actual_length > sizeof(cdev->control_state)) urb->actual_length = sizeof(cdev->control_state); memcpy(cdev->control_state, buf + 1, urb->actual_length); wake_up(&cdev->ep1_wait_queue); break; } #ifdef CONFIG_SND_USB_CAIAQ_INPUT fallthrough; case EP1_CMD_READ_ERP: case EP1_CMD_READ_ANALOG: snd_usb_caiaq_input_dispatch(cdev, buf, urb->actual_length); #endif break; } cdev->ep1_in_urb.actual_length = 0; ret = usb_submit_urb(&cdev->ep1_in_urb, GFP_ATOMIC); if (ret < 0) dev_err(dev, "unable to submit urb. OOM!?\n"); } int snd_usb_caiaq_send_command(struct snd_usb_caiaqdev *cdev, unsigned char command, const unsigned char *buffer, int len) { int actual_len; struct usb_device *usb_dev = cdev->chip.dev; if (!usb_dev) return -EIO; if (len > EP1_BUFSIZE - 1) len = EP1_BUFSIZE - 1; if (buffer && len > 0) memcpy(cdev->ep1_out_buf+1, buffer, len); cdev->ep1_out_buf[0] = command; return usb_bulk_msg(usb_dev, usb_sndbulkpipe(usb_dev, 1), cdev->ep1_out_buf, len+1, &actual_len, 200); } int snd_usb_caiaq_send_command_bank(struct snd_usb_caiaqdev *cdev, unsigned char command, unsigned char bank, const unsigned char *buffer, int len) { int actual_len; struct usb_device *usb_dev = cdev->chip.dev; if (!usb_dev) return -EIO; if (len > EP1_BUFSIZE - 2) len = EP1_BUFSIZE - 2; if (buffer && len > 0) memcpy(cdev->ep1_out_buf+2, buffer, len); cdev->ep1_out_buf[0] = command; cdev->ep1_out_buf[1] = bank; return usb_bulk_msg(usb_dev, usb_sndbulkpipe(usb_dev, 1), cdev->ep1_out_buf, len+2, &actual_len, 200); } int snd_usb_caiaq_set_audio_params (struct snd_usb_caiaqdev *cdev, int rate, int depth, int bpp) { int ret; char tmp[5]; struct device *dev = caiaqdev_to_dev(cdev); switch (rate) { case 44100: tmp[0] = SAMPLERATE_44100; break; case 48000: tmp[0] = SAMPLERATE_48000; break; case 88200: tmp[0] = SAMPLERATE_88200; break; case 96000: tmp[0] = SAMPLERATE_96000; break; case 192000: tmp[0] = SAMPLERATE_192000; break; default: return -EINVAL; } switch (depth) { case 16: tmp[1] = DEPTH_16; break; case 24: tmp[1] = DEPTH_24; break; default: return -EINVAL; } tmp[2] = bpp & 0xff; tmp[3] = bpp >> 8; tmp[4] = 1; /* packets per microframe */ dev_dbg(dev, "setting audio params: %d Hz, %d bits, %d bpp\n", rate, depth, bpp); cdev->audio_parm_answer = -1; ret = snd_usb_caiaq_send_command(cdev, EP1_CMD_AUDIO_PARAMS, tmp, sizeof(tmp)); if (ret) return ret; if (!wait_event_timeout(cdev->ep1_wait_queue, cdev->audio_parm_answer >= 0, HZ)) return -EPIPE; if (cdev->audio_parm_answer != 1) dev_dbg(dev, "unable to set the device's audio params\n"); else cdev->bpp = bpp; return cdev->audio_parm_answer == 1 ? 0 : -EINVAL; } int snd_usb_caiaq_set_auto_msg(struct snd_usb_caiaqdev *cdev, int digital, int analog, int erp) { char tmp[3] = { digital, analog, erp }; return snd_usb_caiaq_send_command(cdev, EP1_CMD_AUTO_MSG, tmp, sizeof(tmp)); } static void setup_card(struct snd_usb_caiaqdev *cdev) { int ret; char val[4]; struct device *dev = caiaqdev_to_dev(cdev); /* device-specific startup specials */ switch (cdev->chip.usb_id) { case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_RIGKONTROL2): /* RigKontrol2 - display centered dash ('-') */ val[0] = 0x00; val[1] = 0x00; val[2] = 0x01; snd_usb_caiaq_send_command(cdev, EP1_CMD_WRITE_IO, val, 3); break; case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_RIGKONTROL3): /* RigKontrol2 - display two centered dashes ('--') */ val[0] = 0x00; val[1] = 0x40; val[2] = 0x40; val[3] = 0x00; snd_usb_caiaq_send_command(cdev, EP1_CMD_WRITE_IO, val, 4); break; case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_AK1): /* Audio Kontrol 1 - make USB-LED stop blinking */ val[0] = 0x00; snd_usb_caiaq_send_command(cdev, EP1_CMD_WRITE_IO, val, 1); break; case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_AUDIO8DJ): /* Audio 8 DJ - trigger read of current settings */ cdev->control_state[0] = 0xff; snd_usb_caiaq_set_auto_msg(cdev, 1, 0, 0); snd_usb_caiaq_send_command(cdev, EP1_CMD_READ_IO, NULL, 0); if (!wait_event_timeout(cdev->ep1_wait_queue, cdev->control_state[0] != 0xff, HZ)) return; /* fix up some defaults */ if ((cdev->control_state[1] != 2) || (cdev->control_state[2] != 3) || (cdev->control_state[4] != 2)) { cdev->control_state[1] = 2; cdev->control_state[2] = 3; cdev->control_state[4] = 2; snd_usb_caiaq_send_command(cdev, EP1_CMD_WRITE_IO, cdev->control_state, 6); } break; } if (cdev->spec.num_analog_audio_out + cdev->spec.num_analog_audio_in + cdev->spec.num_digital_audio_out + cdev->spec.num_digital_audio_in > 0) { ret = snd_usb_caiaq_audio_init(cdev); if (ret < 0) dev_err(dev, "Unable to set up audio system (ret=%d)\n", ret); } if (cdev->spec.num_midi_in + cdev->spec.num_midi_out > 0) { ret = snd_usb_caiaq_midi_init(cdev); if (ret < 0) dev_err(dev, "Unable to set up MIDI system (ret=%d)\n", ret); } #ifdef CONFIG_SND_USB_CAIAQ_INPUT ret = snd_usb_caiaq_input_init(cdev); if (ret < 0) dev_err(dev, "Unable to set up input system (ret=%d)\n", ret); #endif /* finally, register the card and all its sub-instances */ ret = snd_card_register(cdev->chip.card); if (ret < 0) { dev_err(dev, "snd_card_register() returned %d\n", ret); snd_card_free(cdev->chip.card); } ret = snd_usb_caiaq_control_init(cdev); if (ret < 0) dev_err(dev, "Unable to set up control system (ret=%d)\n", ret); } static void card_free(struct snd_card *card) { struct snd_usb_caiaqdev *cdev = caiaqdev(card); #ifdef CONFIG_SND_USB_CAIAQ_INPUT snd_usb_caiaq_input_free(cdev); #endif snd_usb_caiaq_audio_free(cdev); usb_reset_device(cdev->chip.dev); } static int create_card(struct usb_device *usb_dev, struct usb_interface *intf, struct snd_card **cardp) { int devnum; int err; struct snd_card *card; struct snd_usb_caiaqdev *cdev; for (devnum = 0; devnum < SNDRV_CARDS; devnum++) if (enable[devnum]) break; if (devnum >= SNDRV_CARDS) return -ENODEV; err = snd_card_new(&intf->dev, index[devnum], id[devnum], THIS_MODULE, sizeof(struct snd_usb_caiaqdev), &card); if (err < 0) return err; cdev = caiaqdev(card); cdev->chip.dev = usb_dev; cdev->chip.card = card; cdev->chip.usb_id = USB_ID(le16_to_cpu(usb_dev->descriptor.idVendor), le16_to_cpu(usb_dev->descriptor.idProduct)); spin_lock_init(&cdev->spinlock); *cardp = card; return 0; } static int init_card(struct snd_usb_caiaqdev *cdev) { char *c, usbpath[32]; struct usb_device *usb_dev = cdev->chip.dev; struct snd_card *card = cdev->chip.card; struct device *dev = caiaqdev_to_dev(cdev); int err, len; if (usb_set_interface(usb_dev, 0, 1) != 0) { dev_err(dev, "can't set alt interface.\n"); return -EIO; } usb_init_urb(&cdev->ep1_in_urb); usb_init_urb(&cdev->midi_out_urb); usb_fill_bulk_urb(&cdev->ep1_in_urb, usb_dev, usb_rcvbulkpipe(usb_dev, 0x1), cdev->ep1_in_buf, EP1_BUFSIZE, usb_ep1_command_reply_dispatch, cdev); usb_fill_bulk_urb(&cdev->midi_out_urb, usb_dev, usb_sndbulkpipe(usb_dev, 0x1), cdev->midi_out_buf, EP1_BUFSIZE, snd_usb_caiaq_midi_output_done, cdev); /* sanity checks of EPs before actually submitting */ if (usb_urb_ep_type_check(&cdev->ep1_in_urb) || usb_urb_ep_type_check(&cdev->midi_out_urb)) { dev_err(dev, "invalid EPs\n"); return -EINVAL; } init_waitqueue_head(&cdev->ep1_wait_queue); init_waitqueue_head(&cdev->prepare_wait_queue); if (usb_submit_urb(&cdev->ep1_in_urb, GFP_KERNEL) != 0) return -EIO; err = snd_usb_caiaq_send_command(cdev, EP1_CMD_GET_DEVICE_INFO, NULL, 0); if (err) goto err_kill_urb; if (!wait_event_timeout(cdev->ep1_wait_queue, cdev->spec_received, HZ)) { err = -ENODEV; goto err_kill_urb; } usb_string(usb_dev, usb_dev->descriptor.iManufacturer, cdev->vendor_name, CAIAQ_USB_STR_LEN); usb_string(usb_dev, usb_dev->descriptor.iProduct, cdev->product_name, CAIAQ_USB_STR_LEN); strscpy(card->driver, MODNAME, sizeof(card->driver)); strscpy(card->shortname, cdev->product_name, sizeof(card->shortname)); strscpy(card->mixername, cdev->product_name, sizeof(card->mixername)); /* if the id was not passed as module option, fill it with a shortened * version of the product string which does not contain any * whitespaces */ if (*card->id == '\0') { char id[sizeof(card->id)]; memset(id, 0, sizeof(id)); for (c = card->shortname, len = 0; *c && len < sizeof(card->id); c++) if (*c != ' ') id[len++] = *c; snd_card_set_id(card, id); } usb_make_path(usb_dev, usbpath, sizeof(usbpath)); snprintf(card->longname, sizeof(card->longname), "%s %s (%s)", cdev->vendor_name, cdev->product_name, usbpath); setup_card(cdev); card->private_free = card_free; return 0; err_kill_urb: usb_kill_urb(&cdev->ep1_in_urb); return err; } static int snd_probe(struct usb_interface *intf, const struct usb_device_id *id) { int ret; struct snd_card *card = NULL; struct usb_device *usb_dev = interface_to_usbdev(intf); ret = create_card(usb_dev, intf, &card); if (ret < 0) return ret; usb_set_intfdata(intf, card); ret = init_card(caiaqdev(card)); if (ret < 0) { dev_err(&usb_dev->dev, "unable to init card! (ret=%d)\n", ret); snd_card_free(card); return ret; } return 0; } static void snd_disconnect(struct usb_interface *intf) { struct snd_card *card = usb_get_intfdata(intf); struct device *dev = intf->usb_dev; struct snd_usb_caiaqdev *cdev; if (!card) return; cdev = caiaqdev(card); dev_dbg(dev, "%s(%p)\n", __func__, intf); snd_card_disconnect(card); #ifdef CONFIG_SND_USB_CAIAQ_INPUT snd_usb_caiaq_input_disconnect(cdev); #endif snd_usb_caiaq_audio_disconnect(cdev); usb_kill_urb(&cdev->ep1_in_urb); usb_kill_urb(&cdev->midi_out_urb); snd_card_free_when_closed(card); } MODULE_DEVICE_TABLE(usb, snd_usb_id_table); static struct usb_driver snd_usb_driver = { .name = MODNAME, .probe = snd_probe, .disconnect = snd_disconnect, .id_table = snd_usb_id_table, }; module_usb_driver(snd_usb_driver); |
2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 | // SPDX-License-Identifier: GPL-2.0-or-later /* * IPVS: Least-Connection Scheduling module * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * Changes: * Wensong Zhang : added the ip_vs_lc_update_svc * Wensong Zhang : added any dest with weight=0 is quiesced */ #define KMSG_COMPONENT "IPVS" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/module.h> #include <linux/kernel.h> #include <net/ip_vs.h> /* * Least Connection scheduling */ static struct ip_vs_dest * ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least = NULL; unsigned int loh = 0, doh; IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); /* * Simply select the server with the least number of * (activeconns<<5) + inactconns * Except whose weight is equal to zero. * If the weight is equal to zero, it means that the server is * quiesced, the existing connections to the server still get * served, but no new connection is assigned to the server. */ list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if ((dest->flags & IP_VS_DEST_F_OVERLOAD) || atomic_read(&dest->weight) == 0) continue; doh = ip_vs_dest_conn_overhead(dest); if (!least || doh < loh) { least = dest; loh = doh; } } if (!least) ip_vs_scheduler_err(svc, "no destination available"); else IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d " "inactconns %d\n", IP_VS_DBG_ADDR(least->af, &least->addr), ntohs(least->port), atomic_read(&least->activeconns), atomic_read(&least->inactconns)); return least; } static struct ip_vs_scheduler ip_vs_lc_scheduler = { .name = "lc", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list), .schedule = ip_vs_lc_schedule, }; static int __init ip_vs_lc_init(void) { return register_ip_vs_scheduler(&ip_vs_lc_scheduler) ; } static void __exit ip_vs_lc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lc_scheduler); synchronize_rcu(); } module_init(ip_vs_lc_init); module_exit(ip_vs_lc_cleanup); MODULE_LICENSE("GPL"); |
20 526 416 533 14 582 23 2 2 287 204 429 428 216 94 210 190 268 18 1 305 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef ASM_KVM_CACHE_REGS_H #define ASM_KVM_CACHE_REGS_H #include <linux/kvm_host.h> #define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS #define KVM_POSSIBLE_CR4_GUEST_BITS \ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE) #define BUILD_KVM_GPR_ACCESSORS(lname, uname) \ static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\ { \ return vcpu->arch.regs[VCPU_REGS_##uname]; \ } \ static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu, \ unsigned long val) \ { \ vcpu->arch.regs[VCPU_REGS_##uname] = val; \ } BUILD_KVM_GPR_ACCESSORS(rax, RAX) BUILD_KVM_GPR_ACCESSORS(rbx, RBX) BUILD_KVM_GPR_ACCESSORS(rcx, RCX) BUILD_KVM_GPR_ACCESSORS(rdx, RDX) BUILD_KVM_GPR_ACCESSORS(rbp, RBP) BUILD_KVM_GPR_ACCESSORS(rsi, RSI) BUILD_KVM_GPR_ACCESSORS(rdi, RDI) #ifdef CONFIG_X86_64 BUILD_KVM_GPR_ACCESSORS(r8, R8) BUILD_KVM_GPR_ACCESSORS(r9, R9) BUILD_KVM_GPR_ACCESSORS(r10, R10) BUILD_KVM_GPR_ACCESSORS(r11, R11) BUILD_KVM_GPR_ACCESSORS(r12, R12) BUILD_KVM_GPR_ACCESSORS(r13, R13) BUILD_KVM_GPR_ACCESSORS(r14, R14) BUILD_KVM_GPR_ACCESSORS(r15, R15) #endif static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu, enum kvm_reg reg) { return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); } static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu, enum kvm_reg reg) { return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); } static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu, enum kvm_reg reg) { __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); } static inline void kvm_register_clear_available(struct kvm_vcpu *vcpu, enum kvm_reg reg) { __clear_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); __clear_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); } static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu, enum kvm_reg reg) { __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); } /* * The "raw" register helpers are only for cases where the full 64 bits of a * register are read/written irrespective of current vCPU mode. In other words, * odds are good you shouldn't be using the raw variants. */ static inline unsigned long kvm_register_read_raw(struct kvm_vcpu *vcpu, int reg) { if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS)) return 0; if (!kvm_register_is_available(vcpu, reg)) static_call(kvm_x86_cache_reg)(vcpu, reg); return vcpu->arch.regs[reg]; } static inline void kvm_register_write_raw(struct kvm_vcpu *vcpu, int reg, unsigned long val) { if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS)) return; vcpu->arch.regs[reg] = val; kvm_register_mark_dirty(vcpu, reg); } static inline unsigned long kvm_rip_read(struct kvm_vcpu *vcpu) { return kvm_register_read_raw(vcpu, VCPU_REGS_RIP); } static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val) { kvm_register_write_raw(vcpu, VCPU_REGS_RIP, val); } static inline unsigned long kvm_rsp_read(struct kvm_vcpu *vcpu) { return kvm_register_read_raw(vcpu, VCPU_REGS_RSP); } static inline void kvm_rsp_write(struct kvm_vcpu *vcpu, unsigned long val) { kvm_register_write_raw(vcpu, VCPU_REGS_RSP, val); } static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) { might_sleep(); /* on svm */ if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR)) static_call(kvm_x86_cache_reg)(vcpu, VCPU_EXREG_PDPTR); return vcpu->arch.walk_mmu->pdptrs[index]; } static inline void kvm_pdptr_write(struct kvm_vcpu *vcpu, int index, u64 value) { vcpu->arch.walk_mmu->pdptrs[index] = value; } static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask) { ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS; if ((tmask & vcpu->arch.cr0_guest_owned_bits) && !kvm_register_is_available(vcpu, VCPU_EXREG_CR0)) static_call(kvm_x86_cache_reg)(vcpu, VCPU_EXREG_CR0); return vcpu->arch.cr0 & mask; } static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu) { return kvm_read_cr0_bits(vcpu, ~0UL); } static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask) { ulong tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS; if ((tmask & vcpu->arch.cr4_guest_owned_bits) && !kvm_register_is_available(vcpu, VCPU_EXREG_CR4)) static_call(kvm_x86_cache_reg)(vcpu, VCPU_EXREG_CR4); return vcpu->arch.cr4 & mask; } static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu) { if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) static_call(kvm_x86_cache_reg)(vcpu, VCPU_EXREG_CR3); return vcpu->arch.cr3; } static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu) { return kvm_read_cr4_bits(vcpu, ~0UL); } static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu) { return (kvm_rax_read(vcpu) & -1u) | ((u64)(kvm_rdx_read(vcpu) & -1u) << 32); } static inline void enter_guest_mode(struct kvm_vcpu *vcpu) { vcpu->arch.hflags |= HF_GUEST_MASK; vcpu->stat.guest_mode = 1; } static inline void leave_guest_mode(struct kvm_vcpu *vcpu) { vcpu->arch.hflags &= ~HF_GUEST_MASK; if (vcpu->arch.load_eoi_exitmap_pending) { vcpu->arch.load_eoi_exitmap_pending = false; kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu); } vcpu->stat.guest_mode = 0; } static inline bool is_guest_mode(struct kvm_vcpu *vcpu) { return vcpu->arch.hflags & HF_GUEST_MASK; } static inline bool is_smm(struct kvm_vcpu *vcpu) { return vcpu->arch.hflags & HF_SMM_MASK; } #endif |
4 4 4 1 3 4 4 4 4 1 3 1 3 4 4 4 4 4 1 3 4 4 4 4 4 4 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 | // SPDX-License-Identifier: GPL-2.0+ /* * usbduxsigma.c * Copyright (C) 2011-2015 Bernd Porr, mail@berndporr.me.uk */ /* * Driver: usbduxsigma * Description: University of Stirling USB DAQ & INCITE Technology Limited * Devices: [ITL] USB-DUX-SIGMA (usbduxsigma) * Author: Bernd Porr <mail@berndporr.me.uk> * Updated: 20 July 2015 * Status: stable */ /* * I must give credit here to Chris Baugher who * wrote the driver for AT-MIO-16d. I used some parts of this * driver. I also must give credits to David Brownell * who supported me with the USB development. * * Note: the raw data from the A/D converter is 24 bit big endian * anything else is little endian to/from the dux board * * * Revision history: * 0.1: initial version * 0.2: all basic functions implemented, digital I/O only for one port * 0.3: proper vendor ID and driver name * 0.4: fixed D/A voltage range * 0.5: various bug fixes, health check at startup * 0.6: corrected wrong input range * 0.7: rewrite code that urb->interval is always 1 */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/input.h> #include <linux/fcntl.h> #include <linux/compiler.h> #include <asm/unaligned.h> #include "../comedi_usb.h" /* timeout for the USB-transfer in ms*/ #define BULK_TIMEOUT 1000 /* constants for "firmware" upload and download */ #define FIRMWARE "usbduxsigma_firmware.bin" #define FIRMWARE_MAX_LEN 0x4000 #define USBDUXSUB_FIRMWARE 0xa0 #define VENDOR_DIR_IN 0xc0 #define VENDOR_DIR_OUT 0x40 /* internal addresses of the 8051 processor */ #define USBDUXSUB_CPUCS 0xE600 /* 300Hz max frequ under PWM */ #define MIN_PWM_PERIOD ((long)(1E9 / 300)) /* Default PWM frequency */ #define PWM_DEFAULT_PERIOD ((long)(1E9 / 100)) /* Number of channels (16 AD and offset)*/ #define NUMCHANNELS 16 /* Size of one A/D value */ #define SIZEADIN ((sizeof(u32))) /* * Size of the async input-buffer IN BYTES, the DIO state is transmitted * as the first byte. */ #define SIZEINBUF (((NUMCHANNELS + 1) * SIZEADIN)) /* 16 bytes. */ #define SIZEINSNBUF 16 /* Number of DA channels */ #define NUMOUTCHANNELS 8 /* size of one value for the D/A converter: channel and value */ #define SIZEDAOUT ((sizeof(u8) + sizeof(uint16_t))) /* * Size of the output-buffer in bytes * Actually only the first 4 triplets are used but for the * high speed mode we need to pad it to 8 (microframes). */ #define SIZEOUTBUF ((8 * SIZEDAOUT)) /* * Size of the buffer for the dux commands: just now max size is determined * by the analogue out + command byte + panic bytes... */ #define SIZEOFDUXBUFFER ((8 * SIZEDAOUT + 2)) /* Number of in-URBs which receive the data: min=2 */ #define NUMOFINBUFFERSFULL 5 /* Number of out-URBs which send the data: min=2 */ #define NUMOFOUTBUFFERSFULL 5 /* Number of in-URBs which receive the data: min=5 */ /* must have more buffers due to buggy USB ctr */ #define NUMOFINBUFFERSHIGH 10 /* Number of out-URBs which send the data: min=5 */ /* must have more buffers due to buggy USB ctr */ #define NUMOFOUTBUFFERSHIGH 10 /* number of retries to get the right dux command */ #define RETRIES 10 /* bulk transfer commands to usbduxsigma */ #define USBBUXSIGMA_AD_CMD 9 #define USBDUXSIGMA_DA_CMD 1 #define USBDUXSIGMA_DIO_CFG_CMD 2 #define USBDUXSIGMA_DIO_BITS_CMD 3 #define USBDUXSIGMA_SINGLE_AD_CMD 4 #define USBDUXSIGMA_PWM_ON_CMD 7 #define USBDUXSIGMA_PWM_OFF_CMD 8 static const struct comedi_lrange usbduxsigma_ai_range = { 1, { BIP_RANGE(2.5 * 0x800000 / 0x780000 / 2.0) } }; struct usbduxsigma_private { /* actual number of in-buffers */ int n_ai_urbs; /* actual number of out-buffers */ int n_ao_urbs; /* ISO-transfer handling: buffers */ struct urb **ai_urbs; struct urb **ao_urbs; /* pwm-transfer handling */ struct urb *pwm_urb; /* PWM period */ unsigned int pwm_period; /* PWM internal delay for the GPIF in the FX2 */ u8 pwm_delay; /* size of the PWM buffer which holds the bit pattern */ int pwm_buf_sz; /* input buffer for the ISO-transfer */ __be32 *in_buf; /* input buffer for single insn */ u8 *insn_buf; unsigned high_speed:1; unsigned ai_cmd_running:1; unsigned ao_cmd_running:1; unsigned pwm_cmd_running:1; /* time between samples in units of the timer */ unsigned int ai_timer; unsigned int ao_timer; /* counter between acquisitions */ unsigned int ai_counter; unsigned int ao_counter; /* interval in frames/uframes */ unsigned int ai_interval; /* commands */ u8 *dux_commands; struct mutex mut; }; static void usbduxsigma_unlink_urbs(struct urb **urbs, int num_urbs) { int i; for (i = 0; i < num_urbs; i++) usb_kill_urb(urbs[i]); } static void usbduxsigma_ai_stop(struct comedi_device *dev, int do_unlink) { struct usbduxsigma_private *devpriv = dev->private; if (do_unlink && devpriv->ai_urbs) usbduxsigma_unlink_urbs(devpriv->ai_urbs, devpriv->n_ai_urbs); devpriv->ai_cmd_running = 0; } static int usbduxsigma_ai_cancel(struct comedi_device *dev, struct comedi_subdevice *s) { struct usbduxsigma_private *devpriv = dev->private; mutex_lock(&devpriv->mut); /* unlink only if it is really running */ usbduxsigma_ai_stop(dev, devpriv->ai_cmd_running); mutex_unlock(&devpriv->mut); return 0; } static void usbduxsigma_ai_handle_urb(struct comedi_device *dev, struct comedi_subdevice *s, struct urb *urb) { struct usbduxsigma_private *devpriv = dev->private; struct comedi_async *async = s->async; struct comedi_cmd *cmd = &async->cmd; u32 val; int ret; int i; if ((urb->actual_length > 0) && (urb->status != -EXDEV)) { devpriv->ai_counter--; if (devpriv->ai_counter == 0) { devpriv->ai_counter = devpriv->ai_timer; /* * Get the data from the USB bus and hand it over * to comedi. Note, first byte is the DIO state. */ for (i = 0; i < cmd->chanlist_len; i++) { val = be32_to_cpu(devpriv->in_buf[i + 1]); val &= 0x00ffffff; /* strip status byte */ val = comedi_offset_munge(s, val); if (!comedi_buf_write_samples(s, &val, 1)) return; } if (cmd->stop_src == TRIG_COUNT && async->scans_done >= cmd->stop_arg) async->events |= COMEDI_CB_EOA; } } /* if command is still running, resubmit urb */ if (!(async->events & COMEDI_CB_CANCEL_MASK)) { urb->dev = comedi_to_usb_dev(dev); ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret < 0) { dev_err(dev->class_dev, "urb resubmit failed (%d)\n", ret); if (ret == -EL2NSYNC) dev_err(dev->class_dev, "buggy USB host controller or bug in IRQ handler\n"); async->events |= COMEDI_CB_ERROR; } } } static void usbduxsigma_ai_urb_complete(struct urb *urb) { struct comedi_device *dev = urb->context; struct usbduxsigma_private *devpriv = dev->private; struct comedi_subdevice *s = dev->read_subdev; struct comedi_async *async = s->async; /* exit if not running a command, do not resubmit urb */ if (!devpriv->ai_cmd_running) return; switch (urb->status) { case 0: /* copy the result in the transfer buffer */ memcpy(devpriv->in_buf, urb->transfer_buffer, SIZEINBUF); usbduxsigma_ai_handle_urb(dev, s, urb); break; case -EILSEQ: /* * error in the ISOchronous data * we don't copy the data into the transfer buffer * and recycle the last data byte */ dev_dbg(dev->class_dev, "CRC error in ISO IN stream\n"); usbduxsigma_ai_handle_urb(dev, s, urb); break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: case -ECONNABORTED: /* happens after an unlink command */ async->events |= COMEDI_CB_ERROR; break; default: /* a real error */ dev_err(dev->class_dev, "non-zero urb status (%d)\n", urb->status); async->events |= COMEDI_CB_ERROR; break; } /* * comedi_handle_events() cannot be used in this driver. The (*cancel) * operation would unlink the urb. */ if (async->events & COMEDI_CB_CANCEL_MASK) usbduxsigma_ai_stop(dev, 0); comedi_event(dev, s); } static void usbduxsigma_ao_stop(struct comedi_device *dev, int do_unlink) { struct usbduxsigma_private *devpriv = dev->private; if (do_unlink && devpriv->ao_urbs) usbduxsigma_unlink_urbs(devpriv->ao_urbs, devpriv->n_ao_urbs); devpriv->ao_cmd_running = 0; } static int usbduxsigma_ao_cancel(struct comedi_device *dev, struct comedi_subdevice *s) { struct usbduxsigma_private *devpriv = dev->private; mutex_lock(&devpriv->mut); /* unlink only if it is really running */ usbduxsigma_ao_stop(dev, devpriv->ao_cmd_running); mutex_unlock(&devpriv->mut); return 0; } static void usbduxsigma_ao_handle_urb(struct comedi_device *dev, struct comedi_subdevice *s, struct urb *urb) { struct usbduxsigma_private *devpriv = dev->private; struct comedi_async *async = s->async; struct comedi_cmd *cmd = &async->cmd; u8 *datap; int ret; int i; devpriv->ao_counter--; if (devpriv->ao_counter == 0) { devpriv->ao_counter = devpriv->ao_timer; if (cmd->stop_src == TRIG_COUNT && async->scans_done >= cmd->stop_arg) { async->events |= COMEDI_CB_EOA; return; } /* transmit data to the USB bus */ datap = urb->transfer_buffer; *datap++ = cmd->chanlist_len; for (i = 0; i < cmd->chanlist_len; i++) { unsigned int chan = CR_CHAN(cmd->chanlist[i]); unsigned short val; if (!comedi_buf_read_samples(s, &val, 1)) { dev_err(dev->class_dev, "buffer underflow\n"); async->events |= COMEDI_CB_OVERFLOW; return; } *datap++ = val; *datap++ = chan; s->readback[chan] = val; } } /* if command is still running, resubmit urb */ if (!(async->events & COMEDI_CB_CANCEL_MASK)) { urb->transfer_buffer_length = SIZEOUTBUF; urb->dev = comedi_to_usb_dev(dev); urb->status = 0; urb->interval = 1; /* (u)frames */ urb->number_of_packets = 1; urb->iso_frame_desc[0].offset = 0; urb->iso_frame_desc[0].length = SIZEOUTBUF; urb->iso_frame_desc[0].status = 0; ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret < 0) { dev_err(dev->class_dev, "urb resubmit failed (%d)\n", ret); if (ret == -EL2NSYNC) dev_err(dev->class_dev, "buggy USB host controller or bug in IRQ handler\n"); async->events |= COMEDI_CB_ERROR; } } } static void usbduxsigma_ao_urb_complete(struct urb *urb) { struct comedi_device *dev = urb->context; struct usbduxsigma_private *devpriv = dev->private; struct comedi_subdevice *s = dev->write_subdev; struct comedi_async *async = s->async; /* exit if not running a command, do not resubmit urb */ if (!devpriv->ao_cmd_running) return; switch (urb->status) { case 0: usbduxsigma_ao_handle_urb(dev, s, urb); break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: case -ECONNABORTED: /* happens after an unlink command */ async->events |= COMEDI_CB_ERROR; break; default: /* a real error */ dev_err(dev->class_dev, "non-zero urb status (%d)\n", urb->status); async->events |= COMEDI_CB_ERROR; break; } /* * comedi_handle_events() cannot be used in this driver. The (*cancel) * operation would unlink the urb. */ if (async->events & COMEDI_CB_CANCEL_MASK) usbduxsigma_ao_stop(dev, 0); comedi_event(dev, s); } static int usbduxsigma_submit_urbs(struct comedi_device *dev, struct urb **urbs, int num_urbs, int input_urb) { struct usb_device *usb = comedi_to_usb_dev(dev); struct urb *urb; int ret; int i; /* Submit all URBs and start the transfer on the bus */ for (i = 0; i < num_urbs; i++) { urb = urbs[i]; /* in case of a resubmission after an unlink... */ if (input_urb) urb->interval = 1; urb->context = dev; urb->dev = usb; urb->status = 0; urb->transfer_flags = URB_ISO_ASAP; ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret) return ret; } return 0; } static int usbduxsigma_chans_to_interval(int num_chan) { if (num_chan <= 2) return 2; /* 4kHz */ if (num_chan <= 8) return 4; /* 2kHz */ return 8; /* 1kHz */ } static int usbduxsigma_ai_cmdtest(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_cmd *cmd) { struct usbduxsigma_private *devpriv = dev->private; int high_speed = devpriv->high_speed; int interval = usbduxsigma_chans_to_interval(cmd->chanlist_len); unsigned int tmp; int err = 0; /* Step 1 : check if triggers are trivially valid */ err |= comedi_check_trigger_src(&cmd->start_src, TRIG_NOW | TRIG_INT); err |= comedi_check_trigger_src(&cmd->scan_begin_src, TRIG_TIMER); err |= comedi_check_trigger_src(&cmd->convert_src, TRIG_NOW); err |= comedi_check_trigger_src(&cmd->scan_end_src, TRIG_COUNT); err |= comedi_check_trigger_src(&cmd->stop_src, TRIG_COUNT | TRIG_NONE); if (err) return 1; /* Step 2a : make sure trigger sources are unique */ err |= comedi_check_trigger_is_unique(cmd->start_src); err |= comedi_check_trigger_is_unique(cmd->stop_src); /* Step 2b : and mutually compatible */ if (err) return 2; /* Step 3: check if arguments are trivially valid */ err |= comedi_check_trigger_arg_is(&cmd->start_arg, 0); if (high_speed) { /* * In high speed mode microframes are possible. * However, during one microframe we can roughly * sample two channels. Thus, the more channels * are in the channel list the more time we need. */ err |= comedi_check_trigger_arg_min(&cmd->scan_begin_arg, (125000 * interval)); } else { /* full speed */ /* 1kHz scans every USB frame */ err |= comedi_check_trigger_arg_min(&cmd->scan_begin_arg, 1000000); } err |= comedi_check_trigger_arg_is(&cmd->scan_end_arg, cmd->chanlist_len); if (cmd->stop_src == TRIG_COUNT) err |= comedi_check_trigger_arg_min(&cmd->stop_arg, 1); else /* TRIG_NONE */ err |= comedi_check_trigger_arg_is(&cmd->stop_arg, 0); if (err) return 3; /* Step 4: fix up any arguments */ tmp = rounddown(cmd->scan_begin_arg, high_speed ? 125000 : 1000000); err |= comedi_check_trigger_arg_is(&cmd->scan_begin_arg, tmp); if (err) return 4; return 0; } /* * creates the ADC command for the MAX1271 * range is the range value from comedi */ static void create_adc_command(unsigned int chan, u8 *muxsg0, u8 *muxsg1) { if (chan < 8) (*muxsg0) = (*muxsg0) | (1 << chan); else if (chan < 16) (*muxsg1) = (*muxsg1) | (1 << (chan - 8)); } static int usbbuxsigma_send_cmd(struct comedi_device *dev, int cmd_type) { struct usb_device *usb = comedi_to_usb_dev(dev); struct usbduxsigma_private *devpriv = dev->private; int nsent; devpriv->dux_commands[0] = cmd_type; return usb_bulk_msg(usb, usb_sndbulkpipe(usb, 1), devpriv->dux_commands, SIZEOFDUXBUFFER, &nsent, BULK_TIMEOUT); } static int usbduxsigma_receive_cmd(struct comedi_device *dev, int command) { struct usb_device *usb = comedi_to_usb_dev(dev); struct usbduxsigma_private *devpriv = dev->private; int nrec; int ret; int i; for (i = 0; i < RETRIES; i++) { ret = usb_bulk_msg(usb, usb_rcvbulkpipe(usb, 8), devpriv->insn_buf, SIZEINSNBUF, &nrec, BULK_TIMEOUT); if (ret < 0) return ret; if (devpriv->insn_buf[0] == command) return 0; } /* * This is only reached if the data has been requested a * couple of times and the command was not received. */ return -EFAULT; } static int usbduxsigma_ai_inttrig(struct comedi_device *dev, struct comedi_subdevice *s, unsigned int trig_num) { struct usbduxsigma_private *devpriv = dev->private; struct comedi_cmd *cmd = &s->async->cmd; int ret; if (trig_num != cmd->start_arg) return -EINVAL; mutex_lock(&devpriv->mut); if (!devpriv->ai_cmd_running) { devpriv->ai_cmd_running = 1; ret = usbduxsigma_submit_urbs(dev, devpriv->ai_urbs, devpriv->n_ai_urbs, 1); if (ret < 0) { devpriv->ai_cmd_running = 0; mutex_unlock(&devpriv->mut); return ret; } s->async->inttrig = NULL; } mutex_unlock(&devpriv->mut); return 1; } static int usbduxsigma_ai_cmd(struct comedi_device *dev, struct comedi_subdevice *s) { struct usbduxsigma_private *devpriv = dev->private; struct comedi_cmd *cmd = &s->async->cmd; unsigned int len = cmd->chanlist_len; u8 muxsg0 = 0; u8 muxsg1 = 0; u8 sysred = 0; int ret; int i; mutex_lock(&devpriv->mut); if (devpriv->high_speed) { /* * every 2 channels get a time window of 125us. Thus, if we * sample all 16 channels we need 1ms. If we sample only one * channel we need only 125us */ unsigned int interval = usbduxsigma_chans_to_interval(len); devpriv->ai_interval = interval; devpriv->ai_timer = cmd->scan_begin_arg / (125000 * interval); } else { /* interval always 1ms */ devpriv->ai_interval = 1; devpriv->ai_timer = cmd->scan_begin_arg / 1000000; } for (i = 0; i < len; i++) { unsigned int chan = CR_CHAN(cmd->chanlist[i]); create_adc_command(chan, &muxsg0, &muxsg1); } devpriv->dux_commands[1] = devpriv->ai_interval; devpriv->dux_commands[2] = len; /* num channels per time step */ devpriv->dux_commands[3] = 0x12; /* CONFIG0 */ devpriv->dux_commands[4] = 0x03; /* CONFIG1: 23kHz sample, delay 0us */ devpriv->dux_commands[5] = 0x00; /* CONFIG3: diff. channels off */ devpriv->dux_commands[6] = muxsg0; devpriv->dux_commands[7] = muxsg1; devpriv->dux_commands[8] = sysred; ret = usbbuxsigma_send_cmd(dev, USBBUXSIGMA_AD_CMD); if (ret < 0) { mutex_unlock(&devpriv->mut); return ret; } devpriv->ai_counter = devpriv->ai_timer; if (cmd->start_src == TRIG_NOW) { /* enable this acquisition operation */ devpriv->ai_cmd_running = 1; ret = usbduxsigma_submit_urbs(dev, devpriv->ai_urbs, devpriv->n_ai_urbs, 1); if (ret < 0) { devpriv->ai_cmd_running = 0; mutex_unlock(&devpriv->mut); return ret; } s->async->inttrig = NULL; } else { /* TRIG_INT */ s->async->inttrig = usbduxsigma_ai_inttrig; } mutex_unlock(&devpriv->mut); return 0; } static int usbduxsigma_ai_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct usbduxsigma_private *devpriv = dev->private; unsigned int chan = CR_CHAN(insn->chanspec); u8 muxsg0 = 0; u8 muxsg1 = 0; u8 sysred = 0; int ret; int i; mutex_lock(&devpriv->mut); if (devpriv->ai_cmd_running) { mutex_unlock(&devpriv->mut); return -EBUSY; } create_adc_command(chan, &muxsg0, &muxsg1); /* Mode 0 is used to get a single conversion on demand */ devpriv->dux_commands[1] = 0x16; /* CONFIG0: chopper on */ devpriv->dux_commands[2] = 0x80; /* CONFIG1: 2kHz sampling rate */ devpriv->dux_commands[3] = 0x00; /* CONFIG3: diff. channels off */ devpriv->dux_commands[4] = muxsg0; devpriv->dux_commands[5] = muxsg1; devpriv->dux_commands[6] = sysred; /* adc commands */ ret = usbbuxsigma_send_cmd(dev, USBDUXSIGMA_SINGLE_AD_CMD); if (ret < 0) { mutex_unlock(&devpriv->mut); return ret; } for (i = 0; i < insn->n; i++) { u32 val; ret = usbduxsigma_receive_cmd(dev, USBDUXSIGMA_SINGLE_AD_CMD); if (ret < 0) { mutex_unlock(&devpriv->mut); return ret; } /* 32 bits big endian from the A/D converter */ val = be32_to_cpu(get_unaligned((__be32 *)(devpriv->insn_buf + 1))); val &= 0x00ffffff; /* strip status byte */ data[i] = comedi_offset_munge(s, val); } mutex_unlock(&devpriv->mut); return insn->n; } static int usbduxsigma_ao_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct usbduxsigma_private *devpriv = dev->private; int ret; mutex_lock(&devpriv->mut); ret = comedi_readback_insn_read(dev, s, insn, data); mutex_unlock(&devpriv->mut); return ret; } static int usbduxsigma_ao_insn_write(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct usbduxsigma_private *devpriv = dev->private; unsigned int chan = CR_CHAN(insn->chanspec); int ret; int i; mutex_lock(&devpriv->mut); if (devpriv->ao_cmd_running) { mutex_unlock(&devpriv->mut); return -EBUSY; } for (i = 0; i < insn->n; i++) { devpriv->dux_commands[1] = 1; /* num channels */ devpriv->dux_commands[2] = data[i]; /* value */ devpriv->dux_commands[3] = chan; /* channel number */ ret = usbbuxsigma_send_cmd(dev, USBDUXSIGMA_DA_CMD); if (ret < 0) { mutex_unlock(&devpriv->mut); return ret; } s->readback[chan] = data[i]; } mutex_unlock(&devpriv->mut); return insn->n; } static int usbduxsigma_ao_inttrig(struct comedi_device *dev, struct comedi_subdevice *s, unsigned int trig_num) { struct usbduxsigma_private *devpriv = dev->private; struct comedi_cmd *cmd = &s->async->cmd; int ret; if (trig_num != cmd->start_arg) return -EINVAL; mutex_lock(&devpriv->mut); if (!devpriv->ao_cmd_running) { devpriv->ao_cmd_running = 1; ret = usbduxsigma_submit_urbs(dev, devpriv->ao_urbs, devpriv->n_ao_urbs, 0); if (ret < 0) { devpriv->ao_cmd_running = 0; mutex_unlock(&devpriv->mut); return ret; } s->async->inttrig = NULL; } mutex_unlock(&devpriv->mut); return 1; } static int usbduxsigma_ao_cmdtest(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_cmd *cmd) { struct usbduxsigma_private *devpriv = dev->private; unsigned int tmp; int err = 0; /* Step 1 : check if triggers are trivially valid */ err |= comedi_check_trigger_src(&cmd->start_src, TRIG_NOW | TRIG_INT); /* * For now, always use "scan" timing with all channels updated at once * (cmd->scan_begin_src == TRIG_TIMER, cmd->convert_src == TRIG_NOW). * * In a future version, "convert" timing with channels updated * indivually may be supported in high speed mode * (cmd->scan_begin_src == TRIG_FOLLOW, cmd->convert_src == TRIG_TIMER). */ err |= comedi_check_trigger_src(&cmd->scan_begin_src, TRIG_TIMER); err |= comedi_check_trigger_src(&cmd->convert_src, TRIG_NOW); err |= comedi_check_trigger_src(&cmd->scan_end_src, TRIG_COUNT); err |= comedi_check_trigger_src(&cmd->stop_src, TRIG_COUNT | TRIG_NONE); if (err) { mutex_unlock(&devpriv->mut); return 1; } /* Step 2a : make sure trigger sources are unique */ err |= comedi_check_trigger_is_unique(cmd->start_src); err |= comedi_check_trigger_is_unique(cmd->stop_src); /* Step 2b : and mutually compatible */ if (err) return 2; /* Step 3: check if arguments are trivially valid */ err |= comedi_check_trigger_arg_is(&cmd->start_arg, 0); err |= comedi_check_trigger_arg_min(&cmd->scan_begin_arg, 1000000); err |= comedi_check_trigger_arg_is(&cmd->scan_end_arg, cmd->chanlist_len); if (cmd->stop_src == TRIG_COUNT) err |= comedi_check_trigger_arg_min(&cmd->stop_arg, 1); else /* TRIG_NONE */ err |= comedi_check_trigger_arg_is(&cmd->stop_arg, 0); if (err) return 3; /* Step 4: fix up any arguments */ tmp = rounddown(cmd->scan_begin_arg, 1000000); err |= comedi_check_trigger_arg_is(&cmd->scan_begin_arg, tmp); if (err) return 4; return 0; } static int usbduxsigma_ao_cmd(struct comedi_device *dev, struct comedi_subdevice *s) { struct usbduxsigma_private *devpriv = dev->private; struct comedi_cmd *cmd = &s->async->cmd; int ret; mutex_lock(&devpriv->mut); /* * For now, only "scan" timing is supported. A future version may * support "convert" timing in high speed mode. * * Timing of the scan: every 1ms all channels updated at once. */ devpriv->ao_timer = cmd->scan_begin_arg / 1000000; devpriv->ao_counter = devpriv->ao_timer; if (cmd->start_src == TRIG_NOW) { /* enable this acquisition operation */ devpriv->ao_cmd_running = 1; ret = usbduxsigma_submit_urbs(dev, devpriv->ao_urbs, devpriv->n_ao_urbs, 0); if (ret < 0) { devpriv->ao_cmd_running = 0; mutex_unlock(&devpriv->mut); return ret; } s->async->inttrig = NULL; } else { /* TRIG_INT */ s->async->inttrig = usbduxsigma_ao_inttrig; } mutex_unlock(&devpriv->mut); return 0; } static int usbduxsigma_dio_insn_config(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { int ret; ret = comedi_dio_insn_config(dev, s, insn, data, 0); if (ret) return ret; /* * We don't tell the firmware here as it would take 8 frames * to submit the information. We do it in the (*insn_bits). */ return insn->n; } static int usbduxsigma_dio_insn_bits(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct usbduxsigma_private *devpriv = dev->private; int ret; mutex_lock(&devpriv->mut); comedi_dio_update_state(s, data); /* Always update the hardware. See the (*insn_config). */ devpriv->dux_commands[1] = s->io_bits & 0xff; devpriv->dux_commands[4] = s->state & 0xff; devpriv->dux_commands[2] = (s->io_bits >> 8) & 0xff; devpriv->dux_commands[5] = (s->state >> 8) & 0xff; devpriv->dux_commands[3] = (s->io_bits >> 16) & 0xff; devpriv->dux_commands[6] = (s->state >> 16) & 0xff; ret = usbbuxsigma_send_cmd(dev, USBDUXSIGMA_DIO_BITS_CMD); if (ret < 0) goto done; ret = usbduxsigma_receive_cmd(dev, USBDUXSIGMA_DIO_BITS_CMD); if (ret < 0) goto done; s->state = devpriv->insn_buf[1] | (devpriv->insn_buf[2] << 8) | (devpriv->insn_buf[3] << 16); data[1] = s->state; ret = insn->n; done: mutex_unlock(&devpriv->mut); return ret; } static void usbduxsigma_pwm_stop(struct comedi_device *dev, int do_unlink) { struct usbduxsigma_private *devpriv = dev->private; if (do_unlink) { if (devpriv->pwm_urb) usb_kill_urb(devpriv->pwm_urb); } devpriv->pwm_cmd_running = 0; } static int usbduxsigma_pwm_cancel(struct comedi_device *dev, struct comedi_subdevice *s) { struct usbduxsigma_private *devpriv = dev->private; /* unlink only if it is really running */ usbduxsigma_pwm_stop(dev, devpriv->pwm_cmd_running); return usbbuxsigma_send_cmd(dev, USBDUXSIGMA_PWM_OFF_CMD); } static void usbduxsigma_pwm_urb_complete(struct urb *urb) { struct comedi_device *dev = urb->context; struct usbduxsigma_private *devpriv = dev->private; int ret; switch (urb->status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: case -ECONNABORTED: /* happens after an unlink command */ if (devpriv->pwm_cmd_running) usbduxsigma_pwm_stop(dev, 0); /* w/o unlink */ return; default: /* a real error */ if (devpriv->pwm_cmd_running) { dev_err(dev->class_dev, "non-zero urb status (%d)\n", urb->status); usbduxsigma_pwm_stop(dev, 0); /* w/o unlink */ } return; } if (!devpriv->pwm_cmd_running) return; urb->transfer_buffer_length = devpriv->pwm_buf_sz; urb->dev = comedi_to_usb_dev(dev); urb->status = 0; ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret < 0) { dev_err(dev->class_dev, "urb resubmit failed (%d)\n", ret); if (ret == -EL2NSYNC) dev_err(dev->class_dev, "buggy USB host controller or bug in IRQ handler\n"); usbduxsigma_pwm_stop(dev, 0); /* w/o unlink */ } } static int usbduxsigma_submit_pwm_urb(struct comedi_device *dev) { struct usb_device *usb = comedi_to_usb_dev(dev); struct usbduxsigma_private *devpriv = dev->private; struct urb *urb = devpriv->pwm_urb; /* in case of a resubmission after an unlink... */ usb_fill_bulk_urb(urb, usb, usb_sndbulkpipe(usb, 4), urb->transfer_buffer, devpriv->pwm_buf_sz, usbduxsigma_pwm_urb_complete, dev); return usb_submit_urb(urb, GFP_ATOMIC); } static int usbduxsigma_pwm_period(struct comedi_device *dev, struct comedi_subdevice *s, unsigned int period) { struct usbduxsigma_private *devpriv = dev->private; int fx2delay; if (period < MIN_PWM_PERIOD) return -EAGAIN; fx2delay = (period / (6 * 512 * 1000 / 33)) - 6; if (fx2delay > 255) return -EAGAIN; devpriv->pwm_delay = fx2delay; devpriv->pwm_period = period; return 0; } static int usbduxsigma_pwm_start(struct comedi_device *dev, struct comedi_subdevice *s) { struct usbduxsigma_private *devpriv = dev->private; int ret; if (devpriv->pwm_cmd_running) return 0; devpriv->dux_commands[1] = devpriv->pwm_delay; ret = usbbuxsigma_send_cmd(dev, USBDUXSIGMA_PWM_ON_CMD); if (ret < 0) return ret; memset(devpriv->pwm_urb->transfer_buffer, 0, devpriv->pwm_buf_sz); devpriv->pwm_cmd_running = 1; ret = usbduxsigma_submit_pwm_urb(dev); if (ret < 0) { devpriv->pwm_cmd_running = 0; return ret; } return 0; } static void usbduxsigma_pwm_pattern(struct comedi_device *dev, struct comedi_subdevice *s, unsigned int chan, unsigned int value, unsigned int sign) { struct usbduxsigma_private *devpriv = dev->private; char pwm_mask = (1 << chan); /* DIO bit for the PWM data */ char sgn_mask = (16 << chan); /* DIO bit for the sign */ char *buf = (char *)(devpriv->pwm_urb->transfer_buffer); int szbuf = devpriv->pwm_buf_sz; int i; for (i = 0; i < szbuf; i++) { char c = *buf; c &= ~pwm_mask; if (i < value) c |= pwm_mask; if (!sign) c &= ~sgn_mask; else c |= sgn_mask; *buf++ = c; } } static int usbduxsigma_pwm_write(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned int chan = CR_CHAN(insn->chanspec); /* * It doesn't make sense to support more than one value here * because it would just overwrite the PWM buffer. */ if (insn->n != 1) return -EINVAL; /* * The sign is set via a special INSN only, this gives us 8 bits * for normal operation, sign is 0 by default. */ usbduxsigma_pwm_pattern(dev, s, chan, data[0], 0); return insn->n; } static int usbduxsigma_pwm_config(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct usbduxsigma_private *devpriv = dev->private; unsigned int chan = CR_CHAN(insn->chanspec); switch (data[0]) { case INSN_CONFIG_ARM: /* * if not zero the PWM is limited to a certain time which is * not supported here */ if (data[1] != 0) return -EINVAL; return usbduxsigma_pwm_start(dev, s); case INSN_CONFIG_DISARM: return usbduxsigma_pwm_cancel(dev, s); case INSN_CONFIG_GET_PWM_STATUS: data[1] = devpriv->pwm_cmd_running; return 0; case INSN_CONFIG_PWM_SET_PERIOD: return usbduxsigma_pwm_period(dev, s, data[1]); case INSN_CONFIG_PWM_GET_PERIOD: data[1] = devpriv->pwm_period; return 0; case INSN_CONFIG_PWM_SET_H_BRIDGE: /* * data[1] = value * data[2] = sign (for a relay) */ usbduxsigma_pwm_pattern(dev, s, chan, data[1], (data[2] != 0)); return 0; case INSN_CONFIG_PWM_GET_H_BRIDGE: /* values are not kept in this driver, nothing to return */ return -EINVAL; } return -EINVAL; } static int usbduxsigma_getstatusinfo(struct comedi_device *dev, int chan) { struct comedi_subdevice *s = dev->read_subdev; struct usbduxsigma_private *devpriv = dev->private; u8 sysred; u32 val; int ret; switch (chan) { default: case 0: sysred = 0; /* ADC zero */ break; case 1: sysred = 1; /* ADC offset */ break; case 2: sysred = 4; /* VCC */ break; case 3: sysred = 8; /* temperature */ break; case 4: sysred = 16; /* gain */ break; case 5: sysred = 32; /* ref */ break; } devpriv->dux_commands[1] = 0x12; /* CONFIG0 */ devpriv->dux_commands[2] = 0x80; /* CONFIG1: 2kHz sampling rate */ devpriv->dux_commands[3] = 0x00; /* CONFIG3: diff. channels off */ devpriv->dux_commands[4] = 0; devpriv->dux_commands[5] = 0; devpriv->dux_commands[6] = sysred; ret = usbbuxsigma_send_cmd(dev, USBDUXSIGMA_SINGLE_AD_CMD); if (ret < 0) return ret; ret = usbduxsigma_receive_cmd(dev, USBDUXSIGMA_SINGLE_AD_CMD); if (ret < 0) return ret; /* 32 bits big endian from the A/D converter */ val = be32_to_cpu(get_unaligned((__be32 *)(devpriv->insn_buf + 1))); val &= 0x00ffffff; /* strip status byte */ return (int)comedi_offset_munge(s, val); } static int usbduxsigma_firmware_upload(struct comedi_device *dev, const u8 *data, size_t size, unsigned long context) { struct usb_device *usb = comedi_to_usb_dev(dev); u8 *buf; u8 *tmp; int ret; if (!data) return 0; if (size > FIRMWARE_MAX_LEN) { dev_err(dev->class_dev, "firmware binary too large for FX2\n"); return -ENOMEM; } /* we generate a local buffer for the firmware */ buf = kmemdup(data, size, GFP_KERNEL); if (!buf) return -ENOMEM; /* we need a malloc'ed buffer for usb_control_msg() */ tmp = kmalloc(1, GFP_KERNEL); if (!tmp) { kfree(buf); return -ENOMEM; } /* stop the current firmware on the device */ *tmp = 1; /* 7f92 to one */ ret = usb_control_msg(usb, usb_sndctrlpipe(usb, 0), USBDUXSUB_FIRMWARE, VENDOR_DIR_OUT, USBDUXSUB_CPUCS, 0x0000, tmp, 1, BULK_TIMEOUT); if (ret < 0) { dev_err(dev->class_dev, "can not stop firmware\n"); goto done; } /* upload the new firmware to the device */ ret = usb_control_msg(usb, usb_sndctrlpipe(usb, 0), USBDUXSUB_FIRMWARE, VENDOR_DIR_OUT, 0, 0x0000, buf, size, BULK_TIMEOUT); if (ret < 0) { dev_err(dev->class_dev, "firmware upload failed\n"); goto done; } /* start the new firmware on the device */ *tmp = 0; /* 7f92 to zero */ ret = usb_control_msg(usb, usb_sndctrlpipe(usb, 0), USBDUXSUB_FIRMWARE, VENDOR_DIR_OUT, USBDUXSUB_CPUCS, 0x0000, tmp, 1, BULK_TIMEOUT); if (ret < 0) dev_err(dev->class_dev, "can not start firmware\n"); done: kfree(tmp); kfree(buf); return ret; } static int usbduxsigma_alloc_usb_buffers(struct comedi_device *dev) { struct usb_device *usb = comedi_to_usb_dev(dev); struct usbduxsigma_private *devpriv = dev->private; struct urb *urb; int i; devpriv->dux_commands = kzalloc(SIZEOFDUXBUFFER, GFP_KERNEL); devpriv->in_buf = kzalloc(SIZEINBUF, GFP_KERNEL); devpriv->insn_buf = kzalloc(SIZEINSNBUF, GFP_KERNEL); devpriv->ai_urbs = kcalloc(devpriv->n_ai_urbs, sizeof(urb), GFP_KERNEL); devpriv->ao_urbs = kcalloc(devpriv->n_ao_urbs, sizeof(urb), GFP_KERNEL); if (!devpriv->dux_commands || !devpriv->in_buf || !devpriv->insn_buf || !devpriv->ai_urbs || !devpriv->ao_urbs) return -ENOMEM; for (i = 0; i < devpriv->n_ai_urbs; i++) { /* one frame: 1ms */ urb = usb_alloc_urb(1, GFP_KERNEL); if (!urb) return -ENOMEM; devpriv->ai_urbs[i] = urb; urb->dev = usb; /* will be filled later with a pointer to the comedi-device */ /* and ONLY then the urb should be submitted */ urb->context = NULL; urb->pipe = usb_rcvisocpipe(usb, 6); urb->transfer_flags = URB_ISO_ASAP; urb->transfer_buffer = kzalloc(SIZEINBUF, GFP_KERNEL); if (!urb->transfer_buffer) return -ENOMEM; urb->complete = usbduxsigma_ai_urb_complete; urb->number_of_packets = 1; urb->transfer_buffer_length = SIZEINBUF; urb->iso_frame_desc[0].offset = 0; urb->iso_frame_desc[0].length = SIZEINBUF; } for (i = 0; i < devpriv->n_ao_urbs; i++) { /* one frame: 1ms */ urb = usb_alloc_urb(1, GFP_KERNEL); if (!urb) return -ENOMEM; devpriv->ao_urbs[i] = urb; urb->dev = usb; /* will be filled later with a pointer to the comedi-device */ /* and ONLY then the urb should be submitted */ urb->context = NULL; urb->pipe = usb_sndisocpipe(usb, 2); urb->transfer_flags = URB_ISO_ASAP; urb->transfer_buffer = kzalloc(SIZEOUTBUF, GFP_KERNEL); if (!urb->transfer_buffer) return -ENOMEM; urb->complete = usbduxsigma_ao_urb_complete; urb->number_of_packets = 1; urb->transfer_buffer_length = SIZEOUTBUF; urb->iso_frame_desc[0].offset = 0; urb->iso_frame_desc[0].length = SIZEOUTBUF; urb->interval = 1; /* (u)frames */ } if (devpriv->pwm_buf_sz) { urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) return -ENOMEM; devpriv->pwm_urb = urb; urb->transfer_buffer = kzalloc(devpriv->pwm_buf_sz, GFP_KERNEL); if (!urb->transfer_buffer) return -ENOMEM; } return 0; } static void usbduxsigma_free_usb_buffers(struct comedi_device *dev) { struct usbduxsigma_private *devpriv = dev->private; struct urb *urb; int i; urb = devpriv->pwm_urb; if (urb) { kfree(urb->transfer_buffer); usb_free_urb(urb); } if (devpriv->ao_urbs) { for (i = 0; i < devpriv->n_ao_urbs; i++) { urb = devpriv->ao_urbs[i]; if (urb) { kfree(urb->transfer_buffer); usb_free_urb(urb); } } kfree(devpriv->ao_urbs); } if (devpriv->ai_urbs) { for (i = 0; i < devpriv->n_ai_urbs; i++) { urb = devpriv->ai_urbs[i]; if (urb) { kfree(urb->transfer_buffer); usb_free_urb(urb); } } kfree(devpriv->ai_urbs); } kfree(devpriv->insn_buf); kfree(devpriv->in_buf); kfree(devpriv->dux_commands); } static int usbduxsigma_auto_attach(struct comedi_device *dev, unsigned long context_unused) { struct usb_interface *intf = comedi_to_usb_interface(dev); struct usb_device *usb = comedi_to_usb_dev(dev); struct usbduxsigma_private *devpriv; struct comedi_subdevice *s; int offset; int ret; devpriv = comedi_alloc_devpriv(dev, sizeof(*devpriv)); if (!devpriv) return -ENOMEM; mutex_init(&devpriv->mut); usb_set_intfdata(intf, devpriv); devpriv->high_speed = (usb->speed == USB_SPEED_HIGH); if (devpriv->high_speed) { devpriv->n_ai_urbs = NUMOFINBUFFERSHIGH; devpriv->n_ao_urbs = NUMOFOUTBUFFERSHIGH; devpriv->pwm_buf_sz = 512; } else { devpriv->n_ai_urbs = NUMOFINBUFFERSFULL; devpriv->n_ao_urbs = NUMOFOUTBUFFERSFULL; } ret = usbduxsigma_alloc_usb_buffers(dev); if (ret) return ret; /* setting to alternate setting 3: enabling iso ep and bulk ep. */ ret = usb_set_interface(usb, intf->altsetting->desc.bInterfaceNumber, 3); if (ret < 0) { dev_err(dev->class_dev, "could not set alternate setting 3 in high speed\n"); return ret; } ret = comedi_load_firmware(dev, &usb->dev, FIRMWARE, usbduxsigma_firmware_upload, 0); if (ret) return ret; ret = comedi_alloc_subdevices(dev, (devpriv->high_speed) ? 4 : 3); if (ret) return ret; /* Analog Input subdevice */ s = &dev->subdevices[0]; dev->read_subdev = s; s->type = COMEDI_SUBD_AI; s->subdev_flags = SDF_READABLE | SDF_GROUND | SDF_CMD_READ | SDF_LSAMPL; s->n_chan = NUMCHANNELS; s->len_chanlist = NUMCHANNELS; s->maxdata = 0x00ffffff; s->range_table = &usbduxsigma_ai_range; s->insn_read = usbduxsigma_ai_insn_read; s->do_cmdtest = usbduxsigma_ai_cmdtest; s->do_cmd = usbduxsigma_ai_cmd; s->cancel = usbduxsigma_ai_cancel; /* Analog Output subdevice */ s = &dev->subdevices[1]; dev->write_subdev = s; s->type = COMEDI_SUBD_AO; s->subdev_flags = SDF_WRITABLE | SDF_GROUND | SDF_CMD_WRITE; s->n_chan = 4; s->len_chanlist = s->n_chan; s->maxdata = 0x00ff; s->range_table = &range_unipolar2_5; s->insn_write = usbduxsigma_ao_insn_write; s->insn_read = usbduxsigma_ao_insn_read; s->do_cmdtest = usbduxsigma_ao_cmdtest; s->do_cmd = usbduxsigma_ao_cmd; s->cancel = usbduxsigma_ao_cancel; ret = comedi_alloc_subdev_readback(s); if (ret) return ret; /* Digital I/O subdevice */ s = &dev->subdevices[2]; s->type = COMEDI_SUBD_DIO; s->subdev_flags = SDF_READABLE | SDF_WRITABLE; s->n_chan = 24; s->maxdata = 1; s->range_table = &range_digital; s->insn_bits = usbduxsigma_dio_insn_bits; s->insn_config = usbduxsigma_dio_insn_config; if (devpriv->high_speed) { /* Timer / pwm subdevice */ s = &dev->subdevices[3]; s->type = COMEDI_SUBD_PWM; s->subdev_flags = SDF_WRITABLE | SDF_PWM_HBRIDGE; s->n_chan = 8; s->maxdata = devpriv->pwm_buf_sz; s->insn_write = usbduxsigma_pwm_write; s->insn_config = usbduxsigma_pwm_config; usbduxsigma_pwm_period(dev, s, PWM_DEFAULT_PERIOD); } offset = usbduxsigma_getstatusinfo(dev, 0); if (offset < 0) { dev_err(dev->class_dev, "Communication to USBDUXSIGMA failed! Check firmware and cabling.\n"); return offset; } dev_info(dev->class_dev, "ADC_zero = %x\n", offset); return 0; } static void usbduxsigma_detach(struct comedi_device *dev) { struct usb_interface *intf = comedi_to_usb_interface(dev); struct usbduxsigma_private *devpriv = dev->private; usb_set_intfdata(intf, NULL); if (!devpriv) return; mutex_lock(&devpriv->mut); /* force unlink all urbs */ usbduxsigma_ai_stop(dev, 1); usbduxsigma_ao_stop(dev, 1); usbduxsigma_pwm_stop(dev, 1); usbduxsigma_free_usb_buffers(dev); mutex_unlock(&devpriv->mut); mutex_destroy(&devpriv->mut); } static struct comedi_driver usbduxsigma_driver = { .driver_name = "usbduxsigma", .module = THIS_MODULE, .auto_attach = usbduxsigma_auto_attach, .detach = usbduxsigma_detach, }; static int usbduxsigma_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { return comedi_usb_auto_config(intf, &usbduxsigma_driver, 0); } static const struct usb_device_id usbduxsigma_usb_table[] = { { USB_DEVICE(0x13d8, 0x0020) }, { USB_DEVICE(0x13d8, 0x0021) }, { USB_DEVICE(0x13d8, 0x0022) }, { } }; MODULE_DEVICE_TABLE(usb, usbduxsigma_usb_table); static struct usb_driver usbduxsigma_usb_driver = { .name = "usbduxsigma", .probe = usbduxsigma_usb_probe, .disconnect = comedi_usb_auto_unconfig, .id_table = usbduxsigma_usb_table, }; module_comedi_usb_driver(usbduxsigma_driver, usbduxsigma_usb_driver); MODULE_AUTHOR("Bernd Porr, mail@berndporr.me.uk"); MODULE_DESCRIPTION("Stirling/ITL USB-DUX SIGMA -- mail@berndporr.me.uk"); MODULE_LICENSE("GPL"); MODULE_FIRMWARE(FIRMWARE); |
3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | // SPDX-License-Identifier: GPL-2.0-only /* Kernel module to match running CPU */ /* * Might be used to distribute connections on several daemons, if * RPS (Remote Packet Steering) is enabled or NIC is multiqueue capable, * each RX queue IRQ affined to one CPU (1:1 mapping) */ /* (C) 2010 Eric Dumazet */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter/xt_cpu.h> #include <linux/netfilter/x_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>"); MODULE_DESCRIPTION("Xtables: CPU match"); MODULE_ALIAS("ipt_cpu"); MODULE_ALIAS("ip6t_cpu"); static int cpu_mt_check(const struct xt_mtchk_param *par) { const struct xt_cpu_info *info = par->matchinfo; if (info->invert & ~1) return -EINVAL; return 0; } static bool cpu_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_cpu_info *info = par->matchinfo; return (info->cpu == smp_processor_id()) ^ info->invert; } static struct xt_match cpu_mt_reg __read_mostly = { .name = "cpu", .revision = 0, .family = NFPROTO_UNSPEC, .checkentry = cpu_mt_check, .match = cpu_mt, .matchsize = sizeof(struct xt_cpu_info), .me = THIS_MODULE, }; static int __init cpu_mt_init(void) { return xt_register_match(&cpu_mt_reg); } static void __exit cpu_mt_exit(void) { xt_unregister_match(&cpu_mt_reg); } module_init(cpu_mt_init); module_exit(cpu_mt_exit); |
25 26 26 26 17 17 17 1 1 1 1 1 1 1 1 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 | // SPDX-License-Identifier: GPL-2.0-only // Copyright (c) 2020 Facebook Inc. #include <linux/debugfs.h> #include <linux/netdevice.h> #include <linux/slab.h> #include <net/udp_tunnel.h> #include "netdevsim.h" static int nsim_udp_tunnel_set_port(struct net_device *dev, unsigned int table, unsigned int entry, struct udp_tunnel_info *ti) { struct netdevsim *ns = netdev_priv(dev); int ret; ret = -ns->udp_ports.inject_error; ns->udp_ports.inject_error = 0; if (ns->udp_ports.sleep) msleep(ns->udp_ports.sleep); if (!ret) { if (ns->udp_ports.ports[table][entry]) { WARN(1, "entry already in use\n"); ret = -EBUSY; } else { ns->udp_ports.ports[table][entry] = be16_to_cpu(ti->port) << 16 | ti->type; } } netdev_info(dev, "set [%d, %d] type %d family %d port %d - %d\n", table, entry, ti->type, ti->sa_family, ntohs(ti->port), ret); return ret; } static int nsim_udp_tunnel_unset_port(struct net_device *dev, unsigned int table, unsigned int entry, struct udp_tunnel_info *ti) { struct netdevsim *ns = netdev_priv(dev); int ret; ret = -ns->udp_ports.inject_error; ns->udp_ports.inject_error = 0; if (ns->udp_ports.sleep) msleep(ns->udp_ports.sleep); if (!ret) { u32 val = be16_to_cpu(ti->port) << 16 | ti->type; if (val == ns->udp_ports.ports[table][entry]) { ns->udp_ports.ports[table][entry] = 0; } else { WARN(1, "entry not installed %x vs %x\n", val, ns->udp_ports.ports[table][entry]); ret = -ENOENT; } } netdev_info(dev, "unset [%d, %d] type %d family %d port %d - %d\n", table, entry, ti->type, ti->sa_family, ntohs(ti->port), ret); return ret; } static int nsim_udp_tunnel_sync_table(struct net_device *dev, unsigned int table) { struct netdevsim *ns = netdev_priv(dev); struct udp_tunnel_info ti; unsigned int i; int ret; ret = -ns->udp_ports.inject_error; ns->udp_ports.inject_error = 0; for (i = 0; i < NSIM_UDP_TUNNEL_N_PORTS; i++) { udp_tunnel_nic_get_port(dev, table, i, &ti); ns->udp_ports.ports[table][i] = be16_to_cpu(ti.port) << 16 | ti.type; } return ret; } static const struct udp_tunnel_nic_info nsim_udp_tunnel_info = { .set_port = nsim_udp_tunnel_set_port, .unset_port = nsim_udp_tunnel_unset_port, .sync_table = nsim_udp_tunnel_sync_table, .tables = { { .n_entries = NSIM_UDP_TUNNEL_N_PORTS, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, { .n_entries = NSIM_UDP_TUNNEL_N_PORTS, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE | UDP_TUNNEL_TYPE_VXLAN_GPE, }, }, }; static ssize_t nsim_udp_tunnels_info_reset_write(struct file *file, const char __user *data, size_t count, loff_t *ppos) { struct net_device *dev = file->private_data; struct netdevsim *ns = netdev_priv(dev); rtnl_lock(); if (dev->reg_state == NETREG_REGISTERED) { memset(ns->udp_ports.ports, 0, sizeof(ns->udp_ports.__ports)); udp_tunnel_nic_reset_ntf(dev); } rtnl_unlock(); return count; } static const struct file_operations nsim_udp_tunnels_info_reset_fops = { .open = simple_open, .write = nsim_udp_tunnels_info_reset_write, .llseek = generic_file_llseek, .owner = THIS_MODULE, }; int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev, struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); struct udp_tunnel_nic_info *info; if (nsim_dev->udp_ports.shared && nsim_dev->udp_ports.open_only) { dev_err(&nsim_dev->nsim_bus_dev->dev, "shared can't be used in conjunction with open_only\n"); return -EINVAL; } if (!nsim_dev->udp_ports.shared) ns->udp_ports.ports = ns->udp_ports.__ports; else ns->udp_ports.ports = nsim_dev->udp_ports.__ports; ns->udp_ports.ddir = debugfs_create_dir("udp_ports", ns->nsim_dev_port->ddir); debugfs_create_u32("inject_error", 0600, ns->udp_ports.ddir, &ns->udp_ports.inject_error); ns->udp_ports.dfs_ports[0].array = ns->udp_ports.ports[0]; ns->udp_ports.dfs_ports[0].n_elements = NSIM_UDP_TUNNEL_N_PORTS; debugfs_create_u32_array("table0", 0400, ns->udp_ports.ddir, &ns->udp_ports.dfs_ports[0]); ns->udp_ports.dfs_ports[1].array = ns->udp_ports.ports[1]; ns->udp_ports.dfs_ports[1].n_elements = NSIM_UDP_TUNNEL_N_PORTS; debugfs_create_u32_array("table1", 0400, ns->udp_ports.ddir, &ns->udp_ports.dfs_ports[1]); debugfs_create_file("reset", 0200, ns->udp_ports.ddir, dev, &nsim_udp_tunnels_info_reset_fops); /* Note: it's not normal to allocate the info struct like this! * Drivers are expected to use a static const one, here we're testing. */ info = kmemdup(&nsim_udp_tunnel_info, sizeof(nsim_udp_tunnel_info), GFP_KERNEL); if (!info) return -ENOMEM; ns->udp_ports.sleep = nsim_dev->udp_ports.sleep; if (nsim_dev->udp_ports.sync_all) { info->set_port = NULL; info->unset_port = NULL; } else { info->sync_table = NULL; } if (ns->udp_ports.sleep) info->flags |= UDP_TUNNEL_NIC_INFO_MAY_SLEEP; if (nsim_dev->udp_ports.open_only) info->flags |= UDP_TUNNEL_NIC_INFO_OPEN_ONLY; if (nsim_dev->udp_ports.ipv4_only) info->flags |= UDP_TUNNEL_NIC_INFO_IPV4_ONLY; if (nsim_dev->udp_ports.shared) info->shared = &nsim_dev->udp_ports.utn_shared; if (nsim_dev->udp_ports.static_iana_vxlan) info->flags |= UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN; dev->udp_tunnel_nic_info = info; return 0; } void nsim_udp_tunnels_info_destroy(struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); debugfs_remove_recursive(ns->udp_ports.ddir); kfree(dev->udp_tunnel_nic_info); dev->udp_tunnel_nic_info = NULL; } void nsim_udp_tunnels_debugfs_create(struct nsim_dev *nsim_dev) { debugfs_create_bool("udp_ports_sync_all", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.sync_all); debugfs_create_bool("udp_ports_open_only", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.open_only); debugfs_create_bool("udp_ports_ipv4_only", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.ipv4_only); debugfs_create_bool("udp_ports_shared", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.shared); debugfs_create_bool("udp_ports_static_iana_vxlan", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.static_iana_vxlan); debugfs_create_u32("udp_ports_sleep", 0600, nsim_dev->ddir, &nsim_dev->udp_ports.sleep); } |
2 3 3 15 15 1 15 8 7 15 5 3 1 1 5 5 12 11 12 14 14 3 3 11 67 19 7 9 6 5 2 2 4 6 6 5 4 2 7 5 40 1 1 39 27 1 8 10 5 3 4 4 1 4 1 3 12 16 10 17 22 27 26 15 7 17 9 7 5 9 12 7 15 9 12 5 7 56 56 1 52 2 52 39 3 4 9 5 9 28 14 24 1 3 1 3 33 7 21 8 6 25 7 9 6 26 33 5 1 1 4 4 138 5 3 3 6 101 1 3 5 1 3 40 55 6 9 3 5 1 5 3 7 2 2 7 5 5 7 5 1963 1903 97 58 10 7 7 12 1 11 11 4 7 11 11 3 3 2 11 4 7 11 17 2 1 1 1 5 7 12 22 4 19 1 18 8 10 2 78 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 | // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) /* * bcm.c - Broadcast Manager to filter/send (cyclic) CAN content * * Copyright (c) 2002-2017 Volkswagen Group Electronic Research * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of Volkswagen nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * Alternatively, provided that this notice is retained in full, this * software may be distributed under the terms of the GNU General * Public License ("GPL") version 2, in which case the provisions of the * GPL apply INSTEAD OF those given above. * * The provided data structures and external interfaces from this code * are not restricted to be used by modules with a GPL compatible license. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * */ #include <linux/module.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/hrtimer.h> #include <linux/list.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/uio.h> #include <linux/net.h> #include <linux/netdevice.h> #include <linux/socket.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <linux/can.h> #include <linux/can/core.h> #include <linux/can/skb.h> #include <linux/can/bcm.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <net/sock.h> #include <net/net_namespace.h> /* * To send multiple CAN frame content within TX_SETUP or to filter * CAN messages with multiplex index within RX_SETUP, the number of * different filters is limited to 256 due to the one byte index value. */ #define MAX_NFRAMES 256 /* limit timers to 400 days for sending/timeouts */ #define BCM_TIMER_SEC_MAX (400 * 24 * 60 * 60) /* use of last_frames[index].flags */ #define RX_RECV 0x40 /* received data for this element */ #define RX_THR 0x80 /* element not been sent due to throttle feature */ #define BCM_CAN_FLAGS_MASK 0x3F /* to clean private flags after usage */ /* get best masking value for can_rx_register() for a given single can_id */ #define REGMASK(id) ((id & CAN_EFF_FLAG) ? \ (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \ (CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG)) MODULE_DESCRIPTION("PF_CAN broadcast manager protocol"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>"); MODULE_ALIAS("can-proto-2"); #define BCM_MIN_NAMELEN CAN_REQUIRED_SIZE(struct sockaddr_can, can_ifindex) /* * easy access to the first 64 bit of can(fd)_frame payload. cp->data is * 64 bit aligned so the offset has to be multiples of 8 which is ensured * by the only callers in bcm_rx_cmp_to_index() bcm_rx_handler(). */ static inline u64 get_u64(const struct canfd_frame *cp, int offset) { return *(u64 *)(cp->data + offset); } struct bcm_op { struct list_head list; struct rcu_head rcu; int ifindex; canid_t can_id; u32 flags; unsigned long frames_abs, frames_filtered; struct bcm_timeval ival1, ival2; struct hrtimer timer, thrtimer; ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg; int rx_ifindex; int cfsiz; u32 count; u32 nframes; u32 currframe; /* void pointers to arrays of struct can[fd]_frame */ void *frames; void *last_frames; struct canfd_frame sframe; struct canfd_frame last_sframe; struct sock *sk; struct net_device *rx_reg_dev; spinlock_t bcm_tx_lock; /* protect currframe/count in runtime updates */ }; struct bcm_sock { struct sock sk; int bound; int ifindex; struct list_head notifier; struct list_head rx_ops; struct list_head tx_ops; unsigned long dropped_usr_msgs; struct proc_dir_entry *bcm_proc_read; char procname [32]; /* inode number in decimal with \0 */ }; static LIST_HEAD(bcm_notifier_list); static DEFINE_SPINLOCK(bcm_notifier_lock); static struct bcm_sock *bcm_busy_notifier; static inline struct bcm_sock *bcm_sk(const struct sock *sk) { return (struct bcm_sock *)sk; } static inline ktime_t bcm_timeval_to_ktime(struct bcm_timeval tv) { return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC); } /* check limitations for timeval provided by user */ static bool bcm_is_invalid_tv(struct bcm_msg_head *msg_head) { if ((msg_head->ival1.tv_sec < 0) || (msg_head->ival1.tv_sec > BCM_TIMER_SEC_MAX) || (msg_head->ival1.tv_usec < 0) || (msg_head->ival1.tv_usec >= USEC_PER_SEC) || (msg_head->ival2.tv_sec < 0) || (msg_head->ival2.tv_sec > BCM_TIMER_SEC_MAX) || (msg_head->ival2.tv_usec < 0) || (msg_head->ival2.tv_usec >= USEC_PER_SEC)) return true; return false; } #define CFSIZ(flags) ((flags & CAN_FD_FRAME) ? CANFD_MTU : CAN_MTU) #define OPSIZ sizeof(struct bcm_op) #define MHSIZ sizeof(struct bcm_msg_head) /* * procfs functions */ #if IS_ENABLED(CONFIG_PROC_FS) static char *bcm_proc_getifname(struct net *net, char *result, int ifindex) { struct net_device *dev; if (!ifindex) return "any"; rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); if (dev) strcpy(result, dev->name); else strcpy(result, "???"); rcu_read_unlock(); return result; } static int bcm_proc_show(struct seq_file *m, void *v) { char ifname[IFNAMSIZ]; struct net *net = m->private; struct sock *sk = (struct sock *)PDE_DATA(m->file->f_inode); struct bcm_sock *bo = bcm_sk(sk); struct bcm_op *op; seq_printf(m, ">>> socket %pK", sk->sk_socket); seq_printf(m, " / sk %pK", sk); seq_printf(m, " / bo %pK", bo); seq_printf(m, " / dropped %lu", bo->dropped_usr_msgs); seq_printf(m, " / bound %s", bcm_proc_getifname(net, ifname, bo->ifindex)); seq_printf(m, " <<<\n"); rcu_read_lock(); list_for_each_entry_rcu(op, &bo->rx_ops, list) { unsigned long reduction; /* print only active entries & prevent division by zero */ if (!op->frames_abs) continue; seq_printf(m, "rx_op: %03X %-5s ", op->can_id, bcm_proc_getifname(net, ifname, op->ifindex)); if (op->flags & CAN_FD_FRAME) seq_printf(m, "(%u)", op->nframes); else seq_printf(m, "[%u]", op->nframes); seq_printf(m, "%c ", (op->flags & RX_CHECK_DLC) ? 'd' : ' '); if (op->kt_ival1) seq_printf(m, "timeo=%lld ", (long long)ktime_to_us(op->kt_ival1)); if (op->kt_ival2) seq_printf(m, "thr=%lld ", (long long)ktime_to_us(op->kt_ival2)); seq_printf(m, "# recv %ld (%ld) => reduction: ", op->frames_filtered, op->frames_abs); reduction = 100 - (op->frames_filtered * 100) / op->frames_abs; seq_printf(m, "%s%ld%%\n", (reduction == 100) ? "near " : "", reduction); } list_for_each_entry(op, &bo->tx_ops, list) { seq_printf(m, "tx_op: %03X %s ", op->can_id, bcm_proc_getifname(net, ifname, op->ifindex)); if (op->flags & CAN_FD_FRAME) seq_printf(m, "(%u) ", op->nframes); else seq_printf(m, "[%u] ", op->nframes); if (op->kt_ival1) seq_printf(m, "t1=%lld ", (long long)ktime_to_us(op->kt_ival1)); if (op->kt_ival2) seq_printf(m, "t2=%lld ", (long long)ktime_to_us(op->kt_ival2)); seq_printf(m, "# sent %ld\n", op->frames_abs); } seq_putc(m, '\n'); rcu_read_unlock(); return 0; } #endif /* CONFIG_PROC_FS */ /* * bcm_can_tx - send the (next) CAN frame to the appropriate CAN interface * of the given bcm tx op */ static void bcm_can_tx(struct bcm_op *op) { struct sk_buff *skb; struct net_device *dev; struct canfd_frame *cf; int err; /* no target device? => exit */ if (!op->ifindex) return; /* read currframe under lock protection */ spin_lock_bh(&op->bcm_tx_lock); cf = op->frames + op->cfsiz * op->currframe; spin_unlock_bh(&op->bcm_tx_lock); dev = dev_get_by_index(sock_net(op->sk), op->ifindex); if (!dev) { /* RFC: should this bcm_op remove itself here? */ return; } skb = alloc_skb(op->cfsiz + sizeof(struct can_skb_priv), gfp_any()); if (!skb) goto out; can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; can_skb_prv(skb)->skbcnt = 0; skb_put_data(skb, cf, op->cfsiz); /* send with loopback */ skb->dev = dev; can_skb_set_owner(skb, op->sk); err = can_send(skb, 1); /* update currframe and count under lock protection */ spin_lock_bh(&op->bcm_tx_lock); if (!err) op->frames_abs++; op->currframe++; /* reached last frame? */ if (op->currframe >= op->nframes) op->currframe = 0; if (op->count > 0) op->count--; spin_unlock_bh(&op->bcm_tx_lock); out: dev_put(dev); } /* * bcm_send_to_user - send a BCM message to the userspace * (consisting of bcm_msg_head + x CAN frames) */ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, struct canfd_frame *frames, int has_timestamp) { struct sk_buff *skb; struct canfd_frame *firstframe; struct sockaddr_can *addr; struct sock *sk = op->sk; unsigned int datalen = head->nframes * op->cfsiz; int err; skb = alloc_skb(sizeof(*head) + datalen, gfp_any()); if (!skb) return; skb_put_data(skb, head, sizeof(*head)); if (head->nframes) { /* CAN frames starting here */ firstframe = (struct canfd_frame *)skb_tail_pointer(skb); skb_put_data(skb, frames, datalen); /* * the BCM uses the flags-element of the canfd_frame * structure for internal purposes. This is only * relevant for updates that are generated by the * BCM, where nframes is 1 */ if (head->nframes == 1) firstframe->flags &= BCM_CAN_FLAGS_MASK; } if (has_timestamp) { /* restore rx timestamp */ skb->tstamp = op->rx_stamp; } /* * Put the datagram to the queue so that bcm_recvmsg() can * get it from there. We need to pass the interface index to * bcm_recvmsg(). We pass a whole struct sockaddr_can in skb->cb * containing the interface index. */ sock_skb_cb_check_size(sizeof(struct sockaddr_can)); addr = (struct sockaddr_can *)skb->cb; memset(addr, 0, sizeof(*addr)); addr->can_family = AF_CAN; addr->can_ifindex = op->rx_ifindex; err = sock_queue_rcv_skb(sk, skb); if (err < 0) { struct bcm_sock *bo = bcm_sk(sk); kfree_skb(skb); /* don't care about overflows in this statistic */ bo->dropped_usr_msgs++; } } static bool bcm_tx_set_expiry(struct bcm_op *op, struct hrtimer *hrt) { ktime_t ival; if (op->kt_ival1 && op->count) ival = op->kt_ival1; else if (op->kt_ival2) ival = op->kt_ival2; else return false; hrtimer_set_expires(hrt, ktime_add(ktime_get(), ival)); return true; } static void bcm_tx_start_timer(struct bcm_op *op) { if (bcm_tx_set_expiry(op, &op->timer)) hrtimer_start_expires(&op->timer, HRTIMER_MODE_ABS_SOFT); } /* bcm_tx_timeout_handler - performs cyclic CAN frame transmissions */ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) { struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); struct bcm_msg_head msg_head; if (op->kt_ival1 && (op->count > 0)) { bcm_can_tx(op); if (!op->count && (op->flags & TX_COUNTEVT)) { /* create notification to user */ memset(&msg_head, 0, sizeof(msg_head)); msg_head.opcode = TX_EXPIRED; msg_head.flags = op->flags; msg_head.count = op->count; msg_head.ival1 = op->ival1; msg_head.ival2 = op->ival2; msg_head.can_id = op->can_id; msg_head.nframes = 0; bcm_send_to_user(op, &msg_head, NULL, 0); } } else if (op->kt_ival2) { bcm_can_tx(op); } return bcm_tx_set_expiry(op, &op->timer) ? HRTIMER_RESTART : HRTIMER_NORESTART; } /* * bcm_rx_changed - create a RX_CHANGED notification due to changed content */ static void bcm_rx_changed(struct bcm_op *op, struct canfd_frame *data) { struct bcm_msg_head head; /* update statistics */ op->frames_filtered++; /* prevent statistics overflow */ if (op->frames_filtered > ULONG_MAX/100) op->frames_filtered = op->frames_abs = 0; /* this element is not throttled anymore */ data->flags &= (BCM_CAN_FLAGS_MASK|RX_RECV); memset(&head, 0, sizeof(head)); head.opcode = RX_CHANGED; head.flags = op->flags; head.count = op->count; head.ival1 = op->ival1; head.ival2 = op->ival2; head.can_id = op->can_id; head.nframes = 1; bcm_send_to_user(op, &head, data, 1); } /* * bcm_rx_update_and_send - process a detected relevant receive content change * 1. update the last received data * 2. send a notification to the user (if possible) */ static void bcm_rx_update_and_send(struct bcm_op *op, struct canfd_frame *lastdata, const struct canfd_frame *rxdata) { memcpy(lastdata, rxdata, op->cfsiz); /* mark as used and throttled by default */ lastdata->flags |= (RX_RECV|RX_THR); /* throttling mode inactive ? */ if (!op->kt_ival2) { /* send RX_CHANGED to the user immediately */ bcm_rx_changed(op, lastdata); return; } /* with active throttling timer we are just done here */ if (hrtimer_active(&op->thrtimer)) return; /* first reception with enabled throttling mode */ if (!op->kt_lastmsg) goto rx_changed_settime; /* got a second frame inside a potential throttle period? */ if (ktime_us_delta(ktime_get(), op->kt_lastmsg) < ktime_to_us(op->kt_ival2)) { /* do not send the saved data - only start throttle timer */ hrtimer_start(&op->thrtimer, ktime_add(op->kt_lastmsg, op->kt_ival2), HRTIMER_MODE_ABS_SOFT); return; } /* the gap was that big, that throttling was not needed here */ rx_changed_settime: bcm_rx_changed(op, lastdata); op->kt_lastmsg = ktime_get(); } /* * bcm_rx_cmp_to_index - (bit)compares the currently received data to formerly * received data stored in op->last_frames[] */ static void bcm_rx_cmp_to_index(struct bcm_op *op, unsigned int index, const struct canfd_frame *rxdata) { struct canfd_frame *cf = op->frames + op->cfsiz * index; struct canfd_frame *lcf = op->last_frames + op->cfsiz * index; int i; /* * no one uses the MSBs of flags for comparison, * so we use it here to detect the first time of reception */ if (!(lcf->flags & RX_RECV)) { /* received data for the first time => send update to user */ bcm_rx_update_and_send(op, lcf, rxdata); return; } /* do a real check in CAN frame data section */ for (i = 0; i < rxdata->len; i += 8) { if ((get_u64(cf, i) & get_u64(rxdata, i)) != (get_u64(cf, i) & get_u64(lcf, i))) { bcm_rx_update_and_send(op, lcf, rxdata); return; } } if (op->flags & RX_CHECK_DLC) { /* do a real check in CAN frame length */ if (rxdata->len != lcf->len) { bcm_rx_update_and_send(op, lcf, rxdata); return; } } } /* * bcm_rx_starttimer - enable timeout monitoring for CAN frame reception */ static void bcm_rx_starttimer(struct bcm_op *op) { if (op->flags & RX_NO_AUTOTIMER) return; if (op->kt_ival1) hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL_SOFT); } /* bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out */ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) { struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); struct bcm_msg_head msg_head; /* if user wants to be informed, when cyclic CAN-Messages come back */ if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) { /* clear received CAN frames to indicate 'nothing received' */ memset(op->last_frames, 0, op->nframes * op->cfsiz); } /* create notification to user */ memset(&msg_head, 0, sizeof(msg_head)); msg_head.opcode = RX_TIMEOUT; msg_head.flags = op->flags; msg_head.count = op->count; msg_head.ival1 = op->ival1; msg_head.ival2 = op->ival2; msg_head.can_id = op->can_id; msg_head.nframes = 0; bcm_send_to_user(op, &msg_head, NULL, 0); return HRTIMER_NORESTART; } /* * bcm_rx_do_flush - helper for bcm_rx_thr_flush */ static inline int bcm_rx_do_flush(struct bcm_op *op, unsigned int index) { struct canfd_frame *lcf = op->last_frames + op->cfsiz * index; if ((op->last_frames) && (lcf->flags & RX_THR)) { bcm_rx_changed(op, lcf); return 1; } return 0; } /* * bcm_rx_thr_flush - Check for throttled data and send it to the userspace */ static int bcm_rx_thr_flush(struct bcm_op *op) { int updated = 0; if (op->nframes > 1) { unsigned int i; /* for MUX filter we start at index 1 */ for (i = 1; i < op->nframes; i++) updated += bcm_rx_do_flush(op, i); } else { /* for RX_FILTER_ID and simple filter */ updated += bcm_rx_do_flush(op, 0); } return updated; } /* * bcm_rx_thr_handler - the time for blocked content updates is over now: * Check for throttled data and send it to the userspace */ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer) { struct bcm_op *op = container_of(hrtimer, struct bcm_op, thrtimer); if (bcm_rx_thr_flush(op)) { hrtimer_forward(hrtimer, ktime_get(), op->kt_ival2); return HRTIMER_RESTART; } else { /* rearm throttle handling */ op->kt_lastmsg = 0; return HRTIMER_NORESTART; } } /* * bcm_rx_handler - handle a CAN frame reception */ static void bcm_rx_handler(struct sk_buff *skb, void *data) { struct bcm_op *op = (struct bcm_op *)data; const struct canfd_frame *rxframe = (struct canfd_frame *)skb->data; unsigned int i; if (op->can_id != rxframe->can_id) return; /* make sure to handle the correct frame type (CAN / CAN FD) */ if (skb->len != op->cfsiz) return; /* disable timeout */ hrtimer_cancel(&op->timer); /* save rx timestamp */ op->rx_stamp = skb->tstamp; /* save originator for recvfrom() */ op->rx_ifindex = skb->dev->ifindex; /* update statistics */ op->frames_abs++; if (op->flags & RX_RTR_FRAME) { /* send reply for RTR-request (placed in op->frames[0]) */ bcm_can_tx(op); return; } if (op->flags & RX_FILTER_ID) { /* the easiest case */ bcm_rx_update_and_send(op, op->last_frames, rxframe); goto rx_starttimer; } if (op->nframes == 1) { /* simple compare with index 0 */ bcm_rx_cmp_to_index(op, 0, rxframe); goto rx_starttimer; } if (op->nframes > 1) { /* * multiplex compare * * find the first multiplex mask that fits. * Remark: The MUX-mask is stored in index 0 - but only the * first 64 bits of the frame data[] are relevant (CAN FD) */ for (i = 1; i < op->nframes; i++) { if ((get_u64(op->frames, 0) & get_u64(rxframe, 0)) == (get_u64(op->frames, 0) & get_u64(op->frames + op->cfsiz * i, 0))) { bcm_rx_cmp_to_index(op, i, rxframe); break; } } } rx_starttimer: bcm_rx_starttimer(op); } /* * helpers for bcm_op handling: find & delete bcm [rx|tx] op elements */ static struct bcm_op *bcm_find_op(struct list_head *ops, struct bcm_msg_head *mh, int ifindex) { struct bcm_op *op; list_for_each_entry(op, ops, list) { if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) && (op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) return op; } return NULL; } static void bcm_free_op_rcu(struct rcu_head *rcu_head) { struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu); if ((op->frames) && (op->frames != &op->sframe)) kfree(op->frames); if ((op->last_frames) && (op->last_frames != &op->last_sframe)) kfree(op->last_frames); kfree(op); } static void bcm_remove_op(struct bcm_op *op) { hrtimer_cancel(&op->timer); hrtimer_cancel(&op->thrtimer); call_rcu(&op->rcu, bcm_free_op_rcu); } static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op) { if (op->rx_reg_dev == dev) { can_rx_unregister(dev_net(dev), dev, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op); /* mark as removed subscription */ op->rx_reg_dev = NULL; } else printk(KERN_ERR "can-bcm: bcm_rx_unreg: registered device " "mismatch %p %p\n", op->rx_reg_dev, dev); } /* * bcm_delete_rx_op - find and remove a rx op (returns number of removed ops) */ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh, int ifindex) { struct bcm_op *op, *n; list_for_each_entry_safe(op, n, ops, list) { if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) && (op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) { /* disable automatic timer on frame reception */ op->flags |= RX_NO_AUTOTIMER; /* * Don't care if we're bound or not (due to netdev * problems) can_rx_unregister() is always a save * thing to do here. */ if (op->ifindex) { /* * Only remove subscriptions that had not * been removed due to NETDEV_UNREGISTER * in bcm_notifier() */ if (op->rx_reg_dev) { struct net_device *dev; dev = dev_get_by_index(sock_net(op->sk), op->ifindex); if (dev) { bcm_rx_unreg(dev, op); dev_put(dev); } } } else can_rx_unregister(sock_net(op->sk), NULL, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op); list_del_rcu(&op->list); bcm_remove_op(op); return 1; /* done */ } } return 0; /* not found */ } /* * bcm_delete_tx_op - find and remove a tx op (returns number of removed ops) */ static int bcm_delete_tx_op(struct list_head *ops, struct bcm_msg_head *mh, int ifindex) { struct bcm_op *op, *n; list_for_each_entry_safe(op, n, ops, list) { if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) && (op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) { list_del_rcu(&op->list); bcm_remove_op(op); return 1; /* done */ } } return 0; /* not found */ } /* * bcm_read_op - read out a bcm_op and send it to the user (for bcm_sendmsg) */ static int bcm_read_op(struct list_head *ops, struct bcm_msg_head *msg_head, int ifindex) { struct bcm_op *op = bcm_find_op(ops, msg_head, ifindex); if (!op) return -EINVAL; /* put current values into msg_head */ msg_head->flags = op->flags; msg_head->count = op->count; msg_head->ival1 = op->ival1; msg_head->ival2 = op->ival2; msg_head->nframes = op->nframes; bcm_send_to_user(op, msg_head, op->frames, 0); return MHSIZ; } /* * bcm_tx_setup - create or update a bcm tx op (for bcm_sendmsg) */ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, int ifindex, struct sock *sk) { struct bcm_sock *bo = bcm_sk(sk); struct bcm_op *op; struct canfd_frame *cf; unsigned int i; int err; /* we need a real device to send frames */ if (!ifindex) return -ENODEV; /* check nframes boundaries - we need at least one CAN frame */ if (msg_head->nframes < 1 || msg_head->nframes > MAX_NFRAMES) return -EINVAL; /* check timeval limitations */ if ((msg_head->flags & SETTIMER) && bcm_is_invalid_tv(msg_head)) return -EINVAL; /* check the given can_id */ op = bcm_find_op(&bo->tx_ops, msg_head, ifindex); if (op) { /* update existing BCM operation */ /* * Do we need more space for the CAN frames than currently * allocated? -> This is a _really_ unusual use-case and * therefore (complexity / locking) it is not supported. */ if (msg_head->nframes > op->nframes) return -E2BIG; /* update CAN frames content */ for (i = 0; i < msg_head->nframes; i++) { cf = op->frames + op->cfsiz * i; err = memcpy_from_msg((u8 *)cf, msg, op->cfsiz); if (op->flags & CAN_FD_FRAME) { if (cf->len > 64) err = -EINVAL; } else { if (cf->len > 8) err = -EINVAL; } if (err < 0) return err; if (msg_head->flags & TX_CP_CAN_ID) { /* copy can_id into frame */ cf->can_id = msg_head->can_id; } } op->flags = msg_head->flags; /* only lock for unlikely count/nframes/currframe changes */ if (op->nframes != msg_head->nframes || op->flags & TX_RESET_MULTI_IDX || op->flags & SETTIMER) { spin_lock_bh(&op->bcm_tx_lock); if (op->nframes != msg_head->nframes || op->flags & TX_RESET_MULTI_IDX) { /* potentially update changed nframes */ op->nframes = msg_head->nframes; /* restart multiple frame transmission */ op->currframe = 0; } if (op->flags & SETTIMER) op->count = msg_head->count; spin_unlock_bh(&op->bcm_tx_lock); } } else { /* insert new BCM operation for the given can_id */ op = kzalloc(OPSIZ, GFP_KERNEL); if (!op) return -ENOMEM; spin_lock_init(&op->bcm_tx_lock); op->can_id = msg_head->can_id; op->cfsiz = CFSIZ(msg_head->flags); op->flags = msg_head->flags; op->nframes = msg_head->nframes; if (op->flags & SETTIMER) op->count = msg_head->count; /* create array for CAN frames and copy the data */ if (msg_head->nframes > 1) { op->frames = kmalloc_array(msg_head->nframes, op->cfsiz, GFP_KERNEL); if (!op->frames) { kfree(op); return -ENOMEM; } } else op->frames = &op->sframe; for (i = 0; i < msg_head->nframes; i++) { cf = op->frames + op->cfsiz * i; err = memcpy_from_msg((u8 *)cf, msg, op->cfsiz); if (err < 0) goto free_op; if (op->flags & CAN_FD_FRAME) { if (cf->len > 64) err = -EINVAL; } else { if (cf->len > 8) err = -EINVAL; } if (err < 0) goto free_op; if (msg_head->flags & TX_CP_CAN_ID) { /* copy can_id into frame */ cf->can_id = msg_head->can_id; } } /* tx_ops never compare with previous received messages */ op->last_frames = NULL; /* bcm_can_tx / bcm_tx_timeout_handler needs this */ op->sk = sk; op->ifindex = ifindex; /* initialize uninitialized (kzalloc) structure */ hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); op->timer.function = bcm_tx_timeout_handler; /* currently unused in tx_ops */ hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); /* add this bcm_op to the list of the tx_ops */ list_add(&op->list, &bo->tx_ops); } /* if ((op = bcm_find_op(&bo->tx_ops, msg_head->can_id, ifindex))) */ if (op->flags & SETTIMER) { /* set timer values */ op->ival1 = msg_head->ival1; op->ival2 = msg_head->ival2; op->kt_ival1 = bcm_timeval_to_ktime(msg_head->ival1); op->kt_ival2 = bcm_timeval_to_ktime(msg_head->ival2); /* disable an active timer due to zero values? */ if (!op->kt_ival1 && !op->kt_ival2) hrtimer_cancel(&op->timer); } if (op->flags & STARTTIMER) { hrtimer_cancel(&op->timer); /* spec: send CAN frame when starting timer */ op->flags |= TX_ANNOUNCE; } if (op->flags & TX_ANNOUNCE) bcm_can_tx(op); if (op->flags & STARTTIMER) bcm_tx_start_timer(op); return msg_head->nframes * op->cfsiz + MHSIZ; free_op: if (op->frames != &op->sframe) kfree(op->frames); kfree(op); return err; } /* * bcm_rx_setup - create or update a bcm rx op (for bcm_sendmsg) */ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, int ifindex, struct sock *sk) { struct bcm_sock *bo = bcm_sk(sk); struct bcm_op *op; int do_rx_register; int err = 0; if ((msg_head->flags & RX_FILTER_ID) || (!(msg_head->nframes))) { /* be robust against wrong usage ... */ msg_head->flags |= RX_FILTER_ID; /* ignore trailing garbage */ msg_head->nframes = 0; } /* the first element contains the mux-mask => MAX_NFRAMES + 1 */ if (msg_head->nframes > MAX_NFRAMES + 1) return -EINVAL; if ((msg_head->flags & RX_RTR_FRAME) && ((msg_head->nframes != 1) || (!(msg_head->can_id & CAN_RTR_FLAG)))) return -EINVAL; /* check timeval limitations */ if ((msg_head->flags & SETTIMER) && bcm_is_invalid_tv(msg_head)) return -EINVAL; /* check the given can_id */ op = bcm_find_op(&bo->rx_ops, msg_head, ifindex); if (op) { /* update existing BCM operation */ /* * Do we need more space for the CAN frames than currently * allocated? -> This is a _really_ unusual use-case and * therefore (complexity / locking) it is not supported. */ if (msg_head->nframes > op->nframes) return -E2BIG; if (msg_head->nframes) { /* update CAN frames content */ err = memcpy_from_msg(op->frames, msg, msg_head->nframes * op->cfsiz); if (err < 0) return err; /* clear last_frames to indicate 'nothing received' */ memset(op->last_frames, 0, msg_head->nframes * op->cfsiz); } op->nframes = msg_head->nframes; op->flags = msg_head->flags; /* Only an update -> do not call can_rx_register() */ do_rx_register = 0; } else { /* insert new BCM operation for the given can_id */ op = kzalloc(OPSIZ, GFP_KERNEL); if (!op) return -ENOMEM; op->can_id = msg_head->can_id; op->nframes = msg_head->nframes; op->cfsiz = CFSIZ(msg_head->flags); op->flags = msg_head->flags; if (msg_head->nframes > 1) { /* create array for CAN frames and copy the data */ op->frames = kmalloc_array(msg_head->nframes, op->cfsiz, GFP_KERNEL); if (!op->frames) { kfree(op); return -ENOMEM; } /* create and init array for received CAN frames */ op->last_frames = kcalloc(msg_head->nframes, op->cfsiz, GFP_KERNEL); if (!op->last_frames) { kfree(op->frames); kfree(op); return -ENOMEM; } } else { op->frames = &op->sframe; op->last_frames = &op->last_sframe; } if (msg_head->nframes) { err = memcpy_from_msg(op->frames, msg, msg_head->nframes * op->cfsiz); if (err < 0) { if (op->frames != &op->sframe) kfree(op->frames); if (op->last_frames != &op->last_sframe) kfree(op->last_frames); kfree(op); return err; } } /* bcm_can_tx / bcm_tx_timeout_handler needs this */ op->sk = sk; op->ifindex = ifindex; /* ifindex for timeout events w/o previous frame reception */ op->rx_ifindex = ifindex; /* initialize uninitialized (kzalloc) structure */ hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); op->timer.function = bcm_rx_timeout_handler; hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); op->thrtimer.function = bcm_rx_thr_handler; /* add this bcm_op to the list of the rx_ops */ list_add(&op->list, &bo->rx_ops); /* call can_rx_register() */ do_rx_register = 1; } /* if ((op = bcm_find_op(&bo->rx_ops, msg_head->can_id, ifindex))) */ /* check flags */ if (op->flags & RX_RTR_FRAME) { struct canfd_frame *frame0 = op->frames; /* no timers in RTR-mode */ hrtimer_cancel(&op->thrtimer); hrtimer_cancel(&op->timer); /* * funny feature in RX(!)_SETUP only for RTR-mode: * copy can_id into frame BUT without RTR-flag to * prevent a full-load-loopback-test ... ;-] */ if ((op->flags & TX_CP_CAN_ID) || (frame0->can_id == op->can_id)) frame0->can_id = op->can_id & ~CAN_RTR_FLAG; } else { if (op->flags & SETTIMER) { /* set timer value */ op->ival1 = msg_head->ival1; op->ival2 = msg_head->ival2; op->kt_ival1 = bcm_timeval_to_ktime(msg_head->ival1); op->kt_ival2 = bcm_timeval_to_ktime(msg_head->ival2); /* disable an active timer due to zero value? */ if (!op->kt_ival1) hrtimer_cancel(&op->timer); /* * In any case cancel the throttle timer, flush * potentially blocked msgs and reset throttle handling */ op->kt_lastmsg = 0; hrtimer_cancel(&op->thrtimer); bcm_rx_thr_flush(op); } if ((op->flags & STARTTIMER) && op->kt_ival1) hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL_SOFT); } /* now we can register for can_ids, if we added a new bcm_op */ if (do_rx_register) { if (ifindex) { struct net_device *dev; dev = dev_get_by_index(sock_net(sk), ifindex); if (dev) { err = can_rx_register(sock_net(sk), dev, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op, "bcm", sk); op->rx_reg_dev = dev; dev_put(dev); } } else err = can_rx_register(sock_net(sk), NULL, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op, "bcm", sk); if (err) { /* this bcm rx op is broken -> remove it */ list_del_rcu(&op->list); bcm_remove_op(op); return err; } } return msg_head->nframes * op->cfsiz + MHSIZ; } /* * bcm_tx_send - send a single CAN frame to the CAN interface (for bcm_sendmsg) */ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk, int cfsiz) { struct sk_buff *skb; struct net_device *dev; int err; /* we need a real device to send frames */ if (!ifindex) return -ENODEV; skb = alloc_skb(cfsiz + sizeof(struct can_skb_priv), GFP_KERNEL); if (!skb) return -ENOMEM; can_skb_reserve(skb); err = memcpy_from_msg(skb_put(skb, cfsiz), msg, cfsiz); if (err < 0) { kfree_skb(skb); return err; } dev = dev_get_by_index(sock_net(sk), ifindex); if (!dev) { kfree_skb(skb); return -ENODEV; } can_skb_prv(skb)->ifindex = dev->ifindex; can_skb_prv(skb)->skbcnt = 0; skb->dev = dev; can_skb_set_owner(skb, sk); err = can_send(skb, 1); /* send with loopback */ dev_put(dev); if (err) return err; return cfsiz + MHSIZ; } /* * bcm_sendmsg - process BCM commands (opcodes) from the userspace */ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct bcm_sock *bo = bcm_sk(sk); int ifindex = bo->ifindex; /* default ifindex for this bcm_op */ struct bcm_msg_head msg_head; int cfsiz; int ret; /* read bytes or error codes as return value */ if (!bo->bound) return -ENOTCONN; /* check for valid message length from userspace */ if (size < MHSIZ) return -EINVAL; /* read message head information */ ret = memcpy_from_msg((u8 *)&msg_head, msg, MHSIZ); if (ret < 0) return ret; cfsiz = CFSIZ(msg_head.flags); if ((size - MHSIZ) % cfsiz) return -EINVAL; /* check for alternative ifindex for this bcm_op */ if (!ifindex && msg->msg_name) { /* no bound device as default => check msg_name */ DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name); if (msg->msg_namelen < BCM_MIN_NAMELEN) return -EINVAL; if (addr->can_family != AF_CAN) return -EINVAL; /* ifindex from sendto() */ ifindex = addr->can_ifindex; if (ifindex) { struct net_device *dev; dev = dev_get_by_index(sock_net(sk), ifindex); if (!dev) return -ENODEV; if (dev->type != ARPHRD_CAN) { dev_put(dev); return -ENODEV; } dev_put(dev); } } lock_sock(sk); switch (msg_head.opcode) { case TX_SETUP: ret = bcm_tx_setup(&msg_head, msg, ifindex, sk); break; case RX_SETUP: ret = bcm_rx_setup(&msg_head, msg, ifindex, sk); break; case TX_DELETE: if (bcm_delete_tx_op(&bo->tx_ops, &msg_head, ifindex)) ret = MHSIZ; else ret = -EINVAL; break; case RX_DELETE: if (bcm_delete_rx_op(&bo->rx_ops, &msg_head, ifindex)) ret = MHSIZ; else ret = -EINVAL; break; case TX_READ: /* reuse msg_head for the reply to TX_READ */ msg_head.opcode = TX_STATUS; ret = bcm_read_op(&bo->tx_ops, &msg_head, ifindex); break; case RX_READ: /* reuse msg_head for the reply to RX_READ */ msg_head.opcode = RX_STATUS; ret = bcm_read_op(&bo->rx_ops, &msg_head, ifindex); break; case TX_SEND: /* we need exactly one CAN frame behind the msg head */ if ((msg_head.nframes != 1) || (size != cfsiz + MHSIZ)) ret = -EINVAL; else ret = bcm_tx_send(msg, ifindex, sk, cfsiz); break; default: ret = -EINVAL; break; } release_sock(sk); return ret; } /* * notification handler for netdevice status changes */ static void bcm_notify(struct bcm_sock *bo, unsigned long msg, struct net_device *dev) { struct sock *sk = &bo->sk; struct bcm_op *op; int notify_enodev = 0; if (!net_eq(dev_net(dev), sock_net(sk))) return; switch (msg) { case NETDEV_UNREGISTER: lock_sock(sk); /* remove device specific receive entries */ list_for_each_entry(op, &bo->rx_ops, list) if (op->rx_reg_dev == dev) bcm_rx_unreg(dev, op); /* remove device reference, if this is our bound device */ if (bo->bound && bo->ifindex == dev->ifindex) { #if IS_ENABLED(CONFIG_PROC_FS) if (sock_net(sk)->can.bcmproc_dir && bo->bcm_proc_read) { remove_proc_entry(bo->procname, sock_net(sk)->can.bcmproc_dir); bo->bcm_proc_read = NULL; } #endif bo->bound = 0; bo->ifindex = 0; notify_enodev = 1; } release_sock(sk); if (notify_enodev) { sk->sk_err = ENODEV; if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); } break; case NETDEV_DOWN: if (bo->bound && bo->ifindex == dev->ifindex) { sk->sk_err = ENETDOWN; if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); } } } static int bcm_notifier(struct notifier_block *nb, unsigned long msg, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (dev->type != ARPHRD_CAN) return NOTIFY_DONE; if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN) return NOTIFY_DONE; if (unlikely(bcm_busy_notifier)) /* Check for reentrant bug. */ return NOTIFY_DONE; spin_lock(&bcm_notifier_lock); list_for_each_entry(bcm_busy_notifier, &bcm_notifier_list, notifier) { spin_unlock(&bcm_notifier_lock); bcm_notify(bcm_busy_notifier, msg, dev); spin_lock(&bcm_notifier_lock); } bcm_busy_notifier = NULL; spin_unlock(&bcm_notifier_lock); return NOTIFY_DONE; } /* * initial settings for all BCM sockets to be set at socket creation time */ static int bcm_init(struct sock *sk) { struct bcm_sock *bo = bcm_sk(sk); bo->bound = 0; bo->ifindex = 0; bo->dropped_usr_msgs = 0; bo->bcm_proc_read = NULL; INIT_LIST_HEAD(&bo->tx_ops); INIT_LIST_HEAD(&bo->rx_ops); /* set notifier */ spin_lock(&bcm_notifier_lock); list_add_tail(&bo->notifier, &bcm_notifier_list); spin_unlock(&bcm_notifier_lock); return 0; } /* * standard socket functions */ static int bcm_release(struct socket *sock) { struct sock *sk = sock->sk; struct net *net; struct bcm_sock *bo; struct bcm_op *op, *next; if (!sk) return 0; net = sock_net(sk); bo = bcm_sk(sk); /* remove bcm_ops, timer, rx_unregister(), etc. */ spin_lock(&bcm_notifier_lock); while (bcm_busy_notifier == bo) { spin_unlock(&bcm_notifier_lock); schedule_timeout_uninterruptible(1); spin_lock(&bcm_notifier_lock); } list_del(&bo->notifier); spin_unlock(&bcm_notifier_lock); lock_sock(sk); #if IS_ENABLED(CONFIG_PROC_FS) /* remove procfs entry */ if (net->can.bcmproc_dir && bo->bcm_proc_read) remove_proc_entry(bo->procname, net->can.bcmproc_dir); #endif /* CONFIG_PROC_FS */ list_for_each_entry_safe(op, next, &bo->tx_ops, list) bcm_remove_op(op); list_for_each_entry_safe(op, next, &bo->rx_ops, list) { /* * Don't care if we're bound or not (due to netdev problems) * can_rx_unregister() is always a save thing to do here. */ if (op->ifindex) { /* * Only remove subscriptions that had not * been removed due to NETDEV_UNREGISTER * in bcm_notifier() */ if (op->rx_reg_dev) { struct net_device *dev; dev = dev_get_by_index(net, op->ifindex); if (dev) { bcm_rx_unreg(dev, op); dev_put(dev); } } } else can_rx_unregister(net, NULL, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op); } synchronize_rcu(); list_for_each_entry_safe(op, next, &bo->rx_ops, list) bcm_remove_op(op); /* remove device reference */ if (bo->bound) { bo->bound = 0; bo->ifindex = 0; } sock_orphan(sk); sock->sk = NULL; release_sock(sk); sock_put(sk); return 0; } static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, int flags) { struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; struct sock *sk = sock->sk; struct bcm_sock *bo = bcm_sk(sk); struct net *net = sock_net(sk); int ret = 0; if (len < BCM_MIN_NAMELEN) return -EINVAL; lock_sock(sk); if (bo->bound) { ret = -EISCONN; goto fail; } /* bind a device to this socket */ if (addr->can_ifindex) { struct net_device *dev; dev = dev_get_by_index(net, addr->can_ifindex); if (!dev) { ret = -ENODEV; goto fail; } if (dev->type != ARPHRD_CAN) { dev_put(dev); ret = -ENODEV; goto fail; } bo->ifindex = dev->ifindex; dev_put(dev); } else { /* no interface reference for ifindex = 0 ('any' CAN device) */ bo->ifindex = 0; } #if IS_ENABLED(CONFIG_PROC_FS) if (net->can.bcmproc_dir) { /* unique socket address as filename */ sprintf(bo->procname, "%lu", sock_i_ino(sk)); bo->bcm_proc_read = proc_create_net_single(bo->procname, 0644, net->can.bcmproc_dir, bcm_proc_show, sk); if (!bo->bcm_proc_read) { ret = -ENOMEM; goto fail; } } #endif /* CONFIG_PROC_FS */ bo->bound = 1; fail: release_sock(sk); return ret; } static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; int error = 0; int noblock; int err; noblock = flags & MSG_DONTWAIT; flags &= ~MSG_DONTWAIT; skb = skb_recv_datagram(sk, flags, noblock, &error); if (!skb) return error; if (skb->len < size) size = skb->len; err = memcpy_to_msg(msg, skb->data, size); if (err < 0) { skb_free_datagram(sk, skb); return err; } sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { __sockaddr_check_size(BCM_MIN_NAMELEN); msg->msg_namelen = BCM_MIN_NAMELEN; memcpy(msg->msg_name, skb->cb, msg->msg_namelen); } skb_free_datagram(sk, skb); return size; } static int bcm_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd, unsigned long arg) { /* no ioctls for socket layer -> hand it down to NIC layer */ return -ENOIOCTLCMD; } static const struct proto_ops bcm_ops = { .family = PF_CAN, .release = bcm_release, .bind = sock_no_bind, .connect = bcm_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, .poll = datagram_poll, .ioctl = bcm_sock_no_ioctlcmd, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .sendmsg = bcm_sendmsg, .recvmsg = bcm_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; static struct proto bcm_proto __read_mostly = { .name = "CAN_BCM", .owner = THIS_MODULE, .obj_size = sizeof(struct bcm_sock), .init = bcm_init, }; static const struct can_proto bcm_can_proto = { .type = SOCK_DGRAM, .protocol = CAN_BCM, .ops = &bcm_ops, .prot = &bcm_proto, }; static int canbcm_pernet_init(struct net *net) { #if IS_ENABLED(CONFIG_PROC_FS) /* create /proc/net/can-bcm directory */ net->can.bcmproc_dir = proc_net_mkdir(net, "can-bcm", net->proc_net); #endif /* CONFIG_PROC_FS */ return 0; } static void canbcm_pernet_exit(struct net *net) { #if IS_ENABLED(CONFIG_PROC_FS) /* remove /proc/net/can-bcm directory */ if (net->can.bcmproc_dir) remove_proc_entry("can-bcm", net->proc_net); #endif /* CONFIG_PROC_FS */ } static struct pernet_operations canbcm_pernet_ops __read_mostly = { .init = canbcm_pernet_init, .exit = canbcm_pernet_exit, }; static struct notifier_block canbcm_notifier = { .notifier_call = bcm_notifier }; static int __init bcm_module_init(void) { int err; pr_info("can: broadcast manager protocol\n"); err = can_proto_register(&bcm_can_proto); if (err < 0) { printk(KERN_ERR "can: registration of bcm protocol failed\n"); return err; } register_pernet_subsys(&canbcm_pernet_ops); register_netdevice_notifier(&canbcm_notifier); return 0; } static void __exit bcm_module_exit(void) { can_proto_unregister(&bcm_can_proto); unregister_netdevice_notifier(&canbcm_notifier); unregister_pernet_subsys(&canbcm_pernet_ops); } module_init(bcm_module_init); module_exit(bcm_module_exit); |
15 6 1 5 1 10 10 7 2 5 10 5 9 4 10 6 4 5 3 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 | // SPDX-License-Identifier: GPL-2.0-only /* * GHASH: hash function for GCM (Galois/Counter Mode). * * Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen <mh1@iki.fi> * Copyright (c) 2009 Intel Corp. * Author: Huang Ying <ying.huang@intel.com> */ /* * GHASH is a keyed hash function used in GCM authentication tag generation. * * The original GCM paper [1] presents GHASH as a function GHASH(H, A, C) which * takes a 16-byte hash key H, additional authenticated data A, and a ciphertext * C. It formats A and C into a single byte string X, interprets X as a * polynomial over GF(2^128), and evaluates this polynomial at the point H. * * However, the NIST standard for GCM [2] presents GHASH as GHASH(H, X) where X * is the already-formatted byte string containing both A and C. * * "ghash" in the Linux crypto API uses the 'X' (pre-formatted) convention, * since the API supports only a single data stream per hash. Thus, the * formatting of 'A' and 'C' is done in the "gcm" template, not in "ghash". * * The reason "ghash" is separate from "gcm" is to allow "gcm" to use an * accelerated "ghash" when a standalone accelerated "gcm(aes)" is unavailable. * It is generally inappropriate to use "ghash" for other purposes, since it is * an "ε-almost-XOR-universal hash function", not a cryptographic hash function. * It can only be used securely in crypto modes specially designed to use it. * * [1] The Galois/Counter Mode of Operation (GCM) * (http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.694.695&rep=rep1&type=pdf) * [2] Recommendation for Block Cipher Modes of Operation: Galois/Counter Mode (GCM) and GMAC * (https://csrc.nist.gov/publications/detail/sp/800-38d/final) */ #include <crypto/algapi.h> #include <crypto/gf128mul.h> #include <crypto/ghash.h> #include <crypto/internal/hash.h> #include <linux/crypto.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> static int ghash_init(struct shash_desc *desc) { struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); memset(dctx, 0, sizeof(*dctx)); return 0; } static int ghash_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { struct ghash_ctx *ctx = crypto_shash_ctx(tfm); be128 k; if (keylen != GHASH_BLOCK_SIZE) return -EINVAL; if (ctx->gf128) gf128mul_free_4k(ctx->gf128); BUILD_BUG_ON(sizeof(k) != GHASH_BLOCK_SIZE); memcpy(&k, key, GHASH_BLOCK_SIZE); /* avoid violating alignment rules */ ctx->gf128 = gf128mul_init_4k_lle(&k); memzero_explicit(&k, GHASH_BLOCK_SIZE); if (!ctx->gf128) return -ENOMEM; return 0; } static int ghash_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) { struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); u8 *dst = dctx->buffer; if (dctx->bytes) { int n = min(srclen, dctx->bytes); u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes); dctx->bytes -= n; srclen -= n; while (n--) *pos++ ^= *src++; if (!dctx->bytes) gf128mul_4k_lle((be128 *)dst, ctx->gf128); } while (srclen >= GHASH_BLOCK_SIZE) { crypto_xor(dst, src, GHASH_BLOCK_SIZE); gf128mul_4k_lle((be128 *)dst, ctx->gf128); src += GHASH_BLOCK_SIZE; srclen -= GHASH_BLOCK_SIZE; } if (srclen) { dctx->bytes = GHASH_BLOCK_SIZE - srclen; while (srclen--) *dst++ ^= *src++; } return 0; } static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) { u8 *dst = dctx->buffer; if (dctx->bytes) { u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes); while (dctx->bytes--) *tmp++ ^= 0; gf128mul_4k_lle((be128 *)dst, ctx->gf128); } dctx->bytes = 0; } static int ghash_final(struct shash_desc *desc, u8 *dst) { struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); u8 *buf = dctx->buffer; ghash_flush(ctx, dctx); memcpy(dst, buf, GHASH_BLOCK_SIZE); return 0; } static void ghash_exit_tfm(struct crypto_tfm *tfm) { struct ghash_ctx *ctx = crypto_tfm_ctx(tfm); if (ctx->gf128) gf128mul_free_4k(ctx->gf128); } static struct shash_alg ghash_alg = { .digestsize = GHASH_DIGEST_SIZE, .init = ghash_init, .update = ghash_update, .final = ghash_final, .setkey = ghash_setkey, .descsize = sizeof(struct ghash_desc_ctx), .base = { .cra_name = "ghash", .cra_driver_name = "ghash-generic", .cra_priority = 100, .cra_blocksize = GHASH_BLOCK_SIZE, .cra_ctxsize = sizeof(struct ghash_ctx), .cra_module = THIS_MODULE, .cra_exit = ghash_exit_tfm, }, }; static int __init ghash_mod_init(void) { return crypto_register_shash(&ghash_alg); } static void __exit ghash_mod_exit(void) { crypto_unregister_shash(&ghash_alg); } subsys_initcall(ghash_mod_init); module_exit(ghash_mod_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GHASH hash function"); MODULE_ALIAS_CRYPTO("ghash"); MODULE_ALIAS_CRYPTO("ghash-generic"); |
1 7 1 6 3 2 7 8 1 1 6 5 4 1 14 14 20 18 7 11 6 5 8 8 13 1 12 9 8 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 | // SPDX-License-Identifier: GPL-2.0-or-later /* net/sched/sch_ingress.c - Ingress and clsact qdisc * * Authors: Jamal Hadi Salim 1999 */ #include <linux/module.h> #include <linux/types.h> #include <linux/list.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> struct ingress_sched_data { struct tcf_block *block; struct tcf_block_ext_info block_info; struct mini_Qdisc_pair miniqp; }; static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) { return NULL; } static unsigned long ingress_find(struct Qdisc *sch, u32 classid) { return TC_H_MIN(classid) + 1; } static unsigned long ingress_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { return ingress_find(sch, classid); } static void ingress_unbind_filter(struct Qdisc *sch, unsigned long cl) { } static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker) { } static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl, struct netlink_ext_ack *extack) { struct ingress_sched_data *q = qdisc_priv(sch); return q->block; } static void clsact_chain_head_change(struct tcf_proto *tp_head, void *priv) { struct mini_Qdisc_pair *miniqp = priv; mini_qdisc_pair_swap(miniqp, tp_head); }; static void ingress_ingress_block_set(struct Qdisc *sch, u32 block_index) { struct ingress_sched_data *q = qdisc_priv(sch); q->block_info.block_index = block_index; } static u32 ingress_ingress_block_get(struct Qdisc *sch) { struct ingress_sched_data *q = qdisc_priv(sch); return q->block_info.block_index; } static int ingress_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct ingress_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); int err; if (sch->parent != TC_H_INGRESS) return -EOPNOTSUPP; net_inc_ingress_queue(); mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress); q->block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; q->block_info.chain_head_change = clsact_chain_head_change; q->block_info.chain_head_change_priv = &q->miniqp; err = tcf_block_get_ext(&q->block, sch, &q->block_info, extack); if (err) return err; mini_qdisc_pair_block_init(&q->miniqp, q->block); return 0; } static void ingress_destroy(struct Qdisc *sch) { struct ingress_sched_data *q = qdisc_priv(sch); if (sch->parent != TC_H_INGRESS) return; tcf_block_put_ext(q->block, sch, &q->block_info); net_dec_ingress_queue(); } static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) { struct nlattr *nest; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; return nla_nest_end(skb, nest); nla_put_failure: nla_nest_cancel(skb, nest); return -1; } static const struct Qdisc_class_ops ingress_class_ops = { .flags = QDISC_CLASS_OPS_DOIT_UNLOCKED, .leaf = ingress_leaf, .find = ingress_find, .walk = ingress_walk, .tcf_block = ingress_tcf_block, .bind_tcf = ingress_bind_filter, .unbind_tcf = ingress_unbind_filter, }; static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { .cl_ops = &ingress_class_ops, .id = "ingress", .priv_size = sizeof(struct ingress_sched_data), .static_flags = TCQ_F_INGRESS | TCQ_F_CPUSTATS, .init = ingress_init, .destroy = ingress_destroy, .dump = ingress_dump, .ingress_block_set = ingress_ingress_block_set, .ingress_block_get = ingress_ingress_block_get, .owner = THIS_MODULE, }; struct clsact_sched_data { struct tcf_block *ingress_block; struct tcf_block *egress_block; struct tcf_block_ext_info ingress_block_info; struct tcf_block_ext_info egress_block_info; struct mini_Qdisc_pair miniqp_ingress; struct mini_Qdisc_pair miniqp_egress; }; static unsigned long clsact_find(struct Qdisc *sch, u32 classid) { switch (TC_H_MIN(classid)) { case TC_H_MIN(TC_H_MIN_INGRESS): case TC_H_MIN(TC_H_MIN_EGRESS): return TC_H_MIN(classid); default: return 0; } } static unsigned long clsact_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { return clsact_find(sch, classid); } static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl, struct netlink_ext_ack *extack) { struct clsact_sched_data *q = qdisc_priv(sch); switch (cl) { case TC_H_MIN(TC_H_MIN_INGRESS): return q->ingress_block; case TC_H_MIN(TC_H_MIN_EGRESS): return q->egress_block; default: return NULL; } } static void clsact_ingress_block_set(struct Qdisc *sch, u32 block_index) { struct clsact_sched_data *q = qdisc_priv(sch); q->ingress_block_info.block_index = block_index; } static void clsact_egress_block_set(struct Qdisc *sch, u32 block_index) { struct clsact_sched_data *q = qdisc_priv(sch); q->egress_block_info.block_index = block_index; } static u32 clsact_ingress_block_get(struct Qdisc *sch) { struct clsact_sched_data *q = qdisc_priv(sch); return q->ingress_block_info.block_index; } static u32 clsact_egress_block_get(struct Qdisc *sch) { struct clsact_sched_data *q = qdisc_priv(sch); return q->egress_block_info.block_index; } static int clsact_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct clsact_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); int err; if (sch->parent != TC_H_CLSACT) return -EOPNOTSUPP; net_inc_ingress_queue(); net_inc_egress_queue(); mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress); q->ingress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; q->ingress_block_info.chain_head_change = clsact_chain_head_change; q->ingress_block_info.chain_head_change_priv = &q->miniqp_ingress; err = tcf_block_get_ext(&q->ingress_block, sch, &q->ingress_block_info, extack); if (err) return err; mini_qdisc_pair_block_init(&q->miniqp_ingress, q->ingress_block); mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress); q->egress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS; q->egress_block_info.chain_head_change = clsact_chain_head_change; q->egress_block_info.chain_head_change_priv = &q->miniqp_egress; return tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info, extack); } static void clsact_destroy(struct Qdisc *sch) { struct clsact_sched_data *q = qdisc_priv(sch); if (sch->parent != TC_H_CLSACT) return; tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info); tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info); net_dec_ingress_queue(); net_dec_egress_queue(); } static const struct Qdisc_class_ops clsact_class_ops = { .flags = QDISC_CLASS_OPS_DOIT_UNLOCKED, .leaf = ingress_leaf, .find = clsact_find, .walk = ingress_walk, .tcf_block = clsact_tcf_block, .bind_tcf = clsact_bind_filter, .unbind_tcf = ingress_unbind_filter, }; static struct Qdisc_ops clsact_qdisc_ops __read_mostly = { .cl_ops = &clsact_class_ops, .id = "clsact", .priv_size = sizeof(struct clsact_sched_data), .static_flags = TCQ_F_INGRESS | TCQ_F_CPUSTATS, .init = clsact_init, .destroy = clsact_destroy, .dump = ingress_dump, .ingress_block_set = clsact_ingress_block_set, .egress_block_set = clsact_egress_block_set, .ingress_block_get = clsact_ingress_block_get, .egress_block_get = clsact_egress_block_get, .owner = THIS_MODULE, }; static int __init ingress_module_init(void) { int ret; ret = register_qdisc(&ingress_qdisc_ops); if (!ret) { ret = register_qdisc(&clsact_qdisc_ops); if (ret) unregister_qdisc(&ingress_qdisc_ops); } return ret; } static void __exit ingress_module_exit(void) { unregister_qdisc(&ingress_qdisc_ops); unregister_qdisc(&clsact_qdisc_ops); } module_init(ingress_module_init); module_exit(ingress_module_exit); MODULE_ALIAS("sch_clsact"); MODULE_LICENSE("GPL"); |
1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 | // SPDX-License-Identifier: GPL-2.0 /* * USB Serial Converter Generic functions * * Copyright (C) 2010 - 2013 Johan Hovold (jhovold@gmail.com) * Copyright (C) 1999 - 2002 Greg Kroah-Hartman (greg@kroah.com) */ #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/sysrq.h> #include <linux/tty.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include <linux/uaccess.h> #include <linux/kfifo.h> #include <linux/serial.h> #ifdef CONFIG_USB_SERIAL_GENERIC static __u16 vendor = 0x05f9; static __u16 product = 0xffff; module_param(vendor, ushort, 0); MODULE_PARM_DESC(vendor, "User specified USB idVendor"); module_param(product, ushort, 0); MODULE_PARM_DESC(product, "User specified USB idProduct"); static struct usb_device_id generic_device_ids[2]; /* Initially all zeroes. */ static int usb_serial_generic_probe(struct usb_serial *serial, const struct usb_device_id *id) { struct device *dev = &serial->interface->dev; dev_info(dev, "The \"generic\" usb-serial driver is only for testing and one-off prototypes.\n"); dev_info(dev, "Tell linux-usb@vger.kernel.org to add your device to a proper driver.\n"); return 0; } static int usb_serial_generic_calc_num_ports(struct usb_serial *serial, struct usb_serial_endpoints *epds) { struct device *dev = &serial->interface->dev; int num_ports; num_ports = max(epds->num_bulk_in, epds->num_bulk_out); if (num_ports == 0) { dev_err(dev, "device has no bulk endpoints\n"); return -ENODEV; } return num_ports; } static struct usb_serial_driver usb_serial_generic_device = { .driver = { .owner = THIS_MODULE, .name = "generic", }, .id_table = generic_device_ids, .probe = usb_serial_generic_probe, .calc_num_ports = usb_serial_generic_calc_num_ports, .throttle = usb_serial_generic_throttle, .unthrottle = usb_serial_generic_unthrottle, .resume = usb_serial_generic_resume, }; static struct usb_serial_driver * const serial_drivers[] = { &usb_serial_generic_device, NULL }; #endif int usb_serial_generic_register(void) { int retval = 0; #ifdef CONFIG_USB_SERIAL_GENERIC generic_device_ids[0].idVendor = vendor; generic_device_ids[0].idProduct = product; generic_device_ids[0].match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_PRODUCT; retval = usb_serial_register_drivers(serial_drivers, "usbserial_generic", generic_device_ids); #endif return retval; } void usb_serial_generic_deregister(void) { #ifdef CONFIG_USB_SERIAL_GENERIC usb_serial_deregister_drivers(serial_drivers); #endif } int usb_serial_generic_open(struct tty_struct *tty, struct usb_serial_port *port) { int result = 0; clear_bit(USB_SERIAL_THROTTLED, &port->flags); if (port->bulk_in_size) result = usb_serial_generic_submit_read_urbs(port, GFP_KERNEL); return result; } EXPORT_SYMBOL_GPL(usb_serial_generic_open); void usb_serial_generic_close(struct usb_serial_port *port) { unsigned long flags; int i; if (port->bulk_out_size) { for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) usb_kill_urb(port->write_urbs[i]); spin_lock_irqsave(&port->lock, flags); kfifo_reset_out(&port->write_fifo); spin_unlock_irqrestore(&port->lock, flags); } if (port->bulk_in_size) { for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) usb_kill_urb(port->read_urbs[i]); } } EXPORT_SYMBOL_GPL(usb_serial_generic_close); int usb_serial_generic_prepare_write_buffer(struct usb_serial_port *port, void *dest, size_t size) { return kfifo_out_locked(&port->write_fifo, dest, size, &port->lock); } /** * usb_serial_generic_write_start - start writing buffered data * @port: usb-serial port * @mem_flags: flags to use for memory allocations * * Serialised using USB_SERIAL_WRITE_BUSY flag. * * Return: Zero on success or if busy, otherwise a negative errno value. */ int usb_serial_generic_write_start(struct usb_serial_port *port, gfp_t mem_flags) { struct urb *urb; int count, result; unsigned long flags; int i; if (test_and_set_bit_lock(USB_SERIAL_WRITE_BUSY, &port->flags)) return 0; retry: spin_lock_irqsave(&port->lock, flags); if (!port->write_urbs_free || !kfifo_len(&port->write_fifo)) { clear_bit_unlock(USB_SERIAL_WRITE_BUSY, &port->flags); spin_unlock_irqrestore(&port->lock, flags); return 0; } i = (int)find_first_bit(&port->write_urbs_free, ARRAY_SIZE(port->write_urbs)); spin_unlock_irqrestore(&port->lock, flags); urb = port->write_urbs[i]; count = port->serial->type->prepare_write_buffer(port, urb->transfer_buffer, port->bulk_out_size); urb->transfer_buffer_length = count; usb_serial_debug_data(&port->dev, __func__, count, urb->transfer_buffer); spin_lock_irqsave(&port->lock, flags); port->tx_bytes += count; spin_unlock_irqrestore(&port->lock, flags); clear_bit(i, &port->write_urbs_free); result = usb_submit_urb(urb, mem_flags); if (result) { dev_err_console(port, "%s - error submitting urb: %d\n", __func__, result); set_bit(i, &port->write_urbs_free); spin_lock_irqsave(&port->lock, flags); port->tx_bytes -= count; spin_unlock_irqrestore(&port->lock, flags); clear_bit_unlock(USB_SERIAL_WRITE_BUSY, &port->flags); return result; } goto retry; /* try sending off another urb */ } EXPORT_SYMBOL_GPL(usb_serial_generic_write_start); /** * usb_serial_generic_write - generic write function * @tty: tty for the port * @port: usb-serial port * @buf: data to write * @count: number of bytes to write * * Return: The number of characters buffered, which may be anything from * zero to @count, or a negative errno value. */ int usb_serial_generic_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count) { int result; if (!port->bulk_out_size) return -ENODEV; if (!count) return 0; count = kfifo_in_locked(&port->write_fifo, buf, count, &port->lock); result = usb_serial_generic_write_start(port, GFP_ATOMIC); if (result) return result; return count; } EXPORT_SYMBOL_GPL(usb_serial_generic_write); unsigned int usb_serial_generic_write_room(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; unsigned long flags; unsigned int room; if (!port->bulk_out_size) return 0; spin_lock_irqsave(&port->lock, flags); room = kfifo_avail(&port->write_fifo); spin_unlock_irqrestore(&port->lock, flags); dev_dbg(&port->dev, "%s - returns %u\n", __func__, room); return room; } unsigned int usb_serial_generic_chars_in_buffer(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; unsigned long flags; unsigned int chars; if (!port->bulk_out_size) return 0; spin_lock_irqsave(&port->lock, flags); chars = kfifo_len(&port->write_fifo) + port->tx_bytes; spin_unlock_irqrestore(&port->lock, flags); dev_dbg(&port->dev, "%s - returns %u\n", __func__, chars); return chars; } EXPORT_SYMBOL_GPL(usb_serial_generic_chars_in_buffer); void usb_serial_generic_wait_until_sent(struct tty_struct *tty, long timeout) { struct usb_serial_port *port = tty->driver_data; unsigned int bps; unsigned long period; unsigned long expire; bps = tty_get_baud_rate(tty); if (!bps) bps = 9600; /* B0 */ /* * Use a poll-period of roughly the time it takes to send one * character or at least one jiffy. */ period = max_t(unsigned long, (10 * HZ / bps), 1); if (timeout) period = min_t(unsigned long, period, timeout); dev_dbg(&port->dev, "%s - timeout = %u ms, period = %u ms\n", __func__, jiffies_to_msecs(timeout), jiffies_to_msecs(period)); expire = jiffies + timeout; while (!port->serial->type->tx_empty(port)) { schedule_timeout_interruptible(period); if (signal_pending(current)) break; if (timeout && time_after(jiffies, expire)) break; } } EXPORT_SYMBOL_GPL(usb_serial_generic_wait_until_sent); static int usb_serial_generic_submit_read_urb(struct usb_serial_port *port, int index, gfp_t mem_flags) { int res; if (!test_and_clear_bit(index, &port->read_urbs_free)) return 0; dev_dbg(&port->dev, "%s - urb %d\n", __func__, index); res = usb_submit_urb(port->read_urbs[index], mem_flags); if (res) { if (res != -EPERM && res != -ENODEV) { dev_err(&port->dev, "%s - usb_submit_urb failed: %d\n", __func__, res); } set_bit(index, &port->read_urbs_free); return res; } return 0; } int usb_serial_generic_submit_read_urbs(struct usb_serial_port *port, gfp_t mem_flags) { int res; int i; for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) { res = usb_serial_generic_submit_read_urb(port, i, mem_flags); if (res) goto err; } return 0; err: for (; i >= 0; --i) usb_kill_urb(port->read_urbs[i]); return res; } EXPORT_SYMBOL_GPL(usb_serial_generic_submit_read_urbs); void usb_serial_generic_process_read_urb(struct urb *urb) { struct usb_serial_port *port = urb->context; char *ch = urb->transfer_buffer; int i; if (!urb->actual_length) return; /* * The per character mucking around with sysrq path it too slow for * stuff like 3G modems, so shortcircuit it in the 99.9999999% of * cases where the USB serial is not a console anyway. */ if (port->sysrq) { for (i = 0; i < urb->actual_length; i++, ch++) { if (!usb_serial_handle_sysrq_char(port, *ch)) tty_insert_flip_char(&port->port, *ch, TTY_NORMAL); } } else { tty_insert_flip_string(&port->port, ch, urb->actual_length); } tty_flip_buffer_push(&port->port); } EXPORT_SYMBOL_GPL(usb_serial_generic_process_read_urb); void usb_serial_generic_read_bulk_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; bool stopped = false; int status = urb->status; int i; for (i = 0; i < ARRAY_SIZE(port->read_urbs); ++i) { if (urb == port->read_urbs[i]) break; } dev_dbg(&port->dev, "%s - urb %d, len %d\n", __func__, i, urb->actual_length); switch (status) { case 0: usb_serial_debug_data(&port->dev, __func__, urb->actual_length, data); port->serial->type->process_read_urb(urb); break; case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: dev_dbg(&port->dev, "%s - urb stopped: %d\n", __func__, status); stopped = true; break; case -EPIPE: dev_err(&port->dev, "%s - urb stopped: %d\n", __func__, status); stopped = true; break; default: dev_dbg(&port->dev, "%s - nonzero urb status: %d\n", __func__, status); break; } /* * Make sure URB processing is done before marking as free to avoid * racing with unthrottle() on another CPU. Matches the barriers * implied by the test_and_clear_bit() in * usb_serial_generic_submit_read_urb(). */ smp_mb__before_atomic(); set_bit(i, &port->read_urbs_free); /* * Make sure URB is marked as free before checking the throttled flag * to avoid racing with unthrottle() on another CPU. Matches the * smp_mb__after_atomic() in unthrottle(). */ smp_mb__after_atomic(); if (stopped) return; if (test_bit(USB_SERIAL_THROTTLED, &port->flags)) return; usb_serial_generic_submit_read_urb(port, i, GFP_ATOMIC); } EXPORT_SYMBOL_GPL(usb_serial_generic_read_bulk_callback); void usb_serial_generic_write_bulk_callback(struct urb *urb) { unsigned long flags; struct usb_serial_port *port = urb->context; int status = urb->status; int i; for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) { if (port->write_urbs[i] == urb) break; } spin_lock_irqsave(&port->lock, flags); port->tx_bytes -= urb->transfer_buffer_length; set_bit(i, &port->write_urbs_free); spin_unlock_irqrestore(&port->lock, flags); switch (status) { case 0: break; case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: dev_dbg(&port->dev, "%s - urb stopped: %d\n", __func__, status); return; case -EPIPE: dev_err_console(port, "%s - urb stopped: %d\n", __func__, status); return; default: dev_err_console(port, "%s - nonzero urb status: %d\n", __func__, status); break; } usb_serial_generic_write_start(port, GFP_ATOMIC); usb_serial_port_softint(port); } EXPORT_SYMBOL_GPL(usb_serial_generic_write_bulk_callback); void usb_serial_generic_throttle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; set_bit(USB_SERIAL_THROTTLED, &port->flags); } EXPORT_SYMBOL_GPL(usb_serial_generic_throttle); void usb_serial_generic_unthrottle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; clear_bit(USB_SERIAL_THROTTLED, &port->flags); /* * Matches the smp_mb__after_atomic() in * usb_serial_generic_read_bulk_callback(). */ smp_mb__after_atomic(); usb_serial_generic_submit_read_urbs(port, GFP_KERNEL); } EXPORT_SYMBOL_GPL(usb_serial_generic_unthrottle); static bool usb_serial_generic_msr_changed(struct tty_struct *tty, unsigned long arg, struct async_icount *cprev) { struct usb_serial_port *port = tty->driver_data; struct async_icount cnow; unsigned long flags; bool ret; /* * Use tty-port initialised flag to detect all hangups including the * one generated at USB-device disconnect. */ if (!tty_port_initialized(&port->port)) return true; spin_lock_irqsave(&port->lock, flags); cnow = port->icount; /* atomic copy*/ spin_unlock_irqrestore(&port->lock, flags); ret = ((arg & TIOCM_RNG) && (cnow.rng != cprev->rng)) || ((arg & TIOCM_DSR) && (cnow.dsr != cprev->dsr)) || ((arg & TIOCM_CD) && (cnow.dcd != cprev->dcd)) || ((arg & TIOCM_CTS) && (cnow.cts != cprev->cts)); *cprev = cnow; return ret; } int usb_serial_generic_tiocmiwait(struct tty_struct *tty, unsigned long arg) { struct usb_serial_port *port = tty->driver_data; struct async_icount cnow; unsigned long flags; int ret; spin_lock_irqsave(&port->lock, flags); cnow = port->icount; /* atomic copy */ spin_unlock_irqrestore(&port->lock, flags); ret = wait_event_interruptible(port->port.delta_msr_wait, usb_serial_generic_msr_changed(tty, arg, &cnow)); if (!ret && !tty_port_initialized(&port->port)) ret = -EIO; return ret; } EXPORT_SYMBOL_GPL(usb_serial_generic_tiocmiwait); int usb_serial_generic_get_icount(struct tty_struct *tty, struct serial_icounter_struct *icount) { struct usb_serial_port *port = tty->driver_data; struct async_icount cnow; unsigned long flags; spin_lock_irqsave(&port->lock, flags); cnow = port->icount; /* atomic copy */ spin_unlock_irqrestore(&port->lock, flags); icount->cts = cnow.cts; icount->dsr = cnow.dsr; icount->rng = cnow.rng; icount->dcd = cnow.dcd; icount->tx = cnow.tx; icount->rx = cnow.rx; icount->frame = cnow.frame; icount->parity = cnow.parity; icount->overrun = cnow.overrun; icount->brk = cnow.brk; icount->buf_overrun = cnow.buf_overrun; return 0; } EXPORT_SYMBOL_GPL(usb_serial_generic_get_icount); #if defined(CONFIG_USB_SERIAL_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) int usb_serial_handle_sysrq_char(struct usb_serial_port *port, unsigned int ch) { if (port->sysrq) { if (ch && time_before(jiffies, port->sysrq)) { handle_sysrq(ch); port->sysrq = 0; return 1; } port->sysrq = 0; } return 0; } EXPORT_SYMBOL_GPL(usb_serial_handle_sysrq_char); int usb_serial_handle_break(struct usb_serial_port *port) { if (!port->port.console) return 0; if (!port->sysrq) { port->sysrq = jiffies + HZ*5; return 1; } port->sysrq = 0; return 0; } EXPORT_SYMBOL_GPL(usb_serial_handle_break); #endif /** * usb_serial_handle_dcd_change - handle a change of carrier detect state * @port: usb-serial port * @tty: tty for the port * @status: new carrier detect status, nonzero if active */ void usb_serial_handle_dcd_change(struct usb_serial_port *port, struct tty_struct *tty, unsigned int status) { dev_dbg(&port->dev, "%s - status %d\n", __func__, status); if (tty) { struct tty_ldisc *ld = tty_ldisc_ref(tty); if (ld) { if (ld->ops->dcd_change) ld->ops->dcd_change(tty, status); tty_ldisc_deref(ld); } } if (status) wake_up_interruptible(&port->port.open_wait); else if (tty && !C_CLOCAL(tty)) tty_hangup(tty); } EXPORT_SYMBOL_GPL(usb_serial_handle_dcd_change); int usb_serial_generic_resume(struct usb_serial *serial) { struct usb_serial_port *port; int i, c = 0, r; for (i = 0; i < serial->num_ports; i++) { port = serial->port[i]; if (!tty_port_initialized(&port->port)) continue; if (port->bulk_in_size) { r = usb_serial_generic_submit_read_urbs(port, GFP_NOIO); if (r < 0) c++; } if (port->bulk_out_size) { r = usb_serial_generic_write_start(port, GFP_NOIO); if (r < 0) c++; } } return c ? -EIO : 0; } EXPORT_SYMBOL_GPL(usb_serial_generic_resume); |
10 10 71 5 52 16 19 2 3 16 16 6 10 45 20 25 21 25 1 1 2 22 21 3 22 71 68 6 64 68 167 1 166 3 16 51 64 1 1 1 1 1 1 14 1 14 2 14 74 35 42 3 68 4 64 64 63 6 28 44 74 74 30 30 2 27 1 2 26 7 20 48 48 48 4 1 43 1 6 37 64 13 1 75 13 65 30 48 30 55 13 12 56 9 1 1 2 1 3 3 3 80 72 7 76 6 7 75 59 15 68 3 80 141 80 132 79 80 80 26 60 43 1 18 9 124 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 | // SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 2011 Novell Inc. */ #include <linux/module.h> #include <linux/fs.h> #include <linux/slab.h> #include <linux/file.h> #include <linux/fileattr.h> #include <linux/splice.h> #include <linux/xattr.h> #include <linux/security.h> #include <linux/uaccess.h> #include <linux/sched/signal.h> #include <linux/cred.h> #include <linux/namei.h> #include <linux/fdtable.h> #include <linux/ratelimit.h> #include <linux/exportfs.h> #include "overlayfs.h" #define OVL_COPY_UP_CHUNK_SIZE (1 << 20) static int ovl_ccup_set(const char *buf, const struct kernel_param *param) { pr_warn("\"check_copy_up\" module option is obsolete\n"); return 0; } static int ovl_ccup_get(char *buf, const struct kernel_param *param) { return sprintf(buf, "N\n"); } module_param_call(check_copy_up, ovl_ccup_set, ovl_ccup_get, NULL, 0644); MODULE_PARM_DESC(check_copy_up, "Obsolete; does nothing"); static bool ovl_must_copy_xattr(const char *name) { return !strcmp(name, XATTR_POSIX_ACL_ACCESS) || !strcmp(name, XATTR_POSIX_ACL_DEFAULT) || !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); } int ovl_copy_xattr(struct super_block *sb, struct dentry *old, struct dentry *new) { ssize_t list_size, size, value_size = 0; char *buf, *name, *value = NULL; int error = 0; size_t slen; if (!(old->d_inode->i_opflags & IOP_XATTR) || !(new->d_inode->i_opflags & IOP_XATTR)) return 0; list_size = vfs_listxattr(old, NULL, 0); if (list_size <= 0) { if (list_size == -EOPNOTSUPP) return 0; return list_size; } buf = kvzalloc(list_size, GFP_KERNEL); if (!buf) return -ENOMEM; list_size = vfs_listxattr(old, buf, list_size); if (list_size <= 0) { error = list_size; goto out; } for (name = buf; list_size; name += slen) { slen = strnlen(name, list_size) + 1; /* underlying fs providing us with an broken xattr list? */ if (WARN_ON(slen > list_size)) { error = -EIO; break; } list_size -= slen; if (ovl_is_private_xattr(sb, name)) continue; error = security_inode_copy_up_xattr(name); if (error < 0 && error != -EOPNOTSUPP) break; if (error == 1) { error = 0; continue; /* Discard */ } retry: size = vfs_getxattr(&init_user_ns, old, name, value, value_size); if (size == -ERANGE) size = vfs_getxattr(&init_user_ns, old, name, NULL, 0); if (size < 0) { error = size; break; } if (size > value_size) { void *new; new = kvmalloc(size, GFP_KERNEL); if (!new) { error = -ENOMEM; break; } kvfree(value); value = new; value_size = size; goto retry; } error = ovl_do_setxattr(OVL_FS(sb), new, name, value, size, 0); if (error) { if (error != -EOPNOTSUPP || ovl_must_copy_xattr(name)) break; /* Ignore failure to copy unknown xattrs */ error = 0; } } kvfree(value); out: kvfree(buf); return error; } static int ovl_copy_fileattr(struct inode *inode, struct path *old, struct path *new) { struct fileattr oldfa = { .flags_valid = true }; struct fileattr newfa = { .flags_valid = true }; int err; err = ovl_real_fileattr_get(old, &oldfa); if (err) { /* Ntfs-3g returns -EINVAL for "no fileattr support" */ if (err == -ENOTTY || err == -EINVAL) return 0; pr_warn("failed to retrieve lower fileattr (%pd2, err=%i)\n", old->dentry, err); return err; } /* * We cannot set immutable and append-only flags on upper inode, * because we would not be able to link upper inode to upper dir * not set overlay private xattr on upper inode. * Store these flags in overlay.protattr xattr instead. */ if (oldfa.flags & OVL_PROT_FS_FLAGS_MASK) { err = ovl_set_protattr(inode, new->dentry, &oldfa); if (err == -EPERM) pr_warn_once("copying fileattr: no xattr on upper\n"); else if (err) return err; } /* Don't bother copying flags if none are set */ if (!(oldfa.flags & OVL_COPY_FS_FLAGS_MASK)) return 0; err = ovl_real_fileattr_get(new, &newfa); if (err) { /* * Returning an error if upper doesn't support fileattr will * result in a regression, so revert to the old behavior. */ if (err == -ENOTTY || err == -EINVAL) { pr_warn_once("copying fileattr: no support on upper\n"); return 0; } pr_warn("failed to retrieve upper fileattr (%pd2, err=%i)\n", new->dentry, err); return err; } BUILD_BUG_ON(OVL_COPY_FS_FLAGS_MASK & ~FS_COMMON_FL); newfa.flags &= ~OVL_COPY_FS_FLAGS_MASK; newfa.flags |= (oldfa.flags & OVL_COPY_FS_FLAGS_MASK); BUILD_BUG_ON(OVL_COPY_FSX_FLAGS_MASK & ~FS_XFLAG_COMMON); newfa.fsx_xflags &= ~OVL_COPY_FSX_FLAGS_MASK; newfa.fsx_xflags |= (oldfa.fsx_xflags & OVL_COPY_FSX_FLAGS_MASK); return ovl_real_fileattr_set(new, &newfa); } static int ovl_copy_up_data(struct ovl_fs *ofs, struct path *old, struct path *new, loff_t len) { struct file *old_file; struct file *new_file; loff_t old_pos = 0; loff_t new_pos = 0; loff_t cloned; loff_t data_pos = -1; loff_t hole_len; bool skip_hole = false; int error = 0; if (len == 0) return 0; old_file = ovl_path_open(old, O_LARGEFILE | O_RDONLY); if (IS_ERR(old_file)) return PTR_ERR(old_file); new_file = ovl_path_open(new, O_LARGEFILE | O_WRONLY); if (IS_ERR(new_file)) { error = PTR_ERR(new_file); goto out_fput; } /* Try to use clone_file_range to clone up within the same fs */ cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0); if (cloned == len) goto out; /* Couldn't clone, so now we try to copy the data */ /* Check if lower fs supports seek operation */ if (old_file->f_mode & FMODE_LSEEK && old_file->f_op->llseek) skip_hole = true; while (len) { size_t this_len = OVL_COPY_UP_CHUNK_SIZE; long bytes; if (len < this_len) this_len = len; if (signal_pending_state(TASK_KILLABLE, current)) { error = -EINTR; break; } /* * Fill zero for hole will cost unnecessary disk space * and meanwhile slow down the copy-up speed, so we do * an optimization for hole during copy-up, it relies * on SEEK_DATA implementation in lower fs so if lower * fs does not support it, copy-up will behave as before. * * Detail logic of hole detection as below: * When we detect next data position is larger than current * position we will skip that hole, otherwise we copy * data in the size of OVL_COPY_UP_CHUNK_SIZE. Actually, * it may not recognize all kind of holes and sometimes * only skips partial of hole area. However, it will be * enough for most of the use cases. */ if (skip_hole && data_pos < old_pos) { data_pos = vfs_llseek(old_file, old_pos, SEEK_DATA); if (data_pos > old_pos) { hole_len = data_pos - old_pos; len -= hole_len; old_pos = new_pos = data_pos; continue; } else if (data_pos == -ENXIO) { break; } else if (data_pos < 0) { skip_hole = false; } } bytes = do_splice_direct(old_file, &old_pos, new_file, &new_pos, this_len, SPLICE_F_MOVE); if (bytes <= 0) { error = bytes; break; } WARN_ON(old_pos != new_pos); len -= bytes; } out: if (!error && ovl_should_sync(ofs)) error = vfs_fsync(new_file, 0); fput(new_file); out_fput: fput(old_file); return error; } static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat) { struct iattr attr = { .ia_valid = ATTR_SIZE, .ia_size = stat->size, }; return notify_change(&init_user_ns, upperdentry, &attr, NULL); } static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat) { struct iattr attr = { .ia_valid = ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_CTIME, .ia_atime = stat->atime, .ia_mtime = stat->mtime, }; return notify_change(&init_user_ns, upperdentry, &attr, NULL); } int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) { int err = 0; if (!S_ISLNK(stat->mode)) { struct iattr attr = { .ia_valid = ATTR_MODE, .ia_mode = stat->mode, }; err = notify_change(&init_user_ns, upperdentry, &attr, NULL); } if (!err) { struct iattr attr = { .ia_valid = ATTR_UID | ATTR_GID, .ia_uid = stat->uid, .ia_gid = stat->gid, }; err = notify_change(&init_user_ns, upperdentry, &attr, NULL); } if (!err) ovl_set_timestamps(upperdentry, stat); return err; } struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, bool is_upper) { struct ovl_fh *fh; int fh_type, dwords; int buflen = MAX_HANDLE_SZ; uuid_t *uuid = &real->d_sb->s_uuid; int err; /* Make sure the real fid stays 32bit aligned */ BUILD_BUG_ON(OVL_FH_FID_OFFSET % 4); BUILD_BUG_ON(MAX_HANDLE_SZ + OVL_FH_FID_OFFSET > 255); fh = kzalloc(buflen + OVL_FH_FID_OFFSET, GFP_KERNEL); if (!fh) return ERR_PTR(-ENOMEM); /* * We encode a non-connectable file handle for non-dir, because we * only need to find the lower inode number and we don't want to pay * the price or reconnecting the dentry. */ dwords = buflen >> 2; fh_type = exportfs_encode_fh(real, (void *)fh->fb.fid, &dwords, 0); buflen = (dwords << 2); err = -EIO; if (WARN_ON(fh_type < 0) || WARN_ON(buflen > MAX_HANDLE_SZ) || WARN_ON(fh_type == FILEID_INVALID)) goto out_err; fh->fb.version = OVL_FH_VERSION; fh->fb.magic = OVL_FH_MAGIC; fh->fb.type = fh_type; fh->fb.flags = OVL_FH_FLAG_CPU_ENDIAN; /* * When we will want to decode an overlay dentry from this handle * and all layers are on the same fs, if we get a disconncted real * dentry when we decode fid, the only way to tell if we should assign * it to upperdentry or to lowerstack is by checking this flag. */ if (is_upper) fh->fb.flags |= OVL_FH_FLAG_PATH_UPPER; fh->fb.len = sizeof(fh->fb) + buflen; if (ofs->config.uuid) fh->fb.uuid = *uuid; return fh; out_err: kfree(fh); return ERR_PTR(err); } int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower, struct dentry *upper) { const struct ovl_fh *fh = NULL; int err; /* * When lower layer doesn't support export operations store a 'null' fh, * so we can use the overlay.origin xattr to distignuish between a copy * up and a pure upper inode. */ if (ovl_can_decode_fh(lower->d_sb)) { fh = ovl_encode_real_fh(ofs, lower, false); if (IS_ERR(fh)) return PTR_ERR(fh); } /* * Do not fail when upper doesn't support xattrs. */ err = ovl_check_setxattr(ofs, upper, OVL_XATTR_ORIGIN, fh->buf, fh ? fh->fb.len : 0, 0); kfree(fh); /* Ignore -EPERM from setting "user.*" on symlink/special */ return err == -EPERM ? 0 : err; } /* Store file handle of @upper dir in @index dir entry */ static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper, struct dentry *index) { const struct ovl_fh *fh; int err; fh = ovl_encode_real_fh(ofs, upper, true); if (IS_ERR(fh)) return PTR_ERR(fh); err = ovl_setxattr(ofs, index, OVL_XATTR_UPPER, fh->buf, fh->fb.len); kfree(fh); return err; } /* * Create and install index entry. * * Caller must hold i_mutex on indexdir. */ static int ovl_create_index(struct dentry *dentry, struct dentry *origin, struct dentry *upper) { struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *indexdir = ovl_indexdir(dentry->d_sb); struct inode *dir = d_inode(indexdir); struct dentry *index = NULL; struct dentry *temp = NULL; struct qstr name = { }; int err; /* * For now this is only used for creating index entry for directories, * because non-dir are copied up directly to index and then hardlinked * to upper dir. * * TODO: implement create index for non-dir, so we can call it when * encoding file handle for non-dir in case index does not exist. */ if (WARN_ON(!d_is_dir(dentry))) return -EIO; /* Directory not expected to be indexed before copy up */ if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry)))) return -EIO; err = ovl_get_index_name(ofs, origin, &name); if (err) return err; temp = ovl_create_temp(ofs, indexdir, OVL_CATTR(S_IFDIR | 0)); err = PTR_ERR(temp); if (IS_ERR(temp)) goto free_name; err = ovl_set_upper_fh(ofs, upper, temp); if (err) goto out; index = lookup_one_len(name.name, indexdir, name.len); if (IS_ERR(index)) { err = PTR_ERR(index); } else { err = ovl_do_rename(ofs, dir, temp, dir, index, 0); dput(index); } out: if (err) ovl_cleanup(ofs, dir, temp); dput(temp); free_name: kfree(name.name); return err; } struct ovl_copy_up_ctx { struct dentry *parent; struct dentry *dentry; struct path lowerpath; struct kstat stat; struct kstat pstat; const char *link; struct dentry *destdir; struct qstr destname; struct dentry *workdir; bool origin; bool indexed; bool metacopy; }; static int ovl_link_up(struct ovl_copy_up_ctx *c) { int err; struct dentry *upper; struct dentry *upperdir = ovl_dentry_upper(c->parent); struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); struct inode *udir = d_inode(upperdir); /* Mark parent "impure" because it may now contain non-pure upper */ err = ovl_set_impure(c->parent, upperdir); if (err) return err; err = ovl_set_nlink_lower(c->dentry); if (err) return err; inode_lock_nested(udir, I_MUTEX_PARENT); upper = lookup_one_len(c->dentry->d_name.name, upperdir, c->dentry->d_name.len); err = PTR_ERR(upper); if (!IS_ERR(upper)) { err = ovl_do_link(ofs, ovl_dentry_upper(c->dentry), udir, upper); if (!err) { /* Restore timestamps on parent (best effort) */ ovl_set_timestamps(upperdir, &c->pstat); ovl_dentry_set_upper_alias(c->dentry); ovl_dentry_update_reval(c->dentry, upper); } dput(upper); } inode_unlock(udir); if (err) return err; err = ovl_set_nlink_upper(c->dentry); return err; } static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) { struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); struct inode *inode = d_inode(c->dentry); struct path upperpath, datapath; int err; ovl_path_upper(c->dentry, &upperpath); if (WARN_ON(upperpath.dentry != NULL)) return -EIO; upperpath.dentry = temp; /* * Copy up data first and then xattrs. Writing data after * xattrs will remove security.capability xattr automatically. */ if (S_ISREG(c->stat.mode) && !c->metacopy) { ovl_path_lowerdata(c->dentry, &datapath); err = ovl_copy_up_data(ofs, &datapath, &upperpath, c->stat.size); if (err) return err; } err = ovl_copy_xattr(c->dentry->d_sb, c->lowerpath.dentry, temp); if (err) return err; if (inode->i_flags & OVL_COPY_I_FLAGS_MASK && (S_ISREG(c->stat.mode) || S_ISDIR(c->stat.mode))) { /* * Copy the fileattr inode flags that are the source of already * copied i_flags */ err = ovl_copy_fileattr(inode, &c->lowerpath, &upperpath); if (err) return err; } /* * Store identifier of lower inode in upper inode xattr to * allow lookup of the copy up origin inode. * * Don't set origin when we are breaking the association with a lower * hard link. */ if (c->origin) { err = ovl_set_origin(ofs, c->lowerpath.dentry, temp); if (err) return err; } if (c->metacopy) { err = ovl_check_setxattr(ofs, temp, OVL_XATTR_METACOPY, NULL, 0, -EOPNOTSUPP); if (err) return err; } inode_lock(temp->d_inode); if (S_ISREG(c->stat.mode)) err = ovl_set_size(temp, &c->stat); if (!err) err = ovl_set_attr(temp, &c->stat); inode_unlock(temp->d_inode); return err; } struct ovl_cu_creds { const struct cred *old; struct cred *new; }; static int ovl_prep_cu_creds(struct dentry *dentry, struct ovl_cu_creds *cc) { int err; cc->old = cc->new = NULL; err = security_inode_copy_up(dentry, &cc->new); if (err < 0) return err; if (cc->new) cc->old = override_creds(cc->new); return 0; } static void ovl_revert_cu_creds(struct ovl_cu_creds *cc) { if (cc->new) { revert_creds(cc->old); put_cred(cc->new); } } /* * Copyup using workdir to prepare temp file. Used when copying up directories, * special files or when upper fs doesn't support O_TMPFILE. */ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) { struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); struct inode *inode; struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir); struct dentry *temp, *upper; struct ovl_cu_creds cc; int err; struct ovl_cattr cattr = { /* Can't properly set mode on creation because of the umask */ .mode = c->stat.mode & S_IFMT, .rdev = c->stat.rdev, .link = c->link }; /* workdir and destdir could be the same when copying up to indexdir */ err = -EIO; if (lock_rename(c->workdir, c->destdir) != NULL) goto unlock; err = ovl_prep_cu_creds(c->dentry, &cc); if (err) goto unlock; temp = ovl_create_temp(ofs, c->workdir, &cattr); ovl_revert_cu_creds(&cc); err = PTR_ERR(temp); if (IS_ERR(temp)) goto unlock; err = ovl_copy_up_inode(c, temp); if (err) goto cleanup; if (S_ISDIR(c->stat.mode) && c->indexed) { err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp); if (err) goto cleanup; } upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len); err = PTR_ERR(upper); if (IS_ERR(upper)) goto cleanup; err = ovl_do_rename(ofs, wdir, temp, udir, upper, 0); dput(upper); if (err) goto cleanup; if (!c->metacopy) ovl_set_upperdata(d_inode(c->dentry)); inode = d_inode(c->dentry); ovl_inode_update(inode, temp); if (S_ISDIR(inode->i_mode)) ovl_set_flag(OVL_WHITEOUTS, inode); unlock: unlock_rename(c->workdir, c->destdir); return err; cleanup: ovl_cleanup(ofs, wdir, temp); dput(temp); goto unlock; } /* Copyup using O_TMPFILE which does not require cross dir locking */ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c) { struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); struct inode *udir = d_inode(c->destdir); struct dentry *temp, *upper; struct ovl_cu_creds cc; int err; err = ovl_prep_cu_creds(c->dentry, &cc); if (err) return err; temp = ovl_do_tmpfile(ofs, c->workdir, c->stat.mode); ovl_revert_cu_creds(&cc); if (IS_ERR(temp)) return PTR_ERR(temp); err = ovl_copy_up_inode(c, temp); if (err) goto out_dput; inode_lock_nested(udir, I_MUTEX_PARENT); upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len); err = PTR_ERR(upper); if (!IS_ERR(upper)) { err = ovl_do_link(ofs, temp, udir, upper); dput(upper); } inode_unlock(udir); if (err) goto out_dput; if (!c->metacopy) ovl_set_upperdata(d_inode(c->dentry)); ovl_inode_update(d_inode(c->dentry), temp); return 0; out_dput: dput(temp); return err; } /* * Copy up a single dentry * * All renames start with copy up of source if necessary. The actual * rename will only proceed once the copy up was successful. Copy up uses * upper parent i_mutex for exclusion. Since rename can change d_parent it * is possible that the copy up will lock the old parent. At that point * the file will have already been copied up anyway. */ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) { int err; struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); bool to_index = false; /* * Indexed non-dir is copied up directly to the index entry and then * hardlinked to upper dir. Indexed dir is copied up to indexdir, * then index entry is created and then copied up dir installed. * Copying dir up to indexdir instead of workdir simplifies locking. */ if (ovl_need_index(c->dentry)) { c->indexed = true; if (S_ISDIR(c->stat.mode)) c->workdir = ovl_indexdir(c->dentry->d_sb); else to_index = true; } if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index) c->origin = true; if (to_index) { c->destdir = ovl_indexdir(c->dentry->d_sb); err = ovl_get_index_name(ofs, c->lowerpath.dentry, &c->destname); if (err) return err; } else if (WARN_ON(!c->parent)) { /* Disconnected dentry must be copied up to index dir */ return -EIO; } else { /* * Mark parent "impure" because it may now contain non-pure * upper */ err = ovl_set_impure(c->parent, c->destdir); if (err) return err; } /* Should we copyup with O_TMPFILE or with workdir? */ if (S_ISREG(c->stat.mode) && ofs->tmpfile) err = ovl_copy_up_tmpfile(c); else err = ovl_copy_up_workdir(c); if (err) goto out; if (c->indexed) ovl_set_flag(OVL_INDEX, d_inode(c->dentry)); if (to_index) { /* Initialize nlink for copy up of disconnected dentry */ err = ovl_set_nlink_upper(c->dentry); } else { struct inode *udir = d_inode(c->destdir); /* Restore timestamps on parent (best effort) */ inode_lock(udir); ovl_set_timestamps(c->destdir, &c->pstat); inode_unlock(udir); ovl_dentry_set_upper_alias(c->dentry); ovl_dentry_update_reval(c->dentry, ovl_dentry_upper(c->dentry)); } out: if (to_index) kfree(c->destname.name); return err; } static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode, int flags) { struct ovl_fs *ofs = dentry->d_sb->s_fs_info; if (!ofs->config.metacopy) return false; if (!S_ISREG(mode)) return false; if (flags && ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC))) return false; return true; } static ssize_t ovl_getxattr_value(struct ovl_fs *ofs, struct dentry *dentry, char *name, char **value) { ssize_t res; char *buf; res = ovl_do_getxattr(ofs, dentry, name, NULL, 0); if (res == -ENODATA || res == -EOPNOTSUPP) res = 0; if (res > 0) { buf = kzalloc(res, GFP_KERNEL); if (!buf) return -ENOMEM; res = ovl_do_getxattr(ofs, dentry, name, buf, res); if (res < 0) kfree(buf); else *value = buf; } return res; } /* Copy up data of an inode which was copied up metadata only in the past. */ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c) { struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); struct path upperpath, datapath; int err; char *capability = NULL; ssize_t cap_size; ovl_path_upper(c->dentry, &upperpath); if (WARN_ON(upperpath.dentry == NULL)) return -EIO; ovl_path_lowerdata(c->dentry, &datapath); if (WARN_ON(datapath.dentry == NULL)) return -EIO; if (c->stat.size) { err = cap_size = ovl_getxattr_value(ofs, upperpath.dentry, XATTR_NAME_CAPS, &capability); if (cap_size < 0) goto out; } err = ovl_copy_up_data(ofs, &datapath, &upperpath, c->stat.size); if (err) goto out_free; /* * Writing to upper file will clear security.capability xattr. We * don't want that to happen for normal copy-up operation. */ if (capability) { err = ovl_do_setxattr(ofs, upperpath.dentry, XATTR_NAME_CAPS, capability, cap_size, 0); if (err) goto out_free; } err = ovl_removexattr(ofs, upperpath.dentry, OVL_XATTR_METACOPY); if (err) goto out_free; ovl_set_upperdata(d_inode(c->dentry)); out_free: kfree(capability); out: return err; } static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, int flags) { int err; DEFINE_DELAYED_CALL(done); struct path parentpath; struct ovl_copy_up_ctx ctx = { .parent = parent, .dentry = dentry, .workdir = ovl_workdir(dentry), }; if (WARN_ON(!ctx.workdir)) return -EROFS; ovl_path_lower(dentry, &ctx.lowerpath); err = vfs_getattr(&ctx.lowerpath, &ctx.stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); if (err) return err; if (!kuid_has_mapping(current_user_ns(), ctx.stat.uid) || !kgid_has_mapping(current_user_ns(), ctx.stat.gid)) return -EOVERFLOW; ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags); if (parent) { ovl_path_upper(parent, &parentpath); ctx.destdir = parentpath.dentry; ctx.destname = dentry->d_name; err = vfs_getattr(&parentpath, &ctx.pstat, STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT); if (err) return err; } /* maybe truncate regular file. this has no effect on dirs */ if (flags & O_TRUNC) ctx.stat.size = 0; if (S_ISLNK(ctx.stat.mode)) { ctx.link = vfs_get_link(ctx.lowerpath.dentry, &done); if (IS_ERR(ctx.link)) return PTR_ERR(ctx.link); } err = ovl_copy_up_start(dentry, flags); /* err < 0: interrupted, err > 0: raced with another copy-up */ if (unlikely(err)) { if (err > 0) err = 0; } else { if (!ovl_dentry_upper(dentry)) err = ovl_do_copy_up(&ctx); if (!err && parent && !ovl_dentry_has_upper_alias(dentry)) err = ovl_link_up(&ctx); if (!err && ovl_dentry_needs_data_copy_up_locked(dentry, flags)) err = ovl_copy_up_meta_inode_data(&ctx); ovl_copy_up_end(dentry); } do_delayed_call(&done); return err; } static int ovl_copy_up_flags(struct dentry *dentry, int flags) { int err = 0; const struct cred *old_cred; bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED); /* * With NFS export, copy up can get called for a disconnected non-dir. * In this case, we will copy up lower inode to index dir without * linking it to upper dir. */ if (WARN_ON(disconnected && d_is_dir(dentry))) return -EIO; old_cred = ovl_override_creds(dentry->d_sb); while (!err) { struct dentry *next; struct dentry *parent = NULL; if (ovl_already_copied_up(dentry, flags)) break; next = dget(dentry); /* find the topmost dentry not yet copied up */ for (; !disconnected;) { parent = dget_parent(next); if (ovl_dentry_upper(parent)) break; dput(next); next = parent; } err = ovl_copy_up_one(parent, next, flags); dput(parent); dput(next); } revert_creds(old_cred); return err; } static bool ovl_open_need_copy_up(struct dentry *dentry, int flags) { /* Copy up of disconnected dentry does not set upper alias */ if (ovl_already_copied_up(dentry, flags)) return false; if (special_file(d_inode(dentry)->i_mode)) return false; if (!ovl_open_flags_need_copy_up(flags)) return false; return true; } int ovl_maybe_copy_up(struct dentry *dentry, int flags) { int err = 0; if (ovl_open_need_copy_up(dentry, flags)) { err = ovl_want_write(dentry); if (!err) { err = ovl_copy_up_flags(dentry, flags); ovl_drop_write(dentry); } } return err; } int ovl_copy_up_with_data(struct dentry *dentry) { return ovl_copy_up_flags(dentry, O_WRONLY); } int ovl_copy_up(struct dentry *dentry) { return ovl_copy_up_flags(dentry, 0); } |
17 11 16 32 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 | /* * linux/fs/nls/mac-cyrillic.c * * Charset maccyrillic translation tables. * Generated automatically from the Unicode and charset * tables from the Unicode Organization (www.unicode.org). * The Unicode to charset table has only exact mappings. */ /* * COPYRIGHT AND PERMISSION NOTICE * * Copyright 1991-2012 Unicode, Inc. All rights reserved. Distributed under * the Terms of Use in http://www.unicode.org/copyright.html. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of the Unicode data files and any associated documentation (the "Data * Files") or Unicode software and any associated documentation (the * "Software") to deal in the Data Files or Software without restriction, * including without limitation the rights to use, copy, modify, merge, * publish, distribute, and/or sell copies of the Data Files or Software, and * to permit persons to whom the Data Files or Software are furnished to do * so, provided that (a) the above copyright notice(s) and this permission * notice appear with all copies of the Data Files or Software, (b) both the * above copyright notice(s) and this permission notice appear in associated * documentation, and (c) there is clear notice in each modified Data File or * in the Software as well as in the documentation associated with the Data * File(s) or Software that the data or software has been modified. * * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY * KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF * THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS * INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR * PERFORMANCE OF THE DATA FILES OR SOFTWARE. * * Except as contained in this notice, the name of a copyright holder shall * not be used in advertising or otherwise to promote the sale, use or other * dealings in these Data Files or Software without prior written * authorization of the copyright holder. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> static const wchar_t charset2uni[256] = { /* 0x00 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x10 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* 0x30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* 0x40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, /* 0x60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, /* 0x80 */ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, /* 0x90 */ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, /* 0xa0 */ 0x2020, 0x00b0, 0x0490, 0x00a3, 0x00a7, 0x2022, 0x00b6, 0x0406, 0x00ae, 0x00a9, 0x2122, 0x0402, 0x0452, 0x2260, 0x0403, 0x0453, /* 0xb0 */ 0x221e, 0x00b1, 0x2264, 0x2265, 0x0456, 0x00b5, 0x0491, 0x0408, 0x0404, 0x0454, 0x0407, 0x0457, 0x0409, 0x0459, 0x040a, 0x045a, /* 0xc0 */ 0x0458, 0x0405, 0x00ac, 0x221a, 0x0192, 0x2248, 0x2206, 0x00ab, 0x00bb, 0x2026, 0x00a0, 0x040b, 0x045b, 0x040c, 0x045c, 0x0455, /* 0xd0 */ 0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0x00f7, 0x201e, 0x040e, 0x045e, 0x040f, 0x045f, 0x2116, 0x0401, 0x0451, 0x044f, /* 0xe0 */ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, /* 0xf0 */ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x20ac, }; static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0xca, 0x00, 0x00, 0xa3, 0x00, 0x00, 0x00, 0xa4, /* 0xa0-0xa7 */ 0x00, 0xa9, 0x00, 0xc7, 0xc2, 0x00, 0xa8, 0x00, /* 0xa8-0xaf */ 0xa1, 0xb1, 0x00, 0x00, 0x00, 0xb5, 0xa6, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0xc8, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd6, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page01[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0xc4, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page04[256] = { 0x00, 0xdd, 0xab, 0xae, 0xb8, 0xc1, 0xa7, 0xba, /* 0x00-0x07 */ 0xb7, 0xbc, 0xbe, 0xcb, 0xcd, 0x00, 0xd8, 0xda, /* 0x08-0x0f */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x10-0x17 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x18-0x1f */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x20-0x27 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x28-0x2f */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0x30-0x37 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0x38-0x3f */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0x40-0x47 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xdf, /* 0x48-0x4f */ 0x00, 0xde, 0xac, 0xaf, 0xb9, 0xcf, 0xb4, 0xbb, /* 0x50-0x57 */ 0xc0, 0xbd, 0xbf, 0xcc, 0xce, 0x00, 0xd9, 0xdb, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0xa2, 0xb6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page20[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0xd0, 0xd1, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0xd4, 0xd5, 0x00, 0x00, 0xd2, 0xd3, 0xd7, 0x00, /* 0x18-0x1f */ 0xa0, 0x00, 0xa5, 0x00, 0x00, 0x00, 0xc9, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page21[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char page22[256] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, /* 0x00-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 0x00, 0x00, 0xc3, 0x00, 0x00, 0x00, 0xb0, 0x00, /* 0x18-0x1f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 0xc5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 0xad, 0x00, 0x00, 0x00, 0xb2, 0xb3, 0x00, 0x00, /* 0x60-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ }; static const unsigned char *const page_uni2charset[256] = { page00, page01, NULL, NULL, page04, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, page20, page21, page22, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; static const unsigned char charset2lower[256] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x00-0x07 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x08-0x0f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x10-0x17 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x18-0x1f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x20-0x27 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x28-0x2f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x30-0x37 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x38-0x3f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x40-0x47 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x48-0x4f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x50-0x57 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x58-0x5f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x60-0x67 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x68-0x6f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70-0x77 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x78-0x7f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x80-0x87 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x88-0x8f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90-0x97 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x98-0x9f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa0-0xa7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa8-0xaf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0-0xb7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb8-0xbf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc0-0xc7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc8-0xcf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0-0xd7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd8-0xdf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe0-0xe7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe8-0xef */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xf0-0xf7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xf8-0xff */ }; static const unsigned char charset2upper[256] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x00-0x07 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x08-0x0f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x10-0x17 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x18-0x1f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x20-0x27 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x28-0x2f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x30-0x37 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x38-0x3f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x40-0x47 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x48-0x4f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x50-0x57 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x58-0x5f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x60-0x67 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x68-0x6f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70-0x77 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x78-0x7f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x80-0x87 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x88-0x8f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90-0x97 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x98-0x9f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa0-0xa7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa8-0xaf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0-0xb7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb8-0xbf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc0-0xc7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc8-0xcf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0-0xd7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd8-0xdf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe0-0xe7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe8-0xef */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xf0-0xf7 */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xf8-0xff */ }; static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; unsigned char ch = (uni & 0xff00) >> 8; if (boundlen <= 0) return -ENAMETOOLONG; uni2charset = page_uni2charset[ch]; if (uni2charset && uni2charset[cl]) out[0] = uni2charset[cl]; else return -EINVAL; return 1; } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { *uni = charset2uni[*rawstring]; if (*uni == 0x0000) return -EINVAL; return 1; } static struct nls_table table = { .charset = "maccyrillic", .uni2char = uni2char, .char2uni = char2uni, .charset2lower = charset2lower, .charset2upper = charset2upper, }; static int __init init_nls_maccyrillic(void) { return register_nls(&table); } static void __exit exit_nls_maccyrillic(void) { unregister_nls(&table); } module_init(init_nls_maccyrillic) module_exit(exit_nls_maccyrillic) MODULE_LICENSE("Dual BSD/GPL"); |
68 5 65 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 | // SPDX-License-Identifier: GPL-2.0-or-later /* * ALSA sequencer MIDI-through client * Copyright (c) 1999-2000 by Takashi Iwai <tiwai@suse.de> */ #include <linux/init.h> #include <linux/slab.h> #include <linux/module.h> #include <sound/core.h> #include "seq_clientmgr.h" #include <sound/initval.h> #include <sound/asoundef.h> /* Sequencer MIDI-through client This gives a simple midi-through client. All the normal input events are redirected to output port immediately. The routing can be done via aconnect program in alsa-utils. Each client has a static client number 14 (= SNDRV_SEQ_CLIENT_DUMMY). If you want to auto-load this module, you may add the following alias in your /etc/conf.modules file. alias snd-seq-client-14 snd-seq-dummy The module is loaded on demand for client 14, or /proc/asound/seq/ is accessed. If you don't need this module to be loaded, alias snd-seq-client-14 as "off". This will help modprobe. The number of ports to be created can be specified via the module parameter "ports". For example, to create four ports, add the following option in a configuration file under /etc/modprobe.d/: option snd-seq-dummy ports=4 The model option "duplex=1" enables duplex operation to the port. In duplex mode, a pair of ports are created instead of single port, and events are tunneled between pair-ports. For example, input to port A is sent to output port of another port B and vice versa. In duplex mode, each port has DUPLEX capability. */ MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>"); MODULE_DESCRIPTION("ALSA sequencer MIDI-through client"); MODULE_LICENSE("GPL"); MODULE_ALIAS("snd-seq-client-" __stringify(SNDRV_SEQ_CLIENT_DUMMY)); static int ports = 1; static bool duplex; module_param(ports, int, 0444); MODULE_PARM_DESC(ports, "number of ports to be created"); module_param(duplex, bool, 0444); MODULE_PARM_DESC(duplex, "create DUPLEX ports"); struct snd_seq_dummy_port { int client; int port; int duplex; int connect; }; static int my_client = -1; /* * event input callback - just redirect events to subscribers */ static int dummy_input(struct snd_seq_event *ev, int direct, void *private_data, int atomic, int hop) { struct snd_seq_dummy_port *p; struct snd_seq_event tmpev; p = private_data; if (ev->source.client == SNDRV_SEQ_CLIENT_SYSTEM || ev->type == SNDRV_SEQ_EVENT_KERNEL_ERROR) return 0; /* ignore system messages */ tmpev = *ev; if (p->duplex) tmpev.source.port = p->connect; else tmpev.source.port = p->port; tmpev.dest.client = SNDRV_SEQ_ADDRESS_SUBSCRIBERS; return snd_seq_kernel_client_dispatch(p->client, &tmpev, atomic, hop); } /* * free_private callback */ static void dummy_free(void *private_data) { kfree(private_data); } /* * create a port */ static struct snd_seq_dummy_port __init * create_port(int idx, int type) { struct snd_seq_port_info pinfo; struct snd_seq_port_callback pcb; struct snd_seq_dummy_port *rec; rec = kzalloc(sizeof(*rec), GFP_KERNEL); if (!rec) return NULL; rec->client = my_client; rec->duplex = duplex; rec->connect = 0; memset(&pinfo, 0, sizeof(pinfo)); pinfo.addr.client = my_client; if (duplex) sprintf(pinfo.name, "Midi Through Port-%d:%c", idx, (type ? 'B' : 'A')); else sprintf(pinfo.name, "Midi Through Port-%d", idx); pinfo.capability = SNDRV_SEQ_PORT_CAP_READ | SNDRV_SEQ_PORT_CAP_SUBS_READ; pinfo.capability |= SNDRV_SEQ_PORT_CAP_WRITE | SNDRV_SEQ_PORT_CAP_SUBS_WRITE; if (duplex) pinfo.capability |= SNDRV_SEQ_PORT_CAP_DUPLEX; pinfo.type = SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC | SNDRV_SEQ_PORT_TYPE_SOFTWARE | SNDRV_SEQ_PORT_TYPE_PORT; memset(&pcb, 0, sizeof(pcb)); pcb.owner = THIS_MODULE; pcb.event_input = dummy_input; pcb.private_free = dummy_free; pcb.private_data = rec; pinfo.kernel = &pcb; if (snd_seq_kernel_client_ctl(my_client, SNDRV_SEQ_IOCTL_CREATE_PORT, &pinfo) < 0) { kfree(rec); return NULL; } rec->port = pinfo.addr.port; return rec; } /* * register client and create ports */ static int __init register_client(void) { struct snd_seq_dummy_port *rec1, *rec2; int i; if (ports < 1) { pr_err("ALSA: seq_dummy: invalid number of ports %d\n", ports); return -EINVAL; } /* create client */ my_client = snd_seq_create_kernel_client(NULL, SNDRV_SEQ_CLIENT_DUMMY, "Midi Through"); if (my_client < 0) return my_client; /* create ports */ for (i = 0; i < ports; i++) { rec1 = create_port(i, 0); if (rec1 == NULL) { snd_seq_delete_kernel_client(my_client); return -ENOMEM; } if (duplex) { rec2 = create_port(i, 1); if (rec2 == NULL) { snd_seq_delete_kernel_client(my_client); return -ENOMEM; } rec1->connect = rec2->port; rec2->connect = rec1->port; } } return 0; } /* * delete client if exists */ static void __exit delete_client(void) { if (my_client >= 0) snd_seq_delete_kernel_client(my_client); } /* * Init part */ static int __init alsa_seq_dummy_init(void) { return register_client(); } static void __exit alsa_seq_dummy_exit(void) { delete_client(); } module_init(alsa_seq_dummy_init) module_exit(alsa_seq_dummy_exit) |
3 3 1 1 3 3 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Linux driver for TerraTec DMX 6Fire USB * * Main routines and module definitions. * * Author: Torsten Schenk <torsten.schenk@zoho.com> * Created: Jan 01, 2011 * Copyright: (C) Torsten Schenk */ #include "chip.h" #include "firmware.h" #include "pcm.h" #include "control.h" #include "comm.h" #include "midi.h" #include <linux/moduleparam.h> #include <linux/interrupt.h> #include <linux/module.h> #include <linux/init.h> #include <linux/gfp.h> #include <sound/initval.h> MODULE_AUTHOR("Torsten Schenk <torsten.schenk@zoho.com>"); MODULE_DESCRIPTION("TerraTec DMX 6Fire USB audio driver"); MODULE_LICENSE("GPL v2"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-max */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* Id for card */ static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; /* Enable card */ static struct sfire_chip *chips[SNDRV_CARDS] = SNDRV_DEFAULT_PTR; static struct usb_device *devices[SNDRV_CARDS] = SNDRV_DEFAULT_PTR; module_param_array(index, int, NULL, 0444); MODULE_PARM_DESC(index, "Index value for the 6fire sound device"); module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for the 6fire sound device."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable the 6fire sound device."); static DEFINE_MUTEX(register_mutex); static void usb6fire_chip_abort(struct sfire_chip *chip) { if (chip) { if (chip->pcm) usb6fire_pcm_abort(chip); if (chip->midi) usb6fire_midi_abort(chip); if (chip->comm) usb6fire_comm_abort(chip); if (chip->control) usb6fire_control_abort(chip); if (chip->card) { snd_card_disconnect(chip->card); snd_card_free_when_closed(chip->card); chip->card = NULL; } } } static void usb6fire_card_free(struct snd_card *card) { struct sfire_chip *chip = card->private_data; if (chip) { if (chip->pcm) usb6fire_pcm_destroy(chip); if (chip->midi) usb6fire_midi_destroy(chip); if (chip->comm) usb6fire_comm_destroy(chip); if (chip->control) usb6fire_control_destroy(chip); } } static int usb6fire_chip_probe(struct usb_interface *intf, const struct usb_device_id *usb_id) { int ret; int i; struct sfire_chip *chip = NULL; struct usb_device *device = interface_to_usbdev(intf); int regidx = -1; /* index in module parameter array */ struct snd_card *card = NULL; /* look if we already serve this card and return if so */ mutex_lock(®ister_mutex); for (i = 0; i < SNDRV_CARDS; i++) { if (devices[i] == device) { if (chips[i]) chips[i]->intf_count++; usb_set_intfdata(intf, chips[i]); mutex_unlock(®ister_mutex); return 0; } else if (!devices[i] && regidx < 0) regidx = i; } if (regidx < 0) { mutex_unlock(®ister_mutex); dev_err(&intf->dev, "too many cards registered.\n"); return -ENODEV; } devices[regidx] = device; mutex_unlock(®ister_mutex); /* check, if firmware is present on device, upload it if not */ ret = usb6fire_fw_init(intf); if (ret < 0) return ret; else if (ret == FW_NOT_READY) /* firmware update performed */ return 0; /* if we are here, card can be registered in alsa. */ if (usb_set_interface(device, 0, 0) != 0) { dev_err(&intf->dev, "can't set first interface.\n"); return -EIO; } ret = snd_card_new(&intf->dev, index[regidx], id[regidx], THIS_MODULE, sizeof(struct sfire_chip), &card); if (ret < 0) { dev_err(&intf->dev, "cannot create alsa card.\n"); return ret; } strcpy(card->driver, "6FireUSB"); strcpy(card->shortname, "TerraTec DMX6FireUSB"); sprintf(card->longname, "%s at %d:%d", card->shortname, device->bus->busnum, device->devnum); chip = card->private_data; chips[regidx] = chip; chip->dev = device; chip->regidx = regidx; chip->intf_count = 1; chip->card = card; card->private_free = usb6fire_card_free; ret = usb6fire_comm_init(chip); if (ret < 0) goto destroy_chip; ret = usb6fire_midi_init(chip); if (ret < 0) goto destroy_chip; ret = usb6fire_pcm_init(chip); if (ret < 0) goto destroy_chip; ret = usb6fire_control_init(chip); if (ret < 0) goto destroy_chip; ret = snd_card_register(card); if (ret < 0) { dev_err(&intf->dev, "cannot register card."); goto destroy_chip; } usb_set_intfdata(intf, chip); return 0; destroy_chip: snd_card_free(card); return ret; } static void usb6fire_chip_disconnect(struct usb_interface *intf) { struct sfire_chip *chip; chip = usb_get_intfdata(intf); if (chip) { /* if !chip, fw upload has been performed */ chip->intf_count--; if (!chip->intf_count) { mutex_lock(®ister_mutex); devices[chip->regidx] = NULL; chips[chip->regidx] = NULL; mutex_unlock(®ister_mutex); chip->shutdown = true; usb6fire_chip_abort(chip); } } } static const struct usb_device_id device_table[] = { { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x0ccd, .idProduct = 0x0080 }, {} }; MODULE_DEVICE_TABLE(usb, device_table); static struct usb_driver usb_driver = { .name = "snd-usb-6fire", .probe = usb6fire_chip_probe, .disconnect = usb6fire_chip_disconnect, .id_table = device_table, }; module_usb_driver(usb_driver); |
30 30 30 135 134 2 2749 2740 2749 2438 603 1845 24611 24482 24470 24357 133 135 24465 2 24134 898 145 130 7 7 11 18 134 134 10 7 32 32 1556 21893 25609 25657 23100 23113 580 23108 408 23125 25589 25339 780 470 569 25567 25569 22683 16270 25585 25574 277 25586 592 25593 8057 8420 7461 476 859 852 3 3 22001 22026 21504 37 2 2 19708 19714 19710 19712 19575 19564 13 13 69 56 16 8 8 2 43 7 67 67 3 7 60 69 67 11 1 6 4 2 8 48 20 8 21 29 8 48 58 29 48 20 7 13 16 8 8 8 6 125 122 3 3 19 19 13 13 4 6 13 13 13 4 13 6 13 4 13 19 19569 19573 19203 645 19221 127 3627 3368 392 314 315 314 148 149 142 30 3685 3687 3439 773 763 15 777 93 775 2 3441 3698 3679 12 33 37 37 8 1 2 8 26 2 39 39 12 12 12 3 3 3 25070 25094 3255 3257 13937 13949 2508 103 2434 103 104 25 35 99 92 7 7 1 1 3 5 5 3 82 25 104 32 236 179 10 231 285 285 88 252 27 12 12 1 12 17 27 27 1 105 78 6 224 237 87 2832 559 451 2278 161 162 390 42 1 390 49 90 37 90 16 4 6 6 14 22 12 15 14 22 1 22 22 4 10 22 22 22 2 21 22 20 22 2 22 47 5 39 3 1 7 1 8 1688 18 1616 89 67 83 1 82 14 5 918 251 11 233 13 5 20 1 20 2 4 4 2 58 50 8 3 54 1339 87 1259 2 1339 2594 3462 1133 1129 74 315 316 3 23 17450 43 36 33 80 25 81 80 81 36 36 36 36 36 33 7 34 34 32 3 33 34 34 32 32 2 10 32 225 411 60 20 42 61 23 3 36 61 1 60 59 1 1 1 60 59 61 1 5 5 9 47 45 45 45 45 43 43 40 8 8 58 58 1 1 2 21 41 16 1 15 4 5 1 4 4 4 1 16 1 15 7 7 3 3 4 4 4 2 2 2 2 116 95 15 93 93 93 131 68 67 1 54 114 62 60 41 17 4 61 13 62 3 62 39 9 5 9 39 23 62 30 215 2 2 210 19 56 19 37 56 56 56 56 6 1 152 152 239 239 190 191 16 34 28 165 1 61 59 126 126 9 124 123 396 395 393 396 391 1 9 3 80 21 21 125 7 107 21 21 124 125 7 7 151 5 146 151 158 158 158 151 149 151 151 372 371 362 5 3 3 3 3 10 2 10 5 3 1 2 2 2 2 2 2012 1 4 2005 292 67 276 59 310 309 80 56 71 8 75 33 105 1 12 33 33 33 75 13 13 73 72 75 37 39 38 38 127 122 39 86 37 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Routines having to do with the 'struct sk_buff' memory handlers. * * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> * Florian La Roche <rzsfl@rz.uni-sb.de> * * Fixes: * Alan Cox : Fixed the worst of the load * balancer bugs. * Dave Platt : Interrupt stacking fix. * Richard Kooijman : Timestamp fixes. * Alan Cox : Changed buffer format. * Alan Cox : destructor hook for AF_UNIX etc. * Linus Torvalds : Better skb_clone. * Alan Cox : Added skb_copy. * Alan Cox : Added all the changed routines Linus * only put in the headers * Ray VanTassle : Fixed --skb->lock in free * Alan Cox : skb_copy copy arp field * Andi Kleen : slabified it. * Robert Olsson : Removed skb_head_pool * * NOTE: * The __skb_ routines should be called with interrupts * disabled, or you better be *real* sure that the operation is atomic * with respect to whatever list is being frobbed (e.g. via lock_sock() * or via disabling bottom half handlers, etc). */ /* * The functions in this file will not compile correctly with gcc 2.4.x */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/slab.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/sctp.h> #include <linux/netdevice.h> #ifdef CONFIG_NET_CLS_ACT #include <net/pkt_sched.h> #endif #include <linux/string.h> #include <linux/skbuff.h> #include <linux/splice.h> #include <linux/cache.h> #include <linux/rtnetlink.h> #include <linux/init.h> #include <linux/scatterlist.h> #include <linux/errqueue.h> #include <linux/prefetch.h> #include <linux/if_vlan.h> #include <linux/mpls.h> #include <linux/kcov.h> #include <net/protocol.h> #include <net/dst.h> #include <net/sock.h> #include <net/checksum.h> #include <net/ip6_checksum.h> #include <net/xfrm.h> #include <net/mpls.h> #include <net/mptcp.h> #include <net/page_pool.h> #include <linux/uaccess.h> #include <trace/events/skb.h> #include <linux/highmem.h> #include <linux/capability.h> #include <linux/user_namespace.h> #include <linux/indirect_call_wrapper.h> #include "datagram.h" #include "sock_destructor.h" struct kmem_cache *skbuff_head_cache __ro_after_init; static struct kmem_cache *skbuff_fclone_cache __ro_after_init; #ifdef CONFIG_SKB_EXTENSIONS static struct kmem_cache *skbuff_ext_cache __ro_after_init; #endif int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; EXPORT_SYMBOL(sysctl_max_skb_frags); /** * skb_panic - private function for out-of-line support * @skb: buffer * @sz: size * @addr: address * @msg: skb_over_panic or skb_under_panic * * Out-of-line support for skb_put() and skb_push(). * Called via the wrapper skb_over_panic() or skb_under_panic(). * Keep out of line to prevent kernel bloat. * __builtin_return_address is not used because it is not always reliable. */ static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr, const char msg[]) { pr_emerg("%s: text:%px len:%d put:%d head:%px data:%px tail:%#lx end:%#lx dev:%s\n", msg, addr, skb->len, sz, skb->head, skb->data, (unsigned long)skb->tail, (unsigned long)skb->end, skb->dev ? skb->dev->name : "<NULL>"); BUG(); } static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr) { skb_panic(skb, sz, addr, __func__); } static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr) { skb_panic(skb, sz, addr, __func__); } #define NAPI_SKB_CACHE_SIZE 64 #define NAPI_SKB_CACHE_BULK 16 #define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2) struct napi_alloc_cache { struct page_frag_cache page; unsigned int skb_count; void *skb_cache[NAPI_SKB_CACHE_SIZE]; }; static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); static void *__alloc_frag_align(unsigned int fragsz, gfp_t gfp_mask, unsigned int align_mask) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); return page_frag_alloc_align(&nc->page, fragsz, gfp_mask, align_mask); } void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { fragsz = SKB_DATA_ALIGN(fragsz); return __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask); } EXPORT_SYMBOL(__napi_alloc_frag_align); void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { struct page_frag_cache *nc; void *data; fragsz = SKB_DATA_ALIGN(fragsz); if (in_hardirq() || irqs_disabled()) { nc = this_cpu_ptr(&netdev_alloc_cache); data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask); } else { local_bh_disable(); data = __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask); local_bh_enable(); } return data; } EXPORT_SYMBOL(__netdev_alloc_frag_align); static struct sk_buff *napi_skb_cache_get(void) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); struct sk_buff *skb; if (unlikely(!nc->skb_count)) nc->skb_count = kmem_cache_alloc_bulk(skbuff_head_cache, GFP_ATOMIC, NAPI_SKB_CACHE_BULK, nc->skb_cache); if (unlikely(!nc->skb_count)) return NULL; skb = nc->skb_cache[--nc->skb_count]; kasan_unpoison_object_data(skbuff_head_cache, skb); return skb; } /* Caller must provide SKB that is memset cleared */ static void __build_skb_around(struct sk_buff *skb, void *data, unsigned int frag_size) { struct skb_shared_info *shinfo; unsigned int size = frag_size ? : ksize(data); size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); /* Assumes caller memset cleared SKB */ skb->truesize = SKB_TRUESIZE(size); refcount_set(&skb->users, 1); skb->head = data; skb->data = data; skb_reset_tail_pointer(skb); skb_set_end_offset(skb, size); skb->mac_header = (typeof(skb->mac_header))~0U; skb->transport_header = (typeof(skb->transport_header))~0U; /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); atomic_set(&shinfo->dataref, 1); skb_set_kcov_handle(skb, kcov_common_handle()); } /** * __build_skb - build a network buffer * @data: data buffer provided by caller * @frag_size: size of data, or 0 if head was kmalloced * * Allocate a new &sk_buff. Caller provides space holding head and * skb_shared_info. @data must have been allocated by kmalloc() only if * @frag_size is 0, otherwise data should come from the page allocator * or vmalloc() * The return is the new skb buffer. * On a failure the return is %NULL, and @data is not freed. * Notes : * Before IO, driver allocates only data buffer where NIC put incoming frame * Driver should add room at head (NET_SKB_PAD) and * MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info)) * After IO, driver calls build_skb(), to allocate sk_buff and populate it * before giving packet to stack. * RX rings only contains data buffers, not full skbs. */ struct sk_buff *__build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb; skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC); if (unlikely(!skb)) return NULL; memset(skb, 0, offsetof(struct sk_buff, tail)); __build_skb_around(skb, data, frag_size); return skb; } /* build_skb() is wrapper over __build_skb(), that specifically * takes care of skb->head and skb->pfmemalloc * This means that if @frag_size is not zero, then @data must be backed * by a page fragment, not kmalloc() or vmalloc() */ struct sk_buff *build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb = __build_skb(data, frag_size); if (skb && frag_size) { skb->head_frag = 1; if (page_is_pfmemalloc(virt_to_head_page(data))) skb->pfmemalloc = 1; } return skb; } EXPORT_SYMBOL(build_skb); /** * build_skb_around - build a network buffer around provided skb * @skb: sk_buff provide by caller, must be memset cleared * @data: data buffer provided by caller * @frag_size: size of data, or 0 if head was kmalloced */ struct sk_buff *build_skb_around(struct sk_buff *skb, void *data, unsigned int frag_size) { if (unlikely(!skb)) return NULL; __build_skb_around(skb, data, frag_size); if (frag_size) { skb->head_frag = 1; if (page_is_pfmemalloc(virt_to_head_page(data))) skb->pfmemalloc = 1; } return skb; } EXPORT_SYMBOL(build_skb_around); /** * __napi_build_skb - build a network buffer * @data: data buffer provided by caller * @frag_size: size of data, or 0 if head was kmalloced * * Version of __build_skb() that uses NAPI percpu caches to obtain * skbuff_head instead of inplace allocation. * * Returns a new &sk_buff on success, %NULL on allocation failure. */ static struct sk_buff *__napi_build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb; skb = napi_skb_cache_get(); if (unlikely(!skb)) return NULL; memset(skb, 0, offsetof(struct sk_buff, tail)); __build_skb_around(skb, data, frag_size); return skb; } /** * napi_build_skb - build a network buffer * @data: data buffer provided by caller * @frag_size: size of data, or 0 if head was kmalloced * * Version of __napi_build_skb() that takes care of skb->head_frag * and skb->pfmemalloc when the data is a page or page fragment. * * Returns a new &sk_buff on success, %NULL on allocation failure. */ struct sk_buff *napi_build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb = __napi_build_skb(data, frag_size); if (likely(skb) && frag_size) { skb->head_frag = 1; skb_propagate_pfmemalloc(virt_to_head_page(data), skb); } return skb; } EXPORT_SYMBOL(napi_build_skb); /* * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells * the caller if emergency pfmemalloc reserves are being used. If it is and * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves * may be used. Otherwise, the packet data may be discarded until enough * memory is free */ static void *kmalloc_reserve(size_t size, gfp_t flags, int node, bool *pfmemalloc) { void *obj; bool ret_pfmemalloc = false; /* * Try a regular allocation, when that fails and we're not entitled * to the reserves, fail. */ obj = kmalloc_node_track_caller(size, flags | __GFP_NOMEMALLOC | __GFP_NOWARN, node); if (obj || !(gfp_pfmemalloc_allowed(flags))) goto out; /* Try again but now we are using pfmemalloc reserves */ ret_pfmemalloc = true; obj = kmalloc_node_track_caller(size, flags, node); out: if (pfmemalloc) *pfmemalloc = ret_pfmemalloc; return obj; } /* Allocate a new skbuff. We do this ourselves so we can fill in a few * 'private' fields and also do memory statistics to find all the * [BEEP] leaks. * */ /** * __alloc_skb - allocate a network buffer * @size: size to allocate * @gfp_mask: allocation mask * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache * instead of head cache and allocate a cloned (child) skb. * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for * allocations in case the data is required for writeback * @node: numa node to allocate memory on * * Allocate a new &sk_buff. The returned buffer has no headroom and a * tail room of at least size bytes. The object has a reference count * of one. The return is the buffer. On a failure the return is %NULL. * * Buffers may only be allocated from interrupts using a @gfp_mask of * %GFP_ATOMIC. */ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, int flags, int node) { struct kmem_cache *cache; struct sk_buff *skb; u8 *data; bool pfmemalloc; cache = (flags & SKB_ALLOC_FCLONE) ? skbuff_fclone_cache : skbuff_head_cache; if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX)) gfp_mask |= __GFP_MEMALLOC; /* Get the HEAD */ if ((flags & (SKB_ALLOC_FCLONE | SKB_ALLOC_NAPI)) == SKB_ALLOC_NAPI && likely(node == NUMA_NO_NODE || node == numa_mem_id())) skb = napi_skb_cache_get(); else skb = kmem_cache_alloc_node(cache, gfp_mask & ~GFP_DMA, node); if (unlikely(!skb)) return NULL; prefetchw(skb); /* We do our best to align skb_shared_info on a separate cache * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives * aligned memory blocks, unless SLUB/SLAB debug is enabled. * Both skb->head and skb_shared_info are cache line aligned. */ size = SKB_DATA_ALIGN(size); size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc); if (unlikely(!data)) goto nodata; /* kmalloc(size) might give us more room than requested. * Put skb_shared_info exactly at the end of allocated zone, * to allow max possible filling before reallocation. */ size = SKB_WITH_OVERHEAD(ksize(data)); prefetchw(data + size); /* * Only clear those fields we need to clear, not those that we will * actually initialise below. Hence, don't put any more fields after * the tail pointer in struct sk_buff! */ memset(skb, 0, offsetof(struct sk_buff, tail)); __build_skb_around(skb, data, 0); skb->pfmemalloc = pfmemalloc; if (flags & SKB_ALLOC_FCLONE) { struct sk_buff_fclones *fclones; fclones = container_of(skb, struct sk_buff_fclones, skb1); skb->fclone = SKB_FCLONE_ORIG; refcount_set(&fclones->fclone_ref, 1); fclones->skb2.fclone = SKB_FCLONE_CLONE; } return skb; nodata: kmem_cache_free(cache, skb); return NULL; } EXPORT_SYMBOL(__alloc_skb); /** * __netdev_alloc_skb - allocate an skbuff for rx on a specific device * @dev: network device to receive on * @len: length to allocate * @gfp_mask: get_free_pages mask, passed to alloc_skb * * Allocate a new &sk_buff and assign it a usage count of one. The * buffer has NET_SKB_PAD headroom built in. Users should allocate * the headroom they think they need without accounting for the * built in space. The built in space is used for optimisations. * * %NULL is returned if there is no free memory. */ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, gfp_t gfp_mask) { struct page_frag_cache *nc; struct sk_buff *skb; bool pfmemalloc; void *data; len += NET_SKB_PAD; /* If requested length is either too small or too big, * we use kmalloc() for skb->head allocation. */ if (len <= SKB_WITH_OVERHEAD(1024) || len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); if (!skb) goto skb_fail; goto skb_success; } len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); len = SKB_DATA_ALIGN(len); if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; if (in_hardirq() || irqs_disabled()) { nc = this_cpu_ptr(&netdev_alloc_cache); data = page_frag_alloc(nc, len, gfp_mask); pfmemalloc = nc->pfmemalloc; } else { local_bh_disable(); nc = this_cpu_ptr(&napi_alloc_cache.page); data = page_frag_alloc(nc, len, gfp_mask); pfmemalloc = nc->pfmemalloc; local_bh_enable(); } if (unlikely(!data)) return NULL; skb = __build_skb(data, len); if (unlikely(!skb)) { skb_free_frag(data); return NULL; } if (pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1; skb_success: skb_reserve(skb, NET_SKB_PAD); skb->dev = dev; skb_fail: return skb; } EXPORT_SYMBOL(__netdev_alloc_skb); /** * __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance * @napi: napi instance this buffer was allocated for * @len: length to allocate * @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages * * Allocate a new sk_buff for use in NAPI receive. This buffer will * attempt to allocate the head from a special reserved region used * only for NAPI Rx allocation. By doing this we can save several * CPU cycles by avoiding having to disable and re-enable IRQs. * * %NULL is returned if there is no free memory. */ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, gfp_t gfp_mask) { struct napi_alloc_cache *nc; struct sk_buff *skb; void *data; len += NET_SKB_PAD + NET_IP_ALIGN; /* If requested length is either too small or too big, * we use kmalloc() for skb->head allocation. */ if (len <= SKB_WITH_OVERHEAD(1024) || len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI, NUMA_NO_NODE); if (!skb) goto skb_fail; goto skb_success; } nc = this_cpu_ptr(&napi_alloc_cache); len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); len = SKB_DATA_ALIGN(len); if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; data = page_frag_alloc(&nc->page, len, gfp_mask); if (unlikely(!data)) return NULL; skb = __napi_build_skb(data, len); if (unlikely(!skb)) { skb_free_frag(data); return NULL; } if (nc->page.pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1; skb_success: skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); skb->dev = napi->dev; skb_fail: return skb; } EXPORT_SYMBOL(__napi_alloc_skb); void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, int size, unsigned int truesize) { skb_fill_page_desc(skb, i, page, off, size); skb->len += size; skb->data_len += size; skb->truesize += truesize; } EXPORT_SYMBOL(skb_add_rx_frag); void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, unsigned int truesize) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; skb_frag_size_add(frag, size); skb->len += size; skb->data_len += size; skb->truesize += truesize; } EXPORT_SYMBOL(skb_coalesce_rx_frag); static void skb_drop_list(struct sk_buff **listp) { kfree_skb_list(*listp); *listp = NULL; } static inline void skb_drop_fraglist(struct sk_buff *skb) { skb_drop_list(&skb_shinfo(skb)->frag_list); } static void skb_clone_fraglist(struct sk_buff *skb) { struct sk_buff *list; skb_walk_frags(skb, list) skb_get(list); } static void skb_free_head(struct sk_buff *skb) { unsigned char *head = skb->head; if (skb->head_frag) { if (skb_pp_recycle(skb, head)) return; skb_free_frag(head); } else { kfree(head); } } static void skb_release_data(struct sk_buff *skb) { struct skb_shared_info *shinfo = skb_shinfo(skb); int i; if (skb->cloned && atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, &shinfo->dataref)) goto exit; skb_zcopy_clear(skb, true); for (i = 0; i < shinfo->nr_frags; i++) __skb_frag_unref(&shinfo->frags[i], skb->pp_recycle); if (shinfo->frag_list) kfree_skb_list(shinfo->frag_list); skb_free_head(skb); exit: /* When we clone an SKB we copy the reycling bit. The pp_recycle * bit is only set on the head though, so in order to avoid races * while trying to recycle fragments on __skb_frag_unref() we need * to make one SKB responsible for triggering the recycle path. * So disable the recycling bit if an SKB is cloned and we have * additional references to to the fragmented part of the SKB. * Eventually the last SKB will have the recycling bit set and it's * dataref set to 0, which will trigger the recycling */ skb->pp_recycle = 0; } /* * Free an skbuff by memory without cleaning the state. */ static void kfree_skbmem(struct sk_buff *skb) { struct sk_buff_fclones *fclones; switch (skb->fclone) { case SKB_FCLONE_UNAVAILABLE: kmem_cache_free(skbuff_head_cache, skb); return; case SKB_FCLONE_ORIG: fclones = container_of(skb, struct sk_buff_fclones, skb1); /* We usually free the clone (TX completion) before original skb * This test would have no chance to be true for the clone, * while here, branch prediction will be good. */ if (refcount_read(&fclones->fclone_ref) == 1) goto fastpath; break; default: /* SKB_FCLONE_CLONE */ fclones = container_of(skb, struct sk_buff_fclones, skb2); break; } if (!refcount_dec_and_test(&fclones->fclone_ref)) return; fastpath: kmem_cache_free(skbuff_fclone_cache, fclones); } void skb_release_head_state(struct sk_buff *skb) { skb_dst_drop(skb); if (skb->destructor) { WARN_ON(in_hardirq()); skb->destructor(skb); } #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_conntrack_put(skb_nfct(skb)); #endif skb_ext_put(skb); } /* Free everything but the sk_buff shell. */ static void skb_release_all(struct sk_buff *skb) { skb_release_head_state(skb); if (likely(skb->head)) skb_release_data(skb); } /** * __kfree_skb - private function * @skb: buffer * * Free an sk_buff. Release anything attached to the buffer. * Clean the state. This is an internal helper function. Users should * always call kfree_skb */ void __kfree_skb(struct sk_buff *skb) { skb_release_all(skb); kfree_skbmem(skb); } EXPORT_SYMBOL(__kfree_skb); /** * kfree_skb_reason - free an sk_buff with special reason * @skb: buffer to free * @reason: reason why this skb is dropped * * Drop a reference to the buffer and free it if the usage count has * hit zero. Meanwhile, pass the drop reason to 'kfree_skb' * tracepoint. */ void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) { if (!skb_unref(skb)) return; trace_kfree_skb(skb, __builtin_return_address(0), reason); __kfree_skb(skb); } EXPORT_SYMBOL(kfree_skb_reason); void kfree_skb_list(struct sk_buff *segs) { while (segs) { struct sk_buff *next = segs->next; kfree_skb(segs); segs = next; } } EXPORT_SYMBOL(kfree_skb_list); /* Dump skb information and contents. * * Must only be called from net_ratelimit()-ed paths. * * Dumps whole packets if full_pkt, only headers otherwise. */ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt) { struct skb_shared_info *sh = skb_shinfo(skb); struct net_device *dev = skb->dev; struct sock *sk = skb->sk; struct sk_buff *list_skb; bool has_mac, has_trans; int headroom, tailroom; int i, len, seg_len; if (full_pkt) len = skb->len; else len = min_t(int, skb->len, MAX_HEADER + 128); headroom = skb_headroom(skb); tailroom = skb_tailroom(skb); has_mac = skb_mac_header_was_set(skb); has_trans = skb_transport_header_was_set(skb); printk("%sskb len=%u headroom=%u headlen=%u tailroom=%u\n" "mac=(%d,%d) net=(%d,%d) trans=%d\n" "shinfo(txflags=%u nr_frags=%u gso(size=%hu type=%u segs=%hu))\n" "csum(0x%x ip_summed=%u complete_sw=%u valid=%u level=%u)\n" "hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n", level, skb->len, headroom, skb_headlen(skb), tailroom, has_mac ? skb->mac_header : -1, has_mac ? skb_mac_header_len(skb) : -1, skb->network_header, has_trans ? skb_network_header_len(skb) : -1, has_trans ? skb->transport_header : -1, sh->tx_flags, sh->nr_frags, sh->gso_size, sh->gso_type, sh->gso_segs, skb->csum, skb->ip_summed, skb->csum_complete_sw, skb->csum_valid, skb->csum_level, skb->hash, skb->sw_hash, skb->l4_hash, ntohs(skb->protocol), skb->pkt_type, skb->skb_iif); if (dev) printk("%sdev name=%s feat=%pNF\n", level, dev->name, &dev->features); if (sk) printk("%ssk family=%hu type=%u proto=%u\n", level, sk->sk_family, sk->sk_type, sk->sk_protocol); if (full_pkt && headroom) print_hex_dump(level, "skb headroom: ", DUMP_PREFIX_OFFSET, 16, 1, skb->head, headroom, false); seg_len = min_t(int, skb_headlen(skb), len); if (seg_len) print_hex_dump(level, "skb linear: ", DUMP_PREFIX_OFFSET, 16, 1, skb->data, seg_len, false); len -= seg_len; if (full_pkt && tailroom) print_hex_dump(level, "skb tailroom: ", DUMP_PREFIX_OFFSET, 16, 1, skb_tail_pointer(skb), tailroom, false); for (i = 0; len && i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; u32 p_off, p_len, copied; struct page *p; u8 *vaddr; skb_frag_foreach_page(frag, skb_frag_off(frag), skb_frag_size(frag), p, p_off, p_len, copied) { seg_len = min_t(int, p_len, len); vaddr = kmap_atomic(p); print_hex_dump(level, "skb frag: ", DUMP_PREFIX_OFFSET, 16, 1, vaddr + p_off, seg_len, false); kunmap_atomic(vaddr); len -= seg_len; if (!len) break; } } if (full_pkt && skb_has_frag_list(skb)) { printk("skb fraglist:\n"); skb_walk_frags(skb, list_skb) skb_dump(level, list_skb, true); } } EXPORT_SYMBOL(skb_dump); /** * skb_tx_error - report an sk_buff xmit error * @skb: buffer that triggered an error * * Report xmit error if a device callback is tracking this skb. * skb must be freed afterwards. */ void skb_tx_error(struct sk_buff *skb) { skb_zcopy_clear(skb, true); } EXPORT_SYMBOL(skb_tx_error); #ifdef CONFIG_TRACEPOINTS /** * consume_skb - free an skbuff * @skb: buffer to free * * Drop a ref to the buffer and free it if the usage count has hit zero * Functions identically to kfree_skb, but kfree_skb assumes that the frame * is being dropped after a failure and notes that */ void consume_skb(struct sk_buff *skb) { if (!skb_unref(skb)) return; trace_consume_skb(skb); __kfree_skb(skb); } EXPORT_SYMBOL(consume_skb); #endif /** * __consume_stateless_skb - free an skbuff, assuming it is stateless * @skb: buffer to free * * Alike consume_skb(), but this variant assumes that this is the last * skb reference and all the head states have been already dropped */ void __consume_stateless_skb(struct sk_buff *skb) { trace_consume_skb(skb); skb_release_data(skb); kfree_skbmem(skb); } static void napi_skb_cache_put(struct sk_buff *skb) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); u32 i; kasan_poison_object_data(skbuff_head_cache, skb); nc->skb_cache[nc->skb_count++] = skb; if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) { for (i = NAPI_SKB_CACHE_HALF; i < NAPI_SKB_CACHE_SIZE; i++) kasan_unpoison_object_data(skbuff_head_cache, nc->skb_cache[i]); kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_HALF, nc->skb_cache + NAPI_SKB_CACHE_HALF); nc->skb_count = NAPI_SKB_CACHE_HALF; } } void __kfree_skb_defer(struct sk_buff *skb) { skb_release_all(skb); napi_skb_cache_put(skb); } void napi_skb_free_stolen_head(struct sk_buff *skb) { if (unlikely(skb->slow_gro)) { nf_reset_ct(skb); skb_dst_drop(skb); skb_ext_put(skb); skb_orphan(skb); skb->slow_gro = 0; } napi_skb_cache_put(skb); } void napi_consume_skb(struct sk_buff *skb, int budget) { /* Zero budget indicate non-NAPI context called us, like netpoll */ if (unlikely(!budget)) { dev_consume_skb_any(skb); return; } lockdep_assert_in_softirq(); if (!skb_unref(skb)) return; /* if reaching here SKB is ready to free */ trace_consume_skb(skb); /* if SKB is a clone, don't handle this case */ if (skb->fclone != SKB_FCLONE_UNAVAILABLE) { __kfree_skb(skb); return; } skb_release_all(skb); napi_skb_cache_put(skb); } EXPORT_SYMBOL(napi_consume_skb); /* Make sure a field is enclosed inside headers_start/headers_end section */ #define CHECK_SKB_FIELD(field) \ BUILD_BUG_ON(offsetof(struct sk_buff, field) < \ offsetof(struct sk_buff, headers_start)); \ BUILD_BUG_ON(offsetof(struct sk_buff, field) > \ offsetof(struct sk_buff, headers_end)); \ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { new->tstamp = old->tstamp; /* We do not copy old->sk */ new->dev = old->dev; memcpy(new->cb, old->cb, sizeof(old->cb)); skb_dst_copy(new, old); __skb_ext_copy(new, old); __nf_copy(new, old, false); /* Note : this field could be in headers_start/headers_end section * It is not yet because we do not want to have a 16 bit hole */ new->queue_mapping = old->queue_mapping; memcpy(&new->headers_start, &old->headers_start, offsetof(struct sk_buff, headers_end) - offsetof(struct sk_buff, headers_start)); CHECK_SKB_FIELD(protocol); CHECK_SKB_FIELD(csum); CHECK_SKB_FIELD(hash); CHECK_SKB_FIELD(priority); CHECK_SKB_FIELD(skb_iif); CHECK_SKB_FIELD(vlan_proto); CHECK_SKB_FIELD(vlan_tci); CHECK_SKB_FIELD(transport_header); CHECK_SKB_FIELD(network_header); CHECK_SKB_FIELD(mac_header); CHECK_SKB_FIELD(inner_protocol); CHECK_SKB_FIELD(inner_transport_header); CHECK_SKB_FIELD(inner_network_header); CHECK_SKB_FIELD(inner_mac_header); CHECK_SKB_FIELD(mark); #ifdef CONFIG_NETWORK_SECMARK CHECK_SKB_FIELD(secmark); #endif #ifdef CONFIG_NET_RX_BUSY_POLL CHECK_SKB_FIELD(napi_id); #endif #ifdef CONFIG_XPS CHECK_SKB_FIELD(sender_cpu); #endif #ifdef CONFIG_NET_SCHED CHECK_SKB_FIELD(tc_index); #endif } /* * You should not add any new code to this function. Add it to * __copy_skb_header above instead. */ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) { #define C(x) n->x = skb->x n->next = n->prev = NULL; n->sk = NULL; __copy_skb_header(n, skb); C(len); C(data_len); C(mac_len); n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; n->cloned = 1; n->nohdr = 0; n->peeked = 0; C(pfmemalloc); C(pp_recycle); n->destructor = NULL; C(tail); C(end); C(head); C(head_frag); C(data); C(truesize); refcount_set(&n->users, 1); atomic_inc(&(skb_shinfo(skb)->dataref)); skb->cloned = 1; return n; #undef C } /** * alloc_skb_for_msg() - allocate sk_buff to wrap frag list forming a msg * @first: first sk_buff of the msg */ struct sk_buff *alloc_skb_for_msg(struct sk_buff *first) { struct sk_buff *n; n = alloc_skb(0, GFP_ATOMIC); if (!n) return NULL; n->len = first->len; n->data_len = first->len; n->truesize = first->truesize; skb_shinfo(n)->frag_list = first; __copy_skb_header(n, first); n->destructor = NULL; return n; } EXPORT_SYMBOL_GPL(alloc_skb_for_msg); /** * skb_morph - morph one skb into another * @dst: the skb to receive the contents * @src: the skb to supply the contents * * This is identical to skb_clone except that the target skb is * supplied by the user. * * The target skb is returned upon exit. */ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) { skb_release_all(dst); return __skb_clone(dst, src); } EXPORT_SYMBOL_GPL(skb_morph); int mm_account_pinned_pages(struct mmpin *mmp, size_t size) { unsigned long max_pg, num_pg, new_pg, old_pg; struct user_struct *user; if (capable(CAP_IPC_LOCK) || !size) return 0; num_pg = (size >> PAGE_SHIFT) + 2; /* worst case */ max_pg = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; user = mmp->user ? : current_user(); do { old_pg = atomic_long_read(&user->locked_vm); new_pg = old_pg + num_pg; if (new_pg > max_pg) return -ENOBUFS; } while (atomic_long_cmpxchg(&user->locked_vm, old_pg, new_pg) != old_pg); if (!mmp->user) { mmp->user = get_uid(user); mmp->num_pg = num_pg; } else { mmp->num_pg += num_pg; } return 0; } EXPORT_SYMBOL_GPL(mm_account_pinned_pages); void mm_unaccount_pinned_pages(struct mmpin *mmp) { if (mmp->user) { atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm); free_uid(mmp->user); } } EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages); struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size) { struct ubuf_info *uarg; struct sk_buff *skb; WARN_ON_ONCE(!in_task()); skb = sock_omalloc(sk, 0, GFP_KERNEL); if (!skb) return NULL; BUILD_BUG_ON(sizeof(*uarg) > sizeof(skb->cb)); uarg = (void *)skb->cb; uarg->mmp.user = NULL; if (mm_account_pinned_pages(&uarg->mmp, size)) { kfree_skb(skb); return NULL; } uarg->callback = msg_zerocopy_callback; uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1; uarg->len = 1; uarg->bytelen = size; uarg->zerocopy = 1; uarg->flags = SKBFL_ZEROCOPY_FRAG; refcount_set(&uarg->refcnt, 1); sock_hold(sk); return uarg; } EXPORT_SYMBOL_GPL(msg_zerocopy_alloc); static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg) { return container_of((void *)uarg, struct sk_buff, cb); } struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, struct ubuf_info *uarg) { if (uarg) { const u32 byte_limit = 1 << 19; /* limit to a few TSO */ u32 bytelen, next; /* realloc only when socket is locked (TCP, UDP cork), * so uarg->len and sk_zckey access is serialized */ if (!sock_owned_by_user(sk)) { WARN_ON_ONCE(1); return NULL; } bytelen = uarg->bytelen + size; if (uarg->len == USHRT_MAX - 1 || bytelen > byte_limit) { /* TCP can create new skb to attach new uarg */ if (sk->sk_type == SOCK_STREAM) goto new_alloc; return NULL; } next = (u32)atomic_read(&sk->sk_zckey); if ((u32)(uarg->id + uarg->len) == next) { if (mm_account_pinned_pages(&uarg->mmp, size)) return NULL; uarg->len++; uarg->bytelen = bytelen; atomic_set(&sk->sk_zckey, ++next); /* no extra ref when appending to datagram (MSG_MORE) */ if (sk->sk_type == SOCK_STREAM) net_zcopy_get(uarg); return uarg; } } new_alloc: return msg_zerocopy_alloc(sk, size); } EXPORT_SYMBOL_GPL(msg_zerocopy_realloc); static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len) { struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); u32 old_lo, old_hi; u64 sum_len; old_lo = serr->ee.ee_info; old_hi = serr->ee.ee_data; sum_len = old_hi - old_lo + 1ULL + len; if (sum_len >= (1ULL << 32)) return false; if (lo != old_hi + 1) return false; serr->ee.ee_data += len; return true; } static void __msg_zerocopy_callback(struct ubuf_info *uarg) { struct sk_buff *tail, *skb = skb_from_uarg(uarg); struct sock_exterr_skb *serr; struct sock *sk = skb->sk; struct sk_buff_head *q; unsigned long flags; bool is_zerocopy; u32 lo, hi; u16 len; mm_unaccount_pinned_pages(&uarg->mmp); /* if !len, there was only 1 call, and it was aborted * so do not queue a completion notification */ if (!uarg->len || sock_flag(sk, SOCK_DEAD)) goto release; len = uarg->len; lo = uarg->id; hi = uarg->id + len - 1; is_zerocopy = uarg->zerocopy; serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = 0; serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY; serr->ee.ee_data = hi; serr->ee.ee_info = lo; if (!is_zerocopy) serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED; q = &sk->sk_error_queue; spin_lock_irqsave(&q->lock, flags); tail = skb_peek_tail(q); if (!tail || SKB_EXT_ERR(tail)->ee.ee_origin != SO_EE_ORIGIN_ZEROCOPY || !skb_zerocopy_notify_extend(tail, lo, len)) { __skb_queue_tail(q, skb); skb = NULL; } spin_unlock_irqrestore(&q->lock, flags); sk_error_report(sk); release: consume_skb(skb); sock_put(sk); } void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg, bool success) { uarg->zerocopy = uarg->zerocopy & success; if (refcount_dec_and_test(&uarg->refcnt)) __msg_zerocopy_callback(uarg); } EXPORT_SYMBOL_GPL(msg_zerocopy_callback); void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref) { struct sock *sk = skb_from_uarg(uarg)->sk; atomic_dec(&sk->sk_zckey); uarg->len--; if (have_uref) msg_zerocopy_callback(NULL, uarg, true); } EXPORT_SYMBOL_GPL(msg_zerocopy_put_abort); int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len) { return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len); } EXPORT_SYMBOL_GPL(skb_zerocopy_iter_dgram); int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, struct msghdr *msg, int len, struct ubuf_info *uarg) { struct ubuf_info *orig_uarg = skb_zcopy(skb); struct iov_iter orig_iter = msg->msg_iter; int err, orig_len = skb->len; /* An skb can only point to one uarg. This edge case happens when * TCP appends to an skb, but zerocopy_realloc triggered a new alloc. */ if (orig_uarg && uarg != orig_uarg) return -EEXIST; err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len); if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) { struct sock *save_sk = skb->sk; /* Streams do not free skb on error. Reset to prev state. */ msg->msg_iter = orig_iter; skb->sk = sk; ___pskb_trim(skb, orig_len); skb->sk = save_sk; return err; } skb_zcopy_set(skb, uarg, NULL); return skb->len - orig_len; } EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream); static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig, gfp_t gfp_mask) { if (skb_zcopy(orig)) { if (skb_zcopy(nskb)) { /* !gfp_mask callers are verified to !skb_zcopy(nskb) */ if (!gfp_mask) { WARN_ON_ONCE(1); return -ENOMEM; } if (skb_uarg(nskb) == skb_uarg(orig)) return 0; if (skb_copy_ubufs(nskb, GFP_ATOMIC)) return -EIO; } skb_zcopy_set(nskb, skb_uarg(orig), NULL); } return 0; } /** * skb_copy_ubufs - copy userspace skb frags buffers to kernel * @skb: the skb to modify * @gfp_mask: allocation priority * * This must be called on skb with SKBFL_ZEROCOPY_ENABLE. * It will copy all frags into kernel and drop the reference * to userspace pages. * * If this function is called from an interrupt gfp_mask() must be * %GFP_ATOMIC. * * Returns 0 on success or a negative error code on failure * to allocate kernel memory to copy to. */ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) { int num_frags = skb_shinfo(skb)->nr_frags; struct page *page, *head = NULL; int i, new_frags; u32 d_off; if (skb_shared(skb) || skb_unclone(skb, gfp_mask)) return -EINVAL; if (!num_frags) goto release; new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < new_frags; i++) { page = alloc_page(gfp_mask); if (!page) { while (head) { struct page *next = (struct page *)page_private(head); put_page(head); head = next; } return -ENOMEM; } set_page_private(page, (unsigned long)head); head = page; } page = head; d_off = 0; for (i = 0; i < num_frags; i++) { skb_frag_t *f = &skb_shinfo(skb)->frags[i]; u32 p_off, p_len, copied; struct page *p; u8 *vaddr; skb_frag_foreach_page(f, skb_frag_off(f), skb_frag_size(f), p, p_off, p_len, copied) { u32 copy, done = 0; vaddr = kmap_atomic(p); while (done < p_len) { if (d_off == PAGE_SIZE) { d_off = 0; page = (struct page *)page_private(page); } copy = min_t(u32, PAGE_SIZE - d_off, p_len - done); memcpy(page_address(page) + d_off, vaddr + p_off + done, copy); done += copy; d_off += copy; } kunmap_atomic(vaddr); } } /* skb frags release userspace buffers */ for (i = 0; i < num_frags; i++) skb_frag_unref(skb, i); /* skb frags point to kernel buffers */ for (i = 0; i < new_frags - 1; i++) { __skb_fill_page_desc(skb, i, head, 0, PAGE_SIZE); head = (struct page *)page_private(head); } __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off); skb_shinfo(skb)->nr_frags = new_frags; release: skb_zcopy_clear(skb, false); return 0; } EXPORT_SYMBOL_GPL(skb_copy_ubufs); /** * skb_clone - duplicate an sk_buff * @skb: buffer to clone * @gfp_mask: allocation priority * * Duplicate an &sk_buff. The new one is not owned by a socket. Both * copies share the same packet data but not structure. The new * buffer has a reference count of 1. If the allocation fails the * function returns %NULL otherwise the new buffer is returned. * * If this function is called from an interrupt gfp_mask() must be * %GFP_ATOMIC. */ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) { struct sk_buff_fclones *fclones = container_of(skb, struct sk_buff_fclones, skb1); struct sk_buff *n; if (skb_orphan_frags(skb, gfp_mask)) return NULL; if (skb->fclone == SKB_FCLONE_ORIG && refcount_read(&fclones->fclone_ref) == 1) { n = &fclones->skb2; refcount_set(&fclones->fclone_ref, 2); } else { if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); if (!n) return NULL; n->fclone = SKB_FCLONE_UNAVAILABLE; } return __skb_clone(n, skb); } EXPORT_SYMBOL(skb_clone); void skb_headers_offset_update(struct sk_buff *skb, int off) { /* Only adjust this if it actually is csum_start rather than csum */ if (skb->ip_summed == CHECKSUM_PARTIAL) skb->csum_start += off; /* {transport,network,mac}_header and tail are relative to skb->head */ skb->transport_header += off; skb->network_header += off; if (skb_mac_header_was_set(skb)) skb->mac_header += off; skb->inner_transport_header += off; skb->inner_network_header += off; skb->inner_mac_header += off; } EXPORT_SYMBOL(skb_headers_offset_update); void skb_copy_header(struct sk_buff *new, const struct sk_buff *old) { __copy_skb_header(new, old); skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; } EXPORT_SYMBOL(skb_copy_header); static inline int skb_alloc_rx_flag(const struct sk_buff *skb) { if (skb_pfmemalloc(skb)) return SKB_ALLOC_RX; return 0; } /** * skb_copy - create private copy of an sk_buff * @skb: buffer to copy * @gfp_mask: allocation priority * * Make a copy of both an &sk_buff and its data. This is used when the * caller wishes to modify the data and needs a private copy of the * data to alter. Returns %NULL on failure or the pointer to the buffer * on success. The returned buffer has a reference count of 1. * * As by-product this function converts non-linear &sk_buff to linear * one, so that &sk_buff becomes completely private and caller is allowed * to modify all the data of returned buffer. This means that this * function is not recommended for use in circumstances when only * header is going to be modified. Use pskb_copy() instead. */ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) { struct sk_buff *n; unsigned int size; int headerlen; if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)) return NULL; headerlen = skb_headroom(skb); size = skb_end_offset(skb) + skb->data_len; n = __alloc_skb(size, gfp_mask, skb_alloc_rx_flag(skb), NUMA_NO_NODE); if (!n) return NULL; /* Set the data pointer */ skb_reserve(n, headerlen); /* Set the tail pointer and length */ skb_put(n, skb->len); BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)); skb_copy_header(n, skb); return n; } EXPORT_SYMBOL(skb_copy); /** * __pskb_copy_fclone - create copy of an sk_buff with private head. * @skb: buffer to copy * @headroom: headroom of new skb * @gfp_mask: allocation priority * @fclone: if true allocate the copy of the skb from the fclone * cache instead of the head cache; it is recommended to set this * to true for the cases where the copy will likely be cloned * * Make a copy of both an &sk_buff and part of its data, located * in header. Fragmented data remain shared. This is used when * the caller wishes to modify only header of &sk_buff and needs * private copy of the header to alter. Returns %NULL on failure * or the pointer to the buffer on success. * The returned buffer has a reference count of 1. */ struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom, gfp_t gfp_mask, bool fclone) { unsigned int size = skb_headlen(skb) + headroom; int flags = skb_alloc_rx_flag(skb) | (fclone ? SKB_ALLOC_FCLONE : 0); struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE); if (!n) goto out; /* Set the data pointer */ skb_reserve(n, headroom); /* Set the tail pointer and length */ skb_put(n, skb_headlen(skb)); /* Copy the bytes */ skb_copy_from_linear_data(skb, n->data, n->len); n->truesize += skb->data_len; n->data_len = skb->data_len; n->len = skb->len; if (skb_shinfo(skb)->nr_frags) { int i; if (skb_orphan_frags(skb, gfp_mask) || skb_zerocopy_clone(n, skb, gfp_mask)) { kfree_skb(n); n = NULL; goto out; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; skb_frag_ref(skb, i); } skb_shinfo(n)->nr_frags = i; } if (skb_has_frag_list(skb)) { skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; skb_clone_fraglist(n); } skb_copy_header(n, skb); out: return n; } EXPORT_SYMBOL(__pskb_copy_fclone); /** * pskb_expand_head - reallocate header of &sk_buff * @skb: buffer to reallocate * @nhead: room to add at head * @ntail: room to add at tail * @gfp_mask: allocation priority * * Expands (or creates identical copy, if @nhead and @ntail are zero) * header of @skb. &sk_buff itself is not changed. &sk_buff MUST have * reference count of 1. Returns zero in the case of success or error, * if expansion failed. In the last case, &sk_buff is not changed. * * All the pointers pointing into skb header may change and must be * reloaded after call to this function. */ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask) { int i, osize = skb_end_offset(skb); int size = osize + nhead + ntail; long off; u8 *data; BUG_ON(nhead < 0); BUG_ON(skb_shared(skb)); size = SKB_DATA_ALIGN(size); if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), gfp_mask, NUMA_NO_NODE, NULL); if (!data) goto nodata; size = SKB_WITH_OVERHEAD(ksize(data)); /* Copy only real data... and, alas, header. This should be * optimized for the cases when header is void. */ memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head); memcpy((struct skb_shared_info *)(data + size), skb_shinfo(skb), offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); /* * if shinfo is shared we must drop the old head gracefully, but if it * is not we can just drop the old head and let the existing refcount * be since all we did is relocate the values */ if (skb_cloned(skb)) { if (skb_orphan_frags(skb, gfp_mask)) goto nofrags; if (skb_zcopy(skb)) refcount_inc(&skb_uarg(skb)->refcnt); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) skb_frag_ref(skb, i); if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); skb_release_data(skb); } else { skb_free_head(skb); } off = (data + nhead) - skb->head; skb->head = data; skb->head_frag = 0; skb->data += off; skb_set_end_offset(skb, size); #ifdef NET_SKBUFF_DATA_USES_OFFSET off = nhead; #endif skb->tail += off; skb_headers_offset_update(skb, nhead); skb->cloned = 0; skb->hdr_len = 0; skb->nohdr = 0; atomic_set(&skb_shinfo(skb)->dataref, 1); skb_metadata_clear(skb); /* It is not generally safe to change skb->truesize. * For the moment, we really care of rx path, or * when skb is orphaned (not attached to a socket). */ if (!skb->sk || skb->destructor == sock_edemux) skb->truesize += size - osize; return 0; nofrags: kfree(data); nodata: return -ENOMEM; } EXPORT_SYMBOL(pskb_expand_head); /* Make private copy of skb with writable head and some headroom */ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) { struct sk_buff *skb2; int delta = headroom - skb_headroom(skb); if (delta <= 0) skb2 = pskb_copy(skb, GFP_ATOMIC); else { skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { kfree_skb(skb2); skb2 = NULL; } } return skb2; } EXPORT_SYMBOL(skb_realloc_headroom); int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) { unsigned int saved_end_offset, saved_truesize; struct skb_shared_info *shinfo; int res; saved_end_offset = skb_end_offset(skb); saved_truesize = skb->truesize; res = pskb_expand_head(skb, 0, 0, pri); if (res) return res; skb->truesize = saved_truesize; if (likely(skb_end_offset(skb) == saved_end_offset)) return 0; shinfo = skb_shinfo(skb); /* We are about to change back skb->end, * we need to move skb_shinfo() to its new location. */ memmove(skb->head + saved_end_offset, shinfo, offsetof(struct skb_shared_info, frags[shinfo->nr_frags])); skb_set_end_offset(skb, saved_end_offset); return 0; } /** * skb_expand_head - reallocate header of &sk_buff * @skb: buffer to reallocate * @headroom: needed headroom * * Unlike skb_realloc_headroom, this one does not allocate a new skb * if possible; copies skb->sk to new skb as needed * and frees original skb in case of failures. * * It expect increased headroom and generates warning otherwise. */ struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom) { int delta = headroom - skb_headroom(skb); int osize = skb_end_offset(skb); struct sock *sk = skb->sk; if (WARN_ONCE(delta <= 0, "%s is expecting an increase in the headroom", __func__)) return skb; delta = SKB_DATA_ALIGN(delta); /* pskb_expand_head() might crash, if skb is shared. */ if (skb_shared(skb) || !is_skb_wmem(skb)) { struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); if (unlikely(!nskb)) goto fail; if (sk) skb_set_owner_w(nskb, sk); consume_skb(skb); skb = nskb; } if (pskb_expand_head(skb, delta, 0, GFP_ATOMIC)) goto fail; if (sk && is_skb_wmem(skb)) { delta = skb_end_offset(skb) - osize; refcount_add(delta, &sk->sk_wmem_alloc); skb->truesize += delta; } return skb; fail: kfree_skb(skb); return NULL; } EXPORT_SYMBOL(skb_expand_head); /** * skb_copy_expand - copy and expand sk_buff * @skb: buffer to copy * @newheadroom: new free bytes at head * @newtailroom: new free bytes at tail * @gfp_mask: allocation priority * * Make a copy of both an &sk_buff and its data and while doing so * allocate additional space. * * This is used when the caller wishes to modify the data and needs a * private copy of the data to alter as well as more space for new fields. * Returns %NULL on failure or the pointer to the buffer * on success. The returned buffer has a reference count of 1. * * You must pass %GFP_ATOMIC as the allocation priority if this function * is called from an interrupt. */ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, gfp_t gfp_mask) { /* * Allocate the copy buffer */ int head_copy_len, head_copy_off; struct sk_buff *n; int oldheadroom; if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)) return NULL; oldheadroom = skb_headroom(skb); n = __alloc_skb(newheadroom + skb->len + newtailroom, gfp_mask, skb_alloc_rx_flag(skb), NUMA_NO_NODE); if (!n) return NULL; skb_reserve(n, newheadroom); /* Set the tail pointer and length */ skb_put(n, skb->len); head_copy_len = oldheadroom; head_copy_off = 0; if (newheadroom <= head_copy_len) head_copy_len = newheadroom; else head_copy_off = newheadroom - head_copy_len; /* Copy the linear header and data. */ BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, skb->len + head_copy_len)); skb_copy_header(n, skb); skb_headers_offset_update(n, newheadroom - oldheadroom); return n; } EXPORT_SYMBOL(skb_copy_expand); /** * __skb_pad - zero pad the tail of an skb * @skb: buffer to pad * @pad: space to pad * @free_on_error: free buffer on error * * Ensure that a buffer is followed by a padding area that is zero * filled. Used by network drivers which may DMA or transfer data * beyond the buffer end onto the wire. * * May return error in out of memory cases. The skb is freed on error * if @free_on_error is true. */ int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error) { int err; int ntail; /* If the skbuff is non linear tailroom is always zero.. */ if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) { memset(skb->data+skb->len, 0, pad); return 0; } ntail = skb->data_len + pad - (skb->end - skb->tail); if (likely(skb_cloned(skb) || ntail > 0)) { err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC); if (unlikely(err)) goto free_skb; } /* FIXME: The use of this function with non-linear skb's really needs * to be audited. */ err = skb_linearize(skb); if (unlikely(err)) goto free_skb; memset(skb->data + skb->len, 0, pad); return 0; free_skb: if (free_on_error) kfree_skb(skb); return err; } EXPORT_SYMBOL(__skb_pad); /** * pskb_put - add data to the tail of a potentially fragmented buffer * @skb: start of the buffer to use * @tail: tail fragment of the buffer to use * @len: amount of data to add * * This function extends the used data area of the potentially * fragmented buffer. @tail must be the last fragment of @skb -- or * @skb itself. If this would exceed the total buffer size the kernel * will panic. A pointer to the first byte of the extra data is * returned. */ void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) { if (tail != skb) { skb->data_len += len; skb->len += len; } return skb_put(tail, len); } EXPORT_SYMBOL_GPL(pskb_put); /** * skb_put - add data to a buffer * @skb: buffer to use * @len: amount of data to add * * This function extends the used data area of the buffer. If this would * exceed the total buffer size the kernel will panic. A pointer to the * first byte of the extra data is returned. */ void *skb_put(struct sk_buff *skb, unsigned int len) { void *tmp = skb_tail_pointer(skb); SKB_LINEAR_ASSERT(skb); skb->tail += len; skb->len += len; if (unlikely(skb->tail > skb->end)) skb_over_panic(skb, len, __builtin_return_address(0)); return tmp; } EXPORT_SYMBOL(skb_put); /** * skb_push - add data to the start of a buffer * @skb: buffer to use * @len: amount of data to add * * This function extends the used data area of the buffer at the buffer * start. If this would exceed the total buffer headroom the kernel will * panic. A pointer to the first byte of the extra data is returned. */ void *skb_push(struct sk_buff *skb, unsigned int len) { skb->data -= len; skb->len += len; if (unlikely(skb->data < skb->head)) skb_under_panic(skb, len, __builtin_return_address(0)); return skb->data; } EXPORT_SYMBOL(skb_push); /** * skb_pull - remove data from the start of a buffer * @skb: buffer to use * @len: amount of data to remove * * This function removes data from the start of a buffer, returning * the memory to the headroom. A pointer to the next data in the buffer * is returned. Once the data has been pulled future pushes will overwrite * the old data. */ void *skb_pull(struct sk_buff *skb, unsigned int len) { return skb_pull_inline(skb, len); } EXPORT_SYMBOL(skb_pull); /** * skb_pull_data - remove data from the start of a buffer returning its * original position. * @skb: buffer to use * @len: amount of data to remove * * This function removes data from the start of a buffer, returning * the memory to the headroom. A pointer to the original data in the buffer * is returned after checking if there is enough data to pull. Once the * data has been pulled future pushes will overwrite the old data. */ void *skb_pull_data(struct sk_buff *skb, size_t len) { void *data = skb->data; if (skb->len < len) return NULL; skb_pull(skb, len); return data; } EXPORT_SYMBOL(skb_pull_data); /** * skb_trim - remove end from a buffer * @skb: buffer to alter * @len: new length * * Cut the length of a buffer down by removing data from the tail. If * the buffer is already under the length specified it is not modified. * The skb must be linear. */ void skb_trim(struct sk_buff *skb, unsigned int len) { if (skb->len > len) __skb_trim(skb, len); } EXPORT_SYMBOL(skb_trim); /* Trims skb to length len. It can change skb pointers. */ int ___pskb_trim(struct sk_buff *skb, unsigned int len) { struct sk_buff **fragp; struct sk_buff *frag; int offset = skb_headlen(skb); int nfrags = skb_shinfo(skb)->nr_frags; int i; int err; if (skb_cloned(skb) && unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))) return err; i = 0; if (offset >= len) goto drop_pages; for (; i < nfrags; i++) { int end = offset + skb_frag_size(&skb_shinfo(skb)->frags[i]); if (end < len) { offset = end; continue; } skb_frag_size_set(&skb_shinfo(skb)->frags[i++], len - offset); drop_pages: skb_shinfo(skb)->nr_frags = i; for (; i < nfrags; i++) skb_frag_unref(skb, i); if (skb_has_frag_list(skb)) skb_drop_fraglist(skb); goto done; } for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp); fragp = &frag->next) { int end = offset + frag->len; if (skb_shared(frag)) { struct sk_buff *nfrag; nfrag = skb_clone(frag, GFP_ATOMIC); if (unlikely(!nfrag)) return -ENOMEM; nfrag->next = frag->next; consume_skb(frag); frag = nfrag; *fragp = frag; } if (end < len) { offset = end; continue; } if (end > len && unlikely((err = pskb_trim(frag, len - offset)))) return err; if (frag->next) skb_drop_list(&frag->next); break; } done: if (len > skb_headlen(skb)) { skb->data_len -= skb->len - len; skb->len = len; } else { skb->len = len; skb->data_len = 0; skb_set_tail_pointer(skb, len); } if (!skb->sk || skb->destructor == sock_edemux) skb_condense(skb); return 0; } EXPORT_SYMBOL(___pskb_trim); /* Note : use pskb_trim_rcsum() instead of calling this directly */ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len) { if (skb->ip_summed == CHECKSUM_COMPLETE) { int delta = skb->len - len; skb->csum = csum_block_sub(skb->csum, skb_checksum(skb, len, delta, 0), len); } else if (skb->ip_summed == CHECKSUM_PARTIAL) { int hdlen = (len > skb_headlen(skb)) ? skb_headlen(skb) : len; int offset = skb_checksum_start_offset(skb) + skb->csum_offset; if (offset + sizeof(__sum16) > hdlen) return -EINVAL; } return __pskb_trim(skb, len); } EXPORT_SYMBOL(pskb_trim_rcsum_slow); /** * __pskb_pull_tail - advance tail of skb header * @skb: buffer to reallocate * @delta: number of bytes to advance tail * * The function makes a sense only on a fragmented &sk_buff, * it expands header moving its tail forward and copying necessary * data from fragmented part. * * &sk_buff MUST have reference count of 1. * * Returns %NULL (and &sk_buff does not change) if pull failed * or value of new tail of skb in the case of success. * * All the pointers pointing into skb header may change and must be * reloaded after call to this function. */ /* Moves tail of skb head forward, copying data from fragmented part, * when it is necessary. * 1. It may fail due to malloc failure. * 2. It may change skb pointers. * * It is pretty complicated. Luckily, it is called only in exceptional cases. */ void *__pskb_pull_tail(struct sk_buff *skb, int delta) { /* If skb has not enough free space at tail, get new one * plus 128 bytes for future expansions. If we have enough * room at tail, reallocate without expansion only if skb is cloned. */ int i, k, eat = (skb->tail + delta) - skb->end; if (eat > 0 || skb_cloned(skb)) { if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0, GFP_ATOMIC)) return NULL; } BUG_ON(skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta)); /* Optimization: no fragments, no reasons to preestimate * size of pulled pages. Superb. */ if (!skb_has_frag_list(skb)) goto pull_pages; /* Estimate size of pulled pages. */ eat = delta; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); if (size >= eat) goto pull_pages; eat -= size; } /* If we need update frag list, we are in troubles. * Certainly, it is possible to add an offset to skb data, * but taking into account that pulling is expected to * be very rare operation, it is worth to fight against * further bloating skb head and crucify ourselves here instead. * Pure masohism, indeed. 8)8) */ if (eat) { struct sk_buff *list = skb_shinfo(skb)->frag_list; struct sk_buff *clone = NULL; struct sk_buff *insp = NULL; do { if (list->len <= eat) { /* Eaten as whole. */ eat -= list->len; list = list->next; insp = list; } else { /* Eaten partially. */ if (skb_is_gso(skb) && !list->head_frag && skb_headlen(list)) skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; if (skb_shared(list)) { /* Sucks! We need to fork list. :-( */ clone = skb_clone(list, GFP_ATOMIC); if (!clone) return NULL; insp = list->next; list = clone; } else { /* This may be pulled without * problems. */ insp = list; } if (!pskb_pull(list, eat)) { kfree_skb(clone); return NULL; } break; } } while (eat); /* Free pulled out fragments. */ while ((list = skb_shinfo(skb)->frag_list) != insp) { skb_shinfo(skb)->frag_list = list->next; consume_skb(list); } /* And insert new clone at head. */ if (clone) { clone->next = list; skb_shinfo(skb)->frag_list = clone; } } /* Success! Now we may commit changes to skb data. */ pull_pages: eat = delta; k = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); if (size <= eat) { skb_frag_unref(skb, i); eat -= size; } else { skb_frag_t *frag = &skb_shinfo(skb)->frags[k]; *frag = skb_shinfo(skb)->frags[i]; if (eat) { skb_frag_off_add(frag, eat); skb_frag_size_sub(frag, eat); if (!i) goto end; eat = 0; } k++; } } skb_shinfo(skb)->nr_frags = k; end: skb->tail += delta; skb->data_len -= delta; if (!skb->data_len) skb_zcopy_clear(skb, false); return skb_tail_pointer(skb); } EXPORT_SYMBOL(__pskb_pull_tail); /** * skb_copy_bits - copy bits from skb to kernel buffer * @skb: source skb * @offset: offset in source * @to: destination buffer * @len: number of bytes to copy * * Copy the specified number of bytes from the source skb to the * destination buffer. * * CAUTION ! : * If its prototype is ever changed, * check arch/{*}/net/{*}.S files, * since it is called from BPF assembly code. */ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) { int start = skb_headlen(skb); struct sk_buff *frag_iter; int i, copy; if (offset > (int)skb->len - len) goto fault; /* Copy header. */ if ((copy = start - offset) > 0) { if (copy > len) copy = len; skb_copy_from_linear_data_offset(skb, offset, to, copy); if ((len -= copy) == 0) return 0; offset += copy; to += copy; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; skb_frag_t *f = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); end = start + skb_frag_size(f); if ((copy = end - offset) > 0) { u32 p_off, p_len, copied; struct page *p; u8 *vaddr; if (copy > len) copy = len; skb_frag_foreach_page(f, skb_frag_off(f) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); memcpy(to + copied, vaddr + p_off, p_len); kunmap_atomic(vaddr); } if ((len -= copy) == 0) return 0; offset += copy; to += copy; } start = end; } skb_walk_frags(skb, frag_iter) { int end; WARN_ON(start > offset + len); end = start + frag_iter->len; if ((copy = end - offset) > 0) { if (copy > len) copy = len; if (skb_copy_bits(frag_iter, offset - start, to, copy)) goto fault; if ((len -= copy) == 0) return 0; offset += copy; to += copy; } start = end; } if (!len) return 0; fault: return -EFAULT; } EXPORT_SYMBOL(skb_copy_bits); /* * Callback from splice_to_pipe(), if we need to release some pages * at the end of the spd in case we error'ed out in filling the pipe. */ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) { put_page(spd->pages[i]); } static struct page *linear_to_page(struct page *page, unsigned int *len, unsigned int *offset, struct sock *sk) { struct page_frag *pfrag = sk_page_frag(sk); if (!sk_page_frag_refill(sk, pfrag)) return NULL; *len = min_t(unsigned int, *len, pfrag->size - pfrag->offset); memcpy(page_address(pfrag->page) + pfrag->offset, page_address(page) + *offset, *len); *offset = pfrag->offset; pfrag->offset += *len; return pfrag->page; } static bool spd_can_coalesce(const struct splice_pipe_desc *spd, struct page *page, unsigned int offset) { return spd->nr_pages && spd->pages[spd->nr_pages - 1] == page && (spd->partial[spd->nr_pages - 1].offset + spd->partial[spd->nr_pages - 1].len == offset); } /* * Fill page/offset/length into spd, if it can hold more pages. */ static bool spd_fill_page(struct splice_pipe_desc *spd, struct pipe_inode_info *pipe, struct page *page, unsigned int *len, unsigned int offset, bool linear, struct sock *sk) { if (unlikely(spd->nr_pages == MAX_SKB_FRAGS)) return true; if (linear) { page = linear_to_page(page, len, &offset, sk); if (!page) return true; } if (spd_can_coalesce(spd, page, offset)) { spd->partial[spd->nr_pages - 1].len += *len; return false; } get_page(page); spd->pages[spd->nr_pages] = page; spd->partial[spd->nr_pages].len = *len; spd->partial[spd->nr_pages].offset = offset; spd->nr_pages++; return false; } static bool __splice_segment(struct page *page, unsigned int poff, unsigned int plen, unsigned int *off, unsigned int *len, struct splice_pipe_desc *spd, bool linear, struct sock *sk, struct pipe_inode_info *pipe) { if (!*len) return true; /* skip this segment if already processed */ if (*off >= plen) { *off -= plen; return false; } /* ignore any bits we already processed */ poff += *off; plen -= *off; *off = 0; do { unsigned int flen = min(*len, plen); if (spd_fill_page(spd, pipe, page, &flen, poff, linear, sk)) return true; poff += flen; plen -= flen; *len -= flen; } while (*len && plen); return false; } /* * Map linear and fragment data from the skb to spd. It reports true if the * pipe is full or if we already spliced the requested length. */ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, unsigned int *offset, unsigned int *len, struct splice_pipe_desc *spd, struct sock *sk) { int seg; struct sk_buff *iter; /* map the linear part : * If skb->head_frag is set, this 'linear' part is backed by a * fragment, and if the head is not shared with any clones then * we can avoid a copy since we own the head portion of this page. */ if (__splice_segment(virt_to_page(skb->data), (unsigned long) skb->data & (PAGE_SIZE - 1), skb_headlen(skb), offset, len, spd, skb_head_is_locked(skb), sk, pipe)) return true; /* * then map the fragments */ for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; if (__splice_segment(skb_frag_page(f), skb_frag_off(f), skb_frag_size(f), offset, len, spd, false, sk, pipe)) return true; } skb_walk_frags(skb, iter) { if (*offset >= iter->len) { *offset -= iter->len; continue; } /* __skb_splice_bits() only fails if the output has no room * left, so no point in going over the frag_list for the error * case. */ if (__skb_splice_bits(iter, pipe, offset, len, spd, sk)) return true; } return false; } /* * Map data from the skb to a pipe. Should handle both the linear part, * the fragments, and the frag list. */ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, struct pipe_inode_info *pipe, unsigned int tlen, unsigned int flags) { struct partial_page partial[MAX_SKB_FRAGS]; struct page *pages[MAX_SKB_FRAGS]; struct splice_pipe_desc spd = { .pages = pages, .partial = partial, .nr_pages_max = MAX_SKB_FRAGS, .ops = &nosteal_pipe_buf_ops, .spd_release = sock_spd_release, }; int ret = 0; __skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk); if (spd.nr_pages) ret = splice_to_pipe(pipe, &spd); return ret; } EXPORT_SYMBOL_GPL(skb_splice_bits); static int sendmsg_unlocked(struct sock *sk, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { struct socket *sock = sk->sk_socket; if (!sock) return -EINVAL; return kernel_sendmsg(sock, msg, vec, num, size); } static int sendpage_unlocked(struct sock *sk, struct page *page, int offset, size_t size, int flags) { struct socket *sock = sk->sk_socket; if (!sock) return -EINVAL; return kernel_sendpage(sock, page, offset, size, flags); } typedef int (*sendmsg_func)(struct sock *sk, struct msghdr *msg, struct kvec *vec, size_t num, size_t size); typedef int (*sendpage_func)(struct sock *sk, struct page *page, int offset, size_t size, int flags); static int __skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len, sendmsg_func sendmsg, sendpage_func sendpage) { unsigned int orig_len = len; struct sk_buff *head = skb; unsigned short fragidx; int slen, ret; do_frag_list: /* Deal with head data */ while (offset < skb_headlen(skb) && len) { struct kvec kv; struct msghdr msg; slen = min_t(int, len, skb_headlen(skb) - offset); kv.iov_base = skb->data + offset; kv.iov_len = slen; memset(&msg, 0, sizeof(msg)); msg.msg_flags = MSG_DONTWAIT; ret = INDIRECT_CALL_2(sendmsg, kernel_sendmsg_locked, sendmsg_unlocked, sk, &msg, &kv, 1, slen); if (ret <= 0) goto error; offset += ret; len -= ret; } /* All the data was skb head? */ if (!len) goto out; /* Make offset relative to start of frags */ offset -= skb_headlen(skb); /* Find where we are in frag list */ for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx]; if (offset < skb_frag_size(frag)) break; offset -= skb_frag_size(frag); } for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx]; slen = min_t(size_t, len, skb_frag_size(frag) - offset); while (slen) { ret = INDIRECT_CALL_2(sendpage, kernel_sendpage_locked, sendpage_unlocked, sk, skb_frag_page(frag), skb_frag_off(frag) + offset, slen, MSG_DONTWAIT); if (ret <= 0) goto error; len -= ret; offset += ret; slen -= ret; } offset = 0; } if (len) { /* Process any frag lists */ if (skb == head) { if (skb_has_frag_list(skb)) { skb = skb_shinfo(skb)->frag_list; goto do_frag_list; } } else if (skb->next) { skb = skb->next; goto do_frag_list; } } out: return orig_len - len; error: return orig_len == len ? ret : orig_len - len; } /* Send skb data on a socket. Socket must be locked. */ int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, int len) { return __skb_send_sock(sk, skb, offset, len, kernel_sendmsg_locked, kernel_sendpage_locked); } EXPORT_SYMBOL_GPL(skb_send_sock_locked); /* Send skb data on a socket. Socket must be unlocked. */ int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len) { return __skb_send_sock(sk, skb, offset, len, sendmsg_unlocked, sendpage_unlocked); } /** * skb_store_bits - store bits from kernel buffer to skb * @skb: destination buffer * @offset: offset in destination * @from: source buffer * @len: number of bytes to copy * * Copy the specified number of bytes from the source buffer to the * destination skb. This function handles all the messy bits of * traversing fragment lists and such. */ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) { int start = skb_headlen(skb); struct sk_buff *frag_iter; int i, copy; if (offset > (int)skb->len - len) goto fault; if ((copy = start - offset) > 0) { if (copy > len) copy = len; skb_copy_to_linear_data_offset(skb, offset, from, copy); if ((len -= copy) == 0) return 0; offset += copy; from += copy; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; int end; WARN_ON(start > offset + len); end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { u32 p_off, p_len, copied; struct page *p; u8 *vaddr; if (copy > len) copy = len; skb_frag_foreach_page(frag, skb_frag_off(frag) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); memcpy(vaddr + p_off, from + copied, p_len); kunmap_atomic(vaddr); } if ((len -= copy) == 0) return 0; offset += copy; from += copy; } start = end; } skb_walk_frags(skb, frag_iter) { int end; WARN_ON(start > offset + len); end = start + frag_iter->len; if ((copy = end - offset) > 0) { if (copy > len) copy = len; if (skb_store_bits(frag_iter, offset - start, from, copy)) goto fault; if ((len -= copy) == 0) return 0; offset += copy; from += copy; } start = end; } if (!len) return 0; fault: return -EFAULT; } EXPORT_SYMBOL(skb_store_bits); /* Checksum skb data. */ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum, const struct skb_checksum_ops *ops) { int start = skb_headlen(skb); int i, copy = start - offset; struct sk_buff *frag_iter; int pos = 0; /* Checksum header. */ if (copy > 0) { if (copy > len) copy = len; csum = INDIRECT_CALL_1(ops->update, csum_partial_ext, skb->data + offset, copy, csum); if ((len -= copy) == 0) return csum; offset += copy; pos = copy; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { u32 p_off, p_len, copied; struct page *p; __wsum csum2; u8 *vaddr; if (copy > len) copy = len; skb_frag_foreach_page(frag, skb_frag_off(frag) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); csum2 = INDIRECT_CALL_1(ops->update, csum_partial_ext, vaddr + p_off, p_len, 0); kunmap_atomic(vaddr); csum = INDIRECT_CALL_1(ops->combine, csum_block_add_ext, csum, csum2, pos, p_len); pos += p_len; } if (!(len -= copy)) return csum; offset += copy; } start = end; } skb_walk_frags(skb, frag_iter) { int end; WARN_ON(start > offset + len); end = start + frag_iter->len; if ((copy = end - offset) > 0) { __wsum csum2; if (copy > len) copy = len; csum2 = __skb_checksum(frag_iter, offset - start, copy, 0, ops); csum = INDIRECT_CALL_1(ops->combine, csum_block_add_ext, csum, csum2, pos, copy); if ((len -= copy) == 0) return csum; offset += copy; pos += copy; } start = end; } BUG_ON(len); return csum; } EXPORT_SYMBOL(__skb_checksum); __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum) { const struct skb_checksum_ops ops = { .update = csum_partial_ext, .combine = csum_block_add_ext, }; return __skb_checksum(skb, offset, len, csum, &ops); } EXPORT_SYMBOL(skb_checksum); /* Both of above in one bottle. */ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len) { int start = skb_headlen(skb); int i, copy = start - offset; struct sk_buff *frag_iter; int pos = 0; __wsum csum = 0; /* Copy header. */ if (copy > 0) { if (copy > len) copy = len; csum = csum_partial_copy_nocheck(skb->data + offset, to, copy); if ((len -= copy) == 0) return csum; offset += copy; to += copy; pos = copy; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; WARN_ON(start > offset + len); end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; u32 p_off, p_len, copied; struct page *p; __wsum csum2; u8 *vaddr; if (copy > len) copy = len; skb_frag_foreach_page(frag, skb_frag_off(frag) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); csum2 = csum_partial_copy_nocheck(vaddr + p_off, to + copied, p_len); kunmap_atomic(vaddr); csum = csum_block_add(csum, csum2, pos); pos += p_len; } if (!(len -= copy)) return csum; offset += copy; to += copy; } start = end; } skb_walk_frags(skb, frag_iter) { __wsum csum2; int end; WARN_ON(start > offset + len); end = start + frag_iter->len; if ((copy = end - offset) > 0) { if (copy > len) copy = len; csum2 = skb_copy_and_csum_bits(frag_iter, offset - start, to, copy); csum = csum_block_add(csum, csum2, pos); if ((len -= copy) == 0) return csum; offset += copy; to += copy; pos += copy; } start = end; } BUG_ON(len); return csum; } EXPORT_SYMBOL(skb_copy_and_csum_bits); __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len) { __sum16 sum; sum = csum_fold(skb_checksum(skb, 0, len, skb->csum)); /* See comments in __skb_checksum_complete(). */ if (likely(!sum)) { if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !skb->csum_complete_sw) netdev_rx_csum_fault(skb->dev, skb); } if (!skb_shared(skb)) skb->csum_valid = !sum; return sum; } EXPORT_SYMBOL(__skb_checksum_complete_head); /* This function assumes skb->csum already holds pseudo header's checksum, * which has been changed from the hardware checksum, for example, by * __skb_checksum_validate_complete(). And, the original skb->csum must * have been validated unsuccessfully for CHECKSUM_COMPLETE case. * * It returns non-zero if the recomputed checksum is still invalid, otherwise * zero. The new checksum is stored back into skb->csum unless the skb is * shared. */ __sum16 __skb_checksum_complete(struct sk_buff *skb) { __wsum csum; __sum16 sum; csum = skb_checksum(skb, 0, skb->len, 0); sum = csum_fold(csum_add(skb->csum, csum)); /* This check is inverted, because we already knew the hardware * checksum is invalid before calling this function. So, if the * re-computed checksum is valid instead, then we have a mismatch * between the original skb->csum and skb_checksum(). This means either * the original hardware checksum is incorrect or we screw up skb->csum * when moving skb->data around. */ if (likely(!sum)) { if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !skb->csum_complete_sw) netdev_rx_csum_fault(skb->dev, skb); } if (!skb_shared(skb)) { /* Save full packet checksum */ skb->csum = csum; skb->ip_summed = CHECKSUM_COMPLETE; skb->csum_complete_sw = 1; skb->csum_valid = !sum; } return sum; } EXPORT_SYMBOL(__skb_checksum_complete); static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum) { net_warn_ratelimited( "%s: attempt to compute crc32c without libcrc32c.ko\n", __func__); return 0; } static __wsum warn_crc32c_csum_combine(__wsum csum, __wsum csum2, int offset, int len) { net_warn_ratelimited( "%s: attempt to compute crc32c without libcrc32c.ko\n", __func__); return 0; } static const struct skb_checksum_ops default_crc32c_ops = { .update = warn_crc32c_csum_update, .combine = warn_crc32c_csum_combine, }; const struct skb_checksum_ops *crc32c_csum_stub __read_mostly = &default_crc32c_ops; EXPORT_SYMBOL(crc32c_csum_stub); /** * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy() * @from: source buffer * * Calculates the amount of linear headroom needed in the 'to' skb passed * into skb_zerocopy(). */ unsigned int skb_zerocopy_headlen(const struct sk_buff *from) { unsigned int hlen = 0; if (!from->head_frag || skb_headlen(from) < L1_CACHE_BYTES || skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) { hlen = skb_headlen(from); if (!hlen) hlen = from->len; } if (skb_has_frag_list(from)) hlen = from->len; return hlen; } EXPORT_SYMBOL_GPL(skb_zerocopy_headlen); /** * skb_zerocopy - Zero copy skb to skb * @to: destination buffer * @from: source buffer * @len: number of bytes to copy from source buffer * @hlen: size of linear headroom in destination buffer * * Copies up to `len` bytes from `from` to `to` by creating references * to the frags in the source buffer. * * The `hlen` as calculated by skb_zerocopy_headlen() specifies the * headroom in the `to` buffer. * * Return value: * 0: everything is OK * -ENOMEM: couldn't orphan frags of @from due to lack of memory * -EFAULT: skb_copy_bits() found some problem with skb geometry */ int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) { int i, j = 0; int plen = 0; /* length of skb->head fragment */ int ret; struct page *page; unsigned int offset; BUG_ON(!from->head_frag && !hlen); /* dont bother with small payloads */ if (len <= skb_tailroom(to)) return skb_copy_bits(from, 0, skb_put(to, len), len); if (hlen) { ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen); if (unlikely(ret)) return ret; len -= hlen; } else { plen = min_t(int, skb_headlen(from), len); if (plen) { page = virt_to_head_page(from->head); offset = from->data - (unsigned char *)page_address(page); __skb_fill_page_desc(to, 0, page, offset, plen); get_page(page); j = 1; len -= plen; } } to->truesize += len + plen; to->len += len + plen; to->data_len += len + plen; if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) { skb_tx_error(from); return -ENOMEM; } skb_zerocopy_clone(to, from, GFP_ATOMIC); for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { int size; if (!len) break; skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; size = min_t(int, skb_frag_size(&skb_shinfo(to)->frags[j]), len); skb_frag_size_set(&skb_shinfo(to)->frags[j], size); len -= size; skb_frag_ref(to, j); j++; } skb_shinfo(to)->nr_frags = j; return 0; } EXPORT_SYMBOL_GPL(skb_zerocopy); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) { __wsum csum; long csstart; if (skb->ip_summed == CHECKSUM_PARTIAL) csstart = skb_checksum_start_offset(skb); else csstart = skb_headlen(skb); BUG_ON(csstart > skb_headlen(skb)); skb_copy_from_linear_data(skb, to, csstart); csum = 0; if (csstart != skb->len) csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, skb->len - csstart); if (skb->ip_summed == CHECKSUM_PARTIAL) { long csstuff = csstart + skb->csum_offset; *((__sum16 *)(to + csstuff)) = csum_fold(csum); } } EXPORT_SYMBOL(skb_copy_and_csum_dev); /** * skb_dequeue - remove from the head of the queue * @list: list to dequeue from * * Remove the head of the list. The list lock is taken so the function * may be used safely with other locking list functions. The head item is * returned or %NULL if the list is empty. */ struct sk_buff *skb_dequeue(struct sk_buff_head *list) { unsigned long flags; struct sk_buff *result; spin_lock_irqsave(&list->lock, flags); result = __skb_dequeue(list); spin_unlock_irqrestore(&list->lock, flags); return result; } EXPORT_SYMBOL(skb_dequeue); /** * skb_dequeue_tail - remove from the tail of the queue * @list: list to dequeue from * * Remove the tail of the list. The list lock is taken so the function * may be used safely with other locking list functions. The tail item is * returned or %NULL if the list is empty. */ struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) { unsigned long flags; struct sk_buff *result; spin_lock_irqsave(&list->lock, flags); result = __skb_dequeue_tail(list); spin_unlock_irqrestore(&list->lock, flags); return result; } EXPORT_SYMBOL(skb_dequeue_tail); /** * skb_queue_purge - empty a list * @list: list to empty * * Delete all buffers on an &sk_buff list. Each buffer is removed from * the list and one reference dropped. This function takes the list * lock and is atomic with respect to other list locking functions. */ void skb_queue_purge(struct sk_buff_head *list) { struct sk_buff *skb; while ((skb = skb_dequeue(list)) != NULL) kfree_skb(skb); } EXPORT_SYMBOL(skb_queue_purge); /** * skb_rbtree_purge - empty a skb rbtree * @root: root of the rbtree to empty * Return value: the sum of truesizes of all purged skbs. * * Delete all buffers on an &sk_buff rbtree. Each buffer is removed from * the list and one reference dropped. This function does not take * any lock. Synchronization should be handled by the caller (e.g., TCP * out-of-order queue is protected by the socket lock). */ unsigned int skb_rbtree_purge(struct rb_root *root) { struct rb_node *p = rb_first(root); unsigned int sum = 0; while (p) { struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); p = rb_next(p); rb_erase(&skb->rbnode, root); sum += skb->truesize; kfree_skb(skb); } return sum; } /** * skb_queue_head - queue a buffer at the list head * @list: list to use * @newsk: buffer to queue * * Queue a buffer at the start of the list. This function takes the * list lock and can be used safely with other locking &sk_buff functions * safely. * * A buffer cannot be placed on two lists at the same time. */ void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) { unsigned long flags; spin_lock_irqsave(&list->lock, flags); __skb_queue_head(list, newsk); spin_unlock_irqrestore(&list->lock, flags); } EXPORT_SYMBOL(skb_queue_head); /** * skb_queue_tail - queue a buffer at the list tail * @list: list to use * @newsk: buffer to queue * * Queue a buffer at the tail of the list. This function takes the * list lock and can be used safely with other locking &sk_buff functions * safely. * * A buffer cannot be placed on two lists at the same time. */ void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) { unsigned long flags; spin_lock_irqsave(&list->lock, flags); __skb_queue_tail(list, newsk); spin_unlock_irqrestore(&list->lock, flags); } EXPORT_SYMBOL(skb_queue_tail); /** * skb_unlink - remove a buffer from a list * @skb: buffer to remove * @list: list to use * * Remove a packet from a list. The list locks are taken and this * function is atomic with respect to other list locked calls * * You must know what list the SKB is on. */ void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) { unsigned long flags; spin_lock_irqsave(&list->lock, flags); __skb_unlink(skb, list); spin_unlock_irqrestore(&list->lock, flags); } EXPORT_SYMBOL(skb_unlink); /** * skb_append - append a buffer * @old: buffer to insert after * @newsk: buffer to insert * @list: list to use * * Place a packet after a given packet in a list. The list locks are taken * and this function is atomic with respect to other list locked calls. * A buffer cannot be placed on two lists at the same time. */ void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) { unsigned long flags; spin_lock_irqsave(&list->lock, flags); __skb_queue_after(list, old, newsk); spin_unlock_irqrestore(&list->lock, flags); } EXPORT_SYMBOL(skb_append); static inline void skb_split_inside_header(struct sk_buff *skb, struct sk_buff* skb1, const u32 len, const int pos) { int i; skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len), pos - len); /* And move data appendix as is. */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i]; skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags; skb_shinfo(skb)->nr_frags = 0; skb1->data_len = skb->data_len; skb1->len += skb1->data_len; skb->data_len = 0; skb->len = len; skb_set_tail_pointer(skb, len); } static inline void skb_split_no_header(struct sk_buff *skb, struct sk_buff* skb1, const u32 len, int pos) { int i, k = 0; const int nfrags = skb_shinfo(skb)->nr_frags; skb_shinfo(skb)->nr_frags = 0; skb1->len = skb1->data_len = skb->len - len; skb->len = len; skb->data_len = len - pos; for (i = 0; i < nfrags; i++) { int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); if (pos + size > len) { skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i]; if (pos < len) { /* Split frag. * We have two variants in this case: * 1. Move all the frag to the second * part, if it is possible. F.e. * this approach is mandatory for TUX, * where splitting is expensive. * 2. Split is accurately. We make this. */ skb_frag_ref(skb, i); skb_frag_off_add(&skb_shinfo(skb1)->frags[0], len - pos); skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos); skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos); skb_shinfo(skb)->nr_frags++; } k++; } else skb_shinfo(skb)->nr_frags++; pos += size; } skb_shinfo(skb1)->nr_frags = k; } /** * skb_split - Split fragmented skb to two parts at length len. * @skb: the buffer to split * @skb1: the buffer to receive the second part * @len: new length for skb */ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG; skb_zerocopy_clone(skb1, skb, 0); if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); else /* Second chunk has no header, nothing to copy. */ skb_split_no_header(skb, skb1, len, pos); } EXPORT_SYMBOL(skb_split); /* Shifting from/to a cloned skb is a no-go. * * Caller cannot keep skb_shinfo related pointers past calling here! */ static int skb_prepare_for_shift(struct sk_buff *skb) { return skb_unclone_keeptruesize(skb, GFP_ATOMIC); } /** * skb_shift - Shifts paged data partially from skb to another * @tgt: buffer into which tail data gets added * @skb: buffer from which the paged data comes from * @shiftlen: shift up to this many bytes * * Attempts to shift up to shiftlen worth of bytes, which may be less than * the length of the skb, from skb to tgt. Returns number bytes shifted. * It's up to caller to free skb if everything was shifted. * * If @tgt runs out of frags, the whole operation is aborted. * * Skb cannot include anything else but paged data while tgt is allowed * to have non-paged data as well. * * TODO: full sized shift could be optimized but that would need * specialized skb free'er to handle frags without up-to-date nr_frags. */ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) { int from, to, merge, todo; skb_frag_t *fragfrom, *fragto; BUG_ON(shiftlen > skb->len); if (skb_headlen(skb)) return 0; if (skb_zcopy(tgt) || skb_zcopy(skb)) return 0; todo = shiftlen; from = 0; to = skb_shinfo(tgt)->nr_frags; fragfrom = &skb_shinfo(skb)->frags[from]; /* Actual merge is delayed until the point when we know we can * commit all, so that we don't have to undo partial changes */ if (!to || !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom), skb_frag_off(fragfrom))) { merge = -1; } else { merge = to - 1; todo -= skb_frag_size(fragfrom); if (todo < 0) { if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt)) return 0; /* All previous frag pointers might be stale! */ fragfrom = &skb_shinfo(skb)->frags[from]; fragto = &skb_shinfo(tgt)->frags[merge]; skb_frag_size_add(fragto, shiftlen); skb_frag_size_sub(fragfrom, shiftlen); skb_frag_off_add(fragfrom, shiftlen); goto onlymerged; } from++; } /* Skip full, not-fitting skb to avoid expensive operations */ if ((shiftlen == skb->len) && (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to)) return 0; if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt)) return 0; while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) { if (to == MAX_SKB_FRAGS) return 0; fragfrom = &skb_shinfo(skb)->frags[from]; fragto = &skb_shinfo(tgt)->frags[to]; if (todo >= skb_frag_size(fragfrom)) { *fragto = *fragfrom; todo -= skb_frag_size(fragfrom); from++; to++; } else { __skb_frag_ref(fragfrom); skb_frag_page_copy(fragto, fragfrom); skb_frag_off_copy(fragto, fragfrom); skb_frag_size_set(fragto, todo); skb_frag_off_add(fragfrom, todo); skb_frag_size_sub(fragfrom, todo); todo = 0; to++; break; } } /* Ready to "commit" this state change to tgt */ skb_shinfo(tgt)->nr_frags = to; if (merge >= 0) { fragfrom = &skb_shinfo(skb)->frags[0]; fragto = &skb_shinfo(tgt)->frags[merge]; skb_frag_size_add(fragto, skb_frag_size(fragfrom)); __skb_frag_unref(fragfrom, skb->pp_recycle); } /* Reposition in the original skb */ to = 0; while (from < skb_shinfo(skb)->nr_frags) skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++]; skb_shinfo(skb)->nr_frags = to; BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags); onlymerged: /* Most likely the tgt won't ever need its checksum anymore, skb on * the other hand might need it if it needs to be resent */ tgt->ip_summed = CHECKSUM_PARTIAL; skb->ip_summed = CHECKSUM_PARTIAL; /* Yak, is it really working this way? Some helper please? */ skb->len -= shiftlen; skb->data_len -= shiftlen; skb->truesize -= shiftlen; tgt->len += shiftlen; tgt->data_len += shiftlen; tgt->truesize += shiftlen; return shiftlen; } /** * skb_prepare_seq_read - Prepare a sequential read of skb data * @skb: the buffer to read * @from: lower offset of data to be read * @to: upper offset of data to be read * @st: state variable * * Initializes the specified state variable. Must be called before * invoking skb_seq_read() for the first time. */ void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, unsigned int to, struct skb_seq_state *st) { st->lower_offset = from; st->upper_offset = to; st->root_skb = st->cur_skb = skb; st->frag_idx = st->stepped_offset = 0; st->frag_data = NULL; st->frag_off = 0; } EXPORT_SYMBOL(skb_prepare_seq_read); /** * skb_seq_read - Sequentially read skb data * @consumed: number of bytes consumed by the caller so far * @data: destination pointer for data to be returned * @st: state variable * * Reads a block of skb data at @consumed relative to the * lower offset specified to skb_prepare_seq_read(). Assigns * the head of the data block to @data and returns the length * of the block or 0 if the end of the skb data or the upper * offset has been reached. * * The caller is not required to consume all of the data * returned, i.e. @consumed is typically set to the number * of bytes already consumed and the next call to * skb_seq_read() will return the remaining part of the block. * * Note 1: The size of each block of data returned can be arbitrary, * this limitation is the cost for zerocopy sequential * reads of potentially non linear data. * * Note 2: Fragment lists within fragments are not implemented * at the moment, state->root_skb could be replaced with * a stack for this purpose. */ unsigned int skb_seq_read(unsigned int consumed, const u8 **data, struct skb_seq_state *st) { unsigned int block_limit, abs_offset = consumed + st->lower_offset; skb_frag_t *frag; if (unlikely(abs_offset >= st->upper_offset)) { if (st->frag_data) { kunmap_atomic(st->frag_data); st->frag_data = NULL; } return 0; } next_skb: block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; if (abs_offset < block_limit && !st->frag_data) { *data = st->cur_skb->data + (abs_offset - st->stepped_offset); return block_limit - abs_offset; } if (st->frag_idx == 0 && !st->frag_data) st->stepped_offset += skb_headlen(st->cur_skb); while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { unsigned int pg_idx, pg_off, pg_sz; frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; pg_idx = 0; pg_off = skb_frag_off(frag); pg_sz = skb_frag_size(frag); if (skb_frag_must_loop(skb_frag_page(frag))) { pg_idx = (pg_off + st->frag_off) >> PAGE_SHIFT; pg_off = offset_in_page(pg_off + st->frag_off); pg_sz = min_t(unsigned int, pg_sz - st->frag_off, PAGE_SIZE - pg_off); } block_limit = pg_sz + st->stepped_offset; if (abs_offset < block_limit) { if (!st->frag_data) st->frag_data = kmap_atomic(skb_frag_page(frag) + pg_idx); *data = (u8 *)st->frag_data + pg_off + (abs_offset - st->stepped_offset); return block_limit - abs_offset; } if (st->frag_data) { kunmap_atomic(st->frag_data); st->frag_data = NULL; } st->stepped_offset += pg_sz; st->frag_off += pg_sz; if (st->frag_off == skb_frag_size(frag)) { st->frag_off = 0; st->frag_idx++; } } if (st->frag_data) { kunmap_atomic(st->frag_data); st->frag_data = NULL; } if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) { st->cur_skb = skb_shinfo(st->root_skb)->frag_list; st->frag_idx = 0; goto next_skb; } else if (st->cur_skb->next) { st->cur_skb = st->cur_skb->next; st->frag_idx = 0; goto next_skb; } return 0; } EXPORT_SYMBOL(skb_seq_read); /** * skb_abort_seq_read - Abort a sequential read of skb data * @st: state variable * * Must be called if skb_seq_read() was not called until it * returned 0. */ void skb_abort_seq_read(struct skb_seq_state *st) { if (st->frag_data) kunmap_atomic(st->frag_data); } EXPORT_SYMBOL(skb_abort_seq_read); #define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb)) static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text, struct ts_config *conf, struct ts_state *state) { return skb_seq_read(offset, text, TS_SKB_CB(state)); } static void skb_ts_finish(struct ts_config *conf, struct ts_state *state) { skb_abort_seq_read(TS_SKB_CB(state)); } /** * skb_find_text - Find a text pattern in skb data * @skb: the buffer to look in * @from: search offset * @to: search limit * @config: textsearch configuration * * Finds a pattern in the skb data according to the specified * textsearch configuration. Use textsearch_next() to retrieve * subsequent occurrences of the pattern. Returns the offset * to the first occurrence or UINT_MAX if no match was found. */ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config) { struct ts_state state; unsigned int ret; BUILD_BUG_ON(sizeof(struct skb_seq_state) > sizeof(state.cb)); config->get_next_block = skb_ts_get_next_block; config->finish = skb_ts_finish; skb_prepare_seq_read(skb, from, to, TS_SKB_CB(&state)); ret = textsearch_find(config, &state); return (ret <= to - from ? ret : UINT_MAX); } EXPORT_SYMBOL(skb_find_text); int skb_append_pagefrags(struct sk_buff *skb, struct page *page, int offset, size_t size) { int i = skb_shinfo(skb)->nr_frags; if (skb_can_coalesce(skb, i, page, offset)) { skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size); } else if (i < MAX_SKB_FRAGS) { get_page(page); skb_fill_page_desc_noacc(skb, i, page, offset, size); } else { return -EMSGSIZE; } return 0; } EXPORT_SYMBOL_GPL(skb_append_pagefrags); /** * skb_pull_rcsum - pull skb and update receive checksum * @skb: buffer to update * @len: length of data pulled * * This function performs an skb_pull on the packet and updates * the CHECKSUM_COMPLETE checksum. It should be used on * receive path processing instead of skb_pull unless you know * that the checksum difference is zero (e.g., a valid IP header) * or you are setting ip_summed to CHECKSUM_NONE. */ void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) { unsigned char *data = skb->data; BUG_ON(len > skb->len); __skb_pull(skb, len); skb_postpull_rcsum(skb, data, len); return skb->data; } EXPORT_SYMBOL_GPL(skb_pull_rcsum); static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb) { skb_frag_t head_frag; struct page *page; page = virt_to_head_page(frag_skb->head); __skb_frag_set_page(&head_frag, page); skb_frag_off_set(&head_frag, frag_skb->data - (unsigned char *)page_address(page)); skb_frag_size_set(&head_frag, skb_headlen(frag_skb)); return head_frag; } struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features, unsigned int offset) { struct sk_buff *list_skb = skb_shinfo(skb)->frag_list; unsigned int tnl_hlen = skb_tnl_header_len(skb); unsigned int delta_truesize = 0; unsigned int delta_len = 0; struct sk_buff *tail = NULL; struct sk_buff *nskb, *tmp; int len_diff, err; skb_push(skb, -skb_network_offset(skb) + offset); /* Ensure the head is writeable before touching the shared info */ err = skb_unclone(skb, GFP_ATOMIC); if (err) goto err_linearize; skb_shinfo(skb)->frag_list = NULL; while (list_skb) { nskb = list_skb; list_skb = list_skb->next; err = 0; delta_truesize += nskb->truesize; if (skb_shared(nskb)) { tmp = skb_clone(nskb, GFP_ATOMIC); if (tmp) { consume_skb(nskb); nskb = tmp; err = skb_unclone(nskb, GFP_ATOMIC); } else { err = -ENOMEM; } } if (!tail) skb->next = nskb; else tail->next = nskb; if (unlikely(err)) { nskb->next = list_skb; goto err_linearize; } tail = nskb; delta_len += nskb->len; skb_push(nskb, -skb_network_offset(nskb) + offset); skb_release_head_state(nskb); len_diff = skb_network_header_len(nskb) - skb_network_header_len(skb); __copy_skb_header(nskb, skb); skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb)); nskb->transport_header += len_diff; skb_copy_from_linear_data_offset(skb, -tnl_hlen, nskb->data - tnl_hlen, offset + tnl_hlen); if (skb_needs_linearize(nskb, features) && __skb_linearize(nskb)) goto err_linearize; } skb->truesize = skb->truesize - delta_truesize; skb->data_len = skb->data_len - delta_len; skb->len = skb->len - delta_len; skb_gso_reset(skb); skb->prev = tail; if (skb_needs_linearize(skb, features) && __skb_linearize(skb)) goto err_linearize; skb_get(skb); return skb; err_linearize: kfree_skb_list(skb->next); skb->next = NULL; return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL_GPL(skb_segment_list); int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) { if (unlikely(p->len + skb->len >= 65536)) return -E2BIG; if (NAPI_GRO_CB(p)->last == p) skb_shinfo(p)->frag_list = skb; else NAPI_GRO_CB(p)->last->next = skb; skb_pull(skb, skb_gro_offset(skb)); NAPI_GRO_CB(p)->last = skb; NAPI_GRO_CB(p)->count++; p->data_len += skb->len; /* sk ownership - if any - completely transferred to the aggregated packet */ skb->destructor = NULL; skb->sk = NULL; p->truesize += skb->truesize; p->len += skb->len; NAPI_GRO_CB(skb)->same_flow = 1; return 0; } /** * skb_segment - Perform protocol segmentation on skb. * @head_skb: buffer to segment * @features: features for the output path (see dev->features) * * This function performs segmentation on the given skb. It returns * a pointer to the first in a list of new skbs for the segments. * In case of error it returns ERR_PTR(err). */ struct sk_buff *skb_segment(struct sk_buff *head_skb, netdev_features_t features) { struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list; unsigned int mss = skb_shinfo(head_skb)->gso_size; unsigned int doffset = head_skb->data - skb_mac_header(head_skb); unsigned int offset = doffset; unsigned int tnl_hlen = skb_tnl_header_len(head_skb); unsigned int partial_segs = 0; unsigned int headroom; unsigned int len = head_skb->len; struct sk_buff *frag_skb; skb_frag_t *frag; __be16 proto; bool csum, sg; int err = -ENOMEM; int i = 0; int nfrags, pos; if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) && mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) { struct sk_buff *check_skb; for (check_skb = list_skb; check_skb; check_skb = check_skb->next) { if (skb_headlen(check_skb) && !check_skb->head_frag) { /* gso_size is untrusted, and we have a frag_list with * a linear non head_frag item. * * If head_skb's headlen does not fit requested gso_size, * it means that the frag_list members do NOT terminate * on exact gso_size boundaries. Hence we cannot perform * skb_frag_t page sharing. Therefore we must fallback to * copying the frag_list skbs; we do so by disabling SG. */ features &= ~NETIF_F_SG; break; } } } __skb_push(head_skb, doffset); proto = skb_network_protocol(head_skb, NULL); if (unlikely(!proto)) return ERR_PTR(-EINVAL); sg = !!(features & NETIF_F_SG); csum = !!can_checksum_protocol(features, proto); if (sg && csum && (mss != GSO_BY_FRAGS)) { if (!(features & NETIF_F_GSO_PARTIAL)) { struct sk_buff *iter; unsigned int frag_len; if (!list_skb || !net_gso_ok(features, skb_shinfo(head_skb)->gso_type)) goto normal; /* If we get here then all the required * GSO features except frag_list are supported. * Try to split the SKB to multiple GSO SKBs * with no frag_list. * Currently we can do that only when the buffers don't * have a linear part and all the buffers except * the last are of the same length. */ frag_len = list_skb->len; skb_walk_frags(head_skb, iter) { if (frag_len != iter->len && iter->next) goto normal; if (skb_headlen(iter) && !iter->head_frag) goto normal; len -= iter->len; } if (len != frag_len) goto normal; } /* GSO partial only requires that we trim off any excess that * doesn't fit into an MSS sized block, so take care of that * now. * Cap len to not accidentally hit GSO_BY_FRAGS. */ partial_segs = min(len, GSO_BY_FRAGS - 1U) / mss; if (partial_segs > 1) mss *= partial_segs; else partial_segs = 0; } normal: headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); if (skb_orphan_frags(head_skb, GFP_ATOMIC)) return ERR_PTR(-ENOMEM); nfrags = skb_shinfo(head_skb)->nr_frags; frag = skb_shinfo(head_skb)->frags; frag_skb = head_skb; do { struct sk_buff *nskb; skb_frag_t *nskb_frag; int hsize; int size; if (unlikely(mss == GSO_BY_FRAGS)) { len = list_skb->len; } else { len = head_skb->len - offset; if (len > mss) len = mss; } hsize = skb_headlen(head_skb) - offset; if (hsize <= 0 && i >= nfrags && skb_headlen(list_skb) && (skb_headlen(list_skb) == len || sg)) { BUG_ON(skb_headlen(list_skb) > len); nskb = skb_clone(list_skb, GFP_ATOMIC); if (unlikely(!nskb)) goto err; i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; frag_skb = list_skb; pos += skb_headlen(list_skb); while (pos < offset + len) { BUG_ON(i >= nfrags); size = skb_frag_size(frag); if (pos + size > offset + len) break; i++; pos += size; frag++; } list_skb = list_skb->next; if (unlikely(pskb_trim(nskb, len))) { kfree_skb(nskb); goto err; } hsize = skb_end_offset(nskb); if (skb_cow_head(nskb, doffset + headroom)) { kfree_skb(nskb); goto err; } nskb->truesize += skb_end_offset(nskb) - hsize; skb_release_head_state(nskb); __skb_push(nskb, doffset); } else { if (hsize < 0) hsize = 0; if (hsize > len || !sg) hsize = len; nskb = __alloc_skb(hsize + doffset + headroom, GFP_ATOMIC, skb_alloc_rx_flag(head_skb), NUMA_NO_NODE); if (unlikely(!nskb)) goto err; skb_reserve(nskb, headroom); __skb_put(nskb, doffset); } if (segs) tail->next = nskb; else segs = nskb; tail = nskb; __copy_skb_header(nskb, head_skb); skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom); skb_reset_mac_len(nskb); skb_copy_from_linear_data_offset(head_skb, -tnl_hlen, nskb->data - tnl_hlen, doffset + tnl_hlen); if (nskb->len == len + doffset) goto perform_csum_check; if (!sg) { if (!csum) { if (!nskb->remcsum_offload) nskb->ip_summed = CHECKSUM_NONE; SKB_GSO_CB(nskb)->csum = skb_copy_and_csum_bits(head_skb, offset, skb_put(nskb, len), len); SKB_GSO_CB(nskb)->csum_start = skb_headroom(nskb) + doffset; } else { if (skb_copy_bits(head_skb, offset, skb_put(nskb, len), len)) goto err; } continue; } nskb_frag = skb_shinfo(nskb)->frags; skb_copy_from_linear_data_offset(head_skb, offset, skb_put(nskb, hsize), hsize); skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags & SKBFL_SHARED_FRAG; if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) goto err; while (pos < offset + len) { if (i >= nfrags) { if (skb_orphan_frags(list_skb, GFP_ATOMIC) || skb_zerocopy_clone(nskb, list_skb, GFP_ATOMIC)) goto err; i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; frag_skb = list_skb; if (!skb_headlen(list_skb)) { BUG_ON(!nfrags); } else { BUG_ON(!list_skb->head_frag); /* to make room for head_frag. */ i--; frag--; } list_skb = list_skb->next; } if (unlikely(skb_shinfo(nskb)->nr_frags >= MAX_SKB_FRAGS)) { net_warn_ratelimited( "skb_segment: too many frags: %u %u\n", pos, mss); err = -EINVAL; goto err; } *nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag; __skb_frag_ref(nskb_frag); size = skb_frag_size(nskb_frag); if (pos < offset) { skb_frag_off_add(nskb_frag, offset - pos); skb_frag_size_sub(nskb_frag, offset - pos); } skb_shinfo(nskb)->nr_frags++; if (pos + size <= offset + len) { i++; frag++; pos += size; } else { skb_frag_size_sub(nskb_frag, pos + size - (offset + len)); goto skip_fraglist; } nskb_frag++; } skip_fraglist: nskb->data_len = len - hsize; nskb->len += nskb->data_len; nskb->truesize += nskb->data_len; perform_csum_check: if (!csum) { if (skb_has_shared_frag(nskb) && __skb_linearize(nskb)) goto err; if (!nskb->remcsum_offload) nskb->ip_summed = CHECKSUM_NONE; SKB_GSO_CB(nskb)->csum = skb_checksum(nskb, doffset, nskb->len - doffset, 0); SKB_GSO_CB(nskb)->csum_start = skb_headroom(nskb) + doffset; } } while ((offset += len) < head_skb->len); /* Some callers want to get the end of the list. * Put it in segs->prev to avoid walking the list. * (see validate_xmit_skb_list() for example) */ segs->prev = tail; if (partial_segs) { struct sk_buff *iter; int type = skb_shinfo(head_skb)->gso_type; unsigned short gso_size = skb_shinfo(head_skb)->gso_size; /* Update type to add partial and then remove dodgy if set */ type |= (features & NETIF_F_GSO_PARTIAL) / NETIF_F_GSO_PARTIAL * SKB_GSO_PARTIAL; type &= ~SKB_GSO_DODGY; /* Update GSO info and prepare to start updating headers on * our way back down the stack of protocols. */ for (iter = segs; iter; iter = iter->next) { skb_shinfo(iter)->gso_size = gso_size; skb_shinfo(iter)->gso_segs = partial_segs; skb_shinfo(iter)->gso_type = type; SKB_GSO_CB(iter)->data_offset = skb_headroom(iter) + doffset; } if (tail->len - doffset <= gso_size) skb_shinfo(tail)->gso_size = 0; else if (tail != segs) skb_shinfo(tail)->gso_segs = DIV_ROUND_UP(tail->len - doffset, gso_size); } /* Following permits correct backpressure, for protocols * using skb_set_owner_w(). * Idea is to tranfert ownership from head_skb to last segment. */ if (head_skb->destructor == sock_wfree) { swap(tail->truesize, head_skb->truesize); swap(tail->destructor, head_skb->destructor); swap(tail->sk, head_skb->sk); } return segs; err: kfree_skb_list(segs); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(skb_segment); int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) { struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb); unsigned int offset = skb_gro_offset(skb); unsigned int headlen = skb_headlen(skb); unsigned int len = skb_gro_len(skb); unsigned int delta_truesize; unsigned int new_truesize; struct sk_buff *lp; if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush)) return -E2BIG; lp = NAPI_GRO_CB(p)->last; pinfo = skb_shinfo(lp); if (headlen <= offset) { skb_frag_t *frag; skb_frag_t *frag2; int i = skbinfo->nr_frags; int nr_frags = pinfo->nr_frags + i; if (nr_frags > MAX_SKB_FRAGS) goto merge; offset -= headlen; pinfo->nr_frags = nr_frags; skbinfo->nr_frags = 0; frag = pinfo->frags + nr_frags; frag2 = skbinfo->frags + i; do { *--frag = *--frag2; } while (--i); skb_frag_off_add(frag, offset); skb_frag_size_sub(frag, offset); /* all fragments truesize : remove (head size + sk_buff) */ new_truesize = SKB_TRUESIZE(skb_end_offset(skb)); delta_truesize = skb->truesize - new_truesize; skb->truesize = new_truesize; skb->len -= skb->data_len; skb->data_len = 0; NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE; goto done; } else if (skb->head_frag) { int nr_frags = pinfo->nr_frags; skb_frag_t *frag = pinfo->frags + nr_frags; struct page *page = virt_to_head_page(skb->head); unsigned int first_size = headlen - offset; unsigned int first_offset; if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS) goto merge; first_offset = skb->data - (unsigned char *)page_address(page) + offset; pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags; __skb_frag_set_page(frag, page); skb_frag_off_set(frag, first_offset); skb_frag_size_set(frag, first_size); memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags); /* We dont need to clear skbinfo->nr_frags here */ new_truesize = SKB_DATA_ALIGN(sizeof(struct sk_buff)); delta_truesize = skb->truesize - new_truesize; skb->truesize = new_truesize; NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD; goto done; } merge: /* sk ownership - if any - completely transferred to the aggregated packet */ skb->destructor = NULL; skb->sk = NULL; delta_truesize = skb->truesize; if (offset > headlen) { unsigned int eat = offset - headlen; skb_frag_off_add(&skbinfo->frags[0], eat); skb_frag_size_sub(&skbinfo->frags[0], eat); skb->data_len -= eat; skb->len -= eat; offset = headlen; } __skb_pull(skb, offset); if (NAPI_GRO_CB(p)->last == p) skb_shinfo(p)->frag_list = skb; else NAPI_GRO_CB(p)->last->next = skb; NAPI_GRO_CB(p)->last = skb; __skb_header_release(skb); lp = p; done: NAPI_GRO_CB(p)->count++; p->data_len += len; p->truesize += delta_truesize; p->len += len; if (lp != p) { lp->data_len += len; lp->truesize += delta_truesize; lp->len += len; } NAPI_GRO_CB(skb)->same_flow = 1; return 0; } #ifdef CONFIG_SKB_EXTENSIONS #define SKB_EXT_ALIGN_VALUE 8 #define SKB_EXT_CHUNKSIZEOF(x) (ALIGN((sizeof(x)), SKB_EXT_ALIGN_VALUE) / SKB_EXT_ALIGN_VALUE) static const u8 skb_ext_type_len[] = { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) [SKB_EXT_BRIDGE_NF] = SKB_EXT_CHUNKSIZEOF(struct nf_bridge_info), #endif #ifdef CONFIG_XFRM [SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path), #endif #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext), #endif #if IS_ENABLED(CONFIG_MPTCP) [SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext), #endif }; static __always_inline unsigned int skb_ext_total_length(void) { return SKB_EXT_CHUNKSIZEOF(struct skb_ext) + #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) skb_ext_type_len[SKB_EXT_BRIDGE_NF] + #endif #ifdef CONFIG_XFRM skb_ext_type_len[SKB_EXT_SEC_PATH] + #endif #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) skb_ext_type_len[TC_SKB_EXT] + #endif #if IS_ENABLED(CONFIG_MPTCP) skb_ext_type_len[SKB_EXT_MPTCP] + #endif 0; } static void skb_extensions_init(void) { BUILD_BUG_ON(SKB_EXT_NUM >= 8); BUILD_BUG_ON(skb_ext_total_length() > 255); skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache", SKB_EXT_ALIGN_VALUE * skb_ext_total_length(), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); } #else static void skb_extensions_init(void) {} #endif void __init skb_init(void) { skbuff_head_cache = kmem_cache_create_usercopy("skbuff_head_cache", sizeof(struct sk_buff), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, offsetof(struct sk_buff, cb), sizeof_field(struct sk_buff, cb), NULL); skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", sizeof(struct sk_buff_fclones), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); skb_extensions_init(); } static int __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len, unsigned int recursion_level) { int start = skb_headlen(skb); int i, copy = start - offset; struct sk_buff *frag_iter; int elt = 0; if (unlikely(recursion_level >= 24)) return -EMSGSIZE; if (copy > 0) { if (copy > len) copy = len; sg_set_buf(sg, skb->data + offset, copy); elt++; if ((len -= copy) == 0) return elt; offset += copy; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; WARN_ON(start > offset + len); end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; if (unlikely(elt && sg_is_last(&sg[elt - 1]))) return -EMSGSIZE; if (copy > len) copy = len; sg_set_page(&sg[elt], skb_frag_page(frag), copy, skb_frag_off(frag) + offset - start); elt++; if (!(len -= copy)) return elt; offset += copy; } start = end; } skb_walk_frags(skb, frag_iter) { int end, ret; WARN_ON(start > offset + len); end = start + frag_iter->len; if ((copy = end - offset) > 0) { if (unlikely(elt && sg_is_last(&sg[elt - 1]))) return -EMSGSIZE; if (copy > len) copy = len; ret = __skb_to_sgvec(frag_iter, sg+elt, offset - start, copy, recursion_level + 1); if (unlikely(ret < 0)) return ret; elt += ret; if ((len -= copy) == 0) return elt; offset += copy; } start = end; } BUG_ON(len); return elt; } /** * skb_to_sgvec - Fill a scatter-gather list from a socket buffer * @skb: Socket buffer containing the buffers to be mapped * @sg: The scatter-gather list to map into * @offset: The offset into the buffer's contents to start mapping * @len: Length of buffer space to be mapped * * Fill the specified scatter-gather list with mappings/pointers into a * region of the buffer space attached to a socket buffer. Returns either * the number of scatterlist items used, or -EMSGSIZE if the contents * could not fit. */ int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) { int nsg = __skb_to_sgvec(skb, sg, offset, len, 0); if (nsg <= 0) return nsg; sg_mark_end(&sg[nsg - 1]); return nsg; } EXPORT_SYMBOL_GPL(skb_to_sgvec); /* As compared with skb_to_sgvec, skb_to_sgvec_nomark only map skb to given * sglist without mark the sg which contain last skb data as the end. * So the caller can mannipulate sg list as will when padding new data after * the first call without calling sg_unmark_end to expend sg list. * * Scenario to use skb_to_sgvec_nomark: * 1. sg_init_table * 2. skb_to_sgvec_nomark(payload1) * 3. skb_to_sgvec_nomark(payload2) * * This is equivalent to: * 1. sg_init_table * 2. skb_to_sgvec(payload1) * 3. sg_unmark_end * 4. skb_to_sgvec(payload2) * * When mapping mutilple payload conditionally, skb_to_sgvec_nomark * is more preferable. */ int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) { return __skb_to_sgvec(skb, sg, offset, len, 0); } EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark); /** * skb_cow_data - Check that a socket buffer's data buffers are writable * @skb: The socket buffer to check. * @tailbits: Amount of trailing space to be added * @trailer: Returned pointer to the skb where the @tailbits space begins * * Make sure that the data buffers attached to a socket buffer are * writable. If they are not, private copies are made of the data buffers * and the socket buffer is set to use these instead. * * If @tailbits is given, make sure that there is space to write @tailbits * bytes of data beyond current end of socket buffer. @trailer will be * set to point to the skb in which this space begins. * * The number of scatterlist elements required to completely map the * COW'd and extended socket buffer will be returned. */ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) { int copyflag; int elt; struct sk_buff *skb1, **skb_p; /* If skb is cloned or its head is paged, reallocate * head pulling out all the pages (pages are considered not writable * at the moment even if they are anonymous). */ if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) && !__pskb_pull_tail(skb, __skb_pagelen(skb))) return -ENOMEM; /* Easy case. Most of packets will go this way. */ if (!skb_has_frag_list(skb)) { /* A little of trouble, not enough of space for trailer. * This should not happen, when stack is tuned to generate * good frames. OK, on miss we reallocate and reserve even more * space, 128 bytes is fair. */ if (skb_tailroom(skb) < tailbits && pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC)) return -ENOMEM; /* Voila! */ *trailer = skb; return 1; } /* Misery. We are in troubles, going to mincer fragments... */ elt = 1; skb_p = &skb_shinfo(skb)->frag_list; copyflag = 0; while ((skb1 = *skb_p) != NULL) { int ntail = 0; /* The fragment is partially pulled by someone, * this can happen on input. Copy it and everything * after it. */ if (skb_shared(skb1)) copyflag = 1; /* If the skb is the last, worry about trailer. */ if (skb1->next == NULL && tailbits) { if (skb_shinfo(skb1)->nr_frags || skb_has_frag_list(skb1) || skb_tailroom(skb1) < tailbits) ntail = tailbits + 128; } if (copyflag || skb_cloned(skb1) || ntail || skb_shinfo(skb1)->nr_frags || skb_has_frag_list(skb1)) { struct sk_buff *skb2; /* Fuck, we are miserable poor guys... */ if (ntail == 0) skb2 = skb_copy(skb1, GFP_ATOMIC); else skb2 = skb_copy_expand(skb1, skb_headroom(skb1), ntail, GFP_ATOMIC); if (unlikely(skb2 == NULL)) return -ENOMEM; if (skb1->sk) skb_set_owner_w(skb2, skb1->sk); /* Looking around. Are we still alive? * OK, link new skb, drop old one */ skb2->next = skb1->next; *skb_p = skb2; kfree_skb(skb1); skb1 = skb2; } elt++; *trailer = skb1; skb_p = &skb1->next; } return elt; } EXPORT_SYMBOL_GPL(skb_cow_data); static void sock_rmem_free(struct sk_buff *skb) { struct sock *sk = skb->sk; atomic_sub(skb->truesize, &sk->sk_rmem_alloc); } static void skb_set_err_queue(struct sk_buff *skb) { /* pkt_type of skbs received on local sockets is never PACKET_OUTGOING. * So, it is safe to (mis)use it to mark skbs on the error queue. */ skb->pkt_type = PACKET_OUTGOING; BUILD_BUG_ON(PACKET_OUTGOING == 0); } /* * Note: We dont mem charge error packets (no sk_forward_alloc changes) */ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) { if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= (unsigned int)READ_ONCE(sk->sk_rcvbuf)) return -ENOMEM; skb_orphan(skb); skb->sk = sk; skb->destructor = sock_rmem_free; atomic_add(skb->truesize, &sk->sk_rmem_alloc); skb_set_err_queue(skb); /* before exiting rcu section, make sure dst is refcounted */ skb_dst_force(skb); skb_queue_tail(&sk->sk_error_queue, skb); if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); return 0; } EXPORT_SYMBOL(sock_queue_err_skb); static bool is_icmp_err_skb(const struct sk_buff *skb) { return skb && (SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP || SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP6); } struct sk_buff *sock_dequeue_err_skb(struct sock *sk) { struct sk_buff_head *q = &sk->sk_error_queue; struct sk_buff *skb, *skb_next = NULL; bool icmp_next = false; unsigned long flags; spin_lock_irqsave(&q->lock, flags); skb = __skb_dequeue(q); if (skb && (skb_next = skb_peek(q))) { icmp_next = is_icmp_err_skb(skb_next); if (icmp_next) sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_errno; } spin_unlock_irqrestore(&q->lock, flags); if (is_icmp_err_skb(skb) && !icmp_next) sk->sk_err = 0; if (skb_next) sk_error_report(sk); return skb; } EXPORT_SYMBOL(sock_dequeue_err_skb); /** * skb_clone_sk - create clone of skb, and take reference to socket * @skb: the skb to clone * * This function creates a clone of a buffer that holds a reference on * sk_refcnt. Buffers created via this function are meant to be * returned using sock_queue_err_skb, or free via kfree_skb. * * When passing buffers allocated with this function to sock_queue_err_skb * it is necessary to wrap the call with sock_hold/sock_put in order to * prevent the socket from being released prior to being enqueued on * the sk_error_queue. */ struct sk_buff *skb_clone_sk(struct sk_buff *skb) { struct sock *sk = skb->sk; struct sk_buff *clone; if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) return NULL; clone = skb_clone(skb, GFP_ATOMIC); if (!clone) { sock_put(sk); return NULL; } clone->sk = sk; clone->destructor = sock_efree; return clone; } EXPORT_SYMBOL(skb_clone_sk); static void __skb_complete_tx_timestamp(struct sk_buff *skb, struct sock *sk, int tstype, bool opt_stats) { struct sock_exterr_skb *serr; int err; BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb)); serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; serr->ee.ee_info = tstype; serr->opt_stats = opt_stats; serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0; if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { serr->ee.ee_data = skb_shinfo(skb)->tskey; if (sk->sk_protocol == IPPROTO_TCP && sk->sk_type == SOCK_STREAM) serr->ee.ee_data -= atomic_read(&sk->sk_tskey); } err = sock_queue_err_skb(sk, skb); if (err) kfree_skb(skb); } static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly) { bool ret; if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly)) return true; read_lock_bh(&sk->sk_callback_lock); ret = sk->sk_socket && sk->sk_socket->file && file_ns_capable(sk->sk_socket->file, &init_user_ns, CAP_NET_RAW); read_unlock_bh(&sk->sk_callback_lock); return ret; } void skb_complete_tx_timestamp(struct sk_buff *skb, struct skb_shared_hwtstamps *hwtstamps) { struct sock *sk = skb->sk; if (!skb_may_tx_timestamp(sk, false)) goto err; /* Take a reference to prevent skb_orphan() from freeing the socket, * but only if the socket refcount is not zero. */ if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) { *skb_hwtstamps(skb) = *hwtstamps; __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false); sock_put(sk); return; } err: kfree_skb(skb); } EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); void __skb_tstamp_tx(struct sk_buff *orig_skb, const struct sk_buff *ack_skb, struct skb_shared_hwtstamps *hwtstamps, struct sock *sk, int tstype) { struct sk_buff *skb; bool tsonly, opt_stats = false; if (!sk) return; if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) && skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS) return; tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY; if (!skb_may_tx_timestamp(sk, tsonly)) return; if (tsonly) { #ifdef CONFIG_INET if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) && sk->sk_protocol == IPPROTO_TCP && sk->sk_type == SOCK_STREAM) { skb = tcp_get_timestamping_opt_stats(sk, orig_skb, ack_skb); opt_stats = true; } else #endif skb = alloc_skb(0, GFP_ATOMIC); } else { skb = skb_clone(orig_skb, GFP_ATOMIC); if (skb_orphan_frags_rx(skb, GFP_ATOMIC)) { kfree_skb(skb); return; } } if (!skb) return; if (tsonly) { skb_shinfo(skb)->tx_flags |= skb_shinfo(orig_skb)->tx_flags & SKBTX_ANY_TSTAMP; skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey; } if (hwtstamps) *skb_hwtstamps(skb) = *hwtstamps; else skb->tstamp = ktime_get_real(); __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats); } EXPORT_SYMBOL_GPL(__skb_tstamp_tx); void skb_tstamp_tx(struct sk_buff *orig_skb, struct skb_shared_hwtstamps *hwtstamps) { return __skb_tstamp_tx(orig_skb, NULL, hwtstamps, orig_skb->sk, SCM_TSTAMP_SND); } EXPORT_SYMBOL_GPL(skb_tstamp_tx); void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) { struct sock *sk = skb->sk; struct sock_exterr_skb *serr; int err = 1; skb->wifi_acked_valid = 1; skb->wifi_acked = acked; serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS; /* Take a reference to prevent skb_orphan() from freeing the socket, * but only if the socket refcount is not zero. */ if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) { err = sock_queue_err_skb(sk, skb); sock_put(sk); } if (err) kfree_skb(skb); } EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); /** * skb_partial_csum_set - set up and verify partial csum values for packet * @skb: the skb to set * @start: the number of bytes after skb->data to start checksumming. * @off: the offset from start to place the checksum. * * For untrusted partially-checksummed packets, we need to make sure the values * for skb->csum_start and skb->csum_offset are valid so we don't oops. * * This function checks and sets those values and skb->ip_summed: if this * returns false you should drop the packet. */ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) { u32 csum_end = (u32)start + (u32)off + sizeof(__sum16); u32 csum_start = skb_headroom(skb) + (u32)start; if (unlikely(csum_start > U16_MAX || csum_end > skb_headlen(skb))) { net_warn_ratelimited("bad partial csum: csum=%u/%u headroom=%u headlen=%u\n", start, off, skb_headroom(skb), skb_headlen(skb)); return false; } skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = csum_start; skb->csum_offset = off; skb_set_transport_header(skb, start); return true; } EXPORT_SYMBOL_GPL(skb_partial_csum_set); static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len, unsigned int max) { if (skb_headlen(skb) >= len) return 0; /* If we need to pullup then pullup to the max, so we * won't need to do it again. */ if (max > skb->len) max = skb->len; if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL) return -ENOMEM; if (skb_headlen(skb) < len) return -EPROTO; return 0; } #define MAX_TCP_HDR_LEN (15 * 4) static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb, typeof(IPPROTO_IP) proto, unsigned int off) { int err; switch (proto) { case IPPROTO_TCP: err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr), off + MAX_TCP_HDR_LEN); if (!err && !skb_partial_csum_set(skb, off, offsetof(struct tcphdr, check))) err = -EPROTO; return err ? ERR_PTR(err) : &tcp_hdr(skb)->check; case IPPROTO_UDP: err = skb_maybe_pull_tail(skb, off + sizeof(struct udphdr), off + sizeof(struct udphdr)); if (!err && !skb_partial_csum_set(skb, off, offsetof(struct udphdr, check))) err = -EPROTO; return err ? ERR_PTR(err) : &udp_hdr(skb)->check; } return ERR_PTR(-EPROTO); } /* This value should be large enough to cover a tagged ethernet header plus * maximally sized IP and TCP or UDP headers. */ #define MAX_IP_HDR_LEN 128 static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate) { unsigned int off; bool fragment; __sum16 *csum; int err; fragment = false; err = skb_maybe_pull_tail(skb, sizeof(struct iphdr), MAX_IP_HDR_LEN); if (err < 0) goto out; if (ip_is_fragment(ip_hdr(skb))) fragment = true; off = ip_hdrlen(skb); err = -EPROTO; if (fragment) goto out; csum = skb_checksum_setup_ip(skb, ip_hdr(skb)->protocol, off); if (IS_ERR(csum)) return PTR_ERR(csum); if (recalculate) *csum = ~csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, skb->len - off, ip_hdr(skb)->protocol, 0); err = 0; out: return err; } /* This value should be large enough to cover a tagged ethernet header plus * an IPv6 header, all options, and a maximal TCP or UDP header. */ #define MAX_IPV6_HDR_LEN 256 #define OPT_HDR(type, skb, off) \ (type *)(skb_network_header(skb) + (off)) static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate) { int err; u8 nexthdr; unsigned int off; unsigned int len; bool fragment; bool done; __sum16 *csum; fragment = false; done = false; off = sizeof(struct ipv6hdr); err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN); if (err < 0) goto out; nexthdr = ipv6_hdr(skb)->nexthdr; len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); while (off <= len && !done) { switch (nexthdr) { case IPPROTO_DSTOPTS: case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: { struct ipv6_opt_hdr *hp; err = skb_maybe_pull_tail(skb, off + sizeof(struct ipv6_opt_hdr), MAX_IPV6_HDR_LEN); if (err < 0) goto out; hp = OPT_HDR(struct ipv6_opt_hdr, skb, off); nexthdr = hp->nexthdr; off += ipv6_optlen(hp); break; } case IPPROTO_AH: { struct ip_auth_hdr *hp; err = skb_maybe_pull_tail(skb, off + sizeof(struct ip_auth_hdr), MAX_IPV6_HDR_LEN); if (err < 0) goto out; hp = OPT_HDR(struct ip_auth_hdr, skb, off); nexthdr = hp->nexthdr; off += ipv6_authlen(hp); break; } case IPPROTO_FRAGMENT: { struct frag_hdr *hp; err = skb_maybe_pull_tail(skb, off + sizeof(struct frag_hdr), MAX_IPV6_HDR_LEN); if (err < 0) goto out; hp = OPT_HDR(struct frag_hdr, skb, off); if (hp->frag_off & htons(IP6_OFFSET | IP6_MF)) fragment = true; nexthdr = hp->nexthdr; off += sizeof(struct frag_hdr); break; } default: done = true; break; } } err = -EPROTO; if (!done || fragment) goto out; csum = skb_checksum_setup_ip(skb, nexthdr, off); if (IS_ERR(csum)) return PTR_ERR(csum); if (recalculate) *csum = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->len - off, nexthdr, 0); err = 0; out: return err; } /** * skb_checksum_setup - set up partial checksum offset * @skb: the skb to set up * @recalculate: if true the pseudo-header checksum will be recalculated */ int skb_checksum_setup(struct sk_buff *skb, bool recalculate) { int err; switch (skb->protocol) { case htons(ETH_P_IP): err = skb_checksum_setup_ipv4(skb, recalculate); break; case htons(ETH_P_IPV6): err = skb_checksum_setup_ipv6(skb, recalculate); break; default: err = -EPROTO; break; } return err; } EXPORT_SYMBOL(skb_checksum_setup); /** * skb_checksum_maybe_trim - maybe trims the given skb * @skb: the skb to check * @transport_len: the data length beyond the network header * * Checks whether the given skb has data beyond the given transport length. * If so, returns a cloned skb trimmed to this transport length. * Otherwise returns the provided skb. Returns NULL in error cases * (e.g. transport_len exceeds skb length or out-of-memory). * * Caller needs to set the skb transport header and free any returned skb if it * differs from the provided skb. */ static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb, unsigned int transport_len) { struct sk_buff *skb_chk; unsigned int len = skb_transport_offset(skb) + transport_len; int ret; if (skb->len < len) return NULL; else if (skb->len == len) return skb; skb_chk = skb_clone(skb, GFP_ATOMIC); if (!skb_chk) return NULL; ret = pskb_trim_rcsum(skb_chk, len); if (ret) { kfree_skb(skb_chk); return NULL; } return skb_chk; } /** * skb_checksum_trimmed - validate checksum of an skb * @skb: the skb to check * @transport_len: the data length beyond the network header * @skb_chkf: checksum function to use * * Applies the given checksum function skb_chkf to the provided skb. * Returns a checked and maybe trimmed skb. Returns NULL on error. * * If the skb has data beyond the given transport length, then a * trimmed & cloned skb is checked and returned. * * Caller needs to set the skb transport header and free any returned skb if it * differs from the provided skb. */ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, unsigned int transport_len, __sum16(*skb_chkf)(struct sk_buff *skb)) { struct sk_buff *skb_chk; unsigned int offset = skb_transport_offset(skb); __sum16 ret; skb_chk = skb_checksum_maybe_trim(skb, transport_len); if (!skb_chk) goto err; if (!pskb_may_pull(skb_chk, offset)) goto err; skb_pull_rcsum(skb_chk, offset); ret = skb_chkf(skb_chk); skb_push_rcsum(skb_chk, offset); if (ret) goto err; return skb_chk; err: if (skb_chk && skb_chk != skb) kfree_skb(skb_chk); return NULL; } EXPORT_SYMBOL(skb_checksum_trimmed); void __skb_warn_lro_forwarding(const struct sk_buff *skb) { net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n", skb->dev->name); } EXPORT_SYMBOL(__skb_warn_lro_forwarding); void kfree_skb_partial(struct sk_buff *skb, bool head_stolen) { if (head_stolen) { skb_release_head_state(skb); kmem_cache_free(skbuff_head_cache, skb); } else { __kfree_skb(skb); } } EXPORT_SYMBOL(kfree_skb_partial); /** * skb_try_coalesce - try to merge skb to prior one * @to: prior buffer * @from: buffer to add * @fragstolen: pointer to boolean * @delta_truesize: how much more was allocated than was requested */ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, bool *fragstolen, int *delta_truesize) { struct skb_shared_info *to_shinfo, *from_shinfo; int i, delta, len = from->len; *fragstolen = false; if (skb_cloned(to)) return false; /* In general, avoid mixing page_pool and non-page_pool allocated * pages within the same SKB. Additionally avoid dealing with clones * with page_pool pages, in case the SKB is using page_pool fragment * references (PP_FLAG_PAGE_FRAG). Since we only take full page * references for cloned SKBs at the moment that would result in * inconsistent reference counts. * In theory we could take full references if @from is cloned and * !@to->pp_recycle but its tricky (due to potential race with * the clone disappearing) and rare, so not worth dealing with. */ if (to->pp_recycle != from->pp_recycle || (from->pp_recycle && skb_cloned(from))) return false; if (len <= skb_tailroom(to)) { if (len) BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); *delta_truesize = 0; return true; } to_shinfo = skb_shinfo(to); from_shinfo = skb_shinfo(from); if (to_shinfo->frag_list || from_shinfo->frag_list) return false; if (skb_zcopy(to) || skb_zcopy(from)) return false; if (skb_headlen(from) != 0) { struct page *page; unsigned int offset; if (to_shinfo->nr_frags + from_shinfo->nr_frags >= MAX_SKB_FRAGS) return false; if (skb_head_is_locked(from)) return false; delta = from->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff)); page = virt_to_head_page(from->head); offset = from->data - (unsigned char *)page_address(page); skb_fill_page_desc(to, to_shinfo->nr_frags, page, offset, skb_headlen(from)); *fragstolen = true; } else { if (to_shinfo->nr_frags + from_shinfo->nr_frags > MAX_SKB_FRAGS) return false; delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from)); } WARN_ON_ONCE(delta < len); memcpy(to_shinfo->frags + to_shinfo->nr_frags, from_shinfo->frags, from_shinfo->nr_frags * sizeof(skb_frag_t)); to_shinfo->nr_frags += from_shinfo->nr_frags; if (!skb_cloned(from)) from_shinfo->nr_frags = 0; /* if the skb is not cloned this does nothing * since we set nr_frags to 0. */ for (i = 0; i < from_shinfo->nr_frags; i++) __skb_frag_ref(&from_shinfo->frags[i]); to->truesize += delta; to->len += len; to->data_len += len; *delta_truesize = delta; return true; } EXPORT_SYMBOL(skb_try_coalesce); /** * skb_scrub_packet - scrub an skb * * @skb: buffer to clean * @xnet: packet is crossing netns * * skb_scrub_packet can be used after encapsulating or decapsulting a packet * into/from a tunnel. Some information have to be cleared during these * operations. * skb_scrub_packet can also be used to clean a skb before injecting it in * another namespace (@xnet == true). We have to clear all information in the * skb that could impact namespace isolation. */ void skb_scrub_packet(struct sk_buff *skb, bool xnet) { skb->pkt_type = PACKET_HOST; skb->skb_iif = 0; skb->ignore_df = 0; skb_dst_drop(skb); skb_ext_reset(skb); nf_reset_ct(skb); nf_reset_trace(skb); #ifdef CONFIG_NET_SWITCHDEV skb->offload_fwd_mark = 0; skb->offload_l3_fwd_mark = 0; #endif ipvs_reset(skb); if (!xnet) return; skb->mark = 0; skb->tstamp = 0; } EXPORT_SYMBOL_GPL(skb_scrub_packet); /** * skb_gso_transport_seglen - Return length of individual segments of a gso packet * * @skb: GSO skb * * skb_gso_transport_seglen is used to determine the real size of the * individual segments, including Layer4 headers (TCP/UDP). * * The MAC/L2 or network (IP, IPv6) headers are not accounted for. */ static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) { const struct skb_shared_info *shinfo = skb_shinfo(skb); unsigned int thlen = 0; if (skb->encapsulation) { thlen = skb_inner_transport_header(skb) - skb_transport_header(skb); if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) thlen += inner_tcp_hdrlen(skb); } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { thlen = tcp_hdrlen(skb); } else if (unlikely(skb_is_gso_sctp(skb))) { thlen = sizeof(struct sctphdr); } else if (shinfo->gso_type & SKB_GSO_UDP_L4) { thlen = sizeof(struct udphdr); } /* UFO sets gso_size to the size of the fragmentation * payload, i.e. the size of the L4 (UDP) header is already * accounted for. */ return thlen + shinfo->gso_size; } /** * skb_gso_network_seglen - Return length of individual segments of a gso packet * * @skb: GSO skb * * skb_gso_network_seglen is used to determine the real size of the * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP). * * The MAC/L2 header is not accounted for. */ static unsigned int skb_gso_network_seglen(const struct sk_buff *skb) { unsigned int hdr_len = skb_transport_header(skb) - skb_network_header(skb); return hdr_len + skb_gso_transport_seglen(skb); } /** * skb_gso_mac_seglen - Return length of individual segments of a gso packet * * @skb: GSO skb * * skb_gso_mac_seglen is used to determine the real size of the * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4 * headers (TCP/UDP). */ static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) { unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); return hdr_len + skb_gso_transport_seglen(skb); } /** * skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS * * There are a couple of instances where we have a GSO skb, and we * want to determine what size it would be after it is segmented. * * We might want to check: * - L3+L4+payload size (e.g. IP forwarding) * - L2+L3+L4+payload size (e.g. sanity check before passing to driver) * * This is a helper to do that correctly considering GSO_BY_FRAGS. * * @skb: GSO skb * * @seg_len: The segmented length (from skb_gso_*_seglen). In the * GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS]. * * @max_len: The maximum permissible length. * * Returns true if the segmented length <= max length. */ static inline bool skb_gso_size_check(const struct sk_buff *skb, unsigned int seg_len, unsigned int max_len) { const struct skb_shared_info *shinfo = skb_shinfo(skb); const struct sk_buff *iter; if (shinfo->gso_size != GSO_BY_FRAGS) return seg_len <= max_len; /* Undo this so we can re-use header sizes */ seg_len -= GSO_BY_FRAGS; skb_walk_frags(skb, iter) { if (seg_len + skb_headlen(iter) > max_len) return false; } return true; } /** * skb_gso_validate_network_len - Will a split GSO skb fit into a given MTU? * * @skb: GSO skb * @mtu: MTU to validate against * * skb_gso_validate_network_len validates if a given skb will fit a * wanted MTU once split. It considers L3 headers, L4 headers, and the * payload. */ bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu) { return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu); } EXPORT_SYMBOL_GPL(skb_gso_validate_network_len); /** * skb_gso_validate_mac_len - Will a split GSO skb fit in a given length? * * @skb: GSO skb * @len: length to validate against * * skb_gso_validate_mac_len validates if a given skb will fit a wanted * length once split, including L2, L3 and L4 headers and the payload. */ bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len) { return skb_gso_size_check(skb, skb_gso_mac_seglen(skb), len); } EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len); static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) { int mac_len, meta_len; void *meta; if (skb_cow(skb, skb_headroom(skb)) < 0) { kfree_skb(skb); return NULL; } mac_len = skb->data - skb_mac_header(skb); if (likely(mac_len > VLAN_HLEN + ETH_TLEN)) { memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb), mac_len - VLAN_HLEN - ETH_TLEN); } meta_len = skb_metadata_len(skb); if (meta_len) { meta = skb_metadata_end(skb) - meta_len; memmove(meta + VLAN_HLEN, meta, meta_len); } skb->mac_header += VLAN_HLEN; return skb; } struct sk_buff *skb_vlan_untag(struct sk_buff *skb) { struct vlan_hdr *vhdr; u16 vlan_tci; if (unlikely(skb_vlan_tag_present(skb))) { /* vlan_tci is already set-up so leave this for another time */ return skb; } skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) goto err_free; /* We may access the two bytes after vlan_hdr in vlan_set_encap_proto(). */ if (unlikely(!pskb_may_pull(skb, VLAN_HLEN + sizeof(unsigned short)))) goto err_free; vhdr = (struct vlan_hdr *)skb->data; vlan_tci = ntohs(vhdr->h_vlan_TCI); __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci); skb_pull_rcsum(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vhdr); skb = skb_reorder_vlan_header(skb); if (unlikely(!skb)) goto err_free; skb_reset_network_header(skb); if (!skb_transport_header_was_set(skb)) skb_reset_transport_header(skb); skb_reset_mac_len(skb); return skb; err_free: kfree_skb(skb); return NULL; } EXPORT_SYMBOL(skb_vlan_untag); int skb_ensure_writable(struct sk_buff *skb, int write_len) { if (!pskb_may_pull(skb, write_len)) return -ENOMEM; if (!skb_cloned(skb) || skb_clone_writable(skb, write_len)) return 0; return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); } EXPORT_SYMBOL(skb_ensure_writable); /* remove VLAN header from packet and update csum accordingly. * expects a non skb_vlan_tag_present skb with a vlan tag payload */ int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci) { struct vlan_hdr *vhdr; int offset = skb->data - skb_mac_header(skb); int err; if (WARN_ONCE(offset, "__skb_vlan_pop got skb with skb->data not at mac header (offset %d)\n", offset)) { return -EINVAL; } err = skb_ensure_writable(skb, VLAN_ETH_HLEN); if (unlikely(err)) return err; skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN); *vlan_tci = ntohs(vhdr->h_vlan_TCI); memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN); __skb_pull(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vhdr); skb->mac_header += VLAN_HLEN; if (skb_network_offset(skb) < ETH_HLEN) skb_set_network_header(skb, ETH_HLEN); skb_reset_mac_len(skb); return err; } EXPORT_SYMBOL(__skb_vlan_pop); /* Pop a vlan tag either from hwaccel or from payload. * Expects skb->data at mac header. */ int skb_vlan_pop(struct sk_buff *skb) { u16 vlan_tci; __be16 vlan_proto; int err; if (likely(skb_vlan_tag_present(skb))) { __vlan_hwaccel_clear_tag(skb); } else { if (unlikely(!eth_type_vlan(skb->protocol))) return 0; err = __skb_vlan_pop(skb, &vlan_tci); if (err) return err; } /* move next vlan tag to hw accel tag */ if (likely(!eth_type_vlan(skb->protocol))) return 0; vlan_proto = skb->protocol; err = __skb_vlan_pop(skb, &vlan_tci); if (unlikely(err)) return err; __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); return 0; } EXPORT_SYMBOL(skb_vlan_pop); /* Push a vlan tag either into hwaccel or into payload (if hwaccel tag present). * Expects skb->data at mac header. */ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) { if (skb_vlan_tag_present(skb)) { int offset = skb->data - skb_mac_header(skb); int err; if (WARN_ONCE(offset, "skb_vlan_push got skb with skb->data not at mac header (offset %d)\n", offset)) { return -EINVAL; } err = __vlan_insert_tag(skb, skb->vlan_proto, skb_vlan_tag_get(skb)); if (err) return err; skb->protocol = skb->vlan_proto; skb->mac_len += VLAN_HLEN; skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); } __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); return 0; } EXPORT_SYMBOL(skb_vlan_push); /** * skb_eth_pop() - Drop the Ethernet header at the head of a packet * * @skb: Socket buffer to modify * * Drop the Ethernet header of @skb. * * Expects that skb->data points to the mac header and that no VLAN tags are * present. * * Returns 0 on success, -errno otherwise. */ int skb_eth_pop(struct sk_buff *skb) { if (!pskb_may_pull(skb, ETH_HLEN) || skb_vlan_tagged(skb) || skb_network_offset(skb) < ETH_HLEN) return -EPROTO; skb_pull_rcsum(skb, ETH_HLEN); skb_reset_mac_header(skb); skb_reset_mac_len(skb); return 0; } EXPORT_SYMBOL(skb_eth_pop); /** * skb_eth_push() - Add a new Ethernet header at the head of a packet * * @skb: Socket buffer to modify * @dst: Destination MAC address of the new header * @src: Source MAC address of the new header * * Prepend @skb with a new Ethernet header. * * Expects that skb->data points to the mac header, which must be empty. * * Returns 0 on success, -errno otherwise. */ int skb_eth_push(struct sk_buff *skb, const unsigned char *dst, const unsigned char *src) { struct ethhdr *eth; int err; if (skb_network_offset(skb) || skb_vlan_tag_present(skb)) return -EPROTO; err = skb_cow_head(skb, sizeof(*eth)); if (err < 0) return err; skb_push(skb, sizeof(*eth)); skb_reset_mac_header(skb); skb_reset_mac_len(skb); eth = eth_hdr(skb); ether_addr_copy(eth->h_dest, dst); ether_addr_copy(eth->h_source, src); eth->h_proto = skb->protocol; skb_postpush_rcsum(skb, eth, sizeof(*eth)); return 0; } EXPORT_SYMBOL(skb_eth_push); /* Update the ethertype of hdr and the skb csum value if required. */ static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr, __be16 ethertype) { if (skb->ip_summed == CHECKSUM_COMPLETE) { __be16 diff[] = { ~hdr->h_proto, ethertype }; skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum); } hdr->h_proto = ethertype; } /** * skb_mpls_push() - push a new MPLS header after mac_len bytes from start of * the packet * * @skb: buffer * @mpls_lse: MPLS label stack entry to push * @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848) * @mac_len: length of the MAC header * @ethernet: flag to indicate if the resulting packet after skb_mpls_push is * ethernet * * Expects skb->data at mac header. * * Returns 0 on success, -errno otherwise. */ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto, int mac_len, bool ethernet) { struct mpls_shim_hdr *lse; int err; if (unlikely(!eth_p_mpls(mpls_proto))) return -EINVAL; /* Networking stack does not allow simultaneous Tunnel and MPLS GSO. */ if (skb->encapsulation) return -EINVAL; err = skb_cow_head(skb, MPLS_HLEN); if (unlikely(err)) return err; if (!skb->inner_protocol) { skb_set_inner_network_header(skb, skb_network_offset(skb)); skb_set_inner_protocol(skb, skb->protocol); } skb_push(skb, MPLS_HLEN); memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb), mac_len); skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); skb_reset_mac_len(skb); lse = mpls_hdr(skb); lse->label_stack_entry = mpls_lse; skb_postpush_rcsum(skb, lse, MPLS_HLEN); if (ethernet && mac_len >= ETH_HLEN) skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto); skb->protocol = mpls_proto; return 0; } EXPORT_SYMBOL_GPL(skb_mpls_push); /** * skb_mpls_pop() - pop the outermost MPLS header * * @skb: buffer * @next_proto: ethertype of header after popped MPLS header * @mac_len: length of the MAC header * @ethernet: flag to indicate if the packet is ethernet * * Expects skb->data at mac header. * * Returns 0 on success, -errno otherwise. */ int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len, bool ethernet) { int err; if (unlikely(!eth_p_mpls(skb->protocol))) return 0; err = skb_ensure_writable(skb, mac_len + MPLS_HLEN); if (unlikely(err)) return err; skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN); memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), mac_len); __skb_pull(skb, MPLS_HLEN); skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); if (ethernet && mac_len >= ETH_HLEN) { struct ethhdr *hdr; /* use mpls_hdr() to get ethertype to account for VLANs. */ hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN); skb_mod_eth_type(skb, hdr, next_proto); } skb->protocol = next_proto; return 0; } EXPORT_SYMBOL_GPL(skb_mpls_pop); /** * skb_mpls_update_lse() - modify outermost MPLS header and update csum * * @skb: buffer * @mpls_lse: new MPLS label stack entry to update to * * Expects skb->data at mac header. * * Returns 0 on success, -errno otherwise. */ int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse) { int err; if (unlikely(!eth_p_mpls(skb->protocol))) return -EINVAL; err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN); if (unlikely(err)) return err; if (skb->ip_summed == CHECKSUM_COMPLETE) { __be32 diff[] = { ~mpls_hdr(skb)->label_stack_entry, mpls_lse }; skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum); } mpls_hdr(skb)->label_stack_entry = mpls_lse; return 0; } EXPORT_SYMBOL_GPL(skb_mpls_update_lse); /** * skb_mpls_dec_ttl() - decrement the TTL of the outermost MPLS header * * @skb: buffer * * Expects skb->data at mac header. * * Returns 0 on success, -errno otherwise. */ int skb_mpls_dec_ttl(struct sk_buff *skb) { u32 lse; u8 ttl; if (unlikely(!eth_p_mpls(skb->protocol))) return -EINVAL; if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN)) return -ENOMEM; lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry); ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT; if (!--ttl) return -EINVAL; lse &= ~MPLS_LS_TTL_MASK; lse |= ttl << MPLS_LS_TTL_SHIFT; return skb_mpls_update_lse(skb, cpu_to_be32(lse)); } EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl); /** * alloc_skb_with_frags - allocate skb with page frags * * @header_len: size of linear part * @data_len: needed length in frags * @max_page_order: max page order desired. * @errcode: pointer to error code if any * @gfp_mask: allocation mask * * This can be used to allocate a paged skb, given a maximal order for frags. */ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, unsigned long data_len, int max_page_order, int *errcode, gfp_t gfp_mask) { int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; unsigned long chunk; struct sk_buff *skb; struct page *page; int i; *errcode = -EMSGSIZE; /* Note this test could be relaxed, if we succeed to allocate * high order pages... */ if (npages > MAX_SKB_FRAGS) return NULL; *errcode = -ENOBUFS; skb = alloc_skb(header_len, gfp_mask); if (!skb) return NULL; skb->truesize += npages << PAGE_SHIFT; for (i = 0; npages > 0; i++) { int order = max_page_order; while (order) { if (npages >= 1 << order) { page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP | __GFP_NOWARN, order); if (page) goto fill_page; /* Do not retry other high order allocations */ order = 1; max_page_order = 0; } order--; } page = alloc_page(gfp_mask); if (!page) goto failure; fill_page: chunk = min_t(unsigned long, data_len, PAGE_SIZE << order); skb_fill_page_desc(skb, i, page, 0, chunk); data_len -= chunk; npages -= 1 << order; } return skb; failure: kfree_skb(skb); return NULL; } EXPORT_SYMBOL(alloc_skb_with_frags); /* carve out the first off bytes from skb when off < headlen */ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, const int headlen, gfp_t gfp_mask) { int i; int size = skb_end_offset(skb); int new_hlen = headlen - off; u8 *data; size = SKB_DATA_ALIGN(size); if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), gfp_mask, NUMA_NO_NODE, NULL); if (!data) return -ENOMEM; size = SKB_WITH_OVERHEAD(ksize(data)); /* Copy real data, and all frags */ skb_copy_from_linear_data_offset(skb, off, data, new_hlen); skb->len -= off; memcpy((struct skb_shared_info *)(data + size), skb_shinfo(skb), offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); if (skb_cloned(skb)) { /* drop the old head gracefully */ if (skb_orphan_frags(skb, gfp_mask)) { kfree(data); return -ENOMEM; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) skb_frag_ref(skb, i); if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); skb_release_data(skb); } else { /* we can reuse existing recount- all we did was * relocate values */ skb_free_head(skb); } skb->head = data; skb->data = data; skb->head_frag = 0; skb_set_end_offset(skb, size); skb_set_tail_pointer(skb, skb_headlen(skb)); skb_headers_offset_update(skb, 0); skb->cloned = 0; skb->hdr_len = 0; skb->nohdr = 0; atomic_set(&skb_shinfo(skb)->dataref, 1); return 0; } static int pskb_carve(struct sk_buff *skb, const u32 off, gfp_t gfp); /* carve out the first eat bytes from skb's frag_list. May recurse into * pskb_carve() */ static int pskb_carve_frag_list(struct sk_buff *skb, struct skb_shared_info *shinfo, int eat, gfp_t gfp_mask) { struct sk_buff *list = shinfo->frag_list; struct sk_buff *clone = NULL; struct sk_buff *insp = NULL; do { if (!list) { pr_err("Not enough bytes to eat. Want %d\n", eat); return -EFAULT; } if (list->len <= eat) { /* Eaten as whole. */ eat -= list->len; list = list->next; insp = list; } else { /* Eaten partially. */ if (skb_shared(list)) { clone = skb_clone(list, gfp_mask); if (!clone) return -ENOMEM; insp = list->next; list = clone; } else { /* This may be pulled without problems. */ insp = list; } if (pskb_carve(list, eat, gfp_mask) < 0) { kfree_skb(clone); return -ENOMEM; } break; } } while (eat); /* Free pulled out fragments. */ while ((list = shinfo->frag_list) != insp) { shinfo->frag_list = list->next; consume_skb(list); } /* And insert new clone at head. */ if (clone) { clone->next = list; shinfo->frag_list = clone; } return 0; } /* carve off first len bytes from skb. Split line (off) is in the * non-linear part of skb */ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, int pos, gfp_t gfp_mask) { int i, k = 0; int size = skb_end_offset(skb); u8 *data; const int nfrags = skb_shinfo(skb)->nr_frags; struct skb_shared_info *shinfo; size = SKB_DATA_ALIGN(size); if (skb_pfmemalloc(skb)) gfp_mask |= __GFP_MEMALLOC; data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), gfp_mask, NUMA_NO_NODE, NULL); if (!data) return -ENOMEM; size = SKB_WITH_OVERHEAD(ksize(data)); memcpy((struct skb_shared_info *)(data + size), skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0])); if (skb_orphan_frags(skb, gfp_mask)) { kfree(data); return -ENOMEM; } shinfo = (struct skb_shared_info *)(data + size); for (i = 0; i < nfrags; i++) { int fsize = skb_frag_size(&skb_shinfo(skb)->frags[i]); if (pos + fsize > off) { shinfo->frags[k] = skb_shinfo(skb)->frags[i]; if (pos < off) { /* Split frag. * We have two variants in this case: * 1. Move all the frag to the second * part, if it is possible. F.e. * this approach is mandatory for TUX, * where splitting is expensive. * 2. Split is accurately. We make this. */ skb_frag_off_add(&shinfo->frags[0], off - pos); skb_frag_size_sub(&shinfo->frags[0], off - pos); } skb_frag_ref(skb, i); k++; } pos += fsize; } shinfo->nr_frags = k; if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); /* split line is in frag list */ if (k == 0 && pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask)) { /* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */ if (skb_has_frag_list(skb)) kfree_skb_list(skb_shinfo(skb)->frag_list); kfree(data); return -ENOMEM; } skb_release_data(skb); skb->head = data; skb->head_frag = 0; skb->data = data; skb_set_end_offset(skb, size); skb_reset_tail_pointer(skb); skb_headers_offset_update(skb, 0); skb->cloned = 0; skb->hdr_len = 0; skb->nohdr = 0; skb->len -= off; skb->data_len = skb->len; atomic_set(&skb_shinfo(skb)->dataref, 1); return 0; } /* remove len bytes from the beginning of the skb */ static int pskb_carve(struct sk_buff *skb, const u32 len, gfp_t gfp) { int headlen = skb_headlen(skb); if (len < headlen) return pskb_carve_inside_header(skb, len, headlen, gfp); else return pskb_carve_inside_nonlinear(skb, len, headlen, gfp); } /* Extract to_copy bytes starting at off from skb, and return this in * a new skb */ struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy, gfp_t gfp) { struct sk_buff *clone = skb_clone(skb, gfp); if (!clone) return NULL; if (pskb_carve(clone, off, gfp) < 0 || pskb_trim(clone, to_copy)) { kfree_skb(clone); return NULL; } return clone; } EXPORT_SYMBOL(pskb_extract); /** * skb_condense - try to get rid of fragments/frag_list if possible * @skb: buffer * * Can be used to save memory before skb is added to a busy queue. * If packet has bytes in frags and enough tail room in skb->head, * pull all of them, so that we can free the frags right now and adjust * truesize. * Notes: * We do not reallocate skb->head thus can not fail. * Caller must re-evaluate skb->truesize if needed. */ void skb_condense(struct sk_buff *skb) { if (skb->data_len) { if (skb->data_len > skb->end - skb->tail || skb_cloned(skb)) return; /* Nice, we can free page frag(s) right now */ __pskb_pull_tail(skb, skb->data_len); } /* At this point, skb->truesize might be over estimated, * because skb had a fragment, and fragments do not tell * their truesize. * When we pulled its content into skb->head, fragment * was freed, but __pskb_pull_tail() could not possibly * adjust skb->truesize, not knowing the frag truesize. */ skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); } #ifdef CONFIG_SKB_EXTENSIONS static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id) { return (void *)ext + (ext->offset[id] * SKB_EXT_ALIGN_VALUE); } /** * __skb_ext_alloc - allocate a new skb extensions storage * * @flags: See kmalloc(). * * Returns the newly allocated pointer. The pointer can later attached to a * skb via __skb_ext_set(). * Note: caller must handle the skb_ext as an opaque data. */ struct skb_ext *__skb_ext_alloc(gfp_t flags) { struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, flags); if (new) { memset(new->offset, 0, sizeof(new->offset)); refcount_set(&new->refcnt, 1); } return new; } static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old, unsigned int old_active) { struct skb_ext *new; if (refcount_read(&old->refcnt) == 1) return old; new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC); if (!new) return NULL; memcpy(new, old, old->chunks * SKB_EXT_ALIGN_VALUE); refcount_set(&new->refcnt, 1); #ifdef CONFIG_XFRM if (old_active & (1 << SKB_EXT_SEC_PATH)) { struct sec_path *sp = skb_ext_get_ptr(old, SKB_EXT_SEC_PATH); unsigned int i; for (i = 0; i < sp->len; i++) xfrm_state_hold(sp->xvec[i]); } #endif __skb_ext_put(old); return new; } /** * __skb_ext_set - attach the specified extension storage to this skb * @skb: buffer * @id: extension id * @ext: extension storage previously allocated via __skb_ext_alloc() * * Existing extensions, if any, are cleared. * * Returns the pointer to the extension. */ void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id, struct skb_ext *ext) { unsigned int newlen, newoff = SKB_EXT_CHUNKSIZEOF(*ext); skb_ext_put(skb); newlen = newoff + skb_ext_type_len[id]; ext->chunks = newlen; ext->offset[id] = newoff; skb->extensions = ext; skb->active_extensions = 1 << id; return skb_ext_get_ptr(ext, id); } /** * skb_ext_add - allocate space for given extension, COW if needed * @skb: buffer * @id: extension to allocate space for * * Allocates enough space for the given extension. * If the extension is already present, a pointer to that extension * is returned. * * If the skb was cloned, COW applies and the returned memory can be * modified without changing the extension space of clones buffers. * * Returns pointer to the extension or NULL on allocation failure. */ void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id) { struct skb_ext *new, *old = NULL; unsigned int newlen, newoff; if (skb->active_extensions) { old = skb->extensions; new = skb_ext_maybe_cow(old, skb->active_extensions); if (!new) return NULL; if (__skb_ext_exist(new, id)) goto set_active; newoff = new->chunks; } else { newoff = SKB_EXT_CHUNKSIZEOF(*new); new = __skb_ext_alloc(GFP_ATOMIC); if (!new) return NULL; } newlen = newoff + skb_ext_type_len[id]; new->chunks = newlen; new->offset[id] = newoff; set_active: skb->slow_gro = 1; skb->extensions = new; skb->active_extensions |= 1 << id; return skb_ext_get_ptr(new, id); } EXPORT_SYMBOL(skb_ext_add); #ifdef CONFIG_XFRM static void skb_ext_put_sp(struct sec_path *sp) { unsigned int i; for (i = 0; i < sp->len; i++) xfrm_state_put(sp->xvec[i]); } #endif void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id) { struct skb_ext *ext = skb->extensions; skb->active_extensions &= ~(1 << id); if (skb->active_extensions == 0) { skb->extensions = NULL; __skb_ext_put(ext); #ifdef CONFIG_XFRM } else if (id == SKB_EXT_SEC_PATH && refcount_read(&ext->refcnt) == 1) { struct sec_path *sp = skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH); skb_ext_put_sp(sp); sp->len = 0; #endif } } EXPORT_SYMBOL(__skb_ext_del); void __skb_ext_put(struct skb_ext *ext) { /* If this is last clone, nothing can increment * it after check passes. Avoids one atomic op. */ if (refcount_read(&ext->refcnt) == 1) goto free_now; if (!refcount_dec_and_test(&ext->refcnt)) return; free_now: #ifdef CONFIG_XFRM if (__skb_ext_exist(ext, SKB_EXT_SEC_PATH)) skb_ext_put_sp(skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH)); #endif kmem_cache_free(skbuff_ext_cache, ext); } EXPORT_SYMBOL(__skb_ext_put); #endif /* CONFIG_SKB_EXTENSIONS */ |
3 27 39 39 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _RDS_RDS_H #define _RDS_RDS_H #include <net/sock.h> #include <linux/scatterlist.h> #include <linux/highmem.h> #include <rdma/rdma_cm.h> #include <linux/mutex.h> #include <linux/rds.h> #include <linux/rhashtable.h> #include <linux/refcount.h> #include <linux/in6.h> #include "info.h" /* * RDS Network protocol version */ #define RDS_PROTOCOL_3_0 0x0300 #define RDS_PROTOCOL_3_1 0x0301 #define RDS_PROTOCOL_4_0 0x0400 #define RDS_PROTOCOL_4_1 0x0401 #define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_1 #define RDS_PROTOCOL_MAJOR(v) ((v) >> 8) #define RDS_PROTOCOL_MINOR(v) ((v) & 255) #define RDS_PROTOCOL(maj, min) (((maj) << 8) | min) #define RDS_PROTOCOL_COMPAT_VERSION RDS_PROTOCOL_3_1 /* The following ports, 16385, 18634, 18635, are registered with IANA as * the ports to be used for RDS over TCP and UDP. Currently, only RDS over * TCP and RDS over IB/RDMA are implemented. 18634 is the historical value * used for the RDMA_CM listener port. RDS/TCP uses port 16385. After * IPv6 work, RDMA_CM also uses 16385 as the listener port. 18634 is kept * to ensure compatibility with older RDS modules. Those ports are defined * in each transport's header file. */ #define RDS_PORT 18634 #ifdef ATOMIC64_INIT #define KERNEL_HAS_ATOMIC64 #endif #ifdef RDS_DEBUG #define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args) #else /* sigh, pr_debug() causes unused variable warnings */ static inline __printf(1, 2) void rdsdebug(char *fmt, ...) { } #endif #define RDS_FRAG_SHIFT 12 #define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT)) /* Used to limit both RDMA and non-RDMA RDS message to 1MB */ #define RDS_MAX_MSG_SIZE ((unsigned int)(1 << 20)) #define RDS_CONG_MAP_BYTES (65536 / 8) #define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE) #define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8) struct rds_cong_map { struct rb_node m_rb_node; struct in6_addr m_addr; wait_queue_head_t m_waitq; struct list_head m_conn_list; unsigned long m_page_addrs[RDS_CONG_MAP_PAGES]; }; /* * This is how we will track the connection state: * A connection is always in one of the following * states. Updates to the state are atomic and imply * a memory barrier. */ enum { RDS_CONN_DOWN = 0, RDS_CONN_CONNECTING, RDS_CONN_DISCONNECTING, RDS_CONN_UP, RDS_CONN_RESETTING, RDS_CONN_ERROR, }; /* Bits for c_flags */ #define RDS_LL_SEND_FULL 0 #define RDS_RECONNECT_PENDING 1 #define RDS_IN_XMIT 2 #define RDS_RECV_REFILL 3 #define RDS_DESTROY_PENDING 4 /* Max number of multipaths per RDS connection. Must be a power of 2 */ #define RDS_MPATH_WORKERS 8 #define RDS_MPATH_HASH(rs, n) (jhash_1word((rs)->rs_bound_port, \ (rs)->rs_hash_initval) & ((n) - 1)) #define IS_CANONICAL(laddr, faddr) (htonl(laddr) < htonl(faddr)) /* Per mpath connection state */ struct rds_conn_path { struct rds_connection *cp_conn; struct rds_message *cp_xmit_rm; unsigned long cp_xmit_sg; unsigned int cp_xmit_hdr_off; unsigned int cp_xmit_data_off; unsigned int cp_xmit_atomic_sent; unsigned int cp_xmit_rdma_sent; unsigned int cp_xmit_data_sent; spinlock_t cp_lock; /* protect msg queues */ u64 cp_next_tx_seq; struct list_head cp_send_queue; struct list_head cp_retrans; u64 cp_next_rx_seq; void *cp_transport_data; atomic_t cp_state; unsigned long cp_send_gen; unsigned long cp_flags; unsigned long cp_reconnect_jiffies; struct delayed_work cp_send_w; struct delayed_work cp_recv_w; struct delayed_work cp_conn_w; struct work_struct cp_down_w; struct mutex cp_cm_lock; /* protect cp_state & cm */ wait_queue_head_t cp_waitq; unsigned int cp_unacked_packets; unsigned int cp_unacked_bytes; unsigned int cp_index; }; /* One rds_connection per RDS address pair */ struct rds_connection { struct hlist_node c_hash_node; struct in6_addr c_laddr; struct in6_addr c_faddr; int c_dev_if; /* ifindex used for this conn */ int c_bound_if; /* ifindex of c_laddr */ unsigned int c_loopback:1, c_isv6:1, c_ping_triggered:1, c_pad_to_32:29; int c_npaths; struct rds_connection *c_passive; struct rds_transport *c_trans; struct rds_cong_map *c_lcong; struct rds_cong_map *c_fcong; /* Protocol version */ unsigned int c_proposed_version; unsigned int c_version; possible_net_t c_net; /* TOS */ u8 c_tos; struct list_head c_map_item; unsigned long c_map_queued; struct rds_conn_path *c_path; wait_queue_head_t c_hs_waitq; /* handshake waitq */ u32 c_my_gen_num; u32 c_peer_gen_num; }; static inline struct net *rds_conn_net(struct rds_connection *conn) { return read_pnet(&conn->c_net); } static inline void rds_conn_net_set(struct rds_connection *conn, struct net *net) { write_pnet(&conn->c_net, net); } #define RDS_FLAG_CONG_BITMAP 0x01 #define RDS_FLAG_ACK_REQUIRED 0x02 #define RDS_FLAG_RETRANSMITTED 0x04 #define RDS_MAX_ADV_CREDIT 255 /* RDS_FLAG_PROBE_PORT is the reserved sport used for sending a ping * probe to exchange control information before establishing a connection. * Currently the control information that is exchanged is the number of * supported paths. If the peer is a legacy (older kernel revision) peer, * it would return a pong message without additional control information * that would then alert the sender that the peer was an older rev. */ #define RDS_FLAG_PROBE_PORT 1 #define RDS_HS_PROBE(sport, dport) \ ((sport == RDS_FLAG_PROBE_PORT && dport == 0) || \ (sport == 0 && dport == RDS_FLAG_PROBE_PORT)) /* * Maximum space available for extension headers. */ #define RDS_HEADER_EXT_SPACE 16 struct rds_header { __be64 h_sequence; __be64 h_ack; __be32 h_len; __be16 h_sport; __be16 h_dport; u8 h_flags; u8 h_credit; u8 h_padding[4]; __sum16 h_csum; u8 h_exthdr[RDS_HEADER_EXT_SPACE]; }; /* * Reserved - indicates end of extensions */ #define RDS_EXTHDR_NONE 0 /* * This extension header is included in the very * first message that is sent on a new connection, * and identifies the protocol level. This will help * rolling updates if a future change requires breaking * the protocol. * NB: This is no longer true for IB, where we do a version * negotiation during the connection setup phase (protocol * version information is included in the RDMA CM private data). */ #define RDS_EXTHDR_VERSION 1 struct rds_ext_header_version { __be32 h_version; }; /* * This extension header is included in the RDS message * chasing an RDMA operation. */ #define RDS_EXTHDR_RDMA 2 struct rds_ext_header_rdma { __be32 h_rdma_rkey; }; /* * This extension header tells the peer about the * destination <R_Key,offset> of the requested RDMA * operation. */ #define RDS_EXTHDR_RDMA_DEST 3 struct rds_ext_header_rdma_dest { __be32 h_rdma_rkey; __be32 h_rdma_offset; }; /* Extension header announcing number of paths. * Implicit length = 2 bytes. */ #define RDS_EXTHDR_NPATHS 5 #define RDS_EXTHDR_GEN_NUM 6 #define __RDS_EXTHDR_MAX 16 /* for now */ #define RDS_RX_MAX_TRACES (RDS_MSG_RX_DGRAM_TRACE_MAX + 1) #define RDS_MSG_RX_HDR 0 #define RDS_MSG_RX_START 1 #define RDS_MSG_RX_END 2 #define RDS_MSG_RX_CMSG 3 /* The following values are whitelisted for usercopy */ struct rds_inc_usercopy { rds_rdma_cookie_t rdma_cookie; ktime_t rx_tstamp; }; struct rds_incoming { refcount_t i_refcount; struct list_head i_item; struct rds_connection *i_conn; struct rds_conn_path *i_conn_path; struct rds_header i_hdr; unsigned long i_rx_jiffies; struct in6_addr i_saddr; struct rds_inc_usercopy i_usercopy; u64 i_rx_lat_trace[RDS_RX_MAX_TRACES]; }; struct rds_mr { struct rb_node r_rb_node; struct kref r_kref; u32 r_key; /* A copy of the creation flags */ unsigned int r_use_once:1; unsigned int r_invalidate:1; unsigned int r_write:1; struct rds_sock *r_sock; /* back pointer to the socket that owns us */ struct rds_transport *r_trans; void *r_trans_private; }; static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset) { return r_key | (((u64) offset) << 32); } static inline u32 rds_rdma_cookie_key(rds_rdma_cookie_t cookie) { return cookie; } static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie) { return cookie >> 32; } /* atomic operation types */ #define RDS_ATOMIC_TYPE_CSWP 0 #define RDS_ATOMIC_TYPE_FADD 1 /* * m_sock_item and m_conn_item are on lists that are serialized under * conn->c_lock. m_sock_item has additional meaning in that once it is empty * the message will not be put back on the retransmit list after being sent. * messages that are canceled while being sent rely on this. * * m_inc is used by loopback so that it can pass an incoming message straight * back up into the rx path. It embeds a wire header which is also used by * the send path, which is kind of awkward. * * m_sock_item indicates the message's presence on a socket's send or receive * queue. m_rs will point to that socket. * * m_daddr is used by cancellation to prune messages to a given destination. * * The RDS_MSG_ON_SOCK and RDS_MSG_ON_CONN flags are used to avoid lock * nesting. As paths iterate over messages on a sock, or conn, they must * also lock the conn, or sock, to remove the message from those lists too. * Testing the flag to determine if the message is still on the lists lets * us avoid testing the list_head directly. That means each path can use * the message's list_head to keep it on a local list while juggling locks * without confusing the other path. * * m_ack_seq is an optional field set by transports who need a different * sequence number range to invalidate. They can use this in a callback * that they pass to rds_send_drop_acked() to see if each message has been * acked. The HAS_ACK_SEQ flag can be used to detect messages which haven't * had ack_seq set yet. */ #define RDS_MSG_ON_SOCK 1 #define RDS_MSG_ON_CONN 2 #define RDS_MSG_HAS_ACK_SEQ 3 #define RDS_MSG_ACK_REQUIRED 4 #define RDS_MSG_RETRANSMITTED 5 #define RDS_MSG_MAPPED 6 #define RDS_MSG_PAGEVEC 7 #define RDS_MSG_FLUSH 8 struct rds_znotifier { struct mmpin z_mmp; u32 z_cookie; }; struct rds_msg_zcopy_info { struct list_head rs_zcookie_next; union { struct rds_znotifier znotif; struct rds_zcopy_cookies zcookies; }; }; struct rds_msg_zcopy_queue { struct list_head zcookie_head; spinlock_t lock; /* protects zcookie_head queue */ }; static inline void rds_message_zcopy_queue_init(struct rds_msg_zcopy_queue *q) { spin_lock_init(&q->lock); INIT_LIST_HEAD(&q->zcookie_head); } struct rds_iov_vector { struct rds_iovec *iov; int len; }; struct rds_iov_vector_arr { struct rds_iov_vector *vec; int len; int indx; int incr; }; struct rds_message { refcount_t m_refcount; struct list_head m_sock_item; struct list_head m_conn_item; struct rds_incoming m_inc; u64 m_ack_seq; struct in6_addr m_daddr; unsigned long m_flags; /* Never access m_rs without holding m_rs_lock. * Lock nesting is * rm->m_rs_lock * -> rs->rs_lock */ spinlock_t m_rs_lock; wait_queue_head_t m_flush_wait; struct rds_sock *m_rs; /* cookie to send to remote, in rds header */ rds_rdma_cookie_t m_rdma_cookie; unsigned int m_used_sgs; unsigned int m_total_sgs; void *m_final_op; struct { struct rm_atomic_op { int op_type; union { struct { uint64_t compare; uint64_t swap; uint64_t compare_mask; uint64_t swap_mask; } op_m_cswp; struct { uint64_t add; uint64_t nocarry_mask; } op_m_fadd; }; u32 op_rkey; u64 op_remote_addr; unsigned int op_notify:1; unsigned int op_recverr:1; unsigned int op_mapped:1; unsigned int op_silent:1; unsigned int op_active:1; struct scatterlist *op_sg; struct rds_notifier *op_notifier; struct rds_mr *op_rdma_mr; } atomic; struct rm_rdma_op { u32 op_rkey; u64 op_remote_addr; unsigned int op_write:1; unsigned int op_fence:1; unsigned int op_notify:1; unsigned int op_recverr:1; unsigned int op_mapped:1; unsigned int op_silent:1; unsigned int op_active:1; unsigned int op_bytes; unsigned int op_nents; unsigned int op_count; struct scatterlist *op_sg; struct rds_notifier *op_notifier; struct rds_mr *op_rdma_mr; u64 op_odp_addr; struct rds_mr *op_odp_mr; } rdma; struct rm_data_op { unsigned int op_active:1; unsigned int op_nents; unsigned int op_count; unsigned int op_dmasg; unsigned int op_dmaoff; struct rds_znotifier *op_mmp_znotifier; struct scatterlist *op_sg; } data; }; struct rds_conn_path *m_conn_path; }; /* * The RDS notifier is used (optionally) to tell the application about * completed RDMA operations. Rather than keeping the whole rds message * around on the queue, we allocate a small notifier that is put on the * socket's notifier_list. Notifications are delivered to the application * through control messages. */ struct rds_notifier { struct list_head n_list; uint64_t n_user_token; int n_status; }; /* Available as part of RDS core, so doesn't need to participate * in get_preferred transport etc */ #define RDS_TRANS_LOOP 3 /** * struct rds_transport - transport specific behavioural hooks * * @xmit: .xmit is called by rds_send_xmit() to tell the transport to send * part of a message. The caller serializes on the send_sem so this * doesn't need to be reentrant for a given conn. The header must be * sent before the data payload. .xmit must be prepared to send a * message with no data payload. .xmit should return the number of * bytes that were sent down the connection, including header bytes. * Returning 0 tells the caller that it doesn't need to perform any * additional work now. This is usually the case when the transport has * filled the sending queue for its connection and will handle * triggering the rds thread to continue the send when space becomes * available. Returning -EAGAIN tells the caller to retry the send * immediately. Returning -ENOMEM tells the caller to retry the send at * some point in the future. * * @conn_shutdown: conn_shutdown stops traffic on the given connection. Once * it returns the connection can not call rds_recv_incoming(). * This will only be called once after conn_connect returns * non-zero success and will The caller serializes this with * the send and connecting paths (xmit_* and conn_*). The * transport is responsible for other serialization, including * rds_recv_incoming(). This is called in process context but * should try hard not to block. */ struct rds_transport { char t_name[TRANSNAMSIZ]; struct list_head t_item; struct module *t_owner; unsigned int t_prefer_loopback:1, t_mp_capable:1; unsigned int t_type; int (*laddr_check)(struct net *net, const struct in6_addr *addr, __u32 scope_id); int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp); void (*conn_free)(void *data); int (*conn_path_connect)(struct rds_conn_path *cp); void (*conn_path_shutdown)(struct rds_conn_path *conn); void (*xmit_path_prepare)(struct rds_conn_path *cp); void (*xmit_path_complete)(struct rds_conn_path *cp); int (*xmit)(struct rds_connection *conn, struct rds_message *rm, unsigned int hdr_off, unsigned int sg, unsigned int off); int (*xmit_rdma)(struct rds_connection *conn, struct rm_rdma_op *op); int (*xmit_atomic)(struct rds_connection *conn, struct rm_atomic_op *op); int (*recv_path)(struct rds_conn_path *cp); int (*inc_copy_to_user)(struct rds_incoming *inc, struct iov_iter *to); void (*inc_free)(struct rds_incoming *inc); int (*cm_handle_connect)(struct rdma_cm_id *cm_id, struct rdma_cm_event *event, bool isv6); int (*cm_initiate_connect)(struct rdma_cm_id *cm_id, bool isv6); void (*cm_connect_complete)(struct rds_connection *conn, struct rdma_cm_event *event); unsigned int (*stats_info_copy)(struct rds_info_iterator *iter, unsigned int avail); void (*exit)(void); void *(*get_mr)(struct scatterlist *sg, unsigned long nr_sg, struct rds_sock *rs, u32 *key_ret, struct rds_connection *conn, u64 start, u64 length, int need_odp); void (*sync_mr)(void *trans_private, int direction); void (*free_mr)(void *trans_private, int invalidate); void (*flush_mrs)(void); bool (*t_unloading)(struct rds_connection *conn); u8 (*get_tos_map)(u8 tos); }; /* Bind hash table key length. It is the sum of the size of a struct * in6_addr, a scope_id and a port. */ #define RDS_BOUND_KEY_LEN \ (sizeof(struct in6_addr) + sizeof(__u32) + sizeof(__be16)) struct rds_sock { struct sock rs_sk; u64 rs_user_addr; u64 rs_user_bytes; /* * bound_addr used for both incoming and outgoing, no INADDR_ANY * support. */ struct rhash_head rs_bound_node; u8 rs_bound_key[RDS_BOUND_KEY_LEN]; struct sockaddr_in6 rs_bound_sin6; #define rs_bound_addr rs_bound_sin6.sin6_addr #define rs_bound_addr_v4 rs_bound_sin6.sin6_addr.s6_addr32[3] #define rs_bound_port rs_bound_sin6.sin6_port #define rs_bound_scope_id rs_bound_sin6.sin6_scope_id struct in6_addr rs_conn_addr; #define rs_conn_addr_v4 rs_conn_addr.s6_addr32[3] __be16 rs_conn_port; struct rds_transport *rs_transport; /* * rds_sendmsg caches the conn it used the last time around. * This helps avoid costly lookups. */ struct rds_connection *rs_conn; /* flag indicating we were congested or not */ int rs_congested; /* seen congestion (ENOBUFS) when sending? */ int rs_seen_congestion; /* rs_lock protects all these adjacent members before the newline */ spinlock_t rs_lock; struct list_head rs_send_queue; u32 rs_snd_bytes; int rs_rcv_bytes; struct list_head rs_notify_queue; /* currently used for failed RDMAs */ /* Congestion wake_up. If rs_cong_monitor is set, we use cong_mask * to decide whether the application should be woken up. * If not set, we use rs_cong_track to find out whether a cong map * update arrived. */ uint64_t rs_cong_mask; uint64_t rs_cong_notify; struct list_head rs_cong_list; unsigned long rs_cong_track; /* * rs_recv_lock protects the receive queue, and is * used to serialize with rds_release. */ rwlock_t rs_recv_lock; struct list_head rs_recv_queue; /* just for stats reporting */ struct list_head rs_item; /* these have their own lock */ spinlock_t rs_rdma_lock; struct rb_root rs_rdma_keys; /* Socket options - in case there will be more */ unsigned char rs_recverr, rs_cong_monitor; u32 rs_hash_initval; /* Socket receive path trace points*/ u8 rs_rx_traces; u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX]; struct rds_msg_zcopy_queue rs_zcookie_queue; u8 rs_tos; }; static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk) { return container_of(sk, struct rds_sock, rs_sk); } static inline struct sock *rds_rs_to_sk(struct rds_sock *rs) { return &rs->rs_sk; } /* * The stack assigns sk_sndbuf and sk_rcvbuf to twice the specified value * to account for overhead. We don't account for overhead, we just apply * the number of payload bytes to the specified value. */ static inline int rds_sk_sndbuf(struct rds_sock *rs) { return rds_rs_to_sk(rs)->sk_sndbuf / 2; } static inline int rds_sk_rcvbuf(struct rds_sock *rs) { return rds_rs_to_sk(rs)->sk_rcvbuf / 2; } struct rds_statistics { uint64_t s_conn_reset; uint64_t s_recv_drop_bad_checksum; uint64_t s_recv_drop_old_seq; uint64_t s_recv_drop_no_sock; uint64_t s_recv_drop_dead_sock; uint64_t s_recv_deliver_raced; uint64_t s_recv_delivered; uint64_t s_recv_queued; uint64_t s_recv_immediate_retry; uint64_t s_recv_delayed_retry; uint64_t s_recv_ack_required; uint64_t s_recv_rdma_bytes; uint64_t s_recv_ping; uint64_t s_send_queue_empty; uint64_t s_send_queue_full; uint64_t s_send_lock_contention; uint64_t s_send_lock_queue_raced; uint64_t s_send_immediate_retry; uint64_t s_send_delayed_retry; uint64_t s_send_drop_acked; uint64_t s_send_ack_required; uint64_t s_send_queued; uint64_t s_send_rdma; uint64_t s_send_rdma_bytes; uint64_t s_send_pong; uint64_t s_page_remainder_hit; uint64_t s_page_remainder_miss; uint64_t s_copy_to_user; uint64_t s_copy_from_user; uint64_t s_cong_update_queued; uint64_t s_cong_update_received; uint64_t s_cong_send_error; uint64_t s_cong_send_blocked; uint64_t s_recv_bytes_added_to_socket; uint64_t s_recv_bytes_removed_from_socket; uint64_t s_send_stuck_rm; }; /* af_rds.c */ void rds_sock_addref(struct rds_sock *rs); void rds_sock_put(struct rds_sock *rs); void rds_wake_sk_sleep(struct rds_sock *rs); static inline void __rds_wake_sk_sleep(struct sock *sk) { wait_queue_head_t *waitq = sk_sleep(sk); if (!sock_flag(sk, SOCK_DEAD) && waitq) wake_up(waitq); } extern wait_queue_head_t rds_poll_waitq; /* bind.c */ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); void rds_remove_bound(struct rds_sock *rs); struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port, __u32 scope_id); int rds_bind_lock_init(void); void rds_bind_lock_destroy(void); /* cong.c */ int rds_cong_get_maps(struct rds_connection *conn); void rds_cong_add_conn(struct rds_connection *conn); void rds_cong_remove_conn(struct rds_connection *conn); void rds_cong_set_bit(struct rds_cong_map *map, __be16 port); void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port); int rds_cong_wait(struct rds_cong_map *map, __be16 port, int nonblock, struct rds_sock *rs); void rds_cong_queue_updates(struct rds_cong_map *map); void rds_cong_map_updated(struct rds_cong_map *map, uint64_t); int rds_cong_updated_since(unsigned long *recent); void rds_cong_add_socket(struct rds_sock *); void rds_cong_remove_socket(struct rds_sock *); void rds_cong_exit(void); struct rds_message *rds_cong_update_alloc(struct rds_connection *conn); /* connection.c */ extern u32 rds_gen_num; int rds_conn_init(void); void rds_conn_exit(void); struct rds_connection *rds_conn_create(struct net *net, const struct in6_addr *laddr, const struct in6_addr *faddr, struct rds_transport *trans, u8 tos, gfp_t gfp, int dev_if); struct rds_connection *rds_conn_create_outgoing(struct net *net, const struct in6_addr *laddr, const struct in6_addr *faddr, struct rds_transport *trans, u8 tos, gfp_t gfp, int dev_if); void rds_conn_shutdown(struct rds_conn_path *cpath); void rds_conn_destroy(struct rds_connection *conn); void rds_conn_drop(struct rds_connection *conn); void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy); void rds_conn_connect_if_down(struct rds_connection *conn); void rds_conn_path_connect_if_down(struct rds_conn_path *cp); void rds_check_all_paths(struct rds_connection *conn); void rds_for_each_conn_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens, int (*visitor)(struct rds_connection *, void *), u64 *buffer, size_t item_len); __printf(2, 3) void __rds_conn_path_error(struct rds_conn_path *cp, const char *, ...); #define rds_conn_path_error(cp, fmt...) \ __rds_conn_path_error(cp, KERN_WARNING "RDS: " fmt) static inline int rds_conn_path_transition(struct rds_conn_path *cp, int old, int new) { return atomic_cmpxchg(&cp->cp_state, old, new) == old; } static inline int rds_conn_transition(struct rds_connection *conn, int old, int new) { WARN_ON(conn->c_trans->t_mp_capable); return rds_conn_path_transition(&conn->c_path[0], old, new); } static inline int rds_conn_path_state(struct rds_conn_path *cp) { return atomic_read(&cp->cp_state); } static inline int rds_conn_state(struct rds_connection *conn) { WARN_ON(conn->c_trans->t_mp_capable); return rds_conn_path_state(&conn->c_path[0]); } static inline int rds_conn_path_up(struct rds_conn_path *cp) { return atomic_read(&cp->cp_state) == RDS_CONN_UP; } static inline int rds_conn_path_down(struct rds_conn_path *cp) { return atomic_read(&cp->cp_state) == RDS_CONN_DOWN; } static inline int rds_conn_up(struct rds_connection *conn) { WARN_ON(conn->c_trans->t_mp_capable); return rds_conn_path_up(&conn->c_path[0]); } static inline int rds_conn_path_connecting(struct rds_conn_path *cp) { return atomic_read(&cp->cp_state) == RDS_CONN_CONNECTING; } static inline int rds_conn_connecting(struct rds_connection *conn) { WARN_ON(conn->c_trans->t_mp_capable); return rds_conn_path_connecting(&conn->c_path[0]); } /* message.c */ struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp); struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents); int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, bool zcopy); struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len); void rds_message_populate_header(struct rds_header *hdr, __be16 sport, __be16 dport, u64 seq); int rds_message_add_extension(struct rds_header *hdr, unsigned int type, const void *data, unsigned int len); int rds_message_next_extension(struct rds_header *hdr, unsigned int *pos, void *buf, unsigned int *buflen); int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset); int rds_message_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to); void rds_message_inc_free(struct rds_incoming *inc); void rds_message_addref(struct rds_message *rm); void rds_message_put(struct rds_message *rm); void rds_message_wait(struct rds_message *rm); void rds_message_unmapped(struct rds_message *rm); void rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *info); static inline void rds_message_make_checksum(struct rds_header *hdr) { hdr->h_csum = 0; hdr->h_csum = ip_fast_csum((void *) hdr, sizeof(*hdr) >> 2); } static inline int rds_message_verify_checksum(const struct rds_header *hdr) { return !hdr->h_csum || ip_fast_csum((void *) hdr, sizeof(*hdr) >> 2) == 0; } /* page.c */ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, gfp_t gfp); void rds_page_exit(void); /* recv.c */ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, struct in6_addr *saddr); void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *conn, struct in6_addr *saddr); void rds_inc_put(struct rds_incoming *inc); void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr, struct in6_addr *daddr, struct rds_incoming *inc, gfp_t gfp); int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int msg_flags); void rds_clear_recv_queue(struct rds_sock *rs); int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg); void rds_inc_info_copy(struct rds_incoming *inc, struct rds_info_iterator *iter, __be32 saddr, __be32 daddr, int flip); void rds6_inc_info_copy(struct rds_incoming *inc, struct rds_info_iterator *iter, struct in6_addr *saddr, struct in6_addr *daddr, int flip); /* send.c */ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len); void rds_send_path_reset(struct rds_conn_path *conn); int rds_send_xmit(struct rds_conn_path *cp); struct sockaddr_in; void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in6 *dest); typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack); void rds_send_drop_acked(struct rds_connection *conn, u64 ack, is_acked_func is_acked); void rds_send_path_drop_acked(struct rds_conn_path *cp, u64 ack, is_acked_func is_acked); void rds_send_ping(struct rds_connection *conn, int cp_index); int rds_send_pong(struct rds_conn_path *cp, __be16 dport); /* rdma.c */ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force); int rds_get_mr(struct rds_sock *rs, sockptr_t optval, int optlen); int rds_get_mr_for_dest(struct rds_sock *rs, sockptr_t optval, int optlen); int rds_free_mr(struct rds_sock *rs, sockptr_t optval, int optlen); void rds_rdma_drop_keys(struct rds_sock *rs); int rds_rdma_extra_size(struct rds_rdma_args *args, struct rds_iov_vector *iov); int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm, struct cmsghdr *cmsg); int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, struct cmsghdr *cmsg, struct rds_iov_vector *vec); int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm, struct cmsghdr *cmsg); void rds_rdma_free_op(struct rm_rdma_op *ro); void rds_atomic_free_op(struct rm_atomic_op *ao); void rds_rdma_send_complete(struct rds_message *rm, int wc_status); void rds_atomic_send_complete(struct rds_message *rm, int wc_status); int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, struct cmsghdr *cmsg); void __rds_put_mr_final(struct kref *kref); static inline bool rds_destroy_pending(struct rds_connection *conn) { return !check_net(rds_conn_net(conn)) || (conn->c_trans->t_unloading && conn->c_trans->t_unloading(conn)); } enum { ODP_NOT_NEEDED, ODP_ZEROBASED, ODP_VIRTUAL }; /* stats.c */ DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); #define rds_stats_inc_which(which, member) do { \ per_cpu(which, get_cpu()).member++; \ put_cpu(); \ } while (0) #define rds_stats_inc(member) rds_stats_inc_which(rds_stats, member) #define rds_stats_add_which(which, member, count) do { \ per_cpu(which, get_cpu()).member += count; \ put_cpu(); \ } while (0) #define rds_stats_add(member, count) rds_stats_add_which(rds_stats, member, count) int rds_stats_init(void); void rds_stats_exit(void); void rds_stats_info_copy(struct rds_info_iterator *iter, uint64_t *values, const char *const *names, size_t nr); /* sysctl.c */ int rds_sysctl_init(void); void rds_sysctl_exit(void); extern unsigned long rds_sysctl_sndbuf_min; extern unsigned long rds_sysctl_sndbuf_default; extern unsigned long rds_sysctl_sndbuf_max; extern unsigned long rds_sysctl_reconnect_min_jiffies; extern unsigned long rds_sysctl_reconnect_max_jiffies; extern unsigned int rds_sysctl_max_unacked_packets; extern unsigned int rds_sysctl_max_unacked_bytes; extern unsigned int rds_sysctl_ping_enable; extern unsigned long rds_sysctl_trace_flags; extern unsigned int rds_sysctl_trace_level; /* threads.c */ int rds_threads_init(void); void rds_threads_exit(void); extern struct workqueue_struct *rds_wq; void rds_queue_reconnect(struct rds_conn_path *cp); void rds_connect_worker(struct work_struct *); void rds_shutdown_worker(struct work_struct *); void rds_send_worker(struct work_struct *); void rds_recv_worker(struct work_struct *); void rds_connect_path_complete(struct rds_conn_path *conn, int curr); void rds_connect_complete(struct rds_connection *conn); int rds_addr_cmp(const struct in6_addr *a1, const struct in6_addr *a2); /* transport.c */ void rds_trans_register(struct rds_transport *trans); void rds_trans_unregister(struct rds_transport *trans); struct rds_transport *rds_trans_get_preferred(struct net *net, const struct in6_addr *addr, __u32 scope_id); void rds_trans_put(struct rds_transport *trans); unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter, unsigned int avail); struct rds_transport *rds_trans_get(int t_type); int rds_trans_init(void); void rds_trans_exit(void); #endif |
1 1 1 405 776 3327 383 382 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 | // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/bad_inode.c * * Copyright (C) 1997, Stephen Tweedie * * Provide stub functions for unreadable inodes * * Fabian Frederick : August 2003 - All file operations assigned to EIO */ #include <linux/fs.h> #include <linux/export.h> #include <linux/stat.h> #include <linux/time.h> #include <linux/namei.h> #include <linux/poll.h> #include <linux/fiemap.h> static int bad_file_open(struct inode *inode, struct file *filp) { return -EIO; } static const struct file_operations bad_file_ops = { .open = bad_file_open, }; static int bad_inode_create(struct user_namespace *mnt_userns, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { return -EIO; } static struct dentry *bad_inode_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { return ERR_PTR(-EIO); } static int bad_inode_link (struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { return -EIO; } static int bad_inode_unlink(struct inode *dir, struct dentry *dentry) { return -EIO; } static int bad_inode_symlink(struct user_namespace *mnt_userns, struct inode *dir, struct dentry *dentry, const char *symname) { return -EIO; } static int bad_inode_mkdir(struct user_namespace *mnt_userns, struct inode *dir, struct dentry *dentry, umode_t mode) { return -EIO; } static int bad_inode_rmdir (struct inode *dir, struct dentry *dentry) { return -EIO; } static int bad_inode_mknod(struct user_namespace *mnt_userns, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { return -EIO; } static int bad_inode_rename2(struct user_namespace *mnt_userns, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { return -EIO; } static int bad_inode_readlink(struct dentry *dentry, char __user *buffer, int buflen) { return -EIO; } static int bad_inode_permission(struct user_namespace *mnt_userns, struct inode *inode, int mask) { return -EIO; } static int bad_inode_getattr(struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { return -EIO; } static int bad_inode_setattr(struct user_namespace *mnt_userns, struct dentry *direntry, struct iattr *attrs) { return -EIO; } static ssize_t bad_inode_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) { return -EIO; } static const char *bad_inode_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { return ERR_PTR(-EIO); } static struct posix_acl *bad_inode_get_acl(struct inode *inode, int type, bool rcu) { return ERR_PTR(-EIO); } static int bad_inode_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { return -EIO; } static int bad_inode_update_time(struct inode *inode, struct timespec64 *time, int flags) { return -EIO; } static int bad_inode_atomic_open(struct inode *inode, struct dentry *dentry, struct file *file, unsigned int open_flag, umode_t create_mode) { return -EIO; } static int bad_inode_tmpfile(struct user_namespace *mnt_userns, struct inode *inode, struct dentry *dentry, umode_t mode) { return -EIO; } static int bad_inode_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type) { return -EIO; } static const struct inode_operations bad_inode_ops = { .create = bad_inode_create, .lookup = bad_inode_lookup, .link = bad_inode_link, .unlink = bad_inode_unlink, .symlink = bad_inode_symlink, .mkdir = bad_inode_mkdir, .rmdir = bad_inode_rmdir, .mknod = bad_inode_mknod, .rename = bad_inode_rename2, .readlink = bad_inode_readlink, .permission = bad_inode_permission, .getattr = bad_inode_getattr, .setattr = bad_inode_setattr, .listxattr = bad_inode_listxattr, .get_link = bad_inode_get_link, .get_acl = bad_inode_get_acl, .fiemap = bad_inode_fiemap, .update_time = bad_inode_update_time, .atomic_open = bad_inode_atomic_open, .tmpfile = bad_inode_tmpfile, .set_acl = bad_inode_set_acl, }; /* * When a filesystem is unable to read an inode due to an I/O error in * its read_inode() function, it can call make_bad_inode() to return a * set of stubs which will return EIO errors as required. * * We only need to do limited initialisation: all other fields are * preinitialised to zero automatically. */ /** * make_bad_inode - mark an inode bad due to an I/O error * @inode: Inode to mark bad * * When an inode cannot be read due to a media or remote network * failure this function makes the inode "bad" and causes I/O operations * on it to fail from this point on. */ void make_bad_inode(struct inode *inode) { remove_inode_hash(inode); inode->i_mode = S_IFREG; inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); inode->i_op = &bad_inode_ops; inode->i_opflags &= ~IOP_XATTR; inode->i_fop = &bad_file_ops; } EXPORT_SYMBOL(make_bad_inode); /* * This tests whether an inode has been flagged as bad. The test uses * &bad_inode_ops to cover the case of invalidated inodes as well as * those created by make_bad_inode() above. */ /** * is_bad_inode - is an inode errored * @inode: inode to test * * Returns true if the inode in question has been marked as bad. */ bool is_bad_inode(struct inode *inode) { return (inode->i_op == &bad_inode_ops); } EXPORT_SYMBOL(is_bad_inode); /** * iget_failed - Mark an under-construction inode as dead and release it * @inode: The inode to discard * * Mark an under-construction inode as dead and release it. */ void iget_failed(struct inode *inode) { make_bad_inode(inode); unlock_new_inode(inode); iput(inode); } EXPORT_SYMBOL(iget_failed); |
33 30 194 200 155 120 118 48 46 45 30 30 30 84 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ #ifndef __XFS_DQUOT_H__ #define __XFS_DQUOT_H__ /* * Dquots are structures that hold quota information about a user or a group, * much like inodes are for files. In fact, dquots share many characteristics * with inodes. However, dquots can also be a centralized resource, relative * to a collection of inodes. In this respect, dquots share some characteristics * of the superblock. * XFS dquots exploit both those in its algorithms. They make every attempt * to not be a bottleneck when quotas are on and have minimal impact, if any, * when quotas are off. */ struct xfs_mount; struct xfs_trans; enum { XFS_QLOWSP_1_PCNT = 0, XFS_QLOWSP_3_PCNT, XFS_QLOWSP_5_PCNT, XFS_QLOWSP_MAX }; struct xfs_dquot_res { /* Total resources allocated and reserved. */ xfs_qcnt_t reserved; /* Total resources allocated. */ xfs_qcnt_t count; /* Absolute and preferred limits. */ xfs_qcnt_t hardlimit; xfs_qcnt_t softlimit; /* * For root dquots, this is the default grace period, in seconds. * Otherwise, this is when the quota grace period expires, * in seconds since the Unix epoch. */ time64_t timer; /* * For root dquots, this is the maximum number of warnings that will * be issued for this quota type. Otherwise, this is the number of * warnings issued against this quota. Note that none of this is * implemented. */ xfs_qwarncnt_t warnings; }; static inline bool xfs_dquot_res_over_limits( const struct xfs_dquot_res *qres) { if ((qres->softlimit && qres->softlimit < qres->reserved) || (qres->hardlimit && qres->hardlimit < qres->reserved)) return true; return false; } /* * The incore dquot structure */ struct xfs_dquot { struct list_head q_lru; struct xfs_mount *q_mount; xfs_dqtype_t q_type; uint16_t q_flags; xfs_dqid_t q_id; uint q_nrefs; int q_bufoffset; xfs_daddr_t q_blkno; xfs_fileoff_t q_fileoffset; struct xfs_dquot_res q_blk; /* regular blocks */ struct xfs_dquot_res q_ino; /* inodes */ struct xfs_dquot_res q_rtb; /* realtime blocks */ struct xfs_dq_logitem q_logitem; xfs_qcnt_t q_prealloc_lo_wmark; xfs_qcnt_t q_prealloc_hi_wmark; int64_t q_low_space[XFS_QLOWSP_MAX]; struct mutex q_qlock; struct completion q_flush; atomic_t q_pincount; struct wait_queue_head q_pinwait; }; /* * Lock hierarchy for q_qlock: * XFS_QLOCK_NORMAL is the implicit default, * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2 */ enum { XFS_QLOCK_NORMAL = 0, XFS_QLOCK_NESTED, }; /* * Manage the q_flush completion queue embedded in the dquot. This completion * queue synchronizes processes attempting to flush the in-core dquot back to * disk. */ static inline void xfs_dqflock(struct xfs_dquot *dqp) { wait_for_completion(&dqp->q_flush); } static inline bool xfs_dqflock_nowait(struct xfs_dquot *dqp) { return try_wait_for_completion(&dqp->q_flush); } static inline void xfs_dqfunlock(struct xfs_dquot *dqp) { complete(&dqp->q_flush); } static inline int xfs_dqlock_nowait(struct xfs_dquot *dqp) { return mutex_trylock(&dqp->q_qlock); } static inline void xfs_dqlock(struct xfs_dquot *dqp) { mutex_lock(&dqp->q_qlock); } static inline void xfs_dqunlock(struct xfs_dquot *dqp) { mutex_unlock(&dqp->q_qlock); } static inline int xfs_dquot_type(const struct xfs_dquot *dqp) { return dqp->q_type & XFS_DQTYPE_REC_MASK; } static inline int xfs_this_quota_on(struct xfs_mount *mp, xfs_dqtype_t type) { switch (type) { case XFS_DQTYPE_USER: return XFS_IS_UQUOTA_ON(mp); case XFS_DQTYPE_GROUP: return XFS_IS_GQUOTA_ON(mp); case XFS_DQTYPE_PROJ: return XFS_IS_PQUOTA_ON(mp); default: return 0; } } static inline struct xfs_dquot *xfs_inode_dquot( struct xfs_inode *ip, xfs_dqtype_t type) { switch (type) { case XFS_DQTYPE_USER: return ip->i_udquot; case XFS_DQTYPE_GROUP: return ip->i_gdquot; case XFS_DQTYPE_PROJ: return ip->i_pdquot; default: return NULL; } } /* Decide if the dquot's limits are actually being enforced. */ static inline bool xfs_dquot_is_enforced( const struct xfs_dquot *dqp) { switch (xfs_dquot_type(dqp)) { case XFS_DQTYPE_USER: return XFS_IS_UQUOTA_ENFORCED(dqp->q_mount); case XFS_DQTYPE_GROUP: return XFS_IS_GQUOTA_ENFORCED(dqp->q_mount); case XFS_DQTYPE_PROJ: return XFS_IS_PQUOTA_ENFORCED(dqp->q_mount); } ASSERT(0); return false; } /* * Check whether a dquot is under low free space conditions. We assume the quota * is enabled and enforced. */ static inline bool xfs_dquot_lowsp(struct xfs_dquot *dqp) { int64_t freesp; freesp = dqp->q_blk.hardlimit - dqp->q_blk.reserved; if (freesp < dqp->q_low_space[XFS_QLOWSP_1_PCNT]) return true; return false; } void xfs_dquot_to_disk(struct xfs_disk_dquot *ddqp, struct xfs_dquot *dqp); #define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->q_flags & XFS_DQFLAG_DIRTY) void xfs_qm_dqdestroy(struct xfs_dquot *dqp); int xfs_qm_dqflush(struct xfs_dquot *dqp, struct xfs_buf **bpp); void xfs_qm_dqunpin_wait(struct xfs_dquot *dqp); void xfs_qm_adjust_dqtimers(struct xfs_dquot *d); void xfs_qm_adjust_dqlimits(struct xfs_dquot *d); xfs_dqid_t xfs_qm_id_for_quotatype(struct xfs_inode *ip, xfs_dqtype_t type); int xfs_qm_dqget(struct xfs_mount *mp, xfs_dqid_t id, xfs_dqtype_t type, bool can_alloc, struct xfs_dquot **dqpp); int xfs_qm_dqget_inode(struct xfs_inode *ip, xfs_dqtype_t type, bool can_alloc, struct xfs_dquot **dqpp); int xfs_qm_dqget_next(struct xfs_mount *mp, xfs_dqid_t id, xfs_dqtype_t type, struct xfs_dquot **dqpp); int xfs_qm_dqget_uncached(struct xfs_mount *mp, xfs_dqid_t id, xfs_dqtype_t type, struct xfs_dquot **dqpp); void xfs_qm_dqput(struct xfs_dquot *dqp); void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *); void xfs_dquot_set_prealloc_limits(struct xfs_dquot *); static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp) { xfs_dqlock(dqp); dqp->q_nrefs++; xfs_dqunlock(dqp); return dqp; } typedef int (*xfs_qm_dqiterate_fn)(struct xfs_dquot *dq, xfs_dqtype_t type, void *priv); int xfs_qm_dqiterate(struct xfs_mount *mp, xfs_dqtype_t type, xfs_qm_dqiterate_fn iter_fn, void *priv); time64_t xfs_dquot_set_timeout(struct xfs_mount *mp, time64_t timeout); time64_t xfs_dquot_set_grace_period(time64_t grace); #endif /* __XFS_DQUOT_H__ */ |
1 2 1 1 2 1 31 23 134 29 1 1 4 7 5 1 1 1 1 1 2 1 1 4 3 4 55 3 1 4 1 2 2 1 1 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 | // SPDX-License-Identifier: GPL-2.0-or-later /* * OSS compatible sequencer driver * * OSS compatible i/o control * * Copyright (C) 1998,99 Takashi Iwai <tiwai@suse.de> */ #include "seq_oss_device.h" #include "seq_oss_readq.h" #include "seq_oss_writeq.h" #include "seq_oss_timer.h" #include "seq_oss_synth.h" #include "seq_oss_midi.h" #include "seq_oss_event.h" static int snd_seq_oss_synth_info_user(struct seq_oss_devinfo *dp, void __user *arg) { struct synth_info info; if (copy_from_user(&info, arg, sizeof(info))) return -EFAULT; if (snd_seq_oss_synth_make_info(dp, info.device, &info) < 0) return -EINVAL; if (copy_to_user(arg, &info, sizeof(info))) return -EFAULT; return 0; } static int snd_seq_oss_midi_info_user(struct seq_oss_devinfo *dp, void __user *arg) { struct midi_info info; if (copy_from_user(&info, arg, sizeof(info))) return -EFAULT; if (snd_seq_oss_midi_make_info(dp, info.device, &info) < 0) return -EINVAL; if (copy_to_user(arg, &info, sizeof(info))) return -EFAULT; return 0; } static int snd_seq_oss_oob_user(struct seq_oss_devinfo *dp, void __user *arg) { unsigned char ev[8]; struct snd_seq_event tmpev; if (copy_from_user(ev, arg, 8)) return -EFAULT; memset(&tmpev, 0, sizeof(tmpev)); snd_seq_oss_fill_addr(dp, &tmpev, dp->addr.client, dp->addr.port); tmpev.time.tick = 0; if (! snd_seq_oss_process_event(dp, (union evrec *)ev, &tmpev)) { snd_seq_oss_dispatch(dp, &tmpev, 0, 0); } return 0; } int snd_seq_oss_ioctl(struct seq_oss_devinfo *dp, unsigned int cmd, unsigned long carg) { int dev, val; void __user *arg = (void __user *)carg; int __user *p = arg; switch (cmd) { case SNDCTL_TMR_TIMEBASE: case SNDCTL_TMR_TEMPO: case SNDCTL_TMR_START: case SNDCTL_TMR_STOP: case SNDCTL_TMR_CONTINUE: case SNDCTL_TMR_METRONOME: case SNDCTL_TMR_SOURCE: case SNDCTL_TMR_SELECT: case SNDCTL_SEQ_CTRLRATE: return snd_seq_oss_timer_ioctl(dp->timer, cmd, arg); case SNDCTL_SEQ_PANIC: snd_seq_oss_reset(dp); return -EINVAL; case SNDCTL_SEQ_SYNC: if (! is_write_mode(dp->file_mode) || dp->writeq == NULL) return 0; while (snd_seq_oss_writeq_sync(dp->writeq)) ; if (signal_pending(current)) return -ERESTARTSYS; return 0; case SNDCTL_SEQ_RESET: snd_seq_oss_reset(dp); return 0; case SNDCTL_SEQ_TESTMIDI: if (get_user(dev, p)) return -EFAULT; return snd_seq_oss_midi_open(dp, dev, dp->file_mode); case SNDCTL_SEQ_GETINCOUNT: if (dp->readq == NULL || ! is_read_mode(dp->file_mode)) return 0; return put_user(dp->readq->qlen, p) ? -EFAULT : 0; case SNDCTL_SEQ_GETOUTCOUNT: if (! is_write_mode(dp->file_mode) || dp->writeq == NULL) return 0; return put_user(snd_seq_oss_writeq_get_free_size(dp->writeq), p) ? -EFAULT : 0; case SNDCTL_SEQ_GETTIME: return put_user(snd_seq_oss_timer_cur_tick(dp->timer), p) ? -EFAULT : 0; case SNDCTL_SEQ_RESETSAMPLES: if (get_user(dev, p)) return -EFAULT; return snd_seq_oss_synth_ioctl(dp, dev, cmd, carg); case SNDCTL_SEQ_NRSYNTHS: return put_user(dp->max_synthdev, p) ? -EFAULT : 0; case SNDCTL_SEQ_NRMIDIS: return put_user(dp->max_mididev, p) ? -EFAULT : 0; case SNDCTL_SYNTH_MEMAVL: if (get_user(dev, p)) return -EFAULT; val = snd_seq_oss_synth_ioctl(dp, dev, cmd, carg); return put_user(val, p) ? -EFAULT : 0; case SNDCTL_FM_4OP_ENABLE: if (get_user(dev, p)) return -EFAULT; snd_seq_oss_synth_ioctl(dp, dev, cmd, carg); return 0; case SNDCTL_SYNTH_INFO: case SNDCTL_SYNTH_ID: return snd_seq_oss_synth_info_user(dp, arg); case SNDCTL_SEQ_OUTOFBAND: return snd_seq_oss_oob_user(dp, arg); case SNDCTL_MIDI_INFO: return snd_seq_oss_midi_info_user(dp, arg); case SNDCTL_SEQ_THRESHOLD: if (! is_write_mode(dp->file_mode)) return 0; if (get_user(val, p)) return -EFAULT; if (val < 1) val = 1; if (val >= dp->writeq->maxlen) val = dp->writeq->maxlen - 1; snd_seq_oss_writeq_set_output(dp->writeq, val); return 0; case SNDCTL_MIDI_PRETIME: if (dp->readq == NULL || !is_read_mode(dp->file_mode)) return 0; if (get_user(val, p)) return -EFAULT; if (val <= 0) val = -1; else val = (HZ * val) / 10; dp->readq->pre_event_timeout = val; return put_user(val, p) ? -EFAULT : 0; default: if (! is_write_mode(dp->file_mode)) return -EIO; return snd_seq_oss_synth_ioctl(dp, 0, cmd, carg); } return 0; } |
8 8 8 8 8 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (c) 2016, Amir Vadai <amir@vadai.me> * Copyright (c) 2016, Mellanox Technologies. All rights reserved. */ #ifndef __NET_TC_TUNNEL_KEY_H #define __NET_TC_TUNNEL_KEY_H #include <net/act_api.h> #include <linux/tc_act/tc_tunnel_key.h> #include <net/dst_metadata.h> struct tcf_tunnel_key_params { struct rcu_head rcu; int tcft_action; struct metadata_dst *tcft_enc_metadata; }; struct tcf_tunnel_key { struct tc_action common; struct tcf_tunnel_key_params __rcu *params; }; #define to_tunnel_key(a) ((struct tcf_tunnel_key *)a) static inline bool is_tcf_tunnel_set(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT struct tcf_tunnel_key *t = to_tunnel_key(a); struct tcf_tunnel_key_params *params; params = rcu_dereference_protected(t->params, lockdep_is_held(&a->tcfa_lock)); if (a->ops && a->ops->id == TCA_ID_TUNNEL_KEY) return params->tcft_action == TCA_TUNNEL_KEY_ACT_SET; #endif return false; } static inline bool is_tcf_tunnel_release(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT struct tcf_tunnel_key *t = to_tunnel_key(a); struct tcf_tunnel_key_params *params; params = rcu_dereference_protected(t->params, lockdep_is_held(&a->tcfa_lock)); if (a->ops && a->ops->id == TCA_ID_TUNNEL_KEY) return params->tcft_action == TCA_TUNNEL_KEY_ACT_RELEASE; #endif return false; } static inline struct ip_tunnel_info *tcf_tunnel_info(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT struct tcf_tunnel_key *t = to_tunnel_key(a); struct tcf_tunnel_key_params *params; params = rcu_dereference_protected(t->params, lockdep_is_held(&a->tcfa_lock)); return ¶ms->tcft_enc_metadata->u.tun_info; #else return NULL; #endif } static inline struct ip_tunnel_info * tcf_tunnel_info_copy(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT struct ip_tunnel_info *tun = tcf_tunnel_info(a); if (tun) { size_t tun_size = sizeof(*tun) + tun->options_len; struct ip_tunnel_info *tun_copy = kmemdup(tun, tun_size, GFP_ATOMIC); return tun_copy; } #endif return NULL; } #endif /* __NET_TC_TUNNEL_KEY_H */ |
70 20 54 456 455 1 408 28 42 422 30 86 129 1248 1109 142 643 296 4 167 129 129 294 798 1072 1062 22 927 454 10 10 928 827 20 378 929 932 908 908 9 7 2 2 1249 241 53 989 205 124 6 125 25 148 22 2 3 19 9 138 77 1083 1079 1233 1239 1111 262 866 541 333 494 337 6 334 236 255 255 24 24 4 5 16 8 16 11 24 24 24 14 3 24 2 23 2 2 13 2 5 6 191 191 191 182 187 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 | // SPDX-License-Identifier: GPL-2.0 /* * SUCS NET3: * * Generic datagram handling routines. These are generic for all * protocols. Possibly a generic IP version on top of these would * make sense. Not tonight however 8-). * This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and * NetROM layer all have identical poll code and mostly * identical recvmsg() code. So we share it here. The poll was * shared before but buried in udp.c so I moved it. * * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old * udp.c code) * * Fixes: * Alan Cox : NULL return from skb_peek_copy() * understood * Alan Cox : Rewrote skb_read_datagram to avoid the * skb_peek_copy stuff. * Alan Cox : Added support for SOCK_SEQPACKET. * IPX can no longer use the SO_TYPE hack * but AX.25 now works right, and SPX is * feasible. * Alan Cox : Fixed write poll of non IP protocol * crash. * Florian La Roche: Changed for my new skbuff handling. * Darryl Miles : Fixed non-blocking SOCK_SEQPACKET. * Linus Torvalds : BSD semantic fixes. * Alan Cox : Datagram iovec handling * Darryl Miles : Fixed non-blocking SOCK_STREAM. * Alan Cox : POSIXisms * Pete Wyckoff : Unconnected accept() fix. * */ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/uaccess.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/poll.h> #include <linux/highmem.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/uio.h> #include <linux/indirect_call_wrapper.h> #include <net/protocol.h> #include <linux/skbuff.h> #include <net/checksum.h> #include <net/sock.h> #include <net/tcp_states.h> #include <trace/events/skb.h> #include <net/busy_poll.h> #include "datagram.h" /* * Is a socket 'connection oriented' ? */ static inline int connection_based(struct sock *sk) { return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM; } static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key) { /* * Avoid a wakeup if event not interesting for us */ if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR))) return 0; return autoremove_wake_function(wait, mode, sync, key); } /* * Wait for the last received packet to be different from skb */ int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue, int *err, long *timeo_p, const struct sk_buff *skb) { int error; DEFINE_WAIT_FUNC(wait, receiver_wake_function); prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); /* Socket errors? */ error = sock_error(sk); if (error) goto out_err; if (READ_ONCE(queue->prev) != skb) goto out; /* Socket shut down? */ if (sk->sk_shutdown & RCV_SHUTDOWN) goto out_noerr; /* Sequenced packets can come disconnected. * If so we report the problem */ error = -ENOTCONN; if (connection_based(sk) && !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN)) goto out_err; /* handle signals */ if (signal_pending(current)) goto interrupted; error = 0; *timeo_p = schedule_timeout(*timeo_p); out: finish_wait(sk_sleep(sk), &wait); return error; interrupted: error = sock_intr_errno(*timeo_p); out_err: *err = error; goto out; out_noerr: *err = 0; error = 1; goto out; } EXPORT_SYMBOL(__skb_wait_for_more_packets); static struct sk_buff *skb_set_peeked(struct sk_buff *skb) { struct sk_buff *nskb; if (skb->peeked) return skb; /* We have to unshare an skb before modifying it. */ if (!skb_shared(skb)) goto done; nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) return ERR_PTR(-ENOMEM); skb->prev->next = nskb; skb->next->prev = nskb; nskb->prev = skb->prev; nskb->next = skb->next; consume_skb(skb); skb = nskb; done: skb->peeked = 1; return skb; } struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, struct sk_buff_head *queue, unsigned int flags, int *off, int *err, struct sk_buff **last) { bool peek_at_off = false; struct sk_buff *skb; int _off = 0; if (unlikely(flags & MSG_PEEK && *off >= 0)) { peek_at_off = true; _off = *off; } *last = queue->prev; skb_queue_walk(queue, skb) { if (flags & MSG_PEEK) { if (peek_at_off && _off >= skb->len && (_off || skb->peeked)) { _off -= skb->len; continue; } if (!skb->len) { skb = skb_set_peeked(skb); if (IS_ERR(skb)) { *err = PTR_ERR(skb); return NULL; } } refcount_inc(&skb->users); } else { __skb_unlink(skb, queue); } *off = _off; return skb; } return NULL; } /** * __skb_try_recv_datagram - Receive a datagram skbuff * @sk: socket * @queue: socket queue from which to receive * @flags: MSG\_ flags * @off: an offset in bytes to peek skb from. Returns an offset * within an skb where data actually starts * @err: error code returned * @last: set to last peeked message to inform the wait function * what to look for when peeking * * Get a datagram skbuff, understands the peeking, nonblocking wakeups * and possible races. This replaces identical code in packet, raw and * udp, as well as the IPX AX.25 and Appletalk. It also finally fixes * the long standing peek and read race for datagram sockets. If you * alter this routine remember it must be re-entrant. * * This function will lock the socket if a skb is returned, so * the caller needs to unlock the socket in that case (usually by * calling skb_free_datagram). Returns NULL with @err set to * -EAGAIN if no data was available or to some other value if an * error was detected. * * * It does not lock socket since today. This function is * * free of race conditions. This measure should/can improve * * significantly datagram socket latencies at high loads, * * when data copying to user space takes lots of time. * * (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet * * 8) Great win.) * * --ANK (980729) * * The order of the tests when we find no data waiting are specified * quite explicitly by POSIX 1003.1g, don't change them without having * the standard around please. */ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, struct sk_buff_head *queue, unsigned int flags, int *off, int *err, struct sk_buff **last) { struct sk_buff *skb; unsigned long cpu_flags; /* * Caller is allowed not to check sk->sk_err before skb_recv_datagram() */ int error = sock_error(sk); if (error) goto no_packet; do { /* Again only user level code calls this function, so nothing * interrupt level will suddenly eat the receive_queue. * * Look at current nfs client by the way... * However, this function was correct in any case. 8) */ spin_lock_irqsave(&queue->lock, cpu_flags); skb = __skb_try_recv_from_queue(sk, queue, flags, off, &error, last); spin_unlock_irqrestore(&queue->lock, cpu_flags); if (error) goto no_packet; if (skb) return skb; if (!sk_can_busy_loop(sk)) break; sk_busy_loop(sk, flags & MSG_DONTWAIT); } while (READ_ONCE(queue->prev) != *last); error = -EAGAIN; no_packet: *err = error; return NULL; } EXPORT_SYMBOL(__skb_try_recv_datagram); struct sk_buff *__skb_recv_datagram(struct sock *sk, struct sk_buff_head *sk_queue, unsigned int flags, int *off, int *err) { struct sk_buff *skb, *last; long timeo; timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { skb = __skb_try_recv_datagram(sk, sk_queue, flags, off, err, &last); if (skb) return skb; if (*err != -EAGAIN) break; } while (timeo && !__skb_wait_for_more_packets(sk, sk_queue, err, &timeo, last)); return NULL; } EXPORT_SYMBOL(__skb_recv_datagram); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int noblock, int *err) { int off = 0; return __skb_recv_datagram(sk, &sk->sk_receive_queue, flags | (noblock ? MSG_DONTWAIT : 0), &off, err); } EXPORT_SYMBOL(skb_recv_datagram); void skb_free_datagram(struct sock *sk, struct sk_buff *skb) { consume_skb(skb); sk_mem_reclaim_partial(sk); } EXPORT_SYMBOL(skb_free_datagram); void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len) { bool slow; if (!skb_unref(skb)) { sk_peek_offset_bwd(sk, len); return; } slow = lock_sock_fast(sk); sk_peek_offset_bwd(sk, len); skb_orphan(skb); sk_mem_reclaim_partial(sk); unlock_sock_fast(sk, slow); /* skb is now orphaned, can be freed outside of locked section */ __kfree_skb(skb); } EXPORT_SYMBOL(__skb_free_datagram_locked); int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue, struct sk_buff *skb, unsigned int flags, void (*destructor)(struct sock *sk, struct sk_buff *skb)) { int err = 0; if (flags & MSG_PEEK) { err = -ENOENT; spin_lock_bh(&sk_queue->lock); if (skb->next) { __skb_unlink(skb, sk_queue); refcount_dec(&skb->users); if (destructor) destructor(sk, skb); err = 0; } spin_unlock_bh(&sk_queue->lock); } atomic_inc(&sk->sk_drops); return err; } EXPORT_SYMBOL(__sk_queue_drop_skb); /** * skb_kill_datagram - Free a datagram skbuff forcibly * @sk: socket * @skb: datagram skbuff * @flags: MSG\_ flags * * This function frees a datagram skbuff that was received by * skb_recv_datagram. The flags argument must match the one * used for skb_recv_datagram. * * If the MSG_PEEK flag is set, and the packet is still on the * receive queue of the socket, it will be taken off the queue * before it is freed. * * This function currently only disables BH when acquiring the * sk_receive_queue lock. Therefore it must not be used in a * context where that lock is acquired in an IRQ context. * * It returns 0 if the packet was removed by us. */ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) { int err = __sk_queue_drop_skb(sk, &sk->sk_receive_queue, skb, flags, NULL); kfree_skb(skb); sk_mem_reclaim_partial(sk); return err; } EXPORT_SYMBOL(skb_kill_datagram); INDIRECT_CALLABLE_DECLARE(static size_t simple_copy_to_iter(const void *addr, size_t bytes, void *data __always_unused, struct iov_iter *i)); static int __skb_datagram_iter(const struct sk_buff *skb, int offset, struct iov_iter *to, int len, bool fault_short, size_t (*cb)(const void *, size_t, void *, struct iov_iter *), void *data) { int start = skb_headlen(skb); int i, copy = start - offset, start_off = offset, n; struct sk_buff *frag_iter; /* Copy header. */ if (copy > 0) { if (copy > len) copy = len; n = INDIRECT_CALL_1(cb, simple_copy_to_iter, skb->data + offset, copy, data, to); offset += n; if (n != copy) goto short_copy; if ((len -= copy) == 0) return 0; } /* Copy paged appendix. Hmm... why does this look so complicated? */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { u32 p_off, p_len, copied; struct page *p; u8 *vaddr; if (copy > len) copy = len; n = 0; skb_frag_foreach_page(frag, skb_frag_off(frag) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_local_page(p); n += INDIRECT_CALL_1(cb, simple_copy_to_iter, vaddr + p_off, p_len, data, to); kunmap_local(vaddr); } offset += n; if (n != copy) goto short_copy; if (!(len -= copy)) return 0; } start = end; } skb_walk_frags(skb, frag_iter) { int end; WARN_ON(start > offset + len); end = start + frag_iter->len; if ((copy = end - offset) > 0) { if (copy > len) copy = len; if (__skb_datagram_iter(frag_iter, offset - start, to, copy, fault_short, cb, data)) goto fault; if ((len -= copy) == 0) return 0; offset += copy; } start = end; } if (!len) return 0; /* This is not really a user copy fault, but rather someone * gave us a bogus length on the skb. We should probably * print a warning here as it may indicate a kernel bug. */ fault: iov_iter_revert(to, offset - start_off); return -EFAULT; short_copy: if (fault_short || iov_iter_count(to)) goto fault; return 0; } /** * skb_copy_and_hash_datagram_iter - Copy datagram to an iovec iterator * and update a hash. * @skb: buffer to copy * @offset: offset in the buffer to start copying from * @to: iovec iterator to copy to * @len: amount of data to copy from buffer to iovec * @hash: hash request to update */ int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset, struct iov_iter *to, int len, struct ahash_request *hash) { return __skb_datagram_iter(skb, offset, to, len, true, hash_and_copy_to_iter, hash); } EXPORT_SYMBOL(skb_copy_and_hash_datagram_iter); static size_t simple_copy_to_iter(const void *addr, size_t bytes, void *data __always_unused, struct iov_iter *i) { return copy_to_iter(addr, bytes, i); } /** * skb_copy_datagram_iter - Copy a datagram to an iovec iterator. * @skb: buffer to copy * @offset: offset in the buffer to start copying from * @to: iovec iterator to copy to * @len: amount of data to copy from buffer to iovec */ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset, struct iov_iter *to, int len) { trace_skb_copy_datagram_iovec(skb, len); return __skb_datagram_iter(skb, offset, to, len, false, simple_copy_to_iter, NULL); } EXPORT_SYMBOL(skb_copy_datagram_iter); /** * skb_copy_datagram_from_iter - Copy a datagram from an iov_iter. * @skb: buffer to copy * @offset: offset in the buffer to start copying to * @from: the copy source * @len: amount of data to copy to buffer from iovec * * Returns 0 or -EFAULT. */ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len) { int start = skb_headlen(skb); int i, copy = start - offset; struct sk_buff *frag_iter; /* Copy header. */ if (copy > 0) { if (copy > len) copy = len; if (copy_from_iter(skb->data + offset, copy, from) != copy) goto fault; if ((len -= copy) == 0) return 0; offset += copy; } /* Copy paged appendix. Hmm... why does this look so complicated? */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { size_t copied; if (copy > len) copy = len; copied = copy_page_from_iter(skb_frag_page(frag), skb_frag_off(frag) + offset - start, copy, from); if (copied != copy) goto fault; if (!(len -= copy)) return 0; offset += copy; } start = end; } skb_walk_frags(skb, frag_iter) { int end; WARN_ON(start > offset + len); end = start + frag_iter->len; if ((copy = end - offset) > 0) { if (copy > len) copy = len; if (skb_copy_datagram_from_iter(frag_iter, offset - start, from, copy)) goto fault; if ((len -= copy) == 0) return 0; offset += copy; } start = end; } if (!len) return 0; fault: return -EFAULT; } EXPORT_SYMBOL(skb_copy_datagram_from_iter); int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, struct iov_iter *from, size_t length) { int frag = skb_shinfo(skb)->nr_frags; while (length && iov_iter_count(from)) { struct page *pages[MAX_SKB_FRAGS]; struct page *last_head = NULL; size_t start; ssize_t copied; unsigned long truesize; int refs, n = 0; if (frag == MAX_SKB_FRAGS) return -EMSGSIZE; copied = iov_iter_get_pages(from, pages, length, MAX_SKB_FRAGS - frag, &start); if (copied < 0) return -EFAULT; iov_iter_advance(from, copied); length -= copied; truesize = PAGE_ALIGN(copied + start); skb->data_len += copied; skb->len += copied; skb->truesize += truesize; if (sk && sk->sk_type == SOCK_STREAM) { sk_wmem_queued_add(sk, truesize); sk_mem_charge(sk, truesize); } else { refcount_add(truesize, &skb->sk->sk_wmem_alloc); } for (refs = 0; copied != 0; start = 0) { int size = min_t(int, copied, PAGE_SIZE - start); struct page *head = compound_head(pages[n]); start += (pages[n] - head) << PAGE_SHIFT; copied -= size; n++; if (frag) { skb_frag_t *last = &skb_shinfo(skb)->frags[frag - 1]; if (head == skb_frag_page(last) && start == skb_frag_off(last) + skb_frag_size(last)) { skb_frag_size_add(last, size); /* We combined this page, we need to release * a reference. Since compound pages refcount * is shared among many pages, batch the refcount * adjustments to limit false sharing. */ last_head = head; refs++; continue; } } if (refs) { page_ref_sub(last_head, refs); refs = 0; } skb_fill_page_desc_noacc(skb, frag++, head, start, size); } if (refs) page_ref_sub(last_head, refs); } return 0; } EXPORT_SYMBOL(__zerocopy_sg_from_iter); /** * zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter * @skb: buffer to copy * @from: the source to copy from * * The function will first copy up to headlen, and then pin the userspace * pages and build frags through them. * * Returns 0, -EFAULT or -EMSGSIZE. */ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from) { int copy = min_t(int, skb_headlen(skb), iov_iter_count(from)); /* copy up to skb headlen */ if (skb_copy_datagram_from_iter(skb, 0, from, copy)) return -EFAULT; return __zerocopy_sg_from_iter(NULL, skb, from, ~0U); } EXPORT_SYMBOL(zerocopy_sg_from_iter); /** * skb_copy_and_csum_datagram - Copy datagram to an iovec iterator * and update a checksum. * @skb: buffer to copy * @offset: offset in the buffer to start copying from * @to: iovec iterator to copy to * @len: amount of data to copy from buffer to iovec * @csump: checksum pointer */ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, struct iov_iter *to, int len, __wsum *csump) { struct csum_state csdata = { .csum = *csump }; int ret; ret = __skb_datagram_iter(skb, offset, to, len, true, csum_and_copy_to_iter, &csdata); if (ret) return ret; *csump = csdata.csum; return 0; } /** * skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec. * @skb: skbuff * @hlen: hardware length * @msg: destination * * Caller _must_ check that skb will fit to this iovec. * * Returns: 0 - success. * -EINVAL - checksum failure. * -EFAULT - fault during copy. */ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen, struct msghdr *msg) { __wsum csum; int chunk = skb->len - hlen; if (!chunk) return 0; if (msg_data_left(msg) < chunk) { if (__skb_checksum_complete(skb)) return -EINVAL; if (skb_copy_datagram_msg(skb, hlen, msg, chunk)) goto fault; } else { csum = csum_partial(skb->data, hlen, skb->csum); if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter, chunk, &csum)) goto fault; if (csum_fold(csum)) { iov_iter_revert(&msg->msg_iter, chunk); return -EINVAL; } if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !skb->csum_complete_sw) netdev_rx_csum_fault(NULL, skb); } return 0; fault: return -EFAULT; } EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg); /** * datagram_poll - generic datagram poll * @file: file struct * @sock: socket * @wait: poll table * * Datagram poll: Again totally generic. This also handles * sequenced packet sockets providing the socket receive queue * is only ever holding data ready to receive. * * Note: when you *don't* use this routine for this protocol, * and you use a different write policy from sock_writeable() * then please supply your own write_space callback. */ __poll_t datagram_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask; u8 shutdown; sock_poll_wait(file, sock, wait); mask = 0; /* exceptional events? */ if (READ_ONCE(sk->sk_err) || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); shutdown = READ_ONCE(sk->sk_shutdown); if (shutdown & RCV_SHUTDOWN) mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; if (shutdown == SHUTDOWN_MASK) mask |= EPOLLHUP; /* readable? */ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ if (connection_based(sk)) { int state = READ_ONCE(sk->sk_state); if (state == TCP_CLOSE) mask |= EPOLLHUP; /* connection hasn't started yet? */ if (state == TCP_SYN_SENT) return mask; } /* writable? */ if (sock_writeable(sk)) mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; else sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); return mask; } EXPORT_SYMBOL(datagram_poll); |
8 8 7 1 3 8 7 7 2 78 78 78 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 | // SPDX-License-Identifier: GPL-2.0 /* * xfrm6_policy.c: based on xfrm4_policy.c * * Authors: * Mitsuru KANDA @USAGI * Kazunori MIYAZAWA @USAGI * Kunihiro Ishiguro <kunihiro@ipinfusion.com> * IPv6 support * YOSHIFUJI Hideaki * Split up af-specific portion * */ #include <linux/err.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <net/addrconf.h> #include <net/dst.h> #include <net/xfrm.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/l3mdev.h> static struct dst_entry *xfrm6_dst_lookup(const struct xfrm_dst_lookup_params *params) { struct flowi6 fl6; struct dst_entry *dst; int err; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(params->net, params->oif); fl6.flowi6_mark = params->mark; memcpy(&fl6.daddr, params->daddr, sizeof(fl6.daddr)); if (params->saddr) memcpy(&fl6.saddr, params->saddr, sizeof(fl6.saddr)); fl6.flowi4_proto = params->ipproto; fl6.uli = params->uli; dst = ip6_route_output(params->net, NULL, &fl6); err = dst->error; if (dst->error) { dst_release(dst); dst = ERR_PTR(err); } return dst; } static int xfrm6_get_saddr(xfrm_address_t *saddr, const struct xfrm_dst_lookup_params *params) { struct dst_entry *dst; struct net_device *dev; struct inet6_dev *idev; dst = xfrm6_dst_lookup(params); if (IS_ERR(dst)) return -EHOSTUNREACH; idev = ip6_dst_idev(dst); if (!idev) { dst_release(dst); return -EHOSTUNREACH; } dev = idev->dev; ipv6_dev_get_saddr(dev_net(dev), dev, ¶ms->daddr->in6, 0, &saddr->in6); dst_release(dst); return 0; } static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { struct rt6_info *rt = (struct rt6_info *)xdst->route; xdst->u.dst.dev = dev; dev_hold(dev); xdst->u.rt6.rt6i_idev = in6_dev_get(dev); if (!xdst->u.rt6.rt6i_idev) { dev_put(dev); return -ENODEV; } /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | RTF_LOCAL); xdst->route_cookie = rt6_get_cookie(rt); xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; xdst->u.rt6.rt6i_dst = rt->rt6i_dst; xdst->u.rt6.rt6i_src = rt->rt6i_src; INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached); rt6_uncached_list_add(&xdst->u.rt6); atomic_inc(&dev_net(dev)->ipv6.rt6_stats->fib_rt_uncache); return 0; } static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, bool confirm_neigh) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh); } static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; path->ops->redirect(path, sk, skb); } static void xfrm6_dst_destroy(struct dst_entry *dst) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; dst_destroy_metrics_generic(dst); if (xdst->u.rt6.rt6i_uncached_list) rt6_uncached_list_del(&xdst->u.rt6); if (likely(xdst->u.rt6.rt6i_idev)) in6_dev_put(xdst->u.rt6.rt6i_idev); xfrm_dst_destroy(xdst); } static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int unregister) { struct xfrm_dst *xdst; if (!unregister) return; xdst = (struct xfrm_dst *)dst; if (xdst->u.rt6.rt6i_idev->dev == dev) { struct inet6_dev *loopback_idev = in6_dev_get(dev_net(dev)->loopback_dev); do { in6_dev_put(xdst->u.rt6.rt6i_idev); xdst->u.rt6.rt6i_idev = loopback_idev; in6_dev_hold(loopback_idev); xdst = (struct xfrm_dst *)xfrm_dst_child(&xdst->u.dst); } while (xdst->u.dst.xfrm); __in6_dev_put(loopback_idev); } xfrm_dst_ifdown(dst, dev); } static struct dst_ops xfrm6_dst_ops_template = { .family = AF_INET6, .update_pmtu = xfrm6_update_pmtu, .redirect = xfrm6_redirect, .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, .gc_thresh = 32768, }; static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .dst_ops = &xfrm6_dst_ops_template, .dst_lookup = xfrm6_dst_lookup, .get_saddr = xfrm6_get_saddr, .fill_dst = xfrm6_fill_dst, .blackhole_route = ip6_blackhole_route, }; static int __init xfrm6_policy_init(void) { return xfrm_policy_register_afinfo(&xfrm6_policy_afinfo, AF_INET6); } static void xfrm6_policy_fini(void) { xfrm_policy_unregister_afinfo(&xfrm6_policy_afinfo); } #ifdef CONFIG_SYSCTL static struct ctl_table xfrm6_policy_table[] = { { .procname = "xfrm6_gc_thresh", .data = &init_net.xfrm.xfrm6_dst_ops.gc_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { } }; static int __net_init xfrm6_net_sysctl_init(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; table = xfrm6_policy_table; if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(xfrm6_policy_table), GFP_KERNEL); if (!table) goto err_alloc; table[0].data = &net->xfrm.xfrm6_dst_ops.gc_thresh; } hdr = register_net_sysctl(net, "net/ipv6", table); if (!hdr) goto err_reg; net->ipv6.sysctl.xfrm6_hdr = hdr; return 0; err_reg: if (!net_eq(net, &init_net)) kfree(table); err_alloc: return -ENOMEM; } static void __net_exit xfrm6_net_sysctl_exit(struct net *net) { struct ctl_table *table; if (!net->ipv6.sysctl.xfrm6_hdr) return; table = net->ipv6.sysctl.xfrm6_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv6.sysctl.xfrm6_hdr); if (!net_eq(net, &init_net)) kfree(table); } #else /* CONFIG_SYSCTL */ static inline int xfrm6_net_sysctl_init(struct net *net) { return 0; } static inline void xfrm6_net_sysctl_exit(struct net *net) { } #endif static int __net_init xfrm6_net_init(struct net *net) { int ret; memcpy(&net->xfrm.xfrm6_dst_ops, &xfrm6_dst_ops_template, sizeof(xfrm6_dst_ops_template)); ret = dst_entries_init(&net->xfrm.xfrm6_dst_ops); if (ret) return ret; ret = xfrm6_net_sysctl_init(net); if (ret) dst_entries_destroy(&net->xfrm.xfrm6_dst_ops); return ret; } static void __net_exit xfrm6_net_exit(struct net *net) { xfrm6_net_sysctl_exit(net); dst_entries_destroy(&net->xfrm.xfrm6_dst_ops); } static struct pernet_operations xfrm6_net_ops = { .init = xfrm6_net_init, .exit = xfrm6_net_exit, }; int __init xfrm6_init(void) { int ret; ret = xfrm6_policy_init(); if (ret) goto out; ret = xfrm6_state_init(); if (ret) goto out_policy; ret = xfrm6_protocol_init(); if (ret) goto out_state; ret = register_pernet_subsys(&xfrm6_net_ops); if (ret) goto out_protocol; out: return ret; out_protocol: xfrm6_protocol_fini(); out_state: xfrm6_state_fini(); out_policy: xfrm6_policy_fini(); goto out; } void xfrm6_fini(void) { unregister_pernet_subsys(&xfrm6_net_ops); xfrm6_protocol_fini(); xfrm6_policy_fini(); xfrm6_state_fini(); } |
235 235 235 235 234 234 359 362 106 361 361 361 360 337 351 361 352 353 353 352 194 193 124 119 10 10 10 10 2 8 10 10 2 143 99 10 194 193 10 2 194 194 9 2 1 1 194 2 195 195 195 195 92 194 2 2 194 144 99 192 191 192 96 10 96 194 193 184 35 215 195 195 129 2 2 354 320 340 340 325 344 295 315 2 2 2 2 2 2 2 354 354 354 354 319 331 11 64 64 68 68 68 68 68 68 3 68 68 68 353 351 352 334 321 348 20 347 108 108 322 321 319 224 222 3 320 318 68 3 68 2 1 1 1 1 62 217 12 344 227 350 351 351 351 199 348 344 226 350 351 320 345 227 100 188 350 314 348 317 351 350 23 350 31 350 229 319 320 321 321 320 12 223 40 138 315 320 4 3 350 14 14 9 5 5 5 5 5 5 5 5 5 29 2 29 7 5 2 2 5 107 19 6 6 1 5 6 18 18 18 23 23 23 23 23 27 27 27 9 27 26 220 218 218 218 8 8 8 1 6 1 5 8 8 8 8 8 7 1 4 1 4 150 137 16 1 1 8 16 16 15 13 7 15 15 15 15 2 14 14 14 15 15 4 13 15 150 140 3 16 25 25 25 25 25 25 2 2 2 31 31 23 6 15 7 16 16 12 23 26 3 1 2 7 6 1 1 3 6 23 2 2 2 2 2 2 2 23 23 20 22 23 32 32 27 32 215 216 214 110 216 195 208 216 216 212 212 22 1 212 84 84 5 5 4 1 5 3 2 5 5 173 173 140 166 166 89 2 170 167 18 4 141 3 4 138 134 17 16 1 17 61 61 61 1 56 32 60 26 51 56 32 23 11 56 56 33 33 33 33 33 31 11 271 1 3 269 271 260 28 28 28 1 28 28 271 37 14 38 38 23 29 9 3 8 8 8 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007,2008 Oracle. All rights reserved. */ #include <linux/sched.h> #include <linux/slab.h> #include <linux/rbtree.h> #include <linux/mm.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "print-tree.h" #include "locking.h" #include "volumes.h" #include "qgroup.h" #include "tree-mod-log.h" static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level); static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, const struct btrfs_key *ins_key, struct btrfs_path *path, int data_size, int extend); static int push_node_left(struct btrfs_trans_handle *trans, struct extent_buffer *dst, struct extent_buffer *src, int empty); static int balance_node_right(struct btrfs_trans_handle *trans, struct extent_buffer *dst_buf, struct extent_buffer *src_buf); static void del_ptr(struct btrfs_root *root, struct btrfs_path *path, int level, int slot); static const struct btrfs_csums { u16 size; const char name[10]; const char driver[12]; } btrfs_csums[] = { [BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" }, [BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" }, [BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" }, [BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b", .driver = "blake2b-256" }, }; int btrfs_super_csum_size(const struct btrfs_super_block *s) { u16 t = btrfs_super_csum_type(s); /* * csum type is validated at mount time */ return btrfs_csums[t].size; } const char *btrfs_super_csum_name(u16 csum_type) { /* csum type is validated at mount time */ return btrfs_csums[csum_type].name; } /* * Return driver name if defined, otherwise the name that's also a valid driver * name */ const char *btrfs_super_csum_driver(u16 csum_type) { /* csum type is validated at mount time */ return btrfs_csums[csum_type].driver[0] ? btrfs_csums[csum_type].driver : btrfs_csums[csum_type].name; } size_t __attribute_const__ btrfs_get_num_csums(void) { return ARRAY_SIZE(btrfs_csums); } struct btrfs_path *btrfs_alloc_path(void) { might_sleep(); return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); } /* this also releases the path */ void btrfs_free_path(struct btrfs_path *p) { if (!p) return; btrfs_release_path(p); kmem_cache_free(btrfs_path_cachep, p); } /* * path release drops references on the extent buffers in the path * and it drops any locks held by this path * * It is safe to call this on paths that no locks or extent buffers held. */ noinline void btrfs_release_path(struct btrfs_path *p) { int i; for (i = 0; i < BTRFS_MAX_LEVEL; i++) { p->slots[i] = 0; if (!p->nodes[i]) continue; if (p->locks[i]) { btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]); p->locks[i] = 0; } free_extent_buffer(p->nodes[i]); p->nodes[i] = NULL; } } /* * safely gets a reference on the root node of a tree. A lock * is not taken, so a concurrent writer may put a different node * at the root of the tree. See btrfs_lock_root_node for the * looping required. * * The extent buffer returned by this has a reference taken, so * it won't disappear. It may stop being the root of the tree * at any time because there are no locks held. */ struct extent_buffer *btrfs_root_node(struct btrfs_root *root) { struct extent_buffer *eb; while (1) { rcu_read_lock(); eb = rcu_dereference(root->node); /* * RCU really hurts here, we could free up the root node because * it was COWed but we may not get the new root node yet so do * the inc_not_zero dance and if it doesn't work then * synchronize_rcu and try again. */ if (atomic_inc_not_zero(&eb->refs)) { rcu_read_unlock(); break; } rcu_read_unlock(); synchronize_rcu(); } return eb; } /* * Cowonly root (not-shareable trees, everything not subvolume or reloc roots), * just get put onto a simple dirty list. Transaction walks this list to make * sure they get properly updated on disk. */ static void add_root_to_dirty_list(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; if (test_bit(BTRFS_ROOT_DIRTY, &root->state) || !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state)) return; spin_lock(&fs_info->trans_lock); if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) { /* Want the extent tree to be the last on the list */ if (root->root_key.objectid == BTRFS_EXTENT_TREE_OBJECTID) list_move_tail(&root->dirty_list, &fs_info->dirty_cowonly_roots); else list_move(&root->dirty_list, &fs_info->dirty_cowonly_roots); } spin_unlock(&fs_info->trans_lock); } /* * used by snapshot creation to make a copy of a root for a tree with * a given objectid. The buffer with the new root node is returned in * cow_ret, and this func returns zero on success or a negative error code. */ int btrfs_copy_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer **cow_ret, u64 new_root_objectid) { struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *cow; int ret = 0; int level; struct btrfs_disk_key disk_key; WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && trans->transid != fs_info->running_transaction->transid); WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && trans->transid != root->last_trans); level = btrfs_header_level(buf); if (level == 0) btrfs_item_key(buf, &disk_key, 0); else btrfs_node_key(buf, &disk_key, 0); cow = btrfs_alloc_tree_block(trans, root, 0, new_root_objectid, &disk_key, level, buf->start, 0, BTRFS_NESTING_NEW_ROOT); if (IS_ERR(cow)) return PTR_ERR(cow); copy_extent_buffer_full(cow, buf); btrfs_set_header_bytenr(cow, cow->start); btrfs_set_header_generation(cow, trans->transid); btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN | BTRFS_HEADER_FLAG_RELOC); if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC); else btrfs_set_header_owner(cow, new_root_objectid); write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid); WARN_ON(btrfs_header_generation(buf) > trans->transid); if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) ret = btrfs_inc_ref(trans, root, cow, 1); else ret = btrfs_inc_ref(trans, root, cow, 0); if (ret) { btrfs_tree_unlock(cow); free_extent_buffer(cow); btrfs_abort_transaction(trans, ret); return ret; } btrfs_mark_buffer_dirty(cow); *cow_ret = cow; return 0; } /* * check if the tree block can be shared by multiple trees */ int btrfs_block_can_be_shared(struct btrfs_root *root, struct extent_buffer *buf) { /* * Tree blocks not in shareable trees and tree roots are never shared. * If a block was allocated after the last snapshot and the block was * not allocated by tree relocation, we know the block is not shared. */ if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && buf != root->node && buf != root->commit_root && (btrfs_header_generation(buf) <= btrfs_root_last_snapshot(&root->root_item) || btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) return 1; return 0; } static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *cow, int *last_ref) { struct btrfs_fs_info *fs_info = root->fs_info; u64 refs; u64 owner; u64 flags; u64 new_flags = 0; int ret; /* * Backrefs update rules: * * Always use full backrefs for extent pointers in tree block * allocated by tree relocation. * * If a shared tree block is no longer referenced by its owner * tree (btrfs_header_owner(buf) == root->root_key.objectid), * use full backrefs for extent pointers in tree block. * * If a tree block is been relocating * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID), * use full backrefs for extent pointers in tree block. * The reason for this is some operations (such as drop tree) * are only allowed for blocks use full backrefs. */ if (btrfs_block_can_be_shared(root, buf)) { ret = btrfs_lookup_extent_info(trans, fs_info, buf->start, btrfs_header_level(buf), 1, &refs, &flags); if (ret) return ret; if (refs == 0) { ret = -EROFS; btrfs_handle_fs_error(fs_info, ret, NULL); return ret; } } else { refs = 1; if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; else flags = 0; } owner = btrfs_header_owner(buf); if (unlikely(owner == BTRFS_TREE_RELOC_OBJECTID && !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))) { btrfs_crit(fs_info, "found tree block at bytenr %llu level %d root %llu refs %llu flags %llx without full backref flag set", buf->start, btrfs_header_level(buf), btrfs_root_id(root), refs, flags); ret = -EUCLEAN; btrfs_abort_transaction(trans, ret); return ret; } if (refs > 1) { if ((owner == root->root_key.objectid || root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { ret = btrfs_inc_ref(trans, root, buf, 1); if (ret) return ret; if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { ret = btrfs_dec_ref(trans, root, buf, 0); if (ret) return ret; ret = btrfs_inc_ref(trans, root, cow, 1); if (ret) return ret; } new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; } else { if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) ret = btrfs_inc_ref(trans, root, cow, 1); else ret = btrfs_inc_ref(trans, root, cow, 0); if (ret) return ret; } if (new_flags != 0) { int level = btrfs_header_level(buf); ret = btrfs_set_disk_extent_flags(trans, buf, new_flags, level, 0); if (ret) return ret; } } else { if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) ret = btrfs_inc_ref(trans, root, cow, 1); else ret = btrfs_inc_ref(trans, root, cow, 0); if (ret) return ret; ret = btrfs_dec_ref(trans, root, buf, 1); if (ret) return ret; } btrfs_clean_tree_block(buf); *last_ref = 1; } return 0; } /* * does the dirty work in cow of a single block. The parent block (if * supplied) is updated to point to the new cow copy. The new buffer is marked * dirty and returned locked. If you modify the block it needs to be marked * dirty again. * * search_start -- an allocation hint for the new block * * empty_size -- a hint that you plan on doing more cow. This is the size in * bytes the allocator should try to find free next to the block it returns. * This is just a hint and may be ignored by the allocator. */ int btrfs_force_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, struct extent_buffer **cow_ret, u64 search_start, u64 empty_size, enum btrfs_lock_nesting nest) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_disk_key disk_key; struct extent_buffer *cow; int level, ret; int last_ref = 0; int unlock_orig = 0; u64 parent_start = 0; if (*cow_ret == buf) unlock_orig = 1; btrfs_assert_tree_locked(buf); WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && trans->transid != fs_info->running_transaction->transid); WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && trans->transid != root->last_trans); level = btrfs_header_level(buf); if (level == 0) btrfs_item_key(buf, &disk_key, 0); else btrfs_node_key(buf, &disk_key, 0); if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent) parent_start = parent->start; cow = btrfs_alloc_tree_block(trans, root, parent_start, root->root_key.objectid, &disk_key, level, search_start, empty_size, nest); if (IS_ERR(cow)) return PTR_ERR(cow); /* cow is set to blocking by btrfs_init_new_buffer */ copy_extent_buffer_full(cow, buf); btrfs_set_header_bytenr(cow, cow->start); btrfs_set_header_generation(cow, trans->transid); btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN | BTRFS_HEADER_FLAG_RELOC); if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC); else btrfs_set_header_owner(cow, root->root_key.objectid); write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid); ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); if (ret) { btrfs_tree_unlock(cow); free_extent_buffer(cow); btrfs_abort_transaction(trans, ret); return ret; } if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) { ret = btrfs_reloc_cow_block(trans, root, buf, cow); if (ret) { btrfs_tree_unlock(cow); free_extent_buffer(cow); btrfs_abort_transaction(trans, ret); return ret; } } if (buf == root->node) { WARN_ON(parent && parent != buf); if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) parent_start = buf->start; ret = btrfs_tree_mod_log_insert_root(root->node, cow, true); if (ret < 0) { btrfs_tree_unlock(cow); free_extent_buffer(cow); btrfs_abort_transaction(trans, ret); return ret; } atomic_inc(&cow->refs); rcu_assign_pointer(root->node, cow); btrfs_free_tree_block(trans, btrfs_root_id(root), buf, parent_start, last_ref); free_extent_buffer(buf); add_root_to_dirty_list(root); } else { WARN_ON(trans->transid != btrfs_header_generation(parent)); btrfs_tree_mod_log_insert_key(parent, parent_slot, BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS); btrfs_set_node_blockptr(parent, parent_slot, cow->start); btrfs_set_node_ptr_generation(parent, parent_slot, trans->transid); btrfs_mark_buffer_dirty(parent); if (last_ref) { ret = btrfs_tree_mod_log_free_eb(buf); if (ret) { btrfs_tree_unlock(cow); free_extent_buffer(cow); btrfs_abort_transaction(trans, ret); return ret; } } btrfs_free_tree_block(trans, btrfs_root_id(root), buf, parent_start, last_ref); } trace_btrfs_cow_block(root, buf, cow); if (unlock_orig) btrfs_tree_unlock(buf); free_extent_buffer_stale(buf); btrfs_mark_buffer_dirty(cow); *cow_ret = cow; return 0; } static inline int should_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf) { if (btrfs_is_testing(root->fs_info)) return 0; /* Ensure we can see the FORCE_COW bit */ smp_mb__before_atomic(); /* * We do not need to cow a block if * 1) this block is not created or changed in this transaction; * 2) this block does not belong to TREE_RELOC tree; * 3) the root is not forced COW. * * What is forced COW: * when we create snapshot during committing the transaction, * after we've finished copying src root, we must COW the shared * block to ensure the metadata consistency. */ if (btrfs_header_generation(buf) == trans->transid && !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) && !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID && btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) && !test_bit(BTRFS_ROOT_FORCE_COW, &root->state)) return 0; return 1; } /* * COWs a single block, see btrfs_force_cow_block() for the real work. * This version of it has extra checks so that a block isn't COWed more than * once per transaction, as long as it hasn't been written yet */ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, struct extent_buffer **cow_ret, enum btrfs_lock_nesting nest) { struct btrfs_fs_info *fs_info = root->fs_info; u64 search_start; if (unlikely(test_bit(BTRFS_ROOT_DELETING, &root->state))) { btrfs_abort_transaction(trans, -EUCLEAN); btrfs_crit(fs_info, "attempt to COW block %llu on root %llu that is being deleted", buf->start, btrfs_root_id(root)); return -EUCLEAN; } /* * COWing must happen through a running transaction, which always * matches the current fs generation (it's a transaction with a state * less than TRANS_STATE_UNBLOCKED). If it doesn't, then turn the fs * into error state to prevent the commit of any transaction. */ if (unlikely(trans->transaction != fs_info->running_transaction || trans->transid != fs_info->generation)) { btrfs_abort_transaction(trans, -EUCLEAN); btrfs_crit(fs_info, "unexpected transaction when attempting to COW block %llu on root %llu, transaction %llu running transaction %llu fs generation %llu", buf->start, btrfs_root_id(root), trans->transid, fs_info->running_transaction->transid, fs_info->generation); return -EUCLEAN; } if (!should_cow_block(trans, root, buf)) { *cow_ret = buf; return 0; } search_start = buf->start & ~((u64)SZ_1G - 1); /* * Before CoWing this block for later modification, check if it's * the subtree root and do the delayed subtree trace if needed. * * Also We don't care about the error, as it's handled internally. */ btrfs_qgroup_trace_subtree_after_cow(trans, root, buf); return btrfs_force_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0, nest); } ALLOW_ERROR_INJECTION(btrfs_cow_block, ERRNO); /* * helper function for defrag to decide if two blocks pointed to by a * node are actually close by */ static int close_blocks(u64 blocknr, u64 other, u32 blocksize) { if (blocknr < other && other - (blocknr + blocksize) < 32768) return 1; if (blocknr > other && blocknr - (other + blocksize) < 32768) return 1; return 0; } #ifdef __LITTLE_ENDIAN /* * Compare two keys, on little-endian the disk order is same as CPU order and * we can avoid the conversion. */ static int comp_keys(const struct btrfs_disk_key *disk_key, const struct btrfs_key *k2) { const struct btrfs_key *k1 = (const struct btrfs_key *)disk_key; return btrfs_comp_cpu_keys(k1, k2); } #else /* * compare two keys in a memcmp fashion */ static int comp_keys(const struct btrfs_disk_key *disk, const struct btrfs_key *k2) { struct btrfs_key k1; btrfs_disk_key_to_cpu(&k1, disk); return btrfs_comp_cpu_keys(&k1, k2); } #endif /* * same as comp_keys only with two btrfs_key's */ int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2) { if (k1->objectid > k2->objectid) return 1; if (k1->objectid < k2->objectid) return -1; if (k1->type > k2->type) return 1; if (k1->type < k2->type) return -1; if (k1->offset > k2->offset) return 1; if (k1->offset < k2->offset) return -1; return 0; } /* * this is used by the defrag code to go through all the * leaves pointed to by a node and reallocate them so that * disk order is close to key order */ int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *parent, int start_slot, u64 *last_ret, struct btrfs_key *progress) { struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *cur; u64 blocknr; u64 search_start = *last_ret; u64 last_block = 0; u64 other; u32 parent_nritems; int end_slot; int i; int err = 0; u32 blocksize; int progress_passed = 0; struct btrfs_disk_key disk_key; /* * COWing must happen through a running transaction, which always * matches the current fs generation (it's a transaction with a state * less than TRANS_STATE_UNBLOCKED). If it doesn't, then turn the fs * into error state to prevent the commit of any transaction. */ if (unlikely(trans->transaction != fs_info->running_transaction || trans->transid != fs_info->generation)) { btrfs_abort_transaction(trans, -EUCLEAN); btrfs_crit(fs_info, "unexpected transaction when attempting to reallocate parent %llu for root %llu, transaction %llu running transaction %llu fs generation %llu", parent->start, btrfs_root_id(root), trans->transid, fs_info->running_transaction->transid, fs_info->generation); return -EUCLEAN; } parent_nritems = btrfs_header_nritems(parent); blocksize = fs_info->nodesize; end_slot = parent_nritems - 1; if (parent_nritems <= 1) return 0; for (i = start_slot; i <= end_slot; i++) { int close = 1; btrfs_node_key(parent, &disk_key, i); if (!progress_passed && comp_keys(&disk_key, progress) < 0) continue; progress_passed = 1; blocknr = btrfs_node_blockptr(parent, i); if (last_block == 0) last_block = blocknr; if (i > 0) { other = btrfs_node_blockptr(parent, i - 1); close = close_blocks(blocknr, other, blocksize); } if (!close && i < end_slot) { other = btrfs_node_blockptr(parent, i + 1); close = close_blocks(blocknr, other, blocksize); } if (close) { last_block = blocknr; continue; } cur = btrfs_read_node_slot(parent, i); if (IS_ERR(cur)) return PTR_ERR(cur); if (search_start == 0) search_start = last_block; btrfs_tree_lock(cur); err = btrfs_force_cow_block(trans, root, cur, parent, i, &cur, search_start, min(16 * blocksize, (end_slot - i) * blocksize), BTRFS_NESTING_COW); if (err) { btrfs_tree_unlock(cur); free_extent_buffer(cur); break; } search_start = cur->start; last_block = cur->start; *last_ret = search_start; btrfs_tree_unlock(cur); free_extent_buffer(cur); } return err; } /* * search for key in the extent_buffer. The items start at offset p, * and they are item_size apart. * * the slot in the array is returned via slot, and it points to * the place where you would insert key if it is not found in * the array. * * Slot may point to total number of items if the key is bigger than * all of the keys */ static noinline int generic_bin_search(struct extent_buffer *eb, unsigned long p, int item_size, const struct btrfs_key *key, int *slot) { int low = 0; int high = btrfs_header_nritems(eb); int ret; const int key_size = sizeof(struct btrfs_disk_key); if (low > high) { btrfs_err(eb->fs_info, "%s: low (%d) > high (%d) eb %llu owner %llu level %d", __func__, low, high, eb->start, btrfs_header_owner(eb), btrfs_header_level(eb)); return -EINVAL; } while (low < high) { unsigned long oip; unsigned long offset; struct btrfs_disk_key *tmp; struct btrfs_disk_key unaligned; int mid; mid = (low + high) / 2; offset = p + mid * item_size; oip = offset_in_page(offset); if (oip + key_size <= PAGE_SIZE) { const unsigned long idx = get_eb_page_index(offset); char *kaddr = page_address(eb->pages[idx]); oip = get_eb_offset_in_page(eb, offset); tmp = (struct btrfs_disk_key *)(kaddr + oip); } else { read_extent_buffer(eb, &unaligned, offset, key_size); tmp = &unaligned; } ret = comp_keys(tmp, key); if (ret < 0) low = mid + 1; else if (ret > 0) high = mid; else { *slot = mid; return 0; } } *slot = low; return 1; } /* * simple bin_search frontend that does the right thing for * leaves vs nodes */ int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key, int *slot) { if (btrfs_header_level(eb) == 0) return generic_bin_search(eb, offsetof(struct btrfs_leaf, items), sizeof(struct btrfs_item), key, slot); else return generic_bin_search(eb, offsetof(struct btrfs_node, ptrs), sizeof(struct btrfs_key_ptr), key, slot); } static void root_add_used(struct btrfs_root *root, u32 size) { spin_lock(&root->accounting_lock); btrfs_set_root_used(&root->root_item, btrfs_root_used(&root->root_item) + size); spin_unlock(&root->accounting_lock); } static void root_sub_used(struct btrfs_root *root, u32 size) { spin_lock(&root->accounting_lock); btrfs_set_root_used(&root->root_item, btrfs_root_used(&root->root_item) - size); spin_unlock(&root->accounting_lock); } /* given a node and slot number, this reads the blocks it points to. The * extent buffer is returned with a reference taken (but unlocked). */ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent, int slot) { int level = btrfs_header_level(parent); struct extent_buffer *eb; struct btrfs_key first_key; if (slot < 0 || slot >= btrfs_header_nritems(parent)) return ERR_PTR(-ENOENT); BUG_ON(level == 0); btrfs_node_key_to_cpu(parent, &first_key, slot); eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot), btrfs_header_owner(parent), btrfs_node_ptr_generation(parent, slot), level - 1, &first_key); if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) { free_extent_buffer(eb); eb = ERR_PTR(-EIO); } return eb; } /* * node level balancing, used to make sure nodes are in proper order for * item deletion. We balance from the top down, so we have to make sure * that a deletion won't leave an node completely empty later on. */ static noinline int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *right = NULL; struct extent_buffer *mid; struct extent_buffer *left = NULL; struct extent_buffer *parent = NULL; int ret = 0; int wret; int pslot; int orig_slot = path->slots[level]; u64 orig_ptr; ASSERT(level > 0); mid = path->nodes[level]; WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK); WARN_ON(btrfs_header_generation(mid) != trans->transid); orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < BTRFS_MAX_LEVEL - 1) { parent = path->nodes[level + 1]; pslot = path->slots[level + 1]; } /* * deal with the case where there is only one pointer in the root * by promoting the node below to a root */ if (!parent) { struct extent_buffer *child; if (btrfs_header_nritems(mid) != 1) return 0; /* promote the child to a root */ child = btrfs_read_node_slot(mid, 0); if (IS_ERR(child)) { ret = PTR_ERR(child); btrfs_handle_fs_error(fs_info, ret, NULL); goto enospc; } btrfs_tree_lock(child); ret = btrfs_cow_block(trans, root, child, mid, 0, &child, BTRFS_NESTING_COW); if (ret) { btrfs_tree_unlock(child); free_extent_buffer(child); goto enospc; } ret = btrfs_tree_mod_log_insert_root(root->node, child, true); if (ret < 0) { btrfs_tree_unlock(child); free_extent_buffer(child); btrfs_abort_transaction(trans, ret); goto enospc; } rcu_assign_pointer(root->node, child); add_root_to_dirty_list(root); btrfs_tree_unlock(child); path->locks[level] = 0; path->nodes[level] = NULL; btrfs_clean_tree_block(mid); btrfs_tree_unlock(mid); /* once for the path */ free_extent_buffer(mid); root_sub_used(root, mid->len); btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1); /* once for the root ptr */ free_extent_buffer_stale(mid); return 0; } if (btrfs_header_nritems(mid) > BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4) return 0; left = btrfs_read_node_slot(parent, pslot - 1); if (IS_ERR(left)) left = NULL; if (left) { __btrfs_tree_lock(left, BTRFS_NESTING_LEFT); wret = btrfs_cow_block(trans, root, left, parent, pslot - 1, &left, BTRFS_NESTING_LEFT_COW); if (wret) { ret = wret; goto enospc; } } right = btrfs_read_node_slot(parent, pslot + 1); if (IS_ERR(right)) right = NULL; if (right) { __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT); wret = btrfs_cow_block(trans, root, right, parent, pslot + 1, &right, BTRFS_NESTING_RIGHT_COW); if (wret) { ret = wret; goto enospc; } } /* first, try to make some room in the middle buffer */ if (left) { orig_slot += btrfs_header_nritems(left); wret = push_node_left(trans, left, mid, 1); if (wret < 0) ret = wret; } /* * then try to empty the right most buffer into the middle */ if (right) { wret = push_node_left(trans, mid, right, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; if (btrfs_header_nritems(right) == 0) { btrfs_clean_tree_block(right); btrfs_tree_unlock(right); del_ptr(root, path, level + 1, pslot + 1); root_sub_used(root, right->len); btrfs_free_tree_block(trans, btrfs_root_id(root), right, 0, 1); free_extent_buffer_stale(right); right = NULL; } else { struct btrfs_disk_key right_key; btrfs_node_key(right, &right_key, 0); ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1, BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS); if (ret < 0) { btrfs_abort_transaction(trans, ret); goto enospc; } btrfs_set_node_key(parent, &right_key, pslot + 1); btrfs_mark_buffer_dirty(parent); } } if (btrfs_header_nritems(mid) == 1) { /* * we're not allowed to leave a node with one item in the * tree during a delete. A deletion from lower in the tree * could try to delete the only pointer in this node. * So, pull some keys from the left. * There has to be a left pointer at this point because * otherwise we would have pulled some pointers from the * right */ if (!left) { ret = -EROFS; btrfs_handle_fs_error(fs_info, ret, NULL); goto enospc; } wret = balance_node_right(trans, mid, left); if (wret < 0) { ret = wret; goto enospc; } if (wret == 1) { wret = push_node_left(trans, left, mid, 1); if (wret < 0) ret = wret; } BUG_ON(wret == 1); } if (btrfs_header_nritems(mid) == 0) { btrfs_clean_tree_block(mid); btrfs_tree_unlock(mid); del_ptr(root, path, level + 1, pslot); root_sub_used(root, mid->len); btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1); free_extent_buffer_stale(mid); mid = NULL; } else { /* update the parent key to reflect our changes */ struct btrfs_disk_key mid_key; btrfs_node_key(mid, &mid_key, 0); ret = btrfs_tree_mod_log_insert_key(parent, pslot, BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS); if (ret < 0) { btrfs_abort_transaction(trans, ret); goto enospc; } btrfs_set_node_key(parent, &mid_key, pslot); btrfs_mark_buffer_dirty(parent); } /* update the path */ if (left) { if (btrfs_header_nritems(left) > orig_slot) { atomic_inc(&left->refs); /* left was locked after cow */ path->nodes[level] = left; path->slots[level + 1] -= 1; path->slots[level] = orig_slot; if (mid) { btrfs_tree_unlock(mid); free_extent_buffer(mid); } } else { orig_slot -= btrfs_header_nritems(left); path->slots[level] = orig_slot; } } /* double check we haven't messed things up */ if (orig_ptr != btrfs_node_blockptr(path->nodes[level], path->slots[level])) BUG(); enospc: if (right) { btrfs_tree_unlock(right); free_extent_buffer(right); } if (left) { if (path->nodes[level] != left) btrfs_tree_unlock(left); free_extent_buffer(left); } return ret; } /* Node balancing for insertion. Here we only split or push nodes around * when they are completely full. This is also done top down, so we * have to be pessimistic. */ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *right = NULL; struct extent_buffer *mid; struct extent_buffer *left = NULL; struct extent_buffer *parent = NULL; int ret = 0; int wret; int pslot; int orig_slot = path->slots[level]; if (level == 0) return 1; mid = path->nodes[level]; WARN_ON(btrfs_header_generation(mid) != trans->transid); if (level < BTRFS_MAX_LEVEL - 1) { parent = path->nodes[level + 1]; pslot = path->slots[level + 1]; } if (!parent) return 1; left = btrfs_read_node_slot(parent, pslot - 1); if (IS_ERR(left)) left = NULL; /* first, try to make some room in the middle buffer */ if (left) { u32 left_nr; __btrfs_tree_lock(left, BTRFS_NESTING_LEFT); left_nr = btrfs_header_nritems(left); if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) { wret = 1; } else { ret = btrfs_cow_block(trans, root, left, parent, pslot - 1, &left, BTRFS_NESTING_LEFT_COW); if (ret) wret = 1; else { wret = push_node_left(trans, left, mid, 0); } } if (wret < 0) ret = wret; if (wret == 0) { struct btrfs_disk_key disk_key; orig_slot += left_nr; btrfs_node_key(mid, &disk_key, 0); ret = btrfs_tree_mod_log_insert_key(parent, pslot, BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS); BUG_ON(ret < 0); btrfs_set_node_key(parent, &disk_key, pslot); btrfs_mark_buffer_dirty(parent); if (btrfs_header_nritems(left) > orig_slot) { path->nodes[level] = left; path->slots[level + 1] -= 1; path->slots[level] = orig_slot; btrfs_tree_unlock(mid); free_extent_buffer(mid); } else { orig_slot -= btrfs_header_nritems(left); path->slots[level] = orig_slot; btrfs_tree_unlock(left); free_extent_buffer(left); } return 0; } btrfs_tree_unlock(left); free_extent_buffer(left); } right = btrfs_read_node_slot(parent, pslot + 1); if (IS_ERR(right)) right = NULL; /* * then try to empty the right most buffer into the middle */ if (right) { u32 right_nr; __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT); right_nr = btrfs_header_nritems(right); if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) { wret = 1; } else { ret = btrfs_cow_block(trans, root, right, parent, pslot + 1, &right, BTRFS_NESTING_RIGHT_COW); if (ret) wret = 1; else { wret = balance_node_right(trans, right, mid); } } if (wret < 0) ret = wret; if (wret == 0) { struct btrfs_disk_key disk_key; btrfs_node_key(right, &disk_key, 0); ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1, BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS); BUG_ON(ret < 0); btrfs_set_node_key(parent, &disk_key, pslot + 1); btrfs_mark_buffer_dirty(parent); if (btrfs_header_nritems(mid) <= orig_slot) { path->nodes[level] = right; path->slots[level + 1] += 1; path->slots[level] = orig_slot - btrfs_header_nritems(mid); btrfs_tree_unlock(mid); free_extent_buffer(mid); } else { btrfs_tree_unlock(right); free_extent_buffer(right); } return 0; } btrfs_tree_unlock(right); free_extent_buffer(right); } return 1; } /* * readahead one full node of leaves, finding things that are close * to the block in 'slot', and triggering ra on them. */ static void reada_for_search(struct btrfs_fs_info *fs_info, struct btrfs_path *path, int level, int slot, u64 objectid) { struct extent_buffer *node; struct btrfs_disk_key disk_key; u32 nritems; u64 search; u64 target; u64 nread = 0; u64 nread_max; u32 nr; u32 blocksize; u32 nscan = 0; if (level != 1 && path->reada != READA_FORWARD_ALWAYS) return; if (!path->nodes[level]) return; node = path->nodes[level]; /* * Since the time between visiting leaves is much shorter than the time * between visiting nodes, limit read ahead of nodes to 1, to avoid too * much IO at once (possibly random). */ if (path->reada == READA_FORWARD_ALWAYS) { if (level > 1) nread_max = node->fs_info->nodesize; else nread_max = SZ_128K; } else { nread_max = SZ_64K; } search = btrfs_node_blockptr(node, slot); blocksize = fs_info->nodesize; if (path->reada != READA_FORWARD_ALWAYS) { struct extent_buffer *eb; eb = find_extent_buffer(fs_info, search); if (eb) { free_extent_buffer(eb); return; } } target = search; nritems = btrfs_header_nritems(node); nr = slot; while (1) { if (path->reada == READA_BACK) { if (nr == 0) break; nr--; } else if (path->reada == READA_FORWARD || path->reada == READA_FORWARD_ALWAYS) { nr++; if (nr >= nritems) break; } if (path->reada == READA_BACK && objectid) { btrfs_node_key(node, &disk_key, nr); if (btrfs_disk_key_objectid(&disk_key) != objectid) break; } search = btrfs_node_blockptr(node, nr); if (path->reada == READA_FORWARD_ALWAYS || (search <= target && target - search <= 65536) || (search > target && search - target <= 65536)) { btrfs_readahead_node_child(node, nr); nread += blocksize; } nscan++; if (nread > nread_max || nscan > 32) break; } } static noinline void reada_for_balance(struct btrfs_path *path, int level) { struct extent_buffer *parent; int slot; int nritems; parent = path->nodes[level + 1]; if (!parent) return; nritems = btrfs_header_nritems(parent); slot = path->slots[level + 1]; if (slot > 0) btrfs_readahead_node_child(parent, slot - 1); if (slot + 1 < nritems) btrfs_readahead_node_child(parent, slot + 1); } /* * when we walk down the tree, it is usually safe to unlock the higher layers * in the tree. The exceptions are when our path goes through slot 0, because * operations on the tree might require changing key pointers higher up in the * tree. * * callers might also have set path->keep_locks, which tells this code to keep * the lock if the path points to the last slot in the block. This is part of * walking through the tree, and selecting the next slot in the higher block. * * lowest_unlock sets the lowest level in the tree we're allowed to unlock. so * if lowest_unlock is 1, level 0 won't be unlocked */ static noinline void unlock_up(struct btrfs_path *path, int level, int lowest_unlock, int min_write_lock_level, int *write_lock_level) { int i; int skip_level = level; int no_skips = 0; struct extent_buffer *t; for (i = level; i < BTRFS_MAX_LEVEL; i++) { if (!path->nodes[i]) break; if (!path->locks[i]) break; if (!no_skips && path->slots[i] == 0) { skip_level = i + 1; continue; } if (!no_skips && path->keep_locks) { u32 nritems; t = path->nodes[i]; nritems = btrfs_header_nritems(t); if (nritems < 1 || path->slots[i] >= nritems - 1) { skip_level = i + 1; continue; } } if (skip_level < i && i >= lowest_unlock) no_skips = 1; t = path->nodes[i]; if (i >= lowest_unlock && i > skip_level) { btrfs_tree_unlock_rw(t, path->locks[i]); path->locks[i] = 0; if (write_lock_level && i > min_write_lock_level && i <= *write_lock_level) { *write_lock_level = i - 1; } } } } /* * helper function for btrfs_search_slot. The goal is to find a block * in cache without setting the path to blocking. If we find the block * we return zero and the path is unchanged. * * If we can't find the block, we set the path blocking and do some * reada. -EAGAIN is returned and the search must be repeated. */ static int read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, struct extent_buffer **eb_ret, int level, int slot, const struct btrfs_key *key) { struct btrfs_fs_info *fs_info = root->fs_info; u64 blocknr; u64 gen; struct extent_buffer *tmp; struct btrfs_key first_key; int ret; int parent_level; blocknr = btrfs_node_blockptr(*eb_ret, slot); gen = btrfs_node_ptr_generation(*eb_ret, slot); parent_level = btrfs_header_level(*eb_ret); btrfs_node_key_to_cpu(*eb_ret, &first_key, slot); tmp = find_extent_buffer(fs_info, blocknr); if (tmp) { if (p->reada == READA_FORWARD_ALWAYS) reada_for_search(fs_info, p, level, slot, key->objectid); /* first we do an atomic uptodate check */ if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) { /* * Do extra check for first_key, eb can be stale due to * being cached, read from scrub, or have multiple * parents (shared tree blocks). */ if (btrfs_verify_level_key(tmp, parent_level - 1, &first_key, gen)) { free_extent_buffer(tmp); return -EUCLEAN; } *eb_ret = tmp; return 0; } /* now we're allowed to do a blocking uptodate check */ ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key); if (!ret) { *eb_ret = tmp; return 0; } free_extent_buffer(tmp); btrfs_release_path(p); return -EIO; } /* * reduce lock contention at high levels * of the btree by dropping locks before * we read. Don't release the lock on the current * level because we need to walk this node to figure * out which blocks to read. */ btrfs_unlock_up_safe(p, level + 1); if (p->reada != READA_NONE) reada_for_search(fs_info, p, level, slot, key->objectid); ret = -EAGAIN; tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid, gen, parent_level - 1, &first_key); if (!IS_ERR(tmp)) { /* * If the read above didn't mark this buffer up to date, * it will never end up being up to date. Set ret to EIO now * and give up so that our caller doesn't loop forever * on our EAGAINs. */ if (!extent_buffer_uptodate(tmp)) ret = -EIO; free_extent_buffer(tmp); } else { ret = PTR_ERR(tmp); } btrfs_release_path(p); return ret; } /* * helper function for btrfs_search_slot. This does all of the checks * for node-level blocks and does any balancing required based on * the ins_len. * * If no extra work was required, zero is returned. If we had to * drop the path, -EAGAIN is returned and btrfs_search_slot must * start over */ static int setup_nodes_for_search(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *p, struct extent_buffer *b, int level, int ins_len, int *write_lock_level) { struct btrfs_fs_info *fs_info = root->fs_info; int ret = 0; if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) { if (*write_lock_level < level + 1) { *write_lock_level = level + 1; btrfs_release_path(p); return -EAGAIN; } reada_for_balance(p, level); ret = split_node(trans, root, p, level); b = p->nodes[level]; } else if (ins_len < 0 && btrfs_header_nritems(b) < BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 2) { if (*write_lock_level < level + 1) { *write_lock_level = level + 1; btrfs_release_path(p); return -EAGAIN; } reada_for_balance(p, level); ret = balance_level(trans, root, p, level); if (ret) return ret; b = p->nodes[level]; if (!b) { btrfs_release_path(p); return -EAGAIN; } BUG_ON(btrfs_header_nritems(b) == 1); } return ret; } int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path, u64 iobjectid, u64 ioff, u8 key_type, struct btrfs_key *found_key) { int ret; struct btrfs_key key; struct extent_buffer *eb; ASSERT(path); ASSERT(found_key); key.type = key_type; key.objectid = iobjectid; key.offset = ioff; ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0); if (ret < 0) return ret; eb = path->nodes[0]; if (ret && path->slots[0] >= btrfs_header_nritems(eb)) { ret = btrfs_next_leaf(fs_root, path); if (ret) return ret; eb = path->nodes[0]; } btrfs_item_key_to_cpu(eb, found_key, path->slots[0]); if (found_key->type != key.type || found_key->objectid != key.objectid) return 1; return 0; } static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root, struct btrfs_path *p, int write_lock_level) { struct extent_buffer *b; int root_lock = 0; int level = 0; if (p->search_commit_root) { b = root->commit_root; atomic_inc(&b->refs); level = btrfs_header_level(b); /* * Ensure that all callers have set skip_locking when * p->search_commit_root = 1. */ ASSERT(p->skip_locking == 1); goto out; } if (p->skip_locking) { b = btrfs_root_node(root); level = btrfs_header_level(b); goto out; } /* We try very hard to do read locks on the root */ root_lock = BTRFS_READ_LOCK; /* * If the level is set to maximum, we can skip trying to get the read * lock. */ if (write_lock_level < BTRFS_MAX_LEVEL) { /* * We don't know the level of the root node until we actually * have it read locked */ b = btrfs_read_lock_root_node(root); level = btrfs_header_level(b); if (level > write_lock_level) goto out; /* Whoops, must trade for write lock */ btrfs_tree_read_unlock(b); free_extent_buffer(b); } b = btrfs_lock_root_node(root); root_lock = BTRFS_WRITE_LOCK; /* The level might have changed, check again */ level = btrfs_header_level(b); out: /* * The root may have failed to write out at some point, and thus is no * longer valid, return an error in this case. */ if (!extent_buffer_uptodate(b)) { if (root_lock) btrfs_tree_unlock_rw(b, root_lock); free_extent_buffer(b); return ERR_PTR(-EIO); } p->nodes[level] = b; if (!p->skip_locking) p->locks[level] = root_lock; /* * Callers are responsible for dropping b's references. */ return b; } /* * Replace the extent buffer at the lowest level of the path with a cloned * version. The purpose is to be able to use it safely, after releasing the * commit root semaphore, even if relocation is happening in parallel, the * transaction used for relocation is committed and the extent buffer is * reallocated in the next transaction. * * This is used in a context where the caller does not prevent transaction * commits from happening, either by holding a transaction handle or holding * some lock, while it's doing searches through a commit root. * At the moment it's only used for send operations. */ static int finish_need_commit_sem_search(struct btrfs_path *path) { const int i = path->lowest_level; const int slot = path->slots[i]; struct extent_buffer *lowest = path->nodes[i]; struct extent_buffer *clone; ASSERT(path->need_commit_sem); if (!lowest) return 0; lockdep_assert_held_read(&lowest->fs_info->commit_root_sem); clone = btrfs_clone_extent_buffer(lowest); if (!clone) return -ENOMEM; btrfs_release_path(path); path->nodes[i] = clone; path->slots[i] = slot; return 0; } /* * btrfs_search_slot - look for a key in a tree and perform necessary * modifications to preserve tree invariants. * * @trans: Handle of transaction, used when modifying the tree * @p: Holds all btree nodes along the search path * @root: The root node of the tree * @key: The key we are looking for * @ins_len: Indicates purpose of search: * >0 for inserts it's size of item inserted (*) * <0 for deletions * 0 for plain searches, not modifying the tree * * (*) If size of item inserted doesn't include * sizeof(struct btrfs_item), then p->search_for_extension must * be set. * @cow: boolean should CoW operations be performed. Must always be 1 * when modifying the tree. * * If @ins_len > 0, nodes and leaves will be split as we walk down the tree. * If @ins_len < 0, nodes will be merged as we walk down the tree (if possible) * * If @key is found, 0 is returned and you can find the item in the leaf level * of the path (level 0) * * If @key isn't found, 1 is returned and the leaf level of the path (level 0) * points to the slot where it should be inserted * * If an error is encountered while searching the tree a negative error number * is returned */ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, const struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow) { struct btrfs_fs_info *fs_info; struct extent_buffer *b; int slot; int ret; int err; int level; int lowest_unlock = 1; /* everything at write_lock_level or lower must be write locked */ int write_lock_level = 0; u8 lowest_level = 0; int min_write_lock_level; int prev_cmp; if (!root) return -EINVAL; fs_info = root->fs_info; might_sleep(); lowest_level = p->lowest_level; WARN_ON(lowest_level && ins_len > 0); WARN_ON(p->nodes[0] != NULL); BUG_ON(!cow && ins_len); if (ins_len < 0) { lowest_unlock = 2; /* when we are removing items, we might have to go up to level * two as we update tree pointers Make sure we keep write * for those levels as well */ write_lock_level = 2; } else if (ins_len > 0) { /* * for inserting items, make sure we have a write lock on * level 1 so we can update keys */ write_lock_level = 1; } if (!cow) write_lock_level = -1; if (cow && (p->keep_locks || p->lowest_level)) write_lock_level = BTRFS_MAX_LEVEL; min_write_lock_level = write_lock_level; if (p->need_commit_sem) { ASSERT(p->search_commit_root); down_read(&fs_info->commit_root_sem); } again: prev_cmp = -1; b = btrfs_search_slot_get_root(root, p, write_lock_level); if (IS_ERR(b)) { ret = PTR_ERR(b); goto done; } while (b) { int dec = 0; level = btrfs_header_level(b); if (cow) { bool last_level = (level == (BTRFS_MAX_LEVEL - 1)); /* * if we don't really need to cow this block * then we don't want to set the path blocking, * so we test it here */ if (!should_cow_block(trans, root, b)) goto cow_done; /* * must have write locks on this node and the * parent */ if (level > write_lock_level || (level + 1 > write_lock_level && level + 1 < BTRFS_MAX_LEVEL && p->nodes[level + 1])) { write_lock_level = level + 1; btrfs_release_path(p); goto again; } if (last_level) err = btrfs_cow_block(trans, root, b, NULL, 0, &b, BTRFS_NESTING_COW); else err = btrfs_cow_block(trans, root, b, p->nodes[level + 1], p->slots[level + 1], &b, BTRFS_NESTING_COW); if (err) { ret = err; goto done; } } cow_done: p->nodes[level] = b; /* * Leave path with blocking locks to avoid massive * lock context switch, this is made on purpose. */ /* * we have a lock on b and as long as we aren't changing * the tree, there is no way to for the items in b to change. * It is safe to drop the lock on our parent before we * go through the expensive btree search on b. * * If we're inserting or deleting (ins_len != 0), then we might * be changing slot zero, which may require changing the parent. * So, we can't drop the lock until after we know which slot * we're operating on. */ if (!ins_len && !p->keep_locks) { int u = level + 1; if (u < BTRFS_MAX_LEVEL && p->locks[u]) { btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]); p->locks[u] = 0; } } /* * If btrfs_bin_search returns an exact match (prev_cmp == 0) * we can safely assume the target key will always be in slot 0 * on lower levels due to the invariants BTRFS' btree provides, * namely that a btrfs_key_ptr entry always points to the * lowest key in the child node, thus we can skip searching * lower levels */ if (prev_cmp == 0) { slot = 0; ret = 0; } else { ret = btrfs_bin_search(b, key, &slot); prev_cmp = ret; if (ret < 0) goto done; } if (level == 0) { p->slots[level] = slot; /* * Item key already exists. In this case, if we are * allowed to insert the item (for example, in dir_item * case, item key collision is allowed), it will be * merged with the original item. Only the item size * grows, no new btrfs item will be added. If * search_for_extension is not set, ins_len already * accounts the size btrfs_item, deduct it here so leaf * space check will be correct. */ if (ret == 0 && ins_len > 0 && !p->search_for_extension) { ASSERT(ins_len >= sizeof(struct btrfs_item)); ins_len -= sizeof(struct btrfs_item); } if (ins_len > 0 && btrfs_leaf_free_space(b) < ins_len) { if (write_lock_level < 1) { write_lock_level = 1; btrfs_release_path(p); goto again; } err = split_leaf(trans, root, key, p, ins_len, ret == 0); BUG_ON(err > 0); if (err) { ret = err; goto done; } } if (!p->search_for_split) unlock_up(p, level, lowest_unlock, min_write_lock_level, NULL); goto done; } if (ret && slot > 0) { dec = 1; slot--; } p->slots[level] = slot; err = setup_nodes_for_search(trans, root, p, b, level, ins_len, &write_lock_level); if (err == -EAGAIN) goto again; if (err) { ret = err; goto done; } b = p->nodes[level]; slot = p->slots[level]; /* * Slot 0 is special, if we change the key we have to update * the parent pointer which means we must have a write lock on * the parent */ if (slot == 0 && ins_len && write_lock_level < level + 1) { write_lock_level = level + 1; btrfs_release_path(p); goto again; } unlock_up(p, level, lowest_unlock, min_write_lock_level, &write_lock_level); if (level == lowest_level) { if (dec) p->slots[level]++; goto done; } err = read_block_for_search(root, p, &b, level, slot, key); if (err == -EAGAIN) goto again; if (err) { ret = err; goto done; } if (!p->skip_locking) { level = btrfs_header_level(b); btrfs_maybe_reset_lockdep_class(root, b); if (level <= write_lock_level) { btrfs_tree_lock(b); p->locks[level] = BTRFS_WRITE_LOCK; } else { btrfs_tree_read_lock(b); p->locks[level] = BTRFS_READ_LOCK; } p->nodes[level] = b; } } ret = 1; done: if (ret < 0 && !p->skip_release_on_error) btrfs_release_path(p); if (p->need_commit_sem) { int ret2; ret2 = finish_need_commit_sem_search(p); up_read(&fs_info->commit_root_sem); if (ret2) ret = ret2; } return ret; } ALLOW_ERROR_INJECTION(btrfs_search_slot, ERRNO); /* * Like btrfs_search_slot, this looks for a key in the given tree. It uses the * current state of the tree together with the operations recorded in the tree * modification log to search for the key in a previous version of this tree, as * denoted by the time_seq parameter. * * Naturally, there is no support for insert, delete or cow operations. * * The resulting path and return value will be set up as if we called * btrfs_search_slot at that point in time with ins_len and cow both set to 0. */ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key, struct btrfs_path *p, u64 time_seq) { struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *b; int slot; int ret; int err; int level; int lowest_unlock = 1; u8 lowest_level = 0; lowest_level = p->lowest_level; WARN_ON(p->nodes[0] != NULL); if (p->search_commit_root) { BUG_ON(time_seq); return btrfs_search_slot(NULL, root, key, p, 0, 0); } again: b = btrfs_get_old_root(root, time_seq); if (!b) { ret = -EIO; goto done; } level = btrfs_header_level(b); p->locks[level] = BTRFS_READ_LOCK; while (b) { int dec = 0; level = btrfs_header_level(b); p->nodes[level] = b; /* * we have a lock on b and as long as we aren't changing * the tree, there is no way to for the items in b to change. * It is safe to drop the lock on our parent before we * go through the expensive btree search on b. */ btrfs_unlock_up_safe(p, level + 1); ret = btrfs_bin_search(b, key, &slot); if (ret < 0) goto done; if (level == 0) { p->slots[level] = slot; unlock_up(p, level, lowest_unlock, 0, NULL); goto done; } if (ret && slot > 0) { dec = 1; slot--; } p->slots[level] = slot; unlock_up(p, level, lowest_unlock, 0, NULL); if (level == lowest_level) { if (dec) p->slots[level]++; goto done; } err = read_block_for_search(root, p, &b, level, slot, key); if (err == -EAGAIN) goto again; if (err) { ret = err; goto done; } level = btrfs_header_level(b); btrfs_tree_read_lock(b); b = btrfs_tree_mod_log_rewind(fs_info, p, b, time_seq); if (!b) { ret = -ENOMEM; goto done; } p->locks[level] = BTRFS_READ_LOCK; p->nodes[level] = b; } ret = 1; done: if (ret < 0) btrfs_release_path(p); return ret; } /* * helper to use instead of search slot if no exact match is needed but * instead the next or previous item should be returned. * When find_higher is true, the next higher item is returned, the next lower * otherwise. * When return_any and find_higher are both true, and no higher item is found, * return the next lower instead. * When return_any is true and find_higher is false, and no lower item is found, * return the next higher instead. * It returns 0 if any item is found, 1 if none is found (tree empty), and * < 0 on error */ int btrfs_search_slot_for_read(struct btrfs_root *root, const struct btrfs_key *key, struct btrfs_path *p, int find_higher, int return_any) { int ret; struct extent_buffer *leaf; again: ret = btrfs_search_slot(NULL, root, key, p, 0, 0); if (ret <= 0) return ret; /* * a return value of 1 means the path is at the position where the * item should be inserted. Normally this is the next bigger item, * but in case the previous item is the last in a leaf, path points * to the first free slot in the previous leaf, i.e. at an invalid * item. */ leaf = p->nodes[0]; if (find_higher) { if (p->slots[0] >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(root, p); if (ret <= 0) return ret; if (!return_any) return 1; /* * no higher item found, return the next * lower instead */ return_any = 0; find_higher = 0; btrfs_release_path(p); goto again; } } else { if (p->slots[0] == 0) { ret = btrfs_prev_leaf(root, p); if (ret < 0) return ret; if (!ret) { leaf = p->nodes[0]; if (p->slots[0] == btrfs_header_nritems(leaf)) p->slots[0]--; return 0; } if (!return_any) return 1; /* * no lower item found, return the next * higher instead */ return_any = 0; find_higher = 1; btrfs_release_path(p); goto again; } else { --p->slots[0]; } } return 0; } /* * Execute search and call btrfs_previous_item to traverse backwards if the item * was not found. * * Return 0 if found, 1 if not found and < 0 if error. */ int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *path) { int ret; ret = btrfs_search_slot(NULL, root, key, path, 0, 0); if (ret > 0) ret = btrfs_previous_item(root, path, key->objectid, key->type); if (ret == 0) btrfs_item_key_to_cpu(path->nodes[0], key, path->slots[0]); return ret; } /* * adjust the pointers going up the tree, starting at level * making sure the right key of each node is points to 'key'. * This is used after shifting pointers to the left, so it stops * fixing up pointers when a given leaf/node is not in slot 0 of the * higher levels * */ static void fixup_low_keys(struct btrfs_path *path, struct btrfs_disk_key *key, int level) { int i; struct extent_buffer *t; int ret; for (i = level; i < BTRFS_MAX_LEVEL; i++) { int tslot = path->slots[i]; if (!path->nodes[i]) break; t = path->nodes[i]; ret = btrfs_tree_mod_log_insert_key(t, tslot, BTRFS_MOD_LOG_KEY_REPLACE, GFP_ATOMIC); BUG_ON(ret < 0); btrfs_set_node_key(t, key, tslot); btrfs_mark_buffer_dirty(path->nodes[i]); if (tslot != 0) break; } } /* * update item key. * * This function isn't completely safe. It's the caller's responsibility * that the new key won't break the order */ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info, struct btrfs_path *path, const struct btrfs_key *new_key) { struct btrfs_disk_key disk_key; struct extent_buffer *eb; int slot; eb = path->nodes[0]; slot = path->slots[0]; if (slot > 0) { btrfs_item_key(eb, &disk_key, slot - 1); if (unlikely(comp_keys(&disk_key, new_key) >= 0)) { btrfs_crit(fs_info, "slot %u key (%llu %u %llu) new key (%llu %u %llu)", slot, btrfs_disk_key_objectid(&disk_key), btrfs_disk_key_type(&disk_key), btrfs_disk_key_offset(&disk_key), new_key->objectid, new_key->type, new_key->offset); btrfs_print_leaf(eb); BUG(); } } if (slot < btrfs_header_nritems(eb) - 1) { btrfs_item_key(eb, &disk_key, slot + 1); if (unlikely(comp_keys(&disk_key, new_key) <= 0)) { btrfs_crit(fs_info, "slot %u key (%llu %u %llu) new key (%llu %u %llu)", slot, btrfs_disk_key_objectid(&disk_key), btrfs_disk_key_type(&disk_key), btrfs_disk_key_offset(&disk_key), new_key->objectid, new_key->type, new_key->offset); btrfs_print_leaf(eb); BUG(); } } btrfs_cpu_key_to_disk(&disk_key, new_key); btrfs_set_item_key(eb, &disk_key, slot); btrfs_mark_buffer_dirty(eb); if (slot == 0) fixup_low_keys(path, &disk_key, 1); } /* * Check key order of two sibling extent buffers. * * Return true if something is wrong. * Return false if everything is fine. * * Tree-checker only works inside one tree block, thus the following * corruption can not be detected by tree-checker: * * Leaf @left | Leaf @right * -------------------------------------------------------------- * | 1 | 2 | 3 | 4 | 5 | f6 | | 7 | 8 | * * Key f6 in leaf @left itself is valid, but not valid when the next * key in leaf @right is 7. * This can only be checked at tree block merge time. * And since tree checker has ensured all key order in each tree block * is correct, we only need to bother the last key of @left and the first * key of @right. */ static bool check_sibling_keys(struct extent_buffer *left, struct extent_buffer *right) { struct btrfs_key left_last; struct btrfs_key right_first; int level = btrfs_header_level(left); int nr_left = btrfs_header_nritems(left); int nr_right = btrfs_header_nritems(right); /* No key to check in one of the tree blocks */ if (!nr_left || !nr_right) return false; if (level) { btrfs_node_key_to_cpu(left, &left_last, nr_left - 1); btrfs_node_key_to_cpu(right, &right_first, 0); } else { btrfs_item_key_to_cpu(left, &left_last, nr_left - 1); btrfs_item_key_to_cpu(right, &right_first, 0); } if (btrfs_comp_cpu_keys(&left_last, &right_first) >= 0) { btrfs_crit(left->fs_info, "bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)", left_last.objectid, left_last.type, left_last.offset, right_first.objectid, right_first.type, right_first.offset); return true; } return false; } /* * try to push data from one node into the next node left in the * tree. * * returns 0 if some ptrs were pushed left, < 0 if there was some horrible * error, and > 0 if there was no room in the left hand block. */ static int push_node_left(struct btrfs_trans_handle *trans, struct extent_buffer *dst, struct extent_buffer *src, int empty) { struct btrfs_fs_info *fs_info = trans->fs_info; int push_items = 0; int src_nritems; int dst_nritems; int ret = 0; src_nritems = btrfs_header_nritems(src); dst_nritems = btrfs_header_nritems(dst); push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems; WARN_ON(btrfs_header_generation(src) != trans->transid); WARN_ON(btrfs_header_generation(dst) != trans->transid); if (!empty && src_nritems <= 8) return 1; if (push_items <= 0) return 1; if (empty) { push_items = min(src_nritems, push_items); if (push_items < src_nritems) { /* leave at least 8 pointers in the node if * we aren't going to empty it */ if (src_nritems - push_items < 8) { if (push_items <= 8) return 1; push_items -= 8; } } } else push_items = min(src_nritems - 8, push_items); /* dst is the left eb, src is the middle eb */ if (check_sibling_keys(dst, src)) { ret = -EUCLEAN; btrfs_abort_transaction(trans, ret); return ret; } ret = btrfs_tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items); if (ret) { btrfs_abort_transaction(trans, ret); return ret; } copy_extent_buffer(dst, src, btrfs_node_key_ptr_offset(dst_nritems), btrfs_node_key_ptr_offset(0), push_items * sizeof(struct btrfs_key_ptr)); if (push_items < src_nritems) { /* * Don't call btrfs_tree_mod_log_insert_move() here, key removal * was already fully logged by btrfs_tree_mod_log_eb_copy() above. */ memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), btrfs_node_key_ptr_offset(push_items), (src_nritems - push_items) * sizeof(struct btrfs_key_ptr)); } btrfs_set_header_nritems(src, src_nritems - push_items); btrfs_set_header_nritems(dst, dst_nritems + push_items); btrfs_mark_buffer_dirty(src); btrfs_mark_buffer_dirty(dst); return ret; } /* * try to push data from one node into the next node right in the * tree. * * returns 0 if some ptrs were pushed, < 0 if there was some horrible * error, and > 0 if there was no room in the right hand block. * * this will only push up to 1/2 the contents of the left node over */ static int balance_node_right(struct btrfs_trans_handle *trans, struct extent_buffer *dst, struct extent_buffer *src) { struct btrfs_fs_info *fs_info = trans->fs_info; int push_items = 0; int max_push; int src_nritems; int dst_nritems; int ret = 0; WARN_ON(btrfs_header_generation(src) != trans->transid); WARN_ON(btrfs_header_generation(dst) != trans->transid); src_nritems = btrfs_header_nritems(src); dst_nritems = btrfs_header_nritems(dst); push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems; if (push_items <= 0) return 1; if (src_nritems < 4) return 1; max_push = src_nritems / 2 + 1; /* don't try to empty the node */ if (max_push >= src_nritems) return 1; if (max_push < push_items) push_items = max_push; /* dst is the right eb, src is the middle eb */ if (check_sibling_keys(src, dst)) { ret = -EUCLEAN; btrfs_abort_transaction(trans, ret); return ret; } ret = btrfs_tree_mod_log_insert_move(dst, push_items, 0, dst_nritems); BUG_ON(ret < 0); memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), btrfs_node_key_ptr_offset(0), (dst_nritems) * sizeof(struct btrfs_key_ptr)); ret = btrfs_tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items, push_items); if (ret) { btrfs_abort_transaction(trans, ret); return ret; } copy_extent_buffer(dst, src, btrfs_node_key_ptr_offset(0), btrfs_node_key_ptr_offset(src_nritems - push_items), push_items * sizeof(struct btrfs_key_ptr)); btrfs_set_header_nritems(src, src_nritems - push_items); btrfs_set_header_nritems(dst, dst_nritems + push_items); btrfs_mark_buffer_dirty(src); btrfs_mark_buffer_dirty(dst); return ret; } /* * helper function to insert a new root level in the tree. * A new node is allocated, and a single item is inserted to * point to the existing root * * returns zero on success or < 0 on failure. */ static noinline int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { struct btrfs_fs_info *fs_info = root->fs_info; u64 lower_gen; struct extent_buffer *lower; struct extent_buffer *c; struct extent_buffer *old; struct btrfs_disk_key lower_key; int ret; BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); lower = path->nodes[level-1]; if (level == 1) btrfs_item_key(lower, &lower_key, 0); else btrfs_node_key(lower, &lower_key, 0); c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, &lower_key, level, root->node->start, 0, BTRFS_NESTING_NEW_ROOT); if (IS_ERR(c)) return PTR_ERR(c); root_add_used(root, fs_info->nodesize); btrfs_set_header_nritems(c, 1); btrfs_set_node_key(c, &lower_key, 0); btrfs_set_node_blockptr(c, 0, lower->start); lower_gen = btrfs_header_generation(lower); WARN_ON(lower_gen != trans->transid); btrfs_set_node_ptr_generation(c, 0, lower_gen); btrfs_mark_buffer_dirty(c); old = root->node; ret = btrfs_tree_mod_log_insert_root(root->node, c, false); BUG_ON(ret < 0); rcu_assign_pointer(root->node, c); /* the super has an extra ref to root->node */ free_extent_buffer(old); add_root_to_dirty_list(root); atomic_inc(&c->refs); path->nodes[level] = c; path->locks[level] = BTRFS_WRITE_LOCK; path->slots[level] = 0; return 0; } /* * worker function to insert a single pointer in a node. * the node should have enough room for the pointer already * * slot and level indicate where you want the key to go, and * blocknr is the block the key points to. */ static void insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_path *path, struct btrfs_disk_key *key, u64 bytenr, int slot, int level) { struct extent_buffer *lower; int nritems; int ret; BUG_ON(!path->nodes[level]); btrfs_assert_tree_locked(path->nodes[level]); lower = path->nodes[level]; nritems = btrfs_header_nritems(lower); BUG_ON(slot > nritems); BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(trans->fs_info)); if (slot != nritems) { if (level) { ret = btrfs_tree_mod_log_insert_move(lower, slot + 1, slot, nritems - slot); BUG_ON(ret < 0); } memmove_extent_buffer(lower, btrfs_node_key_ptr_offset(slot + 1), btrfs_node_key_ptr_offset(slot), (nritems - slot) * sizeof(struct btrfs_key_ptr)); } if (level) { ret = btrfs_tree_mod_log_insert_key(lower, slot, BTRFS_MOD_LOG_KEY_ADD, GFP_NOFS); BUG_ON(ret < 0); } btrfs_set_node_key(lower, key, slot); btrfs_set_node_blockptr(lower, slot, bytenr); WARN_ON(trans->transid == 0); btrfs_set_node_ptr_generation(lower, slot, trans->transid); btrfs_set_header_nritems(lower, nritems + 1); btrfs_mark_buffer_dirty(lower); } /* * split the node at the specified level in path in two. * The path is corrected to point to the appropriate node after the split * * Before splitting this tries to make some room in the node by pushing * left and right, if either one works, it returns right away. * * returns 0 on success and < 0 on failure */ static noinline int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *c; struct extent_buffer *split; struct btrfs_disk_key disk_key; int mid; int ret; u32 c_nritems; c = path->nodes[level]; WARN_ON(btrfs_header_generation(c) != trans->transid); if (c == root->node) { /* * trying to split the root, lets make a new one * * tree mod log: We don't log_removal old root in * insert_new_root, because that root buffer will be kept as a * normal node. We are going to log removal of half of the * elements below with btrfs_tree_mod_log_eb_copy(). We're * holding a tree lock on the buffer, which is why we cannot * race with other tree_mod_log users. */ ret = insert_new_root(trans, root, path, level + 1); if (ret) return ret; } else { ret = push_nodes_for_insert(trans, root, path, level); c = path->nodes[level]; if (!ret && btrfs_header_nritems(c) < BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) return 0; if (ret < 0) return ret; } c_nritems = btrfs_header_nritems(c); mid = (c_nritems + 1) / 2; btrfs_node_key(c, &disk_key, mid); split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, &disk_key, level, c->start, 0, BTRFS_NESTING_SPLIT); if (IS_ERR(split)) return PTR_ERR(split); root_add_used(root, fs_info->nodesize); ASSERT(btrfs_header_level(c) == level); ret = btrfs_tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid); if (ret) { btrfs_tree_unlock(split); free_extent_buffer(split); btrfs_abort_transaction(trans, ret); return ret; } copy_extent_buffer(split, c, btrfs_node_key_ptr_offset(0), btrfs_node_key_ptr_offset(mid), (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); btrfs_set_header_nritems(split, c_nritems - mid); btrfs_set_header_nritems(c, mid); btrfs_mark_buffer_dirty(c); btrfs_mark_buffer_dirty(split); insert_ptr(trans, path, &disk_key, split->start, path->slots[level + 1] + 1, level + 1); if (path->slots[level] >= mid) { path->slots[level] -= mid; btrfs_tree_unlock(c); free_extent_buffer(c); path->nodes[level] = split; path->slots[level + 1] += 1; } else { btrfs_tree_unlock(split); free_extent_buffer(split); } return 0; } /* * how many bytes are required to store the items in a leaf. start * and nr indicate which items in the leaf to check. This totals up the * space used both by the item structs and the item data */ static int leaf_space_used(struct extent_buffer *l, int start, int nr) { struct btrfs_item *start_item; struct btrfs_item *end_item; int data_len; int nritems = btrfs_header_nritems(l); int end = min(nritems, start + nr) - 1; if (!nr) return 0; start_item = btrfs_item_nr(start); end_item = btrfs_item_nr(end); data_len = btrfs_item_offset(l, start_item) + btrfs_item_size(l, start_item); data_len = data_len - btrfs_item_offset(l, end_item); data_len += sizeof(struct btrfs_item) * nr; WARN_ON(data_len < 0); return data_len; } /* * The space between the end of the leaf items and * the start of the leaf data. IOW, how much room * the leaf has left for both items and data */ noinline int btrfs_leaf_free_space(struct extent_buffer *leaf) { struct btrfs_fs_info *fs_info = leaf->fs_info; int nritems = btrfs_header_nritems(leaf); int ret; ret = BTRFS_LEAF_DATA_SIZE(fs_info) - leaf_space_used(leaf, 0, nritems); if (ret < 0) { btrfs_crit(fs_info, "leaf free space ret %d, leaf data size %lu, used %d nritems %d", ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(fs_info), leaf_space_used(leaf, 0, nritems), nritems); } return ret; } /* * min slot controls the lowest index we're willing to push to the * right. We'll push up to and including min_slot, but no lower */ static noinline int __push_leaf_right(struct btrfs_path *path, int data_size, int empty, struct extent_buffer *right, int free_space, u32 left_nritems, u32 min_slot) { struct btrfs_fs_info *fs_info = right->fs_info; struct extent_buffer *left = path->nodes[0]; struct extent_buffer *upper = path->nodes[1]; struct btrfs_map_token token; struct btrfs_disk_key disk_key; int slot; u32 i; int push_space = 0; int push_items = 0; struct btrfs_item *item; u32 nr; u32 right_nritems; u32 data_end; u32 this_item_size; if (empty) nr = 0; else nr = max_t(u32, 1, min_slot); if (path->slots[0] >= left_nritems) push_space += data_size; slot = path->slots[1]; i = left_nritems - 1; while (i >= nr) { item = btrfs_item_nr(i); if (!empty && push_items > 0) { if (path->slots[0] > i) break; if (path->slots[0] == i) { int space = btrfs_leaf_free_space(left); if (space + push_space * 2 > free_space) break; } } if (path->slots[0] == i) push_space += data_size; this_item_size = btrfs_item_size(left, item); if (this_item_size + sizeof(*item) + push_space > free_space) break; push_items++; push_space += this_item_size + sizeof(*item); if (i == 0) break; i--; } if (push_items == 0) goto out_unlock; WARN_ON(!empty && push_items == left_nritems); /* push left to right */ right_nritems = btrfs_header_nritems(right); push_space = btrfs_item_end_nr(left, left_nritems - push_items); push_space -= leaf_data_end(left); /* make room in the right data area */ data_end = leaf_data_end(right); memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET + data_end - push_space, BTRFS_LEAF_DATA_OFFSET + data_end, BTRFS_LEAF_DATA_SIZE(fs_info) - data_end); /* copy from the left data area */ copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) - push_space, BTRFS_LEAF_DATA_OFFSET + leaf_data_end(left), push_space); memmove_extent_buffer(right, btrfs_item_nr_offset(push_items), btrfs_item_nr_offset(0), right_nritems * sizeof(struct btrfs_item)); /* copy the items from left to right */ copy_extent_buffer(right, left, btrfs_item_nr_offset(0), btrfs_item_nr_offset(left_nritems - push_items), push_items * sizeof(struct btrfs_item)); /* update the item pointers */ btrfs_init_map_token(&token, right); right_nritems += push_items; btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(fs_info); for (i = 0; i < right_nritems; i++) { item = btrfs_item_nr(i); push_space -= btrfs_token_item_size(&token, item); btrfs_set_token_item_offset(&token, item, push_space); } left_nritems -= push_items; btrfs_set_header_nritems(left, left_nritems); if (left_nritems) btrfs_mark_buffer_dirty(left); else btrfs_clean_tree_block(left); btrfs_mark_buffer_dirty(right); btrfs_item_key(right, &disk_key, 0); btrfs_set_node_key(upper, &disk_key, slot + 1); btrfs_mark_buffer_dirty(upper); /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { path->slots[0] -= left_nritems; if (btrfs_header_nritems(path->nodes[0]) == 0) btrfs_clean_tree_block(path->nodes[0]); btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; path->slots[1] += 1; } else { btrfs_tree_unlock(right); free_extent_buffer(right); } return 0; out_unlock: btrfs_tree_unlock(right); free_extent_buffer(right); return 1; } /* * push some data in the path leaf to the right, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise * * returns 1 if the push failed because the other node didn't have enough * room, 0 if everything worked out and < 0 if there were major errors. * * this will push starting from min_slot to the end of the leaf. It won't * push any slot lower than min_slot */ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int min_data_size, int data_size, int empty, u32 min_slot) { struct extent_buffer *left = path->nodes[0]; struct extent_buffer *right; struct extent_buffer *upper; int slot; int free_space; u32 left_nritems; int ret; if (!path->nodes[1]) return 1; slot = path->slots[1]; upper = path->nodes[1]; if (slot >= btrfs_header_nritems(upper) - 1) return 1; btrfs_assert_tree_locked(path->nodes[1]); right = btrfs_read_node_slot(upper, slot + 1); /* * slot + 1 is not valid or we fail to read the right node, * no big deal, just return. */ if (IS_ERR(right)) return 1; __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT); free_space = btrfs_leaf_free_space(right); if (free_space < data_size) goto out_unlock; /* cow and double check */ ret = btrfs_cow_block(trans, root, right, upper, slot + 1, &right, BTRFS_NESTING_RIGHT_COW); if (ret) goto out_unlock; free_space = btrfs_leaf_free_space(right); if (free_space < data_size) goto out_unlock; left_nritems = btrfs_header_nritems(left); if (left_nritems == 0) goto out_unlock; if (check_sibling_keys(left, right)) { ret = -EUCLEAN; btrfs_abort_transaction(trans, ret); btrfs_tree_unlock(right); free_extent_buffer(right); return ret; } if (path->slots[0] == left_nritems && !empty) { /* Key greater than all keys in the leaf, right neighbor has * enough room for it and we're not emptying our leaf to delete * it, therefore use right neighbor to insert the new item and * no need to touch/dirty our left leaf. */ btrfs_tree_unlock(left); free_extent_buffer(left); path->nodes[0] = right; path->slots[0] = 0; path->slots[1]++; return 0; } return __push_leaf_right(path, min_data_size, empty, right, free_space, left_nritems, min_slot); out_unlock: btrfs_tree_unlock(right); free_extent_buffer(right); return 1; } /* * push some data in the path leaf to the left, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise * * max_slot can put a limit on how far into the leaf we'll push items. The * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the * items */ static noinline int __push_leaf_left(struct btrfs_path *path, int data_size, int empty, struct extent_buffer *left, int free_space, u32 right_nritems, u32 max_slot) { struct btrfs_fs_info *fs_info = left->fs_info; struct btrfs_disk_key disk_key; struct extent_buffer *right = path->nodes[0]; int i; int push_space = 0; int push_items = 0; struct btrfs_item *item; u32 old_left_nritems; u32 nr; int ret = 0; u32 this_item_size; u32 old_left_item_size; struct btrfs_map_token token; if (empty) nr = min(right_nritems, max_slot); else nr = min(right_nritems - 1, max_slot); for (i = 0; i < nr; i++) { item = btrfs_item_nr(i); if (!empty && push_items > 0) { if (path->slots[0] < i) break; if (path->slots[0] == i) { int space = btrfs_leaf_free_space(right); if (space + push_space * 2 > free_space) break; } } if (path->slots[0] == i) push_space += data_size; this_item_size = btrfs_item_size(right, item); if (this_item_size + sizeof(*item) + push_space > free_space) break; push_items++; push_space += this_item_size + sizeof(*item); } if (push_items == 0) { ret = 1; goto out; } WARN_ON(!empty && push_items == btrfs_header_nritems(right)); /* push data from right to left */ copy_extent_buffer(left, right, btrfs_item_nr_offset(btrfs_header_nritems(left)), btrfs_item_nr_offset(0), push_items * sizeof(struct btrfs_item)); push_space = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_offset_nr(right, push_items - 1); copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET + leaf_data_end(left) - push_space, BTRFS_LEAF_DATA_OFFSET + btrfs_item_offset_nr(right, push_items - 1), push_space); old_left_nritems = btrfs_header_nritems(left); BUG_ON(old_left_nritems <= 0); btrfs_init_map_token(&token, left); old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1); for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { u32 ioff; item = btrfs_item_nr(i); ioff = btrfs_token_item_offset(&token, item); btrfs_set_token_item_offset(&token, item, ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size)); } btrfs_set_header_nritems(left, old_left_nritems + push_items); /* fixup right node */ if (push_items > right_nritems) WARN(1, KERN_CRIT "push items %d nr %u\n", push_items, right_nritems); if (push_items < right_nritems) { push_space = btrfs_item_offset_nr(right, push_items - 1) - leaf_data_end(right); memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) - push_space, BTRFS_LEAF_DATA_OFFSET + leaf_data_end(right), push_space); memmove_extent_buffer(right, btrfs_item_nr_offset(0), btrfs_item_nr_offset(push_items), (btrfs_header_nritems(right) - push_items) * sizeof(struct btrfs_item)); } btrfs_init_map_token(&token, right); right_nritems -= push_items; btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(fs_info); for (i = 0; i < right_nritems; i++) { item = btrfs_item_nr(i); push_space = push_space - btrfs_token_item_size(&token, item); btrfs_set_token_item_offset(&token, item, push_space); } btrfs_mark_buffer_dirty(left); if (right_nritems) btrfs_mark_buffer_dirty(right); else btrfs_clean_tree_block(right); btrfs_item_key(right, &disk_key, 0); fixup_low_keys(path, &disk_key, 1); /* then fixup the leaf pointer in the path */ if (path->slots[0] < push_items) { path->slots[0] += old_left_nritems; btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = left; path->slots[1] -= 1; } else { btrfs_tree_unlock(left); free_extent_buffer(left); path->slots[0] -= push_items; } BUG_ON(path->slots[0] < 0); return ret; out: btrfs_tree_unlock(left); free_extent_buffer(left); return ret; } /* * push some data in the path leaf to the left, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise * * max_slot can put a limit on how far into the leaf we'll push items. The * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the * items */ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int min_data_size, int data_size, int empty, u32 max_slot) { struct extent_buffer *right = path->nodes[0]; struct extent_buffer *left; int slot; int free_space; u32 right_nritems; int ret = 0; slot = path->slots[1]; if (slot == 0) return 1; if (!path->nodes[1]) return 1; right_nritems = btrfs_header_nritems(right); if (right_nritems == 0) return 1; btrfs_assert_tree_locked(path->nodes[1]); left = btrfs_read_node_slot(path->nodes[1], slot - 1); /* * slot - 1 is not valid or we fail to read the left node, * no big deal, just return. */ if (IS_ERR(left)) return 1; __btrfs_tree_lock(left, BTRFS_NESTING_LEFT); free_space = btrfs_leaf_free_space(left); if (free_space < data_size) { ret = 1; goto out; } /* cow and double check */ ret = btrfs_cow_block(trans, root, left, path->nodes[1], slot - 1, &left, BTRFS_NESTING_LEFT_COW); if (ret) { /* we hit -ENOSPC, but it isn't fatal here */ if (ret == -ENOSPC) ret = 1; goto out; } free_space = btrfs_leaf_free_space(left); if (free_space < data_size) { ret = 1; goto out; } if (check_sibling_keys(left, right)) { ret = -EUCLEAN; btrfs_abort_transaction(trans, ret); goto out; } return __push_leaf_left(path, min_data_size, empty, left, free_space, right_nritems, max_slot); out: btrfs_tree_unlock(left); free_extent_buffer(left); return ret; } /* * split the path's leaf in two, making sure there is at least data_size * available for the resulting leaf level of the path. */ static noinline void copy_for_split(struct btrfs_trans_handle *trans, struct btrfs_path *path, struct extent_buffer *l, struct extent_buffer *right, int slot, int mid, int nritems) { struct btrfs_fs_info *fs_info = trans->fs_info; int data_copy_size; int rt_data_off; int i; struct btrfs_disk_key disk_key; struct btrfs_map_token token; nritems = nritems - mid; btrfs_set_header_nritems(right, nritems); data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(l); copy_extent_buffer(right, l, btrfs_item_nr_offset(0), btrfs_item_nr_offset(mid), nritems * sizeof(struct btrfs_item)); copy_extent_buffer(right, l, BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) - data_copy_size, BTRFS_LEAF_DATA_OFFSET + leaf_data_end(l), data_copy_size); rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_end_nr(l, mid); btrfs_init_map_token(&token, right); for (i = 0; i < nritems; i++) { struct btrfs_item *item = btrfs_item_nr(i); u32 ioff; ioff = btrfs_token_item_offset(&token, item); btrfs_set_token_item_offset(&token, item, ioff + rt_data_off); } btrfs_set_header_nritems(l, mid); btrfs_item_key(right, &disk_key, 0); insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1); btrfs_mark_buffer_dirty(right); btrfs_mark_buffer_dirty(l); BUG_ON(path->slots[0] != slot); if (mid <= slot) { btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; path->slots[0] -= mid; path->slots[1] += 1; } else { btrfs_tree_unlock(right); free_extent_buffer(right); } BUG_ON(path->slots[0] < 0); } /* * double splits happen when we need to insert a big item in the middle * of a leaf. A double split can leave us with 3 mostly empty leaves: * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ] * A B C * * We avoid this by trying to push the items on either side of our target * into the adjacent leaves. If all goes well we can avoid the double split * completely. */ static noinline int push_for_double_split(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { int ret; int progress = 0; int slot; u32 nritems; int space_needed = data_size; slot = path->slots[0]; if (slot < btrfs_header_nritems(path->nodes[0])) space_needed -= btrfs_leaf_free_space(path->nodes[0]); /* * try to push all the items after our slot into the * right leaf */ ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot); if (ret < 0) return ret; if (ret == 0) progress++; nritems = btrfs_header_nritems(path->nodes[0]); /* * our goal is to get our slot at the start or end of a leaf. If * we've done so we're done */ if (path->slots[0] == 0 || path->slots[0] == nritems) return 0; if (btrfs_leaf_free_space(path->nodes[0]) >= data_size) return 0; /* try to push all the items before our slot into the next leaf */ slot = path->slots[0]; space_needed = data_size; if (slot > 0) space_needed -= btrfs_leaf_free_space(path->nodes[0]); ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot); if (ret < 0) return ret; if (ret == 0) progress++; if (progress) return 0; return 1; } /* * split the path's leaf in two, making sure there is at least data_size * available for the resulting leaf level of the path. * * returns 0 if all went well and < 0 on failure. */ static noinline int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, const struct btrfs_key *ins_key, struct btrfs_path *path, int data_size, int extend) { struct btrfs_disk_key disk_key; struct extent_buffer *l; u32 nritems; int mid; int slot; struct extent_buffer *right; struct btrfs_fs_info *fs_info = root->fs_info; int ret = 0; int wret; int split; int num_doubles = 0; int tried_avoid_double = 0; l = path->nodes[0]; slot = path->slots[0]; if (extend && data_size + btrfs_item_size_nr(l, slot) + sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(fs_info)) return -EOVERFLOW; /* first try to make some room by pushing left and right */ if (data_size && path->nodes[1]) { int space_needed = data_size; if (slot < btrfs_header_nritems(l)) space_needed -= btrfs_leaf_free_space(l); wret = push_leaf_right(trans, root, path, space_needed, space_needed, 0, 0); if (wret < 0) return wret; if (wret) { space_needed = data_size; if (slot > 0) space_needed -= btrfs_leaf_free_space(l); wret = push_leaf_left(trans, root, path, space_needed, space_needed, 0, (u32)-1); if (wret < 0) return wret; } l = path->nodes[0]; /* did the pushes work? */ if (btrfs_leaf_free_space(l) >= data_size) return 0; } if (!path->nodes[1]) { ret = insert_new_root(trans, root, path, 1); if (ret) return ret; } again: split = 1; l = path->nodes[0]; slot = path->slots[0]; nritems = btrfs_header_nritems(l); mid = (nritems + 1) / 2; if (mid <= slot) { if (nritems == 1 || leaf_space_used(l, mid, nritems - mid) + data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) { if (slot >= nritems) { split = 0; } else { mid = slot; if (mid != nritems && leaf_space_used(l, mid, nritems - mid) + data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) { if (data_size && !tried_avoid_double) goto push_for_double; split = 2; } } } } else { if (leaf_space_used(l, 0, mid) + data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) { if (!extend && data_size && slot == 0) { split = 0; } else if ((extend || !data_size) && slot == 0) { mid = 1; } else { mid = slot; if (mid != nritems && leaf_space_used(l, mid, nritems - mid) + data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) { if (data_size && !tried_avoid_double) goto push_for_double; split = 2; } } } } if (split == 0) btrfs_cpu_key_to_disk(&disk_key, ins_key); else btrfs_item_key(l, &disk_key, mid); /* * We have to about BTRFS_NESTING_NEW_ROOT here if we've done a double * split, because we're only allowed to have MAX_LOCKDEP_SUBCLASSES * subclasses, which is 8 at the time of this patch, and we've maxed it * out. In the future we could add a * BTRFS_NESTING_SPLIT_THE_SPLITTENING if we need to, but for now just * use BTRFS_NESTING_NEW_ROOT. */ right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, &disk_key, 0, l->start, 0, num_doubles ? BTRFS_NESTING_NEW_ROOT : BTRFS_NESTING_SPLIT); if (IS_ERR(right)) return PTR_ERR(right); root_add_used(root, fs_info->nodesize); if (split == 0) { if (mid <= slot) { btrfs_set_header_nritems(right, 0); insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1); btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; path->slots[0] = 0; path->slots[1] += 1; } else { btrfs_set_header_nritems(right, 0); insert_ptr(trans, path, &disk_key, right->start, path->slots[1], 1); btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; path->slots[0] = 0; if (path->slots[1] == 0) fixup_low_keys(path, &disk_key, 1); } /* * We create a new leaf 'right' for the required ins_len and * we'll do btrfs_mark_buffer_dirty() on this leaf after copying * the content of ins_len to 'right'. */ return ret; } copy_for_split(trans, path, l, right, slot, mid, nritems); if (split == 2) { BUG_ON(num_doubles != 0); num_doubles++; goto again; } return 0; push_for_double: push_for_double_split(trans, root, path, data_size); tried_avoid_double = 1; if (btrfs_leaf_free_space(path->nodes[0]) >= data_size) return 0; goto again; } static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int ins_len) { struct btrfs_key key; struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; u64 extent_len = 0; u32 item_size; int ret; leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY && key.type != BTRFS_EXTENT_CSUM_KEY); if (btrfs_leaf_free_space(leaf) >= ins_len) return 0; item_size = btrfs_item_size_nr(leaf, path->slots[0]); if (key.type == BTRFS_EXTENT_DATA_KEY) { fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); extent_len = btrfs_file_extent_num_bytes(leaf, fi); } btrfs_release_path(path); path->keep_locks = 1; path->search_for_split = 1; ret = btrfs_search_slot(trans, root, &key, path, 0, 1); path->search_for_split = 0; if (ret > 0) ret = -EAGAIN; if (ret < 0) goto err; ret = -EAGAIN; leaf = path->nodes[0]; /* if our item isn't there, return now */ if (item_size != btrfs_item_size_nr(leaf, path->slots[0])) goto err; /* the leaf has changed, it now has room. return now */ if (btrfs_leaf_free_space(path->nodes[0]) >= ins_len) goto err; if (key.type == BTRFS_EXTENT_DATA_KEY) { fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); if (extent_len != btrfs_file_extent_num_bytes(leaf, fi)) goto err; } ret = split_leaf(trans, root, &key, path, ins_len, 1); if (ret) goto err; path->keep_locks = 0; btrfs_unlock_up_safe(path, 1); return 0; err: path->keep_locks = 0; return ret; } static noinline int split_item(struct btrfs_path *path, const struct btrfs_key *new_key, unsigned long split_offset) { struct extent_buffer *leaf; struct btrfs_item *item; struct btrfs_item *new_item; int slot; char *buf; u32 nritems; u32 item_size; u32 orig_offset; struct btrfs_disk_key disk_key; leaf = path->nodes[0]; BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item)); item = btrfs_item_nr(path->slots[0]); orig_offset = btrfs_item_offset(leaf, item); item_size = btrfs_item_size(leaf, item); buf = kmalloc(item_size, GFP_NOFS); if (!buf) return -ENOMEM; read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, path->slots[0]), item_size); slot = path->slots[0] + 1; nritems = btrfs_header_nritems(leaf); if (slot != nritems) { /* shift the items */ memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), btrfs_item_nr_offset(slot), (nritems - slot) * sizeof(struct btrfs_item)); } btrfs_cpu_key_to_disk(&disk_key, new_key); btrfs_set_item_key(leaf, &disk_key, slot); new_item = btrfs_item_nr(slot); btrfs_set_item_offset(leaf, new_item, orig_offset); btrfs_set_item_size(leaf, new_item, item_size - split_offset); btrfs_set_item_offset(leaf, item, orig_offset + item_size - split_offset); btrfs_set_item_size(leaf, item, split_offset); btrfs_set_header_nritems(leaf, nritems + 1); /* write the data for the start of the original item */ write_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, path->slots[0]), split_offset); /* write the data for the new item */ write_extent_buffer(leaf, buf + split_offset, btrfs_item_ptr_offset(leaf, slot), item_size - split_offset); btrfs_mark_buffer_dirty(leaf); BUG_ON(btrfs_leaf_free_space(leaf) < 0); kfree(buf); return 0; } /* * This function splits a single item into two items, * giving 'new_key' to the new item and splitting the * old one at split_offset (from the start of the item). * * The path may be released by this operation. After * the split, the path is pointing to the old item. The * new item is going to be in the same node as the old one. * * Note, the item being split must be smaller enough to live alone on * a tree block with room for one extra struct btrfs_item * * This allows us to split the item in place, keeping a lock on the * leaf the entire time. */ int btrfs_split_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, const struct btrfs_key *new_key, unsigned long split_offset) { int ret; ret = setup_leaf_for_split(trans, root, path, sizeof(struct btrfs_item)); if (ret) return ret; ret = split_item(path, new_key, split_offset); return ret; } /* * This function duplicate a item, giving 'new_key' to the new item. * It guarantees both items live in the same tree leaf and the new item * is contiguous with the original item. * * This allows us to split file extent in place, keeping a lock on the * leaf the entire time. */ int btrfs_duplicate_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, const struct btrfs_key *new_key) { struct extent_buffer *leaf; int ret; u32 item_size; leaf = path->nodes[0]; item_size = btrfs_item_size_nr(leaf, path->slots[0]); ret = setup_leaf_for_split(trans, root, path, item_size + sizeof(struct btrfs_item)); if (ret) return ret; path->slots[0]++; setup_items_for_insert(root, path, new_key, &item_size, 1); leaf = path->nodes[0]; memcpy_extent_buffer(leaf, btrfs_item_ptr_offset(leaf, path->slots[0]), btrfs_item_ptr_offset(leaf, path->slots[0] - 1), item_size); return 0; } /* * make the item pointed to by the path smaller. new_size indicates * how small to make it, and from_end tells us if we just chop bytes * off the end of the item or if we shift the item to chop bytes off * the front. */ void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end) { int slot; struct extent_buffer *leaf; struct btrfs_item *item; u32 nritems; unsigned int data_end; unsigned int old_data_start; unsigned int old_size; unsigned int size_diff; int i; struct btrfs_map_token token; leaf = path->nodes[0]; slot = path->slots[0]; old_size = btrfs_item_size_nr(leaf, slot); if (old_size == new_size) return; nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(leaf); old_data_start = btrfs_item_offset_nr(leaf, slot); size_diff = old_size - new_size; BUG_ON(slot < 0); BUG_ON(slot >= nritems); /* * item0..itemN ... dataN.offset..dataN.size .. data0.size */ /* first correct the data pointers */ btrfs_init_map_token(&token, leaf); for (i = slot; i < nritems; i++) { u32 ioff; item = btrfs_item_nr(i); ioff = btrfs_token_item_offset(&token, item); btrfs_set_token_item_offset(&token, item, ioff + size_diff); } /* shift the data */ if (from_end) { memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET + data_end + size_diff, BTRFS_LEAF_DATA_OFFSET + data_end, old_data_start + new_size - data_end); } else { struct btrfs_disk_key disk_key; u64 offset; btrfs_item_key(leaf, &disk_key, slot); if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) { unsigned long ptr; struct btrfs_file_extent_item *fi; fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); fi = (struct btrfs_file_extent_item *)( (unsigned long)fi - size_diff); if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { ptr = btrfs_item_ptr_offset(leaf, slot); memmove_extent_buffer(leaf, ptr, (unsigned long)fi, BTRFS_FILE_EXTENT_INLINE_DATA_START); } } memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET + data_end + size_diff, BTRFS_LEAF_DATA_OFFSET + data_end, old_data_start - data_end); offset = btrfs_disk_key_offset(&disk_key); btrfs_set_disk_key_offset(&disk_key, offset + size_diff); btrfs_set_item_key(leaf, &disk_key, slot); if (slot == 0) fixup_low_keys(path, &disk_key, 1); } item = btrfs_item_nr(slot); btrfs_set_item_size(leaf, item, new_size); btrfs_mark_buffer_dirty(leaf); if (btrfs_leaf_free_space(leaf) < 0) { btrfs_print_leaf(leaf); BUG(); } } /* * make the item pointed to by the path bigger, data_size is the added size. */ void btrfs_extend_item(struct btrfs_path *path, u32 data_size) { int slot; struct extent_buffer *leaf; struct btrfs_item *item; u32 nritems; unsigned int data_end; unsigned int old_data; unsigned int old_size; int i; struct btrfs_map_token token; leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(leaf); if (btrfs_leaf_free_space(leaf) < data_size) { btrfs_print_leaf(leaf); BUG(); } slot = path->slots[0]; old_data = btrfs_item_end_nr(leaf, slot); BUG_ON(slot < 0); if (slot >= nritems) { btrfs_print_leaf(leaf); btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d", slot, nritems); BUG(); } /* * item0..itemN ... dataN.offset..dataN.size .. data0.size */ /* first correct the data pointers */ btrfs_init_map_token(&token, leaf); for (i = slot; i < nritems; i++) { u32 ioff; item = btrfs_item_nr(i); ioff = btrfs_token_item_offset(&token, item); btrfs_set_token_item_offset(&token, item, ioff - data_size); } /* shift the data */ memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET + data_end - data_size, BTRFS_LEAF_DATA_OFFSET + data_end, old_data - data_end); data_end = old_data; old_size = btrfs_item_size_nr(leaf, slot); item = btrfs_item_nr(slot); btrfs_set_item_size(leaf, item, old_size + data_size); btrfs_mark_buffer_dirty(leaf); if (btrfs_leaf_free_space(leaf) < 0) { btrfs_print_leaf(leaf); BUG(); } } /** * setup_items_for_insert - Helper called before inserting one or more items * to a leaf. Main purpose is to save stack depth by doing the bulk of the work * in a function that doesn't call btrfs_search_slot * * @root: root we are inserting items to * @path: points to the leaf/slot where we are going to insert new items * @cpu_key: array of keys for items to be inserted * @data_size: size of the body of each item we are going to insert * @nr: size of @cpu_key/@data_size arrays */ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, const struct btrfs_key *cpu_key, u32 *data_size, int nr) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_item *item; int i; u32 nritems; unsigned int data_end; struct btrfs_disk_key disk_key; struct extent_buffer *leaf; int slot; struct btrfs_map_token token; u32 total_size; u32 total_data = 0; for (i = 0; i < nr; i++) total_data += data_size[i]; total_size = total_data + (nr * sizeof(struct btrfs_item)); if (path->slots[0] == 0) { btrfs_cpu_key_to_disk(&disk_key, cpu_key); fixup_low_keys(path, &disk_key, 1); } btrfs_unlock_up_safe(path, 1); leaf = path->nodes[0]; slot = path->slots[0]; nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(leaf); if (btrfs_leaf_free_space(leaf) < total_size) { btrfs_print_leaf(leaf); btrfs_crit(fs_info, "not enough freespace need %u have %d", total_size, btrfs_leaf_free_space(leaf)); BUG(); } btrfs_init_map_token(&token, leaf); if (slot != nritems) { unsigned int old_data = btrfs_item_end_nr(leaf, slot); if (old_data < data_end) { btrfs_print_leaf(leaf); btrfs_crit(fs_info, "item at slot %d with data offset %u beyond data end of leaf %u", slot, old_data, data_end); BUG(); } /* * item0..itemN ... dataN.offset..dataN.size .. data0.size */ /* first correct the data pointers */ for (i = slot; i < nritems; i++) { u32 ioff; item = btrfs_item_nr(i); ioff = btrfs_token_item_offset(&token, item); btrfs_set_token_item_offset(&token, item, ioff - total_data); } /* shift the items */ memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), btrfs_item_nr_offset(slot), (nritems - slot) * sizeof(struct btrfs_item)); /* shift the data */ memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET + data_end - total_data, BTRFS_LEAF_DATA_OFFSET + data_end, old_data - data_end); data_end = old_data; } /* setup the item for the new data */ for (i = 0; i < nr; i++) { btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); btrfs_set_item_key(leaf, &disk_key, slot + i); item = btrfs_item_nr(slot + i); data_end -= data_size[i]; btrfs_set_token_item_offset(&token, item, data_end); btrfs_set_token_item_size(&token, item, data_size[i]); } btrfs_set_header_nritems(leaf, nritems + nr); btrfs_mark_buffer_dirty(leaf); if (btrfs_leaf_free_space(leaf) < 0) { btrfs_print_leaf(leaf); BUG(); } } /* * Given a key and some data, insert items into the tree. * This does all the path init required, making room in the tree if needed. */ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, const struct btrfs_key *cpu_key, u32 *data_size, int nr) { int ret = 0; int slot; int i; u32 total_size = 0; u32 total_data = 0; for (i = 0; i < nr; i++) total_data += data_size[i]; total_size = total_data + (nr * sizeof(struct btrfs_item)); ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); if (ret == 0) return -EEXIST; if (ret < 0) return ret; slot = path->slots[0]; BUG_ON(slot < 0); setup_items_for_insert(root, path, cpu_key, data_size, nr); return 0; } /* * Given a key and some data, insert an item into the tree. * This does all the path init required, making room in the tree if needed. */ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const struct btrfs_key *cpu_key, void *data, u32 data_size) { int ret = 0; struct btrfs_path *path; struct extent_buffer *leaf; unsigned long ptr; path = btrfs_alloc_path(); if (!path) return -ENOMEM; ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); if (!ret) { leaf = path->nodes[0]; ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); write_extent_buffer(leaf, data, ptr, data_size); btrfs_mark_buffer_dirty(leaf); } btrfs_free_path(path); return ret; } /* * delete the pointer from a given node. * * the tree should have been previously balanced so the deletion does not * empty a node. */ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { struct extent_buffer *parent = path->nodes[level]; u32 nritems; int ret; nritems = btrfs_header_nritems(parent); if (slot != nritems - 1) { if (level) { ret = btrfs_tree_mod_log_insert_move(parent, slot, slot + 1, nritems - slot - 1); BUG_ON(ret < 0); } memmove_extent_buffer(parent, btrfs_node_key_ptr_offset(slot), btrfs_node_key_ptr_offset(slot + 1), sizeof(struct btrfs_key_ptr) * (nritems - slot - 1)); } else if (level) { ret = btrfs_tree_mod_log_insert_key(parent, slot, BTRFS_MOD_LOG_KEY_REMOVE, GFP_NOFS); BUG_ON(ret < 0); } nritems--; btrfs_set_header_nritems(parent, nritems); if (nritems == 0 && parent == root->node) { BUG_ON(btrfs_header_level(root->node) != 1); /* just turn the root into a leaf and break */ btrfs_set_header_level(root->node, 0); } else if (slot == 0) { struct btrfs_disk_key disk_key; btrfs_node_key(parent, &disk_key, 0); fixup_low_keys(path, &disk_key, level + 1); } btrfs_mark_buffer_dirty(parent); } /* * a helper function to delete the leaf pointed to by path->slots[1] and * path->nodes[1]. * * This deletes the pointer in path->nodes[1] and frees the leaf * block extent. zero is returned if it all worked out, < 0 otherwise. * * The path must have already been setup for deleting the leaf, including * all the proper balancing. path->nodes[1] must be locked. */ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct extent_buffer *leaf) { WARN_ON(btrfs_header_generation(leaf) != trans->transid); del_ptr(root, path, 1, path->slots[1]); /* * btrfs_free_extent is expensive, we want to make sure we * aren't holding any locks when we call it */ btrfs_unlock_up_safe(path, 0); root_sub_used(root, leaf->len); atomic_inc(&leaf->refs); btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1); free_extent_buffer_stale(leaf); } /* * delete the item at the leaf level in path. If that empties * the leaf, remove it from the tree */ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int slot, int nr) { struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *leaf; struct btrfs_item *item; u32 last_off; u32 dsize = 0; int ret = 0; int wret; int i; u32 nritems; leaf = path->nodes[0]; last_off = btrfs_item_offset_nr(leaf, slot + nr - 1); for (i = 0; i < nr; i++) dsize += btrfs_item_size_nr(leaf, slot + i); nritems = btrfs_header_nritems(leaf); if (slot + nr != nritems) { int data_end = leaf_data_end(leaf); struct btrfs_map_token token; memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET + data_end + dsize, BTRFS_LEAF_DATA_OFFSET + data_end, last_off - data_end); btrfs_init_map_token(&token, leaf); for (i = slot + nr; i < nritems; i++) { u32 ioff; item = btrfs_item_nr(i); ioff = btrfs_token_item_offset(&token, item); btrfs_set_token_item_offset(&token, item, ioff + dsize); } memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), btrfs_item_nr_offset(slot + nr), sizeof(struct btrfs_item) * (nritems - slot - nr)); } btrfs_set_header_nritems(leaf, nritems - nr); nritems -= nr; /* delete the leaf if we've emptied it */ if (nritems == 0) { if (leaf == root->node) { btrfs_set_header_level(leaf, 0); } else { btrfs_clean_tree_block(leaf); btrfs_del_leaf(trans, root, path, leaf); } } else { int used = leaf_space_used(leaf, 0, nritems); if (slot == 0) { struct btrfs_disk_key disk_key; btrfs_item_key(leaf, &disk_key, 0); fixup_low_keys(path, &disk_key, 1); } /* delete the leaf if it is mostly empty */ if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) { /* push_leaf_left fixes the path. * make sure the path still points to our leaf * for possible call to del_ptr below */ slot = path->slots[1]; atomic_inc(&leaf->refs); wret = push_leaf_left(trans, root, path, 1, 1, 1, (u32)-1); if (wret < 0 && wret != -ENOSPC) ret = wret; if (path->nodes[0] == leaf && btrfs_header_nritems(leaf)) { wret = push_leaf_right(trans, root, path, 1, 1, 1, 0); if (wret < 0 && wret != -ENOSPC) ret = wret; } if (btrfs_header_nritems(leaf) == 0) { path->slots[1] = slot; btrfs_del_leaf(trans, root, path, leaf); free_extent_buffer(leaf); ret = 0; } else { /* if we're still in the path, make sure * we're dirty. Otherwise, one of the * push_leaf functions must have already * dirtied this buffer */ if (path->nodes[0] == leaf) btrfs_mark_buffer_dirty(leaf); free_extent_buffer(leaf); } } else { btrfs_mark_buffer_dirty(leaf); } } return ret; } /* * search the tree again to find a leaf with lesser keys * returns 0 if it found something or 1 if there are no lesser leaves. * returns < 0 on io errors. * * This may release the path, and so you may lose any locks held at the * time you call it. */ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) { struct btrfs_key key; struct btrfs_key orig_key; struct btrfs_disk_key found_key; int ret; btrfs_item_key_to_cpu(path->nodes[0], &key, 0); orig_key = key; if (key.offset > 0) { key.offset--; } else if (key.type > 0) { key.type--; key.offset = (u64)-1; } else if (key.objectid > 0) { key.objectid--; key.type = (u8)-1; key.offset = (u64)-1; } else { return 1; } btrfs_release_path(path); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret <= 0) return ret; /* * Previous key not found. Even if we were at slot 0 of the leaf we had * before releasing the path and calling btrfs_search_slot(), we now may * be in a slot pointing to the same original key - this can happen if * after we released the path, one of more items were moved from a * sibling leaf into the front of the leaf we had due to an insertion * (see push_leaf_right()). * If we hit this case and our slot is > 0 and just decrement the slot * so that the caller does not process the same key again, which may or * may not break the caller, depending on its logic. */ if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) { btrfs_item_key(path->nodes[0], &found_key, path->slots[0]); ret = comp_keys(&found_key, &orig_key); if (ret == 0) { if (path->slots[0] > 0) { path->slots[0]--; return 0; } /* * At slot 0, same key as before, it means orig_key is * the lowest, leftmost, key in the tree. We're done. */ return 1; } } btrfs_item_key(path->nodes[0], &found_key, 0); ret = comp_keys(&found_key, &key); /* * We might have had an item with the previous key in the tree right * before we released our path. And after we released our path, that * item might have been pushed to the first slot (0) of the leaf we * were holding due to a tree balance. Alternatively, an item with the * previous key can exist as the only element of a leaf (big fat item). * Therefore account for these 2 cases, so that our callers (like * btrfs_previous_item) don't miss an existing item with a key matching * the previous key we computed above. */ if (ret <= 0) return 0; return 1; } /* * A helper function to walk down the tree starting at min_key, and looking * for nodes or leaves that are have a minimum transaction id. * This is used by the btree defrag code, and tree logging * * This does not cow, but it does stuff the starting key it finds back * into min_key, so you can call btrfs_search_slot with cow=1 on the * key and get a writable path. * * This honors path->lowest_level to prevent descent past a given level * of the tree. * * min_trans indicates the oldest transaction that you are interested * in walking through. Any nodes or leaves older than min_trans are * skipped over (without reading them). * * returns zero if something useful was found, < 0 on error and 1 if there * was nothing in the tree that matched the search criteria. */ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, struct btrfs_path *path, u64 min_trans) { struct extent_buffer *cur; struct btrfs_key found_key; int slot; int sret; u32 nritems; int level; int ret = 1; int keep_locks = path->keep_locks; path->keep_locks = 1; again: cur = btrfs_read_lock_root_node(root); level = btrfs_header_level(cur); WARN_ON(path->nodes[level]); path->nodes[level] = cur; path->locks[level] = BTRFS_READ_LOCK; if (btrfs_header_generation(cur) < min_trans) { ret = 1; goto out; } while (1) { nritems = btrfs_header_nritems(cur); level = btrfs_header_level(cur); sret = btrfs_bin_search(cur, min_key, &slot); if (sret < 0) { ret = sret; goto out; } /* at the lowest level, we're done, setup the path and exit */ if (level == path->lowest_level) { if (slot >= nritems) goto find_next_key; ret = 0; path->slots[level] = slot; btrfs_item_key_to_cpu(cur, &found_key, slot); goto out; } if (sret && slot > 0) slot--; /* * check this node pointer against the min_trans parameters. * If it is too old, skip to the next one. */ while (slot < nritems) { u64 gen; gen = btrfs_node_ptr_generation(cur, slot); if (gen < min_trans) { slot++; continue; } break; } find_next_key: /* * we didn't find a candidate key in this node, walk forward * and find another one */ if (slot >= nritems) { path->slots[level] = slot; sret = btrfs_find_next_key(root, path, min_key, level, min_trans); if (sret == 0) { btrfs_release_path(path); goto again; } else { goto out; } } /* save our key for returning back */ btrfs_node_key_to_cpu(cur, &found_key, slot); path->slots[level] = slot; if (level == path->lowest_level) { ret = 0; goto out; } cur = btrfs_read_node_slot(cur, slot); if (IS_ERR(cur)) { ret = PTR_ERR(cur); goto out; } btrfs_tree_read_lock(cur); path->locks[level - 1] = BTRFS_READ_LOCK; path->nodes[level - 1] = cur; unlock_up(path, level, 1, 0, NULL); } out: path->keep_locks = keep_locks; if (ret == 0) { btrfs_unlock_up_safe(path, path->lowest_level + 1); memcpy(min_key, &found_key, sizeof(found_key)); } return ret; } /* * this is similar to btrfs_next_leaf, but does not try to preserve * and fixup the path. It looks for and returns the next key in the * tree based on the current path and the min_trans parameters. * * 0 is returned if another key is found, < 0 if there are any errors * and 1 is returned if there are no higher keys in the tree * * path->keep_locks should be set to 1 on the search made before * calling this function. */ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key, int level, u64 min_trans) { int slot; struct extent_buffer *c; WARN_ON(!path->keep_locks && !path->skip_locking); while (level < BTRFS_MAX_LEVEL) { if (!path->nodes[level]) return 1; slot = path->slots[level] + 1; c = path->nodes[level]; next: if (slot >= btrfs_header_nritems(c)) { int ret; int orig_lowest; struct btrfs_key cur_key; if (level + 1 >= BTRFS_MAX_LEVEL || !path->nodes[level + 1]) return 1; if (path->locks[level + 1] || path->skip_locking) { level++; continue; } slot = btrfs_header_nritems(c) - 1; if (level == 0) btrfs_item_key_to_cpu(c, &cur_key, slot); else btrfs_node_key_to_cpu(c, &cur_key, slot); orig_lowest = path->lowest_level; btrfs_release_path(path); path->lowest_level = level; ret = btrfs_search_slot(NULL, root, &cur_key, path, 0, 0); path->lowest_level = orig_lowest; if (ret < 0) return ret; c = path->nodes[level]; slot = path->slots[level]; if (ret == 0) slot++; goto next; } if (level == 0) btrfs_item_key_to_cpu(c, key, slot); else { u64 gen = btrfs_node_ptr_generation(c, slot); if (gen < min_trans) { slot++; goto next; } btrfs_node_key_to_cpu(c, key, slot); } return 0; } return 1; } int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, u64 time_seq) { int slot; int level; struct extent_buffer *c; struct extent_buffer *next; struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_key key; bool need_commit_sem = false; u32 nritems; int ret; int i; nritems = btrfs_header_nritems(path->nodes[0]); if (nritems == 0) return 1; btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); again: level = 1; next = NULL; btrfs_release_path(path); path->keep_locks = 1; if (time_seq) { ret = btrfs_search_old_slot(root, &key, path, time_seq); } else { if (path->need_commit_sem) { path->need_commit_sem = 0; need_commit_sem = true; down_read(&fs_info->commit_root_sem); } ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); } path->keep_locks = 0; if (ret < 0) goto done; nritems = btrfs_header_nritems(path->nodes[0]); /* * by releasing the path above we dropped all our locks. A balance * could have added more items next to the key that used to be * at the very end of the block. So, check again here and * advance the path if there are now more items available. */ if (nritems > 0 && path->slots[0] < nritems - 1) { if (ret == 0) path->slots[0]++; ret = 0; goto done; } /* * So the above check misses one case: * - after releasing the path above, someone has removed the item that * used to be at the very end of the block, and balance between leafs * gets another one with bigger key.offset to replace it. * * This one should be returned as well, or we can get leaf corruption * later(esp. in __btrfs_drop_extents()). * * And a bit more explanation about this check, * with ret > 0, the key isn't found, the path points to the slot * where it should be inserted, so the path->slots[0] item must be the * bigger one. */ if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) { ret = 0; goto done; } while (level < BTRFS_MAX_LEVEL) { if (!path->nodes[level]) { ret = 1; goto done; } slot = path->slots[level] + 1; c = path->nodes[level]; if (slot >= btrfs_header_nritems(c)) { level++; if (level == BTRFS_MAX_LEVEL) { ret = 1; goto done; } continue; } /* * Our current level is where we're going to start from, and to * make sure lockdep doesn't complain we need to drop our locks * and nodes from 0 to our current level. */ for (i = 0; i < level; i++) { if (path->locks[level]) { btrfs_tree_read_unlock(path->nodes[i]); path->locks[i] = 0; } free_extent_buffer(path->nodes[i]); path->nodes[i] = NULL; } next = c; ret = read_block_for_search(root, path, &next, level, slot, &key); if (ret == -EAGAIN) goto again; if (ret < 0) { btrfs_release_path(path); goto done; } if (!path->skip_locking) { ret = btrfs_try_tree_read_lock(next); if (!ret && time_seq) { /* * If we don't get the lock, we may be racing * with push_leaf_left, holding that lock while * itself waiting for the leaf we've currently * locked. To solve this situation, we give up * on our lock and cycle. */ free_extent_buffer(next); btrfs_release_path(path); cond_resched(); goto again; } if (!ret) btrfs_tree_read_lock(next); } break; } path->slots[level] = slot; while (1) { level--; path->nodes[level] = next; path->slots[level] = 0; if (!path->skip_locking) path->locks[level] = BTRFS_READ_LOCK; if (!level) break; ret = read_block_for_search(root, path, &next, level, 0, &key); if (ret == -EAGAIN) goto again; if (ret < 0) { btrfs_release_path(path); goto done; } if (!path->skip_locking) btrfs_tree_read_lock(next); } ret = 0; done: unlock_up(path, 0, 1, 0, NULL); if (need_commit_sem) { int ret2; path->need_commit_sem = 1; ret2 = finish_need_commit_sem_search(path); up_read(&fs_info->commit_root_sem); if (ret2) ret = ret2; } return ret; } /* * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps * searching until it gets past min_objectid or finds an item of 'type' * * returns 0 if something is found, 1 if nothing was found and < 0 on error */ int btrfs_previous_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid, int type) { struct btrfs_key found_key; struct extent_buffer *leaf; u32 nritems; int ret; while (1) { if (path->slots[0] == 0) { ret = btrfs_prev_leaf(root, path); if (ret != 0) return ret; } else { path->slots[0]--; } leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); if (nritems == 0) return 1; if (path->slots[0] == nritems) path->slots[0]--; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.objectid < min_objectid) break; if (found_key.type == type) return 0; if (found_key.objectid == min_objectid && found_key.type < type) break; } return 1; } /* * search in extent tree to find a previous Metadata/Data extent item with * min objecitd. * * returns 0 if something is found, 1 if nothing was found and < 0 on error */ int btrfs_previous_extent_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid) { struct btrfs_key found_key; struct extent_buffer *leaf; u32 nritems; int ret; while (1) { if (path->slots[0] == 0) { ret = btrfs_prev_leaf(root, path); if (ret != 0) return ret; } else { path->slots[0]--; } leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); if (nritems == 0) return 1; if (path->slots[0] == nritems) path->slots[0]--; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.objectid < min_objectid) break; if (found_key.type == BTRFS_EXTENT_ITEM_KEY || found_key.type == BTRFS_METADATA_ITEM_KEY) return 0; if (found_key.objectid == min_objectid && found_key.type < BTRFS_EXTENT_ITEM_KEY) break; } return 1; } |
53 52 52 3 66 29 70 25 1 69 67 3 96 7 89 95 67 64 66 61 61 25 48 11 23 6 1 16 57 61 4 3 1 5 5 5 2 2 78 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 | // SPDX-License-Identifier: GPL-2.0-only /* * DCCP connection tracking protocol helper * * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net> */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/sysctl.h> #include <linux/spinlock.h> #include <linux/skbuff.h> #include <linux/dccp.h> #include <linux/slab.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <linux/netfilter/nfnetlink_conntrack.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_timeout.h> #include <net/netfilter/nf_log.h> /* Timeouts are based on values from RFC4340: * * - REQUEST: * * 8.1.2. Client Request * * A client MAY give up on its DCCP-Requests after some time * (3 minutes, for example). * * - RESPOND: * * 8.1.3. Server Response * * It MAY also leave the RESPOND state for CLOSED after a timeout of * not less than 4MSL (8 minutes); * * - PARTOPEN: * * 8.1.5. Handshake Completion * * If the client remains in PARTOPEN for more than 4MSL (8 minutes), * it SHOULD reset the connection with Reset Code 2, "Aborted". * * - OPEN: * * The DCCP timestamp overflows after 11.9 hours. If the connection * stays idle this long the sequence number won't be recognized * as valid anymore. * * - CLOSEREQ/CLOSING: * * 8.3. Termination * * The retransmission timer should initially be set to go off in two * round-trip times and should back off to not less than once every * 64 seconds ... * * - TIMEWAIT: * * 4.3. States * * A server or client socket remains in this state for 2MSL (4 minutes) * after the connection has been town down, ... */ #define DCCP_MSL (2 * 60 * HZ) static const char * const dccp_state_names[] = { [CT_DCCP_NONE] = "NONE", [CT_DCCP_REQUEST] = "REQUEST", [CT_DCCP_RESPOND] = "RESPOND", [CT_DCCP_PARTOPEN] = "PARTOPEN", [CT_DCCP_OPEN] = "OPEN", [CT_DCCP_CLOSEREQ] = "CLOSEREQ", [CT_DCCP_CLOSING] = "CLOSING", [CT_DCCP_TIMEWAIT] = "TIMEWAIT", [CT_DCCP_IGNORE] = "IGNORE", [CT_DCCP_INVALID] = "INVALID", }; #define sNO CT_DCCP_NONE #define sRQ CT_DCCP_REQUEST #define sRS CT_DCCP_RESPOND #define sPO CT_DCCP_PARTOPEN #define sOP CT_DCCP_OPEN #define sCR CT_DCCP_CLOSEREQ #define sCG CT_DCCP_CLOSING #define sTW CT_DCCP_TIMEWAIT #define sIG CT_DCCP_IGNORE #define sIV CT_DCCP_INVALID /* * DCCP state transition table * * The assumption is the same as for TCP tracking: * * We are the man in the middle. All the packets go through us but might * get lost in transit to the destination. It is assumed that the destination * can't receive segments we haven't seen. * * The following states exist: * * NONE: Initial state, expecting Request * REQUEST: Request seen, waiting for Response from server * RESPOND: Response from server seen, waiting for Ack from client * PARTOPEN: Ack after Response seen, waiting for packet other than Response, * Reset or Sync from server * OPEN: Packet other than Response, Reset or Sync seen * CLOSEREQ: CloseReq from server seen, expecting Close from client * CLOSING: Close seen, expecting Reset * TIMEWAIT: Reset seen * IGNORE: Not determinable whether packet is valid * * Some states exist only on one side of the connection: REQUEST, RESPOND, * PARTOPEN, CLOSEREQ. For the other side these states are equivalent to * the one it was in before. * * Packets are marked as ignored (sIG) if we don't know if they're valid * (for example a reincarnation of a connection we didn't notice is dead * already) and the server may send back a connection closing Reset or a * Response. They're also used for Sync/SyncAck packets, which we don't * care about. */ static const u_int8_t dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = { [CT_DCCP_ROLE_CLIENT] = { [DCCP_PKT_REQUEST] = { /* * sNO -> sRQ Regular Request * sRQ -> sRQ Retransmitted Request or reincarnation * sRS -> sRS Retransmitted Request (apparently Response * got lost after we saw it) or reincarnation * sPO -> sIG Ignore, conntrack might be out of sync * sOP -> sIG Ignore, conntrack might be out of sync * sCR -> sIG Ignore, conntrack might be out of sync * sCG -> sIG Ignore, conntrack might be out of sync * sTW -> sRQ Reincarnation * * sNO, sRQ, sRS, sPO. sOP, sCR, sCG, sTW, */ sRQ, sRQ, sRS, sIG, sIG, sIG, sIG, sRQ, }, [DCCP_PKT_RESPONSE] = { /* * sNO -> sIV Invalid * sRQ -> sIG Ignore, might be response to ignored Request * sRS -> sIG Ignore, might be response to ignored Request * sPO -> sIG Ignore, might be response to ignored Request * sOP -> sIG Ignore, might be response to ignored Request * sCR -> sIG Ignore, might be response to ignored Request * sCG -> sIG Ignore, might be response to ignored Request * sTW -> sIV Invalid, reincarnation in reverse direction * goes through sRQ * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIV, }, [DCCP_PKT_ACK] = { /* * sNO -> sIV No connection * sRQ -> sIV No connection * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.) * sPO -> sPO Retransmitted Ack for Response, remain in PARTOPEN * sOP -> sOP Regular ACK, remain in OPEN * sCR -> sCR Ack in CLOSEREQ MAY be processed (8.3.) * sCG -> sCG Ack in CLOSING MAY be processed (8.3.) * sTW -> sIV * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV }, [DCCP_PKT_DATA] = { /* * sNO -> sIV No connection * sRQ -> sIV No connection * sRS -> sIV No connection * sPO -> sIV MUST use DataAck in PARTOPEN state (8.1.5.) * sOP -> sOP Regular Data packet * sCR -> sCR Data in CLOSEREQ MAY be processed (8.3.) * sCG -> sCG Data in CLOSING MAY be processed (8.3.) * sTW -> sIV * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ sIV, sIV, sIV, sIV, sOP, sCR, sCG, sIV, }, [DCCP_PKT_DATAACK] = { /* * sNO -> sIV No connection * sRQ -> sIV No connection * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.) * sPO -> sPO Remain in PARTOPEN state * sOP -> sOP Regular DataAck packet in OPEN state * sCR -> sCR DataAck in CLOSEREQ MAY be processed (8.3.) * sCG -> sCG DataAck in CLOSING MAY be processed (8.3.) * sTW -> sIV * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV }, [DCCP_PKT_CLOSEREQ] = { /* * CLOSEREQ may only be sent by the server. * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, [DCCP_PKT_CLOSE] = { /* * sNO -> sIV No connection * sRQ -> sIV No connection * sRS -> sIV No connection * sPO -> sCG Client-initiated close * sOP -> sCG Client-initiated close * sCR -> sCG Close in response to CloseReq (8.3.) * sCG -> sCG Retransmit * sTW -> sIV Late retransmit, already in TIME_WAIT * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ sIV, sIV, sIV, sCG, sCG, sCG, sIV, sIV }, [DCCP_PKT_RESET] = { /* * sNO -> sIV No connection * sRQ -> sTW Sync received or timeout, SHOULD send Reset (8.1.1.) * sRS -> sTW Response received without Request * sPO -> sTW Timeout, SHOULD send Reset (8.1.5.) * sOP -> sTW Connection reset * sCR -> sTW Connection reset * sCG -> sTW Connection reset * sTW -> sIG Ignore (don't refresh timer) * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ sIV, sTW, sTW, sTW, sTW, sTW, sTW, sIG }, [DCCP_PKT_SYNC] = { /* * We currently ignore Sync packets * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, }, [DCCP_PKT_SYNCACK] = { /* * We currently ignore SyncAck packets * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, }, }, [CT_DCCP_ROLE_SERVER] = { [DCCP_PKT_REQUEST] = { /* * sNO -> sIV Invalid * sRQ -> sIG Ignore, conntrack might be out of sync * sRS -> sIG Ignore, conntrack might be out of sync * sPO -> sIG Ignore, conntrack might be out of sync * sOP -> sIG Ignore, conntrack might be out of sync * sCR -> sIG Ignore, conntrack might be out of sync * sCG -> sIG Ignore, conntrack might be out of sync * sTW -> sRQ Reincarnation, must reverse roles * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sRQ }, [DCCP_PKT_RESPONSE] = { /* * sNO -> sIV Response without Request * sRQ -> sRS Response to clients Request * sRS -> sRS Retransmitted Response (8.1.3. SHOULD NOT) * sPO -> sIG Response to an ignored Request or late retransmit * sOP -> sIG Ignore, might be response to ignored Request * sCR -> sIG Ignore, might be response to ignored Request * sCG -> sIG Ignore, might be response to ignored Request * sTW -> sIV Invalid, Request from client in sTW moves to sRQ * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ sIV, sRS, sRS, sIG, sIG, sIG, sIG, sIV }, [DCCP_PKT_ACK] = { /* * sNO -> sIV No connection * sRQ -> sIV No connection * sRS -> sIV No connection * sPO -> sOP Enter OPEN state (8.1.5.) * sOP -> sOP Regular Ack in OPEN state * sCR -> sIV Waiting for Close from client * sCG -> sCG Ack in CLOSING MAY be processed (8.3.) * sTW -> sIV * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV }, [DCCP_PKT_DATA] = { /* * sNO -> sIV No connection * sRQ -> sIV No connection * sRS -> sIV No connection * sPO -> sOP Enter OPEN state (8.1.5.) * sOP -> sOP Regular Data packet in OPEN state * sCR -> sIV Waiting for Close from client * sCG -> sCG Data in CLOSING MAY be processed (8.3.) * sTW -> sIV * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV }, [DCCP_PKT_DATAACK] = { /* * sNO -> sIV No connection * sRQ -> sIV No connection * sRS -> sIV No connection * sPO -> sOP Enter OPEN state (8.1.5.) * sOP -> sOP Regular DataAck in OPEN state * sCR -> sIV Waiting for Close from client * sCG -> sCG Data in CLOSING MAY be processed (8.3.) * sTW -> sIV * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV }, [DCCP_PKT_CLOSEREQ] = { /* * sNO -> sIV No connection * sRQ -> sIV No connection * sRS -> sIV No connection * sPO -> sOP -> sCR Move directly to CLOSEREQ (8.1.5.) * sOP -> sCR CloseReq in OPEN state * sCR -> sCR Retransmit * sCG -> sCR Simultaneous close, client sends another Close * sTW -> sIV Already closed * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ sIV, sIV, sIV, sCR, sCR, sCR, sCR, sIV }, [DCCP_PKT_CLOSE] = { /* * sNO -> sIV No connection * sRQ -> sIV No connection * sRS -> sIV No connection * sPO -> sOP -> sCG Move direcly to CLOSING * sOP -> sCG Move to CLOSING * sCR -> sIV Close after CloseReq is invalid * sCG -> sCG Retransmit * sTW -> sIV Already closed * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ sIV, sIV, sIV, sCG, sCG, sIV, sCG, sIV }, [DCCP_PKT_RESET] = { /* * sNO -> sIV No connection * sRQ -> sTW Reset in response to Request * sRS -> sTW Timeout, SHOULD send Reset (8.1.3.) * sPO -> sTW Timeout, SHOULD send Reset (8.1.3.) * sOP -> sTW * sCR -> sTW * sCG -> sTW * sTW -> sIG Ignore (don't refresh timer) * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW, sTW */ sIV, sTW, sTW, sTW, sTW, sTW, sTW, sTW, sIG }, [DCCP_PKT_SYNC] = { /* * We currently ignore Sync packets * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, }, [DCCP_PKT_SYNCACK] = { /* * We currently ignore SyncAck packets * * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, }, }, }; static noinline bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, const struct dccp_hdr *dh, const struct nf_hook_state *hook_state) { struct net *net = nf_ct_net(ct); struct nf_dccp_net *dn; const char *msg; u_int8_t state; state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE]; switch (state) { default: dn = nf_dccp_pernet(net); if (dn->dccp_loose == 0) { msg = "not picking up existing connection "; goto out_invalid; } break; case CT_DCCP_REQUEST: break; case CT_DCCP_INVALID: msg = "invalid state transition "; goto out_invalid; } ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; ct->proto.dccp.state = CT_DCCP_NONE; ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST; ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL; ct->proto.dccp.handshake_seq = 0; return true; out_invalid: nf_ct_l4proto_log_invalid(skb, ct, hook_state, "%s", msg); return false; } static u64 dccp_ack_seq(const struct dccp_hdr *dh) { const struct dccp_hdr_ack_bits *dhack; dhack = (void *)dh + __dccp_basic_hdr_len(dh); return ((u64)ntohs(dhack->dccph_ack_nr_high) << 32) + ntohl(dhack->dccph_ack_nr_low); } static bool dccp_error(const struct dccp_hdr *dh, struct sk_buff *skb, unsigned int dataoff, const struct nf_hook_state *state) { static const unsigned long require_seq48 = 1 << DCCP_PKT_REQUEST | 1 << DCCP_PKT_RESPONSE | 1 << DCCP_PKT_CLOSEREQ | 1 << DCCP_PKT_CLOSE | 1 << DCCP_PKT_RESET | 1 << DCCP_PKT_SYNC | 1 << DCCP_PKT_SYNCACK; unsigned int dccp_len = skb->len - dataoff; unsigned int cscov; const char *msg; u8 type; BUILD_BUG_ON(DCCP_PKT_INVALID >= BITS_PER_LONG); if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) || dh->dccph_doff * 4 > dccp_len) { msg = "nf_ct_dccp: truncated/malformed packet "; goto out_invalid; } cscov = dccp_len; if (dh->dccph_cscov) { cscov = (dh->dccph_cscov - 1) * 4; if (cscov > dccp_len) { msg = "nf_ct_dccp: bad checksum coverage "; goto out_invalid; } } if (state->hook == NF_INET_PRE_ROUTING && state->net->ct.sysctl_checksum && nf_checksum_partial(skb, state->hook, dataoff, cscov, IPPROTO_DCCP, state->pf)) { msg = "nf_ct_dccp: bad checksum "; goto out_invalid; } type = dh->dccph_type; if (type >= DCCP_PKT_INVALID) { msg = "nf_ct_dccp: reserved packet type "; goto out_invalid; } if (test_bit(type, &require_seq48) && !dh->dccph_x) { msg = "nf_ct_dccp: type lacks 48bit sequence numbers"; goto out_invalid; } return false; out_invalid: nf_l4proto_log_invalid(skb, state, IPPROTO_DCCP, "%s", msg); return true; } struct nf_conntrack_dccp_buf { struct dccp_hdr dh; /* generic header part */ struct dccp_hdr_ext ext; /* optional depending dh->dccph_x */ union { /* depends on header type */ struct dccp_hdr_ack_bits ack; struct dccp_hdr_request req; struct dccp_hdr_response response; struct dccp_hdr_reset rst; } u; }; static struct dccp_hdr * dccp_header_pointer(const struct sk_buff *skb, int offset, const struct dccp_hdr *dh, struct nf_conntrack_dccp_buf *buf) { unsigned int hdrlen = __dccp_hdr_len(dh); if (hdrlen > sizeof(*buf)) return NULL; return skb_header_pointer(skb, offset, hdrlen, buf); } int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct nf_conntrack_dccp_buf _dh; u_int8_t type, old_state, new_state; enum ct_dccp_roles role; unsigned int *timeouts; struct dccp_hdr *dh; dh = skb_header_pointer(skb, dataoff, sizeof(*dh), &_dh.dh); if (!dh) return NF_DROP; if (dccp_error(dh, skb, dataoff, state)) return -NF_ACCEPT; /* pull again, including possible 48 bit sequences and subtype header */ dh = dccp_header_pointer(skb, dataoff, dh, &_dh); if (!dh) return NF_DROP; type = dh->dccph_type; if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh, state)) return -NF_ACCEPT; if (type == DCCP_PKT_RESET && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { /* Tear down connection immediately if only reply is a RESET */ nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } spin_lock_bh(&ct->lock); role = ct->proto.dccp.role[dir]; old_state = ct->proto.dccp.state; new_state = dccp_state_table[role][type][old_state]; switch (new_state) { case CT_DCCP_REQUEST: if (old_state == CT_DCCP_TIMEWAIT && role == CT_DCCP_ROLE_SERVER) { /* Reincarnation in the reverse direction: reopen and * reverse client/server roles. */ ct->proto.dccp.role[dir] = CT_DCCP_ROLE_CLIENT; ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_SERVER; } break; case CT_DCCP_RESPOND: if (old_state == CT_DCCP_REQUEST) ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh); break; case CT_DCCP_PARTOPEN: if (old_state == CT_DCCP_RESPOND && type == DCCP_PKT_ACK && dccp_ack_seq(dh) == ct->proto.dccp.handshake_seq) set_bit(IPS_ASSURED_BIT, &ct->status); break; case CT_DCCP_IGNORE: /* * Connection tracking might be out of sync, so we ignore * packets that might establish a new connection and resync * if the server responds with a valid Response. */ if (ct->proto.dccp.last_dir == !dir && ct->proto.dccp.last_pkt == DCCP_PKT_REQUEST && type == DCCP_PKT_RESPONSE) { ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_CLIENT; ct->proto.dccp.role[dir] = CT_DCCP_ROLE_SERVER; ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh); new_state = CT_DCCP_RESPOND; break; } ct->proto.dccp.last_dir = dir; ct->proto.dccp.last_pkt = type; spin_unlock_bh(&ct->lock); nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid packet"); return NF_ACCEPT; case CT_DCCP_INVALID: spin_unlock_bh(&ct->lock); nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid state transition"); return -NF_ACCEPT; } ct->proto.dccp.last_dir = dir; ct->proto.dccp.last_pkt = type; ct->proto.dccp.state = new_state; spin_unlock_bh(&ct->lock); if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); timeouts = nf_ct_timeout_lookup(ct); if (!timeouts) timeouts = nf_dccp_pernet(nf_ct_net(ct))->dccp_timeout; nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); return NF_ACCEPT; } static bool dccp_can_early_drop(const struct nf_conn *ct) { switch (ct->proto.dccp.state) { case CT_DCCP_CLOSEREQ: case CT_DCCP_CLOSING: case CT_DCCP_TIMEWAIT: return true; default: break; } return false; } #ifdef CONFIG_NF_CONNTRACK_PROCFS static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]); } #endif #if IS_ENABLED(CONFIG_NF_CT_NETLINK) static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, struct nf_conn *ct, bool destroy) { struct nlattr *nest_parms; spin_lock_bh(&ct->lock); nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP); if (!nest_parms) goto nla_put_failure; if (nla_put_u8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state)) goto nla_put_failure; if (destroy) goto skip_state; if (nla_put_u8(skb, CTA_PROTOINFO_DCCP_ROLE, ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]) || nla_put_be64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ, cpu_to_be64(ct->proto.dccp.handshake_seq), CTA_PROTOINFO_DCCP_PAD)) goto nla_put_failure; skip_state: nla_nest_end(skb, nest_parms); spin_unlock_bh(&ct->lock); return 0; nla_put_failure: spin_unlock_bh(&ct->lock); return -1; } static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = { [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 }, [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 }, [CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ] = { .type = NLA_U64 }, [CTA_PROTOINFO_DCCP_PAD] = { .type = NLA_UNSPEC }, }; #define DCCP_NLATTR_SIZE ( \ NLA_ALIGN(NLA_HDRLEN + 1) + \ NLA_ALIGN(NLA_HDRLEN + 1) + \ NLA_ALIGN(NLA_HDRLEN + sizeof(u64)) + \ NLA_ALIGN(NLA_HDRLEN + 0)) static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) { struct nlattr *attr = cda[CTA_PROTOINFO_DCCP]; struct nlattr *tb[CTA_PROTOINFO_DCCP_MAX + 1]; int err; if (!attr) return 0; err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_DCCP_MAX, attr, dccp_nla_policy, NULL); if (err < 0) return err; if (!tb[CTA_PROTOINFO_DCCP_STATE] || !tb[CTA_PROTOINFO_DCCP_ROLE] || nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) > CT_DCCP_ROLE_MAX || nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) { return -EINVAL; } spin_lock_bh(&ct->lock); ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]); if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) { ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; } else { ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER; ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT; } if (tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]) { ct->proto.dccp.handshake_seq = be64_to_cpu(nla_get_be64(tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ])); } spin_unlock_bh(&ct->lock); return 0; } #endif #ifdef CONFIG_NF_CONNTRACK_TIMEOUT #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_cttimeout.h> static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { struct nf_dccp_net *dn = nf_dccp_pernet(net); unsigned int *timeouts = data; int i; if (!timeouts) timeouts = dn->dccp_timeout; /* set default DCCP timeouts. */ for (i=0; i<CT_DCCP_MAX; i++) timeouts[i] = dn->dccp_timeout[i]; /* there's a 1:1 mapping between attributes and protocol states. */ for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i<CTA_TIMEOUT_DCCP_MAX+1; i++) { if (tb[i]) { timeouts[i] = ntohl(nla_get_be32(tb[i])) * HZ; } } timeouts[CTA_TIMEOUT_DCCP_UNSPEC] = timeouts[CTA_TIMEOUT_DCCP_REQUEST]; return 0; } static int dccp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) { const unsigned int *timeouts = data; int i; for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i<CTA_TIMEOUT_DCCP_MAX+1; i++) { if (nla_put_be32(skb, i, htonl(timeouts[i] / HZ))) goto nla_put_failure; } return 0; nla_put_failure: return -ENOSPC; } static const struct nla_policy dccp_timeout_nla_policy[CTA_TIMEOUT_DCCP_MAX+1] = { [CTA_TIMEOUT_DCCP_REQUEST] = { .type = NLA_U32 }, [CTA_TIMEOUT_DCCP_RESPOND] = { .type = NLA_U32 }, [CTA_TIMEOUT_DCCP_PARTOPEN] = { .type = NLA_U32 }, [CTA_TIMEOUT_DCCP_OPEN] = { .type = NLA_U32 }, [CTA_TIMEOUT_DCCP_CLOSEREQ] = { .type = NLA_U32 }, [CTA_TIMEOUT_DCCP_CLOSING] = { .type = NLA_U32 }, [CTA_TIMEOUT_DCCP_TIMEWAIT] = { .type = NLA_U32 }, }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ void nf_conntrack_dccp_init_net(struct net *net) { struct nf_dccp_net *dn = nf_dccp_pernet(net); /* default values */ dn->dccp_loose = 1; dn->dccp_timeout[CT_DCCP_REQUEST] = 2 * DCCP_MSL; dn->dccp_timeout[CT_DCCP_RESPOND] = 4 * DCCP_MSL; dn->dccp_timeout[CT_DCCP_PARTOPEN] = 4 * DCCP_MSL; dn->dccp_timeout[CT_DCCP_OPEN] = 12 * 3600 * HZ; dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ; dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ; dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL; /* timeouts[0] is unused, make it same as SYN_SENT so * ->timeouts[0] contains 'new' timeout, like udp or icmp. */ dn->dccp_timeout[CT_DCCP_NONE] = dn->dccp_timeout[CT_DCCP_REQUEST]; } const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp = { .l4proto = IPPROTO_DCCP, .can_early_drop = dccp_can_early_drop, #ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = dccp_print_conntrack, #endif #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_size = DCCP_NLATTR_SIZE, .to_nlattr = dccp_to_nlattr, .from_nlattr = nlattr_to_dccp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = dccp_timeout_nlattr_to_obj, .obj_to_nlattr = dccp_timeout_obj_to_nlattr, .nlattr_max = CTA_TIMEOUT_DCCP_MAX, .obj_size = sizeof(unsigned int) * CT_DCCP_MAX, .nla_policy = dccp_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ }; |
58 58 3 348 348 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | /* * Module for handling utf8 just like any other charset. * By Urban Widmark 2000 */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> static unsigned char identity[256]; static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { int n; if (boundlen <= 0) return -ENAMETOOLONG; n = utf32_to_utf8(uni, out, boundlen); if (n < 0) { *out = '?'; return -EINVAL; } return n; } static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { int n; unicode_t u; n = utf8_to_utf32(rawstring, boundlen, &u); if (n < 0 || u > MAX_WCHAR_T) { *uni = 0x003f; /* ? */ return -EINVAL; } *uni = (wchar_t) u; return n; } static struct nls_table table = { .charset = "utf8", .uni2char = uni2char, .char2uni = char2uni, .charset2lower = identity, /* no conversion */ .charset2upper = identity, }; static int __init init_nls_utf8(void) { int i; for (i=0; i<256; i++) identity[i] = i; return register_nls(&table); } static void __exit exit_nls_utf8(void) { unregister_nls(&table); } module_init(init_nls_utf8) module_exit(exit_nls_utf8) MODULE_LICENSE("Dual BSD/GPL"); |
2591 2593 2032 2034 1844 5931 200 9668 1364 1250 1147 1250 1136 1072 353 1 1346 86 1141 285 233 1346 1804 1489 363 287 1626 410 1461 1463 1805 1802 50 1790 102 2709 2705 81 81 1740 513 1252 1744 1595 3717 2146 329 1846 2766 3 3 1 2 2 4 323 6 6 6 6 316 323 323 324 318 323 6 6 330 330 284 325 328 324 4 325 323 329 330 6 330 328 329 328 323 330 330 330 330 2 330 330 1 330 330 332 20 328 319 316 330 330 330 329 317 330 3 1292 335 1400 58 1445 765 1425 206 62 102 146 1426 1425 1420 5 1 1 1 5 1537 62 62 1522 334 1533 2 13 1725 7 1329 1537 1727 1724 1 1728 1721 296 1728 1895 1901 2 1894 1911 1730 403 1906 103 20 37 1897 1813 1811 1808 40 40 40 40 101 101 101 1025 1025 1024 1025 936 1019 89 89 89 1 88 89 89 82 82 7 7 7 7 1 7 7 7 7 82 83 80 20 20 83 133 133 128 133 133 70 133 133 1 133 126 1 7 133 133 134 134 134 133 1 6391 6395 6389 1674 6401 6391 6411 6388 6392 6388 1675 6413 6393 6404 6398 6400 1677 6399 6413 6399 6396 6413 6399 6413 6398 6396 1675 458 5 8291 2143 91 91 10 84 200 200 200 83 124 21 166 35 166 126 161 1955 1952 1497 463 1951 1951 1 459 7 7 1504 1953 1951 1954 1503 455 458 1948 1502 449 12 12 6 459 35 35 35 42 42 1 7 35 1 1 36 2068 1 2067 1504 581 535 126 126 125 7 42 459 125 125 7 7 7 16 824 121 914 914 52 2 50 23 50 50 2 50 50 1 50 49 50 2 50 50 50 50 31 31 50 50 50 50 3 3 7784 4743 3 1 4739 4 3885 3890 3892 3880 7 7 3888 7 654 606 97 334 28 372 9 9 5 5 5 5 1953 1956 1792 172 20 1941 398 398 350 49 337 226 6 223 1 10 393 1740 338 39 4 1858 1856 259 219 9 27 27 27 13 20 20 75 123 56 3 3 172 2 2 1901 27 201 2029 126 60 3 1 10 13 1 2595 8300 7809 2093 2139 52 3 1 11 2066 107 9636 9628 9644 14 9627 9637 9642 64 60 9611 13 10 10 3 9642 494 1012 204 8491 9659 9670 9693 7777 2112 47 9668 7786 2109 2106 9653 1161 1159 2 1240 1248 1243 2 15 2 15 1 15 15 1 2 13 4 5 9 47 47 41 6 6 4 37 41 47 19 20 20 64086 1194 64061 159 165 20 170 159 159 159 156 18 18 16 16 16 15 16 15 3115 3124 1262 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 | // SPDX-License-Identifier: GPL-2.0-only /* * linux/mm/memory.c * * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds */ /* * demand-loading started 01.12.91 - seems it is high on the list of * things wanted, and it should be easy to implement. - Linus */ /* * Ok, demand-loading was easy, shared pages a little bit tricker. Shared * pages started 02.12.91, seems to work. - Linus. * * Tested sharing by executing about 30 /bin/sh: under the old kernel it * would have taken more than the 6M I have free, but it worked well as * far as I could see. * * Also corrected some "invalidate()"s - I wasn't doing enough of them. */ /* * Real VM (paging to/from disk) started 18.12.91. Much more work and * thought has to go into this. Oh, well.. * 19.12.91 - works, somewhat. Sometimes I get faults, don't know why. * Found it. Everything seems to work now. * 20.12.91 - Ok, making the swap-device changeable like the root. */ /* * 05.04.94 - Multi-page memory management added for v1.1. * Idea by Alex Bligh (alex@cconcepts.co.uk) * * 16.07.99 - Support of BIGMEM added by Gerhard Wichert, Siemens AG * (Gerhard.Wichert@pdb.siemens.de) * * Aug/Sep 2004 Changed to four level page tables (Andi Kleen) */ #include <linux/kernel_stat.h> #include <linux/mm.h> #include <linux/sched/mm.h> #include <linux/sched/coredump.h> #include <linux/sched/numa_balancing.h> #include <linux/sched/task.h> #include <linux/hugetlb.h> #include <linux/mman.h> #include <linux/swap.h> #include <linux/highmem.h> #include <linux/pagemap.h> #include <linux/memremap.h> #include <linux/ksm.h> #include <linux/rmap.h> #include <linux/export.h> #include <linux/delayacct.h> #include <linux/init.h> #include <linux/pfn_t.h> #include <linux/writeback.h> #include <linux/memcontrol.h> #include <linux/mmu_notifier.h> #include <linux/swapops.h> #include <linux/elf.h> #include <linux/gfp.h> #include <linux/migrate.h> #include <linux/string.h> #include <linux/debugfs.h> #include <linux/userfaultfd_k.h> #include <linux/dax.h> #include <linux/oom.h> #include <linux/numa.h> #include <linux/perf_event.h> #include <linux/ptrace.h> #include <linux/vmalloc.h> #include <trace/events/kmem.h> #include <asm/io.h> #include <asm/mmu_context.h> #include <asm/pgalloc.h> #include <linux/uaccess.h> #include <asm/tlb.h> #include <asm/tlbflush.h> #include "pgalloc-track.h" #include "internal.h" #if defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) && !defined(CONFIG_COMPILE_TEST) #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid. #endif #ifndef CONFIG_NUMA unsigned long max_mapnr; EXPORT_SYMBOL(max_mapnr); struct page *mem_map; EXPORT_SYMBOL(mem_map); #endif /* * A number of key systems in x86 including ioremap() rely on the assumption * that high_memory defines the upper bound on direct map memory, then end * of ZONE_NORMAL. Under CONFIG_DISCONTIG this means that max_low_pfn and * highstart_pfn must be the same; there must be no gap between ZONE_NORMAL * and ZONE_HIGHMEM. */ void *high_memory; EXPORT_SYMBOL(high_memory); /* * Randomize the address space (stacks, mmaps, brk, etc.). * * ( When CONFIG_COMPAT_BRK=y we exclude brk from randomization, * as ancient (libc5 based) binaries can segfault. ) */ int randomize_va_space __read_mostly = #ifdef CONFIG_COMPAT_BRK 1; #else 2; #endif #ifndef arch_faults_on_old_pte static inline bool arch_faults_on_old_pte(void) { /* * Those arches which don't have hw access flag feature need to * implement their own helper. By default, "true" means pagefault * will be hit on old pte. */ return true; } #endif #ifndef arch_wants_old_prefaulted_pte static inline bool arch_wants_old_prefaulted_pte(void) { /* * Transitioning a PTE from 'old' to 'young' can be expensive on * some architectures, even if it's performed in hardware. By * default, "false" means prefaulted entries will be 'young'. */ return false; } #endif static int __init disable_randmaps(char *s) { randomize_va_space = 0; return 1; } __setup("norandmaps", disable_randmaps); unsigned long zero_pfn __read_mostly; EXPORT_SYMBOL(zero_pfn); unsigned long highest_memmap_pfn __read_mostly; /* * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init() */ static int __init init_zero_pfn(void) { zero_pfn = page_to_pfn(ZERO_PAGE(0)); return 0; } early_initcall(init_zero_pfn); void mm_trace_rss_stat(struct mm_struct *mm, int member, long count) { trace_rss_stat(mm, member, count); } #if defined(SPLIT_RSS_COUNTING) void sync_mm_rss(struct mm_struct *mm) { int i; for (i = 0; i < NR_MM_COUNTERS; i++) { if (current->rss_stat.count[i]) { add_mm_counter(mm, i, current->rss_stat.count[i]); current->rss_stat.count[i] = 0; } } current->rss_stat.events = 0; } static void add_mm_counter_fast(struct mm_struct *mm, int member, int val) { struct task_struct *task = current; if (likely(task->mm == mm)) task->rss_stat.count[member] += val; else add_mm_counter(mm, member, val); } #define inc_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, 1) #define dec_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, -1) /* sync counter once per 64 page faults */ #define TASK_RSS_EVENTS_THRESH (64) static void check_sync_rss_stat(struct task_struct *task) { if (unlikely(task != current)) return; if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH)) sync_mm_rss(task->mm); } #else /* SPLIT_RSS_COUNTING */ #define inc_mm_counter_fast(mm, member) inc_mm_counter(mm, member) #define dec_mm_counter_fast(mm, member) dec_mm_counter(mm, member) static void check_sync_rss_stat(struct task_struct *task) { } #endif /* SPLIT_RSS_COUNTING */ /* * Note: this doesn't free the actual pages themselves. That * has been handled earlier when unmapping all the memory regions. */ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr) { pgtable_t token = pmd_pgtable(*pmd); pmd_clear(pmd); pte_free_tlb(tlb, token, addr); mm_dec_nr_ptes(tlb->mm); } static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { pmd_t *pmd; unsigned long next; unsigned long start; start = addr; pmd = pmd_offset(pud, addr); do { next = pmd_addr_end(addr, end); if (pmd_none_or_clear_bad(pmd)) continue; free_pte_range(tlb, pmd, addr); } while (pmd++, addr = next, addr != end); start &= PUD_MASK; if (start < floor) return; if (ceiling) { ceiling &= PUD_MASK; if (!ceiling) return; } if (end - 1 > ceiling - 1) return; pmd = pmd_offset(pud, start); pud_clear(pud); pmd_free_tlb(tlb, pmd, start); mm_dec_nr_pmds(tlb->mm); } static inline void free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { pud_t *pud; unsigned long next; unsigned long start; start = addr; pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) continue; free_pmd_range(tlb, pud, addr, next, floor, ceiling); } while (pud++, addr = next, addr != end); start &= P4D_MASK; if (start < floor) return; if (ceiling) { ceiling &= P4D_MASK; if (!ceiling) return; } if (end - 1 > ceiling - 1) return; pud = pud_offset(p4d, start); p4d_clear(p4d); pud_free_tlb(tlb, pud, start); mm_dec_nr_puds(tlb->mm); } static inline void free_p4d_range(struct mmu_gather *tlb, pgd_t *pgd, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { p4d_t *p4d; unsigned long next; unsigned long start; start = addr; p4d = p4d_offset(pgd, addr); do { next = p4d_addr_end(addr, end); if (p4d_none_or_clear_bad(p4d)) continue; free_pud_range(tlb, p4d, addr, next, floor, ceiling); } while (p4d++, addr = next, addr != end); start &= PGDIR_MASK; if (start < floor) return; if (ceiling) { ceiling &= PGDIR_MASK; if (!ceiling) return; } if (end - 1 > ceiling - 1) return; p4d = p4d_offset(pgd, start); pgd_clear(pgd); p4d_free_tlb(tlb, p4d, start); } /* * This function frees user-level page tables of a process. */ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { pgd_t *pgd; unsigned long next; /* * The next few lines have given us lots of grief... * * Why are we testing PMD* at this top level? Because often * there will be no work to do at all, and we'd prefer not to * go all the way down to the bottom just to discover that. * * Why all these "- 1"s? Because 0 represents both the bottom * of the address space and the top of it (using -1 for the * top wouldn't help much: the masks would do the wrong thing). * The rule is that addr 0 and floor 0 refer to the bottom of * the address space, but end 0 and ceiling 0 refer to the top * Comparisons need to use "end - 1" and "ceiling - 1" (though * that end 0 case should be mythical). * * Wherever addr is brought up or ceiling brought down, we must * be careful to reject "the opposite 0" before it confuses the * subsequent tests. But what about where end is brought down * by PMD_SIZE below? no, end can't go down to 0 there. * * Whereas we round start (addr) and ceiling down, by different * masks at different levels, in order to test whether a table * now has no other vmas using it, so can be freed, we don't * bother to round floor or end up - the tests don't need that. */ addr &= PMD_MASK; if (addr < floor) { addr += PMD_SIZE; if (!addr) return; } if (ceiling) { ceiling &= PMD_MASK; if (!ceiling) return; } if (end - 1 > ceiling - 1) end -= PMD_SIZE; if (addr > end - 1) return; /* * We add page table cache pages with PAGE_SIZE, * (see pte_free_tlb()), flush the tlb if we need */ tlb_change_page_size(tlb, PAGE_SIZE); pgd = pgd_offset(tlb->mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; free_p4d_range(tlb, pgd, addr, next, floor, ceiling); } while (pgd++, addr = next, addr != end); } void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long floor, unsigned long ceiling) { while (vma) { struct vm_area_struct *next = vma->vm_next; unsigned long addr = vma->vm_start; /* * Hide vma from rmap and truncate_pagecache before freeing * pgtables */ unlink_anon_vmas(vma); unlink_file_vma(vma); if (is_vm_hugetlb_page(vma)) { hugetlb_free_pgd_range(tlb, addr, vma->vm_end, floor, next ? next->vm_start : ceiling); } else { /* * Optimization: gather nearby vmas into one call down */ while (next && next->vm_start <= vma->vm_end + PMD_SIZE && !is_vm_hugetlb_page(next)) { vma = next; next = vma->vm_next; unlink_anon_vmas(vma); unlink_file_vma(vma); } free_pgd_range(tlb, addr, vma->vm_end, floor, next ? next->vm_start : ceiling); } vma = next; } } int __pte_alloc(struct mm_struct *mm, pmd_t *pmd) { spinlock_t *ptl; pgtable_t new = pte_alloc_one(mm); if (!new) return -ENOMEM; /* * Ensure all pte setup (eg. pte page lock and page clearing) are * visible before the pte is made visible to other CPUs by being * put into page tables. * * The other side of the story is the pointer chasing in the page * table walking code (when walking the page table without locking; * ie. most of the time). Fortunately, these data accesses consist * of a chain of data-dependent loads, meaning most CPUs (alpha * being the notable exception) will already guarantee loads are * seen in-order. See the alpha page table accessors for the * smp_rmb() barriers in page table walking code. */ smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ ptl = pmd_lock(mm, pmd); if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ mm_inc_nr_ptes(mm); pmd_populate(mm, pmd, new); new = NULL; } spin_unlock(ptl); if (new) pte_free(mm, new); return 0; } int __pte_alloc_kernel(pmd_t *pmd) { pte_t *new = pte_alloc_one_kernel(&init_mm); if (!new) return -ENOMEM; smp_wmb(); /* See comment in __pte_alloc */ spin_lock(&init_mm.page_table_lock); if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ pmd_populate_kernel(&init_mm, pmd, new); new = NULL; } spin_unlock(&init_mm.page_table_lock); if (new) pte_free_kernel(&init_mm, new); return 0; } static inline void init_rss_vec(int *rss) { memset(rss, 0, sizeof(int) * NR_MM_COUNTERS); } static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss) { int i; if (current->mm == mm) sync_mm_rss(mm); for (i = 0; i < NR_MM_COUNTERS; i++) if (rss[i]) add_mm_counter(mm, i, rss[i]); } /* * This function is called to print an error when a bad pte * is found. For example, we might have a PFN-mapped pte in * a region that doesn't allow it. * * The calling function must still handle the error. */ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, pte_t pte, struct page *page) { pgd_t *pgd = pgd_offset(vma->vm_mm, addr); p4d_t *p4d = p4d_offset(pgd, addr); pud_t *pud = pud_offset(p4d, addr); pmd_t *pmd = pmd_offset(pud, addr); struct address_space *mapping; pgoff_t index; static unsigned long resume; static unsigned long nr_shown; static unsigned long nr_unshown; /* * Allow a burst of 60 reports, then keep quiet for that minute; * or allow a steady drip of one report per second. */ if (nr_shown == 60) { if (time_before(jiffies, resume)) { nr_unshown++; return; } if (nr_unshown) { pr_alert("BUG: Bad page map: %lu messages suppressed\n", nr_unshown); nr_unshown = 0; } nr_shown = 0; } if (nr_shown++ == 0) resume = jiffies + 60 * HZ; mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL; index = linear_page_index(vma, addr); pr_alert("BUG: Bad page map in process %s pte:%08llx pmd:%08llx\n", current->comm, (long long)pte_val(pte), (long long)pmd_val(*pmd)); if (page) dump_page(page, "bad pte"); pr_alert("addr:%px vm_flags:%08lx anon_vma:%px mapping:%px index:%lx\n", (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); pr_alert("file:%pD fault:%ps mmap:%ps readpage:%ps\n", vma->vm_file, vma->vm_ops ? vma->vm_ops->fault : NULL, vma->vm_file ? vma->vm_file->f_op->mmap : NULL, mapping ? mapping->a_ops->readpage : NULL); dump_stack(); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); } /* * vm_normal_page -- This function gets the "struct page" associated with a pte. * * "Special" mappings do not wish to be associated with a "struct page" (either * it doesn't exist, or it exists but they don't want to touch it). In this * case, NULL is returned here. "Normal" mappings do have a struct page. * * There are 2 broad cases. Firstly, an architecture may define a pte_special() * pte bit, in which case this function is trivial. Secondly, an architecture * may not have a spare pte bit, which requires a more complicated scheme, * described below. * * A raw VM_PFNMAP mapping (ie. one that is not COWed) is always considered a * special mapping (even if there are underlying and valid "struct pages"). * COWed pages of a VM_PFNMAP are always normal. * * The way we recognize COWed pages within VM_PFNMAP mappings is through the * rules set up by "remap_pfn_range()": the vma will have the VM_PFNMAP bit * set, and the vm_pgoff will point to the first PFN mapped: thus every special * mapping will always honor the rule * * pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT) * * And for normal mappings this is false. * * This restricts such mappings to be a linear translation from virtual address * to pfn. To get around this restriction, we allow arbitrary mappings so long * as the vma is not a COW mapping; in that case, we know that all ptes are * special (because none can have been COWed). * * * In order to support COW of arbitrary special mappings, we have VM_MIXEDMAP. * * VM_MIXEDMAP mappings can likewise contain memory with or without "struct * page" backing, however the difference is that _all_ pages with a struct * page (that is, those where pfn_valid is true) are refcounted and considered * normal pages by the VM. The disadvantage is that pages are refcounted * (which can be slower and simply not an option for some PFNMAP users). The * advantage is that we don't have to follow the strict linearity rule of * PFNMAP mappings in order to support COWable mappings. * */ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte) { unsigned long pfn = pte_pfn(pte); if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) { if (likely(!pte_special(pte))) goto check_pfn; if (vma->vm_ops && vma->vm_ops->find_special_page) return vma->vm_ops->find_special_page(vma, addr); if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) return NULL; if (is_zero_pfn(pfn)) return NULL; if (pte_devmap(pte)) return NULL; print_bad_pte(vma, addr, pte, NULL); return NULL; } /* !CONFIG_ARCH_HAS_PTE_SPECIAL case follows: */ if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { if (vma->vm_flags & VM_MIXEDMAP) { if (!pfn_valid(pfn)) return NULL; goto out; } else { unsigned long off; off = (addr - vma->vm_start) >> PAGE_SHIFT; if (pfn == vma->vm_pgoff + off) return NULL; if (!is_cow_mapping(vma->vm_flags)) return NULL; } } if (is_zero_pfn(pfn)) return NULL; check_pfn: if (unlikely(pfn > highest_memmap_pfn)) { print_bad_pte(vma, addr, pte, NULL); return NULL; } /* * NOTE! We still have PageReserved() pages in the page tables. * eg. VDSO mappings can cause them to exist. */ out: return pfn_to_page(pfn); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t pmd) { unsigned long pfn = pmd_pfn(pmd); /* * There is no pmd_special() but there may be special pmds, e.g. * in a direct-access (dax) mapping, so let's just replicate the * !CONFIG_ARCH_HAS_PTE_SPECIAL case from vm_normal_page() here. */ if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { if (vma->vm_flags & VM_MIXEDMAP) { if (!pfn_valid(pfn)) return NULL; goto out; } else { unsigned long off; off = (addr - vma->vm_start) >> PAGE_SHIFT; if (pfn == vma->vm_pgoff + off) return NULL; if (!is_cow_mapping(vma->vm_flags)) return NULL; } } if (pmd_devmap(pmd)) return NULL; if (is_huge_zero_pmd(pmd)) return NULL; if (unlikely(pfn > highest_memmap_pfn)) return NULL; /* * NOTE! We still have PageReserved() pages in the page tables. * eg. VDSO mappings can cause them to exist. */ out: return pfn_to_page(pfn); } #endif static void restore_exclusive_pte(struct vm_area_struct *vma, struct page *page, unsigned long address, pte_t *ptep) { pte_t pte; swp_entry_t entry; pte = pte_mkold(mk_pte(page, READ_ONCE(vma->vm_page_prot))); if (pte_swp_soft_dirty(*ptep)) pte = pte_mksoft_dirty(pte); entry = pte_to_swp_entry(*ptep); if (pte_swp_uffd_wp(*ptep)) pte = pte_mkuffd_wp(pte); else if (is_writable_device_exclusive_entry(entry)) pte = maybe_mkwrite(pte_mkdirty(pte), vma); set_pte_at(vma->vm_mm, address, ptep, pte); /* * No need to take a page reference as one was already * created when the swap entry was made. */ if (PageAnon(page)) page_add_anon_rmap(page, vma, address, false); else /* * Currently device exclusive access only supports anonymous * memory so the entry shouldn't point to a filebacked page. */ WARN_ON_ONCE(!PageAnon(page)); if (vma->vm_flags & VM_LOCKED) mlock_vma_page(page); /* * No need to invalidate - it was non-present before. However * secondary CPUs may have mappings that need invalidating. */ update_mmu_cache(vma, address, ptep); } /* * Tries to restore an exclusive pte if the page lock can be acquired without * sleeping. */ static int try_restore_exclusive_pte(pte_t *src_pte, struct vm_area_struct *vma, unsigned long addr) { swp_entry_t entry = pte_to_swp_entry(*src_pte); struct page *page = pfn_swap_entry_to_page(entry); if (trylock_page(page)) { restore_exclusive_pte(vma, page, addr, src_pte); unlock_page(page); return 0; } return -EBUSY; } /* * copy one vm_area from one task to the other. Assumes the page tables * already present in the new task to be cleared in the whole range * covered by this vma. */ static unsigned long copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, unsigned long addr, int *rss) { unsigned long vm_flags = dst_vma->vm_flags; pte_t pte = *src_pte; struct page *page; swp_entry_t entry = pte_to_swp_entry(pte); if (likely(!non_swap_entry(entry))) { if (swap_duplicate(entry) < 0) return -EIO; /* make sure dst_mm is on swapoff's mmlist. */ if (unlikely(list_empty(&dst_mm->mmlist))) { spin_lock(&mmlist_lock); if (list_empty(&dst_mm->mmlist)) list_add(&dst_mm->mmlist, &src_mm->mmlist); spin_unlock(&mmlist_lock); } rss[MM_SWAPENTS]++; } else if (is_migration_entry(entry)) { page = pfn_swap_entry_to_page(entry); rss[mm_counter(page)]++; if (is_writable_migration_entry(entry) && is_cow_mapping(vm_flags)) { /* * COW mappings require pages in both * parent and child to be set to read. */ entry = make_readable_migration_entry( swp_offset(entry)); pte = swp_entry_to_pte(entry); if (pte_swp_soft_dirty(*src_pte)) pte = pte_swp_mksoft_dirty(pte); if (pte_swp_uffd_wp(*src_pte)) pte = pte_swp_mkuffd_wp(pte); set_pte_at(src_mm, addr, src_pte, pte); } } else if (is_device_private_entry(entry)) { page = pfn_swap_entry_to_page(entry); /* * Update rss count even for unaddressable pages, as * they should treated just like normal pages in this * respect. * * We will likely want to have some new rss counters * for unaddressable pages, at some point. But for now * keep things as they are. */ get_page(page); rss[mm_counter(page)]++; page_dup_rmap(page, false); /* * We do not preserve soft-dirty information, because so * far, checkpoint/restore is the only feature that * requires that. And checkpoint/restore does not work * when a device driver is involved (you cannot easily * save and restore device driver state). */ if (is_writable_device_private_entry(entry) && is_cow_mapping(vm_flags)) { entry = make_readable_device_private_entry( swp_offset(entry)); pte = swp_entry_to_pte(entry); if (pte_swp_uffd_wp(*src_pte)) pte = pte_swp_mkuffd_wp(pte); set_pte_at(src_mm, addr, src_pte, pte); } } else if (is_device_exclusive_entry(entry)) { /* * Make device exclusive entries present by restoring the * original entry then copying as for a present pte. Device * exclusive entries currently only support private writable * (ie. COW) mappings. */ VM_BUG_ON(!is_cow_mapping(src_vma->vm_flags)); if (try_restore_exclusive_pte(src_pte, src_vma, addr)) return -EBUSY; return -ENOENT; } if (!userfaultfd_wp(dst_vma)) pte = pte_swp_clear_uffd_wp(pte); set_pte_at(dst_mm, addr, dst_pte, pte); return 0; } /* * Copy a present and normal page if necessary. * * NOTE! The usual case is that this doesn't need to do * anything, and can just return a positive value. That * will let the caller know that it can just increase * the page refcount and re-use the pte the traditional * way. * * But _if_ we need to copy it because it needs to be * pinned in the parent (and the child should get its own * copy rather than just a reference to the same page), * we'll do that here and return zero to let the caller * know we're done. * * And if we need a pre-allocated page but don't yet have * one, return a negative error to let the preallocation * code know so that it can do so outside the page table * lock. */ static inline int copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, pte_t *dst_pte, pte_t *src_pte, unsigned long addr, int *rss, struct page **prealloc, pte_t pte, struct page *page) { struct page *new_page; /* * What we want to do is to check whether this page may * have been pinned by the parent process. If so, * instead of wrprotect the pte on both sides, we copy * the page immediately so that we'll always guarantee * the pinned page won't be randomly replaced in the * future. * * The page pinning checks are just "has this mm ever * seen pinning", along with the (inexact) check of * the page count. That might give false positives for * for pinning, but it will work correctly. */ if (likely(!page_needs_cow_for_dma(src_vma, page))) return 1; /* * The vma->anon_vma of the child process may be NULL * because the entire vma does not contain anonymous pages. * A BUG will occur when the copy_present_page() passes * a copy of a non-anonymous page of that vma to the * page_add_new_anon_rmap() to set up new anonymous rmap. * Return 1 if the page is not an anonymous page. */ if (!PageAnon(page)) return 1; new_page = *prealloc; if (!new_page) return -EAGAIN; /* * We have a prealloc page, all good! Take it * over and copy the page & arm it. */ *prealloc = NULL; copy_user_highpage(new_page, page, addr, src_vma); __SetPageUptodate(new_page); page_add_new_anon_rmap(new_page, dst_vma, addr, false); lru_cache_add_inactive_or_unevictable(new_page, dst_vma); rss[mm_counter(new_page)]++; /* All done, just insert the new page copy in the child */ pte = mk_pte(new_page, dst_vma->vm_page_prot); pte = maybe_mkwrite(pte_mkdirty(pte), dst_vma); if (userfaultfd_pte_wp(dst_vma, *src_pte)) /* Uffd-wp needs to be delivered to dest pte as well */ pte = pte_wrprotect(pte_mkuffd_wp(pte)); set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte); return 0; } /* * Copy one pte. Returns 0 if succeeded, or -EAGAIN if one preallocated page * is required to copy this pte. */ static inline int copy_present_pte(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, pte_t *dst_pte, pte_t *src_pte, unsigned long addr, int *rss, struct page **prealloc) { struct mm_struct *src_mm = src_vma->vm_mm; unsigned long vm_flags = src_vma->vm_flags; pte_t pte = *src_pte; struct page *page; page = vm_normal_page(src_vma, addr, pte); if (page) { int retval; retval = copy_present_page(dst_vma, src_vma, dst_pte, src_pte, addr, rss, prealloc, pte, page); if (retval <= 0) return retval; get_page(page); page_dup_rmap(page, false); rss[mm_counter(page)]++; } /* * If it's a COW mapping, write protect it both * in the parent and the child */ if (is_cow_mapping(vm_flags) && pte_write(pte)) { ptep_set_wrprotect(src_mm, addr, src_pte); pte = pte_wrprotect(pte); } /* * If it's a shared mapping, mark it clean in * the child */ if (vm_flags & VM_SHARED) pte = pte_mkclean(pte); pte = pte_mkold(pte); if (!userfaultfd_wp(dst_vma)) pte = pte_clear_uffd_wp(pte); set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte); return 0; } static inline struct page * page_copy_prealloc(struct mm_struct *src_mm, struct vm_area_struct *vma, unsigned long addr) { struct page *new_page; new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, addr); if (!new_page) return NULL; if (mem_cgroup_charge(new_page, src_mm, GFP_KERNEL)) { put_page(new_page); return NULL; } cgroup_throttle_swaprate(new_page, GFP_KERNEL); return new_page; } static int copy_pte_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, unsigned long end) { struct mm_struct *dst_mm = dst_vma->vm_mm; struct mm_struct *src_mm = src_vma->vm_mm; pte_t *orig_src_pte, *orig_dst_pte; pte_t *src_pte, *dst_pte; spinlock_t *src_ptl, *dst_ptl; int progress, ret = 0; int rss[NR_MM_COUNTERS]; swp_entry_t entry = (swp_entry_t){0}; struct page *prealloc = NULL; again: progress = 0; init_rss_vec(rss); dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl); if (!dst_pte) { ret = -ENOMEM; goto out; } src_pte = pte_offset_map(src_pmd, addr); src_ptl = pte_lockptr(src_mm, src_pmd); spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); orig_src_pte = src_pte; orig_dst_pte = dst_pte; arch_enter_lazy_mmu_mode(); do { /* * We are holding two locks at this point - either of them * could generate latencies in another task on another CPU. */ if (progress >= 32) { progress = 0; if (need_resched() || spin_needbreak(src_ptl) || spin_needbreak(dst_ptl)) break; } if (pte_none(*src_pte)) { progress++; continue; } if (unlikely(!pte_present(*src_pte))) { ret = copy_nonpresent_pte(dst_mm, src_mm, dst_pte, src_pte, dst_vma, src_vma, addr, rss); if (ret == -EIO) { entry = pte_to_swp_entry(*src_pte); break; } else if (ret == -EBUSY) { break; } else if (!ret) { progress += 8; continue; } /* * Device exclusive entry restored, continue by copying * the now present pte. */ WARN_ON_ONCE(ret != -ENOENT); } /* copy_present_pte() will clear `*prealloc' if consumed */ ret = copy_present_pte(dst_vma, src_vma, dst_pte, src_pte, addr, rss, &prealloc); /* * If we need a pre-allocated page for this pte, drop the * locks, allocate, and try again. */ if (unlikely(ret == -EAGAIN)) break; if (unlikely(prealloc)) { /* * pre-alloc page cannot be reused by next time so as * to strictly follow mempolicy (e.g., alloc_page_vma() * will allocate page according to address). This * could only happen if one pinned pte changed. */ put_page(prealloc); prealloc = NULL; } progress += 8; } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); spin_unlock(src_ptl); pte_unmap(orig_src_pte); add_mm_rss_vec(dst_mm, rss); pte_unmap_unlock(orig_dst_pte, dst_ptl); cond_resched(); if (ret == -EIO) { VM_WARN_ON_ONCE(!entry.val); if (add_swap_count_continuation(entry, GFP_KERNEL) < 0) { ret = -ENOMEM; goto out; } entry.val = 0; } else if (ret == -EBUSY) { goto out; } else if (ret == -EAGAIN) { prealloc = page_copy_prealloc(src_mm, src_vma, addr); if (!prealloc) return -ENOMEM; } else if (ret) { VM_WARN_ON_ONCE(1); } /* We've captured and resolved the error. Reset, try again. */ ret = 0; if (addr != end) goto again; out: if (unlikely(prealloc)) put_page(prealloc); return ret; } static inline int copy_pmd_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, pud_t *dst_pud, pud_t *src_pud, unsigned long addr, unsigned long end) { struct mm_struct *dst_mm = dst_vma->vm_mm; struct mm_struct *src_mm = src_vma->vm_mm; pmd_t *src_pmd, *dst_pmd; unsigned long next; dst_pmd = pmd_alloc(dst_mm, dst_pud, addr); if (!dst_pmd) return -ENOMEM; src_pmd = pmd_offset(src_pud, addr); do { next = pmd_addr_end(addr, end); if (is_swap_pmd(*src_pmd) || pmd_trans_huge(*src_pmd) || pmd_devmap(*src_pmd)) { int err; VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, src_vma); err = copy_huge_pmd(dst_mm, src_mm, dst_pmd, src_pmd, addr, dst_vma, src_vma); if (err == -ENOMEM) return -ENOMEM; if (!err) continue; /* fall through */ } if (pmd_none_or_clear_bad(src_pmd)) continue; if (copy_pte_range(dst_vma, src_vma, dst_pmd, src_pmd, addr, next)) return -ENOMEM; } while (dst_pmd++, src_pmd++, addr = next, addr != end); return 0; } static inline int copy_pud_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, p4d_t *dst_p4d, p4d_t *src_p4d, unsigned long addr, unsigned long end) { struct mm_struct *dst_mm = dst_vma->vm_mm; struct mm_struct *src_mm = src_vma->vm_mm; pud_t *src_pud, *dst_pud; unsigned long next; dst_pud = pud_alloc(dst_mm, dst_p4d, addr); if (!dst_pud) return -ENOMEM; src_pud = pud_offset(src_p4d, addr); do { next = pud_addr_end(addr, end); if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) { int err; VM_BUG_ON_VMA(next-addr != HPAGE_PUD_SIZE, src_vma); err = copy_huge_pud(dst_mm, src_mm, dst_pud, src_pud, addr, src_vma); if (err == -ENOMEM) return -ENOMEM; if (!err) continue; /* fall through */ } if (pud_none_or_clear_bad(src_pud)) continue; if (copy_pmd_range(dst_vma, src_vma, dst_pud, src_pud, addr, next)) return -ENOMEM; } while (dst_pud++, src_pud++, addr = next, addr != end); return 0; } static inline int copy_p4d_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long addr, unsigned long end) { struct mm_struct *dst_mm = dst_vma->vm_mm; p4d_t *src_p4d, *dst_p4d; unsigned long next; dst_p4d = p4d_alloc(dst_mm, dst_pgd, addr); if (!dst_p4d) return -ENOMEM; src_p4d = p4d_offset(src_pgd, addr); do { next = p4d_addr_end(addr, end); if (p4d_none_or_clear_bad(src_p4d)) continue; if (copy_pud_range(dst_vma, src_vma, dst_p4d, src_p4d, addr, next)) return -ENOMEM; } while (dst_p4d++, src_p4d++, addr = next, addr != end); return 0; } int copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) { pgd_t *src_pgd, *dst_pgd; unsigned long next; unsigned long addr = src_vma->vm_start; unsigned long end = src_vma->vm_end; struct mm_struct *dst_mm = dst_vma->vm_mm; struct mm_struct *src_mm = src_vma->vm_mm; struct mmu_notifier_range range; bool is_cow; int ret; /* * Don't copy ptes where a page fault will fill them correctly. * Fork becomes much lighter when there are big shared or private * readonly mappings. The tradeoff is that copy_page_range is more * efficient than faulting. */ if (!(src_vma->vm_flags & (VM_HUGETLB | VM_PFNMAP | VM_MIXEDMAP)) && !src_vma->anon_vma) return 0; if (is_vm_hugetlb_page(src_vma)) return copy_hugetlb_page_range(dst_mm, src_mm, src_vma); if (unlikely(src_vma->vm_flags & VM_PFNMAP)) { /* * We do not free on error cases below as remove_vma * gets called on error from higher level routine */ ret = track_pfn_copy(src_vma); if (ret) return ret; } /* * We need to invalidate the secondary MMU mappings only when * there could be a permission downgrade on the ptes of the * parent mm. And a permission downgrade will only happen if * is_cow_mapping() returns true. */ is_cow = is_cow_mapping(src_vma->vm_flags); if (is_cow) { mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, 0, src_vma, src_mm, addr, end); mmu_notifier_invalidate_range_start(&range); /* * Disabling preemption is not needed for the write side, as * the read side doesn't spin, but goes to the mmap_lock. * * Use the raw variant of the seqcount_t write API to avoid * lockdep complaining about preemptibility. */ mmap_assert_write_locked(src_mm); raw_write_seqcount_begin(&src_mm->write_protect_seq); } ret = 0; dst_pgd = pgd_offset(dst_mm, addr); src_pgd = pgd_offset(src_mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(src_pgd)) continue; if (unlikely(copy_p4d_range(dst_vma, src_vma, dst_pgd, src_pgd, addr, next))) { ret = -ENOMEM; break; } } while (dst_pgd++, src_pgd++, addr = next, addr != end); if (is_cow) { raw_write_seqcount_end(&src_mm->write_protect_seq); mmu_notifier_invalidate_range_end(&range); } return ret; } /* Whether we should zap all COWed (private) pages too */ static inline bool should_zap_cows(struct zap_details *details) { /* By default, zap all pages */ if (!details) return true; /* Or, we zap COWed pages only if the caller wants to */ return !details->check_mapping; } static unsigned long zap_pte_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, struct zap_details *details) { struct mm_struct *mm = tlb->mm; int force_flush = 0; int rss[NR_MM_COUNTERS]; spinlock_t *ptl; pte_t *start_pte; pte_t *pte; swp_entry_t entry; tlb_change_page_size(tlb, PAGE_SIZE); again: init_rss_vec(rss); start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); pte = start_pte; flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); do { pte_t ptent = *pte; if (pte_none(ptent)) continue; if (need_resched()) break; if (pte_present(ptent)) { struct page *page; page = vm_normal_page(vma, addr, ptent); if (unlikely(details) && page) { /* * unmap_shared_mapping_pages() wants to * invalidate cache without truncating: * unmap shared but keep private pages. */ if (details->check_mapping && details->check_mapping != page_rmapping(page)) continue; } ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm); tlb_remove_tlb_entry(tlb, pte, addr); if (unlikely(!page)) continue; if (!PageAnon(page)) { if (pte_dirty(ptent)) { force_flush = 1; set_page_dirty(page); } if (pte_young(ptent) && likely(!(vma->vm_flags & VM_SEQ_READ))) mark_page_accessed(page); } rss[mm_counter(page)]--; page_remove_rmap(page, false); if (unlikely(page_mapcount(page) < 0)) print_bad_pte(vma, addr, ptent, page); if (unlikely(__tlb_remove_page(tlb, page))) { force_flush = 1; addr += PAGE_SIZE; break; } continue; } entry = pte_to_swp_entry(ptent); if (is_device_private_entry(entry) || is_device_exclusive_entry(entry)) { struct page *page = pfn_swap_entry_to_page(entry); if (unlikely(details && details->check_mapping)) { /* * unmap_shared_mapping_pages() wants to * invalidate cache without truncating: * unmap shared but keep private pages. */ if (details->check_mapping != page_rmapping(page)) continue; } pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); rss[mm_counter(page)]--; if (is_device_private_entry(entry)) page_remove_rmap(page, false); put_page(page); continue; } if (!non_swap_entry(entry)) { /* Genuine swap entry, hence a private anon page */ if (!should_zap_cows(details)) continue; rss[MM_SWAPENTS]--; } else if (is_migration_entry(entry)) { struct page *page; page = pfn_swap_entry_to_page(entry); if (details && details->check_mapping && details->check_mapping != page_rmapping(page)) continue; rss[mm_counter(page)]--; } if (unlikely(!free_swap_and_cache(entry))) print_bad_pte(vma, addr, ptent, NULL); pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); } while (pte++, addr += PAGE_SIZE, addr != end); add_mm_rss_vec(mm, rss); arch_leave_lazy_mmu_mode(); /* Do the actual TLB flush before dropping ptl */ if (force_flush) tlb_flush_mmu_tlbonly(tlb); pte_unmap_unlock(start_pte, ptl); /* * If we forced a TLB flush (either due to running out of * batch buffers or because we needed to flush dirty TLB * entries before releasing the ptl), free the batched * memory too. Restart if we didn't do everything. */ if (force_flush) { force_flush = 0; tlb_flush_mmu(tlb); } if (addr != end) { cond_resched(); goto again; } return addr; } static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, struct zap_details *details) { pmd_t *pmd; unsigned long next; pmd = pmd_offset(pud, addr); do { next = pmd_addr_end(addr, end); if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { if (next - addr != HPAGE_PMD_SIZE) __split_huge_pmd(vma, pmd, addr, false, NULL); else if (zap_huge_pmd(tlb, vma, pmd, addr)) goto next; /* fall through */ } else if (details && details->single_page && PageTransCompound(details->single_page) && next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { spinlock_t *ptl = pmd_lock(tlb->mm, pmd); /* * Take and drop THP pmd lock so that we cannot return * prematurely, while zap_huge_pmd() has cleared *pmd, * but not yet decremented compound_mapcount(). */ spin_unlock(ptl); } /* * Here there can be other concurrent MADV_DONTNEED or * trans huge page faults running, and if the pmd is * none or trans huge it can change under us. This is * because MADV_DONTNEED holds the mmap_lock in read * mode. */ if (pmd_none_or_trans_huge_or_clear_bad(pmd)) goto next; next = zap_pte_range(tlb, vma, pmd, addr, next, details); next: cond_resched(); } while (pmd++, addr = next, addr != end); return addr; } static inline unsigned long zap_pud_range(struct mmu_gather *tlb, struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr, unsigned long end, struct zap_details *details) { pud_t *pud; unsigned long next; pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_trans_huge(*pud) || pud_devmap(*pud)) { if (next - addr != HPAGE_PUD_SIZE) { mmap_assert_locked(tlb->mm); split_huge_pud(vma, pud, addr); } else if (zap_huge_pud(tlb, vma, pud, addr)) goto next; /* fall through */ } if (pud_none_or_clear_bad(pud)) continue; next = zap_pmd_range(tlb, vma, pud, addr, next, details); next: cond_resched(); } while (pud++, addr = next, addr != end); return addr; } static inline unsigned long zap_p4d_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr, unsigned long end, struct zap_details *details) { p4d_t *p4d; unsigned long next; p4d = p4d_offset(pgd, addr); do { next = p4d_addr_end(addr, end); if (p4d_none_or_clear_bad(p4d)) continue; next = zap_pud_range(tlb, vma, p4d, addr, next, details); } while (p4d++, addr = next, addr != end); return addr; } void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, struct zap_details *details) { pgd_t *pgd; unsigned long next; BUG_ON(addr >= end); tlb_start_vma(tlb, vma); pgd = pgd_offset(vma->vm_mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; next = zap_p4d_range(tlb, vma, pgd, addr, next, details); } while (pgd++, addr = next, addr != end); tlb_end_vma(tlb, vma); } static void unmap_single_vma(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start_addr, unsigned long end_addr, struct zap_details *details) { unsigned long start = max(vma->vm_start, start_addr); unsigned long end; if (start >= vma->vm_end) return; end = min(vma->vm_end, end_addr); if (end <= vma->vm_start) return; if (vma->vm_file) uprobe_munmap(vma, start, end); if (unlikely(vma->vm_flags & VM_PFNMAP)) untrack_pfn(vma, 0, 0); if (start != end) { if (unlikely(is_vm_hugetlb_page(vma))) { /* * It is undesirable to test vma->vm_file as it * should be non-null for valid hugetlb area. * However, vm_file will be NULL in the error * cleanup path of mmap_region. When * hugetlbfs ->mmap method fails, * mmap_region() nullifies vma->vm_file * before calling this function to clean up. * Since no pte has actually been setup, it is * safe to do nothing in this case. */ if (vma->vm_file) { i_mmap_lock_write(vma->vm_file->f_mapping); __unmap_hugepage_range_final(tlb, vma, start, end, NULL); i_mmap_unlock_write(vma->vm_file->f_mapping); } } else unmap_page_range(tlb, vma, start, end, details); } } /** * unmap_vmas - unmap a range of memory covered by a list of vma's * @tlb: address of the caller's struct mmu_gather * @vma: the starting vma * @start_addr: virtual address at which to start unmapping * @end_addr: virtual address at which to end unmapping * * Unmap all pages in the vma list. * * Only addresses between `start' and `end' will be unmapped. * * The VMA list must be sorted in ascending virtual address order. * * unmap_vmas() assumes that the caller will flush the whole unmapped address * range after unmap_vmas() returns. So the only responsibility here is to * ensure that any thus-far unmapped pages are flushed before unmap_vmas() * drops the lock and schedules. */ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start_addr, unsigned long end_addr) { struct mmu_notifier_range range; mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, start_addr, end_addr); mmu_notifier_invalidate_range_start(&range); for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) unmap_single_vma(tlb, vma, start_addr, end_addr, NULL); mmu_notifier_invalidate_range_end(&range); } /** * zap_page_range - remove user pages in a given range * @vma: vm_area_struct holding the applicable pages * @start: starting address of pages to zap * @size: number of bytes to zap * * Caller must protect the VMA list */ void zap_page_range(struct vm_area_struct *vma, unsigned long start, unsigned long size) { struct mmu_notifier_range range; struct mmu_gather tlb; lru_add_drain(); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, start, start + size); tlb_gather_mmu(&tlb, vma->vm_mm); update_hiwater_rss(vma->vm_mm); mmu_notifier_invalidate_range_start(&range); for ( ; vma && vma->vm_start < range.end; vma = vma->vm_next) unmap_single_vma(&tlb, vma, start, range.end, NULL); mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb); } /** * zap_page_range_single - remove user pages in a given range * @vma: vm_area_struct holding the applicable pages * @address: starting address of pages to zap * @size: number of bytes to zap * @details: details of shared cache invalidation * * The range must fit into one VMA. */ static void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *details) { struct mmu_notifier_range range; struct mmu_gather tlb; lru_add_drain(); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, address, address + size); tlb_gather_mmu(&tlb, vma->vm_mm); update_hiwater_rss(vma->vm_mm); mmu_notifier_invalidate_range_start(&range); unmap_single_vma(&tlb, vma, address, range.end, details); mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb); } /** * zap_vma_ptes - remove ptes mapping the vma * @vma: vm_area_struct holding ptes to be zapped * @address: starting address of pages to zap * @size: number of bytes to zap * * This function only unmaps ptes assigned to VM_PFNMAP vmas. * * The entire address range must be fully contained within the vma. * */ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size) { if (address < vma->vm_start || address + size > vma->vm_end || !(vma->vm_flags & VM_PFNMAP)) return; zap_page_range_single(vma, address, size, NULL); } EXPORT_SYMBOL_GPL(zap_vma_ptes); static pmd_t *walk_to_pmd(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; pgd = pgd_offset(mm, addr); p4d = p4d_alloc(mm, pgd, addr); if (!p4d) return NULL; pud = pud_alloc(mm, p4d, addr); if (!pud) return NULL; pmd = pmd_alloc(mm, pud, addr); if (!pmd) return NULL; VM_BUG_ON(pmd_trans_huge(*pmd)); return pmd; } pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl) { pmd_t *pmd = walk_to_pmd(mm, addr); if (!pmd) return NULL; return pte_alloc_map_lock(mm, pmd, addr, ptl); } static int validate_page_before_insert(struct page *page) { if (PageAnon(page) || PageSlab(page) || page_has_type(page)) return -EINVAL; flush_dcache_page(page); return 0; } static int insert_page_into_pte_locked(struct mm_struct *mm, pte_t *pte, unsigned long addr, struct page *page, pgprot_t prot) { if (!pte_none(*pte)) return -EBUSY; /* Ok, finally just insert the thing.. */ get_page(page); inc_mm_counter_fast(mm, mm_counter_file(page)); page_add_file_rmap(page, false); set_pte_at(mm, addr, pte, mk_pte(page, prot)); return 0; } /* * This is the old fallback for page remapping. * * For historical reasons, it only allows reserved pages. Only * old drivers should use this, and they needed to mark their * pages reserved for the old functions anyway. */ static int insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot) { struct mm_struct *mm = vma->vm_mm; int retval; pte_t *pte; spinlock_t *ptl; retval = validate_page_before_insert(page); if (retval) goto out; retval = -ENOMEM; pte = get_locked_pte(mm, addr, &ptl); if (!pte) goto out; retval = insert_page_into_pte_locked(mm, pte, addr, page, prot); pte_unmap_unlock(pte, ptl); out: return retval; } #ifdef pte_index static int insert_page_in_batch_locked(struct mm_struct *mm, pte_t *pte, unsigned long addr, struct page *page, pgprot_t prot) { int err; if (!page_count(page)) return -EINVAL; err = validate_page_before_insert(page); if (err) return err; return insert_page_into_pte_locked(mm, pte, addr, page, prot); } /* insert_pages() amortizes the cost of spinlock operations * when inserting pages in a loop. Arch *must* define pte_index. */ static int insert_pages(struct vm_area_struct *vma, unsigned long addr, struct page **pages, unsigned long *num, pgprot_t prot) { pmd_t *pmd = NULL; pte_t *start_pte, *pte; spinlock_t *pte_lock; struct mm_struct *const mm = vma->vm_mm; unsigned long curr_page_idx = 0; unsigned long remaining_pages_total = *num; unsigned long pages_to_write_in_pmd; int ret; more: ret = -EFAULT; pmd = walk_to_pmd(mm, addr); if (!pmd) goto out; pages_to_write_in_pmd = min_t(unsigned long, remaining_pages_total, PTRS_PER_PTE - pte_index(addr)); /* Allocate the PTE if necessary; takes PMD lock once only. */ ret = -ENOMEM; if (pte_alloc(mm, pmd)) goto out; while (pages_to_write_in_pmd) { int pte_idx = 0; const int batch_size = min_t(int, pages_to_write_in_pmd, 8); start_pte = pte_offset_map_lock(mm, pmd, addr, &pte_lock); for (pte = start_pte; pte_idx < batch_size; ++pte, ++pte_idx) { int err = insert_page_in_batch_locked(mm, pte, addr, pages[curr_page_idx], prot); if (unlikely(err)) { pte_unmap_unlock(start_pte, pte_lock); ret = err; remaining_pages_total -= pte_idx; goto out; } addr += PAGE_SIZE; ++curr_page_idx; } pte_unmap_unlock(start_pte, pte_lock); pages_to_write_in_pmd -= batch_size; remaining_pages_total -= batch_size; } if (remaining_pages_total) goto more; ret = 0; out: *num = remaining_pages_total; return ret; } #endif /* ifdef pte_index */ /** * vm_insert_pages - insert multiple pages into user vma, batching the pmd lock. * @vma: user vma to map to * @addr: target start user address of these pages * @pages: source kernel pages * @num: in: number of pages to map. out: number of pages that were *not* * mapped. (0 means all pages were successfully mapped). * * Preferred over vm_insert_page() when inserting multiple pages. * * In case of error, we may have mapped a subset of the provided * pages. It is the caller's responsibility to account for this case. * * The same restrictions apply as in vm_insert_page(). */ int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr, struct page **pages, unsigned long *num) { #ifdef pte_index const unsigned long end_addr = addr + (*num * PAGE_SIZE) - 1; if (addr < vma->vm_start || end_addr >= vma->vm_end) return -EFAULT; if (!(vma->vm_flags & VM_MIXEDMAP)) { BUG_ON(mmap_read_trylock(vma->vm_mm)); BUG_ON(vma->vm_flags & VM_PFNMAP); vma->vm_flags |= VM_MIXEDMAP; } /* Defer page refcount checking till we're about to map that page. */ return insert_pages(vma, addr, pages, num, vma->vm_page_prot); #else unsigned long idx = 0, pgcount = *num; int err = -EINVAL; for (; idx < pgcount; ++idx) { err = vm_insert_page(vma, addr + (PAGE_SIZE * idx), pages[idx]); if (err) break; } *num = pgcount - idx; return err; #endif /* ifdef pte_index */ } EXPORT_SYMBOL(vm_insert_pages); /** * vm_insert_page - insert single page into user vma * @vma: user vma to map to * @addr: target user address of this page * @page: source kernel page * * This allows drivers to insert individual pages they've allocated * into a user vma. * * The page has to be a nice clean _individual_ kernel allocation. * If you allocate a compound page, you need to have marked it as * such (__GFP_COMP), or manually just split the page up yourself * (see split_page()). * * NOTE! Traditionally this was done with "remap_pfn_range()" which * took an arbitrary page protection parameter. This doesn't allow * that. Your vma protection will have to be set up correctly, which * means that if you want a shared writable mapping, you'd better * ask for a shared writable mapping! * * The page does not need to be reserved. * * Usually this function is called from f_op->mmap() handler * under mm->mmap_lock write-lock, so it can change vma->vm_flags. * Caller must set VM_MIXEDMAP on vma if it wants to call this * function from other places, for example from page-fault handler. * * Return: %0 on success, negative error code otherwise. */ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page) { if (addr < vma->vm_start || addr >= vma->vm_end) return -EFAULT; if (!page_count(page)) return -EINVAL; if (!(vma->vm_flags & VM_MIXEDMAP)) { BUG_ON(mmap_read_trylock(vma->vm_mm)); BUG_ON(vma->vm_flags & VM_PFNMAP); vma->vm_flags |= VM_MIXEDMAP; } return insert_page(vma, addr, page, vma->vm_page_prot); } EXPORT_SYMBOL(vm_insert_page); /* * __vm_map_pages - maps range of kernel pages into user vma * @vma: user vma to map to * @pages: pointer to array of source kernel pages * @num: number of pages in page array * @offset: user's requested vm_pgoff * * This allows drivers to map range of kernel pages into a user vma. * * Return: 0 on success and error code otherwise. */ static int __vm_map_pages(struct vm_area_struct *vma, struct page **pages, unsigned long num, unsigned long offset) { unsigned long count = vma_pages(vma); unsigned long uaddr = vma->vm_start; int ret, i; /* Fail if the user requested offset is beyond the end of the object */ if (offset >= num) return -ENXIO; /* Fail if the user requested size exceeds available object size */ if (count > num - offset) return -ENXIO; for (i = 0; i < count; i++) { ret = vm_insert_page(vma, uaddr, pages[offset + i]); if (ret < 0) return ret; uaddr += PAGE_SIZE; } return 0; } /** * vm_map_pages - maps range of kernel pages starts with non zero offset * @vma: user vma to map to * @pages: pointer to array of source kernel pages * @num: number of pages in page array * * Maps an object consisting of @num pages, catering for the user's * requested vm_pgoff * * If we fail to insert any page into the vma, the function will return * immediately leaving any previously inserted pages present. Callers * from the mmap handler may immediately return the error as their caller * will destroy the vma, removing any successfully inserted pages. Other * callers should make their own arrangements for calling unmap_region(). * * Context: Process context. Called by mmap handlers. * Return: 0 on success and error code otherwise. */ int vm_map_pages(struct vm_area_struct *vma, struct page **pages, unsigned long num) { return __vm_map_pages(vma, pages, num, vma->vm_pgoff); } EXPORT_SYMBOL(vm_map_pages); /** * vm_map_pages_zero - map range of kernel pages starts with zero offset * @vma: user vma to map to * @pages: pointer to array of source kernel pages * @num: number of pages in page array * * Similar to vm_map_pages(), except that it explicitly sets the offset * to 0. This function is intended for the drivers that did not consider * vm_pgoff. * * Context: Process context. Called by mmap handlers. * Return: 0 on success and error code otherwise. */ int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages, unsigned long num) { return __vm_map_pages(vma, pages, num, 0); } EXPORT_SYMBOL(vm_map_pages_zero); static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn, pgprot_t prot, bool mkwrite) { struct mm_struct *mm = vma->vm_mm; pte_t *pte, entry; spinlock_t *ptl; pte = get_locked_pte(mm, addr, &ptl); if (!pte) return VM_FAULT_OOM; if (!pte_none(*pte)) { if (mkwrite) { /* * For read faults on private mappings the PFN passed * in may not match the PFN we have mapped if the * mapped PFN is a writeable COW page. In the mkwrite * case we are creating a writable PTE for a shared * mapping and we expect the PFNs to match. If they * don't match, we are likely racing with block * allocation and mapping invalidation so just skip the * update. */ if (pte_pfn(*pte) != pfn_t_to_pfn(pfn)) { WARN_ON_ONCE(!is_zero_pfn(pte_pfn(*pte))); goto out_unlock; } entry = pte_mkyoung(*pte); entry = maybe_mkwrite(pte_mkdirty(entry), vma); if (ptep_set_access_flags(vma, addr, pte, entry, 1)) update_mmu_cache(vma, addr, pte); } goto out_unlock; } /* Ok, finally just insert the thing.. */ if (pfn_t_devmap(pfn)) entry = pte_mkdevmap(pfn_t_pte(pfn, prot)); else entry = pte_mkspecial(pfn_t_pte(pfn, prot)); if (mkwrite) { entry = pte_mkyoung(entry); entry = maybe_mkwrite(pte_mkdirty(entry), vma); } set_pte_at(mm, addr, pte, entry); update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */ out_unlock: pte_unmap_unlock(pte, ptl); return VM_FAULT_NOPAGE; } /** * vmf_insert_pfn_prot - insert single pfn into user vma with specified pgprot * @vma: user vma to map to * @addr: target user address of this page * @pfn: source kernel pfn * @pgprot: pgprot flags for the inserted page * * This is exactly like vmf_insert_pfn(), except that it allows drivers * to override pgprot on a per-page basis. * * This only makes sense for IO mappings, and it makes no sense for * COW mappings. In general, using multiple vmas is preferable; * vmf_insert_pfn_prot should only be used if using multiple VMAs is * impractical. * * See vmf_insert_mixed_prot() for a discussion of the implication of using * a value of @pgprot different from that of @vma->vm_page_prot. * * Context: Process context. May allocate using %GFP_KERNEL. * Return: vm_fault_t value. */ vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, pgprot_t pgprot) { /* * Technically, architectures with pte_special can avoid all these * restrictions (same for remap_pfn_range). However we would like * consistency in testing and feature parity among all, so we should * try to keep these invariants in place for everybody. */ BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == (VM_PFNMAP|VM_MIXEDMAP)); BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn)); if (addr < vma->vm_start || addr >= vma->vm_end) return VM_FAULT_SIGBUS; if (!pfn_modify_allowed(pfn, pgprot)) return VM_FAULT_SIGBUS; track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV)); return insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot, false); } EXPORT_SYMBOL(vmf_insert_pfn_prot); /** * vmf_insert_pfn - insert single pfn into user vma * @vma: user vma to map to * @addr: target user address of this page * @pfn: source kernel pfn * * Similar to vm_insert_page, this allows drivers to insert individual pages * they've allocated into a user vma. Same comments apply. * * This function should only be called from a vm_ops->fault handler, and * in that case the handler should return the result of this function. * * vma cannot be a COW mapping. * * As this is called only for pages that do not currently exist, we * do not need to flush old virtual caches or the TLB. * * Context: Process context. May allocate using %GFP_KERNEL. * Return: vm_fault_t value. */ vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) { return vmf_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); } EXPORT_SYMBOL(vmf_insert_pfn); static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn) { /* these checks mirror the abort conditions in vm_normal_page */ if (vma->vm_flags & VM_MIXEDMAP) return true; if (pfn_t_devmap(pfn)) return true; if (pfn_t_special(pfn)) return true; if (is_zero_pfn(pfn_t_to_pfn(pfn))) return true; return false; } static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn, pgprot_t pgprot, bool mkwrite) { int err; BUG_ON(!vm_mixed_ok(vma, pfn)); if (addr < vma->vm_start || addr >= vma->vm_end) return VM_FAULT_SIGBUS; track_pfn_insert(vma, &pgprot, pfn); if (!pfn_modify_allowed(pfn_t_to_pfn(pfn), pgprot)) return VM_FAULT_SIGBUS; /* * If we don't have pte special, then we have to use the pfn_valid() * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must* * refcount the page if pfn_valid is true (hence insert_page rather * than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP * without pte special, it would there be refcounted as a normal page. */ if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) { struct page *page; /* * At this point we are committed to insert_page() * regardless of whether the caller specified flags that * result in pfn_t_has_page() == false. */ page = pfn_to_page(pfn_t_to_pfn(pfn)); err = insert_page(vma, addr, page, pgprot); } else { return insert_pfn(vma, addr, pfn, pgprot, mkwrite); } if (err == -ENOMEM) return VM_FAULT_OOM; if (err < 0 && err != -EBUSY) return VM_FAULT_SIGBUS; return VM_FAULT_NOPAGE; } /** * vmf_insert_mixed_prot - insert single pfn into user vma with specified pgprot * @vma: user vma to map to * @addr: target user address of this page * @pfn: source kernel pfn * @pgprot: pgprot flags for the inserted page * * This is exactly like vmf_insert_mixed(), except that it allows drivers * to override pgprot on a per-page basis. * * Typically this function should be used by drivers to set caching- and * encryption bits different than those of @vma->vm_page_prot, because * the caching- or encryption mode may not be known at mmap() time. * This is ok as long as @vma->vm_page_prot is not used by the core vm * to set caching and encryption bits for those vmas (except for COW pages). * This is ensured by core vm only modifying these page table entries using * functions that don't touch caching- or encryption bits, using pte_modify() * if needed. (See for example mprotect()). * Also when new page-table entries are created, this is only done using the * fault() callback, and never using the value of vma->vm_page_prot, * except for page-table entries that point to anonymous pages as the result * of COW. * * Context: Process context. May allocate using %GFP_KERNEL. * Return: vm_fault_t value. */ vm_fault_t vmf_insert_mixed_prot(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn, pgprot_t pgprot) { return __vm_insert_mixed(vma, addr, pfn, pgprot, false); } EXPORT_SYMBOL(vmf_insert_mixed_prot); vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn) { return __vm_insert_mixed(vma, addr, pfn, vma->vm_page_prot, false); } EXPORT_SYMBOL(vmf_insert_mixed); /* * If the insertion of PTE failed because someone else already added a * different entry in the mean time, we treat that as success as we assume * the same entry was actually inserted. */ vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn) { return __vm_insert_mixed(vma, addr, pfn, vma->vm_page_prot, true); } EXPORT_SYMBOL(vmf_insert_mixed_mkwrite); /* * maps a range of physical memory into the requested pages. the old * mappings are removed. any references to nonexistent pages results * in null mappings (currently treated as "copy-on-access") */ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot) { pte_t *pte, *mapped_pte; spinlock_t *ptl; int err = 0; mapped_pte = pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; arch_enter_lazy_mmu_mode(); do { BUG_ON(!pte_none(*pte)); if (!pfn_modify_allowed(pfn, prot)) { err = -EACCES; break; } set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot))); pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); pte_unmap_unlock(mapped_pte, ptl); return err; } static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot) { pmd_t *pmd; unsigned long next; int err; pfn -= addr >> PAGE_SHIFT; pmd = pmd_alloc(mm, pud, addr); if (!pmd) return -ENOMEM; VM_BUG_ON(pmd_trans_huge(*pmd)); do { next = pmd_addr_end(addr, end); err = remap_pte_range(mm, pmd, addr, next, pfn + (addr >> PAGE_SHIFT), prot); if (err) return err; } while (pmd++, addr = next, addr != end); return 0; } static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot) { pud_t *pud; unsigned long next; int err; pfn -= addr >> PAGE_SHIFT; pud = pud_alloc(mm, p4d, addr); if (!pud) return -ENOMEM; do { next = pud_addr_end(addr, end); err = remap_pmd_range(mm, pud, addr, next, pfn + (addr >> PAGE_SHIFT), prot); if (err) return err; } while (pud++, addr = next, addr != end); return 0; } static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot) { p4d_t *p4d; unsigned long next; int err; pfn -= addr >> PAGE_SHIFT; p4d = p4d_alloc(mm, pgd, addr); if (!p4d) return -ENOMEM; do { next = p4d_addr_end(addr, end); err = remap_pud_range(mm, p4d, addr, next, pfn + (addr >> PAGE_SHIFT), prot); if (err) return err; } while (p4d++, addr = next, addr != end); return 0; } static int remap_pfn_range_internal(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t prot) { pgd_t *pgd; unsigned long next; unsigned long end = addr + PAGE_ALIGN(size); struct mm_struct *mm = vma->vm_mm; int err; if (WARN_ON_ONCE(!PAGE_ALIGNED(addr))) return -EINVAL; /* * Physically remapped pages are special. Tell the * rest of the world about it: * VM_IO tells people not to look at these pages * (accesses can have side effects). * VM_PFNMAP tells the core MM that the base pages are just * raw PFN mappings, and do not have a "struct page" associated * with them. * VM_DONTEXPAND * Disable vma merging and expanding with mremap(). * VM_DONTDUMP * Omit vma from core dump, even when VM_IO turned off. * * There's a horrible special case to handle copy-on-write * behaviour that some programs depend on. We mark the "original" * un-COW'ed pages by matching them up with "vma->vm_pgoff". * See vm_normal_page() for details. */ if (is_cow_mapping(vma->vm_flags)) { if (addr != vma->vm_start || end != vma->vm_end) return -EINVAL; vma->vm_pgoff = pfn; } vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; BUG_ON(addr >= end); pfn -= addr >> PAGE_SHIFT; pgd = pgd_offset(mm, addr); flush_cache_range(vma, addr, end); do { next = pgd_addr_end(addr, end); err = remap_p4d_range(mm, pgd, addr, next, pfn + (addr >> PAGE_SHIFT), prot); if (err) return err; } while (pgd++, addr = next, addr != end); return 0; } /* * Variant of remap_pfn_range that does not call track_pfn_remap. The caller * must have pre-validated the caching bits of the pgprot_t. */ int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t prot) { int error = remap_pfn_range_internal(vma, addr, pfn, size, prot); if (!error) return 0; /* * A partial pfn range mapping is dangerous: it does not * maintain page reference counts, and callers may free * pages due to the error. So zap it early. */ zap_page_range_single(vma, addr, size, NULL); return error; } /** * remap_pfn_range - remap kernel memory to userspace * @vma: user vma to map to * @addr: target page aligned user address to start at * @pfn: page frame number of kernel physical memory address * @size: size of mapping area * @prot: page protection flags for this mapping * * Note: this is only safe if the mm semaphore is held when called. * * Return: %0 on success, negative error code otherwise. */ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t prot) { int err; err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size)); if (err) return -EINVAL; err = remap_pfn_range_notrack(vma, addr, pfn, size, prot); if (err) untrack_pfn(vma, pfn, PAGE_ALIGN(size)); return err; } EXPORT_SYMBOL(remap_pfn_range); /** * vm_iomap_memory - remap memory to userspace * @vma: user vma to map to * @start: start of the physical memory to be mapped * @len: size of area * * This is a simplified io_remap_pfn_range() for common driver use. The * driver just needs to give us the physical memory range to be mapped, * we'll figure out the rest from the vma information. * * NOTE! Some drivers might want to tweak vma->vm_page_prot first to get * whatever write-combining details or similar. * * Return: %0 on success, negative error code otherwise. */ int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len) { unsigned long vm_len, pfn, pages; /* Check that the physical memory area passed in looks valid */ if (start + len < start) return -EINVAL; /* * You *really* shouldn't map things that aren't page-aligned, * but we've historically allowed it because IO memory might * just have smaller alignment. */ len += start & ~PAGE_MASK; pfn = start >> PAGE_SHIFT; pages = (len + ~PAGE_MASK) >> PAGE_SHIFT; if (pfn + pages < pfn) return -EINVAL; /* We start the mapping 'vm_pgoff' pages into the area */ if (vma->vm_pgoff > pages) return -EINVAL; pfn += vma->vm_pgoff; pages -= vma->vm_pgoff; /* Can we fit all of the mapping? */ vm_len = vma->vm_end - vma->vm_start; if (vm_len >> PAGE_SHIFT > pages) return -EINVAL; /* Ok, let it rip */ return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot); } EXPORT_SYMBOL(vm_iomap_memory); static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, pte_fn_t fn, void *data, bool create, pgtbl_mod_mask *mask) { pte_t *pte, *mapped_pte; int err = 0; spinlock_t *ptl; if (create) { mapped_pte = pte = (mm == &init_mm) ? pte_alloc_kernel_track(pmd, addr, mask) : pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; } else { mapped_pte = pte = (mm == &init_mm) ? pte_offset_kernel(pmd, addr) : pte_offset_map_lock(mm, pmd, addr, &ptl); } BUG_ON(pmd_huge(*pmd)); arch_enter_lazy_mmu_mode(); if (fn) { do { if (create || !pte_none(*pte)) { err = fn(pte, addr, data); if (err) break; } } while (pte++, addr += PAGE_SIZE, addr != end); } *mask |= PGTBL_PTE_MODIFIED; arch_leave_lazy_mmu_mode(); if (mm != &init_mm) pte_unmap_unlock(mapped_pte, ptl); return err; } static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, pte_fn_t fn, void *data, bool create, pgtbl_mod_mask *mask) { pmd_t *pmd; unsigned long next; int err = 0; BUG_ON(pud_huge(*pud)); if (create) { pmd = pmd_alloc_track(mm, pud, addr, mask); if (!pmd) return -ENOMEM; } else { pmd = pmd_offset(pud, addr); } do { next = pmd_addr_end(addr, end); if (pmd_none(*pmd) && !create) continue; if (WARN_ON_ONCE(pmd_leaf(*pmd))) return -EINVAL; if (!pmd_none(*pmd) && WARN_ON_ONCE(pmd_bad(*pmd))) { if (!create) continue; pmd_clear_bad(pmd); } err = apply_to_pte_range(mm, pmd, addr, next, fn, data, create, mask); if (err) break; } while (pmd++, addr = next, addr != end); return err; } static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, pte_fn_t fn, void *data, bool create, pgtbl_mod_mask *mask) { pud_t *pud; unsigned long next; int err = 0; if (create) { pud = pud_alloc_track(mm, p4d, addr, mask); if (!pud) return -ENOMEM; } else { pud = pud_offset(p4d, addr); } do { next = pud_addr_end(addr, end); if (pud_none(*pud) && !create) continue; if (WARN_ON_ONCE(pud_leaf(*pud))) return -EINVAL; if (!pud_none(*pud) && WARN_ON_ONCE(pud_bad(*pud))) { if (!create) continue; pud_clear_bad(pud); } err = apply_to_pmd_range(mm, pud, addr, next, fn, data, create, mask); if (err) break; } while (pud++, addr = next, addr != end); return err; } static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, pte_fn_t fn, void *data, bool create, pgtbl_mod_mask *mask) { p4d_t *p4d; unsigned long next; int err = 0; if (create) { p4d = p4d_alloc_track(mm, pgd, addr, mask); if (!p4d) return -ENOMEM; } else { p4d = p4d_offset(pgd, addr); } do { next = p4d_addr_end(addr, end); if (p4d_none(*p4d) && !create) continue; if (WARN_ON_ONCE(p4d_leaf(*p4d))) return -EINVAL; if (!p4d_none(*p4d) && WARN_ON_ONCE(p4d_bad(*p4d))) { if (!create) continue; p4d_clear_bad(p4d); } err = apply_to_pud_range(mm, p4d, addr, next, fn, data, create, mask); if (err) break; } while (p4d++, addr = next, addr != end); return err; } static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr, unsigned long size, pte_fn_t fn, void *data, bool create) { pgd_t *pgd; unsigned long start = addr, next; unsigned long end = addr + size; pgtbl_mod_mask mask = 0; int err = 0; if (WARN_ON(addr >= end)) return -EINVAL; pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none(*pgd) && !create) continue; if (WARN_ON_ONCE(pgd_leaf(*pgd))) { err = -EINVAL; break; } if (!pgd_none(*pgd) && WARN_ON_ONCE(pgd_bad(*pgd))) { if (!create) continue; pgd_clear_bad(pgd); } err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create, &mask); if (err) break; } while (pgd++, addr = next, addr != end); if (mask & ARCH_PAGE_TABLE_SYNC_MASK) arch_sync_kernel_mappings(start, start + size); return err; } /* * Scan a region of virtual memory, filling in page tables as necessary * and calling a provided function on each leaf page table. */ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, unsigned long size, pte_fn_t fn, void *data) { return __apply_to_page_range(mm, addr, size, fn, data, true); } EXPORT_SYMBOL_GPL(apply_to_page_range); /* * Scan a region of virtual memory, calling a provided function on * each leaf page table where it exists. * * Unlike apply_to_page_range, this does _not_ fill in page tables * where they are absent. */ int apply_to_existing_page_range(struct mm_struct *mm, unsigned long addr, unsigned long size, pte_fn_t fn, void *data) { return __apply_to_page_range(mm, addr, size, fn, data, false); } EXPORT_SYMBOL_GPL(apply_to_existing_page_range); /* * handle_pte_fault chooses page fault handler according to an entry which was * read non-atomically. Before making any commitment, on those architectures * or configurations (e.g. i386 with PAE) which might give a mix of unmatched * parts, do_swap_page must check under lock before unmapping the pte and * proceeding (but do_wp_page is only called after already making such a check; * and do_anonymous_page can safely check later on). */ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd, pte_t *page_table, pte_t orig_pte) { int same = 1; #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPTION) if (sizeof(pte_t) > sizeof(unsigned long)) { spinlock_t *ptl = pte_lockptr(mm, pmd); spin_lock(ptl); same = pte_same(*page_table, orig_pte); spin_unlock(ptl); } #endif pte_unmap(page_table); return same; } /* * Return: * 0: copied succeeded * -EHWPOISON: copy failed due to hwpoison in source page * -EAGAIN: copied failed (some other reason) */ static inline int cow_user_page(struct page *dst, struct page *src, struct vm_fault *vmf) { int ret; void *kaddr; void __user *uaddr; bool locked = false; struct vm_area_struct *vma = vmf->vma; struct mm_struct *mm = vma->vm_mm; unsigned long addr = vmf->address; if (likely(src)) { if (copy_mc_user_highpage(dst, src, addr, vma)) { memory_failure_queue(page_to_pfn(src), 0); return -EHWPOISON; } return 0; } /* * If the source page was a PFN mapping, we don't have * a "struct page" for it. We do a best-effort copy by * just copying from the original user address. If that * fails, we just zero-fill it. Live with it. */ kaddr = kmap_atomic(dst); uaddr = (void __user *)(addr & PAGE_MASK); /* * On architectures with software "accessed" bits, we would * take a double page fault, so mark it accessed here. */ if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) { pte_t entry; vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); locked = true; if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { /* * Other thread has already handled the fault * and update local tlb only */ update_mmu_tlb(vma, addr, vmf->pte); ret = -EAGAIN; goto pte_unlock; } entry = pte_mkyoung(vmf->orig_pte); if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0)) update_mmu_cache(vma, addr, vmf->pte); } /* * This really shouldn't fail, because the page is there * in the page tables. But it might just be unreadable, * in which case we just give up and fill the result with * zeroes. */ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) { if (locked) goto warn; /* Re-validate under PTL if the page is still mapped */ vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); locked = true; if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { /* The PTE changed under us, update local tlb */ update_mmu_tlb(vma, addr, vmf->pte); ret = -EAGAIN; goto pte_unlock; } /* * The same page can be mapped back since last copy attempt. * Try to copy again under PTL. */ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) { /* * Give a warn in case there can be some obscure * use-case */ warn: WARN_ON_ONCE(1); clear_page(kaddr); } } ret = 0; pte_unlock: if (locked) pte_unmap_unlock(vmf->pte, vmf->ptl); kunmap_atomic(kaddr); flush_dcache_page(dst); return ret; } static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma) { struct file *vm_file = vma->vm_file; if (vm_file) return mapping_gfp_mask(vm_file->f_mapping) | __GFP_FS | __GFP_IO; /* * Special mappings (e.g. VDSO) do not have any file so fake * a default GFP_KERNEL for them. */ return GFP_KERNEL; } /* * Notify the address space that the page is about to become writable so that * it can prohibit this or wait for the page to get into an appropriate state. * * We do this without the lock held, so that it can sleep if it needs to. */ static vm_fault_t do_page_mkwrite(struct vm_fault *vmf) { vm_fault_t ret; struct page *page = vmf->page; unsigned int old_flags = vmf->flags; vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; if (vmf->vma->vm_file && IS_SWAPFILE(vmf->vma->vm_file->f_mapping->host)) return VM_FAULT_SIGBUS; ret = vmf->vma->vm_ops->page_mkwrite(vmf); /* Restore original flags so that caller is not surprised */ vmf->flags = old_flags; if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) return ret; if (unlikely(!(ret & VM_FAULT_LOCKED))) { lock_page(page); if (!page->mapping) { unlock_page(page); return 0; /* retry */ } ret |= VM_FAULT_LOCKED; } else VM_BUG_ON_PAGE(!PageLocked(page), page); return ret; } /* * Handle dirtying of a page in shared file mapping on a write fault. * * The function expects the page to be locked and unlocks it. */ static vm_fault_t fault_dirty_shared_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct address_space *mapping; struct page *page = vmf->page; bool dirtied; bool page_mkwrite = vma->vm_ops && vma->vm_ops->page_mkwrite; dirtied = set_page_dirty(page); VM_BUG_ON_PAGE(PageAnon(page), page); /* * Take a local copy of the address_space - page.mapping may be zeroed * by truncate after unlock_page(). The address_space itself remains * pinned by vma->vm_file's reference. We rely on unlock_page()'s * release semantics to prevent the compiler from undoing this copying. */ mapping = page_rmapping(page); unlock_page(page); if (!page_mkwrite) file_update_time(vma->vm_file); /* * Throttle page dirtying rate down to writeback speed. * * mapping may be NULL here because some device drivers do not * set page.mapping but still dirty their pages * * Drop the mmap_lock before waiting on IO, if we can. The file * is pinning the mapping, as per above. */ if ((dirtied || page_mkwrite) && mapping) { struct file *fpin; fpin = maybe_unlock_mmap_for_io(vmf, NULL); balance_dirty_pages_ratelimited(mapping); if (fpin) { fput(fpin); return VM_FAULT_RETRY; } } return 0; } /* * Handle write page faults for pages that can be reused in the current vma * * This can happen either due to the mapping being with the VM_SHARED flag, * or due to us being the last reference standing to the page. In either * case, all we need to do here is to mark the page as writable and update * any related book-keeping. */ static inline void wp_page_reuse(struct vm_fault *vmf) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; struct page *page = vmf->page; pte_t entry; /* * Clear the pages cpupid information as the existing * information potentially belongs to a now completely * unrelated process. */ if (page) page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1); flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); entry = pte_mkyoung(vmf->orig_pte); entry = maybe_mkwrite(pte_mkdirty(entry), vma); if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1)) update_mmu_cache(vma, vmf->address, vmf->pte); pte_unmap_unlock(vmf->pte, vmf->ptl); count_vm_event(PGREUSE); } /* * Handle the case of a page which we actually need to copy to a new page. * * Called with mmap_lock locked and the old page referenced, but * without the ptl held. * * High level logic flow: * * - Allocate a page, copy the content of the old page to the new one. * - Handle book keeping and accounting - cgroups, mmu-notifiers, etc. * - Take the PTL. If the pte changed, bail out and release the allocated page * - If the pte is still the way we remember it, update the page table and all * relevant references. This includes dropping the reference the page-table * held to the old page, as well as updating the rmap. * - In any case, unlock the PTL and drop the reference we took to the old page. */ static vm_fault_t wp_page_copy(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct mm_struct *mm = vma->vm_mm; struct page *old_page = vmf->page; struct page *new_page = NULL; pte_t entry; int page_copied = 0; struct mmu_notifier_range range; int ret; if (unlikely(anon_vma_prepare(vma))) goto oom; if (is_zero_pfn(pte_pfn(vmf->orig_pte))) { new_page = alloc_zeroed_user_highpage_movable(vma, vmf->address); if (!new_page) goto oom; } else { new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); if (!new_page) goto oom; ret = cow_user_page(new_page, old_page, vmf); if (ret) { /* * COW failed, if the fault was solved by other, * it's fine. If not, userspace would re-fault on * the same address and we will handle the fault * from the second attempt. * The -EHWPOISON case will not be retried. */ put_page(new_page); if (old_page) put_page(old_page); return ret == -EHWPOISON ? VM_FAULT_HWPOISON : 0; } } if (mem_cgroup_charge(new_page, mm, GFP_KERNEL)) goto oom_free_new; cgroup_throttle_swaprate(new_page, GFP_KERNEL); __SetPageUptodate(new_page); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, vmf->address & PAGE_MASK, (vmf->address & PAGE_MASK) + PAGE_SIZE); mmu_notifier_invalidate_range_start(&range); /* * Re-check the pte - we dropped the lock */ vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl); if (likely(pte_same(*vmf->pte, vmf->orig_pte))) { if (old_page) { if (!PageAnon(old_page)) { dec_mm_counter_fast(mm, mm_counter_file(old_page)); inc_mm_counter_fast(mm, MM_ANONPAGES); } } else { inc_mm_counter_fast(mm, MM_ANONPAGES); } flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); entry = mk_pte(new_page, vma->vm_page_prot); entry = pte_sw_mkyoung(entry); entry = maybe_mkwrite(pte_mkdirty(entry), vma); /* * Clear the pte entry and flush it first, before updating the * pte with the new entry, to keep TLBs on different CPUs in * sync. This code used to set the new PTE then flush TLBs, but * that left a window where the new PTE could be loaded into * some TLBs while the old PTE remains in others. */ ptep_clear_flush_notify(vma, vmf->address, vmf->pte); page_add_new_anon_rmap(new_page, vma, vmf->address, false); lru_cache_add_inactive_or_unevictable(new_page, vma); /* * We call the notify macro here because, when using secondary * mmu page tables (such as kvm shadow page tables), we want the * new page to be mapped directly into the secondary page table. */ set_pte_at_notify(mm, vmf->address, vmf->pte, entry); update_mmu_cache(vma, vmf->address, vmf->pte); if (old_page) { /* * Only after switching the pte to the new page may * we remove the mapcount here. Otherwise another * process may come and find the rmap count decremented * before the pte is switched to the new page, and * "reuse" the old page writing into it while our pte * here still points into it and can be read by other * threads. * * The critical issue is to order this * page_remove_rmap with the ptp_clear_flush above. * Those stores are ordered by (if nothing else,) * the barrier present in the atomic_add_negative * in page_remove_rmap. * * Then the TLB flush in ptep_clear_flush ensures that * no process can access the old page before the * decremented mapcount is visible. And the old page * cannot be reused until after the decremented * mapcount is visible. So transitively, TLBs to * old page will be flushed before it can be reused. */ page_remove_rmap(old_page, false); } /* Free the old page.. */ new_page = old_page; page_copied = 1; } else { update_mmu_tlb(vma, vmf->address, vmf->pte); } if (new_page) put_page(new_page); pte_unmap_unlock(vmf->pte, vmf->ptl); /* * No need to double call mmu_notifier->invalidate_range() callback as * the above ptep_clear_flush_notify() did already call it. */ mmu_notifier_invalidate_range_only_end(&range); if (old_page) { /* * Don't let another task, with possibly unlocked vma, * keep the mlocked page. */ if (page_copied && (vma->vm_flags & VM_LOCKED)) { lock_page(old_page); /* LRU manipulation */ if (PageMlocked(old_page)) munlock_vma_page(old_page); unlock_page(old_page); } if (page_copied) free_swap_cache(old_page); put_page(old_page); } return page_copied ? VM_FAULT_WRITE : 0; oom_free_new: put_page(new_page); oom: if (old_page) put_page(old_page); return VM_FAULT_OOM; } /** * finish_mkwrite_fault - finish page fault for a shared mapping, making PTE * writeable once the page is prepared * * @vmf: structure describing the fault * * This function handles all that is needed to finish a write page fault in a * shared mapping due to PTE being read-only once the mapped page is prepared. * It handles locking of PTE and modifying it. * * The function expects the page to be locked or other protection against * concurrent faults / writeback (such as DAX radix tree locks). * * Return: %0 on success, %VM_FAULT_NOPAGE when PTE got changed before * we acquired PTE lock. */ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf) { WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED)); vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); /* * We might have raced with another page fault while we released the * pte_offset_map_lock. */ if (!pte_same(*vmf->pte, vmf->orig_pte)) { update_mmu_tlb(vmf->vma, vmf->address, vmf->pte); pte_unmap_unlock(vmf->pte, vmf->ptl); return VM_FAULT_NOPAGE; } wp_page_reuse(vmf); return 0; } /* * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED * mapping */ static vm_fault_t wp_pfn_shared(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) { vm_fault_t ret; pte_unmap_unlock(vmf->pte, vmf->ptl); vmf->flags |= FAULT_FLAG_MKWRITE; ret = vma->vm_ops->pfn_mkwrite(vmf); if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)) return ret; return finish_mkwrite_fault(vmf); } wp_page_reuse(vmf); return VM_FAULT_WRITE; } static vm_fault_t wp_page_shared(struct vm_fault *vmf) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; vm_fault_t ret = VM_FAULT_WRITE; get_page(vmf->page); if (vma->vm_ops && vma->vm_ops->page_mkwrite) { vm_fault_t tmp; pte_unmap_unlock(vmf->pte, vmf->ptl); tmp = do_page_mkwrite(vmf); if (unlikely(!tmp || (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { put_page(vmf->page); return tmp; } tmp = finish_mkwrite_fault(vmf); if (unlikely(tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) { unlock_page(vmf->page); put_page(vmf->page); return tmp; } } else { wp_page_reuse(vmf); lock_page(vmf->page); } ret |= fault_dirty_shared_page(vmf); put_page(vmf->page); return ret; } /* * This routine handles present pages, when users try to write * to a shared page. It is done by copying the page to a new address * and decrementing the shared-page counter for the old page. * * Note that this routine assumes that the protection checks have been * done by the caller (the low-level page fault routine in most cases). * Thus we can safely just mark it writable once we've done any necessary * COW. * * We also mark the page dirty at this point even though the page will * change only once the write actually happens. This avoids a few races, * and potentially makes it more efficient. * * We enter with non-exclusive mmap_lock (to exclude vma changes, * but allow concurrent faults), with pte both mapped and locked. * We return with mmap_lock still held, but pte unmapped and unlocked. */ static vm_fault_t do_wp_page(struct vm_fault *vmf) __releases(vmf->ptl) { struct vm_area_struct *vma = vmf->vma; if (userfaultfd_pte_wp(vma, *vmf->pte)) { pte_unmap_unlock(vmf->pte, vmf->ptl); return handle_userfault(vmf, VM_UFFD_WP); } /* * Userfaultfd write-protect can defer flushes. Ensure the TLB * is flushed in this case before copying. */ if (unlikely(userfaultfd_wp(vmf->vma) && mm_tlb_flush_pending(vmf->vma->vm_mm))) flush_tlb_page(vmf->vma, vmf->address); vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte); if (!vmf->page) { /* * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a * VM_PFNMAP VMA. * * We should not cow pages in a shared writeable mapping. * Just mark the pages writable and/or call ops->pfn_mkwrite. */ if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED)) return wp_pfn_shared(vmf); pte_unmap_unlock(vmf->pte, vmf->ptl); return wp_page_copy(vmf); } /* * Take out anonymous pages first, anonymous shared vmas are * not dirty accountable. */ if (PageAnon(vmf->page)) { struct page *page = vmf->page; /* PageKsm() doesn't necessarily raise the page refcount */ if (PageKsm(page) || page_count(page) != 1) goto copy; if (!trylock_page(page)) goto copy; if (PageKsm(page) || page_mapcount(page) != 1 || page_count(page) != 1) { unlock_page(page); goto copy; } /* * Ok, we've got the only map reference, and the only * page count reference, and the page is locked, * it's dark out, and we're wearing sunglasses. Hit it. */ unlock_page(page); wp_page_reuse(vmf); return VM_FAULT_WRITE; } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED))) { return wp_page_shared(vmf); } copy: /* * Ok, we need to copy. Oh, well.. */ get_page(vmf->page); pte_unmap_unlock(vmf->pte, vmf->ptl); return wp_page_copy(vmf); } static void unmap_mapping_range_vma(struct vm_area_struct *vma, unsigned long start_addr, unsigned long end_addr, struct zap_details *details) { zap_page_range_single(vma, start_addr, end_addr - start_addr, details); } static inline void unmap_mapping_range_tree(struct rb_root_cached *root, struct zap_details *details) { struct vm_area_struct *vma; pgoff_t vba, vea, zba, zea; vma_interval_tree_foreach(vma, root, details->first_index, details->last_index) { vba = vma->vm_pgoff; vea = vba + vma_pages(vma) - 1; zba = details->first_index; if (zba < vba) zba = vba; zea = details->last_index; if (zea > vea) zea = vea; unmap_mapping_range_vma(vma, ((zba - vba) << PAGE_SHIFT) + vma->vm_start, ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start, details); } } /** * unmap_mapping_page() - Unmap single page from processes. * @page: The locked page to be unmapped. * * Unmap this page from any userspace process which still has it mmaped. * Typically, for efficiency, the range of nearby pages has already been * unmapped by unmap_mapping_pages() or unmap_mapping_range(). But once * truncation or invalidation holds the lock on a page, it may find that * the page has been remapped again: and then uses unmap_mapping_page() * to unmap it finally. */ void unmap_mapping_page(struct page *page) { struct address_space *mapping = page->mapping; struct zap_details details = { }; VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(PageTail(page)); details.check_mapping = mapping; details.first_index = page->index; details.last_index = page->index + thp_nr_pages(page) - 1; details.single_page = page; i_mmap_lock_write(mapping); if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) unmap_mapping_range_tree(&mapping->i_mmap, &details); i_mmap_unlock_write(mapping); } /** * unmap_mapping_pages() - Unmap pages from processes. * @mapping: The address space containing pages to be unmapped. * @start: Index of first page to be unmapped. * @nr: Number of pages to be unmapped. 0 to unmap to end of file. * @even_cows: Whether to unmap even private COWed pages. * * Unmap the pages in this address space from any userspace process which * has them mmaped. Generally, you want to remove COWed pages as well when * a file is being truncated, but not when invalidating pages from the page * cache. */ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows) { struct zap_details details = { }; details.check_mapping = even_cows ? NULL : mapping; details.first_index = start; details.last_index = start + nr - 1; if (details.last_index < details.first_index) details.last_index = ULONG_MAX; i_mmap_lock_write(mapping); if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) unmap_mapping_range_tree(&mapping->i_mmap, &details); i_mmap_unlock_write(mapping); } EXPORT_SYMBOL_GPL(unmap_mapping_pages); /** * unmap_mapping_range - unmap the portion of all mmaps in the specified * address_space corresponding to the specified byte range in the underlying * file. * * @mapping: the address space containing mmaps to be unmapped. * @holebegin: byte in first page to unmap, relative to the start of * the underlying file. This will be rounded down to a PAGE_SIZE * boundary. Note that this is different from truncate_pagecache(), which * must keep the partial page. In contrast, we must get rid of * partial pages. * @holelen: size of prospective hole in bytes. This will be rounded * up to a PAGE_SIZE boundary. A holelen of zero truncates to the * end of the file. * @even_cows: 1 when truncating a file, unmap even private COWed pages; * but 0 when invalidating pagecache, don't throw away private data. */ void unmap_mapping_range(struct address_space *mapping, loff_t const holebegin, loff_t const holelen, int even_cows) { pgoff_t hba = (pgoff_t)(holebegin) >> PAGE_SHIFT; pgoff_t hlen = ((pgoff_t)(holelen) + PAGE_SIZE - 1) >> PAGE_SHIFT; /* Check for overflow. */ if (sizeof(holelen) > sizeof(hlen)) { long long holeend = (holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; if (holeend & ~(long long)ULONG_MAX) hlen = ULONG_MAX - hba + 1; } unmap_mapping_pages(mapping, hba, hlen, even_cows); } EXPORT_SYMBOL(unmap_mapping_range); /* * Restore a potential device exclusive pte to a working pte entry */ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf) { struct page *page = vmf->page; struct vm_area_struct *vma = vmf->vma; struct mmu_notifier_range range; /* * We need a reference to lock the page because we don't hold * the PTL so a racing thread can remove the device-exclusive * entry and unmap it. If the page is free the entry must * have been removed already. If it happens to have already * been re-allocated after being freed all we do is lock and * unlock it. */ if (!get_page_unless_zero(page)) return 0; if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) { put_page(page); return VM_FAULT_RETRY; } mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0, vma, vma->vm_mm, vmf->address & PAGE_MASK, (vmf->address & PAGE_MASK) + PAGE_SIZE, NULL); mmu_notifier_invalidate_range_start(&range); vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (likely(pte_same(*vmf->pte, vmf->orig_pte))) restore_exclusive_pte(vma, page, vmf->address, vmf->pte); pte_unmap_unlock(vmf->pte, vmf->ptl); unlock_page(page); put_page(page); mmu_notifier_invalidate_range_end(&range); return 0; } /* * We enter with non-exclusive mmap_lock (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. * We return with pte unmapped and unlocked. * * We return with the mmap_lock locked or unlocked in the same cases * as does filemap_fault(). */ vm_fault_t do_swap_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct page *page = NULL, *swapcache; struct swap_info_struct *si = NULL; swp_entry_t entry; pte_t pte; int locked; int exclusive = 0; vm_fault_t ret = 0; void *shadow = NULL; if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) goto out; entry = pte_to_swp_entry(vmf->orig_pte); if (unlikely(non_swap_entry(entry))) { if (is_migration_entry(entry)) { migration_entry_wait(vma->vm_mm, vmf->pmd, vmf->address); } else if (is_device_exclusive_entry(entry)) { vmf->page = pfn_swap_entry_to_page(entry); ret = remove_device_exclusive_entry(vmf); } else if (is_device_private_entry(entry)) { vmf->page = pfn_swap_entry_to_page(entry); ret = vmf->page->pgmap->ops->migrate_to_ram(vmf); } else if (is_hwpoison_entry(entry)) { ret = VM_FAULT_HWPOISON; } else { print_bad_pte(vma, vmf->address, vmf->orig_pte, NULL); ret = VM_FAULT_SIGBUS; } goto out; } /* Prevent swapoff from happening to us. */ si = get_swap_device(entry); if (unlikely(!si)) goto out; delayacct_set_flag(current, DELAYACCT_PF_SWAPIN); page = lookup_swap_cache(entry, vma, vmf->address); swapcache = page; if (!page) { if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && __swap_count(entry) == 1) { /* skip swapcache */ page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); if (page) { __SetPageLocked(page); __SetPageSwapBacked(page); if (mem_cgroup_swapin_charge_page(page, vma->vm_mm, GFP_KERNEL, entry)) { ret = VM_FAULT_OOM; goto out_page; } mem_cgroup_swapin_uncharge_swap(entry); shadow = get_shadow_from_swap_cache(entry); if (shadow) workingset_refault(page, shadow); lru_cache_add(page); /* To provide entry to swap_readpage() */ set_page_private(page, entry.val); swap_readpage(page, true); set_page_private(page, 0); } } else { page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vmf); swapcache = page; } if (!page) { /* * Back out if somebody else faulted in this pte * while we released the pte lock. */ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (likely(pte_same(*vmf->pte, vmf->orig_pte))) ret = VM_FAULT_OOM; delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN); goto unlock; } /* Had to read the page from swap area: Major fault */ ret = VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); } else if (PageHWPoison(page)) { /* * hwpoisoned dirty swapcache pages are kept for killing * owner processes (which may be unknown at hwpoison time) */ ret = VM_FAULT_HWPOISON; delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN); goto out_release; } locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags); delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN); if (!locked) { ret |= VM_FAULT_RETRY; goto out_release; } /* * Make sure try_to_free_swap or reuse_swap_page or swapoff did not * release the swapcache from under us. The page pin, and pte_same * test below, are not enough to exclude that. Even if it is still * swapcache, we need to check that the page's swap has not changed. */ if (unlikely((!PageSwapCache(page) || page_private(page) != entry.val)) && swapcache) goto out_page; page = ksm_might_need_to_copy(page, vma, vmf->address); if (unlikely(!page)) { ret = VM_FAULT_OOM; page = swapcache; goto out_page; } cgroup_throttle_swaprate(page, GFP_KERNEL); /* * Back out if somebody else already faulted in this pte. */ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) goto out_nomap; if (unlikely(!PageUptodate(page))) { ret = VM_FAULT_SIGBUS; goto out_nomap; } /* * The page isn't present yet, go ahead with the fault. * * Be careful about the sequence of operations here. * To get its accounting right, reuse_swap_page() must be called * while the page is counted on swap but not yet in mapcount i.e. * before page_add_anon_rmap() and swap_free(); try_to_free_swap() * must be called after the swap_free(), or it will never succeed. */ inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS); pte = mk_pte(page, vma->vm_page_prot); if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) { pte = maybe_mkwrite(pte_mkdirty(pte), vma); vmf->flags &= ~FAULT_FLAG_WRITE; ret |= VM_FAULT_WRITE; exclusive = RMAP_EXCLUSIVE; } flush_icache_page(vma, page); if (pte_swp_soft_dirty(vmf->orig_pte)) pte = pte_mksoft_dirty(pte); if (pte_swp_uffd_wp(vmf->orig_pte)) { pte = pte_mkuffd_wp(pte); pte = pte_wrprotect(pte); } set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte); vmf->orig_pte = pte; /* ksm created a completely new copy */ if (unlikely(page != swapcache && swapcache)) { page_add_new_anon_rmap(page, vma, vmf->address, false); lru_cache_add_inactive_or_unevictable(page, vma); } else { do_page_add_anon_rmap(page, vma, vmf->address, exclusive); } swap_free(entry); if (mem_cgroup_swap_full(page) || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) try_to_free_swap(page); unlock_page(page); if (page != swapcache && swapcache) { /* * Hold the lock to avoid the swap entry to be reused * until we take the PT lock for the pte_same() check * (to avoid false positives from pte_same). For * further safety release the lock after the swap_free * so that the swap count won't change under a * parallel locked swapcache. */ unlock_page(swapcache); put_page(swapcache); } if (vmf->flags & FAULT_FLAG_WRITE) { ret |= do_wp_page(vmf); if (ret & VM_FAULT_ERROR) ret &= VM_FAULT_ERROR; goto out; } /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, vmf->address, vmf->pte); unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); out: if (si) put_swap_device(si); return ret; out_nomap: pte_unmap_unlock(vmf->pte, vmf->ptl); out_page: unlock_page(page); out_release: put_page(page); if (page != swapcache && swapcache) { unlock_page(swapcache); put_page(swapcache); } if (si) put_swap_device(si); return ret; } /* * We enter with non-exclusive mmap_lock (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. * We return with mmap_lock still held, but pte unmapped and unlocked. */ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct page *page; vm_fault_t ret = 0; pte_t entry; /* File mapping without ->vm_ops ? */ if (vma->vm_flags & VM_SHARED) return VM_FAULT_SIGBUS; /* * Use pte_alloc() instead of pte_alloc_map(). We can't run * pte_offset_map() on pmds where a huge pmd might be created * from a different thread. * * pte_alloc_map() is safe to use under mmap_write_lock(mm) or when * parallel threads are excluded by other means. * * Here we only have mmap_read_lock(mm). */ if (pte_alloc(vma->vm_mm, vmf->pmd)) return VM_FAULT_OOM; /* See comment in handle_pte_fault() */ if (unlikely(pmd_trans_unstable(vmf->pmd))) return 0; /* Use the zero-page for reads */ if (!(vmf->flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(vma->vm_mm)) { entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address), vma->vm_page_prot)); vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (!pte_none(*vmf->pte)) { update_mmu_tlb(vma, vmf->address, vmf->pte); goto unlock; } ret = check_stable_address_space(vma->vm_mm); if (ret) goto unlock; /* Deliver the page fault to userland, check inside PT lock */ if (userfaultfd_missing(vma)) { pte_unmap_unlock(vmf->pte, vmf->ptl); return handle_userfault(vmf, VM_UFFD_MISSING); } goto setpte; } /* Allocate our own private page. */ if (unlikely(anon_vma_prepare(vma))) goto oom; page = alloc_zeroed_user_highpage_movable(vma, vmf->address); if (!page) goto oom; if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL)) goto oom_free_page; cgroup_throttle_swaprate(page, GFP_KERNEL); /* * The memory barrier inside __SetPageUptodate makes sure that * preceding stores to the page contents become visible before * the set_pte_at() write. */ __SetPageUptodate(page); entry = mk_pte(page, vma->vm_page_prot); entry = pte_sw_mkyoung(entry); if (vma->vm_flags & VM_WRITE) entry = pte_mkwrite(pte_mkdirty(entry)); vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (!pte_none(*vmf->pte)) { update_mmu_cache(vma, vmf->address, vmf->pte); goto release; } ret = check_stable_address_space(vma->vm_mm); if (ret) goto release; /* Deliver the page fault to userland, check inside PT lock */ if (userfaultfd_missing(vma)) { pte_unmap_unlock(vmf->pte, vmf->ptl); put_page(page); return handle_userfault(vmf, VM_UFFD_MISSING); } inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, vmf->address, false); lru_cache_add_inactive_or_unevictable(page, vma); setpte: set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, vmf->address, vmf->pte); unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); return ret; release: put_page(page); goto unlock; oom_free_page: put_page(page); oom: return VM_FAULT_OOM; } /* * The mmap_lock must have been held on entry, and may have been * released depending on flags and vma->vm_ops->fault() return value. * See filemap_fault() and __lock_page_retry(). */ static vm_fault_t __do_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; vm_fault_t ret; /* * Preallocate pte before we take page_lock because this might lead to * deadlocks for memcg reclaim which waits for pages under writeback: * lock_page(A) * SetPageWriteback(A) * unlock_page(A) * lock_page(B) * lock_page(B) * pte_alloc_one * shrink_page_list * wait_on_page_writeback(A) * SetPageWriteback(B) * unlock_page(B) * # flush A, B to clear the writeback */ if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) { vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); if (!vmf->prealloc_pte) return VM_FAULT_OOM; smp_wmb(); /* See comment in __pte_alloc() */ } ret = vma->vm_ops->fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY | VM_FAULT_DONE_COW))) return ret; if (unlikely(PageHWPoison(vmf->page))) { struct page *page = vmf->page; vm_fault_t poisonret = VM_FAULT_HWPOISON; if (ret & VM_FAULT_LOCKED) { if (page_mapped(page)) unmap_mapping_pages(page_mapping(page), page->index, 1, false); /* Retry if a clean page was removed from the cache. */ if (invalidate_inode_page(page)) poisonret = VM_FAULT_NOPAGE; unlock_page(page); } put_page(page); vmf->page = NULL; return poisonret; } if (unlikely(!(ret & VM_FAULT_LOCKED))) lock_page(vmf->page); else VM_BUG_ON_PAGE(!PageLocked(vmf->page), vmf->page); return ret; } #ifdef CONFIG_TRANSPARENT_HUGEPAGE static void deposit_prealloc_pte(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); /* * We are going to consume the prealloc table, * count that as nr_ptes. */ mm_inc_nr_ptes(vma->vm_mm); vmf->prealloc_pte = NULL; } vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) { struct vm_area_struct *vma = vmf->vma; bool write = vmf->flags & FAULT_FLAG_WRITE; unsigned long haddr = vmf->address & HPAGE_PMD_MASK; pmd_t entry; int i; vm_fault_t ret = VM_FAULT_FALLBACK; if (!transhuge_vma_suitable(vma, haddr)) return ret; page = compound_head(page); if (compound_order(page) != HPAGE_PMD_ORDER) return ret; /* * Just backoff if any subpage of a THP is corrupted otherwise * the corrupted page may mapped by PMD silently to escape the * check. This kind of THP just can be PTE mapped. Access to * the corrupted subpage should trigger SIGBUS as expected. */ if (unlikely(PageHasHWPoisoned(page))) return ret; /* * Archs like ppc64 need additional space to store information * related to pte entry. Use the preallocated table for that. */ if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) { vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); if (!vmf->prealloc_pte) return VM_FAULT_OOM; smp_wmb(); /* See comment in __pte_alloc() */ } vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); if (unlikely(!pmd_none(*vmf->pmd))) goto out; for (i = 0; i < HPAGE_PMD_NR; i++) flush_icache_page(vma, page + i); entry = mk_huge_pmd(page, vma->vm_page_prot); if (write) entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR); page_add_file_rmap(page, true); /* * deposit and withdraw with pmd lock held */ if (arch_needs_pgtable_deposit()) deposit_prealloc_pte(vmf); set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); update_mmu_cache_pmd(vma, haddr, vmf->pmd); /* fault is handled */ ret = 0; count_vm_event(THP_FILE_MAPPED); out: spin_unlock(vmf->ptl); return ret; } #else vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) { return VM_FAULT_FALLBACK; } #endif void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr) { struct vm_area_struct *vma = vmf->vma; bool write = vmf->flags & FAULT_FLAG_WRITE; bool prefault = vmf->address != addr; pte_t entry; flush_icache_page(vma, page); entry = mk_pte(page, vma->vm_page_prot); if (prefault && arch_wants_old_prefaulted_pte()) entry = pte_mkold(entry); else entry = pte_sw_mkyoung(entry); if (write) entry = maybe_mkwrite(pte_mkdirty(entry), vma); /* copy-on-write page */ if (write && !(vma->vm_flags & VM_SHARED)) { inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, addr, false); lru_cache_add_inactive_or_unevictable(page, vma); } else { inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); page_add_file_rmap(page, false); } set_pte_at(vma->vm_mm, addr, vmf->pte, entry); } /** * finish_fault - finish page fault once we have prepared the page to fault * * @vmf: structure describing the fault * * This function handles all that is needed to finish a page fault once the * page to fault in is prepared. It handles locking of PTEs, inserts PTE for * given page, adds reverse page mapping, handles memcg charges and LRU * addition. * * The function expects the page to be locked and on success it consumes a * reference of a page being mapped (for the PTE which maps it). * * Return: %0 on success, %VM_FAULT_ code in case of error. */ vm_fault_t finish_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct page *page; vm_fault_t ret; /* Did we COW the page? */ if ((vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) page = vmf->cow_page; else page = vmf->page; /* * check even for read faults because we might have lost our CoWed * page */ if (!(vma->vm_flags & VM_SHARED)) { ret = check_stable_address_space(vma->vm_mm); if (ret) return ret; } if (pmd_none(*vmf->pmd)) { if (PageTransCompound(page)) { ret = do_set_pmd(vmf, page); if (ret != VM_FAULT_FALLBACK) return ret; } if (vmf->prealloc_pte) { vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); if (likely(pmd_none(*vmf->pmd))) { mm_inc_nr_ptes(vma->vm_mm); pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); vmf->prealloc_pte = NULL; } spin_unlock(vmf->ptl); } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) { return VM_FAULT_OOM; } } /* * See comment in handle_pte_fault() for how this scenario happens, we * need to return NOPAGE so that we drop this page. */ if (pmd_devmap_trans_unstable(vmf->pmd)) return VM_FAULT_NOPAGE; vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); ret = 0; /* Re-check under ptl */ if (likely(pte_none(*vmf->pte))) do_set_pte(vmf, page, vmf->address); else ret = VM_FAULT_NOPAGE; update_mmu_tlb(vma, vmf->address, vmf->pte); pte_unmap_unlock(vmf->pte, vmf->ptl); return ret; } static unsigned long fault_around_bytes __read_mostly = rounddown_pow_of_two(65536); #ifdef CONFIG_DEBUG_FS static int fault_around_bytes_get(void *data, u64 *val) { *val = fault_around_bytes; return 0; } /* * fault_around_bytes must be rounded down to the nearest page order as it's * what do_fault_around() expects to see. */ static int fault_around_bytes_set(void *data, u64 val) { if (val / PAGE_SIZE > PTRS_PER_PTE) return -EINVAL; if (val > PAGE_SIZE) fault_around_bytes = rounddown_pow_of_two(val); else fault_around_bytes = PAGE_SIZE; /* rounddown_pow_of_two(0) is undefined */ return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fault_around_bytes_fops, fault_around_bytes_get, fault_around_bytes_set, "%llu\n"); static int __init fault_around_debugfs(void) { debugfs_create_file_unsafe("fault_around_bytes", 0644, NULL, NULL, &fault_around_bytes_fops); return 0; } late_initcall(fault_around_debugfs); #endif /* * do_fault_around() tries to map few pages around the fault address. The hope * is that the pages will be needed soon and this will lower the number of * faults to handle. * * It uses vm_ops->map_pages() to map the pages, which skips the page if it's * not ready to be mapped: not up-to-date, locked, etc. * * This function is called with the page table lock taken. In the split ptlock * case the page table lock only protects only those entries which belong to * the page table corresponding to the fault address. * * This function doesn't cross the VMA boundaries, in order to call map_pages() * only once. * * fault_around_bytes defines how many bytes we'll try to map. * do_fault_around() expects it to be set to a power of two less than or equal * to PTRS_PER_PTE. * * The virtual address of the area that we map is naturally aligned to * fault_around_bytes rounded down to the machine page size * (and therefore to page order). This way it's easier to guarantee * that we don't cross page table boundaries. */ static vm_fault_t do_fault_around(struct vm_fault *vmf) { unsigned long address = vmf->address, nr_pages, mask; pgoff_t start_pgoff = vmf->pgoff; pgoff_t end_pgoff; int off; nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT; mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK; address = max(address & mask, vmf->vma->vm_start); off = ((vmf->address - address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); start_pgoff -= off; /* * end_pgoff is either the end of the page table, the end of * the vma or nr_pages from start_pgoff, depending what is nearest. */ end_pgoff = start_pgoff - ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + PTRS_PER_PTE - 1; end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1, start_pgoff + nr_pages - 1); if (pmd_none(*vmf->pmd)) { vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm); if (!vmf->prealloc_pte) return VM_FAULT_OOM; smp_wmb(); /* See comment in __pte_alloc() */ } return vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff); } static vm_fault_t do_read_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; vm_fault_t ret = 0; /* * Let's call ->map_pages() first and use ->fault() as fallback * if page by the offset is not ready to be mapped (cold cache or * something). */ if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) { if (likely(!userfaultfd_minor(vmf->vma))) { ret = do_fault_around(vmf); if (ret) return ret; } } ret = __do_fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; ret |= finish_fault(vmf); unlock_page(vmf->page); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) put_page(vmf->page); return ret; } static vm_fault_t do_cow_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; vm_fault_t ret; if (unlikely(anon_vma_prepare(vma))) return VM_FAULT_OOM; vmf->cow_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); if (!vmf->cow_page) return VM_FAULT_OOM; if (mem_cgroup_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL)) { put_page(vmf->cow_page); return VM_FAULT_OOM; } cgroup_throttle_swaprate(vmf->cow_page, GFP_KERNEL); ret = __do_fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) goto uncharge_out; if (ret & VM_FAULT_DONE_COW) return ret; copy_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma); __SetPageUptodate(vmf->cow_page); ret |= finish_fault(vmf); unlock_page(vmf->page); put_page(vmf->page); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) goto uncharge_out; return ret; uncharge_out: put_page(vmf->cow_page); return ret; } static vm_fault_t do_shared_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; vm_fault_t ret, tmp; ret = __do_fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; /* * Check if the backing address space wants to know that the page is * about to become writable */ if (vma->vm_ops->page_mkwrite) { unlock_page(vmf->page); tmp = do_page_mkwrite(vmf); if (unlikely(!tmp || (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { put_page(vmf->page); return tmp; } } ret |= finish_fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) { unlock_page(vmf->page); put_page(vmf->page); return ret; } ret |= fault_dirty_shared_page(vmf); return ret; } /* * We enter with non-exclusive mmap_lock (to exclude vma changes, * but allow concurrent faults). * The mmap_lock may have been released depending on flags and our * return value. See filemap_fault() and __lock_page_or_retry(). * If mmap_lock is released, vma may become invalid (for example * by other thread calling munmap()). */ static vm_fault_t do_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct mm_struct *vm_mm = vma->vm_mm; vm_fault_t ret; /* * The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ if (!vma->vm_ops->fault) { /* * If we find a migration pmd entry or a none pmd entry, which * should never happen, return SIGBUS */ if (unlikely(!pmd_present(*vmf->pmd))) ret = VM_FAULT_SIGBUS; else { vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); /* * Make sure this is not a temporary clearing of pte * by holding ptl and checking again. A R/M/W update * of pte involves: take ptl, clearing the pte so that * we don't have concurrent modification by hardware * followed by an update. */ if (unlikely(pte_none(*vmf->pte))) ret = VM_FAULT_SIGBUS; else ret = VM_FAULT_NOPAGE; pte_unmap_unlock(vmf->pte, vmf->ptl); } } else if (!(vmf->flags & FAULT_FLAG_WRITE)) ret = do_read_fault(vmf); else if (!(vma->vm_flags & VM_SHARED)) ret = do_cow_fault(vmf); else ret = do_shared_fault(vmf); /* preallocated pagetable is unused: free it */ if (vmf->prealloc_pte) { pte_free(vm_mm, vmf->prealloc_pte); vmf->prealloc_pte = NULL; } return ret; } int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, unsigned long addr, int page_nid, int *flags) { get_page(page); count_vm_numa_event(NUMA_HINT_FAULTS); if (page_nid == numa_node_id()) { count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); *flags |= TNF_FAULT_LOCAL; } return mpol_misplaced(page, vma, addr); } static vm_fault_t do_numa_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct page *page = NULL; int page_nid = NUMA_NO_NODE; int last_cpupid; int target_nid; pte_t pte, old_pte; bool was_writable = pte_savedwrite(vmf->orig_pte); int flags = 0; /* * The "pte" at this point cannot be used safely without * validation through pte_unmap_same(). It's of NUMA type but * the pfn may be screwed if the read is non atomic. */ vmf->ptl = pte_lockptr(vma->vm_mm, vmf->pmd); spin_lock(vmf->ptl); if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) { pte_unmap_unlock(vmf->pte, vmf->ptl); return 0; } /* Get the normal PTE */ old_pte = ptep_get(vmf->pte); pte = pte_modify(old_pte, vma->vm_page_prot); page = vm_normal_page(vma, vmf->address, pte); if (!page) goto out_map; /* TODO: handle PTE-mapped THP */ if (PageCompound(page)) goto out_map; /* * Avoid grouping on RO pages in general. RO pages shouldn't hurt as * much anyway since they can be in shared cache state. This misses * the case where a mapping is writable but the process never writes * to it but pte_write gets cleared during protection updates and * pte_dirty has unpredictable behaviour between PTE scan updates, * background writeback, dirty balancing and application behaviour. */ if (!was_writable) flags |= TNF_NO_GROUP; /* * Flag if the page is shared between multiple address spaces. This * is later used when determining whether to group tasks together */ if (page_mapcount(page) > 1 && (vma->vm_flags & VM_SHARED)) flags |= TNF_SHARED; last_cpupid = page_cpupid_last(page); page_nid = page_to_nid(page); target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid, &flags); if (target_nid == NUMA_NO_NODE) { put_page(page); goto out_map; } pte_unmap_unlock(vmf->pte, vmf->ptl); /* Migrate to the requested node */ if (migrate_misplaced_page(page, vma, target_nid)) { page_nid = target_nid; flags |= TNF_MIGRATED; task_numa_fault(last_cpupid, page_nid, 1, flags); return 0; } flags |= TNF_MIGRATE_FAIL; vmf->pte = pte_offset_map(vmf->pmd, vmf->address); spin_lock(vmf->ptl); if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) { pte_unmap_unlock(vmf->pte, vmf->ptl); return 0; } out_map: /* * Make it present again, depending on how arch implements * non-accessible ptes, some can allow access by kernel mode. */ old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte); pte = pte_modify(old_pte, vma->vm_page_prot); pte = pte_mkyoung(pte); if (was_writable) pte = pte_mkwrite(pte); ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte); update_mmu_cache(vma, vmf->address, vmf->pte); pte_unmap_unlock(vmf->pte, vmf->ptl); if (page_nid != NUMA_NO_NODE) task_numa_fault(last_cpupid, page_nid, 1, flags); return 0; } static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) { if (vma_is_anonymous(vmf->vma)) return do_huge_pmd_anonymous_page(vmf); if (vmf->vma->vm_ops->huge_fault) return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); return VM_FAULT_FALLBACK; } /* `inline' is required to avoid gcc 4.1.2 build error */ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf) { if (vma_is_anonymous(vmf->vma)) { if (userfaultfd_huge_pmd_wp(vmf->vma, vmf->orig_pmd)) return handle_userfault(vmf, VM_UFFD_WP); return do_huge_pmd_wp_page(vmf); } if (vmf->vma->vm_ops->huge_fault) { vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); if (!(ret & VM_FAULT_FALLBACK)) return ret; } /* COW or write-notify handled on pte level: split pmd. */ __split_huge_pmd(vmf->vma, vmf->pmd, vmf->address, false, NULL); return VM_FAULT_FALLBACK; } static vm_fault_t create_huge_pud(struct vm_fault *vmf) { #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) /* No support for anonymous transparent PUD pages yet */ if (vma_is_anonymous(vmf->vma)) return VM_FAULT_FALLBACK; if (vmf->vma->vm_ops->huge_fault) return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ return VM_FAULT_FALLBACK; } static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) { #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) /* No support for anonymous transparent PUD pages yet */ if (vma_is_anonymous(vmf->vma)) goto split; if (vmf->vma->vm_ops->huge_fault) { vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); if (!(ret & VM_FAULT_FALLBACK)) return ret; } split: /* COW or write-notify not handled on PUD level: split pud.*/ __split_huge_pud(vmf->vma, vmf->pud, vmf->address); #endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ return VM_FAULT_FALLBACK; } /* * These routines also need to handle stuff like marking pages dirty * and/or accessed for architectures that don't do it in hardware (most * RISC architectures). The early dirtying is also good on the i386. * * There is also a hook called "update_mmu_cache()" that architectures * with external mmu caches can use to update those (ie the Sparc or * PowerPC hashed page tables that act as extended TLBs). * * We enter with non-exclusive mmap_lock (to exclude vma changes, but allow * concurrent faults). * * The mmap_lock may have been released depending on flags and our return value. * See filemap_fault() and __lock_page_or_retry(). */ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) { pte_t entry; if (unlikely(pmd_none(*vmf->pmd))) { /* * Leave __pte_alloc() until later: because vm_ops->fault may * want to allocate huge page, and if we expose page table * for an instant, it will be difficult to retract from * concurrent faults and from rmap lookups. */ vmf->pte = NULL; } else { /* * If a huge pmd materialized under us just retry later. Use * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead * of pmd_trans_huge() to ensure the pmd didn't become * pmd_trans_huge under us and then back to pmd_none, as a * result of MADV_DONTNEED running immediately after a huge pmd * fault in a different thread of this mm, in turn leading to a * misleading pmd_trans_huge() retval. All we have to ensure is * that it is a regular pmd that we can walk with * pte_offset_map() and we can do that through an atomic read * in C, which is what pmd_trans_unstable() provides. */ if (pmd_devmap_trans_unstable(vmf->pmd)) return 0; /* * A regular pmd is established and it can't morph into a huge * pmd from under us anymore at this point because we hold the * mmap_lock read mode and khugepaged takes it in write mode. * So now it's safe to run pte_offset_map(). */ vmf->pte = pte_offset_map(vmf->pmd, vmf->address); vmf->orig_pte = *vmf->pte; /* * some architectures can have larger ptes than wordsize, * e.g.ppc44x-defconfig has CONFIG_PTE_64BIT=y and * CONFIG_32BIT=y, so READ_ONCE cannot guarantee atomic * accesses. The code below just needs a consistent view * for the ifs and we later double check anyway with the * ptl lock held. So here a barrier will do. */ barrier(); if (pte_none(vmf->orig_pte)) { pte_unmap(vmf->pte); vmf->pte = NULL; } } if (!vmf->pte) { if (vma_is_anonymous(vmf->vma)) return do_anonymous_page(vmf); else return do_fault(vmf); } if (!pte_present(vmf->orig_pte)) return do_swap_page(vmf); if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma)) return do_numa_page(vmf); vmf->ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd); spin_lock(vmf->ptl); entry = vmf->orig_pte; if (unlikely(!pte_same(*vmf->pte, entry))) { update_mmu_tlb(vmf->vma, vmf->address, vmf->pte); goto unlock; } if (vmf->flags & FAULT_FLAG_WRITE) { if (!pte_write(entry)) return do_wp_page(vmf); entry = pte_mkdirty(entry); } entry = pte_mkyoung(entry); if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry, vmf->flags & FAULT_FLAG_WRITE)) { update_mmu_cache(vmf->vma, vmf->address, vmf->pte); } else { /* Skip spurious TLB flush for retried page fault */ if (vmf->flags & FAULT_FLAG_TRIED) goto unlock; /* * This is needed only for protection faults but the arch code * is not yet telling us if this is a protection fault or not. * This still avoids useless tlb flushes for .text page faults * with threads. */ if (vmf->flags & FAULT_FLAG_WRITE) flush_tlb_fix_spurious_fault(vmf->vma, vmf->address); } unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); return 0; } /* * By the time we get here, we already hold the mm semaphore * * The mmap_lock may have been released depending on flags and our * return value. See filemap_fault() and __lock_page_or_retry(). */ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags) { struct vm_fault vmf = { .vma = vma, .address = address & PAGE_MASK, .flags = flags, .pgoff = linear_page_index(vma, address), .gfp_mask = __get_fault_gfp_mask(vma), }; unsigned int dirty = flags & FAULT_FLAG_WRITE; struct mm_struct *mm = vma->vm_mm; pgd_t *pgd; p4d_t *p4d; vm_fault_t ret; pgd = pgd_offset(mm, address); p4d = p4d_alloc(mm, pgd, address); if (!p4d) return VM_FAULT_OOM; vmf.pud = pud_alloc(mm, p4d, address); if (!vmf.pud) return VM_FAULT_OOM; retry_pud: if (pud_none(*vmf.pud) && __transparent_hugepage_enabled(vma)) { ret = create_huge_pud(&vmf); if (!(ret & VM_FAULT_FALLBACK)) return ret; } else { pud_t orig_pud = *vmf.pud; barrier(); if (pud_trans_huge(orig_pud) || pud_devmap(orig_pud)) { /* NUMA case for anonymous PUDs would go here */ if (dirty && !pud_write(orig_pud)) { ret = wp_huge_pud(&vmf, orig_pud); if (!(ret & VM_FAULT_FALLBACK)) return ret; } else { huge_pud_set_accessed(&vmf, orig_pud); return 0; } } } vmf.pmd = pmd_alloc(mm, vmf.pud, address); if (!vmf.pmd) return VM_FAULT_OOM; /* Huge pud page fault raced with pmd_alloc? */ if (pud_trans_unstable(vmf.pud)) goto retry_pud; if (pmd_none(*vmf.pmd) && __transparent_hugepage_enabled(vma)) { ret = create_huge_pmd(&vmf); if (!(ret & VM_FAULT_FALLBACK)) return ret; } else { vmf.orig_pmd = *vmf.pmd; barrier(); if (unlikely(is_swap_pmd(vmf.orig_pmd))) { VM_BUG_ON(thp_migration_supported() && !is_pmd_migration_entry(vmf.orig_pmd)); if (is_pmd_migration_entry(vmf.orig_pmd)) pmd_migration_entry_wait(mm, vmf.pmd); return 0; } if (pmd_trans_huge(vmf.orig_pmd) || pmd_devmap(vmf.orig_pmd)) { if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma)) return do_huge_pmd_numa_page(&vmf); if (dirty && !pmd_write(vmf.orig_pmd)) { ret = wp_huge_pmd(&vmf); if (!(ret & VM_FAULT_FALLBACK)) return ret; } else { huge_pmd_set_accessed(&vmf); return 0; } } } return handle_pte_fault(&vmf); } /** * mm_account_fault - Do page fault accounting * * @regs: the pt_regs struct pointer. When set to NULL, will skip accounting * of perf event counters, but we'll still do the per-task accounting to * the task who triggered this page fault. * @address: the faulted address. * @flags: the fault flags. * @ret: the fault retcode. * * This will take care of most of the page fault accounting. Meanwhile, it * will also include the PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN] perf counter * updates. However, note that the handling of PERF_COUNT_SW_PAGE_FAULTS should * still be in per-arch page fault handlers at the entry of page fault. */ static inline void mm_account_fault(struct pt_regs *regs, unsigned long address, unsigned int flags, vm_fault_t ret) { bool major; /* * We don't do accounting for some specific faults: * * - Unsuccessful faults (e.g. when the address wasn't valid). That * includes arch_vma_access_permitted() failing before reaching here. * So this is not a "this many hardware page faults" counter. We * should use the hw profiling for that. * * - Incomplete faults (VM_FAULT_RETRY). They will only be counted * once they're completed. */ if (ret & (VM_FAULT_ERROR | VM_FAULT_RETRY)) return; /* * We define the fault as a major fault when the final successful fault * is VM_FAULT_MAJOR, or if it retried (which implies that we couldn't * handle it immediately previously). */ major = (ret & VM_FAULT_MAJOR) || (flags & FAULT_FLAG_TRIED); if (major) current->maj_flt++; else current->min_flt++; /* * If the fault is done for GUP, regs will be NULL. We only do the * accounting for the per thread fault counters who triggered the * fault, and we skip the perf event updates. */ if (!regs) return; if (major) perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); else perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); } /* * By the time we get here, we already hold the mm semaphore * * The mmap_lock may have been released depending on flags and our * return value. See filemap_fault() and __lock_page_or_retry(). */ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags, struct pt_regs *regs) { vm_fault_t ret; __set_current_state(TASK_RUNNING); count_vm_event(PGFAULT); count_memcg_event_mm(vma->vm_mm, PGFAULT); /* do counter updates before entering really critical section. */ check_sync_rss_stat(current); if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, flags & FAULT_FLAG_INSTRUCTION, flags & FAULT_FLAG_REMOTE)) return VM_FAULT_SIGSEGV; /* * Enable the memcg OOM handling for faults triggered in user * space. Kernel faults are handled more gracefully. */ if (flags & FAULT_FLAG_USER) mem_cgroup_enter_user_fault(); if (unlikely(is_vm_hugetlb_page(vma))) ret = hugetlb_fault(vma->vm_mm, vma, address, flags); else ret = __handle_mm_fault(vma, address, flags); if (flags & FAULT_FLAG_USER) { mem_cgroup_exit_user_fault(); /* * The task may have entered a memcg OOM situation but * if the allocation error was handled gracefully (no * VM_FAULT_OOM), there is no need to kill anything. * Just clean up the OOM state peacefully. */ if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)) mem_cgroup_oom_synchronize(false); } mm_account_fault(regs, address, flags, ret); return ret; } EXPORT_SYMBOL_GPL(handle_mm_fault); #ifndef __PAGETABLE_P4D_FOLDED /* * Allocate p4d page table. * We've already handled the fast-path in-line. */ int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) { p4d_t *new = p4d_alloc_one(mm, address); if (!new) return -ENOMEM; smp_wmb(); /* See comment in __pte_alloc */ spin_lock(&mm->page_table_lock); if (pgd_present(*pgd)) /* Another has populated it */ p4d_free(mm, new); else pgd_populate(mm, pgd, new); spin_unlock(&mm->page_table_lock); return 0; } #endif /* __PAGETABLE_P4D_FOLDED */ #ifndef __PAGETABLE_PUD_FOLDED /* * Allocate page upper directory. * We've already handled the fast-path in-line. */ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) { pud_t *new = pud_alloc_one(mm, address); if (!new) return -ENOMEM; smp_wmb(); /* See comment in __pte_alloc */ spin_lock(&mm->page_table_lock); if (!p4d_present(*p4d)) { mm_inc_nr_puds(mm); p4d_populate(mm, p4d, new); } else /* Another has populated it */ pud_free(mm, new); spin_unlock(&mm->page_table_lock); return 0; } #endif /* __PAGETABLE_PUD_FOLDED */ #ifndef __PAGETABLE_PMD_FOLDED /* * Allocate page middle directory. * We've already handled the fast-path in-line. */ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) { spinlock_t *ptl; pmd_t *new = pmd_alloc_one(mm, address); if (!new) return -ENOMEM; smp_wmb(); /* See comment in __pte_alloc */ ptl = pud_lock(mm, pud); if (!pud_present(*pud)) { mm_inc_nr_pmds(mm); pud_populate(mm, pud, new); } else /* Another has populated it */ pmd_free(mm, new); spin_unlock(ptl); return 0; } #endif /* __PAGETABLE_PMD_FOLDED */ int follow_invalidate_pte(struct mm_struct *mm, unsigned long address, struct mmu_notifier_range *range, pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *ptep; pgd = pgd_offset(mm, address); if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) goto out; p4d = p4d_offset(pgd, address); if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d))) goto out; pud = pud_offset(p4d, address); if (pud_none(*pud) || unlikely(pud_bad(*pud))) goto out; pmd = pmd_offset(pud, address); VM_BUG_ON(pmd_trans_huge(*pmd)); if (pmd_huge(*pmd)) { if (!pmdpp) goto out; if (range) { mmu_notifier_range_init(range, MMU_NOTIFY_CLEAR, 0, NULL, mm, address & PMD_MASK, (address & PMD_MASK) + PMD_SIZE); mmu_notifier_invalidate_range_start(range); } *ptlp = pmd_lock(mm, pmd); if (pmd_huge(*pmd)) { *pmdpp = pmd; return 0; } spin_unlock(*ptlp); if (range) mmu_notifier_invalidate_range_end(range); } if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) goto out; if (range) { mmu_notifier_range_init(range, MMU_NOTIFY_CLEAR, 0, NULL, mm, address & PAGE_MASK, (address & PAGE_MASK) + PAGE_SIZE); mmu_notifier_invalidate_range_start(range); } ptep = pte_offset_map_lock(mm, pmd, address, ptlp); if (!pte_present(*ptep)) goto unlock; *ptepp = ptep; return 0; unlock: pte_unmap_unlock(ptep, *ptlp); if (range) mmu_notifier_invalidate_range_end(range); out: return -EINVAL; } /** * follow_pte - look up PTE at a user virtual address * @mm: the mm_struct of the target address space * @address: user virtual address * @ptepp: location to store found PTE * @ptlp: location to store the lock for the PTE * * On a successful return, the pointer to the PTE is stored in @ptepp; * the corresponding lock is taken and its location is stored in @ptlp. * The contents of the PTE are only stable until @ptlp is released; * any further use, if any, must be protected against invalidation * with MMU notifiers. * * Only IO mappings and raw PFN mappings are allowed. The mmap semaphore * should be taken for read. * * KVM uses this function. While it is arguably less bad than ``follow_pfn``, * it is not a good general-purpose API. * * Return: zero on success, -ve otherwise. */ int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp, spinlock_t **ptlp) { return follow_invalidate_pte(mm, address, NULL, ptepp, NULL, ptlp); } EXPORT_SYMBOL_GPL(follow_pte); /** * follow_pfn - look up PFN at a user virtual address * @vma: memory mapping * @address: user virtual address * @pfn: location to store found PFN * * Only IO mappings and raw PFN mappings are allowed. * * This function does not allow the caller to read the permissions * of the PTE. Do not use it. * * Return: zero and the pfn at @pfn on success, -ve otherwise. */ int follow_pfn(struct vm_area_struct *vma, unsigned long address, unsigned long *pfn) { int ret = -EINVAL; spinlock_t *ptl; pte_t *ptep; if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) return ret; ret = follow_pte(vma->vm_mm, address, &ptep, &ptl); if (ret) return ret; *pfn = pte_pfn(*ptep); pte_unmap_unlock(ptep, ptl); return 0; } EXPORT_SYMBOL(follow_pfn); #ifdef CONFIG_HAVE_IOREMAP_PROT int follow_phys(struct vm_area_struct *vma, unsigned long address, unsigned int flags, unsigned long *prot, resource_size_t *phys) { int ret = -EINVAL; pte_t *ptep, pte; spinlock_t *ptl; if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) goto out; if (follow_pte(vma->vm_mm, address, &ptep, &ptl)) goto out; pte = *ptep; /* Never return PFNs of anon folios in COW mappings. */ if (vm_normal_page(vma, address, pte)) goto unlock; if ((flags & FOLL_WRITE) && !pte_write(pte)) goto unlock; *prot = pgprot_val(pte_pgprot(pte)); *phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT; ret = 0; unlock: pte_unmap_unlock(ptep, ptl); out: return ret; } /** * generic_access_phys - generic implementation for iomem mmap access * @vma: the vma to access * @addr: userspace address, not relative offset within @vma * @buf: buffer to read/write * @len: length of transfer * @write: set to FOLL_WRITE when writing, otherwise reading * * This is a generic implementation for &vm_operations_struct.access for an * iomem mapping. This callback is used by access_process_vm() when the @vma is * not page based. */ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write) { resource_size_t phys_addr; unsigned long prot = 0; void __iomem *maddr; pte_t *ptep, pte; spinlock_t *ptl; int offset = offset_in_page(addr); int ret = -EINVAL; if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) return -EINVAL; retry: if (follow_pte(vma->vm_mm, addr, &ptep, &ptl)) return -EINVAL; pte = *ptep; pte_unmap_unlock(ptep, ptl); prot = pgprot_val(pte_pgprot(pte)); phys_addr = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT; if ((write & FOLL_WRITE) && !pte_write(pte)) return -EINVAL; maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot); if (!maddr) return -ENOMEM; if (follow_pte(vma->vm_mm, addr, &ptep, &ptl)) goto out_unmap; if (!pte_same(pte, *ptep)) { pte_unmap_unlock(ptep, ptl); iounmap(maddr); goto retry; } if (write) memcpy_toio(maddr + offset, buf, len); else memcpy_fromio(buf, maddr + offset, len); ret = len; pte_unmap_unlock(ptep, ptl); out_unmap: iounmap(maddr); return ret; } EXPORT_SYMBOL_GPL(generic_access_phys); #endif /* * Access another process' address space as given in mm. */ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct vm_area_struct *vma; void *old_buf = buf; int write = gup_flags & FOLL_WRITE; if (mmap_read_lock_killable(mm)) return 0; /* ignore errors, just check how much was successfully transferred */ while (len) { int bytes, ret, offset; void *maddr; struct page *page = NULL; ret = get_user_pages_remote(mm, addr, 1, gup_flags, &page, &vma, NULL); if (ret <= 0) { #ifndef CONFIG_HAVE_IOREMAP_PROT break; #else /* * Check if this is a VM_IO | VM_PFNMAP VMA, which * we can access using slightly different code. */ vma = vma_lookup(mm, addr); if (!vma) break; if (vma->vm_ops && vma->vm_ops->access) ret = vma->vm_ops->access(vma, addr, buf, len, write); if (ret <= 0) break; bytes = ret; #endif } else { bytes = len; offset = addr & (PAGE_SIZE-1); if (bytes > PAGE_SIZE-offset) bytes = PAGE_SIZE-offset; maddr = kmap(page); if (write) { copy_to_user_page(vma, page, addr, maddr + offset, buf, bytes); set_page_dirty_lock(page); } else { copy_from_user_page(vma, page, addr, buf, maddr + offset, bytes); } kunmap(page); put_page(page); } len -= bytes; buf += bytes; addr += bytes; } mmap_read_unlock(mm); return buf - old_buf; } /** * access_remote_vm - access another process' address space * @mm: the mm_struct of the target address space * @addr: start address to access * @buf: source or destination buffer * @len: number of bytes to transfer * @gup_flags: flags modifying lookup behaviour * * The caller must hold a reference on @mm. * * Return: number of bytes copied from source to destination. */ int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags) { return __access_remote_vm(mm, addr, buf, len, gup_flags); } /* * Access another process' address space. * Source/target buffer must be kernel space, * Do not walk the page table directly, use get_user_pages */ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct mm_struct *mm; int ret; mm = get_task_mm(tsk); if (!mm) return 0; ret = __access_remote_vm(mm, addr, buf, len, gup_flags); mmput(mm); return ret; } EXPORT_SYMBOL_GPL(access_process_vm); /* * Print the name of a VMA. */ void print_vma_addr(char *prefix, unsigned long ip) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; /* * we might be running from an atomic context so we cannot sleep */ if (!mmap_read_trylock(mm)) return; vma = find_vma(mm, ip); if (vma && vma->vm_file) { struct file *f = vma->vm_file; char *buf = (char *)__get_free_page(GFP_NOWAIT); if (buf) { char *p; p = file_path(f, buf, PAGE_SIZE); if (IS_ERR(p)) p = "?"; printk("%s%s[%lx+%lx]", prefix, kbasename(p), vma->vm_start, vma->vm_end - vma->vm_start); free_page((unsigned long)buf); } } mmap_read_unlock(mm); } #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP) void __might_fault(const char *file, int line) { /* * Some code (nfs/sunrpc) uses socket ops on kernel memory while * holding the mmap_lock, this is safe because kernel memory doesn't * get paged out, therefore we'll never actually fault, and the * below annotations will generate false positives. */ if (uaccess_kernel()) return; if (pagefault_disabled()) return; __might_sleep(file, line, 0); #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) if (current->mm) might_lock_read(¤t->mm->mmap_lock); #endif } EXPORT_SYMBOL(__might_fault); #endif #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) /* * Process all subpages of the specified huge page with the specified * operation. The target subpage will be processed last to keep its * cache lines hot. */ static inline void process_huge_page( unsigned long addr_hint, unsigned int pages_per_huge_page, void (*process_subpage)(unsigned long addr, int idx, void *arg), void *arg) { int i, n, base, l; unsigned long addr = addr_hint & ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); /* Process target subpage last to keep its cache lines hot */ might_sleep(); n = (addr_hint - addr) / PAGE_SIZE; if (2 * n <= pages_per_huge_page) { /* If target subpage in first half of huge page */ base = 0; l = n; /* Process subpages at the end of huge page */ for (i = pages_per_huge_page - 1; i >= 2 * n; i--) { cond_resched(); process_subpage(addr + i * PAGE_SIZE, i, arg); } } else { /* If target subpage in second half of huge page */ base = pages_per_huge_page - 2 * (pages_per_huge_page - n); l = pages_per_huge_page - n; /* Process subpages at the begin of huge page */ for (i = 0; i < base; i++) { cond_resched(); process_subpage(addr + i * PAGE_SIZE, i, arg); } } /* * Process remaining subpages in left-right-left-right pattern * towards the target subpage */ for (i = 0; i < l; i++) { int left_idx = base + i; int right_idx = base + 2 * l - 1 - i; cond_resched(); process_subpage(addr + left_idx * PAGE_SIZE, left_idx, arg); cond_resched(); process_subpage(addr + right_idx * PAGE_SIZE, right_idx, arg); } } static void clear_gigantic_page(struct page *page, unsigned long addr, unsigned int pages_per_huge_page) { int i; struct page *p = page; might_sleep(); for (i = 0; i < pages_per_huge_page; i++, p = mem_map_next(p, page, i)) { cond_resched(); clear_user_highpage(p, addr + i * PAGE_SIZE); } } static void clear_subpage(unsigned long addr, int idx, void *arg) { struct page *page = arg; clear_user_highpage(page + idx, addr); } void clear_huge_page(struct page *page, unsigned long addr_hint, unsigned int pages_per_huge_page) { unsigned long addr = addr_hint & ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) { clear_gigantic_page(page, addr, pages_per_huge_page); return; } process_huge_page(addr_hint, pages_per_huge_page, clear_subpage, page); } static void copy_user_gigantic_page(struct page *dst, struct page *src, unsigned long addr, struct vm_area_struct *vma, unsigned int pages_per_huge_page) { int i; struct page *dst_base = dst; struct page *src_base = src; for (i = 0; i < pages_per_huge_page; ) { cond_resched(); copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma); i++; dst = mem_map_next(dst, dst_base, i); src = mem_map_next(src, src_base, i); } } struct copy_subpage_arg { struct page *dst; struct page *src; struct vm_area_struct *vma; }; static void copy_subpage(unsigned long addr, int idx, void *arg) { struct copy_subpage_arg *copy_arg = arg; copy_user_highpage(copy_arg->dst + idx, copy_arg->src + idx, addr, copy_arg->vma); } void copy_user_huge_page(struct page *dst, struct page *src, unsigned long addr_hint, struct vm_area_struct *vma, unsigned int pages_per_huge_page) { unsigned long addr = addr_hint & ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1); struct copy_subpage_arg arg = { .dst = dst, .src = src, .vma = vma, }; if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) { copy_user_gigantic_page(dst, src, addr, vma, pages_per_huge_page); return; } process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg); } long copy_huge_page_from_user(struct page *dst_page, const void __user *usr_src, unsigned int pages_per_huge_page, bool allow_pagefault) { void *src = (void *)usr_src; void *page_kaddr; unsigned long i, rc = 0; unsigned long ret_val = pages_per_huge_page * PAGE_SIZE; struct page *subpage = dst_page; for (i = 0; i < pages_per_huge_page; i++, subpage = mem_map_next(subpage, dst_page, i)) { if (allow_pagefault) page_kaddr = kmap(subpage); else page_kaddr = kmap_atomic(subpage); rc = copy_from_user(page_kaddr, (const void __user *)(src + i * PAGE_SIZE), PAGE_SIZE); if (allow_pagefault) kunmap(subpage); else kunmap_atomic(page_kaddr); ret_val -= (PAGE_SIZE - rc); if (rc) break; flush_dcache_page(subpage); cond_resched(); } return ret_val; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ #if USE_SPLIT_PTE_PTLOCKS && ALLOC_SPLIT_PTLOCKS static struct kmem_cache *page_ptl_cachep; void __init ptlock_cache_init(void) { page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0, SLAB_PANIC, NULL); } bool ptlock_alloc(struct page *page) { spinlock_t *ptl; ptl = kmem_cache_alloc(page_ptl_cachep, GFP_KERNEL); if (!ptl) return false; page->ptl = ptl; return true; } void ptlock_free(struct page *page) { kmem_cache_free(page_ptl_cachep, page->ptl); } #endif |
11886 1283 1283 11484 12516 4027 12361 7160 7150 13431 13431 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 | // SPDX-License-Identifier: GPL-2.0 #include <linux/export.h> #include <linux/lockref.h> #if USE_CMPXCHG_LOCKREF /* * Note that the "cmpxchg()" reloads the "old" value for the * failure case. */ #define CMPXCHG_LOOP(CODE, SUCCESS) do { \ int retry = 100; \ struct lockref old; \ BUILD_BUG_ON(sizeof(old) != 8); \ old.lock_count = READ_ONCE(lockref->lock_count); \ while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) { \ struct lockref new = old, prev = old; \ CODE \ old.lock_count = cmpxchg64_relaxed(&lockref->lock_count, \ old.lock_count, \ new.lock_count); \ if (likely(old.lock_count == prev.lock_count)) { \ SUCCESS; \ } \ if (!--retry) \ break; \ } \ } while (0) #else #define CMPXCHG_LOOP(CODE, SUCCESS) do { } while (0) #endif /** * lockref_get - Increments reference count unconditionally * @lockref: pointer to lockref structure * * This operation is only valid if you already hold a reference * to the object, so you know the count cannot be zero. */ void lockref_get(struct lockref *lockref) { CMPXCHG_LOOP( new.count++; , return; ); spin_lock(&lockref->lock); lockref->count++; spin_unlock(&lockref->lock); } EXPORT_SYMBOL(lockref_get); /** * lockref_get_not_zero - Increments count unless the count is 0 or dead * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count was zero */ int lockref_get_not_zero(struct lockref *lockref) { int retval; CMPXCHG_LOOP( new.count++; if (old.count <= 0) return 0; , return 1; ); spin_lock(&lockref->lock); retval = 0; if (lockref->count > 0) { lockref->count++; retval = 1; } spin_unlock(&lockref->lock); return retval; } EXPORT_SYMBOL(lockref_get_not_zero); /** * lockref_put_not_zero - Decrements count unless count <= 1 before decrement * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count would become zero */ int lockref_put_not_zero(struct lockref *lockref) { int retval; CMPXCHG_LOOP( new.count--; if (old.count <= 1) return 0; , return 1; ); spin_lock(&lockref->lock); retval = 0; if (lockref->count > 1) { lockref->count--; retval = 1; } spin_unlock(&lockref->lock); return retval; } EXPORT_SYMBOL(lockref_put_not_zero); /** * lockref_get_or_lock - Increments count unless the count is 0 or dead * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count was zero * and we got the lock instead. */ int lockref_get_or_lock(struct lockref *lockref) { CMPXCHG_LOOP( new.count++; if (old.count <= 0) break; , return 1; ); spin_lock(&lockref->lock); if (lockref->count <= 0) return 0; lockref->count++; spin_unlock(&lockref->lock); return 1; } EXPORT_SYMBOL(lockref_get_or_lock); /** * lockref_put_return - Decrement reference count if possible * @lockref: pointer to lockref structure * * Decrement the reference count and return the new value. * If the lockref was dead or locked, return an error. */ int lockref_put_return(struct lockref *lockref) { CMPXCHG_LOOP( new.count--; if (old.count <= 0) return -1; , return new.count; ); return -1; } EXPORT_SYMBOL(lockref_put_return); /** * lockref_put_or_lock - decrements count unless count <= 1 before decrement * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken */ int lockref_put_or_lock(struct lockref *lockref) { CMPXCHG_LOOP( new.count--; if (old.count <= 1) break; , return 1; ); spin_lock(&lockref->lock); if (lockref->count <= 1) return 0; lockref->count--; spin_unlock(&lockref->lock); return 1; } EXPORT_SYMBOL(lockref_put_or_lock); /** * lockref_mark_dead - mark lockref dead * @lockref: pointer to lockref structure */ void lockref_mark_dead(struct lockref *lockref) { assert_spin_locked(&lockref->lock); lockref->count = -128; } EXPORT_SYMBOL(lockref_mark_dead); /** * lockref_get_not_dead - Increments count unless the ref is dead * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if lockref was dead */ int lockref_get_not_dead(struct lockref *lockref) { int retval; CMPXCHG_LOOP( new.count++; if (old.count < 0) return 0; , return 1; ); spin_lock(&lockref->lock); retval = 0; if (lockref->count >= 0) { lockref->count++; retval = 1; } spin_unlock(&lockref->lock); return retval; } EXPORT_SYMBOL(lockref_get_not_dead); |
3 3 2 1 1 1 3 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 | /* * JFFS2 -- Journalling Flash File System, Version 2. * * Copyright © 2001-2007 Red Hat, Inc. * * Created by David Woodhouse <dwmw2@infradead.org> * * For licensing information, see the file 'LICENCE' in this directory. * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/list.h> #include <linux/fs.h> #include <linux/err.h> #include <linux/mount.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/jffs2.h> #include <linux/pagemap.h> #include <linux/mtd/super.h> #include <linux/ctype.h> #include <linux/namei.h> #include <linux/seq_file.h> #include <linux/exportfs.h> #include "compr.h" #include "nodelist.h" static void jffs2_put_super(struct super_block *); static struct kmem_cache *jffs2_inode_cachep; static struct inode *jffs2_alloc_inode(struct super_block *sb) { struct jffs2_inode_info *f; f = kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL); if (!f) return NULL; return &f->vfs_inode; } static void jffs2_free_inode(struct inode *inode) { struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); kfree(f->target); kmem_cache_free(jffs2_inode_cachep, f); } static void jffs2_i_init_once(void *foo) { struct jffs2_inode_info *f = foo; mutex_init(&f->sem); f->target = NULL; inode_init_once(&f->vfs_inode); } static const char *jffs2_compr_name(unsigned int compr) { switch (compr) { case JFFS2_COMPR_MODE_NONE: return "none"; #ifdef CONFIG_JFFS2_LZO case JFFS2_COMPR_MODE_FORCELZO: return "lzo"; #endif #ifdef CONFIG_JFFS2_ZLIB case JFFS2_COMPR_MODE_FORCEZLIB: return "zlib"; #endif default: /* should never happen; programmer error */ WARN_ON(1); return ""; } } static int jffs2_show_options(struct seq_file *s, struct dentry *root) { struct jffs2_sb_info *c = JFFS2_SB_INFO(root->d_sb); struct jffs2_mount_opts *opts = &c->mount_opts; if (opts->override_compr) seq_printf(s, ",compr=%s", jffs2_compr_name(opts->compr)); if (opts->set_rp_size) seq_printf(s, ",rp_size=%u", opts->rp_size / 1024); return 0; } static int jffs2_sync_fs(struct super_block *sb, int wait) { struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); #ifdef CONFIG_JFFS2_FS_WRITEBUFFER if (jffs2_is_writebuffered(c)) cancel_delayed_work_sync(&c->wbuf_dwork); #endif mutex_lock(&c->alloc_sem); jffs2_flush_wbuf_pad(c); mutex_unlock(&c->alloc_sem); return 0; } static struct inode *jffs2_nfs_get_inode(struct super_block *sb, uint64_t ino, uint32_t generation) { /* We don't care about i_generation. We'll destroy the flash before we start re-using inode numbers anyway. And even if that wasn't true, we'd have other problems...*/ return jffs2_iget(sb, ino); } static struct dentry *jffs2_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { return generic_fh_to_dentry(sb, fid, fh_len, fh_type, jffs2_nfs_get_inode); } static struct dentry *jffs2_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { return generic_fh_to_parent(sb, fid, fh_len, fh_type, jffs2_nfs_get_inode); } static struct dentry *jffs2_get_parent(struct dentry *child) { struct jffs2_inode_info *f; uint32_t pino; BUG_ON(!d_is_dir(child)); f = JFFS2_INODE_INFO(d_inode(child)); pino = f->inocache->pino_nlink; JFFS2_DEBUG("Parent of directory ino #%u is #%u\n", f->inocache->ino, pino); return d_obtain_alias(jffs2_iget(child->d_sb, pino)); } static const struct export_operations jffs2_export_ops = { .get_parent = jffs2_get_parent, .fh_to_dentry = jffs2_fh_to_dentry, .fh_to_parent = jffs2_fh_to_parent, }; /* * JFFS2 mount options. * * Opt_source: The source device * Opt_override_compr: override default compressor * Opt_rp_size: size of reserved pool in KiB */ enum { Opt_override_compr, Opt_rp_size, }; static const struct constant_table jffs2_param_compr[] = { {"none", JFFS2_COMPR_MODE_NONE }, #ifdef CONFIG_JFFS2_LZO {"lzo", JFFS2_COMPR_MODE_FORCELZO }, #endif #ifdef CONFIG_JFFS2_ZLIB {"zlib", JFFS2_COMPR_MODE_FORCEZLIB }, #endif {} }; static const struct fs_parameter_spec jffs2_fs_parameters[] = { fsparam_enum ("compr", Opt_override_compr, jffs2_param_compr), fsparam_u32 ("rp_size", Opt_rp_size), {} }; static int jffs2_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct fs_parse_result result; struct jffs2_sb_info *c = fc->s_fs_info; int opt; opt = fs_parse(fc, jffs2_fs_parameters, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_override_compr: c->mount_opts.compr = result.uint_32; c->mount_opts.override_compr = true; break; case Opt_rp_size: if (result.uint_32 > UINT_MAX / 1024) return invalf(fc, "jffs2: rp_size unrepresentable"); c->mount_opts.rp_size = result.uint_32 * 1024; c->mount_opts.set_rp_size = true; break; default: return -EINVAL; } return 0; } static inline void jffs2_update_mount_opts(struct fs_context *fc) { struct jffs2_sb_info *new_c = fc->s_fs_info; struct jffs2_sb_info *c = JFFS2_SB_INFO(fc->root->d_sb); mutex_lock(&c->alloc_sem); if (new_c->mount_opts.override_compr) { c->mount_opts.override_compr = new_c->mount_opts.override_compr; c->mount_opts.compr = new_c->mount_opts.compr; } if (new_c->mount_opts.set_rp_size) { c->mount_opts.set_rp_size = new_c->mount_opts.set_rp_size; c->mount_opts.rp_size = new_c->mount_opts.rp_size; } mutex_unlock(&c->alloc_sem); } static int jffs2_reconfigure(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; sync_filesystem(sb); jffs2_update_mount_opts(fc); return jffs2_do_remount_fs(sb, fc); } static const struct super_operations jffs2_super_operations = { .alloc_inode = jffs2_alloc_inode, .free_inode = jffs2_free_inode, .put_super = jffs2_put_super, .statfs = jffs2_statfs, .evict_inode = jffs2_evict_inode, .dirty_inode = jffs2_dirty_inode, .show_options = jffs2_show_options, .sync_fs = jffs2_sync_fs, }; /* * fill in the superblock */ static int jffs2_fill_super(struct super_block *sb, struct fs_context *fc) { struct jffs2_sb_info *c = sb->s_fs_info; jffs2_dbg(1, "jffs2_get_sb_mtd():" " New superblock for device %d (\"%s\")\n", sb->s_mtd->index, sb->s_mtd->name); c->mtd = sb->s_mtd; c->os_priv = sb; if (c->mount_opts.rp_size > c->mtd->size) return invalf(fc, "jffs2: Too large reserve pool specified, max is %llu KB", c->mtd->size / 1024); /* Initialize JFFS2 superblock locks, the further initialization will * be done later */ mutex_init(&c->alloc_sem); mutex_init(&c->erase_free_sem); init_waitqueue_head(&c->erase_wait); init_waitqueue_head(&c->inocache_wq); spin_lock_init(&c->erase_completion_lock); spin_lock_init(&c->inocache_lock); sb->s_op = &jffs2_super_operations; sb->s_export_op = &jffs2_export_ops; sb->s_flags = sb->s_flags | SB_NOATIME; sb->s_xattr = jffs2_xattr_handlers; #ifdef CONFIG_JFFS2_FS_POSIX_ACL sb->s_flags |= SB_POSIXACL; #endif return jffs2_do_fill_super(sb, fc); } static int jffs2_get_tree(struct fs_context *fc) { return get_tree_mtd(fc, jffs2_fill_super); } static void jffs2_free_fc(struct fs_context *fc) { kfree(fc->s_fs_info); } static const struct fs_context_operations jffs2_context_ops = { .free = jffs2_free_fc, .parse_param = jffs2_parse_param, .get_tree = jffs2_get_tree, .reconfigure = jffs2_reconfigure, }; static int jffs2_init_fs_context(struct fs_context *fc) { struct jffs2_sb_info *ctx; ctx = kzalloc(sizeof(struct jffs2_sb_info), GFP_KERNEL); if (!ctx) return -ENOMEM; fc->s_fs_info = ctx; fc->ops = &jffs2_context_ops; return 0; } static void jffs2_put_super (struct super_block *sb) { struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); jffs2_dbg(2, "%s()\n", __func__); mutex_lock(&c->alloc_sem); jffs2_flush_wbuf_pad(c); mutex_unlock(&c->alloc_sem); jffs2_sum_exit(c); jffs2_free_ino_caches(c); jffs2_free_raw_node_refs(c); kvfree(c->blocks); jffs2_flash_cleanup(c); kfree(c->inocache_list); jffs2_clear_xattr_subsystem(c); mtd_sync(c->mtd); jffs2_dbg(1, "%s(): returning\n", __func__); } static void jffs2_kill_sb(struct super_block *sb) { struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); if (c && !sb_rdonly(sb)) jffs2_stop_garbage_collect_thread(c); kill_mtd_super(sb); kfree(c); } static struct file_system_type jffs2_fs_type = { .owner = THIS_MODULE, .name = "jffs2", .init_fs_context = jffs2_init_fs_context, .parameters = jffs2_fs_parameters, .kill_sb = jffs2_kill_sb, }; MODULE_ALIAS_FS("jffs2"); static int __init init_jffs2_fs(void) { int ret; /* Paranoia checks for on-medium structures. If we ask GCC to pack them with __attribute__((packed)) then it _also_ assumes that they're not aligned -- so it emits crappy code on some architectures. Ideally we want an attribute which means just 'no padding', without the alignment thing. But GCC doesn't have that -- we have to just hope the structs are the right sizes, instead. */ BUILD_BUG_ON(sizeof(struct jffs2_unknown_node) != 12); BUILD_BUG_ON(sizeof(struct jffs2_raw_dirent) != 40); BUILD_BUG_ON(sizeof(struct jffs2_raw_inode) != 68); BUILD_BUG_ON(sizeof(struct jffs2_raw_summary) != 32); pr_info("version 2.2." #ifdef CONFIG_JFFS2_FS_WRITEBUFFER " (NAND)" #endif #ifdef CONFIG_JFFS2_SUMMARY " (SUMMARY) " #endif " © 2001-2006 Red Hat, Inc.\n"); jffs2_inode_cachep = kmem_cache_create("jffs2_i", sizeof(struct jffs2_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD|SLAB_ACCOUNT), jffs2_i_init_once); if (!jffs2_inode_cachep) { pr_err("error: Failed to initialise inode cache\n"); return -ENOMEM; } ret = jffs2_compressors_init(); if (ret) { pr_err("error: Failed to initialise compressors\n"); goto out; } ret = jffs2_create_slab_caches(); if (ret) { pr_err("error: Failed to initialise slab caches\n"); goto out_compressors; } ret = register_filesystem(&jffs2_fs_type); if (ret) { pr_err("error: Failed to register filesystem\n"); goto out_slab; } return 0; out_slab: jffs2_destroy_slab_caches(); out_compressors: jffs2_compressors_exit(); out: kmem_cache_destroy(jffs2_inode_cachep); return ret; } static void __exit exit_jffs2_fs(void) { unregister_filesystem(&jffs2_fs_type); jffs2_destroy_slab_caches(); jffs2_compressors_exit(); /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(jffs2_inode_cachep); } module_init(init_jffs2_fs); module_exit(exit_jffs2_fs); MODULE_DESCRIPTION("The Journalling Flash File System, v2"); MODULE_AUTHOR("Red Hat, Inc."); MODULE_LICENSE("GPL"); // Actually dual-licensed, but it doesn't matter for // the sake of this tag. It's Free Software. |
22 22 20 2 47 40 32 40 36 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 | // SPDX-License-Identifier: GPL-2.0-only /* * This contains encryption functions for per-file encryption. * * Copyright (C) 2015, Google, Inc. * Copyright (C) 2015, Motorola Mobility * * Written by Michael Halcrow, 2014. * * Filename encryption additions * Uday Savagaonkar, 2014 * Encryption policy handling additions * Ildar Muslukhov, 2014 * Add fscrypt_pullback_bio_page() * Jaegeuk Kim, 2015. * * This has not yet undergone a rigorous security audit. * * The usage of AES-XTS should conform to recommendations in NIST * Special Publication 800-38E and IEEE P1619/D16. */ #include <linux/pagemap.h> #include <linux/mempool.h> #include <linux/module.h> #include <linux/scatterlist.h> #include <linux/ratelimit.h> #include <crypto/skcipher.h> #include "fscrypt_private.h" static unsigned int num_prealloc_crypto_pages = 32; module_param(num_prealloc_crypto_pages, uint, 0444); MODULE_PARM_DESC(num_prealloc_crypto_pages, "Number of crypto pages to preallocate"); static mempool_t *fscrypt_bounce_page_pool = NULL; static struct workqueue_struct *fscrypt_read_workqueue; static DEFINE_MUTEX(fscrypt_init_mutex); struct kmem_cache *fscrypt_info_cachep; void fscrypt_enqueue_decrypt_work(struct work_struct *work) { queue_work(fscrypt_read_workqueue, work); } EXPORT_SYMBOL(fscrypt_enqueue_decrypt_work); struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags) { return mempool_alloc(fscrypt_bounce_page_pool, gfp_flags); } /** * fscrypt_free_bounce_page() - free a ciphertext bounce page * @bounce_page: the bounce page to free, or NULL * * Free a bounce page that was allocated by fscrypt_encrypt_pagecache_blocks(), * or by fscrypt_alloc_bounce_page() directly. */ void fscrypt_free_bounce_page(struct page *bounce_page) { if (!bounce_page) return; set_page_private(bounce_page, (unsigned long)NULL); ClearPagePrivate(bounce_page); mempool_free(bounce_page, fscrypt_bounce_page_pool); } EXPORT_SYMBOL(fscrypt_free_bounce_page); void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, const struct fscrypt_info *ci) { u8 flags = fscrypt_policy_flags(&ci->ci_policy); memset(iv, 0, ci->ci_mode->ivsize); if (flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) { WARN_ON_ONCE(lblk_num > U32_MAX); WARN_ON_ONCE(ci->ci_inode->i_ino > U32_MAX); lblk_num |= (u64)ci->ci_inode->i_ino << 32; } else if (flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32) { WARN_ON_ONCE(lblk_num > U32_MAX); lblk_num = (u32)(ci->ci_hashed_ino + lblk_num); } else if (flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) { memcpy(iv->nonce, ci->ci_nonce, FSCRYPT_FILE_NONCE_SIZE); } iv->lblk_num = cpu_to_le64(lblk_num); } /* Encrypt or decrypt a single filesystem block of file contents */ int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw, u64 lblk_num, struct page *src_page, struct page *dest_page, unsigned int len, unsigned int offs, gfp_t gfp_flags) { union fscrypt_iv iv; struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); struct scatterlist dst, src; struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_enc_key.tfm; int res = 0; if (WARN_ON_ONCE(len <= 0)) return -EINVAL; if (WARN_ON_ONCE(len % FS_CRYPTO_BLOCK_SIZE != 0)) return -EINVAL; fscrypt_generate_iv(&iv, lblk_num, ci); req = skcipher_request_alloc(tfm, gfp_flags); if (!req) return -ENOMEM; skcipher_request_set_callback( req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done, &wait); sg_init_table(&dst, 1); sg_set_page(&dst, dest_page, len, offs); sg_init_table(&src, 1); sg_set_page(&src, src_page, len, offs); skcipher_request_set_crypt(req, &src, &dst, len, &iv); if (rw == FS_DECRYPT) res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait); else res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); skcipher_request_free(req); if (res) { fscrypt_err(inode, "%scryption failed for block %llu: %d", (rw == FS_DECRYPT ? "De" : "En"), lblk_num, res); return res; } return 0; } /** * fscrypt_encrypt_pagecache_blocks() - Encrypt filesystem blocks from a * pagecache page * @page: The locked pagecache page containing the block(s) to encrypt * @len: Total size of the block(s) to encrypt. Must be a nonzero * multiple of the filesystem's block size. * @offs: Byte offset within @page of the first block to encrypt. Must be * a multiple of the filesystem's block size. * @gfp_flags: Memory allocation flags. See details below. * * A new bounce page is allocated, and the specified block(s) are encrypted into * it. In the bounce page, the ciphertext block(s) will be located at the same * offsets at which the plaintext block(s) were located in the source page; any * other parts of the bounce page will be left uninitialized. However, normally * blocksize == PAGE_SIZE and the whole page is encrypted at once. * * This is for use by the filesystem's ->writepages() method. * * The bounce page allocation is mempool-backed, so it will always succeed when * @gfp_flags includes __GFP_DIRECT_RECLAIM, e.g. when it's GFP_NOFS. However, * only the first page of each bio can be allocated this way. To prevent * deadlocks, for any additional pages a mask like GFP_NOWAIT must be used. * * Return: the new encrypted bounce page on success; an ERR_PTR() on failure */ struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, unsigned int len, unsigned int offs, gfp_t gfp_flags) { const struct inode *inode = page->mapping->host; const unsigned int blockbits = inode->i_blkbits; const unsigned int blocksize = 1 << blockbits; struct page *ciphertext_page; u64 lblk_num = ((u64)page->index << (PAGE_SHIFT - blockbits)) + (offs >> blockbits); unsigned int i; int err; if (WARN_ON_ONCE(!PageLocked(page))) return ERR_PTR(-EINVAL); if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offs, blocksize))) return ERR_PTR(-EINVAL); ciphertext_page = fscrypt_alloc_bounce_page(gfp_flags); if (!ciphertext_page) return ERR_PTR(-ENOMEM); for (i = offs; i < offs + len; i += blocksize, lblk_num++) { err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk_num, page, ciphertext_page, blocksize, i, gfp_flags); if (err) { fscrypt_free_bounce_page(ciphertext_page); return ERR_PTR(err); } } SetPagePrivate(ciphertext_page); set_page_private(ciphertext_page, (unsigned long)page); return ciphertext_page; } EXPORT_SYMBOL(fscrypt_encrypt_pagecache_blocks); /** * fscrypt_encrypt_block_inplace() - Encrypt a filesystem block in-place * @inode: The inode to which this block belongs * @page: The page containing the block to encrypt * @len: Size of block to encrypt. Doesn't need to be a multiple of the * fs block size, but must be a multiple of FS_CRYPTO_BLOCK_SIZE. * @offs: Byte offset within @page at which the block to encrypt begins * @lblk_num: Filesystem logical block number of the block, i.e. the 0-based * number of the block within the file * @gfp_flags: Memory allocation flags * * Encrypt a possibly-compressed filesystem block that is located in an * arbitrary page, not necessarily in the original pagecache page. The @inode * and @lblk_num must be specified, as they can't be determined from @page. * * Return: 0 on success; -errno on failure */ int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num, gfp_t gfp_flags) { return fscrypt_crypt_block(inode, FS_ENCRYPT, lblk_num, page, page, len, offs, gfp_flags); } EXPORT_SYMBOL(fscrypt_encrypt_block_inplace); /** * fscrypt_decrypt_pagecache_blocks() - Decrypt filesystem blocks in a * pagecache page * @page: The locked pagecache page containing the block(s) to decrypt * @len: Total size of the block(s) to decrypt. Must be a nonzero * multiple of the filesystem's block size. * @offs: Byte offset within @page of the first block to decrypt. Must be * a multiple of the filesystem's block size. * * The specified block(s) are decrypted in-place within the pagecache page, * which must still be locked and not uptodate. Normally, blocksize == * PAGE_SIZE and the whole page is decrypted at once. * * This is for use by the filesystem's ->readpages() method. * * Return: 0 on success; -errno on failure */ int fscrypt_decrypt_pagecache_blocks(struct page *page, unsigned int len, unsigned int offs) { const struct inode *inode = page->mapping->host; const unsigned int blockbits = inode->i_blkbits; const unsigned int blocksize = 1 << blockbits; u64 lblk_num = ((u64)page->index << (PAGE_SHIFT - blockbits)) + (offs >> blockbits); unsigned int i; int err; if (WARN_ON_ONCE(!PageLocked(page))) return -EINVAL; if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offs, blocksize))) return -EINVAL; for (i = offs; i < offs + len; i += blocksize, lblk_num++) { err = fscrypt_crypt_block(inode, FS_DECRYPT, lblk_num, page, page, blocksize, i, GFP_NOFS); if (err) return err; } return 0; } EXPORT_SYMBOL(fscrypt_decrypt_pagecache_blocks); /** * fscrypt_decrypt_block_inplace() - Decrypt a filesystem block in-place * @inode: The inode to which this block belongs * @page: The page containing the block to decrypt * @len: Size of block to decrypt. Doesn't need to be a multiple of the * fs block size, but must be a multiple of FS_CRYPTO_BLOCK_SIZE. * @offs: Byte offset within @page at which the block to decrypt begins * @lblk_num: Filesystem logical block number of the block, i.e. the 0-based * number of the block within the file * * Decrypt a possibly-compressed filesystem block that is located in an * arbitrary page, not necessarily in the original pagecache page. The @inode * and @lblk_num must be specified, as they can't be determined from @page. * * Return: 0 on success; -errno on failure */ int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num) { return fscrypt_crypt_block(inode, FS_DECRYPT, lblk_num, page, page, len, offs, GFP_NOFS); } EXPORT_SYMBOL(fscrypt_decrypt_block_inplace); /** * fscrypt_initialize() - allocate major buffers for fs encryption. * @cop_flags: fscrypt operations flags * * We only call this when we start accessing encrypted files, since it * results in memory getting allocated that wouldn't otherwise be used. * * Return: 0 on success; -errno on failure */ int fscrypt_initialize(unsigned int cop_flags) { int err = 0; /* No need to allocate a bounce page pool if this FS won't use it. */ if (cop_flags & FS_CFLG_OWN_PAGES) return 0; mutex_lock(&fscrypt_init_mutex); if (fscrypt_bounce_page_pool) goto out_unlock; err = -ENOMEM; fscrypt_bounce_page_pool = mempool_create_page_pool(num_prealloc_crypto_pages, 0); if (!fscrypt_bounce_page_pool) goto out_unlock; err = 0; out_unlock: mutex_unlock(&fscrypt_init_mutex); return err; } void fscrypt_msg(const struct inode *inode, const char *level, const char *fmt, ...) { static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); struct va_format vaf; va_list args; if (!__ratelimit(&rs)) return; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; if (inode && inode->i_ino) printk("%sfscrypt (%s, inode %lu): %pV\n", level, inode->i_sb->s_id, inode->i_ino, &vaf); else if (inode) printk("%sfscrypt (%s): %pV\n", level, inode->i_sb->s_id, &vaf); else printk("%sfscrypt: %pV\n", level, &vaf); va_end(args); } /** * fscrypt_init() - Set up for fs encryption. * * Return: 0 on success; -errno on failure */ static int __init fscrypt_init(void) { int err = -ENOMEM; /* * Use an unbound workqueue to allow bios to be decrypted in parallel * even when they happen to complete on the same CPU. This sacrifices * locality, but it's worthwhile since decryption is CPU-intensive. * * Also use a high-priority workqueue to prioritize decryption work, * which blocks reads from completing, over regular application tasks. */ fscrypt_read_workqueue = alloc_workqueue("fscrypt_read_queue", WQ_UNBOUND | WQ_HIGHPRI, num_online_cpus()); if (!fscrypt_read_workqueue) goto fail; fscrypt_info_cachep = KMEM_CACHE(fscrypt_info, SLAB_RECLAIM_ACCOUNT); if (!fscrypt_info_cachep) goto fail_free_queue; err = fscrypt_init_keyring(); if (err) goto fail_free_info; return 0; fail_free_info: kmem_cache_destroy(fscrypt_info_cachep); fail_free_queue: destroy_workqueue(fscrypt_read_workqueue); fail: return err; } late_initcall(fscrypt_init) |
2 10 2 1 3 3 2 2 1 1 1 1 9 9 9 9 13 3 9 1 10 2 2 1 2 2 13 5 1 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 | // SPDX-License-Identifier: GPL-2.0-or-later /* Driver for Philips webcam Functions that send various control messages to the webcam, including video modes. (C) 1999-2003 Nemosoft Unv. (C) 2004-2006 Luc Saillard (luc@saillard.org) (C) 2011 Hans de Goede <hdegoede@redhat.com> NOTE: this version of pwc is an unofficial (modified) release of pwc & pcwx driver and thus may have bugs that are not present in the original version. Please send bug reports and support requests to <luc@saillard.org>. NOTE: this version of pwc is an unofficial (modified) release of pwc & pcwx driver and thus may have bugs that are not present in the original version. Please send bug reports and support requests to <luc@saillard.org>. The decompression routines have been implemented by reverse-engineering the Nemosoft binary pwcx module. Caveat emptor. */ /* Changes 2001/08/03 Alvarado Added methods for changing white balance and red/green gains */ /* Control functions for the cam; brightness, contrast, video mode, etc. */ #ifdef __KERNEL__ #include <linux/uaccess.h> #endif #include <asm/errno.h> #include "pwc.h" #include "pwc-kiara.h" #include "pwc-timon.h" #include "pwc-dec1.h" #include "pwc-dec23.h" /* Selectors for status controls used only in this file */ #define GET_STATUS_B00 0x0B00 #define SENSOR_TYPE_FORMATTER1 0x0C00 #define GET_STATUS_3000 0x3000 #define READ_RAW_Y_MEAN_FORMATTER 0x3100 #define SET_POWER_SAVE_MODE_FORMATTER 0x3200 #define MIRROR_IMAGE_FORMATTER 0x3300 #define LED_FORMATTER 0x3400 #define LOWLIGHT 0x3500 #define GET_STATUS_3600 0x3600 #define SENSOR_TYPE_FORMATTER2 0x3700 #define GET_STATUS_3800 0x3800 #define GET_STATUS_4000 0x4000 #define GET_STATUS_4100 0x4100 /* Get */ #define CTL_STATUS_4200 0x4200 /* [GS] 1 */ /* Formatters for the Video Endpoint controls [GS]ET_EP_STREAM_CTL */ #define VIDEO_OUTPUT_CONTROL_FORMATTER 0x0100 static const char *size2name[PSZ_MAX] = { "subQCIF", "QSIF", "QCIF", "SIF", "CIF", "VGA", }; /********/ /* Entries for the Nala (645/646) camera; the Nala doesn't have compression preferences, so you either get compressed or non-compressed streams. An alternate value of 0 means this mode is not available at all. */ #define PWC_FPS_MAX_NALA 8 struct Nala_table_entry { char alternate; /* USB alternate setting */ int compressed; /* Compressed yes/no */ unsigned char mode[3]; /* precomputed mode table */ }; static unsigned int Nala_fps_vector[PWC_FPS_MAX_NALA] = { 4, 5, 7, 10, 12, 15, 20, 24 }; static struct Nala_table_entry Nala_table[PSZ_MAX][PWC_FPS_MAX_NALA] = { #include "pwc-nala.h" }; /****************************************************************************/ static int recv_control_msg(struct pwc_device *pdev, u8 request, u16 value, int recv_count) { int rc; rc = usb_control_msg(pdev->udev, usb_rcvctrlpipe(pdev->udev, 0), request, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, pdev->vcinterface, pdev->ctrl_buf, recv_count, USB_CTRL_GET_TIMEOUT); if (rc < 0) PWC_ERROR("recv_control_msg error %d req %02x val %04x\n", rc, request, value); return rc; } static inline int send_video_command(struct pwc_device *pdev, int index, const unsigned char *buf, int buflen) { int rc; memcpy(pdev->ctrl_buf, buf, buflen); rc = usb_control_msg(pdev->udev, usb_sndctrlpipe(pdev->udev, 0), SET_EP_STREAM_CTL, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, VIDEO_OUTPUT_CONTROL_FORMATTER, index, pdev->ctrl_buf, buflen, USB_CTRL_SET_TIMEOUT); if (rc >= 0) memcpy(pdev->cmd_buf, buf, buflen); else PWC_ERROR("send_video_command error %d\n", rc); return rc; } int send_control_msg(struct pwc_device *pdev, u8 request, u16 value, void *buf, int buflen) { return usb_control_msg(pdev->udev, usb_sndctrlpipe(pdev->udev, 0), request, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, pdev->vcinterface, buf, buflen, USB_CTRL_SET_TIMEOUT); } static int set_video_mode_Nala(struct pwc_device *pdev, int size, int pixfmt, int frames, int *compression, int send_to_cam) { int fps, ret = 0; struct Nala_table_entry *pEntry; int frames2frames[31] = { /* closest match of framerate */ 0, 0, 0, 0, 4, /* 0-4 */ 5, 5, 7, 7, 10, /* 5-9 */ 10, 10, 12, 12, 15, /* 10-14 */ 15, 15, 15, 20, 20, /* 15-19 */ 20, 20, 20, 24, 24, /* 20-24 */ 24, 24, 24, 24, 24, /* 25-29 */ 24 /* 30 */ }; int frames2table[31] = { 0, 0, 0, 0, 0, /* 0-4 */ 1, 1, 1, 2, 2, /* 5-9 */ 3, 3, 4, 4, 4, /* 10-14 */ 5, 5, 5, 5, 5, /* 15-19 */ 6, 6, 6, 6, 7, /* 20-24 */ 7, 7, 7, 7, 7, /* 25-29 */ 7 /* 30 */ }; if (size < 0 || size > PSZ_CIF) return -EINVAL; if (frames < 4) frames = 4; else if (size > PSZ_QCIF && frames > 15) frames = 15; else if (frames > 25) frames = 25; frames = frames2frames[frames]; fps = frames2table[frames]; pEntry = &Nala_table[size][fps]; if (pEntry->alternate == 0) return -EINVAL; if (send_to_cam) ret = send_video_command(pdev, pdev->vendpoint, pEntry->mode, 3); if (ret < 0) return ret; if (pEntry->compressed && pixfmt == V4L2_PIX_FMT_YUV420) pwc_dec1_init(pdev, pEntry->mode); /* Set various parameters */ pdev->pixfmt = pixfmt; pdev->vframes = frames; pdev->valternate = pEntry->alternate; pdev->width = pwc_image_sizes[size][0]; pdev->height = pwc_image_sizes[size][1]; pdev->frame_size = (pdev->width * pdev->height * 3) / 2; if (pEntry->compressed) { if (pdev->release < 5) { /* 4 fold compression */ pdev->vbandlength = 528; pdev->frame_size /= 4; } else { pdev->vbandlength = 704; pdev->frame_size /= 3; } } else pdev->vbandlength = 0; /* Let pwc-if.c:isoc_init know we don't support higher compression */ *compression = 3; return 0; } static int set_video_mode_Timon(struct pwc_device *pdev, int size, int pixfmt, int frames, int *compression, int send_to_cam) { const struct Timon_table_entry *pChoose; int fps, ret = 0; if (size >= PSZ_MAX || *compression < 0 || *compression > 3) return -EINVAL; if (frames < 5) frames = 5; else if (size == PSZ_VGA && frames > 15) frames = 15; else if (frames > 30) frames = 30; fps = (frames / 5) - 1; /* Find a supported framerate with progressively higher compression */ do { pChoose = &Timon_table[size][fps][*compression]; if (pChoose->alternate != 0) break; (*compression)++; } while (*compression <= 3); if (pChoose->alternate == 0) return -ENOENT; /* Not supported. */ if (send_to_cam) ret = send_video_command(pdev, pdev->vendpoint, pChoose->mode, 13); if (ret < 0) return ret; if (pChoose->bandlength > 0 && pixfmt == V4L2_PIX_FMT_YUV420) pwc_dec23_init(pdev, pChoose->mode); /* Set various parameters */ pdev->pixfmt = pixfmt; pdev->vframes = (fps + 1) * 5; pdev->valternate = pChoose->alternate; pdev->width = pwc_image_sizes[size][0]; pdev->height = pwc_image_sizes[size][1]; pdev->vbandlength = pChoose->bandlength; if (pChoose->bandlength > 0) pdev->frame_size = (pChoose->bandlength * pdev->height) / 4; else pdev->frame_size = (pdev->width * pdev->height * 12) / 8; return 0; } static int set_video_mode_Kiara(struct pwc_device *pdev, int size, int pixfmt, int frames, int *compression, int send_to_cam) { const struct Kiara_table_entry *pChoose; int fps, ret = 0; if (size >= PSZ_MAX || *compression < 0 || *compression > 3) return -EINVAL; if (frames < 5) frames = 5; else if (size == PSZ_VGA && frames > 15) frames = 15; else if (frames > 30) frames = 30; fps = (frames / 5) - 1; /* Find a supported framerate with progressively higher compression */ do { pChoose = &Kiara_table[size][fps][*compression]; if (pChoose->alternate != 0) break; (*compression)++; } while (*compression <= 3); if (pChoose->alternate == 0) return -ENOENT; /* Not supported. */ /* Firmware bug: video endpoint is 5, but commands are sent to endpoint 4 */ if (send_to_cam) ret = send_video_command(pdev, 4, pChoose->mode, 12); if (ret < 0) return ret; if (pChoose->bandlength > 0 && pixfmt == V4L2_PIX_FMT_YUV420) pwc_dec23_init(pdev, pChoose->mode); /* All set and go */ pdev->pixfmt = pixfmt; pdev->vframes = (fps + 1) * 5; pdev->valternate = pChoose->alternate; pdev->width = pwc_image_sizes[size][0]; pdev->height = pwc_image_sizes[size][1]; pdev->vbandlength = pChoose->bandlength; if (pdev->vbandlength > 0) pdev->frame_size = (pdev->vbandlength * pdev->height) / 4; else pdev->frame_size = (pdev->width * pdev->height * 12) / 8; PWC_TRACE("frame_size=%d, vframes=%d, vsize=%d, vbandlength=%d\n", pdev->frame_size, pdev->vframes, size, pdev->vbandlength); return 0; } int pwc_set_video_mode(struct pwc_device *pdev, int width, int height, int pixfmt, int frames, int *compression, int send_to_cam) { int ret, size; PWC_DEBUG_FLOW("set_video_mode(%dx%d @ %d, pixfmt %08x).\n", width, height, frames, pixfmt); size = pwc_get_size(pdev, width, height); PWC_TRACE("decode_size = %d.\n", size); if (DEVICE_USE_CODEC1(pdev->type)) { ret = set_video_mode_Nala(pdev, size, pixfmt, frames, compression, send_to_cam); } else if (DEVICE_USE_CODEC3(pdev->type)) { ret = set_video_mode_Kiara(pdev, size, pixfmt, frames, compression, send_to_cam); } else { ret = set_video_mode_Timon(pdev, size, pixfmt, frames, compression, send_to_cam); } if (ret < 0) { PWC_ERROR("Failed to set video mode %s@%d fps; return code = %d\n", size2name[size], frames, ret); return ret; } pdev->frame_total_size = pdev->frame_size + pdev->frame_header_size + pdev->frame_trailer_size; PWC_DEBUG_SIZE("Set resolution to %dx%d\n", pdev->width, pdev->height); return 0; } static unsigned int pwc_get_fps_Nala(struct pwc_device *pdev, unsigned int index, unsigned int size) { unsigned int i; for (i = 0; i < PWC_FPS_MAX_NALA; i++) { if (Nala_table[size][i].alternate) { if (index--==0) return Nala_fps_vector[i]; } } return 0; } static unsigned int pwc_get_fps_Kiara(struct pwc_device *pdev, unsigned int index, unsigned int size) { unsigned int i; for (i = 0; i < PWC_FPS_MAX_KIARA; i++) { if (Kiara_table[size][i][3].alternate) { if (index--==0) return Kiara_fps_vector[i]; } } return 0; } static unsigned int pwc_get_fps_Timon(struct pwc_device *pdev, unsigned int index, unsigned int size) { unsigned int i; for (i=0; i < PWC_FPS_MAX_TIMON; i++) { if (Timon_table[size][i][3].alternate) { if (index--==0) return Timon_fps_vector[i]; } } return 0; } unsigned int pwc_get_fps(struct pwc_device *pdev, unsigned int index, unsigned int size) { unsigned int ret; if (DEVICE_USE_CODEC1(pdev->type)) { ret = pwc_get_fps_Nala(pdev, index, size); } else if (DEVICE_USE_CODEC3(pdev->type)) { ret = pwc_get_fps_Kiara(pdev, index, size); } else { ret = pwc_get_fps_Timon(pdev, index, size); } return ret; } int pwc_get_u8_ctrl(struct pwc_device *pdev, u8 request, u16 value, int *data) { int ret; ret = recv_control_msg(pdev, request, value, 1); if (ret < 0) return ret; *data = pdev->ctrl_buf[0]; return 0; } int pwc_set_u8_ctrl(struct pwc_device *pdev, u8 request, u16 value, u8 data) { int ret; pdev->ctrl_buf[0] = data; ret = send_control_msg(pdev, request, value, pdev->ctrl_buf, 1); if (ret < 0) return ret; return 0; } int pwc_get_s8_ctrl(struct pwc_device *pdev, u8 request, u16 value, int *data) { int ret; ret = recv_control_msg(pdev, request, value, 1); if (ret < 0) return ret; *data = ((s8 *)pdev->ctrl_buf)[0]; return 0; } int pwc_get_u16_ctrl(struct pwc_device *pdev, u8 request, u16 value, int *data) { int ret; ret = recv_control_msg(pdev, request, value, 2); if (ret < 0) return ret; *data = (pdev->ctrl_buf[1] << 8) | pdev->ctrl_buf[0]; return 0; } int pwc_set_u16_ctrl(struct pwc_device *pdev, u8 request, u16 value, u16 data) { int ret; pdev->ctrl_buf[0] = data & 0xff; pdev->ctrl_buf[1] = data >> 8; ret = send_control_msg(pdev, request, value, pdev->ctrl_buf, 2); if (ret < 0) return ret; return 0; } int pwc_button_ctrl(struct pwc_device *pdev, u16 value) { int ret; ret = send_control_msg(pdev, SET_STATUS_CTL, value, NULL, 0); if (ret < 0) return ret; return 0; } /* POWER */ void pwc_camera_power(struct pwc_device *pdev, int power) { int r; if (!pdev->power_save) return; if (pdev->type < 675 || (pdev->type < 730 && pdev->release < 6)) return; /* Not supported by Nala or Timon < release 6 */ if (power) pdev->ctrl_buf[0] = 0x00; /* active */ else pdev->ctrl_buf[0] = 0xFF; /* power save */ r = send_control_msg(pdev, SET_STATUS_CTL, SET_POWER_SAVE_MODE_FORMATTER, pdev->ctrl_buf, 1); if (r < 0) PWC_ERROR("Failed to power %s camera (%d)\n", power ? "on" : "off", r); } int pwc_set_leds(struct pwc_device *pdev, int on_value, int off_value) { int r; if (pdev->type < 730) return 0; on_value /= 100; off_value /= 100; if (on_value < 0) on_value = 0; if (on_value > 0xff) on_value = 0xff; if (off_value < 0) off_value = 0; if (off_value > 0xff) off_value = 0xff; pdev->ctrl_buf[0] = on_value; pdev->ctrl_buf[1] = off_value; r = send_control_msg(pdev, SET_STATUS_CTL, LED_FORMATTER, pdev->ctrl_buf, 2); if (r < 0) PWC_ERROR("Failed to set LED on/off time (%d)\n", r); return r; } #ifdef CONFIG_USB_PWC_DEBUG int pwc_get_cmos_sensor(struct pwc_device *pdev, int *sensor) { int ret, request; if (pdev->type < 675) request = SENSOR_TYPE_FORMATTER1; else if (pdev->type < 730) return -1; /* The Vesta series doesn't have this call */ else request = SENSOR_TYPE_FORMATTER2; ret = recv_control_msg(pdev, GET_STATUS_CTL, request, 1); if (ret < 0) return ret; if (pdev->type < 675) *sensor = pdev->ctrl_buf[0] | 0x100; else *sensor = pdev->ctrl_buf[0]; return 0; } #endif |
6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 5 4 4 2 2 2 2 2 1 1 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Syntek STK1135 subdriver * * Copyright (c) 2013 Ondrej Zary * * Based on Syntekdriver (stk11xx) by Nicolas VIVIEN: * http://syntekdriver.sourceforge.net */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "stk1135" #include "gspca.h" #include "stk1135.h" MODULE_AUTHOR("Ondrej Zary"); MODULE_DESCRIPTION("Syntek STK1135 USB Camera Driver"); MODULE_LICENSE("GPL"); /* specific webcam descriptor */ struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ u8 pkt_seq; u8 sensor_page; bool flip_status; u8 flip_debounce; struct v4l2_ctrl *hflip; struct v4l2_ctrl *vflip; }; static const struct v4l2_pix_format stk1135_modes[] = { /* default mode (this driver supports variable resolution) */ {640, 480, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, .bytesperline = 640, .sizeimage = 640 * 480, .colorspace = V4L2_COLORSPACE_SRGB}, }; /* -- read a register -- */ static u8 reg_r(struct gspca_dev *gspca_dev, u16 index) { struct usb_device *dev = gspca_dev->dev; int ret; if (gspca_dev->usb_err < 0) return 0; ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), 0x00, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x00, index, gspca_dev->usb_buf, 1, 500); gspca_dbg(gspca_dev, D_USBI, "reg_r 0x%x=0x%02x\n", index, gspca_dev->usb_buf[0]); if (ret < 0) { pr_err("reg_r 0x%x err %d\n", index, ret); gspca_dev->usb_err = ret; return 0; } return gspca_dev->usb_buf[0]; } /* -- write a register -- */ static void reg_w(struct gspca_dev *gspca_dev, u16 index, u8 val) { int ret; struct usb_device *dev = gspca_dev->dev; if (gspca_dev->usb_err < 0) return; ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0x01, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, val, index, NULL, 0, 500); gspca_dbg(gspca_dev, D_USBO, "reg_w 0x%x:=0x%02x\n", index, val); if (ret < 0) { pr_err("reg_w 0x%x err %d\n", index, ret); gspca_dev->usb_err = ret; } } static void reg_w_mask(struct gspca_dev *gspca_dev, u16 index, u8 val, u8 mask) { val = (reg_r(gspca_dev, index) & ~mask) | (val & mask); reg_w(gspca_dev, index, val); } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { gspca_dev->cam.cam_mode = stk1135_modes; gspca_dev->cam.nmodes = ARRAY_SIZE(stk1135_modes); return 0; } static int stk1135_serial_wait_ready(struct gspca_dev *gspca_dev) { int i = 0; u8 val; do { val = reg_r(gspca_dev, STK1135_REG_SICTL + 1); if (i++ > 500) { /* maximum retry count */ pr_err("serial bus timeout: status=0x%02x\n", val); return -1; } /* repeat if BUSY or WRITE/READ not finished */ } while ((val & 0x10) || !(val & 0x05)); return 0; } static u8 sensor_read_8(struct gspca_dev *gspca_dev, u8 addr) { reg_w(gspca_dev, STK1135_REG_SBUSR, addr); /* begin read */ reg_w(gspca_dev, STK1135_REG_SICTL, 0x20); /* wait until finished */ if (stk1135_serial_wait_ready(gspca_dev)) { pr_err("Sensor read failed\n"); return 0; } return reg_r(gspca_dev, STK1135_REG_SBUSR + 1); } static u16 sensor_read_16(struct gspca_dev *gspca_dev, u8 addr) { return (sensor_read_8(gspca_dev, addr) << 8) | sensor_read_8(gspca_dev, 0xf1); } static void sensor_write_8(struct gspca_dev *gspca_dev, u8 addr, u8 data) { /* load address and data registers */ reg_w(gspca_dev, STK1135_REG_SBUSW, addr); reg_w(gspca_dev, STK1135_REG_SBUSW + 1, data); /* begin write */ reg_w(gspca_dev, STK1135_REG_SICTL, 0x01); /* wait until finished */ if (stk1135_serial_wait_ready(gspca_dev)) { pr_err("Sensor write failed\n"); return; } } static void sensor_write_16(struct gspca_dev *gspca_dev, u8 addr, u16 data) { sensor_write_8(gspca_dev, addr, data >> 8); sensor_write_8(gspca_dev, 0xf1, data & 0xff); } static void sensor_set_page(struct gspca_dev *gspca_dev, u8 page) { struct sd *sd = (struct sd *) gspca_dev; if (page != sd->sensor_page) { sensor_write_16(gspca_dev, 0xf0, page); sd->sensor_page = page; } } static u16 sensor_read(struct gspca_dev *gspca_dev, u16 reg) { sensor_set_page(gspca_dev, reg >> 8); return sensor_read_16(gspca_dev, reg & 0xff); } static void sensor_write(struct gspca_dev *gspca_dev, u16 reg, u16 val) { sensor_set_page(gspca_dev, reg >> 8); sensor_write_16(gspca_dev, reg & 0xff, val); } static void sensor_write_mask(struct gspca_dev *gspca_dev, u16 reg, u16 val, u16 mask) { val = (sensor_read(gspca_dev, reg) & ~mask) | (val & mask); sensor_write(gspca_dev, reg, val); } struct sensor_val { u16 reg; u16 val; }; /* configure MT9M112 sensor */ static void stk1135_configure_mt9m112(struct gspca_dev *gspca_dev) { static const struct sensor_val cfg[] = { /* restart&reset, chip enable, reserved */ { 0x00d, 0x000b }, { 0x00d, 0x0008 }, { 0x035, 0x0022 }, /* mode ctl: AWB on, AE both, clip aper corr, defect corr, AE */ { 0x106, 0x700e }, { 0x2dd, 0x18e0 }, /* B-R thresholds, */ /* AWB */ { 0x21f, 0x0180 }, /* Cb and Cr limits */ { 0x220, 0xc814 }, { 0x221, 0x8080 }, /* lum limits, RGB gain */ { 0x222, 0xa078 }, { 0x223, 0xa078 }, /* R, B limit */ { 0x224, 0x5f20 }, { 0x228, 0xea02 }, /* mtx adj lim, adv ctl */ { 0x229, 0x867a }, /* wide gates */ /* Color correction */ /* imager gains base, delta, delta signs */ { 0x25e, 0x594c }, { 0x25f, 0x4d51 }, { 0x260, 0x0002 }, /* AWB adv ctl 2, gain offs */ { 0x2ef, 0x0008 }, { 0x2f2, 0x0000 }, /* base matrix signs, scale K1-5, K6-9 */ { 0x202, 0x00ee }, { 0x203, 0x3923 }, { 0x204, 0x0724 }, /* base matrix coef */ { 0x209, 0x00cd }, { 0x20a, 0x0093 }, { 0x20b, 0x0004 },/*K1-3*/ { 0x20c, 0x005c }, { 0x20d, 0x00d9 }, { 0x20e, 0x0053 },/*K4-6*/ { 0x20f, 0x0008 }, { 0x210, 0x0091 }, { 0x211, 0x00cf },/*K7-9*/ { 0x215, 0x0000 }, /* delta mtx signs */ /* delta matrix coef */ { 0x216, 0x0000 }, { 0x217, 0x0000 }, { 0x218, 0x0000 },/*D1-3*/ { 0x219, 0x0000 }, { 0x21a, 0x0000 }, { 0x21b, 0x0000 },/*D4-6*/ { 0x21c, 0x0000 }, { 0x21d, 0x0000 }, { 0x21e, 0x0000 },/*D7-9*/ /* enable & disable manual WB to apply color corr. settings */ { 0x106, 0xf00e }, { 0x106, 0x700e }, /* Lens shading correction */ { 0x180, 0x0007 }, /* control */ /* vertical knee 0, 2+1, 4+3 */ { 0x181, 0xde13 }, { 0x182, 0xebe2 }, { 0x183, 0x00f6 }, /* R */ { 0x184, 0xe114 }, { 0x185, 0xeadd }, { 0x186, 0xfdf6 }, /* G */ { 0x187, 0xe511 }, { 0x188, 0xede6 }, { 0x189, 0xfbf7 }, /* B */ /* horizontal knee 0, 2+1, 4+3, 5 */ { 0x18a, 0xd613 }, { 0x18b, 0xedec }, /* R .. */ { 0x18c, 0xf9f2 }, { 0x18d, 0x0000 }, /* .. R */ { 0x18e, 0xd815 }, { 0x18f, 0xe9ea }, /* G .. */ { 0x190, 0xf9f1 }, { 0x191, 0x0002 }, /* .. G */ { 0x192, 0xde10 }, { 0x193, 0xefef }, /* B .. */ { 0x194, 0xfbf4 }, { 0x195, 0x0002 }, /* .. B */ /* vertical knee 6+5, 8+7 */ { 0x1b6, 0x0e06 }, { 0x1b7, 0x2713 }, /* R */ { 0x1b8, 0x1106 }, { 0x1b9, 0x2713 }, /* G */ { 0x1ba, 0x0c03 }, { 0x1bb, 0x2a0f }, /* B */ /* horizontal knee 7+6, 9+8, 10 */ { 0x1bc, 0x1208 }, { 0x1bd, 0x1a16 }, { 0x1be, 0x0022 }, /* R */ { 0x1bf, 0x150a }, { 0x1c0, 0x1c1a }, { 0x1c1, 0x002d }, /* G */ { 0x1c2, 0x1109 }, { 0x1c3, 0x1414 }, { 0x1c4, 0x002a }, /* B */ { 0x106, 0x740e }, /* enable lens shading correction */ /* Gamma correction - context A */ { 0x153, 0x0b03 }, { 0x154, 0x4722 }, { 0x155, 0xac82 }, { 0x156, 0xdac7 }, { 0x157, 0xf5e9 }, { 0x158, 0xff00 }, /* Gamma correction - context B */ { 0x1dc, 0x0b03 }, { 0x1dd, 0x4722 }, { 0x1de, 0xac82 }, { 0x1df, 0xdac7 }, { 0x1e0, 0xf5e9 }, { 0x1e1, 0xff00 }, /* output format: RGB, invert output pixclock, output bayer */ { 0x13a, 0x4300 }, { 0x19b, 0x4300 }, /* for context A, B */ { 0x108, 0x0180 }, /* format control - enable bayer row flip */ { 0x22f, 0xd100 }, { 0x29c, 0xd100 }, /* AE A, B */ /* default prg conf, prg ctl - by 0x2d2, prg advance - PA1 */ { 0x2d2, 0x0000 }, { 0x2cc, 0x0004 }, { 0x2cb, 0x0001 }, { 0x22e, 0x0c3c }, { 0x267, 0x1010 }, /* AE tgt ctl, gain lim */ /* PLL */ { 0x065, 0xa000 }, /* clk ctl - enable PLL (clear bit 14) */ { 0x066, 0x2003 }, { 0x067, 0x0501 }, /* PLL M=128, N=3, P=1 */ { 0x065, 0x2000 }, /* disable PLL bypass (clear bit 15) */ { 0x005, 0x01b8 }, { 0x007, 0x00d8 }, /* horiz blanking B, A */ /* AE line size, shutter delay limit */ { 0x239, 0x06c0 }, { 0x23b, 0x040e }, /* for context A */ { 0x23a, 0x06c0 }, { 0x23c, 0x0564 }, /* for context B */ /* shutter width basis 60Hz, 50Hz */ { 0x257, 0x0208 }, { 0x258, 0x0271 }, /* for context A */ { 0x259, 0x0209 }, { 0x25a, 0x0271 }, /* for context B */ { 0x25c, 0x120d }, { 0x25d, 0x1712 }, /* flicker 60Hz, 50Hz */ { 0x264, 0x5e1c }, /* reserved */ /* flicker, AE gain limits, gain zone limits */ { 0x25b, 0x0003 }, { 0x236, 0x7810 }, { 0x237, 0x8304 }, { 0x008, 0x0021 }, /* vert blanking A */ }; int i; u16 width, height; for (i = 0; i < ARRAY_SIZE(cfg); i++) sensor_write(gspca_dev, cfg[i].reg, cfg[i].val); /* set output size */ width = gspca_dev->pixfmt.width; height = gspca_dev->pixfmt.height; if (width <= 640 && height <= 512) { /* context A (half readout speed)*/ sensor_write(gspca_dev, 0x1a7, width); sensor_write(gspca_dev, 0x1aa, height); /* set read mode context A */ sensor_write(gspca_dev, 0x0c8, 0x0000); /* set resize, read mode, vblank, hblank context A */ sensor_write(gspca_dev, 0x2c8, 0x0000); } else { /* context B (full readout speed) */ sensor_write(gspca_dev, 0x1a1, width); sensor_write(gspca_dev, 0x1a4, height); /* set read mode context B */ sensor_write(gspca_dev, 0x0c8, 0x0008); /* set resize, read mode, vblank, hblank context B */ sensor_write(gspca_dev, 0x2c8, 0x040b); } } static void stk1135_configure_clock(struct gspca_dev *gspca_dev) { /* configure SCLKOUT */ reg_w(gspca_dev, STK1135_REG_TMGEN, 0x12); /* set 1 clock per pixel */ /* and positive edge clocked pulse high when pixel counter = 0 */ reg_w(gspca_dev, STK1135_REG_TCP1 + 0, 0x41); reg_w(gspca_dev, STK1135_REG_TCP1 + 1, 0x00); reg_w(gspca_dev, STK1135_REG_TCP1 + 2, 0x00); reg_w(gspca_dev, STK1135_REG_TCP1 + 3, 0x00); /* enable CLKOUT for sensor */ reg_w(gspca_dev, STK1135_REG_SENSO + 0, 0x10); /* disable STOP clock */ reg_w(gspca_dev, STK1135_REG_SENSO + 1, 0x00); /* set lower 8 bits of PLL feedback divider */ reg_w(gspca_dev, STK1135_REG_SENSO + 3, 0x07); /* set other PLL parameters */ reg_w(gspca_dev, STK1135_REG_PLLFD, 0x06); /* enable timing generator */ reg_w(gspca_dev, STK1135_REG_TMGEN, 0x80); /* enable PLL */ reg_w(gspca_dev, STK1135_REG_SENSO + 2, 0x04); /* set serial interface clock divider (30MHz/0x1f*16+2) = 60240 kHz) */ reg_w(gspca_dev, STK1135_REG_SICTL + 2, 0x1f); /* wait a while for sensor to catch up */ udelay(1000); } static void stk1135_camera_disable(struct gspca_dev *gspca_dev) { /* set capture end Y position to 0 */ reg_w(gspca_dev, STK1135_REG_CIEPO + 2, 0x00); reg_w(gspca_dev, STK1135_REG_CIEPO + 3, 0x00); /* disable capture */ reg_w_mask(gspca_dev, STK1135_REG_SCTRL, 0x00, 0x80); /* enable sensor standby and diasble chip enable */ sensor_write_mask(gspca_dev, 0x00d, 0x0004, 0x000c); /* disable PLL */ reg_w_mask(gspca_dev, STK1135_REG_SENSO + 2, 0x00, 0x01); /* disable timing generator */ reg_w(gspca_dev, STK1135_REG_TMGEN, 0x00); /* enable STOP clock */ reg_w(gspca_dev, STK1135_REG_SENSO + 1, 0x20); /* disable CLKOUT for sensor */ reg_w(gspca_dev, STK1135_REG_SENSO, 0x00); /* disable sensor (GPIO5) and enable GPIO0,3,6 (?) - sensor standby? */ reg_w(gspca_dev, STK1135_REG_GCTRL, 0x49); } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { u16 sensor_id; char *sensor_name; struct sd *sd = (struct sd *) gspca_dev; /* set GPIO3,4,5,6 direction to output */ reg_w(gspca_dev, STK1135_REG_GCTRL + 2, 0x78); /* enable sensor (GPIO5) */ reg_w(gspca_dev, STK1135_REG_GCTRL, (1 << 5)); /* disable ROM interface */ reg_w(gspca_dev, STK1135_REG_GCTRL + 3, 0x80); /* enable interrupts from GPIO8 (flip sensor) and GPIO9 (???) */ reg_w(gspca_dev, STK1135_REG_ICTRL + 1, 0x00); reg_w(gspca_dev, STK1135_REG_ICTRL + 3, 0x03); /* enable remote wakeup from GPIO9 (???) */ reg_w(gspca_dev, STK1135_REG_RMCTL + 1, 0x00); reg_w(gspca_dev, STK1135_REG_RMCTL + 3, 0x02); /* reset serial interface */ reg_w(gspca_dev, STK1135_REG_SICTL, 0x80); reg_w(gspca_dev, STK1135_REG_SICTL, 0x00); /* set sensor address */ reg_w(gspca_dev, STK1135_REG_SICTL + 3, 0xba); /* disable alt 2-wire serial interface */ reg_w(gspca_dev, STK1135_REG_ASIC + 3, 0x00); stk1135_configure_clock(gspca_dev); /* read sensor ID */ sd->sensor_page = 0xff; sensor_id = sensor_read(gspca_dev, 0x000); switch (sensor_id) { case 0x148c: sensor_name = "MT9M112"; break; default: sensor_name = "unknown"; } pr_info("Detected sensor type %s (0x%x)\n", sensor_name, sensor_id); stk1135_camera_disable(gspca_dev); return gspca_dev->usb_err; } /* -- start the camera -- */ static int sd_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; u16 width, height; /* enable sensor (GPIO5) */ reg_w(gspca_dev, STK1135_REG_GCTRL, (1 << 5)); stk1135_configure_clock(gspca_dev); /* set capture start position X = 0, Y = 0 */ reg_w(gspca_dev, STK1135_REG_CISPO + 0, 0x00); reg_w(gspca_dev, STK1135_REG_CISPO + 1, 0x00); reg_w(gspca_dev, STK1135_REG_CISPO + 2, 0x00); reg_w(gspca_dev, STK1135_REG_CISPO + 3, 0x00); /* set capture end position */ width = gspca_dev->pixfmt.width; height = gspca_dev->pixfmt.height; reg_w(gspca_dev, STK1135_REG_CIEPO + 0, width & 0xff); reg_w(gspca_dev, STK1135_REG_CIEPO + 1, width >> 8); reg_w(gspca_dev, STK1135_REG_CIEPO + 2, height & 0xff); reg_w(gspca_dev, STK1135_REG_CIEPO + 3, height >> 8); /* set 8-bit mode */ reg_w(gspca_dev, STK1135_REG_SCTRL, 0x20); stk1135_configure_mt9m112(gspca_dev); /* enable capture */ reg_w_mask(gspca_dev, STK1135_REG_SCTRL, 0x80, 0x80); if (gspca_dev->usb_err >= 0) gspca_dbg(gspca_dev, D_STREAM, "camera started alt: 0x%02x\n", gspca_dev->alt); sd->pkt_seq = 0; return gspca_dev->usb_err; } static void sd_stopN(struct gspca_dev *gspca_dev) { struct usb_device *dev = gspca_dev->dev; usb_set_interface(dev, gspca_dev->iface, 0); stk1135_camera_disable(gspca_dev); gspca_dbg(gspca_dev, D_STREAM, "camera stopped\n"); } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { struct sd *sd = (struct sd *) gspca_dev; int skip = sizeof(struct stk1135_pkt_header); bool flip; enum gspca_packet_type pkt_type = INTER_PACKET; struct stk1135_pkt_header *hdr = (void *)data; u8 seq; if (len < 4) { gspca_dbg(gspca_dev, D_PACK, "received short packet (less than 4 bytes)\n"); return; } /* GPIO 8 is flip sensor (1 = normal position, 0 = flipped to back) */ flip = !(le16_to_cpu(hdr->gpio) & (1 << 8)); /* it's a switch, needs software debounce */ if (sd->flip_status != flip) sd->flip_debounce++; else sd->flip_debounce = 0; /* check sequence number (not present in new frame packets) */ if (!(hdr->flags & STK1135_HDR_FRAME_START)) { seq = hdr->seq & STK1135_HDR_SEQ_MASK; if (seq != sd->pkt_seq) { gspca_dbg(gspca_dev, D_PACK, "received out-of-sequence packet\n"); /* resync sequence and discard packet */ sd->pkt_seq = seq; gspca_dev->last_packet_type = DISCARD_PACKET; return; } } sd->pkt_seq++; if (sd->pkt_seq > STK1135_HDR_SEQ_MASK) sd->pkt_seq = 0; if (len == sizeof(struct stk1135_pkt_header)) return; if (hdr->flags & STK1135_HDR_FRAME_START) { /* new frame */ skip = 8; /* the header is longer */ gspca_frame_add(gspca_dev, LAST_PACKET, data, 0); pkt_type = FIRST_PACKET; } gspca_frame_add(gspca_dev, pkt_type, data + skip, len - skip); } static void sethflip(struct gspca_dev *gspca_dev, s32 val) { struct sd *sd = (struct sd *) gspca_dev; if (sd->flip_status) val = !val; sensor_write_mask(gspca_dev, 0x020, val ? 0x0002 : 0x0000 , 0x0002); } static void setvflip(struct gspca_dev *gspca_dev, s32 val) { struct sd *sd = (struct sd *) gspca_dev; if (sd->flip_status) val = !val; sensor_write_mask(gspca_dev, 0x020, val ? 0x0001 : 0x0000 , 0x0001); } static void stk1135_dq_callback(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; if (sd->flip_debounce > 100) { sd->flip_status = !sd->flip_status; sethflip(gspca_dev, v4l2_ctrl_g_ctrl(sd->hflip)); setvflip(gspca_dev, v4l2_ctrl_g_ctrl(sd->vflip)); } } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_HFLIP: sethflip(gspca_dev, ctrl->val); break; case V4L2_CID_VFLIP: setvflip(gspca_dev, ctrl->val); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 2); sd->hflip = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_HFLIP, 0, 1, 1, 0); sd->vflip = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_VFLIP, 0, 1, 1, 0); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } return 0; } static void stk1135_try_fmt(struct gspca_dev *gspca_dev, struct v4l2_format *fmt) { fmt->fmt.pix.width = clamp(fmt->fmt.pix.width, 32U, 1280U); fmt->fmt.pix.height = clamp(fmt->fmt.pix.height, 32U, 1024U); /* round up to even numbers */ fmt->fmt.pix.width += (fmt->fmt.pix.width & 1); fmt->fmt.pix.height += (fmt->fmt.pix.height & 1); fmt->fmt.pix.bytesperline = fmt->fmt.pix.width; fmt->fmt.pix.sizeimage = fmt->fmt.pix.width * fmt->fmt.pix.height; } static int stk1135_enum_framesizes(struct gspca_dev *gspca_dev, struct v4l2_frmsizeenum *fsize) { if (fsize->index != 0 || fsize->pixel_format != V4L2_PIX_FMT_SBGGR8) return -EINVAL; fsize->type = V4L2_FRMSIZE_TYPE_STEPWISE; fsize->stepwise.min_width = 32; fsize->stepwise.min_height = 32; fsize->stepwise.max_width = 1280; fsize->stepwise.max_height = 1024; fsize->stepwise.step_width = 2; fsize->stepwise.step_height = 2; return 0; } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .start = sd_start, .stopN = sd_stopN, .pkt_scan = sd_pkt_scan, .dq_callback = stk1135_dq_callback, .try_fmt = stk1135_try_fmt, .enum_framesizes = stk1135_enum_framesizes, }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x174f, 0x6a31)}, /* ASUS laptop, MT9M112 sensor */ {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver); |
81 3209 484 26 4721 69 107 4928 13 3 1 8764 4 15629 11 4 2879 148 13621 3258 781 366 672 4 18 151 14 121 1 506 1505 4449 487 15 3848 14038 425 1 351 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 | // SPDX-License-Identifier: GPL-2.0 // Generated by scripts/atomic/gen-atomic-instrumented.sh // DO NOT MODIFY THIS FILE DIRECTLY /* * This file provides wrappers with KASAN instrumentation for atomic operations. * To use this functionality an arch's atomic.h file needs to define all * atomic operations with arch_ prefix (e.g. arch_atomic_read()) and include * this file at the end. This file provides atomic_read() that forwards to * arch_atomic_read() for actual atomic operation. * Note: if an arch atomic operation is implemented by means of other atomic * operations (e.g. atomic_read()/atomic_cmpxchg() loop), then it needs to use * arch_ variants (i.e. arch_atomic_read()/arch_atomic_cmpxchg()) to avoid * double instrumentation. */ #ifndef _LINUX_ATOMIC_INSTRUMENTED_H #define _LINUX_ATOMIC_INSTRUMENTED_H #include <linux/build_bug.h> #include <linux/compiler.h> #include <linux/instrumented.h> static __always_inline int atomic_read(const atomic_t *v) { instrument_atomic_read(v, sizeof(*v)); return arch_atomic_read(v); } static __always_inline int atomic_read_acquire(const atomic_t *v) { instrument_atomic_read(v, sizeof(*v)); return arch_atomic_read_acquire(v); } static __always_inline void atomic_set(atomic_t *v, int i) { instrument_atomic_write(v, sizeof(*v)); arch_atomic_set(v, i); } static __always_inline void atomic_set_release(atomic_t *v, int i) { instrument_atomic_write(v, sizeof(*v)); arch_atomic_set_release(v, i); } static __always_inline void atomic_add(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic_add(i, v); } static __always_inline int atomic_add_return(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_add_return(i, v); } static __always_inline int atomic_add_return_acquire(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_add_return_acquire(i, v); } static __always_inline int atomic_add_return_release(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_add_return_release(i, v); } static __always_inline int atomic_add_return_relaxed(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_add_return_relaxed(i, v); } static __always_inline int atomic_fetch_add(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_add(i, v); } static __always_inline int atomic_fetch_add_acquire(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_add_acquire(i, v); } static __always_inline int atomic_fetch_add_release(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_add_release(i, v); } static __always_inline int atomic_fetch_add_relaxed(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_add_relaxed(i, v); } static __always_inline void atomic_sub(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic_sub(i, v); } static __always_inline int atomic_sub_return(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_sub_return(i, v); } static __always_inline int atomic_sub_return_acquire(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_sub_return_acquire(i, v); } static __always_inline int atomic_sub_return_release(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_sub_return_release(i, v); } static __always_inline int atomic_sub_return_relaxed(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_sub_return_relaxed(i, v); } static __always_inline int atomic_fetch_sub(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_sub(i, v); } static __always_inline int atomic_fetch_sub_acquire(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_sub_acquire(i, v); } static __always_inline int atomic_fetch_sub_release(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_sub_release(i, v); } static __always_inline int atomic_fetch_sub_relaxed(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_sub_relaxed(i, v); } static __always_inline void atomic_inc(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic_inc(v); } static __always_inline int atomic_inc_return(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_inc_return(v); } static __always_inline int atomic_inc_return_acquire(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_inc_return_acquire(v); } static __always_inline int atomic_inc_return_release(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_inc_return_release(v); } static __always_inline int atomic_inc_return_relaxed(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_inc_return_relaxed(v); } static __always_inline int atomic_fetch_inc(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_inc(v); } static __always_inline int atomic_fetch_inc_acquire(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_inc_acquire(v); } static __always_inline int atomic_fetch_inc_release(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_inc_release(v); } static __always_inline int atomic_fetch_inc_relaxed(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_inc_relaxed(v); } static __always_inline void atomic_dec(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic_dec(v); } static __always_inline int atomic_dec_return(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_dec_return(v); } static __always_inline int atomic_dec_return_acquire(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_dec_return_acquire(v); } static __always_inline int atomic_dec_return_release(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_dec_return_release(v); } static __always_inline int atomic_dec_return_relaxed(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_dec_return_relaxed(v); } static __always_inline int atomic_fetch_dec(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_dec(v); } static __always_inline int atomic_fetch_dec_acquire(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_dec_acquire(v); } static __always_inline int atomic_fetch_dec_release(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_dec_release(v); } static __always_inline int atomic_fetch_dec_relaxed(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_dec_relaxed(v); } static __always_inline void atomic_and(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic_and(i, v); } static __always_inline int atomic_fetch_and(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_and(i, v); } static __always_inline int atomic_fetch_and_acquire(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_and_acquire(i, v); } static __always_inline int atomic_fetch_and_release(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_and_release(i, v); } static __always_inline int atomic_fetch_and_relaxed(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_and_relaxed(i, v); } static __always_inline void atomic_andnot(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic_andnot(i, v); } static __always_inline int atomic_fetch_andnot(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_andnot(i, v); } static __always_inline int atomic_fetch_andnot_acquire(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_andnot_acquire(i, v); } static __always_inline int atomic_fetch_andnot_release(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_andnot_release(i, v); } static __always_inline int atomic_fetch_andnot_relaxed(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_andnot_relaxed(i, v); } static __always_inline void atomic_or(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic_or(i, v); } static __always_inline int atomic_fetch_or(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_or(i, v); } static __always_inline int atomic_fetch_or_acquire(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_or_acquire(i, v); } static __always_inline int atomic_fetch_or_release(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_or_release(i, v); } static __always_inline int atomic_fetch_or_relaxed(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_or_relaxed(i, v); } static __always_inline void atomic_xor(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic_xor(i, v); } static __always_inline int atomic_fetch_xor(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_xor(i, v); } static __always_inline int atomic_fetch_xor_acquire(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_xor_acquire(i, v); } static __always_inline int atomic_fetch_xor_release(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_xor_release(i, v); } static __always_inline int atomic_fetch_xor_relaxed(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_xor_relaxed(i, v); } static __always_inline int atomic_xchg(atomic_t *v, int i) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_xchg(v, i); } static __always_inline int atomic_xchg_acquire(atomic_t *v, int i) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_xchg_acquire(v, i); } static __always_inline int atomic_xchg_release(atomic_t *v, int i) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_xchg_release(v, i); } static __always_inline int atomic_xchg_relaxed(atomic_t *v, int i) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_xchg_relaxed(v, i); } static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_cmpxchg(v, old, new); } static __always_inline int atomic_cmpxchg_acquire(atomic_t *v, int old, int new) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_cmpxchg_acquire(v, old, new); } static __always_inline int atomic_cmpxchg_release(atomic_t *v, int old, int new) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_cmpxchg_release(v, old, new); } static __always_inline int atomic_cmpxchg_relaxed(atomic_t *v, int old, int new) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_cmpxchg_relaxed(v, old, new); } static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new) { instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic_try_cmpxchg(v, old, new); } static __always_inline bool atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) { instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic_try_cmpxchg_acquire(v, old, new); } static __always_inline bool atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) { instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic_try_cmpxchg_release(v, old, new); } static __always_inline bool atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new) { instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic_try_cmpxchg_relaxed(v, old, new); } static __always_inline bool atomic_sub_and_test(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_sub_and_test(i, v); } static __always_inline bool atomic_dec_and_test(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_dec_and_test(v); } static __always_inline bool atomic_inc_and_test(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_inc_and_test(v); } static __always_inline bool atomic_add_negative(int i, atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_add_negative(i, v); } static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_fetch_add_unless(v, a, u); } static __always_inline bool atomic_add_unless(atomic_t *v, int a, int u) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_add_unless(v, a, u); } static __always_inline bool atomic_inc_not_zero(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_inc_not_zero(v); } static __always_inline bool atomic_inc_unless_negative(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_inc_unless_negative(v); } static __always_inline bool atomic_dec_unless_positive(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_dec_unless_positive(v); } static __always_inline int atomic_dec_if_positive(atomic_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_dec_if_positive(v); } static __always_inline s64 atomic64_read(const atomic64_t *v) { instrument_atomic_read(v, sizeof(*v)); return arch_atomic64_read(v); } static __always_inline s64 atomic64_read_acquire(const atomic64_t *v) { instrument_atomic_read(v, sizeof(*v)); return arch_atomic64_read_acquire(v); } static __always_inline void atomic64_set(atomic64_t *v, s64 i) { instrument_atomic_write(v, sizeof(*v)); arch_atomic64_set(v, i); } static __always_inline void atomic64_set_release(atomic64_t *v, s64 i) { instrument_atomic_write(v, sizeof(*v)); arch_atomic64_set_release(v, i); } static __always_inline void atomic64_add(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic64_add(i, v); } static __always_inline s64 atomic64_add_return(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_add_return(i, v); } static __always_inline s64 atomic64_add_return_acquire(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_add_return_acquire(i, v); } static __always_inline s64 atomic64_add_return_release(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_add_return_release(i, v); } static __always_inline s64 atomic64_add_return_relaxed(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_add_return_relaxed(i, v); } static __always_inline s64 atomic64_fetch_add(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_add(i, v); } static __always_inline s64 atomic64_fetch_add_acquire(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_add_acquire(i, v); } static __always_inline s64 atomic64_fetch_add_release(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_add_release(i, v); } static __always_inline s64 atomic64_fetch_add_relaxed(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_add_relaxed(i, v); } static __always_inline void atomic64_sub(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic64_sub(i, v); } static __always_inline s64 atomic64_sub_return(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_sub_return(i, v); } static __always_inline s64 atomic64_sub_return_acquire(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_sub_return_acquire(i, v); } static __always_inline s64 atomic64_sub_return_release(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_sub_return_release(i, v); } static __always_inline s64 atomic64_sub_return_relaxed(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_sub_return_relaxed(i, v); } static __always_inline s64 atomic64_fetch_sub(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_sub(i, v); } static __always_inline s64 atomic64_fetch_sub_acquire(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_sub_acquire(i, v); } static __always_inline s64 atomic64_fetch_sub_release(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_sub_release(i, v); } static __always_inline s64 atomic64_fetch_sub_relaxed(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_sub_relaxed(i, v); } static __always_inline void atomic64_inc(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic64_inc(v); } static __always_inline s64 atomic64_inc_return(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_inc_return(v); } static __always_inline s64 atomic64_inc_return_acquire(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_inc_return_acquire(v); } static __always_inline s64 atomic64_inc_return_release(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_inc_return_release(v); } static __always_inline s64 atomic64_inc_return_relaxed(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_inc_return_relaxed(v); } static __always_inline s64 atomic64_fetch_inc(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_inc(v); } static __always_inline s64 atomic64_fetch_inc_acquire(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_inc_acquire(v); } static __always_inline s64 atomic64_fetch_inc_release(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_inc_release(v); } static __always_inline s64 atomic64_fetch_inc_relaxed(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_inc_relaxed(v); } static __always_inline void atomic64_dec(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic64_dec(v); } static __always_inline s64 atomic64_dec_return(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_dec_return(v); } static __always_inline s64 atomic64_dec_return_acquire(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_dec_return_acquire(v); } static __always_inline s64 atomic64_dec_return_release(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_dec_return_release(v); } static __always_inline s64 atomic64_dec_return_relaxed(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_dec_return_relaxed(v); } static __always_inline s64 atomic64_fetch_dec(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_dec(v); } static __always_inline s64 atomic64_fetch_dec_acquire(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_dec_acquire(v); } static __always_inline s64 atomic64_fetch_dec_release(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_dec_release(v); } static __always_inline s64 atomic64_fetch_dec_relaxed(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_dec_relaxed(v); } static __always_inline void atomic64_and(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic64_and(i, v); } static __always_inline s64 atomic64_fetch_and(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_and(i, v); } static __always_inline s64 atomic64_fetch_and_acquire(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_and_acquire(i, v); } static __always_inline s64 atomic64_fetch_and_release(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_and_release(i, v); } static __always_inline s64 atomic64_fetch_and_relaxed(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_and_relaxed(i, v); } static __always_inline void atomic64_andnot(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic64_andnot(i, v); } static __always_inline s64 atomic64_fetch_andnot(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_andnot(i, v); } static __always_inline s64 atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_andnot_acquire(i, v); } static __always_inline s64 atomic64_fetch_andnot_release(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_andnot_release(i, v); } static __always_inline s64 atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_andnot_relaxed(i, v); } static __always_inline void atomic64_or(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic64_or(i, v); } static __always_inline s64 atomic64_fetch_or(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_or(i, v); } static __always_inline s64 atomic64_fetch_or_acquire(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_or_acquire(i, v); } static __always_inline s64 atomic64_fetch_or_release(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_or_release(i, v); } static __always_inline s64 atomic64_fetch_or_relaxed(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_or_relaxed(i, v); } static __always_inline void atomic64_xor(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic64_xor(i, v); } static __always_inline s64 atomic64_fetch_xor(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_xor(i, v); } static __always_inline s64 atomic64_fetch_xor_acquire(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_xor_acquire(i, v); } static __always_inline s64 atomic64_fetch_xor_release(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_xor_release(i, v); } static __always_inline s64 atomic64_fetch_xor_relaxed(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_xor_relaxed(i, v); } static __always_inline s64 atomic64_xchg(atomic64_t *v, s64 i) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_xchg(v, i); } static __always_inline s64 atomic64_xchg_acquire(atomic64_t *v, s64 i) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_xchg_acquire(v, i); } static __always_inline s64 atomic64_xchg_release(atomic64_t *v, s64 i) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_xchg_release(v, i); } static __always_inline s64 atomic64_xchg_relaxed(atomic64_t *v, s64 i) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_xchg_relaxed(v, i); } static __always_inline s64 atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_cmpxchg(v, old, new); } static __always_inline s64 atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_cmpxchg_acquire(v, old, new); } static __always_inline s64 atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_cmpxchg_release(v, old, new); } static __always_inline s64 atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_cmpxchg_relaxed(v, old, new); } static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic64_try_cmpxchg(v, old, new); } static __always_inline bool atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) { instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic64_try_cmpxchg_acquire(v, old, new); } static __always_inline bool atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) { instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic64_try_cmpxchg_release(v, old, new); } static __always_inline bool atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new) { instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic64_try_cmpxchg_relaxed(v, old, new); } static __always_inline bool atomic64_sub_and_test(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_sub_and_test(i, v); } static __always_inline bool atomic64_dec_and_test(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_dec_and_test(v); } static __always_inline bool atomic64_inc_and_test(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_inc_and_test(v); } static __always_inline bool atomic64_add_negative(s64 i, atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_add_negative(i, v); } static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_fetch_add_unless(v, a, u); } static __always_inline bool atomic64_add_unless(atomic64_t *v, s64 a, s64 u) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_add_unless(v, a, u); } static __always_inline bool atomic64_inc_not_zero(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_inc_not_zero(v); } static __always_inline bool atomic64_inc_unless_negative(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_inc_unless_negative(v); } static __always_inline bool atomic64_dec_unless_positive(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_dec_unless_positive(v); } static __always_inline s64 atomic64_dec_if_positive(atomic64_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic64_dec_if_positive(v); } static __always_inline long atomic_long_read(const atomic_long_t *v) { instrument_atomic_read(v, sizeof(*v)); return arch_atomic_long_read(v); } static __always_inline long atomic_long_read_acquire(const atomic_long_t *v) { instrument_atomic_read(v, sizeof(*v)); return arch_atomic_long_read_acquire(v); } static __always_inline void atomic_long_set(atomic_long_t *v, long i) { instrument_atomic_write(v, sizeof(*v)); arch_atomic_long_set(v, i); } static __always_inline void atomic_long_set_release(atomic_long_t *v, long i) { instrument_atomic_write(v, sizeof(*v)); arch_atomic_long_set_release(v, i); } static __always_inline void atomic_long_add(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic_long_add(i, v); } static __always_inline long atomic_long_add_return(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_add_return(i, v); } static __always_inline long atomic_long_add_return_acquire(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_add_return_acquire(i, v); } static __always_inline long atomic_long_add_return_release(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_add_return_release(i, v); } static __always_inline long atomic_long_add_return_relaxed(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_add_return_relaxed(i, v); } static __always_inline long atomic_long_fetch_add(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_add(i, v); } static __always_inline long atomic_long_fetch_add_acquire(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_add_acquire(i, v); } static __always_inline long atomic_long_fetch_add_release(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_add_release(i, v); } static __always_inline long atomic_long_fetch_add_relaxed(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_add_relaxed(i, v); } static __always_inline void atomic_long_sub(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic_long_sub(i, v); } static __always_inline long atomic_long_sub_return(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_sub_return(i, v); } static __always_inline long atomic_long_sub_return_acquire(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_sub_return_acquire(i, v); } static __always_inline long atomic_long_sub_return_release(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_sub_return_release(i, v); } static __always_inline long atomic_long_sub_return_relaxed(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_sub_return_relaxed(i, v); } static __always_inline long atomic_long_fetch_sub(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_sub(i, v); } static __always_inline long atomic_long_fetch_sub_acquire(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_sub_acquire(i, v); } static __always_inline long atomic_long_fetch_sub_release(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_sub_release(i, v); } static __always_inline long atomic_long_fetch_sub_relaxed(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_sub_relaxed(i, v); } static __always_inline void atomic_long_inc(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic_long_inc(v); } static __always_inline long atomic_long_inc_return(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_inc_return(v); } static __always_inline long atomic_long_inc_return_acquire(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_inc_return_acquire(v); } static __always_inline long atomic_long_inc_return_release(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_inc_return_release(v); } static __always_inline long atomic_long_inc_return_relaxed(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_inc_return_relaxed(v); } static __always_inline long atomic_long_fetch_inc(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_inc(v); } static __always_inline long atomic_long_fetch_inc_acquire(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_inc_acquire(v); } static __always_inline long atomic_long_fetch_inc_release(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_inc_release(v); } static __always_inline long atomic_long_fetch_inc_relaxed(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_inc_relaxed(v); } static __always_inline void atomic_long_dec(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic_long_dec(v); } static __always_inline long atomic_long_dec_return(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_dec_return(v); } static __always_inline long atomic_long_dec_return_acquire(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_dec_return_acquire(v); } static __always_inline long atomic_long_dec_return_release(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_dec_return_release(v); } static __always_inline long atomic_long_dec_return_relaxed(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_dec_return_relaxed(v); } static __always_inline long atomic_long_fetch_dec(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_dec(v); } static __always_inline long atomic_long_fetch_dec_acquire(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_dec_acquire(v); } static __always_inline long atomic_long_fetch_dec_release(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_dec_release(v); } static __always_inline long atomic_long_fetch_dec_relaxed(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_dec_relaxed(v); } static __always_inline void atomic_long_and(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic_long_and(i, v); } static __always_inline long atomic_long_fetch_and(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_and(i, v); } static __always_inline long atomic_long_fetch_and_acquire(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_and_acquire(i, v); } static __always_inline long atomic_long_fetch_and_release(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_and_release(i, v); } static __always_inline long atomic_long_fetch_and_relaxed(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_and_relaxed(i, v); } static __always_inline void atomic_long_andnot(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic_long_andnot(i, v); } static __always_inline long atomic_long_fetch_andnot(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_andnot(i, v); } static __always_inline long atomic_long_fetch_andnot_acquire(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_andnot_acquire(i, v); } static __always_inline long atomic_long_fetch_andnot_release(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_andnot_release(i, v); } static __always_inline long atomic_long_fetch_andnot_relaxed(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_andnot_relaxed(i, v); } static __always_inline void atomic_long_or(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic_long_or(i, v); } static __always_inline long atomic_long_fetch_or(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_or(i, v); } static __always_inline long atomic_long_fetch_or_acquire(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_or_acquire(i, v); } static __always_inline long atomic_long_fetch_or_release(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_or_release(i, v); } static __always_inline long atomic_long_fetch_or_relaxed(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_or_relaxed(i, v); } static __always_inline void atomic_long_xor(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); arch_atomic_long_xor(i, v); } static __always_inline long atomic_long_fetch_xor(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_xor(i, v); } static __always_inline long atomic_long_fetch_xor_acquire(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_xor_acquire(i, v); } static __always_inline long atomic_long_fetch_xor_release(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_xor_release(i, v); } static __always_inline long atomic_long_fetch_xor_relaxed(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_xor_relaxed(i, v); } static __always_inline long atomic_long_xchg(atomic_long_t *v, long i) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_xchg(v, i); } static __always_inline long atomic_long_xchg_acquire(atomic_long_t *v, long i) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_xchg_acquire(v, i); } static __always_inline long atomic_long_xchg_release(atomic_long_t *v, long i) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_xchg_release(v, i); } static __always_inline long atomic_long_xchg_relaxed(atomic_long_t *v, long i) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_xchg_relaxed(v, i); } static __always_inline long atomic_long_cmpxchg(atomic_long_t *v, long old, long new) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_cmpxchg(v, old, new); } static __always_inline long atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_cmpxchg_acquire(v, old, new); } static __always_inline long atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_cmpxchg_release(v, old, new); } static __always_inline long atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_cmpxchg_relaxed(v, old, new); } static __always_inline bool atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new) { instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic_long_try_cmpxchg(v, old, new); } static __always_inline bool atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new) { instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic_long_try_cmpxchg_acquire(v, old, new); } static __always_inline bool atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new) { instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic_long_try_cmpxchg_release(v, old, new); } static __always_inline bool atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new) { instrument_atomic_read_write(v, sizeof(*v)); instrument_atomic_read_write(old, sizeof(*old)); return arch_atomic_long_try_cmpxchg_relaxed(v, old, new); } static __always_inline bool atomic_long_sub_and_test(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_sub_and_test(i, v); } static __always_inline bool atomic_long_dec_and_test(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_dec_and_test(v); } static __always_inline bool atomic_long_inc_and_test(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_inc_and_test(v); } static __always_inline bool atomic_long_add_negative(long i, atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_add_negative(i, v); } static __always_inline long atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_fetch_add_unless(v, a, u); } static __always_inline bool atomic_long_add_unless(atomic_long_t *v, long a, long u) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_add_unless(v, a, u); } static __always_inline bool atomic_long_inc_not_zero(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_inc_not_zero(v); } static __always_inline bool atomic_long_inc_unless_negative(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_inc_unless_negative(v); } static __always_inline bool atomic_long_dec_unless_positive(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_dec_unless_positive(v); } static __always_inline long atomic_long_dec_if_positive(atomic_long_t *v) { instrument_atomic_read_write(v, sizeof(*v)); return arch_atomic_long_dec_if_positive(v); } #define xchg(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_xchg(__ai_ptr, __VA_ARGS__); \ }) #define xchg_acquire(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_xchg_acquire(__ai_ptr, __VA_ARGS__); \ }) #define xchg_release(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_xchg_release(__ai_ptr, __VA_ARGS__); \ }) #define xchg_relaxed(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_xchg_relaxed(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg_acquire(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg_acquire(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg_release(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg_release(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg_relaxed(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg_relaxed(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg64(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg64(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg64_acquire(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg64_acquire(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg64_release(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg64_release(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg64_relaxed(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg64_relaxed(__ai_ptr, __VA_ARGS__); \ }) #define try_cmpxchg(ptr, oldp, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \ arch_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) #define try_cmpxchg_acquire(ptr, oldp, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \ arch_try_cmpxchg_acquire(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) #define try_cmpxchg_release(ptr, oldp, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \ arch_try_cmpxchg_release(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) #define try_cmpxchg_relaxed(ptr, oldp, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ typeof(oldp) __ai_oldp = (oldp); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \ arch_try_cmpxchg_relaxed(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) #define cmpxchg_local(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg_local(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg64_local(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg64_local(__ai_ptr, __VA_ARGS__); \ }) #define sync_cmpxchg(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_sync_cmpxchg(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg_double(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, 2 * sizeof(*__ai_ptr)); \ arch_cmpxchg_double(__ai_ptr, __VA_ARGS__); \ }) #define cmpxchg_double_local(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, 2 * sizeof(*__ai_ptr)); \ arch_cmpxchg_double_local(__ai_ptr, __VA_ARGS__); \ }) #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */ // 2a9553f0a9d5619f19151092df5cabbbf16ce835 |
5 117 37 45 40 68 237 67 44 61 10 121 133 73 12 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 25 |