Total coverage: 96037 (6%)of 1802950
4 4 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 // SPDX-License-Identifier: GPL-2.0-only /* * x86 APERF/MPERF KHz calculation for * /sys/.../cpufreq/scaling_cur_freq * * Copyright (C) 2017 Intel Corp. * Author: Len Brown <len.brown@intel.com> */ #include <linux/cpufreq.h> #include <linux/delay.h> #include <linux/ktime.h> #include <linux/math64.h> #include <linux/percpu.h> #include <linux/rcupdate.h> #include <linux/sched/isolation.h> #include <linux/sched/topology.h> #include <linux/smp.h> #include <linux/syscore_ops.h> #include <asm/cpu.h> #include <asm/cpu_device_id.h> #include <asm/intel-family.h> #include "cpu.h" struct aperfmperf { seqcount_t seq; unsigned long last_update; u64 acnt; u64 mcnt; u64 aperf; u64 mperf; }; static DEFINE_PER_CPU_SHARED_ALIGNED(struct aperfmperf, cpu_samples) = { .seq = SEQCNT_ZERO(cpu_samples.seq) }; static void init_counter_refs(void) { u64 aperf, mperf; rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); this_cpu_write(cpu_samples.aperf, aperf); this_cpu_write(cpu_samples.mperf, mperf); } #if defined(CONFIG_X86_64) && defined(CONFIG_SMP) /* * APERF/MPERF frequency ratio computation. * * The scheduler wants to do frequency invariant accounting and needs a <1 * ratio to account for the 'current' frequency, corresponding to * freq_curr / freq_max. * * Since the frequency freq_curr on x86 is controlled by micro-controller and * our P-state setting is little more than a request/hint, we need to observe * the effective frequency 'BusyMHz', i.e. the average frequency over a time * interval after discarding idle time. This is given by: * * BusyMHz = delta_APERF / delta_MPERF * freq_base * * where freq_base is the max non-turbo P-state. * * The freq_max term has to be set to a somewhat arbitrary value, because we * can't know which turbo states will be available at a given point in time: * it all depends on the thermal headroom of the entire package. We set it to * the turbo level with 4 cores active. * * Benchmarks show that's a good compromise between the 1C turbo ratio * (freq_curr/freq_max would rarely reach 1) and something close to freq_base, * which would ignore the entire turbo range (a conspicuous part, making * freq_curr/freq_max always maxed out). * * An exception to the heuristic above is the Atom uarch, where we choose the * highest turbo level for freq_max since Atom's are generally oriented towards * power efficiency. * * Setting freq_max to anything less than the 1C turbo ratio makes the ratio * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1. */ DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key); static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE; static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE; void arch_set_max_freq_ratio(bool turbo_disabled) { arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE : arch_turbo_freq_ratio; } EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio); static bool __init turbo_disabled(void) { u64 misc_en; int err; err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en); if (err) return false; return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); } static bool __init slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) { int err; err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq); if (err) return false; err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq); if (err) return false; *base_freq = (*base_freq >> 16) & 0x3F; /* max P state */ *turbo_freq = *turbo_freq & 0x3F; /* 1C turbo */ return true; } #define X86_MATCH(vfm) \ X86_MATCH_VFM_FEATURE(vfm, X86_FEATURE_APERFMPERF, NULL) static const struct x86_cpu_id has_knl_turbo_ratio_limits[] __initconst = { X86_MATCH(INTEL_XEON_PHI_KNL), X86_MATCH(INTEL_XEON_PHI_KNM), {} }; static const struct x86_cpu_id has_skx_turbo_ratio_limits[] __initconst = { X86_MATCH(INTEL_SKYLAKE_X), {} }; static const struct x86_cpu_id has_glm_turbo_ratio_limits[] __initconst = { X86_MATCH(INTEL_ATOM_GOLDMONT), X86_MATCH(INTEL_ATOM_GOLDMONT_D), X86_MATCH(INTEL_ATOM_GOLDMONT_PLUS), {} }; static bool __init knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int num_delta_fratio) { int fratio, delta_fratio, found; int err, i; u64 msr; err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); if (err) return false; *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr); if (err) return false; fratio = (msr >> 8) & 0xFF; i = 16; found = 0; do { if (found >= num_delta_fratio) { *turbo_freq = fratio; return true; } delta_fratio = (msr >> (i + 5)) & 0x7; if (delta_fratio) { found += 1; fratio -= delta_fratio; } i += 8; } while (i < 64); return true; } static bool __init skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size) { u64 ratios, counts; u32 group_size; int err, i; err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); if (err) return false; *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios); if (err) return false; err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts); if (err) return false; for (i = 0; i < 64; i += 8) { group_size = (counts >> i) & 0xFF; if (group_size >= size) { *turbo_freq = (ratios >> i) & 0xFF; return true; } } return false; } static bool __init core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) { u64 msr; int err; err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); if (err) return false; err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr); if (err) return false; *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ *turbo_freq = (msr >> 24) & 0xFF; /* 4C turbo */ /* The CPU may have less than 4 cores */ if (!*turbo_freq) *turbo_freq = msr & 0xFF; /* 1C turbo */ return true; } static bool __init intel_set_max_freq_ratio(void) { u64 base_freq, turbo_freq; u64 turbo_ratio; if (slv_set_max_freq_ratio(&base_freq, &turbo_freq)) goto out; if (x86_match_cpu(has_glm_turbo_ratio_limits) && skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1)) goto out; if (x86_match_cpu(has_knl_turbo_ratio_limits) && knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1)) goto out; if (x86_match_cpu(has_skx_turbo_ratio_limits) && skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4)) goto out; if (core_set_max_freq_ratio(&base_freq, &turbo_freq)) goto out; return false; out: /* * Some hypervisors advertise X86_FEATURE_APERFMPERF * but then fill all MSR's with zeroes. * Some CPUs have turbo boost but don't declare any turbo ratio * in MSR_TURBO_RATIO_LIMIT. */ if (!base_freq || !turbo_freq) { pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n"); return false; } turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq); if (!turbo_ratio) { pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n"); return false; } arch_turbo_freq_ratio = turbo_ratio; arch_set_max_freq_ratio(turbo_disabled()); return true; } #ifdef CONFIG_PM_SLEEP static struct syscore_ops freq_invariance_syscore_ops = { .resume = init_counter_refs, }; static void register_freq_invariance_syscore_ops(void) { register_syscore_ops(&freq_invariance_syscore_ops); } #else static inline void register_freq_invariance_syscore_ops(void) {} #endif static void freq_invariance_enable(void) { if (static_branch_unlikely(&arch_scale_freq_key)) { WARN_ON_ONCE(1); return; } static_branch_enable_cpuslocked(&arch_scale_freq_key); register_freq_invariance_syscore_ops(); pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio); } void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { arch_turbo_freq_ratio = ratio; arch_set_max_freq_ratio(turbo_disabled); freq_invariance_enable(); } static void __init bp_init_freq_invariance(void) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return; if (intel_set_max_freq_ratio()) { guard(cpus_read_lock)(); freq_invariance_enable(); } } static void disable_freq_invariance_workfn(struct work_struct *work) { int cpu; static_branch_disable(&arch_scale_freq_key); /* * Set arch_freq_scale to a default value on all cpus * This negates the effect of scaling */ for_each_possible_cpu(cpu) per_cpu(arch_freq_scale, cpu) = SCHED_CAPACITY_SCALE; } static DECLARE_WORK(disable_freq_invariance_work, disable_freq_invariance_workfn); DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE; EXPORT_PER_CPU_SYMBOL_GPL(arch_freq_scale); static void scale_freq_tick(u64 acnt, u64 mcnt) { u64 freq_scale; if (!arch_scale_freq_invariant()) return; if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt)) goto error; if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt) goto error; freq_scale = div64_u64(acnt, mcnt); if (!freq_scale) goto error; if (freq_scale > SCHED_CAPACITY_SCALE) freq_scale = SCHED_CAPACITY_SCALE; this_cpu_write(arch_freq_scale, freq_scale); return; error: pr_warn("Scheduler frequency invariance went wobbly, disabling!\n"); schedule_work(&disable_freq_invariance_work); } #else static inline void bp_init_freq_invariance(void) { } static inline void scale_freq_tick(u64 acnt, u64 mcnt) { } #endif /* CONFIG_X86_64 && CONFIG_SMP */ void arch_scale_freq_tick(void) { struct aperfmperf *s = this_cpu_ptr(&cpu_samples); u64 acnt, mcnt, aperf, mperf; if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF)) return; rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); acnt = aperf - s->aperf; mcnt = mperf - s->mperf; s->aperf = aperf; s->mperf = mperf; raw_write_seqcount_begin(&s->seq); s->last_update = jiffies; s->acnt = acnt; s->mcnt = mcnt; raw_write_seqcount_end(&s->seq); scale_freq_tick(acnt, mcnt); } /* * Discard samples older than the define maximum sample age of 20ms. There * is no point in sending IPIs in such a case. If the scheduler tick was * not running then the CPU is either idle or isolated. */ #define MAX_SAMPLE_AGE ((unsigned long)HZ / 50) unsigned int arch_freq_get_on_cpu(int cpu) { struct aperfmperf *s = per_cpu_ptr(&cpu_samples, cpu); unsigned int seq, freq; unsigned long last; u64 acnt, mcnt; if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF)) goto fallback; do { seq = raw_read_seqcount_begin(&s->seq); last = s->last_update; acnt = s->acnt; mcnt = s->mcnt; } while (read_seqcount_retry(&s->seq, seq)); /* * Bail on invalid count and when the last update was too long ago, * which covers idle and NOHZ full CPUs. */ if (!mcnt || (jiffies - last) > MAX_SAMPLE_AGE) goto fallback; return div64_u64((cpu_khz * acnt), mcnt); fallback: freq = cpufreq_quick_get(cpu); return freq ? freq : cpu_khz; } static int __init bp_init_aperfmperf(void) { if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF)) return 0; init_counter_refs(); bp_init_freq_invariance(); return 0; } early_initcall(bp_init_aperfmperf); void ap_init_aperfmperf(void) { if (cpu_feature_enabled(X86_FEATURE_APERFMPERF)) init_counter_refs(); }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 /* SPDX-License-Identifier: GPL-2.0-only */ /* * 9P Client Definitions * * Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net> */ #ifndef NET_9P_CLIENT_H #define NET_9P_CLIENT_H #include <linux/utsname.h> #include <linux/idr.h> #include <linux/tracepoint-defs.h> /* Number of requests per row */ #define P9_ROW_MAXTAG 255 /** enum p9_proto_versions - 9P protocol versions * @p9_proto_legacy: 9P Legacy mode, pre-9P2000.u * @p9_proto_2000u: 9P2000.u extension * @p9_proto_2000L: 9P2000.L extension */ enum p9_proto_versions { p9_proto_legacy, p9_proto_2000u, p9_proto_2000L, }; /** * enum p9_trans_status - different states of underlying transports * @Connected: transport is connected and healthy * @Disconnected: transport has been disconnected * @Hung: transport is connected by wedged * * This enumeration details the various states a transport * instatiation can be in. */ enum p9_trans_status { Connected, BeginDisconnect, Disconnected, Hung, }; /** * enum p9_req_status_t - status of a request * @REQ_STATUS_ALLOC: request has been allocated but not sent * @REQ_STATUS_UNSENT: request waiting to be sent * @REQ_STATUS_SENT: request sent to server * @REQ_STATUS_RCVD: response received from server * @REQ_STATUS_FLSHD: request has been flushed * @REQ_STATUS_ERROR: request encountered an error on the client side */ enum p9_req_status_t { REQ_STATUS_ALLOC, REQ_STATUS_UNSENT, REQ_STATUS_SENT, REQ_STATUS_RCVD, REQ_STATUS_FLSHD, REQ_STATUS_ERROR, }; /** * struct p9_req_t - request slots * @status: status of this request slot * @t_err: transport error * @wq: wait_queue for the client to block on for this request * @tc: the request fcall structure * @rc: the response fcall structure * @req_list: link for higher level objects to chain requests */ struct p9_req_t { int status; int t_err; refcount_t refcount; wait_queue_head_t wq; struct p9_fcall tc; struct p9_fcall rc; struct list_head req_list; }; /** * struct p9_client - per client instance state * @lock: protect @fids and @reqs * @msize: maximum data size negotiated by protocol * @proto_version: 9P protocol version to use * @trans_mod: module API instantiated with this client * @status: connection state * @trans: tranport instance state and API * @fids: All active FID handles * @reqs: All active requests. * @name: node name used as client id * * The client structure is used to keep track of various per-client * state that has been instantiated. */ struct p9_client { spinlock_t lock; unsigned int msize; unsigned char proto_version; struct p9_trans_module *trans_mod; enum p9_trans_status status; void *trans; struct kmem_cache *fcall_cache; union { struct { int rfd; int wfd; } fd; struct { u16 port; bool privport; } tcp; } trans_opts; struct idr fids; struct idr reqs; char name[__NEW_UTS_LEN + 1]; }; /** * struct p9_fid - file system entity handle * @clnt: back pointer to instantiating &p9_client * @fid: numeric identifier for this handle * @mode: current mode of this fid (enum?) * @qid: the &p9_qid server identifier this handle points to * @iounit: the server reported maximum transaction size for this file * @uid: the numeric uid of the local user who owns this handle * @rdir: readdir accounting structure (allocated on demand) * @dlist: per-dentry fid tracking * * TODO: This needs lots of explanation. */ enum fid_source { FID_FROM_OTHER, FID_FROM_INODE, FID_FROM_DENTRY, }; struct p9_fid { struct p9_client *clnt; u32 fid; refcount_t count; int mode; struct p9_qid qid; u32 iounit; kuid_t uid; void *rdir; struct hlist_node dlist; /* list of all fids attached to a dentry */ struct hlist_node ilist; }; /** * struct p9_dirent - directory entry structure * @qid: The p9 server qid for this dirent * @d_off: offset to the next dirent * @d_type: type of file * @d_name: file name */ struct p9_dirent { struct p9_qid qid; u64 d_off; unsigned char d_type; char d_name[256]; }; struct iov_iter; int p9_show_client_options(struct seq_file *m, struct p9_client *clnt); int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb); int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, const char *name); int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name, struct p9_fid *newdirfid, const char *new_name); struct p9_client *p9_client_create(const char *dev_name, char *options); void p9_client_destroy(struct p9_client *clnt); void p9_client_disconnect(struct p9_client *clnt); void p9_client_begin_disconnect(struct p9_client *clnt); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, const char *uname, kuid_t n_uname, const char *aname); struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, const unsigned char * const *wnames, int clone); int p9_client_open(struct p9_fid *fid, int mode); int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode, char *extension); int p9_client_link(struct p9_fid *fid, struct p9_fid *oldfid, const char *newname); int p9_client_symlink(struct p9_fid *fid, const char *name, const char *symname, kgid_t gid, struct p9_qid *qid); int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 mode, kgid_t gid, struct p9_qid *qid); int p9_client_clunk(struct p9_fid *fid); int p9_client_fsync(struct p9_fid *fid, int datasync); int p9_client_remove(struct p9_fid *fid); int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags); int p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err); int p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err); int p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err); struct netfs_io_subrequest; void p9_client_write_subreq(struct netfs_io_subrequest *subreq); int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset); int p9dirent_read(struct p9_client *clnt, char *buf, int len, struct p9_dirent *dirent); struct p9_wstat *p9_client_stat(struct p9_fid *fid); int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst); int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *attr); struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, u64 request_mask); int p9_client_mknod_dotl(struct p9_fid *oldfid, const char *name, int mode, dev_t rdev, kgid_t gid, struct p9_qid *qid); int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, kgid_t gid, struct p9_qid *qid); int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status); int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl); void p9_fcall_fini(struct p9_fcall *fc); struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag); static inline void p9_req_get(struct p9_req_t *r) { refcount_inc(&r->refcount); } static inline int p9_req_try_get(struct p9_req_t *r) { return refcount_inc_not_zero(&r->refcount); } int p9_req_put(struct p9_client *c, struct p9_req_t *r); /* We cannot have the real tracepoints in header files, * use a wrapper function */ DECLARE_TRACEPOINT(9p_fid_ref); void do_trace_9p_fid_get(struct p9_fid *fid); void do_trace_9p_fid_put(struct p9_fid *fid); /* fid reference counting helpers: * - fids used for any length of time should always be referenced through * p9_fid_get(), and released with p9_fid_put() * - v9fs_fid_lookup() or similar will automatically call get for you * and also require a put * - the *_fid_add() helpers will stash the fid in the inode, * at which point it is the responsibility of evict_inode() * to call the put * - the last put will automatically send a clunk to the server */ static inline struct p9_fid *p9_fid_get(struct p9_fid *fid) { if (tracepoint_enabled(9p_fid_ref)) do_trace_9p_fid_get(fid); refcount_inc(&fid->count); return fid; } static inline int p9_fid_put(struct p9_fid *fid) { if (!fid || IS_ERR(fid)) return 0; if (tracepoint_enabled(9p_fid_ref)) do_trace_9p_fid_put(fid); if (!refcount_dec_and_test(&fid->count)) return 0; return p9_client_clunk(fid); } void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status); int p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, int16_t *tag, int rewind); int p9stat_read(struct p9_client *clnt, char *buf, int len, struct p9_wstat *st); void p9stat_free(struct p9_wstat *stbuf); int p9_is_proto_dotu(struct p9_client *clnt); int p9_is_proto_dotl(struct p9_client *clnt); struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, const char *attr_name, u64 *attr_size); int p9_client_xattrcreate(struct p9_fid *fid, const char *name, u64 attr_size, int flags); int p9_client_readlink(struct p9_fid *fid, char **target); int p9_client_init(void); void p9_client_exit(void); #endif /* NET_9P_CLIENT_H */
2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) International Business Machines Corp., 2006 * * Author: Artem Bityutskiy (Битюцкий Артём) */ /* * This file includes implementation of UBI character device operations. * * There are two kinds of character devices in UBI: UBI character devices and * UBI volume character devices. UBI character devices allow users to * manipulate whole volumes: create, remove, and re-size them. Volume character * devices provide volume I/O capabilities. * * Major and minor numbers are assigned dynamically to both UBI and volume * character devices. * * Well, there is the third kind of character devices - the UBI control * character device, which allows to manipulate by UBI devices - create and * delete them. In other words, it is used for attaching and detaching MTD * devices. */ #include <linux/module.h> #include <linux/stat.h> #include <linux/slab.h> #include <linux/ioctl.h> #include <linux/capability.h> #include <linux/uaccess.h> #include <linux/compat.h> #include <linux/math64.h> #include <mtd/ubi-user.h> #include "ubi.h" /** * get_exclusive - get exclusive access to an UBI volume. * @desc: volume descriptor * * This function changes UBI volume open mode to "exclusive". Returns previous * mode value (positive integer) in case of success and a negative error code * in case of failure. */ static int get_exclusive(struct ubi_volume_desc *desc) { int users, err; struct ubi_volume *vol = desc->vol; spin_lock(&vol->ubi->volumes_lock); users = vol->readers + vol->writers + vol->exclusive + vol->metaonly; ubi_assert(users > 0); if (users > 1) { ubi_err(vol->ubi, "%d users for volume %d", users, vol->vol_id); err = -EBUSY; } else { vol->readers = vol->writers = vol->metaonly = 0; vol->exclusive = 1; err = desc->mode; desc->mode = UBI_EXCLUSIVE; } spin_unlock(&vol->ubi->volumes_lock); return err; } /** * revoke_exclusive - revoke exclusive mode. * @desc: volume descriptor * @mode: new mode to switch to */ static void revoke_exclusive(struct ubi_volume_desc *desc, int mode) { struct ubi_volume *vol = desc->vol; spin_lock(&vol->ubi->volumes_lock); ubi_assert(vol->readers == 0 && vol->writers == 0 && vol->metaonly == 0); ubi_assert(vol->exclusive == 1 && desc->mode == UBI_EXCLUSIVE); vol->exclusive = 0; if (mode == UBI_READONLY) vol->readers = 1; else if (mode == UBI_READWRITE) vol->writers = 1; else if (mode == UBI_METAONLY) vol->metaonly = 1; else vol->exclusive = 1; spin_unlock(&vol->ubi->volumes_lock); desc->mode = mode; } static int vol_cdev_open(struct inode *inode, struct file *file) { struct ubi_volume_desc *desc; int vol_id = iminor(inode) - 1, mode, ubi_num; ubi_num = ubi_major2num(imajor(inode)); if (ubi_num < 0) return ubi_num; if (file->f_mode & FMODE_WRITE) mode = UBI_READWRITE; else mode = UBI_READONLY; dbg_gen("open device %d, volume %d, mode %d", ubi_num, vol_id, mode); desc = ubi_open_volume(ubi_num, vol_id, mode); if (IS_ERR(desc)) return PTR_ERR(desc); file->private_data = desc; return 0; } static int vol_cdev_release(struct inode *inode, struct file *file) { struct ubi_volume_desc *desc = file->private_data; struct ubi_volume *vol = desc->vol; dbg_gen("release device %d, volume %d, mode %d", vol->ubi->ubi_num, vol->vol_id, desc->mode); if (vol->updating) { ubi_warn(vol->ubi, "update of volume %d not finished, volume is damaged", vol->vol_id); ubi_assert(!vol->changing_leb); vol->updating = 0; vfree(vol->upd_buf); } else if (vol->changing_leb) { dbg_gen("only %lld of %lld bytes received for atomic LEB change for volume %d:%d, cancel", vol->upd_received, vol->upd_bytes, vol->ubi->ubi_num, vol->vol_id); vol->changing_leb = 0; vfree(vol->upd_buf); } ubi_close_volume(desc); return 0; } static loff_t vol_cdev_llseek(struct file *file, loff_t offset, int origin) { struct ubi_volume_desc *desc = file->private_data; struct ubi_volume *vol = desc->vol; if (vol->updating) { /* Update is in progress, seeking is prohibited */ ubi_err(vol->ubi, "updating"); return -EBUSY; } return fixed_size_llseek(file, offset, origin, vol->used_bytes); } static int vol_cdev_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct ubi_volume_desc *desc = file->private_data; struct ubi_device *ubi = desc->vol->ubi; struct inode *inode = file_inode(file); int err; inode_lock(inode); err = ubi_sync(ubi->ubi_num); inode_unlock(inode); return err; } static ssize_t vol_cdev_read(struct file *file, __user char *buf, size_t count, loff_t *offp) { struct ubi_volume_desc *desc = file->private_data; struct ubi_volume *vol = desc->vol; struct ubi_device *ubi = vol->ubi; int err, lnum, off, len, tbuf_size; size_t count_save = count; void *tbuf; dbg_gen("read %zd bytes from offset %lld of volume %d", count, *offp, vol->vol_id); if (vol->updating) { ubi_err(vol->ubi, "updating"); return -EBUSY; } if (vol->upd_marker) { ubi_err(vol->ubi, "damaged volume, update marker is set"); return -EBADF; } if (*offp == vol->used_bytes || count == 0) return 0; if (vol->corrupted) dbg_gen("read from corrupted volume %d", vol->vol_id); if (*offp + count > vol->used_bytes) count_save = count = vol->used_bytes - *offp; tbuf_size = vol->usable_leb_size; if (count < tbuf_size) tbuf_size = ALIGN(count, ubi->min_io_size); tbuf = vmalloc(tbuf_size); if (!tbuf) return -ENOMEM; len = count > tbuf_size ? tbuf_size : count; lnum = div_u64_rem(*offp, vol->usable_leb_size, &off); do { cond_resched(); if (off + len >= vol->usable_leb_size) len = vol->usable_leb_size - off; err = ubi_eba_read_leb(ubi, vol, lnum, tbuf, off, len, 0); if (err) break; off += len; if (off == vol->usable_leb_size) { lnum += 1; off -= vol->usable_leb_size; } count -= len; *offp += len; err = copy_to_user(buf, tbuf, len); if (err) { err = -EFAULT; break; } buf += len; len = count > tbuf_size ? tbuf_size : count; } while (count); vfree(tbuf); return err ? err : count_save - count; } /* * This function allows to directly write to dynamic UBI volumes, without * issuing the volume update operation. */ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf, size_t count, loff_t *offp) { struct ubi_volume_desc *desc = file->private_data; struct ubi_volume *vol = desc->vol; struct ubi_device *ubi = vol->ubi; int lnum, off, len, tbuf_size, err = 0; size_t count_save = count; char *tbuf; if (!vol->direct_writes) return -EPERM; dbg_gen("requested: write %zd bytes to offset %lld of volume %u", count, *offp, vol->vol_id); if (vol->vol_type == UBI_STATIC_VOLUME) return -EROFS; lnum = div_u64_rem(*offp, vol->usable_leb_size, &off); if (off & (ubi->min_io_size - 1)) { ubi_err(ubi, "unaligned position"); return -EINVAL; } if (*offp + count > vol->used_bytes) count_save = count = vol->used_bytes - *offp; /* We can write only in fractions of the minimum I/O unit */ if (count & (ubi->min_io_size - 1)) { ubi_err(ubi, "unaligned write length"); return -EINVAL; } tbuf_size = vol->usable_leb_size; if (count < tbuf_size) tbuf_size = ALIGN(count, ubi->min_io_size); tbuf = vmalloc(tbuf_size); if (!tbuf) return -ENOMEM; len = count > tbuf_size ? tbuf_size : count; while (count) { cond_resched(); if (off + len >= vol->usable_leb_size) len = vol->usable_leb_size - off; err = copy_from_user(tbuf, buf, len); if (err) { err = -EFAULT; break; } err = ubi_eba_write_leb(ubi, vol, lnum, tbuf, off, len); if (err) break; off += len; if (off == vol->usable_leb_size) { lnum += 1; off -= vol->usable_leb_size; } count -= len; *offp += len; buf += len; len = count > tbuf_size ? tbuf_size : count; } vfree(tbuf); return err ? err : count_save - count; } static ssize_t vol_cdev_write(struct file *file, const char __user *buf, size_t count, loff_t *offp) { int err = 0; struct ubi_volume_desc *desc = file->private_data; struct ubi_volume *vol = desc->vol; struct ubi_device *ubi = vol->ubi; if (!vol->updating && !vol->changing_leb) return vol_cdev_direct_write(file, buf, count, offp); if (vol->updating) err = ubi_more_update_data(ubi, vol, buf, count); else err = ubi_more_leb_change_data(ubi, vol, buf, count); if (err < 0) { ubi_err(ubi, "cannot accept more %zd bytes of data, error %d", count, err); return err; } if (err) { /* * The operation is finished, @err contains number of actually * written bytes. */ count = err; if (vol->changing_leb) { revoke_exclusive(desc, UBI_READWRITE); return count; } /* * We voluntarily do not take into account the skip_check flag * as we want to make sure what we wrote was correctly written. */ err = ubi_check_volume(ubi, vol->vol_id); if (err < 0) return err; if (err) { ubi_warn(ubi, "volume %d on UBI device %d is corrupted", vol->vol_id, ubi->ubi_num); vol->corrupted = 1; } vol->checked = 1; ubi_volume_notify(ubi, vol, UBI_VOLUME_UPDATED); revoke_exclusive(desc, UBI_READWRITE); } return count; } static long vol_cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int err = 0; struct ubi_volume_desc *desc = file->private_data; struct ubi_volume *vol = desc->vol; struct ubi_device *ubi = vol->ubi; void __user *argp = (void __user *)arg; switch (cmd) { /* Volume update command */ case UBI_IOCVOLUP: { int64_t bytes, rsvd_bytes; if (!capable(CAP_SYS_RESOURCE)) { err = -EPERM; break; } err = copy_from_user(&bytes, argp, sizeof(int64_t)); if (err) { err = -EFAULT; break; } if (desc->mode == UBI_READONLY) { err = -EROFS; break; } rsvd_bytes = (long long)vol->reserved_pebs * vol->usable_leb_size; if (bytes < 0 || bytes > rsvd_bytes) { err = -EINVAL; break; } err = get_exclusive(desc); if (err < 0) break; err = ubi_start_update(ubi, vol, bytes); if (bytes == 0) { ubi_volume_notify(ubi, vol, UBI_VOLUME_UPDATED); revoke_exclusive(desc, UBI_READWRITE); } break; } /* Atomic logical eraseblock change command */ case UBI_IOCEBCH: { struct ubi_leb_change_req req; err = copy_from_user(&req, argp, sizeof(struct ubi_leb_change_req)); if (err) { err = -EFAULT; break; } if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) { err = -EROFS; break; } /* Validate the request */ err = -EINVAL; if (!ubi_leb_valid(vol, req.lnum) || req.bytes < 0 || req.bytes > vol->usable_leb_size) break; err = get_exclusive(desc); if (err < 0) break; err = ubi_start_leb_change(ubi, vol, &req); if (req.bytes == 0) revoke_exclusive(desc, UBI_READWRITE); break; } /* Logical eraseblock erasure command */ case UBI_IOCEBER: { int32_t lnum; err = get_user(lnum, (__user int32_t *)argp); if (err) { err = -EFAULT; break; } if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) { err = -EROFS; break; } if (!ubi_leb_valid(vol, lnum)) { err = -EINVAL; break; } dbg_gen("erase LEB %d:%d", vol->vol_id, lnum); err = ubi_eba_unmap_leb(ubi, vol, lnum); if (err) break; err = ubi_wl_flush(ubi, UBI_ALL, UBI_ALL); break; } /* Logical eraseblock map command */ case UBI_IOCEBMAP: { struct ubi_map_req req; err = copy_from_user(&req, argp, sizeof(struct ubi_map_req)); if (err) { err = -EFAULT; break; } err = ubi_leb_map(desc, req.lnum); break; } /* Logical eraseblock un-map command */ case UBI_IOCEBUNMAP: { int32_t lnum; err = get_user(lnum, (__user int32_t *)argp); if (err) { err = -EFAULT; break; } err = ubi_leb_unmap(desc, lnum); break; } /* Check if logical eraseblock is mapped command */ case UBI_IOCEBISMAP: { int32_t lnum; err = get_user(lnum, (__user int32_t *)argp); if (err) { err = -EFAULT; break; } err = ubi_is_mapped(desc, lnum); break; } /* Set volume property command */ case UBI_IOCSETVOLPROP: { struct ubi_set_vol_prop_req req; err = copy_from_user(&req, argp, sizeof(struct ubi_set_vol_prop_req)); if (err) { err = -EFAULT; break; } switch (req.property) { case UBI_VOL_PROP_DIRECT_WRITE: mutex_lock(&ubi->device_mutex); desc->vol->direct_writes = !!req.value; mutex_unlock(&ubi->device_mutex); break; default: err = -EINVAL; break; } break; } /* Create a R/O block device on top of the UBI volume */ case UBI_IOCVOLCRBLK: { struct ubi_volume_info vi; ubi_get_volume_info(desc, &vi); err = ubiblock_create(&vi); break; } /* Remove the R/O block device */ case UBI_IOCVOLRMBLK: { struct ubi_volume_info vi; ubi_get_volume_info(desc, &vi); err = ubiblock_remove(&vi); break; } default: err = -ENOTTY; break; } return err; } /** * verify_mkvol_req - verify volume creation request. * @ubi: UBI device description object * @req: the request to check * * This function zero if the request is correct, and %-EINVAL if not. */ static int verify_mkvol_req(const struct ubi_device *ubi, const struct ubi_mkvol_req *req) { int n, err = -EINVAL; if (req->bytes < 0 || req->alignment < 0 || req->vol_type < 0 || req->name_len < 0) goto bad; if ((req->vol_id < 0 || req->vol_id >= ubi->vtbl_slots) && req->vol_id != UBI_VOL_NUM_AUTO) goto bad; if (req->alignment == 0) goto bad; if (req->bytes == 0) goto bad; if (req->vol_type != UBI_DYNAMIC_VOLUME && req->vol_type != UBI_STATIC_VOLUME) goto bad; if (req->flags & ~UBI_VOL_VALID_FLGS) goto bad; if (req->flags & UBI_VOL_SKIP_CRC_CHECK_FLG && req->vol_type != UBI_STATIC_VOLUME) goto bad; if (req->alignment > ubi->leb_size) goto bad; n = req->alignment & (ubi->min_io_size - 1); if (req->alignment != 1 && n) goto bad; if (!req->name[0] || !req->name_len) goto bad; if (req->name_len > UBI_VOL_NAME_MAX) { err = -ENAMETOOLONG; goto bad; } n = strnlen(req->name, req->name_len + 1); if (n != req->name_len) goto bad; return 0; bad: ubi_err(ubi, "bad volume creation request"); ubi_dump_mkvol_req(req); return err; } /** * verify_rsvol_req - verify volume re-size request. * @ubi: UBI device description object * @req: the request to check * * This function returns zero if the request is correct, and %-EINVAL if not. */ static int verify_rsvol_req(const struct ubi_device *ubi, const struct ubi_rsvol_req *req) { if (req->bytes <= 0) return -EINVAL; if (req->vol_id < 0 || req->vol_id >= ubi->vtbl_slots) return -EINVAL; return 0; } /** * rename_volumes - rename UBI volumes. * @ubi: UBI device description object * @req: volumes re-name request * * This is a helper function for the volume re-name IOCTL which validates the * request, opens the volume and calls corresponding volumes management * function. Returns zero in case of success and a negative error code in case * of failure. */ static int rename_volumes(struct ubi_device *ubi, struct ubi_rnvol_req *req) { int i, n, err; struct list_head rename_list; struct ubi_rename_entry *re, *re1; if (req->count < 0 || req->count > UBI_MAX_RNVOL) return -EINVAL; if (req->count == 0) return 0; /* Validate volume IDs and names in the request */ for (i = 0; i < req->count; i++) { if (req->ents[i].vol_id < 0 || req->ents[i].vol_id >= ubi->vtbl_slots) return -EINVAL; if (req->ents[i].name_len < 0) return -EINVAL; if (req->ents[i].name_len > UBI_VOL_NAME_MAX) return -ENAMETOOLONG; req->ents[i].name[req->ents[i].name_len] = '\0'; n = strlen(req->ents[i].name); if (n != req->ents[i].name_len) return -EINVAL; } /* Make sure volume IDs and names are unique */ for (i = 0; i < req->count - 1; i++) { for (n = i + 1; n < req->count; n++) { if (req->ents[i].vol_id == req->ents[n].vol_id) { ubi_err(ubi, "duplicated volume id %d", req->ents[i].vol_id); return -EINVAL; } if (!strcmp(req->ents[i].name, req->ents[n].name)) { ubi_err(ubi, "duplicated volume name \"%s\"", req->ents[i].name); return -EINVAL; } } } /* Create the re-name list */ INIT_LIST_HEAD(&rename_list); for (i = 0; i < req->count; i++) { int vol_id = req->ents[i].vol_id; int name_len = req->ents[i].name_len; const char *name = req->ents[i].name; re = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL); if (!re) { err = -ENOMEM; goto out_free; } re->desc = ubi_open_volume(ubi->ubi_num, vol_id, UBI_METAONLY); if (IS_ERR(re->desc)) { err = PTR_ERR(re->desc); ubi_err(ubi, "cannot open volume %d, error %d", vol_id, err); kfree(re); goto out_free; } /* Skip this re-naming if the name does not really change */ if (re->desc->vol->name_len == name_len && !memcmp(re->desc->vol->name, name, name_len)) { ubi_close_volume(re->desc); kfree(re); continue; } re->new_name_len = name_len; memcpy(re->new_name, name, name_len); list_add_tail(&re->list, &rename_list); dbg_gen("will rename volume %d from \"%s\" to \"%s\"", vol_id, re->desc->vol->name, name); } if (list_empty(&rename_list)) return 0; /* Find out the volumes which have to be removed */ list_for_each_entry(re, &rename_list, list) { struct ubi_volume_desc *desc; int no_remove_needed = 0; /* * Volume @re->vol_id is going to be re-named to * @re->new_name, while its current name is @name. If a volume * with name @re->new_name currently exists, it has to be * removed, unless it is also re-named in the request (@req). */ list_for_each_entry(re1, &rename_list, list) { if (re->new_name_len == re1->desc->vol->name_len && !memcmp(re->new_name, re1->desc->vol->name, re1->desc->vol->name_len)) { no_remove_needed = 1; break; } } if (no_remove_needed) continue; /* * It seems we need to remove volume with name @re->new_name, * if it exists. */ desc = ubi_open_volume_nm(ubi->ubi_num, re->new_name, UBI_EXCLUSIVE); if (IS_ERR(desc)) { err = PTR_ERR(desc); if (err == -ENODEV) /* Re-naming into a non-existing volume name */ continue; /* The volume exists but busy, or an error occurred */ ubi_err(ubi, "cannot open volume \"%s\", error %d", re->new_name, err); goto out_free; } re1 = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL); if (!re1) { err = -ENOMEM; ubi_close_volume(desc); goto out_free; } re1->remove = 1; re1->desc = desc; list_add(&re1->list, &rename_list); dbg_gen("will remove volume %d, name \"%s\"", re1->desc->vol->vol_id, re1->desc->vol->name); } mutex_lock(&ubi->device_mutex); err = ubi_rename_volumes(ubi, &rename_list); mutex_unlock(&ubi->device_mutex); out_free: list_for_each_entry_safe(re, re1, &rename_list, list) { ubi_close_volume(re->desc); list_del(&re->list); kfree(re); } return err; } static long ubi_cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int err = 0; struct ubi_device *ubi; struct ubi_volume_desc *desc; void __user *argp = (void __user *)arg; if (!capable(CAP_SYS_RESOURCE)) return -EPERM; ubi = ubi_get_by_major(imajor(file->f_mapping->host)); if (!ubi) return -ENODEV; switch (cmd) { /* Create volume command */ case UBI_IOCMKVOL: { struct ubi_mkvol_req req; dbg_gen("create volume"); err = copy_from_user(&req, argp, sizeof(struct ubi_mkvol_req)); if (err) { err = -EFAULT; break; } err = verify_mkvol_req(ubi, &req); if (err) break; mutex_lock(&ubi->device_mutex); err = ubi_create_volume(ubi, &req); mutex_unlock(&ubi->device_mutex); if (err) break; err = put_user(req.vol_id, (__user int32_t *)argp); if (err) err = -EFAULT; break; } /* Remove volume command */ case UBI_IOCRMVOL: { int vol_id; dbg_gen("remove volume"); err = get_user(vol_id, (__user int32_t *)argp); if (err) { err = -EFAULT; break; } desc = ubi_open_volume(ubi->ubi_num, vol_id, UBI_EXCLUSIVE); if (IS_ERR(desc)) { err = PTR_ERR(desc); break; } mutex_lock(&ubi->device_mutex); err = ubi_remove_volume(desc, 0); mutex_unlock(&ubi->device_mutex); /* * The volume is deleted (unless an error occurred), and the * 'struct ubi_volume' object will be freed when * 'ubi_close_volume()' will call 'put_device()'. */ ubi_close_volume(desc); break; } /* Re-size volume command */ case UBI_IOCRSVOL: { int pebs; struct ubi_rsvol_req req; dbg_gen("re-size volume"); err = copy_from_user(&req, argp, sizeof(struct ubi_rsvol_req)); if (err) { err = -EFAULT; break; } err = verify_rsvol_req(ubi, &req); if (err) break; desc = ubi_open_volume(ubi->ubi_num, req.vol_id, UBI_EXCLUSIVE); if (IS_ERR(desc)) { err = PTR_ERR(desc); break; } pebs = div_u64(req.bytes + desc->vol->usable_leb_size - 1, desc->vol->usable_leb_size); mutex_lock(&ubi->device_mutex); err = ubi_resize_volume(desc, pebs); mutex_unlock(&ubi->device_mutex); ubi_close_volume(desc); break; } /* Re-name volumes command */ case UBI_IOCRNVOL: { struct ubi_rnvol_req *req; dbg_gen("re-name volumes"); req = kmalloc(sizeof(struct ubi_rnvol_req), GFP_KERNEL); if (!req) { err = -ENOMEM; break; } err = copy_from_user(req, argp, sizeof(struct ubi_rnvol_req)); if (err) { err = -EFAULT; kfree(req); break; } err = rename_volumes(ubi, req); kfree(req); break; } /* Check a specific PEB for bitflips and scrub it if needed */ case UBI_IOCRPEB: { int pnum; err = get_user(pnum, (__user int32_t *)argp); if (err) { err = -EFAULT; break; } err = ubi_bitflip_check(ubi, pnum, 0); break; } /* Force scrubbing for a specific PEB */ case UBI_IOCSPEB: { int pnum; err = get_user(pnum, (__user int32_t *)argp); if (err) { err = -EFAULT; break; } err = ubi_bitflip_check(ubi, pnum, 1); break; } default: err = -ENOTTY; break; } ubi_put_device(ubi); return err; } static long ctrl_cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int err = 0; void __user *argp = (void __user *)arg; if (!capable(CAP_SYS_RESOURCE)) return -EPERM; switch (cmd) { /* Attach an MTD device command */ case UBI_IOCATT: { struct ubi_attach_req req; struct mtd_info *mtd; dbg_gen("attach MTD device"); err = copy_from_user(&req, argp, sizeof(struct ubi_attach_req)); if (err) { err = -EFAULT; break; } if (req.mtd_num < 0 || (req.ubi_num < 0 && req.ubi_num != UBI_DEV_NUM_AUTO)) { err = -EINVAL; break; } mtd = get_mtd_device(NULL, req.mtd_num); if (IS_ERR(mtd)) { err = PTR_ERR(mtd); break; } /* * Note, further request verification is done by * 'ubi_attach_mtd_dev()'. */ mutex_lock(&ubi_devices_mutex); err = ubi_attach_mtd_dev(mtd, req.ubi_num, req.vid_hdr_offset, req.max_beb_per1024, !!req.disable_fm, !!req.need_resv_pool); mutex_unlock(&ubi_devices_mutex); if (err < 0) put_mtd_device(mtd); else /* @err contains UBI device number */ err = put_user(err, (__user int32_t *)argp); break; } /* Detach an MTD device command */ case UBI_IOCDET: { int ubi_num; dbg_gen("detach MTD device"); err = get_user(ubi_num, (__user int32_t *)argp); if (err) { err = -EFAULT; break; } mutex_lock(&ubi_devices_mutex); err = ubi_detach_mtd_dev(ubi_num, 0); mutex_unlock(&ubi_devices_mutex); break; } default: err = -ENOTTY; break; } return err; } /* UBI volume character device operations */ const struct file_operations ubi_vol_cdev_operations = { .owner = THIS_MODULE, .open = vol_cdev_open, .release = vol_cdev_release, .llseek = vol_cdev_llseek, .read = vol_cdev_read, .write = vol_cdev_write, .fsync = vol_cdev_fsync, .unlocked_ioctl = vol_cdev_ioctl, .compat_ioctl = compat_ptr_ioctl, }; /* UBI character device operations */ const struct file_operations ubi_cdev_operations = { .owner = THIS_MODULE, .llseek = no_llseek, .unlocked_ioctl = ubi_cdev_ioctl, .compat_ioctl = compat_ptr_ioctl, }; /* UBI control character device operations */ const struct file_operations ubi_ctrl_cdev_operations = { .owner = THIS_MODULE, .unlocked_ioctl = ctrl_cdev_ioctl, .compat_ioctl = compat_ptr_ioctl, .llseek = no_llseek, };
1 2 1 1 2 42 43 43 43 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 // SPDX-License-Identifier: GPL-2.0+ /* * 2002-10-15 Posix Clocks & timers * by George Anzinger george@mvista.com * Copyright (C) 2002 2003 by MontaVista Software. * * 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug. * Copyright (C) 2004 Boris Hu * * These are all the functions necessary to implement POSIX clocks & timers */ #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/mutex.h> #include <linux/sched/task.h> #include <linux/uaccess.h> #include <linux/list.h> #include <linux/init.h> #include <linux/compiler.h> #include <linux/hash.h> #include <linux/posix-clock.h> #include <linux/posix-timers.h> #include <linux/syscalls.h> #include <linux/wait.h> #include <linux/workqueue.h> #include <linux/export.h> #include <linux/hashtable.h> #include <linux/compat.h> #include <linux/nospec.h> #include <linux/time_namespace.h> #include "timekeeping.h" #include "posix-timers.h" static struct kmem_cache *posix_timers_cache; /* * Timers are managed in a hash table for lockless lookup. The hash key is * constructed from current::signal and the timer ID and the timer is * matched against current::signal and the timer ID when walking the hash * bucket list. * * This allows checkpoint/restore to reconstruct the exact timer IDs for * a process. */ static DEFINE_HASHTABLE(posix_timers_hashtable, 9); static DEFINE_SPINLOCK(hash_lock); static const struct k_clock * const posix_clocks[]; static const struct k_clock *clockid_to_kclock(const clockid_t id); static const struct k_clock clock_realtime, clock_monotonic; /* SIGEV_THREAD_ID cannot share a bit with the other SIGEV values. */ #if SIGEV_THREAD_ID != (SIGEV_THREAD_ID & \ ~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD)) #error "SIGEV_THREAD_ID must not share bit with other SIGEV values!" #endif static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags); #define lock_timer(tid, flags) \ ({ struct k_itimer *__timr; \ __cond_lock(&__timr->it_lock, __timr = __lock_timer(tid, flags)); \ __timr; \ }) static int hash(struct signal_struct *sig, unsigned int nr) { return hash_32(hash32_ptr(sig) ^ nr, HASH_BITS(posix_timers_hashtable)); } static struct k_itimer *__posix_timers_find(struct hlist_head *head, struct signal_struct *sig, timer_t id) { struct k_itimer *timer; hlist_for_each_entry_rcu(timer, head, t_hash, lockdep_is_held(&hash_lock)) { /* timer->it_signal can be set concurrently */ if ((READ_ONCE(timer->it_signal) == sig) && (timer->it_id == id)) return timer; } return NULL; } static struct k_itimer *posix_timer_by_id(timer_t id) { struct signal_struct *sig = current->signal; struct hlist_head *head = &posix_timers_hashtable[hash(sig, id)]; return __posix_timers_find(head, sig, id); } static int posix_timer_add(struct k_itimer *timer) { struct signal_struct *sig = current->signal; struct hlist_head *head; unsigned int cnt, id; /* * FIXME: Replace this by a per signal struct xarray once there is * a plan to handle the resulting CRIU regression gracefully. */ for (cnt = 0; cnt <= INT_MAX; cnt++) { spin_lock(&hash_lock); id = sig->next_posix_timer_id; /* Write the next ID back. Clamp it to the positive space */ sig->next_posix_timer_id = (id + 1) & INT_MAX; head = &posix_timers_hashtable[hash(sig, id)]; if (!__posix_timers_find(head, sig, id)) { hlist_add_head_rcu(&timer->t_hash, head); spin_unlock(&hash_lock); return id; } spin_unlock(&hash_lock); } /* POSIX return code when no timer ID could be allocated */ return -EAGAIN; } static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) { spin_unlock_irqrestore(&timr->it_lock, flags); } static int posix_get_realtime_timespec(clockid_t which_clock, struct timespec64 *tp) { ktime_get_real_ts64(tp); return 0; } static ktime_t posix_get_realtime_ktime(clockid_t which_clock) { return ktime_get_real(); } static int posix_clock_realtime_set(const clockid_t which_clock, const struct timespec64 *tp) { return do_sys_settimeofday64(tp, NULL); } static int posix_clock_realtime_adj(const clockid_t which_clock, struct __kernel_timex *t) { return do_adjtimex(t); } static int posix_get_monotonic_timespec(clockid_t which_clock, struct timespec64 *tp) { ktime_get_ts64(tp); timens_add_monotonic(tp); return 0; } static ktime_t posix_get_monotonic_ktime(clockid_t which_clock) { return ktime_get(); } static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 *tp) { ktime_get_raw_ts64(tp); timens_add_monotonic(tp); return 0; } static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec64 *tp) { ktime_get_coarse_real_ts64(tp); return 0; } static int posix_get_monotonic_coarse(clockid_t which_clock, struct timespec64 *tp) { ktime_get_coarse_ts64(tp); timens_add_monotonic(tp); return 0; } static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 *tp) { *tp = ktime_to_timespec64(KTIME_LOW_RES); return 0; } static int posix_get_boottime_timespec(const clockid_t which_clock, struct timespec64 *tp) { ktime_get_boottime_ts64(tp); timens_add_boottime(tp); return 0; } static ktime_t posix_get_boottime_ktime(const clockid_t which_clock) { return ktime_get_boottime(); } static int posix_get_tai_timespec(clockid_t which_clock, struct timespec64 *tp) { ktime_get_clocktai_ts64(tp); return 0; } static ktime_t posix_get_tai_ktime(clockid_t which_clock) { return ktime_get_clocktai(); } static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec64 *tp) { tp->tv_sec = 0; tp->tv_nsec = hrtimer_resolution; return 0; } static __init int init_posix_timers(void) { posix_timers_cache = kmem_cache_create("posix_timers_cache", sizeof(struct k_itimer), 0, SLAB_PANIC | SLAB_ACCOUNT, NULL); return 0; } __initcall(init_posix_timers); /* * The siginfo si_overrun field and the return value of timer_getoverrun(2) * are of type int. Clamp the overrun value to INT_MAX */ static inline int timer_overrun_to_int(struct k_itimer *timr, int baseval) { s64 sum = timr->it_overrun_last + (s64)baseval; return sum > (s64)INT_MAX ? INT_MAX : (int)sum; } static void common_hrtimer_rearm(struct k_itimer *timr) { struct hrtimer *timer = &timr->it.real.timer; timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(), timr->it_interval); hrtimer_restart(timer); } /* * This function is called from the signal delivery code if * info->si_sys_private is not zero, which indicates that the timer has to * be rearmed. Restart the timer and update info::si_overrun. */ void posixtimer_rearm(struct kernel_siginfo *info) { struct k_itimer *timr; unsigned long flags; timr = lock_timer(info->si_tid, &flags); if (!timr) return; if (timr->it_interval && timr->it_requeue_pending == info->si_sys_private) { timr->kclock->timer_rearm(timr); timr->it_active = 1; timr->it_overrun_last = timr->it_overrun; timr->it_overrun = -1LL; ++timr->it_requeue_pending; info->si_overrun = timer_overrun_to_int(timr, info->si_overrun); } unlock_timer(timr, flags); } int posix_timer_event(struct k_itimer *timr, int si_private) { enum pid_type type; int ret; /* * FIXME: if ->sigq is queued we can race with * dequeue_signal()->posixtimer_rearm(). * * If dequeue_signal() sees the "right" value of * si_sys_private it calls posixtimer_rearm(). * We re-queue ->sigq and drop ->it_lock(). * posixtimer_rearm() locks the timer * and re-schedules it while ->sigq is pending. * Not really bad, but not that we want. */ timr->sigq->info.si_sys_private = si_private; type = !(timr->it_sigev_notify & SIGEV_THREAD_ID) ? PIDTYPE_TGID : PIDTYPE_PID; ret = send_sigqueue(timr->sigq, timr->it_pid, type); /* If we failed to send the signal the timer stops. */ return ret > 0; } /* * This function gets called when a POSIX.1b interval timer expires from * the HRTIMER interrupt (soft interrupt on RT kernels). * * Handles CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME and CLOCK_TAI * based timers. */ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) { enum hrtimer_restart ret = HRTIMER_NORESTART; struct k_itimer *timr; unsigned long flags; int si_private = 0; timr = container_of(timer, struct k_itimer, it.real.timer); spin_lock_irqsave(&timr->it_lock, flags); timr->it_active = 0; if (timr->it_interval != 0) si_private = ++timr->it_requeue_pending; if (posix_timer_event(timr, si_private)) { /* * The signal was not queued due to SIG_IGN. As a * consequence the timer is not going to be rearmed from * the signal delivery path. But as a real signal handler * can be installed later the timer must be rearmed here. */ if (timr->it_interval != 0) { ktime_t now = hrtimer_cb_get_time(timer); /* * FIXME: What we really want, is to stop this * timer completely and restart it in case the * SIG_IGN is removed. This is a non trivial * change to the signal handling code. * * For now let timers with an interval less than a * jiffie expire every jiffie and recheck for a * valid signal handler. * * This avoids interrupt starvation in case of a * very small interval, which would expire the * timer immediately again. * * Moving now ahead of time by one jiffie tricks * hrtimer_forward() to expire the timer later, * while it still maintains the overrun accuracy * for the price of a slight inconsistency in the * timer_gettime() case. This is at least better * than a timer storm. * * Only required when high resolution timers are * enabled as the periodic tick based timers are * automatically aligned to the next tick. */ if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS)) { ktime_t kj = TICK_NSEC; if (timr->it_interval < kj) now = ktime_add(now, kj); } timr->it_overrun += hrtimer_forward(timer, now, timr->it_interval); ret = HRTIMER_RESTART; ++timr->it_requeue_pending; timr->it_active = 1; } } unlock_timer(timr, flags); return ret; } static struct pid *good_sigevent(sigevent_t * event) { struct pid *pid = task_tgid(current); struct task_struct *rtn; switch (event->sigev_notify) { case SIGEV_SIGNAL | SIGEV_THREAD_ID: pid = find_vpid(event->sigev_notify_thread_id); rtn = pid_task(pid, PIDTYPE_PID); if (!rtn || !same_thread_group(rtn, current)) return NULL; fallthrough; case SIGEV_SIGNAL: case SIGEV_THREAD: if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX) return NULL; fallthrough; case SIGEV_NONE: return pid; default: return NULL; } } static struct k_itimer * alloc_posix_timer(void) { struct k_itimer *tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL); if (!tmr) return tmr; if (unlikely(!(tmr->sigq = sigqueue_alloc()))) { kmem_cache_free(posix_timers_cache, tmr); return NULL; } clear_siginfo(&tmr->sigq->info); return tmr; } static void k_itimer_rcu_free(struct rcu_head *head) { struct k_itimer *tmr = container_of(head, struct k_itimer, rcu); kmem_cache_free(posix_timers_cache, tmr); } static void posix_timer_free(struct k_itimer *tmr) { put_pid(tmr->it_pid); sigqueue_free(tmr->sigq); call_rcu(&tmr->rcu, k_itimer_rcu_free); } static void posix_timer_unhash_and_free(struct k_itimer *tmr) { spin_lock(&hash_lock); hlist_del_rcu(&tmr->t_hash); spin_unlock(&hash_lock); posix_timer_free(tmr); } static int common_timer_create(struct k_itimer *new_timer) { hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0); return 0; } /* Create a POSIX.1b interval timer. */ static int do_timer_create(clockid_t which_clock, struct sigevent *event, timer_t __user *created_timer_id) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct k_itimer *new_timer; int error, new_timer_id; if (!kc) return -EINVAL; if (!kc->timer_create) return -EOPNOTSUPP; new_timer = alloc_posix_timer(); if (unlikely(!new_timer)) return -EAGAIN; spin_lock_init(&new_timer->it_lock); /* * Add the timer to the hash table. The timer is not yet valid * because new_timer::it_signal is still NULL. The timer id is also * not yet visible to user space. */ new_timer_id = posix_timer_add(new_timer); if (new_timer_id < 0) { posix_timer_free(new_timer); return new_timer_id; } new_timer->it_id = (timer_t) new_timer_id; new_timer->it_clock = which_clock; new_timer->kclock = kc; new_timer->it_overrun = -1LL; if (event) { rcu_read_lock(); new_timer->it_pid = get_pid(good_sigevent(event)); rcu_read_unlock(); if (!new_timer->it_pid) { error = -EINVAL; goto out; } new_timer->it_sigev_notify = event->sigev_notify; new_timer->sigq->info.si_signo = event->sigev_signo; new_timer->sigq->info.si_value = event->sigev_value; } else { new_timer->it_sigev_notify = SIGEV_SIGNAL; new_timer->sigq->info.si_signo = SIGALRM; memset(&new_timer->sigq->info.si_value, 0, sizeof(sigval_t)); new_timer->sigq->info.si_value.sival_int = new_timer->it_id; new_timer->it_pid = get_pid(task_tgid(current)); } new_timer->sigq->info.si_tid = new_timer->it_id; new_timer->sigq->info.si_code = SI_TIMER; if (copy_to_user(created_timer_id, &new_timer_id, sizeof (new_timer_id))) { error = -EFAULT; goto out; } /* * After succesful copy out, the timer ID is visible to user space * now but not yet valid because new_timer::signal is still NULL. * * Complete the initialization with the clock specific create * callback. */ error = kc->timer_create(new_timer); if (error) goto out; spin_lock_irq(&current->sighand->siglock); /* This makes the timer valid in the hash table */ WRITE_ONCE(new_timer->it_signal, current->signal); list_add(&new_timer->list, &current->signal->posix_timers); spin_unlock_irq(&current->sighand->siglock); /* * After unlocking sighand::siglock @new_timer is subject to * concurrent removal and cannot be touched anymore */ return 0; out: posix_timer_unhash_and_free(new_timer); return error; } SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, struct sigevent __user *, timer_event_spec, timer_t __user *, created_timer_id) { if (timer_event_spec) { sigevent_t event; if (copy_from_user(&event, timer_event_spec, sizeof (event))) return -EFAULT; return do_timer_create(which_clock, &event, created_timer_id); } return do_timer_create(which_clock, NULL, created_timer_id); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE3(timer_create, clockid_t, which_clock, struct compat_sigevent __user *, timer_event_spec, timer_t __user *, created_timer_id) { if (timer_event_spec) { sigevent_t event; if (get_compat_sigevent(&event, timer_event_spec)) return -EFAULT; return do_timer_create(which_clock, &event, created_timer_id); } return do_timer_create(which_clock, NULL, created_timer_id); } #endif static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) { struct k_itimer *timr; /* * timer_t could be any type >= int and we want to make sure any * @timer_id outside positive int range fails lookup. */ if ((unsigned long long)timer_id > INT_MAX) return NULL; /* * The hash lookup and the timers are RCU protected. * * Timers are added to the hash in invalid state where * timr::it_signal == NULL. timer::it_signal is only set after the * rest of the initialization succeeded. * * Timer destruction happens in steps: * 1) Set timr::it_signal to NULL with timr::it_lock held * 2) Release timr::it_lock * 3) Remove from the hash under hash_lock * 4) Call RCU for removal after the grace period * * Holding rcu_read_lock() accross the lookup ensures that * the timer cannot be freed. * * The lookup validates locklessly that timr::it_signal == * current::it_signal and timr::it_id == @timer_id. timr::it_id * can't change, but timr::it_signal becomes NULL during * destruction. */ rcu_read_lock(); timr = posix_timer_by_id(timer_id); if (timr) { spin_lock_irqsave(&timr->it_lock, *flags); /* * Validate under timr::it_lock that timr::it_signal is * still valid. Pairs with #1 above. */ if (timr->it_signal == current->signal) { rcu_read_unlock(); return timr; } spin_unlock_irqrestore(&timr->it_lock, *flags); } rcu_read_unlock(); return NULL; } static ktime_t common_hrtimer_remaining(struct k_itimer *timr, ktime_t now) { struct hrtimer *timer = &timr->it.real.timer; return __hrtimer_expires_remaining_adjusted(timer, now); } static s64 common_hrtimer_forward(struct k_itimer *timr, ktime_t now) { struct hrtimer *timer = &timr->it.real.timer; return hrtimer_forward(timer, now, timr->it_interval); } /* * Get the time remaining on a POSIX.1b interval timer. * * Two issues to handle here: * * 1) The timer has a requeue pending. The return value must appear as * if the timer has been requeued right now. * * 2) The timer is a SIGEV_NONE timer. These timers are never enqueued * into the hrtimer queue and therefore never expired. Emulate expiry * here taking #1 into account. */ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) { const struct k_clock *kc = timr->kclock; ktime_t now, remaining, iv; bool sig_none; sig_none = timr->it_sigev_notify == SIGEV_NONE; iv = timr->it_interval; /* interval timer ? */ if (iv) { cur_setting->it_interval = ktime_to_timespec64(iv); } else if (!timr->it_active) { /* * SIGEV_NONE oneshot timers are never queued and therefore * timr->it_active is always false. The check below * vs. remaining time will handle this case. * * For all other timers there is nothing to update here, so * return. */ if (!sig_none) return; } now = kc->clock_get_ktime(timr->it_clock); /* * If this is an interval timer and either has requeue pending or * is a SIGEV_NONE timer move the expiry time forward by intervals, * so expiry is > now. */ if (iv && (timr->it_requeue_pending & REQUEUE_PENDING || sig_none)) timr->it_overrun += kc->timer_forward(timr, now); remaining = kc->timer_remaining(timr, now); /* * As @now is retrieved before a possible timer_forward() and * cannot be reevaluated by the compiler @remaining is based on the * same @now value. Therefore @remaining is consistent vs. @now. * * Consequently all interval timers, i.e. @iv > 0, cannot have a * remaining time <= 0 because timer_forward() guarantees to move * them forward so that the next timer expiry is > @now. */ if (remaining <= 0) { /* * A single shot SIGEV_NONE timer must return 0, when it is * expired! Timers which have a real signal delivery mode * must return a remaining time greater than 0 because the * signal has not yet been delivered. */ if (!sig_none) cur_setting->it_value.tv_nsec = 1; } else { cur_setting->it_value = ktime_to_timespec64(remaining); } } static int do_timer_gettime(timer_t timer_id, struct itimerspec64 *setting) { const struct k_clock *kc; struct k_itimer *timr; unsigned long flags; int ret = 0; timr = lock_timer(timer_id, &flags); if (!timr) return -EINVAL; memset(setting, 0, sizeof(*setting)); kc = timr->kclock; if (WARN_ON_ONCE(!kc || !kc->timer_get)) ret = -EINVAL; else kc->timer_get(timr, setting); unlock_timer(timr, flags); return ret; } /* Get the time remaining on a POSIX.1b interval timer. */ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, struct __kernel_itimerspec __user *, setting) { struct itimerspec64 cur_setting; int ret = do_timer_gettime(timer_id, &cur_setting); if (!ret) { if (put_itimerspec64(&cur_setting, setting)) ret = -EFAULT; } return ret; } #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE2(timer_gettime32, timer_t, timer_id, struct old_itimerspec32 __user *, setting) { struct itimerspec64 cur_setting; int ret = do_timer_gettime(timer_id, &cur_setting); if (!ret) { if (put_old_itimerspec32(&cur_setting, setting)) ret = -EFAULT; } return ret; } #endif /** * sys_timer_getoverrun - Get the number of overruns of a POSIX.1b interval timer * @timer_id: The timer ID which identifies the timer * * The "overrun count" of a timer is one plus the number of expiration * intervals which have elapsed between the first expiry, which queues the * signal and the actual signal delivery. On signal delivery the "overrun * count" is calculated and cached, so it can be returned directly here. * * As this is relative to the last queued signal the returned overrun count * is meaningless outside of the signal delivery path and even there it * does not accurately reflect the current state when user space evaluates * it. * * Returns: * -EINVAL @timer_id is invalid * 1..INT_MAX The number of overruns related to the last delivered signal */ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) { struct k_itimer *timr; unsigned long flags; int overrun; timr = lock_timer(timer_id, &flags); if (!timr) return -EINVAL; overrun = timer_overrun_to_int(timr, 0); unlock_timer(timr, flags); return overrun; } static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires, bool absolute, bool sigev_none) { struct hrtimer *timer = &timr->it.real.timer; enum hrtimer_mode mode; mode = absolute ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL; /* * Posix magic: Relative CLOCK_REALTIME timers are not affected by * clock modifications, so they become CLOCK_MONOTONIC based under the * hood. See hrtimer_init(). Update timr->kclock, so the generic * functions which use timr->kclock->clock_get_*() work. * * Note: it_clock stays unmodified, because the next timer_set() might * use ABSTIME, so it needs to switch back. */ if (timr->it_clock == CLOCK_REALTIME) timr->kclock = absolute ? &clock_realtime : &clock_monotonic; hrtimer_init(&timr->it.real.timer, timr->it_clock, mode); timr->it.real.timer.function = posix_timer_fn; if (!absolute) expires = ktime_add_safe(expires, timer->base->get_time()); hrtimer_set_expires(timer, expires); if (!sigev_none) hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } static int common_hrtimer_try_to_cancel(struct k_itimer *timr) { return hrtimer_try_to_cancel(&timr->it.real.timer); } static void common_timer_wait_running(struct k_itimer *timer) { hrtimer_cancel_wait_running(&timer->it.real.timer); } /* * On PREEMPT_RT this prevents priority inversion and a potential livelock * against the ksoftirqd thread in case that ksoftirqd gets preempted while * executing a hrtimer callback. * * See the comments in hrtimer_cancel_wait_running(). For PREEMPT_RT=n this * just results in a cpu_relax(). * * For POSIX CPU timers with CONFIG_POSIX_CPU_TIMERS_TASK_WORK=n this is * just a cpu_relax(). With CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y this * prevents spinning on an eventually scheduled out task and a livelock * when the task which tries to delete or disarm the timer has preempted * the task which runs the expiry in task work context. */ static struct k_itimer *timer_wait_running(struct k_itimer *timer, unsigned long *flags) { const struct k_clock *kc = READ_ONCE(timer->kclock); timer_t timer_id = READ_ONCE(timer->it_id); /* Prevent kfree(timer) after dropping the lock */ rcu_read_lock(); unlock_timer(timer, *flags); /* * kc->timer_wait_running() might drop RCU lock. So @timer * cannot be touched anymore after the function returns! */ if (!WARN_ON_ONCE(!kc->timer_wait_running)) kc->timer_wait_running(timer); rcu_read_unlock(); /* Relock the timer. It might be not longer hashed. */ return lock_timer(timer_id, flags); } /* Set a POSIX.1b interval timer. */ int common_timer_set(struct k_itimer *timr, int flags, struct itimerspec64 *new_setting, struct itimerspec64 *old_setting) { const struct k_clock *kc = timr->kclock; bool sigev_none; ktime_t expires; if (old_setting) common_timer_get(timr, old_setting); /* Prevent rearming by clearing the interval */ timr->it_interval = 0; /* * Careful here. On SMP systems the timer expiry function could be * active and spinning on timr->it_lock. */ if (kc->timer_try_to_cancel(timr) < 0) return TIMER_RETRY; timr->it_active = 0; timr->it_requeue_pending = (timr->it_requeue_pending + 2) & ~REQUEUE_PENDING; timr->it_overrun_last = 0; /* Switch off the timer when it_value is zero */ if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) return 0; timr->it_interval = timespec64_to_ktime(new_setting->it_interval); expires = timespec64_to_ktime(new_setting->it_value); if (flags & TIMER_ABSTIME) expires = timens_ktime_to_host(timr->it_clock, expires); sigev_none = timr->it_sigev_notify == SIGEV_NONE; kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none); timr->it_active = !sigev_none; return 0; } static int do_timer_settime(timer_t timer_id, int tmr_flags, struct itimerspec64 *new_spec64, struct itimerspec64 *old_spec64) { const struct k_clock *kc; struct k_itimer *timr; unsigned long flags; int error = 0; if (!timespec64_valid(&new_spec64->it_interval) || !timespec64_valid(&new_spec64->it_value)) return -EINVAL; if (old_spec64) memset(old_spec64, 0, sizeof(*old_spec64)); timr = lock_timer(timer_id, &flags); retry: if (!timr) return -EINVAL; kc = timr->kclock; if (WARN_ON_ONCE(!kc || !kc->timer_set)) error = -EINVAL; else error = kc->timer_set(timr, tmr_flags, new_spec64, old_spec64); if (error == TIMER_RETRY) { // We already got the old time... old_spec64 = NULL; /* Unlocks and relocks the timer if it still exists */ timr = timer_wait_running(timr, &flags); goto retry; } unlock_timer(timr, flags); return error; } /* Set a POSIX.1b interval timer */ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, const struct __kernel_itimerspec __user *, new_setting, struct __kernel_itimerspec __user *, old_setting) { struct itimerspec64 new_spec, old_spec, *rtn; int error = 0; if (!new_setting) return -EINVAL; if (get_itimerspec64(&new_spec, new_setting)) return -EFAULT; rtn = old_setting ? &old_spec : NULL; error = do_timer_settime(timer_id, flags, &new_spec, rtn); if (!error && old_setting) { if (put_itimerspec64(&old_spec, old_setting)) error = -EFAULT; } return error; } #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE4(timer_settime32, timer_t, timer_id, int, flags, struct old_itimerspec32 __user *, new, struct old_itimerspec32 __user *, old) { struct itimerspec64 new_spec, old_spec; struct itimerspec64 *rtn = old ? &old_spec : NULL; int error = 0; if (!new) return -EINVAL; if (get_old_itimerspec32(&new_spec, new)) return -EFAULT; error = do_timer_settime(timer_id, flags, &new_spec, rtn); if (!error && old) { if (put_old_itimerspec32(&old_spec, old)) error = -EFAULT; } return error; } #endif int common_timer_del(struct k_itimer *timer) { const struct k_clock *kc = timer->kclock; timer->it_interval = 0; if (kc->timer_try_to_cancel(timer) < 0) return TIMER_RETRY; timer->it_active = 0; return 0; } static inline int timer_delete_hook(struct k_itimer *timer) { const struct k_clock *kc = timer->kclock; if (WARN_ON_ONCE(!kc || !kc->timer_del)) return -EINVAL; return kc->timer_del(timer); } /* Delete a POSIX.1b interval timer. */ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id) { struct k_itimer *timer; unsigned long flags; timer = lock_timer(timer_id, &flags); retry_delete: if (!timer) return -EINVAL; if (unlikely(timer_delete_hook(timer) == TIMER_RETRY)) { /* Unlocks and relocks the timer if it still exists */ timer = timer_wait_running(timer, &flags); goto retry_delete; } spin_lock(&current->sighand->siglock); list_del(&timer->list); spin_unlock(&current->sighand->siglock); /* * A concurrent lookup could check timer::it_signal lockless. It * will reevaluate with timer::it_lock held and observe the NULL. */ WRITE_ONCE(timer->it_signal, NULL); unlock_timer(timer, flags); posix_timer_unhash_and_free(timer); return 0; } /* * Delete a timer if it is armed, remove it from the hash and schedule it * for RCU freeing. */ static void itimer_delete(struct k_itimer *timer) { unsigned long flags; /* * irqsave is required to make timer_wait_running() work. */ spin_lock_irqsave(&timer->it_lock, flags); retry_delete: /* * Even if the timer is not longer accessible from other tasks * it still might be armed and queued in the underlying timer * mechanism. Worse, that timer mechanism might run the expiry * function concurrently. */ if (timer_delete_hook(timer) == TIMER_RETRY) { /* * Timer is expired concurrently, prevent livelocks * and pointless spinning on RT. * * timer_wait_running() drops timer::it_lock, which opens * the possibility for another task to delete the timer. * * That's not possible here because this is invoked from * do_exit() only for the last thread of the thread group. * So no other task can access and delete that timer. */ if (WARN_ON_ONCE(timer_wait_running(timer, &flags) != timer)) return; goto retry_delete; } list_del(&timer->list); /* * Setting timer::it_signal to NULL is technically not required * here as nothing can access the timer anymore legitimately via * the hash table. Set it to NULL nevertheless so that all deletion * paths are consistent. */ WRITE_ONCE(timer->it_signal, NULL); spin_unlock_irqrestore(&timer->it_lock, flags); posix_timer_unhash_and_free(timer); } /* * Invoked from do_exit() when the last thread of a thread group exits. * At that point no other task can access the timers of the dying * task anymore. */ void exit_itimers(struct task_struct *tsk) { struct list_head timers; struct k_itimer *tmr; if (list_empty(&tsk->signal->posix_timers)) return; /* Protect against concurrent read via /proc/$PID/timers */ spin_lock_irq(&tsk->sighand->siglock); list_replace_init(&tsk->signal->posix_timers, &timers); spin_unlock_irq(&tsk->sighand->siglock); /* The timers are not longer accessible via tsk::signal */ while (!list_empty(&timers)) { tmr = list_first_entry(&timers, struct k_itimer, list); itimer_delete(tmr); } } SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, const struct __kernel_timespec __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 new_tp; if (!kc || !kc->clock_set) return -EINVAL; if (get_timespec64(&new_tp, tp)) return -EFAULT; /* * Permission checks have to be done inside the clock specific * setter callback. */ return kc->clock_set(which_clock, &new_tp); } SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, struct __kernel_timespec __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 kernel_tp; int error; if (!kc) return -EINVAL; error = kc->clock_get_timespec(which_clock, &kernel_tp); if (!error && put_timespec64(&kernel_tp, tp)) error = -EFAULT; return error; } int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * ktx) { const struct k_clock *kc = clockid_to_kclock(which_clock); if (!kc) return -EINVAL; if (!kc->clock_adj) return -EOPNOTSUPP; return kc->clock_adj(which_clock, ktx); } SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock, struct __kernel_timex __user *, utx) { struct __kernel_timex ktx; int err; if (copy_from_user(&ktx, utx, sizeof(ktx))) return -EFAULT; err = do_clock_adjtime(which_clock, &ktx); if (err >= 0 && copy_to_user(utx, &ktx, sizeof(ktx))) return -EFAULT; return err; } /** * sys_clock_getres - Get the resolution of a clock * @which_clock: The clock to get the resolution for * @tp: Pointer to a a user space timespec64 for storage * * POSIX defines: * * "The clock_getres() function shall return the resolution of any * clock. Clock resolutions are implementation-defined and cannot be set by * a process. If the argument res is not NULL, the resolution of the * specified clock shall be stored in the location pointed to by res. If * res is NULL, the clock resolution is not returned. If the time argument * of clock_settime() is not a multiple of res, then the value is truncated * to a multiple of res." * * Due to the various hardware constraints the real resolution can vary * wildly and even change during runtime when the underlying devices are * replaced. The kernel also can use hardware devices with different * resolutions for reading the time and for arming timers. * * The kernel therefore deviates from the POSIX spec in various aspects: * * 1) The resolution returned to user space * * For CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME, CLOCK_TAI, * CLOCK_REALTIME_ALARM, CLOCK_BOOTTIME_ALAREM and CLOCK_MONOTONIC_RAW * the kernel differentiates only two cases: * * I) Low resolution mode: * * When high resolution timers are disabled at compile or runtime * the resolution returned is nanoseconds per tick, which represents * the precision at which timers expire. * * II) High resolution mode: * * When high resolution timers are enabled the resolution returned * is always one nanosecond independent of the actual resolution of * the underlying hardware devices. * * For CLOCK_*_ALARM the actual resolution depends on system * state. When system is running the resolution is the same as the * resolution of the other clocks. During suspend the actual * resolution is the resolution of the underlying RTC device which * might be way less precise than the clockevent device used during * running state. * * For CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE the resolution * returned is always nanoseconds per tick. * * For CLOCK_PROCESS_CPUTIME and CLOCK_THREAD_CPUTIME the resolution * returned is always one nanosecond under the assumption that the * underlying scheduler clock has a better resolution than nanoseconds * per tick. * * For dynamic POSIX clocks (PTP devices) the resolution returned is * always one nanosecond. * * 2) Affect on sys_clock_settime() * * The kernel does not truncate the time which is handed in to * sys_clock_settime(). The kernel internal timekeeping is always using * nanoseconds precision independent of the clocksource device which is * used to read the time from. The resolution of that device only * affects the presicion of the time returned by sys_clock_gettime(). * * Returns: * 0 Success. @tp contains the resolution * -EINVAL @which_clock is not a valid clock ID * -EFAULT Copying the resolution to @tp faulted * -ENODEV Dynamic POSIX clock is not backed by a device * -EOPNOTSUPP Dynamic POSIX clock does not support getres() */ SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, struct __kernel_timespec __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 rtn_tp; int error; if (!kc) return -EINVAL; error = kc->clock_getres(which_clock, &rtn_tp); if (!error && tp && put_timespec64(&rtn_tp, tp)) error = -EFAULT; return error; } #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE2(clock_settime32, clockid_t, which_clock, struct old_timespec32 __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 ts; if (!kc || !kc->clock_set) return -EINVAL; if (get_old_timespec32(&ts, tp)) return -EFAULT; return kc->clock_set(which_clock, &ts); } SYSCALL_DEFINE2(clock_gettime32, clockid_t, which_clock, struct old_timespec32 __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 ts; int err; if (!kc) return -EINVAL; err = kc->clock_get_timespec(which_clock, &ts); if (!err && put_old_timespec32(&ts, tp)) err = -EFAULT; return err; } SYSCALL_DEFINE2(clock_adjtime32, clockid_t, which_clock, struct old_timex32 __user *, utp) { struct __kernel_timex ktx; int err; err = get_old_timex32(&ktx, utp); if (err) return err; err = do_clock_adjtime(which_clock, &ktx); if (err >= 0 && put_old_timex32(utp, &ktx)) return -EFAULT; return err; } SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock, struct old_timespec32 __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 ts; int err; if (!kc) return -EINVAL; err = kc->clock_getres(which_clock, &ts); if (!err && tp && put_old_timespec32(&ts, tp)) return -EFAULT; return err; } #endif /* * sys_clock_nanosleep() for CLOCK_REALTIME and CLOCK_TAI */ static int common_nsleep(const clockid_t which_clock, int flags, const struct timespec64 *rqtp) { ktime_t texp = timespec64_to_ktime(*rqtp); return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL, which_clock); } /* * sys_clock_nanosleep() for CLOCK_MONOTONIC and CLOCK_BOOTTIME * * Absolute nanosleeps for these clocks are time-namespace adjusted. */ static int common_nsleep_timens(const clockid_t which_clock, int flags, const struct timespec64 *rqtp) { ktime_t texp = timespec64_to_ktime(*rqtp); if (flags & TIMER_ABSTIME) texp = timens_ktime_to_host(which_clock, texp); return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL, which_clock); } SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, const struct __kernel_timespec __user *, rqtp, struct __kernel_timespec __user *, rmtp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 t; if (!kc) return -EINVAL; if (!kc->nsleep) return -EOPNOTSUPP; if (get_timespec64(&t, rqtp)) return -EFAULT; if (!timespec64_valid(&t)) return -EINVAL; if (flags & TIMER_ABSTIME) rmtp = NULL; current->restart_block.fn = do_no_restart_syscall; current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE; current->restart_block.nanosleep.rmtp = rmtp; return kc->nsleep(which_clock, flags, &t); } #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags, struct old_timespec32 __user *, rqtp, struct old_timespec32 __user *, rmtp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 t; if (!kc) return -EINVAL; if (!kc->nsleep) return -EOPNOTSUPP; if (get_old_timespec32(&t, rqtp)) return -EFAULT; if (!timespec64_valid(&t)) return -EINVAL; if (flags & TIMER_ABSTIME) rmtp = NULL; current->restart_block.fn = do_no_restart_syscall; current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE; current->restart_block.nanosleep.compat_rmtp = rmtp; return kc->nsleep(which_clock, flags, &t); } #endif static const struct k_clock clock_realtime = { .clock_getres = posix_get_hrtimer_res, .clock_get_timespec = posix_get_realtime_timespec, .clock_get_ktime = posix_get_realtime_ktime, .clock_set = posix_clock_realtime_set, .clock_adj = posix_clock_realtime_adj, .nsleep = common_nsleep, .timer_create = common_timer_create, .timer_set = common_timer_set, .timer_get = common_timer_get, .timer_del = common_timer_del, .timer_rearm = common_hrtimer_rearm, .timer_forward = common_hrtimer_forward, .timer_remaining = common_hrtimer_remaining, .timer_try_to_cancel = common_hrtimer_try_to_cancel, .timer_wait_running = common_timer_wait_running, .timer_arm = common_hrtimer_arm, }; static const struct k_clock clock_monotonic = { .clock_getres = posix_get_hrtimer_res, .clock_get_timespec = posix_get_monotonic_timespec, .clock_get_ktime = posix_get_monotonic_ktime, .nsleep = common_nsleep_timens, .timer_create = common_timer_create, .timer_set = common_timer_set, .timer_get = common_timer_get, .timer_del = common_timer_del, .timer_rearm = common_hrtimer_rearm, .timer_forward = common_hrtimer_forward, .timer_remaining = common_hrtimer_remaining, .timer_try_to_cancel = common_hrtimer_try_to_cancel, .timer_wait_running = common_timer_wait_running, .timer_arm = common_hrtimer_arm, }; static const struct k_clock clock_monotonic_raw = { .clock_getres = posix_get_hrtimer_res, .clock_get_timespec = posix_get_monotonic_raw, }; static const struct k_clock clock_realtime_coarse = { .clock_getres = posix_get_coarse_res, .clock_get_timespec = posix_get_realtime_coarse, }; static const struct k_clock clock_monotonic_coarse = { .clock_getres = posix_get_coarse_res, .clock_get_timespec = posix_get_monotonic_coarse, }; static const struct k_clock clock_tai = { .clock_getres = posix_get_hrtimer_res, .clock_get_ktime = posix_get_tai_ktime, .clock_get_timespec = posix_get_tai_timespec, .nsleep = common_nsleep, .timer_create = common_timer_create, .timer_set = common_timer_set, .timer_get = common_timer_get, .timer_del = common_timer_del, .timer_rearm = common_hrtimer_rearm, .timer_forward = common_hrtimer_forward, .timer_remaining = common_hrtimer_remaining, .timer_try_to_cancel = common_hrtimer_try_to_cancel, .timer_wait_running = common_timer_wait_running, .timer_arm = common_hrtimer_arm, }; static const struct k_clock clock_boottime = { .clock_getres = posix_get_hrtimer_res, .clock_get_ktime = posix_get_boottime_ktime, .clock_get_timespec = posix_get_boottime_timespec, .nsleep = common_nsleep_timens, .timer_create = common_timer_create, .timer_set = common_timer_set, .timer_get = common_timer_get, .timer_del = common_timer_del, .timer_rearm = common_hrtimer_rearm, .timer_forward = common_hrtimer_forward, .timer_remaining = common_hrtimer_remaining, .timer_try_to_cancel = common_hrtimer_try_to_cancel, .timer_wait_running = common_timer_wait_running, .timer_arm = common_hrtimer_arm, }; static const struct k_clock * const posix_clocks[] = { [CLOCK_REALTIME] = &clock_realtime, [CLOCK_MONOTONIC] = &clock_monotonic, [CLOCK_PROCESS_CPUTIME_ID] = &clock_process, [CLOCK_THREAD_CPUTIME_ID] = &clock_thread, [CLOCK_MONOTONIC_RAW] = &clock_monotonic_raw, [CLOCK_REALTIME_COARSE] = &clock_realtime_coarse, [CLOCK_MONOTONIC_COARSE] = &clock_monotonic_coarse, [CLOCK_BOOTTIME] = &clock_boottime, [CLOCK_REALTIME_ALARM] = &alarm_clock, [CLOCK_BOOTTIME_ALARM] = &alarm_clock, [CLOCK_TAI] = &clock_tai, }; static const struct k_clock *clockid_to_kclock(const clockid_t id) { clockid_t idx = id; if (id < 0) { return (id & CLOCKFD_MASK) == CLOCKFD ? &clock_posix_dynamic : &clock_posix_cpu; } if (id >= ARRAY_SIZE(posix_clocks)) return NULL; return posix_clocks[array_index_nospec(idx, ARRAY_SIZE(posix_clocks))]; }
5 2 3 11 14 14 6 6 6 6 6 8 8 6 14 13 13 13 13 1 1 2 25 2 25 14 9 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/proc/root.c * * Copyright (C) 1991, 1992 Linus Torvalds * * proc root directory handling functions */ #include <linux/errno.h> #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/stat.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/sched/stat.h> #include <linux/module.h> #include <linux/bitops.h> #include <linux/user_namespace.h> #include <linux/fs_context.h> #include <linux/mount.h> #include <linux/pid_namespace.h> #include <linux/fs_parser.h> #include <linux/cred.h> #include <linux/magic.h> #include <linux/slab.h> #include "internal.h" struct proc_fs_context { struct pid_namespace *pid_ns; unsigned int mask; enum proc_hidepid hidepid; int gid; enum proc_pidonly pidonly; }; enum proc_param { Opt_gid, Opt_hidepid, Opt_subset, }; static const struct fs_parameter_spec proc_fs_parameters[] = { fsparam_u32("gid", Opt_gid), fsparam_string("hidepid", Opt_hidepid), fsparam_string("subset", Opt_subset), {} }; static inline int valid_hidepid(unsigned int value) { return (value == HIDEPID_OFF || value == HIDEPID_NO_ACCESS || value == HIDEPID_INVISIBLE || value == HIDEPID_NOT_PTRACEABLE); } static int proc_parse_hidepid_param(struct fs_context *fc, struct fs_parameter *param) { struct proc_fs_context *ctx = fc->fs_private; struct fs_parameter_spec hidepid_u32_spec = fsparam_u32("hidepid", Opt_hidepid); struct fs_parse_result result; int base = (unsigned long)hidepid_u32_spec.data; if (param->type != fs_value_is_string) return invalf(fc, "proc: unexpected type of hidepid value\n"); if (!kstrtouint(param->string, base, &result.uint_32)) { if (!valid_hidepid(result.uint_32)) return invalf(fc, "proc: unknown value of hidepid - %s\n", param->string); ctx->hidepid = result.uint_32; return 0; } if (!strcmp(param->string, "off")) ctx->hidepid = HIDEPID_OFF; else if (!strcmp(param->string, "noaccess")) ctx->hidepid = HIDEPID_NO_ACCESS; else if (!strcmp(param->string, "invisible")) ctx->hidepid = HIDEPID_INVISIBLE; else if (!strcmp(param->string, "ptraceable")) ctx->hidepid = HIDEPID_NOT_PTRACEABLE; else return invalf(fc, "proc: unknown value of hidepid - %s\n", param->string); return 0; } static int proc_parse_subset_param(struct fs_context *fc, char *value) { struct proc_fs_context *ctx = fc->fs_private; while (value) { char *ptr = strchr(value, ','); if (ptr != NULL) *ptr++ = '\0'; if (*value != '\0') { if (!strcmp(value, "pid")) { ctx->pidonly = PROC_PIDONLY_ON; } else { return invalf(fc, "proc: unsupported subset option - %s\n", value); } } value = ptr; } return 0; } static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct proc_fs_context *ctx = fc->fs_private; struct fs_parse_result result; int opt; opt = fs_parse(fc, proc_fs_parameters, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_gid: ctx->gid = result.uint_32; break; case Opt_hidepid: if (proc_parse_hidepid_param(fc, param)) return -EINVAL; break; case Opt_subset: if (proc_parse_subset_param(fc, param->string) < 0) return -EINVAL; break; default: return -EINVAL; } ctx->mask |= 1 << opt; return 0; } static void proc_apply_options(struct proc_fs_info *fs_info, struct fs_context *fc, struct user_namespace *user_ns) { struct proc_fs_context *ctx = fc->fs_private; if (ctx->mask & (1 << Opt_gid)) fs_info->pid_gid = make_kgid(user_ns, ctx->gid); if (ctx->mask & (1 << Opt_hidepid)) fs_info->hide_pid = ctx->hidepid; if (ctx->mask & (1 << Opt_subset)) fs_info->pidonly = ctx->pidonly; } static int proc_fill_super(struct super_block *s, struct fs_context *fc) { struct proc_fs_context *ctx = fc->fs_private; struct inode *root_inode; struct proc_fs_info *fs_info; int ret; fs_info = kzalloc(sizeof(*fs_info), GFP_KERNEL); if (!fs_info) return -ENOMEM; fs_info->pid_ns = get_pid_ns(ctx->pid_ns); proc_apply_options(fs_info, fc, current_user_ns()); /* User space would break if executables or devices appear on proc */ s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC; s->s_blocksize = 1024; s->s_blocksize_bits = 10; s->s_magic = PROC_SUPER_MAGIC; s->s_op = &proc_sops; s->s_time_gran = 1; s->s_fs_info = fs_info; /* * procfs isn't actually a stacking filesystem; however, there is * too much magic going on inside it to permit stacking things on * top of it */ s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; /* procfs dentries and inodes don't require IO to create */ s->s_shrink->seeks = 0; pde_get(&proc_root); root_inode = proc_get_inode(s, &proc_root); if (!root_inode) { pr_err("proc_fill_super: get root inode failed\n"); return -ENOMEM; } s->s_root = d_make_root(root_inode); if (!s->s_root) { pr_err("proc_fill_super: allocate dentry failed\n"); return -ENOMEM; } ret = proc_setup_self(s); if (ret) { return ret; } return proc_setup_thread_self(s); } static int proc_reconfigure(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; struct proc_fs_info *fs_info = proc_sb_info(sb); sync_filesystem(sb); proc_apply_options(fs_info, fc, current_user_ns()); return 0; } static int proc_get_tree(struct fs_context *fc) { return get_tree_nodev(fc, proc_fill_super); } static void proc_fs_context_free(struct fs_context *fc) { struct proc_fs_context *ctx = fc->fs_private; put_pid_ns(ctx->pid_ns); kfree(ctx); } static const struct fs_context_operations proc_fs_context_ops = { .free = proc_fs_context_free, .parse_param = proc_parse_param, .get_tree = proc_get_tree, .reconfigure = proc_reconfigure, }; static int proc_init_fs_context(struct fs_context *fc) { struct proc_fs_context *ctx; ctx = kzalloc(sizeof(struct proc_fs_context), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->pid_ns = get_pid_ns(task_active_pid_ns(current)); put_user_ns(fc->user_ns); fc->user_ns = get_user_ns(ctx->pid_ns->user_ns); fc->fs_private = ctx; fc->ops = &proc_fs_context_ops; return 0; } static void proc_kill_sb(struct super_block *sb) { struct proc_fs_info *fs_info = proc_sb_info(sb); if (!fs_info) { kill_anon_super(sb); return; } dput(fs_info->proc_self); dput(fs_info->proc_thread_self); kill_anon_super(sb); put_pid_ns(fs_info->pid_ns); kfree_rcu(fs_info, rcu); } static struct file_system_type proc_fs_type = { .name = "proc", .init_fs_context = proc_init_fs_context, .parameters = proc_fs_parameters, .kill_sb = proc_kill_sb, .fs_flags = FS_USERNS_MOUNT | FS_DISALLOW_NOTIFY_PERM, }; void __init proc_root_init(void) { proc_init_kmemcache(); set_proc_pid_nlink(); proc_self_init(); proc_thread_self_init(); proc_symlink("mounts", NULL, "self/mounts"); proc_net_init(); proc_mkdir("fs", NULL); proc_mkdir("driver", NULL); proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */ #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) /* just give it a mountpoint */ proc_create_mount_point("openprom"); #endif proc_tty_init(); proc_mkdir("bus", NULL); proc_sys_init(); /* * Last things last. It is not like userspace processes eager * to open /proc files exist at this point but register last * anyway. */ register_filesystem(&proc_fs_type); } static int proc_root_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(path->dentry), stat); stat->nlink = proc_root.nlink + nr_processes(); return 0; } static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags) { if (!proc_pid_lookup(dentry, flags)) return NULL; return proc_lookup(dir, dentry, flags); } static int proc_root_readdir(struct file *file, struct dir_context *ctx) { if (ctx->pos < FIRST_PROCESS_ENTRY) { int error = proc_readdir(file, ctx); if (unlikely(error <= 0)) return error; ctx->pos = FIRST_PROCESS_ENTRY; } return proc_pid_readdir(file, ctx); } /* * The root /proc directory is special, as it has the * <pid> directories. Thus we don't use the generic * directory handling functions for that.. */ static const struct file_operations proc_root_operations = { .read = generic_read_dir, .iterate_shared = proc_root_readdir, .llseek = generic_file_llseek, }; /* * proc root can do almost nothing.. */ static const struct inode_operations proc_root_inode_operations = { .lookup = proc_root_lookup, .getattr = proc_root_getattr, }; /* * This is the root "inode" in the /proc tree.. */ struct proc_dir_entry proc_root = { .low_ino = PROC_ROOT_INO, .namelen = 5, .mode = S_IFDIR | S_IRUGO | S_IXUGO, .nlink = 2, .refcnt = REFCOUNT_INIT(1), .proc_iops = &proc_root_inode_operations, .proc_dir_ops = &proc_root_operations, .parent = &proc_root, .subdir = RB_ROOT, .name = "/proc", };
7961 7958 7961 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 // SPDX-License-Identifier: GPL-2.0-only /* * xsave/xrstor support. * * Author: Suresh Siddha <suresh.b.siddha@intel.com> */ #include <linux/bitops.h> #include <linux/compat.h> #include <linux/cpu.h> #include <linux/mman.h> #include <linux/nospec.h> #include <linux/pkeys.h> #include <linux/seq_file.h> #include <linux/proc_fs.h> #include <linux/vmalloc.h> #include <asm/fpu/api.h> #include <asm/fpu/regset.h> #include <asm/fpu/signal.h> #include <asm/fpu/xcr.h> #include <asm/tlbflush.h> #include <asm/prctl.h> #include <asm/elf.h> #include "context.h" #include "internal.h" #include "legacy.h" #include "xstate.h" #define for_each_extended_xfeature(bit, mask) \ (bit) = FIRST_EXTENDED_XFEATURE; \ for_each_set_bit_from(bit, (unsigned long *)&(mask), 8 * sizeof(mask)) /* * Although we spell it out in here, the Processor Trace * xfeature is completely unused. We use other mechanisms * to save/restore PT state in Linux. */ static const char *xfeature_names[] = { "x87 floating point registers", "SSE registers", "AVX registers", "MPX bounds registers", "MPX CSR", "AVX-512 opmask", "AVX-512 Hi256", "AVX-512 ZMM_Hi256", "Processor Trace (unused)", "Protection Keys User registers", "PASID state", "Control-flow User registers", "Control-flow Kernel registers (unused)", "unknown xstate feature", "unknown xstate feature", "unknown xstate feature", "unknown xstate feature", "AMX Tile config", "AMX Tile data", "unknown xstate feature", }; static unsigned short xsave_cpuid_features[] __initdata = { [XFEATURE_FP] = X86_FEATURE_FPU, [XFEATURE_SSE] = X86_FEATURE_XMM, [XFEATURE_YMM] = X86_FEATURE_AVX, [XFEATURE_BNDREGS] = X86_FEATURE_MPX, [XFEATURE_BNDCSR] = X86_FEATURE_MPX, [XFEATURE_OPMASK] = X86_FEATURE_AVX512F, [XFEATURE_ZMM_Hi256] = X86_FEATURE_AVX512F, [XFEATURE_Hi16_ZMM] = X86_FEATURE_AVX512F, [XFEATURE_PT_UNIMPLEMENTED_SO_FAR] = X86_FEATURE_INTEL_PT, [XFEATURE_PKRU] = X86_FEATURE_OSPKE, [XFEATURE_PASID] = X86_FEATURE_ENQCMD, [XFEATURE_CET_USER] = X86_FEATURE_SHSTK, [XFEATURE_XTILE_CFG] = X86_FEATURE_AMX_TILE, [XFEATURE_XTILE_DATA] = X86_FEATURE_AMX_TILE, }; static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init = { [ 0 ... XFEATURE_MAX - 1] = -1}; static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init = { [ 0 ... XFEATURE_MAX - 1] = -1}; static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init; #define XSTATE_FLAG_SUPERVISOR BIT(0) #define XSTATE_FLAG_ALIGNED64 BIT(1) /* * Return whether the system supports a given xfeature. * * Also return the name of the (most advanced) feature that the caller requested: */ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) { u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features; if (unlikely(feature_name)) { long xfeature_idx, max_idx; u64 xfeatures_print; /* * So we use FLS here to be able to print the most advanced * feature that was requested but is missing. So if a driver * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the * missing AVX feature - this is the most informative message * to users: */ if (xfeatures_missing) xfeatures_print = xfeatures_missing; else xfeatures_print = xfeatures_needed; xfeature_idx = fls64(xfeatures_print)-1; max_idx = ARRAY_SIZE(xfeature_names)-1; xfeature_idx = min(xfeature_idx, max_idx); *feature_name = xfeature_names[xfeature_idx]; } if (xfeatures_missing) return 0; return 1; } EXPORT_SYMBOL_GPL(cpu_has_xfeatures); static bool xfeature_is_aligned64(int xfeature_nr) { return xstate_flags[xfeature_nr] & XSTATE_FLAG_ALIGNED64; } static bool xfeature_is_supervisor(int xfeature_nr) { return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR; } static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature) { unsigned int offs, i; /* * Non-compacted format and legacy features use the cached fixed * offsets. */ if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) || xfeature <= XFEATURE_SSE) return xstate_offsets[xfeature]; /* * Compacted format offsets depend on the actual content of the * compacted xsave area which is determined by the xcomp_bv header * field. */ offs = FXSAVE_SIZE + XSAVE_HDR_SIZE; for_each_extended_xfeature(i, xcomp_bv) { if (xfeature_is_aligned64(i)) offs = ALIGN(offs, 64); if (i == xfeature) break; offs += xstate_sizes[i]; } return offs; } /* * Enable the extended processor state save/restore feature. * Called once per CPU onlining. */ void fpu__init_cpu_xstate(void) { if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features) return; cr4_set_bits(X86_CR4_OSXSAVE); /* * Must happen after CR4 setup and before xsetbv() to allow KVM * lazy passthrough. Write independent of the dynamic state static * key as that does not work on the boot CPU. This also ensures * that any stale state is wiped out from XFD. Reset the per CPU * xfd cache too. */ if (cpu_feature_enabled(X86_FEATURE_XFD)) xfd_set_state(init_fpstate.xfd); /* * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features * managed by XSAVE{C, OPT, S} and XRSTOR{S}. Only XSAVE user * states can be set here. */ xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features); /* * MSR_IA32_XSS sets supervisor states managed by XSAVES. */ if (boot_cpu_has(X86_FEATURE_XSAVES)) { wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | xfeatures_mask_independent()); } } static bool xfeature_enabled(enum xfeature xfeature) { return fpu_kernel_cfg.max_features & BIT_ULL(xfeature); } /* * Record the offsets and sizes of various xstates contained * in the XSAVE state memory layout. */ static void __init setup_xstate_cache(void) { u32 eax, ebx, ecx, edx, i; /* start at the beginning of the "extended state" */ unsigned int last_good_offset = offsetof(struct xregs_state, extended_state_area); /* * The FP xstates and SSE xstates are legacy states. They are always * in the fixed offsets in the xsave area in either compacted form * or standard form. */ xstate_offsets[XFEATURE_FP] = 0; xstate_sizes[XFEATURE_FP] = offsetof(struct fxregs_state, xmm_space); xstate_offsets[XFEATURE_SSE] = xstate_sizes[XFEATURE_FP]; xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state, xmm_space); for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) { cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); xstate_sizes[i] = eax; xstate_flags[i] = ecx; /* * If an xfeature is supervisor state, the offset in EBX is * invalid, leave it to -1. */ if (xfeature_is_supervisor(i)) continue; xstate_offsets[i] = ebx; /* * In our xstate size checks, we assume that the highest-numbered * xstate feature has the highest offset in the buffer. Ensure * it does. */ WARN_ONCE(last_good_offset > xstate_offsets[i], "x86/fpu: misordered xstate at %d\n", last_good_offset); last_good_offset = xstate_offsets[i]; } } static void __init print_xstate_feature(u64 xstate_mask) { const char *feature_name; if (cpu_has_xfeatures(xstate_mask, &feature_name)) pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name); } /* * Print out all the supported xstate features: */ static void __init print_xstate_features(void) { print_xstate_feature(XFEATURE_MASK_FP); print_xstate_feature(XFEATURE_MASK_SSE); print_xstate_feature(XFEATURE_MASK_YMM); print_xstate_feature(XFEATURE_MASK_BNDREGS); print_xstate_feature(XFEATURE_MASK_BNDCSR); print_xstate_feature(XFEATURE_MASK_OPMASK); print_xstate_feature(XFEATURE_MASK_ZMM_Hi256); print_xstate_feature(XFEATURE_MASK_Hi16_ZMM); print_xstate_feature(XFEATURE_MASK_PKRU); print_xstate_feature(XFEATURE_MASK_PASID); print_xstate_feature(XFEATURE_MASK_CET_USER); print_xstate_feature(XFEATURE_MASK_XTILE_CFG); print_xstate_feature(XFEATURE_MASK_XTILE_DATA); } /* * This check is important because it is easy to get XSTATE_* * confused with XSTATE_BIT_*. */ #define CHECK_XFEATURE(nr) do { \ WARN_ON(nr < FIRST_EXTENDED_XFEATURE); \ WARN_ON(nr >= XFEATURE_MAX); \ } while (0) /* * Print out xstate component offsets and sizes */ static void __init print_xstate_offset_size(void) { int i; for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) { pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", i, xfeature_get_offset(fpu_kernel_cfg.max_features, i), i, xstate_sizes[i]); } } /* * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. */ static __init void os_xrstor_booting(struct xregs_state *xstate) { u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE; u32 lmask = mask; u32 hmask = mask >> 32; int err; if (cpu_feature_enabled(X86_FEATURE_XSAVES)) XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); else XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); /* * We should never fault when copying from a kernel buffer, and the FPU * state we set at boot time should be valid. */ WARN_ON_FPU(err); } /* * All supported features have either init state all zeros or are * handled in setup_init_fpu() individually. This is an explicit * feature list and does not use XFEATURE_MASK*SUPPORTED to catch * newly added supported features at build time and make people * actually look at the init state for the new feature. */ #define XFEATURES_INIT_FPSTATE_HANDLED \ (XFEATURE_MASK_FP | \ XFEATURE_MASK_SSE | \ XFEATURE_MASK_YMM | \ XFEATURE_MASK_OPMASK | \ XFEATURE_MASK_ZMM_Hi256 | \ XFEATURE_MASK_Hi16_ZMM | \ XFEATURE_MASK_PKRU | \ XFEATURE_MASK_BNDREGS | \ XFEATURE_MASK_BNDCSR | \ XFEATURE_MASK_PASID | \ XFEATURE_MASK_CET_USER | \ XFEATURE_MASK_XTILE) /* * setup the xstate image representing the init state */ static void __init setup_init_fpu_buf(void) { BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED | XFEATURE_MASK_SUPERVISOR_SUPPORTED) != XFEATURES_INIT_FPSTATE_HANDLED); if (!boot_cpu_has(X86_FEATURE_XSAVE)) return; print_xstate_features(); xstate_init_xcomp_bv(&init_fpstate.regs.xsave, init_fpstate.xfeatures); /* * Init all the features state with header.xfeatures being 0x0 */ os_xrstor_booting(&init_fpstate.regs.xsave); /* * All components are now in init state. Read the state back so * that init_fpstate contains all non-zero init state. This only * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because * those use the init optimization which skips writing data for * components in init state. * * XSAVE could be used, but that would require to reshuffle the * data when XSAVEC/S is available because XSAVEC/S uses xstate * compaction. But doing so is a pointless exercise because most * components have an all zeros init state except for the legacy * ones (FP and SSE). Those can be saved with FXSAVE into the * legacy area. Adding new features requires to ensure that init * state is all zeroes or if not to add the necessary handling * here. */ fxsave(&init_fpstate.regs.fxsave); } int xfeature_size(int xfeature_nr) { u32 eax, ebx, ecx, edx; CHECK_XFEATURE(xfeature_nr); cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); return eax; } /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ static int validate_user_xstate_header(const struct xstate_header *hdr, struct fpstate *fpstate) { /* No unknown or supervisor features may be set */ if (hdr->xfeatures & ~fpstate->user_xfeatures) return -EINVAL; /* Userspace must use the uncompacted format */ if (hdr->xcomp_bv) return -EINVAL; /* * If 'reserved' is shrunken to add a new field, make sure to validate * that new field here! */ BUILD_BUG_ON(sizeof(hdr->reserved) != 48); /* No reserved bits may be set */ if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved))) return -EINVAL; return 0; } static void __init __xstate_dump_leaves(void) { int i; u32 eax, ebx, ecx, edx; static int should_dump = 1; if (!should_dump) return; should_dump = 0; /* * Dump out a few leaves past the ones that we support * just in case there are some goodies up there */ for (i = 0; i < XFEATURE_MAX + 10; i++) { cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n", XSTATE_CPUID, i, eax, ebx, ecx, edx); } } #define XSTATE_WARN_ON(x, fmt, ...) do { \ if (WARN_ONCE(x, "XSAVE consistency problem: " fmt, ##__VA_ARGS__)) { \ __xstate_dump_leaves(); \ } \ } while (0) #define XCHECK_SZ(sz, nr, __struct) ({ \ if (WARN_ONCE(sz != sizeof(__struct), \ "[%s]: struct is %zu bytes, cpu state %d bytes\n", \ xfeature_names[nr], sizeof(__struct), sz)) { \ __xstate_dump_leaves(); \ } \ true; \ }) /** * check_xtile_data_against_struct - Check tile data state size. * * Calculate the state size by multiplying the single tile size which is * recorded in a C struct, and the number of tiles that the CPU informs. * Compare the provided size with the calculation. * * @size: The tile data state size * * Returns: 0 on success, -EINVAL on mismatch. */ static int __init check_xtile_data_against_struct(int size) { u32 max_palid, palid, state_size; u32 eax, ebx, ecx, edx; u16 max_tile; /* * Check the maximum palette id: * eax: the highest numbered palette subleaf. */ cpuid_count(TILE_CPUID, 0, &max_palid, &ebx, &ecx, &edx); /* * Cross-check each tile size and find the maximum number of * supported tiles. */ for (palid = 1, max_tile = 0; palid <= max_palid; palid++) { u16 tile_size, max; /* * Check the tile size info: * eax[31:16]: bytes per title * ebx[31:16]: the max names (or max number of tiles) */ cpuid_count(TILE_CPUID, palid, &eax, &ebx, &edx, &edx); tile_size = eax >> 16; max = ebx >> 16; if (tile_size != sizeof(struct xtile_data)) { pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n", __stringify(XFEATURE_XTILE_DATA), sizeof(struct xtile_data), tile_size); __xstate_dump_leaves(); return -EINVAL; } if (max > max_tile) max_tile = max; } state_size = sizeof(struct xtile_data) * max_tile; if (size != state_size) { pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n", __stringify(XFEATURE_XTILE_DATA), state_size, size); __xstate_dump_leaves(); return -EINVAL; } return 0; } /* * We have a C struct for each 'xstate'. We need to ensure * that our software representation matches what the CPU * tells us about the state's size. */ static bool __init check_xstate_against_struct(int nr) { /* * Ask the CPU for the size of the state. */ int sz = xfeature_size(nr); /* * Match each CPU state with the corresponding software * structure. */ switch (nr) { case XFEATURE_YMM: return XCHECK_SZ(sz, nr, struct ymmh_struct); case XFEATURE_BNDREGS: return XCHECK_SZ(sz, nr, struct mpx_bndreg_state); case XFEATURE_BNDCSR: return XCHECK_SZ(sz, nr, struct mpx_bndcsr_state); case XFEATURE_OPMASK: return XCHECK_SZ(sz, nr, struct avx_512_opmask_state); case XFEATURE_ZMM_Hi256: return XCHECK_SZ(sz, nr, struct avx_512_zmm_uppers_state); case XFEATURE_Hi16_ZMM: return XCHECK_SZ(sz, nr, struct avx_512_hi16_state); case XFEATURE_PKRU: return XCHECK_SZ(sz, nr, struct pkru_state); case XFEATURE_PASID: return XCHECK_SZ(sz, nr, struct ia32_pasid_state); case XFEATURE_XTILE_CFG: return XCHECK_SZ(sz, nr, struct xtile_cfg); case XFEATURE_CET_USER: return XCHECK_SZ(sz, nr, struct cet_user_state); case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true; default: XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr); return false; } return true; } static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted) { unsigned int topmost = fls64(xfeatures) - 1; unsigned int offset = xstate_offsets[topmost]; if (topmost <= XFEATURE_SSE) return sizeof(struct xregs_state); if (compacted) offset = xfeature_get_offset(xfeatures, topmost); return offset + xstate_sizes[topmost]; } /* * This essentially double-checks what the cpu told us about * how large the XSAVE buffer needs to be. We are recalculating * it to be safe. * * Independent XSAVE features allocate their own buffers and are not * covered by these checks. Only the size of the buffer for task->fpu * is checked here. */ static bool __init paranoid_xstate_size_valid(unsigned int kernel_size) { bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED); bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES); unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE; int i; for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) { if (!check_xstate_against_struct(i)) return false; /* * Supervisor state components can be managed only by * XSAVES. */ if (!xsaves && xfeature_is_supervisor(i)) { XSTATE_WARN_ON(1, "Got supervisor feature %d, but XSAVES not advertised\n", i); return false; } } size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted); XSTATE_WARN_ON(size != kernel_size, "size %u != kernel_size %u\n", size, kernel_size); return size == kernel_size; } /* * Get total size of enabled xstates in XCR0 | IA32_XSS. * * Note the SDM's wording here. "sub-function 0" only enumerates * the size of the *user* states. If we use it to size a buffer * that we use 'XSAVES' on, we could potentially overflow the * buffer because 'XSAVES' saves system states too. * * This also takes compaction into account. So this works for * XSAVEC as well. */ static unsigned int __init get_compacted_size(void) { unsigned int eax, ebx, ecx, edx; /* * - CPUID function 0DH, sub-function 1: * EBX enumerates the size (in bytes) required by * the XSAVES instruction for an XSAVE area * containing all the state components * corresponding to bits currently set in * XCR0 | IA32_XSS. * * When XSAVES is not available but XSAVEC is (virt), then there * are no supervisor states, but XSAVEC still uses compacted * format. */ cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); return ebx; } /* * Get the total size of the enabled xstates without the independent supervisor * features. */ static unsigned int __init get_xsave_compacted_size(void) { u64 mask = xfeatures_mask_independent(); unsigned int size; if (!mask) return get_compacted_size(); /* Disable independent features. */ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()); /* * Ask the hardware what size is required of the buffer. * This is the size required for the task->fpu buffer. */ size = get_compacted_size(); /* Re-enable independent features so XSAVES will work on them again. */ wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask); return size; } static unsigned int __init get_xsave_size_user(void) { unsigned int eax, ebx, ecx, edx; /* * - CPUID function 0DH, sub-function 0: * EBX enumerates the size (in bytes) required by * the XSAVE instruction for an XSAVE area * containing all the *user* state components * corresponding to bits currently set in XCR0. */ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); return ebx; } static int __init init_xstate_size(void) { /* Recompute the context size for enabled features: */ unsigned int user_size, kernel_size, kernel_default_size; bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED); /* Uncompacted user space size */ user_size = get_xsave_size_user(); /* * XSAVES kernel size includes supervisor states and uses compacted * format. XSAVEC uses compacted format, but does not save * supervisor states. * * XSAVE[OPT] do not support supervisor states so kernel and user * size is identical. */ if (compacted) kernel_size = get_xsave_compacted_size(); else kernel_size = user_size; kernel_default_size = xstate_calculate_size(fpu_kernel_cfg.default_features, compacted); if (!paranoid_xstate_size_valid(kernel_size)) return -EINVAL; fpu_kernel_cfg.max_size = kernel_size; fpu_user_cfg.max_size = user_size; fpu_kernel_cfg.default_size = kernel_default_size; fpu_user_cfg.default_size = xstate_calculate_size(fpu_user_cfg.default_features, false); return 0; } /* * We enabled the XSAVE hardware, but something went wrong and * we can not use it. Disable it. */ static void __init fpu__init_disable_system_xstate(unsigned int legacy_size) { fpu_kernel_cfg.max_features = 0; cr4_clear_bits(X86_CR4_OSXSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVE); /* Restore the legacy size.*/ fpu_kernel_cfg.max_size = legacy_size; fpu_kernel_cfg.default_size = legacy_size; fpu_user_cfg.max_size = legacy_size; fpu_user_cfg.default_size = legacy_size; /* * Prevent enabling the static branch which enables writes to the * XFD MSR. */ init_fpstate.xfd = 0; fpstate_reset(&current->thread.fpu); } /* * Enable and initialize the xsave feature. * Called once per system bootup. */ void __init fpu__init_system_xstate(unsigned int legacy_size) { unsigned int eax, ebx, ecx, edx; u64 xfeatures; int err; int i; if (!boot_cpu_has(X86_FEATURE_FPU)) { pr_info("x86/fpu: No FPU detected\n"); return; } if (!boot_cpu_has(X86_FEATURE_XSAVE)) { pr_info("x86/fpu: x87 FPU will use %s\n", boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE"); return; } if (boot_cpu_data.cpuid_level < XSTATE_CPUID) { WARN_ON_FPU(1); return; } /* * Find user xstates supported by the processor. */ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); fpu_kernel_cfg.max_features = eax + ((u64)edx << 32); /* * Find supervisor xstates supported by the processor. */ cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32); if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { /* * This indicates that something really unexpected happened * with the enumeration. Disable XSAVE and try to continue * booting without it. This is too early to BUG(). */ pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", fpu_kernel_cfg.max_features); goto out_disable; } fpu_kernel_cfg.independent_features = fpu_kernel_cfg.max_features & XFEATURE_MASK_INDEPENDENT; /* * Clear XSAVE features that are disabled in the normal CPUID. */ for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) { unsigned short cid = xsave_cpuid_features[i]; /* Careful: X86_FEATURE_FPU is 0! */ if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid)) fpu_kernel_cfg.max_features &= ~BIT_ULL(i); } if (!cpu_feature_enabled(X86_FEATURE_XFD)) fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC; if (!cpu_feature_enabled(X86_FEATURE_XSAVES)) fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED; else fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED | XFEATURE_MASK_SUPERVISOR_SUPPORTED; fpu_user_cfg.max_features = fpu_kernel_cfg.max_features; fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED; /* Clean out dynamic features from default */ fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features; fpu_kernel_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC; fpu_user_cfg.default_features = fpu_user_cfg.max_features; fpu_user_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC; /* Store it for paranoia check at the end */ xfeatures = fpu_kernel_cfg.max_features; /* * Initialize the default XFD state in initfp_state and enable the * dynamic sizing mechanism if dynamic states are available. The * static key cannot be enabled here because this runs before * jump_label_init(). This is delayed to an initcall. */ init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC; /* Set up compaction feature bit */ if (cpu_feature_enabled(X86_FEATURE_XSAVEC) || cpu_feature_enabled(X86_FEATURE_XSAVES)) setup_force_cpu_cap(X86_FEATURE_XCOMPACTED); /* Enable xstate instructions to be able to continue with initialization: */ fpu__init_cpu_xstate(); /* Cache size, offset and flags for initialization */ setup_xstate_cache(); err = init_xstate_size(); if (err) goto out_disable; /* Reset the state for the current task */ fpstate_reset(&current->thread.fpu); /* * Update info used for ptrace frames; use standard-format size and no * supervisor xstates: */ update_regset_xstate_info(fpu_user_cfg.max_size, fpu_user_cfg.max_features); /* * init_fpstate excludes dynamic states as they are large but init * state is zero. */ init_fpstate.size = fpu_kernel_cfg.default_size; init_fpstate.xfeatures = fpu_kernel_cfg.default_features; if (init_fpstate.size > sizeof(init_fpstate.regs)) { pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d), disabling XSAVE\n", sizeof(init_fpstate.regs), init_fpstate.size); goto out_disable; } setup_init_fpu_buf(); /* * Paranoia check whether something in the setup modified the * xfeatures mask. */ if (xfeatures != fpu_kernel_cfg.max_features) { pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n", xfeatures, fpu_kernel_cfg.max_features); goto out_disable; } /* * CPU capabilities initialization runs before FPU init. So * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely * functional, set the feature bit so depending code works. */ setup_force_cpu_cap(X86_FEATURE_OSXSAVE); print_xstate_offset_size(); pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", fpu_kernel_cfg.max_features, fpu_kernel_cfg.max_size, boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard"); return; out_disable: /* something went wrong, try to boot without any XSAVE support */ fpu__init_disable_system_xstate(legacy_size); } /* * Restore minimal FPU state after suspend: */ void fpu__resume_cpu(void) { /* * Restore XCR0 on xsave capable CPUs: */ if (cpu_feature_enabled(X86_FEATURE_XSAVE)) xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features); /* * Restore IA32_XSS. The same CPUID bit enumerates support * of XSAVES and MSR_IA32_XSS. */ if (cpu_feature_enabled(X86_FEATURE_XSAVES)) { wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | xfeatures_mask_independent()); } if (fpu_state_size_dynamic()) wrmsrl(MSR_IA32_XFD, current->thread.fpu.fpstate->xfd); } /* * Given an xstate feature nr, calculate where in the xsave * buffer the state is. Callers should ensure that the buffer * is valid. */ static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr) { u64 xcomp_bv = xsave->header.xcomp_bv; if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr))) return NULL; if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) { if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr)))) return NULL; } return (void *)xsave + xfeature_get_offset(xcomp_bv, xfeature_nr); } /* * Given the xsave area and a state inside, this function returns the * address of the state. * * This is the API that is called to get xstate address in either * standard format or compacted format of xsave area. * * Note that if there is no data for the field in the xsave buffer * this will return NULL. * * Inputs: * xstate: the thread's storage area for all FPU data * xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP, * XFEATURE_SSE, etc...) * Output: * address of the state in the xsave area, or NULL if the * field is not present in the xsave buffer. */ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) { /* * Do we even *have* xsave state? */ if (!boot_cpu_has(X86_FEATURE_XSAVE)) return NULL; /* * We should not ever be requesting features that we * have not enabled. */ if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr))) return NULL; /* * This assumes the last 'xsave*' instruction to * have requested that 'xfeature_nr' be saved. * If it did not, we might be seeing and old value * of the field in the buffer. * * This can happen because the last 'xsave' did not * request that this feature be saved (unlikely) * or because the "init optimization" caused it * to not be saved. */ if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr))) return NULL; return __raw_xsave_addr(xsave, xfeature_nr); } EXPORT_SYMBOL_GPL(get_xsave_addr); #ifdef CONFIG_ARCH_HAS_PKEYS /* * This will go out and modify PKRU register to set the access * rights for @pkey to @init_val. */ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val) { u32 old_pkru, new_pkru_bits = 0; int pkey_shift; /* * This check implies XSAVE support. OSPKE only gets * set if we enable XSAVE and we enable PKU in XCR0. */ if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return -EINVAL; /* * This code should only be called with valid 'pkey' * values originating from in-kernel users. Complain * if a bad value is observed. */ if (WARN_ON_ONCE(pkey >= arch_max_pkey())) return -EINVAL; /* Set the bits we need in PKRU: */ if (init_val & PKEY_DISABLE_ACCESS) new_pkru_bits |= PKRU_AD_BIT; if (init_val & PKEY_DISABLE_WRITE) new_pkru_bits |= PKRU_WD_BIT; /* Shift the bits in to the correct place in PKRU for pkey: */ pkey_shift = pkey * PKRU_BITS_PER_PKEY; new_pkru_bits <<= pkey_shift; /* Get old PKRU and mask off any old bits in place: */ old_pkru = read_pkru(); old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift); /* Write old part along with new part: */ write_pkru(old_pkru | new_pkru_bits); return 0; } #endif /* ! CONFIG_ARCH_HAS_PKEYS */ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, void *init_xstate, unsigned int size) { membuf_write(to, from_xstate ? xstate : init_xstate, size); } /** * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer * @to: membuf descriptor * @fpstate: The fpstate buffer from which to copy * @xfeatures: The mask of xfeatures to save (XSAVE mode only) * @pkru_val: The PKRU value to store in the PKRU component * @copy_mode: The requested copy mode * * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming * format, i.e. from the kernel internal hardware dependent storage format * to the requested @mode. UABI XSTATE is always uncompacted! * * It supports partial copy but @to.pos always starts from zero. */ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, u64 xfeatures, u32 pkru_val, enum xstate_copy_mode copy_mode) { const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr); struct xregs_state *xinit = &init_fpstate.regs.xsave; struct xregs_state *xsave = &fpstate->regs.xsave; struct xstate_header header; unsigned int zerofrom; u64 mask; int i; memset(&header, 0, sizeof(header)); header.xfeatures = xsave->header.xfeatures; /* Mask out the feature bits depending on copy mode */ switch (copy_mode) { case XSTATE_COPY_FP: header.xfeatures &= XFEATURE_MASK_FP; break; case XSTATE_COPY_FX: header.xfeatures &= XFEATURE_MASK_FP | XFEATURE_MASK_SSE; break; case XSTATE_COPY_XSAVE: header.xfeatures &= fpstate->user_xfeatures & xfeatures; break; } /* Copy FP state up to MXCSR */ copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387, &xinit->i387, off_mxcsr); /* Copy MXCSR when SSE or YMM are set in the feature mask */ copy_feature(header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM), &to, &xsave->i387.mxcsr, &xinit->i387.mxcsr, MXCSR_AND_FLAGS_SIZE); /* Copy the remaining FP state */ copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387.st_space, &xinit->i387.st_space, sizeof(xsave->i387.st_space)); /* Copy the SSE state - shared with YMM, but independently managed */ copy_feature(header.xfeatures & XFEATURE_MASK_SSE, &to, &xsave->i387.xmm_space, &xinit->i387.xmm_space, sizeof(xsave->i387.xmm_space)); if (copy_mode != XSTATE_COPY_XSAVE) goto out; /* Zero the padding area */ membuf_zero(&to, sizeof(xsave->i387.padding)); /* Copy xsave->i387.sw_reserved */ membuf_write(&to, xstate_fx_sw_bytes, sizeof(xsave->i387.sw_reserved)); /* Copy the user space relevant state of @xsave->header */ membuf_write(&to, &header, sizeof(header)); zerofrom = offsetof(struct xregs_state, extended_state_area); /* * This 'mask' indicates which states to copy from fpstate. * Those extended states that are not present in fpstate are * either disabled or initialized: * * In non-compacted format, disabled features still occupy * state space but there is no state to copy from in the * compacted init_fpstate. The gap tracking will zero these * states. * * The extended features have an all zeroes init state. Thus, * remove them from 'mask' to zero those features in the user * buffer instead of retrieving them from init_fpstate. */ mask = header.xfeatures; for_each_extended_xfeature(i, mask) { /* * If there was a feature or alignment gap, zero the space * in the destination buffer. */ if (zerofrom < xstate_offsets[i]) membuf_zero(&to, xstate_offsets[i] - zerofrom); if (i == XFEATURE_PKRU) { struct pkru_state pkru = {0}; /* * PKRU is not necessarily up to date in the * XSAVE buffer. Use the provided value. */ pkru.pkru = pkru_val; membuf_write(&to, &pkru, sizeof(pkru)); } else { membuf_write(&to, __raw_xsave_addr(xsave, i), xstate_sizes[i]); } /* * Keep track of the last copied state in the non-compacted * target buffer for gap zeroing. */ zerofrom = xstate_offsets[i] + xstate_sizes[i]; } out: if (to.left) membuf_zero(&to, to.left); } /** * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer * @to: membuf descriptor * @tsk: The task from which to copy the saved xstate * @copy_mode: The requested copy mode * * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming * format, i.e. from the kernel internal hardware dependent storage format * to the requested @mode. UABI XSTATE is always uncompacted! * * It supports partial copy but @to.pos always starts from zero. */ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, enum xstate_copy_mode copy_mode) { __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate, tsk->thread.fpu.fpstate->user_xfeatures, tsk->thread.pkru, copy_mode); } static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size, const void *kbuf, const void __user *ubuf) { if (kbuf) { memcpy(dst, kbuf + offset, size); } else { if (copy_from_user(dst, ubuf + offset, size)) return -EFAULT; } return 0; } /** * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate * @fpstate: The fpstate buffer to copy to * @kbuf: The UABI format buffer, if it comes from the kernel * @ubuf: The UABI format buffer, if it comes from userspace * @pkru: The location to write the PKRU value to * * Converts from the UABI format into the kernel internal hardware * dependent format. * * This function ultimately has three different callers with distinct PKRU * behavior. * 1. When called from sigreturn the PKRU register will be restored from * @fpstate via an XRSTOR. Correctly copying the UABI format buffer to * @fpstate is sufficient to cover this case, but the caller will also * pass a pointer to the thread_struct's pkru field in @pkru and updating * it is harmless. * 2. When called from ptrace the PKRU register will be restored from the * thread_struct's pkru field. A pointer to that is passed in @pkru. * The kernel will restore it manually, so the XRSTOR behavior that resets * the PKRU register to the hardware init value (0) if the corresponding * xfeatures bit is not set is emulated here. * 3. When called from KVM the PKRU register will be restored from the vcpu's * pkru field. A pointer to that is passed in @pkru. KVM hasn't used * XRSTOR and hasn't had the PKRU resetting behavior described above. To * preserve that KVM behavior, it passes NULL for @pkru if the xfeatures * bit is not set. */ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf, const void __user *ubuf, u32 *pkru) { struct xregs_state *xsave = &fpstate->regs.xsave; unsigned int offset, size; struct xstate_header hdr; u64 mask; int i; offset = offsetof(struct xregs_state, header); if (copy_from_buffer(&hdr, offset, sizeof(hdr), kbuf, ubuf)) return -EFAULT; if (validate_user_xstate_header(&hdr, fpstate)) return -EINVAL; /* Validate MXCSR when any of the related features is in use */ mask = XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM; if (hdr.xfeatures & mask) { u32 mxcsr[2]; offset = offsetof(struct fxregs_state, mxcsr); if (copy_from_buffer(mxcsr, offset, sizeof(mxcsr), kbuf, ubuf)) return -EFAULT; /* Reserved bits in MXCSR must be zero. */ if (mxcsr[0] & ~mxcsr_feature_mask) return -EINVAL; /* SSE and YMM require MXCSR even when FP is not in use. */ if (!(hdr.xfeatures & XFEATURE_MASK_FP)) { xsave->i387.mxcsr = mxcsr[0]; xsave->i387.mxcsr_mask = mxcsr[1]; } } for (i = 0; i < XFEATURE_MAX; i++) { mask = BIT_ULL(i); if (hdr.xfeatures & mask) { void *dst = __raw_xsave_addr(xsave, i); offset = xstate_offsets[i]; size = xstate_sizes[i]; if (copy_from_buffer(dst, offset, size, kbuf, ubuf)) return -EFAULT; } } if (hdr.xfeatures & XFEATURE_MASK_PKRU) { struct pkru_state *xpkru; xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU); *pkru = xpkru->pkru; } else { /* * KVM may pass NULL here to indicate that it does not need * PKRU updated. */ if (pkru) *pkru = 0; } /* * The state that came in from userspace was user-state only. * Mask all the user states out of 'xfeatures': */ xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL; /* * Add back in the features that came in from userspace: */ xsave->header.xfeatures |= hdr.xfeatures; return 0; } /* * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S] * format and copy to the target thread. Used by ptrace and KVM. */ int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru) { return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru); } /* * Convert from a sigreturn standard-format user-space buffer to kernel * XSAVE[S] format and copy to the target thread. This is called from the * sigreturn() and rt_sigreturn() system calls. */ int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf) { return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru); } static bool validate_independent_components(u64 mask) { u64 xchk; if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES))) return false; xchk = ~xfeatures_mask_independent(); if (WARN_ON_ONCE(!mask || mask & xchk)) return false; return true; } /** * xsaves - Save selected components to a kernel xstate buffer * @xstate: Pointer to the buffer * @mask: Feature mask to select the components to save * * The @xstate buffer must be 64 byte aligned and correctly initialized as * XSAVES does not write the full xstate header. Before first use the * buffer should be zeroed otherwise a consecutive XRSTORS from that buffer * can #GP. * * The feature mask must be a subset of the independent features. */ void xsaves(struct xregs_state *xstate, u64 mask) { int err; if (!validate_independent_components(mask)) return; XSTATE_OP(XSAVES, xstate, (u32)mask, (u32)(mask >> 32), err); WARN_ON_ONCE(err); } /** * xrstors - Restore selected components from a kernel xstate buffer * @xstate: Pointer to the buffer * @mask: Feature mask to select the components to restore * * The @xstate buffer must be 64 byte aligned and correctly initialized * otherwise XRSTORS from that buffer can #GP. * * Proper usage is to restore the state which was saved with * xsaves() into @xstate. * * The feature mask must be a subset of the independent features. */ void xrstors(struct xregs_state *xstate, u64 mask) { int err; if (!validate_independent_components(mask)) return; XSTATE_OP(XRSTORS, xstate, (u32)mask, (u32)(mask >> 32), err); WARN_ON_ONCE(err); } #if IS_ENABLED(CONFIG_KVM) void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature) { void *addr = get_xsave_addr(&fps->regs.xsave, xfeature); if (addr) memset(addr, 0, xstate_sizes[xfeature]); } EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component); #endif #ifdef CONFIG_X86_64 #ifdef CONFIG_X86_DEBUG_FPU /* * Ensure that a subsequent XSAVE* or XRSTOR* instruction with RFBM=@mask * can safely operate on the @fpstate buffer. */ static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor) { u64 xfd = __this_cpu_read(xfd_state); if (fpstate->xfd == xfd) return true; /* * The XFD MSR does not match fpstate->xfd. That's invalid when * the passed in fpstate is current's fpstate. */ if (fpstate->xfd == current->thread.fpu.fpstate->xfd) return false; /* * XRSTOR(S) from init_fpstate are always correct as it will just * bring all components into init state and not read from the * buffer. XSAVE(S) raises #PF after init. */ if (fpstate == &init_fpstate) return rstor; /* * XSAVE(S): clone(), fpu_swap_kvm_fpstate() * XRSTORS(S): fpu_swap_kvm_fpstate() */ /* * No XSAVE/XRSTOR instructions (except XSAVE itself) touch * the buffer area for XFD-disabled state components. */ mask &= ~xfd; /* * Remove features which are valid in fpstate. They * have space allocated in fpstate. */ mask &= ~fpstate->xfeatures; /* * Any remaining state components in 'mask' might be written * by XSAVE/XRSTOR. Fail validation it found. */ return !mask; } void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor) { WARN_ON_ONCE(!xstate_op_valid(fpstate, mask, rstor)); } #endif /* CONFIG_X86_DEBUG_FPU */ static int __init xfd_update_static_branch(void) { /* * If init_fpstate.xfd has bits set then dynamic features are * available and the dynamic sizing must be enabled. */ if (init_fpstate.xfd) static_branch_enable(&__fpu_state_size_dynamic); return 0; } arch_initcall(xfd_update_static_branch) void fpstate_free(struct fpu *fpu) { if (fpu->fpstate && fpu->fpstate != &fpu->__fpstate) vfree(fpu->fpstate); } /** * fpstate_realloc - Reallocate struct fpstate for the requested new features * * @xfeatures: A bitmap of xstate features which extend the enabled features * of that task * @ksize: The required size for the kernel buffer * @usize: The required size for user space buffers * @guest_fpu: Pointer to a guest FPU container. NULL for host allocations * * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer * terminates quickly, vfree()-induced IPIs may be a concern, but tasks * with large states are likely to live longer. * * Returns: 0 on success, -ENOMEM on allocation error. */ static int fpstate_realloc(u64 xfeatures, unsigned int ksize, unsigned int usize, struct fpu_guest *guest_fpu) { struct fpu *fpu = &current->thread.fpu; struct fpstate *curfps, *newfps = NULL; unsigned int fpsize; bool in_use; fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64); newfps = vzalloc(fpsize); if (!newfps) return -ENOMEM; newfps->size = ksize; newfps->user_size = usize; newfps->is_valloc = true; /* * When a guest FPU is supplied, use @guest_fpu->fpstate * as reference independent whether it is in use or not. */ curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate; /* Determine whether @curfps is the active fpstate */ in_use = fpu->fpstate == curfps; if (guest_fpu) { newfps->is_guest = true; newfps->is_confidential = curfps->is_confidential; newfps->in_use = curfps->in_use; guest_fpu->xfeatures |= xfeatures; guest_fpu->uabi_size = usize; } fpregs_lock(); /* * If @curfps is in use, ensure that the current state is in the * registers before swapping fpstate as that might invalidate it * due to layout changes. */ if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD)) fpregs_restore_userregs(); newfps->xfeatures = curfps->xfeatures | xfeatures; newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; newfps->xfd = curfps->xfd & ~xfeatures; /* Do the final updates within the locked region */ xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures); if (guest_fpu) { guest_fpu->fpstate = newfps; /* If curfps is active, update the FPU fpstate pointer */ if (in_use) fpu->fpstate = newfps; } else { fpu->fpstate = newfps; } if (in_use) xfd_update_state(fpu->fpstate); fpregs_unlock(); /* Only free valloc'ed state */ if (curfps && curfps->is_valloc) vfree(curfps); return 0; } static int validate_sigaltstack(unsigned int usize) { struct task_struct *thread, *leader = current->group_leader; unsigned long framesize = get_sigframe_size(); lockdep_assert_held(&current->sighand->siglock); /* get_sigframe_size() is based on fpu_user_cfg.max_size */ framesize -= fpu_user_cfg.max_size; framesize += usize; for_each_thread(leader, thread) { if (thread->sas_ss_size && thread->sas_ss_size < framesize) return -ENOSPC; } return 0; } static int __xstate_request_perm(u64 permitted, u64 requested, bool guest) { /* * This deliberately does not exclude !XSAVES as we still might * decide to optionally context switch XCR0 or talk the silicon * vendors into extending XFD for the pre AMX states, especially * AVX512. */ bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED); struct fpu *fpu = &current->group_leader->thread.fpu; struct fpu_state_perm *perm; unsigned int ksize, usize; u64 mask; int ret = 0; /* Check whether fully enabled */ if ((permitted & requested) == requested) return 0; /* Calculate the resulting kernel state size */ mask = permitted | requested; /* Take supervisor states into account on the host */ if (!guest) mask |= xfeatures_mask_supervisor(); ksize = xstate_calculate_size(mask, compacted); /* Calculate the resulting user state size */ mask &= XFEATURE_MASK_USER_SUPPORTED; usize = xstate_calculate_size(mask, false); if (!guest) { ret = validate_sigaltstack(usize); if (ret) return ret; } perm = guest ? &fpu->guest_perm : &fpu->perm; /* Pairs with the READ_ONCE() in xstate_get_group_perm() */ WRITE_ONCE(perm->__state_perm, mask); /* Protected by sighand lock */ perm->__state_size = ksize; perm->__user_state_size = usize; return ret; } /* * Permissions array to map facilities with more than one component */ static const u64 xstate_prctl_req[XFEATURE_MAX] = { [XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA, }; static int xstate_request_perm(unsigned long idx, bool guest) { u64 permitted, requested; int ret; if (idx >= XFEATURE_MAX) return -EINVAL; /* * Look up the facility mask which can require more than * one xstate component. */ idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req)); requested = xstate_prctl_req[idx]; if (!requested) return -EOPNOTSUPP; if ((fpu_user_cfg.max_features & requested) != requested) return -EOPNOTSUPP; /* Lockless quick check */ permitted = xstate_get_group_perm(guest); if ((permitted & requested) == requested) return 0; /* Protect against concurrent modifications */ spin_lock_irq(&current->sighand->siglock); permitted = xstate_get_group_perm(guest); /* First vCPU allocation locks the permissions. */ if (guest && (permitted & FPU_GUEST_PERM_LOCKED)) ret = -EBUSY; else ret = __xstate_request_perm(permitted, requested, guest); spin_unlock_irq(&current->sighand->siglock); return ret; } int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu) { u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC; struct fpu_state_perm *perm; unsigned int ksize, usize; struct fpu *fpu; if (!xfd_event) { if (!guest_fpu) pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err); return 0; } /* Protect against concurrent modifications */ spin_lock_irq(&current->sighand->siglock); /* If not permitted let it die */ if ((xstate_get_group_perm(!!guest_fpu) & xfd_event) != xfd_event) { spin_unlock_irq(&current->sighand->siglock); return -EPERM; } fpu = &current->group_leader->thread.fpu; perm = guest_fpu ? &fpu->guest_perm : &fpu->perm; ksize = perm->__state_size; usize = perm->__user_state_size; /* * The feature is permitted. State size is sufficient. Dropping * the lock is safe here even if more features are added from * another task, the retrieved buffer sizes are valid for the * currently requested feature(s). */ spin_unlock_irq(&current->sighand->siglock); /* * Try to allocate a new fpstate. If that fails there is no way * out. */ if (fpstate_realloc(xfd_event, ksize, usize, guest_fpu)) return -EFAULT; return 0; } int xfd_enable_feature(u64 xfd_err) { return __xfd_enable_feature(xfd_err, NULL); } #else /* CONFIG_X86_64 */ static inline int xstate_request_perm(unsigned long idx, bool guest) { return -EPERM; } #endif /* !CONFIG_X86_64 */ u64 xstate_get_guest_group_perm(void) { return xstate_get_group_perm(true); } EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm); /** * fpu_xstate_prctl - xstate permission operations * @option: A subfunction of arch_prctl() * @arg2: option argument * Return: 0 if successful; otherwise, an error code * * Option arguments: * * ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info * ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info * ARCH_REQ_XCOMP_PERM: Facility number requested * * For facilities which require more than one XSTATE component, the request * must be the highest state component number related to that facility, * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18). */ long fpu_xstate_prctl(int option, unsigned long arg2) { u64 __user *uptr = (u64 __user *)arg2; u64 permitted, supported; unsigned long idx = arg2; bool guest = false; switch (option) { case ARCH_GET_XCOMP_SUPP: supported = fpu_user_cfg.max_features | fpu_user_cfg.legacy_features; return put_user(supported, uptr); case ARCH_GET_XCOMP_PERM: /* * Lockless snapshot as it can also change right after the * dropping the lock. */ permitted = xstate_get_host_group_perm(); permitted &= XFEATURE_MASK_USER_SUPPORTED; return put_user(permitted, uptr); case ARCH_GET_XCOMP_GUEST_PERM: permitted = xstate_get_guest_group_perm(); permitted &= XFEATURE_MASK_USER_SUPPORTED; return put_user(permitted, uptr); case ARCH_REQ_XCOMP_GUEST_PERM: guest = true; fallthrough; case ARCH_REQ_XCOMP_PERM: if (!IS_ENABLED(CONFIG_X86_64)) return -EOPNOTSUPP; return xstate_request_perm(idx, guest); default: return -EINVAL; } } #ifdef CONFIG_PROC_PID_ARCH_STATUS /* * Report the amount of time elapsed in millisecond since last AVX512 * use in the task. */ static void avx512_status(struct seq_file *m, struct task_struct *task) { unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp); long delta; if (!timestamp) { /* * Report -1 if no AVX512 usage */ delta = -1; } else { delta = (long)(jiffies - timestamp); /* * Cap to LONG_MAX if time difference > LONG_MAX */ if (delta < 0) delta = LONG_MAX; delta = jiffies_to_msecs(delta); } seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta); seq_putc(m, '\n'); } /* * Report architecture specific information */ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { /* * Report AVX512 state if the processor and build option supported. */ if (cpu_feature_enabled(X86_FEATURE_AVX512F)) avx512_status(m, task); return 0; } #endif /* CONFIG_PROC_PID_ARCH_STATUS */
40 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 // SPDX-License-Identifier: GPL-2.0 #include "io_uring.h" #include "napi.h" #ifdef CONFIG_NET_RX_BUSY_POLL /* Timeout for cleanout of stale entries. */ #define NAPI_TIMEOUT (60 * SEC_CONVERSION) struct io_napi_entry { unsigned int napi_id; struct list_head list; unsigned long timeout; struct hlist_node node; struct rcu_head rcu; }; static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list, unsigned int napi_id) { struct io_napi_entry *e; hlist_for_each_entry_rcu(e, hash_list, node) { if (e->napi_id != napi_id) continue; return e; } return NULL; } static inline ktime_t net_to_ktime(unsigned long t) { /* napi approximating usecs, reverse busy_loop_current_time */ return ns_to_ktime(t << 10); } void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock) { struct hlist_head *hash_list; unsigned int napi_id; struct sock *sk; struct io_napi_entry *e; sk = sock->sk; if (!sk) return; napi_id = READ_ONCE(sk->sk_napi_id); /* Non-NAPI IDs can be rejected. */ if (napi_id < MIN_NAPI_ID) return; hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))]; rcu_read_lock(); e = io_napi_hash_find(hash_list, napi_id); if (e) { e->timeout = jiffies + NAPI_TIMEOUT; rcu_read_unlock(); return; } rcu_read_unlock(); e = kmalloc(sizeof(*e), GFP_NOWAIT); if (!e) return; e->napi_id = napi_id; e->timeout = jiffies + NAPI_TIMEOUT; spin_lock(&ctx->napi_lock); if (unlikely(io_napi_hash_find(hash_list, napi_id))) { spin_unlock(&ctx->napi_lock); kfree(e); return; } hlist_add_tail_rcu(&e->node, hash_list); list_add_tail(&e->list, &ctx->napi_list); spin_unlock(&ctx->napi_lock); } static void __io_napi_remove_stale(struct io_ring_ctx *ctx) { struct io_napi_entry *e; unsigned int i; spin_lock(&ctx->napi_lock); hash_for_each(ctx->napi_ht, i, e, node) { if (time_after(jiffies, e->timeout)) { list_del(&e->list); hash_del_rcu(&e->node); kfree_rcu(e, rcu); } } spin_unlock(&ctx->napi_lock); } static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale) { if (is_stale) __io_napi_remove_stale(ctx); } static inline bool io_napi_busy_loop_timeout(ktime_t start_time, ktime_t bp) { if (bp) { ktime_t end_time = ktime_add(start_time, bp); ktime_t now = net_to_ktime(busy_loop_current_time()); return ktime_after(now, end_time); } return true; } static bool io_napi_busy_loop_should_end(void *data, unsigned long start_time) { struct io_wait_queue *iowq = data; if (signal_pending(current)) return true; if (io_should_wake(iowq) || io_has_work(iowq->ctx)) return true; if (io_napi_busy_loop_timeout(net_to_ktime(start_time), iowq->napi_busy_poll_dt)) return true; return false; } static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx, void *loop_end_arg) { struct io_napi_entry *e; bool (*loop_end)(void *, unsigned long) = NULL; bool is_stale = false; if (loop_end_arg) loop_end = io_napi_busy_loop_should_end; list_for_each_entry_rcu(e, &ctx->napi_list, list) { napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg, ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET); if (time_after(jiffies, e->timeout)) is_stale = true; } return is_stale; } static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) { unsigned long start_time = busy_loop_current_time(); void *loop_end_arg = NULL; bool is_stale = false; /* Singular lists use a different napi loop end check function and are * only executed once. */ if (list_is_singular(&ctx->napi_list)) loop_end_arg = iowq; rcu_read_lock(); do { is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg); } while (!io_napi_busy_loop_should_end(iowq, start_time) && !loop_end_arg); rcu_read_unlock(); io_napi_remove_stale(ctx, is_stale); } /* * io_napi_init() - Init napi settings * @ctx: pointer to io-uring context structure * * Init napi settings in the io-uring context. */ void io_napi_init(struct io_ring_ctx *ctx) { u64 sys_dt = READ_ONCE(sysctl_net_busy_poll) * NSEC_PER_USEC; INIT_LIST_HEAD(&ctx->napi_list); spin_lock_init(&ctx->napi_lock); ctx->napi_prefer_busy_poll = false; ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt); } /* * io_napi_free() - Deallocate napi * @ctx: pointer to io-uring context structure * * Free the napi list and the hash table in the io-uring context. */ void io_napi_free(struct io_ring_ctx *ctx) { struct io_napi_entry *e; unsigned int i; spin_lock(&ctx->napi_lock); hash_for_each(ctx->napi_ht, i, e, node) { hash_del_rcu(&e->node); kfree_rcu(e, rcu); } spin_unlock(&ctx->napi_lock); } /* * io_napi_register() - Register napi with io-uring * @ctx: pointer to io-uring context structure * @arg: pointer to io_uring_napi structure * * Register napi in the io-uring context. */ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) { const struct io_uring_napi curr = { .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt), .prefer_busy_poll = ctx->napi_prefer_busy_poll }; struct io_uring_napi napi; if (ctx->flags & IORING_SETUP_IOPOLL) return -EINVAL; if (copy_from_user(&napi, arg, sizeof(napi))) return -EFAULT; if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv) return -EINVAL; if (copy_to_user(arg, &curr, sizeof(curr))) return -EFAULT; WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC); WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll); WRITE_ONCE(ctx->napi_enabled, true); return 0; } /* * io_napi_unregister() - Unregister napi with io-uring * @ctx: pointer to io-uring context structure * @arg: pointer to io_uring_napi structure * * Unregister napi. If arg has been specified copy the busy poll timeout and * prefer busy poll setting to the passed in structure. */ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) { const struct io_uring_napi curr = { .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt), .prefer_busy_poll = ctx->napi_prefer_busy_poll }; if (arg && copy_to_user(arg, &curr, sizeof(curr))) return -EFAULT; WRITE_ONCE(ctx->napi_busy_poll_dt, 0); WRITE_ONCE(ctx->napi_prefer_busy_poll, false); WRITE_ONCE(ctx->napi_enabled, false); return 0; } /* * __io_napi_adjust_timeout() - adjust busy loop timeout * @ctx: pointer to io-uring context structure * @iowq: pointer to io wait queue * @ts: pointer to timespec or NULL * * Adjust the busy loop timeout according to timespec and busy poll timeout. * If the specified NAPI timeout is bigger than the wait timeout, then adjust * the NAPI timeout accordingly. */ void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, ktime_t to_wait) { ktime_t poll_dt = READ_ONCE(ctx->napi_busy_poll_dt); if (to_wait) poll_dt = min(poll_dt, to_wait); iowq->napi_busy_poll_dt = poll_dt; } /* * __io_napi_busy_loop() - execute busy poll loop * @ctx: pointer to io-uring context structure * @iowq: pointer to io wait queue * * Execute the busy poll loop and merge the spliced off list. */ void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) { iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll); if (!(ctx->flags & IORING_SETUP_SQPOLL)) io_napi_blocking_busy_loop(ctx, iowq); } /* * io_napi_sqpoll_busy_poll() - busy poll loop for sqpoll * @ctx: pointer to io-uring context structure * * Splice of the napi list and execute the napi busy poll loop. */ int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx) { bool is_stale = false; if (!READ_ONCE(ctx->napi_busy_poll_dt)) return 0; if (list_empty_careful(&ctx->napi_list)) return 0; rcu_read_lock(); is_stale = __io_napi_do_busy_loop(ctx, NULL); rcu_read_unlock(); io_napi_remove_stale(ctx, is_stale); return 1; } #endif
87 86 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/isofs/util.c */ #include <linux/time.h> #include "isofs.h" /* * We have to convert from a MM/DD/YY format to the Unix ctime format. * We have to take into account leap years and all of that good stuff. * Unfortunately, the kernel does not have the information on hand to * take into account daylight savings time, but it shouldn't matter. * The time stored should be localtime (with or without DST in effect), * and the timezone offset should hold the offset required to get back * to GMT. Thus we should always be correct. */ int iso_date(u8 *p, int flag) { int year, month, day, hour, minute, second, tz; int crtime; year = p[0]; month = p[1]; day = p[2]; hour = p[3]; minute = p[4]; second = p[5]; if (flag == 0) tz = p[6]; /* High sierra has no time zone */ else tz = 0; if (year < 0) { crtime = 0; } else { crtime = mktime64(year+1900, month, day, hour, minute, second); /* sign extend */ if (tz & 0x80) tz |= (-1 << 8); /* * The timezone offset is unreliable on some disks, * so we make a sanity check. In no case is it ever * more than 13 hours from GMT, which is 52*15min. * The time is always stored in localtime with the * timezone offset being what get added to GMT to * get to localtime. Thus we need to subtract the offset * to get to true GMT, which is what we store the time * as internally. On the local system, the user may set * their timezone any way they wish, of course, so GMT * gets converted back to localtime on the receiving * system. * * NOTE: mkisofs in versions prior to mkisofs-1.10 had * the sign wrong on the timezone offset. This has now * been corrected there too, but if you are getting screwy * results this may be the explanation. If enough people * complain, a user configuration option could be added * to add the timezone offset in with the wrong sign * for 'compatibility' with older discs, but I cannot see how * it will matter that much. * * Thanks to kuhlmav@elec.canterbury.ac.nz (Volker Kuhlmann) * for pointing out the sign error. */ if (-52 <= tz && tz <= 52) crtime -= tz * 15 * 60; } return crtime; }
74 69 11 134 136 138 138 1 25 1 4 1 9 220 221 200 203 12 1 5 2 6 1 1 2 2 3 4 352 362 3 358 2 6 3 2 6 356 325 8 11 4 7 1 10 4 348 264 12 345 1 344 224 129 345 94 246 27 323 2 9 6 142 386 3 87 297 5 5 370 12 358 368 241 370 262 83 96 14 326 49 142 281 102 260 341 338 142 142 114 232 367 329 32 3 30 1 21 2 19 10 9 58 2 1 9 3 31 2 31 1 29 2 1 2 8 1 7 2 20 29 29 13 2 46 2 21 33 6 53 4 23 34 34 9 2 2 10 10 4 2 11 3 2 3 3 1 1 11 2 10 10 2 14 9 1 25 24 2 13 8 23 64 1 1 2 63 60 60 12 8 1 5 13 11 1 1 1 1 1 5 2 7 3 4 3 4 1 3 1 1 2 2 2 11 4 2 1 1 4 2 2 2 14 1 1 12 12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 // SPDX-License-Identifier: GPL-2.0-only /* * "splice": joining two ropes together by interweaving their strands. * * This is the "extended pipe" functionality, where a pipe is used as * an arbitrary in-memory buffer. Think of a pipe as a small kernel * buffer that you can use to transfer data from one end to the other. * * The traditional unix read/write is extended with a "splice()" operation * that transfers data buffers to or from a pipe buffer. * * Named by Larry McVoy, original implementation from Linus, extended by * Jens to support splicing to files, network, direct splicing, etc and * fixing lots of bugs. * * Copyright (C) 2005-2006 Jens Axboe <axboe@kernel.dk> * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org> * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu> * */ #include <linux/bvec.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/pagemap.h> #include <linux/splice.h> #include <linux/memcontrol.h> #include <linux/mm_inline.h> #include <linux/swap.h> #include <linux/writeback.h> #include <linux/export.h> #include <linux/syscalls.h> #include <linux/uio.h> #include <linux/fsnotify.h> #include <linux/security.h> #include <linux/gfp.h> #include <linux/net.h> #include <linux/socket.h> #include <linux/sched/signal.h> #include "internal.h" /* * Splice doesn't support FMODE_NOWAIT. Since pipes may set this flag to * indicate they support non-blocking reads or writes, we must clear it * here if set to avoid blocking other users of this pipe if splice is * being done on it. */ static noinline void noinline pipe_clear_nowait(struct file *file) { fmode_t fmode = READ_ONCE(file->f_mode); do { if (!(fmode & FMODE_NOWAIT)) break; } while (!try_cmpxchg(&file->f_mode, &fmode, fmode & ~FMODE_NOWAIT)); } /* * Attempt to steal a page from a pipe buffer. This should perhaps go into * a vm helper function, it's already simplified quite a bit by the * addition of remove_mapping(). If success is returned, the caller may * attempt to reuse this page for another destination. */ static bool page_cache_pipe_buf_try_steal(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct folio *folio = page_folio(buf->page); struct address_space *mapping; folio_lock(folio); mapping = folio_mapping(folio); if (mapping) { WARN_ON(!folio_test_uptodate(folio)); /* * At least for ext2 with nobh option, we need to wait on * writeback completing on this folio, since we'll remove it * from the pagecache. Otherwise truncate wont wait on the * folio, allowing the disk blocks to be reused by someone else * before we actually wrote our data to them. fs corruption * ensues. */ folio_wait_writeback(folio); if (!filemap_release_folio(folio, GFP_KERNEL)) goto out_unlock; /* * If we succeeded in removing the mapping, set LRU flag * and return good. */ if (remove_mapping(mapping, folio)) { buf->flags |= PIPE_BUF_FLAG_LRU; return true; } } /* * Raced with truncate or failed to remove folio from current * address space, unlock and return failure. */ out_unlock: folio_unlock(folio); return false; } static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { put_page(buf->page); buf->flags &= ~PIPE_BUF_FLAG_LRU; } /* * Check whether the contents of buf is OK to access. Since the content * is a page cache page, IO may be in flight. */ static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { struct folio *folio = page_folio(buf->page); int err; if (!folio_test_uptodate(folio)) { folio_lock(folio); /* * Folio got truncated/unhashed. This will cause a 0-byte * splice, if this is the first page. */ if (!folio->mapping) { err = -ENODATA; goto error; } /* * Uh oh, read-error from disk. */ if (!folio_test_uptodate(folio)) { err = -EIO; goto error; } /* Folio is ok after all, we are done */ folio_unlock(folio); } return 0; error: folio_unlock(folio); return err; } const struct pipe_buf_operations page_cache_pipe_buf_ops = { .confirm = page_cache_pipe_buf_confirm, .release = page_cache_pipe_buf_release, .try_steal = page_cache_pipe_buf_try_steal, .get = generic_pipe_buf_get, }; static bool user_page_pipe_buf_try_steal(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { if (!(buf->flags & PIPE_BUF_FLAG_GIFT)) return false; buf->flags |= PIPE_BUF_FLAG_LRU; return generic_pipe_buf_try_steal(pipe, buf); } static const struct pipe_buf_operations user_page_pipe_buf_ops = { .release = page_cache_pipe_buf_release, .try_steal = user_page_pipe_buf_try_steal, .get = generic_pipe_buf_get, }; static void wakeup_pipe_readers(struct pipe_inode_info *pipe) { smp_mb(); if (waitqueue_active(&pipe->rd_wait)) wake_up_interruptible(&pipe->rd_wait); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } /** * splice_to_pipe - fill passed data into a pipe * @pipe: pipe to fill * @spd: data to fill * * Description: * @spd contains a map of pages and len/offset tuples, along with * the struct pipe_buf_operations associated with these pages. This * function will link that data to the pipe. * */ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) { unsigned int spd_pages = spd->nr_pages; unsigned int tail = pipe->tail; unsigned int head = pipe->head; unsigned int mask = pipe->ring_size - 1; ssize_t ret = 0; int page_nr = 0; if (!spd_pages) return 0; if (unlikely(!pipe->readers)) { send_sig(SIGPIPE, current, 0); ret = -EPIPE; goto out; } while (!pipe_full(head, tail, pipe->max_usage)) { struct pipe_buffer *buf = &pipe->bufs[head & mask]; buf->page = spd->pages[page_nr]; buf->offset = spd->partial[page_nr].offset; buf->len = spd->partial[page_nr].len; buf->private = spd->partial[page_nr].private; buf->ops = spd->ops; buf->flags = 0; head++; pipe->head = head; page_nr++; ret += buf->len; if (!--spd->nr_pages) break; } if (!ret) ret = -EAGAIN; out: while (page_nr < spd_pages) spd->spd_release(spd, page_nr++); return ret; } EXPORT_SYMBOL_GPL(splice_to_pipe); ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { unsigned int head = pipe->head; unsigned int tail = pipe->tail; unsigned int mask = pipe->ring_size - 1; int ret; if (unlikely(!pipe->readers)) { send_sig(SIGPIPE, current, 0); ret = -EPIPE; } else if (pipe_full(head, tail, pipe->max_usage)) { ret = -EAGAIN; } else { pipe->bufs[head & mask] = *buf; pipe->head = head + 1; return buf->len; } pipe_buf_release(pipe, buf); return ret; } EXPORT_SYMBOL(add_to_pipe); /* * Check if we need to grow the arrays holding pages and partial page * descriptions. */ int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) { unsigned int max_usage = READ_ONCE(pipe->max_usage); spd->nr_pages_max = max_usage; if (max_usage <= PIPE_DEF_BUFFERS) return 0; spd->pages = kmalloc_array(max_usage, sizeof(struct page *), GFP_KERNEL); spd->partial = kmalloc_array(max_usage, sizeof(struct partial_page), GFP_KERNEL); if (spd->pages && spd->partial) return 0; kfree(spd->pages); kfree(spd->partial); return -ENOMEM; } void splice_shrink_spd(struct splice_pipe_desc *spd) { if (spd->nr_pages_max <= PIPE_DEF_BUFFERS) return; kfree(spd->pages); kfree(spd->partial); } /** * copy_splice_read - Copy data from a file and splice the copy into a pipe * @in: The file to read from * @ppos: Pointer to the file position to read from * @pipe: The pipe to splice into * @len: The amount to splice * @flags: The SPLICE_F_* flags * * This function allocates a bunch of pages sufficient to hold the requested * amount of data (but limited by the remaining pipe capacity), passes it to * the file's ->read_iter() to read into and then splices the used pages into * the pipe. * * Return: On success, the number of bytes read will be returned and *@ppos * will be updated if appropriate; 0 will be returned if there is no more data * to be read; -EAGAIN will be returned if the pipe had no space, and some * other negative error code will be returned on error. A short read may occur * if the pipe has insufficient space, we reach the end of the data or we hit a * hole. */ ssize_t copy_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { struct iov_iter to; struct bio_vec *bv; struct kiocb kiocb; struct page **pages; ssize_t ret; size_t used, npages, chunk, remain, keep = 0; int i; /* Work out how much data we can actually add into the pipe */ used = pipe_occupancy(pipe->head, pipe->tail); npages = max_t(ssize_t, pipe->max_usage - used, 0); len = min_t(size_t, len, npages * PAGE_SIZE); npages = DIV_ROUND_UP(len, PAGE_SIZE); bv = kzalloc(array_size(npages, sizeof(bv[0])) + array_size(npages, sizeof(struct page *)), GFP_KERNEL); if (!bv) return -ENOMEM; pages = (struct page **)(bv + npages); npages = alloc_pages_bulk_array(GFP_USER, npages, pages); if (!npages) { kfree(bv); return -ENOMEM; } remain = len = min_t(size_t, len, npages * PAGE_SIZE); for (i = 0; i < npages; i++) { chunk = min_t(size_t, PAGE_SIZE, remain); bv[i].bv_page = pages[i]; bv[i].bv_offset = 0; bv[i].bv_len = chunk; remain -= chunk; } /* Do the I/O */ iov_iter_bvec(&to, ITER_DEST, bv, npages, len); init_sync_kiocb(&kiocb, in); kiocb.ki_pos = *ppos; ret = in->f_op->read_iter(&kiocb, &to); if (ret > 0) { keep = DIV_ROUND_UP(ret, PAGE_SIZE); *ppos = kiocb.ki_pos; } /* * Callers of ->splice_read() expect -EAGAIN on "can't put anything in * there", rather than -EFAULT. */ if (ret == -EFAULT) ret = -EAGAIN; /* Free any pages that didn't get touched at all. */ if (keep < npages) release_pages(pages + keep, npages - keep); /* Push the remaining pages into the pipe. */ remain = ret; for (i = 0; i < keep; i++) { struct pipe_buffer *buf = pipe_head_buf(pipe); chunk = min_t(size_t, remain, PAGE_SIZE); *buf = (struct pipe_buffer) { .ops = &default_pipe_buf_ops, .page = bv[i].bv_page, .offset = 0, .len = chunk, }; pipe->head++; remain -= chunk; } kfree(bv); return ret; } EXPORT_SYMBOL(copy_splice_read); const struct pipe_buf_operations default_pipe_buf_ops = { .release = generic_pipe_buf_release, .try_steal = generic_pipe_buf_try_steal, .get = generic_pipe_buf_get, }; /* Pipe buffer operations for a socket and similar. */ const struct pipe_buf_operations nosteal_pipe_buf_ops = { .release = generic_pipe_buf_release, .get = generic_pipe_buf_get, }; EXPORT_SYMBOL(nosteal_pipe_buf_ops); static void wakeup_pipe_writers(struct pipe_inode_info *pipe) { smp_mb(); if (waitqueue_active(&pipe->wr_wait)) wake_up_interruptible(&pipe->wr_wait); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } /** * splice_from_pipe_feed - feed available data from a pipe to a file * @pipe: pipe to splice from * @sd: information to @actor * @actor: handler that splices the data * * Description: * This function loops over the pipe and calls @actor to do the * actual moving of a single struct pipe_buffer to the desired * destination. It returns when there's no more buffers left in * the pipe or if the requested number of bytes (@sd->total_len) * have been copied. It returns a positive number (one) if the * pipe needs to be filled with more data, zero if the required * number of bytes have been copied and -errno on error. * * This, together with splice_from_pipe_{begin,end,next}, may be * used to implement the functionality of __splice_from_pipe() when * locking is required around copying the pipe buffers to the * destination. */ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, splice_actor *actor) { unsigned int head = pipe->head; unsigned int tail = pipe->tail; unsigned int mask = pipe->ring_size - 1; int ret; while (!pipe_empty(head, tail)) { struct pipe_buffer *buf = &pipe->bufs[tail & mask]; sd->len = buf->len; if (sd->len > sd->total_len) sd->len = sd->total_len; ret = pipe_buf_confirm(pipe, buf); if (unlikely(ret)) { if (ret == -ENODATA) ret = 0; return ret; } ret = actor(pipe, buf, sd); if (ret <= 0) return ret; buf->offset += ret; buf->len -= ret; sd->num_spliced += ret; sd->len -= ret; sd->pos += ret; sd->total_len -= ret; if (!buf->len) { pipe_buf_release(pipe, buf); tail++; pipe->tail = tail; if (pipe->files) sd->need_wakeup = true; } if (!sd->total_len) return 0; } return 1; } /* We know we have a pipe buffer, but maybe it's empty? */ static inline bool eat_empty_buffer(struct pipe_inode_info *pipe) { unsigned int tail = pipe->tail; unsigned int mask = pipe->ring_size - 1; struct pipe_buffer *buf = &pipe->bufs[tail & mask]; if (unlikely(!buf->len)) { pipe_buf_release(pipe, buf); pipe->tail = tail+1; return true; } return false; } /** * splice_from_pipe_next - wait for some data to splice from * @pipe: pipe to splice from * @sd: information about the splice operation * * Description: * This function will wait for some data and return a positive * value (one) if pipe buffers are available. It will return zero * or -errno if no more data needs to be spliced. */ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) { /* * Check for signal early to make process killable when there are * always buffers available */ if (signal_pending(current)) return -ERESTARTSYS; repeat: while (pipe_empty(pipe->head, pipe->tail)) { if (!pipe->writers) return 0; if (sd->num_spliced) return 0; if (sd->flags & SPLICE_F_NONBLOCK) return -EAGAIN; if (signal_pending(current)) return -ERESTARTSYS; if (sd->need_wakeup) { wakeup_pipe_writers(pipe); sd->need_wakeup = false; } pipe_wait_readable(pipe); } if (eat_empty_buffer(pipe)) goto repeat; return 1; } /** * splice_from_pipe_begin - start splicing from pipe * @sd: information about the splice operation * * Description: * This function should be called before a loop containing * splice_from_pipe_next() and splice_from_pipe_feed() to * initialize the necessary fields of @sd. */ static void splice_from_pipe_begin(struct splice_desc *sd) { sd->num_spliced = 0; sd->need_wakeup = false; } /** * splice_from_pipe_end - finish splicing from pipe * @pipe: pipe to splice from * @sd: information about the splice operation * * Description: * This function will wake up pipe writers if necessary. It should * be called after a loop containing splice_from_pipe_next() and * splice_from_pipe_feed(). */ static void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd) { if (sd->need_wakeup) wakeup_pipe_writers(pipe); } /** * __splice_from_pipe - splice data from a pipe to given actor * @pipe: pipe to splice from * @sd: information to @actor * @actor: handler that splices the data * * Description: * This function does little more than loop over the pipe and call * @actor to do the actual moving of a single struct pipe_buffer to * the desired destination. See pipe_to_file, pipe_to_sendmsg, or * pipe_to_user. * */ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, splice_actor *actor) { int ret; splice_from_pipe_begin(sd); do { cond_resched(); ret = splice_from_pipe_next(pipe, sd); if (ret > 0) ret = splice_from_pipe_feed(pipe, sd, actor); } while (ret > 0); splice_from_pipe_end(pipe, sd); return sd->num_spliced ? sd->num_spliced : ret; } EXPORT_SYMBOL(__splice_from_pipe); /** * splice_from_pipe - splice data from a pipe to a file * @pipe: pipe to splice from * @out: file to splice to * @ppos: position in @out * @len: how many bytes to splice * @flags: splice modifier flags * @actor: handler that splices the data * * Description: * See __splice_from_pipe. This function locks the pipe inode, * otherwise it's identical to __splice_from_pipe(). * */ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags, splice_actor *actor) { ssize_t ret; struct splice_desc sd = { .total_len = len, .flags = flags, .pos = *ppos, .u.file = out, }; pipe_lock(pipe); ret = __splice_from_pipe(pipe, &sd, actor); pipe_unlock(pipe); return ret; } /** * iter_file_splice_write - splice data from a pipe to a file * @pipe: pipe info * @out: file to write to * @ppos: position in @out * @len: number of bytes to splice * @flags: splice modifier flags * * Description: * Will either move or copy pages (determined by @flags options) from * the given pipe inode to the given file. * This one is ->write_iter-based. * */ ssize_t iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { struct splice_desc sd = { .total_len = len, .flags = flags, .pos = *ppos, .u.file = out, }; int nbufs = pipe->max_usage; struct bio_vec *array; ssize_t ret; if (!out->f_op->write_iter) return -EINVAL; array = kcalloc(nbufs, sizeof(struct bio_vec), GFP_KERNEL); if (unlikely(!array)) return -ENOMEM; pipe_lock(pipe); splice_from_pipe_begin(&sd); while (sd.total_len) { struct kiocb kiocb; struct iov_iter from; unsigned int head, tail, mask; size_t left; int n; ret = splice_from_pipe_next(pipe, &sd); if (ret <= 0) break; if (unlikely(nbufs < pipe->max_usage)) { kfree(array); nbufs = pipe->max_usage; array = kcalloc(nbufs, sizeof(struct bio_vec), GFP_KERNEL); if (!array) { ret = -ENOMEM; break; } } head = pipe->head; tail = pipe->tail; mask = pipe->ring_size - 1; /* build the vector */ left = sd.total_len; for (n = 0; !pipe_empty(head, tail) && left && n < nbufs; tail++) { struct pipe_buffer *buf = &pipe->bufs[tail & mask]; size_t this_len = buf->len; /* zero-length bvecs are not supported, skip them */ if (!this_len) continue; this_len = min(this_len, left); ret = pipe_buf_confirm(pipe, buf); if (unlikely(ret)) { if (ret == -ENODATA) ret = 0; goto done; } bvec_set_page(&array[n], buf->page, this_len, buf->offset); left -= this_len; n++; } iov_iter_bvec(&from, ITER_SOURCE, array, n, sd.total_len - left); init_sync_kiocb(&kiocb, out); kiocb.ki_pos = sd.pos; ret = out->f_op->write_iter(&kiocb, &from); sd.pos = kiocb.ki_pos; if (ret <= 0) break; sd.num_spliced += ret; sd.total_len -= ret; *ppos = sd.pos; /* dismiss the fully eaten buffers, adjust the partial one */ tail = pipe->tail; while (ret) { struct pipe_buffer *buf = &pipe->bufs[tail & mask]; if (ret >= buf->len) { ret -= buf->len; buf->len = 0; pipe_buf_release(pipe, buf); tail++; pipe->tail = tail; if (pipe->files) sd.need_wakeup = true; } else { buf->offset += ret; buf->len -= ret; ret = 0; } } } done: kfree(array); splice_from_pipe_end(pipe, &sd); pipe_unlock(pipe); if (sd.num_spliced) ret = sd.num_spliced; return ret; } EXPORT_SYMBOL(iter_file_splice_write); #ifdef CONFIG_NET /** * splice_to_socket - splice data from a pipe to a socket * @pipe: pipe to splice from * @out: socket to write to * @ppos: position in @out * @len: number of bytes to splice * @flags: splice modifier flags * * Description: * Will send @len bytes from the pipe to a network socket. No data copying * is involved. * */ ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { struct socket *sock = sock_from_file(out); struct bio_vec bvec[16]; struct msghdr msg = {}; ssize_t ret = 0; size_t spliced = 0; bool need_wakeup = false; pipe_lock(pipe); while (len > 0) { unsigned int head, tail, mask, bc = 0; size_t remain = len; /* * Check for signal early to make process killable when there * are always buffers available */ ret = -ERESTARTSYS; if (signal_pending(current)) break; while (pipe_empty(pipe->head, pipe->tail)) { ret = 0; if (!pipe->writers) goto out; if (spliced) goto out; ret = -EAGAIN; if (flags & SPLICE_F_NONBLOCK) goto out; ret = -ERESTARTSYS; if (signal_pending(current)) goto out; if (need_wakeup) { wakeup_pipe_writers(pipe); need_wakeup = false; } pipe_wait_readable(pipe); } head = pipe->head; tail = pipe->tail; mask = pipe->ring_size - 1; while (!pipe_empty(head, tail)) { struct pipe_buffer *buf = &pipe->bufs[tail & mask]; size_t seg; if (!buf->len) { tail++; continue; } seg = min_t(size_t, remain, buf->len); ret = pipe_buf_confirm(pipe, buf); if (unlikely(ret)) { if (ret == -ENODATA) ret = 0; break; } bvec_set_page(&bvec[bc++], buf->page, seg, buf->offset); remain -= seg; if (remain == 0 || bc >= ARRAY_SIZE(bvec)) break; tail++; } if (!bc) break; msg.msg_flags = MSG_SPLICE_PAGES; if (flags & SPLICE_F_MORE) msg.msg_flags |= MSG_MORE; if (remain && pipe_occupancy(pipe->head, tail) > 0) msg.msg_flags |= MSG_MORE; if (out->f_flags & O_NONBLOCK) msg.msg_flags |= MSG_DONTWAIT; iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, bvec, bc, len - remain); ret = sock_sendmsg(sock, &msg); if (ret <= 0) break; spliced += ret; len -= ret; tail = pipe->tail; while (ret > 0) { struct pipe_buffer *buf = &pipe->bufs[tail & mask]; size_t seg = min_t(size_t, ret, buf->len); buf->offset += seg; buf->len -= seg; ret -= seg; if (!buf->len) { pipe_buf_release(pipe, buf); tail++; } } if (tail != pipe->tail) { pipe->tail = tail; if (pipe->files) need_wakeup = true; } } out: pipe_unlock(pipe); if (need_wakeup) wakeup_pipe_writers(pipe); return spliced ?: ret; } #endif static int warn_unsupported(struct file *file, const char *op) { pr_debug_ratelimited( "splice %s not supported for file %pD4 (pid: %d comm: %.20s)\n", op, file, current->pid, current->comm); return -EINVAL; } /* * Attempt to initiate a splice from pipe to file. */ static ssize_t do_splice_from(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { if (unlikely(!out->f_op->splice_write)) return warn_unsupported(out, "write"); return out->f_op->splice_write(pipe, out, ppos, len, flags); } /* * Indicate to the caller that there was a premature EOF when reading from the * source and the caller didn't indicate they would be sending more data after * this. */ static void do_splice_eof(struct splice_desc *sd) { if (sd->splice_eof) sd->splice_eof(sd); } /* * Callers already called rw_verify_area() on the entire range. * No need to call it for sub ranges. */ static ssize_t do_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { unsigned int p_space; if (unlikely(!(in->f_mode & FMODE_READ))) return -EBADF; if (!len) return 0; /* Don't try to read more the pipe has space for. */ p_space = pipe->max_usage - pipe_occupancy(pipe->head, pipe->tail); len = min_t(size_t, len, p_space << PAGE_SHIFT); if (unlikely(len > MAX_RW_COUNT)) len = MAX_RW_COUNT; if (unlikely(!in->f_op->splice_read)) return warn_unsupported(in, "read"); /* * O_DIRECT and DAX don't deal with the pagecache, so we allocate a * buffer, copy into it and splice that into the pipe. */ if ((in->f_flags & O_DIRECT) || IS_DAX(in->f_mapping->host)) return copy_splice_read(in, ppos, pipe, len, flags); return in->f_op->splice_read(in, ppos, pipe, len, flags); } /** * vfs_splice_read - Read data from a file and splice it into a pipe * @in: File to splice from * @ppos: Input file offset * @pipe: Pipe to splice to * @len: Number of bytes to splice * @flags: Splice modifier flags (SPLICE_F_*) * * Splice the requested amount of data from the input file to the pipe. This * is synchronous as the caller must hold the pipe lock across the entire * operation. * * If successful, it returns the amount of data spliced, 0 if it hit the EOF or * a hole and a negative error code otherwise. */ ssize_t vfs_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { ssize_t ret; ret = rw_verify_area(READ, in, ppos, len); if (unlikely(ret < 0)) return ret; return do_splice_read(in, ppos, pipe, len, flags); } EXPORT_SYMBOL_GPL(vfs_splice_read); /** * splice_direct_to_actor - splices data directly between two non-pipes * @in: file to splice from * @sd: actor information on where to splice to * @actor: handles the data splicing * * Description: * This is a special case helper to splice directly between two * points, without requiring an explicit pipe. Internally an allocated * pipe is cached in the process, and reused during the lifetime of * that process. * */ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, splice_direct_actor *actor) { struct pipe_inode_info *pipe; ssize_t ret, bytes; size_t len; int i, flags, more; /* * We require the input to be seekable, as we don't want to randomly * drop data for eg socket -> socket splicing. Use the piped splicing * for that! */ if (unlikely(!(in->f_mode & FMODE_LSEEK))) return -EINVAL; /* * neither in nor out is a pipe, setup an internal pipe attached to * 'out' and transfer the wanted data from 'in' to 'out' through that */ pipe = current->splice_pipe; if (unlikely(!pipe)) { pipe = alloc_pipe_info(); if (!pipe) return -ENOMEM; /* * We don't have an immediate reader, but we'll read the stuff * out of the pipe right after the splice_to_pipe(). So set * PIPE_READERS appropriately. */ pipe->readers = 1; current->splice_pipe = pipe; } /* * Do the splice. */ bytes = 0; len = sd->total_len; /* Don't block on output, we have to drain the direct pipe. */ flags = sd->flags; sd->flags &= ~SPLICE_F_NONBLOCK; /* * We signal MORE until we've read sufficient data to fulfill the * request and we keep signalling it if the caller set it. */ more = sd->flags & SPLICE_F_MORE; sd->flags |= SPLICE_F_MORE; WARN_ON_ONCE(!pipe_empty(pipe->head, pipe->tail)); while (len) { size_t read_len; loff_t pos = sd->pos, prev_pos = pos; ret = do_splice_read(in, &pos, pipe, len, flags); if (unlikely(ret <= 0)) goto read_failure; read_len = ret; sd->total_len = read_len; /* * If we now have sufficient data to fulfill the request then * we clear SPLICE_F_MORE if it was not set initially. */ if (read_len >= len && !more) sd->flags &= ~SPLICE_F_MORE; /* * NOTE: nonblocking mode only applies to the input. We * must not do the output in nonblocking mode as then we * could get stuck data in the internal pipe: */ ret = actor(pipe, sd); if (unlikely(ret <= 0)) { sd->pos = prev_pos; goto out_release; } bytes += ret; len -= ret; sd->pos = pos; if (ret < read_len) { sd->pos = prev_pos + ret; goto out_release; } } done: pipe->tail = pipe->head = 0; file_accessed(in); return bytes; read_failure: /* * If the user did *not* set SPLICE_F_MORE *and* we didn't hit that * "use all of len" case that cleared SPLICE_F_MORE, *and* we did a * "->splice_in()" that returned EOF (ie zero) *and* we have sent at * least 1 byte *then* we will also do the ->splice_eof() call. */ if (ret == 0 && !more && len > 0 && bytes) do_splice_eof(sd); out_release: /* * If we did an incomplete transfer we must release * the pipe buffers in question: */ for (i = 0; i < pipe->ring_size; i++) { struct pipe_buffer *buf = &pipe->bufs[i]; if (buf->ops) pipe_buf_release(pipe, buf); } if (!bytes) bytes = ret; goto done; } EXPORT_SYMBOL(splice_direct_to_actor); static int direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd) { struct file *file = sd->u.file; long ret; file_start_write(file); ret = do_splice_from(pipe, file, sd->opos, sd->total_len, sd->flags); file_end_write(file); return ret; } static int splice_file_range_actor(struct pipe_inode_info *pipe, struct splice_desc *sd) { struct file *file = sd->u.file; return do_splice_from(pipe, file, sd->opos, sd->total_len, sd->flags); } static void direct_file_splice_eof(struct splice_desc *sd) { struct file *file = sd->u.file; if (file->f_op->splice_eof) file->f_op->splice_eof(file); } static ssize_t do_splice_direct_actor(struct file *in, loff_t *ppos, struct file *out, loff_t *opos, size_t len, unsigned int flags, splice_direct_actor *actor) { struct splice_desc sd = { .len = len, .total_len = len, .flags = flags, .pos = *ppos, .u.file = out, .splice_eof = direct_file_splice_eof, .opos = opos, }; ssize_t ret; if (unlikely(!(out->f_mode & FMODE_WRITE))) return -EBADF; if (unlikely(out->f_flags & O_APPEND)) return -EINVAL; ret = splice_direct_to_actor(in, &sd, actor); if (ret > 0) *ppos = sd.pos; return ret; } /** * do_splice_direct - splices data directly between two files * @in: file to splice from * @ppos: input file offset * @out: file to splice to * @opos: output file offset * @len: number of bytes to splice * @flags: splice modifier flags * * Description: * For use by do_sendfile(). splice can easily emulate sendfile, but * doing it in the application would incur an extra system call * (splice in + splice out, as compared to just sendfile()). So this helper * can splice directly through a process-private pipe. * * Callers already called rw_verify_area() on the entire range. */ ssize_t do_splice_direct(struct file *in, loff_t *ppos, struct file *out, loff_t *opos, size_t len, unsigned int flags) { return do_splice_direct_actor(in, ppos, out, opos, len, flags, direct_splice_actor); } EXPORT_SYMBOL(do_splice_direct); /** * splice_file_range - splices data between two files for copy_file_range() * @in: file to splice from * @ppos: input file offset * @out: file to splice to * @opos: output file offset * @len: number of bytes to splice * * Description: * For use by ->copy_file_range() methods. * Like do_splice_direct(), but vfs_copy_file_range() already holds * start_file_write() on @out file. * * Callers already called rw_verify_area() on the entire range. */ ssize_t splice_file_range(struct file *in, loff_t *ppos, struct file *out, loff_t *opos, size_t len) { lockdep_assert(file_write_started(out)); return do_splice_direct_actor(in, ppos, out, opos, min_t(size_t, len, MAX_RW_COUNT), 0, splice_file_range_actor); } EXPORT_SYMBOL(splice_file_range); static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) { for (;;) { if (unlikely(!pipe->readers)) { send_sig(SIGPIPE, current, 0); return -EPIPE; } if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) return 0; if (flags & SPLICE_F_NONBLOCK) return -EAGAIN; if (signal_pending(current)) return -ERESTARTSYS; pipe_wait_writable(pipe); } } static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, struct pipe_inode_info *opipe, size_t len, unsigned int flags); ssize_t splice_file_to_pipe(struct file *in, struct pipe_inode_info *opipe, loff_t *offset, size_t len, unsigned int flags) { ssize_t ret; pipe_lock(opipe); ret = wait_for_space(opipe, flags); if (!ret) ret = do_splice_read(in, offset, opipe, len, flags); pipe_unlock(opipe); if (ret > 0) wakeup_pipe_readers(opipe); return ret; } /* * Determine where to splice to/from. */ ssize_t do_splice(struct file *in, loff_t *off_in, struct file *out, loff_t *off_out, size_t len, unsigned int flags) { struct pipe_inode_info *ipipe; struct pipe_inode_info *opipe; loff_t offset; ssize_t ret; if (unlikely(!(in->f_mode & FMODE_READ) || !(out->f_mode & FMODE_WRITE))) return -EBADF; ipipe = get_pipe_info(in, true); opipe = get_pipe_info(out, true); if (ipipe && opipe) { if (off_in || off_out) return -ESPIPE; /* Splicing to self would be fun, but... */ if (ipipe == opipe) return -EINVAL; if ((in->f_flags | out->f_flags) & O_NONBLOCK) flags |= SPLICE_F_NONBLOCK; ret = splice_pipe_to_pipe(ipipe, opipe, len, flags); } else if (ipipe) { if (off_in) return -ESPIPE; if (off_out) { if (!(out->f_mode & FMODE_PWRITE)) return -EINVAL; offset = *off_out; } else { offset = out->f_pos; } if (unlikely(out->f_flags & O_APPEND)) return -EINVAL; ret = rw_verify_area(WRITE, out, &offset, len); if (unlikely(ret < 0)) return ret; if (in->f_flags & O_NONBLOCK) flags |= SPLICE_F_NONBLOCK; file_start_write(out); ret = do_splice_from(ipipe, out, &offset, len, flags); file_end_write(out); if (!off_out) out->f_pos = offset; else *off_out = offset; } else if (opipe) { if (off_out) return -ESPIPE; if (off_in) { if (!(in->f_mode & FMODE_PREAD)) return -EINVAL; offset = *off_in; } else { offset = in->f_pos; } ret = rw_verify_area(READ, in, &offset, len); if (unlikely(ret < 0)) return ret; if (out->f_flags & O_NONBLOCK) flags |= SPLICE_F_NONBLOCK; ret = splice_file_to_pipe(in, opipe, &offset, len, flags); if (!off_in) in->f_pos = offset; else *off_in = offset; } else { ret = -EINVAL; } if (ret > 0) { /* * Generate modify out before access in: * do_splice_from() may've already sent modify out, * and this ensures the events get merged. */ fsnotify_modify(out); fsnotify_access(in); } return ret; } static ssize_t __do_splice(struct file *in, loff_t __user *off_in, struct file *out, loff_t __user *off_out, size_t len, unsigned int flags) { struct pipe_inode_info *ipipe; struct pipe_inode_info *opipe; loff_t offset, *__off_in = NULL, *__off_out = NULL; ssize_t ret; ipipe = get_pipe_info(in, true); opipe = get_pipe_info(out, true); if (ipipe) { if (off_in) return -ESPIPE; pipe_clear_nowait(in); } if (opipe) { if (off_out) return -ESPIPE; pipe_clear_nowait(out); } if (off_out) { if (copy_from_user(&offset, off_out, sizeof(loff_t))) return -EFAULT; __off_out = &offset; } if (off_in) { if (copy_from_user(&offset, off_in, sizeof(loff_t))) return -EFAULT; __off_in = &offset; } ret = do_splice(in, __off_in, out, __off_out, len, flags); if (ret < 0) return ret; if (__off_out && copy_to_user(off_out, __off_out, sizeof(loff_t))) return -EFAULT; if (__off_in && copy_to_user(off_in, __off_in, sizeof(loff_t))) return -EFAULT; return ret; } static ssize_t iter_to_pipe(struct iov_iter *from, struct pipe_inode_info *pipe, unsigned int flags) { struct pipe_buffer buf = { .ops = &user_page_pipe_buf_ops, .flags = flags }; size_t total = 0; ssize_t ret = 0; while (iov_iter_count(from)) { struct page *pages[16]; ssize_t left; size_t start; int i, n; left = iov_iter_get_pages2(from, pages, ~0UL, 16, &start); if (left <= 0) { ret = left; break; } n = DIV_ROUND_UP(left + start, PAGE_SIZE); for (i = 0; i < n; i++) { int size = min_t(int, left, PAGE_SIZE - start); buf.page = pages[i]; buf.offset = start; buf.len = size; ret = add_to_pipe(pipe, &buf); if (unlikely(ret < 0)) { iov_iter_revert(from, left); // this one got dropped by add_to_pipe() while (++i < n) put_page(pages[i]); goto out; } total += ret; left -= size; start = 0; } } out: return total ? total : ret; } static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { int n = copy_page_to_iter(buf->page, buf->offset, sd->len, sd->u.data); return n == sd->len ? n : -EFAULT; } /* * For lack of a better implementation, implement vmsplice() to userspace * as a simple copy of the pipes pages to the user iov. */ static ssize_t vmsplice_to_user(struct file *file, struct iov_iter *iter, unsigned int flags) { struct pipe_inode_info *pipe = get_pipe_info(file, true); struct splice_desc sd = { .total_len = iov_iter_count(iter), .flags = flags, .u.data = iter }; ssize_t ret = 0; if (!pipe) return -EBADF; pipe_clear_nowait(file); if (sd.total_len) { pipe_lock(pipe); ret = __splice_from_pipe(pipe, &sd, pipe_to_user); pipe_unlock(pipe); } if (ret > 0) fsnotify_access(file); return ret; } /* * vmsplice splices a user address range into a pipe. It can be thought of * as splice-from-memory, where the regular splice is splice-from-file (or * to file). In both cases the output is a pipe, naturally. */ static ssize_t vmsplice_to_pipe(struct file *file, struct iov_iter *iter, unsigned int flags) { struct pipe_inode_info *pipe; ssize_t ret = 0; unsigned buf_flag = 0; if (flags & SPLICE_F_GIFT) buf_flag = PIPE_BUF_FLAG_GIFT; pipe = get_pipe_info(file, true); if (!pipe) return -EBADF; pipe_clear_nowait(file); pipe_lock(pipe); ret = wait_for_space(pipe, flags); if (!ret) ret = iter_to_pipe(iter, pipe, buf_flag); pipe_unlock(pipe); if (ret > 0) { wakeup_pipe_readers(pipe); fsnotify_modify(file); } return ret; } static int vmsplice_type(struct fd f, int *type) { if (!f.file) return -EBADF; if (f.file->f_mode & FMODE_WRITE) { *type = ITER_SOURCE; } else if (f.file->f_mode & FMODE_READ) { *type = ITER_DEST; } else { fdput(f); return -EBADF; } return 0; } /* * Note that vmsplice only really supports true splicing _from_ user memory * to a pipe, not the other way around. Splicing from user memory is a simple * operation that can be supported without any funky alignment restrictions * or nasty vm tricks. We simply map in the user memory and fill them into * a pipe. The reverse isn't quite as easy, though. There are two possible * solutions for that: * * - memcpy() the data internally, at which point we might as well just * do a regular read() on the buffer anyway. * - Lots of nasty vm tricks, that are neither fast nor flexible (it * has restriction limitations on both ends of the pipe). * * Currently we punt and implement it as a normal copy, see pipe_to_user(). * */ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov, unsigned long, nr_segs, unsigned int, flags) { struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; ssize_t error; struct fd f; int type; if (unlikely(flags & ~SPLICE_F_ALL)) return -EINVAL; f = fdget(fd); error = vmsplice_type(f, &type); if (error) return error; error = import_iovec(type, uiov, nr_segs, ARRAY_SIZE(iovstack), &iov, &iter); if (error < 0) goto out_fdput; if (!iov_iter_count(&iter)) error = 0; else if (type == ITER_SOURCE) error = vmsplice_to_pipe(f.file, &iter, flags); else error = vmsplice_to_user(f.file, &iter, flags); kfree(iov); out_fdput: fdput(f); return error; } SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags) { struct fd in, out; ssize_t error; if (unlikely(!len)) return 0; if (unlikely(flags & ~SPLICE_F_ALL)) return -EINVAL; error = -EBADF; in = fdget(fd_in); if (in.file) { out = fdget(fd_out); if (out.file) { error = __do_splice(in.file, off_in, out.file, off_out, len, flags); fdput(out); } fdput(in); } return error; } /* * Make sure there's data to read. Wait for input if we can, otherwise * return an appropriate error. */ static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) { int ret; /* * Check the pipe occupancy without the inode lock first. This function * is speculative anyways, so missing one is ok. */ if (!pipe_empty(pipe->head, pipe->tail)) return 0; ret = 0; pipe_lock(pipe); while (pipe_empty(pipe->head, pipe->tail)) { if (signal_pending(current)) { ret = -ERESTARTSYS; break; } if (!pipe->writers) break; if (flags & SPLICE_F_NONBLOCK) { ret = -EAGAIN; break; } pipe_wait_readable(pipe); } pipe_unlock(pipe); return ret; } /* * Make sure there's writeable room. Wait for room if we can, otherwise * return an appropriate error. */ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) { int ret; /* * Check pipe occupancy without the inode lock first. This function * is speculative anyways, so missing one is ok. */ if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) return 0; ret = 0; pipe_lock(pipe); while (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { if (!pipe->readers) { send_sig(SIGPIPE, current, 0); ret = -EPIPE; break; } if (flags & SPLICE_F_NONBLOCK) { ret = -EAGAIN; break; } if (signal_pending(current)) { ret = -ERESTARTSYS; break; } pipe_wait_writable(pipe); } pipe_unlock(pipe); return ret; } /* * Splice contents of ipipe to opipe. */ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, struct pipe_inode_info *opipe, size_t len, unsigned int flags) { struct pipe_buffer *ibuf, *obuf; unsigned int i_head, o_head; unsigned int i_tail, o_tail; unsigned int i_mask, o_mask; int ret = 0; bool input_wakeup = false; retry: ret = ipipe_prep(ipipe, flags); if (ret) return ret; ret = opipe_prep(opipe, flags); if (ret) return ret; /* * Potential ABBA deadlock, work around it by ordering lock * grabbing by pipe info address. Otherwise two different processes * could deadlock (one doing tee from A -> B, the other from B -> A). */ pipe_double_lock(ipipe, opipe); i_tail = ipipe->tail; i_mask = ipipe->ring_size - 1; o_head = opipe->head; o_mask = opipe->ring_size - 1; do { size_t o_len; if (!opipe->readers) { send_sig(SIGPIPE, current, 0); if (!ret) ret = -EPIPE; break; } i_head = ipipe->head; o_tail = opipe->tail; if (pipe_empty(i_head, i_tail) && !ipipe->writers) break; /* * Cannot make any progress, because either the input * pipe is empty or the output pipe is full. */ if (pipe_empty(i_head, i_tail) || pipe_full(o_head, o_tail, opipe->max_usage)) { /* Already processed some buffers, break */ if (ret) break; if (flags & SPLICE_F_NONBLOCK) { ret = -EAGAIN; break; } /* * We raced with another reader/writer and haven't * managed to process any buffers. A zero return * value means EOF, so retry instead. */ pipe_unlock(ipipe); pipe_unlock(opipe); goto retry; } ibuf = &ipipe->bufs[i_tail & i_mask]; obuf = &opipe->bufs[o_head & o_mask]; if (len >= ibuf->len) { /* * Simply move the whole buffer from ipipe to opipe */ *obuf = *ibuf; ibuf->ops = NULL; i_tail++; ipipe->tail = i_tail; input_wakeup = true; o_len = obuf->len; o_head++; opipe->head = o_head; } else { /* * Get a reference to this pipe buffer, * so we can copy the contents over. */ if (!pipe_buf_get(ipipe, ibuf)) { if (ret == 0) ret = -EFAULT; break; } *obuf = *ibuf; /* * Don't inherit the gift and merge flags, we need to * prevent multiple steals of this page. */ obuf->flags &= ~PIPE_BUF_FLAG_GIFT; obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE; obuf->len = len; ibuf->offset += len; ibuf->len -= len; o_len = len; o_head++; opipe->head = o_head; } ret += o_len; len -= o_len; } while (len); pipe_unlock(ipipe); pipe_unlock(opipe); /* * If we put data in the output pipe, wakeup any potential readers. */ if (ret > 0) wakeup_pipe_readers(opipe); if (input_wakeup) wakeup_pipe_writers(ipipe); return ret; } /* * Link contents of ipipe to opipe. */ static ssize_t link_pipe(struct pipe_inode_info *ipipe, struct pipe_inode_info *opipe, size_t len, unsigned int flags) { struct pipe_buffer *ibuf, *obuf; unsigned int i_head, o_head; unsigned int i_tail, o_tail; unsigned int i_mask, o_mask; ssize_t ret = 0; /* * Potential ABBA deadlock, work around it by ordering lock * grabbing by pipe info address. Otherwise two different processes * could deadlock (one doing tee from A -> B, the other from B -> A). */ pipe_double_lock(ipipe, opipe); i_tail = ipipe->tail; i_mask = ipipe->ring_size - 1; o_head = opipe->head; o_mask = opipe->ring_size - 1; do { if (!opipe->readers) { send_sig(SIGPIPE, current, 0); if (!ret) ret = -EPIPE; break; } i_head = ipipe->head; o_tail = opipe->tail; /* * If we have iterated all input buffers or run out of * output room, break. */ if (pipe_empty(i_head, i_tail) || pipe_full(o_head, o_tail, opipe->max_usage)) break; ibuf = &ipipe->bufs[i_tail & i_mask]; obuf = &opipe->bufs[o_head & o_mask]; /* * Get a reference to this pipe buffer, * so we can copy the contents over. */ if (!pipe_buf_get(ipipe, ibuf)) { if (ret == 0) ret = -EFAULT; break; } *obuf = *ibuf; /* * Don't inherit the gift and merge flag, we need to prevent * multiple steals of this page. */ obuf->flags &= ~PIPE_BUF_FLAG_GIFT; obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE; if (obuf->len > len) obuf->len = len; ret += obuf->len; len -= obuf->len; o_head++; opipe->head = o_head; i_tail++; } while (len); pipe_unlock(ipipe); pipe_unlock(opipe); /* * If we put data in the output pipe, wakeup any potential readers. */ if (ret > 0) wakeup_pipe_readers(opipe); return ret; } /* * This is a tee(1) implementation that works on pipes. It doesn't copy * any data, it simply references the 'in' pages on the 'out' pipe. * The 'flags' used are the SPLICE_F_* variants, currently the only * applicable one is SPLICE_F_NONBLOCK. */ ssize_t do_tee(struct file *in, struct file *out, size_t len, unsigned int flags) { struct pipe_inode_info *ipipe = get_pipe_info(in, true); struct pipe_inode_info *opipe = get_pipe_info(out, true); ssize_t ret = -EINVAL; if (unlikely(!(in->f_mode & FMODE_READ) || !(out->f_mode & FMODE_WRITE))) return -EBADF; /* * Duplicate the contents of ipipe to opipe without actually * copying the data. */ if (ipipe && opipe && ipipe != opipe) { if ((in->f_flags | out->f_flags) & O_NONBLOCK) flags |= SPLICE_F_NONBLOCK; /* * Keep going, unless we encounter an error. The ipipe/opipe * ordering doesn't really matter. */ ret = ipipe_prep(ipipe, flags); if (!ret) { ret = opipe_prep(opipe, flags); if (!ret) ret = link_pipe(ipipe, opipe, len, flags); } } if (ret > 0) { fsnotify_access(in); fsnotify_modify(out); } return ret; } SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) { struct fd in, out; ssize_t error; if (unlikely(flags & ~SPLICE_F_ALL)) return -EINVAL; if (unlikely(!len)) return 0; error = -EBADF; in = fdget(fdin); if (in.file) { out = fdget(fdout); if (out.file) { error = do_tee(in.file, out.file, len, flags); fdput(out); } fdput(in); } return error; }
140 12 7 41 92 5 2 21 21 5 17 63 62 40 2 21 2 23 63 225 225 66 65 48 29 66 1 24 24 24 11 45 15 36 36 36 24 20 1 12 23 1 21 30 25 9 25 24 6 24 14 1 8 35 8 2 24 15 35 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 * Portions Copyright (C) Christoph Hellwig, 2001-2002 */ #include <linux/fs.h> #include <linux/mpage.h> #include <linux/buffer_head.h> #include <linux/pagemap.h> #include <linux/quotaops.h> #include <linux/uio.h> #include <linux/writeback.h> #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" #include "jfs_imap.h" #include "jfs_extent.h" #include "jfs_unicode.h" #include "jfs_debug.h" #include "jfs_dmap.h" struct inode *jfs_iget(struct super_block *sb, unsigned long ino) { struct inode *inode; int ret; inode = iget_locked(sb, ino); if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) return inode; ret = diRead(inode); if (ret < 0) { iget_failed(inode); return ERR_PTR(ret); } if (S_ISREG(inode->i_mode)) { inode->i_op = &jfs_file_inode_operations; inode->i_fop = &jfs_file_operations; inode->i_mapping->a_ops = &jfs_aops; } else if (S_ISDIR(inode->i_mode)) { inode->i_op = &jfs_dir_inode_operations; inode->i_fop = &jfs_dir_operations; } else if (S_ISLNK(inode->i_mode)) { if (inode->i_size >= IDATASIZE) { inode->i_op = &page_symlink_inode_operations; inode_nohighmem(inode); inode->i_mapping->a_ops = &jfs_aops; } else { inode->i_op = &jfs_fast_symlink_inode_operations; inode->i_link = JFS_IP(inode)->i_inline; /* * The inline data should be null-terminated, but * don't let on-disk corruption crash the kernel */ inode->i_link[inode->i_size] = '\0'; } } else { inode->i_op = &jfs_file_inode_operations; init_special_inode(inode, inode->i_mode, inode->i_rdev); } unlock_new_inode(inode); return inode; } /* * Workhorse of both fsync & write_inode */ int jfs_commit_inode(struct inode *inode, int wait) { int rc = 0; tid_t tid; static int noisy = 5; jfs_info("In jfs_commit_inode, inode = 0x%p", inode); /* * Don't commit if inode has been committed since last being * marked dirty, or if it has been deleted. */ if (inode->i_nlink == 0 || !test_cflag(COMMIT_Dirty, inode)) return 0; if (isReadOnly(inode)) { /* kernel allows writes to devices on read-only * partitions and may think inode is dirty */ if (!special_file(inode->i_mode) && noisy) { jfs_err("jfs_commit_inode(0x%p) called on read-only volume", inode); jfs_err("Is remount racy?"); noisy--; } return 0; } tid = txBegin(inode->i_sb, COMMIT_INODE); mutex_lock(&JFS_IP(inode)->commit_mutex); /* * Retest inode state after taking commit_mutex */ if (inode->i_nlink && test_cflag(COMMIT_Dirty, inode)) rc = txCommit(tid, 1, &inode, wait ? COMMIT_SYNC : 0); txEnd(tid); mutex_unlock(&JFS_IP(inode)->commit_mutex); return rc; } int jfs_write_inode(struct inode *inode, struct writeback_control *wbc) { int wait = wbc->sync_mode == WB_SYNC_ALL; if (inode->i_nlink == 0) return 0; /* * If COMMIT_DIRTY is not set, the inode isn't really dirty. * It has been committed since the last change, but was still * on the dirty inode list. */ if (!test_cflag(COMMIT_Dirty, inode)) { /* Make sure committed changes hit the disk */ jfs_flush_journal(JFS_SBI(inode->i_sb)->log, wait); return 0; } if (jfs_commit_inode(inode, wait)) { jfs_err("jfs_write_inode: jfs_commit_inode failed!"); return -EIO; } else return 0; } void jfs_evict_inode(struct inode *inode) { struct jfs_inode_info *ji = JFS_IP(inode); jfs_info("In jfs_evict_inode, inode = 0x%p", inode); if (!inode->i_nlink && !is_bad_inode(inode)) { dquot_initialize(inode); if (JFS_IP(inode)->fileset == FILESYSTEM_I) { struct inode *ipimap = JFS_SBI(inode->i_sb)->ipimap; truncate_inode_pages_final(&inode->i_data); if (test_cflag(COMMIT_Freewmap, inode)) jfs_free_zero_link(inode); if (ipimap && JFS_IP(ipimap)->i_imap) diFree(inode); /* * Free the inode from the quota allocation. */ dquot_free_inode(inode); } } else { truncate_inode_pages_final(&inode->i_data); } clear_inode(inode); dquot_drop(inode); BUG_ON(!list_empty(&ji->anon_inode_list)); spin_lock_irq(&ji->ag_lock); if (ji->active_ag != -1) { struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap; atomic_dec(&bmap->db_active[ji->active_ag]); ji->active_ag = -1; } spin_unlock_irq(&ji->ag_lock); } void jfs_dirty_inode(struct inode *inode, int flags) { static int noisy = 5; if (isReadOnly(inode)) { if (!special_file(inode->i_mode) && noisy) { /* kernel allows writes to devices on read-only * partitions and may try to mark inode dirty */ jfs_err("jfs_dirty_inode called on read-only volume"); jfs_err("Is remount racy?"); noisy--; } return; } set_cflag(COMMIT_Dirty, inode); } int jfs_get_block(struct inode *ip, sector_t lblock, struct buffer_head *bh_result, int create) { s64 lblock64 = lblock; int rc = 0; xad_t xad; s64 xaddr; int xflag; s32 xlen = bh_result->b_size >> ip->i_blkbits; /* * Take appropriate lock on inode */ if (create) IWRITE_LOCK(ip, RDWRLOCK_NORMAL); else IREAD_LOCK(ip, RDWRLOCK_NORMAL); if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) && (!xtLookup(ip, lblock64, xlen, &xflag, &xaddr, &xlen, 0)) && xaddr) { if (xflag & XAD_NOTRECORDED) { if (!create) /* * Allocated but not recorded, read treats * this as a hole */ goto unlock; XADoffset(&xad, lblock64); XADlength(&xad, xlen); XADaddress(&xad, xaddr); rc = extRecord(ip, &xad); if (rc) goto unlock; set_buffer_new(bh_result); } map_bh(bh_result, ip->i_sb, xaddr); bh_result->b_size = xlen << ip->i_blkbits; goto unlock; } if (!create) goto unlock; /* * Allocate a new block */ if ((rc = extHint(ip, lblock64 << ip->i_sb->s_blocksize_bits, &xad))) goto unlock; rc = extAlloc(ip, xlen, lblock64, &xad, false); if (rc) goto unlock; set_buffer_new(bh_result); map_bh(bh_result, ip->i_sb, addressXAD(&xad)); bh_result->b_size = lengthXAD(&xad) << ip->i_blkbits; unlock: /* * Release lock on inode */ if (create) IWRITE_UNLOCK(ip); else IREAD_UNLOCK(ip); return rc; } static int jfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { return mpage_writepages(mapping, wbc, jfs_get_block); } static int jfs_read_folio(struct file *file, struct folio *folio) { return mpage_read_folio(folio, jfs_get_block); } static void jfs_readahead(struct readahead_control *rac) { mpage_readahead(rac, jfs_get_block); } static void jfs_write_failed(struct address_space *mapping, loff_t to) { struct inode *inode = mapping->host; if (to > inode->i_size) { truncate_pagecache(inode, inode->i_size); jfs_truncate(inode); } } static int jfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, struct page **pagep, void **fsdata) { int ret; ret = block_write_begin(mapping, pos, len, pagep, jfs_get_block); if (unlikely(ret)) jfs_write_failed(mapping, pos + len); return ret; } static int jfs_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { int ret; ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); if (ret < len) jfs_write_failed(mapping, pos + len); return ret; } static sector_t jfs_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping, block, jfs_get_block); } static ssize_t jfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = file->f_mapping->host; size_t count = iov_iter_count(iter); ssize_t ret; ret = blockdev_direct_IO(iocb, inode, iter, jfs_get_block); /* * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. */ if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); loff_t end = iocb->ki_pos + count; if (end > isize) jfs_write_failed(mapping, end); } return ret; } const struct address_space_operations jfs_aops = { .dirty_folio = block_dirty_folio, .invalidate_folio = block_invalidate_folio, .read_folio = jfs_read_folio, .readahead = jfs_readahead, .writepages = jfs_writepages, .write_begin = jfs_write_begin, .write_end = jfs_write_end, .bmap = jfs_bmap, .direct_IO = jfs_direct_IO, .migrate_folio = buffer_migrate_folio, }; /* * Guts of jfs_truncate. Called with locks already held. Can be called * with directory for truncating directory index table. */ void jfs_truncate_nolock(struct inode *ip, loff_t length) { loff_t newsize; tid_t tid; ASSERT(length >= 0); if (test_cflag(COMMIT_Nolink, ip)) { xtTruncate(0, ip, length, COMMIT_WMAP); return; } do { tid = txBegin(ip->i_sb, 0); /* * The commit_mutex cannot be taken before txBegin. * txBegin may block and there is a chance the inode * could be marked dirty and need to be committed * before txBegin unblocks */ mutex_lock(&JFS_IP(ip)->commit_mutex); newsize = xtTruncate(tid, ip, length, COMMIT_TRUNCATE | COMMIT_PWMAP); if (newsize < 0) { txEnd(tid); mutex_unlock(&JFS_IP(ip)->commit_mutex); break; } inode_set_mtime_to_ts(ip, inode_set_ctime_current(ip)); mark_inode_dirty(ip); txCommit(tid, 1, &ip, 0); txEnd(tid); mutex_unlock(&JFS_IP(ip)->commit_mutex); } while (newsize > length); /* Truncate isn't always atomic */ } void jfs_truncate(struct inode *ip) { jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size); block_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block); IWRITE_LOCK(ip, RDWRLOCK_NORMAL); jfs_truncate_nolock(ip, ip->i_size); IWRITE_UNLOCK(ip); }
8 7 1 2 57 240 245 17 138 5 11 75 7 47 47 14 3 46 15 90 88 14 34 97 75 72 18 20 7 50 17 50 50 17 7 31 16 20 79 79 79 79 42 42 42 10 39 40 41 40 97 97 97 25 79 36 8 8 4 3 1 4 1 7 8 157 79 92 157 103 103 86 13 29 91 4 12 98 13 79 79 79 79 4 2 3 79 49 49 78 78 20 70 4 3 78 77 97 16 90 97 40 2 37 1 1 7 1 1 10 37 10 33 43 74 71 11 86 11 28 78 10 11 10 1 1 10 2 1 2 12 3 120 14 3 3 3 1 1 3 225 225 196 184 28 120 120 70 70 3 3 57 56 5 97 97 68 69 6 3 2 1 14 13 17 17 14 3 11 2 17 37 37 14 14 37 121 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 // SPDX-License-Identifier: GPL-2.0 /* * f2fs extent cache support * * Copyright (c) 2015 Motorola Mobility * Copyright (c) 2015 Samsung Electronics * Authors: Jaegeuk Kim <jaegeuk@kernel.org> * Chao Yu <chao2.yu@samsung.com> * * block_age-based extent cache added by: * Copyright (c) 2022 xiaomi Co., Ltd. * http://www.xiaomi.com/ */ #include <linux/fs.h> #include <linux/f2fs_fs.h> #include "f2fs.h" #include "node.h" #include <trace/events/f2fs.h> bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; struct extent_info ei; get_read_extent_info(&ei, i_ext); if (!ei.len) return true; if (!f2fs_is_valid_blkaddr(sbi, ei.blk, DATA_GENERIC_ENHANCE) || !f2fs_is_valid_blkaddr(sbi, ei.blk + ei.len - 1, DATA_GENERIC_ENHANCE)) { f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix", __func__, inode->i_ino, ei.blk, ei.fofs, ei.len); return false; } return true; } static void __set_extent_info(struct extent_info *ei, unsigned int fofs, unsigned int len, block_t blk, bool keep_clen, unsigned long age, unsigned long last_blocks, enum extent_type type) { ei->fofs = fofs; ei->len = len; if (type == EX_READ) { ei->blk = blk; if (keep_clen) return; #ifdef CONFIG_F2FS_FS_COMPRESSION ei->c_len = 0; #endif } else if (type == EX_BLOCK_AGE) { ei->age = age; ei->last_blocks = last_blocks; } } static bool __init_may_extent_tree(struct inode *inode, enum extent_type type) { if (type == EX_READ) return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) && S_ISREG(inode->i_mode); if (type == EX_BLOCK_AGE) return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) && (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)); return false; } static bool __may_extent_tree(struct inode *inode, enum extent_type type) { /* * for recovered files during mount do not create extents * if shrinker is not registered. */ if (list_empty(&F2FS_I_SB(inode)->s_list)) return false; if (!__init_may_extent_tree(inode, type)) return false; if (type == EX_READ) { if (is_inode_flag_set(inode, FI_NO_EXTENT)) return false; if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && !f2fs_sb_has_readonly(F2FS_I_SB(inode))) return false; } else if (type == EX_BLOCK_AGE) { if (is_inode_flag_set(inode, FI_COMPRESSED_FILE)) return false; if (file_is_cold(inode)) return false; } return true; } static void __try_update_largest_extent(struct extent_tree *et, struct extent_node *en) { if (et->type != EX_READ) return; if (en->ei.len <= et->largest.len) return; et->largest = en->ei; et->largest_updated = true; } static bool __is_extent_mergeable(struct extent_info *back, struct extent_info *front, enum extent_type type) { if (type == EX_READ) { #ifdef CONFIG_F2FS_FS_COMPRESSION if (back->c_len && back->len != back->c_len) return false; if (front->c_len && front->len != front->c_len) return false; #endif return (back->fofs + back->len == front->fofs && back->blk + back->len == front->blk); } else if (type == EX_BLOCK_AGE) { return (back->fofs + back->len == front->fofs && abs(back->age - front->age) <= SAME_AGE_REGION && abs(back->last_blocks - front->last_blocks) <= SAME_AGE_REGION); } return false; } static bool __is_back_mergeable(struct extent_info *cur, struct extent_info *back, enum extent_type type) { return __is_extent_mergeable(back, cur, type); } static bool __is_front_mergeable(struct extent_info *cur, struct extent_info *front, enum extent_type type) { return __is_extent_mergeable(cur, front, type); } static struct extent_node *__lookup_extent_node(struct rb_root_cached *root, struct extent_node *cached_en, unsigned int fofs) { struct rb_node *node = root->rb_root.rb_node; struct extent_node *en; /* check a cached entry */ if (cached_en && cached_en->ei.fofs <= fofs && cached_en->ei.fofs + cached_en->ei.len > fofs) return cached_en; /* check rb_tree */ while (node) { en = rb_entry(node, struct extent_node, rb_node); if (fofs < en->ei.fofs) node = node->rb_left; else if (fofs >= en->ei.fofs + en->ei.len) node = node->rb_right; else return en; } return NULL; } /* * lookup rb entry in position of @fofs in rb-tree, * if hit, return the entry, otherwise, return NULL * @prev_ex: extent before fofs * @next_ex: extent after fofs * @insert_p: insert point for new extent at fofs * in order to simplify the insertion after. * tree must stay unchanged between lookup and insertion. */ static struct extent_node *__lookup_extent_node_ret(struct rb_root_cached *root, struct extent_node *cached_en, unsigned int fofs, struct extent_node **prev_entry, struct extent_node **next_entry, struct rb_node ***insert_p, struct rb_node **insert_parent, bool *leftmost) { struct rb_node **pnode = &root->rb_root.rb_node; struct rb_node *parent = NULL, *tmp_node; struct extent_node *en = cached_en; *insert_p = NULL; *insert_parent = NULL; *prev_entry = NULL; *next_entry = NULL; if (RB_EMPTY_ROOT(&root->rb_root)) return NULL; if (en && en->ei.fofs <= fofs && en->ei.fofs + en->ei.len > fofs) goto lookup_neighbors; *leftmost = true; while (*pnode) { parent = *pnode; en = rb_entry(*pnode, struct extent_node, rb_node); if (fofs < en->ei.fofs) { pnode = &(*pnode)->rb_left; } else if (fofs >= en->ei.fofs + en->ei.len) { pnode = &(*pnode)->rb_right; *leftmost = false; } else { goto lookup_neighbors; } } *insert_p = pnode; *insert_parent = parent; en = rb_entry(parent, struct extent_node, rb_node); tmp_node = parent; if (parent && fofs > en->ei.fofs) tmp_node = rb_next(parent); *next_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node); tmp_node = parent; if (parent && fofs < en->ei.fofs) tmp_node = rb_prev(parent); *prev_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node); return NULL; lookup_neighbors: if (fofs == en->ei.fofs) { /* lookup prev node for merging backward later */ tmp_node = rb_prev(&en->rb_node); *prev_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node); } if (fofs == en->ei.fofs + en->ei.len - 1) { /* lookup next node for merging frontward later */ tmp_node = rb_next(&en->rb_node); *next_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node); } return en; } static struct kmem_cache *extent_tree_slab; static struct kmem_cache *extent_node_slab; static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_info *ei, struct rb_node *parent, struct rb_node **p, bool leftmost) { struct extent_tree_info *eti = &sbi->extent_tree[et->type]; struct extent_node *en; en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi); if (!en) return NULL; en->ei = *ei; INIT_LIST_HEAD(&en->list); en->et = et; rb_link_node(&en->rb_node, parent, p); rb_insert_color_cached(&en->rb_node, &et->root, leftmost); atomic_inc(&et->node_cnt); atomic_inc(&eti->total_ext_node); return en; } static void __detach_extent_node(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_node *en) { struct extent_tree_info *eti = &sbi->extent_tree[et->type]; rb_erase_cached(&en->rb_node, &et->root); atomic_dec(&et->node_cnt); atomic_dec(&eti->total_ext_node); if (et->cached_en == en) et->cached_en = NULL; kmem_cache_free(extent_node_slab, en); } /* * Flow to release an extent_node: * 1. list_del_init * 2. __detach_extent_node * 3. kmem_cache_free. */ static void __release_extent_node(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_node *en) { struct extent_tree_info *eti = &sbi->extent_tree[et->type]; spin_lock(&eti->extent_lock); f2fs_bug_on(sbi, list_empty(&en->list)); list_del_init(&en->list); spin_unlock(&eti->extent_lock); __detach_extent_node(sbi, et, en); } static struct extent_tree *__grab_extent_tree(struct inode *inode, enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree_info *eti = &sbi->extent_tree[type]; struct extent_tree *et; nid_t ino = inode->i_ino; mutex_lock(&eti->extent_tree_lock); et = radix_tree_lookup(&eti->extent_tree_root, ino); if (!et) { et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS, true, NULL); f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et); memset(et, 0, sizeof(struct extent_tree)); et->ino = ino; et->type = type; et->root = RB_ROOT_CACHED; et->cached_en = NULL; rwlock_init(&et->lock); INIT_LIST_HEAD(&et->list); atomic_set(&et->node_cnt, 0); atomic_inc(&eti->total_ext_tree); } else { atomic_dec(&eti->total_zombie_tree); list_del_init(&et->list); } mutex_unlock(&eti->extent_tree_lock); /* never died until evict_inode */ F2FS_I(inode)->extent_tree[type] = et; return et; } static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, struct extent_tree *et) { struct rb_node *node, *next; struct extent_node *en; unsigned int count = atomic_read(&et->node_cnt); node = rb_first_cached(&et->root); while (node) { next = rb_next(node); en = rb_entry(node, struct extent_node, rb_node); __release_extent_node(sbi, et, en); node = next; } return count - atomic_read(&et->node_cnt); } static void __drop_largest_extent(struct extent_tree *et, pgoff_t fofs, unsigned int len) { if (fofs < et->largest.fofs + et->largest.len && fofs + len > et->largest.fofs) { et->largest.len = 0; et->largest_updated = true; } } void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree_info *eti = &sbi->extent_tree[EX_READ]; struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; struct extent_tree *et; struct extent_node *en; struct extent_info ei; if (!__may_extent_tree(inode, EX_READ)) { /* drop largest read extent */ if (i_ext->len) { f2fs_wait_on_page_writeback(ipage, NODE, true, true); i_ext->len = 0; set_page_dirty(ipage); } set_inode_flag(inode, FI_NO_EXTENT); return; } et = __grab_extent_tree(inode, EX_READ); get_read_extent_info(&ei, i_ext); write_lock(&et->lock); if (atomic_read(&et->node_cnt) || !ei.len) goto skip; en = __attach_extent_node(sbi, et, &ei, NULL, &et->root.rb_root.rb_node, true); if (en) { et->largest = en->ei; et->cached_en = en; spin_lock(&eti->extent_lock); list_add_tail(&en->list, &eti->extent_list); spin_unlock(&eti->extent_lock); } skip: /* Let's drop, if checkpoint got corrupted. */ if (f2fs_cp_error(sbi)) { et->largest.len = 0; et->largest_updated = true; } write_unlock(&et->lock); } void f2fs_init_age_extent_tree(struct inode *inode) { if (!__init_may_extent_tree(inode, EX_BLOCK_AGE)) return; __grab_extent_tree(inode, EX_BLOCK_AGE); } void f2fs_init_extent_tree(struct inode *inode) { /* initialize read cache */ if (__init_may_extent_tree(inode, EX_READ)) __grab_extent_tree(inode, EX_READ); /* initialize block age cache */ if (__init_may_extent_tree(inode, EX_BLOCK_AGE)) __grab_extent_tree(inode, EX_BLOCK_AGE); } static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, struct extent_info *ei, enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree_info *eti = &sbi->extent_tree[type]; struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; struct extent_node *en; bool ret = false; if (!et) return false; trace_f2fs_lookup_extent_tree_start(inode, pgofs, type); read_lock(&et->lock); if (type == EX_READ && et->largest.fofs <= pgofs && et->largest.fofs + et->largest.len > pgofs) { *ei = et->largest; ret = true; stat_inc_largest_node_hit(sbi); goto out; } en = __lookup_extent_node(&et->root, et->cached_en, pgofs); if (!en) goto out; if (en == et->cached_en) stat_inc_cached_node_hit(sbi, type); else stat_inc_rbtree_node_hit(sbi, type); *ei = en->ei; spin_lock(&eti->extent_lock); if (!list_empty(&en->list)) { list_move_tail(&en->list, &eti->extent_list); et->cached_en = en; } spin_unlock(&eti->extent_lock); ret = true; out: stat_inc_total_hit(sbi, type); read_unlock(&et->lock); if (type == EX_READ) trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei); else if (type == EX_BLOCK_AGE) trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei); return ret; } static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_info *ei, struct extent_node *prev_ex, struct extent_node *next_ex) { struct extent_tree_info *eti = &sbi->extent_tree[et->type]; struct extent_node *en = NULL; if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) { prev_ex->ei.len += ei->len; ei = &prev_ex->ei; en = prev_ex; } if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) { next_ex->ei.fofs = ei->fofs; next_ex->ei.len += ei->len; if (et->type == EX_READ) next_ex->ei.blk = ei->blk; if (en) __release_extent_node(sbi, et, prev_ex); en = next_ex; } if (!en) return NULL; __try_update_largest_extent(et, en); spin_lock(&eti->extent_lock); if (!list_empty(&en->list)) { list_move_tail(&en->list, &eti->extent_list); et->cached_en = en; } spin_unlock(&eti->extent_lock); return en; } static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_info *ei, struct rb_node **insert_p, struct rb_node *insert_parent, bool leftmost) { struct extent_tree_info *eti = &sbi->extent_tree[et->type]; struct rb_node **p = &et->root.rb_root.rb_node; struct rb_node *parent = NULL; struct extent_node *en = NULL; if (insert_p && insert_parent) { parent = insert_parent; p = insert_p; goto do_insert; } leftmost = true; /* look up extent_node in the rb tree */ while (*p) { parent = *p; en = rb_entry(parent, struct extent_node, rb_node); if (ei->fofs < en->ei.fofs) { p = &(*p)->rb_left; } else if (ei->fofs >= en->ei.fofs + en->ei.len) { p = &(*p)->rb_right; leftmost = false; } else { f2fs_bug_on(sbi, 1); } } do_insert: en = __attach_extent_node(sbi, et, ei, parent, p, leftmost); if (!en) return NULL; __try_update_largest_extent(et, en); /* update in global extent list */ spin_lock(&eti->extent_lock); list_add_tail(&en->list, &eti->extent_list); et->cached_en = en; spin_unlock(&eti->extent_lock); return en; } static void __update_extent_tree_range(struct inode *inode, struct extent_info *tei, enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; struct extent_node *en = NULL, *en1 = NULL; struct extent_node *prev_en = NULL, *next_en = NULL; struct extent_info ei, dei, prev; struct rb_node **insert_p = NULL, *insert_parent = NULL; unsigned int fofs = tei->fofs, len = tei->len; unsigned int end = fofs + len; bool updated = false; bool leftmost = false; if (!et) return; if (type == EX_READ) trace_f2fs_update_read_extent_tree_range(inode, fofs, len, tei->blk, 0); else if (type == EX_BLOCK_AGE) trace_f2fs_update_age_extent_tree_range(inode, fofs, len, tei->age, tei->last_blocks); write_lock(&et->lock); if (type == EX_READ) { if (is_inode_flag_set(inode, FI_NO_EXTENT)) { write_unlock(&et->lock); return; } prev = et->largest; dei.len = 0; /* * drop largest extent before lookup, in case it's already * been shrunk from extent tree */ __drop_largest_extent(et, fofs, len); } /* 1. lookup first extent node in range [fofs, fofs + len - 1] */ en = __lookup_extent_node_ret(&et->root, et->cached_en, fofs, &prev_en, &next_en, &insert_p, &insert_parent, &leftmost); if (!en) en = next_en; /* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */ while (en && en->ei.fofs < end) { unsigned int org_end; int parts = 0; /* # of parts current extent split into */ next_en = en1 = NULL; dei = en->ei; org_end = dei.fofs + dei.len; f2fs_bug_on(sbi, fofs >= org_end); if (fofs > dei.fofs && (type != EX_READ || fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) { en->ei.len = fofs - en->ei.fofs; prev_en = en; parts = 1; } if (end < org_end && (type != EX_READ || org_end - end >= F2FS_MIN_EXTENT_LEN)) { if (parts) { __set_extent_info(&ei, end, org_end - end, end - dei.fofs + dei.blk, false, dei.age, dei.last_blocks, type); en1 = __insert_extent_tree(sbi, et, &ei, NULL, NULL, true); next_en = en1; } else { __set_extent_info(&en->ei, end, en->ei.len - (end - dei.fofs), en->ei.blk + (end - dei.fofs), true, dei.age, dei.last_blocks, type); next_en = en; } parts++; } if (!next_en) { struct rb_node *node = rb_next(&en->rb_node); next_en = rb_entry_safe(node, struct extent_node, rb_node); } if (parts) __try_update_largest_extent(et, en); else __release_extent_node(sbi, et, en); /* * if original extent is split into zero or two parts, extent * tree has been altered by deletion or insertion, therefore * invalidate pointers regard to tree. */ if (parts != 1) { insert_p = NULL; insert_parent = NULL; } en = next_en; } if (type == EX_BLOCK_AGE) goto update_age_extent_cache; /* 3. update extent in read extent cache */ BUG_ON(type != EX_READ); if (tei->blk) { __set_extent_info(&ei, fofs, len, tei->blk, false, 0, 0, EX_READ); if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) __insert_extent_tree(sbi, et, &ei, insert_p, insert_parent, leftmost); /* give up extent_cache, if split and small updates happen */ if (dei.len >= 1 && prev.len < F2FS_MIN_EXTENT_LEN && et->largest.len < F2FS_MIN_EXTENT_LEN) { et->largest.len = 0; et->largest_updated = true; set_inode_flag(inode, FI_NO_EXTENT); } } if (is_inode_flag_set(inode, FI_NO_EXTENT)) __free_extent_tree(sbi, et); if (et->largest_updated) { et->largest_updated = false; updated = true; } goto out_read_extent_cache; update_age_extent_cache: if (!tei->last_blocks) goto out_read_extent_cache; __set_extent_info(&ei, fofs, len, 0, false, tei->age, tei->last_blocks, EX_BLOCK_AGE); if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) __insert_extent_tree(sbi, et, &ei, insert_p, insert_parent, leftmost); out_read_extent_cache: write_unlock(&et->lock); if (updated) f2fs_mark_inode_dirty_sync(inode, true); } #ifdef CONFIG_F2FS_FS_COMPRESSION void f2fs_update_read_extent_tree_range_compressed(struct inode *inode, pgoff_t fofs, block_t blkaddr, unsigned int llen, unsigned int c_len) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; struct extent_node *en = NULL; struct extent_node *prev_en = NULL, *next_en = NULL; struct extent_info ei; struct rb_node **insert_p = NULL, *insert_parent = NULL; bool leftmost = false; trace_f2fs_update_read_extent_tree_range(inode, fofs, llen, blkaddr, c_len); /* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */ if (is_inode_flag_set(inode, FI_NO_EXTENT)) return; write_lock(&et->lock); en = __lookup_extent_node_ret(&et->root, et->cached_en, fofs, &prev_en, &next_en, &insert_p, &insert_parent, &leftmost); if (en) goto unlock_out; __set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ); ei.c_len = c_len; if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) __insert_extent_tree(sbi, et, &ei, insert_p, insert_parent, leftmost); unlock_out: write_unlock(&et->lock); } #endif static unsigned long long __calculate_block_age(struct f2fs_sb_info *sbi, unsigned long long new, unsigned long long old) { unsigned int rem_old, rem_new; unsigned long long res; unsigned int weight = sbi->last_age_weight; res = div_u64_rem(new, 100, &rem_new) * (100 - weight) + div_u64_rem(old, 100, &rem_old) * weight; if (rem_new) res += rem_new * (100 - weight) / 100; if (rem_old) res += rem_old * weight / 100; return res; } /* This returns a new age and allocated blocks in ei */ static int __get_new_block_age(struct inode *inode, struct extent_info *ei, block_t blkaddr) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); loff_t f_size = i_size_read(inode); unsigned long long cur_blocks = atomic64_read(&sbi->allocated_data_blocks); struct extent_info tei = *ei; /* only fofs and len are valid */ /* * When I/O is not aligned to a PAGE_SIZE, update will happen to the last * file block even in seq write. So don't record age for newly last file * block here. */ if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) && blkaddr == NEW_ADDR) return -EINVAL; if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) { unsigned long long cur_age; if (cur_blocks >= tei.last_blocks) cur_age = cur_blocks - tei.last_blocks; else /* allocated_data_blocks overflow */ cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks; if (tei.age) ei->age = __calculate_block_age(sbi, cur_age, tei.age); else ei->age = cur_age; ei->last_blocks = cur_blocks; WARN_ON(ei->age > cur_blocks); return 0; } f2fs_bug_on(sbi, blkaddr == NULL_ADDR); /* the data block was allocated for the first time */ if (blkaddr == NEW_ADDR) goto out; if (__is_valid_data_blkaddr(blkaddr) && !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) return -EINVAL; out: /* * init block age with zero, this can happen when the block age extent * was reclaimed due to memory constraint or system reboot */ ei->age = 0; ei->last_blocks = cur_blocks; return 0; } static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type) { struct extent_info ei = {}; if (!__may_extent_tree(dn->inode, type)) return; ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + dn->ofs_in_node; ei.len = 1; if (type == EX_READ) { if (dn->data_blkaddr == NEW_ADDR) ei.blk = NULL_ADDR; else ei.blk = dn->data_blkaddr; } else if (type == EX_BLOCK_AGE) { if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr)) return; } __update_extent_tree_range(dn->inode, &ei, type); } static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink, enum extent_type type) { struct extent_tree_info *eti = &sbi->extent_tree[type]; struct extent_tree *et, *next; struct extent_node *en; unsigned int node_cnt = 0, tree_cnt = 0; int remained; if (!atomic_read(&eti->total_zombie_tree)) goto free_node; if (!mutex_trylock(&eti->extent_tree_lock)) goto out; /* 1. remove unreferenced extent tree */ list_for_each_entry_safe(et, next, &eti->zombie_list, list) { if (atomic_read(&et->node_cnt)) { write_lock(&et->lock); node_cnt += __free_extent_tree(sbi, et); write_unlock(&et->lock); } f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); list_del_init(&et->list); radix_tree_delete(&eti->extent_tree_root, et->ino); kmem_cache_free(extent_tree_slab, et); atomic_dec(&eti->total_ext_tree); atomic_dec(&eti->total_zombie_tree); tree_cnt++; if (node_cnt + tree_cnt >= nr_shrink) goto unlock_out; cond_resched(); } mutex_unlock(&eti->extent_tree_lock); free_node: /* 2. remove LRU extent entries */ if (!mutex_trylock(&eti->extent_tree_lock)) goto out; remained = nr_shrink - (node_cnt + tree_cnt); spin_lock(&eti->extent_lock); for (; remained > 0; remained--) { if (list_empty(&eti->extent_list)) break; en = list_first_entry(&eti->extent_list, struct extent_node, list); et = en->et; if (!write_trylock(&et->lock)) { /* refresh this extent node's position in extent list */ list_move_tail(&en->list, &eti->extent_list); continue; } list_del_init(&en->list); spin_unlock(&eti->extent_lock); __detach_extent_node(sbi, et, en); write_unlock(&et->lock); node_cnt++; spin_lock(&eti->extent_lock); } spin_unlock(&eti->extent_lock); unlock_out: mutex_unlock(&eti->extent_tree_lock); out: trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type); return node_cnt + tree_cnt; } /* read extent cache operations */ bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs, struct extent_info *ei) { if (!__may_extent_tree(inode, EX_READ)) return false; return __lookup_extent_tree(inode, pgofs, ei, EX_READ); } bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index, block_t *blkaddr) { struct extent_info ei = {}; if (!f2fs_lookup_read_extent_cache(inode, index, &ei)) return false; *blkaddr = ei.blk + index - ei.fofs; return true; } void f2fs_update_read_extent_cache(struct dnode_of_data *dn) { return __update_extent_cache(dn, EX_READ); } void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn, pgoff_t fofs, block_t blkaddr, unsigned int len) { struct extent_info ei = { .fofs = fofs, .len = len, .blk = blkaddr, }; if (!__may_extent_tree(dn->inode, EX_READ)) return; __update_extent_tree_range(dn->inode, &ei, EX_READ); } unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) { if (!test_opt(sbi, READ_EXTENT_CACHE)) return 0; return __shrink_extent_tree(sbi, nr_shrink, EX_READ); } /* block age extent cache operations */ bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs, struct extent_info *ei) { if (!__may_extent_tree(inode, EX_BLOCK_AGE)) return false; return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE); } void f2fs_update_age_extent_cache(struct dnode_of_data *dn) { return __update_extent_cache(dn, EX_BLOCK_AGE); } void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn, pgoff_t fofs, unsigned int len) { struct extent_info ei = { .fofs = fofs, .len = len, }; if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE)) return; __update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE); } unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) { if (!test_opt(sbi, AGE_EXTENT_CACHE)) return 0; return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE); } static unsigned int __destroy_extent_node(struct inode *inode, enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; unsigned int node_cnt = 0; if (!et || !atomic_read(&et->node_cnt)) return 0; write_lock(&et->lock); node_cnt = __free_extent_tree(sbi, et); write_unlock(&et->lock); return node_cnt; } void f2fs_destroy_extent_node(struct inode *inode) { __destroy_extent_node(inode, EX_READ); __destroy_extent_node(inode, EX_BLOCK_AGE); } static void __drop_extent_tree(struct inode *inode, enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; bool updated = false; if (!__may_extent_tree(inode, type)) return; write_lock(&et->lock); __free_extent_tree(sbi, et); if (type == EX_READ) { set_inode_flag(inode, FI_NO_EXTENT); if (et->largest.len) { et->largest.len = 0; updated = true; } } write_unlock(&et->lock); if (updated) f2fs_mark_inode_dirty_sync(inode, true); } void f2fs_drop_extent_tree(struct inode *inode) { __drop_extent_tree(inode, EX_READ); __drop_extent_tree(inode, EX_BLOCK_AGE); } static void __destroy_extent_tree(struct inode *inode, enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree_info *eti = &sbi->extent_tree[type]; struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; unsigned int node_cnt = 0; if (!et) return; if (inode->i_nlink && !is_bad_inode(inode) && atomic_read(&et->node_cnt)) { mutex_lock(&eti->extent_tree_lock); list_add_tail(&et->list, &eti->zombie_list); atomic_inc(&eti->total_zombie_tree); mutex_unlock(&eti->extent_tree_lock); return; } /* free all extent info belong to this extent tree */ node_cnt = __destroy_extent_node(inode, type); /* delete extent tree entry in radix tree */ mutex_lock(&eti->extent_tree_lock); f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); radix_tree_delete(&eti->extent_tree_root, inode->i_ino); kmem_cache_free(extent_tree_slab, et); atomic_dec(&eti->total_ext_tree); mutex_unlock(&eti->extent_tree_lock); F2FS_I(inode)->extent_tree[type] = NULL; trace_f2fs_destroy_extent_tree(inode, node_cnt, type); } void f2fs_destroy_extent_tree(struct inode *inode) { __destroy_extent_tree(inode, EX_READ); __destroy_extent_tree(inode, EX_BLOCK_AGE); } static void __init_extent_tree_info(struct extent_tree_info *eti) { INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO); mutex_init(&eti->extent_tree_lock); INIT_LIST_HEAD(&eti->extent_list); spin_lock_init(&eti->extent_lock); atomic_set(&eti->total_ext_tree, 0); INIT_LIST_HEAD(&eti->zombie_list); atomic_set(&eti->total_zombie_tree, 0); atomic_set(&eti->total_ext_node, 0); } void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi) { __init_extent_tree_info(&sbi->extent_tree[EX_READ]); __init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]); /* initialize for block age extents */ atomic64_set(&sbi->allocated_data_blocks, 0); sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD; sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD; sbi->last_age_weight = LAST_AGE_WEIGHT; } int __init f2fs_create_extent_cache(void) { extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree", sizeof(struct extent_tree)); if (!extent_tree_slab) return -ENOMEM; extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node", sizeof(struct extent_node)); if (!extent_node_slab) { kmem_cache_destroy(extent_tree_slab); return -ENOMEM; } return 0; } void f2fs_destroy_extent_cache(void) { kmem_cache_destroy(extent_node_slab); kmem_cache_destroy(extent_tree_slab); }
3 3 1 1 2 2 1 1 2 2 2 3 2 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 // SPDX-License-Identifier: GPL-2.0 /* * Simple file system for zoned block devices exposing zones as files. * * Copyright (C) 2019 Western Digital Corporation or its affiliates. */ #include <linux/module.h> #include <linux/pagemap.h> #include <linux/magic.h> #include <linux/iomap.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/statfs.h> #include <linux/writeback.h> #include <linux/quotaops.h> #include <linux/seq_file.h> #include <linux/uio.h> #include <linux/mman.h> #include <linux/sched/mm.h> #include <linux/crc32.h> #include <linux/task_io_accounting_ops.h> #include <linux/fs_parser.h> #include <linux/fs_context.h> #include "zonefs.h" #define CREATE_TRACE_POINTS #include "trace.h" /* * Get the name of a zone group directory. */ static const char *zonefs_zgroup_name(enum zonefs_ztype ztype) { switch (ztype) { case ZONEFS_ZTYPE_CNV: return "cnv"; case ZONEFS_ZTYPE_SEQ: return "seq"; default: WARN_ON_ONCE(1); return "???"; } } /* * Manage the active zone count. */ static void zonefs_account_active(struct super_block *sb, struct zonefs_zone *z) { struct zonefs_sb_info *sbi = ZONEFS_SB(sb); if (zonefs_zone_is_cnv(z)) return; /* * For zones that transitioned to the offline or readonly condition, * we only need to clear the active state. */ if (z->z_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY)) goto out; /* * If the zone is active, that is, if it is explicitly open or * partially written, check if it was already accounted as active. */ if ((z->z_flags & ZONEFS_ZONE_OPEN) || (z->z_wpoffset > 0 && z->z_wpoffset < z->z_capacity)) { if (!(z->z_flags & ZONEFS_ZONE_ACTIVE)) { z->z_flags |= ZONEFS_ZONE_ACTIVE; atomic_inc(&sbi->s_active_seq_files); } return; } out: /* The zone is not active. If it was, update the active count */ if (z->z_flags & ZONEFS_ZONE_ACTIVE) { z->z_flags &= ~ZONEFS_ZONE_ACTIVE; atomic_dec(&sbi->s_active_seq_files); } } /* * Manage the active zone count. Called with zi->i_truncate_mutex held. */ void zonefs_inode_account_active(struct inode *inode) { lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex); return zonefs_account_active(inode->i_sb, zonefs_inode_zone(inode)); } /* * Execute a zone management operation. */ static int zonefs_zone_mgmt(struct super_block *sb, struct zonefs_zone *z, enum req_op op) { int ret; /* * With ZNS drives, closing an explicitly open zone that has not been * written will change the zone state to "closed", that is, the zone * will remain active. Since this can then cause failure of explicit * open operation on other zones if the drive active zone resources * are exceeded, make sure that the zone does not remain active by * resetting it. */ if (op == REQ_OP_ZONE_CLOSE && !z->z_wpoffset) op = REQ_OP_ZONE_RESET; trace_zonefs_zone_mgmt(sb, z, op); ret = blkdev_zone_mgmt(sb->s_bdev, op, z->z_sector, z->z_size >> SECTOR_SHIFT); if (ret) { zonefs_err(sb, "Zone management operation %s at %llu failed %d\n", blk_op_str(op), z->z_sector, ret); return ret; } return 0; } int zonefs_inode_zone_mgmt(struct inode *inode, enum req_op op) { lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex); return zonefs_zone_mgmt(inode->i_sb, zonefs_inode_zone(inode), op); } void zonefs_i_size_write(struct inode *inode, loff_t isize) { struct zonefs_zone *z = zonefs_inode_zone(inode); i_size_write(inode, isize); /* * A full zone is no longer open/active and does not need * explicit closing. */ if (isize >= z->z_capacity) { struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); if (z->z_flags & ZONEFS_ZONE_ACTIVE) atomic_dec(&sbi->s_active_seq_files); z->z_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE); } } void zonefs_update_stats(struct inode *inode, loff_t new_isize) { struct super_block *sb = inode->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); loff_t old_isize = i_size_read(inode); loff_t nr_blocks; if (new_isize == old_isize) return; spin_lock(&sbi->s_lock); /* * This may be called for an update after an IO error. * So beware of the values seen. */ if (new_isize < old_isize) { nr_blocks = (old_isize - new_isize) >> sb->s_blocksize_bits; if (sbi->s_used_blocks > nr_blocks) sbi->s_used_blocks -= nr_blocks; else sbi->s_used_blocks = 0; } else { sbi->s_used_blocks += (new_isize - old_isize) >> sb->s_blocksize_bits; if (sbi->s_used_blocks > sbi->s_blocks) sbi->s_used_blocks = sbi->s_blocks; } spin_unlock(&sbi->s_lock); } /* * Check a zone condition. Return the amount of written (and still readable) * data in the zone. */ static loff_t zonefs_check_zone_condition(struct super_block *sb, struct zonefs_zone *z, struct blk_zone *zone) { switch (zone->cond) { case BLK_ZONE_COND_OFFLINE: zonefs_warn(sb, "Zone %llu: offline zone\n", z->z_sector); z->z_flags |= ZONEFS_ZONE_OFFLINE; return 0; case BLK_ZONE_COND_READONLY: /* * The write pointer of read-only zones is invalid, so we cannot * determine the zone wpoffset (inode size). We thus keep the * zone wpoffset as is, which leads to an empty file * (wpoffset == 0) on mount. For a runtime error, this keeps * the inode size as it was when last updated so that the user * can recover data. */ zonefs_warn(sb, "Zone %llu: read-only zone\n", z->z_sector); z->z_flags |= ZONEFS_ZONE_READONLY; if (zonefs_zone_is_cnv(z)) return z->z_capacity; return z->z_wpoffset; case BLK_ZONE_COND_FULL: /* The write pointer of full zones is invalid. */ return z->z_capacity; default: if (zonefs_zone_is_cnv(z)) return z->z_capacity; return (zone->wp - zone->start) << SECTOR_SHIFT; } } /* * Check a zone condition and adjust its inode access permissions for * offline and readonly zones. */ static void zonefs_inode_update_mode(struct inode *inode) { struct zonefs_zone *z = zonefs_inode_zone(inode); if (z->z_flags & ZONEFS_ZONE_OFFLINE) { /* Offline zones cannot be read nor written */ inode->i_flags |= S_IMMUTABLE; inode->i_mode &= ~0777; } else if (z->z_flags & ZONEFS_ZONE_READONLY) { /* Readonly zones cannot be written */ inode->i_flags |= S_IMMUTABLE; if (z->z_flags & ZONEFS_ZONE_INIT_MODE) inode->i_mode &= ~0777; else inode->i_mode &= ~0222; } z->z_flags &= ~ZONEFS_ZONE_INIT_MODE; z->z_mode = inode->i_mode; } static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, void *data) { struct blk_zone *z = data; *z = *zone; return 0; } static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone, bool write) { struct zonefs_zone *z = zonefs_inode_zone(inode); struct super_block *sb = inode->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); loff_t isize, data_size; /* * Check the zone condition: if the zone is not "bad" (offline or * read-only), read errors are simply signaled to the IO issuer as long * as there is no inconsistency between the inode size and the amount of * data writen in the zone (data_size). */ data_size = zonefs_check_zone_condition(sb, z, zone); isize = i_size_read(inode); if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) && !write && isize == data_size) return; /* * At this point, we detected either a bad zone or an inconsistency * between the inode size and the amount of data written in the zone. * For the latter case, the cause may be a write IO error or an external * action on the device. Two error patterns exist: * 1) The inode size is lower than the amount of data in the zone: * a write operation partially failed and data was writen at the end * of the file. This can happen in the case of a large direct IO * needing several BIOs and/or write requests to be processed. * 2) The inode size is larger than the amount of data in the zone: * this can happen with a deferred write error with the use of the * device side write cache after getting successful write IO * completions. Other possibilities are (a) an external corruption, * e.g. an application reset the zone directly, or (b) the device * has a serious problem (e.g. firmware bug). * * In all cases, warn about inode size inconsistency and handle the * IO error according to the zone condition and to the mount options. */ if (isize != data_size) zonefs_warn(sb, "inode %lu: invalid size %lld (should be %lld)\n", inode->i_ino, isize, data_size); /* * First handle bad zones signaled by hardware. The mount options * errors=zone-ro and errors=zone-offline result in changing the * zone condition to read-only and offline respectively, as if the * condition was signaled by the hardware. */ if ((z->z_flags & ZONEFS_ZONE_OFFLINE) || (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)) { zonefs_warn(sb, "inode %lu: read/write access disabled\n", inode->i_ino); if (!(z->z_flags & ZONEFS_ZONE_OFFLINE)) z->z_flags |= ZONEFS_ZONE_OFFLINE; zonefs_inode_update_mode(inode); data_size = 0; } else if ((z->z_flags & ZONEFS_ZONE_READONLY) || (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)) { zonefs_warn(sb, "inode %lu: write access disabled\n", inode->i_ino); if (!(z->z_flags & ZONEFS_ZONE_READONLY)) z->z_flags |= ZONEFS_ZONE_READONLY; zonefs_inode_update_mode(inode); data_size = isize; } else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO && data_size > isize) { /* Do not expose garbage data */ data_size = isize; } /* * If the filesystem is mounted with the explicit-open mount option, we * need to clear the ZONEFS_ZONE_OPEN flag if the zone transitioned to * the read-only or offline condition, to avoid attempting an explicit * close of the zone when the inode file is closed. */ if ((sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) && (z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE))) z->z_flags &= ~ZONEFS_ZONE_OPEN; /* * If error=remount-ro was specified, any error result in remounting * the volume as read-only. */ if ((sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) && !sb_rdonly(sb)) { zonefs_warn(sb, "remounting filesystem read-only\n"); sb->s_flags |= SB_RDONLY; } /* * Update block usage stats and the inode size to prevent access to * invalid data. */ zonefs_update_stats(inode, data_size); zonefs_i_size_write(inode, data_size); z->z_wpoffset = data_size; zonefs_inode_account_active(inode); } /* * When an file IO error occurs, check the file zone to see if there is a change * in the zone condition (e.g. offline or read-only). For a failed write to a * sequential zone, the zone write pointer position must also be checked to * eventually correct the file size and zonefs inode write pointer offset * (which can be out of sync with the drive due to partial write failures). */ void __zonefs_io_error(struct inode *inode, bool write) { struct zonefs_zone *z = zonefs_inode_zone(inode); struct super_block *sb = inode->i_sb; unsigned int noio_flag; struct blk_zone zone; int ret; /* * Conventional zone have no write pointer and cannot become read-only * or offline. So simply fake a report for a single or aggregated zone * and let zonefs_handle_io_error() correct the zone inode information * according to the mount options. */ if (!zonefs_zone_is_seq(z)) { zone.start = z->z_sector; zone.len = z->z_size >> SECTOR_SHIFT; zone.wp = zone.start + zone.len; zone.type = BLK_ZONE_TYPE_CONVENTIONAL; zone.cond = BLK_ZONE_COND_NOT_WP; zone.capacity = zone.len; goto handle_io_error; } /* * Memory allocations in blkdev_report_zones() can trigger a memory * reclaim which may in turn cause a recursion into zonefs as well as * struct request allocations for the same device. The former case may * end up in a deadlock on the inode truncate mutex, while the latter * may prevent IO forward progress. Executing the report zones under * the GFP_NOIO context avoids both problems. */ noio_flag = memalloc_noio_save(); ret = blkdev_report_zones(sb->s_bdev, z->z_sector, 1, zonefs_io_error_cb, &zone); memalloc_noio_restore(noio_flag); if (ret != 1) { zonefs_err(sb, "Get inode %lu zone information failed %d\n", inode->i_ino, ret); zonefs_warn(sb, "remounting filesystem read-only\n"); sb->s_flags |= SB_RDONLY; return; } handle_io_error: zonefs_handle_io_error(inode, &zone, write); } static struct kmem_cache *zonefs_inode_cachep; static struct inode *zonefs_alloc_inode(struct super_block *sb) { struct zonefs_inode_info *zi; zi = alloc_inode_sb(sb, zonefs_inode_cachep, GFP_KERNEL); if (!zi) return NULL; inode_init_once(&zi->i_vnode); mutex_init(&zi->i_truncate_mutex); zi->i_wr_refcnt = 0; return &zi->i_vnode; } static void zonefs_free_inode(struct inode *inode) { kmem_cache_free(zonefs_inode_cachep, ZONEFS_I(inode)); } /* * File system stat. */ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); enum zonefs_ztype t; buf->f_type = ZONEFS_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_namelen = ZONEFS_NAME_MAX; spin_lock(&sbi->s_lock); buf->f_blocks = sbi->s_blocks; if (WARN_ON(sbi->s_used_blocks > sbi->s_blocks)) buf->f_bfree = 0; else buf->f_bfree = buf->f_blocks - sbi->s_used_blocks; buf->f_bavail = buf->f_bfree; for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) { if (sbi->s_zgroup[t].g_nr_zones) buf->f_files += sbi->s_zgroup[t].g_nr_zones + 1; } buf->f_ffree = 0; spin_unlock(&sbi->s_lock); buf->f_fsid = uuid_to_fsid(sbi->s_uuid.b); return 0; } enum { Opt_errors, Opt_explicit_open, }; struct zonefs_context { unsigned long s_mount_opts; }; static const struct constant_table zonefs_param_errors[] = { {"remount-ro", ZONEFS_MNTOPT_ERRORS_RO}, {"zone-ro", ZONEFS_MNTOPT_ERRORS_ZRO}, {"zone-offline", ZONEFS_MNTOPT_ERRORS_ZOL}, {"repair", ZONEFS_MNTOPT_ERRORS_REPAIR}, {} }; static const struct fs_parameter_spec zonefs_param_spec[] = { fsparam_enum ("errors", Opt_errors, zonefs_param_errors), fsparam_flag ("explicit-open", Opt_explicit_open), {} }; static int zonefs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct zonefs_context *ctx = fc->fs_private; struct fs_parse_result result; int opt; opt = fs_parse(fc, zonefs_param_spec, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_errors: ctx->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; ctx->s_mount_opts |= result.uint_32; break; case Opt_explicit_open: ctx->s_mount_opts |= ZONEFS_MNTOPT_EXPLICIT_OPEN; break; default: return -EINVAL; } return 0; } static int zonefs_show_options(struct seq_file *seq, struct dentry *root) { struct zonefs_sb_info *sbi = ZONEFS_SB(root->d_sb); if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) seq_puts(seq, ",errors=remount-ro"); if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) seq_puts(seq, ",errors=zone-ro"); if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL) seq_puts(seq, ",errors=zone-offline"); if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR) seq_puts(seq, ",errors=repair"); return 0; } static int zonefs_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct inode *inode = d_inode(dentry); int ret; if (unlikely(IS_IMMUTABLE(inode))) return -EPERM; ret = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (ret) return ret; /* * Since files and directories cannot be created nor deleted, do not * allow setting any write attributes on the sub-directories grouping * files by zone type. */ if ((iattr->ia_valid & ATTR_MODE) && S_ISDIR(inode->i_mode) && (iattr->ia_mode & 0222)) return -EPERM; if (((iattr->ia_valid & ATTR_UID) && !uid_eq(iattr->ia_uid, inode->i_uid)) || ((iattr->ia_valid & ATTR_GID) && !gid_eq(iattr->ia_gid, inode->i_gid))) { ret = dquot_transfer(&nop_mnt_idmap, inode, iattr); if (ret) return ret; } if (iattr->ia_valid & ATTR_SIZE) { ret = zonefs_file_truncate(inode, iattr->ia_size); if (ret) return ret; } setattr_copy(&nop_mnt_idmap, inode, iattr); if (S_ISREG(inode->i_mode)) { struct zonefs_zone *z = zonefs_inode_zone(inode); z->z_mode = inode->i_mode; z->z_uid = inode->i_uid; z->z_gid = inode->i_gid; } return 0; } static const struct inode_operations zonefs_file_inode_operations = { .setattr = zonefs_inode_setattr, }; static long zonefs_fname_to_fno(const struct qstr *fname) { const char *name = fname->name; unsigned int len = fname->len; long fno = 0, shift = 1; const char *rname; char c = *name; unsigned int i; /* * File names are always a base-10 number string without any * leading 0s. */ if (!isdigit(c)) return -ENOENT; if (len > 1 && c == '0') return -ENOENT; if (len == 1) return c - '0'; for (i = 0, rname = name + len - 1; i < len; i++, rname--) { c = *rname; if (!isdigit(c)) return -ENOENT; fno += (c - '0') * shift; shift *= 10; } return fno; } static struct inode *zonefs_get_file_inode(struct inode *dir, struct dentry *dentry) { struct zonefs_zone_group *zgroup = dir->i_private; struct super_block *sb = dir->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); struct zonefs_zone *z; struct inode *inode; ino_t ino; long fno; /* Get the file number from the file name */ fno = zonefs_fname_to_fno(&dentry->d_name); if (fno < 0) return ERR_PTR(fno); if (!zgroup->g_nr_zones || fno >= zgroup->g_nr_zones) return ERR_PTR(-ENOENT); z = &zgroup->g_zones[fno]; ino = z->z_sector >> sbi->s_zone_sectors_shift; inode = iget_locked(sb, ino); if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) { WARN_ON_ONCE(inode->i_private != z); return inode; } inode->i_ino = ino; inode->i_mode = z->z_mode; inode_set_mtime_to_ts(inode, inode_set_atime_to_ts(inode, inode_set_ctime_to_ts(inode, inode_get_ctime(dir)))); inode->i_uid = z->z_uid; inode->i_gid = z->z_gid; inode->i_size = z->z_wpoffset; inode->i_blocks = z->z_capacity >> SECTOR_SHIFT; inode->i_private = z; inode->i_op = &zonefs_file_inode_operations; inode->i_fop = &zonefs_file_operations; inode->i_mapping->a_ops = &zonefs_file_aops; mapping_set_large_folios(inode->i_mapping); /* Update the inode access rights depending on the zone condition */ zonefs_inode_update_mode(inode); unlock_new_inode(inode); return inode; } static struct inode *zonefs_get_zgroup_inode(struct super_block *sb, enum zonefs_ztype ztype) { struct inode *root = d_inode(sb->s_root); struct zonefs_sb_info *sbi = ZONEFS_SB(sb); struct inode *inode; ino_t ino = bdev_nr_zones(sb->s_bdev) + ztype + 1; inode = iget_locked(sb, ino); if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) return inode; inode->i_ino = ino; inode_init_owner(&nop_mnt_idmap, inode, root, S_IFDIR | 0555); inode->i_size = sbi->s_zgroup[ztype].g_nr_zones; inode_set_mtime_to_ts(inode, inode_set_atime_to_ts(inode, inode_set_ctime_to_ts(inode, inode_get_ctime(root)))); inode->i_private = &sbi->s_zgroup[ztype]; set_nlink(inode, 2); inode->i_op = &zonefs_dir_inode_operations; inode->i_fop = &zonefs_dir_operations; unlock_new_inode(inode); return inode; } static struct inode *zonefs_get_dir_inode(struct inode *dir, struct dentry *dentry) { struct super_block *sb = dir->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); const char *name = dentry->d_name.name; enum zonefs_ztype ztype; /* * We only need to check for the "seq" directory and * the "cnv" directory if we have conventional zones. */ if (dentry->d_name.len != 3) return ERR_PTR(-ENOENT); for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { if (sbi->s_zgroup[ztype].g_nr_zones && memcmp(name, zonefs_zgroup_name(ztype), 3) == 0) break; } if (ztype == ZONEFS_ZTYPE_MAX) return ERR_PTR(-ENOENT); return zonefs_get_zgroup_inode(sb, ztype); } static struct dentry *zonefs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode; if (dentry->d_name.len > ZONEFS_NAME_MAX) return ERR_PTR(-ENAMETOOLONG); if (dir == d_inode(dir->i_sb->s_root)) inode = zonefs_get_dir_inode(dir, dentry); else inode = zonefs_get_file_inode(dir, dentry); return d_splice_alias(inode, dentry); } static int zonefs_readdir_root(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); enum zonefs_ztype ztype = ZONEFS_ZTYPE_CNV; ino_t base_ino = bdev_nr_zones(sb->s_bdev) + 1; if (ctx->pos >= inode->i_size) return 0; if (!dir_emit_dots(file, ctx)) return 0; if (ctx->pos == 2) { if (!sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones) ztype = ZONEFS_ZTYPE_SEQ; if (!dir_emit(ctx, zonefs_zgroup_name(ztype), 3, base_ino + ztype, DT_DIR)) return 0; ctx->pos++; } if (ctx->pos == 3 && ztype != ZONEFS_ZTYPE_SEQ) { ztype = ZONEFS_ZTYPE_SEQ; if (!dir_emit(ctx, zonefs_zgroup_name(ztype), 3, base_ino + ztype, DT_DIR)) return 0; ctx->pos++; } return 0; } static int zonefs_readdir_zgroup(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); struct zonefs_zone_group *zgroup = inode->i_private; struct super_block *sb = inode->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); struct zonefs_zone *z; int fname_len; char *fname; ino_t ino; int f; /* * The size of zone group directories is equal to the number * of zone files in the group and does note include the "." and * ".." entries. Hence the "+ 2" here. */ if (ctx->pos >= inode->i_size + 2) return 0; if (!dir_emit_dots(file, ctx)) return 0; fname = kmalloc(ZONEFS_NAME_MAX, GFP_KERNEL); if (!fname) return -ENOMEM; for (f = ctx->pos - 2; f < zgroup->g_nr_zones; f++) { z = &zgroup->g_zones[f]; ino = z->z_sector >> sbi->s_zone_sectors_shift; fname_len = snprintf(fname, ZONEFS_NAME_MAX - 1, "%u", f); if (!dir_emit(ctx, fname, fname_len, ino, DT_REG)) break; ctx->pos++; } kfree(fname); return 0; } static int zonefs_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); if (inode == d_inode(inode->i_sb->s_root)) return zonefs_readdir_root(file, ctx); return zonefs_readdir_zgroup(file, ctx); } const struct inode_operations zonefs_dir_inode_operations = { .lookup = zonefs_lookup, .setattr = zonefs_inode_setattr, }; const struct file_operations zonefs_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .iterate_shared = zonefs_readdir, }; struct zonefs_zone_data { struct super_block *sb; unsigned int nr_zones[ZONEFS_ZTYPE_MAX]; sector_t cnv_zone_start; struct blk_zone *zones; }; static int zonefs_get_zone_info_cb(struct blk_zone *zone, unsigned int idx, void *data) { struct zonefs_zone_data *zd = data; struct super_block *sb = zd->sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); /* * We do not care about the first zone: it contains the super block * and not exposed as a file. */ if (!idx) return 0; /* * Count the number of zones that will be exposed as files. * For sequential zones, we always have as many files as zones. * FOr conventional zones, the number of files depends on if we have * conventional zones aggregation enabled. */ switch (zone->type) { case BLK_ZONE_TYPE_CONVENTIONAL: if (sbi->s_features & ZONEFS_F_AGGRCNV) { /* One file per set of contiguous conventional zones */ if (!(sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones) || zone->start != zd->cnv_zone_start) sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++; zd->cnv_zone_start = zone->start + zone->len; } else { /* One file per zone */ sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++; } break; case BLK_ZONE_TYPE_SEQWRITE_REQ: case BLK_ZONE_TYPE_SEQWRITE_PREF: sbi->s_zgroup[ZONEFS_ZTYPE_SEQ].g_nr_zones++; break; default: zonefs_err(zd->sb, "Unsupported zone type 0x%x\n", zone->type); return -EIO; } memcpy(&zd->zones[idx], zone, sizeof(struct blk_zone)); return 0; } static int zonefs_get_zone_info(struct zonefs_zone_data *zd) { struct block_device *bdev = zd->sb->s_bdev; int ret; zd->zones = kvcalloc(bdev_nr_zones(bdev), sizeof(struct blk_zone), GFP_KERNEL); if (!zd->zones) return -ENOMEM; /* Get zones information from the device */ ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES, zonefs_get_zone_info_cb, zd); if (ret < 0) { zonefs_err(zd->sb, "Zone report failed %d\n", ret); return ret; } if (ret != bdev_nr_zones(bdev)) { zonefs_err(zd->sb, "Invalid zone report (%d/%u zones)\n", ret, bdev_nr_zones(bdev)); return -EIO; } return 0; } static inline void zonefs_free_zone_info(struct zonefs_zone_data *zd) { kvfree(zd->zones); } /* * Create a zone group and populate it with zone files. */ static int zonefs_init_zgroup(struct super_block *sb, struct zonefs_zone_data *zd, enum zonefs_ztype ztype) { struct zonefs_sb_info *sbi = ZONEFS_SB(sb); struct zonefs_zone_group *zgroup = &sbi->s_zgroup[ztype]; struct blk_zone *zone, *next, *end; struct zonefs_zone *z; unsigned int n = 0; int ret; /* Allocate the zone group. If it is empty, we have nothing to do. */ if (!zgroup->g_nr_zones) return 0; zgroup->g_zones = kvcalloc(zgroup->g_nr_zones, sizeof(struct zonefs_zone), GFP_KERNEL); if (!zgroup->g_zones) return -ENOMEM; /* * Initialize the zone groups using the device zone information. * We always skip the first zone as it contains the super block * and is not use to back a file. */ end = zd->zones + bdev_nr_zones(sb->s_bdev); for (zone = &zd->zones[1]; zone < end; zone = next) { next = zone + 1; if (zonefs_zone_type(zone) != ztype) continue; if (WARN_ON_ONCE(n >= zgroup->g_nr_zones)) return -EINVAL; /* * For conventional zones, contiguous zones can be aggregated * together to form larger files. Note that this overwrites the * length of the first zone of the set of contiguous zones * aggregated together. If one offline or read-only zone is * found, assume that all zones aggregated have the same * condition. */ if (ztype == ZONEFS_ZTYPE_CNV && (sbi->s_features & ZONEFS_F_AGGRCNV)) { for (; next < end; next++) { if (zonefs_zone_type(next) != ztype) break; zone->len += next->len; zone->capacity += next->capacity; if (next->cond == BLK_ZONE_COND_READONLY && zone->cond != BLK_ZONE_COND_OFFLINE) zone->cond = BLK_ZONE_COND_READONLY; else if (next->cond == BLK_ZONE_COND_OFFLINE) zone->cond = BLK_ZONE_COND_OFFLINE; } } z = &zgroup->g_zones[n]; if (ztype == ZONEFS_ZTYPE_CNV) z->z_flags |= ZONEFS_ZONE_CNV; z->z_sector = zone->start; z->z_size = zone->len << SECTOR_SHIFT; if (z->z_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT && !(sbi->s_features & ZONEFS_F_AGGRCNV)) { zonefs_err(sb, "Invalid zone size %llu (device zone sectors %llu)\n", z->z_size, bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT); return -EINVAL; } z->z_capacity = min_t(loff_t, MAX_LFS_FILESIZE, zone->capacity << SECTOR_SHIFT); z->z_wpoffset = zonefs_check_zone_condition(sb, z, zone); z->z_mode = S_IFREG | sbi->s_perm; z->z_uid = sbi->s_uid; z->z_gid = sbi->s_gid; /* * Let zonefs_inode_update_mode() know that we will need * special initialization of the inode mode the first time * it is accessed. */ z->z_flags |= ZONEFS_ZONE_INIT_MODE; sb->s_maxbytes = max(z->z_capacity, sb->s_maxbytes); sbi->s_blocks += z->z_capacity >> sb->s_blocksize_bits; sbi->s_used_blocks += z->z_wpoffset >> sb->s_blocksize_bits; /* * For sequential zones, make sure that any open zone is closed * first to ensure that the initial number of open zones is 0, * in sync with the open zone accounting done when the mount * option ZONEFS_MNTOPT_EXPLICIT_OPEN is used. */ if (ztype == ZONEFS_ZTYPE_SEQ && (zone->cond == BLK_ZONE_COND_IMP_OPEN || zone->cond == BLK_ZONE_COND_EXP_OPEN)) { ret = zonefs_zone_mgmt(sb, z, REQ_OP_ZONE_CLOSE); if (ret) return ret; } zonefs_account_active(sb, z); n++; } if (WARN_ON_ONCE(n != zgroup->g_nr_zones)) return -EINVAL; zonefs_info(sb, "Zone group \"%s\" has %u file%s\n", zonefs_zgroup_name(ztype), zgroup->g_nr_zones, str_plural(zgroup->g_nr_zones)); return 0; } static void zonefs_free_zgroups(struct super_block *sb) { struct zonefs_sb_info *sbi = ZONEFS_SB(sb); enum zonefs_ztype ztype; if (!sbi) return; for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { kvfree(sbi->s_zgroup[ztype].g_zones); sbi->s_zgroup[ztype].g_zones = NULL; } } /* * Create a zone group and populate it with zone files. */ static int zonefs_init_zgroups(struct super_block *sb) { struct zonefs_zone_data zd; enum zonefs_ztype ztype; int ret; /* First get the device zone information */ memset(&zd, 0, sizeof(struct zonefs_zone_data)); zd.sb = sb; ret = zonefs_get_zone_info(&zd); if (ret) goto cleanup; /* Allocate and initialize the zone groups */ for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { ret = zonefs_init_zgroup(sb, &zd, ztype); if (ret) { zonefs_info(sb, "Zone group \"%s\" initialization failed\n", zonefs_zgroup_name(ztype)); break; } } cleanup: zonefs_free_zone_info(&zd); if (ret) zonefs_free_zgroups(sb); return ret; } /* * Read super block information from the device. */ static int zonefs_read_super(struct super_block *sb) { struct zonefs_sb_info *sbi = ZONEFS_SB(sb); struct zonefs_super *super; u32 crc, stored_crc; struct page *page; struct bio_vec bio_vec; struct bio bio; int ret; page = alloc_page(GFP_KERNEL); if (!page) return -ENOMEM; bio_init(&bio, sb->s_bdev, &bio_vec, 1, REQ_OP_READ); bio.bi_iter.bi_sector = 0; __bio_add_page(&bio, page, PAGE_SIZE, 0); ret = submit_bio_wait(&bio); if (ret) goto free_page; super = page_address(page); ret = -EINVAL; if (le32_to_cpu(super->s_magic) != ZONEFS_MAGIC) goto free_page; stored_crc = le32_to_cpu(super->s_crc); super->s_crc = 0; crc = crc32(~0U, (unsigned char *)super, sizeof(struct zonefs_super)); if (crc != stored_crc) { zonefs_err(sb, "Invalid checksum (Expected 0x%08x, got 0x%08x)", crc, stored_crc); goto free_page; } sbi->s_features = le64_to_cpu(super->s_features); if (sbi->s_features & ~ZONEFS_F_DEFINED_FEATURES) { zonefs_err(sb, "Unknown features set 0x%llx\n", sbi->s_features); goto free_page; } if (sbi->s_features & ZONEFS_F_UID) { sbi->s_uid = make_kuid(current_user_ns(), le32_to_cpu(super->s_uid)); if (!uid_valid(sbi->s_uid)) { zonefs_err(sb, "Invalid UID feature\n"); goto free_page; } } if (sbi->s_features & ZONEFS_F_GID) { sbi->s_gid = make_kgid(current_user_ns(), le32_to_cpu(super->s_gid)); if (!gid_valid(sbi->s_gid)) { zonefs_err(sb, "Invalid GID feature\n"); goto free_page; } } if (sbi->s_features & ZONEFS_F_PERM) sbi->s_perm = le32_to_cpu(super->s_perm); if (memchr_inv(super->s_reserved, 0, sizeof(super->s_reserved))) { zonefs_err(sb, "Reserved area is being used\n"); goto free_page; } import_uuid(&sbi->s_uuid, super->s_uuid); ret = 0; free_page: __free_page(page); return ret; } static const struct super_operations zonefs_sops = { .alloc_inode = zonefs_alloc_inode, .free_inode = zonefs_free_inode, .statfs = zonefs_statfs, .show_options = zonefs_show_options, }; static int zonefs_get_zgroup_inodes(struct super_block *sb) { struct zonefs_sb_info *sbi = ZONEFS_SB(sb); struct inode *dir_inode; enum zonefs_ztype ztype; for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { if (!sbi->s_zgroup[ztype].g_nr_zones) continue; dir_inode = zonefs_get_zgroup_inode(sb, ztype); if (IS_ERR(dir_inode)) return PTR_ERR(dir_inode); sbi->s_zgroup[ztype].g_inode = dir_inode; } return 0; } static void zonefs_release_zgroup_inodes(struct super_block *sb) { struct zonefs_sb_info *sbi = ZONEFS_SB(sb); enum zonefs_ztype ztype; if (!sbi) return; for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { if (sbi->s_zgroup[ztype].g_inode) { iput(sbi->s_zgroup[ztype].g_inode); sbi->s_zgroup[ztype].g_inode = NULL; } } } /* * Check that the device is zoned. If it is, get the list of zones and create * sub-directories and files according to the device zone configuration and * format options. */ static int zonefs_fill_super(struct super_block *sb, struct fs_context *fc) { struct zonefs_sb_info *sbi; struct zonefs_context *ctx = fc->fs_private; struct inode *inode; enum zonefs_ztype ztype; int ret; if (!bdev_is_zoned(sb->s_bdev)) { zonefs_err(sb, "Not a zoned block device\n"); return -EINVAL; } /* * Initialize super block information: the maximum file size is updated * when the zone files are created so that the format option * ZONEFS_F_AGGRCNV which increases the maximum file size of a file * beyond the zone size is taken into account. */ sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) return -ENOMEM; spin_lock_init(&sbi->s_lock); sb->s_fs_info = sbi; sb->s_magic = ZONEFS_MAGIC; sb->s_maxbytes = 0; sb->s_op = &zonefs_sops; sb->s_time_gran = 1; /* * The block size is set to the device zone write granularity to ensure * that write operations are always aligned according to the device * interface constraints. */ sb_set_blocksize(sb, bdev_zone_write_granularity(sb->s_bdev)); sbi->s_zone_sectors_shift = ilog2(bdev_zone_sectors(sb->s_bdev)); sbi->s_uid = GLOBAL_ROOT_UID; sbi->s_gid = GLOBAL_ROOT_GID; sbi->s_perm = 0640; sbi->s_mount_opts = ctx->s_mount_opts; atomic_set(&sbi->s_wro_seq_files, 0); sbi->s_max_wro_seq_files = bdev_max_open_zones(sb->s_bdev); atomic_set(&sbi->s_active_seq_files, 0); sbi->s_max_active_seq_files = bdev_max_active_zones(sb->s_bdev); ret = zonefs_read_super(sb); if (ret) return ret; zonefs_info(sb, "Mounting %u zones", bdev_nr_zones(sb->s_bdev)); if (!sbi->s_max_wro_seq_files && !sbi->s_max_active_seq_files && sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) { zonefs_info(sb, "No open and active zone limits. Ignoring explicit_open mount option\n"); sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN; } /* Initialize the zone groups */ ret = zonefs_init_zgroups(sb); if (ret) goto cleanup; /* Create the root directory inode */ ret = -ENOMEM; inode = new_inode(sb); if (!inode) goto cleanup; inode->i_ino = bdev_nr_zones(sb->s_bdev); inode->i_mode = S_IFDIR | 0555; simple_inode_init_ts(inode); inode->i_op = &zonefs_dir_inode_operations; inode->i_fop = &zonefs_dir_operations; inode->i_size = 2; set_nlink(inode, 2); for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { if (sbi->s_zgroup[ztype].g_nr_zones) { inc_nlink(inode); inode->i_size++; } } sb->s_root = d_make_root(inode); if (!sb->s_root) goto cleanup; /* * Take a reference on the zone groups directory inodes * to keep them in the inode cache. */ ret = zonefs_get_zgroup_inodes(sb); if (ret) goto cleanup; ret = zonefs_sysfs_register(sb); if (ret) goto cleanup; return 0; cleanup: zonefs_release_zgroup_inodes(sb); zonefs_free_zgroups(sb); return ret; } static void zonefs_kill_super(struct super_block *sb) { struct zonefs_sb_info *sbi = ZONEFS_SB(sb); /* Release the reference on the zone group directory inodes */ zonefs_release_zgroup_inodes(sb); kill_block_super(sb); zonefs_sysfs_unregister(sb); zonefs_free_zgroups(sb); kfree(sbi); } static void zonefs_free_fc(struct fs_context *fc) { struct zonefs_context *ctx = fc->fs_private; kfree(ctx); } static int zonefs_get_tree(struct fs_context *fc) { return get_tree_bdev(fc, zonefs_fill_super); } static int zonefs_reconfigure(struct fs_context *fc) { struct zonefs_context *ctx = fc->fs_private; struct super_block *sb = fc->root->d_sb; struct zonefs_sb_info *sbi = sb->s_fs_info; sync_filesystem(fc->root->d_sb); /* Copy new options from ctx into sbi. */ sbi->s_mount_opts = ctx->s_mount_opts; return 0; } static const struct fs_context_operations zonefs_context_ops = { .parse_param = zonefs_parse_param, .get_tree = zonefs_get_tree, .reconfigure = zonefs_reconfigure, .free = zonefs_free_fc, }; /* * Set up the filesystem mount context. */ static int zonefs_init_fs_context(struct fs_context *fc) { struct zonefs_context *ctx; ctx = kzalloc(sizeof(struct zonefs_context), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO; fc->ops = &zonefs_context_ops; fc->fs_private = ctx; return 0; } /* * File system definition and registration. */ static struct file_system_type zonefs_type = { .owner = THIS_MODULE, .name = "zonefs", .kill_sb = zonefs_kill_super, .fs_flags = FS_REQUIRES_DEV, .init_fs_context = zonefs_init_fs_context, .parameters = zonefs_param_spec, }; static int __init zonefs_init_inodecache(void) { zonefs_inode_cachep = kmem_cache_create("zonefs_inode_cache", sizeof(struct zonefs_inode_info), 0, SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT, NULL); if (zonefs_inode_cachep == NULL) return -ENOMEM; return 0; } static void zonefs_destroy_inodecache(void) { /* * Make sure all delayed rcu free inodes are flushed before we * destroy the inode cache. */ rcu_barrier(); kmem_cache_destroy(zonefs_inode_cachep); } static int __init zonefs_init(void) { int ret; BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE); ret = zonefs_init_inodecache(); if (ret) return ret; ret = zonefs_sysfs_init(); if (ret) goto destroy_inodecache; ret = register_filesystem(&zonefs_type); if (ret) goto sysfs_exit; return 0; sysfs_exit: zonefs_sysfs_exit(); destroy_inodecache: zonefs_destroy_inodecache(); return ret; } static void __exit zonefs_exit(void) { unregister_filesystem(&zonefs_type); zonefs_sysfs_exit(); zonefs_destroy_inodecache(); } MODULE_AUTHOR("Damien Le Moal"); MODULE_DESCRIPTION("Zone file system for zoned block devices"); MODULE_LICENSE("GPL"); MODULE_ALIAS_FS("zonefs"); module_init(zonefs_init); module_exit(zonefs_exit);
3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 /* SPDX-License-Identifier: GPL-2.0 */ /* * Definitions related to Power Management Quality of Service (PM QoS). * * Copyright (C) 2020 Intel Corporation * * Authors: * Mark Gross <mgross@linux.intel.com> * Rafael J. Wysocki <rafael.j.wysocki@intel.com> */ #ifndef _LINUX_PM_QOS_H #define _LINUX_PM_QOS_H #include <linux/plist.h> #include <linux/notifier.h> #include <linux/device.h> enum pm_qos_flags_status { PM_QOS_FLAGS_UNDEFINED = -1, PM_QOS_FLAGS_NONE, PM_QOS_FLAGS_SOME, PM_QOS_FLAGS_ALL, }; #define PM_QOS_DEFAULT_VALUE (-1) #define PM_QOS_LATENCY_ANY S32_MAX #define PM_QOS_LATENCY_ANY_NS ((s64)PM_QOS_LATENCY_ANY * NSEC_PER_USEC) #define PM_QOS_CPU_LATENCY_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_RESUME_LATENCY_DEFAULT_VALUE PM_QOS_LATENCY_ANY #define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT PM_QOS_LATENCY_ANY #define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS PM_QOS_LATENCY_ANY_NS #define PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE 0 #define PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE 0 #define PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE FREQ_QOS_MAX_DEFAULT_VALUE #define PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT (-1) #define PM_QOS_FLAG_NO_POWER_OFF (1 << 0) enum pm_qos_type { PM_QOS_UNITIALIZED, PM_QOS_MAX, /* return the largest value */ PM_QOS_MIN, /* return the smallest value */ }; /* * Note: The lockless read path depends on the CPU accessing target_value * or effective_flags atomically. Atomic access is only guaranteed on all CPU * types linux supports for 32 bit quantites */ struct pm_qos_constraints { struct plist_head list; s32 target_value; /* Do not change to 64 bit */ s32 default_value; s32 no_constraint_value; enum pm_qos_type type; struct blocking_notifier_head *notifiers; }; struct pm_qos_request { struct plist_node node; struct pm_qos_constraints *qos; }; struct pm_qos_flags_request { struct list_head node; s32 flags; /* Do not change to 64 bit */ }; struct pm_qos_flags { struct list_head list; s32 effective_flags; /* Do not change to 64 bit */ }; #define FREQ_QOS_MIN_DEFAULT_VALUE 0 #define FREQ_QOS_MAX_DEFAULT_VALUE S32_MAX enum freq_qos_req_type { FREQ_QOS_MIN = 1, FREQ_QOS_MAX, }; struct freq_constraints { struct pm_qos_constraints min_freq; struct blocking_notifier_head min_freq_notifiers; struct pm_qos_constraints max_freq; struct blocking_notifier_head max_freq_notifiers; }; struct freq_qos_request { enum freq_qos_req_type type; struct plist_node pnode; struct freq_constraints *qos; }; enum dev_pm_qos_req_type { DEV_PM_QOS_RESUME_LATENCY = 1, DEV_PM_QOS_LATENCY_TOLERANCE, DEV_PM_QOS_MIN_FREQUENCY, DEV_PM_QOS_MAX_FREQUENCY, DEV_PM_QOS_FLAGS, }; struct dev_pm_qos_request { enum dev_pm_qos_req_type type; union { struct plist_node pnode; struct pm_qos_flags_request flr; struct freq_qos_request freq; } data; struct device *dev; }; struct dev_pm_qos { struct pm_qos_constraints resume_latency; struct pm_qos_constraints latency_tolerance; struct freq_constraints freq; struct pm_qos_flags flags; struct dev_pm_qos_request *resume_latency_req; struct dev_pm_qos_request *latency_tolerance_req; struct dev_pm_qos_request *flags_req; }; /* Action requested to pm_qos_update_target */ enum pm_qos_req_action { PM_QOS_ADD_REQ, /* Add a new request */ PM_QOS_UPDATE_REQ, /* Update an existing request */ PM_QOS_REMOVE_REQ /* Remove an existing request */ }; static inline int dev_pm_qos_request_active(struct dev_pm_qos_request *req) { return req->dev != NULL; } s32 pm_qos_read_value(struct pm_qos_constraints *c); int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, enum pm_qos_req_action action, int value); bool pm_qos_update_flags(struct pm_qos_flags *pqf, struct pm_qos_flags_request *req, enum pm_qos_req_action action, s32 val); #ifdef CONFIG_CPU_IDLE s32 cpu_latency_qos_limit(void); bool cpu_latency_qos_request_active(struct pm_qos_request *req); void cpu_latency_qos_add_request(struct pm_qos_request *req, s32 value); void cpu_latency_qos_update_request(struct pm_qos_request *req, s32 new_value); void cpu_latency_qos_remove_request(struct pm_qos_request *req); #else static inline s32 cpu_latency_qos_limit(void) { return INT_MAX; } static inline bool cpu_latency_qos_request_active(struct pm_qos_request *req) { return false; } static inline void cpu_latency_qos_add_request(struct pm_qos_request *req, s32 value) {} static inline void cpu_latency_qos_update_request(struct pm_qos_request *req, s32 new_value) {} static inline void cpu_latency_qos_remove_request(struct pm_qos_request *req) {} #endif #ifdef CONFIG_PM enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask); enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev, s32 mask); s32 __dev_pm_qos_resume_latency(struct device *dev); s32 dev_pm_qos_read_value(struct device *dev, enum dev_pm_qos_req_type type); int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, enum dev_pm_qos_req_type type, s32 value); int dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value); int dev_pm_qos_remove_request(struct dev_pm_qos_request *req); int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier, enum dev_pm_qos_req_type type); int dev_pm_qos_remove_notifier(struct device *dev, struct notifier_block *notifier, enum dev_pm_qos_req_type type); void dev_pm_qos_constraints_init(struct device *dev); void dev_pm_qos_constraints_destroy(struct device *dev); int dev_pm_qos_add_ancestor_request(struct device *dev, struct dev_pm_qos_request *req, enum dev_pm_qos_req_type type, s32 value); int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value); void dev_pm_qos_hide_latency_limit(struct device *dev); int dev_pm_qos_expose_flags(struct device *dev, s32 value); void dev_pm_qos_hide_flags(struct device *dev); int dev_pm_qos_update_flags(struct device *dev, s32 mask, bool set); s32 dev_pm_qos_get_user_latency_tolerance(struct device *dev); int dev_pm_qos_update_user_latency_tolerance(struct device *dev, s32 val); int dev_pm_qos_expose_latency_tolerance(struct device *dev); void dev_pm_qos_hide_latency_tolerance(struct device *dev); static inline s32 dev_pm_qos_requested_resume_latency(struct device *dev) { return dev->power.qos->resume_latency_req->data.pnode.prio; } static inline s32 dev_pm_qos_requested_flags(struct device *dev) { return dev->power.qos->flags_req->data.flr.flags; } static inline s32 dev_pm_qos_raw_resume_latency(struct device *dev) { return IS_ERR_OR_NULL(dev->power.qos) ? PM_QOS_RESUME_LATENCY_NO_CONSTRAINT : pm_qos_read_value(&dev->power.qos->resume_latency); } #else static inline enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask) { return PM_QOS_FLAGS_UNDEFINED; } static inline enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev, s32 mask) { return PM_QOS_FLAGS_UNDEFINED; } static inline s32 __dev_pm_qos_resume_latency(struct device *dev) { return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; } static inline s32 dev_pm_qos_read_value(struct device *dev, enum dev_pm_qos_req_type type) { switch (type) { case DEV_PM_QOS_RESUME_LATENCY: return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; case DEV_PM_QOS_MIN_FREQUENCY: return PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE; case DEV_PM_QOS_MAX_FREQUENCY: return PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE; default: WARN_ON(1); return 0; } } static inline int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, enum dev_pm_qos_req_type type, s32 value) { return 0; } static inline int dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value) { return 0; } static inline int dev_pm_qos_remove_request(struct dev_pm_qos_request *req) { return 0; } static inline int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier, enum dev_pm_qos_req_type type) { return 0; } static inline int dev_pm_qos_remove_notifier(struct device *dev, struct notifier_block *notifier, enum dev_pm_qos_req_type type) { return 0; } static inline void dev_pm_qos_constraints_init(struct device *dev) { dev->power.power_state = PMSG_ON; } static inline void dev_pm_qos_constraints_destroy(struct device *dev) { dev->power.power_state = PMSG_INVALID; } static inline int dev_pm_qos_add_ancestor_request(struct device *dev, struct dev_pm_qos_request *req, enum dev_pm_qos_req_type type, s32 value) { return 0; } static inline int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value) { return 0; } static inline void dev_pm_qos_hide_latency_limit(struct device *dev) {} static inline int dev_pm_qos_expose_flags(struct device *dev, s32 value) { return 0; } static inline void dev_pm_qos_hide_flags(struct device *dev) {} static inline int dev_pm_qos_update_flags(struct device *dev, s32 m, bool set) { return 0; } static inline s32 dev_pm_qos_get_user_latency_tolerance(struct device *dev) { return PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT; } static inline int dev_pm_qos_update_user_latency_tolerance(struct device *dev, s32 val) { return 0; } static inline int dev_pm_qos_expose_latency_tolerance(struct device *dev) { return 0; } static inline void dev_pm_qos_hide_latency_tolerance(struct device *dev) {} static inline s32 dev_pm_qos_requested_resume_latency(struct device *dev) { return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; } static inline s32 dev_pm_qos_requested_flags(struct device *dev) { return 0; } static inline s32 dev_pm_qos_raw_resume_latency(struct device *dev) { return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; } #endif static inline int freq_qos_request_active(struct freq_qos_request *req) { return !IS_ERR_OR_NULL(req->qos); } void freq_constraints_init(struct freq_constraints *qos); s32 freq_qos_read_value(struct freq_constraints *qos, enum freq_qos_req_type type); int freq_qos_add_request(struct freq_constraints *qos, struct freq_qos_request *req, enum freq_qos_req_type type, s32 value); int freq_qos_update_request(struct freq_qos_request *req, s32 new_value); int freq_qos_remove_request(struct freq_qos_request *req); int freq_qos_apply(struct freq_qos_request *req, enum pm_qos_req_action action, s32 value); int freq_qos_add_notifier(struct freq_constraints *qos, enum freq_qos_req_type type, struct notifier_block *notifier); int freq_qos_remove_notifier(struct freq_constraints *qos, enum freq_qos_req_type type, struct notifier_block *notifier); #endif
3 5 1 4 1 3 2 1 1 4 4 2 6 3 2 2 2 2 247 248 248 4 1 1 3 3 3 3 3 2 2 2 4 4 4 3 1 3 1 2 4 8 8 7 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 // SPDX-License-Identifier: GPL-2.0 #include <linux/compiler_types.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/fsnotify.h> #include <linux/gfp.h> #include <linux/idr.h> #include <linux/init.h> #include <linux/ipc_namespace.h> #include <linux/kdev_t.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/namei.h> #include <linux/magic.h> #include <linux/major.h> #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/mount.h> #include <linux/fs_parser.h> #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/spinlock_types.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> #include <linux/uaccess.h> #include <linux/user_namespace.h> #include <linux/xarray.h> #include <uapi/linux/android/binder.h> #include <uapi/linux/android/binderfs.h> #include "binder_internal.h" #define FIRST_INODE 1 #define SECOND_INODE 2 #define INODE_OFFSET 3 #define BINDERFS_MAX_MINOR (1U << MINORBITS) /* Ensure that the initial ipc namespace always has devices available. */ #define BINDERFS_MAX_MINOR_CAPPED (BINDERFS_MAX_MINOR - 4) static dev_t binderfs_dev; static DEFINE_MUTEX(binderfs_minors_mutex); static DEFINE_IDA(binderfs_minors); enum binderfs_param { Opt_max, Opt_stats_mode, }; enum binderfs_stats_mode { binderfs_stats_mode_unset, binderfs_stats_mode_global, }; struct binder_features { bool oneway_spam_detection; bool extended_error; }; static const struct constant_table binderfs_param_stats[] = { { "global", binderfs_stats_mode_global }, {} }; static const struct fs_parameter_spec binderfs_fs_parameters[] = { fsparam_u32("max", Opt_max), fsparam_enum("stats", Opt_stats_mode, binderfs_param_stats), {} }; static struct binder_features binder_features = { .oneway_spam_detection = true, .extended_error = true, }; static inline struct binderfs_info *BINDERFS_SB(const struct super_block *sb) { return sb->s_fs_info; } bool is_binderfs_device(const struct inode *inode) { if (inode->i_sb->s_magic == BINDERFS_SUPER_MAGIC) return true; return false; } /** * binderfs_binder_device_create - allocate inode from super block of a * binderfs mount * @ref_inode: inode from which the super block will be taken * @userp: buffer to copy information about new device for userspace to * @req: struct binderfs_device as copied from userspace * * This function allocates a new binder_device and reserves a new minor * number for it. * Minor numbers are limited and tracked globally in binderfs_minors. The * function will stash a struct binder_device for the specific binder * device in i_private of the inode. * It will go on to allocate a new inode from the super block of the * filesystem mount, stash a struct binder_device in its i_private field * and attach a dentry to that inode. * * Return: 0 on success, negative errno on failure */ static int binderfs_binder_device_create(struct inode *ref_inode, struct binderfs_device __user *userp, struct binderfs_device *req) { int minor, ret; struct dentry *dentry, *root; struct binder_device *device; char *name = NULL; size_t name_len; struct inode *inode = NULL; struct super_block *sb = ref_inode->i_sb; struct binderfs_info *info = sb->s_fs_info; #if defined(CONFIG_IPC_NS) bool use_reserve = (info->ipc_ns == &init_ipc_ns); #else bool use_reserve = true; #endif /* Reserve new minor number for the new device. */ mutex_lock(&binderfs_minors_mutex); if (++info->device_count <= info->mount_opts.max) minor = ida_alloc_max(&binderfs_minors, use_reserve ? BINDERFS_MAX_MINOR : BINDERFS_MAX_MINOR_CAPPED, GFP_KERNEL); else minor = -ENOSPC; if (minor < 0) { --info->device_count; mutex_unlock(&binderfs_minors_mutex); return minor; } mutex_unlock(&binderfs_minors_mutex); ret = -ENOMEM; device = kzalloc(sizeof(*device), GFP_KERNEL); if (!device) goto err; inode = new_inode(sb); if (!inode) goto err; inode->i_ino = minor + INODE_OFFSET; simple_inode_init_ts(inode); init_special_inode(inode, S_IFCHR | 0600, MKDEV(MAJOR(binderfs_dev), minor)); inode->i_fop = &binder_fops; inode->i_uid = info->root_uid; inode->i_gid = info->root_gid; req->name[BINDERFS_MAX_NAME] = '\0'; /* NUL-terminate */ name_len = strlen(req->name); /* Make sure to include terminating NUL byte */ name = kmemdup(req->name, name_len + 1, GFP_KERNEL); if (!name) goto err; refcount_set(&device->ref, 1); device->binderfs_inode = inode; device->context.binder_context_mgr_uid = INVALID_UID; device->context.name = name; device->miscdev.name = name; device->miscdev.minor = minor; mutex_init(&device->context.context_mgr_node_lock); req->major = MAJOR(binderfs_dev); req->minor = minor; if (userp && copy_to_user(userp, req, sizeof(*req))) { ret = -EFAULT; goto err; } root = sb->s_root; inode_lock(d_inode(root)); /* look it up */ dentry = lookup_one_len(name, root, name_len); if (IS_ERR(dentry)) { inode_unlock(d_inode(root)); ret = PTR_ERR(dentry); goto err; } if (d_really_is_positive(dentry)) { /* already exists */ dput(dentry); inode_unlock(d_inode(root)); ret = -EEXIST; goto err; } inode->i_private = device; d_instantiate(dentry, inode); fsnotify_create(root->d_inode, dentry); inode_unlock(d_inode(root)); return 0; err: kfree(name); kfree(device); mutex_lock(&binderfs_minors_mutex); --info->device_count; ida_free(&binderfs_minors, minor); mutex_unlock(&binderfs_minors_mutex); iput(inode); return ret; } /** * binder_ctl_ioctl - handle binder device node allocation requests * * The request handler for the binder-control device. All requests operate on * the binderfs mount the binder-control device resides in: * - BINDER_CTL_ADD * Allocate a new binder device. * * Return: %0 on success, negative errno on failure. */ static long binder_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int ret = -EINVAL; struct inode *inode = file_inode(file); struct binderfs_device __user *device = (struct binderfs_device __user *)arg; struct binderfs_device device_req; switch (cmd) { case BINDER_CTL_ADD: ret = copy_from_user(&device_req, device, sizeof(device_req)); if (ret) { ret = -EFAULT; break; } ret = binderfs_binder_device_create(inode, device, &device_req); break; default: break; } return ret; } static void binderfs_evict_inode(struct inode *inode) { struct binder_device *device = inode->i_private; struct binderfs_info *info = BINDERFS_SB(inode->i_sb); clear_inode(inode); if (!S_ISCHR(inode->i_mode) || !device) return; mutex_lock(&binderfs_minors_mutex); --info->device_count; ida_free(&binderfs_minors, device->miscdev.minor); mutex_unlock(&binderfs_minors_mutex); if (refcount_dec_and_test(&device->ref)) { kfree(device->context.name); kfree(device); } } static int binderfs_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param) { int opt; struct binderfs_mount_opts *ctx = fc->fs_private; struct fs_parse_result result; opt = fs_parse(fc, binderfs_fs_parameters, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_max: if (result.uint_32 > BINDERFS_MAX_MINOR) return invalfc(fc, "Bad value for '%s'", param->key); ctx->max = result.uint_32; break; case Opt_stats_mode: if (!capable(CAP_SYS_ADMIN)) return -EPERM; ctx->stats_mode = result.uint_32; break; default: return invalfc(fc, "Unsupported parameter '%s'", param->key); } return 0; } static int binderfs_fs_context_reconfigure(struct fs_context *fc) { struct binderfs_mount_opts *ctx = fc->fs_private; struct binderfs_info *info = BINDERFS_SB(fc->root->d_sb); if (info->mount_opts.stats_mode != ctx->stats_mode) return invalfc(fc, "Binderfs stats mode cannot be changed during a remount"); info->mount_opts.stats_mode = ctx->stats_mode; info->mount_opts.max = ctx->max; return 0; } static int binderfs_show_options(struct seq_file *seq, struct dentry *root) { struct binderfs_info *info = BINDERFS_SB(root->d_sb); if (info->mount_opts.max <= BINDERFS_MAX_MINOR) seq_printf(seq, ",max=%d", info->mount_opts.max); switch (info->mount_opts.stats_mode) { case binderfs_stats_mode_unset: break; case binderfs_stats_mode_global: seq_printf(seq, ",stats=global"); break; } return 0; } static const struct super_operations binderfs_super_ops = { .evict_inode = binderfs_evict_inode, .show_options = binderfs_show_options, .statfs = simple_statfs, }; static inline bool is_binderfs_control_device(const struct dentry *dentry) { struct binderfs_info *info = dentry->d_sb->s_fs_info; return info->control_dentry == dentry; } static int binderfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { if (is_binderfs_control_device(old_dentry) || is_binderfs_control_device(new_dentry)) return -EPERM; return simple_rename(idmap, old_dir, old_dentry, new_dir, new_dentry, flags); } static int binderfs_unlink(struct inode *dir, struct dentry *dentry) { if (is_binderfs_control_device(dentry)) return -EPERM; return simple_unlink(dir, dentry); } static const struct file_operations binder_ctl_fops = { .owner = THIS_MODULE, .open = nonseekable_open, .unlocked_ioctl = binder_ctl_ioctl, .compat_ioctl = binder_ctl_ioctl, .llseek = noop_llseek, }; /** * binderfs_binder_ctl_create - create a new binder-control device * @sb: super block of the binderfs mount * * This function creates a new binder-control device node in the binderfs mount * referred to by @sb. * * Return: 0 on success, negative errno on failure */ static int binderfs_binder_ctl_create(struct super_block *sb) { int minor, ret; struct dentry *dentry; struct binder_device *device; struct inode *inode = NULL; struct dentry *root = sb->s_root; struct binderfs_info *info = sb->s_fs_info; #if defined(CONFIG_IPC_NS) bool use_reserve = (info->ipc_ns == &init_ipc_ns); #else bool use_reserve = true; #endif device = kzalloc(sizeof(*device), GFP_KERNEL); if (!device) return -ENOMEM; /* If we have already created a binder-control node, return. */ if (info->control_dentry) { ret = 0; goto out; } ret = -ENOMEM; inode = new_inode(sb); if (!inode) goto out; /* Reserve a new minor number for the new device. */ mutex_lock(&binderfs_minors_mutex); minor = ida_alloc_max(&binderfs_minors, use_reserve ? BINDERFS_MAX_MINOR : BINDERFS_MAX_MINOR_CAPPED, GFP_KERNEL); mutex_unlock(&binderfs_minors_mutex); if (minor < 0) { ret = minor; goto out; } inode->i_ino = SECOND_INODE; simple_inode_init_ts(inode); init_special_inode(inode, S_IFCHR | 0600, MKDEV(MAJOR(binderfs_dev), minor)); inode->i_fop = &binder_ctl_fops; inode->i_uid = info->root_uid; inode->i_gid = info->root_gid; refcount_set(&device->ref, 1); device->binderfs_inode = inode; device->miscdev.minor = minor; dentry = d_alloc_name(root, "binder-control"); if (!dentry) goto out; inode->i_private = device; info->control_dentry = dentry; d_add(dentry, inode); return 0; out: kfree(device); iput(inode); return ret; } static const struct inode_operations binderfs_dir_inode_operations = { .lookup = simple_lookup, .rename = binderfs_rename, .unlink = binderfs_unlink, }; static struct inode *binderfs_make_inode(struct super_block *sb, int mode) { struct inode *ret; ret = new_inode(sb); if (ret) { ret->i_ino = iunique(sb, BINDERFS_MAX_MINOR + INODE_OFFSET); ret->i_mode = mode; simple_inode_init_ts(ret); } return ret; } static struct dentry *binderfs_create_dentry(struct dentry *parent, const char *name) { struct dentry *dentry; dentry = lookup_one_len(name, parent, strlen(name)); if (IS_ERR(dentry)) return dentry; /* Return error if the file/dir already exists. */ if (d_really_is_positive(dentry)) { dput(dentry); return ERR_PTR(-EEXIST); } return dentry; } void binderfs_remove_file(struct dentry *dentry) { struct inode *parent_inode; parent_inode = d_inode(dentry->d_parent); inode_lock(parent_inode); if (simple_positive(dentry)) { dget(dentry); simple_unlink(parent_inode, dentry); d_delete(dentry); dput(dentry); } inode_unlock(parent_inode); } struct dentry *binderfs_create_file(struct dentry *parent, const char *name, const struct file_operations *fops, void *data) { struct dentry *dentry; struct inode *new_inode, *parent_inode; struct super_block *sb; parent_inode = d_inode(parent); inode_lock(parent_inode); dentry = binderfs_create_dentry(parent, name); if (IS_ERR(dentry)) goto out; sb = parent_inode->i_sb; new_inode = binderfs_make_inode(sb, S_IFREG | 0444); if (!new_inode) { dput(dentry); dentry = ERR_PTR(-ENOMEM); goto out; } new_inode->i_fop = fops; new_inode->i_private = data; d_instantiate(dentry, new_inode); fsnotify_create(parent_inode, dentry); out: inode_unlock(parent_inode); return dentry; } static struct dentry *binderfs_create_dir(struct dentry *parent, const char *name) { struct dentry *dentry; struct inode *new_inode, *parent_inode; struct super_block *sb; parent_inode = d_inode(parent); inode_lock(parent_inode); dentry = binderfs_create_dentry(parent, name); if (IS_ERR(dentry)) goto out; sb = parent_inode->i_sb; new_inode = binderfs_make_inode(sb, S_IFDIR | 0755); if (!new_inode) { dput(dentry); dentry = ERR_PTR(-ENOMEM); goto out; } new_inode->i_fop = &simple_dir_operations; new_inode->i_op = &simple_dir_inode_operations; set_nlink(new_inode, 2); d_instantiate(dentry, new_inode); inc_nlink(parent_inode); fsnotify_mkdir(parent_inode, dentry); out: inode_unlock(parent_inode); return dentry; } static int binder_features_show(struct seq_file *m, void *unused) { bool *feature = m->private; seq_printf(m, "%d\n", *feature); return 0; } DEFINE_SHOW_ATTRIBUTE(binder_features); static int init_binder_features(struct super_block *sb) { struct dentry *dentry, *dir; dir = binderfs_create_dir(sb->s_root, "features"); if (IS_ERR(dir)) return PTR_ERR(dir); dentry = binderfs_create_file(dir, "oneway_spam_detection", &binder_features_fops, &binder_features.oneway_spam_detection); if (IS_ERR(dentry)) return PTR_ERR(dentry); dentry = binderfs_create_file(dir, "extended_error", &binder_features_fops, &binder_features.extended_error); if (IS_ERR(dentry)) return PTR_ERR(dentry); return 0; } static int init_binder_logs(struct super_block *sb) { struct dentry *binder_logs_root_dir, *dentry, *proc_log_dir; const struct binder_debugfs_entry *db_entry; struct binderfs_info *info; int ret = 0; binder_logs_root_dir = binderfs_create_dir(sb->s_root, "binder_logs"); if (IS_ERR(binder_logs_root_dir)) { ret = PTR_ERR(binder_logs_root_dir); goto out; } binder_for_each_debugfs_entry(db_entry) { dentry = binderfs_create_file(binder_logs_root_dir, db_entry->name, db_entry->fops, db_entry->data); if (IS_ERR(dentry)) { ret = PTR_ERR(dentry); goto out; } } proc_log_dir = binderfs_create_dir(binder_logs_root_dir, "proc"); if (IS_ERR(proc_log_dir)) { ret = PTR_ERR(proc_log_dir); goto out; } info = sb->s_fs_info; info->proc_log_dir = proc_log_dir; out: return ret; } static int binderfs_fill_super(struct super_block *sb, struct fs_context *fc) { int ret; struct binderfs_info *info; struct binderfs_mount_opts *ctx = fc->fs_private; struct inode *inode = NULL; struct binderfs_device device_info = {}; const char *name; size_t len; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; /* * The binderfs filesystem can be mounted by userns root in a * non-initial userns. By default such mounts have the SB_I_NODEV flag * set in s_iflags to prevent security issues where userns root can * just create random device nodes via mknod() since it owns the * filesystem mount. But binderfs does not allow to create any files * including devices nodes. The only way to create binder devices nodes * is through the binder-control device which userns root is explicitly * allowed to do. So removing the SB_I_NODEV flag from s_iflags is both * necessary and safe. */ sb->s_iflags &= ~SB_I_NODEV; sb->s_iflags |= SB_I_NOEXEC; sb->s_magic = BINDERFS_SUPER_MAGIC; sb->s_op = &binderfs_super_ops; sb->s_time_gran = 1; sb->s_fs_info = kzalloc(sizeof(struct binderfs_info), GFP_KERNEL); if (!sb->s_fs_info) return -ENOMEM; info = sb->s_fs_info; info->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns); info->root_gid = make_kgid(sb->s_user_ns, 0); if (!gid_valid(info->root_gid)) info->root_gid = GLOBAL_ROOT_GID; info->root_uid = make_kuid(sb->s_user_ns, 0); if (!uid_valid(info->root_uid)) info->root_uid = GLOBAL_ROOT_UID; info->mount_opts.max = ctx->max; info->mount_opts.stats_mode = ctx->stats_mode; inode = new_inode(sb); if (!inode) return -ENOMEM; inode->i_ino = FIRST_INODE; inode->i_fop = &simple_dir_operations; inode->i_mode = S_IFDIR | 0755; simple_inode_init_ts(inode); inode->i_op = &binderfs_dir_inode_operations; set_nlink(inode, 2); sb->s_root = d_make_root(inode); if (!sb->s_root) return -ENOMEM; ret = binderfs_binder_ctl_create(sb); if (ret) return ret; name = binder_devices_param; for (len = strcspn(name, ","); len > 0; len = strcspn(name, ",")) { strscpy(device_info.name, name, len + 1); ret = binderfs_binder_device_create(inode, NULL, &device_info); if (ret) return ret; name += len; if (*name == ',') name++; } ret = init_binder_features(sb); if (ret) return ret; if (info->mount_opts.stats_mode == binderfs_stats_mode_global) return init_binder_logs(sb); return 0; } static int binderfs_fs_context_get_tree(struct fs_context *fc) { return get_tree_nodev(fc, binderfs_fill_super); } static void binderfs_fs_context_free(struct fs_context *fc) { struct binderfs_mount_opts *ctx = fc->fs_private; kfree(ctx); } static const struct fs_context_operations binderfs_fs_context_ops = { .free = binderfs_fs_context_free, .get_tree = binderfs_fs_context_get_tree, .parse_param = binderfs_fs_context_parse_param, .reconfigure = binderfs_fs_context_reconfigure, }; static int binderfs_init_fs_context(struct fs_context *fc) { struct binderfs_mount_opts *ctx; ctx = kzalloc(sizeof(struct binderfs_mount_opts), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->max = BINDERFS_MAX_MINOR; ctx->stats_mode = binderfs_stats_mode_unset; fc->fs_private = ctx; fc->ops = &binderfs_fs_context_ops; return 0; } static void binderfs_kill_super(struct super_block *sb) { struct binderfs_info *info = sb->s_fs_info; /* * During inode eviction struct binderfs_info is needed. * So first wipe the super_block then free struct binderfs_info. */ kill_litter_super(sb); if (info && info->ipc_ns) put_ipc_ns(info->ipc_ns); kfree(info); } static struct file_system_type binder_fs_type = { .name = "binder", .init_fs_context = binderfs_init_fs_context, .parameters = binderfs_fs_parameters, .kill_sb = binderfs_kill_super, .fs_flags = FS_USERNS_MOUNT, }; int __init init_binderfs(void) { int ret; const char *name; size_t len; /* Verify that the default binderfs device names are valid. */ name = binder_devices_param; for (len = strcspn(name, ","); len > 0; len = strcspn(name, ",")) { if (len > BINDERFS_MAX_NAME) return -E2BIG; name += len; if (*name == ',') name++; } /* Allocate new major number for binderfs. */ ret = alloc_chrdev_region(&binderfs_dev, 0, BINDERFS_MAX_MINOR, "binder"); if (ret) return ret; ret = register_filesystem(&binder_fs_type); if (ret) { unregister_chrdev_region(binderfs_dev, BINDERFS_MAX_MINOR); return ret; } return ret; }
287 287 87 69 102 2 1 8 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2018 Red Hat, Inc. * All rights reserved. */ #ifndef __LIBXFS_AG_H #define __LIBXFS_AG_H 1 struct xfs_mount; struct xfs_trans; struct xfs_perag; /* * Per-ag infrastructure */ /* per-AG block reservation data structures*/ struct xfs_ag_resv { /* number of blocks originally reserved here */ xfs_extlen_t ar_orig_reserved; /* number of blocks reserved here */ xfs_extlen_t ar_reserved; /* number of blocks originally asked for */ xfs_extlen_t ar_asked; }; /* * Per-ag incore structure, copies of information in agf and agi, to improve the * performance of allocation group selection. */ struct xfs_perag { struct xfs_mount *pag_mount; /* owner filesystem */ xfs_agnumber_t pag_agno; /* AG this structure belongs to */ atomic_t pag_ref; /* passive reference count */ atomic_t pag_active_ref; /* active reference count */ wait_queue_head_t pag_active_wq;/* woken active_ref falls to zero */ unsigned long pag_opstate; uint8_t pagf_bno_level; /* # of levels in bno btree */ uint8_t pagf_cnt_level; /* # of levels in cnt btree */ uint8_t pagf_rmap_level;/* # of levels in rmap btree */ uint32_t pagf_flcount; /* count of blocks in freelist */ xfs_extlen_t pagf_freeblks; /* total free blocks */ xfs_extlen_t pagf_longest; /* longest free space */ uint32_t pagf_btreeblks; /* # of blocks held in AGF btrees */ xfs_agino_t pagi_freecount; /* number of free inodes */ xfs_agino_t pagi_count; /* number of allocated inodes */ /* * Inode allocation search lookup optimisation. * If the pagino matches, the search for new inodes * doesn't need to search the near ones again straight away */ xfs_agino_t pagl_pagino; xfs_agino_t pagl_leftrec; xfs_agino_t pagl_rightrec; int pagb_count; /* pagb slots in use */ uint8_t pagf_refcount_level; /* recount btree height */ /* Blocks reserved for all kinds of metadata. */ struct xfs_ag_resv pag_meta_resv; /* Blocks reserved for the reverse mapping btree. */ struct xfs_ag_resv pag_rmapbt_resv; /* for rcu-safe freeing */ struct rcu_head rcu_head; /* Precalculated geometry info */ xfs_agblock_t block_count; xfs_agblock_t min_block; xfs_agino_t agino_min; xfs_agino_t agino_max; #ifdef __KERNEL__ /* -- kernel only structures below this line -- */ /* * Bitsets of per-ag metadata that have been checked and/or are sick. * Callers should hold pag_state_lock before accessing this field. */ uint16_t pag_checked; uint16_t pag_sick; #ifdef CONFIG_XFS_ONLINE_REPAIR /* * Alternate btree heights so that online repair won't trip the write * verifiers while rebuilding the AG btrees. */ uint8_t pagf_repair_bno_level; uint8_t pagf_repair_cnt_level; uint8_t pagf_repair_refcount_level; uint8_t pagf_repair_rmap_level; #endif spinlock_t pag_state_lock; spinlock_t pagb_lock; /* lock for pagb_tree */ struct rb_root pagb_tree; /* ordered tree of busy extents */ unsigned int pagb_gen; /* generation count for pagb_tree */ wait_queue_head_t pagb_wait; /* woken when pagb_gen changes */ atomic_t pagf_fstrms; /* # of filestreams active in this AG */ spinlock_t pag_ici_lock; /* incore inode cache lock */ struct radix_tree_root pag_ici_root; /* incore inode cache root */ int pag_ici_reclaimable; /* reclaimable inodes */ unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */ struct xfs_buf_cache pag_bcache; /* background prealloc block trimming */ struct delayed_work pag_blockgc_work; /* * We use xfs_drain to track the number of deferred log intent items * that have been queued (but not yet processed) so that waiters (e.g. * scrub) will not lock resources when other threads are in the middle * of processing a chain of intent items only to find momentary * inconsistencies. */ struct xfs_defer_drain pag_intents_drain; /* Hook to feed rmapbt updates to an active online repair. */ struct xfs_hooks pag_rmap_update_hooks; #endif /* __KERNEL__ */ }; /* * Per-AG operational state. These are atomic flag bits. */ #define XFS_AGSTATE_AGF_INIT 0 #define XFS_AGSTATE_AGI_INIT 1 #define XFS_AGSTATE_PREFERS_METADATA 2 #define XFS_AGSTATE_ALLOWS_INODES 3 #define XFS_AGSTATE_AGFL_NEEDS_RESET 4 #define __XFS_AG_OPSTATE(name, NAME) \ static inline bool xfs_perag_ ## name (struct xfs_perag *pag) \ { \ return test_bit(XFS_AGSTATE_ ## NAME, &pag->pag_opstate); \ } __XFS_AG_OPSTATE(initialised_agf, AGF_INIT) __XFS_AG_OPSTATE(initialised_agi, AGI_INIT) __XFS_AG_OPSTATE(prefers_metadata, PREFERS_METADATA) __XFS_AG_OPSTATE(allows_inodes, ALLOWS_INODES) __XFS_AG_OPSTATE(agfl_needs_reset, AGFL_NEEDS_RESET) void xfs_free_unused_perag_range(struct xfs_mount *mp, xfs_agnumber_t agstart, xfs_agnumber_t agend); int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount, xfs_rfsblock_t dcount, xfs_agnumber_t *maxagi); int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno); void xfs_free_perag(struct xfs_mount *mp); /* Passive AG references */ struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno, unsigned int tag); struct xfs_perag *xfs_perag_hold(struct xfs_perag *pag); void xfs_perag_put(struct xfs_perag *pag); /* Active AG references */ struct xfs_perag *xfs_perag_grab(struct xfs_mount *, xfs_agnumber_t); struct xfs_perag *xfs_perag_grab_tag(struct xfs_mount *, xfs_agnumber_t, int tag); void xfs_perag_rele(struct xfs_perag *pag); /* * Per-ag geometry infomation and validation */ xfs_agblock_t xfs_ag_block_count(struct xfs_mount *mp, xfs_agnumber_t agno); void xfs_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t *first, xfs_agino_t *last); static inline bool xfs_verify_agbno(struct xfs_perag *pag, xfs_agblock_t agbno) { if (agbno >= pag->block_count) return false; if (agbno <= pag->min_block) return false; return true; } static inline bool xfs_verify_agbext( struct xfs_perag *pag, xfs_agblock_t agbno, xfs_agblock_t len) { if (agbno + len <= agbno) return false; if (!xfs_verify_agbno(pag, agbno)) return false; return xfs_verify_agbno(pag, agbno + len - 1); } /* * Verify that an AG inode number pointer neither points outside the AG * nor points at static metadata. */ static inline bool xfs_verify_agino(struct xfs_perag *pag, xfs_agino_t agino) { if (agino < pag->agino_min) return false; if (agino > pag->agino_max) return false; return true; } /* * Verify that an AG inode number pointer neither points outside the AG * nor points at static metadata, or is NULLAGINO. */ static inline bool xfs_verify_agino_or_null(struct xfs_perag *pag, xfs_agino_t agino) { if (agino == NULLAGINO) return true; return xfs_verify_agino(pag, agino); } static inline bool xfs_ag_contains_log(struct xfs_mount *mp, xfs_agnumber_t agno) { return mp->m_sb.sb_logstart > 0 && agno == XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart); } /* * Perag iteration APIs */ static inline struct xfs_perag * xfs_perag_next( struct xfs_perag *pag, xfs_agnumber_t *agno, xfs_agnumber_t end_agno) { struct xfs_mount *mp = pag->pag_mount; *agno = pag->pag_agno + 1; xfs_perag_rele(pag); while (*agno <= end_agno) { pag = xfs_perag_grab(mp, *agno); if (pag) return pag; (*agno)++; } return NULL; } #define for_each_perag_range(mp, agno, end_agno, pag) \ for ((pag) = xfs_perag_grab((mp), (agno)); \ (pag) != NULL; \ (pag) = xfs_perag_next((pag), &(agno), (end_agno))) #define for_each_perag_from(mp, agno, pag) \ for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag)) #define for_each_perag(mp, agno, pag) \ (agno) = 0; \ for_each_perag_from((mp), (agno), (pag)) #define for_each_perag_tag(mp, agno, pag, tag) \ for ((agno) = 0, (pag) = xfs_perag_grab_tag((mp), 0, (tag)); \ (pag) != NULL; \ (agno) = (pag)->pag_agno + 1, \ xfs_perag_rele(pag), \ (pag) = xfs_perag_grab_tag((mp), (agno), (tag))) static inline struct xfs_perag * xfs_perag_next_wrap( struct xfs_perag *pag, xfs_agnumber_t *agno, xfs_agnumber_t stop_agno, xfs_agnumber_t restart_agno, xfs_agnumber_t wrap_agno) { struct xfs_mount *mp = pag->pag_mount; *agno = pag->pag_agno + 1; xfs_perag_rele(pag); while (*agno != stop_agno) { if (*agno >= wrap_agno) { if (restart_agno >= stop_agno) break; *agno = restart_agno; } pag = xfs_perag_grab(mp, *agno); if (pag) return pag; (*agno)++; } return NULL; } /* * Iterate all AGs from start_agno through wrap_agno, then restart_agno through * (start_agno - 1). */ #define for_each_perag_wrap_range(mp, start_agno, restart_agno, wrap_agno, agno, pag) \ for ((agno) = (start_agno), (pag) = xfs_perag_grab((mp), (agno)); \ (pag) != NULL; \ (pag) = xfs_perag_next_wrap((pag), &(agno), (start_agno), \ (restart_agno), (wrap_agno))) /* * Iterate all AGs from start_agno through wrap_agno, then 0 through * (start_agno - 1). */ #define for_each_perag_wrap_at(mp, start_agno, wrap_agno, agno, pag) \ for_each_perag_wrap_range((mp), (start_agno), 0, (wrap_agno), (agno), (pag)) /* * Iterate all AGs from start_agno through to the end of the filesystem, then 0 * through (start_agno - 1). */ #define for_each_perag_wrap(mp, start_agno, agno, pag) \ for_each_perag_wrap_at((mp), (start_agno), (mp)->m_sb.sb_agcount, \ (agno), (pag)) struct aghdr_init_data { /* per ag data */ xfs_agblock_t agno; /* ag to init */ xfs_extlen_t agsize; /* new AG size */ struct list_head buffer_list; /* buffer writeback list */ xfs_rfsblock_t nfree; /* cumulative new free space */ /* per header data */ xfs_daddr_t daddr; /* header location */ size_t numblks; /* size of header */ const struct xfs_btree_ops *bc_ops; /* btree ops */ }; int xfs_ag_init_headers(struct xfs_mount *mp, struct aghdr_init_data *id); int xfs_ag_shrink_space(struct xfs_perag *pag, struct xfs_trans **tpp, xfs_extlen_t delta); int xfs_ag_extend_space(struct xfs_perag *pag, struct xfs_trans *tp, xfs_extlen_t len); int xfs_ag_get_geometry(struct xfs_perag *pag, struct xfs_ag_geometry *ageo); #endif /* __LIBXFS_AG_H */
3 3 3 3 3 3 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 // SPDX-License-Identifier: GPL-2.0-only /* * Yama Linux Security Module * * Author: Kees Cook <keescook@chromium.org> * * Copyright (C) 2010 Canonical, Ltd. * Copyright (C) 2011 The Chromium OS Authors. */ #include <linux/lsm_hooks.h> #include <linux/sysctl.h> #include <linux/ptrace.h> #include <linux/prctl.h> #include <linux/ratelimit.h> #include <linux/workqueue.h> #include <linux/string_helpers.h> #include <linux/task_work.h> #include <linux/sched.h> #include <linux/spinlock.h> #include <uapi/linux/lsm.h> #define YAMA_SCOPE_DISABLED 0 #define YAMA_SCOPE_RELATIONAL 1 #define YAMA_SCOPE_CAPABILITY 2 #define YAMA_SCOPE_NO_ATTACH 3 static int ptrace_scope = YAMA_SCOPE_RELATIONAL; /* describe a ptrace relationship for potential exception */ struct ptrace_relation { struct task_struct *tracer; struct task_struct *tracee; bool invalid; struct list_head node; struct rcu_head rcu; }; static LIST_HEAD(ptracer_relations); static DEFINE_SPINLOCK(ptracer_relations_lock); static void yama_relation_cleanup(struct work_struct *work); static DECLARE_WORK(yama_relation_work, yama_relation_cleanup); struct access_report_info { struct callback_head work; const char *access; struct task_struct *target; struct task_struct *agent; }; static void __report_access(struct callback_head *work) { struct access_report_info *info = container_of(work, struct access_report_info, work); char *target_cmd, *agent_cmd; target_cmd = kstrdup_quotable_cmdline(info->target, GFP_KERNEL); agent_cmd = kstrdup_quotable_cmdline(info->agent, GFP_KERNEL); pr_notice_ratelimited( "ptrace %s of \"%s\"[%d] was attempted by \"%s\"[%d]\n", info->access, target_cmd, info->target->pid, agent_cmd, info->agent->pid); kfree(agent_cmd); kfree(target_cmd); put_task_struct(info->agent); put_task_struct(info->target); kfree(info); } /* defers execution because cmdline access can sleep */ static void report_access(const char *access, struct task_struct *target, struct task_struct *agent) { struct access_report_info *info; char agent_comm[sizeof(agent->comm)]; assert_spin_locked(&target->alloc_lock); /* for target->comm */ if (current->flags & PF_KTHREAD) { /* I don't think kthreads call task_work_run() before exiting. * Imagine angry ranting about procfs here. */ pr_notice_ratelimited( "ptrace %s of \"%s\"[%d] was attempted by \"%s\"[%d]\n", access, target->comm, target->pid, get_task_comm(agent_comm, agent), agent->pid); return; } info = kmalloc(sizeof(*info), GFP_ATOMIC); if (!info) return; init_task_work(&info->work, __report_access); get_task_struct(target); get_task_struct(agent); info->access = access; info->target = target; info->agent = agent; if (task_work_add(current, &info->work, TWA_RESUME) == 0) return; /* success */ WARN(1, "report_access called from exiting task"); put_task_struct(target); put_task_struct(agent); kfree(info); } /** * yama_relation_cleanup - remove invalid entries from the relation list * @work: unused * */ static void yama_relation_cleanup(struct work_struct *work) { struct ptrace_relation *relation; spin_lock(&ptracer_relations_lock); rcu_read_lock(); list_for_each_entry_rcu(relation, &ptracer_relations, node) { if (relation->invalid) { list_del_rcu(&relation->node); kfree_rcu(relation, rcu); } } rcu_read_unlock(); spin_unlock(&ptracer_relations_lock); } /** * yama_ptracer_add - add/replace an exception for this tracer/tracee pair * @tracer: the task_struct of the process doing the ptrace * @tracee: the task_struct of the process to be ptraced * * Each tracee can have, at most, one tracer registered. Each time this * is called, the prior registered tracer will be replaced for the tracee. * * Returns 0 if relationship was added, -ve on error. */ static int yama_ptracer_add(struct task_struct *tracer, struct task_struct *tracee) { struct ptrace_relation *relation, *added; added = kmalloc(sizeof(*added), GFP_KERNEL); if (!added) return -ENOMEM; added->tracee = tracee; added->tracer = tracer; added->invalid = false; spin_lock(&ptracer_relations_lock); rcu_read_lock(); list_for_each_entry_rcu(relation, &ptracer_relations, node) { if (relation->invalid) continue; if (relation->tracee == tracee) { list_replace_rcu(&relation->node, &added->node); kfree_rcu(relation, rcu); goto out; } } list_add_rcu(&added->node, &ptracer_relations); out: rcu_read_unlock(); spin_unlock(&ptracer_relations_lock); return 0; } /** * yama_ptracer_del - remove exceptions related to the given tasks * @tracer: remove any relation where tracer task matches * @tracee: remove any relation where tracee task matches */ static void yama_ptracer_del(struct task_struct *tracer, struct task_struct *tracee) { struct ptrace_relation *relation; bool marked = false; rcu_read_lock(); list_for_each_entry_rcu(relation, &ptracer_relations, node) { if (relation->invalid) continue; if (relation->tracee == tracee || (tracer && relation->tracer == tracer)) { relation->invalid = true; marked = true; } } rcu_read_unlock(); if (marked) schedule_work(&yama_relation_work); } /** * yama_task_free - check for task_pid to remove from exception list * @task: task being removed */ static void yama_task_free(struct task_struct *task) { yama_ptracer_del(task, task); } /** * yama_task_prctl - check for Yama-specific prctl operations * @option: operation * @arg2: argument * @arg3: argument * @arg4: argument * @arg5: argument * * Return 0 on success, -ve on error. -ENOSYS is returned when Yama * does not handle the given option. */ static int yama_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { int rc = -ENOSYS; struct task_struct *myself = current; switch (option) { case PR_SET_PTRACER: /* Since a thread can call prctl(), find the group leader * before calling _add() or _del() on it, since we want * process-level granularity of control. The tracer group * leader checking is handled later when walking the ancestry * at the time of PTRACE_ATTACH check. */ rcu_read_lock(); if (!thread_group_leader(myself)) myself = rcu_dereference(myself->group_leader); get_task_struct(myself); rcu_read_unlock(); if (arg2 == 0) { yama_ptracer_del(NULL, myself); rc = 0; } else if (arg2 == PR_SET_PTRACER_ANY || (int)arg2 == -1) { rc = yama_ptracer_add(NULL, myself); } else { struct task_struct *tracer; tracer = find_get_task_by_vpid(arg2); if (!tracer) { rc = -EINVAL; } else { rc = yama_ptracer_add(tracer, myself); put_task_struct(tracer); } } put_task_struct(myself); break; } return rc; } /** * task_is_descendant - walk up a process family tree looking for a match * @parent: the process to compare against while walking up from child * @child: the process to start from while looking upwards for parent * * Returns 1 if child is a descendant of parent, 0 if not. */ static int task_is_descendant(struct task_struct *parent, struct task_struct *child) { int rc = 0; struct task_struct *walker = child; if (!parent || !child) return 0; rcu_read_lock(); if (!thread_group_leader(parent)) parent = rcu_dereference(parent->group_leader); while (walker->pid > 0) { if (!thread_group_leader(walker)) walker = rcu_dereference(walker->group_leader); if (walker == parent) { rc = 1; break; } walker = rcu_dereference(walker->real_parent); } rcu_read_unlock(); return rc; } /** * ptracer_exception_found - tracer registered as exception for this tracee * @tracer: the task_struct of the process attempting ptrace * @tracee: the task_struct of the process to be ptraced * * Returns 1 if tracer has a ptracer exception ancestor for tracee. */ static int ptracer_exception_found(struct task_struct *tracer, struct task_struct *tracee) { int rc = 0; struct ptrace_relation *relation; struct task_struct *parent = NULL; bool found = false; rcu_read_lock(); /* * If there's already an active tracing relationship, then make an * exception for the sake of other accesses, like process_vm_rw(). */ parent = ptrace_parent(tracee); if (parent != NULL && same_thread_group(parent, tracer)) { rc = 1; goto unlock; } /* Look for a PR_SET_PTRACER relationship. */ if (!thread_group_leader(tracee)) tracee = rcu_dereference(tracee->group_leader); list_for_each_entry_rcu(relation, &ptracer_relations, node) { if (relation->invalid) continue; if (relation->tracee == tracee) { parent = relation->tracer; found = true; break; } } if (found && (parent == NULL || task_is_descendant(parent, tracer))) rc = 1; unlock: rcu_read_unlock(); return rc; } /** * yama_ptrace_access_check - validate PTRACE_ATTACH calls * @child: task that current task is attempting to ptrace * @mode: ptrace attach mode * * Returns 0 if following the ptrace is allowed, -ve on error. */ static int yama_ptrace_access_check(struct task_struct *child, unsigned int mode) { int rc = 0; /* require ptrace target be a child of ptracer on attach */ if (mode & PTRACE_MODE_ATTACH) { switch (ptrace_scope) { case YAMA_SCOPE_DISABLED: /* No additional restrictions. */ break; case YAMA_SCOPE_RELATIONAL: rcu_read_lock(); if (!pid_alive(child)) rc = -EPERM; if (!rc && !task_is_descendant(current, child) && !ptracer_exception_found(current, child) && !ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE)) rc = -EPERM; rcu_read_unlock(); break; case YAMA_SCOPE_CAPABILITY: rcu_read_lock(); if (!ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE)) rc = -EPERM; rcu_read_unlock(); break; case YAMA_SCOPE_NO_ATTACH: default: rc = -EPERM; break; } } if (rc && (mode & PTRACE_MODE_NOAUDIT) == 0) report_access("attach", child, current); return rc; } /** * yama_ptrace_traceme - validate PTRACE_TRACEME calls * @parent: task that will become the ptracer of the current task * * Returns 0 if following the ptrace is allowed, -ve on error. */ static int yama_ptrace_traceme(struct task_struct *parent) { int rc = 0; /* Only disallow PTRACE_TRACEME on more aggressive settings. */ switch (ptrace_scope) { case YAMA_SCOPE_CAPABILITY: if (!has_ns_capability(parent, current_user_ns(), CAP_SYS_PTRACE)) rc = -EPERM; break; case YAMA_SCOPE_NO_ATTACH: rc = -EPERM; break; } if (rc) { task_lock(current); report_access("traceme", current, parent); task_unlock(current); } return rc; } static const struct lsm_id yama_lsmid = { .name = "yama", .id = LSM_ID_YAMA, }; static struct security_hook_list yama_hooks[] __ro_after_init = { LSM_HOOK_INIT(ptrace_access_check, yama_ptrace_access_check), LSM_HOOK_INIT(ptrace_traceme, yama_ptrace_traceme), LSM_HOOK_INIT(task_prctl, yama_task_prctl), LSM_HOOK_INIT(task_free, yama_task_free), }; #ifdef CONFIG_SYSCTL static int yama_dointvec_minmax(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table table_copy; if (write && !capable(CAP_SYS_PTRACE)) return -EPERM; /* Lock the max value if it ever gets set. */ table_copy = *table; if (*(int *)table_copy.data == *(int *)table_copy.extra2) table_copy.extra1 = table_copy.extra2; return proc_dointvec_minmax(&table_copy, write, buffer, lenp, ppos); } static int max_scope = YAMA_SCOPE_NO_ATTACH; static struct ctl_table yama_sysctl_table[] = { { .procname = "ptrace_scope", .data = &ptrace_scope, .maxlen = sizeof(int), .mode = 0644, .proc_handler = yama_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &max_scope, }, }; static void __init yama_init_sysctl(void) { if (!register_sysctl("kernel/yama", yama_sysctl_table)) panic("Yama: sysctl registration failed.\n"); } #else static inline void yama_init_sysctl(void) { } #endif /* CONFIG_SYSCTL */ static int __init yama_init(void) { pr_info("Yama: becoming mindful.\n"); security_add_hooks(yama_hooks, ARRAY_SIZE(yama_hooks), &yama_lsmid); yama_init_sysctl(); return 0; } DEFINE_LSM(yama) = { .name = "yama", .init = yama_init, };
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 // SPDX-License-Identifier: GPL-2.0-only /* * Changes: * Arnaldo Carvalho de Melo <acme@conectiva.com.br> 08/23/2000 * - get rid of some verify_areas and use __copy*user and __get/put_user * for the ones that remain */ #include <linux/module.h> #include <linux/blkdev.h> #include <linux/interrupt.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/uaccess.h> #include <linux/cdrom.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_device.h> #include <scsi/scsi_eh.h> #include <scsi/scsi_host.h> #include <scsi/scsi_ioctl.h> #include <scsi/sg.h> #include <scsi/scsi_dbg.h> #include "scsi_logging.h" #define NORMAL_RETRIES 5 #define IOCTL_NORMAL_TIMEOUT (10 * HZ) #define MAX_BUF PAGE_SIZE /** * ioctl_probe -- return host identification * @host: host to identify * @buffer: userspace buffer for identification * * Return an identifying string at @buffer, if @buffer is non-NULL, filling * to the length stored at * (int *) @buffer. */ static int ioctl_probe(struct Scsi_Host *host, void __user *buffer) { unsigned int len, slen; const char *string; if (buffer) { if (get_user(len, (unsigned int __user *) buffer)) return -EFAULT; if (host->hostt->info) string = host->hostt->info(host); else string = host->hostt->name; if (string) { slen = strlen(string); if (len > slen) len = slen + 1; if (copy_to_user(buffer, string, len)) return -EFAULT; } } return 1; } static int ioctl_internal_command(struct scsi_device *sdev, char *cmd, int timeout, int retries) { int result; struct scsi_sense_hdr sshdr; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, }; SCSI_LOG_IOCTL(1, sdev_printk(KERN_INFO, sdev, "Trying ioctl with scsi command %d\n", *cmd)); result = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_IN, NULL, 0, timeout, retries, &exec_args); SCSI_LOG_IOCTL(2, sdev_printk(KERN_INFO, sdev, "Ioctl returned 0x%x\n", result)); if (result < 0) goto out; if (scsi_sense_valid(&sshdr)) { switch (sshdr.sense_key) { case ILLEGAL_REQUEST: if (cmd[0] == ALLOW_MEDIUM_REMOVAL) sdev->lockable = 0; else sdev_printk(KERN_INFO, sdev, "ioctl_internal_command: " "ILLEGAL REQUEST " "asc=0x%x ascq=0x%x\n", sshdr.asc, sshdr.ascq); break; case NOT_READY: /* This happens if there is no disc in drive */ if (sdev->removable) break; fallthrough; case UNIT_ATTENTION: if (sdev->removable) { sdev->changed = 1; result = 0; /* This is no longer considered an error */ break; } fallthrough; /* for non-removable media */ default: sdev_printk(KERN_INFO, sdev, "ioctl_internal_command return code = %x\n", result); scsi_print_sense_hdr(sdev, NULL, &sshdr); break; } } out: SCSI_LOG_IOCTL(2, sdev_printk(KERN_INFO, sdev, "IOCTL Releasing command\n")); return result; } int scsi_set_medium_removal(struct scsi_device *sdev, char state) { char scsi_cmd[MAX_COMMAND_SIZE]; int ret; if (!sdev->removable || !sdev->lockable) return 0; scsi_cmd[0] = ALLOW_MEDIUM_REMOVAL; scsi_cmd[1] = 0; scsi_cmd[2] = 0; scsi_cmd[3] = 0; scsi_cmd[4] = state; scsi_cmd[5] = 0; ret = ioctl_internal_command(sdev, scsi_cmd, IOCTL_NORMAL_TIMEOUT, NORMAL_RETRIES); if (ret == 0) sdev->locked = (state == SCSI_REMOVAL_PREVENT); return ret; } EXPORT_SYMBOL(scsi_set_medium_removal); /* * The scsi_ioctl_get_pci() function places into arg the value * pci_dev::slot_name (8 characters) for the PCI device (if any). * Returns: 0 on success * -ENXIO if there isn't a PCI device pointer * (could be because the SCSI driver hasn't been * updated yet, or because it isn't a SCSI * device) * any copy_to_user() error on failure there */ static int scsi_ioctl_get_pci(struct scsi_device *sdev, void __user *arg) { struct device *dev = scsi_get_device(sdev->host); const char *name; if (!dev) return -ENXIO; name = dev_name(dev); /* compatibility with old ioctl which only returned * 20 characters */ return copy_to_user(arg, name, min(strlen(name), (size_t)20)) ? -EFAULT: 0; } static int sg_get_version(int __user *p) { static const int sg_version_num = 30527; return put_user(sg_version_num, p); } static int sg_set_timeout(struct scsi_device *sdev, int __user *p) { int timeout, err = get_user(timeout, p); if (!err) sdev->sg_timeout = clock_t_to_jiffies(timeout); return err; } static int sg_get_reserved_size(struct scsi_device *sdev, int __user *p) { int val = min(sdev->sg_reserved_size, queue_max_bytes(sdev->request_queue)); return put_user(val, p); } static int sg_set_reserved_size(struct scsi_device *sdev, int __user *p) { int size, err = get_user(size, p); if (err) return err; if (size < 0) return -EINVAL; sdev->sg_reserved_size = min_t(unsigned int, size, queue_max_bytes(sdev->request_queue)); return 0; } /* * will always return that we are ATAPI even for a real SCSI drive, I'm not * so sure this is worth doing anything about (why would you care??) */ static int sg_emulated_host(struct request_queue *q, int __user *p) { return put_user(1, p); } static int scsi_get_idlun(struct scsi_device *sdev, void __user *argp) { struct scsi_idlun v = { .dev_id = (sdev->id & 0xff) + ((sdev->lun & 0xff) << 8) + ((sdev->channel & 0xff) << 16) + ((sdev->host->host_no & 0xff) << 24), .host_unique_id = sdev->host->unique_id }; if (copy_to_user(argp, &v, sizeof(struct scsi_idlun))) return -EFAULT; return 0; } static int scsi_send_start_stop(struct scsi_device *sdev, int data) { u8 cdb[MAX_COMMAND_SIZE] = { }; cdb[0] = START_STOP; cdb[4] = data; return ioctl_internal_command(sdev, cdb, START_STOP_TIMEOUT, NORMAL_RETRIES); } /* * Check if the given command is allowed. * * Only a subset of commands are allowed for unprivileged users. Commands used * to format the media, update the firmware, etc. are not permitted. */ bool scsi_cmd_allowed(unsigned char *cmd, bool open_for_write) { /* root can do any command. */ if (capable(CAP_SYS_RAWIO)) return true; /* Anybody who can open the device can do a read-safe command */ switch (cmd[0]) { /* Basic read-only commands */ case TEST_UNIT_READY: case REQUEST_SENSE: case READ_6: case READ_10: case READ_12: case READ_16: case READ_BUFFER: case READ_DEFECT_DATA: case READ_CAPACITY: /* also GPCMD_READ_CDVD_CAPACITY */ case READ_LONG: case INQUIRY: case MODE_SENSE: case MODE_SENSE_10: case LOG_SENSE: case START_STOP: case GPCMD_VERIFY_10: case VERIFY_16: case REPORT_LUNS: case SERVICE_ACTION_IN_16: case RECEIVE_DIAGNOSTIC: case MAINTENANCE_IN: /* also GPCMD_SEND_KEY, which is a write command */ case GPCMD_READ_BUFFER_CAPACITY: /* Audio CD commands */ case GPCMD_PLAY_CD: case GPCMD_PLAY_AUDIO_10: case GPCMD_PLAY_AUDIO_MSF: case GPCMD_PLAY_AUDIO_TI: case GPCMD_PAUSE_RESUME: /* CD/DVD data reading */ case GPCMD_READ_CD: case GPCMD_READ_CD_MSF: case GPCMD_READ_DISC_INFO: case GPCMD_READ_DVD_STRUCTURE: case GPCMD_READ_HEADER: case GPCMD_READ_TRACK_RZONE_INFO: case GPCMD_READ_SUBCHANNEL: case GPCMD_READ_TOC_PMA_ATIP: case GPCMD_REPORT_KEY: case GPCMD_SCAN: case GPCMD_GET_CONFIGURATION: case GPCMD_READ_FORMAT_CAPACITIES: case GPCMD_GET_EVENT_STATUS_NOTIFICATION: case GPCMD_GET_PERFORMANCE: case GPCMD_SEEK: case GPCMD_STOP_PLAY_SCAN: /* ZBC */ case ZBC_IN: return true; /* Basic writing commands */ case WRITE_6: case WRITE_10: case WRITE_VERIFY: case WRITE_12: case WRITE_VERIFY_12: case WRITE_16: case WRITE_LONG: case WRITE_LONG_2: case WRITE_SAME: case WRITE_SAME_16: case WRITE_SAME_32: case ERASE: case GPCMD_MODE_SELECT_10: case MODE_SELECT: case LOG_SELECT: case GPCMD_BLANK: case GPCMD_CLOSE_TRACK: case GPCMD_FLUSH_CACHE: case GPCMD_FORMAT_UNIT: case GPCMD_REPAIR_RZONE_TRACK: case GPCMD_RESERVE_RZONE_TRACK: case GPCMD_SEND_DVD_STRUCTURE: case GPCMD_SEND_EVENT: case GPCMD_SEND_OPC: case GPCMD_SEND_CUE_SHEET: case GPCMD_SET_SPEED: case GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL: case GPCMD_LOAD_UNLOAD: case GPCMD_SET_STREAMING: case GPCMD_SET_READ_AHEAD: /* ZBC */ case ZBC_OUT: return open_for_write; default: return false; } } EXPORT_SYMBOL(scsi_cmd_allowed); static int scsi_fill_sghdr_rq(struct scsi_device *sdev, struct request *rq, struct sg_io_hdr *hdr, bool open_for_write) { struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(rq); if (hdr->cmd_len < 6) return -EMSGSIZE; if (copy_from_user(scmd->cmnd, hdr->cmdp, hdr->cmd_len)) return -EFAULT; if (!scsi_cmd_allowed(scmd->cmnd, open_for_write)) return -EPERM; scmd->cmd_len = hdr->cmd_len; rq->timeout = msecs_to_jiffies(hdr->timeout); if (!rq->timeout) rq->timeout = sdev->sg_timeout; if (!rq->timeout) rq->timeout = BLK_DEFAULT_SG_TIMEOUT; if (rq->timeout < BLK_MIN_SG_TIMEOUT) rq->timeout = BLK_MIN_SG_TIMEOUT; return 0; } static int scsi_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, struct bio *bio) { struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(rq); int r, ret = 0; /* * fill in all the output members */ hdr->status = scmd->result & 0xff; hdr->masked_status = sg_status_byte(scmd->result); hdr->msg_status = COMMAND_COMPLETE; hdr->host_status = host_byte(scmd->result); hdr->driver_status = 0; if (scsi_status_is_check_condition(hdr->status)) hdr->driver_status = DRIVER_SENSE; hdr->info = 0; if (hdr->masked_status || hdr->host_status || hdr->driver_status) hdr->info |= SG_INFO_CHECK; hdr->resid = scmd->resid_len; hdr->sb_len_wr = 0; if (scmd->sense_len && hdr->sbp) { int len = min((unsigned int) hdr->mx_sb_len, scmd->sense_len); if (!copy_to_user(hdr->sbp, scmd->sense_buffer, len)) hdr->sb_len_wr = len; else ret = -EFAULT; } r = blk_rq_unmap_user(bio); if (!ret) ret = r; return ret; } static int sg_io(struct scsi_device *sdev, struct sg_io_hdr *hdr, bool open_for_write) { unsigned long start_time; ssize_t ret = 0; int writing = 0; int at_head = 0; struct request *rq; struct scsi_cmnd *scmd; struct bio *bio; if (hdr->interface_id != 'S') return -EINVAL; if (hdr->dxfer_len > (queue_max_hw_sectors(sdev->request_queue) << 9)) return -EIO; if (hdr->dxfer_len) switch (hdr->dxfer_direction) { default: return -EINVAL; case SG_DXFER_TO_DEV: writing = 1; break; case SG_DXFER_TO_FROM_DEV: case SG_DXFER_FROM_DEV: break; } if (hdr->flags & SG_FLAG_Q_AT_HEAD) at_head = 1; rq = scsi_alloc_request(sdev->request_queue, writing ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); if (IS_ERR(rq)) return PTR_ERR(rq); scmd = blk_mq_rq_to_pdu(rq); if (hdr->cmd_len > sizeof(scmd->cmnd)) { ret = -EINVAL; goto out_put_request; } ret = scsi_fill_sghdr_rq(sdev, rq, hdr, open_for_write); if (ret < 0) goto out_put_request; ret = blk_rq_map_user_io(rq, NULL, hdr->dxferp, hdr->dxfer_len, GFP_KERNEL, hdr->iovec_count && hdr->dxfer_len, hdr->iovec_count, 0, rq_data_dir(rq)); if (ret) goto out_put_request; bio = rq->bio; scmd->allowed = 0; start_time = jiffies; blk_execute_rq(rq, at_head); hdr->duration = jiffies_to_msecs(jiffies - start_time); ret = scsi_complete_sghdr_rq(rq, hdr, bio); out_put_request: blk_mq_free_request(rq); return ret; } /** * sg_scsi_ioctl -- handle deprecated SCSI_IOCTL_SEND_COMMAND ioctl * @q: request queue to send scsi commands down * @open_for_write: is the file / block device opened for writing? * @sic: userspace structure describing the command to perform * * Send down the scsi command described by @sic to the device below * the request queue @q. * * Notes: * - This interface is deprecated - users should use the SG_IO * interface instead, as this is a more flexible approach to * performing SCSI commands on a device. * - The SCSI command length is determined by examining the 1st byte * of the given command. There is no way to override this. * - Data transfers are limited to PAGE_SIZE * - The length (x + y) must be at least OMAX_SB_LEN bytes long to * accommodate the sense buffer when an error occurs. * The sense buffer is truncated to OMAX_SB_LEN (16) bytes so that * old code will not be surprised. * - If a Unix error occurs (e.g. ENOMEM) then the user will receive * a negative return and the Unix error code in 'errno'. * If the SCSI command succeeds then 0 is returned. * Positive numbers returned are the compacted SCSI error codes (4 * bytes in one int) where the lowest byte is the SCSI status. */ static int sg_scsi_ioctl(struct request_queue *q, bool open_for_write, struct scsi_ioctl_command __user *sic) { struct request *rq; int err; unsigned int in_len, out_len, bytes, opcode, cmdlen; struct scsi_cmnd *scmd; char *buffer = NULL; if (!sic) return -EINVAL; /* * get in an out lengths, verify they don't exceed a page worth of data */ if (get_user(in_len, &sic->inlen)) return -EFAULT; if (get_user(out_len, &sic->outlen)) return -EFAULT; if (in_len > PAGE_SIZE || out_len > PAGE_SIZE) return -EINVAL; if (get_user(opcode, &sic->data[0])) return -EFAULT; bytes = max(in_len, out_len); if (bytes) { buffer = kzalloc(bytes, GFP_NOIO | GFP_USER | __GFP_NOWARN); if (!buffer) return -ENOMEM; } rq = scsi_alloc_request(q, in_len ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto error_free_buffer; } scmd = blk_mq_rq_to_pdu(rq); cmdlen = COMMAND_SIZE(opcode); /* * get command and data to send to device, if any */ err = -EFAULT; scmd->cmd_len = cmdlen; if (copy_from_user(scmd->cmnd, sic->data, cmdlen)) goto error; if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len)) goto error; err = -EPERM; if (!scsi_cmd_allowed(scmd->cmnd, open_for_write)) goto error; /* default. possible overridden later */ scmd->allowed = 5; switch (opcode) { case SEND_DIAGNOSTIC: case FORMAT_UNIT: rq->timeout = FORMAT_UNIT_TIMEOUT; scmd->allowed = 1; break; case START_STOP: rq->timeout = START_STOP_TIMEOUT; break; case MOVE_MEDIUM: rq->timeout = MOVE_MEDIUM_TIMEOUT; break; case READ_ELEMENT_STATUS: rq->timeout = READ_ELEMENT_STATUS_TIMEOUT; break; case READ_DEFECT_DATA: rq->timeout = READ_DEFECT_DATA_TIMEOUT; scmd->allowed = 1; break; default: rq->timeout = BLK_DEFAULT_SG_TIMEOUT; break; } if (bytes) { err = blk_rq_map_kern(q, rq, buffer, bytes, GFP_NOIO); if (err) goto error; } blk_execute_rq(rq, false); err = scmd->result & 0xff; /* only 8 bit SCSI status */ if (err) { if (scmd->sense_len && scmd->sense_buffer) { /* limit sense len for backward compatibility */ if (copy_to_user(sic->data, scmd->sense_buffer, min(scmd->sense_len, 16U))) err = -EFAULT; } } else { if (copy_to_user(sic->data, buffer, out_len)) err = -EFAULT; } error: blk_mq_free_request(rq); error_free_buffer: kfree(buffer); return err; } int put_sg_io_hdr(const struct sg_io_hdr *hdr, void __user *argp) { #ifdef CONFIG_COMPAT if (in_compat_syscall()) { struct compat_sg_io_hdr hdr32 = { .interface_id = hdr->interface_id, .dxfer_direction = hdr->dxfer_direction, .cmd_len = hdr->cmd_len, .mx_sb_len = hdr->mx_sb_len, .iovec_count = hdr->iovec_count, .dxfer_len = hdr->dxfer_len, .dxferp = (uintptr_t)hdr->dxferp, .cmdp = (uintptr_t)hdr->cmdp, .sbp = (uintptr_t)hdr->sbp, .timeout = hdr->timeout, .flags = hdr->flags, .pack_id = hdr->pack_id, .usr_ptr = (uintptr_t)hdr->usr_ptr, .status = hdr->status, .masked_status = hdr->masked_status, .msg_status = hdr->msg_status, .sb_len_wr = hdr->sb_len_wr, .host_status = hdr->host_status, .driver_status = hdr->driver_status, .resid = hdr->resid, .duration = hdr->duration, .info = hdr->info, }; if (copy_to_user(argp, &hdr32, sizeof(hdr32))) return -EFAULT; return 0; } #endif if (copy_to_user(argp, hdr, sizeof(*hdr))) return -EFAULT; return 0; } EXPORT_SYMBOL(put_sg_io_hdr); int get_sg_io_hdr(struct sg_io_hdr *hdr, const void __user *argp) { #ifdef CONFIG_COMPAT struct compat_sg_io_hdr hdr32; if (in_compat_syscall()) { if (copy_from_user(&hdr32, argp, sizeof(hdr32))) return -EFAULT; *hdr = (struct sg_io_hdr) { .interface_id = hdr32.interface_id, .dxfer_direction = hdr32.dxfer_direction, .cmd_len = hdr32.cmd_len, .mx_sb_len = hdr32.mx_sb_len, .iovec_count = hdr32.iovec_count, .dxfer_len = hdr32.dxfer_len, .dxferp = compat_ptr(hdr32.dxferp), .cmdp = compat_ptr(hdr32.cmdp), .sbp = compat_ptr(hdr32.sbp), .timeout = hdr32.timeout, .flags = hdr32.flags, .pack_id = hdr32.pack_id, .usr_ptr = compat_ptr(hdr32.usr_ptr), .status = hdr32.status, .masked_status = hdr32.masked_status, .msg_status = hdr32.msg_status, .sb_len_wr = hdr32.sb_len_wr, .host_status = hdr32.host_status, .driver_status = hdr32.driver_status, .resid = hdr32.resid, .duration = hdr32.duration, .info = hdr32.info, }; return 0; } #endif if (copy_from_user(hdr, argp, sizeof(*hdr))) return -EFAULT; return 0; } EXPORT_SYMBOL(get_sg_io_hdr); #ifdef CONFIG_COMPAT struct compat_cdrom_generic_command { unsigned char cmd[CDROM_PACKET_SIZE]; compat_caddr_t buffer; compat_uint_t buflen; compat_int_t stat; compat_caddr_t sense; unsigned char data_direction; unsigned char pad[3]; compat_int_t quiet; compat_int_t timeout; compat_caddr_t unused; }; #endif static int scsi_get_cdrom_generic_arg(struct cdrom_generic_command *cgc, const void __user *arg) { #ifdef CONFIG_COMPAT if (in_compat_syscall()) { struct compat_cdrom_generic_command cgc32; if (copy_from_user(&cgc32, arg, sizeof(cgc32))) return -EFAULT; *cgc = (struct cdrom_generic_command) { .buffer = compat_ptr(cgc32.buffer), .buflen = cgc32.buflen, .stat = cgc32.stat, .sense = compat_ptr(cgc32.sense), .data_direction = cgc32.data_direction, .quiet = cgc32.quiet, .timeout = cgc32.timeout, .unused = compat_ptr(cgc32.unused), }; memcpy(&cgc->cmd, &cgc32.cmd, CDROM_PACKET_SIZE); return 0; } #endif if (copy_from_user(cgc, arg, sizeof(*cgc))) return -EFAULT; return 0; } static int scsi_put_cdrom_generic_arg(const struct cdrom_generic_command *cgc, void __user *arg) { #ifdef CONFIG_COMPAT if (in_compat_syscall()) { struct compat_cdrom_generic_command cgc32 = { .buffer = (uintptr_t)(cgc->buffer), .buflen = cgc->buflen, .stat = cgc->stat, .sense = (uintptr_t)(cgc->sense), .data_direction = cgc->data_direction, .quiet = cgc->quiet, .timeout = cgc->timeout, .unused = (uintptr_t)(cgc->unused), }; memcpy(&cgc32.cmd, &cgc->cmd, CDROM_PACKET_SIZE); if (copy_to_user(arg, &cgc32, sizeof(cgc32))) return -EFAULT; return 0; } #endif if (copy_to_user(arg, cgc, sizeof(*cgc))) return -EFAULT; return 0; } static int scsi_cdrom_send_packet(struct scsi_device *sdev, bool open_for_write, void __user *arg) { struct cdrom_generic_command cgc; struct sg_io_hdr hdr; int err; err = scsi_get_cdrom_generic_arg(&cgc, arg); if (err) return err; cgc.timeout = clock_t_to_jiffies(cgc.timeout); memset(&hdr, 0, sizeof(hdr)); hdr.interface_id = 'S'; hdr.cmd_len = sizeof(cgc.cmd); hdr.dxfer_len = cgc.buflen; switch (cgc.data_direction) { case CGC_DATA_UNKNOWN: hdr.dxfer_direction = SG_DXFER_UNKNOWN; break; case CGC_DATA_WRITE: hdr.dxfer_direction = SG_DXFER_TO_DEV; break; case CGC_DATA_READ: hdr.dxfer_direction = SG_DXFER_FROM_DEV; break; case CGC_DATA_NONE: hdr.dxfer_direction = SG_DXFER_NONE; break; default: return -EINVAL; } hdr.dxferp = cgc.buffer; hdr.sbp = cgc.sense; if (hdr.sbp) hdr.mx_sb_len = sizeof(struct request_sense); hdr.timeout = jiffies_to_msecs(cgc.timeout); hdr.cmdp = ((struct cdrom_generic_command __user *) arg)->cmd; hdr.cmd_len = sizeof(cgc.cmd); err = sg_io(sdev, &hdr, open_for_write); if (err == -EFAULT) return -EFAULT; if (hdr.status) return -EIO; cgc.stat = err; cgc.buflen = hdr.resid; if (scsi_put_cdrom_generic_arg(&cgc, arg)) return -EFAULT; return err; } static int scsi_ioctl_sg_io(struct scsi_device *sdev, bool open_for_write, void __user *argp) { struct sg_io_hdr hdr; int error; error = get_sg_io_hdr(&hdr, argp); if (error) return error; error = sg_io(sdev, &hdr, open_for_write); if (error == -EFAULT) return error; if (put_sg_io_hdr(&hdr, argp)) return -EFAULT; return error; } /** * scsi_ioctl - Dispatch ioctl to scsi device * @sdev: scsi device receiving ioctl * @open_for_write: is the file / block device opened for writing? * @cmd: which ioctl is it * @arg: data associated with ioctl * * Description: The scsi_ioctl() function differs from most ioctls in that it * does not take a major/minor number as the dev field. Rather, it takes * a pointer to a &struct scsi_device. */ int scsi_ioctl(struct scsi_device *sdev, bool open_for_write, int cmd, void __user *arg) { struct request_queue *q = sdev->request_queue; struct scsi_sense_hdr sense_hdr; /* Check for deprecated ioctls ... all the ioctls which don't * follow the new unique numbering scheme are deprecated */ switch (cmd) { case SCSI_IOCTL_SEND_COMMAND: case SCSI_IOCTL_TEST_UNIT_READY: case SCSI_IOCTL_BENCHMARK_COMMAND: case SCSI_IOCTL_SYNC: case SCSI_IOCTL_START_UNIT: case SCSI_IOCTL_STOP_UNIT: printk(KERN_WARNING "program %s is using a deprecated SCSI " "ioctl, please convert it to SG_IO\n", current->comm); break; default: break; } switch (cmd) { case SG_GET_VERSION_NUM: return sg_get_version(arg); case SG_SET_TIMEOUT: return sg_set_timeout(sdev, arg); case SG_GET_TIMEOUT: return jiffies_to_clock_t(sdev->sg_timeout); case SG_GET_RESERVED_SIZE: return sg_get_reserved_size(sdev, arg); case SG_SET_RESERVED_SIZE: return sg_set_reserved_size(sdev, arg); case SG_EMULATED_HOST: return sg_emulated_host(q, arg); case SG_IO: return scsi_ioctl_sg_io(sdev, open_for_write, arg); case SCSI_IOCTL_SEND_COMMAND: return sg_scsi_ioctl(q, open_for_write, arg); case CDROM_SEND_PACKET: return scsi_cdrom_send_packet(sdev, open_for_write, arg); case CDROMCLOSETRAY: return scsi_send_start_stop(sdev, 3); case CDROMEJECT: return scsi_send_start_stop(sdev, 2); case SCSI_IOCTL_GET_IDLUN: return scsi_get_idlun(sdev, arg); case SCSI_IOCTL_GET_BUS_NUMBER: return put_user(sdev->host->host_no, (int __user *)arg); case SCSI_IOCTL_PROBE_HOST: return ioctl_probe(sdev->host, arg); case SCSI_IOCTL_DOORLOCK: return scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT); case SCSI_IOCTL_DOORUNLOCK: return scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW); case SCSI_IOCTL_TEST_UNIT_READY: return scsi_test_unit_ready(sdev, IOCTL_NORMAL_TIMEOUT, NORMAL_RETRIES, &sense_hdr); case SCSI_IOCTL_START_UNIT: return scsi_send_start_stop(sdev, 1); case SCSI_IOCTL_STOP_UNIT: return scsi_send_start_stop(sdev, 0); case SCSI_IOCTL_GET_PCI: return scsi_ioctl_get_pci(sdev, arg); case SG_SCSI_RESET: return scsi_ioctl_reset(sdev, arg); } #ifdef CONFIG_COMPAT if (in_compat_syscall()) { if (!sdev->host->hostt->compat_ioctl) return -EINVAL; return sdev->host->hostt->compat_ioctl(sdev, cmd, arg); } #endif if (!sdev->host->hostt->ioctl) return -EINVAL; return sdev->host->hostt->ioctl(sdev, cmd, arg); } EXPORT_SYMBOL(scsi_ioctl); /* * We can process a reset even when a device isn't fully operable. */ int scsi_ioctl_block_when_processing_errors(struct scsi_device *sdev, int cmd, bool ndelay) { if (cmd == SG_SCSI_RESET && ndelay) { if (scsi_host_in_recovery(sdev->host)) return -EAGAIN; } else { if (!scsi_block_when_processing_errors(sdev)) return -ENODEV; } return 0; } EXPORT_SYMBOL_GPL(scsi_ioctl_block_when_processing_errors);
2161 201 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PKRU_H #define _ASM_X86_PKRU_H #include <asm/cpufeature.h> #define PKRU_AD_BIT 0x1u #define PKRU_WD_BIT 0x2u #define PKRU_BITS_PER_PKEY 2 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS extern u32 init_pkru_value; #define pkru_get_init_value() READ_ONCE(init_pkru_value) #else #define init_pkru_value 0 #define pkru_get_init_value() 0 #endif static inline bool __pkru_allows_read(u32 pkru, u16 pkey) { int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits)); } static inline bool __pkru_allows_write(u32 pkru, u16 pkey) { int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; /* * Access-disable disables writes too so we need to check * both bits here. */ return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits)); } static inline u32 read_pkru(void) { if (cpu_feature_enabled(X86_FEATURE_OSPKE)) return rdpkru(); return 0; } static inline void write_pkru(u32 pkru) { if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return; /* * WRPKRU is relatively expensive compared to RDPKRU. * Avoid WRPKRU when it would not change the value. */ if (pkru != rdpkru()) wrpkru(pkru); } static inline void pkru_write_default(void) { if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return; wrpkru(pkru_get_init_value()); } #endif
150 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PARAVIRT_H #define _ASM_X86_PARAVIRT_H /* Various instructions on x86 need to be replaced for * para-virtualization: those hooks are defined here. */ #include <asm/paravirt_types.h> #ifndef __ASSEMBLY__ struct mm_struct; #endif #ifdef CONFIG_PARAVIRT #include <asm/pgtable_types.h> #include <asm/asm.h> #include <asm/nospec-branch.h> #ifndef __ASSEMBLY__ #include <linux/bug.h> #include <linux/types.h> #include <linux/cpumask.h> #include <linux/static_call_types.h> #include <asm/frame.h> u64 dummy_steal_clock(int cpu); u64 dummy_sched_clock(void); DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); DECLARE_STATIC_CALL(pv_sched_clock, dummy_sched_clock); void paravirt_set_sched_clock(u64 (*func)(void)); static __always_inline u64 paravirt_sched_clock(void) { return static_call(pv_sched_clock)(); } struct static_key; extern struct static_key paravirt_steal_enabled; extern struct static_key paravirt_steal_rq_enabled; __visible void __native_queued_spin_unlock(struct qspinlock *lock); bool pv_is_native_spin_unlock(void); __visible bool __native_vcpu_is_preempted(long cpu); bool pv_is_native_vcpu_is_preempted(void); static inline u64 paravirt_steal_clock(int cpu) { return static_call(pv_steal_clock)(cpu); } #ifdef CONFIG_PARAVIRT_SPINLOCKS void __init paravirt_set_cap(void); #endif /* The paravirtualized I/O functions */ static inline void slow_down_io(void) { PVOP_VCALL0(cpu.io_delay); #ifdef REALLY_SLOW_IO PVOP_VCALL0(cpu.io_delay); PVOP_VCALL0(cpu.io_delay); PVOP_VCALL0(cpu.io_delay); #endif } void native_flush_tlb_local(void); void native_flush_tlb_global(void); void native_flush_tlb_one_user(unsigned long addr); void native_flush_tlb_multi(const struct cpumask *cpumask, const struct flush_tlb_info *info); static inline void __flush_tlb_local(void) { PVOP_VCALL0(mmu.flush_tlb_user); } static inline void __flush_tlb_global(void) { PVOP_VCALL0(mmu.flush_tlb_kernel); } static inline void __flush_tlb_one_user(unsigned long addr) { PVOP_VCALL1(mmu.flush_tlb_one_user, addr); } static inline void __flush_tlb_multi(const struct cpumask *cpumask, const struct flush_tlb_info *info) { PVOP_VCALL2(mmu.flush_tlb_multi, cpumask, info); } static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) { PVOP_VCALL2(mmu.tlb_remove_table, tlb, table); } static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) { PVOP_VCALL1(mmu.exit_mmap, mm); } static inline void notify_page_enc_status_changed(unsigned long pfn, int npages, bool enc) { PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc); } #ifdef CONFIG_PARAVIRT_XXL static inline void load_sp0(unsigned long sp0) { PVOP_VCALL1(cpu.load_sp0, sp0); } /* The paravirtualized CPUID instruction. */ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { PVOP_VCALL4(cpu.cpuid, eax, ebx, ecx, edx); } /* * These special macros can be used to get or set a debugging register */ static __always_inline unsigned long paravirt_get_debugreg(int reg) { return PVOP_CALL1(unsigned long, cpu.get_debugreg, reg); } #define get_debugreg(var, reg) var = paravirt_get_debugreg(reg) static __always_inline void set_debugreg(unsigned long val, int reg) { PVOP_VCALL2(cpu.set_debugreg, reg, val); } static inline unsigned long read_cr0(void) { return PVOP_CALL0(unsigned long, cpu.read_cr0); } static inline void write_cr0(unsigned long x) { PVOP_VCALL1(cpu.write_cr0, x); } static __always_inline unsigned long read_cr2(void) { return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2, "mov %%cr2, %%rax;", ALT_NOT_XEN); } static __always_inline void write_cr2(unsigned long x) { PVOP_VCALL1(mmu.write_cr2, x); } static inline unsigned long __read_cr3(void) { return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3, "mov %%cr3, %%rax;", ALT_NOT_XEN); } static inline void write_cr3(unsigned long x) { PVOP_ALT_VCALL1(mmu.write_cr3, x, "mov %%rdi, %%cr3", ALT_NOT_XEN); } static inline void __write_cr4(unsigned long x) { PVOP_VCALL1(cpu.write_cr4, x); } static __always_inline void arch_safe_halt(void) { PVOP_VCALL0(irq.safe_halt); } static inline void halt(void) { PVOP_VCALL0(irq.halt); } extern noinstr void pv_native_wbinvd(void); static __always_inline void wbinvd(void) { PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT_XEN); } static inline u64 paravirt_read_msr(unsigned msr) { return PVOP_CALL1(u64, cpu.read_msr, msr); } static inline void paravirt_write_msr(unsigned msr, unsigned low, unsigned high) { PVOP_VCALL3(cpu.write_msr, msr, low, high); } static inline u64 paravirt_read_msr_safe(unsigned msr, int *err) { return PVOP_CALL2(u64, cpu.read_msr_safe, msr, err); } static inline int paravirt_write_msr_safe(unsigned msr, unsigned low, unsigned high) { return PVOP_CALL3(int, cpu.write_msr_safe, msr, low, high); } #define rdmsr(msr, val1, val2) \ do { \ u64 _l = paravirt_read_msr(msr); \ val1 = (u32)_l; \ val2 = _l >> 32; \ } while (0) #define wrmsr(msr, val1, val2) \ do { \ paravirt_write_msr(msr, val1, val2); \ } while (0) #define rdmsrl(msr, val) \ do { \ val = paravirt_read_msr(msr); \ } while (0) static inline void wrmsrl(unsigned msr, u64 val) { wrmsr(msr, (u32)val, (u32)(val>>32)); } #define wrmsr_safe(msr, a, b) paravirt_write_msr_safe(msr, a, b) /* rdmsr with exception handling */ #define rdmsr_safe(msr, a, b) \ ({ \ int _err; \ u64 _l = paravirt_read_msr_safe(msr, &_err); \ (*a) = (u32)_l; \ (*b) = _l >> 32; \ _err; \ }) static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) { int err; *p = paravirt_read_msr_safe(msr, &err); return err; } static inline unsigned long long paravirt_read_pmc(int counter) { return PVOP_CALL1(u64, cpu.read_pmc, counter); } #define rdpmc(counter, low, high) \ do { \ u64 _l = paravirt_read_pmc(counter); \ low = (u32)_l; \ high = _l >> 32; \ } while (0) #define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter)) static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) { PVOP_VCALL2(cpu.alloc_ldt, ldt, entries); } static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) { PVOP_VCALL2(cpu.free_ldt, ldt, entries); } static inline void load_TR_desc(void) { PVOP_VCALL0(cpu.load_tr_desc); } static inline void load_gdt(const struct desc_ptr *dtr) { PVOP_VCALL1(cpu.load_gdt, dtr); } static inline void load_idt(const struct desc_ptr *dtr) { PVOP_VCALL1(cpu.load_idt, dtr); } static inline void set_ldt(const void *addr, unsigned entries) { PVOP_VCALL2(cpu.set_ldt, addr, entries); } static inline unsigned long paravirt_store_tr(void) { return PVOP_CALL0(unsigned long, cpu.store_tr); } #define store_tr(tr) ((tr) = paravirt_store_tr()) static inline void load_TLS(struct thread_struct *t, unsigned cpu) { PVOP_VCALL2(cpu.load_tls, t, cpu); } static inline void load_gs_index(unsigned int gs) { PVOP_VCALL1(cpu.load_gs_index, gs); } static inline void write_ldt_entry(struct desc_struct *dt, int entry, const void *desc) { PVOP_VCALL3(cpu.write_ldt_entry, dt, entry, desc); } static inline void write_gdt_entry(struct desc_struct *dt, int entry, void *desc, int type) { PVOP_VCALL4(cpu.write_gdt_entry, dt, entry, desc, type); } static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) { PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g); } #ifdef CONFIG_X86_IOPL_IOPERM static inline void tss_invalidate_io_bitmap(void) { PVOP_VCALL0(cpu.invalidate_io_bitmap); } static inline void tss_update_io_bitmap(void) { PVOP_VCALL0(cpu.update_io_bitmap); } #endif static inline void paravirt_enter_mmap(struct mm_struct *next) { PVOP_VCALL1(mmu.enter_mmap, next); } static inline int paravirt_pgd_alloc(struct mm_struct *mm) { return PVOP_CALL1(int, mmu.pgd_alloc, mm); } static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) { PVOP_VCALL2(mmu.pgd_free, mm, pgd); } static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) { PVOP_VCALL2(mmu.alloc_pte, mm, pfn); } static inline void paravirt_release_pte(unsigned long pfn) { PVOP_VCALL1(mmu.release_pte, pfn); } static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) { PVOP_VCALL2(mmu.alloc_pmd, mm, pfn); } static inline void paravirt_release_pmd(unsigned long pfn) { PVOP_VCALL1(mmu.release_pmd, pfn); } static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) { PVOP_VCALL2(mmu.alloc_pud, mm, pfn); } static inline void paravirt_release_pud(unsigned long pfn) { PVOP_VCALL1(mmu.release_pud, pfn); } static inline void paravirt_alloc_p4d(struct mm_struct *mm, unsigned long pfn) { PVOP_VCALL2(mmu.alloc_p4d, mm, pfn); } static inline void paravirt_release_p4d(unsigned long pfn) { PVOP_VCALL1(mmu.release_p4d, pfn); } static inline pte_t __pte(pteval_t val) { return (pte_t) { PVOP_ALT_CALLEE1(pteval_t, mmu.make_pte, val, "mov %%rdi, %%rax", ALT_NOT_XEN) }; } static inline pteval_t pte_val(pte_t pte) { return PVOP_ALT_CALLEE1(pteval_t, mmu.pte_val, pte.pte, "mov %%rdi, %%rax", ALT_NOT_XEN); } static inline pgd_t __pgd(pgdval_t val) { return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, mmu.make_pgd, val, "mov %%rdi, %%rax", ALT_NOT_XEN) }; } static inline pgdval_t pgd_val(pgd_t pgd) { return PVOP_ALT_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd, "mov %%rdi, %%rax", ALT_NOT_XEN); } #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pteval_t ret; ret = PVOP_CALL3(pteval_t, mmu.ptep_modify_prot_start, vma, addr, ptep); return (pte_t) { .pte = ret }; } static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t pte) { PVOP_VCALL4(mmu.ptep_modify_prot_commit, vma, addr, ptep, pte.pte); } static inline void set_pte(pte_t *ptep, pte_t pte) { PVOP_VCALL2(mmu.set_pte, ptep, pte.pte); } static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { PVOP_VCALL2(mmu.set_pmd, pmdp, native_pmd_val(pmd)); } static inline pmd_t __pmd(pmdval_t val) { return (pmd_t) { PVOP_ALT_CALLEE1(pmdval_t, mmu.make_pmd, val, "mov %%rdi, %%rax", ALT_NOT_XEN) }; } static inline pmdval_t pmd_val(pmd_t pmd) { return PVOP_ALT_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd, "mov %%rdi, %%rax", ALT_NOT_XEN); } static inline void set_pud(pud_t *pudp, pud_t pud) { PVOP_VCALL2(mmu.set_pud, pudp, native_pud_val(pud)); } static inline pud_t __pud(pudval_t val) { pudval_t ret; ret = PVOP_ALT_CALLEE1(pudval_t, mmu.make_pud, val, "mov %%rdi, %%rax", ALT_NOT_XEN); return (pud_t) { ret }; } static inline pudval_t pud_val(pud_t pud) { return PVOP_ALT_CALLEE1(pudval_t, mmu.pud_val, pud.pud, "mov %%rdi, %%rax", ALT_NOT_XEN); } static inline void pud_clear(pud_t *pudp) { set_pud(pudp, native_make_pud(0)); } static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) { p4dval_t val = native_p4d_val(p4d); PVOP_VCALL2(mmu.set_p4d, p4dp, val); } #if CONFIG_PGTABLE_LEVELS >= 5 static inline p4d_t __p4d(p4dval_t val) { p4dval_t ret = PVOP_ALT_CALLEE1(p4dval_t, mmu.make_p4d, val, "mov %%rdi, %%rax", ALT_NOT_XEN); return (p4d_t) { ret }; } static inline p4dval_t p4d_val(p4d_t p4d) { return PVOP_ALT_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d, "mov %%rdi, %%rax", ALT_NOT_XEN); } static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd) { PVOP_VCALL2(mmu.set_pgd, pgdp, native_pgd_val(pgd)); } #define set_pgd(pgdp, pgdval) do { \ if (pgtable_l5_enabled()) \ __set_pgd(pgdp, pgdval); \ else \ set_p4d((p4d_t *)(pgdp), (p4d_t) { (pgdval).pgd }); \ } while (0) #define pgd_clear(pgdp) do { \ if (pgtable_l5_enabled()) \ set_pgd(pgdp, native_make_pgd(0)); \ } while (0) #endif /* CONFIG_PGTABLE_LEVELS == 5 */ static inline void p4d_clear(p4d_t *p4dp) { set_p4d(p4dp, native_make_p4d(0)); } static inline void set_pte_atomic(pte_t *ptep, pte_t pte) { set_pte(ptep, pte); } static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { set_pte(ptep, native_make_pte(0)); } static inline void pmd_clear(pmd_t *pmdp) { set_pmd(pmdp, native_make_pmd(0)); } #define __HAVE_ARCH_START_CONTEXT_SWITCH static inline void arch_start_context_switch(struct task_struct *prev) { PVOP_VCALL1(cpu.start_context_switch, prev); } static inline void arch_end_context_switch(struct task_struct *next) { PVOP_VCALL1(cpu.end_context_switch, next); } #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE static inline void arch_enter_lazy_mmu_mode(void) { PVOP_VCALL0(mmu.lazy_mode.enter); } static inline void arch_leave_lazy_mmu_mode(void) { PVOP_VCALL0(mmu.lazy_mode.leave); } static inline void arch_flush_lazy_mmu_mode(void) { PVOP_VCALL0(mmu.lazy_mode.flush); } static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags) { pv_ops.mmu.set_fixmap(idx, phys, flags); } #endif #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { PVOP_VCALL2(lock.queued_spin_lock_slowpath, lock, val); } static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock) { PVOP_ALT_VCALLEE1(lock.queued_spin_unlock, lock, "movb $0, (%%" _ASM_ARG1 ");", ALT_NOT(X86_FEATURE_PVUNLOCK)); } static __always_inline void pv_wait(u8 *ptr, u8 val) { PVOP_VCALL2(lock.wait, ptr, val); } static __always_inline void pv_kick(int cpu) { PVOP_VCALL1(lock.kick, cpu); } static __always_inline bool pv_vcpu_is_preempted(long cpu) { return PVOP_ALT_CALLEE1(bool, lock.vcpu_is_preempted, cpu, "xor %%" _ASM_AX ", %%" _ASM_AX ";", ALT_NOT(X86_FEATURE_VCPUPREEMPT)); } void __raw_callee_save___native_queued_spin_unlock(struct qspinlock *lock); bool __raw_callee_save___native_vcpu_is_preempted(long cpu); #endif /* SMP && PARAVIRT_SPINLOCKS */ #ifdef CONFIG_X86_32 /* save and restore all caller-save registers, except return value */ #define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;" #define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;" #else /* save and restore all caller-save registers, except return value */ #define PV_SAVE_ALL_CALLER_REGS \ "push %rcx;" \ "push %rdx;" \ "push %rsi;" \ "push %rdi;" \ "push %r8;" \ "push %r9;" \ "push %r10;" \ "push %r11;" #define PV_RESTORE_ALL_CALLER_REGS \ "pop %r11;" \ "pop %r10;" \ "pop %r9;" \ "pop %r8;" \ "pop %rdi;" \ "pop %rsi;" \ "pop %rdx;" \ "pop %rcx;" #endif /* * Generate a thunk around a function which saves all caller-save * registers except for the return value. This allows C functions to * be called from assembler code where fewer than normal registers are * available. It may also help code generation around calls from C * code if the common case doesn't use many registers. * * When a callee is wrapped in a thunk, the caller can assume that all * arg regs and all scratch registers are preserved across the * call. The return value in rax/eax will not be saved, even for void * functions. */ #define PV_THUNK_NAME(func) "__raw_callee_save_" #func #define __PV_CALLEE_SAVE_REGS_THUNK(func, section) \ extern typeof(func) __raw_callee_save_##func; \ \ asm(".pushsection " section ", \"ax\";" \ ".globl " PV_THUNK_NAME(func) ";" \ ".type " PV_THUNK_NAME(func) ", @function;" \ ASM_FUNC_ALIGN \ PV_THUNK_NAME(func) ":" \ ASM_ENDBR \ FRAME_BEGIN \ PV_SAVE_ALL_CALLER_REGS \ "call " #func ";" \ PV_RESTORE_ALL_CALLER_REGS \ FRAME_END \ ASM_RET \ ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \ ".popsection") #define PV_CALLEE_SAVE_REGS_THUNK(func) \ __PV_CALLEE_SAVE_REGS_THUNK(func, ".text") /* Get a reference to a callee-save function */ #define PV_CALLEE_SAVE(func) \ ((struct paravirt_callee_save) { __raw_callee_save_##func }) /* Promise that "func" already uses the right calling convention */ #define __PV_IS_CALLEE_SAVE(func) \ ((struct paravirt_callee_save) { func }) #ifdef CONFIG_PARAVIRT_XXL static __always_inline unsigned long arch_local_save_flags(void) { return PVOP_ALT_CALLEE0(unsigned long, irq.save_fl, "pushf; pop %%rax;", ALT_NOT_XEN); } static __always_inline void arch_local_irq_disable(void) { PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT_XEN); } static __always_inline void arch_local_irq_enable(void) { PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT_XEN); } static __always_inline unsigned long arch_local_irq_save(void) { unsigned long f; f = arch_local_save_flags(); arch_local_irq_disable(); return f; } #endif /* Make sure as little as possible of this mess escapes. */ #undef PARAVIRT_CALL #undef __PVOP_CALL #undef __PVOP_VCALL #undef PVOP_VCALL0 #undef PVOP_CALL0 #undef PVOP_VCALL1 #undef PVOP_CALL1 #undef PVOP_VCALL2 #undef PVOP_CALL2 #undef PVOP_VCALL3 #undef PVOP_CALL3 #undef PVOP_VCALL4 #undef PVOP_CALL4 extern void default_banner(void); void native_pv_lock_init(void) __init; #else /* __ASSEMBLY__ */ #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT_XXL #ifdef CONFIG_DEBUG_ENTRY #define PARA_INDIRECT(addr) *addr(%rip) .macro PARA_IRQ_save_fl ANNOTATE_RETPOLINE_SAFE; call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); .endm #define SAVE_FLAGS ALTERNATIVE_2 "PARA_IRQ_save_fl;", \ "ALT_CALL_INSTR;", ALT_CALL_ALWAYS, \ "pushf; pop %rax;", ALT_NOT_XEN #endif #endif /* CONFIG_PARAVIRT_XXL */ #endif /* CONFIG_X86_64 */ #endif /* __ASSEMBLY__ */ #else /* CONFIG_PARAVIRT */ # define default_banner x86_init_noop #ifndef __ASSEMBLY__ static inline void native_pv_lock_init(void) { } #endif #endif /* !CONFIG_PARAVIRT */ #ifndef __ASSEMBLY__ #ifndef CONFIG_PARAVIRT_XXL static inline void paravirt_enter_mmap(struct mm_struct *mm) { } #endif #ifndef CONFIG_PARAVIRT static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) { } #endif #ifndef CONFIG_PARAVIRT_SPINLOCKS static inline void paravirt_set_cap(void) { } #endif #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PARAVIRT_H */
8716 8729 8713 8720 8752 8714 8721 8721 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 // SPDX-License-Identifier: GPL-2.0 #include <linux/mm.h> #include <linux/mmzone.h> #include <linux/memblock.h> #include <linux/page_ext.h> #include <linux/memory.h> #include <linux/vmalloc.h> #include <linux/kmemleak.h> #include <linux/page_owner.h> #include <linux/page_idle.h> #include <linux/page_table_check.h> #include <linux/rcupdate.h> #include <linux/pgalloc_tag.h> /* * struct page extension * * This is the feature to manage memory for extended data per page. * * Until now, we must modify struct page itself to store extra data per page. * This requires rebuilding the kernel and it is really time consuming process. * And, sometimes, rebuild is impossible due to third party module dependency. * At last, enlarging struct page could cause un-wanted system behaviour change. * * This feature is intended to overcome above mentioned problems. This feature * allocates memory for extended data per page in certain place rather than * the struct page itself. This memory can be accessed by the accessor * functions provided by this code. During the boot process, it checks whether * allocation of huge chunk of memory is needed or not. If not, it avoids * allocating memory at all. With this advantage, we can include this feature * into the kernel in default and can avoid rebuild and solve related problems. * * To help these things to work well, there are two callbacks for clients. One * is the need callback which is mandatory if user wants to avoid useless * memory allocation at boot-time. The other is optional, init callback, which * is used to do proper initialization after memory is allocated. * * The need callback is used to decide whether extended memory allocation is * needed or not. Sometimes users want to deactivate some features in this * boot and extra memory would be unnecessary. In this case, to avoid * allocating huge chunk of memory, each clients represent their need of * extra memory through the need callback. If one of the need callbacks * returns true, it means that someone needs extra memory so that * page extension core should allocates memory for page extension. If * none of need callbacks return true, memory isn't needed at all in this boot * and page extension core can skip to allocate memory. As result, * none of memory is wasted. * * When need callback returns true, page_ext checks if there is a request for * extra memory through size in struct page_ext_operations. If it is non-zero, * extra space is allocated for each page_ext entry and offset is returned to * user through offset in struct page_ext_operations. * * The init callback is used to do proper initialization after page extension * is completely initialized. In sparse memory system, extra memory is * allocated some time later than memmap is allocated. In other words, lifetime * of memory for page extension isn't same with memmap for struct page. * Therefore, clients can't store extra data until page extension is * initialized, even if pages are allocated and used freely. This could * cause inadequate state of extra data per page, so, to prevent it, client * can utilize this callback to initialize the state of it correctly. */ #ifdef CONFIG_SPARSEMEM #define PAGE_EXT_INVALID (0x1) #endif #if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) static bool need_page_idle(void) { return true; } static struct page_ext_operations page_idle_ops __initdata = { .need = need_page_idle, .need_shared_flags = true, }; #endif static struct page_ext_operations *page_ext_ops[] __initdata = { #ifdef CONFIG_PAGE_OWNER &page_owner_ops, #endif #if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) &page_idle_ops, #endif #ifdef CONFIG_MEM_ALLOC_PROFILING &page_alloc_tagging_ops, #endif #ifdef CONFIG_PAGE_TABLE_CHECK &page_table_check_ops, #endif }; unsigned long page_ext_size; static unsigned long total_usage; #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG /* * To ensure correct allocation tagging for pages, page_ext should be available * before the first page allocation. Otherwise early task stacks will be * allocated before page_ext initialization and missing tags will be flagged. */ bool early_page_ext __meminitdata = true; #else bool early_page_ext __meminitdata; #endif static int __init setup_early_page_ext(char *str) { early_page_ext = true; return 0; } early_param("early_page_ext", setup_early_page_ext); static bool __init invoke_need_callbacks(void) { int i; int entries = ARRAY_SIZE(page_ext_ops); bool need = false; for (i = 0; i < entries; i++) { if (page_ext_ops[i]->need()) { if (page_ext_ops[i]->need_shared_flags) { page_ext_size = sizeof(struct page_ext); break; } } } for (i = 0; i < entries; i++) { if (page_ext_ops[i]->need()) { page_ext_ops[i]->offset = page_ext_size; page_ext_size += page_ext_ops[i]->size; need = true; } } return need; } static void __init invoke_init_callbacks(void) { int i; int entries = ARRAY_SIZE(page_ext_ops); for (i = 0; i < entries; i++) { if (page_ext_ops[i]->init) page_ext_ops[i]->init(); } } static inline struct page_ext *get_entry(void *base, unsigned long index) { return base + page_ext_size * index; } #ifndef CONFIG_SPARSEMEM void __init page_ext_init_flatmem_late(void) { invoke_init_callbacks(); } void __meminit pgdat_page_ext_init(struct pglist_data *pgdat) { pgdat->node_page_ext = NULL; } static struct page_ext *lookup_page_ext(const struct page *page) { unsigned long pfn = page_to_pfn(page); unsigned long index; struct page_ext *base; WARN_ON_ONCE(!rcu_read_lock_held()); base = NODE_DATA(page_to_nid(page))->node_page_ext; /* * The sanity checks the page allocator does upon freeing a * page can reach here before the page_ext arrays are * allocated when feeding a range of pages to the allocator * for the first time during bootup or memory hotplug. */ if (unlikely(!base)) return NULL; index = pfn - round_down(node_start_pfn(page_to_nid(page)), MAX_ORDER_NR_PAGES); return get_entry(base, index); } static int __init alloc_node_page_ext(int nid) { struct page_ext *base; unsigned long table_size; unsigned long nr_pages; nr_pages = NODE_DATA(nid)->node_spanned_pages; if (!nr_pages) return 0; /* * Need extra space if node range is not aligned with * MAX_ORDER_NR_PAGES. When page allocator's buddy algorithm * checks buddy's status, range could be out of exact node range. */ if (!IS_ALIGNED(node_start_pfn(nid), MAX_ORDER_NR_PAGES) || !IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES)) nr_pages += MAX_ORDER_NR_PAGES; table_size = page_ext_size * nr_pages; base = memblock_alloc_try_nid( table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid); if (!base) return -ENOMEM; NODE_DATA(nid)->node_page_ext = base; total_usage += table_size; memmap_boot_pages_add(DIV_ROUND_UP(table_size, PAGE_SIZE)); return 0; } void __init page_ext_init_flatmem(void) { int nid, fail; if (!invoke_need_callbacks()) return; for_each_online_node(nid) { fail = alloc_node_page_ext(nid); if (fail) goto fail; } pr_info("allocated %ld bytes of page_ext\n", total_usage); return; fail: pr_crit("allocation of page_ext failed.\n"); panic("Out of memory"); } #else /* CONFIG_SPARSEMEM */ static bool page_ext_invalid(struct page_ext *page_ext) { return !page_ext || (((unsigned long)page_ext & PAGE_EXT_INVALID) == PAGE_EXT_INVALID); } static struct page_ext *lookup_page_ext(const struct page *page) { unsigned long pfn = page_to_pfn(page); struct mem_section *section = __pfn_to_section(pfn); struct page_ext *page_ext = READ_ONCE(section->page_ext); WARN_ON_ONCE(!rcu_read_lock_held()); /* * The sanity checks the page allocator does upon freeing a * page can reach here before the page_ext arrays are * allocated when feeding a range of pages to the allocator * for the first time during bootup or memory hotplug. */ if (page_ext_invalid(page_ext)) return NULL; return get_entry(page_ext, pfn); } static void *__meminit alloc_page_ext(size_t size, int nid) { gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN; void *addr = NULL; addr = alloc_pages_exact_nid(nid, size, flags); if (addr) kmemleak_alloc(addr, size, 1, flags); else addr = vzalloc_node(size, nid); if (addr) memmap_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); return addr; } static int __meminit init_section_page_ext(unsigned long pfn, int nid) { struct mem_section *section; struct page_ext *base; unsigned long table_size; section = __pfn_to_section(pfn); if (section->page_ext) return 0; table_size = page_ext_size * PAGES_PER_SECTION; base = alloc_page_ext(table_size, nid); /* * The value stored in section->page_ext is (base - pfn) * and it does not point to the memory block allocated above, * causing kmemleak false positives. */ kmemleak_not_leak(base); if (!base) { pr_err("page ext allocation failure\n"); return -ENOMEM; } /* * The passed "pfn" may not be aligned to SECTION. For the calculation * we need to apply a mask. */ pfn &= PAGE_SECTION_MASK; section->page_ext = (void *)base - page_ext_size * pfn; total_usage += table_size; return 0; } static void free_page_ext(void *addr) { size_t table_size; struct page *page; table_size = page_ext_size * PAGES_PER_SECTION; memmap_pages_add(-1L * (DIV_ROUND_UP(table_size, PAGE_SIZE))); if (is_vmalloc_addr(addr)) { vfree(addr); } else { page = virt_to_page(addr); BUG_ON(PageReserved(page)); kmemleak_free(addr); free_pages_exact(addr, table_size); } } static void __free_page_ext(unsigned long pfn) { struct mem_section *ms; struct page_ext *base; ms = __pfn_to_section(pfn); if (!ms || !ms->page_ext) return; base = READ_ONCE(ms->page_ext); /* * page_ext here can be valid while doing the roll back * operation in online_page_ext(). */ if (page_ext_invalid(base)) base = (void *)base - PAGE_EXT_INVALID; WRITE_ONCE(ms->page_ext, NULL); base = get_entry(base, pfn); free_page_ext(base); } static void __invalidate_page_ext(unsigned long pfn) { struct mem_section *ms; void *val; ms = __pfn_to_section(pfn); if (!ms || !ms->page_ext) return; val = (void *)ms->page_ext + PAGE_EXT_INVALID; WRITE_ONCE(ms->page_ext, val); } static int __meminit online_page_ext(unsigned long start_pfn, unsigned long nr_pages, int nid) { unsigned long start, end, pfn; int fail = 0; start = SECTION_ALIGN_DOWN(start_pfn); end = SECTION_ALIGN_UP(start_pfn + nr_pages); if (nid == NUMA_NO_NODE) { /* * In this case, "nid" already exists and contains valid memory. * "start_pfn" passed to us is a pfn which is an arg for * online__pages(), and start_pfn should exist. */ nid = pfn_to_nid(start_pfn); VM_BUG_ON(!node_online(nid)); } for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) fail = init_section_page_ext(pfn, nid); if (!fail) return 0; /* rollback */ end = pfn - PAGES_PER_SECTION; for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) __free_page_ext(pfn); return -ENOMEM; } static void __meminit offline_page_ext(unsigned long start_pfn, unsigned long nr_pages) { unsigned long start, end, pfn; start = SECTION_ALIGN_DOWN(start_pfn); end = SECTION_ALIGN_UP(start_pfn + nr_pages); /* * Freeing of page_ext is done in 3 steps to avoid * use-after-free of it: * 1) Traverse all the sections and mark their page_ext * as invalid. * 2) Wait for all the existing users of page_ext who * started before invalidation to finish. * 3) Free the page_ext. */ for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) __invalidate_page_ext(pfn); synchronize_rcu(); for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) __free_page_ext(pfn); } static int __meminit page_ext_callback(struct notifier_block *self, unsigned long action, void *arg) { struct memory_notify *mn = arg; int ret = 0; switch (action) { case MEM_GOING_ONLINE: ret = online_page_ext(mn->start_pfn, mn->nr_pages, mn->status_change_nid); break; case MEM_OFFLINE: offline_page_ext(mn->start_pfn, mn->nr_pages); break; case MEM_CANCEL_ONLINE: offline_page_ext(mn->start_pfn, mn->nr_pages); break; case MEM_GOING_OFFLINE: break; case MEM_ONLINE: case MEM_CANCEL_OFFLINE: break; } return notifier_from_errno(ret); } void __init page_ext_init(void) { unsigned long pfn; int nid; if (!invoke_need_callbacks()) return; for_each_node_state(nid, N_MEMORY) { unsigned long start_pfn, end_pfn; start_pfn = node_start_pfn(nid); end_pfn = node_end_pfn(nid); /* * start_pfn and end_pfn may not be aligned to SECTION and the * page->flags of out of node pages are not initialized. So we * scan [start_pfn, the biggest section's pfn < end_pfn) here. */ for (pfn = start_pfn; pfn < end_pfn; pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) { if (!pfn_valid(pfn)) continue; /* * Nodes's pfns can be overlapping. * We know some arch can have a nodes layout such as * -------------pfn--------------> * N0 | N1 | N2 | N0 | N1 | N2|.... */ if (pfn_to_nid(pfn) != nid) continue; if (init_section_page_ext(pfn, nid)) goto oom; cond_resched(); } } hotplug_memory_notifier(page_ext_callback, DEFAULT_CALLBACK_PRI); pr_info("allocated %ld bytes of page_ext\n", total_usage); invoke_init_callbacks(); return; oom: panic("Out of memory"); } void __meminit pgdat_page_ext_init(struct pglist_data *pgdat) { } #endif /** * page_ext_get() - Get the extended information for a page. * @page: The page we're interested in. * * Ensures that the page_ext will remain valid until page_ext_put() * is called. * * Return: NULL if no page_ext exists for this page. * Context: Any context. Caller may not sleep until they have called * page_ext_put(). */ struct page_ext *page_ext_get(const struct page *page) { struct page_ext *page_ext; rcu_read_lock(); page_ext = lookup_page_ext(page); if (!page_ext) { rcu_read_unlock(); return NULL; } return page_ext; } /** * page_ext_put() - Working with page extended information is done. * @page_ext: Page extended information received from page_ext_get(). * * The page extended information of the page may not be valid after this * function is called. * * Return: None. * Context: Any context with corresponding page_ext_get() is called. */ void page_ext_put(struct page_ext *page_ext) { if (unlikely(!page_ext)) return; rcu_read_unlock(); }
1981 106 78 78 57 9 9 9 3 8 5 5 865 20 844 1198 1194 1091 798 65 5 36 47 875 874 875 875 873 873 877 875 873 3 834 47 46 874 872 6 6 878 872 875 878 876 877 874 877 876 878 870 875 878 47 835 878 875 876 1272 1272 412 888 396 14 913 1271 1266 1267 424 898 899 860 51 34 875 37 844 876 830 50 878 878 836 878 874 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> */ #include <linux/dcache.h> #include <linux/fs.h> #include <linux/gfp.h> #include <linux/init.h> #include <linux/module.h> #include <linux/mount.h> #include <linux/srcu.h> #include <linux/fsnotify_backend.h> #include "fsnotify.h" /* * Clear all of the marks on an inode when it is being evicted from core */ void __fsnotify_inode_delete(struct inode *inode) { fsnotify_clear_marks_by_inode(inode); } EXPORT_SYMBOL_GPL(__fsnotify_inode_delete); void __fsnotify_vfsmount_delete(struct vfsmount *mnt) { fsnotify_clear_marks_by_mount(mnt); } /** * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. * @sb: superblock being unmounted. * * Called during unmount with no locks held, so needs to be safe against * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block. */ static void fsnotify_unmount_inodes(struct super_block *sb) { struct inode *inode, *iput_inode = NULL; spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { /* * We cannot __iget() an inode in state I_FREEING, * I_WILL_FREE, or I_NEW which is fine because by that point * the inode cannot have any associated watches. */ spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { spin_unlock(&inode->i_lock); continue; } /* * If i_count is zero, the inode cannot have any watches and * doing an __iget/iput with SB_ACTIVE clear would actually * evict all inodes with zero i_count from icache which is * unnecessarily violent and may in fact be illegal to do. * However, we should have been called /after/ evict_inodes * removed all zero refcount inodes, in any case. Test to * be sure. */ if (!atomic_read(&inode->i_count)) { spin_unlock(&inode->i_lock); continue; } __iget(inode); spin_unlock(&inode->i_lock); spin_unlock(&sb->s_inode_list_lock); iput(iput_inode); /* for each watch, send FS_UNMOUNT and then remove it */ fsnotify_inode(inode, FS_UNMOUNT); fsnotify_inode_delete(inode); iput_inode = inode; cond_resched(); spin_lock(&sb->s_inode_list_lock); } spin_unlock(&sb->s_inode_list_lock); iput(iput_inode); } void fsnotify_sb_delete(struct super_block *sb) { struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); /* Were any marks ever added to any object on this sb? */ if (!sbinfo) return; fsnotify_unmount_inodes(sb); fsnotify_clear_marks_by_sb(sb); /* Wait for outstanding object references from connectors */ wait_var_event(fsnotify_sb_watched_objects(sb), !atomic_long_read(fsnotify_sb_watched_objects(sb))); WARN_ON(fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_CONTENT)); WARN_ON(fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_PRE_CONTENT)); } void fsnotify_sb_free(struct super_block *sb) { kfree(sb->s_fsnotify_info); } /* * Given an inode, first check if we care what happens to our children. Inotify * and dnotify both tell their parents about events. If we care about any event * on a child we run all of our children and set a dentry flag saying that the * parent cares. Thus when an event happens on a child it can quickly tell * if there is a need to find a parent and send the event to the parent. */ void fsnotify_set_children_dentry_flags(struct inode *inode) { struct dentry *alias; if (!S_ISDIR(inode->i_mode)) return; spin_lock(&inode->i_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { struct dentry *child; /* run all of the children of the original inode and fix their * d_flags to indicate parental interest (their parent is the * original inode) */ spin_lock(&alias->d_lock); hlist_for_each_entry(child, &alias->d_children, d_sib) { if (!child->d_inode) continue; spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; spin_unlock(&child->d_lock); } spin_unlock(&alias->d_lock); } spin_unlock(&inode->i_lock); } /* * Lazily clear false positive PARENT_WATCHED flag for child whose parent had * stopped watching children. */ static void fsnotify_clear_child_dentry_flag(struct inode *pinode, struct dentry *dentry) { spin_lock(&dentry->d_lock); /* * d_lock is a sufficient barrier to prevent observing a non-watched * parent state from before the fsnotify_set_children_dentry_flags() * or fsnotify_update_flags() call that had set PARENT_WATCHED. */ if (!fsnotify_inode_watches_children(pinode)) dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; spin_unlock(&dentry->d_lock); } /* Are inode/sb/mount interested in parent and name info with this event? */ static bool fsnotify_event_needs_parent(struct inode *inode, __u32 mnt_mask, __u32 mask) { __u32 marks_mask = 0; /* We only send parent/name to inode/sb/mount for events on non-dir */ if (mask & FS_ISDIR) return false; /* * All events that are possible on child can also may be reported with * parent/name info to inode/sb/mount. Otherwise, a watching parent * could result in events reported with unexpected name info to sb/mount. */ BUILD_BUG_ON(FS_EVENTS_POSS_ON_CHILD & ~FS_EVENTS_POSS_TO_PARENT); /* Did either inode/sb/mount subscribe for events with parent/name? */ marks_mask |= fsnotify_parent_needed_mask(inode->i_fsnotify_mask); marks_mask |= fsnotify_parent_needed_mask(inode->i_sb->s_fsnotify_mask); marks_mask |= fsnotify_parent_needed_mask(mnt_mask); /* Did they subscribe for this event with parent/name info? */ return mask & marks_mask; } /* Are there any inode/mount/sb objects that are interested in this event? */ static inline bool fsnotify_object_watched(struct inode *inode, __u32 mnt_mask, __u32 mask) { __u32 marks_mask = inode->i_fsnotify_mask | mnt_mask | inode->i_sb->s_fsnotify_mask; return mask & marks_mask & ALL_FSNOTIFY_EVENTS; } /* * Notify this dentry's parent about a child's events with child name info * if parent is watching or if inode/sb/mount are interested in events with * parent and name info. * * Notify only the child without name info if parent is not watching and * inode/sb/mount are not interested in events with parent and name info. */ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, int data_type) { const struct path *path = fsnotify_data_path(data, data_type); __u32 mnt_mask = path ? real_mount(path->mnt)->mnt_fsnotify_mask : 0; struct inode *inode = d_inode(dentry); struct dentry *parent; bool parent_watched = dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED; bool parent_needed, parent_interested; __u32 p_mask; struct inode *p_inode = NULL; struct name_snapshot name; struct qstr *file_name = NULL; int ret = 0; /* Optimize the likely case of nobody watching this path */ if (likely(!parent_watched && !fsnotify_object_watched(inode, mnt_mask, mask))) return 0; parent = NULL; parent_needed = fsnotify_event_needs_parent(inode, mnt_mask, mask); if (!parent_watched && !parent_needed) goto notify; /* Does parent inode care about events on children? */ parent = dget_parent(dentry); p_inode = parent->d_inode; p_mask = fsnotify_inode_watches_children(p_inode); if (unlikely(parent_watched && !p_mask)) fsnotify_clear_child_dentry_flag(p_inode, dentry); /* * Include parent/name in notification either if some notification * groups require parent info or the parent is interested in this event. */ parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS; if (parent_needed || parent_interested) { /* When notifying parent, child should be passed as data */ WARN_ON_ONCE(inode != fsnotify_data_inode(data, data_type)); /* Notify both parent and child with child name info */ take_dentry_name_snapshot(&name, dentry); file_name = &name.name; if (parent_interested) mask |= FS_EVENT_ON_CHILD; } notify: ret = fsnotify(mask, data, data_type, p_inode, file_name, inode, 0); if (file_name) release_dentry_name_snapshot(&name); dput(parent); return ret; } EXPORT_SYMBOL_GPL(__fsnotify_parent); static int fsnotify_handle_inode_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, u32 cookie) { const struct path *path = fsnotify_data_path(data, data_type); struct inode *inode = fsnotify_data_inode(data, data_type); const struct fsnotify_ops *ops = group->ops; if (WARN_ON_ONCE(!ops->handle_inode_event)) return 0; if (WARN_ON_ONCE(!inode && !dir)) return 0; if ((inode_mark->flags & FSNOTIFY_MARK_FLAG_EXCL_UNLINK) && path && d_unlinked(path->dentry)) return 0; /* Check interest of this mark in case event was sent with two marks */ if (!(mask & inode_mark->mask & ALL_FSNOTIFY_EVENTS)) return 0; return ops->handle_inode_event(inode_mark, mask, inode, dir, name, cookie); } static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, u32 cookie, struct fsnotify_iter_info *iter_info) { struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); struct fsnotify_mark *parent_mark = fsnotify_iter_parent_mark(iter_info); int ret; if (WARN_ON_ONCE(fsnotify_iter_sb_mark(iter_info)) || WARN_ON_ONCE(fsnotify_iter_vfsmount_mark(iter_info))) return 0; /* * For FS_RENAME, 'dir' is old dir and 'data' is new dentry. * The only ->handle_inode_event() backend that supports FS_RENAME is * dnotify, where it means file was renamed within same parent. */ if (mask & FS_RENAME) { struct dentry *moved = fsnotify_data_dentry(data, data_type); if (dir != moved->d_parent->d_inode) return 0; } if (parent_mark) { ret = fsnotify_handle_inode_event(group, parent_mark, mask, data, data_type, dir, name, 0); if (ret) return ret; } if (!inode_mark) return 0; if (mask & FS_EVENT_ON_CHILD) { /* * Some events can be sent on both parent dir and child marks * (e.g. FS_ATTRIB). If both parent dir and child are * watching, report the event once to parent dir with name (if * interested) and once to child without name (if interested). * The child watcher is expecting an event without a file name * and without the FS_EVENT_ON_CHILD flag. */ mask &= ~FS_EVENT_ON_CHILD; dir = NULL; name = NULL; } return fsnotify_handle_inode_event(group, inode_mark, mask, data, data_type, dir, name, cookie); } static int send_to_group(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *file_name, u32 cookie, struct fsnotify_iter_info *iter_info) { struct fsnotify_group *group = NULL; __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS); __u32 marks_mask = 0; __u32 marks_ignore_mask = 0; bool is_dir = mask & FS_ISDIR; struct fsnotify_mark *mark; int type; if (!iter_info->report_mask) return 0; /* clear ignored on inode modification */ if (mask & FS_MODIFY) { fsnotify_foreach_iter_mark_type(iter_info, mark, type) { if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) mark->ignore_mask = 0; } } /* Are any of the group marks interested in this event? */ fsnotify_foreach_iter_mark_type(iter_info, mark, type) { group = mark->group; marks_mask |= mark->mask; marks_ignore_mask |= fsnotify_effective_ignore_mask(mark, is_dir, type); } pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignore_mask=%x data=%p data_type=%d dir=%p cookie=%d\n", __func__, group, mask, marks_mask, marks_ignore_mask, data, data_type, dir, cookie); if (!(test_mask & marks_mask & ~marks_ignore_mask)) return 0; if (group->ops->handle_event) { return group->ops->handle_event(group, mask, data, data_type, dir, file_name, cookie, iter_info); } return fsnotify_handle_event(group, mask, data, data_type, dir, file_name, cookie, iter_info); } static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp) { struct fsnotify_mark_connector *conn; struct hlist_node *node = NULL; conn = srcu_dereference(*connp, &fsnotify_mark_srcu); if (conn) node = srcu_dereference(conn->list.first, &fsnotify_mark_srcu); return hlist_entry_safe(node, struct fsnotify_mark, obj_list); } static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark) { struct hlist_node *node = NULL; if (mark) node = srcu_dereference(mark->obj_list.next, &fsnotify_mark_srcu); return hlist_entry_safe(node, struct fsnotify_mark, obj_list); } /* * iter_info is a multi head priority queue of marks. * Pick a subset of marks from queue heads, all with the same group * and set the report_mask to a subset of the selected marks. * Returns false if there are no more groups to iterate. */ static bool fsnotify_iter_select_report_types( struct fsnotify_iter_info *iter_info) { struct fsnotify_group *max_prio_group = NULL; struct fsnotify_mark *mark; int type; /* Choose max prio group among groups of all queue heads */ fsnotify_foreach_iter_type(type) { mark = iter_info->marks[type]; if (mark && fsnotify_compare_groups(max_prio_group, mark->group) > 0) max_prio_group = mark->group; } if (!max_prio_group) return false; /* Set the report mask for marks from same group as max prio group */ iter_info->current_group = max_prio_group; iter_info->report_mask = 0; fsnotify_foreach_iter_type(type) { mark = iter_info->marks[type]; if (mark && mark->group == iter_info->current_group) { /* * FSNOTIFY_ITER_TYPE_PARENT indicates that this inode * is watching children and interested in this event, * which is an event possible on child. * But is *this mark* watching children? */ if (type == FSNOTIFY_ITER_TYPE_PARENT && !(mark->mask & FS_EVENT_ON_CHILD) && !(fsnotify_ignore_mask(mark) & FS_EVENT_ON_CHILD)) continue; fsnotify_iter_set_report_type(iter_info, type); } } return true; } /* * Pop from iter_info multi head queue, the marks that belong to the group of * current iteration step. */ static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info) { struct fsnotify_mark *mark; int type; /* * We cannot use fsnotify_foreach_iter_mark_type() here because we * may need to advance a mark of type X that belongs to current_group * but was not selected for reporting. */ fsnotify_foreach_iter_type(type) { mark = iter_info->marks[type]; if (mark && mark->group == iter_info->current_group) iter_info->marks[type] = fsnotify_next_mark(iter_info->marks[type]); } } /* * fsnotify - This is the main call to fsnotify. * * The VFS calls into hook specific functions in linux/fsnotify.h. * Those functions then in turn call here. Here will call out to all of the * registered fsnotify_group. Those groups can then use the notification event * in whatever means they feel necessary. * * @mask: event type and flags * @data: object that event happened on * @data_type: type of object for fanotify_data_XXX() accessors * @dir: optional directory associated with event - * if @file_name is not NULL, this is the directory that * @file_name is relative to * @file_name: optional file name associated with event * @inode: optional inode associated with event - * If @dir and @inode are both non-NULL, event may be * reported to both. * @cookie: inotify rename cookie */ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *file_name, struct inode *inode, u32 cookie) { const struct path *path = fsnotify_data_path(data, data_type); struct super_block *sb = fsnotify_data_sb(data, data_type); struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); struct fsnotify_iter_info iter_info = {}; struct mount *mnt = NULL; struct inode *inode2 = NULL; struct dentry *moved; int inode2_type; int ret = 0; __u32 test_mask, marks_mask; if (path) mnt = real_mount(path->mnt); if (!inode) { /* Dirent event - report on TYPE_INODE to dir */ inode = dir; /* For FS_RENAME, inode is old_dir and inode2 is new_dir */ if (mask & FS_RENAME) { moved = fsnotify_data_dentry(data, data_type); inode2 = moved->d_parent->d_inode; inode2_type = FSNOTIFY_ITER_TYPE_INODE2; } } else if (mask & FS_EVENT_ON_CHILD) { /* * Event on child - report on TYPE_PARENT to dir if it is * watching children and on TYPE_INODE to child. */ inode2 = dir; inode2_type = FSNOTIFY_ITER_TYPE_PARENT; } /* * Optimization: srcu_read_lock() has a memory barrier which can * be expensive. It protects walking the *_fsnotify_marks lists. * However, if we do not walk the lists, we do not have to do * SRCU because we have no references to any objects and do not * need SRCU to keep them "alive". */ if ((!sbinfo || !sbinfo->sb_marks) && (!mnt || !mnt->mnt_fsnotify_marks) && (!inode || !inode->i_fsnotify_marks) && (!inode2 || !inode2->i_fsnotify_marks)) return 0; marks_mask = sb->s_fsnotify_mask; if (mnt) marks_mask |= mnt->mnt_fsnotify_mask; if (inode) marks_mask |= inode->i_fsnotify_mask; if (inode2) marks_mask |= inode2->i_fsnotify_mask; /* * If this is a modify event we may need to clear some ignore masks. * In that case, the object with ignore masks will have the FS_MODIFY * event in its mask. * Otherwise, return if none of the marks care about this type of event. */ test_mask = (mask & ALL_FSNOTIFY_EVENTS); if (!(test_mask & marks_mask)) return 0; iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); if (sbinfo) { iter_info.marks[FSNOTIFY_ITER_TYPE_SB] = fsnotify_first_mark(&sbinfo->sb_marks); } if (mnt) { iter_info.marks[FSNOTIFY_ITER_TYPE_VFSMOUNT] = fsnotify_first_mark(&mnt->mnt_fsnotify_marks); } if (inode) { iter_info.marks[FSNOTIFY_ITER_TYPE_INODE] = fsnotify_first_mark(&inode->i_fsnotify_marks); } if (inode2) { iter_info.marks[inode2_type] = fsnotify_first_mark(&inode2->i_fsnotify_marks); } /* * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark * ignore masks are properly reflected for mount/sb mark notifications. * That's why this traversal is so complicated... */ while (fsnotify_iter_select_report_types(&iter_info)) { ret = send_to_group(mask, data, data_type, dir, file_name, cookie, &iter_info); if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) goto out; fsnotify_iter_next(&iter_info); } ret = 0; out: srcu_read_unlock(&fsnotify_mark_srcu, iter_info.srcu_idx); return ret; } EXPORT_SYMBOL_GPL(fsnotify); static __init int fsnotify_init(void) { int ret; BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) panic("initializing fsnotify_mark_srcu"); fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector, SLAB_PANIC); return 0; } core_initcall(fsnotify_init);
33 33 33 33 33 33 279 278 109 114 1 10 109 109 109 108 109 109 100 12 109 109 109 109 91 3 109 101 96 114 105 12 12 114 113 115 1 114 114 113 114 114 113 114 113 10 108 114 109 12 105 104 114 114 4 4 146 146 2 144 146 1 1 143 1 219 218 1 1 216 1 1 218 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_ag.h" #include "xfs_inode.h" #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_icache.h" #include "xfs_trans.h" #include "xfs_ialloc.h" #include "xfs_dir2.h" #include "xfs_health.h" #include <linux/iversion.h> /* * If we are doing readahead on an inode buffer, we might be in log recovery * reading an inode allocation buffer that hasn't yet been replayed, and hence * has not had the inode cores stamped into it. Hence for readahead, the buffer * may be potentially invalid. * * If the readahead buffer is invalid, we need to mark it with an error and * clear the DONE status of the buffer so that a followup read will re-read it * from disk. We don't report the error otherwise to avoid warnings during log * recovery and we don't get unnecessary panics on debug kernels. We use EIO here * because all we want to do is say readahead failed; there is no-one to report * the error to, so this will distinguish it from a non-ra verifier failure. * Changes to this readahead error behaviour also need to be reflected in * xfs_dquot_buf_readahead_verify(). */ static void xfs_inode_buf_verify( struct xfs_buf *bp, bool readahead) { struct xfs_mount *mp = bp->b_mount; int i; int ni; /* * Validate the magic number and version of every inode in the buffer */ ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; for (i = 0; i < ni; i++) { struct xfs_dinode *dip; xfs_agino_t unlinked_ino; int di_ok; dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); unlinked_ino = be32_to_cpu(dip->di_next_unlinked); di_ok = xfs_verify_magic16(bp, dip->di_magic) && xfs_dinode_good_version(mp, dip->di_version) && xfs_verify_agino_or_null(bp->b_pag, unlinked_ino); if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP))) { if (readahead) { bp->b_flags &= ~XBF_DONE; xfs_buf_ioerror(bp, -EIO); return; } #ifdef DEBUG xfs_alert(mp, "bad inode magic/vsn daddr %lld #%d (magic=%x)", (unsigned long long)xfs_buf_daddr(bp), i, be16_to_cpu(dip->di_magic)); #endif xfs_buf_verifier_error(bp, -EFSCORRUPTED, __func__, dip, sizeof(*dip), NULL); return; } } } static void xfs_inode_buf_read_verify( struct xfs_buf *bp) { xfs_inode_buf_verify(bp, false); } static void xfs_inode_buf_readahead_verify( struct xfs_buf *bp) { xfs_inode_buf_verify(bp, true); } static void xfs_inode_buf_write_verify( struct xfs_buf *bp) { xfs_inode_buf_verify(bp, false); } const struct xfs_buf_ops xfs_inode_buf_ops = { .name = "xfs_inode", .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC), cpu_to_be16(XFS_DINODE_MAGIC) }, .verify_read = xfs_inode_buf_read_verify, .verify_write = xfs_inode_buf_write_verify, }; const struct xfs_buf_ops xfs_inode_buf_ra_ops = { .name = "xfs_inode_ra", .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC), cpu_to_be16(XFS_DINODE_MAGIC) }, .verify_read = xfs_inode_buf_readahead_verify, .verify_write = xfs_inode_buf_write_verify, }; /* * This routine is called to map an inode to the buffer containing the on-disk * version of the inode. It returns a pointer to the buffer containing the * on-disk inode in the bpp parameter. */ int xfs_imap_to_bp( struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_imap *imap, struct xfs_buf **bpp) { int error; error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, imap->im_len, XBF_UNMAPPED, bpp, &xfs_inode_buf_ops); if (xfs_metadata_is_sick(error)) xfs_agno_mark_sick(mp, xfs_daddr_to_agno(mp, imap->im_blkno), XFS_SICK_AG_INODES); return error; } static inline struct timespec64 xfs_inode_decode_bigtime(uint64_t ts) { struct timespec64 tv; uint32_t n; tv.tv_sec = xfs_bigtime_to_unix(div_u64_rem(ts, NSEC_PER_SEC, &n)); tv.tv_nsec = n; return tv; } /* Convert an ondisk timestamp to an incore timestamp. */ struct timespec64 xfs_inode_from_disk_ts( struct xfs_dinode *dip, const xfs_timestamp_t ts) { struct timespec64 tv; struct xfs_legacy_timestamp *lts; if (xfs_dinode_has_bigtime(dip)) return xfs_inode_decode_bigtime(be64_to_cpu(ts)); lts = (struct xfs_legacy_timestamp *)&ts; tv.tv_sec = (int)be32_to_cpu(lts->t_sec); tv.tv_nsec = (int)be32_to_cpu(lts->t_nsec); return tv; } int xfs_inode_from_disk( struct xfs_inode *ip, struct xfs_dinode *from) { struct inode *inode = VFS_I(ip); int error; xfs_failaddr_t fa; ASSERT(ip->i_cowfp == NULL); fa = xfs_dinode_verify(ip->i_mount, ip->i_ino, from); if (fa) { xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", from, sizeof(*from), fa); return -EFSCORRUPTED; } /* * First get the permanent information that is needed to allocate an * inode. If the inode is unused, mode is zero and we shouldn't mess * with the uninitialized part of it. */ if (!xfs_has_v3inodes(ip->i_mount)) ip->i_flushiter = be16_to_cpu(from->di_flushiter); inode->i_generation = be32_to_cpu(from->di_gen); inode->i_mode = be16_to_cpu(from->di_mode); if (!inode->i_mode) return 0; /* * Convert v1 inodes immediately to v2 inode format as this is the * minimum inode version format we support in the rest of the code. * They will also be unconditionally written back to disk as v2 inodes. */ if (unlikely(from->di_version == 1)) { set_nlink(inode, be16_to_cpu(from->di_onlink)); ip->i_projid = 0; } else { set_nlink(inode, be32_to_cpu(from->di_nlink)); ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 | be16_to_cpu(from->di_projid_lo); } i_uid_write(inode, be32_to_cpu(from->di_uid)); i_gid_write(inode, be32_to_cpu(from->di_gid)); /* * Time is signed, so need to convert to signed 32 bit before * storing in inode timestamp which may be 64 bit. Otherwise * a time before epoch is converted to a time long after epoch * on 64 bit systems. */ inode_set_atime_to_ts(inode, xfs_inode_from_disk_ts(from, from->di_atime)); inode_set_mtime_to_ts(inode, xfs_inode_from_disk_ts(from, from->di_mtime)); inode_set_ctime_to_ts(inode, xfs_inode_from_disk_ts(from, from->di_ctime)); ip->i_disk_size = be64_to_cpu(from->di_size); ip->i_nblocks = be64_to_cpu(from->di_nblocks); ip->i_extsize = be32_to_cpu(from->di_extsize); ip->i_forkoff = from->di_forkoff; ip->i_diflags = be16_to_cpu(from->di_flags); ip->i_next_unlinked = be32_to_cpu(from->di_next_unlinked); if (from->di_dmevmask || from->di_dmstate) xfs_iflags_set(ip, XFS_IPRESERVE_DM_FIELDS); if (xfs_has_v3inodes(ip->i_mount)) { inode_set_iversion_queried(inode, be64_to_cpu(from->di_changecount)); ip->i_crtime = xfs_inode_from_disk_ts(from, from->di_crtime); ip->i_diflags2 = be64_to_cpu(from->di_flags2); ip->i_cowextsize = be32_to_cpu(from->di_cowextsize); } error = xfs_iformat_data_fork(ip, from); if (error) return error; if (from->di_forkoff) { error = xfs_iformat_attr_fork(ip, from); if (error) goto out_destroy_data_fork; } if (xfs_is_reflink_inode(ip)) xfs_ifork_init_cow(ip); return 0; out_destroy_data_fork: xfs_idestroy_fork(&ip->i_df); return error; } /* Convert an incore timestamp to an ondisk timestamp. */ static inline xfs_timestamp_t xfs_inode_to_disk_ts( struct xfs_inode *ip, const struct timespec64 tv) { struct xfs_legacy_timestamp *lts; xfs_timestamp_t ts; if (xfs_inode_has_bigtime(ip)) return cpu_to_be64(xfs_inode_encode_bigtime(tv)); lts = (struct xfs_legacy_timestamp *)&ts; lts->t_sec = cpu_to_be32(tv.tv_sec); lts->t_nsec = cpu_to_be32(tv.tv_nsec); return ts; } static inline void xfs_inode_to_disk_iext_counters( struct xfs_inode *ip, struct xfs_dinode *to) { if (xfs_inode_has_large_extent_counts(ip)) { to->di_big_nextents = cpu_to_be64(xfs_ifork_nextents(&ip->i_df)); to->di_big_anextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_af)); /* * We might be upgrading the inode to use larger extent counters * than was previously used. Hence zero the unused field. */ to->di_nrext64_pad = cpu_to_be16(0); } else { to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df)); to->di_anextents = cpu_to_be16(xfs_ifork_nextents(&ip->i_af)); } } void xfs_inode_to_disk( struct xfs_inode *ip, struct xfs_dinode *to, xfs_lsn_t lsn) { struct inode *inode = VFS_I(ip); to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); to->di_onlink = 0; to->di_format = xfs_ifork_format(&ip->i_df); to->di_uid = cpu_to_be32(i_uid_read(inode)); to->di_gid = cpu_to_be32(i_gid_read(inode)); to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff); to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16); to->di_atime = xfs_inode_to_disk_ts(ip, inode_get_atime(inode)); to->di_mtime = xfs_inode_to_disk_ts(ip, inode_get_mtime(inode)); to->di_ctime = xfs_inode_to_disk_ts(ip, inode_get_ctime(inode)); to->di_nlink = cpu_to_be32(inode->i_nlink); to->di_gen = cpu_to_be32(inode->i_generation); to->di_mode = cpu_to_be16(inode->i_mode); to->di_size = cpu_to_be64(ip->i_disk_size); to->di_nblocks = cpu_to_be64(ip->i_nblocks); to->di_extsize = cpu_to_be32(ip->i_extsize); to->di_forkoff = ip->i_forkoff; to->di_aformat = xfs_ifork_format(&ip->i_af); to->di_flags = cpu_to_be16(ip->i_diflags); if (xfs_has_v3inodes(ip->i_mount)) { to->di_version = 3; to->di_changecount = cpu_to_be64(inode_peek_iversion(inode)); to->di_crtime = xfs_inode_to_disk_ts(ip, ip->i_crtime); to->di_flags2 = cpu_to_be64(ip->i_diflags2); to->di_cowextsize = cpu_to_be32(ip->i_cowextsize); to->di_ino = cpu_to_be64(ip->i_ino); to->di_lsn = cpu_to_be64(lsn); memset(to->di_pad2, 0, sizeof(to->di_pad2)); uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid); to->di_v3_pad = 0; } else { to->di_version = 2; to->di_flushiter = cpu_to_be16(ip->i_flushiter); memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad)); } xfs_inode_to_disk_iext_counters(ip, to); } static xfs_failaddr_t xfs_dinode_verify_fork( struct xfs_dinode *dip, struct xfs_mount *mp, int whichfork) { xfs_extnum_t di_nextents; xfs_extnum_t max_extents; mode_t mode = be16_to_cpu(dip->di_mode); uint32_t fork_size = XFS_DFORK_SIZE(dip, mp, whichfork); uint32_t fork_format = XFS_DFORK_FORMAT(dip, whichfork); di_nextents = xfs_dfork_nextents(dip, whichfork); /* * For fork types that can contain local data, check that the fork * format matches the size of local data contained within the fork. */ if (whichfork == XFS_DATA_FORK) { /* * A directory small enough to fit in the inode must be stored * in local format. The directory sf <-> extents conversion * code updates the directory size accordingly. Directories * being truncated have zero size and are not subject to this * check. */ if (S_ISDIR(mode)) { if (dip->di_size && be64_to_cpu(dip->di_size) <= fork_size && fork_format != XFS_DINODE_FMT_LOCAL) return __this_address; } /* * A symlink with a target small enough to fit in the inode can * be stored in extents format if xattrs were added (thus * converting the data fork from shortform to remote format) * and then removed. */ if (S_ISLNK(mode)) { if (be64_to_cpu(dip->di_size) <= fork_size && fork_format != XFS_DINODE_FMT_EXTENTS && fork_format != XFS_DINODE_FMT_LOCAL) return __this_address; } /* * For all types, check that when the size says the fork should * be in extent or btree format, the inode isn't claiming to be * in local format. */ if (be64_to_cpu(dip->di_size) > fork_size && fork_format == XFS_DINODE_FMT_LOCAL) return __this_address; } switch (fork_format) { case XFS_DINODE_FMT_LOCAL: /* * No local regular files yet. */ if (S_ISREG(mode) && whichfork == XFS_DATA_FORK) return __this_address; if (di_nextents) return __this_address; break; case XFS_DINODE_FMT_EXTENTS: if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork)) return __this_address; break; case XFS_DINODE_FMT_BTREE: max_extents = xfs_iext_max_nextents( xfs_dinode_has_large_extent_counts(dip), whichfork); if (di_nextents > max_extents) return __this_address; break; default: return __this_address; } return NULL; } static xfs_failaddr_t xfs_dinode_verify_forkoff( struct xfs_dinode *dip, struct xfs_mount *mp) { if (!dip->di_forkoff) return NULL; switch (dip->di_format) { case XFS_DINODE_FMT_DEV: if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3)) return __this_address; break; case XFS_DINODE_FMT_LOCAL: /* fall through ... */ case XFS_DINODE_FMT_EXTENTS: /* fall through ... */ case XFS_DINODE_FMT_BTREE: if (dip->di_forkoff >= (XFS_LITINO(mp) >> 3)) return __this_address; break; default: return __this_address; } return NULL; } static xfs_failaddr_t xfs_dinode_verify_nrext64( struct xfs_mount *mp, struct xfs_dinode *dip) { if (xfs_dinode_has_large_extent_counts(dip)) { if (!xfs_has_large_extent_counts(mp)) return __this_address; if (dip->di_nrext64_pad != 0) return __this_address; } else if (dip->di_version >= 3) { if (dip->di_v3_pad != 0) return __this_address; } return NULL; } xfs_failaddr_t xfs_dinode_verify( struct xfs_mount *mp, xfs_ino_t ino, struct xfs_dinode *dip) { xfs_failaddr_t fa; uint16_t mode; uint16_t flags; uint64_t flags2; uint64_t di_size; xfs_extnum_t nextents; xfs_extnum_t naextents; xfs_filblks_t nblocks; if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) return __this_address; /* Verify v3 integrity information first */ if (dip->di_version >= 3) { if (!xfs_has_v3inodes(mp)) return __this_address; if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, XFS_DINODE_CRC_OFF)) return __this_address; if (be64_to_cpu(dip->di_ino) != ino) return __this_address; if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; } /* * Historical note: xfsprogs in the 3.2 era set up its incore inodes to * have di_nlink track the link count, even if the actual filesystem * only supported V1 inodes (i.e. di_onlink). When writing out the * ondisk inode, it would set both the ondisk di_nlink and di_onlink to * the the incore di_nlink value, which is why we cannot check for * di_nlink==0 on a V1 inode. V2/3 inodes would get written out with * di_onlink==0, so we can check that. */ if (dip->di_version >= 2) { if (dip->di_onlink) return __this_address; } /* don't allow invalid i_size */ di_size = be64_to_cpu(dip->di_size); if (di_size & (1ULL << 63)) return __this_address; mode = be16_to_cpu(dip->di_mode); if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN) return __this_address; /* * No zero-length symlinks/dirs unless they're unlinked and hence being * inactivated. */ if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) { if (dip->di_version > 1) { if (dip->di_nlink) return __this_address; } else { if (dip->di_onlink) return __this_address; } } fa = xfs_dinode_verify_nrext64(mp, dip); if (fa) return fa; nextents = xfs_dfork_data_extents(dip); naextents = xfs_dfork_attr_extents(dip); nblocks = be64_to_cpu(dip->di_nblocks); /* Fork checks carried over from xfs_iformat_fork */ if (mode && nextents + naextents > nblocks) return __this_address; if (nextents + naextents == 0 && nblocks != 0) return __this_address; if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents) return __this_address; if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize) return __this_address; flags = be16_to_cpu(dip->di_flags); if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) return __this_address; /* check for illegal values of forkoff */ fa = xfs_dinode_verify_forkoff(dip, mp); if (fa) return fa; /* Do we have appropriate data fork formats for the mode? */ switch (mode & S_IFMT) { case S_IFIFO: case S_IFCHR: case S_IFBLK: case S_IFSOCK: if (dip->di_format != XFS_DINODE_FMT_DEV) return __this_address; break; case S_IFREG: case S_IFLNK: case S_IFDIR: fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK); if (fa) return fa; break; case 0: /* Uninitialized inode ok. */ break; default: return __this_address; } if (dip->di_forkoff) { fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK); if (fa) return fa; } else { /* * If there is no fork offset, this may be a freshly-made inode * in a new disk cluster, in which case di_aformat is zeroed. * Otherwise, such an inode must be in EXTENTS format; this goes * for freed inodes as well. */ switch (dip->di_aformat) { case 0: case XFS_DINODE_FMT_EXTENTS: break; default: return __this_address; } if (naextents) return __this_address; } /* extent size hint validation */ fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize), mode, flags); if (fa) return fa; /* only version 3 or greater inodes are extensively verified here */ if (dip->di_version < 3) return NULL; flags2 = be64_to_cpu(dip->di_flags2); /* don't allow reflink/cowextsize if we don't have reflink */ if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) && !xfs_has_reflink(mp)) return __this_address; /* only regular files get reflink */ if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG) return __this_address; /* don't let reflink and realtime mix */ if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME)) return __this_address; /* COW extent size hint validation */ fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize), mode, flags, flags2); if (fa) return fa; /* bigtime iflag can only happen on bigtime filesystems */ if (xfs_dinode_has_bigtime(dip) && !xfs_has_bigtime(mp)) return __this_address; return NULL; } void xfs_dinode_calc_crc( struct xfs_mount *mp, struct xfs_dinode *dip) { uint32_t crc; if (dip->di_version < 3) return; ASSERT(xfs_has_crc(mp)); crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize, XFS_DINODE_CRC_OFF); dip->di_crc = xfs_end_cksum(crc); } /* * Validate di_extsize hint. * * 1. Extent size hint is only valid for directories and regular files. * 2. FS_XFLAG_EXTSIZE is only valid for regular files. * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories. * 4. Hint cannot be larger than MAXTEXTLEN. * 5. Can be changed on directories at any time. * 6. Hint value of 0 turns off hints, clears inode flags. * 7. Extent size must be a multiple of the appropriate block size. * For realtime files, this is the rt extent size. * 8. For non-realtime files, the extent size hint must be limited * to half the AG size to avoid alignment extending the extent beyond the * limits of the AG. */ xfs_failaddr_t xfs_inode_validate_extsize( struct xfs_mount *mp, uint32_t extsize, uint16_t mode, uint16_t flags) { bool rt_flag; bool hint_flag; bool inherit_flag; uint32_t extsize_bytes; uint32_t blocksize_bytes; rt_flag = (flags & XFS_DIFLAG_REALTIME); hint_flag = (flags & XFS_DIFLAG_EXTSIZE); inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT); extsize_bytes = XFS_FSB_TO_B(mp, extsize); /* * This comment describes a historic gap in this verifier function. * * For a directory with both RTINHERIT and EXTSZINHERIT flags set, this * function has never checked that the extent size hint is an integer * multiple of the realtime extent size. Since we allow users to set * this combination on non-rt filesystems /and/ to change the rt * extent size when adding a rt device to a filesystem, the net effect * is that users can configure a filesystem anticipating one rt * geometry and change their minds later. Directories do not use the * extent size hint, so this is harmless for them. * * If a directory with a misaligned extent size hint is allowed to * propagate that hint into a new regular realtime file, the result * is that the inode cluster buffer verifier will trigger a corruption * shutdown the next time it is run, because the verifier has always * enforced the alignment rule for regular files. * * Because we allow administrators to set a new rt extent size when * adding a rt section, we cannot add a check to this verifier because * that will result a new source of directory corruption errors when * reading an existing filesystem. Instead, we rely on callers to * decide when alignment checks are appropriate, and fix things up as * needed. */ if (rt_flag) blocksize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize); else blocksize_bytes = mp->m_sb.sb_blocksize; if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode))) return __this_address; if (hint_flag && !S_ISREG(mode)) return __this_address; if (inherit_flag && !S_ISDIR(mode)) return __this_address; if ((hint_flag || inherit_flag) && extsize == 0) return __this_address; /* free inodes get flags set to zero but extsize remains */ if (mode && !(hint_flag || inherit_flag) && extsize != 0) return __this_address; if (extsize_bytes % blocksize_bytes) return __this_address; if (extsize > XFS_MAX_BMBT_EXTLEN) return __this_address; if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2) return __this_address; return NULL; } /* * Validate di_cowextsize hint. * * 1. CoW extent size hint can only be set if reflink is enabled on the fs. * The inode does not have to have any shared blocks, but it must be a v3. * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files; * for a directory, the hint is propagated to new files. * 3. Can be changed on files & directories at any time. * 4. Hint value of 0 turns off hints, clears inode flags. * 5. Extent size must be a multiple of the appropriate block size. * 6. The extent size hint must be limited to half the AG size to avoid * alignment extending the extent beyond the limits of the AG. */ xfs_failaddr_t xfs_inode_validate_cowextsize( struct xfs_mount *mp, uint32_t cowextsize, uint16_t mode, uint16_t flags, uint64_t flags2) { bool rt_flag; bool hint_flag; uint32_t cowextsize_bytes; rt_flag = (flags & XFS_DIFLAG_REALTIME); hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE); cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize); if (hint_flag && !xfs_has_reflink(mp)) return __this_address; if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode))) return __this_address; if (hint_flag && cowextsize == 0) return __this_address; /* free inodes get flags set to zero but cowextsize remains */ if (mode && !hint_flag && cowextsize != 0) return __this_address; if (hint_flag && rt_flag) return __this_address; if (cowextsize_bytes % mp->m_sb.sb_blocksize) return __this_address; if (cowextsize > XFS_MAX_BMBT_EXTLEN) return __this_address; if (cowextsize > mp->m_sb.sb_agblocks / 2) return __this_address; return NULL; }
2 2 574 442 571 383 11 422 48 54 41 65 85 12 40 22 85 12 21 363 363 238 207 242 94 204 204 19 279 312 161 532 562 141 258 517 517 4 160 561 257 256 10 10 140 442 3 3 509 521 520 84 424 4 19 424 19 4 354 437 70 68 2 474 81 435 460 35 38 19 2 61 62 62 630 305 35 633 2 89 10 213 110 128 7 56 29 16 6 21 34 16 126 46 3 90 51 266 127 4 30 50 2 20 15 1 12 2 40 30 12 128 328 35 11 237 109 12 59 126 33 5 77 45 145 35 146 37 138 10 10 9 230 230 272 230 272 272 6 547 547 50 50 267 50 267 537 537 537 241 239 12 27 10 92 49 5 82 114 113 194 15 37 1 89 48 18 54 47 106 92 30 259 88 238 236 3 8 6 6 3 1 6 2 5 5 148 112 236 242 119 25 218 30 42 21 35 26 3 26 10 115 115 115 211 2 21 10 4 4 3 6 2 4 1 40 11 263 11 440 428 1 11 341 11 340 11 91 88 341 4 340 94 94 217 211 6 35 35 35 242 241 78 210 77 35 35 33 33 229 81 39 40 38 75 72 236 87 29 247 61 39 15 43 21 31 30 21 16 50 8 8 11 6 1 1 34 34 53 52 4 7 3 42 6 31 43 43 7 7 7 43 38 7 1 31 7 9 3 8 8 8 9 10 10 6 22 22 6 131 131 131 579 200 526 267 589 267 562 520 225 55 55 7 1 10 20 10 27 134 37 1 19 61 1 6 231 231 231 231 213 231 115 2 115 22 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2009, Christoph Hellwig * All Rights Reserved. * * NOTE: none of these tracepoints shall be considered a stable kernel ABI * as they can change at any time. * * Current conventions for printing numbers measuring specific units: * * agno: allocation group number * * agino: per-AG inode number * ino: filesystem inode number * * agbno: per-AG block number in fs blocks * startblock: physical block number for file mappings. This is either a * segmented fsblock for data device mappings, or a rfsblock * for realtime device mappings * fsbcount: number of blocks in an extent, in fs blocks * * daddr: physical block number in 512b blocks * bbcount: number of blocks in a physical extent, in 512b blocks * * rtx: physical rt extent number for extent mappings * rtxcount: number of rt extents in an extent mapping * * owner: reverse-mapping owner, usually inodes * * fileoff: file offset, in fs blocks * pos: file offset, in bytes * bytecount: number of bytes * * dablk: directory or xattr block offset, in filesystem blocks * * disize: ondisk file size, in bytes * isize: incore file size, in bytes * * forkoff: inode fork offset, in bytes * * ireccount: number of inode records * * Numbers describing space allocations (blocks, extents, inodes) should be * formatted in hexadecimal. */ #undef TRACE_SYSTEM #define TRACE_SYSTEM xfs #if !defined(_TRACE_XFS_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_XFS_H #include <linux/tracepoint.h> struct xfs_agf; struct xfs_alloc_arg; struct xfs_attr_list_context; struct xfs_buf_log_item; struct xfs_da_args; struct xfs_da_node_entry; struct xfs_dquot; struct xfs_log_item; struct xlog; struct xlog_ticket; struct xlog_recover; struct xlog_recover_item; struct xlog_rec_header; struct xlog_in_core; struct xfs_buf_log_format; struct xfs_inode_log_format; struct xfs_bmbt_irec; struct xfs_btree_cur; struct xfs_defer_op_type; struct xfs_refcount_irec; struct xfs_fsmap; struct xfs_rmap_irec; struct xfs_icreate_log; struct xfs_owner_info; struct xfs_trans_res; struct xfs_inobt_rec_incore; union xfs_btree_ptr; struct xfs_dqtrx; struct xfs_icwalk; struct xfs_perag; struct xfbtree; struct xfs_btree_ops; struct xfs_bmap_intent; struct xfs_exchmaps_intent; struct xfs_exchmaps_req; struct xfs_exchrange; struct xfs_getparents; struct xfs_parent_irec; struct xfs_attrlist_cursor_kern; struct xfs_extent_free_item; struct xfs_rmap_intent; struct xfs_refcount_intent; #define XFS_ATTR_FILTER_FLAGS \ { XFS_ATTR_ROOT, "ROOT" }, \ { XFS_ATTR_SECURE, "SECURE" }, \ { XFS_ATTR_INCOMPLETE, "INCOMPLETE" }, \ { XFS_ATTR_PARENT, "PARENT" } DECLARE_EVENT_CLASS(xfs_attr_list_class, TP_PROTO(struct xfs_attr_list_context *ctx), TP_ARGS(ctx), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(u32, hashval) __field(u32, blkno) __field(u32, offset) __field(void *, buffer) __field(int, bufsize) __field(int, count) __field(int, firstu) __field(int, dupcnt) __field(unsigned int, attr_filter) ), TP_fast_assign( __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev; __entry->ino = ctx->dp->i_ino; __entry->hashval = ctx->cursor.hashval; __entry->blkno = ctx->cursor.blkno; __entry->offset = ctx->cursor.offset; __entry->buffer = ctx->buffer; __entry->bufsize = ctx->bufsize; __entry->count = ctx->count; __entry->firstu = ctx->firstu; __entry->attr_filter = ctx->attr_filter; ), TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " "buffer %p size %u count %u firstu %u filter %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->hashval, __entry->blkno, __entry->offset, __entry->dupcnt, __entry->buffer, __entry->bufsize, __entry->count, __entry->firstu, __print_flags(__entry->attr_filter, "|", XFS_ATTR_FILTER_FLAGS) ) ) #define DEFINE_ATTR_LIST_EVENT(name) \ DEFINE_EVENT(xfs_attr_list_class, name, \ TP_PROTO(struct xfs_attr_list_context *ctx), \ TP_ARGS(ctx)) DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf); DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all); DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf); DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf_end); DEFINE_ATTR_LIST_EVENT(xfs_attr_list_full); DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add); DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk); DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound); DEFINE_ATTR_LIST_EVENT(xfs_attr_leaf_list); DEFINE_ATTR_LIST_EVENT(xfs_attr_node_list); TRACE_EVENT(xlog_intent_recovery_failed, TP_PROTO(struct xfs_mount *mp, const struct xfs_defer_op_type *ops, int error), TP_ARGS(mp, ops, error), TP_STRUCT__entry( __field(dev_t, dev) __string(name, ops->name) __field(int, error) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __assign_str(name); __entry->error = error; ), TP_printk("dev %d:%d optype %s error %d", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(name), __entry->error) ); DECLARE_EVENT_CLASS(xfs_perag_class, TP_PROTO(struct xfs_perag *pag, unsigned long caller_ip), TP_ARGS(pag, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(int, refcount) __field(int, active_refcount) __field(unsigned long, caller_ip) ), TP_fast_assign( __entry->dev = pag->pag_mount->m_super->s_dev; __entry->agno = pag->pag_agno; __entry->refcount = atomic_read(&pag->pag_ref); __entry->active_refcount = atomic_read(&pag->pag_active_ref); __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d agno 0x%x passive refs %d active refs %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->refcount, __entry->active_refcount, (char *)__entry->caller_ip) ); #define DEFINE_PERAG_REF_EVENT(name) \ DEFINE_EVENT(xfs_perag_class, name, \ TP_PROTO(struct xfs_perag *pag, unsigned long caller_ip), \ TP_ARGS(pag, caller_ip)) DEFINE_PERAG_REF_EVENT(xfs_perag_get); DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag); DEFINE_PERAG_REF_EVENT(xfs_perag_hold); DEFINE_PERAG_REF_EVENT(xfs_perag_put); DEFINE_PERAG_REF_EVENT(xfs_perag_grab); DEFINE_PERAG_REF_EVENT(xfs_perag_grab_tag); DEFINE_PERAG_REF_EVENT(xfs_perag_rele); DEFINE_PERAG_REF_EVENT(xfs_perag_set_inode_tag); DEFINE_PERAG_REF_EVENT(xfs_perag_clear_inode_tag); TRACE_EVENT(xfs_inodegc_worker, TP_PROTO(struct xfs_mount *mp, unsigned int shrinker_hits), TP_ARGS(mp, shrinker_hits), TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned int, shrinker_hits) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->shrinker_hits = shrinker_hits; ), TP_printk("dev %d:%d shrinker_hits %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->shrinker_hits) ); DECLARE_EVENT_CLASS(xfs_fs_class, TP_PROTO(struct xfs_mount *mp, void *caller_ip), TP_ARGS(mp, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long long, mflags) __field(unsigned long, opstate) __field(unsigned long, sbflags) __field(void *, caller_ip) ), TP_fast_assign( if (mp) { __entry->dev = mp->m_super->s_dev; __entry->mflags = mp->m_features; __entry->opstate = mp->m_opstate; __entry->sbflags = mp->m_super->s_flags; } __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d m_features 0x%llx opstate (%s) s_flags 0x%lx caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->mflags, __print_flags(__entry->opstate, "|", XFS_OPSTATE_STRINGS), __entry->sbflags, __entry->caller_ip) ); #define DEFINE_FS_EVENT(name) \ DEFINE_EVENT(xfs_fs_class, name, \ TP_PROTO(struct xfs_mount *mp, void *caller_ip), \ TP_ARGS(mp, caller_ip)) DEFINE_FS_EVENT(xfs_inodegc_flush); DEFINE_FS_EVENT(xfs_inodegc_push); DEFINE_FS_EVENT(xfs_inodegc_start); DEFINE_FS_EVENT(xfs_inodegc_stop); DEFINE_FS_EVENT(xfs_inodegc_queue); DEFINE_FS_EVENT(xfs_inodegc_throttle); DEFINE_FS_EVENT(xfs_fs_sync_fs); DEFINE_FS_EVENT(xfs_blockgc_start); DEFINE_FS_EVENT(xfs_blockgc_stop); DEFINE_FS_EVENT(xfs_blockgc_worker); DEFINE_FS_EVENT(xfs_blockgc_flush_all); TRACE_EVENT(xfs_inodegc_shrinker_scan, TP_PROTO(struct xfs_mount *mp, struct shrink_control *sc, void *caller_ip), TP_ARGS(mp, sc, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, nr_to_scan) __field(void *, caller_ip) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->nr_to_scan = sc->nr_to_scan; __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d nr_to_scan %lu caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_to_scan, __entry->caller_ip) ); DECLARE_EVENT_CLASS(xfs_ag_class, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno), TP_ARGS(mp, agno), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; ), TP_printk("dev %d:%d agno 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno) ); #define DEFINE_AG_EVENT(name) \ DEFINE_EVENT(xfs_ag_class, name, \ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno), \ TP_ARGS(mp, agno)) DEFINE_AG_EVENT(xfs_read_agf); DEFINE_AG_EVENT(xfs_alloc_read_agf); DEFINE_AG_EVENT(xfs_read_agi); DEFINE_AG_EVENT(xfs_ialloc_read_agi); TRACE_EVENT(xfs_attr_list_node_descend, TP_PROTO(struct xfs_attr_list_context *ctx, struct xfs_da_node_entry *btree), TP_ARGS(ctx, btree), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(u32, hashval) __field(u32, blkno) __field(u32, offset) __field(void *, buffer) __field(int, bufsize) __field(int, count) __field(int, firstu) __field(int, dupcnt) __field(unsigned int, attr_filter) __field(u32, bt_hashval) __field(u32, bt_before) ), TP_fast_assign( __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev; __entry->ino = ctx->dp->i_ino; __entry->hashval = ctx->cursor.hashval; __entry->blkno = ctx->cursor.blkno; __entry->offset = ctx->cursor.offset; __entry->buffer = ctx->buffer; __entry->bufsize = ctx->bufsize; __entry->count = ctx->count; __entry->firstu = ctx->firstu; __entry->attr_filter = ctx->attr_filter; __entry->bt_hashval = be32_to_cpu(btree->hashval); __entry->bt_before = be32_to_cpu(btree->before); ), TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " "buffer %p size %u count %u firstu %u filter %s " "node hashval %u, node before %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->hashval, __entry->blkno, __entry->offset, __entry->dupcnt, __entry->buffer, __entry->bufsize, __entry->count, __entry->firstu, __print_flags(__entry->attr_filter, "|", XFS_ATTR_FILTER_FLAGS), __entry->bt_hashval, __entry->bt_before) ); DECLARE_EVENT_CLASS(xfs_bmap_class, TP_PROTO(struct xfs_inode *ip, struct xfs_iext_cursor *cur, int state, unsigned long caller_ip), TP_ARGS(ip, cur, state, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(void *, leaf) __field(int, pos) __field(xfs_fileoff_t, startoff) __field(xfs_fsblock_t, startblock) __field(xfs_filblks_t, blockcount) __field(xfs_exntst_t, state) __field(int, bmap_state) __field(unsigned long, caller_ip) ), TP_fast_assign( struct xfs_ifork *ifp; struct xfs_bmbt_irec r; ifp = xfs_iext_state_to_fork(ip, state); xfs_iext_get_extent(ifp, cur, &r); __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; __entry->leaf = cur->leaf; __entry->pos = cur->pos; __entry->startoff = r.br_startoff; __entry->startblock = r.br_startblock; __entry->blockcount = r.br_blockcount; __entry->state = r.br_state; __entry->bmap_state = state; __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d ino 0x%llx state %s cur %p/%d " "fileoff 0x%llx startblock 0x%llx fsbcount 0x%llx flag %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), __entry->leaf, __entry->pos, __entry->startoff, (int64_t)__entry->startblock, __entry->blockcount, __entry->state, (char *)__entry->caller_ip) ) #define DEFINE_BMAP_EVENT(name) \ DEFINE_EVENT(xfs_bmap_class, name, \ TP_PROTO(struct xfs_inode *ip, struct xfs_iext_cursor *cur, int state, \ unsigned long caller_ip), \ TP_ARGS(ip, cur, state, caller_ip)) DEFINE_BMAP_EVENT(xfs_iext_insert); DEFINE_BMAP_EVENT(xfs_iext_remove); DEFINE_BMAP_EVENT(xfs_bmap_pre_update); DEFINE_BMAP_EVENT(xfs_bmap_post_update); DEFINE_BMAP_EVENT(xfs_read_extent); DEFINE_BMAP_EVENT(xfs_write_extent); DECLARE_EVENT_CLASS(xfs_buf_class, TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), TP_ARGS(bp, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_daddr_t, bno) __field(int, nblks) __field(int, hold) __field(int, pincount) __field(unsigned, lockval) __field(unsigned, flags) __field(unsigned long, caller_ip) __field(const void *, buf_ops) ), TP_fast_assign( __entry->dev = bp->b_target->bt_dev; __entry->bno = xfs_buf_daddr(bp); __entry->nblks = bp->b_length; __entry->hold = atomic_read(&bp->b_hold); __entry->pincount = atomic_read(&bp->b_pin_count); __entry->lockval = bp->b_sema.count; __entry->flags = bp->b_flags; __entry->caller_ip = caller_ip; __entry->buf_ops = bp->b_ops; ), TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d " "lock %d flags %s bufops %pS caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->bno, __entry->nblks, __entry->hold, __entry->pincount, __entry->lockval, __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), __entry->buf_ops, (void *)__entry->caller_ip) ) #define DEFINE_BUF_EVENT(name) \ DEFINE_EVENT(xfs_buf_class, name, \ TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \ TP_ARGS(bp, caller_ip)) DEFINE_BUF_EVENT(xfs_buf_init); DEFINE_BUF_EVENT(xfs_buf_free); DEFINE_BUF_EVENT(xfs_buf_hold); DEFINE_BUF_EVENT(xfs_buf_rele); DEFINE_BUF_EVENT(xfs_buf_iodone); DEFINE_BUF_EVENT(xfs_buf_submit); DEFINE_BUF_EVENT(xfs_buf_lock); DEFINE_BUF_EVENT(xfs_buf_lock_done); DEFINE_BUF_EVENT(xfs_buf_trylock_fail); DEFINE_BUF_EVENT(xfs_buf_trylock); DEFINE_BUF_EVENT(xfs_buf_unlock); DEFINE_BUF_EVENT(xfs_buf_iowait); DEFINE_BUF_EVENT(xfs_buf_iowait_done); DEFINE_BUF_EVENT(xfs_buf_delwri_queue); DEFINE_BUF_EVENT(xfs_buf_delwri_queued); DEFINE_BUF_EVENT(xfs_buf_delwri_split); DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf); DEFINE_BUF_EVENT(xfs_buf_get_uncached); DEFINE_BUF_EVENT(xfs_buf_item_relse); DEFINE_BUF_EVENT(xfs_buf_iodone_async); DEFINE_BUF_EVENT(xfs_buf_error_relse); DEFINE_BUF_EVENT(xfs_buf_drain_buftarg); DEFINE_BUF_EVENT(xfs_trans_read_buf_shut); /* not really buffer traces, but the buf provides useful information */ DEFINE_BUF_EVENT(xfs_btree_corrupt); DEFINE_BUF_EVENT(xfs_reset_dqcounts); /* pass flags explicitly */ DECLARE_EVENT_CLASS(xfs_buf_flags_class, TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), TP_ARGS(bp, flags, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_daddr_t, bno) __field(unsigned int, length) __field(int, hold) __field(int, pincount) __field(unsigned, lockval) __field(unsigned, flags) __field(unsigned long, caller_ip) ), TP_fast_assign( __entry->dev = bp->b_target->bt_dev; __entry->bno = xfs_buf_daddr(bp); __entry->length = bp->b_length; __entry->flags = flags; __entry->hold = atomic_read(&bp->b_hold); __entry->pincount = atomic_read(&bp->b_pin_count); __entry->lockval = bp->b_sema.count; __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d " "lock %d flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->bno, __entry->length, __entry->hold, __entry->pincount, __entry->lockval, __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), (void *)__entry->caller_ip) ) #define DEFINE_BUF_FLAGS_EVENT(name) \ DEFINE_EVENT(xfs_buf_flags_class, name, \ TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \ TP_ARGS(bp, flags, caller_ip)) DEFINE_BUF_FLAGS_EVENT(xfs_buf_find); DEFINE_BUF_FLAGS_EVENT(xfs_buf_get); DEFINE_BUF_FLAGS_EVENT(xfs_buf_read); TRACE_EVENT(xfs_buf_ioerror, TP_PROTO(struct xfs_buf *bp, int error, xfs_failaddr_t caller_ip), TP_ARGS(bp, error, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_daddr_t, bno) __field(unsigned int, length) __field(unsigned, flags) __field(int, hold) __field(int, pincount) __field(unsigned, lockval) __field(int, error) __field(xfs_failaddr_t, caller_ip) ), TP_fast_assign( __entry->dev = bp->b_target->bt_dev; __entry->bno = xfs_buf_daddr(bp); __entry->length = bp->b_length; __entry->hold = atomic_read(&bp->b_hold); __entry->pincount = atomic_read(&bp->b_pin_count); __entry->lockval = bp->b_sema.count; __entry->error = error; __entry->flags = bp->b_flags; __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d " "lock %d error %d flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->bno, __entry->length, __entry->hold, __entry->pincount, __entry->lockval, __entry->error, __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), (void *)__entry->caller_ip) ); DECLARE_EVENT_CLASS(xfs_buf_item_class, TP_PROTO(struct xfs_buf_log_item *bip), TP_ARGS(bip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_daddr_t, buf_bno) __field(unsigned int, buf_len) __field(int, buf_hold) __field(int, buf_pincount) __field(int, buf_lockval) __field(unsigned, buf_flags) __field(unsigned, bli_recur) __field(int, bli_refcount) __field(unsigned, bli_flags) __field(unsigned long, li_flags) ), TP_fast_assign( __entry->dev = bip->bli_buf->b_target->bt_dev; __entry->bli_flags = bip->bli_flags; __entry->bli_recur = bip->bli_recur; __entry->bli_refcount = atomic_read(&bip->bli_refcount); __entry->buf_bno = xfs_buf_daddr(bip->bli_buf); __entry->buf_len = bip->bli_buf->b_length; __entry->buf_flags = bip->bli_buf->b_flags; __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count); __entry->buf_lockval = bip->bli_buf->b_sema.count; __entry->li_flags = bip->bli_item.li_flags; ), TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d " "lock %d flags %s recur %d refcount %d bliflags %s " "liflags %s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->buf_bno, __entry->buf_len, __entry->buf_hold, __entry->buf_pincount, __entry->buf_lockval, __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS), __entry->bli_recur, __entry->bli_refcount, __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS), __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS)) ) #define DEFINE_BUF_ITEM_EVENT(name) \ DEFINE_EVENT(xfs_buf_item_class, name, \ TP_PROTO(struct xfs_buf_log_item *bip), \ TP_ARGS(bip)) DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_ordered); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_ordered); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_release); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur); DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb); DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb_recur); DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf); DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur); DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf); DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse); DEFINE_BUF_ITEM_EVENT(xfs_trans_bdetach); DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin); DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold); DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); DECLARE_EVENT_CLASS(xfs_filestream_class, TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino), TP_ARGS(pag, ino), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(xfs_agnumber_t, agno) __field(int, streams) ), TP_fast_assign( __entry->dev = pag->pag_mount->m_super->s_dev; __entry->ino = ino; __entry->agno = pag->pag_agno; __entry->streams = atomic_read(&pag->pagf_fstrms); ), TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->agno, __entry->streams) ) #define DEFINE_FILESTREAM_EVENT(name) \ DEFINE_EVENT(xfs_filestream_class, name, \ TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino), \ TP_ARGS(pag, ino)) DEFINE_FILESTREAM_EVENT(xfs_filestream_free); DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup); DEFINE_FILESTREAM_EVENT(xfs_filestream_scan); TRACE_EVENT(xfs_filestream_pick, TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino, xfs_extlen_t free), TP_ARGS(pag, ino, free), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(xfs_agnumber_t, agno) __field(int, streams) __field(xfs_extlen_t, free) ), TP_fast_assign( __entry->dev = pag->pag_mount->m_super->s_dev; __entry->ino = ino; if (pag) { __entry->agno = pag->pag_agno; __entry->streams = atomic_read(&pag->pagf_fstrms); } else { __entry->agno = NULLAGNUMBER; __entry->streams = 0; } __entry->free = free; ), TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d free %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->agno, __entry->streams, __entry->free) ); DECLARE_EVENT_CLASS(xfs_lock_class, TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, unsigned long caller_ip), TP_ARGS(ip, lock_flags, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(int, lock_flags) __field(unsigned long, caller_ip) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; __entry->lock_flags = lock_flags; __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d ino 0x%llx flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), (void *)__entry->caller_ip) ) #define DEFINE_LOCK_EVENT(name) \ DEFINE_EVENT(xfs_lock_class, name, \ TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \ unsigned long caller_ip), \ TP_ARGS(ip, lock_flags, caller_ip)) DEFINE_LOCK_EVENT(xfs_ilock); DEFINE_LOCK_EVENT(xfs_ilock_nowait); DEFINE_LOCK_EVENT(xfs_ilock_demote); DEFINE_LOCK_EVENT(xfs_iunlock); DECLARE_EVENT_CLASS(xfs_inode_class, TP_PROTO(struct xfs_inode *ip), TP_ARGS(ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(unsigned long, iflags) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; __entry->iflags = ip->i_flags; ), TP_printk("dev %d:%d ino 0x%llx iflags 0x%lx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->iflags) ) #define DEFINE_INODE_EVENT(name) \ DEFINE_EVENT(xfs_inode_class, name, \ TP_PROTO(struct xfs_inode *ip), \ TP_ARGS(ip)) DEFINE_INODE_EVENT(xfs_iget_skip); DEFINE_INODE_EVENT(xfs_iget_recycle); DEFINE_INODE_EVENT(xfs_iget_recycle_fail); DEFINE_INODE_EVENT(xfs_iget_hit); DEFINE_INODE_EVENT(xfs_iget_miss); DEFINE_INODE_EVENT(xfs_getattr); DEFINE_INODE_EVENT(xfs_setattr); DEFINE_INODE_EVENT(xfs_readlink); DEFINE_INODE_EVENT(xfs_inactive_symlink); DEFINE_INODE_EVENT(xfs_alloc_file_space); DEFINE_INODE_EVENT(xfs_free_file_space); DEFINE_INODE_EVENT(xfs_zero_file_space); DEFINE_INODE_EVENT(xfs_collapse_file_space); DEFINE_INODE_EVENT(xfs_insert_file_space); DEFINE_INODE_EVENT(xfs_readdir); #ifdef CONFIG_XFS_POSIX_ACL DEFINE_INODE_EVENT(xfs_get_acl); #endif DEFINE_INODE_EVENT(xfs_vm_bmap); DEFINE_INODE_EVENT(xfs_file_ioctl); DEFINE_INODE_EVENT(xfs_file_compat_ioctl); DEFINE_INODE_EVENT(xfs_ioctl_setattr); DEFINE_INODE_EVENT(xfs_dir_fsync); DEFINE_INODE_EVENT(xfs_file_fsync); DEFINE_INODE_EVENT(xfs_destroy_inode); DEFINE_INODE_EVENT(xfs_update_time); DEFINE_INODE_EVENT(xfs_dquot_dqalloc); DEFINE_INODE_EVENT(xfs_dquot_dqdetach); DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag); DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag); DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid); DEFINE_INODE_EVENT(xfs_inode_set_cowblocks_tag); DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag); DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid); DEFINE_INODE_EVENT(xfs_inode_set_reclaimable); DEFINE_INODE_EVENT(xfs_inode_reclaiming); DEFINE_INODE_EVENT(xfs_inode_set_need_inactive); DEFINE_INODE_EVENT(xfs_inode_inactivating); /* * ftrace's __print_symbolic requires that all enum values be wrapped in the * TRACE_DEFINE_ENUM macro so that the enum value can be encoded in the ftrace * ring buffer. Somehow this was only worth mentioning in the ftrace sample * code. */ TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_SHARED); TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_COW); TRACE_EVENT(xfs_filemap_fault, TP_PROTO(struct xfs_inode *ip, unsigned int order, bool write_fault), TP_ARGS(ip, order, write_fault), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(unsigned int, order) __field(bool, write_fault) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; __entry->order = order; __entry->write_fault = write_fault; ), TP_printk("dev %d:%d ino 0x%llx order %u write_fault %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->order, __entry->write_fault) ) DECLARE_EVENT_CLASS(xfs_iref_class, TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), TP_ARGS(ip, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(int, count) __field(int, pincount) __field(unsigned long, caller_ip) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; __entry->count = atomic_read(&VFS_I(ip)->i_count); __entry->pincount = atomic_read(&ip->i_pincount); __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->count, __entry->pincount, (char *)__entry->caller_ip) ) TRACE_EVENT(xfs_iomap_prealloc_size, TP_PROTO(struct xfs_inode *ip, xfs_fsblock_t blocks, int shift, unsigned int writeio_blocks), TP_ARGS(ip, blocks, shift, writeio_blocks), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(xfs_fsblock_t, blocks) __field(int, shift) __field(unsigned int, writeio_blocks) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; __entry->blocks = blocks; __entry->shift = shift; __entry->writeio_blocks = writeio_blocks; ), TP_printk("dev %d:%d ino 0x%llx prealloc blocks %llu shift %d " "m_allocsize_blocks %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->blocks, __entry->shift, __entry->writeio_blocks) ) TRACE_EVENT(xfs_irec_merge_pre, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino, uint16_t holemask, xfs_agino_t nagino, uint16_t nholemask), TP_ARGS(mp, agno, agino, holemask, nagino, nholemask), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_agino_t, agino) __field(uint16_t, holemask) __field(xfs_agino_t, nagino) __field(uint16_t, nholemask) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; __entry->agino = agino; __entry->holemask = holemask; __entry->nagino = nagino; __entry->nholemask = holemask; ), TP_printk("dev %d:%d agno 0x%x agino 0x%x holemask 0x%x new_agino 0x%x new_holemask 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agino, __entry->holemask, __entry->nagino, __entry->nholemask) ) TRACE_EVENT(xfs_irec_merge_post, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino, uint16_t holemask), TP_ARGS(mp, agno, agino, holemask), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_agino_t, agino) __field(uint16_t, holemask) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; __entry->agino = agino; __entry->holemask = holemask; ), TP_printk("dev %d:%d agno 0x%x agino 0x%x holemask 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agino, __entry->holemask) ) #define DEFINE_IREF_EVENT(name) \ DEFINE_EVENT(xfs_iref_class, name, \ TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ TP_ARGS(ip, caller_ip)) DEFINE_IREF_EVENT(xfs_irele); DEFINE_IREF_EVENT(xfs_inode_pin); DEFINE_IREF_EVENT(xfs_inode_unpin); DEFINE_IREF_EVENT(xfs_inode_unpin_nowait); DECLARE_EVENT_CLASS(xfs_namespace_class, TP_PROTO(struct xfs_inode *dp, const struct xfs_name *name), TP_ARGS(dp, name), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, dp_ino) __field(int, namelen) __dynamic_array(char, name, name->len) ), TP_fast_assign( __entry->dev = VFS_I(dp)->i_sb->s_dev; __entry->dp_ino = dp->i_ino; __entry->namelen = name->len; memcpy(__get_str(name), name->name, name->len); ), TP_printk("dev %d:%d dp ino 0x%llx name %.*s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->dp_ino, __entry->namelen, __get_str(name)) ) #define DEFINE_NAMESPACE_EVENT(name) \ DEFINE_EVENT(xfs_namespace_class, name, \ TP_PROTO(struct xfs_inode *dp, const struct xfs_name *name), \ TP_ARGS(dp, name)) DEFINE_NAMESPACE_EVENT(xfs_remove); DEFINE_NAMESPACE_EVENT(xfs_link); DEFINE_NAMESPACE_EVENT(xfs_lookup); DEFINE_NAMESPACE_EVENT(xfs_create); DEFINE_NAMESPACE_EVENT(xfs_symlink); TRACE_EVENT(xfs_rename, TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp, struct xfs_name *src_name, struct xfs_name *target_name), TP_ARGS(src_dp, target_dp, src_name, target_name), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, src_dp_ino) __field(xfs_ino_t, target_dp_ino) __field(int, src_namelen) __field(int, target_namelen) __dynamic_array(char, src_name, src_name->len) __dynamic_array(char, target_name, target_name->len) ), TP_fast_assign( __entry->dev = VFS_I(src_dp)->i_sb->s_dev; __entry->src_dp_ino = src_dp->i_ino; __entry->target_dp_ino = target_dp->i_ino; __entry->src_namelen = src_name->len; __entry->target_namelen = target_name->len; memcpy(__get_str(src_name), src_name->name, src_name->len); memcpy(__get_str(target_name), target_name->name, target_name->len); ), TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx" " src name %.*s target name %.*s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->src_dp_ino, __entry->target_dp_ino, __entry->src_namelen, __get_str(src_name), __entry->target_namelen, __get_str(target_name)) ) DECLARE_EVENT_CLASS(xfs_dquot_class, TP_PROTO(struct xfs_dquot *dqp), TP_ARGS(dqp), TP_STRUCT__entry( __field(dev_t, dev) __field(u32, id) __field(xfs_dqtype_t, type) __field(unsigned, flags) __field(unsigned, nrefs) __field(unsigned long long, res_bcount) __field(unsigned long long, res_rtbcount) __field(unsigned long long, res_icount) __field(unsigned long long, bcount) __field(unsigned long long, rtbcount) __field(unsigned long long, icount) __field(unsigned long long, blk_hardlimit) __field(unsigned long long, blk_softlimit) __field(unsigned long long, rtb_hardlimit) __field(unsigned long long, rtb_softlimit) __field(unsigned long long, ino_hardlimit) __field(unsigned long long, ino_softlimit) ), TP_fast_assign( __entry->dev = dqp->q_mount->m_super->s_dev; __entry->id = dqp->q_id; __entry->type = dqp->q_type; __entry->flags = dqp->q_flags; __entry->nrefs = dqp->q_nrefs; __entry->res_bcount = dqp->q_blk.reserved; __entry->res_rtbcount = dqp->q_rtb.reserved; __entry->res_icount = dqp->q_ino.reserved; __entry->bcount = dqp->q_blk.count; __entry->rtbcount = dqp->q_rtb.count; __entry->icount = dqp->q_ino.count; __entry->blk_hardlimit = dqp->q_blk.hardlimit; __entry->blk_softlimit = dqp->q_blk.softlimit; __entry->rtb_hardlimit = dqp->q_rtb.hardlimit; __entry->rtb_softlimit = dqp->q_rtb.softlimit; __entry->ino_hardlimit = dqp->q_ino.hardlimit; __entry->ino_softlimit = dqp->q_ino.softlimit; ), TP_printk("dev %d:%d id 0x%x type %s flags %s nrefs %u " "res_bc 0x%llx res_rtbc 0x%llx res_ic 0x%llx " "bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx " "rtbcnt 0x%llx rtbhardlimit 0x%llx rtbsoftlimit 0x%llx " "icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->id, __print_flags(__entry->type, "|", XFS_DQTYPE_STRINGS), __print_flags(__entry->flags, "|", XFS_DQFLAG_STRINGS), __entry->nrefs, __entry->res_bcount, __entry->res_rtbcount, __entry->res_icount, __entry->bcount, __entry->blk_hardlimit, __entry->blk_softlimit, __entry->rtbcount, __entry->rtb_hardlimit, __entry->rtb_softlimit, __entry->icount, __entry->ino_hardlimit, __entry->ino_softlimit) ) #define DEFINE_DQUOT_EVENT(name) \ DEFINE_EVENT(xfs_dquot_class, name, \ TP_PROTO(struct xfs_dquot *dqp), \ TP_ARGS(dqp)) DEFINE_DQUOT_EVENT(xfs_dqadjust); DEFINE_DQUOT_EVENT(xfs_dqreclaim_want); DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty); DEFINE_DQUOT_EVENT(xfs_dqreclaim_busy); DEFINE_DQUOT_EVENT(xfs_dqreclaim_done); DEFINE_DQUOT_EVENT(xfs_dqattach_found); DEFINE_DQUOT_EVENT(xfs_dqattach_get); DEFINE_DQUOT_EVENT(xfs_dqalloc); DEFINE_DQUOT_EVENT(xfs_dqtobp_read); DEFINE_DQUOT_EVENT(xfs_dqread); DEFINE_DQUOT_EVENT(xfs_dqread_fail); DEFINE_DQUOT_EVENT(xfs_dqget_hit); DEFINE_DQUOT_EVENT(xfs_dqget_miss); DEFINE_DQUOT_EVENT(xfs_dqget_freeing); DEFINE_DQUOT_EVENT(xfs_dqget_dup); DEFINE_DQUOT_EVENT(xfs_dqput); DEFINE_DQUOT_EVENT(xfs_dqput_free); DEFINE_DQUOT_EVENT(xfs_dqrele); DEFINE_DQUOT_EVENT(xfs_dqflush); DEFINE_DQUOT_EVENT(xfs_dqflush_force); DEFINE_DQUOT_EVENT(xfs_dqflush_done); DEFINE_DQUOT_EVENT(xfs_trans_apply_dquot_deltas_before); DEFINE_DQUOT_EVENT(xfs_trans_apply_dquot_deltas_after); TRACE_EVENT(xfs_trans_mod_dquot, TP_PROTO(struct xfs_trans *tp, struct xfs_dquot *dqp, unsigned int field, int64_t delta), TP_ARGS(tp, dqp, field, delta), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_dqtype_t, type) __field(unsigned int, flags) __field(unsigned int, dqid) __field(unsigned int, field) __field(int64_t, delta) ), TP_fast_assign( __entry->dev = tp->t_mountp->m_super->s_dev; __entry->type = dqp->q_type; __entry->flags = dqp->q_flags; __entry->dqid = dqp->q_id; __entry->field = field; __entry->delta = delta; ), TP_printk("dev %d:%d dquot id 0x%x type %s flags %s field %s delta %lld", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->dqid, __print_flags(__entry->type, "|", XFS_DQTYPE_STRINGS), __print_flags(__entry->flags, "|", XFS_DQFLAG_STRINGS), __print_flags(__entry->field, "|", XFS_QMOPT_FLAGS), __entry->delta) ); DECLARE_EVENT_CLASS(xfs_dqtrx_class, TP_PROTO(struct xfs_dqtrx *qtrx), TP_ARGS(qtrx), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_dqtype_t, type) __field(unsigned int, flags) __field(u32, dqid) __field(uint64_t, blk_res) __field(int64_t, bcount_delta) __field(int64_t, delbcnt_delta) __field(uint64_t, rtblk_res) __field(uint64_t, rtblk_res_used) __field(int64_t, rtbcount_delta) __field(int64_t, delrtb_delta) __field(uint64_t, ino_res) __field(uint64_t, ino_res_used) __field(int64_t, icount_delta) ), TP_fast_assign( __entry->dev = qtrx->qt_dquot->q_mount->m_super->s_dev; __entry->type = qtrx->qt_dquot->q_type; __entry->flags = qtrx->qt_dquot->q_flags; __entry->dqid = qtrx->qt_dquot->q_id; __entry->blk_res = qtrx->qt_blk_res; __entry->bcount_delta = qtrx->qt_bcount_delta; __entry->delbcnt_delta = qtrx->qt_delbcnt_delta; __entry->rtblk_res = qtrx->qt_rtblk_res; __entry->rtblk_res_used = qtrx->qt_rtblk_res_used; __entry->rtbcount_delta = qtrx->qt_rtbcount_delta; __entry->delrtb_delta = qtrx->qt_delrtb_delta; __entry->ino_res = qtrx->qt_ino_res; __entry->ino_res_used = qtrx->qt_ino_res_used; __entry->icount_delta = qtrx->qt_icount_delta; ), TP_printk("dev %d:%d dquot id 0x%x type %s flags %s " "blk_res %llu bcount_delta %lld delbcnt_delta %lld " "rtblk_res %llu rtblk_res_used %llu rtbcount_delta %lld delrtb_delta %lld " "ino_res %llu ino_res_used %llu icount_delta %lld", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->dqid, __print_flags(__entry->type, "|", XFS_DQTYPE_STRINGS), __print_flags(__entry->flags, "|", XFS_DQFLAG_STRINGS), __entry->blk_res, __entry->bcount_delta, __entry->delbcnt_delta, __entry->rtblk_res, __entry->rtblk_res_used, __entry->rtbcount_delta, __entry->delrtb_delta, __entry->ino_res, __entry->ino_res_used, __entry->icount_delta) ) #define DEFINE_DQTRX_EVENT(name) \ DEFINE_EVENT(xfs_dqtrx_class, name, \ TP_PROTO(struct xfs_dqtrx *qtrx), \ TP_ARGS(qtrx)) DEFINE_DQTRX_EVENT(xfs_trans_apply_dquot_deltas); DEFINE_DQTRX_EVENT(xfs_trans_mod_dquot_before); DEFINE_DQTRX_EVENT(xfs_trans_mod_dquot_after); DECLARE_EVENT_CLASS(xfs_loggrant_class, TP_PROTO(struct xlog *log, struct xlog_ticket *tic), TP_ARGS(log, tic), TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, tic) __field(char, ocnt) __field(char, cnt) __field(int, curr_res) __field(int, unit_res) __field(unsigned int, flags) __field(int, reserveq) __field(int, writeq) __field(uint64_t, grant_reserve_bytes) __field(uint64_t, grant_write_bytes) __field(uint64_t, tail_space) __field(int, curr_cycle) __field(int, curr_block) __field(xfs_lsn_t, tail_lsn) ), TP_fast_assign( __entry->dev = log->l_mp->m_super->s_dev; __entry->tic = (unsigned long)tic; __entry->ocnt = tic->t_ocnt; __entry->cnt = tic->t_cnt; __entry->curr_res = tic->t_curr_res; __entry->unit_res = tic->t_unit_res; __entry->flags = tic->t_flags; __entry->reserveq = list_empty(&log->l_reserve_head.waiters); __entry->writeq = list_empty(&log->l_write_head.waiters); __entry->tail_space = READ_ONCE(log->l_tail_space); __entry->grant_reserve_bytes = __entry->tail_space + atomic64_read(&log->l_reserve_head.grant); __entry->grant_write_bytes = __entry->tail_space + atomic64_read(&log->l_write_head.grant); __entry->curr_cycle = log->l_curr_cycle; __entry->curr_block = log->l_curr_block; __entry->tail_lsn = atomic64_read(&log->l_tail_lsn); ), TP_printk("dev %d:%d tic 0x%lx t_ocnt %u t_cnt %u t_curr_res %u " "t_unit_res %u t_flags %s reserveq %s writeq %s " "tail space %llu grant_reserve_bytes %llu " "grant_write_bytes %llu curr_cycle %d curr_block %d " "tail_cycle %d tail_block %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tic, __entry->ocnt, __entry->cnt, __entry->curr_res, __entry->unit_res, __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), __entry->reserveq ? "empty" : "active", __entry->writeq ? "empty" : "active", __entry->tail_space, __entry->grant_reserve_bytes, __entry->grant_write_bytes, __entry->curr_cycle, __entry->curr_block, CYCLE_LSN(__entry->tail_lsn), BLOCK_LSN(__entry->tail_lsn) ) ) #define DEFINE_LOGGRANT_EVENT(name) \ DEFINE_EVENT(xfs_loggrant_class, name, \ TP_PROTO(struct xlog *log, struct xlog_ticket *tic), \ TP_ARGS(log, tic)) DEFINE_LOGGRANT_EVENT(xfs_log_umount_write); DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep); DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake); DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up); DEFINE_LOGGRANT_EVENT(xfs_log_reserve); DEFINE_LOGGRANT_EVENT(xfs_log_reserve_exit); DEFINE_LOGGRANT_EVENT(xfs_log_regrant); DEFINE_LOGGRANT_EVENT(xfs_log_regrant_exit); DEFINE_LOGGRANT_EVENT(xfs_log_ticket_regrant); DEFINE_LOGGRANT_EVENT(xfs_log_ticket_regrant_exit); DEFINE_LOGGRANT_EVENT(xfs_log_ticket_regrant_sub); DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant); DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant_sub); DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant_exit); DEFINE_LOGGRANT_EVENT(xfs_log_cil_wait); DEFINE_LOGGRANT_EVENT(xfs_log_cil_return); DECLARE_EVENT_CLASS(xfs_log_item_class, TP_PROTO(struct xfs_log_item *lip), TP_ARGS(lip), TP_STRUCT__entry( __field(dev_t, dev) __field(void *, lip) __field(uint, type) __field(unsigned long, flags) __field(xfs_lsn_t, lsn) ), TP_fast_assign( __entry->dev = lip->li_log->l_mp->m_super->s_dev; __entry->lip = lip; __entry->type = lip->li_type; __entry->flags = lip->li_flags; __entry->lsn = lip->li_lsn; ), TP_printk("dev %d:%d lip %p lsn %d/%d type %s flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->lip, CYCLE_LSN(__entry->lsn), BLOCK_LSN(__entry->lsn), __print_symbolic(__entry->type, XFS_LI_TYPE_DESC), __print_flags(__entry->flags, "|", XFS_LI_FLAGS)) ) TRACE_EVENT(xfs_log_force, TP_PROTO(struct xfs_mount *mp, xfs_lsn_t lsn, unsigned long caller_ip), TP_ARGS(mp, lsn, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_lsn_t, lsn) __field(unsigned long, caller_ip) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->lsn = lsn; __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d lsn 0x%llx caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->lsn, (void *)__entry->caller_ip) ) #define DEFINE_LOG_ITEM_EVENT(name) \ DEFINE_EVENT(xfs_log_item_class, name, \ TP_PROTO(struct xfs_log_item *lip), \ TP_ARGS(lip)) DEFINE_LOG_ITEM_EVENT(xfs_ail_push); DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned); DEFINE_LOG_ITEM_EVENT(xfs_ail_locked); DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_mark); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_skip); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_unpin); DECLARE_EVENT_CLASS(xfs_ail_class, TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn), TP_ARGS(lip, old_lsn, new_lsn), TP_STRUCT__entry( __field(dev_t, dev) __field(void *, lip) __field(uint, type) __field(unsigned long, flags) __field(xfs_lsn_t, old_lsn) __field(xfs_lsn_t, new_lsn) ), TP_fast_assign( __entry->dev = lip->li_log->l_mp->m_super->s_dev; __entry->lip = lip; __entry->type = lip->li_type; __entry->flags = lip->li_flags; __entry->old_lsn = old_lsn; __entry->new_lsn = new_lsn; ), TP_printk("dev %d:%d lip %p old lsn %d/%d new lsn %d/%d type %s flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->lip, CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn), CYCLE_LSN(__entry->new_lsn), BLOCK_LSN(__entry->new_lsn), __print_symbolic(__entry->type, XFS_LI_TYPE_DESC), __print_flags(__entry->flags, "|", XFS_LI_FLAGS)) ) #define DEFINE_AIL_EVENT(name) \ DEFINE_EVENT(xfs_ail_class, name, \ TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn), \ TP_ARGS(lip, old_lsn, new_lsn)) DEFINE_AIL_EVENT(xfs_ail_insert); DEFINE_AIL_EVENT(xfs_ail_move); DEFINE_AIL_EVENT(xfs_ail_delete); TRACE_EVENT(xfs_log_assign_tail_lsn, TP_PROTO(struct xlog *log, xfs_lsn_t new_lsn), TP_ARGS(log, new_lsn), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_lsn_t, new_lsn) __field(xfs_lsn_t, old_lsn) __field(xfs_lsn_t, head_lsn) ), TP_fast_assign( __entry->dev = log->l_mp->m_super->s_dev; __entry->new_lsn = new_lsn; __entry->old_lsn = atomic64_read(&log->l_tail_lsn); __entry->head_lsn = log->l_ailp->ail_head_lsn; ), TP_printk("dev %d:%d new tail lsn %d/%d, old lsn %d/%d, head lsn %d/%d", MAJOR(__entry->dev), MINOR(__entry->dev), CYCLE_LSN(__entry->new_lsn), BLOCK_LSN(__entry->new_lsn), CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn), CYCLE_LSN(__entry->head_lsn), BLOCK_LSN(__entry->head_lsn)) ) DECLARE_EVENT_CLASS(xfs_file_class, TP_PROTO(struct kiocb *iocb, struct iov_iter *iter), TP_ARGS(iocb, iter), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(xfs_fsize_t, size) __field(loff_t, offset) __field(size_t, count) ), TP_fast_assign( __entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev; __entry->ino = XFS_I(file_inode(iocb->ki_filp))->i_ino; __entry->size = XFS_I(file_inode(iocb->ki_filp))->i_disk_size; __entry->offset = iocb->ki_pos; __entry->count = iov_iter_count(iter); ), TP_printk("dev %d:%d ino 0x%llx disize 0x%llx pos 0x%llx bytecount 0x%zx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->size, __entry->offset, __entry->count) ) #define DEFINE_RW_EVENT(name) \ DEFINE_EVENT(xfs_file_class, name, \ TP_PROTO(struct kiocb *iocb, struct iov_iter *iter), \ TP_ARGS(iocb, iter)) DEFINE_RW_EVENT(xfs_file_buffered_read); DEFINE_RW_EVENT(xfs_file_direct_read); DEFINE_RW_EVENT(xfs_file_dax_read); DEFINE_RW_EVENT(xfs_file_buffered_write); DEFINE_RW_EVENT(xfs_file_direct_write); DEFINE_RW_EVENT(xfs_file_dax_write); DEFINE_RW_EVENT(xfs_reflink_bounce_dio_write); DECLARE_EVENT_CLASS(xfs_imap_class, TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, int whichfork, struct xfs_bmbt_irec *irec), TP_ARGS(ip, offset, count, whichfork, irec), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(loff_t, size) __field(loff_t, offset) __field(size_t, count) __field(int, whichfork) __field(xfs_fileoff_t, startoff) __field(xfs_fsblock_t, startblock) __field(xfs_filblks_t, blockcount) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; __entry->size = ip->i_disk_size; __entry->offset = offset; __entry->count = count; __entry->whichfork = whichfork; __entry->startoff = irec ? irec->br_startoff : 0; __entry->startblock = irec ? irec->br_startblock : 0; __entry->blockcount = irec ? irec->br_blockcount : 0; ), TP_printk("dev %d:%d ino 0x%llx disize 0x%llx pos 0x%llx bytecount 0x%zx " "fork %s startoff 0x%llx startblock 0x%llx fsbcount 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->size, __entry->offset, __entry->count, __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS), __entry->startoff, (int64_t)__entry->startblock, __entry->blockcount) ) #define DEFINE_IMAP_EVENT(name) \ DEFINE_EVENT(xfs_imap_class, name, \ TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ int whichfork, struct xfs_bmbt_irec *irec), \ TP_ARGS(ip, offset, count, whichfork, irec)) DEFINE_IMAP_EVENT(xfs_map_blocks_found); DEFINE_IMAP_EVENT(xfs_map_blocks_alloc); DEFINE_IMAP_EVENT(xfs_iomap_alloc); DEFINE_IMAP_EVENT(xfs_iomap_found); DECLARE_EVENT_CLASS(xfs_simple_io_class, TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), TP_ARGS(ip, offset, count), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(loff_t, isize) __field(loff_t, disize) __field(loff_t, offset) __field(size_t, count) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; __entry->isize = VFS_I(ip)->i_size; __entry->disize = ip->i_disk_size; __entry->offset = offset; __entry->count = count; ), TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx " "pos 0x%llx bytecount 0x%zx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->isize, __entry->disize, __entry->offset, __entry->count) ); #define DEFINE_SIMPLE_IO_EVENT(name) \ DEFINE_EVENT(xfs_simple_io_class, name, \ TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \ TP_ARGS(ip, offset, count)) DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize); DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof); DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write); DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_unwritten); DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_append); DEFINE_SIMPLE_IO_EVENT(xfs_file_splice_read); DECLARE_EVENT_CLASS(xfs_itrunc_class, TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), TP_ARGS(ip, new_size), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(xfs_fsize_t, size) __field(xfs_fsize_t, new_size) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; __entry->size = ip->i_disk_size; __entry->new_size = new_size; ), TP_printk("dev %d:%d ino 0x%llx disize 0x%llx new_size 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->size, __entry->new_size) ) #define DEFINE_ITRUNC_EVENT(name) \ DEFINE_EVENT(xfs_itrunc_class, name, \ TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ TP_ARGS(ip, new_size)) DEFINE_ITRUNC_EVENT(xfs_itruncate_extents_start); DEFINE_ITRUNC_EVENT(xfs_itruncate_extents_end); TRACE_EVENT(xfs_pagecache_inval, TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish), TP_ARGS(ip, start, finish), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(xfs_fsize_t, size) __field(xfs_off_t, start) __field(xfs_off_t, finish) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; __entry->size = ip->i_disk_size; __entry->start = start; __entry->finish = finish; ), TP_printk("dev %d:%d ino 0x%llx disize 0x%llx start 0x%llx finish 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->size, __entry->start, __entry->finish) ); TRACE_EVENT(xfs_bunmap, TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t fileoff, xfs_filblks_t len, int flags, unsigned long caller_ip), TP_ARGS(ip, fileoff, len, flags, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(xfs_fsize_t, size) __field(xfs_fileoff_t, fileoff) __field(xfs_filblks_t, len) __field(unsigned long, caller_ip) __field(int, flags) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; __entry->size = ip->i_disk_size; __entry->fileoff = fileoff; __entry->len = len; __entry->caller_ip = caller_ip; __entry->flags = flags; ), TP_printk("dev %d:%d ino 0x%llx disize 0x%llx fileoff 0x%llx fsbcount 0x%llx " "flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->size, __entry->fileoff, __entry->len, __print_flags(__entry->flags, "|", XFS_BMAPI_FLAGS), (void *)__entry->caller_ip) ); DECLARE_EVENT_CLASS(xfs_extent_busy_class, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t len), TP_ARGS(mp, agno, agbno, len), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_agblock_t, agbno) __field(xfs_extlen_t, len) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; __entry->agbno = agbno; __entry->len = len; ), TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, __entry->len) ); #define DEFINE_BUSY_EVENT(name) \ DEFINE_EVENT(xfs_extent_busy_class, name, \ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ xfs_agblock_t agbno, xfs_extlen_t len), \ TP_ARGS(mp, agno, agbno, len)) DEFINE_BUSY_EVENT(xfs_extent_busy); DEFINE_BUSY_EVENT(xfs_extent_busy_force); DEFINE_BUSY_EVENT(xfs_extent_busy_reuse); DEFINE_BUSY_EVENT(xfs_extent_busy_clear); TRACE_EVENT(xfs_extent_busy_trim, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t len, xfs_agblock_t tbno, xfs_extlen_t tlen), TP_ARGS(mp, agno, agbno, len, tbno, tlen), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_agblock_t, agbno) __field(xfs_extlen_t, len) __field(xfs_agblock_t, tbno) __field(xfs_extlen_t, tlen) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; __entry->agbno = agbno; __entry->len = len; __entry->tbno = tbno; __entry->tlen = tlen; ), TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x found_agbno 0x%x found_fsbcount 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, __entry->len, __entry->tbno, __entry->tlen) ); DECLARE_EVENT_CLASS(xfs_agf_class, TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags, unsigned long caller_ip), TP_ARGS(mp, agf, flags, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(int, flags) __field(__u32, length) __field(__u32, bno_root) __field(__u32, cnt_root) __field(__u32, bno_level) __field(__u32, cnt_level) __field(__u32, flfirst) __field(__u32, fllast) __field(__u32, flcount) __field(__u32, freeblks) __field(__u32, longest) __field(unsigned long, caller_ip) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = be32_to_cpu(agf->agf_seqno), __entry->flags = flags; __entry->length = be32_to_cpu(agf->agf_length), __entry->bno_root = be32_to_cpu(agf->agf_bno_root), __entry->cnt_root = be32_to_cpu(agf->agf_cnt_root), __entry->bno_level = be32_to_cpu(agf->agf_bno_level), __entry->cnt_level = be32_to_cpu(agf->agf_cnt_level), __entry->flfirst = be32_to_cpu(agf->agf_flfirst), __entry->fllast = be32_to_cpu(agf->agf_fllast), __entry->flcount = be32_to_cpu(agf->agf_flcount), __entry->freeblks = be32_to_cpu(agf->agf_freeblks), __entry->longest = be32_to_cpu(agf->agf_longest); __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d agno 0x%x flags %s length %u roots b %u c %u " "levels b %u c %u flfirst %u fllast %u flcount %u " "freeblks %u longest %u caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __print_flags(__entry->flags, "|", XFS_AGF_FLAGS), __entry->length, __entry->bno_root, __entry->cnt_root, __entry->bno_level, __entry->cnt_level, __entry->flfirst, __entry->fllast, __entry->flcount, __entry->freeblks, __entry->longest, (void *)__entry->caller_ip) ); #define DEFINE_AGF_EVENT(name) \ DEFINE_EVENT(xfs_agf_class, name, \ TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags, \ unsigned long caller_ip), \ TP_ARGS(mp, agf, flags, caller_ip)) DEFINE_AGF_EVENT(xfs_agf); DEFINE_AGF_EVENT(xfs_agfl_reset); TRACE_EVENT(xfs_free_extent, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t len, enum xfs_ag_resv_type resv, int haveleft, int haveright), TP_ARGS(mp, agno, agbno, len, resv, haveleft, haveright), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_agblock_t, agbno) __field(xfs_extlen_t, len) __field(int, resv) __field(int, haveleft) __field(int, haveright) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; __entry->agbno = agbno; __entry->len = len; __entry->resv = resv; __entry->haveleft = haveleft; __entry->haveright = haveright; ), TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x resv %d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, __entry->len, __entry->resv, __entry->haveleft ? (__entry->haveright ? "both" : "left") : (__entry->haveright ? "right" : "none")) ); DECLARE_EVENT_CLASS(xfs_alloc_class, TP_PROTO(struct xfs_alloc_arg *args), TP_ARGS(args), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_agblock_t, agbno) __field(xfs_extlen_t, minlen) __field(xfs_extlen_t, maxlen) __field(xfs_extlen_t, mod) __field(xfs_extlen_t, prod) __field(xfs_extlen_t, minleft) __field(xfs_extlen_t, total) __field(xfs_extlen_t, alignment) __field(xfs_extlen_t, minalignslop) __field(xfs_extlen_t, len) __field(char, wasdel) __field(char, wasfromfl) __field(int, resv) __field(int, datatype) __field(xfs_agnumber_t, highest_agno) ), TP_fast_assign( __entry->dev = args->mp->m_super->s_dev; __entry->agno = args->agno; __entry->agbno = args->agbno; __entry->minlen = args->minlen; __entry->maxlen = args->maxlen; __entry->mod = args->mod; __entry->prod = args->prod; __entry->minleft = args->minleft; __entry->total = args->total; __entry->alignment = args->alignment; __entry->minalignslop = args->minalignslop; __entry->len = args->len; __entry->wasdel = args->wasdel; __entry->wasfromfl = args->wasfromfl; __entry->resv = args->resv; __entry->datatype = args->datatype; __entry->highest_agno = args->tp->t_highest_agno; ), TP_printk("dev %d:%d agno 0x%x agbno 0x%x minlen %u maxlen %u mod %u " "prod %u minleft %u total %u alignment %u minalignslop %u " "len %u wasdel %d wasfromfl %d resv %d " "datatype 0x%x highest_agno 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, __entry->minlen, __entry->maxlen, __entry->mod, __entry->prod, __entry->minleft, __entry->total, __entry->alignment, __entry->minalignslop, __entry->len, __entry->wasdel, __entry->wasfromfl, __entry->resv, __entry->datatype, __entry->highest_agno) ) #define DEFINE_ALLOC_EVENT(name) \ DEFINE_EVENT(xfs_alloc_class, name, \ TP_PROTO(struct xfs_alloc_arg *args), \ TP_ARGS(args)) DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound); DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); DEFINE_ALLOC_EVENT(xfs_alloc_near_first); DEFINE_ALLOC_EVENT(xfs_alloc_cur); DEFINE_ALLOC_EVENT(xfs_alloc_cur_right); DEFINE_ALLOC_EVENT(xfs_alloc_cur_left); DEFINE_ALLOC_EVENT(xfs_alloc_cur_lookup); DEFINE_ALLOC_EVENT(xfs_alloc_cur_lookup_done); DEFINE_ALLOC_EVENT(xfs_alloc_near_error); DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry); DEFINE_ALLOC_EVENT(xfs_alloc_near_busy); DEFINE_ALLOC_EVENT(xfs_alloc_size_neither); DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry); DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft); DEFINE_ALLOC_EVENT(xfs_alloc_size_done); DEFINE_ALLOC_EVENT(xfs_alloc_size_error); DEFINE_ALLOC_EVENT(xfs_alloc_size_busy); DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist); DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough); DEFINE_ALLOC_EVENT(xfs_alloc_small_done); DEFINE_ALLOC_EVENT(xfs_alloc_small_error); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_skip_deadlock); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_this_ag); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_start_ag); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_first_ag); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_exact_bno); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_near_bno); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_finish); TRACE_EVENT(xfs_alloc_cur_check, TP_PROTO(struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, xfs_extlen_t diff, bool new), TP_ARGS(cur, bno, len, diff, new), TP_STRUCT__entry( __field(dev_t, dev) __string(name, cur->bc_ops->name) __field(xfs_agblock_t, bno) __field(xfs_extlen_t, len) __field(xfs_extlen_t, diff) __field(bool, new) ), TP_fast_assign( __entry->dev = cur->bc_mp->m_super->s_dev; __assign_str(name); __entry->bno = bno; __entry->len = len; __entry->diff = diff; __entry->new = new; ), TP_printk("dev %d:%d %sbt agbno 0x%x fsbcount 0x%x diff 0x%x new %d", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(name), __entry->bno, __entry->len, __entry->diff, __entry->new) ) DECLARE_EVENT_CLASS(xfs_da_class, TP_PROTO(struct xfs_da_args *args), TP_ARGS(args), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __dynamic_array(char, name, args->namelen) __field(int, namelen) __field(xfs_dahash_t, hashval) __field(xfs_ino_t, inumber) __field(uint32_t, op_flags) __field(xfs_ino_t, owner) ), TP_fast_assign( __entry->dev = VFS_I(args->dp)->i_sb->s_dev; __entry->ino = args->dp->i_ino; if (args->namelen) memcpy(__get_str(name), args->name, args->namelen); __entry->namelen = args->namelen; __entry->hashval = args->hashval; __entry->inumber = args->inumber; __entry->op_flags = args->op_flags; __entry->owner = args->owner; ), TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x " "inumber 0x%llx op_flags %s owner 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->namelen, __entry->namelen ? __get_str(name) : NULL, __entry->namelen, __entry->hashval, __entry->inumber, __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS), __entry->owner) ) #define DEFINE_DIR2_EVENT(name) \ DEFINE_EVENT(xfs_da_class, name, \ TP_PROTO(struct xfs_da_args *args), \ TP_ARGS(args)) DEFINE_DIR2_EVENT(xfs_dir2_sf_addname); DEFINE_DIR2_EVENT(xfs_dir2_sf_create); DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup); DEFINE_DIR2_EVENT(xfs_dir2_sf_replace); DEFINE_DIR2_EVENT(xfs_dir2_sf_removename); DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4); DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8); DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block); DEFINE_DIR2_EVENT(xfs_dir2_block_addname); DEFINE_DIR2_EVENT(xfs_dir2_block_lookup); DEFINE_DIR2_EVENT(xfs_dir2_block_replace); DEFINE_DIR2_EVENT(xfs_dir2_block_removename); DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf); DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf); DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname); DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup); DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace); DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename); DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block); DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node); DEFINE_DIR2_EVENT(xfs_dir2_node_addname); DEFINE_DIR2_EVENT(xfs_dir2_node_lookup); DEFINE_DIR2_EVENT(xfs_dir2_node_replace); DEFINE_DIR2_EVENT(xfs_dir2_node_removename); DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf); DECLARE_EVENT_CLASS(xfs_attr_class, TP_PROTO(struct xfs_da_args *args), TP_ARGS(args), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __dynamic_array(char, name, args->namelen) __field(int, namelen) __field(int, valuelen) __field(xfs_dahash_t, hashval) __field(unsigned int, attr_filter) __field(uint32_t, op_flags) ), TP_fast_assign( __entry->dev = VFS_I(args->dp)->i_sb->s_dev; __entry->ino = args->dp->i_ino; if (args->namelen) memcpy(__get_str(name), args->name, args->namelen); __entry->namelen = args->namelen; __entry->valuelen = args->valuelen; __entry->hashval = args->hashval; __entry->attr_filter = args->attr_filter; __entry->op_flags = args->op_flags; ), TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d valuelen %d " "hashval 0x%x filter %s op_flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->namelen, __entry->namelen ? __get_str(name) : NULL, __entry->namelen, __entry->valuelen, __entry->hashval, __print_flags(__entry->attr_filter, "|", XFS_ATTR_FILTER_FLAGS), __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS)) ) #define DEFINE_ATTR_EVENT(name) \ DEFINE_EVENT(xfs_attr_class, name, \ TP_PROTO(struct xfs_da_args *args), \ TP_ARGS(args)) DEFINE_ATTR_EVENT(xfs_attr_sf_add); DEFINE_ATTR_EVENT(xfs_attr_sf_addname); DEFINE_ATTR_EVENT(xfs_attr_sf_create); DEFINE_ATTR_EVENT(xfs_attr_sf_lookup); DEFINE_ATTR_EVENT(xfs_attr_sf_remove); DEFINE_ATTR_EVENT(xfs_attr_sf_to_leaf); DEFINE_ATTR_EVENT(xfs_attr_leaf_add); DEFINE_ATTR_EVENT(xfs_attr_leaf_add_old); DEFINE_ATTR_EVENT(xfs_attr_leaf_add_new); DEFINE_ATTR_EVENT(xfs_attr_leaf_add_work); DEFINE_ATTR_EVENT(xfs_attr_leaf_create); DEFINE_ATTR_EVENT(xfs_attr_leaf_compact); DEFINE_ATTR_EVENT(xfs_attr_leaf_get); DEFINE_ATTR_EVENT(xfs_attr_leaf_lookup); DEFINE_ATTR_EVENT(xfs_attr_leaf_replace); DEFINE_ATTR_EVENT(xfs_attr_leaf_remove); DEFINE_ATTR_EVENT(xfs_attr_leaf_removename); DEFINE_ATTR_EVENT(xfs_attr_leaf_split); DEFINE_ATTR_EVENT(xfs_attr_leaf_split_before); DEFINE_ATTR_EVENT(xfs_attr_leaf_split_after); DEFINE_ATTR_EVENT(xfs_attr_leaf_clearflag); DEFINE_ATTR_EVENT(xfs_attr_leaf_setflag); DEFINE_ATTR_EVENT(xfs_attr_leaf_flipflags); DEFINE_ATTR_EVENT(xfs_attr_leaf_to_sf); DEFINE_ATTR_EVENT(xfs_attr_leaf_to_node); DEFINE_ATTR_EVENT(xfs_attr_leaf_rebalance); DEFINE_ATTR_EVENT(xfs_attr_leaf_unbalance); DEFINE_ATTR_EVENT(xfs_attr_leaf_toosmall); DEFINE_ATTR_EVENT(xfs_attr_node_addname); DEFINE_ATTR_EVENT(xfs_attr_node_get); DEFINE_ATTR_EVENT(xfs_attr_node_replace); DEFINE_ATTR_EVENT(xfs_attr_node_removename); DEFINE_ATTR_EVENT(xfs_attr_fillstate); DEFINE_ATTR_EVENT(xfs_attr_refillstate); DEFINE_ATTR_EVENT(xfs_attr_rmtval_get); DEFINE_ATTR_EVENT(xfs_attr_rmtval_set); #define DEFINE_DA_EVENT(name) \ DEFINE_EVENT(xfs_da_class, name, \ TP_PROTO(struct xfs_da_args *args), \ TP_ARGS(args)) DEFINE_DA_EVENT(xfs_da_split); DEFINE_DA_EVENT(xfs_da_join); DEFINE_DA_EVENT(xfs_da_link_before); DEFINE_DA_EVENT(xfs_da_link_after); DEFINE_DA_EVENT(xfs_da_unlink_back); DEFINE_DA_EVENT(xfs_da_unlink_forward); DEFINE_DA_EVENT(xfs_da_root_split); DEFINE_DA_EVENT(xfs_da_root_join); DEFINE_DA_EVENT(xfs_da_node_add); DEFINE_DA_EVENT(xfs_da_node_create); DEFINE_DA_EVENT(xfs_da_node_split); DEFINE_DA_EVENT(xfs_da_node_remove); DEFINE_DA_EVENT(xfs_da_node_rebalance); DEFINE_DA_EVENT(xfs_da_node_unbalance); DEFINE_DA_EVENT(xfs_da_node_toosmall); DEFINE_DA_EVENT(xfs_da_swap_lastblock); DEFINE_DA_EVENT(xfs_da_grow_inode); DEFINE_DA_EVENT(xfs_da_shrink_inode); DEFINE_DA_EVENT(xfs_da_fixhashpath); DEFINE_DA_EVENT(xfs_da_path_shift); DECLARE_EVENT_CLASS(xfs_dir2_space_class, TP_PROTO(struct xfs_da_args *args, int idx), TP_ARGS(args, idx), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(uint32_t, op_flags) __field(int, idx) ), TP_fast_assign( __entry->dev = VFS_I(args->dp)->i_sb->s_dev; __entry->ino = args->dp->i_ino; __entry->op_flags = args->op_flags; __entry->idx = idx; ), TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS), __entry->idx) ) #define DEFINE_DIR2_SPACE_EVENT(name) \ DEFINE_EVENT(xfs_dir2_space_class, name, \ TP_PROTO(struct xfs_da_args *args, int idx), \ TP_ARGS(args, idx)) DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add); DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove); DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode); DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode); TRACE_EVENT(xfs_dir2_leafn_moveents, TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count), TP_ARGS(args, src_idx, dst_idx, count), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(uint32_t, op_flags) __field(int, src_idx) __field(int, dst_idx) __field(int, count) ), TP_fast_assign( __entry->dev = VFS_I(args->dp)->i_sb->s_dev; __entry->ino = args->dp->i_ino; __entry->op_flags = args->op_flags; __entry->src_idx = src_idx; __entry->dst_idx = dst_idx; __entry->count = count; ), TP_printk("dev %d:%d ino 0x%llx op_flags %s " "src_idx %d dst_idx %d count %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS), __entry->src_idx, __entry->dst_idx, __entry->count) ); #define XFS_SWAPEXT_INODES \ { 0, "target" }, \ { 1, "temp" } TRACE_DEFINE_ENUM(XFS_DINODE_FMT_DEV); TRACE_DEFINE_ENUM(XFS_DINODE_FMT_LOCAL); TRACE_DEFINE_ENUM(XFS_DINODE_FMT_EXTENTS); TRACE_DEFINE_ENUM(XFS_DINODE_FMT_BTREE); TRACE_DEFINE_ENUM(XFS_DINODE_FMT_UUID); DECLARE_EVENT_CLASS(xfs_swap_extent_class, TP_PROTO(struct xfs_inode *ip, int which), TP_ARGS(ip, which), TP_STRUCT__entry( __field(dev_t, dev) __field(int, which) __field(xfs_ino_t, ino) __field(int, format) __field(xfs_extnum_t, nex) __field(int, broot_size) __field(int, fork_off) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->which = which; __entry->ino = ip->i_ino; __entry->format = ip->i_df.if_format; __entry->nex = ip->i_df.if_nextents; __entry->broot_size = ip->i_df.if_broot_bytes; __entry->fork_off = xfs_inode_fork_boff(ip); ), TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %llu, " "broot size %d, forkoff 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __print_symbolic(__entry->which, XFS_SWAPEXT_INODES), __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR), __entry->nex, __entry->broot_size, __entry->fork_off) ) #define DEFINE_SWAPEXT_EVENT(name) \ DEFINE_EVENT(xfs_swap_extent_class, name, \ TP_PROTO(struct xfs_inode *ip, int which), \ TP_ARGS(ip, which)) DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before); DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after); TRACE_EVENT(xfs_log_recover, TP_PROTO(struct xlog *log, xfs_daddr_t headblk, xfs_daddr_t tailblk), TP_ARGS(log, headblk, tailblk), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_daddr_t, headblk) __field(xfs_daddr_t, tailblk) ), TP_fast_assign( __entry->dev = log->l_mp->m_super->s_dev; __entry->headblk = headblk; __entry->tailblk = tailblk; ), TP_printk("dev %d:%d headblk 0x%llx tailblk 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->headblk, __entry->tailblk) ) TRACE_EVENT(xfs_log_recover_record, TP_PROTO(struct xlog *log, struct xlog_rec_header *rhead, int pass), TP_ARGS(log, rhead, pass), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_lsn_t, lsn) __field(int, len) __field(int, num_logops) __field(int, pass) ), TP_fast_assign( __entry->dev = log->l_mp->m_super->s_dev; __entry->lsn = be64_to_cpu(rhead->h_lsn); __entry->len = be32_to_cpu(rhead->h_len); __entry->num_logops = be32_to_cpu(rhead->h_num_logops); __entry->pass = pass; ), TP_printk("dev %d:%d lsn 0x%llx len 0x%x num_logops 0x%x pass %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->lsn, __entry->len, __entry->num_logops, __entry->pass) ) DECLARE_EVENT_CLASS(xfs_log_recover_item_class, TP_PROTO(struct xlog *log, struct xlog_recover *trans, struct xlog_recover_item *item, int pass), TP_ARGS(log, trans, item, pass), TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, item) __field(xlog_tid_t, tid) __field(xfs_lsn_t, lsn) __field(int, type) __field(int, pass) __field(int, count) __field(int, total) ), TP_fast_assign( __entry->dev = log->l_mp->m_super->s_dev; __entry->item = (unsigned long)item; __entry->tid = trans->r_log_tid; __entry->lsn = trans->r_lsn; __entry->type = ITEM_TYPE(item); __entry->pass = pass; __entry->count = item->ri_cnt; __entry->total = item->ri_total; ), TP_printk("dev %d:%d tid 0x%x lsn 0x%llx, pass %d, item %p, " "item type %s item region count/total %d/%d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, __entry->lsn, __entry->pass, (void *)__entry->item, __print_symbolic(__entry->type, XFS_LI_TYPE_DESC), __entry->count, __entry->total) ) #define DEFINE_LOG_RECOVER_ITEM(name) \ DEFINE_EVENT(xfs_log_recover_item_class, name, \ TP_PROTO(struct xlog *log, struct xlog_recover *trans, \ struct xlog_recover_item *item, int pass), \ TP_ARGS(log, trans, item, pass)) DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add); DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont); DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head); DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail); DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover); DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class, TP_PROTO(struct xlog *log, struct xfs_buf_log_format *buf_f), TP_ARGS(log, buf_f), TP_STRUCT__entry( __field(dev_t, dev) __field(int64_t, blkno) __field(unsigned short, len) __field(unsigned short, flags) __field(unsigned short, size) __field(unsigned int, map_size) ), TP_fast_assign( __entry->dev = log->l_mp->m_super->s_dev; __entry->blkno = buf_f->blf_blkno; __entry->len = buf_f->blf_len; __entry->flags = buf_f->blf_flags; __entry->size = buf_f->blf_size; __entry->map_size = buf_f->blf_map_size; ), TP_printk("dev %d:%d daddr 0x%llx, bbcount 0x%x, flags 0x%x, size %d, " "map_size %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->blkno, __entry->len, __entry->flags, __entry->size, __entry->map_size) ) #define DEFINE_LOG_RECOVER_BUF_ITEM(name) \ DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \ TP_PROTO(struct xlog *log, struct xfs_buf_log_format *buf_f), \ TP_ARGS(log, buf_f)) DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel); DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel); DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add); DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc); DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover); DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_skip); DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf); DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf); DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf); DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class, TP_PROTO(struct xlog *log, struct xfs_inode_log_format *in_f), TP_ARGS(log, in_f), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(unsigned short, size) __field(int, fields) __field(unsigned short, asize) __field(unsigned short, dsize) __field(int64_t, blkno) __field(int, len) __field(int, boffset) ), TP_fast_assign( __entry->dev = log->l_mp->m_super->s_dev; __entry->ino = in_f->ilf_ino; __entry->size = in_f->ilf_size; __entry->fields = in_f->ilf_fields; __entry->asize = in_f->ilf_asize; __entry->dsize = in_f->ilf_dsize; __entry->blkno = in_f->ilf_blkno; __entry->len = in_f->ilf_len; __entry->boffset = in_f->ilf_boffset; ), TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, " "dsize %d, daddr 0x%llx, bbcount 0x%x, boffset %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->size, __entry->fields, __entry->asize, __entry->dsize, __entry->blkno, __entry->len, __entry->boffset) ) #define DEFINE_LOG_RECOVER_INO_ITEM(name) \ DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \ TP_PROTO(struct xlog *log, struct xfs_inode_log_format *in_f), \ TP_ARGS(log, in_f)) DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover); DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel); DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip); DECLARE_EVENT_CLASS(xfs_log_recover_icreate_item_class, TP_PROTO(struct xlog *log, struct xfs_icreate_log *in_f), TP_ARGS(log, in_f), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_agblock_t, agbno) __field(unsigned int, count) __field(unsigned int, isize) __field(xfs_agblock_t, length) __field(unsigned int, gen) ), TP_fast_assign( __entry->dev = log->l_mp->m_super->s_dev; __entry->agno = be32_to_cpu(in_f->icl_ag); __entry->agbno = be32_to_cpu(in_f->icl_agbno); __entry->count = be32_to_cpu(in_f->icl_count); __entry->isize = be32_to_cpu(in_f->icl_isize); __entry->length = be32_to_cpu(in_f->icl_length); __entry->gen = be32_to_cpu(in_f->icl_gen); ), TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x ireccount %u isize %u gen 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, __entry->length, __entry->count, __entry->isize, __entry->gen) ) #define DEFINE_LOG_RECOVER_ICREATE_ITEM(name) \ DEFINE_EVENT(xfs_log_recover_icreate_item_class, name, \ TP_PROTO(struct xlog *log, struct xfs_icreate_log *in_f), \ TP_ARGS(log, in_f)) DEFINE_LOG_RECOVER_ICREATE_ITEM(xfs_log_recover_icreate_cancel); DEFINE_LOG_RECOVER_ICREATE_ITEM(xfs_log_recover_icreate_recover); DECLARE_EVENT_CLASS(xfs_discard_class, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t len), TP_ARGS(mp, agno, agbno, len), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_agblock_t, agbno) __field(xfs_extlen_t, len) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; __entry->agbno = agbno; __entry->len = len; ), TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, __entry->len) ) #define DEFINE_DISCARD_EVENT(name) \ DEFINE_EVENT(xfs_discard_class, name, \ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ xfs_agblock_t agbno, xfs_extlen_t len), \ TP_ARGS(mp, agno, agbno, len)) DEFINE_DISCARD_EVENT(xfs_discard_extent); DEFINE_DISCARD_EVENT(xfs_discard_toosmall); DEFINE_DISCARD_EVENT(xfs_discard_exclude); DEFINE_DISCARD_EVENT(xfs_discard_busy); DECLARE_EVENT_CLASS(xfs_rtdiscard_class, TP_PROTO(struct xfs_mount *mp, xfs_rtblock_t rtbno, xfs_rtblock_t len), TP_ARGS(mp, rtbno, len), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_rtblock_t, rtbno) __field(xfs_rtblock_t, len) ), TP_fast_assign( __entry->dev = mp->m_rtdev_targp->bt_dev; __entry->rtbno = rtbno; __entry->len = len; ), TP_printk("dev %d:%d rtbno 0x%llx rtbcount 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rtbno, __entry->len) ) #define DEFINE_RTDISCARD_EVENT(name) \ DEFINE_EVENT(xfs_rtdiscard_class, name, \ TP_PROTO(struct xfs_mount *mp, \ xfs_rtblock_t rtbno, xfs_rtblock_t len), \ TP_ARGS(mp, rtbno, len)) DEFINE_RTDISCARD_EVENT(xfs_discard_rtextent); DEFINE_RTDISCARD_EVENT(xfs_discard_rttoosmall); DEFINE_RTDISCARD_EVENT(xfs_discard_rtrelax); DECLARE_EVENT_CLASS(xfs_btree_cur_class, TP_PROTO(struct xfs_btree_cur *cur, int level, struct xfs_buf *bp), TP_ARGS(cur, level, bp), TP_STRUCT__entry( __field(dev_t, dev) __string(name, cur->bc_ops->name) __field(int, level) __field(int, nlevels) __field(int, ptr) __field(xfs_daddr_t, daddr) ), TP_fast_assign( __entry->dev = cur->bc_mp->m_super->s_dev; __assign_str(name); __entry->level = level; __entry->nlevels = cur->bc_nlevels; __entry->ptr = cur->bc_levels[level].ptr; __entry->daddr = bp ? xfs_buf_daddr(bp) : -1; ), TP_printk("dev %d:%d %sbt level %d/%d ptr %d daddr 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(name), __entry->level, __entry->nlevels, __entry->ptr, (unsigned long long)__entry->daddr) ) #define DEFINE_BTREE_CUR_EVENT(name) \ DEFINE_EVENT(xfs_btree_cur_class, name, \ TP_PROTO(struct xfs_btree_cur *cur, int level, struct xfs_buf *bp), \ TP_ARGS(cur, level, bp)) DEFINE_BTREE_CUR_EVENT(xfs_btree_updkeys); DEFINE_BTREE_CUR_EVENT(xfs_btree_overlapped_query_range); TRACE_EVENT(xfs_btree_alloc_block, TP_PROTO(struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr, int stat, int error), TP_ARGS(cur, ptr, stat, error), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_ino_t, ino) __string(name, cur->bc_ops->name) __field(int, error) __field(xfs_agblock_t, agbno) ), TP_fast_assign( __entry->dev = cur->bc_mp->m_super->s_dev; switch (cur->bc_ops->type) { case XFS_BTREE_TYPE_INODE: __entry->agno = 0; __entry->ino = cur->bc_ino.ip->i_ino; break; case XFS_BTREE_TYPE_AG: __entry->agno = cur->bc_ag.pag->pag_agno; __entry->ino = 0; break; case XFS_BTREE_TYPE_MEM: __entry->agno = 0; __entry->ino = 0; break; } __assign_str(name); __entry->error = error; if (!error && stat) { if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) { xfs_fsblock_t fsb = be64_to_cpu(ptr->l); __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsb); __entry->agbno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsb); } else { __entry->agbno = be32_to_cpu(ptr->s); } } else { __entry->agbno = NULLAGBLOCK; } ), TP_printk("dev %d:%d %sbt agno 0x%x ino 0x%llx agbno 0x%x error %d", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(name), __entry->agno, __entry->ino, __entry->agbno, __entry->error) ); TRACE_EVENT(xfs_btree_free_block, TP_PROTO(struct xfs_btree_cur *cur, struct xfs_buf *bp), TP_ARGS(cur, bp), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_ino_t, ino) __string(name, cur->bc_ops->name) __field(xfs_agblock_t, agbno) ), TP_fast_assign( __entry->dev = cur->bc_mp->m_super->s_dev; __entry->agno = xfs_daddr_to_agno(cur->bc_mp, xfs_buf_daddr(bp)); if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) __entry->ino = cur->bc_ino.ip->i_ino; else __entry->ino = 0; __assign_str(name); __entry->agbno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp)); ), TP_printk("dev %d:%d %sbt agno 0x%x ino 0x%llx agbno 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(name), __entry->agno, __entry->ino, __entry->agbno) ); /* deferred ops */ struct xfs_defer_pending; DECLARE_EVENT_CLASS(xfs_defer_class, TP_PROTO(struct xfs_trans *tp, unsigned long caller_ip), TP_ARGS(tp, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(struct xfs_trans *, tp) __field(char, committed) __field(unsigned long, caller_ip) ), TP_fast_assign( __entry->dev = tp->t_mountp->m_super->s_dev; __entry->tp = tp; __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d tp %p caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tp, (char *)__entry->caller_ip) ) #define DEFINE_DEFER_EVENT(name) \ DEFINE_EVENT(xfs_defer_class, name, \ TP_PROTO(struct xfs_trans *tp, unsigned long caller_ip), \ TP_ARGS(tp, caller_ip)) DECLARE_EVENT_CLASS(xfs_defer_error_class, TP_PROTO(struct xfs_trans *tp, int error), TP_ARGS(tp, error), TP_STRUCT__entry( __field(dev_t, dev) __field(struct xfs_trans *, tp) __field(char, committed) __field(int, error) ), TP_fast_assign( __entry->dev = tp->t_mountp->m_super->s_dev; __entry->tp = tp; __entry->error = error; ), TP_printk("dev %d:%d tp %p err %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tp, __entry->error) ) #define DEFINE_DEFER_ERROR_EVENT(name) \ DEFINE_EVENT(xfs_defer_error_class, name, \ TP_PROTO(struct xfs_trans *tp, int error), \ TP_ARGS(tp, error)) DECLARE_EVENT_CLASS(xfs_defer_pending_class, TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp), TP_ARGS(mp, dfp), TP_STRUCT__entry( __field(dev_t, dev) __string(name, dfp->dfp_ops->name) __field(void *, intent) __field(unsigned int, flags) __field(char, committed) __field(int, nr) ), TP_fast_assign( __entry->dev = mp ? mp->m_super->s_dev : 0; __assign_str(name); __entry->intent = dfp->dfp_intent; __entry->flags = dfp->dfp_flags; __entry->committed = dfp->dfp_done != NULL; __entry->nr = dfp->dfp_count; ), TP_printk("dev %d:%d optype %s intent %p flags %s committed %d nr %d", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(name), __entry->intent, __print_flags(__entry->flags, "|", XFS_DEFER_PENDING_STRINGS), __entry->committed, __entry->nr) ) #define DEFINE_DEFER_PENDING_EVENT(name) \ DEFINE_EVENT(xfs_defer_pending_class, name, \ TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp), \ TP_ARGS(mp, dfp)) DEFINE_DEFER_EVENT(xfs_defer_cancel); DEFINE_DEFER_EVENT(xfs_defer_trans_roll); DEFINE_DEFER_EVENT(xfs_defer_trans_abort); DEFINE_DEFER_EVENT(xfs_defer_finish); DEFINE_DEFER_EVENT(xfs_defer_finish_done); DEFINE_DEFER_ERROR_EVENT(xfs_defer_trans_roll_error); DEFINE_DEFER_ERROR_EVENT(xfs_defer_finish_error); DEFINE_DEFER_PENDING_EVENT(xfs_defer_create_intent); DEFINE_DEFER_PENDING_EVENT(xfs_defer_cancel_list); DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_finish); DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_abort); DEFINE_DEFER_PENDING_EVENT(xfs_defer_relog_intent); DEFINE_DEFER_PENDING_EVENT(xfs_defer_isolate_paused); DEFINE_DEFER_PENDING_EVENT(xfs_defer_item_pause); DEFINE_DEFER_PENDING_EVENT(xfs_defer_item_unpause); DECLARE_EVENT_CLASS(xfs_free_extent_deferred_class, TP_PROTO(struct xfs_mount *mp, struct xfs_extent_free_item *free), TP_ARGS(mp, free), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_agblock_t, agbno) __field(xfs_extlen_t, len) __field(unsigned int, flags) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock); __entry->agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock); __entry->len = free->xefi_blockcount; __entry->flags = free->xefi_flags; ), TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x flags 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, __entry->len, __entry->flags) ); #define DEFINE_FREE_EXTENT_DEFERRED_EVENT(name) \ DEFINE_EVENT(xfs_free_extent_deferred_class, name, \ TP_PROTO(struct xfs_mount *mp, struct xfs_extent_free_item *free), \ TP_ARGS(mp, free)) DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_agfl_free_defer); DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_agfl_free_deferred); DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_extent_free_defer); DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_extent_free_deferred); DECLARE_EVENT_CLASS(xfs_defer_pending_item_class, TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp, void *item), TP_ARGS(mp, dfp, item), TP_STRUCT__entry( __field(dev_t, dev) __string(name, dfp->dfp_ops->name) __field(void *, intent) __field(void *, item) __field(char, committed) __field(unsigned int, flags) __field(int, nr) ), TP_fast_assign( __entry->dev = mp ? mp->m_super->s_dev : 0; __assign_str(name); __entry->intent = dfp->dfp_intent; __entry->item = item; __entry->committed = dfp->dfp_done != NULL; __entry->flags = dfp->dfp_flags; __entry->nr = dfp->dfp_count; ), TP_printk("dev %d:%d optype %s intent %p item %p flags %s committed %d nr %d", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(name), __entry->intent, __entry->item, __print_flags(__entry->flags, "|", XFS_DEFER_PENDING_STRINGS), __entry->committed, __entry->nr) ) #define DEFINE_DEFER_PENDING_ITEM_EVENT(name) \ DEFINE_EVENT(xfs_defer_pending_item_class, name, \ TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp, \ void *item), \ TP_ARGS(mp, dfp, item)) DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_add_item); DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_cancel_item); DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_finish_item); /* rmap tracepoints */ DECLARE_EVENT_CLASS(xfs_rmap_class, TP_PROTO(struct xfs_btree_cur *cur, xfs_agblock_t agbno, xfs_extlen_t len, bool unwritten, const struct xfs_owner_info *oinfo), TP_ARGS(cur, agbno, len, unwritten, oinfo), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_agblock_t, agbno) __field(xfs_extlen_t, len) __field(uint64_t, owner) __field(uint64_t, offset) __field(unsigned long, flags) ), TP_fast_assign( __entry->dev = cur->bc_mp->m_super->s_dev; __entry->agno = cur->bc_ag.pag->pag_agno; __entry->agbno = agbno; __entry->len = len; __entry->owner = oinfo->oi_owner; __entry->offset = oinfo->oi_offset; __entry->flags = oinfo->oi_flags; if (unwritten) __entry->flags |= XFS_RMAP_UNWRITTEN; ), TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%lx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, __entry->len, __entry->owner, __entry->offset, __entry->flags) ); #define DEFINE_RMAP_EVENT(name) \ DEFINE_EVENT(xfs_rmap_class, name, \ TP_PROTO(struct xfs_btree_cur *cur, \ xfs_agblock_t agbno, xfs_extlen_t len, bool unwritten, \ const struct xfs_owner_info *oinfo), \ TP_ARGS(cur, agbno, len, unwritten, oinfo)) /* btree cursor error/%ip tracepoint class */ DECLARE_EVENT_CLASS(xfs_btree_error_class, TP_PROTO(struct xfs_btree_cur *cur, int error, unsigned long caller_ip), TP_ARGS(cur, error, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_ino_t, ino) __field(int, error) __field(unsigned long, caller_ip) ), TP_fast_assign( __entry->dev = cur->bc_mp->m_super->s_dev; switch (cur->bc_ops->type) { case XFS_BTREE_TYPE_INODE: __entry->agno = 0; __entry->ino = cur->bc_ino.ip->i_ino; break; case XFS_BTREE_TYPE_AG: __entry->agno = cur->bc_ag.pag->pag_agno; __entry->ino = 0; break; case XFS_BTREE_TYPE_MEM: __entry->agno = 0; __entry->ino = 0; break; } __entry->error = error; __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d agno 0x%x ino 0x%llx error %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->ino, __entry->error, (char *)__entry->caller_ip) ); #define DEFINE_BTREE_ERROR_EVENT(name) \ DEFINE_EVENT(xfs_btree_error_class, name, \ TP_PROTO(struct xfs_btree_cur *cur, int error, \ unsigned long caller_ip), \ TP_ARGS(cur, error, caller_ip)) DEFINE_RMAP_EVENT(xfs_rmap_unmap); DEFINE_RMAP_EVENT(xfs_rmap_unmap_done); DEFINE_BTREE_ERROR_EVENT(xfs_rmap_unmap_error); DEFINE_RMAP_EVENT(xfs_rmap_map); DEFINE_RMAP_EVENT(xfs_rmap_map_done); DEFINE_BTREE_ERROR_EVENT(xfs_rmap_map_error); DEFINE_RMAP_EVENT(xfs_rmap_convert); DEFINE_RMAP_EVENT(xfs_rmap_convert_done); DEFINE_BTREE_ERROR_EVENT(xfs_rmap_convert_error); TRACE_EVENT(xfs_rmap_convert_state, TP_PROTO(struct xfs_btree_cur *cur, int state, unsigned long caller_ip), TP_ARGS(cur, state, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_ino_t, ino) __field(int, state) __field(unsigned long, caller_ip) ), TP_fast_assign( __entry->dev = cur->bc_mp->m_super->s_dev; switch (cur->bc_ops->type) { case XFS_BTREE_TYPE_INODE: __entry->agno = 0; __entry->ino = cur->bc_ino.ip->i_ino; break; case XFS_BTREE_TYPE_AG: __entry->agno = cur->bc_ag.pag->pag_agno; __entry->ino = 0; break; case XFS_BTREE_TYPE_MEM: __entry->agno = 0; __entry->ino = 0; break; } __entry->state = state; __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d agno 0x%x ino 0x%llx state %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->ino, __entry->state, (char *)__entry->caller_ip) ); DECLARE_EVENT_CLASS(xfs_rmapbt_class, TP_PROTO(struct xfs_btree_cur *cur, xfs_agblock_t agbno, xfs_extlen_t len, uint64_t owner, uint64_t offset, unsigned int flags), TP_ARGS(cur, agbno, len, owner, offset, flags), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_agblock_t, agbno) __field(xfs_extlen_t, len) __field(uint64_t, owner) __field(uint64_t, offset) __field(unsigned int, flags) ), TP_fast_assign( __entry->dev = cur->bc_mp->m_super->s_dev; __entry->agno = cur->bc_ag.pag->pag_agno; __entry->agbno = agbno; __entry->len = len; __entry->owner = owner; __entry->offset = offset; __entry->flags = flags; ), TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, __entry->len, __entry->owner, __entry->offset, __entry->flags) ); #define DEFINE_RMAPBT_EVENT(name) \ DEFINE_EVENT(xfs_rmapbt_class, name, \ TP_PROTO(struct xfs_btree_cur *cur, \ xfs_agblock_t agbno, xfs_extlen_t len, \ uint64_t owner, uint64_t offset, unsigned int flags), \ TP_ARGS(cur, agbno, len, owner, offset, flags)) TRACE_DEFINE_ENUM(XFS_RMAP_MAP); TRACE_DEFINE_ENUM(XFS_RMAP_MAP_SHARED); TRACE_DEFINE_ENUM(XFS_RMAP_UNMAP); TRACE_DEFINE_ENUM(XFS_RMAP_UNMAP_SHARED); TRACE_DEFINE_ENUM(XFS_RMAP_CONVERT); TRACE_DEFINE_ENUM(XFS_RMAP_CONVERT_SHARED); TRACE_DEFINE_ENUM(XFS_RMAP_ALLOC); TRACE_DEFINE_ENUM(XFS_RMAP_FREE); DECLARE_EVENT_CLASS(xfs_rmap_deferred_class, TP_PROTO(struct xfs_mount *mp, struct xfs_rmap_intent *ri), TP_ARGS(mp, ri), TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long long, owner) __field(xfs_agnumber_t, agno) __field(xfs_agblock_t, agbno) __field(int, whichfork) __field(xfs_fileoff_t, l_loff) __field(xfs_filblks_t, l_len) __field(xfs_exntst_t, l_state) __field(int, op) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = XFS_FSB_TO_AGNO(mp, ri->ri_bmap.br_startblock); __entry->agbno = XFS_FSB_TO_AGBNO(mp, ri->ri_bmap.br_startblock); __entry->owner = ri->ri_owner; __entry->whichfork = ri->ri_whichfork; __entry->l_loff = ri->ri_bmap.br_startoff; __entry->l_len = ri->ri_bmap.br_blockcount; __entry->l_state = ri->ri_bmap.br_state; __entry->op = ri->ri_type; ), TP_printk("dev %d:%d op %s agno 0x%x agbno 0x%x owner 0x%llx %s fileoff 0x%llx fsbcount 0x%llx state %d", MAJOR(__entry->dev), MINOR(__entry->dev), __print_symbolic(__entry->op, XFS_RMAP_INTENT_STRINGS), __entry->agno, __entry->agbno, __entry->owner, __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS), __entry->l_loff, __entry->l_len, __entry->l_state) ); #define DEFINE_RMAP_DEFERRED_EVENT(name) \ DEFINE_EVENT(xfs_rmap_deferred_class, name, \ TP_PROTO(struct xfs_mount *mp, struct xfs_rmap_intent *ri), \ TP_ARGS(mp, ri)) DEFINE_RMAP_DEFERRED_EVENT(xfs_rmap_defer); DEFINE_RMAP_DEFERRED_EVENT(xfs_rmap_deferred); DEFINE_RMAPBT_EVENT(xfs_rmap_update); DEFINE_RMAPBT_EVENT(xfs_rmap_insert); DEFINE_RMAPBT_EVENT(xfs_rmap_delete); DEFINE_BTREE_ERROR_EVENT(xfs_rmap_insert_error); DEFINE_BTREE_ERROR_EVENT(xfs_rmap_delete_error); DEFINE_BTREE_ERROR_EVENT(xfs_rmap_update_error); DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_candidate); DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_query); DEFINE_RMAPBT_EVENT(xfs_rmap_lookup_le_range_candidate); DEFINE_RMAPBT_EVENT(xfs_rmap_lookup_le_range); DEFINE_RMAPBT_EVENT(xfs_rmap_lookup_le_range_result); DEFINE_RMAPBT_EVENT(xfs_rmap_find_right_neighbor_result); DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_result); /* deferred bmbt updates */ TRACE_DEFINE_ENUM(XFS_BMAP_MAP); TRACE_DEFINE_ENUM(XFS_BMAP_UNMAP); DECLARE_EVENT_CLASS(xfs_bmap_deferred_class, TP_PROTO(struct xfs_bmap_intent *bi), TP_ARGS(bi), TP_STRUCT__entry( __field(dev_t, dev) __field(dev_t, opdev) __field(xfs_agnumber_t, agno) __field(xfs_ino_t, ino) __field(xfs_agblock_t, agbno) __field(xfs_fsblock_t, rtbno) __field(int, whichfork) __field(xfs_fileoff_t, l_loff) __field(xfs_filblks_t, l_len) __field(xfs_exntst_t, l_state) __field(int, op) ), TP_fast_assign( struct xfs_inode *ip = bi->bi_owner; __entry->dev = ip->i_mount->m_super->s_dev; if (xfs_ifork_is_realtime(ip, bi->bi_whichfork)) { __entry->agno = 0; __entry->agbno = 0; __entry->rtbno = bi->bi_bmap.br_startblock; __entry->opdev = ip->i_mount->m_rtdev_targp->bt_dev; } else { __entry->agno = XFS_FSB_TO_AGNO(ip->i_mount, bi->bi_bmap.br_startblock); __entry->agbno = XFS_FSB_TO_AGBNO(ip->i_mount, bi->bi_bmap.br_startblock); __entry->rtbno = 0; __entry->opdev = __entry->dev; } __entry->ino = ip->i_ino; __entry->whichfork = bi->bi_whichfork; __entry->l_loff = bi->bi_bmap.br_startoff; __entry->l_len = bi->bi_bmap.br_blockcount; __entry->l_state = bi->bi_bmap.br_state; __entry->op = bi->bi_type; ), TP_printk("dev %d:%d op %s opdev %d:%d ino 0x%llx agno 0x%x agbno 0x%x rtbno 0x%llx %s fileoff 0x%llx fsbcount 0x%llx state %d", MAJOR(__entry->dev), MINOR(__entry->dev), __print_symbolic(__entry->op, XFS_BMAP_INTENT_STRINGS), MAJOR(__entry->opdev), MINOR(__entry->opdev), __entry->ino, __entry->agno, __entry->agbno, __entry->rtbno, __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS), __entry->l_loff, __entry->l_len, __entry->l_state) ); #define DEFINE_BMAP_DEFERRED_EVENT(name) \ DEFINE_EVENT(xfs_bmap_deferred_class, name, \ TP_PROTO(struct xfs_bmap_intent *bi), \ TP_ARGS(bi)) DEFINE_BMAP_DEFERRED_EVENT(xfs_bmap_defer); DEFINE_BMAP_DEFERRED_EVENT(xfs_bmap_deferred); /* per-AG reservation */ DECLARE_EVENT_CLASS(xfs_ag_resv_class, TP_PROTO(struct xfs_perag *pag, enum xfs_ag_resv_type resv, xfs_extlen_t len), TP_ARGS(pag, resv, len), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(int, resv) __field(xfs_extlen_t, freeblks) __field(xfs_extlen_t, flcount) __field(xfs_extlen_t, reserved) __field(xfs_extlen_t, asked) __field(xfs_extlen_t, len) ), TP_fast_assign( struct xfs_ag_resv *r = xfs_perag_resv(pag, resv); __entry->dev = pag->pag_mount->m_super->s_dev; __entry->agno = pag->pag_agno; __entry->resv = resv; __entry->freeblks = pag->pagf_freeblks; __entry->flcount = pag->pagf_flcount; __entry->reserved = r ? r->ar_reserved : 0; __entry->asked = r ? r->ar_asked : 0; __entry->len = len; ), TP_printk("dev %d:%d agno 0x%x resv %d freeblks %u flcount %u " "resv %u ask %u len %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->resv, __entry->freeblks, __entry->flcount, __entry->reserved, __entry->asked, __entry->len) ) #define DEFINE_AG_RESV_EVENT(name) \ DEFINE_EVENT(xfs_ag_resv_class, name, \ TP_PROTO(struct xfs_perag *pag, enum xfs_ag_resv_type type, \ xfs_extlen_t len), \ TP_ARGS(pag, type, len)) /* per-AG reservation tracepoints */ DEFINE_AG_RESV_EVENT(xfs_ag_resv_init); DEFINE_AG_RESV_EVENT(xfs_ag_resv_free); DEFINE_AG_RESV_EVENT(xfs_ag_resv_alloc_extent); DEFINE_AG_RESV_EVENT(xfs_ag_resv_free_extent); DEFINE_AG_RESV_EVENT(xfs_ag_resv_critical); DEFINE_AG_RESV_EVENT(xfs_ag_resv_needed); /* simple AG-based error/%ip tracepoint class */ DECLARE_EVENT_CLASS(xfs_ag_error_class, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error, unsigned long caller_ip), TP_ARGS(mp, agno, error, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(int, error) __field(unsigned long, caller_ip) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; __entry->error = error; __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d agno 0x%x error %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->error, (char *)__entry->caller_ip) ); #define DEFINE_AG_ERROR_EVENT(name) \ DEFINE_EVENT(xfs_ag_error_class, name, \ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error, \ unsigned long caller_ip), \ TP_ARGS(mp, agno, error, caller_ip)) DEFINE_AG_ERROR_EVENT(xfs_ag_resv_init_error); /* refcount tracepoint classes */ DECLARE_EVENT_CLASS(xfs_refcount_class, TP_PROTO(struct xfs_btree_cur *cur, xfs_agblock_t agbno, xfs_extlen_t len), TP_ARGS(cur, agbno, len), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_agblock_t, agbno) __field(xfs_extlen_t, len) ), TP_fast_assign( __entry->dev = cur->bc_mp->m_super->s_dev; __entry->agno = cur->bc_ag.pag->pag_agno; __entry->agbno = agbno; __entry->len = len; ), TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, __entry->len) ); #define DEFINE_REFCOUNT_EVENT(name) \ DEFINE_EVENT(xfs_refcount_class, name, \ TP_PROTO(struct xfs_btree_cur *cur, xfs_agblock_t agbno, \ xfs_extlen_t len), \ TP_ARGS(cur, agbno, len)) TRACE_DEFINE_ENUM(XFS_LOOKUP_EQi); TRACE_DEFINE_ENUM(XFS_LOOKUP_LEi); TRACE_DEFINE_ENUM(XFS_LOOKUP_GEi); TRACE_EVENT(xfs_refcount_lookup, TP_PROTO(struct xfs_btree_cur *cur, xfs_agblock_t agbno, xfs_lookup_t dir), TP_ARGS(cur, agbno, dir), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_agblock_t, agbno) __field(xfs_lookup_t, dir) ), TP_fast_assign( __entry->dev = cur->bc_mp->m_super->s_dev; __entry->agno = cur->bc_ag.pag->pag_agno; __entry->agbno = agbno; __entry->dir = dir; ), TP_printk("dev %d:%d agno 0x%x agbno 0x%x cmp %s(%d)", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, __print_symbolic(__entry->dir, XFS_AG_BTREE_CMP_FORMAT_STR), __entry->dir) ) /* single-rcext tracepoint class */ DECLARE_EVENT_CLASS(xfs_refcount_extent_class, TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec), TP_ARGS(cur, irec), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(enum xfs_refc_domain, domain) __field(xfs_agblock_t, startblock) __field(xfs_extlen_t, blockcount) __field(xfs_nlink_t, refcount) ), TP_fast_assign( __entry->dev = cur->bc_mp->m_super->s_dev; __entry->agno = cur->bc_ag.pag->pag_agno; __entry->domain = irec->rc_domain; __entry->startblock = irec->rc_startblock; __entry->blockcount = irec->rc_blockcount; __entry->refcount = irec->rc_refcount; ), TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __print_symbolic(__entry->domain, XFS_REFC_DOMAIN_STRINGS), __entry->startblock, __entry->blockcount, __entry->refcount) ) #define DEFINE_REFCOUNT_EXTENT_EVENT(name) \ DEFINE_EVENT(xfs_refcount_extent_class, name, \ TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec), \ TP_ARGS(cur, irec)) /* single-rcext and an agbno tracepoint class */ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class, TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec, xfs_agblock_t agbno), TP_ARGS(cur, irec, agbno), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(enum xfs_refc_domain, domain) __field(xfs_agblock_t, startblock) __field(xfs_extlen_t, blockcount) __field(xfs_nlink_t, refcount) __field(xfs_agblock_t, agbno) ), TP_fast_assign( __entry->dev = cur->bc_mp->m_super->s_dev; __entry->agno = cur->bc_ag.pag->pag_agno; __entry->domain = irec->rc_domain; __entry->startblock = irec->rc_startblock; __entry->blockcount = irec->rc_blockcount; __entry->refcount = irec->rc_refcount; __entry->agbno = agbno; ), TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __print_symbolic(__entry->domain, XFS_REFC_DOMAIN_STRINGS), __entry->startblock, __entry->blockcount, __entry->refcount, __entry->agbno) ) #define DEFINE_REFCOUNT_EXTENT_AT_EVENT(name) \ DEFINE_EVENT(xfs_refcount_extent_at_class, name, \ TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec, \ xfs_agblock_t agbno), \ TP_ARGS(cur, irec, agbno)) /* double-rcext tracepoint class */ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class, TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2), TP_ARGS(cur, i1, i2), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(enum xfs_refc_domain, i1_domain) __field(xfs_agblock_t, i1_startblock) __field(xfs_extlen_t, i1_blockcount) __field(xfs_nlink_t, i1_refcount) __field(enum xfs_refc_domain, i2_domain) __field(xfs_agblock_t, i2_startblock) __field(xfs_extlen_t, i2_blockcount) __field(xfs_nlink_t, i2_refcount) ), TP_fast_assign( __entry->dev = cur->bc_mp->m_super->s_dev; __entry->agno = cur->bc_ag.pag->pag_agno; __entry->i1_domain = i1->rc_domain; __entry->i1_startblock = i1->rc_startblock; __entry->i1_blockcount = i1->rc_blockcount; __entry->i1_refcount = i1->rc_refcount; __entry->i2_domain = i2->rc_domain; __entry->i2_startblock = i2->rc_startblock; __entry->i2_blockcount = i2->rc_blockcount; __entry->i2_refcount = i2->rc_refcount; ), TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- " "dom %s agbno 0x%x fsbcount 0x%x refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i1_startblock, __entry->i1_blockcount, __entry->i1_refcount, __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i2_startblock, __entry->i2_blockcount, __entry->i2_refcount) ) #define DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(name) \ DEFINE_EVENT(xfs_refcount_double_extent_class, name, \ TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *i1, \ struct xfs_refcount_irec *i2), \ TP_ARGS(cur, i1, i2)) /* double-rcext and an agbno tracepoint class */ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class, TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2, xfs_agblock_t agbno), TP_ARGS(cur, i1, i2, agbno), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(enum xfs_refc_domain, i1_domain) __field(xfs_agblock_t, i1_startblock) __field(xfs_extlen_t, i1_blockcount) __field(xfs_nlink_t, i1_refcount) __field(enum xfs_refc_domain, i2_domain) __field(xfs_agblock_t, i2_startblock) __field(xfs_extlen_t, i2_blockcount) __field(xfs_nlink_t, i2_refcount) __field(xfs_agblock_t, agbno) ), TP_fast_assign( __entry->dev = cur->bc_mp->m_super->s_dev; __entry->agno = cur->bc_ag.pag->pag_agno; __entry->i1_domain = i1->rc_domain; __entry->i1_startblock = i1->rc_startblock; __entry->i1_blockcount = i1->rc_blockcount; __entry->i1_refcount = i1->rc_refcount; __entry->i2_domain = i2->rc_domain; __entry->i2_startblock = i2->rc_startblock; __entry->i2_blockcount = i2->rc_blockcount; __entry->i2_refcount = i2->rc_refcount; __entry->agbno = agbno; ), TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- " "dom %s agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i1_startblock, __entry->i1_blockcount, __entry->i1_refcount, __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i2_startblock, __entry->i2_blockcount, __entry->i2_refcount, __entry->agbno) ) #define DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(name) \ DEFINE_EVENT(xfs_refcount_double_extent_at_class, name, \ TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *i1, \ struct xfs_refcount_irec *i2, xfs_agblock_t agbno), \ TP_ARGS(cur, i1, i2, agbno)) /* triple-rcext tracepoint class */ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class, TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2, struct xfs_refcount_irec *i3), TP_ARGS(cur, i1, i2, i3), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(enum xfs_refc_domain, i1_domain) __field(xfs_agblock_t, i1_startblock) __field(xfs_extlen_t, i1_blockcount) __field(xfs_nlink_t, i1_refcount) __field(enum xfs_refc_domain, i2_domain) __field(xfs_agblock_t, i2_startblock) __field(xfs_extlen_t, i2_blockcount) __field(xfs_nlink_t, i2_refcount) __field(enum xfs_refc_domain, i3_domain) __field(xfs_agblock_t, i3_startblock) __field(xfs_extlen_t, i3_blockcount) __field(xfs_nlink_t, i3_refcount) ), TP_fast_assign( __entry->dev = cur->bc_mp->m_super->s_dev; __entry->agno = cur->bc_ag.pag->pag_agno; __entry->i1_domain = i1->rc_domain; __entry->i1_startblock = i1->rc_startblock; __entry->i1_blockcount = i1->rc_blockcount; __entry->i1_refcount = i1->rc_refcount; __entry->i2_domain = i2->rc_domain; __entry->i2_startblock = i2->rc_startblock; __entry->i2_blockcount = i2->rc_blockcount; __entry->i2_refcount = i2->rc_refcount; __entry->i3_domain = i3->rc_domain; __entry->i3_startblock = i3->rc_startblock; __entry->i3_blockcount = i3->rc_blockcount; __entry->i3_refcount = i3->rc_refcount; ), TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- " "dom %s agbno 0x%x fsbcount 0x%x refcount %u -- " "dom %s agbno 0x%x fsbcount 0x%x refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i1_startblock, __entry->i1_blockcount, __entry->i1_refcount, __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i2_startblock, __entry->i2_blockcount, __entry->i2_refcount, __print_symbolic(__entry->i3_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i3_startblock, __entry->i3_blockcount, __entry->i3_refcount) ); #define DEFINE_REFCOUNT_TRIPLE_EXTENT_EVENT(name) \ DEFINE_EVENT(xfs_refcount_triple_extent_class, name, \ TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *i1, \ struct xfs_refcount_irec *i2, struct xfs_refcount_irec *i3), \ TP_ARGS(cur, i1, i2, i3)) /* refcount btree tracepoints */ DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_get); DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_update); DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_insert); DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_delete); DEFINE_BTREE_ERROR_EVENT(xfs_refcount_insert_error); DEFINE_BTREE_ERROR_EVENT(xfs_refcount_delete_error); DEFINE_BTREE_ERROR_EVENT(xfs_refcount_update_error); /* refcount adjustment tracepoints */ DEFINE_REFCOUNT_EVENT(xfs_refcount_increase); DEFINE_REFCOUNT_EVENT(xfs_refcount_decrease); DEFINE_REFCOUNT_EVENT(xfs_refcount_cow_increase); DEFINE_REFCOUNT_EVENT(xfs_refcount_cow_decrease); DEFINE_REFCOUNT_TRIPLE_EXTENT_EVENT(xfs_refcount_merge_center_extents); DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_modify_extent); DEFINE_REFCOUNT_EXTENT_AT_EVENT(xfs_refcount_split_extent); DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(xfs_refcount_merge_left_extent); DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(xfs_refcount_merge_right_extent); DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(xfs_refcount_find_left_extent); DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(xfs_refcount_find_right_extent); DEFINE_BTREE_ERROR_EVENT(xfs_refcount_adjust_error); DEFINE_BTREE_ERROR_EVENT(xfs_refcount_adjust_cow_error); DEFINE_BTREE_ERROR_EVENT(xfs_refcount_merge_center_extents_error); DEFINE_BTREE_ERROR_EVENT(xfs_refcount_modify_extent_error); DEFINE_BTREE_ERROR_EVENT(xfs_refcount_split_extent_error); DEFINE_BTREE_ERROR_EVENT(xfs_refcount_merge_left_extent_error); DEFINE_BTREE_ERROR_EVENT(xfs_refcount_merge_right_extent_error); DEFINE_BTREE_ERROR_EVENT(xfs_refcount_find_left_extent_error); DEFINE_BTREE_ERROR_EVENT(xfs_refcount_find_right_extent_error); /* reflink helpers */ DEFINE_REFCOUNT_EVENT(xfs_refcount_find_shared); DEFINE_REFCOUNT_EVENT(xfs_refcount_find_shared_result); DEFINE_BTREE_ERROR_EVENT(xfs_refcount_find_shared_error); TRACE_DEFINE_ENUM(XFS_REFCOUNT_INCREASE); TRACE_DEFINE_ENUM(XFS_REFCOUNT_DECREASE); TRACE_DEFINE_ENUM(XFS_REFCOUNT_ALLOC_COW); TRACE_DEFINE_ENUM(XFS_REFCOUNT_FREE_COW); DECLARE_EVENT_CLASS(xfs_refcount_deferred_class, TP_PROTO(struct xfs_mount *mp, struct xfs_refcount_intent *refc), TP_ARGS(mp, refc), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(int, op) __field(xfs_agblock_t, agbno) __field(xfs_extlen_t, len) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = XFS_FSB_TO_AGNO(mp, refc->ri_startblock); __entry->op = refc->ri_type; __entry->agbno = XFS_FSB_TO_AGBNO(mp, refc->ri_startblock); __entry->len = refc->ri_blockcount; ), TP_printk("dev %d:%d op %s agno 0x%x agbno 0x%x fsbcount 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __print_symbolic(__entry->op, XFS_REFCOUNT_INTENT_STRINGS), __entry->agno, __entry->agbno, __entry->len) ); #define DEFINE_REFCOUNT_DEFERRED_EVENT(name) \ DEFINE_EVENT(xfs_refcount_deferred_class, name, \ TP_PROTO(struct xfs_mount *mp, struct xfs_refcount_intent *refc), \ TP_ARGS(mp, refc)) DEFINE_REFCOUNT_DEFERRED_EVENT(xfs_refcount_defer); DEFINE_REFCOUNT_DEFERRED_EVENT(xfs_refcount_deferred); DEFINE_REFCOUNT_DEFERRED_EVENT(xfs_refcount_finish_one_leftover); /* simple inode-based error/%ip tracepoint class */ DECLARE_EVENT_CLASS(xfs_inode_error_class, TP_PROTO(struct xfs_inode *ip, int error, unsigned long caller_ip), TP_ARGS(ip, error, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(int, error) __field(unsigned long, caller_ip) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; __entry->error = error; __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d ino 0x%llx error %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->error, (char *)__entry->caller_ip) ); #define DEFINE_INODE_ERROR_EVENT(name) \ DEFINE_EVENT(xfs_inode_error_class, name, \ TP_PROTO(struct xfs_inode *ip, int error, \ unsigned long caller_ip), \ TP_ARGS(ip, error, caller_ip)) /* reflink tracepoint classes */ /* two-file io tracepoint class */ DECLARE_EVENT_CLASS(xfs_double_io_class, TP_PROTO(struct xfs_inode *src, xfs_off_t soffset, xfs_off_t len, struct xfs_inode *dest, xfs_off_t doffset), TP_ARGS(src, soffset, len, dest, doffset), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, src_ino) __field(loff_t, src_isize) __field(loff_t, src_disize) __field(loff_t, src_offset) __field(long long, len) __field(xfs_ino_t, dest_ino) __field(loff_t, dest_isize) __field(loff_t, dest_disize) __field(loff_t, dest_offset) ), TP_fast_assign( __entry->dev = VFS_I(src)->i_sb->s_dev; __entry->src_ino = src->i_ino; __entry->src_isize = VFS_I(src)->i_size; __entry->src_disize = src->i_disk_size; __entry->src_offset = soffset; __entry->len = len; __entry->dest_ino = dest->i_ino; __entry->dest_isize = VFS_I(dest)->i_size; __entry->dest_disize = dest->i_disk_size; __entry->dest_offset = doffset; ), TP_printk("dev %d:%d bytecount 0x%llx " "ino 0x%llx isize 0x%llx disize 0x%llx pos 0x%llx -> " "ino 0x%llx isize 0x%llx disize 0x%llx pos 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->len, __entry->src_ino, __entry->src_isize, __entry->src_disize, __entry->src_offset, __entry->dest_ino, __entry->dest_isize, __entry->dest_disize, __entry->dest_offset) ) #define DEFINE_DOUBLE_IO_EVENT(name) \ DEFINE_EVENT(xfs_double_io_class, name, \ TP_PROTO(struct xfs_inode *src, xfs_off_t soffset, xfs_off_t len, \ struct xfs_inode *dest, xfs_off_t doffset), \ TP_ARGS(src, soffset, len, dest, doffset)) /* inode/irec events */ DECLARE_EVENT_CLASS(xfs_inode_irec_class, TP_PROTO(struct xfs_inode *ip, struct xfs_bmbt_irec *irec), TP_ARGS(ip, irec), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(xfs_fileoff_t, lblk) __field(xfs_extlen_t, len) __field(xfs_fsblock_t, pblk) __field(int, state) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; __entry->lblk = irec->br_startoff; __entry->len = irec->br_blockcount; __entry->pblk = irec->br_startblock; __entry->state = irec->br_state; ), TP_printk("dev %d:%d ino 0x%llx fileoff 0x%llx fsbcount 0x%x startblock 0x%llx st %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->lblk, __entry->len, __entry->pblk, __entry->state) ); #define DEFINE_INODE_IREC_EVENT(name) \ DEFINE_EVENT(xfs_inode_irec_class, name, \ TP_PROTO(struct xfs_inode *ip, struct xfs_bmbt_irec *irec), \ TP_ARGS(ip, irec)) /* inode iomap invalidation events */ DECLARE_EVENT_CLASS(xfs_wb_invalid_class, TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap, unsigned int wpcseq, int whichfork), TP_ARGS(ip, iomap, wpcseq, whichfork), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(u64, addr) __field(loff_t, pos) __field(u64, len) __field(u16, type) __field(u16, flags) __field(u32, wpcseq) __field(u32, forkseq) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; __entry->addr = iomap->addr; __entry->pos = iomap->offset; __entry->len = iomap->length; __entry->type = iomap->type; __entry->flags = iomap->flags; __entry->wpcseq = wpcseq; __entry->forkseq = READ_ONCE(xfs_ifork_ptr(ip, whichfork)->if_seq); ), TP_printk("dev %d:%d ino 0x%llx pos 0x%llx addr 0x%llx bytecount 0x%llx type 0x%x flags 0x%x wpcseq 0x%x forkseq 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->pos, __entry->addr, __entry->len, __entry->type, __entry->flags, __entry->wpcseq, __entry->forkseq) ); #define DEFINE_WB_INVALID_EVENT(name) \ DEFINE_EVENT(xfs_wb_invalid_class, name, \ TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap, unsigned int wpcseq, int whichfork), \ TP_ARGS(ip, iomap, wpcseq, whichfork)) DEFINE_WB_INVALID_EVENT(xfs_wb_cow_iomap_invalid); DEFINE_WB_INVALID_EVENT(xfs_wb_data_iomap_invalid); DECLARE_EVENT_CLASS(xfs_iomap_invalid_class, TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap), TP_ARGS(ip, iomap), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(u64, addr) __field(loff_t, pos) __field(u64, len) __field(u64, validity_cookie) __field(u64, inodeseq) __field(u16, type) __field(u16, flags) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; __entry->addr = iomap->addr; __entry->pos = iomap->offset; __entry->len = iomap->length; __entry->validity_cookie = iomap->validity_cookie; __entry->type = iomap->type; __entry->flags = iomap->flags; __entry->inodeseq = xfs_iomap_inode_sequence(ip, iomap->flags); ), TP_printk("dev %d:%d ino 0x%llx pos 0x%llx addr 0x%llx bytecount 0x%llx type 0x%x flags 0x%x validity_cookie 0x%llx inodeseq 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->pos, __entry->addr, __entry->len, __entry->type, __entry->flags, __entry->validity_cookie, __entry->inodeseq) ); #define DEFINE_IOMAP_INVALID_EVENT(name) \ DEFINE_EVENT(xfs_iomap_invalid_class, name, \ TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap), \ TP_ARGS(ip, iomap)) DEFINE_IOMAP_INVALID_EVENT(xfs_iomap_invalid); /* refcount/reflink tracepoint definitions */ /* reflink tracepoints */ DEFINE_INODE_EVENT(xfs_reflink_set_inode_flag); DEFINE_INODE_EVENT(xfs_reflink_unset_inode_flag); DEFINE_ITRUNC_EVENT(xfs_reflink_update_inode_size); TRACE_EVENT(xfs_reflink_remap_blocks, TP_PROTO(struct xfs_inode *src, xfs_fileoff_t soffset, xfs_filblks_t len, struct xfs_inode *dest, xfs_fileoff_t doffset), TP_ARGS(src, soffset, len, dest, doffset), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, src_ino) __field(xfs_fileoff_t, src_lblk) __field(xfs_filblks_t, len) __field(xfs_ino_t, dest_ino) __field(xfs_fileoff_t, dest_lblk) ), TP_fast_assign( __entry->dev = VFS_I(src)->i_sb->s_dev; __entry->src_ino = src->i_ino; __entry->src_lblk = soffset; __entry->len = len; __entry->dest_ino = dest->i_ino; __entry->dest_lblk = doffset; ), TP_printk("dev %d:%d fsbcount 0x%llx " "ino 0x%llx fileoff 0x%llx -> ino 0x%llx fileoff 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->len, __entry->src_ino, __entry->src_lblk, __entry->dest_ino, __entry->dest_lblk) ); DEFINE_DOUBLE_IO_EVENT(xfs_reflink_remap_range); DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_range_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_set_inode_flag_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_update_inode_size_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_blocks_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_extent_error); DEFINE_INODE_IREC_EVENT(xfs_reflink_remap_extent_src); DEFINE_INODE_IREC_EVENT(xfs_reflink_remap_extent_dest); /* dedupe tracepoints */ DEFINE_DOUBLE_IO_EVENT(xfs_reflink_compare_extents); DEFINE_INODE_ERROR_EVENT(xfs_reflink_compare_extents_error); /* ioctl tracepoints */ TRACE_EVENT(xfs_ioctl_clone, TP_PROTO(struct inode *src, struct inode *dest), TP_ARGS(src, dest), TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, src_ino) __field(loff_t, src_isize) __field(unsigned long, dest_ino) __field(loff_t, dest_isize) ), TP_fast_assign( __entry->dev = src->i_sb->s_dev; __entry->src_ino = src->i_ino; __entry->src_isize = i_size_read(src); __entry->dest_ino = dest->i_ino; __entry->dest_isize = i_size_read(dest); ), TP_printk("dev %d:%d ino 0x%lx isize 0x%llx -> ino 0x%lx isize 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->src_ino, __entry->src_isize, __entry->dest_ino, __entry->dest_isize) ); /* unshare tracepoints */ DEFINE_SIMPLE_IO_EVENT(xfs_reflink_unshare); DEFINE_INODE_ERROR_EVENT(xfs_reflink_unshare_error); /* copy on write */ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow); DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range); DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_from); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_to); DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error); DEFINE_INODE_IREC_EVENT(xfs_reflink_cancel_cow); /* rmap swapext tracepoints */ DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap); DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap_piece); DEFINE_INODE_ERROR_EVENT(xfs_swap_extent_rmap_error); /* fsmap traces */ DECLARE_EVENT_CLASS(xfs_fsmap_class, TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno, const struct xfs_rmap_irec *rmap), TP_ARGS(mp, keydev, agno, rmap), TP_STRUCT__entry( __field(dev_t, dev) __field(dev_t, keydev) __field(xfs_agnumber_t, agno) __field(xfs_fsblock_t, bno) __field(xfs_filblks_t, len) __field(uint64_t, owner) __field(uint64_t, offset) __field(unsigned int, flags) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->keydev = new_decode_dev(keydev); __entry->agno = agno; __entry->bno = rmap->rm_startblock; __entry->len = rmap->rm_blockcount; __entry->owner = rmap->rm_owner; __entry->offset = rmap->rm_offset; __entry->flags = rmap->rm_flags; ), TP_printk("dev %d:%d keydev %d:%d agno 0x%x startblock 0x%llx fsbcount 0x%llx owner 0x%llx fileoff 0x%llx flags 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->keydev), MINOR(__entry->keydev), __entry->agno, __entry->bno, __entry->len, __entry->owner, __entry->offset, __entry->flags) ) #define DEFINE_FSMAP_EVENT(name) \ DEFINE_EVENT(xfs_fsmap_class, name, \ TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno, \ const struct xfs_rmap_irec *rmap), \ TP_ARGS(mp, keydev, agno, rmap)) DEFINE_FSMAP_EVENT(xfs_fsmap_low_key); DEFINE_FSMAP_EVENT(xfs_fsmap_high_key); DEFINE_FSMAP_EVENT(xfs_fsmap_mapping); DECLARE_EVENT_CLASS(xfs_fsmap_linear_class, TP_PROTO(struct xfs_mount *mp, u32 keydev, uint64_t bno), TP_ARGS(mp, keydev, bno), TP_STRUCT__entry( __field(dev_t, dev) __field(dev_t, keydev) __field(xfs_fsblock_t, bno) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->keydev = new_decode_dev(keydev); __entry->bno = bno; ), TP_printk("dev %d:%d keydev %d:%d bno 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->keydev), MINOR(__entry->keydev), __entry->bno) ) #define DEFINE_FSMAP_LINEAR_EVENT(name) \ DEFINE_EVENT(xfs_fsmap_linear_class, name, \ TP_PROTO(struct xfs_mount *mp, u32 keydev, uint64_t bno), \ TP_ARGS(mp, keydev, bno)) DEFINE_FSMAP_LINEAR_EVENT(xfs_fsmap_low_key_linear); DEFINE_FSMAP_LINEAR_EVENT(xfs_fsmap_high_key_linear); DECLARE_EVENT_CLASS(xfs_getfsmap_class, TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap), TP_ARGS(mp, fsmap), TP_STRUCT__entry( __field(dev_t, dev) __field(dev_t, keydev) __field(xfs_daddr_t, block) __field(xfs_daddr_t, len) __field(uint64_t, owner) __field(uint64_t, offset) __field(uint64_t, flags) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->keydev = new_decode_dev(fsmap->fmr_device); __entry->block = fsmap->fmr_physical; __entry->len = fsmap->fmr_length; __entry->owner = fsmap->fmr_owner; __entry->offset = fsmap->fmr_offset; __entry->flags = fsmap->fmr_flags; ), TP_printk("dev %d:%d keydev %d:%d daddr 0x%llx bbcount 0x%llx owner 0x%llx fileoff_daddr 0x%llx flags 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->keydev), MINOR(__entry->keydev), __entry->block, __entry->len, __entry->owner, __entry->offset, __entry->flags) ) #define DEFINE_GETFSMAP_EVENT(name) \ DEFINE_EVENT(xfs_getfsmap_class, name, \ TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap), \ TP_ARGS(mp, fsmap)) DEFINE_GETFSMAP_EVENT(xfs_getfsmap_low_key); DEFINE_GETFSMAP_EVENT(xfs_getfsmap_high_key); DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping); DECLARE_EVENT_CLASS(xfs_trans_resv_class, TP_PROTO(struct xfs_mount *mp, unsigned int type, struct xfs_trans_res *res), TP_ARGS(mp, type, res), TP_STRUCT__entry( __field(dev_t, dev) __field(int, type) __field(uint, logres) __field(int, logcount) __field(int, logflags) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->type = type; __entry->logres = res->tr_logres; __entry->logcount = res->tr_logcount; __entry->logflags = res->tr_logflags; ), TP_printk("dev %d:%d type %d logres %u logcount %d flags 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->type, __entry->logres, __entry->logcount, __entry->logflags) ) #define DEFINE_TRANS_RESV_EVENT(name) \ DEFINE_EVENT(xfs_trans_resv_class, name, \ TP_PROTO(struct xfs_mount *mp, unsigned int type, \ struct xfs_trans_res *res), \ TP_ARGS(mp, type, res)) DEFINE_TRANS_RESV_EVENT(xfs_trans_resv_calc); DEFINE_TRANS_RESV_EVENT(xfs_trans_resv_calc_minlogsize); TRACE_EVENT(xfs_log_get_max_trans_res, TP_PROTO(struct xfs_mount *mp, const struct xfs_trans_res *res), TP_ARGS(mp, res), TP_STRUCT__entry( __field(dev_t, dev) __field(uint, logres) __field(int, logcount) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->logres = res->tr_logres; __entry->logcount = res->tr_logcount; ), TP_printk("dev %d:%d logres %u logcount %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->logres, __entry->logcount) ); DECLARE_EVENT_CLASS(xfs_trans_class, TP_PROTO(struct xfs_trans *tp, unsigned long caller_ip), TP_ARGS(tp, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(uint32_t, tid) __field(uint32_t, flags) __field(unsigned long, caller_ip) ), TP_fast_assign( __entry->dev = tp->t_mountp->m_super->s_dev; __entry->tid = 0; if (tp->t_ticket) __entry->tid = tp->t_ticket->t_tid; __entry->flags = tp->t_flags; __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d trans %x flags 0x%x caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, __entry->flags, (char *)__entry->caller_ip) ) #define DEFINE_TRANS_EVENT(name) \ DEFINE_EVENT(xfs_trans_class, name, \ TP_PROTO(struct xfs_trans *tp, unsigned long caller_ip), \ TP_ARGS(tp, caller_ip)) DEFINE_TRANS_EVENT(xfs_trans_alloc); DEFINE_TRANS_EVENT(xfs_trans_cancel); DEFINE_TRANS_EVENT(xfs_trans_commit); DEFINE_TRANS_EVENT(xfs_trans_dup); DEFINE_TRANS_EVENT(xfs_trans_free); DEFINE_TRANS_EVENT(xfs_trans_roll); DEFINE_TRANS_EVENT(xfs_trans_add_item); DEFINE_TRANS_EVENT(xfs_trans_commit_items); DEFINE_TRANS_EVENT(xfs_trans_free_items); TRACE_EVENT(xfs_iunlink_update_bucket, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, unsigned int bucket, xfs_agino_t old_ptr, xfs_agino_t new_ptr), TP_ARGS(mp, agno, bucket, old_ptr, new_ptr), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(unsigned int, bucket) __field(xfs_agino_t, old_ptr) __field(xfs_agino_t, new_ptr) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; __entry->bucket = bucket; __entry->old_ptr = old_ptr; __entry->new_ptr = new_ptr; ), TP_printk("dev %d:%d agno 0x%x bucket %u old 0x%x new 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->bucket, __entry->old_ptr, __entry->new_ptr) ); TRACE_EVENT(xfs_iunlink_update_dinode, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino, xfs_agino_t old_ptr, xfs_agino_t new_ptr), TP_ARGS(mp, agno, agino, old_ptr, new_ptr), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_agino_t, agino) __field(xfs_agino_t, old_ptr) __field(xfs_agino_t, new_ptr) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; __entry->agino = agino; __entry->old_ptr = old_ptr; __entry->new_ptr = new_ptr; ), TP_printk("dev %d:%d agno 0x%x agino 0x%x old 0x%x new 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agino, __entry->old_ptr, __entry->new_ptr) ); TRACE_EVENT(xfs_iunlink_reload_next, TP_PROTO(struct xfs_inode *ip), TP_ARGS(ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_agino_t, agino) __field(xfs_agino_t, prev_agino) __field(xfs_agino_t, next_agino) ), TP_fast_assign( __entry->dev = ip->i_mount->m_super->s_dev; __entry->agno = XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino); __entry->agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino); __entry->prev_agino = ip->i_prev_unlinked; __entry->next_agino = ip->i_next_unlinked; ), TP_printk("dev %d:%d agno 0x%x agino 0x%x prev_unlinked 0x%x next_unlinked 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agino, __entry->prev_agino, __entry->next_agino) ); TRACE_EVENT(xfs_inode_reload_unlinked_bucket, TP_PROTO(struct xfs_inode *ip), TP_ARGS(ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_agino_t, agino) ), TP_fast_assign( __entry->dev = ip->i_mount->m_super->s_dev; __entry->agno = XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino); __entry->agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino); ), TP_printk("dev %d:%d agno 0x%x agino 0x%x bucket %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agino, __entry->agino % XFS_AGI_UNLINKED_BUCKETS) ); DECLARE_EVENT_CLASS(xfs_ag_inode_class, TP_PROTO(struct xfs_inode *ip), TP_ARGS(ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_agino_t, agino) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->agno = XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino); __entry->agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino); ), TP_printk("dev %d:%d agno 0x%x agino 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agino) ) #define DEFINE_AGINODE_EVENT(name) \ DEFINE_EVENT(xfs_ag_inode_class, name, \ TP_PROTO(struct xfs_inode *ip), \ TP_ARGS(ip)) DEFINE_AGINODE_EVENT(xfs_iunlink); DEFINE_AGINODE_EVENT(xfs_iunlink_remove); DECLARE_EVENT_CLASS(xfs_fs_corrupt_class, TP_PROTO(struct xfs_mount *mp, unsigned int flags), TP_ARGS(mp, flags), TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned int, flags) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->flags = flags; ), TP_printk("dev %d:%d flags 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->flags) ); #define DEFINE_FS_CORRUPT_EVENT(name) \ DEFINE_EVENT(xfs_fs_corrupt_class, name, \ TP_PROTO(struct xfs_mount *mp, unsigned int flags), \ TP_ARGS(mp, flags)) DEFINE_FS_CORRUPT_EVENT(xfs_fs_mark_sick); DEFINE_FS_CORRUPT_EVENT(xfs_fs_mark_corrupt); DEFINE_FS_CORRUPT_EVENT(xfs_fs_mark_healthy); DEFINE_FS_CORRUPT_EVENT(xfs_fs_unfixed_corruption); DEFINE_FS_CORRUPT_EVENT(xfs_rt_mark_sick); DEFINE_FS_CORRUPT_EVENT(xfs_rt_mark_corrupt); DEFINE_FS_CORRUPT_EVENT(xfs_rt_mark_healthy); DEFINE_FS_CORRUPT_EVENT(xfs_rt_unfixed_corruption); DECLARE_EVENT_CLASS(xfs_ag_corrupt_class, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, unsigned int flags), TP_ARGS(mp, agno, flags), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(unsigned int, flags) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; __entry->flags = flags; ), TP_printk("dev %d:%d agno 0x%x flags 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->flags) ); #define DEFINE_AG_CORRUPT_EVENT(name) \ DEFINE_EVENT(xfs_ag_corrupt_class, name, \ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ unsigned int flags), \ TP_ARGS(mp, agno, flags)) DEFINE_AG_CORRUPT_EVENT(xfs_ag_mark_sick); DEFINE_AG_CORRUPT_EVENT(xfs_ag_mark_corrupt); DEFINE_AG_CORRUPT_EVENT(xfs_ag_mark_healthy); DEFINE_AG_CORRUPT_EVENT(xfs_ag_unfixed_corruption); DECLARE_EVENT_CLASS(xfs_inode_corrupt_class, TP_PROTO(struct xfs_inode *ip, unsigned int flags), TP_ARGS(ip, flags), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(unsigned int, flags) ), TP_fast_assign( __entry->dev = ip->i_mount->m_super->s_dev; __entry->ino = ip->i_ino; __entry->flags = flags; ), TP_printk("dev %d:%d ino 0x%llx flags 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->flags) ); #define DEFINE_INODE_CORRUPT_EVENT(name) \ DEFINE_EVENT(xfs_inode_corrupt_class, name, \ TP_PROTO(struct xfs_inode *ip, unsigned int flags), \ TP_ARGS(ip, flags)) DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_sick); DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_corrupt); DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_healthy); DEFINE_INODE_CORRUPT_EVENT(xfs_inode_unfixed_corruption); TRACE_EVENT(xfs_iwalk_ag, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t startino), TP_ARGS(mp, agno, startino), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_agino_t, startino) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; __entry->startino = startino; ), TP_printk("dev %d:%d agno 0x%x startino 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->startino) ) TRACE_EVENT(xfs_iwalk_ag_rec, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, struct xfs_inobt_rec_incore *irec), TP_ARGS(mp, agno, irec), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_agino_t, startino) __field(uint64_t, freemask) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; __entry->startino = irec->ir_startino; __entry->freemask = irec->ir_free; ), TP_printk("dev %d:%d agno 0x%x startino 0x%x freemask 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->startino, __entry->freemask) ) TRACE_EVENT(xfs_pwork_init, TP_PROTO(struct xfs_mount *mp, unsigned int nr_threads, pid_t pid), TP_ARGS(mp, nr_threads, pid), TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned int, nr_threads) __field(pid_t, pid) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->nr_threads = nr_threads; __entry->pid = pid; ), TP_printk("dev %d:%d nr_threads %u pid %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_threads, __entry->pid) ) TRACE_EVENT(xfs_check_new_dalign, TP_PROTO(struct xfs_mount *mp, int new_dalign, xfs_ino_t calc_rootino), TP_ARGS(mp, new_dalign, calc_rootino), TP_STRUCT__entry( __field(dev_t, dev) __field(int, new_dalign) __field(xfs_ino_t, sb_rootino) __field(xfs_ino_t, calc_rootino) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->new_dalign = new_dalign; __entry->sb_rootino = mp->m_sb.sb_rootino; __entry->calc_rootino = calc_rootino; ), TP_printk("dev %d:%d new_dalign %d sb_rootino 0x%llx calc_rootino 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->new_dalign, __entry->sb_rootino, __entry->calc_rootino) ) TRACE_EVENT(xfs_btree_commit_afakeroot, TP_PROTO(struct xfs_btree_cur *cur), TP_ARGS(cur), TP_STRUCT__entry( __field(dev_t, dev) __string(name, cur->bc_ops->name) __field(xfs_agnumber_t, agno) __field(xfs_agblock_t, agbno) __field(unsigned int, levels) __field(unsigned int, blocks) ), TP_fast_assign( __entry->dev = cur->bc_mp->m_super->s_dev; __assign_str(name); __entry->agno = cur->bc_ag.pag->pag_agno; __entry->agbno = cur->bc_ag.afake->af_root; __entry->levels = cur->bc_ag.afake->af_levels; __entry->blocks = cur->bc_ag.afake->af_blocks; ), TP_printk("dev %d:%d %sbt agno 0x%x levels %u blocks %u root %u", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(name), __entry->agno, __entry->levels, __entry->blocks, __entry->agbno) ) TRACE_EVENT(xfs_btree_commit_ifakeroot, TP_PROTO(struct xfs_btree_cur *cur), TP_ARGS(cur), TP_STRUCT__entry( __field(dev_t, dev) __string(name, cur->bc_ops->name) __field(xfs_agnumber_t, agno) __field(xfs_agino_t, agino) __field(unsigned int, levels) __field(unsigned int, blocks) __field(int, whichfork) ), TP_fast_assign( __entry->dev = cur->bc_mp->m_super->s_dev; __assign_str(name); __entry->agno = XFS_INO_TO_AGNO(cur->bc_mp, cur->bc_ino.ip->i_ino); __entry->agino = XFS_INO_TO_AGINO(cur->bc_mp, cur->bc_ino.ip->i_ino); __entry->levels = cur->bc_ino.ifake->if_levels; __entry->blocks = cur->bc_ino.ifake->if_blocks; __entry->whichfork = cur->bc_ino.whichfork; ), TP_printk("dev %d:%d %sbt agno 0x%x agino 0x%x whichfork %s levels %u blocks %u", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(name), __entry->agno, __entry->agino, __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS), __entry->levels, __entry->blocks) ) TRACE_EVENT(xfs_btree_bload_level_geometry, TP_PROTO(struct xfs_btree_cur *cur, unsigned int level, uint64_t nr_this_level, unsigned int nr_per_block, unsigned int desired_npb, uint64_t blocks, uint64_t blocks_with_extra), TP_ARGS(cur, level, nr_this_level, nr_per_block, desired_npb, blocks, blocks_with_extra), TP_STRUCT__entry( __field(dev_t, dev) __string(name, cur->bc_ops->name) __field(unsigned int, level) __field(unsigned int, nlevels) __field(uint64_t, nr_this_level) __field(unsigned int, nr_per_block) __field(unsigned int, desired_npb) __field(unsigned long long, blocks) __field(unsigned long long, blocks_with_extra) ), TP_fast_assign( __entry->dev = cur->bc_mp->m_super->s_dev; __assign_str(name); __entry->level = level; __entry->nlevels = cur->bc_nlevels; __entry->nr_this_level = nr_this_level; __entry->nr_per_block = nr_per_block; __entry->desired_npb = desired_npb; __entry->blocks = blocks; __entry->blocks_with_extra = blocks_with_extra; ), TP_printk("dev %d:%d %sbt level %u/%u nr_this_level %llu nr_per_block %u desired_npb %u blocks %llu blocks_with_extra %llu", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(name), __entry->level, __entry->nlevels, __entry->nr_this_level, __entry->nr_per_block, __entry->desired_npb, __entry->blocks, __entry->blocks_with_extra) ) TRACE_EVENT(xfs_btree_bload_block, TP_PROTO(struct xfs_btree_cur *cur, unsigned int level, uint64_t block_idx, uint64_t nr_blocks, union xfs_btree_ptr *ptr, unsigned int nr_records), TP_ARGS(cur, level, block_idx, nr_blocks, ptr, nr_records), TP_STRUCT__entry( __field(dev_t, dev) __string(name, cur->bc_ops->name) __field(unsigned int, level) __field(unsigned long long, block_idx) __field(unsigned long long, nr_blocks) __field(xfs_agnumber_t, agno) __field(xfs_agblock_t, agbno) __field(unsigned int, nr_records) ), TP_fast_assign( __entry->dev = cur->bc_mp->m_super->s_dev; __assign_str(name); __entry->level = level; __entry->block_idx = block_idx; __entry->nr_blocks = nr_blocks; if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) { xfs_fsblock_t fsb = be64_to_cpu(ptr->l); __entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsb); __entry->agbno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsb); } else { __entry->agno = cur->bc_ag.pag->pag_agno; __entry->agbno = be32_to_cpu(ptr->s); } __entry->nr_records = nr_records; ), TP_printk("dev %d:%d %sbt level %u block %llu/%llu agno 0x%x agbno 0x%x recs %u", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(name), __entry->level, __entry->block_idx, __entry->nr_blocks, __entry->agno, __entry->agbno, __entry->nr_records) ) DECLARE_EVENT_CLASS(xfs_timestamp_range_class, TP_PROTO(struct xfs_mount *mp, time64_t min, time64_t max), TP_ARGS(mp, min, max), TP_STRUCT__entry( __field(dev_t, dev) __field(long long, min) __field(long long, max) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->min = min; __entry->max = max; ), TP_printk("dev %d:%d min %lld max %lld", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->min, __entry->max) ) #define DEFINE_TIMESTAMP_RANGE_EVENT(name) \ DEFINE_EVENT(xfs_timestamp_range_class, name, \ TP_PROTO(struct xfs_mount *mp, long long min, long long max), \ TP_ARGS(mp, min, max)) DEFINE_TIMESTAMP_RANGE_EVENT(xfs_inode_timestamp_range); DEFINE_TIMESTAMP_RANGE_EVENT(xfs_quota_expiry_range); DECLARE_EVENT_CLASS(xfs_icwalk_class, TP_PROTO(struct xfs_mount *mp, struct xfs_icwalk *icw, unsigned long caller_ip), TP_ARGS(mp, icw, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(__u32, flags) __field(uint32_t, uid) __field(uint32_t, gid) __field(prid_t, prid) __field(__u64, min_file_size) __field(long, scan_limit) __field(unsigned long, caller_ip) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->flags = icw ? icw->icw_flags : 0; __entry->uid = icw ? from_kuid(mp->m_super->s_user_ns, icw->icw_uid) : 0; __entry->gid = icw ? from_kgid(mp->m_super->s_user_ns, icw->icw_gid) : 0; __entry->prid = icw ? icw->icw_prid : 0; __entry->min_file_size = icw ? icw->icw_min_file_size : 0; __entry->scan_limit = icw ? icw->icw_scan_limit : 0; __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d flags 0x%x uid %u gid %u prid %u minsize %llu scan_limit %ld caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->flags, __entry->uid, __entry->gid, __entry->prid, __entry->min_file_size, __entry->scan_limit, (char *)__entry->caller_ip) ); #define DEFINE_ICWALK_EVENT(name) \ DEFINE_EVENT(xfs_icwalk_class, name, \ TP_PROTO(struct xfs_mount *mp, struct xfs_icwalk *icw, \ unsigned long caller_ip), \ TP_ARGS(mp, icw, caller_ip)) DEFINE_ICWALK_EVENT(xfs_ioc_free_eofblocks); DEFINE_ICWALK_EVENT(xfs_blockgc_free_space); TRACE_DEFINE_ENUM(XLOG_STATE_ACTIVE); TRACE_DEFINE_ENUM(XLOG_STATE_WANT_SYNC); TRACE_DEFINE_ENUM(XLOG_STATE_SYNCING); TRACE_DEFINE_ENUM(XLOG_STATE_DONE_SYNC); TRACE_DEFINE_ENUM(XLOG_STATE_CALLBACK); TRACE_DEFINE_ENUM(XLOG_STATE_DIRTY); DECLARE_EVENT_CLASS(xlog_iclog_class, TP_PROTO(struct xlog_in_core *iclog, unsigned long caller_ip), TP_ARGS(iclog, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(uint32_t, state) __field(int32_t, refcount) __field(uint32_t, offset) __field(uint32_t, flags) __field(unsigned long long, lsn) __field(unsigned long, caller_ip) ), TP_fast_assign( __entry->dev = iclog->ic_log->l_mp->m_super->s_dev; __entry->state = iclog->ic_state; __entry->refcount = atomic_read(&iclog->ic_refcnt); __entry->offset = iclog->ic_offset; __entry->flags = iclog->ic_flags; __entry->lsn = be64_to_cpu(iclog->ic_header.h_lsn); __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d state %s refcnt %d offset %u lsn 0x%llx flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __print_symbolic(__entry->state, XLOG_STATE_STRINGS), __entry->refcount, __entry->offset, __entry->lsn, __print_flags(__entry->flags, "|", XLOG_ICL_STRINGS), (char *)__entry->caller_ip) ); #define DEFINE_ICLOG_EVENT(name) \ DEFINE_EVENT(xlog_iclog_class, name, \ TP_PROTO(struct xlog_in_core *iclog, unsigned long caller_ip), \ TP_ARGS(iclog, caller_ip)) DEFINE_ICLOG_EVENT(xlog_iclog_activate); DEFINE_ICLOG_EVENT(xlog_iclog_clean); DEFINE_ICLOG_EVENT(xlog_iclog_callback); DEFINE_ICLOG_EVENT(xlog_iclog_callbacks_start); DEFINE_ICLOG_EVENT(xlog_iclog_callbacks_done); DEFINE_ICLOG_EVENT(xlog_iclog_force); DEFINE_ICLOG_EVENT(xlog_iclog_force_lsn); DEFINE_ICLOG_EVENT(xlog_iclog_get_space); DEFINE_ICLOG_EVENT(xlog_iclog_release); DEFINE_ICLOG_EVENT(xlog_iclog_switch); DEFINE_ICLOG_EVENT(xlog_iclog_sync); DEFINE_ICLOG_EVENT(xlog_iclog_syncing); DEFINE_ICLOG_EVENT(xlog_iclog_sync_done); DEFINE_ICLOG_EVENT(xlog_iclog_want_sync); DEFINE_ICLOG_EVENT(xlog_iclog_wait_on); DEFINE_ICLOG_EVENT(xlog_iclog_write); TRACE_DEFINE_ENUM(XFS_DAS_UNINIT); TRACE_DEFINE_ENUM(XFS_DAS_SF_ADD); TRACE_DEFINE_ENUM(XFS_DAS_SF_REMOVE); TRACE_DEFINE_ENUM(XFS_DAS_LEAF_ADD); TRACE_DEFINE_ENUM(XFS_DAS_LEAF_REMOVE); TRACE_DEFINE_ENUM(XFS_DAS_NODE_ADD); TRACE_DEFINE_ENUM(XFS_DAS_NODE_REMOVE); TRACE_DEFINE_ENUM(XFS_DAS_LEAF_SET_RMT); TRACE_DEFINE_ENUM(XFS_DAS_LEAF_ALLOC_RMT); TRACE_DEFINE_ENUM(XFS_DAS_LEAF_REPLACE); TRACE_DEFINE_ENUM(XFS_DAS_LEAF_REMOVE_OLD); TRACE_DEFINE_ENUM(XFS_DAS_LEAF_REMOVE_RMT); TRACE_DEFINE_ENUM(XFS_DAS_LEAF_REMOVE_ATTR); TRACE_DEFINE_ENUM(XFS_DAS_NODE_SET_RMT); TRACE_DEFINE_ENUM(XFS_DAS_NODE_ALLOC_RMT); TRACE_DEFINE_ENUM(XFS_DAS_NODE_REPLACE); TRACE_DEFINE_ENUM(XFS_DAS_NODE_REMOVE_OLD); TRACE_DEFINE_ENUM(XFS_DAS_NODE_REMOVE_RMT); TRACE_DEFINE_ENUM(XFS_DAS_NODE_REMOVE_ATTR); TRACE_DEFINE_ENUM(XFS_DAS_DONE); DECLARE_EVENT_CLASS(xfs_das_state_class, TP_PROTO(int das, struct xfs_inode *ip), TP_ARGS(das, ip), TP_STRUCT__entry( __field(int, das) __field(xfs_ino_t, ino) ), TP_fast_assign( __entry->das = das; __entry->ino = ip->i_ino; ), TP_printk("state change %s ino 0x%llx", __print_symbolic(__entry->das, XFS_DAS_STRINGS), __entry->ino) ) #define DEFINE_DAS_STATE_EVENT(name) \ DEFINE_EVENT(xfs_das_state_class, name, \ TP_PROTO(int das, struct xfs_inode *ip), \ TP_ARGS(das, ip)) DEFINE_DAS_STATE_EVENT(xfs_attr_sf_addname_return); DEFINE_DAS_STATE_EVENT(xfs_attr_set_iter_return); DEFINE_DAS_STATE_EVENT(xfs_attr_leaf_addname_return); DEFINE_DAS_STATE_EVENT(xfs_attr_node_addname_return); DEFINE_DAS_STATE_EVENT(xfs_attr_remove_iter_return); DEFINE_DAS_STATE_EVENT(xfs_attr_rmtval_alloc); DEFINE_DAS_STATE_EVENT(xfs_attr_rmtval_remove_return); DEFINE_DAS_STATE_EVENT(xfs_attr_defer_add); TRACE_EVENT(xfs_force_shutdown, TP_PROTO(struct xfs_mount *mp, int ptag, int flags, const char *fname, int line_num), TP_ARGS(mp, ptag, flags, fname, line_num), TP_STRUCT__entry( __field(dev_t, dev) __field(int, ptag) __field(int, flags) __string(fname, fname) __field(int, line_num) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->ptag = ptag; __entry->flags = flags; __assign_str(fname); __entry->line_num = line_num; ), TP_printk("dev %d:%d tag %s flags %s file %s line_num %d", MAJOR(__entry->dev), MINOR(__entry->dev), __print_flags(__entry->ptag, "|", XFS_PTAG_STRINGS), __print_flags(__entry->flags, "|", XFS_SHUTDOWN_STRINGS), __get_str(fname), __entry->line_num) ); #ifdef CONFIG_XFS_DRAIN_INTENTS DECLARE_EVENT_CLASS(xfs_perag_intents_class, TP_PROTO(struct xfs_perag *pag, void *caller_ip), TP_ARGS(pag, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(long, nr_intents) __field(void *, caller_ip) ), TP_fast_assign( __entry->dev = pag->pag_mount->m_super->s_dev; __entry->agno = pag->pag_agno; __entry->nr_intents = atomic_read(&pag->pag_intents_drain.dr_count); __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d agno 0x%x intents %ld caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->nr_intents, __entry->caller_ip) ); #define DEFINE_PERAG_INTENTS_EVENT(name) \ DEFINE_EVENT(xfs_perag_intents_class, name, \ TP_PROTO(struct xfs_perag *pag, void *caller_ip), \ TP_ARGS(pag, caller_ip)) DEFINE_PERAG_INTENTS_EVENT(xfs_perag_intent_hold); DEFINE_PERAG_INTENTS_EVENT(xfs_perag_intent_rele); DEFINE_PERAG_INTENTS_EVENT(xfs_perag_wait_intents); #endif /* CONFIG_XFS_DRAIN_INTENTS */ #ifdef CONFIG_XFS_MEMORY_BUFS TRACE_EVENT(xmbuf_create, TP_PROTO(struct xfs_buftarg *btp), TP_ARGS(btp), TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, ino) __array(char, pathname, MAXNAMELEN) ), TP_fast_assign( char *path; struct file *file = btp->bt_file; __entry->dev = btp->bt_mount->m_super->s_dev; __entry->ino = file_inode(file)->i_ino; path = file_path(file, __entry->pathname, MAXNAMELEN); if (IS_ERR(path)) strncpy(__entry->pathname, "(unknown)", sizeof(__entry->pathname)); ), TP_printk("dev %d:%d xmino 0x%lx path '%s'", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->pathname) ); TRACE_EVENT(xmbuf_free, TP_PROTO(struct xfs_buftarg *btp), TP_ARGS(btp), TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, ino) __field(unsigned long long, bytes) __field(loff_t, size) ), TP_fast_assign( struct file *file = btp->bt_file; struct inode *inode = file_inode(file); __entry->dev = btp->bt_mount->m_super->s_dev; __entry->size = i_size_read(inode); __entry->bytes = (inode->i_blocks << SECTOR_SHIFT) + inode->i_bytes; __entry->ino = inode->i_ino; ), TP_printk("dev %d:%d xmino 0x%lx mem_bytes 0x%llx isize 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->bytes, __entry->size) ); #endif /* CONFIG_XFS_MEMORY_BUFS */ #ifdef CONFIG_XFS_BTREE_IN_MEM TRACE_EVENT(xfbtree_init, TP_PROTO(struct xfs_mount *mp, struct xfbtree *xfbt, const struct xfs_btree_ops *ops), TP_ARGS(mp, xfbt, ops), TP_STRUCT__entry( __field(const void *, btree_ops) __field(unsigned long, xfino) __field(unsigned int, leaf_mxr) __field(unsigned int, leaf_mnr) __field(unsigned int, node_mxr) __field(unsigned int, node_mnr) __field(unsigned long long, owner) ), TP_fast_assign( __entry->btree_ops = ops; __entry->xfino = file_inode(xfbt->target->bt_file)->i_ino; __entry->leaf_mxr = xfbt->maxrecs[0]; __entry->node_mxr = xfbt->maxrecs[1]; __entry->leaf_mnr = xfbt->minrecs[0]; __entry->node_mnr = xfbt->minrecs[1]; __entry->owner = xfbt->owner; ), TP_printk("xfino 0x%lx btree_ops %pS owner 0x%llx leaf_mxr %u leaf_mnr %u node_mxr %u node_mnr %u", __entry->xfino, __entry->btree_ops, __entry->owner, __entry->leaf_mxr, __entry->leaf_mnr, __entry->node_mxr, __entry->node_mnr) ); DECLARE_EVENT_CLASS(xfbtree_buf_class, TP_PROTO(struct xfbtree *xfbt, struct xfs_buf *bp), TP_ARGS(xfbt, bp), TP_STRUCT__entry( __field(unsigned long, xfino) __field(xfs_daddr_t, bno) __field(int, nblks) __field(int, hold) __field(int, pincount) __field(unsigned int, lockval) __field(unsigned int, flags) ), TP_fast_assign( __entry->xfino = file_inode(xfbt->target->bt_file)->i_ino; __entry->bno = xfs_buf_daddr(bp); __entry->nblks = bp->b_length; __entry->hold = atomic_read(&bp->b_hold); __entry->pincount = atomic_read(&bp->b_pin_count); __entry->lockval = bp->b_sema.count; __entry->flags = bp->b_flags; ), TP_printk("xfino 0x%lx daddr 0x%llx bbcount 0x%x hold %d pincount %d lock %d flags %s", __entry->xfino, (unsigned long long)__entry->bno, __entry->nblks, __entry->hold, __entry->pincount, __entry->lockval, __print_flags(__entry->flags, "|", XFS_BUF_FLAGS)) ) #define DEFINE_XFBTREE_BUF_EVENT(name) \ DEFINE_EVENT(xfbtree_buf_class, name, \ TP_PROTO(struct xfbtree *xfbt, struct xfs_buf *bp), \ TP_ARGS(xfbt, bp)) DEFINE_XFBTREE_BUF_EVENT(xfbtree_create_root_buf); DEFINE_XFBTREE_BUF_EVENT(xfbtree_trans_commit_buf); DEFINE_XFBTREE_BUF_EVENT(xfbtree_trans_cancel_buf); DECLARE_EVENT_CLASS(xfbtree_freesp_class, TP_PROTO(struct xfbtree *xfbt, struct xfs_btree_cur *cur, xfs_fileoff_t fileoff), TP_ARGS(xfbt, cur, fileoff), TP_STRUCT__entry( __field(unsigned long, xfino) __string(btname, cur->bc_ops->name) __field(int, nlevels) __field(xfs_fileoff_t, fileoff) ), TP_fast_assign( __entry->xfino = file_inode(xfbt->target->bt_file)->i_ino; __assign_str(btname); __entry->nlevels = cur->bc_nlevels; __entry->fileoff = fileoff; ), TP_printk("xfino 0x%lx %sbt nlevels %d fileoff 0x%llx", __entry->xfino, __get_str(btname), __entry->nlevels, (unsigned long long)__entry->fileoff) ) #define DEFINE_XFBTREE_FREESP_EVENT(name) \ DEFINE_EVENT(xfbtree_freesp_class, name, \ TP_PROTO(struct xfbtree *xfbt, struct xfs_btree_cur *cur, \ xfs_fileoff_t fileoff), \ TP_ARGS(xfbt, cur, fileoff)) DEFINE_XFBTREE_FREESP_EVENT(xfbtree_alloc_block); DEFINE_XFBTREE_FREESP_EVENT(xfbtree_free_block); #endif /* CONFIG_XFS_BTREE_IN_MEM */ /* exchmaps tracepoints */ #define XFS_EXCHMAPS_STRINGS \ { XFS_EXCHMAPS_ATTR_FORK, "ATTRFORK" }, \ { XFS_EXCHMAPS_SET_SIZES, "SETSIZES" }, \ { XFS_EXCHMAPS_INO1_WRITTEN, "INO1_WRITTEN" }, \ { XFS_EXCHMAPS_CLEAR_INO1_REFLINK, "CLEAR_INO1_REFLINK" }, \ { XFS_EXCHMAPS_CLEAR_INO2_REFLINK, "CLEAR_INO2_REFLINK" }, \ { __XFS_EXCHMAPS_INO2_SHORTFORM, "INO2_SF" } DEFINE_INODE_IREC_EVENT(xfs_exchmaps_mapping1_skip); DEFINE_INODE_IREC_EVENT(xfs_exchmaps_mapping1); DEFINE_INODE_IREC_EVENT(xfs_exchmaps_mapping2); DEFINE_ITRUNC_EVENT(xfs_exchmaps_update_inode_size); #define XFS_EXCHRANGE_INODES \ { 1, "file1" }, \ { 2, "file2" } DECLARE_EVENT_CLASS(xfs_exchrange_inode_class, TP_PROTO(struct xfs_inode *ip, int whichfile), TP_ARGS(ip, whichfile), TP_STRUCT__entry( __field(dev_t, dev) __field(int, whichfile) __field(xfs_ino_t, ino) __field(int, format) __field(xfs_extnum_t, nex) __field(int, broot_size) __field(int, fork_off) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->whichfile = whichfile; __entry->ino = ip->i_ino; __entry->format = ip->i_df.if_format; __entry->nex = ip->i_df.if_nextents; __entry->fork_off = xfs_inode_fork_boff(ip); ), TP_printk("dev %d:%d ino 0x%llx whichfile %s format %s num_extents %llu forkoff 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __print_symbolic(__entry->whichfile, XFS_EXCHRANGE_INODES), __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR), __entry->nex, __entry->fork_off) ) #define DEFINE_EXCHRANGE_INODE_EVENT(name) \ DEFINE_EVENT(xfs_exchrange_inode_class, name, \ TP_PROTO(struct xfs_inode *ip, int whichfile), \ TP_ARGS(ip, whichfile)) DEFINE_EXCHRANGE_INODE_EVENT(xfs_exchrange_before); DEFINE_EXCHRANGE_INODE_EVENT(xfs_exchrange_after); DEFINE_INODE_ERROR_EVENT(xfs_exchrange_error); #define XFS_EXCHANGE_RANGE_FLAGS_STRS \ { XFS_EXCHANGE_RANGE_TO_EOF, "TO_EOF" }, \ { XFS_EXCHANGE_RANGE_DSYNC , "DSYNC" }, \ { XFS_EXCHANGE_RANGE_DRY_RUN, "DRY_RUN" }, \ { XFS_EXCHANGE_RANGE_FILE1_WRITTEN, "F1_WRITTEN" }, \ { __XFS_EXCHANGE_RANGE_UPD_CMTIME1, "CMTIME1" }, \ { __XFS_EXCHANGE_RANGE_UPD_CMTIME2, "CMTIME2" } /* file exchange-range tracepoint class */ DECLARE_EVENT_CLASS(xfs_exchrange_class, TP_PROTO(const struct xfs_exchrange *fxr, struct xfs_inode *ip1, struct xfs_inode *ip2), TP_ARGS(fxr, ip1, ip2), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ip1_ino) __field(loff_t, ip1_isize) __field(loff_t, ip1_disize) __field(xfs_ino_t, ip2_ino) __field(loff_t, ip2_isize) __field(loff_t, ip2_disize) __field(loff_t, file1_offset) __field(loff_t, file2_offset) __field(unsigned long long, length) __field(unsigned long long, flags) ), TP_fast_assign( __entry->dev = VFS_I(ip1)->i_sb->s_dev; __entry->ip1_ino = ip1->i_ino; __entry->ip1_isize = VFS_I(ip1)->i_size; __entry->ip1_disize = ip1->i_disk_size; __entry->ip2_ino = ip2->i_ino; __entry->ip2_isize = VFS_I(ip2)->i_size; __entry->ip2_disize = ip2->i_disk_size; __entry->file1_offset = fxr->file1_offset; __entry->file2_offset = fxr->file2_offset; __entry->length = fxr->length; __entry->flags = fxr->flags; ), TP_printk("dev %d:%d flags %s bytecount 0x%llx " "ino1 0x%llx isize 0x%llx disize 0x%llx pos 0x%llx -> " "ino2 0x%llx isize 0x%llx disize 0x%llx pos 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __print_flags_u64(__entry->flags, "|", XFS_EXCHANGE_RANGE_FLAGS_STRS), __entry->length, __entry->ip1_ino, __entry->ip1_isize, __entry->ip1_disize, __entry->file1_offset, __entry->ip2_ino, __entry->ip2_isize, __entry->ip2_disize, __entry->file2_offset) ) #define DEFINE_EXCHRANGE_EVENT(name) \ DEFINE_EVENT(xfs_exchrange_class, name, \ TP_PROTO(const struct xfs_exchrange *fxr, struct xfs_inode *ip1, \ struct xfs_inode *ip2), \ TP_ARGS(fxr, ip1, ip2)) DEFINE_EXCHRANGE_EVENT(xfs_exchrange_prep); DEFINE_EXCHRANGE_EVENT(xfs_exchrange_flush); DEFINE_EXCHRANGE_EVENT(xfs_exchrange_mappings); TRACE_EVENT(xfs_exchmaps_overhead, TP_PROTO(struct xfs_mount *mp, unsigned long long bmbt_blocks, unsigned long long rmapbt_blocks), TP_ARGS(mp, bmbt_blocks, rmapbt_blocks), TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long long, bmbt_blocks) __field(unsigned long long, rmapbt_blocks) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->bmbt_blocks = bmbt_blocks; __entry->rmapbt_blocks = rmapbt_blocks; ), TP_printk("dev %d:%d bmbt_blocks 0x%llx rmapbt_blocks 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->bmbt_blocks, __entry->rmapbt_blocks) ); DECLARE_EVENT_CLASS(xfs_exchmaps_estimate_class, TP_PROTO(const struct xfs_exchmaps_req *req), TP_ARGS(req), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino1) __field(xfs_ino_t, ino2) __field(xfs_fileoff_t, startoff1) __field(xfs_fileoff_t, startoff2) __field(xfs_filblks_t, blockcount) __field(uint64_t, flags) __field(xfs_filblks_t, ip1_bcount) __field(xfs_filblks_t, ip2_bcount) __field(xfs_filblks_t, ip1_rtbcount) __field(xfs_filblks_t, ip2_rtbcount) __field(unsigned long long, resblks) __field(unsigned long long, nr_exchanges) ), TP_fast_assign( __entry->dev = req->ip1->i_mount->m_super->s_dev; __entry->ino1 = req->ip1->i_ino; __entry->ino2 = req->ip2->i_ino; __entry->startoff1 = req->startoff1; __entry->startoff2 = req->startoff2; __entry->blockcount = req->blockcount; __entry->flags = req->flags; __entry->ip1_bcount = req->ip1_bcount; __entry->ip2_bcount = req->ip2_bcount; __entry->ip1_rtbcount = req->ip1_rtbcount; __entry->ip2_rtbcount = req->ip2_rtbcount; __entry->resblks = req->resblks; __entry->nr_exchanges = req->nr_exchanges; ), TP_printk("dev %d:%d ino1 0x%llx fileoff1 0x%llx ino2 0x%llx fileoff2 0x%llx fsbcount 0x%llx flags (%s) bcount1 0x%llx rtbcount1 0x%llx bcount2 0x%llx rtbcount2 0x%llx resblks 0x%llx nr_exchanges %llu", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino1, __entry->startoff1, __entry->ino2, __entry->startoff2, __entry->blockcount, __print_flags_u64(__entry->flags, "|", XFS_EXCHMAPS_STRINGS), __entry->ip1_bcount, __entry->ip1_rtbcount, __entry->ip2_bcount, __entry->ip2_rtbcount, __entry->resblks, __entry->nr_exchanges) ); #define DEFINE_EXCHMAPS_ESTIMATE_EVENT(name) \ DEFINE_EVENT(xfs_exchmaps_estimate_class, name, \ TP_PROTO(const struct xfs_exchmaps_req *req), \ TP_ARGS(req)) DEFINE_EXCHMAPS_ESTIMATE_EVENT(xfs_exchmaps_initial_estimate); DEFINE_EXCHMAPS_ESTIMATE_EVENT(xfs_exchmaps_final_estimate); DECLARE_EVENT_CLASS(xfs_exchmaps_intent_class, TP_PROTO(struct xfs_mount *mp, const struct xfs_exchmaps_intent *xmi), TP_ARGS(mp, xmi), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino1) __field(xfs_ino_t, ino2) __field(uint64_t, flags) __field(xfs_fileoff_t, startoff1) __field(xfs_fileoff_t, startoff2) __field(xfs_filblks_t, blockcount) __field(xfs_fsize_t, isize1) __field(xfs_fsize_t, isize2) __field(xfs_fsize_t, new_isize1) __field(xfs_fsize_t, new_isize2) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->ino1 = xmi->xmi_ip1->i_ino; __entry->ino2 = xmi->xmi_ip2->i_ino; __entry->flags = xmi->xmi_flags; __entry->startoff1 = xmi->xmi_startoff1; __entry->startoff2 = xmi->xmi_startoff2; __entry->blockcount = xmi->xmi_blockcount; __entry->isize1 = xmi->xmi_ip1->i_disk_size; __entry->isize2 = xmi->xmi_ip2->i_disk_size; __entry->new_isize1 = xmi->xmi_isize1; __entry->new_isize2 = xmi->xmi_isize2; ), TP_printk("dev %d:%d ino1 0x%llx fileoff1 0x%llx ino2 0x%llx fileoff2 0x%llx fsbcount 0x%llx flags (%s) isize1 0x%llx newisize1 0x%llx isize2 0x%llx newisize2 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino1, __entry->startoff1, __entry->ino2, __entry->startoff2, __entry->blockcount, __print_flags_u64(__entry->flags, "|", XFS_EXCHMAPS_STRINGS), __entry->isize1, __entry->new_isize1, __entry->isize2, __entry->new_isize2) ); #define DEFINE_EXCHMAPS_INTENT_EVENT(name) \ DEFINE_EVENT(xfs_exchmaps_intent_class, name, \ TP_PROTO(struct xfs_mount *mp, const struct xfs_exchmaps_intent *xmi), \ TP_ARGS(mp, xmi)) DEFINE_EXCHMAPS_INTENT_EVENT(xfs_exchmaps_defer); DEFINE_EXCHMAPS_INTENT_EVENT(xfs_exchmaps_recover); TRACE_EVENT(xfs_exchmaps_delta_nextents_step, TP_PROTO(struct xfs_mount *mp, const struct xfs_bmbt_irec *left, const struct xfs_bmbt_irec *curr, const struct xfs_bmbt_irec *new, const struct xfs_bmbt_irec *right, int delta, unsigned int state), TP_ARGS(mp, left, curr, new, right, delta, state), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_fileoff_t, loff) __field(xfs_fsblock_t, lstart) __field(xfs_filblks_t, lcount) __field(xfs_fileoff_t, coff) __field(xfs_fsblock_t, cstart) __field(xfs_filblks_t, ccount) __field(xfs_fileoff_t, noff) __field(xfs_fsblock_t, nstart) __field(xfs_filblks_t, ncount) __field(xfs_fileoff_t, roff) __field(xfs_fsblock_t, rstart) __field(xfs_filblks_t, rcount) __field(int, delta) __field(unsigned int, state) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->loff = left->br_startoff; __entry->lstart = left->br_startblock; __entry->lcount = left->br_blockcount; __entry->coff = curr->br_startoff; __entry->cstart = curr->br_startblock; __entry->ccount = curr->br_blockcount; __entry->noff = new->br_startoff; __entry->nstart = new->br_startblock; __entry->ncount = new->br_blockcount; __entry->roff = right->br_startoff; __entry->rstart = right->br_startblock; __entry->rcount = right->br_blockcount; __entry->delta = delta; __entry->state = state; ), TP_printk("dev %d:%d left 0x%llx:0x%llx:0x%llx; curr 0x%llx:0x%llx:0x%llx <- new 0x%llx:0x%llx:0x%llx; right 0x%llx:0x%llx:0x%llx delta %d state 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->loff, __entry->lstart, __entry->lcount, __entry->coff, __entry->cstart, __entry->ccount, __entry->noff, __entry->nstart, __entry->ncount, __entry->roff, __entry->rstart, __entry->rcount, __entry->delta, __entry->state) ); TRACE_EVENT(xfs_exchmaps_delta_nextents, TP_PROTO(const struct xfs_exchmaps_req *req, int64_t d_nexts1, int64_t d_nexts2), TP_ARGS(req, d_nexts1, d_nexts2), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino1) __field(xfs_ino_t, ino2) __field(xfs_extnum_t, nexts1) __field(xfs_extnum_t, nexts2) __field(int64_t, d_nexts1) __field(int64_t, d_nexts2) ), TP_fast_assign( int whichfork = xfs_exchmaps_reqfork(req); __entry->dev = req->ip1->i_mount->m_super->s_dev; __entry->ino1 = req->ip1->i_ino; __entry->ino2 = req->ip2->i_ino; __entry->nexts1 = xfs_ifork_ptr(req->ip1, whichfork)->if_nextents; __entry->nexts2 = xfs_ifork_ptr(req->ip2, whichfork)->if_nextents; __entry->d_nexts1 = d_nexts1; __entry->d_nexts2 = d_nexts2; ), TP_printk("dev %d:%d ino1 0x%llx nexts %llu ino2 0x%llx nexts %llu delta1 %lld delta2 %lld", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino1, __entry->nexts1, __entry->ino2, __entry->nexts2, __entry->d_nexts1, __entry->d_nexts2) ); DECLARE_EVENT_CLASS(xfs_getparents_rec_class, TP_PROTO(struct xfs_inode *ip, const struct xfs_getparents *ppi, const struct xfs_attr_list_context *context, const struct xfs_getparents_rec *pptr), TP_ARGS(ip, ppi, context, pptr), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(unsigned int, firstu) __field(unsigned short, reclen) __field(unsigned int, bufsize) __field(xfs_ino_t, parent_ino) __field(unsigned int, parent_gen) __string(name, pptr->gpr_name) ), TP_fast_assign( __entry->dev = ip->i_mount->m_super->s_dev; __entry->ino = ip->i_ino; __entry->firstu = context->firstu; __entry->reclen = pptr->gpr_reclen; __entry->bufsize = ppi->gp_bufsize; __entry->parent_ino = pptr->gpr_parent.ha_fid.fid_ino; __entry->parent_gen = pptr->gpr_parent.ha_fid.fid_gen; __assign_str(name); ), TP_printk("dev %d:%d ino 0x%llx firstu %u reclen %u bufsize %u parent_ino 0x%llx parent_gen 0x%x name '%s'", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->firstu, __entry->reclen, __entry->bufsize, __entry->parent_ino, __entry->parent_gen, __get_str(name)) ) #define DEFINE_XFS_GETPARENTS_REC_EVENT(name) \ DEFINE_EVENT(xfs_getparents_rec_class, name, \ TP_PROTO(struct xfs_inode *ip, const struct xfs_getparents *ppi, \ const struct xfs_attr_list_context *context, \ const struct xfs_getparents_rec *pptr), \ TP_ARGS(ip, ppi, context, pptr)) DEFINE_XFS_GETPARENTS_REC_EVENT(xfs_getparents_put_listent); DEFINE_XFS_GETPARENTS_REC_EVENT(xfs_getparents_expand_lastrec); DECLARE_EVENT_CLASS(xfs_getparents_class, TP_PROTO(struct xfs_inode *ip, const struct xfs_getparents *ppi, const struct xfs_attrlist_cursor_kern *cur), TP_ARGS(ip, ppi, cur), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(unsigned short, iflags) __field(unsigned short, oflags) __field(unsigned int, bufsize) __field(unsigned int, hashval) __field(unsigned int, blkno) __field(unsigned int, offset) __field(int, initted) ), TP_fast_assign( __entry->dev = ip->i_mount->m_super->s_dev; __entry->ino = ip->i_ino; __entry->iflags = ppi->gp_iflags; __entry->oflags = ppi->gp_oflags; __entry->bufsize = ppi->gp_bufsize; __entry->hashval = cur->hashval; __entry->blkno = cur->blkno; __entry->offset = cur->offset; __entry->initted = cur->initted; ), TP_printk("dev %d:%d ino 0x%llx iflags 0x%x oflags 0x%x bufsize %u cur_init? %d hashval 0x%x blkno %u offset %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->iflags, __entry->oflags, __entry->bufsize, __entry->initted, __entry->hashval, __entry->blkno, __entry->offset) ) #define DEFINE_XFS_GETPARENTS_EVENT(name) \ DEFINE_EVENT(xfs_getparents_class, name, \ TP_PROTO(struct xfs_inode *ip, const struct xfs_getparents *ppi, \ const struct xfs_attrlist_cursor_kern *cur), \ TP_ARGS(ip, ppi, cur)) DEFINE_XFS_GETPARENTS_EVENT(xfs_getparents_begin); DEFINE_XFS_GETPARENTS_EVENT(xfs_getparents_end); #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #define TRACE_INCLUDE_FILE xfs_trace #include <trace/define_trace.h>
36 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM scsi #if !defined(_TRACE_SCSI_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SCSI_H #include <scsi/scsi_cmnd.h> #include <scsi/scsi_host.h> #include <linux/tracepoint.h> #include <linux/trace_seq.h> #define scsi_opcode_name(opcode) { opcode, #opcode } #define show_opcode_name(val) \ __print_symbolic(val, \ scsi_opcode_name(TEST_UNIT_READY), \ scsi_opcode_name(REZERO_UNIT), \ scsi_opcode_name(REQUEST_SENSE), \ scsi_opcode_name(FORMAT_UNIT), \ scsi_opcode_name(READ_BLOCK_LIMITS), \ scsi_opcode_name(REASSIGN_BLOCKS), \ scsi_opcode_name(INITIALIZE_ELEMENT_STATUS), \ scsi_opcode_name(READ_6), \ scsi_opcode_name(WRITE_6), \ scsi_opcode_name(SEEK_6), \ scsi_opcode_name(READ_REVERSE), \ scsi_opcode_name(WRITE_FILEMARKS), \ scsi_opcode_name(SPACE), \ scsi_opcode_name(INQUIRY), \ scsi_opcode_name(RECOVER_BUFFERED_DATA), \ scsi_opcode_name(MODE_SELECT), \ scsi_opcode_name(RESERVE), \ scsi_opcode_name(RELEASE), \ scsi_opcode_name(COPY), \ scsi_opcode_name(ERASE), \ scsi_opcode_name(MODE_SENSE), \ scsi_opcode_name(START_STOP), \ scsi_opcode_name(RECEIVE_DIAGNOSTIC), \ scsi_opcode_name(SEND_DIAGNOSTIC), \ scsi_opcode_name(ALLOW_MEDIUM_REMOVAL), \ scsi_opcode_name(SET_WINDOW), \ scsi_opcode_name(READ_CAPACITY), \ scsi_opcode_name(READ_10), \ scsi_opcode_name(WRITE_10), \ scsi_opcode_name(SEEK_10), \ scsi_opcode_name(POSITION_TO_ELEMENT), \ scsi_opcode_name(WRITE_VERIFY), \ scsi_opcode_name(VERIFY), \ scsi_opcode_name(SEARCH_HIGH), \ scsi_opcode_name(SEARCH_EQUAL), \ scsi_opcode_name(SEARCH_LOW), \ scsi_opcode_name(SET_LIMITS), \ scsi_opcode_name(PRE_FETCH), \ scsi_opcode_name(READ_POSITION), \ scsi_opcode_name(SYNCHRONIZE_CACHE), \ scsi_opcode_name(LOCK_UNLOCK_CACHE), \ scsi_opcode_name(READ_DEFECT_DATA), \ scsi_opcode_name(MEDIUM_SCAN), \ scsi_opcode_name(COMPARE), \ scsi_opcode_name(COPY_VERIFY), \ scsi_opcode_name(WRITE_BUFFER), \ scsi_opcode_name(READ_BUFFER), \ scsi_opcode_name(UPDATE_BLOCK), \ scsi_opcode_name(READ_LONG), \ scsi_opcode_name(WRITE_LONG), \ scsi_opcode_name(CHANGE_DEFINITION), \ scsi_opcode_name(WRITE_SAME), \ scsi_opcode_name(UNMAP), \ scsi_opcode_name(READ_TOC), \ scsi_opcode_name(LOG_SELECT), \ scsi_opcode_name(LOG_SENSE), \ scsi_opcode_name(XDWRITEREAD_10), \ scsi_opcode_name(MODE_SELECT_10), \ scsi_opcode_name(RESERVE_10), \ scsi_opcode_name(RELEASE_10), \ scsi_opcode_name(MODE_SENSE_10), \ scsi_opcode_name(PERSISTENT_RESERVE_IN), \ scsi_opcode_name(PERSISTENT_RESERVE_OUT), \ scsi_opcode_name(VARIABLE_LENGTH_CMD), \ scsi_opcode_name(REPORT_LUNS), \ scsi_opcode_name(MAINTENANCE_IN), \ scsi_opcode_name(MAINTENANCE_OUT), \ scsi_opcode_name(MOVE_MEDIUM), \ scsi_opcode_name(EXCHANGE_MEDIUM), \ scsi_opcode_name(READ_12), \ scsi_opcode_name(WRITE_12), \ scsi_opcode_name(WRITE_VERIFY_12), \ scsi_opcode_name(SEARCH_HIGH_12), \ scsi_opcode_name(SEARCH_EQUAL_12), \ scsi_opcode_name(SEARCH_LOW_12), \ scsi_opcode_name(READ_ELEMENT_STATUS), \ scsi_opcode_name(SEND_VOLUME_TAG), \ scsi_opcode_name(WRITE_LONG_2), \ scsi_opcode_name(READ_16), \ scsi_opcode_name(WRITE_16), \ scsi_opcode_name(VERIFY_16), \ scsi_opcode_name(WRITE_SAME_16), \ scsi_opcode_name(ZBC_OUT), \ scsi_opcode_name(ZBC_IN), \ scsi_opcode_name(SERVICE_ACTION_IN_16), \ scsi_opcode_name(READ_32), \ scsi_opcode_name(WRITE_32), \ scsi_opcode_name(WRITE_SAME_32), \ scsi_opcode_name(ATA_16), \ scsi_opcode_name(WRITE_ATOMIC_16), \ scsi_opcode_name(ATA_12)) #define scsi_hostbyte_name(result) { result, #result } #define show_hostbyte_name(val) \ __print_symbolic(val, \ scsi_hostbyte_name(DID_OK), \ scsi_hostbyte_name(DID_NO_CONNECT), \ scsi_hostbyte_name(DID_BUS_BUSY), \ scsi_hostbyte_name(DID_TIME_OUT), \ scsi_hostbyte_name(DID_BAD_TARGET), \ scsi_hostbyte_name(DID_ABORT), \ scsi_hostbyte_name(DID_PARITY), \ scsi_hostbyte_name(DID_ERROR), \ scsi_hostbyte_name(DID_RESET), \ scsi_hostbyte_name(DID_BAD_INTR), \ scsi_hostbyte_name(DID_PASSTHROUGH), \ scsi_hostbyte_name(DID_SOFT_ERROR), \ scsi_hostbyte_name(DID_IMM_RETRY), \ scsi_hostbyte_name(DID_REQUEUE), \ scsi_hostbyte_name(DID_TRANSPORT_DISRUPTED), \ scsi_hostbyte_name(DID_TRANSPORT_FAILFAST)) #define scsi_statusbyte_name(result) { result, #result } #define show_statusbyte_name(val) \ __print_symbolic(val, \ scsi_statusbyte_name(SAM_STAT_GOOD), \ scsi_statusbyte_name(SAM_STAT_CHECK_CONDITION), \ scsi_statusbyte_name(SAM_STAT_CONDITION_MET), \ scsi_statusbyte_name(SAM_STAT_BUSY), \ scsi_statusbyte_name(SAM_STAT_INTERMEDIATE), \ scsi_statusbyte_name(SAM_STAT_INTERMEDIATE_CONDITION_MET), \ scsi_statusbyte_name(SAM_STAT_RESERVATION_CONFLICT), \ scsi_statusbyte_name(SAM_STAT_COMMAND_TERMINATED), \ scsi_statusbyte_name(SAM_STAT_TASK_SET_FULL), \ scsi_statusbyte_name(SAM_STAT_ACA_ACTIVE), \ scsi_statusbyte_name(SAM_STAT_TASK_ABORTED)) #define scsi_prot_op_name(result) { result, #result } #define show_prot_op_name(val) \ __print_symbolic(val, \ scsi_prot_op_name(SCSI_PROT_NORMAL), \ scsi_prot_op_name(SCSI_PROT_READ_INSERT), \ scsi_prot_op_name(SCSI_PROT_WRITE_STRIP), \ scsi_prot_op_name(SCSI_PROT_READ_STRIP), \ scsi_prot_op_name(SCSI_PROT_WRITE_INSERT), \ scsi_prot_op_name(SCSI_PROT_READ_PASS), \ scsi_prot_op_name(SCSI_PROT_WRITE_PASS)) const char *scsi_trace_parse_cdb(struct trace_seq*, unsigned char*, int); #define __parse_cdb(cdb, len) scsi_trace_parse_cdb(p, cdb, len) TRACE_EVENT(scsi_dispatch_cmd_start, TP_PROTO(struct scsi_cmnd *cmd), TP_ARGS(cmd), TP_STRUCT__entry( __field( unsigned int, host_no ) __field( unsigned int, channel ) __field( unsigned int, id ) __field( unsigned int, lun ) __field( unsigned int, opcode ) __field( unsigned int, cmd_len ) __field( int, driver_tag) __field( int, scheduler_tag) __field( unsigned int, data_sglen ) __field( unsigned int, prot_sglen ) __field( unsigned char, prot_op ) __dynamic_array(unsigned char, cmnd, cmd->cmd_len) ), TP_fast_assign( __entry->host_no = cmd->device->host->host_no; __entry->channel = cmd->device->channel; __entry->id = cmd->device->id; __entry->lun = cmd->device->lun; __entry->opcode = cmd->cmnd[0]; __entry->cmd_len = cmd->cmd_len; __entry->driver_tag = scsi_cmd_to_rq(cmd)->tag; __entry->scheduler_tag = scsi_cmd_to_rq(cmd)->internal_tag; __entry->data_sglen = scsi_sg_count(cmd); __entry->prot_sglen = scsi_prot_sg_count(cmd); __entry->prot_op = scsi_get_prot_op(cmd); memcpy(__get_dynamic_array(cmnd), cmd->cmnd, cmd->cmd_len); ), TP_printk("host_no=%u channel=%u id=%u lun=%u data_sgl=%u prot_sgl=%u" \ " prot_op=%s driver_tag=%d scheduler_tag=%d cmnd=(%s %s raw=%s)", __entry->host_no, __entry->channel, __entry->id, __entry->lun, __entry->data_sglen, __entry->prot_sglen, show_prot_op_name(__entry->prot_op), __entry->driver_tag, __entry->scheduler_tag, show_opcode_name(__entry->opcode), __parse_cdb(__get_dynamic_array(cmnd), __entry->cmd_len), __print_hex(__get_dynamic_array(cmnd), __entry->cmd_len)) ); TRACE_EVENT(scsi_dispatch_cmd_error, TP_PROTO(struct scsi_cmnd *cmd, int rtn), TP_ARGS(cmd, rtn), TP_STRUCT__entry( __field( unsigned int, host_no ) __field( unsigned int, channel ) __field( unsigned int, id ) __field( unsigned int, lun ) __field( int, rtn ) __field( unsigned int, opcode ) __field( unsigned int, cmd_len ) __field( int, driver_tag) __field( int, scheduler_tag) __field( unsigned int, data_sglen ) __field( unsigned int, prot_sglen ) __field( unsigned char, prot_op ) __dynamic_array(unsigned char, cmnd, cmd->cmd_len) ), TP_fast_assign( __entry->host_no = cmd->device->host->host_no; __entry->channel = cmd->device->channel; __entry->id = cmd->device->id; __entry->lun = cmd->device->lun; __entry->rtn = rtn; __entry->opcode = cmd->cmnd[0]; __entry->cmd_len = cmd->cmd_len; __entry->driver_tag = scsi_cmd_to_rq(cmd)->tag; __entry->scheduler_tag = scsi_cmd_to_rq(cmd)->internal_tag; __entry->data_sglen = scsi_sg_count(cmd); __entry->prot_sglen = scsi_prot_sg_count(cmd); __entry->prot_op = scsi_get_prot_op(cmd); memcpy(__get_dynamic_array(cmnd), cmd->cmnd, cmd->cmd_len); ), TP_printk("host_no=%u channel=%u id=%u lun=%u data_sgl=%u prot_sgl=%u" \ " prot_op=%s driver_tag=%d scheduler_tag=%d cmnd=(%s %s raw=%s)" \ " rtn=%d", __entry->host_no, __entry->channel, __entry->id, __entry->lun, __entry->data_sglen, __entry->prot_sglen, show_prot_op_name(__entry->prot_op), __entry->driver_tag, __entry->scheduler_tag, show_opcode_name(__entry->opcode), __parse_cdb(__get_dynamic_array(cmnd), __entry->cmd_len), __print_hex(__get_dynamic_array(cmnd), __entry->cmd_len), __entry->rtn) ); DECLARE_EVENT_CLASS(scsi_cmd_done_timeout_template, TP_PROTO(struct scsi_cmnd *cmd), TP_ARGS(cmd), TP_STRUCT__entry( __field( unsigned int, host_no ) __field( unsigned int, channel ) __field( unsigned int, id ) __field( unsigned int, lun ) __field( int, result ) __field( unsigned int, opcode ) __field( unsigned int, cmd_len ) __field( int, driver_tag) __field( int, scheduler_tag) __field( unsigned int, data_sglen ) __field( unsigned int, prot_sglen ) __field( unsigned char, prot_op ) __dynamic_array(unsigned char, cmnd, cmd->cmd_len) __field( u8, sense_key ) __field( u8, asc ) __field( u8, ascq ) ), TP_fast_assign( struct scsi_sense_hdr sshdr; __entry->host_no = cmd->device->host->host_no; __entry->channel = cmd->device->channel; __entry->id = cmd->device->id; __entry->lun = cmd->device->lun; __entry->result = cmd->result; __entry->opcode = cmd->cmnd[0]; __entry->cmd_len = cmd->cmd_len; __entry->driver_tag = scsi_cmd_to_rq(cmd)->tag; __entry->scheduler_tag = scsi_cmd_to_rq(cmd)->internal_tag; __entry->data_sglen = scsi_sg_count(cmd); __entry->prot_sglen = scsi_prot_sg_count(cmd); __entry->prot_op = scsi_get_prot_op(cmd); memcpy(__get_dynamic_array(cmnd), cmd->cmnd, cmd->cmd_len); if (cmd->sense_buffer && SCSI_SENSE_VALID(cmd) && scsi_command_normalize_sense(cmd, &sshdr)) { __entry->sense_key = sshdr.sense_key; __entry->asc = sshdr.asc; __entry->ascq = sshdr.ascq; } else { __entry->sense_key = 0; __entry->asc = 0; __entry->ascq = 0; } ), TP_printk("host_no=%u channel=%u id=%u lun=%u data_sgl=%u prot_sgl=%u " \ "prot_op=%s driver_tag=%d scheduler_tag=%d cmnd=(%s %s raw=%s) " \ "result=(driver=%s host=%s message=%s status=%s) " "sense=(key=%#x asc=%#x ascq=%#x)", __entry->host_no, __entry->channel, __entry->id, __entry->lun, __entry->data_sglen, __entry->prot_sglen, show_prot_op_name(__entry->prot_op), __entry->driver_tag, __entry->scheduler_tag, show_opcode_name(__entry->opcode), __parse_cdb(__get_dynamic_array(cmnd), __entry->cmd_len), __print_hex(__get_dynamic_array(cmnd), __entry->cmd_len), "DRIVER_OK", show_hostbyte_name(((__entry->result) >> 16) & 0xff), "COMMAND_COMPLETE", show_statusbyte_name(__entry->result & 0xff), __entry->sense_key, __entry->asc, __entry->ascq) ); DEFINE_EVENT(scsi_cmd_done_timeout_template, scsi_dispatch_cmd_done, TP_PROTO(struct scsi_cmnd *cmd), TP_ARGS(cmd)); DEFINE_EVENT(scsi_cmd_done_timeout_template, scsi_dispatch_cmd_timeout, TP_PROTO(struct scsi_cmnd *cmd), TP_ARGS(cmd)); TRACE_EVENT(scsi_eh_wakeup, TP_PROTO(struct Scsi_Host *shost), TP_ARGS(shost), TP_STRUCT__entry( __field( unsigned int, host_no ) ), TP_fast_assign( __entry->host_no = shost->host_no; ), TP_printk("host_no=%u", __entry->host_no) ); #endif /* _TRACE_SCSI_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
6 6 9 9 1 1 1 1 120 118 99 99 187 187 185 185 185 2 2 316 316 209 209 4 4 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 // SPDX-License-Identifier: GPL-2.0 #include <linux/bitops.h> #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/sched/mm.h> #include <linux/atomic.h> #include <linux/vmalloc.h> #include "ctree.h" #include "volumes.h" #include "zoned.h" #include "rcu-string.h" #include "disk-io.h" #include "block-group.h" #include "dev-replace.h" #include "space-info.h" #include "fs.h" #include "accessors.h" #include "bio.h" /* Maximum number of zones to report per blkdev_report_zones() call */ #define BTRFS_REPORT_NR_ZONES 4096 /* Invalid allocation pointer value for missing devices */ #define WP_MISSING_DEV ((u64)-1) /* Pseudo write pointer value for conventional zone */ #define WP_CONVENTIONAL ((u64)-2) /* * Location of the first zone of superblock logging zone pairs. * * - primary superblock: 0B (zone 0) * - first copy: 512G (zone starting at that offset) * - second copy: 4T (zone starting at that offset) */ #define BTRFS_SB_LOG_PRIMARY_OFFSET (0ULL) #define BTRFS_SB_LOG_FIRST_OFFSET (512ULL * SZ_1G) #define BTRFS_SB_LOG_SECOND_OFFSET (4096ULL * SZ_1G) #define BTRFS_SB_LOG_FIRST_SHIFT const_ilog2(BTRFS_SB_LOG_FIRST_OFFSET) #define BTRFS_SB_LOG_SECOND_SHIFT const_ilog2(BTRFS_SB_LOG_SECOND_OFFSET) /* Number of superblock log zones */ #define BTRFS_NR_SB_LOG_ZONES 2 /* * Minimum of active zones we need: * * - BTRFS_SUPER_MIRROR_MAX zones for superblock mirrors * - 3 zones to ensure at least one zone per SYSTEM, META and DATA block group * - 1 zone for tree-log dedicated block group * - 1 zone for relocation */ #define BTRFS_MIN_ACTIVE_ZONES (BTRFS_SUPER_MIRROR_MAX + 5) /* * Minimum / maximum supported zone size. Currently, SMR disks have a zone * size of 256MiB, and we are expecting ZNS drives to be in the 1-4GiB range. * We do not expect the zone size to become larger than 8GiB or smaller than * 4MiB in the near future. */ #define BTRFS_MAX_ZONE_SIZE SZ_8G #define BTRFS_MIN_ZONE_SIZE SZ_4M #define SUPER_INFO_SECTORS ((u64)BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT) static void wait_eb_writebacks(struct btrfs_block_group *block_group); static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written); static inline bool sb_zone_is_full(const struct blk_zone *zone) { return (zone->cond == BLK_ZONE_COND_FULL) || (zone->wp + SUPER_INFO_SECTORS > zone->start + zone->capacity); } static int copy_zone_info_cb(struct blk_zone *zone, unsigned int idx, void *data) { struct blk_zone *zones = data; memcpy(&zones[idx], zone, sizeof(*zone)); return 0; } static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones, u64 *wp_ret) { bool empty[BTRFS_NR_SB_LOG_ZONES]; bool full[BTRFS_NR_SB_LOG_ZONES]; sector_t sector; for (int i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) { ASSERT(zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL); empty[i] = (zones[i].cond == BLK_ZONE_COND_EMPTY); full[i] = sb_zone_is_full(&zones[i]); } /* * Possible states of log buffer zones * * Empty[0] In use[0] Full[0] * Empty[1] * 0 1 * In use[1] x x 1 * Full[1] 0 0 C * * Log position: * *: Special case, no superblock is written * 0: Use write pointer of zones[0] * 1: Use write pointer of zones[1] * C: Compare super blocks from zones[0] and zones[1], use the latest * one determined by generation * x: Invalid state */ if (empty[0] && empty[1]) { /* Special case to distinguish no superblock to read */ *wp_ret = zones[0].start << SECTOR_SHIFT; return -ENOENT; } else if (full[0] && full[1]) { /* Compare two super blocks */ struct address_space *mapping = bdev->bd_mapping; struct page *page[BTRFS_NR_SB_LOG_ZONES]; struct btrfs_super_block *super[BTRFS_NR_SB_LOG_ZONES]; for (int i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) { u64 zone_end = (zones[i].start + zones[i].capacity) << SECTOR_SHIFT; u64 bytenr = ALIGN_DOWN(zone_end, BTRFS_SUPER_INFO_SIZE) - BTRFS_SUPER_INFO_SIZE; page[i] = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS); if (IS_ERR(page[i])) { if (i == 1) btrfs_release_disk_super(super[0]); return PTR_ERR(page[i]); } super[i] = page_address(page[i]); } if (btrfs_super_generation(super[0]) > btrfs_super_generation(super[1])) sector = zones[1].start; else sector = zones[0].start; for (int i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) btrfs_release_disk_super(super[i]); } else if (!full[0] && (empty[1] || full[1])) { sector = zones[0].wp; } else if (full[0]) { sector = zones[1].wp; } else { return -EUCLEAN; } *wp_ret = sector << SECTOR_SHIFT; return 0; } /* * Get the first zone number of the superblock mirror */ static inline u32 sb_zone_number(int shift, int mirror) { u64 zone = U64_MAX; ASSERT(mirror < BTRFS_SUPER_MIRROR_MAX); switch (mirror) { case 0: zone = 0; break; case 1: zone = 1ULL << (BTRFS_SB_LOG_FIRST_SHIFT - shift); break; case 2: zone = 1ULL << (BTRFS_SB_LOG_SECOND_SHIFT - shift); break; } ASSERT(zone <= U32_MAX); return (u32)zone; } static inline sector_t zone_start_sector(u32 zone_number, struct block_device *bdev) { return (sector_t)zone_number << ilog2(bdev_zone_sectors(bdev)); } static inline u64 zone_start_physical(u32 zone_number, struct btrfs_zoned_device_info *zone_info) { return (u64)zone_number << zone_info->zone_size_shift; } /* * Emulate blkdev_report_zones() for a non-zoned device. It slices up the block * device into static sized chunks and fake a conventional zone on each of * them. */ static int emulate_report_zones(struct btrfs_device *device, u64 pos, struct blk_zone *zones, unsigned int nr_zones) { const sector_t zone_sectors = device->fs_info->zone_size >> SECTOR_SHIFT; sector_t bdev_size = bdev_nr_sectors(device->bdev); unsigned int i; pos >>= SECTOR_SHIFT; for (i = 0; i < nr_zones; i++) { zones[i].start = i * zone_sectors + pos; zones[i].len = zone_sectors; zones[i].capacity = zone_sectors; zones[i].wp = zones[i].start + zone_sectors; zones[i].type = BLK_ZONE_TYPE_CONVENTIONAL; zones[i].cond = BLK_ZONE_COND_NOT_WP; if (zones[i].wp >= bdev_size) { i++; break; } } return i; } static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos, struct blk_zone *zones, unsigned int *nr_zones) { struct btrfs_zoned_device_info *zinfo = device->zone_info; int ret; if (!*nr_zones) return 0; if (!bdev_is_zoned(device->bdev)) { ret = emulate_report_zones(device, pos, zones, *nr_zones); *nr_zones = ret; return 0; } /* Check cache */ if (zinfo->zone_cache) { unsigned int i; u32 zno; ASSERT(IS_ALIGNED(pos, zinfo->zone_size)); zno = pos >> zinfo->zone_size_shift; /* * We cannot report zones beyond the zone end. So, it is OK to * cap *nr_zones to at the end. */ *nr_zones = min_t(u32, *nr_zones, zinfo->nr_zones - zno); for (i = 0; i < *nr_zones; i++) { struct blk_zone *zone_info; zone_info = &zinfo->zone_cache[zno + i]; if (!zone_info->len) break; } if (i == *nr_zones) { /* Cache hit on all the zones */ memcpy(zones, zinfo->zone_cache + zno, sizeof(*zinfo->zone_cache) * *nr_zones); return 0; } } ret = blkdev_report_zones(device->bdev, pos >> SECTOR_SHIFT, *nr_zones, copy_zone_info_cb, zones); if (ret < 0) { btrfs_err_in_rcu(device->fs_info, "zoned: failed to read zone %llu on %s (devid %llu)", pos, rcu_str_deref(device->name), device->devid); return ret; } *nr_zones = ret; if (!ret) return -EIO; /* Populate cache */ if (zinfo->zone_cache) { u32 zno = pos >> zinfo->zone_size_shift; memcpy(zinfo->zone_cache + zno, zones, sizeof(*zinfo->zone_cache) * *nr_zones); } return 0; } /* The emulated zone size is determined from the size of device extent */ static int calculate_emulated_zone_size(struct btrfs_fs_info *fs_info) { struct btrfs_path *path; struct btrfs_root *root = fs_info->dev_root; struct btrfs_key key; struct extent_buffer *leaf; struct btrfs_dev_extent *dext; int ret = 0; key.objectid = 1; key.type = BTRFS_DEV_EXTENT_KEY; key.offset = 0; path = btrfs_alloc_path(); if (!path) return -ENOMEM; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { ret = btrfs_next_leaf(root, path); if (ret < 0) goto out; /* No dev extents at all? Not good */ if (ret > 0) { ret = -EUCLEAN; goto out; } } leaf = path->nodes[0]; dext = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent); fs_info->zone_size = btrfs_dev_extent_length(leaf, dext); ret = 0; out: btrfs_free_path(path); return ret; } int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info) { struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *device; int ret = 0; /* fs_info->zone_size might not set yet. Use the incomapt flag here. */ if (!btrfs_fs_incompat(fs_info, ZONED)) return 0; mutex_lock(&fs_devices->device_list_mutex); list_for_each_entry(device, &fs_devices->devices, dev_list) { /* We can skip reading of zone info for missing devices */ if (!device->bdev) continue; ret = btrfs_get_dev_zone_info(device, true); if (ret) break; } mutex_unlock(&fs_devices->device_list_mutex); return ret; } int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) { struct btrfs_fs_info *fs_info = device->fs_info; struct btrfs_zoned_device_info *zone_info = NULL; struct block_device *bdev = device->bdev; unsigned int max_active_zones; unsigned int nactive; sector_t nr_sectors; sector_t sector = 0; struct blk_zone *zones = NULL; unsigned int i, nreported = 0, nr_zones; sector_t zone_sectors; char *model, *emulated; int ret; /* * Cannot use btrfs_is_zoned here, since fs_info::zone_size might not * yet be set. */ if (!btrfs_fs_incompat(fs_info, ZONED)) return 0; if (device->zone_info) return 0; zone_info = kzalloc(sizeof(*zone_info), GFP_KERNEL); if (!zone_info) return -ENOMEM; device->zone_info = zone_info; if (!bdev_is_zoned(bdev)) { if (!fs_info->zone_size) { ret = calculate_emulated_zone_size(fs_info); if (ret) goto out; } ASSERT(fs_info->zone_size); zone_sectors = fs_info->zone_size >> SECTOR_SHIFT; } else { zone_sectors = bdev_zone_sectors(bdev); } ASSERT(is_power_of_two_u64(zone_sectors)); zone_info->zone_size = zone_sectors << SECTOR_SHIFT; /* We reject devices with a zone size larger than 8GB */ if (zone_info->zone_size > BTRFS_MAX_ZONE_SIZE) { btrfs_err_in_rcu(fs_info, "zoned: %s: zone size %llu larger than supported maximum %llu", rcu_str_deref(device->name), zone_info->zone_size, BTRFS_MAX_ZONE_SIZE); ret = -EINVAL; goto out; } else if (zone_info->zone_size < BTRFS_MIN_ZONE_SIZE) { btrfs_err_in_rcu(fs_info, "zoned: %s: zone size %llu smaller than supported minimum %u", rcu_str_deref(device->name), zone_info->zone_size, BTRFS_MIN_ZONE_SIZE); ret = -EINVAL; goto out; } nr_sectors = bdev_nr_sectors(bdev); zone_info->zone_size_shift = ilog2(zone_info->zone_size); zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors); if (!IS_ALIGNED(nr_sectors, zone_sectors)) zone_info->nr_zones++; max_active_zones = bdev_max_active_zones(bdev); if (max_active_zones && max_active_zones < BTRFS_MIN_ACTIVE_ZONES) { btrfs_err_in_rcu(fs_info, "zoned: %s: max active zones %u is too small, need at least %u active zones", rcu_str_deref(device->name), max_active_zones, BTRFS_MIN_ACTIVE_ZONES); ret = -EINVAL; goto out; } zone_info->max_active_zones = max_active_zones; zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); if (!zone_info->seq_zones) { ret = -ENOMEM; goto out; } zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); if (!zone_info->empty_zones) { ret = -ENOMEM; goto out; } zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); if (!zone_info->active_zones) { ret = -ENOMEM; goto out; } zones = kvcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL); if (!zones) { ret = -ENOMEM; goto out; } /* * Enable zone cache only for a zoned device. On a non-zoned device, we * fill the zone info with emulated CONVENTIONAL zones, so no need to * use the cache. */ if (populate_cache && bdev_is_zoned(device->bdev)) { zone_info->zone_cache = vcalloc(zone_info->nr_zones, sizeof(struct blk_zone)); if (!zone_info->zone_cache) { btrfs_err_in_rcu(device->fs_info, "zoned: failed to allocate zone cache for %s", rcu_str_deref(device->name)); ret = -ENOMEM; goto out; } } /* Get zones type */ nactive = 0; while (sector < nr_sectors) { nr_zones = BTRFS_REPORT_NR_ZONES; ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT, zones, &nr_zones); if (ret) goto out; for (i = 0; i < nr_zones; i++) { if (zones[i].type == BLK_ZONE_TYPE_SEQWRITE_REQ) __set_bit(nreported, zone_info->seq_zones); switch (zones[i].cond) { case BLK_ZONE_COND_EMPTY: __set_bit(nreported, zone_info->empty_zones); break; case BLK_ZONE_COND_IMP_OPEN: case BLK_ZONE_COND_EXP_OPEN: case BLK_ZONE_COND_CLOSED: __set_bit(nreported, zone_info->active_zones); nactive++; break; } nreported++; } sector = zones[nr_zones - 1].start + zones[nr_zones - 1].len; } if (nreported != zone_info->nr_zones) { btrfs_err_in_rcu(device->fs_info, "inconsistent number of zones on %s (%u/%u)", rcu_str_deref(device->name), nreported, zone_info->nr_zones); ret = -EIO; goto out; } if (max_active_zones) { if (nactive > max_active_zones) { btrfs_err_in_rcu(device->fs_info, "zoned: %u active zones on %s exceeds max_active_zones %u", nactive, rcu_str_deref(device->name), max_active_zones); ret = -EIO; goto out; } atomic_set(&zone_info->active_zones_left, max_active_zones - nactive); set_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags); } /* Validate superblock log */ nr_zones = BTRFS_NR_SB_LOG_ZONES; for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { u32 sb_zone; u64 sb_wp; int sb_pos = BTRFS_NR_SB_LOG_ZONES * i; sb_zone = sb_zone_number(zone_info->zone_size_shift, i); if (sb_zone + 1 >= zone_info->nr_zones) continue; ret = btrfs_get_dev_zones(device, zone_start_physical(sb_zone, zone_info), &zone_info->sb_zones[sb_pos], &nr_zones); if (ret) goto out; if (nr_zones != BTRFS_NR_SB_LOG_ZONES) { btrfs_err_in_rcu(device->fs_info, "zoned: failed to read super block log zone info at devid %llu zone %u", device->devid, sb_zone); ret = -EUCLEAN; goto out; } /* * If zones[0] is conventional, always use the beginning of the * zone to record superblock. No need to validate in that case. */ if (zone_info->sb_zones[BTRFS_NR_SB_LOG_ZONES * i].type == BLK_ZONE_TYPE_CONVENTIONAL) continue; ret = sb_write_pointer(device->bdev, &zone_info->sb_zones[sb_pos], &sb_wp); if (ret != -ENOENT && ret) { btrfs_err_in_rcu(device->fs_info, "zoned: super block log zone corrupted devid %llu zone %u", device->devid, sb_zone); ret = -EUCLEAN; goto out; } } kvfree(zones); if (bdev_is_zoned(bdev)) { model = "host-managed zoned"; emulated = ""; } else { model = "regular"; emulated = "emulated "; } btrfs_info_in_rcu(fs_info, "%s block device %s, %u %szones of %llu bytes", model, rcu_str_deref(device->name), zone_info->nr_zones, emulated, zone_info->zone_size); return 0; out: kvfree(zones); btrfs_destroy_dev_zone_info(device); return ret; } void btrfs_destroy_dev_zone_info(struct btrfs_device *device) { struct btrfs_zoned_device_info *zone_info = device->zone_info; if (!zone_info) return; bitmap_free(zone_info->active_zones); bitmap_free(zone_info->seq_zones); bitmap_free(zone_info->empty_zones); vfree(zone_info->zone_cache); kfree(zone_info); device->zone_info = NULL; } struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev) { struct btrfs_zoned_device_info *zone_info; zone_info = kmemdup(orig_dev->zone_info, sizeof(*zone_info), GFP_KERNEL); if (!zone_info) return NULL; zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); if (!zone_info->seq_zones) goto out; bitmap_copy(zone_info->seq_zones, orig_dev->zone_info->seq_zones, zone_info->nr_zones); zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); if (!zone_info->empty_zones) goto out; bitmap_copy(zone_info->empty_zones, orig_dev->zone_info->empty_zones, zone_info->nr_zones); zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); if (!zone_info->active_zones) goto out; bitmap_copy(zone_info->active_zones, orig_dev->zone_info->active_zones, zone_info->nr_zones); zone_info->zone_cache = NULL; return zone_info; out: bitmap_free(zone_info->seq_zones); bitmap_free(zone_info->empty_zones); bitmap_free(zone_info->active_zones); kfree(zone_info); return NULL; } static int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, struct blk_zone *zone) { unsigned int nr_zones = 1; int ret; ret = btrfs_get_dev_zones(device, pos, zone, &nr_zones); if (ret != 0 || !nr_zones) return ret ? ret : -EIO; return 0; } static int btrfs_check_for_zoned_device(struct btrfs_fs_info *fs_info) { struct btrfs_device *device; list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { if (device->bdev && bdev_is_zoned(device->bdev)) { btrfs_err(fs_info, "zoned: mode not enabled but zoned device found: %pg", device->bdev); return -EINVAL; } } return 0; } int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info) { struct queue_limits *lim = &fs_info->limits; struct btrfs_device *device; u64 zone_size = 0; int ret; /* * Host-Managed devices can't be used without the ZONED flag. With the * ZONED all devices can be used, using zone emulation if required. */ if (!btrfs_fs_incompat(fs_info, ZONED)) return btrfs_check_for_zoned_device(fs_info); blk_set_stacking_limits(lim); list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { struct btrfs_zoned_device_info *zone_info = device->zone_info; if (!device->bdev) continue; if (!zone_size) { zone_size = zone_info->zone_size; } else if (zone_info->zone_size != zone_size) { btrfs_err(fs_info, "zoned: unequal block device zone sizes: have %llu found %llu", zone_info->zone_size, zone_size); return -EINVAL; } /* * With the zoned emulation, we can have non-zoned device on the * zoned mode. In this case, we don't have a valid max zone * append size. */ if (bdev_is_zoned(device->bdev)) { blk_stack_limits(lim, &bdev_get_queue(device->bdev)->limits, 0); } } /* * stripe_size is always aligned to BTRFS_STRIPE_LEN in * btrfs_create_chunk(). Since we want stripe_len == zone_size, * check the alignment here. */ if (!IS_ALIGNED(zone_size, BTRFS_STRIPE_LEN)) { btrfs_err(fs_info, "zoned: zone size %llu not aligned to stripe %u", zone_size, BTRFS_STRIPE_LEN); return -EINVAL; } if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) { btrfs_err(fs_info, "zoned: mixed block groups not supported"); return -EINVAL; } fs_info->zone_size = zone_size; /* * Also limit max_zone_append_size by max_segments * PAGE_SIZE. * Technically, we can have multiple pages per segment. But, since * we add the pages one by one to a bio, and cannot increase the * metadata reservation even if it increases the number of extents, it * is safe to stick with the limit. */ fs_info->max_zone_append_size = ALIGN_DOWN( min3((u64)lim->max_zone_append_sectors << SECTOR_SHIFT, (u64)lim->max_sectors << SECTOR_SHIFT, (u64)lim->max_segments << PAGE_SHIFT), fs_info->sectorsize); fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED; if (fs_info->max_zone_append_size < fs_info->max_extent_size) fs_info->max_extent_size = fs_info->max_zone_append_size; /* * Check mount options here, because we might change fs_info->zoned * from fs_info->zone_size. */ ret = btrfs_check_mountopts_zoned(fs_info, &fs_info->mount_opt); if (ret) return ret; btrfs_info(fs_info, "zoned mode enabled with zone size %llu", zone_size); return 0; } int btrfs_check_mountopts_zoned(const struct btrfs_fs_info *info, unsigned long long *mount_opt) { if (!btrfs_is_zoned(info)) return 0; /* * Space cache writing is not COWed. Disable that to avoid write errors * in sequential zones. */ if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE)) { btrfs_err(info, "zoned: space cache v1 is not supported"); return -EINVAL; } if (btrfs_raw_test_opt(*mount_opt, NODATACOW)) { btrfs_err(info, "zoned: NODATACOW not supported"); return -EINVAL; } if (btrfs_raw_test_opt(*mount_opt, DISCARD_ASYNC)) { btrfs_info(info, "zoned: async discard ignored and disabled for zoned mode"); btrfs_clear_opt(*mount_opt, DISCARD_ASYNC); } return 0; } static int sb_log_location(struct block_device *bdev, struct blk_zone *zones, int rw, u64 *bytenr_ret) { u64 wp; int ret; if (zones[0].type == BLK_ZONE_TYPE_CONVENTIONAL) { *bytenr_ret = zones[0].start << SECTOR_SHIFT; return 0; } ret = sb_write_pointer(bdev, zones, &wp); if (ret != -ENOENT && ret < 0) return ret; if (rw == WRITE) { struct blk_zone *reset = NULL; if (wp == zones[0].start << SECTOR_SHIFT) reset = &zones[0]; else if (wp == zones[1].start << SECTOR_SHIFT) reset = &zones[1]; if (reset && reset->cond != BLK_ZONE_COND_EMPTY) { unsigned int nofs_flags; ASSERT(sb_zone_is_full(reset)); nofs_flags = memalloc_nofs_save(); ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, reset->start, reset->len); memalloc_nofs_restore(nofs_flags); if (ret) return ret; reset->cond = BLK_ZONE_COND_EMPTY; reset->wp = reset->start; } } else if (ret != -ENOENT) { /* * For READ, we want the previous one. Move write pointer to * the end of a zone, if it is at the head of a zone. */ u64 zone_end = 0; if (wp == zones[0].start << SECTOR_SHIFT) zone_end = zones[1].start + zones[1].capacity; else if (wp == zones[1].start << SECTOR_SHIFT) zone_end = zones[0].start + zones[0].capacity; if (zone_end) wp = ALIGN_DOWN(zone_end << SECTOR_SHIFT, BTRFS_SUPER_INFO_SIZE); wp -= BTRFS_SUPER_INFO_SIZE; } *bytenr_ret = wp; return 0; } int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw, u64 *bytenr_ret) { struct blk_zone zones[BTRFS_NR_SB_LOG_ZONES]; sector_t zone_sectors; u32 sb_zone; int ret; u8 zone_sectors_shift; sector_t nr_sectors; u32 nr_zones; if (!bdev_is_zoned(bdev)) { *bytenr_ret = btrfs_sb_offset(mirror); return 0; } ASSERT(rw == READ || rw == WRITE); zone_sectors = bdev_zone_sectors(bdev); if (!is_power_of_2(zone_sectors)) return -EINVAL; zone_sectors_shift = ilog2(zone_sectors); nr_sectors = bdev_nr_sectors(bdev); nr_zones = nr_sectors >> zone_sectors_shift; sb_zone = sb_zone_number(zone_sectors_shift + SECTOR_SHIFT, mirror); if (sb_zone + 1 >= nr_zones) return -ENOENT; ret = blkdev_report_zones(bdev, zone_start_sector(sb_zone, bdev), BTRFS_NR_SB_LOG_ZONES, copy_zone_info_cb, zones); if (ret < 0) return ret; if (ret != BTRFS_NR_SB_LOG_ZONES) return -EIO; return sb_log_location(bdev, zones, rw, bytenr_ret); } int btrfs_sb_log_location(struct btrfs_device *device, int mirror, int rw, u64 *bytenr_ret) { struct btrfs_zoned_device_info *zinfo = device->zone_info; u32 zone_num; /* * For a zoned filesystem on a non-zoned block device, use the same * super block locations as regular filesystem. Doing so, the super * block can always be retrieved and the zoned flag of the volume * detected from the super block information. */ if (!bdev_is_zoned(device->bdev)) { *bytenr_ret = btrfs_sb_offset(mirror); return 0; } zone_num = sb_zone_number(zinfo->zone_size_shift, mirror); if (zone_num + 1 >= zinfo->nr_zones) return -ENOENT; return sb_log_location(device->bdev, &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror], rw, bytenr_ret); } static inline bool is_sb_log_zone(struct btrfs_zoned_device_info *zinfo, int mirror) { u32 zone_num; if (!zinfo) return false; zone_num = sb_zone_number(zinfo->zone_size_shift, mirror); if (zone_num + 1 >= zinfo->nr_zones) return false; if (!test_bit(zone_num, zinfo->seq_zones)) return false; return true; } int btrfs_advance_sb_log(struct btrfs_device *device, int mirror) { struct btrfs_zoned_device_info *zinfo = device->zone_info; struct blk_zone *zone; int i; if (!is_sb_log_zone(zinfo, mirror)) return 0; zone = &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror]; for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) { /* Advance the next zone */ if (zone->cond == BLK_ZONE_COND_FULL) { zone++; continue; } if (zone->cond == BLK_ZONE_COND_EMPTY) zone->cond = BLK_ZONE_COND_IMP_OPEN; zone->wp += SUPER_INFO_SECTORS; if (sb_zone_is_full(zone)) { /* * No room left to write new superblock. Since * superblock is written with REQ_SYNC, it is safe to * finish the zone now. * * If the write pointer is exactly at the capacity, * explicit ZONE_FINISH is not necessary. */ if (zone->wp != zone->start + zone->capacity) { unsigned int nofs_flags; int ret; nofs_flags = memalloc_nofs_save(); ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH, zone->start, zone->len); memalloc_nofs_restore(nofs_flags); if (ret) return ret; } zone->wp = zone->start + zone->len; zone->cond = BLK_ZONE_COND_FULL; } return 0; } /* All the zones are FULL. Should not reach here. */ ASSERT(0); return -EIO; } int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror) { unsigned int nofs_flags; sector_t zone_sectors; sector_t nr_sectors; u8 zone_sectors_shift; u32 sb_zone; u32 nr_zones; int ret; zone_sectors = bdev_zone_sectors(bdev); zone_sectors_shift = ilog2(zone_sectors); nr_sectors = bdev_nr_sectors(bdev); nr_zones = nr_sectors >> zone_sectors_shift; sb_zone = sb_zone_number(zone_sectors_shift + SECTOR_SHIFT, mirror); if (sb_zone + 1 >= nr_zones) return -ENOENT; nofs_flags = memalloc_nofs_save(); ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, zone_start_sector(sb_zone, bdev), zone_sectors * BTRFS_NR_SB_LOG_ZONES); memalloc_nofs_restore(nofs_flags); return ret; } /* * Find allocatable zones within a given region. * * @device: the device to allocate a region on * @hole_start: the position of the hole to allocate the region * @num_bytes: size of wanted region * @hole_end: the end of the hole * @return: position of allocatable zones * * Allocatable region should not contain any superblock locations. */ u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start, u64 hole_end, u64 num_bytes) { struct btrfs_zoned_device_info *zinfo = device->zone_info; const u8 shift = zinfo->zone_size_shift; u64 nzones = num_bytes >> shift; u64 pos = hole_start; u64 begin, end; bool have_sb; int i; ASSERT(IS_ALIGNED(hole_start, zinfo->zone_size)); ASSERT(IS_ALIGNED(num_bytes, zinfo->zone_size)); while (pos < hole_end) { begin = pos >> shift; end = begin + nzones; if (end > zinfo->nr_zones) return hole_end; /* Check if zones in the region are all empty */ if (btrfs_dev_is_sequential(device, pos) && !bitmap_test_range_all_set(zinfo->empty_zones, begin, nzones)) { pos += zinfo->zone_size; continue; } have_sb = false; for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { u32 sb_zone; u64 sb_pos; sb_zone = sb_zone_number(shift, i); if (!(end <= sb_zone || sb_zone + BTRFS_NR_SB_LOG_ZONES <= begin)) { have_sb = true; pos = zone_start_physical( sb_zone + BTRFS_NR_SB_LOG_ZONES, zinfo); break; } /* We also need to exclude regular superblock positions */ sb_pos = btrfs_sb_offset(i); if (!(pos + num_bytes <= sb_pos || sb_pos + BTRFS_SUPER_INFO_SIZE <= pos)) { have_sb = true; pos = ALIGN(sb_pos + BTRFS_SUPER_INFO_SIZE, zinfo->zone_size); break; } } if (!have_sb) break; } return pos; } static bool btrfs_dev_set_active_zone(struct btrfs_device *device, u64 pos) { struct btrfs_zoned_device_info *zone_info = device->zone_info; unsigned int zno = (pos >> zone_info->zone_size_shift); /* We can use any number of zones */ if (zone_info->max_active_zones == 0) return true; if (!test_bit(zno, zone_info->active_zones)) { /* Active zone left? */ if (atomic_dec_if_positive(&zone_info->active_zones_left) < 0) return false; if (test_and_set_bit(zno, zone_info->active_zones)) { /* Someone already set the bit */ atomic_inc(&zone_info->active_zones_left); } } return true; } static void btrfs_dev_clear_active_zone(struct btrfs_device *device, u64 pos) { struct btrfs_zoned_device_info *zone_info = device->zone_info; unsigned int zno = (pos >> zone_info->zone_size_shift); /* We can use any number of zones */ if (zone_info->max_active_zones == 0) return; if (test_and_clear_bit(zno, zone_info->active_zones)) atomic_inc(&zone_info->active_zones_left); } int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical, u64 length, u64 *bytes) { unsigned int nofs_flags; int ret; *bytes = 0; nofs_flags = memalloc_nofs_save(); ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_RESET, physical >> SECTOR_SHIFT, length >> SECTOR_SHIFT); memalloc_nofs_restore(nofs_flags); if (ret) return ret; *bytes = length; while (length) { btrfs_dev_set_zone_empty(device, physical); btrfs_dev_clear_active_zone(device, physical); physical += device->zone_info->zone_size; length -= device->zone_info->zone_size; } return 0; } int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size) { struct btrfs_zoned_device_info *zinfo = device->zone_info; const u8 shift = zinfo->zone_size_shift; unsigned long begin = start >> shift; unsigned long nbits = size >> shift; u64 pos; int ret; ASSERT(IS_ALIGNED(start, zinfo->zone_size)); ASSERT(IS_ALIGNED(size, zinfo->zone_size)); if (begin + nbits > zinfo->nr_zones) return -ERANGE; /* All the zones are conventional */ if (bitmap_test_range_all_zero(zinfo->seq_zones, begin, nbits)) return 0; /* All the zones are sequential and empty */ if (bitmap_test_range_all_set(zinfo->seq_zones, begin, nbits) && bitmap_test_range_all_set(zinfo->empty_zones, begin, nbits)) return 0; for (pos = start; pos < start + size; pos += zinfo->zone_size) { u64 reset_bytes; if (!btrfs_dev_is_sequential(device, pos) || btrfs_dev_is_empty_zone(device, pos)) continue; /* Free regions should be empty */ btrfs_warn_in_rcu( device->fs_info, "zoned: resetting device %s (devid %llu) zone %llu for allocation", rcu_str_deref(device->name), device->devid, pos >> shift); WARN_ON_ONCE(1); ret = btrfs_reset_device_zone(device, pos, zinfo->zone_size, &reset_bytes); if (ret) return ret; } return 0; } /* * Calculate an allocation pointer from the extent allocation information * for a block group consist of conventional zones. It is pointed to the * end of the highest addressed extent in the block group as an allocation * offset. */ static int calculate_alloc_pointer(struct btrfs_block_group *cache, u64 *offset_ret, bool new) { struct btrfs_fs_info *fs_info = cache->fs_info; struct btrfs_root *root; struct btrfs_path *path; struct btrfs_key key; struct btrfs_key found_key; int ret; u64 length; /* * Avoid tree lookups for a new block group, there's no use for it. * It must always be 0. * * Also, we have a lock chain of extent buffer lock -> chunk mutex. * For new a block group, this function is called from * btrfs_make_block_group() which is already taking the chunk mutex. * Thus, we cannot call calculate_alloc_pointer() which takes extent * buffer locks to avoid deadlock. */ if (new) { *offset_ret = 0; return 0; } path = btrfs_alloc_path(); if (!path) return -ENOMEM; key.objectid = cache->start + cache->length; key.type = 0; key.offset = 0; root = btrfs_extent_root(fs_info, key.objectid); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); /* We should not find the exact match */ if (!ret) ret = -EUCLEAN; if (ret < 0) goto out; ret = btrfs_previous_extent_item(root, path, cache->start); if (ret) { if (ret == 1) { ret = 0; *offset_ret = 0; } goto out; } btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); if (found_key.type == BTRFS_EXTENT_ITEM_KEY) length = found_key.offset; else length = fs_info->nodesize; if (!(found_key.objectid >= cache->start && found_key.objectid + length <= cache->start + cache->length)) { ret = -EUCLEAN; goto out; } *offset_ret = found_key.objectid + length - cache->start; ret = 0; out: btrfs_free_path(path); return ret; } struct zone_info { u64 physical; u64 capacity; u64 alloc_offset; }; static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, struct zone_info *info, unsigned long *active, struct btrfs_chunk_map *map) { struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; struct btrfs_device *device; int dev_replace_is_ongoing = 0; unsigned int nofs_flag; struct blk_zone zone; int ret; info->physical = map->stripes[zone_idx].physical; down_read(&dev_replace->rwsem); device = map->stripes[zone_idx].dev; if (!device->bdev) { up_read(&dev_replace->rwsem); info->alloc_offset = WP_MISSING_DEV; return 0; } /* Consider a zone as active if we can allow any number of active zones. */ if (!device->zone_info->max_active_zones) __set_bit(zone_idx, active); if (!btrfs_dev_is_sequential(device, info->physical)) { up_read(&dev_replace->rwsem); info->alloc_offset = WP_CONVENTIONAL; return 0; } /* This zone will be used for allocation, so mark this zone non-empty. */ btrfs_dev_clear_zone_empty(device, info->physical); dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace); if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical); /* * The group is mapped to a sequential zone. Get the zone write pointer * to determine the allocation offset within the zone. */ WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size)); nofs_flag = memalloc_nofs_save(); ret = btrfs_get_dev_zone(device, info->physical, &zone); memalloc_nofs_restore(nofs_flag); if (ret) { up_read(&dev_replace->rwsem); if (ret != -EIO && ret != -EOPNOTSUPP) return ret; info->alloc_offset = WP_MISSING_DEV; return 0; } if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) { btrfs_err_in_rcu(fs_info, "zoned: unexpected conventional zone %llu on device %s (devid %llu)", zone.start << SECTOR_SHIFT, rcu_str_deref(device->name), device->devid); up_read(&dev_replace->rwsem); return -EIO; } info->capacity = (zone.capacity << SECTOR_SHIFT); switch (zone.cond) { case BLK_ZONE_COND_OFFLINE: case BLK_ZONE_COND_READONLY: btrfs_err(fs_info, "zoned: offline/readonly zone %llu on device %s (devid %llu)", (info->physical >> device->zone_info->zone_size_shift), rcu_str_deref(device->name), device->devid); info->alloc_offset = WP_MISSING_DEV; break; case BLK_ZONE_COND_EMPTY: info->alloc_offset = 0; break; case BLK_ZONE_COND_FULL: info->alloc_offset = info->capacity; break; default: /* Partially used zone. */ info->alloc_offset = ((zone.wp - zone.start) << SECTOR_SHIFT); __set_bit(zone_idx, active); break; } up_read(&dev_replace->rwsem); return 0; } static int btrfs_load_block_group_single(struct btrfs_block_group *bg, struct zone_info *info, unsigned long *active) { if (info->alloc_offset == WP_MISSING_DEV) { btrfs_err(bg->fs_info, "zoned: cannot recover write pointer for zone %llu", info->physical); return -EIO; } bg->alloc_offset = info->alloc_offset; bg->zone_capacity = info->capacity; if (test_bit(0, active)) set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); return 0; } static int btrfs_load_block_group_dup(struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct zone_info *zone_info, unsigned long *active) { struct btrfs_fs_info *fs_info = bg->fs_info; if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { btrfs_err(fs_info, "zoned: data DUP profile needs raid-stripe-tree"); return -EINVAL; } if (zone_info[0].alloc_offset == WP_MISSING_DEV) { btrfs_err(bg->fs_info, "zoned: cannot recover write pointer for zone %llu", zone_info[0].physical); return -EIO; } if (zone_info[1].alloc_offset == WP_MISSING_DEV) { btrfs_err(bg->fs_info, "zoned: cannot recover write pointer for zone %llu", zone_info[1].physical); return -EIO; } if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) { btrfs_err(bg->fs_info, "zoned: write pointer offset mismatch of zones in DUP profile"); return -EIO; } if (test_bit(0, active) != test_bit(1, active)) { if (!btrfs_zone_activate(bg)) return -EIO; } else if (test_bit(0, active)) { set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); } bg->alloc_offset = zone_info[0].alloc_offset; bg->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity); return 0; } static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct zone_info *zone_info, unsigned long *active) { struct btrfs_fs_info *fs_info = bg->fs_info; int i; if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", btrfs_bg_type_to_raid_name(map->type)); return -EINVAL; } for (i = 0; i < map->num_stripes; i++) { if (zone_info[i].alloc_offset == WP_MISSING_DEV || zone_info[i].alloc_offset == WP_CONVENTIONAL) continue; if ((zone_info[0].alloc_offset != zone_info[i].alloc_offset) && !btrfs_test_opt(fs_info, DEGRADED)) { btrfs_err(fs_info, "zoned: write pointer offset mismatch of zones in %s profile", btrfs_bg_type_to_raid_name(map->type)); return -EIO; } if (test_bit(0, active) != test_bit(i, active)) { if (!btrfs_test_opt(fs_info, DEGRADED) && !btrfs_zone_activate(bg)) { return -EIO; } } else { if (test_bit(0, active)) set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); } /* In case a device is missing we have a cap of 0, so don't use it. */ bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity); } if (zone_info[0].alloc_offset != WP_MISSING_DEV) bg->alloc_offset = zone_info[0].alloc_offset; else bg->alloc_offset = zone_info[i - 1].alloc_offset; return 0; } static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct zone_info *zone_info, unsigned long *active) { struct btrfs_fs_info *fs_info = bg->fs_info; if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", btrfs_bg_type_to_raid_name(map->type)); return -EINVAL; } for (int i = 0; i < map->num_stripes; i++) { if (zone_info[i].alloc_offset == WP_MISSING_DEV || zone_info[i].alloc_offset == WP_CONVENTIONAL) continue; if (test_bit(0, active) != test_bit(i, active)) { if (!btrfs_zone_activate(bg)) return -EIO; } else { if (test_bit(0, active)) set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); } bg->zone_capacity += zone_info[i].capacity; bg->alloc_offset += zone_info[i].alloc_offset; } return 0; } static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct zone_info *zone_info, unsigned long *active) { struct btrfs_fs_info *fs_info = bg->fs_info; if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", btrfs_bg_type_to_raid_name(map->type)); return -EINVAL; } for (int i = 0; i < map->num_stripes; i++) { if (zone_info[i].alloc_offset == WP_MISSING_DEV || zone_info[i].alloc_offset == WP_CONVENTIONAL) continue; if (test_bit(0, active) != test_bit(i, active)) { if (!btrfs_zone_activate(bg)) return -EIO; } else { if (test_bit(0, active)) set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); } if ((i % map->sub_stripes) == 0) { bg->zone_capacity += zone_info[i].capacity; bg->alloc_offset += zone_info[i].alloc_offset; } } return 0; } int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) { struct btrfs_fs_info *fs_info = cache->fs_info; struct btrfs_chunk_map *map; u64 logical = cache->start; u64 length = cache->length; struct zone_info *zone_info = NULL; int ret; int i; unsigned long *active = NULL; u64 last_alloc = 0; u32 num_sequential = 0, num_conventional = 0; if (!btrfs_is_zoned(fs_info)) return 0; /* Sanity check */ if (!IS_ALIGNED(length, fs_info->zone_size)) { btrfs_err(fs_info, "zoned: block group %llu len %llu unaligned to zone size %llu", logical, length, fs_info->zone_size); return -EIO; } map = btrfs_find_chunk_map(fs_info, logical, length); if (!map) return -EINVAL; cache->physical_map = map; zone_info = kcalloc(map->num_stripes, sizeof(*zone_info), GFP_NOFS); if (!zone_info) { ret = -ENOMEM; goto out; } active = bitmap_zalloc(map->num_stripes, GFP_NOFS); if (!active) { ret = -ENOMEM; goto out; } for (i = 0; i < map->num_stripes; i++) { ret = btrfs_load_zone_info(fs_info, i, &zone_info[i], active, map); if (ret) goto out; if (zone_info[i].alloc_offset == WP_CONVENTIONAL) num_conventional++; else num_sequential++; } if (num_sequential > 0) set_bit(BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, &cache->runtime_flags); if (num_conventional > 0) { /* Zone capacity is always zone size in emulation */ cache->zone_capacity = cache->length; ret = calculate_alloc_pointer(cache, &last_alloc, new); if (ret) { btrfs_err(fs_info, "zoned: failed to determine allocation offset of bg %llu", cache->start); goto out; } else if (map->num_stripes == num_conventional) { cache->alloc_offset = last_alloc; set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags); goto out; } } switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { case 0: /* single */ ret = btrfs_load_block_group_single(cache, &zone_info[0], active); break; case BTRFS_BLOCK_GROUP_DUP: ret = btrfs_load_block_group_dup(cache, map, zone_info, active); break; case BTRFS_BLOCK_GROUP_RAID1: case BTRFS_BLOCK_GROUP_RAID1C3: case BTRFS_BLOCK_GROUP_RAID1C4: ret = btrfs_load_block_group_raid1(cache, map, zone_info, active); break; case BTRFS_BLOCK_GROUP_RAID0: ret = btrfs_load_block_group_raid0(cache, map, zone_info, active); break; case BTRFS_BLOCK_GROUP_RAID10: ret = btrfs_load_block_group_raid10(cache, map, zone_info, active); break; case BTRFS_BLOCK_GROUP_RAID5: case BTRFS_BLOCK_GROUP_RAID6: default: btrfs_err(fs_info, "zoned: profile %s not yet supported", btrfs_bg_type_to_raid_name(map->type)); ret = -EINVAL; goto out; } out: /* Reject non SINGLE data profiles without RST */ if ((map->type & BTRFS_BLOCK_GROUP_DATA) && (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) && !fs_info->stripe_root) { btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", btrfs_bg_type_to_raid_name(map->type)); return -EINVAL; } if (cache->alloc_offset > cache->zone_capacity) { btrfs_err(fs_info, "zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu", cache->alloc_offset, cache->zone_capacity, cache->start); ret = -EIO; } /* An extent is allocated after the write pointer */ if (!ret && num_conventional && last_alloc > cache->alloc_offset) { btrfs_err(fs_info, "zoned: got wrong write pointer in BG %llu: %llu > %llu", logical, last_alloc, cache->alloc_offset); ret = -EIO; } if (!ret) { cache->meta_write_pointer = cache->alloc_offset + cache->start; if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags)) { btrfs_get_block_group(cache); spin_lock(&fs_info->zone_active_bgs_lock); list_add_tail(&cache->active_bg_list, &fs_info->zone_active_bgs); spin_unlock(&fs_info->zone_active_bgs_lock); } } else { btrfs_free_chunk_map(cache->physical_map); cache->physical_map = NULL; } bitmap_free(active); kfree(zone_info); return ret; } void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) { u64 unusable, free; if (!btrfs_is_zoned(cache->fs_info)) return; WARN_ON(cache->bytes_super != 0); unusable = (cache->alloc_offset - cache->used) + (cache->length - cache->zone_capacity); free = cache->zone_capacity - cache->alloc_offset; /* We only need ->free_space in ALLOC_SEQ block groups */ cache->cached = BTRFS_CACHE_FINISHED; cache->free_space_ctl->free_space = free; cache->zone_unusable = unusable; } bool btrfs_use_zone_append(struct btrfs_bio *bbio) { u64 start = (bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT); struct btrfs_inode *inode = bbio->inode; struct btrfs_fs_info *fs_info = bbio->fs_info; struct btrfs_block_group *cache; bool ret = false; if (!btrfs_is_zoned(fs_info)) return false; if (!inode || !is_data_inode(inode)) return false; if (btrfs_op(&bbio->bio) != BTRFS_MAP_WRITE) return false; /* * Using REQ_OP_ZONE_APPNED for relocation can break assumptions on the * extent layout the relocation code has. * Furthermore we have set aside own block-group from which only the * relocation "process" can allocate and make sure only one process at a * time can add pages to an extent that gets relocated, so it's safe to * use regular REQ_OP_WRITE for this special case. */ if (btrfs_is_data_reloc_root(inode->root)) return false; cache = btrfs_lookup_block_group(fs_info, start); ASSERT(cache); if (!cache) return false; ret = !!test_bit(BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, &cache->runtime_flags); btrfs_put_block_group(cache); return ret; } void btrfs_record_physical_zoned(struct btrfs_bio *bbio) { const u64 physical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT; struct btrfs_ordered_sum *sum = bbio->sums; if (physical < bbio->orig_physical) sum->logical -= bbio->orig_physical - physical; else sum->logical += physical - bbio->orig_physical; } static void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered, u64 logical) { struct extent_map_tree *em_tree = &ordered->inode->extent_tree; struct extent_map *em; ordered->disk_bytenr = logical; write_lock(&em_tree->lock); em = search_extent_mapping(em_tree, ordered->file_offset, ordered->num_bytes); /* The em should be a new COW extent, thus it should not have an offset. */ ASSERT(em->offset == 0); em->disk_bytenr = logical; free_extent_map(em); write_unlock(&em_tree->lock); } static bool btrfs_zoned_split_ordered(struct btrfs_ordered_extent *ordered, u64 logical, u64 len) { struct btrfs_ordered_extent *new; if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) && split_extent_map(ordered->inode, ordered->file_offset, ordered->num_bytes, len, logical)) return false; new = btrfs_split_ordered_extent(ordered, len); if (IS_ERR(new)) return false; new->disk_bytenr = logical; btrfs_finish_one_ordered(new); return true; } void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered) { struct btrfs_inode *inode = ordered->inode; struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_ordered_sum *sum; u64 logical, len; /* * Write to pre-allocated region is for the data relocation, and so * it should use WRITE operation. No split/rewrite are necessary. */ if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) return; ASSERT(!list_empty(&ordered->list)); /* The ordered->list can be empty in the above pre-alloc case. */ sum = list_first_entry(&ordered->list, struct btrfs_ordered_sum, list); logical = sum->logical; len = sum->len; while (len < ordered->disk_num_bytes) { sum = list_next_entry(sum, list); if (sum->logical == logical + len) { len += sum->len; continue; } if (!btrfs_zoned_split_ordered(ordered, logical, len)) { set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); btrfs_err(fs_info, "failed to split ordered extent"); goto out; } logical = sum->logical; len = sum->len; } if (ordered->disk_bytenr != logical) btrfs_rewrite_logical_zoned(ordered, logical); out: /* * If we end up here for nodatasum I/O, the btrfs_ordered_sum structures * were allocated by btrfs_alloc_dummy_sum only to record the logical * addresses and don't contain actual checksums. We thus must free them * here so that we don't attempt to log the csums later. */ if ((inode->flags & BTRFS_INODE_NODATASUM) || test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state)) { while ((sum = list_first_entry_or_null(&ordered->list, typeof(*sum), list))) { list_del(&sum->list); kfree(sum); } } } static bool check_bg_is_active(struct btrfs_eb_write_context *ctx, struct btrfs_block_group **active_bg) { const struct writeback_control *wbc = ctx->wbc; struct btrfs_block_group *block_group = ctx->zoned_bg; struct btrfs_fs_info *fs_info = block_group->fs_info; if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) return true; if (fs_info->treelog_bg == block_group->start) { if (!btrfs_zone_activate(block_group)) { int ret_fin = btrfs_zone_finish_one_bg(fs_info); if (ret_fin != 1 || !btrfs_zone_activate(block_group)) return false; } } else if (*active_bg != block_group) { struct btrfs_block_group *tgt = *active_bg; /* zoned_meta_io_lock protects fs_info->active_{meta,system}_bg. */ lockdep_assert_held(&fs_info->zoned_meta_io_lock); if (tgt) { /* * If there is an unsent IO left in the allocated area, * we cannot wait for them as it may cause a deadlock. */ if (tgt->meta_write_pointer < tgt->start + tgt->alloc_offset) { if (wbc->sync_mode == WB_SYNC_NONE || (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)) return false; } /* Pivot active metadata/system block group. */ btrfs_zoned_meta_io_unlock(fs_info); wait_eb_writebacks(tgt); do_zone_finish(tgt, true); btrfs_zoned_meta_io_lock(fs_info); if (*active_bg == tgt) { btrfs_put_block_group(tgt); *active_bg = NULL; } } if (!btrfs_zone_activate(block_group)) return false; if (*active_bg != block_group) { ASSERT(*active_bg == NULL); *active_bg = block_group; btrfs_get_block_group(block_group); } } return true; } /* * Check if @ctx->eb is aligned to the write pointer. * * Return: * 0: @ctx->eb is at the write pointer. You can write it. * -EAGAIN: There is a hole. The caller should handle the case. * -EBUSY: There is a hole, but the caller can just bail out. */ int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info, struct btrfs_eb_write_context *ctx) { const struct writeback_control *wbc = ctx->wbc; const struct extent_buffer *eb = ctx->eb; struct btrfs_block_group *block_group = ctx->zoned_bg; if (!btrfs_is_zoned(fs_info)) return 0; if (block_group) { if (block_group->start > eb->start || block_group->start + block_group->length <= eb->start) { btrfs_put_block_group(block_group); block_group = NULL; ctx->zoned_bg = NULL; } } if (!block_group) { block_group = btrfs_lookup_block_group(fs_info, eb->start); if (!block_group) return 0; ctx->zoned_bg = block_group; } if (block_group->meta_write_pointer == eb->start) { struct btrfs_block_group **tgt; if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags)) return 0; if (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) tgt = &fs_info->active_system_bg; else tgt = &fs_info->active_meta_bg; if (check_bg_is_active(ctx, tgt)) return 0; } /* * Since we may release fs_info->zoned_meta_io_lock, someone can already * start writing this eb. In that case, we can just bail out. */ if (block_group->meta_write_pointer > eb->start) return -EBUSY; /* If for_sync, this hole will be filled with trasnsaction commit. */ if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) return -EAGAIN; return -EBUSY; } int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 length) { if (!btrfs_dev_is_sequential(device, physical)) return -EOPNOTSUPP; return blkdev_issue_zeroout(device->bdev, physical >> SECTOR_SHIFT, length >> SECTOR_SHIFT, GFP_NOFS, 0); } static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical, struct blk_zone *zone) { struct btrfs_io_context *bioc = NULL; u64 mapped_length = PAGE_SIZE; unsigned int nofs_flag; int nmirrors; int i, ret; ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical, &mapped_length, &bioc, NULL, NULL); if (ret || !bioc || mapped_length < PAGE_SIZE) { ret = -EIO; goto out_put_bioc; } if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) { ret = -EINVAL; goto out_put_bioc; } nofs_flag = memalloc_nofs_save(); nmirrors = (int)bioc->num_stripes; for (i = 0; i < nmirrors; i++) { u64 physical = bioc->stripes[i].physical; struct btrfs_device *dev = bioc->stripes[i].dev; /* Missing device */ if (!dev->bdev) continue; ret = btrfs_get_dev_zone(dev, physical, zone); /* Failing device */ if (ret == -EIO || ret == -EOPNOTSUPP) continue; break; } memalloc_nofs_restore(nofs_flag); out_put_bioc: btrfs_put_bioc(bioc); return ret; } /* * Synchronize write pointer in a zone at @physical_start on @tgt_dev, by * filling zeros between @physical_pos to a write pointer of dev-replace * source device. */ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical, u64 physical_start, u64 physical_pos) { struct btrfs_fs_info *fs_info = tgt_dev->fs_info; struct blk_zone zone; u64 length; u64 wp; int ret; if (!btrfs_dev_is_sequential(tgt_dev, physical_pos)) return 0; ret = read_zone_info(fs_info, logical, &zone); if (ret) return ret; wp = physical_start + ((zone.wp - zone.start) << SECTOR_SHIFT); if (physical_pos == wp) return 0; if (physical_pos > wp) return -EUCLEAN; length = wp - physical_pos; return btrfs_zoned_issue_zeroout(tgt_dev, physical_pos, length); } /* * Activate block group and underlying device zones * * @block_group: the block group to activate * * Return: true on success, false otherwise */ bool btrfs_zone_activate(struct btrfs_block_group *block_group) { struct btrfs_fs_info *fs_info = block_group->fs_info; struct btrfs_chunk_map *map; struct btrfs_device *device; u64 physical; const bool is_data = (block_group->flags & BTRFS_BLOCK_GROUP_DATA); bool ret; int i; if (!btrfs_is_zoned(block_group->fs_info)) return true; map = block_group->physical_map; spin_lock(&fs_info->zone_active_bgs_lock); spin_lock(&block_group->lock); if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) { ret = true; goto out_unlock; } /* No space left */ if (btrfs_zoned_bg_is_full(block_group)) { ret = false; goto out_unlock; } for (i = 0; i < map->num_stripes; i++) { struct btrfs_zoned_device_info *zinfo; int reserved = 0; device = map->stripes[i].dev; physical = map->stripes[i].physical; zinfo = device->zone_info; if (zinfo->max_active_zones == 0) continue; if (is_data) reserved = zinfo->reserved_active_zones; /* * For the data block group, leave active zones for one * metadata block group and one system block group. */ if (atomic_read(&zinfo->active_zones_left) <= reserved) { ret = false; goto out_unlock; } if (!btrfs_dev_set_active_zone(device, physical)) { /* Cannot activate the zone */ ret = false; goto out_unlock; } if (!is_data) zinfo->reserved_active_zones--; } /* Successfully activated all the zones */ set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags); spin_unlock(&block_group->lock); /* For the active block group list */ btrfs_get_block_group(block_group); list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs); spin_unlock(&fs_info->zone_active_bgs_lock); return true; out_unlock: spin_unlock(&block_group->lock); spin_unlock(&fs_info->zone_active_bgs_lock); return ret; } static void wait_eb_writebacks(struct btrfs_block_group *block_group) { struct btrfs_fs_info *fs_info = block_group->fs_info; const u64 end = block_group->start + block_group->length; struct radix_tree_iter iter; struct extent_buffer *eb; void __rcu **slot; rcu_read_lock(); radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, block_group->start >> fs_info->sectorsize_bits) { eb = radix_tree_deref_slot(slot); if (!eb) continue; if (radix_tree_deref_retry(eb)) { slot = radix_tree_iter_retry(&iter); continue; } if (eb->start < block_group->start) continue; if (eb->start >= end) break; slot = radix_tree_iter_resume(slot, &iter); rcu_read_unlock(); wait_on_extent_buffer_writeback(eb); rcu_read_lock(); } rcu_read_unlock(); } static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written) { struct btrfs_fs_info *fs_info = block_group->fs_info; struct btrfs_chunk_map *map; const bool is_metadata = (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)); struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; int ret = 0; int i; spin_lock(&block_group->lock); if (!test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) { spin_unlock(&block_group->lock); return 0; } /* Check if we have unwritten allocated space */ if (is_metadata && block_group->start + block_group->alloc_offset > block_group->meta_write_pointer) { spin_unlock(&block_group->lock); return -EAGAIN; } /* * If we are sure that the block group is full (= no more room left for * new allocation) and the IO for the last usable block is completed, we * don't need to wait for the other IOs. This holds because we ensure * the sequential IO submissions using the ZONE_APPEND command for data * and block_group->meta_write_pointer for metadata. */ if (!fully_written) { if (test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) { spin_unlock(&block_group->lock); return -EAGAIN; } spin_unlock(&block_group->lock); ret = btrfs_inc_block_group_ro(block_group, false); if (ret) return ret; /* Ensure all writes in this block group finish */ btrfs_wait_block_group_reservations(block_group); /* No need to wait for NOCOW writers. Zoned mode does not allow that */ btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group); /* Wait for extent buffers to be written. */ if (is_metadata) wait_eb_writebacks(block_group); spin_lock(&block_group->lock); /* * Bail out if someone already deactivated the block group, or * allocated space is left in the block group. */ if (!test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) { spin_unlock(&block_group->lock); btrfs_dec_block_group_ro(block_group); return 0; } if (block_group->reserved || test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) { spin_unlock(&block_group->lock); btrfs_dec_block_group_ro(block_group); return -EAGAIN; } } clear_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags); block_group->alloc_offset = block_group->zone_capacity; if (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) block_group->meta_write_pointer = block_group->start + block_group->zone_capacity; block_group->free_space_ctl->free_space = 0; btrfs_clear_treelog_bg(block_group); btrfs_clear_data_reloc_bg(block_group); spin_unlock(&block_group->lock); down_read(&dev_replace->rwsem); map = block_group->physical_map; for (i = 0; i < map->num_stripes; i++) { struct btrfs_device *device = map->stripes[i].dev; const u64 physical = map->stripes[i].physical; struct btrfs_zoned_device_info *zinfo = device->zone_info; unsigned int nofs_flags; if (zinfo->max_active_zones == 0) continue; nofs_flags = memalloc_nofs_save(); ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH, physical >> SECTOR_SHIFT, zinfo->zone_size >> SECTOR_SHIFT); memalloc_nofs_restore(nofs_flags); if (ret) { up_read(&dev_replace->rwsem); return ret; } if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA)) zinfo->reserved_active_zones++; btrfs_dev_clear_active_zone(device, physical); } up_read(&dev_replace->rwsem); if (!fully_written) btrfs_dec_block_group_ro(block_group); spin_lock(&fs_info->zone_active_bgs_lock); ASSERT(!list_empty(&block_group->active_bg_list)); list_del_init(&block_group->active_bg_list); spin_unlock(&fs_info->zone_active_bgs_lock); /* For active_bg_list */ btrfs_put_block_group(block_group); clear_and_wake_up_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags); return 0; } int btrfs_zone_finish(struct btrfs_block_group *block_group) { if (!btrfs_is_zoned(block_group->fs_info)) return 0; return do_zone_finish(block_group, false); } bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags) { struct btrfs_fs_info *fs_info = fs_devices->fs_info; struct btrfs_device *device; bool ret = false; if (!btrfs_is_zoned(fs_info)) return true; /* Check if there is a device with active zones left */ mutex_lock(&fs_info->chunk_mutex); spin_lock(&fs_info->zone_active_bgs_lock); list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { struct btrfs_zoned_device_info *zinfo = device->zone_info; int reserved = 0; if (!device->bdev) continue; if (!zinfo->max_active_zones) { ret = true; break; } if (flags & BTRFS_BLOCK_GROUP_DATA) reserved = zinfo->reserved_active_zones; switch (flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) { case 0: /* single */ ret = (atomic_read(&zinfo->active_zones_left) >= (1 + reserved)); break; case BTRFS_BLOCK_GROUP_DUP: ret = (atomic_read(&zinfo->active_zones_left) >= (2 + reserved)); break; } if (ret) break; } spin_unlock(&fs_info->zone_active_bgs_lock); mutex_unlock(&fs_info->chunk_mutex); if (!ret) set_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags); return ret; } void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length) { struct btrfs_block_group *block_group; u64 min_alloc_bytes; if (!btrfs_is_zoned(fs_info)) return; block_group = btrfs_lookup_block_group(fs_info, logical); ASSERT(block_group); /* No MIXED_BG on zoned btrfs. */ if (block_group->flags & BTRFS_BLOCK_GROUP_DATA) min_alloc_bytes = fs_info->sectorsize; else min_alloc_bytes = fs_info->nodesize; /* Bail out if we can allocate more data from this block group. */ if (logical + length + min_alloc_bytes <= block_group->start + block_group->zone_capacity) goto out; do_zone_finish(block_group, true); out: btrfs_put_block_group(block_group); } static void btrfs_zone_finish_endio_workfn(struct work_struct *work) { struct btrfs_block_group *bg = container_of(work, struct btrfs_block_group, zone_finish_work); wait_on_extent_buffer_writeback(bg->last_eb); free_extent_buffer(bg->last_eb); btrfs_zone_finish_endio(bg->fs_info, bg->start, bg->length); btrfs_put_block_group(bg); } void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg, struct extent_buffer *eb) { if (!test_bit(BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, &bg->runtime_flags) || eb->start + eb->len * 2 <= bg->start + bg->zone_capacity) return; if (WARN_ON(bg->zone_finish_work.func == btrfs_zone_finish_endio_workfn)) { btrfs_err(bg->fs_info, "double scheduling of bg %llu zone finishing", bg->start); return; } /* For the work */ btrfs_get_block_group(bg); atomic_inc(&eb->refs); bg->last_eb = eb; INIT_WORK(&bg->zone_finish_work, btrfs_zone_finish_endio_workfn); queue_work(system_unbound_wq, &bg->zone_finish_work); } void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { struct btrfs_fs_info *fs_info = bg->fs_info; spin_lock(&fs_info->relocation_bg_lock); if (fs_info->data_reloc_bg == bg->start) fs_info->data_reloc_bg = 0; spin_unlock(&fs_info->relocation_bg_lock); } void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *device; if (!btrfs_is_zoned(fs_info)) return; mutex_lock(&fs_devices->device_list_mutex); list_for_each_entry(device, &fs_devices->devices, dev_list) { if (device->zone_info) { vfree(device->zone_info->zone_cache); device->zone_info->zone_cache = NULL; } } mutex_unlock(&fs_devices->device_list_mutex); } bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info) { struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *device; u64 used = 0; u64 total = 0; u64 factor; ASSERT(btrfs_is_zoned(fs_info)); if (fs_info->bg_reclaim_threshold == 0) return false; mutex_lock(&fs_devices->device_list_mutex); list_for_each_entry(device, &fs_devices->devices, dev_list) { if (!device->bdev) continue; total += device->disk_total_bytes; used += device->bytes_used; } mutex_unlock(&fs_devices->device_list_mutex); factor = div64_u64(used * 100, total); return factor >= fs_info->bg_reclaim_threshold; } void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical, u64 length) { struct btrfs_block_group *block_group; if (!btrfs_is_zoned(fs_info)) return; block_group = btrfs_lookup_block_group(fs_info, logical); /* It should be called on a previous data relocation block group. */ ASSERT(block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)); spin_lock(&block_group->lock); if (!test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) goto out; /* All relocation extents are written. */ if (block_group->start + block_group->alloc_offset == logical + length) { /* * Now, release this block group for further allocations and * zone finish. */ clear_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags); } out: spin_unlock(&block_group->lock); btrfs_put_block_group(block_group); } int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info) { struct btrfs_block_group *block_group; struct btrfs_block_group *min_bg = NULL; u64 min_avail = U64_MAX; int ret; spin_lock(&fs_info->zone_active_bgs_lock); list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) { u64 avail; spin_lock(&block_group->lock); if (block_group->reserved || block_group->alloc_offset == 0 || (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) || test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) { spin_unlock(&block_group->lock); continue; } avail = block_group->zone_capacity - block_group->alloc_offset; if (min_avail > avail) { if (min_bg) btrfs_put_block_group(min_bg); min_bg = block_group; min_avail = avail; btrfs_get_block_group(min_bg); } spin_unlock(&block_group->lock); } spin_unlock(&fs_info->zone_active_bgs_lock); if (!min_bg) return 0; ret = btrfs_zone_finish(min_bg); btrfs_put_block_group(min_bg); return ret < 0 ? ret : 1; } int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info, bool do_finish) { struct btrfs_block_group *bg; int index; if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA)) return 0; for (;;) { int ret; bool need_finish = false; down_read(&space_info->groups_sem); for (index = 0; index < BTRFS_NR_RAID_TYPES; index++) { list_for_each_entry(bg, &space_info->block_groups[index], list) { if (!spin_trylock(&bg->lock)) continue; if (btrfs_zoned_bg_is_full(bg) || test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags)) { spin_unlock(&bg->lock); continue; } spin_unlock(&bg->lock); if (btrfs_zone_activate(bg)) { up_read(&space_info->groups_sem); return 1; } need_finish = true; } } up_read(&space_info->groups_sem); if (!do_finish || !need_finish) break; ret = btrfs_zone_finish_one_bg(fs_info); if (ret == 0) break; if (ret < 0) return ret; } return 0; } /* * Reserve zones for one metadata block group, one tree-log block group, and one * system block group. */ void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info) { struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_block_group *block_group; struct btrfs_device *device; /* Reserve zones for normal SINGLE metadata and tree-log block group. */ unsigned int metadata_reserve = 2; /* Reserve a zone for SINGLE system block group. */ unsigned int system_reserve = 1; if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags)) return; /* * This function is called from the mount context. So, there is no * parallel process touching the bits. No need for read_seqretry(). */ if (fs_info->avail_metadata_alloc_bits & BTRFS_BLOCK_GROUP_DUP) metadata_reserve = 4; if (fs_info->avail_system_alloc_bits & BTRFS_BLOCK_GROUP_DUP) system_reserve = 2; /* Apply the reservation on all the devices. */ mutex_lock(&fs_devices->device_list_mutex); list_for_each_entry(device, &fs_devices->devices, dev_list) { if (!device->bdev) continue; device->zone_info->reserved_active_zones = metadata_reserve + system_reserve; } mutex_unlock(&fs_devices->device_list_mutex); /* Release reservation for currently active block groups. */ spin_lock(&fs_info->zone_active_bgs_lock); list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) { struct btrfs_chunk_map *map = block_group->physical_map; if (!(block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM))) continue; for (int i = 0; i < map->num_stripes; i++) map->stripes[i].dev->zone_info->reserved_active_zones--; } spin_unlock(&fs_info->zone_active_bgs_lock); }
3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) International Business Machines Corp., 2006 * Copyright (c) Nokia Corporation, 2007 * * Author: Artem Bityutskiy (Битюцкий Артём), * Frank Haverkamp */ /* * This file includes UBI initialization and building of UBI devices. * * When UBI is initialized, it attaches all the MTD devices specified as the * module load parameters or the kernel boot parameters. If MTD devices were * specified, UBI does not attach any MTD device, but it is possible to do * later using the "UBI control device". */ #include <linux/err.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/stringify.h> #include <linux/namei.h> #include <linux/stat.h> #include <linux/miscdevice.h> #include <linux/mtd/partitions.h> #include <linux/log2.h> #include <linux/kthread.h> #include <linux/kernel.h> #include <linux/of.h> #include <linux/slab.h> #include <linux/major.h> #include "ubi.h" /* Maximum length of the 'mtd=' parameter */ #define MTD_PARAM_LEN_MAX 64 /* Maximum number of comma-separated items in the 'mtd=' parameter */ #define MTD_PARAM_MAX_COUNT 6 /* Maximum value for the number of bad PEBs per 1024 PEBs */ #define MAX_MTD_UBI_BEB_LIMIT 768 #ifdef CONFIG_MTD_UBI_MODULE #define ubi_is_module() 1 #else #define ubi_is_module() 0 #endif /** * struct mtd_dev_param - MTD device parameter description data structure. * @name: MTD character device node path, MTD device name, or MTD device number * string * @ubi_num: UBI number * @vid_hdr_offs: VID header offset * @max_beb_per1024: maximum expected number of bad PEBs per 1024 PEBs * @enable_fm: enable fastmap when value is non-zero * @need_resv_pool: reserve pool->max_size pebs when value is none-zero */ struct mtd_dev_param { char name[MTD_PARAM_LEN_MAX]; int ubi_num; int vid_hdr_offs; int max_beb_per1024; int enable_fm; int need_resv_pool; }; /* Numbers of elements set in the @mtd_dev_param array */ static int mtd_devs; /* MTD devices specification parameters */ static struct mtd_dev_param mtd_dev_param[UBI_MAX_DEVICES]; #ifdef CONFIG_MTD_UBI_FASTMAP /* UBI module parameter to enable fastmap automatically on non-fastmap images */ static bool fm_autoconvert; static bool fm_debug; #endif /* Slab cache for wear-leveling entries */ struct kmem_cache *ubi_wl_entry_slab; /* UBI control character device */ static struct miscdevice ubi_ctrl_cdev = { .minor = MISC_DYNAMIC_MINOR, .name = "ubi_ctrl", .fops = &ubi_ctrl_cdev_operations, }; /* All UBI devices in system */ static struct ubi_device *ubi_devices[UBI_MAX_DEVICES]; /* Serializes UBI devices creations and removals */ DEFINE_MUTEX(ubi_devices_mutex); /* Protects @ubi_devices, @ubi->ref_count and @ubi->is_dead */ static DEFINE_SPINLOCK(ubi_devices_lock); /* "Show" method for files in '/<sysfs>/class/ubi/' */ /* UBI version attribute ('/<sysfs>/class/ubi/version') */ static ssize_t version_show(const struct class *class, const struct class_attribute *attr, char *buf) { return sprintf(buf, "%d\n", UBI_VERSION); } static CLASS_ATTR_RO(version); static struct attribute *ubi_class_attrs[] = { &class_attr_version.attr, NULL, }; ATTRIBUTE_GROUPS(ubi_class); /* Root UBI "class" object (corresponds to '/<sysfs>/class/ubi/') */ const struct class ubi_class = { .name = UBI_NAME_STR, .class_groups = ubi_class_groups, }; static ssize_t dev_attribute_show(struct device *dev, struct device_attribute *attr, char *buf); /* UBI device attributes (correspond to files in '/<sysfs>/class/ubi/ubiX') */ static struct device_attribute dev_eraseblock_size = __ATTR(eraseblock_size, S_IRUGO, dev_attribute_show, NULL); static struct device_attribute dev_avail_eraseblocks = __ATTR(avail_eraseblocks, S_IRUGO, dev_attribute_show, NULL); static struct device_attribute dev_total_eraseblocks = __ATTR(total_eraseblocks, S_IRUGO, dev_attribute_show, NULL); static struct device_attribute dev_volumes_count = __ATTR(volumes_count, S_IRUGO, dev_attribute_show, NULL); static struct device_attribute dev_max_ec = __ATTR(max_ec, S_IRUGO, dev_attribute_show, NULL); static struct device_attribute dev_reserved_for_bad = __ATTR(reserved_for_bad, S_IRUGO, dev_attribute_show, NULL); static struct device_attribute dev_bad_peb_count = __ATTR(bad_peb_count, S_IRUGO, dev_attribute_show, NULL); static struct device_attribute dev_max_vol_count = __ATTR(max_vol_count, S_IRUGO, dev_attribute_show, NULL); static struct device_attribute dev_min_io_size = __ATTR(min_io_size, S_IRUGO, dev_attribute_show, NULL); static struct device_attribute dev_bgt_enabled = __ATTR(bgt_enabled, S_IRUGO, dev_attribute_show, NULL); static struct device_attribute dev_mtd_num = __ATTR(mtd_num, S_IRUGO, dev_attribute_show, NULL); static struct device_attribute dev_ro_mode = __ATTR(ro_mode, S_IRUGO, dev_attribute_show, NULL); /** * ubi_volume_notify - send a volume change notification. * @ubi: UBI device description object * @vol: volume description object of the changed volume * @ntype: notification type to send (%UBI_VOLUME_ADDED, etc) * * This is a helper function which notifies all subscribers about a volume * change event (creation, removal, re-sizing, re-naming, updating). Returns * zero in case of success and a negative error code in case of failure. */ int ubi_volume_notify(struct ubi_device *ubi, struct ubi_volume *vol, int ntype) { int ret; struct ubi_notification nt; ubi_do_get_device_info(ubi, &nt.di); ubi_do_get_volume_info(ubi, vol, &nt.vi); switch (ntype) { case UBI_VOLUME_ADDED: case UBI_VOLUME_REMOVED: case UBI_VOLUME_RESIZED: case UBI_VOLUME_RENAMED: ret = ubi_update_fastmap(ubi); if (ret) ubi_msg(ubi, "Unable to write a new fastmap: %i", ret); } return blocking_notifier_call_chain(&ubi_notifiers, ntype, &nt); } /** * ubi_notify_all - send a notification to all volumes. * @ubi: UBI device description object * @ntype: notification type to send (%UBI_VOLUME_ADDED, etc) * @nb: the notifier to call * * This function walks all volumes of UBI device @ubi and sends the @ntype * notification for each volume. If @nb is %NULL, then all registered notifiers * are called, otherwise only the @nb notifier is called. Returns the number of * sent notifications. */ int ubi_notify_all(struct ubi_device *ubi, int ntype, struct notifier_block *nb) { struct ubi_notification nt; int i, count = 0; ubi_do_get_device_info(ubi, &nt.di); mutex_lock(&ubi->device_mutex); for (i = 0; i < ubi->vtbl_slots; i++) { /* * Since the @ubi->device is locked, and we are not going to * change @ubi->volumes, we do not have to lock * @ubi->volumes_lock. */ if (!ubi->volumes[i]) continue; ubi_do_get_volume_info(ubi, ubi->volumes[i], &nt.vi); if (nb) nb->notifier_call(nb, ntype, &nt); else blocking_notifier_call_chain(&ubi_notifiers, ntype, &nt); count += 1; } mutex_unlock(&ubi->device_mutex); return count; } /** * ubi_enumerate_volumes - send "add" notification for all existing volumes. * @nb: the notifier to call * * This function walks all UBI devices and volumes and sends the * %UBI_VOLUME_ADDED notification for each volume. If @nb is %NULL, then all * registered notifiers are called, otherwise only the @nb notifier is called. * Returns the number of sent notifications. */ int ubi_enumerate_volumes(struct notifier_block *nb) { int i, count = 0; /* * Since the @ubi_devices_mutex is locked, and we are not going to * change @ubi_devices, we do not have to lock @ubi_devices_lock. */ for (i = 0; i < UBI_MAX_DEVICES; i++) { struct ubi_device *ubi = ubi_devices[i]; if (!ubi) continue; count += ubi_notify_all(ubi, UBI_VOLUME_ADDED, nb); } return count; } /** * ubi_get_device - get UBI device. * @ubi_num: UBI device number * * This function returns UBI device description object for UBI device number * @ubi_num, or %NULL if the device does not exist. This function increases the * device reference count to prevent removal of the device. In other words, the * device cannot be removed if its reference count is not zero. */ struct ubi_device *ubi_get_device(int ubi_num) { struct ubi_device *ubi; spin_lock(&ubi_devices_lock); ubi = ubi_devices[ubi_num]; if (ubi && ubi->is_dead) ubi = NULL; if (ubi) { ubi_assert(ubi->ref_count >= 0); ubi->ref_count += 1; get_device(&ubi->dev); } spin_unlock(&ubi_devices_lock); return ubi; } /** * ubi_put_device - drop an UBI device reference. * @ubi: UBI device description object */ void ubi_put_device(struct ubi_device *ubi) { spin_lock(&ubi_devices_lock); ubi->ref_count -= 1; put_device(&ubi->dev); spin_unlock(&ubi_devices_lock); } /** * ubi_get_by_major - get UBI device by character device major number. * @major: major number * * This function is similar to 'ubi_get_device()', but it searches the device * by its major number. */ struct ubi_device *ubi_get_by_major(int major) { int i; struct ubi_device *ubi; spin_lock(&ubi_devices_lock); for (i = 0; i < UBI_MAX_DEVICES; i++) { ubi = ubi_devices[i]; if (ubi && !ubi->is_dead && MAJOR(ubi->cdev.dev) == major) { ubi_assert(ubi->ref_count >= 0); ubi->ref_count += 1; get_device(&ubi->dev); spin_unlock(&ubi_devices_lock); return ubi; } } spin_unlock(&ubi_devices_lock); return NULL; } /** * ubi_major2num - get UBI device number by character device major number. * @major: major number * * This function searches UBI device number object by its major number. If UBI * device was not found, this function returns -ENODEV, otherwise the UBI device * number is returned. */ int ubi_major2num(int major) { int i, ubi_num = -ENODEV; spin_lock(&ubi_devices_lock); for (i = 0; i < UBI_MAX_DEVICES; i++) { struct ubi_device *ubi = ubi_devices[i]; if (ubi && !ubi->is_dead && MAJOR(ubi->cdev.dev) == major) { ubi_num = ubi->ubi_num; break; } } spin_unlock(&ubi_devices_lock); return ubi_num; } /* "Show" method for files in '/<sysfs>/class/ubi/ubiX/' */ static ssize_t dev_attribute_show(struct device *dev, struct device_attribute *attr, char *buf) { ssize_t ret; struct ubi_device *ubi; /* * The below code looks weird, but it actually makes sense. We get the * UBI device reference from the contained 'struct ubi_device'. But it * is unclear if the device was removed or not yet. Indeed, if the * device was removed before we increased its reference count, * 'ubi_get_device()' will return -ENODEV and we fail. * * Remember, 'struct ubi_device' is freed in the release function, so * we still can use 'ubi->ubi_num'. */ ubi = container_of(dev, struct ubi_device, dev); if (attr == &dev_eraseblock_size) ret = sprintf(buf, "%d\n", ubi->leb_size); else if (attr == &dev_avail_eraseblocks) ret = sprintf(buf, "%d\n", ubi->avail_pebs); else if (attr == &dev_total_eraseblocks) ret = sprintf(buf, "%d\n", ubi->good_peb_count); else if (attr == &dev_volumes_count) ret = sprintf(buf, "%d\n", ubi->vol_count - UBI_INT_VOL_COUNT); else if (attr == &dev_max_ec) ret = sprintf(buf, "%d\n", ubi->max_ec); else if (attr == &dev_reserved_for_bad) ret = sprintf(buf, "%d\n", ubi->beb_rsvd_pebs); else if (attr == &dev_bad_peb_count) ret = sprintf(buf, "%d\n", ubi->bad_peb_count); else if (attr == &dev_max_vol_count) ret = sprintf(buf, "%d\n", ubi->vtbl_slots); else if (attr == &dev_min_io_size) ret = sprintf(buf, "%d\n", ubi->min_io_size); else if (attr == &dev_bgt_enabled) ret = sprintf(buf, "%d\n", ubi->thread_enabled); else if (attr == &dev_mtd_num) ret = sprintf(buf, "%d\n", ubi->mtd->index); else if (attr == &dev_ro_mode) ret = sprintf(buf, "%d\n", ubi->ro_mode); else ret = -EINVAL; return ret; } static struct attribute *ubi_dev_attrs[] = { &dev_eraseblock_size.attr, &dev_avail_eraseblocks.attr, &dev_total_eraseblocks.attr, &dev_volumes_count.attr, &dev_max_ec.attr, &dev_reserved_for_bad.attr, &dev_bad_peb_count.attr, &dev_max_vol_count.attr, &dev_min_io_size.attr, &dev_bgt_enabled.attr, &dev_mtd_num.attr, &dev_ro_mode.attr, NULL }; ATTRIBUTE_GROUPS(ubi_dev); static void dev_release(struct device *dev) { struct ubi_device *ubi = container_of(dev, struct ubi_device, dev); kfree(ubi); } /** * kill_volumes - destroy all user volumes. * @ubi: UBI device description object */ static void kill_volumes(struct ubi_device *ubi) { int i; for (i = 0; i < ubi->vtbl_slots; i++) if (ubi->volumes[i]) ubi_free_volume(ubi, ubi->volumes[i]); } /** * uif_init - initialize user interfaces for an UBI device. * @ubi: UBI device description object * * This function initializes various user interfaces for an UBI device. If the * initialization fails at an early stage, this function frees all the * resources it allocated, returns an error. * * This function returns zero in case of success and a negative error code in * case of failure. */ static int uif_init(struct ubi_device *ubi) { int i, err; dev_t dev; sprintf(ubi->ubi_name, UBI_NAME_STR "%d", ubi->ubi_num); /* * Major numbers for the UBI character devices are allocated * dynamically. Major numbers of volume character devices are * equivalent to ones of the corresponding UBI character device. Minor * numbers of UBI character devices are 0, while minor numbers of * volume character devices start from 1. Thus, we allocate one major * number and ubi->vtbl_slots + 1 minor numbers. */ err = alloc_chrdev_region(&dev, 0, ubi->vtbl_slots + 1, ubi->ubi_name); if (err) { ubi_err(ubi, "cannot register UBI character devices"); return err; } ubi->dev.devt = dev; ubi_assert(MINOR(dev) == 0); cdev_init(&ubi->cdev, &ubi_cdev_operations); dbg_gen("%s major is %u", ubi->ubi_name, MAJOR(dev)); ubi->cdev.owner = THIS_MODULE; dev_set_name(&ubi->dev, UBI_NAME_STR "%d", ubi->ubi_num); err = cdev_device_add(&ubi->cdev, &ubi->dev); if (err) goto out_unreg; for (i = 0; i < ubi->vtbl_slots; i++) if (ubi->volumes[i]) { err = ubi_add_volume(ubi, ubi->volumes[i]); if (err) { ubi_err(ubi, "cannot add volume %d", i); ubi->volumes[i] = NULL; goto out_volumes; } } return 0; out_volumes: kill_volumes(ubi); cdev_device_del(&ubi->cdev, &ubi->dev); out_unreg: unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1); ubi_err(ubi, "cannot initialize UBI %s, error %d", ubi->ubi_name, err); return err; } /** * uif_close - close user interfaces for an UBI device. * @ubi: UBI device description object * * Note, since this function un-registers UBI volume device objects (@vol->dev), * the memory allocated voe the volumes is freed as well (in the release * function). */ static void uif_close(struct ubi_device *ubi) { kill_volumes(ubi); cdev_device_del(&ubi->cdev, &ubi->dev); unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1); } /** * ubi_free_volumes_from - free volumes from specific index. * @ubi: UBI device description object * @from: the start index used for volume free. */ static void ubi_free_volumes_from(struct ubi_device *ubi, int from) { int i; for (i = from; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { if (!ubi->volumes[i] || ubi->volumes[i]->is_dead) continue; ubi_eba_replace_table(ubi->volumes[i], NULL); ubi_fastmap_destroy_checkmap(ubi->volumes[i]); kfree(ubi->volumes[i]); ubi->volumes[i] = NULL; } } /** * ubi_free_all_volumes - free all volumes. * @ubi: UBI device description object */ void ubi_free_all_volumes(struct ubi_device *ubi) { ubi_free_volumes_from(ubi, 0); } /** * ubi_free_internal_volumes - free internal volumes. * @ubi: UBI device description object */ void ubi_free_internal_volumes(struct ubi_device *ubi) { ubi_free_volumes_from(ubi, ubi->vtbl_slots); } static int get_bad_peb_limit(const struct ubi_device *ubi, int max_beb_per1024) { int limit, device_pebs; uint64_t device_size; if (!max_beb_per1024) { /* * Since max_beb_per1024 has not been set by the user in either * the cmdline or Kconfig, use mtd_max_bad_blocks to set the * limit if it is supported by the device. */ limit = mtd_max_bad_blocks(ubi->mtd, 0, ubi->mtd->size); if (limit < 0) return 0; return limit; } /* * Here we are using size of the entire flash chip and * not just the MTD partition size because the maximum * number of bad eraseblocks is a percentage of the * whole device and bad eraseblocks are not fairly * distributed over the flash chip. So the worst case * is that all the bad eraseblocks of the chip are in * the MTD partition we are attaching (ubi->mtd). */ device_size = mtd_get_device_size(ubi->mtd); device_pebs = mtd_div_by_eb(device_size, ubi->mtd); limit = mult_frac(device_pebs, max_beb_per1024, 1024); /* Round it up */ if (mult_frac(limit, 1024, max_beb_per1024) < device_pebs) limit += 1; return limit; } /** * io_init - initialize I/O sub-system for a given UBI device. * @ubi: UBI device description object * @max_beb_per1024: maximum expected number of bad PEB per 1024 PEBs * * If @ubi->vid_hdr_offset or @ubi->leb_start is zero, default offsets are * assumed: * o EC header is always at offset zero - this cannot be changed; * o VID header starts just after the EC header at the closest address * aligned to @io->hdrs_min_io_size; * o data starts just after the VID header at the closest address aligned to * @io->min_io_size * * This function returns zero in case of success and a negative error code in * case of failure. */ static int io_init(struct ubi_device *ubi, int max_beb_per1024) { dbg_gen("sizeof(struct ubi_ainf_peb) %zu", sizeof(struct ubi_ainf_peb)); dbg_gen("sizeof(struct ubi_wl_entry) %zu", sizeof(struct ubi_wl_entry)); if (ubi->mtd->numeraseregions != 0) { /* * Some flashes have several erase regions. Different regions * may have different eraseblock size and other * characteristics. It looks like mostly multi-region flashes * have one "main" region and one or more small regions to * store boot loader code or boot parameters or whatever. I * guess we should just pick the largest region. But this is * not implemented. */ ubi_err(ubi, "multiple regions, not implemented"); return -EINVAL; } if (ubi->vid_hdr_offset < 0) return -EINVAL; /* * Note, in this implementation we support MTD devices with 0x7FFFFFFF * physical eraseblocks maximum. */ ubi->peb_size = ubi->mtd->erasesize; ubi->peb_count = mtd_div_by_eb(ubi->mtd->size, ubi->mtd); ubi->flash_size = ubi->mtd->size; if (mtd_can_have_bb(ubi->mtd)) { ubi->bad_allowed = 1; ubi->bad_peb_limit = get_bad_peb_limit(ubi, max_beb_per1024); } if (ubi->mtd->type == MTD_NORFLASH) ubi->nor_flash = 1; ubi->min_io_size = ubi->mtd->writesize; ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft; /* * Make sure minimal I/O unit is power of 2. Note, there is no * fundamental reason for this assumption. It is just an optimization * which allows us to avoid costly division operations. */ if (!is_power_of_2(ubi->min_io_size)) { ubi_err(ubi, "min. I/O unit (%d) is not power of 2", ubi->min_io_size); return -EINVAL; } ubi_assert(ubi->hdrs_min_io_size > 0); ubi_assert(ubi->hdrs_min_io_size <= ubi->min_io_size); ubi_assert(ubi->min_io_size % ubi->hdrs_min_io_size == 0); ubi->max_write_size = ubi->mtd->writebufsize; /* * Maximum write size has to be greater or equivalent to min. I/O * size, and be multiple of min. I/O size. */ if (ubi->max_write_size < ubi->min_io_size || ubi->max_write_size % ubi->min_io_size || !is_power_of_2(ubi->max_write_size)) { ubi_err(ubi, "bad write buffer size %d for %d min. I/O unit", ubi->max_write_size, ubi->min_io_size); return -EINVAL; } /* Calculate default aligned sizes of EC and VID headers */ ubi->ec_hdr_alsize = ALIGN(UBI_EC_HDR_SIZE, ubi->hdrs_min_io_size); ubi->vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, ubi->hdrs_min_io_size); dbg_gen("min_io_size %d", ubi->min_io_size); dbg_gen("max_write_size %d", ubi->max_write_size); dbg_gen("hdrs_min_io_size %d", ubi->hdrs_min_io_size); dbg_gen("ec_hdr_alsize %d", ubi->ec_hdr_alsize); dbg_gen("vid_hdr_alsize %d", ubi->vid_hdr_alsize); if (ubi->vid_hdr_offset == 0) /* Default offset */ ubi->vid_hdr_offset = ubi->vid_hdr_aloffset = ubi->ec_hdr_alsize; else { ubi->vid_hdr_aloffset = ubi->vid_hdr_offset & ~(ubi->hdrs_min_io_size - 1); ubi->vid_hdr_shift = ubi->vid_hdr_offset - ubi->vid_hdr_aloffset; } /* * Memory allocation for VID header is ubi->vid_hdr_alsize * which is described in comments in io.c. * Make sure VID header shift + UBI_VID_HDR_SIZE not exceeds * ubi->vid_hdr_alsize, so that all vid header operations * won't access memory out of bounds. */ if ((ubi->vid_hdr_shift + UBI_VID_HDR_SIZE) > ubi->vid_hdr_alsize) { ubi_err(ubi, "Invalid VID header offset %d, VID header shift(%d)" " + VID header size(%zu) > VID header aligned size(%d).", ubi->vid_hdr_offset, ubi->vid_hdr_shift, UBI_VID_HDR_SIZE, ubi->vid_hdr_alsize); return -EINVAL; } /* Similar for the data offset */ ubi->leb_start = ubi->vid_hdr_offset + UBI_VID_HDR_SIZE; ubi->leb_start = ALIGN(ubi->leb_start, ubi->min_io_size); dbg_gen("vid_hdr_offset %d", ubi->vid_hdr_offset); dbg_gen("vid_hdr_aloffset %d", ubi->vid_hdr_aloffset); dbg_gen("vid_hdr_shift %d", ubi->vid_hdr_shift); dbg_gen("leb_start %d", ubi->leb_start); /* The shift must be aligned to 32-bit boundary */ if (ubi->vid_hdr_shift % 4) { ubi_err(ubi, "unaligned VID header shift %d", ubi->vid_hdr_shift); return -EINVAL; } /* Check sanity */ if (ubi->vid_hdr_offset < UBI_EC_HDR_SIZE || ubi->leb_start < ubi->vid_hdr_offset + UBI_VID_HDR_SIZE || ubi->leb_start > ubi->peb_size - UBI_VID_HDR_SIZE || ubi->leb_start & (ubi->min_io_size - 1)) { ubi_err(ubi, "bad VID header (%d) or data offsets (%d)", ubi->vid_hdr_offset, ubi->leb_start); return -EINVAL; } /* * Set maximum amount of physical erroneous eraseblocks to be 10%. * Erroneous PEB are those which have read errors. */ ubi->max_erroneous = ubi->peb_count / 10; if (ubi->max_erroneous < 16) ubi->max_erroneous = 16; dbg_gen("max_erroneous %d", ubi->max_erroneous); /* * It may happen that EC and VID headers are situated in one minimal * I/O unit. In this case we can only accept this UBI image in * read-only mode. */ if (ubi->vid_hdr_offset + UBI_VID_HDR_SIZE <= ubi->hdrs_min_io_size) { ubi_warn(ubi, "EC and VID headers are in the same minimal I/O unit, switch to read-only mode"); ubi->ro_mode = 1; } ubi->leb_size = ubi->peb_size - ubi->leb_start; if (!(ubi->mtd->flags & MTD_WRITEABLE)) { ubi_msg(ubi, "MTD device %d is write-protected, attach in read-only mode", ubi->mtd->index); ubi->ro_mode = 1; } /* * Note, ideally, we have to initialize @ubi->bad_peb_count here. But * unfortunately, MTD does not provide this information. We should loop * over all physical eraseblocks and invoke mtd->block_is_bad() for * each physical eraseblock. So, we leave @ubi->bad_peb_count * uninitialized so far. */ return 0; } /** * autoresize - re-size the volume which has the "auto-resize" flag set. * @ubi: UBI device description object * @vol_id: ID of the volume to re-size * * This function re-sizes the volume marked by the %UBI_VTBL_AUTORESIZE_FLG in * the volume table to the largest possible size. See comments in ubi-header.h * for more description of the flag. Returns zero in case of success and a * negative error code in case of failure. */ static int autoresize(struct ubi_device *ubi, int vol_id) { struct ubi_volume_desc desc; struct ubi_volume *vol = ubi->volumes[vol_id]; int err, old_reserved_pebs = vol->reserved_pebs; if (ubi->ro_mode) { ubi_warn(ubi, "skip auto-resize because of R/O mode"); return 0; } /* * Clear the auto-resize flag in the volume in-memory copy of the * volume table, and 'ubi_resize_volume()' will propagate this change * to the flash. */ ubi->vtbl[vol_id].flags &= ~UBI_VTBL_AUTORESIZE_FLG; if (ubi->avail_pebs == 0) { struct ubi_vtbl_record vtbl_rec; /* * No available PEBs to re-size the volume, clear the flag on * flash and exit. */ vtbl_rec = ubi->vtbl[vol_id]; err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); if (err) ubi_err(ubi, "cannot clean auto-resize flag for volume %d", vol_id); } else { desc.vol = vol; err = ubi_resize_volume(&desc, old_reserved_pebs + ubi->avail_pebs); if (err) ubi_err(ubi, "cannot auto-resize volume %d", vol_id); } if (err) return err; ubi_msg(ubi, "volume %d (\"%s\") re-sized from %d to %d LEBs", vol_id, vol->name, old_reserved_pebs, vol->reserved_pebs); return 0; } /** * ubi_attach_mtd_dev - attach an MTD device. * @mtd: MTD device description object * @ubi_num: number to assign to the new UBI device * @vid_hdr_offset: VID header offset * @max_beb_per1024: maximum expected number of bad PEB per 1024 PEBs * @disable_fm: whether disable fastmap * @need_resv_pool: whether reserve pebs to fill fm_pool * * This function attaches MTD device @mtd_dev to UBI and assign @ubi_num number * to the newly created UBI device, unless @ubi_num is %UBI_DEV_NUM_AUTO, in * which case this function finds a vacant device number and assigns it * automatically. Returns the new UBI device number in case of success and a * negative error code in case of failure. * * If @disable_fm is true, ubi doesn't create new fastmap even the module param * 'fm_autoconvert' is set, and existed old fastmap will be destroyed after * doing full scanning. * * Note, the invocations of this function has to be serialized by the * @ubi_devices_mutex. */ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset, int max_beb_per1024, bool disable_fm, bool need_resv_pool) { struct ubi_device *ubi; int i, err; if (max_beb_per1024 < 0 || max_beb_per1024 > MAX_MTD_UBI_BEB_LIMIT) return -EINVAL; if (!max_beb_per1024) max_beb_per1024 = CONFIG_MTD_UBI_BEB_LIMIT; /* * Check if we already have the same MTD device attached. * * Note, this function assumes that UBI devices creations and deletions * are serialized, so it does not take the &ubi_devices_lock. */ for (i = 0; i < UBI_MAX_DEVICES; i++) { ubi = ubi_devices[i]; if (ubi && mtd->index == ubi->mtd->index) { pr_err("ubi: mtd%d is already attached to ubi%d\n", mtd->index, i); return -EEXIST; } } /* * Make sure this MTD device is not emulated on top of an UBI volume * already. Well, generally this recursion works fine, but there are * different problems like the UBI module takes a reference to itself * by attaching (and thus, opening) the emulated MTD device. This * results in inability to unload the module. And in general it makes * no sense to attach emulated MTD devices, so we prohibit this. */ if (mtd->type == MTD_UBIVOLUME) { pr_err("ubi: refuse attaching mtd%d - it is already emulated on top of UBI\n", mtd->index); return -EINVAL; } /* * Both UBI and UBIFS have been designed for SLC NAND and NOR flashes. * MLC NAND is different and needs special care, otherwise UBI or UBIFS * will die soon and you will lose all your data. * Relax this rule if the partition we're attaching to operates in SLC * mode. */ if (mtd->type == MTD_MLCNANDFLASH && !(mtd->flags & MTD_SLC_ON_MLC_EMULATION)) { pr_err("ubi: refuse attaching mtd%d - MLC NAND is not supported\n", mtd->index); return -EINVAL; } /* UBI cannot work on flashes with zero erasesize. */ if (!mtd->erasesize) { pr_err("ubi: refuse attaching mtd%d - zero erasesize flash is not supported\n", mtd->index); return -EINVAL; } if (ubi_num == UBI_DEV_NUM_AUTO) { /* Search for an empty slot in the @ubi_devices array */ for (ubi_num = 0; ubi_num < UBI_MAX_DEVICES; ubi_num++) if (!ubi_devices[ubi_num]) break; if (ubi_num == UBI_MAX_DEVICES) { pr_err("ubi: only %d UBI devices may be created\n", UBI_MAX_DEVICES); return -ENFILE; } } else { if (ubi_num >= UBI_MAX_DEVICES) return -EINVAL; /* Make sure ubi_num is not busy */ if (ubi_devices[ubi_num]) { pr_err("ubi: ubi%i already exists\n", ubi_num); return -EEXIST; } } ubi = kzalloc(sizeof(struct ubi_device), GFP_KERNEL); if (!ubi) return -ENOMEM; device_initialize(&ubi->dev); ubi->dev.release = dev_release; ubi->dev.class = &ubi_class; ubi->dev.groups = ubi_dev_groups; ubi->dev.parent = &mtd->dev; ubi->mtd = mtd; ubi->ubi_num = ubi_num; ubi->vid_hdr_offset = vid_hdr_offset; ubi->autoresize_vol_id = -1; #ifdef CONFIG_MTD_UBI_FASTMAP ubi->fm_pool.used = ubi->fm_pool.size = 0; ubi->fm_wl_pool.used = ubi->fm_wl_pool.size = 0; /* * fm_pool.max_size is 5% of the total number of PEBs but it's also * between UBI_FM_MAX_POOL_SIZE and UBI_FM_MIN_POOL_SIZE. */ ubi->fm_pool.max_size = min(((int)mtd_div_by_eb(ubi->mtd->size, ubi->mtd) / 100) * 5, UBI_FM_MAX_POOL_SIZE); ubi->fm_pool.max_size = max(ubi->fm_pool.max_size, UBI_FM_MIN_POOL_SIZE); ubi->fm_wl_pool.max_size = ubi->fm_pool.max_size / 2; ubi->fm_pool_rsv_cnt = need_resv_pool ? ubi->fm_pool.max_size : 0; ubi->fm_disabled = (!fm_autoconvert || disable_fm) ? 1 : 0; if (fm_debug) ubi_enable_dbg_chk_fastmap(ubi); if (!ubi->fm_disabled && (int)mtd_div_by_eb(ubi->mtd->size, ubi->mtd) <= UBI_FM_MAX_START) { ubi_err(ubi, "More than %i PEBs are needed for fastmap, sorry.", UBI_FM_MAX_START); ubi->fm_disabled = 1; } ubi_msg(ubi, "default fastmap pool size: %d", ubi->fm_pool.max_size); ubi_msg(ubi, "default fastmap WL pool size: %d", ubi->fm_wl_pool.max_size); #else ubi->fm_disabled = 1; #endif mutex_init(&ubi->buf_mutex); mutex_init(&ubi->ckvol_mutex); mutex_init(&ubi->device_mutex); spin_lock_init(&ubi->volumes_lock); init_rwsem(&ubi->fm_protect); init_rwsem(&ubi->fm_eba_sem); ubi_msg(ubi, "attaching mtd%d", mtd->index); err = io_init(ubi, max_beb_per1024); if (err) goto out_free; err = -ENOMEM; ubi->peb_buf = vmalloc(ubi->peb_size); if (!ubi->peb_buf) goto out_free; #ifdef CONFIG_MTD_UBI_FASTMAP ubi->fm_size = ubi_calc_fm_size(ubi); ubi->fm_buf = vzalloc(ubi->fm_size); if (!ubi->fm_buf) goto out_free; #endif err = ubi_attach(ubi, disable_fm ? 1 : 0); if (err) { ubi_err(ubi, "failed to attach mtd%d, error %d", mtd->index, err); goto out_free; } if (ubi->autoresize_vol_id != -1) { err = autoresize(ubi, ubi->autoresize_vol_id); if (err) goto out_detach; } err = uif_init(ubi); if (err) goto out_detach; err = ubi_debugfs_init_dev(ubi); if (err) goto out_uif; ubi->bgt_thread = kthread_create(ubi_thread, ubi, "%s", ubi->bgt_name); if (IS_ERR(ubi->bgt_thread)) { err = PTR_ERR(ubi->bgt_thread); ubi_err(ubi, "cannot spawn \"%s\", error %d", ubi->bgt_name, err); goto out_debugfs; } ubi_msg(ubi, "attached mtd%d (name \"%s\", size %llu MiB)", mtd->index, mtd->name, ubi->flash_size >> 20); ubi_msg(ubi, "PEB size: %d bytes (%d KiB), LEB size: %d bytes", ubi->peb_size, ubi->peb_size >> 10, ubi->leb_size); ubi_msg(ubi, "min./max. I/O unit sizes: %d/%d, sub-page size %d", ubi->min_io_size, ubi->max_write_size, ubi->hdrs_min_io_size); ubi_msg(ubi, "VID header offset: %d (aligned %d), data offset: %d", ubi->vid_hdr_offset, ubi->vid_hdr_aloffset, ubi->leb_start); ubi_msg(ubi, "good PEBs: %d, bad PEBs: %d, corrupted PEBs: %d", ubi->good_peb_count, ubi->bad_peb_count, ubi->corr_peb_count); ubi_msg(ubi, "user volume: %d, internal volumes: %d, max. volumes count: %d", ubi->vol_count - UBI_INT_VOL_COUNT, UBI_INT_VOL_COUNT, ubi->vtbl_slots); ubi_msg(ubi, "max/mean erase counter: %d/%d, WL threshold: %d, image sequence number: %u", ubi->max_ec, ubi->mean_ec, CONFIG_MTD_UBI_WL_THRESHOLD, ubi->image_seq); ubi_msg(ubi, "available PEBs: %d, total reserved PEBs: %d, PEBs reserved for bad PEB handling: %d", ubi->avail_pebs, ubi->rsvd_pebs, ubi->beb_rsvd_pebs); /* * The below lock makes sure we do not race with 'ubi_thread()' which * checks @ubi->thread_enabled. Otherwise we may fail to wake it up. */ spin_lock(&ubi->wl_lock); ubi->thread_enabled = 1; wake_up_process(ubi->bgt_thread); spin_unlock(&ubi->wl_lock); ubi_devices[ubi_num] = ubi; ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL); return ubi_num; out_debugfs: ubi_debugfs_exit_dev(ubi); out_uif: uif_close(ubi); out_detach: ubi_wl_close(ubi); ubi_free_all_volumes(ubi); vfree(ubi->vtbl); out_free: vfree(ubi->peb_buf); vfree(ubi->fm_buf); put_device(&ubi->dev); return err; } /** * ubi_detach_mtd_dev - detach an MTD device. * @ubi_num: UBI device number to detach from * @anyway: detach MTD even if device reference count is not zero * * This function destroys an UBI device number @ubi_num and detaches the * underlying MTD device. Returns zero in case of success and %-EBUSY if the * UBI device is busy and cannot be destroyed, and %-EINVAL if it does not * exist. * * Note, the invocations of this function has to be serialized by the * @ubi_devices_mutex. */ int ubi_detach_mtd_dev(int ubi_num, int anyway) { struct ubi_device *ubi; if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) return -EINVAL; ubi = ubi_get_device(ubi_num); if (!ubi) return -EINVAL; spin_lock(&ubi_devices_lock); ubi->ref_count -= 1; if (ubi->ref_count) { if (!anyway) { spin_unlock(&ubi_devices_lock); return -EBUSY; } /* This may only happen if there is a bug */ ubi_err(ubi, "%s reference count %d, destroy anyway", ubi->ubi_name, ubi->ref_count); } ubi->is_dead = true; spin_unlock(&ubi_devices_lock); ubi_notify_all(ubi, UBI_VOLUME_SHUTDOWN, NULL); spin_lock(&ubi_devices_lock); put_device(&ubi->dev); ubi_devices[ubi_num] = NULL; spin_unlock(&ubi_devices_lock); ubi_assert(ubi_num == ubi->ubi_num); ubi_notify_all(ubi, UBI_VOLUME_REMOVED, NULL); ubi_msg(ubi, "detaching mtd%d", ubi->mtd->index); #ifdef CONFIG_MTD_UBI_FASTMAP /* If we don't write a new fastmap at detach time we lose all * EC updates that have been made since the last written fastmap. * In case of fastmap debugging we omit the update to simulate an * unclean shutdown. */ if (!ubi_dbg_chk_fastmap(ubi)) ubi_update_fastmap(ubi); #endif /* * Before freeing anything, we have to stop the background thread to * prevent it from doing anything on this device while we are freeing. */ if (ubi->bgt_thread) kthread_stop(ubi->bgt_thread); #ifdef CONFIG_MTD_UBI_FASTMAP cancel_work_sync(&ubi->fm_work); #endif ubi_debugfs_exit_dev(ubi); uif_close(ubi); ubi_wl_close(ubi); ubi_free_internal_volumes(ubi); vfree(ubi->vtbl); vfree(ubi->peb_buf); vfree(ubi->fm_buf); ubi_msg(ubi, "mtd%d is detached", ubi->mtd->index); put_mtd_device(ubi->mtd); put_device(&ubi->dev); return 0; } /** * open_mtd_by_chdev - open an MTD device by its character device node path. * @mtd_dev: MTD character device node path * * This helper function opens an MTD device by its character node device path. * Returns MTD device description object in case of success and a negative * error code in case of failure. */ static struct mtd_info * __init open_mtd_by_chdev(const char *mtd_dev) { int err, minor; struct path path; struct kstat stat; /* Probably this is an MTD character device node path */ err = kern_path(mtd_dev, LOOKUP_FOLLOW, &path); if (err) return ERR_PTR(err); err = vfs_getattr(&path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT); path_put(&path); if (err) return ERR_PTR(err); /* MTD device number is defined by the major / minor numbers */ if (MAJOR(stat.rdev) != MTD_CHAR_MAJOR || !S_ISCHR(stat.mode)) return ERR_PTR(-EINVAL); minor = MINOR(stat.rdev); if (minor & 1) /* * Just do not think the "/dev/mtdrX" devices support is need, * so do not support them to avoid doing extra work. */ return ERR_PTR(-EINVAL); return get_mtd_device(NULL, minor / 2); } /** * open_mtd_device - open MTD device by name, character device path, or number. * @mtd_dev: name, character device node path, or MTD device device number * * This function tries to open and MTD device described by @mtd_dev string, * which is first treated as ASCII MTD device number, and if it is not true, it * is treated as MTD device name, and if that is also not true, it is treated * as MTD character device node path. Returns MTD device description object in * case of success and a negative error code in case of failure. */ static struct mtd_info * __init open_mtd_device(const char *mtd_dev) { struct mtd_info *mtd; int mtd_num; char *endp; mtd_num = simple_strtoul(mtd_dev, &endp, 0); if (*endp != '\0' || mtd_dev == endp) { /* * This does not look like an ASCII integer, probably this is * MTD device name. */ mtd = get_mtd_device_nm(mtd_dev); if (PTR_ERR(mtd) == -ENODEV) /* Probably this is an MTD character device node path */ mtd = open_mtd_by_chdev(mtd_dev); } else mtd = get_mtd_device(NULL, mtd_num); return mtd; } static void ubi_notify_add(struct mtd_info *mtd) { struct device_node *np = mtd_get_of_node(mtd); int err; if (!of_device_is_compatible(np, "linux,ubi")) return; /* * we are already holding &mtd_table_mutex, but still need * to bump refcount */ err = __get_mtd_device(mtd); if (err) return; /* called while holding mtd_table_mutex */ mutex_lock_nested(&ubi_devices_mutex, SINGLE_DEPTH_NESTING); err = ubi_attach_mtd_dev(mtd, UBI_DEV_NUM_AUTO, 0, 0, false, false); mutex_unlock(&ubi_devices_mutex); if (err < 0) __put_mtd_device(mtd); } static void ubi_notify_remove(struct mtd_info *mtd) { /* do nothing for now */ } static struct mtd_notifier ubi_mtd_notifier = { .add = ubi_notify_add, .remove = ubi_notify_remove, }; static int __init ubi_init_attach(void) { int err, i, k; /* Attach MTD devices */ for (i = 0; i < mtd_devs; i++) { struct mtd_dev_param *p = &mtd_dev_param[i]; struct mtd_info *mtd; cond_resched(); mtd = open_mtd_device(p->name); if (IS_ERR(mtd)) { err = PTR_ERR(mtd); pr_err("UBI error: cannot open mtd %s, error %d\n", p->name, err); /* See comment below re-ubi_is_module(). */ if (ubi_is_module()) goto out_detach; continue; } mutex_lock(&ubi_devices_mutex); err = ubi_attach_mtd_dev(mtd, p->ubi_num, p->vid_hdr_offs, p->max_beb_per1024, p->enable_fm == 0, p->need_resv_pool != 0); mutex_unlock(&ubi_devices_mutex); if (err < 0) { pr_err("UBI error: cannot attach mtd%d\n", mtd->index); put_mtd_device(mtd); /* * Originally UBI stopped initializing on any error. * However, later on it was found out that this * behavior is not very good when UBI is compiled into * the kernel and the MTD devices to attach are passed * through the command line. Indeed, UBI failure * stopped whole boot sequence. * * To fix this, we changed the behavior for the * non-module case, but preserved the old behavior for * the module case, just for compatibility. This is a * little inconsistent, though. */ if (ubi_is_module()) goto out_detach; } } return 0; out_detach: for (k = 0; k < i; k++) if (ubi_devices[k]) { mutex_lock(&ubi_devices_mutex); ubi_detach_mtd_dev(ubi_devices[k]->ubi_num, 1); mutex_unlock(&ubi_devices_mutex); } return err; } #ifndef CONFIG_MTD_UBI_MODULE late_initcall(ubi_init_attach); #endif static int __init ubi_init(void) { int err; /* Ensure that EC and VID headers have correct size */ BUILD_BUG_ON(sizeof(struct ubi_ec_hdr) != 64); BUILD_BUG_ON(sizeof(struct ubi_vid_hdr) != 64); if (mtd_devs > UBI_MAX_DEVICES) { pr_err("UBI error: too many MTD devices, maximum is %d\n", UBI_MAX_DEVICES); return -EINVAL; } /* Create base sysfs directory and sysfs files */ err = class_register(&ubi_class); if (err < 0) return err; err = misc_register(&ubi_ctrl_cdev); if (err) { pr_err("UBI error: cannot register device\n"); goto out; } ubi_wl_entry_slab = kmem_cache_create("ubi_wl_entry_slab", sizeof(struct ubi_wl_entry), 0, 0, NULL); if (!ubi_wl_entry_slab) { err = -ENOMEM; goto out_dev_unreg; } err = ubi_debugfs_init(); if (err) goto out_slab; err = ubiblock_init(); if (err) { pr_err("UBI error: block: cannot initialize, error %d\n", err); /* See comment above re-ubi_is_module(). */ if (ubi_is_module()) goto out_debugfs; } register_mtd_user(&ubi_mtd_notifier); if (ubi_is_module()) { err = ubi_init_attach(); if (err) goto out_mtd_notifier; } return 0; out_mtd_notifier: unregister_mtd_user(&ubi_mtd_notifier); ubiblock_exit(); out_debugfs: ubi_debugfs_exit(); out_slab: kmem_cache_destroy(ubi_wl_entry_slab); out_dev_unreg: misc_deregister(&ubi_ctrl_cdev); out: class_unregister(&ubi_class); pr_err("UBI error: cannot initialize UBI, error %d\n", err); return err; } device_initcall(ubi_init); static void __exit ubi_exit(void) { int i; ubiblock_exit(); unregister_mtd_user(&ubi_mtd_notifier); for (i = 0; i < UBI_MAX_DEVICES; i++) if (ubi_devices[i]) { mutex_lock(&ubi_devices_mutex); ubi_detach_mtd_dev(ubi_devices[i]->ubi_num, 1); mutex_unlock(&ubi_devices_mutex); } ubi_debugfs_exit(); kmem_cache_destroy(ubi_wl_entry_slab); misc_deregister(&ubi_ctrl_cdev); class_unregister(&ubi_class); } module_exit(ubi_exit); /** * bytes_str_to_int - convert a number of bytes string into an integer. * @str: the string to convert * * This function returns positive resulting integer in case of success and a * negative error code in case of failure. */ static int bytes_str_to_int(const char *str) { char *endp; unsigned long result; result = simple_strtoul(str, &endp, 0); if (str == endp || result >= INT_MAX) { pr_err("UBI error: incorrect bytes count: \"%s\"\n", str); return -EINVAL; } switch (*endp) { case 'G': result *= 1024; fallthrough; case 'M': result *= 1024; fallthrough; case 'K': result *= 1024; break; case '\0': break; default: pr_err("UBI error: incorrect bytes count: \"%s\"\n", str); return -EINVAL; } return result; } /** * ubi_mtd_param_parse - parse the 'mtd=' UBI parameter. * @val: the parameter value to parse * @kp: not used * * This function returns zero in case of success and a negative error code in * case of error. */ static int ubi_mtd_param_parse(const char *val, const struct kernel_param *kp) { int i, len; struct mtd_dev_param *p; char buf[MTD_PARAM_LEN_MAX]; char *pbuf = &buf[0]; char *tokens[MTD_PARAM_MAX_COUNT], *token; if (!val) return -EINVAL; if (mtd_devs == UBI_MAX_DEVICES) { pr_err("UBI error: too many parameters, max. is %d\n", UBI_MAX_DEVICES); return -EINVAL; } len = strnlen(val, MTD_PARAM_LEN_MAX); if (len == MTD_PARAM_LEN_MAX) { pr_err("UBI error: parameter \"%s\" is too long, max. is %d\n", val, MTD_PARAM_LEN_MAX); return -EINVAL; } if (len == 0) { pr_warn("UBI warning: empty 'mtd=' parameter - ignored\n"); return 0; } strcpy(buf, val); /* Get rid of the final newline */ if (buf[len - 1] == '\n') buf[len - 1] = '\0'; for (i = 0; i < MTD_PARAM_MAX_COUNT; i++) tokens[i] = strsep(&pbuf, ","); if (pbuf) { pr_err("UBI error: too many arguments at \"%s\"\n", val); return -EINVAL; } p = &mtd_dev_param[mtd_devs]; strcpy(&p->name[0], tokens[0]); token = tokens[1]; if (token) { p->vid_hdr_offs = bytes_str_to_int(token); if (p->vid_hdr_offs < 0) return p->vid_hdr_offs; } token = tokens[2]; if (token) { int err = kstrtoint(token, 10, &p->max_beb_per1024); if (err) { pr_err("UBI error: bad value for max_beb_per1024 parameter: %s\n", token); return -EINVAL; } } token = tokens[3]; if (token) { int err = kstrtoint(token, 10, &p->ubi_num); if (err) { pr_err("UBI error: bad value for ubi_num parameter: %s\n", token); return -EINVAL; } } else p->ubi_num = UBI_DEV_NUM_AUTO; token = tokens[4]; if (token) { int err = kstrtoint(token, 10, &p->enable_fm); if (err) { pr_err("UBI error: bad value for enable_fm parameter: %s\n", token); return -EINVAL; } } else p->enable_fm = 0; token = tokens[5]; if (token) { int err = kstrtoint(token, 10, &p->need_resv_pool); if (err) { pr_err("UBI error: bad value for need_resv_pool parameter: %s\n", token); return -EINVAL; } } else p->need_resv_pool = 0; mtd_devs += 1; return 0; } module_param_call(mtd, ubi_mtd_param_parse, NULL, NULL, 0400); MODULE_PARM_DESC(mtd, "MTD devices to attach. Parameter format: mtd=<name|num|path>[,<vid_hdr_offs>[,max_beb_per1024[,ubi_num]]].\n" "Multiple \"mtd\" parameters may be specified.\n" "MTD devices may be specified by their number, name, or path to the MTD character device node.\n" "Optional \"vid_hdr_offs\" parameter specifies UBI VID header position to be used by UBI. (default value if 0)\n" "Optional \"max_beb_per1024\" parameter specifies the maximum expected bad eraseblock per 1024 eraseblocks. (default value (" __stringify(CONFIG_MTD_UBI_BEB_LIMIT) ") if 0)\n" "Optional \"ubi_num\" parameter specifies UBI device number which have to be assigned to the newly created UBI device (assigned automatically by default)\n" "Optional \"enable_fm\" parameter determines whether to enable fastmap during attach. If the value is non-zero, fastmap is enabled. Default value is 0.\n" "Optional \"need_resv_pool\" parameter determines whether to reserve pool->max_size pebs during attach. If the value is non-zero, peb reservation is enabled. Default value is 0.\n" "\n" "Example 1: mtd=/dev/mtd0 - attach MTD device /dev/mtd0.\n" "Example 2: mtd=content,1984 mtd=4 - attach MTD device with name \"content\" using VID header offset 1984, and MTD device number 4 with default VID header offset.\n" "Example 3: mtd=/dev/mtd1,0,25 - attach MTD device /dev/mtd1 using default VID header offset and reserve 25*nand_size_in_blocks/1024 erase blocks for bad block handling.\n" "Example 4: mtd=/dev/mtd1,0,0,5 - attach MTD device /dev/mtd1 to UBI 5 and using default values for the other fields.\n" "example 5: mtd=1,0,0,5 mtd=2,0,0,6,1 - attach MTD device /dev/mtd1 to UBI 5 and disable fastmap; attach MTD device /dev/mtd2 to UBI 6 and enable fastmap.(only works when fastmap is enabled and fm_autoconvert=Y).\n" "\t(e.g. if the NAND *chipset* has 4096 PEB, 100 will be reserved for this UBI device)."); #ifdef CONFIG_MTD_UBI_FASTMAP module_param(fm_autoconvert, bool, 0644); MODULE_PARM_DESC(fm_autoconvert, "Set this parameter to enable fastmap automatically on images without a fastmap."); module_param(fm_debug, bool, 0); MODULE_PARM_DESC(fm_debug, "Set this parameter to enable fastmap debugging by default. Warning, this will make fastmap slow!"); #endif MODULE_VERSION(__stringify(UBI_VERSION)); MODULE_DESCRIPTION("UBI - Unsorted Block Images"); MODULE_AUTHOR("Artem Bityutskiy"); MODULE_LICENSE("GPL");
9 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 // SPDX-License-Identifier: GPL-2.0-only /* * linux/mm/oom_kill.c * * Copyright (C) 1998,2000 Rik van Riel * Thanks go out to Claus Fischer for some serious inspiration and * for goading me into coding this file... * Copyright (C) 2010 Google, Inc. * Rewritten by David Rientjes * * The routines in this file are used to kill a process when * we're seriously out of memory. This gets called from __alloc_pages() * in mm/page_alloc.c when we really run out of memory. * * Since we won't call these routines often (on a well-configured * machine) this file will double as a 'coding guide' and a signpost * for newbie kernel hackers. It features several pointers to major * kernel subsystems and hints as to where to find out what things do. */ #include <linux/oom.h> #include <linux/mm.h> #include <linux/err.h> #include <linux/gfp.h> #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/sched/coredump.h> #include <linux/sched/task.h> #include <linux/sched/debug.h> #include <linux/swap.h> #include <linux/syscalls.h> #include <linux/timex.h> #include <linux/jiffies.h> #include <linux/cpuset.h> #include <linux/export.h> #include <linux/notifier.h> #include <linux/memcontrol.h> #include <linux/mempolicy.h> #include <linux/security.h> #include <linux/ptrace.h> #include <linux/freezer.h> #include <linux/ftrace.h> #include <linux/ratelimit.h> #include <linux/kthread.h> #include <linux/init.h> #include <linux/mmu_notifier.h> #include <linux/cred.h> #include <asm/tlb.h> #include "internal.h" #include "slab.h" #define CREATE_TRACE_POINTS #include <trace/events/oom.h> static int sysctl_panic_on_oom; static int sysctl_oom_kill_allocating_task; static int sysctl_oom_dump_tasks = 1; /* * Serializes oom killer invocations (out_of_memory()) from all contexts to * prevent from over eager oom killing (e.g. when the oom killer is invoked * from different domains). * * oom_killer_disable() relies on this lock to stabilize oom_killer_disabled * and mark_oom_victim */ DEFINE_MUTEX(oom_lock); /* Serializes oom_score_adj and oom_score_adj_min updates */ DEFINE_MUTEX(oom_adj_mutex); static inline bool is_memcg_oom(struct oom_control *oc) { return oc->memcg != NULL; } #ifdef CONFIG_NUMA /** * oom_cpuset_eligible() - check task eligibility for kill * @start: task struct of which task to consider * @oc: pointer to struct oom_control * * Task eligibility is determined by whether or not a candidate task, @tsk, * shares the same mempolicy nodes as current if it is bound by such a policy * and whether or not it has the same set of allowed cpuset nodes. * * This function is assuming oom-killer context and 'current' has triggered * the oom-killer. */ static bool oom_cpuset_eligible(struct task_struct *start, struct oom_control *oc) { struct task_struct *tsk; bool ret = false; const nodemask_t *mask = oc->nodemask; rcu_read_lock(); for_each_thread(start, tsk) { if (mask) { /* * If this is a mempolicy constrained oom, tsk's * cpuset is irrelevant. Only return true if its * mempolicy intersects current, otherwise it may be * needlessly killed. */ ret = mempolicy_in_oom_domain(tsk, mask); } else { /* * This is not a mempolicy constrained oom, so only * check the mems of tsk's cpuset. */ ret = cpuset_mems_allowed_intersects(current, tsk); } if (ret) break; } rcu_read_unlock(); return ret; } #else static bool oom_cpuset_eligible(struct task_struct *tsk, struct oom_control *oc) { return true; } #endif /* CONFIG_NUMA */ /* * The process p may have detached its own ->mm while exiting or through * kthread_use_mm(), but one or more of its subthreads may still have a valid * pointer. Return p, or any of its subthreads with a valid ->mm, with * task_lock() held. */ struct task_struct *find_lock_task_mm(struct task_struct *p) { struct task_struct *t; rcu_read_lock(); for_each_thread(p, t) { task_lock(t); if (likely(t->mm)) goto found; task_unlock(t); } t = NULL; found: rcu_read_unlock(); return t; } /* * order == -1 means the oom kill is required by sysrq, otherwise only * for display purposes. */ static inline bool is_sysrq_oom(struct oom_control *oc) { return oc->order == -1; } /* return true if the task is not adequate as candidate victim task. */ static bool oom_unkillable_task(struct task_struct *p) { if (is_global_init(p)) return true; if (p->flags & PF_KTHREAD) return true; return false; } /* * Check whether unreclaimable slab amount is greater than * all user memory(LRU pages). * dump_unreclaimable_slab() could help in the case that * oom due to too much unreclaimable slab used by kernel. */ static bool should_dump_unreclaim_slab(void) { unsigned long nr_lru; nr_lru = global_node_page_state(NR_ACTIVE_ANON) + global_node_page_state(NR_INACTIVE_ANON) + global_node_page_state(NR_ACTIVE_FILE) + global_node_page_state(NR_INACTIVE_FILE) + global_node_page_state(NR_ISOLATED_ANON) + global_node_page_state(NR_ISOLATED_FILE) + global_node_page_state(NR_UNEVICTABLE); return (global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B) > nr_lru); } /** * oom_badness - heuristic function to determine which candidate task to kill * @p: task struct of which task we should calculate * @totalpages: total present RAM allowed for page allocation * * The heuristic for determining which task to kill is made to be as simple and * predictable as possible. The goal is to return the highest value for the * task consuming the most memory to avoid subsequent oom failures. */ long oom_badness(struct task_struct *p, unsigned long totalpages) { long points; long adj; if (oom_unkillable_task(p)) return LONG_MIN; p = find_lock_task_mm(p); if (!p) return LONG_MIN; /* * Do not even consider tasks which are explicitly marked oom * unkillable or have been already oom reaped or the are in * the middle of vfork */ adj = (long)p->signal->oom_score_adj; if (adj == OOM_SCORE_ADJ_MIN || test_bit(MMF_OOM_SKIP, &p->mm->flags) || in_vfork(p)) { task_unlock(p); return LONG_MIN; } /* * The baseline for the badness score is the proportion of RAM that each * task's rss, pagetable and swap space use. */ points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) + mm_pgtables_bytes(p->mm) / PAGE_SIZE; task_unlock(p); /* Normalize to oom_score_adj units */ adj *= totalpages / 1000; points += adj; return points; } static const char * const oom_constraint_text[] = { [CONSTRAINT_NONE] = "CONSTRAINT_NONE", [CONSTRAINT_CPUSET] = "CONSTRAINT_CPUSET", [CONSTRAINT_MEMORY_POLICY] = "CONSTRAINT_MEMORY_POLICY", [CONSTRAINT_MEMCG] = "CONSTRAINT_MEMCG", }; /* * Determine the type of allocation constraint. */ static enum oom_constraint constrained_alloc(struct oom_control *oc) { struct zone *zone; struct zoneref *z; enum zone_type highest_zoneidx = gfp_zone(oc->gfp_mask); bool cpuset_limited = false; int nid; if (is_memcg_oom(oc)) { oc->totalpages = mem_cgroup_get_max(oc->memcg) ?: 1; return CONSTRAINT_MEMCG; } /* Default to all available memory */ oc->totalpages = totalram_pages() + total_swap_pages; if (!IS_ENABLED(CONFIG_NUMA)) return CONSTRAINT_NONE; if (!oc->zonelist) return CONSTRAINT_NONE; /* * Reach here only when __GFP_NOFAIL is used. So, we should avoid * to kill current.We have to random task kill in this case. * Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now. */ if (oc->gfp_mask & __GFP_THISNODE) return CONSTRAINT_NONE; /* * This is not a __GFP_THISNODE allocation, so a truncated nodemask in * the page allocator means a mempolicy is in effect. Cpuset policy * is enforced in get_page_from_freelist(). */ if (oc->nodemask && !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) { oc->totalpages = total_swap_pages; for_each_node_mask(nid, *oc->nodemask) oc->totalpages += node_present_pages(nid); return CONSTRAINT_MEMORY_POLICY; } /* Check this allocation failure is caused by cpuset's wall function */ for_each_zone_zonelist_nodemask(zone, z, oc->zonelist, highest_zoneidx, oc->nodemask) if (!cpuset_zone_allowed(zone, oc->gfp_mask)) cpuset_limited = true; if (cpuset_limited) { oc->totalpages = total_swap_pages; for_each_node_mask(nid, cpuset_current_mems_allowed) oc->totalpages += node_present_pages(nid); return CONSTRAINT_CPUSET; } return CONSTRAINT_NONE; } static int oom_evaluate_task(struct task_struct *task, void *arg) { struct oom_control *oc = arg; long points; if (oom_unkillable_task(task)) goto next; /* p may not have freeable memory in nodemask */ if (!is_memcg_oom(oc) && !oom_cpuset_eligible(task, oc)) goto next; /* * This task already has access to memory reserves and is being killed. * Don't allow any other task to have access to the reserves unless * the task has MMF_OOM_SKIP because chances that it would release * any memory is quite low. */ if (!is_sysrq_oom(oc) && tsk_is_oom_victim(task)) { if (test_bit(MMF_OOM_SKIP, &task->signal->oom_mm->flags)) goto next; goto abort; } /* * If task is allocating a lot of memory and has been marked to be * killed first if it triggers an oom, then select it. */ if (oom_task_origin(task)) { points = LONG_MAX; goto select; } points = oom_badness(task, oc->totalpages); if (points == LONG_MIN || points < oc->chosen_points) goto next; select: if (oc->chosen) put_task_struct(oc->chosen); get_task_struct(task); oc->chosen = task; oc->chosen_points = points; next: return 0; abort: if (oc->chosen) put_task_struct(oc->chosen); oc->chosen = (void *)-1UL; return 1; } /* * Simple selection loop. We choose the process with the highest number of * 'points'. In case scan was aborted, oc->chosen is set to -1. */ static void select_bad_process(struct oom_control *oc) { oc->chosen_points = LONG_MIN; if (is_memcg_oom(oc)) mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc); else { struct task_struct *p; rcu_read_lock(); for_each_process(p) if (oom_evaluate_task(p, oc)) break; rcu_read_unlock(); } } static int dump_task(struct task_struct *p, void *arg) { struct oom_control *oc = arg; struct task_struct *task; if (oom_unkillable_task(p)) return 0; /* p may not have freeable memory in nodemask */ if (!is_memcg_oom(oc) && !oom_cpuset_eligible(p, oc)) return 0; task = find_lock_task_mm(p); if (!task) { /* * All of p's threads have already detached their mm's. There's * no need to report them; they can't be oom killed anyway. */ return 0; } pr_info("[%7d] %5d %5d %8lu %8lu %8lu %8lu %9lu %8ld %8lu %5hd %s\n", task->pid, from_kuid(&init_user_ns, task_uid(task)), task->tgid, task->mm->total_vm, get_mm_rss(task->mm), get_mm_counter(task->mm, MM_ANONPAGES), get_mm_counter(task->mm, MM_FILEPAGES), get_mm_counter(task->mm, MM_SHMEMPAGES), mm_pgtables_bytes(task->mm), get_mm_counter(task->mm, MM_SWAPENTS), task->signal->oom_score_adj, task->comm); task_unlock(task); return 0; } /** * dump_tasks - dump current memory state of all system tasks * @oc: pointer to struct oom_control * * Dumps the current memory state of all eligible tasks. Tasks not in the same * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes * are not shown. * State information includes task's pid, uid, tgid, vm size, rss, * pgtables_bytes, swapents, oom_score_adj value, and name. */ static void dump_tasks(struct oom_control *oc) { pr_info("Tasks state (memory values in pages):\n"); pr_info("[ pid ] uid tgid total_vm rss rss_anon rss_file rss_shmem pgtables_bytes swapents oom_score_adj name\n"); if (is_memcg_oom(oc)) mem_cgroup_scan_tasks(oc->memcg, dump_task, oc); else { struct task_struct *p; rcu_read_lock(); for_each_process(p) dump_task(p, oc); rcu_read_unlock(); } } static void dump_oom_victim(struct oom_control *oc, struct task_struct *victim) { /* one line summary of the oom killer context. */ pr_info("oom-kill:constraint=%s,nodemask=%*pbl", oom_constraint_text[oc->constraint], nodemask_pr_args(oc->nodemask)); cpuset_print_current_mems_allowed(); mem_cgroup_print_oom_context(oc->memcg, victim); pr_cont(",task=%s,pid=%d,uid=%d\n", victim->comm, victim->pid, from_kuid(&init_user_ns, task_uid(victim))); } static void dump_header(struct oom_control *oc) { pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, oom_score_adj=%hd\n", current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order, current->signal->oom_score_adj); if (!IS_ENABLED(CONFIG_COMPACTION) && oc->order) pr_warn("COMPACTION is disabled!!!\n"); dump_stack(); if (is_memcg_oom(oc)) mem_cgroup_print_oom_meminfo(oc->memcg); else { __show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask, gfp_zone(oc->gfp_mask)); if (should_dump_unreclaim_slab()) dump_unreclaimable_slab(); } if (sysctl_oom_dump_tasks) dump_tasks(oc); } /* * Number of OOM victims in flight */ static atomic_t oom_victims = ATOMIC_INIT(0); static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait); static bool oom_killer_disabled __read_mostly; /* * task->mm can be NULL if the task is the exited group leader. So to * determine whether the task is using a particular mm, we examine all the * task's threads: if one of those is using this mm then this task was also * using it. */ bool process_shares_mm(struct task_struct *p, struct mm_struct *mm) { struct task_struct *t; for_each_thread(p, t) { struct mm_struct *t_mm = READ_ONCE(t->mm); if (t_mm) return t_mm == mm; } return false; } #ifdef CONFIG_MMU /* * OOM Reaper kernel thread which tries to reap the memory used by the OOM * victim (if that is possible) to help the OOM killer to move on. */ static struct task_struct *oom_reaper_th; static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait); static struct task_struct *oom_reaper_list; static DEFINE_SPINLOCK(oom_reaper_lock); static bool __oom_reap_task_mm(struct mm_struct *mm) { struct vm_area_struct *vma; bool ret = true; VMA_ITERATOR(vmi, mm, 0); /* * Tell all users of get_user/copy_from_user etc... that the content * is no longer stable. No barriers really needed because unmapping * should imply barriers already and the reader would hit a page fault * if it stumbled over a reaped memory. */ set_bit(MMF_UNSTABLE, &mm->flags); for_each_vma(vmi, vma) { if (vma->vm_flags & (VM_HUGETLB|VM_PFNMAP)) continue; /* * Only anonymous pages have a good chance to be dropped * without additional steps which we cannot afford as we * are OOM already. * * We do not even care about fs backed pages because all * which are reclaimable have already been reclaimed and * we do not want to block exit_mmap by keeping mm ref * count elevated without a good reason. */ if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) { struct mmu_notifier_range range; struct mmu_gather tlb; mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, mm, vma->vm_start, vma->vm_end); tlb_gather_mmu(&tlb, mm); if (mmu_notifier_invalidate_range_start_nonblock(&range)) { tlb_finish_mmu(&tlb); ret = false; continue; } unmap_page_range(&tlb, vma, range.start, range.end, NULL); mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb); } } return ret; } /* * Reaps the address space of the give task. * * Returns true on success and false if none or part of the address space * has been reclaimed and the caller should retry later. */ static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) { bool ret = true; if (!mmap_read_trylock(mm)) { trace_skip_task_reaping(tsk->pid); return false; } /* * MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't * work on the mm anymore. The check for MMF_OOM_SKIP must run * under mmap_lock for reading because it serializes against the * mmap_write_lock();mmap_write_unlock() cycle in exit_mmap(). */ if (test_bit(MMF_OOM_SKIP, &mm->flags)) { trace_skip_task_reaping(tsk->pid); goto out_unlock; } trace_start_task_reaping(tsk->pid); /* failed to reap part of the address space. Try again later */ ret = __oom_reap_task_mm(mm); if (!ret) goto out_finish; pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", task_pid_nr(tsk), tsk->comm, K(get_mm_counter(mm, MM_ANONPAGES)), K(get_mm_counter(mm, MM_FILEPAGES)), K(get_mm_counter(mm, MM_SHMEMPAGES))); out_finish: trace_finish_task_reaping(tsk->pid); out_unlock: mmap_read_unlock(mm); return ret; } #define MAX_OOM_REAP_RETRIES 10 static void oom_reap_task(struct task_struct *tsk) { int attempts = 0; struct mm_struct *mm = tsk->signal->oom_mm; /* Retry the mmap_read_trylock(mm) a few times */ while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm)) schedule_timeout_idle(HZ/10); if (attempts <= MAX_OOM_REAP_RETRIES || test_bit(MMF_OOM_SKIP, &mm->flags)) goto done; pr_info("oom_reaper: unable to reap pid:%d (%s)\n", task_pid_nr(tsk), tsk->comm); sched_show_task(tsk); debug_show_all_locks(); done: tsk->oom_reaper_list = NULL; /* * Hide this mm from OOM killer because it has been either reaped or * somebody can't call mmap_write_unlock(mm). */ set_bit(MMF_OOM_SKIP, &mm->flags); /* Drop a reference taken by queue_oom_reaper */ put_task_struct(tsk); } static int oom_reaper(void *unused) { set_freezable(); while (true) { struct task_struct *tsk = NULL; wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL); spin_lock_irq(&oom_reaper_lock); if (oom_reaper_list != NULL) { tsk = oom_reaper_list; oom_reaper_list = tsk->oom_reaper_list; } spin_unlock_irq(&oom_reaper_lock); if (tsk) oom_reap_task(tsk); } return 0; } static void wake_oom_reaper(struct timer_list *timer) { struct task_struct *tsk = container_of(timer, struct task_struct, oom_reaper_timer); struct mm_struct *mm = tsk->signal->oom_mm; unsigned long flags; /* The victim managed to terminate on its own - see exit_mmap */ if (test_bit(MMF_OOM_SKIP, &mm->flags)) { put_task_struct(tsk); return; } spin_lock_irqsave(&oom_reaper_lock, flags); tsk->oom_reaper_list = oom_reaper_list; oom_reaper_list = tsk; spin_unlock_irqrestore(&oom_reaper_lock, flags); trace_wake_reaper(tsk->pid); wake_up(&oom_reaper_wait); } /* * Give the OOM victim time to exit naturally before invoking the oom_reaping. * The timers timeout is arbitrary... the longer it is, the longer the worst * case scenario for the OOM can take. If it is too small, the oom_reaper can * get in the way and release resources needed by the process exit path. * e.g. The futex robust list can sit in Anon|Private memory that gets reaped * before the exit path is able to wake the futex waiters. */ #define OOM_REAPER_DELAY (2*HZ) static void queue_oom_reaper(struct task_struct *tsk) { /* mm is already queued? */ if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags)) return; get_task_struct(tsk); timer_setup(&tsk->oom_reaper_timer, wake_oom_reaper, 0); tsk->oom_reaper_timer.expires = jiffies + OOM_REAPER_DELAY; add_timer(&tsk->oom_reaper_timer); } #ifdef CONFIG_SYSCTL static struct ctl_table vm_oom_kill_table[] = { { .procname = "panic_on_oom", .data = &sysctl_panic_on_oom, .maxlen = sizeof(sysctl_panic_on_oom), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, { .procname = "oom_kill_allocating_task", .data = &sysctl_oom_kill_allocating_task, .maxlen = sizeof(sysctl_oom_kill_allocating_task), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "oom_dump_tasks", .data = &sysctl_oom_dump_tasks, .maxlen = sizeof(sysctl_oom_dump_tasks), .mode = 0644, .proc_handler = proc_dointvec, }, }; #endif static int __init oom_init(void) { oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper"); #ifdef CONFIG_SYSCTL register_sysctl_init("vm", vm_oom_kill_table); #endif return 0; } subsys_initcall(oom_init) #else static inline void queue_oom_reaper(struct task_struct *tsk) { } #endif /* CONFIG_MMU */ /** * mark_oom_victim - mark the given task as OOM victim * @tsk: task to mark * * Has to be called with oom_lock held and never after * oom has been disabled already. * * tsk->mm has to be non NULL and caller has to guarantee it is stable (either * under task_lock or operate on the current). */ static void mark_oom_victim(struct task_struct *tsk) { const struct cred *cred; struct mm_struct *mm = tsk->mm; WARN_ON(oom_killer_disabled); /* OOM killer might race with memcg OOM */ if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE)) return; /* oom_mm is bound to the signal struct life time. */ if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) mmgrab(tsk->signal->oom_mm); /* * Make sure that the task is woken up from uninterruptible sleep * if it is frozen because OOM killer wouldn't be able to free * any memory and livelock. freezing_slow_path will tell the freezer * that TIF_MEMDIE tasks should be ignored. */ __thaw_task(tsk); atomic_inc(&oom_victims); cred = get_task_cred(tsk); trace_mark_victim(tsk, cred->uid.val); put_cred(cred); } /** * exit_oom_victim - note the exit of an OOM victim */ void exit_oom_victim(void) { clear_thread_flag(TIF_MEMDIE); if (!atomic_dec_return(&oom_victims)) wake_up_all(&oom_victims_wait); } /** * oom_killer_enable - enable OOM killer */ void oom_killer_enable(void) { oom_killer_disabled = false; pr_info("OOM killer enabled.\n"); } /** * oom_killer_disable - disable OOM killer * @timeout: maximum timeout to wait for oom victims in jiffies * * Forces all page allocations to fail rather than trigger OOM killer. * Will block and wait until all OOM victims are killed or the given * timeout expires. * * The function cannot be called when there are runnable user tasks because * the userspace would see unexpected allocation failures as a result. Any * new usage of this function should be consulted with MM people. * * Returns true if successful and false if the OOM killer cannot be * disabled. */ bool oom_killer_disable(signed long timeout) { signed long ret; /* * Make sure to not race with an ongoing OOM killer. Check that the * current is not killed (possibly due to sharing the victim's memory). */ if (mutex_lock_killable(&oom_lock)) return false; oom_killer_disabled = true; mutex_unlock(&oom_lock); ret = wait_event_interruptible_timeout(oom_victims_wait, !atomic_read(&oom_victims), timeout); if (ret <= 0) { oom_killer_enable(); return false; } pr_info("OOM killer disabled.\n"); return true; } static inline bool __task_will_free_mem(struct task_struct *task) { struct signal_struct *sig = task->signal; /* * A coredumping process may sleep for an extended period in * coredump_task_exit(), so the oom killer cannot assume that * the process will promptly exit and release memory. */ if (sig->core_state) return false; if (sig->flags & SIGNAL_GROUP_EXIT) return true; if (thread_group_empty(task) && (task->flags & PF_EXITING)) return true; return false; } /* * Checks whether the given task is dying or exiting and likely to * release its address space. This means that all threads and processes * sharing the same mm have to be killed or exiting. * Caller has to make sure that task->mm is stable (hold task_lock or * it operates on the current). */ static bool task_will_free_mem(struct task_struct *task) { struct mm_struct *mm = task->mm; struct task_struct *p; bool ret = true; /* * Skip tasks without mm because it might have passed its exit_mm and * exit_oom_victim. oom_reaper could have rescued that but do not rely * on that for now. We can consider find_lock_task_mm in future. */ if (!mm) return false; if (!__task_will_free_mem(task)) return false; /* * This task has already been drained by the oom reaper so there are * only small chances it will free some more */ if (test_bit(MMF_OOM_SKIP, &mm->flags)) return false; if (atomic_read(&mm->mm_users) <= 1) return true; /* * Make sure that all tasks which share the mm with the given tasks * are dying as well to make sure that a) nobody pins its mm and * b) the task is also reapable by the oom reaper. */ rcu_read_lock(); for_each_process(p) { if (!process_shares_mm(p, mm)) continue; if (same_thread_group(task, p)) continue; ret = __task_will_free_mem(p); if (!ret) break; } rcu_read_unlock(); return ret; } static void __oom_kill_process(struct task_struct *victim, const char *message) { struct task_struct *p; struct mm_struct *mm; bool can_oom_reap = true; p = find_lock_task_mm(victim); if (!p) { pr_info("%s: OOM victim %d (%s) is already exiting. Skip killing the task\n", message, task_pid_nr(victim), victim->comm); put_task_struct(victim); return; } else if (victim != p) { get_task_struct(p); put_task_struct(victim); victim = p; } /* Get a reference to safely compare mm after task_unlock(victim) */ mm = victim->mm; mmgrab(mm); /* Raise event before sending signal: task reaper must see this */ count_vm_event(OOM_KILL); memcg_memory_event_mm(mm, MEMCG_OOM_KILL); /* * We should send SIGKILL before granting access to memory reserves * in order to prevent the OOM victim from depleting the memory * reserves from the user space under its control. */ do_send_sig_info(SIGKILL, SEND_SIG_PRIV, victim, PIDTYPE_TGID); mark_oom_victim(victim); pr_err("%s: Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB, UID:%u pgtables:%lukB oom_score_adj:%hd\n", message, task_pid_nr(victim), victim->comm, K(mm->total_vm), K(get_mm_counter(mm, MM_ANONPAGES)), K(get_mm_counter(mm, MM_FILEPAGES)), K(get_mm_counter(mm, MM_SHMEMPAGES)), from_kuid(&init_user_ns, task_uid(victim)), mm_pgtables_bytes(mm) >> 10, victim->signal->oom_score_adj); task_unlock(victim); /* * Kill all user processes sharing victim->mm in other thread groups, if * any. They don't get access to memory reserves, though, to avoid * depletion of all memory. This prevents mm->mmap_lock livelock when an * oom killed thread cannot exit because it requires the semaphore and * its contended by another thread trying to allocate memory itself. * That thread will now get access to memory reserves since it has a * pending fatal signal. */ rcu_read_lock(); for_each_process(p) { if (!process_shares_mm(p, mm)) continue; if (same_thread_group(p, victim)) continue; if (is_global_init(p)) { can_oom_reap = false; set_bit(MMF_OOM_SKIP, &mm->flags); pr_info("oom killer %d (%s) has mm pinned by %d (%s)\n", task_pid_nr(victim), victim->comm, task_pid_nr(p), p->comm); continue; } /* * No kthread_use_mm() user needs to read from the userspace so * we are ok to reap it. */ if (unlikely(p->flags & PF_KTHREAD)) continue; do_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_TGID); } rcu_read_unlock(); if (can_oom_reap) queue_oom_reaper(victim); mmdrop(mm); put_task_struct(victim); } /* * Kill provided task unless it's secured by setting * oom_score_adj to OOM_SCORE_ADJ_MIN. */ static int oom_kill_memcg_member(struct task_struct *task, void *message) { if (task->signal->oom_score_adj != OOM_SCORE_ADJ_MIN && !is_global_init(task)) { get_task_struct(task); __oom_kill_process(task, message); } return 0; } static void oom_kill_process(struct oom_control *oc, const char *message) { struct task_struct *victim = oc->chosen; struct mem_cgroup *oom_group; static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); /* * If the task is already exiting, don't alarm the sysadmin or kill * its children or threads, just give it access to memory reserves * so it can die quickly */ task_lock(victim); if (task_will_free_mem(victim)) { mark_oom_victim(victim); queue_oom_reaper(victim); task_unlock(victim); put_task_struct(victim); return; } task_unlock(victim); if (__ratelimit(&oom_rs)) { dump_header(oc); dump_oom_victim(oc, victim); } /* * Do we need to kill the entire memory cgroup? * Or even one of the ancestor memory cgroups? * Check this out before killing the victim task. */ oom_group = mem_cgroup_get_oom_group(victim, oc->memcg); __oom_kill_process(victim, message); /* * If necessary, kill all tasks in the selected memory cgroup. */ if (oom_group) { memcg_memory_event(oom_group, MEMCG_OOM_GROUP_KILL); mem_cgroup_print_oom_group(oom_group); mem_cgroup_scan_tasks(oom_group, oom_kill_memcg_member, (void *)message); mem_cgroup_put(oom_group); } } /* * Determines whether the kernel must panic because of the panic_on_oom sysctl. */ static void check_panic_on_oom(struct oom_control *oc) { if (likely(!sysctl_panic_on_oom)) return; if (sysctl_panic_on_oom != 2) { /* * panic_on_oom == 1 only affects CONSTRAINT_NONE, the kernel * does not panic for cpuset, mempolicy, or memcg allocation * failures. */ if (oc->constraint != CONSTRAINT_NONE) return; } /* Do not panic for oom kills triggered by sysrq */ if (is_sysrq_oom(oc)) return; dump_header(oc); panic("Out of memory: %s panic_on_oom is enabled\n", sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); } static BLOCKING_NOTIFIER_HEAD(oom_notify_list); int register_oom_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&oom_notify_list, nb); } EXPORT_SYMBOL_GPL(register_oom_notifier); int unregister_oom_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&oom_notify_list, nb); } EXPORT_SYMBOL_GPL(unregister_oom_notifier); /** * out_of_memory - kill the "best" process when we run out of memory * @oc: pointer to struct oom_control * * If we run out of memory, we have the choice between either * killing a random task (bad), letting the system crash (worse) * OR try to be smart about which process to kill. Note that we * don't have to be perfect here, we just have to be good. */ bool out_of_memory(struct oom_control *oc) { unsigned long freed = 0; if (oom_killer_disabled) return false; if (!is_memcg_oom(oc)) { blocking_notifier_call_chain(&oom_notify_list, 0, &freed); if (freed > 0 && !is_sysrq_oom(oc)) /* Got some memory back in the last second. */ return true; } /* * If current has a pending SIGKILL or is exiting, then automatically * select it. The goal is to allow it to allocate so that it may * quickly exit and free its memory. */ if (task_will_free_mem(current)) { mark_oom_victim(current); queue_oom_reaper(current); return true; } /* * The OOM killer does not compensate for IO-less reclaim. * But mem_cgroup_oom() has to invoke the OOM killer even * if it is a GFP_NOFS allocation. */ if (!(oc->gfp_mask & __GFP_FS) && !is_memcg_oom(oc)) return true; /* * Check if there were limitations on the allocation (only relevant for * NUMA and memcg) that may require different handling. */ oc->constraint = constrained_alloc(oc); if (oc->constraint != CONSTRAINT_MEMORY_POLICY) oc->nodemask = NULL; check_panic_on_oom(oc); if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task && current->mm && !oom_unkillable_task(current) && oom_cpuset_eligible(current, oc) && current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) { get_task_struct(current); oc->chosen = current; oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)"); return true; } select_bad_process(oc); /* Found nothing?!?! */ if (!oc->chosen) { dump_header(oc); pr_warn("Out of memory and no killable processes...\n"); /* * If we got here due to an actual allocation at the * system level, we cannot survive this and will enter * an endless loop in the allocator. Bail out now. */ if (!is_sysrq_oom(oc) && !is_memcg_oom(oc)) panic("System is deadlocked on memory\n"); } if (oc->chosen && oc->chosen != (void *)-1UL) oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" : "Memory cgroup out of memory"); return !!oc->chosen; } /* * The pagefault handler calls here because some allocation has failed. We have * to take care of the memcg OOM here because this is the only safe context without * any locks held but let the oom killer triggered from the allocation context care * about the global OOM. */ void pagefault_out_of_memory(void) { static DEFINE_RATELIMIT_STATE(pfoom_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); if (mem_cgroup_oom_synchronize(true)) return; if (fatal_signal_pending(current)) return; if (__ratelimit(&pfoom_rs)) pr_warn("Huh VM_FAULT_OOM leaked out to the #PF handler. Retrying PF\n"); } SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags) { #ifdef CONFIG_MMU struct mm_struct *mm = NULL; struct task_struct *task; struct task_struct *p; unsigned int f_flags; bool reap = false; long ret = 0; if (flags) return -EINVAL; task = pidfd_get_task(pidfd, &f_flags); if (IS_ERR(task)) return PTR_ERR(task); /* * Make sure to choose a thread which still has a reference to mm * during the group exit */ p = find_lock_task_mm(task); if (!p) { ret = -ESRCH; goto put_task; } mm = p->mm; mmgrab(mm); if (task_will_free_mem(p)) reap = true; else { /* Error only if the work has not been done already */ if (!test_bit(MMF_OOM_SKIP, &mm->flags)) ret = -EINVAL; } task_unlock(p); if (!reap) goto drop_mm; if (mmap_read_lock_killable(mm)) { ret = -EINTR; goto drop_mm; } /* * Check MMF_OOM_SKIP again under mmap_read_lock protection to ensure * possible change in exit_mmap is seen */ if (!test_bit(MMF_OOM_SKIP, &mm->flags) && !__oom_reap_task_mm(mm)) ret = -EAGAIN; mmap_read_unlock(mm); drop_mm: mmdrop(mm); put_task: put_task_struct(task); return ret; #else return -ENOSYS; #endif /* CONFIG_MMU */ }
165 59 162 165 59 70 67 16 60 69 6 2 61 60 5 2 3 58 4 55 8 8 8 2 2 2 2 2 2 194 205 192 70 67 1 67 2 3 59 2 42 61 61 2 59 72 4 55 73 73 73 52 73 15 59 58 8 2 195 165 2 71 171 1 109 13 109 3 108 162 162 1 162 75 1 151 9 151 109 162 162 86 109 45 51 15 54 7 7 7 7 4 4 5 4 4 7 4 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 // SPDX-License-Identifier: GPL-2.0-only /* * vfsv0 quota IO operations on file */ #include <linux/errno.h> #include <linux/fs.h> #include <linux/mount.h> #include <linux/dqblk_v2.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/quotaops.h> #include <asm/byteorder.h> #include "quota_tree.h" MODULE_AUTHOR("Jan Kara"); MODULE_DESCRIPTION("Quota trie support"); MODULE_LICENSE("GPL"); /* * Maximum quota tree depth we support. Only to limit recursion when working * with the tree. */ #define MAX_QTREE_DEPTH 6 #define __QUOTA_QT_PARANOIA static int __get_index(struct qtree_mem_dqinfo *info, qid_t id, int depth) { unsigned int epb = info->dqi_usable_bs >> 2; depth = info->dqi_qtree_depth - depth - 1; while (depth--) id /= epb; return id % epb; } static int get_index(struct qtree_mem_dqinfo *info, struct kqid qid, int depth) { qid_t id = from_kqid(&init_user_ns, qid); return __get_index(info, id, depth); } /* Number of entries in one blocks */ static int qtree_dqstr_in_blk(struct qtree_mem_dqinfo *info) { return (info->dqi_usable_bs - sizeof(struct qt_disk_dqdbheader)) / info->dqi_entry_size; } static ssize_t read_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf) { struct super_block *sb = info->dqi_sb; memset(buf, 0, info->dqi_usable_bs); return sb->s_op->quota_read(sb, info->dqi_type, buf, info->dqi_usable_bs, (loff_t)blk << info->dqi_blocksize_bits); } static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf) { struct super_block *sb = info->dqi_sb; ssize_t ret; ret = sb->s_op->quota_write(sb, info->dqi_type, buf, info->dqi_usable_bs, (loff_t)blk << info->dqi_blocksize_bits); if (ret != info->dqi_usable_bs) { quota_error(sb, "dquota write failed"); if (ret >= 0) ret = -EIO; } return ret; } static inline int do_check_range(struct super_block *sb, const char *val_name, uint val, uint min_val, uint max_val) { if (val < min_val || val > max_val) { quota_error(sb, "Getting %s %u out of range %u-%u", val_name, val, min_val, max_val); return -EUCLEAN; } return 0; } static int check_dquot_block_header(struct qtree_mem_dqinfo *info, struct qt_disk_dqdbheader *dh) { int err = 0; err = do_check_range(info->dqi_sb, "dqdh_next_free", le32_to_cpu(dh->dqdh_next_free), 0, info->dqi_blocks - 1); if (err) return err; err = do_check_range(info->dqi_sb, "dqdh_prev_free", le32_to_cpu(dh->dqdh_prev_free), 0, info->dqi_blocks - 1); if (err) return err; err = do_check_range(info->dqi_sb, "dqdh_entries", le16_to_cpu(dh->dqdh_entries), 0, qtree_dqstr_in_blk(info)); return err; } /* Remove empty block from list and return it */ static int get_free_dqblk(struct qtree_mem_dqinfo *info) { char *buf = kmalloc(info->dqi_usable_bs, GFP_KERNEL); struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf; int ret, blk; if (!buf) return -ENOMEM; if (info->dqi_free_blk) { blk = info->dqi_free_blk; ret = read_blk(info, blk, buf); if (ret < 0) goto out_buf; ret = check_dquot_block_header(info, dh); if (ret) goto out_buf; info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free); } else { memset(buf, 0, info->dqi_usable_bs); /* Assure block allocation... */ ret = write_blk(info, info->dqi_blocks, buf); if (ret < 0) goto out_buf; blk = info->dqi_blocks++; } mark_info_dirty(info->dqi_sb, info->dqi_type); ret = blk; out_buf: kfree(buf); return ret; } /* Insert empty block to the list */ static int put_free_dqblk(struct qtree_mem_dqinfo *info, char *buf, uint blk) { struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf; int err; dh->dqdh_next_free = cpu_to_le32(info->dqi_free_blk); dh->dqdh_prev_free = cpu_to_le32(0); dh->dqdh_entries = cpu_to_le16(0); err = write_blk(info, blk, buf); if (err < 0) return err; info->dqi_free_blk = blk; mark_info_dirty(info->dqi_sb, info->dqi_type); return 0; } /* Remove given block from the list of blocks with free entries */ static int remove_free_dqentry(struct qtree_mem_dqinfo *info, char *buf, uint blk) { char *tmpbuf = kmalloc(info->dqi_usable_bs, GFP_KERNEL); struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf; uint nextblk = le32_to_cpu(dh->dqdh_next_free); uint prevblk = le32_to_cpu(dh->dqdh_prev_free); int err; if (!tmpbuf) return -ENOMEM; if (nextblk) { err = read_blk(info, nextblk, tmpbuf); if (err < 0) goto out_buf; ((struct qt_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = dh->dqdh_prev_free; err = write_blk(info, nextblk, tmpbuf); if (err < 0) goto out_buf; } if (prevblk) { err = read_blk(info, prevblk, tmpbuf); if (err < 0) goto out_buf; ((struct qt_disk_dqdbheader *)tmpbuf)->dqdh_next_free = dh->dqdh_next_free; err = write_blk(info, prevblk, tmpbuf); if (err < 0) goto out_buf; } else { info->dqi_free_entry = nextblk; mark_info_dirty(info->dqi_sb, info->dqi_type); } kfree(tmpbuf); dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0); /* No matter whether write succeeds block is out of list */ if (write_blk(info, blk, buf) < 0) quota_error(info->dqi_sb, "Can't write block (%u) " "with free entries", blk); return 0; out_buf: kfree(tmpbuf); return err; } /* Insert given block to the beginning of list with free entries */ static int insert_free_dqentry(struct qtree_mem_dqinfo *info, char *buf, uint blk) { char *tmpbuf = kmalloc(info->dqi_usable_bs, GFP_KERNEL); struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf; int err; if (!tmpbuf) return -ENOMEM; dh->dqdh_next_free = cpu_to_le32(info->dqi_free_entry); dh->dqdh_prev_free = cpu_to_le32(0); err = write_blk(info, blk, buf); if (err < 0) goto out_buf; if (info->dqi_free_entry) { err = read_blk(info, info->dqi_free_entry, tmpbuf); if (err < 0) goto out_buf; ((struct qt_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = cpu_to_le32(blk); err = write_blk(info, info->dqi_free_entry, tmpbuf); if (err < 0) goto out_buf; } kfree(tmpbuf); info->dqi_free_entry = blk; mark_info_dirty(info->dqi_sb, info->dqi_type); return 0; out_buf: kfree(tmpbuf); return err; } /* Is the entry in the block free? */ int qtree_entry_unused(struct qtree_mem_dqinfo *info, char *disk) { int i; for (i = 0; i < info->dqi_entry_size; i++) if (disk[i]) return 0; return 1; } EXPORT_SYMBOL(qtree_entry_unused); /* Find space for dquot */ static uint find_free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, int *err) { uint blk, i; struct qt_disk_dqdbheader *dh; char *buf = kmalloc(info->dqi_usable_bs, GFP_KERNEL); char *ddquot; *err = 0; if (!buf) { *err = -ENOMEM; return 0; } dh = (struct qt_disk_dqdbheader *)buf; if (info->dqi_free_entry) { blk = info->dqi_free_entry; *err = read_blk(info, blk, buf); if (*err < 0) goto out_buf; *err = check_dquot_block_header(info, dh); if (*err) goto out_buf; } else { blk = get_free_dqblk(info); if ((int)blk < 0) { *err = blk; kfree(buf); return 0; } memset(buf, 0, info->dqi_usable_bs); /* This is enough as the block is already zeroed and the entry * list is empty... */ info->dqi_free_entry = blk; mark_info_dirty(dquot->dq_sb, dquot->dq_id.type); } /* Block will be full? */ if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) { *err = remove_free_dqentry(info, buf, blk); if (*err < 0) { quota_error(dquot->dq_sb, "Can't remove block (%u) " "from entry free list", blk); goto out_buf; } } le16_add_cpu(&dh->dqdh_entries, 1); /* Find free structure in block */ ddquot = buf + sizeof(struct qt_disk_dqdbheader); for (i = 0; i < qtree_dqstr_in_blk(info); i++) { if (qtree_entry_unused(info, ddquot)) break; ddquot += info->dqi_entry_size; } #ifdef __QUOTA_QT_PARANOIA if (i == qtree_dqstr_in_blk(info)) { quota_error(dquot->dq_sb, "Data block full but it shouldn't"); *err = -EIO; goto out_buf; } #endif *err = write_blk(info, blk, buf); if (*err < 0) { quota_error(dquot->dq_sb, "Can't write quota data block %u", blk); goto out_buf; } dquot->dq_off = ((loff_t)blk << info->dqi_blocksize_bits) + sizeof(struct qt_disk_dqdbheader) + i * info->dqi_entry_size; kfree(buf); return blk; out_buf: kfree(buf); return 0; } /* Insert reference to structure into the trie */ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, uint *blks, int depth) { char *buf = kmalloc(info->dqi_usable_bs, GFP_KERNEL); int ret = 0, newson = 0, newact = 0; __le32 *ref; uint newblk; int i; if (!buf) return -ENOMEM; if (!blks[depth]) { ret = get_free_dqblk(info); if (ret < 0) goto out_buf; for (i = 0; i < depth; i++) if (ret == blks[i]) { quota_error(dquot->dq_sb, "Free block already used in tree: block %u", ret); ret = -EIO; goto out_buf; } blks[depth] = ret; memset(buf, 0, info->dqi_usable_bs); newact = 1; } else { ret = read_blk(info, blks[depth], buf); if (ret < 0) { quota_error(dquot->dq_sb, "Can't read tree quota " "block %u", blks[depth]); goto out_buf; } } ref = (__le32 *)buf; newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); ret = do_check_range(dquot->dq_sb, "block", newblk, 0, info->dqi_blocks - 1); if (ret) goto out_buf; if (!newblk) { newson = 1; } else { for (i = 0; i <= depth; i++) if (newblk == blks[i]) { quota_error(dquot->dq_sb, "Cycle in quota tree detected: block %u index %u", blks[depth], get_index(info, dquot->dq_id, depth)); ret = -EIO; goto out_buf; } } blks[depth + 1] = newblk; if (depth == info->dqi_qtree_depth - 1) { #ifdef __QUOTA_QT_PARANOIA if (newblk) { quota_error(dquot->dq_sb, "Inserting already present " "quota entry (block %u)", le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)])); ret = -EIO; goto out_buf; } #endif blks[depth + 1] = find_free_dqentry(info, dquot, &ret); } else { ret = do_insert_tree(info, dquot, blks, depth + 1); } if (newson && ret >= 0) { ref[get_index(info, dquot->dq_id, depth)] = cpu_to_le32(blks[depth + 1]); ret = write_blk(info, blks[depth], buf); } else if (newact && ret < 0) { put_free_dqblk(info, buf, blks[depth]); } out_buf: kfree(buf); return ret; } /* Wrapper for inserting quota structure into tree */ static inline int dq_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot) { uint blks[MAX_QTREE_DEPTH] = { QT_TREEOFF }; #ifdef __QUOTA_QT_PARANOIA if (info->dqi_blocks <= QT_TREEOFF) { quota_error(dquot->dq_sb, "Quota tree root isn't allocated!"); return -EIO; } #endif if (info->dqi_qtree_depth >= MAX_QTREE_DEPTH) { quota_error(dquot->dq_sb, "Quota tree depth too big!"); return -EIO; } return do_insert_tree(info, dquot, blks, 0); } /* * We don't have to be afraid of deadlocks as we never have quotas on quota * files... */ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) { int type = dquot->dq_id.type; struct super_block *sb = dquot->dq_sb; ssize_t ret; char *ddquot = kmalloc(info->dqi_entry_size, GFP_KERNEL); if (!ddquot) return -ENOMEM; /* dq_off is guarded by dqio_sem */ if (!dquot->dq_off) { ret = dq_insert_tree(info, dquot); if (ret < 0) { quota_error(sb, "Error %zd occurred while creating " "quota", ret); kfree(ddquot); return ret; } } spin_lock(&dquot->dq_dqb_lock); info->dqi_ops->mem2disk_dqblk(ddquot, dquot); spin_unlock(&dquot->dq_dqb_lock); ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size, dquot->dq_off); if (ret != info->dqi_entry_size) { quota_error(sb, "dquota write failed"); if (ret >= 0) ret = -ENOSPC; } else { ret = 0; } dqstats_inc(DQST_WRITES); kfree(ddquot); return ret; } EXPORT_SYMBOL(qtree_write_dquot); /* Free dquot entry in data block */ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, uint blk) { struct qt_disk_dqdbheader *dh; char *buf = kmalloc(info->dqi_usable_bs, GFP_KERNEL); int ret = 0; if (!buf) return -ENOMEM; if (dquot->dq_off >> info->dqi_blocksize_bits != blk) { quota_error(dquot->dq_sb, "Quota structure has offset to " "other block (%u) than it should (%u)", blk, (uint)(dquot->dq_off >> info->dqi_blocksize_bits)); ret = -EIO; goto out_buf; } ret = read_blk(info, blk, buf); if (ret < 0) { quota_error(dquot->dq_sb, "Can't read quota data block %u", blk); goto out_buf; } dh = (struct qt_disk_dqdbheader *)buf; ret = check_dquot_block_header(info, dh); if (ret) goto out_buf; le16_add_cpu(&dh->dqdh_entries, -1); if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */ ret = remove_free_dqentry(info, buf, blk); if (ret >= 0) ret = put_free_dqblk(info, buf, blk); if (ret < 0) { quota_error(dquot->dq_sb, "Can't move quota data block " "(%u) to free list", blk); goto out_buf; } } else { memset(buf + (dquot->dq_off & ((1 << info->dqi_blocksize_bits) - 1)), 0, info->dqi_entry_size); if (le16_to_cpu(dh->dqdh_entries) == qtree_dqstr_in_blk(info) - 1) { /* Insert will write block itself */ ret = insert_free_dqentry(info, buf, blk); if (ret < 0) { quota_error(dquot->dq_sb, "Can't insert quota " "data block (%u) to free entry list", blk); goto out_buf; } } else { ret = write_blk(info, blk, buf); if (ret < 0) { quota_error(dquot->dq_sb, "Can't write quota " "data block %u", blk); goto out_buf; } } } dquot->dq_off = 0; /* Quota is now unattached */ out_buf: kfree(buf); return ret; } /* Remove reference to dquot from tree */ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, uint *blks, int depth) { char *buf = kmalloc(info->dqi_usable_bs, GFP_KERNEL); int ret = 0; uint newblk; __le32 *ref = (__le32 *)buf; int i; if (!buf) return -ENOMEM; ret = read_blk(info, blks[depth], buf); if (ret < 0) { quota_error(dquot->dq_sb, "Can't read quota data block %u", blks[depth]); goto out_buf; } newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); ret = do_check_range(dquot->dq_sb, "block", newblk, QT_TREEOFF, info->dqi_blocks - 1); if (ret) goto out_buf; for (i = 0; i <= depth; i++) if (newblk == blks[i]) { quota_error(dquot->dq_sb, "Cycle in quota tree detected: block %u index %u", blks[depth], get_index(info, dquot->dq_id, depth)); ret = -EIO; goto out_buf; } if (depth == info->dqi_qtree_depth - 1) { ret = free_dqentry(info, dquot, newblk); blks[depth + 1] = 0; } else { blks[depth + 1] = newblk; ret = remove_tree(info, dquot, blks, depth + 1); } if (ret >= 0 && !blks[depth + 1]) { ref[get_index(info, dquot->dq_id, depth)] = cpu_to_le32(0); /* Block got empty? */ for (i = 0; i < (info->dqi_usable_bs >> 2) && !ref[i]; i++) ; /* Don't put the root block into the free block list */ if (i == (info->dqi_usable_bs >> 2) && blks[depth] != QT_TREEOFF) { put_free_dqblk(info, buf, blks[depth]); blks[depth] = 0; } else { ret = write_blk(info, blks[depth], buf); if (ret < 0) quota_error(dquot->dq_sb, "Can't write quota tree block %u", blks[depth]); } } out_buf: kfree(buf); return ret; } /* Delete dquot from tree */ int qtree_delete_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) { uint blks[MAX_QTREE_DEPTH] = { QT_TREEOFF }; if (!dquot->dq_off) /* Even not allocated? */ return 0; if (info->dqi_qtree_depth >= MAX_QTREE_DEPTH) { quota_error(dquot->dq_sb, "Quota tree depth too big!"); return -EIO; } return remove_tree(info, dquot, blks, 0); } EXPORT_SYMBOL(qtree_delete_dquot); /* Find entry in block */ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, uint blk) { char *buf = kmalloc(info->dqi_usable_bs, GFP_KERNEL); loff_t ret = 0; int i; char *ddquot; if (!buf) return -ENOMEM; ret = read_blk(info, blk, buf); if (ret < 0) { quota_error(dquot->dq_sb, "Can't read quota tree " "block %u", blk); goto out_buf; } ddquot = buf + sizeof(struct qt_disk_dqdbheader); for (i = 0; i < qtree_dqstr_in_blk(info); i++) { if (info->dqi_ops->is_id(ddquot, dquot)) break; ddquot += info->dqi_entry_size; } if (i == qtree_dqstr_in_blk(info)) { quota_error(dquot->dq_sb, "Quota for id %u referenced but not present", from_kqid(&init_user_ns, dquot->dq_id)); ret = -EIO; goto out_buf; } else { ret = ((loff_t)blk << info->dqi_blocksize_bits) + sizeof(struct qt_disk_dqdbheader) + i * info->dqi_entry_size; } out_buf: kfree(buf); return ret; } /* Find entry for given id in the tree */ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, uint *blks, int depth) { char *buf = kmalloc(info->dqi_usable_bs, GFP_KERNEL); loff_t ret = 0; __le32 *ref = (__le32 *)buf; uint blk; int i; if (!buf) return -ENOMEM; ret = read_blk(info, blks[depth], buf); if (ret < 0) { quota_error(dquot->dq_sb, "Can't read quota tree block %u", blks[depth]); goto out_buf; } ret = 0; blk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); if (!blk) /* No reference? */ goto out_buf; ret = do_check_range(dquot->dq_sb, "block", blk, QT_TREEOFF, info->dqi_blocks - 1); if (ret) goto out_buf; /* Check for cycles in the tree */ for (i = 0; i <= depth; i++) if (blk == blks[i]) { quota_error(dquot->dq_sb, "Cycle in quota tree detected: block %u index %u", blks[depth], get_index(info, dquot->dq_id, depth)); ret = -EIO; goto out_buf; } blks[depth + 1] = blk; if (depth < info->dqi_qtree_depth - 1) ret = find_tree_dqentry(info, dquot, blks, depth + 1); else ret = find_block_dqentry(info, dquot, blk); out_buf: kfree(buf); return ret; } /* Find entry for given id in the tree - wrapper function */ static inline loff_t find_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot) { uint blks[MAX_QTREE_DEPTH] = { QT_TREEOFF }; if (info->dqi_qtree_depth >= MAX_QTREE_DEPTH) { quota_error(dquot->dq_sb, "Quota tree depth too big!"); return -EIO; } return find_tree_dqentry(info, dquot, blks, 0); } int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) { int type = dquot->dq_id.type; struct super_block *sb = dquot->dq_sb; loff_t offset; char *ddquot; int ret = 0; #ifdef __QUOTA_QT_PARANOIA /* Invalidated quota? */ if (!sb_dqopt(dquot->dq_sb)->files[type]) { quota_error(sb, "Quota invalidated while reading!"); return -EIO; } #endif /* Do we know offset of the dquot entry in the quota file? */ if (!dquot->dq_off) { offset = find_dqentry(info, dquot); if (offset <= 0) { /* Entry not present? */ if (offset < 0) quota_error(sb,"Can't read quota structure " "for id %u", from_kqid(&init_user_ns, dquot->dq_id)); dquot->dq_off = 0; set_bit(DQ_FAKE_B, &dquot->dq_flags); memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); ret = offset; goto out; } dquot->dq_off = offset; } ddquot = kmalloc(info->dqi_entry_size, GFP_KERNEL); if (!ddquot) return -ENOMEM; ret = sb->s_op->quota_read(sb, type, ddquot, info->dqi_entry_size, dquot->dq_off); if (ret != info->dqi_entry_size) { if (ret >= 0) ret = -EIO; quota_error(sb, "Error while reading quota structure for id %u", from_kqid(&init_user_ns, dquot->dq_id)); set_bit(DQ_FAKE_B, &dquot->dq_flags); memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); kfree(ddquot); goto out; } spin_lock(&dquot->dq_dqb_lock); info->dqi_ops->disk2mem_dqblk(dquot, ddquot); if (!dquot->dq_dqb.dqb_bhardlimit && !dquot->dq_dqb.dqb_bsoftlimit && !dquot->dq_dqb.dqb_ihardlimit && !dquot->dq_dqb.dqb_isoftlimit) set_bit(DQ_FAKE_B, &dquot->dq_flags); spin_unlock(&dquot->dq_dqb_lock); kfree(ddquot); out: dqstats_inc(DQST_READS); return ret; } EXPORT_SYMBOL(qtree_read_dquot); /* Check whether dquot should not be deleted. We know we are * the only one operating on dquot (thanks to dq_lock) */ int qtree_release_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) { if (test_bit(DQ_FAKE_B, &dquot->dq_flags) && !(dquot->dq_dqb.dqb_curinodes | dquot->dq_dqb.dqb_curspace)) return qtree_delete_dquot(info, dquot); return 0; } EXPORT_SYMBOL(qtree_release_dquot); static int find_next_id(struct qtree_mem_dqinfo *info, qid_t *id, unsigned int blk, int depth) { char *buf = kmalloc(info->dqi_usable_bs, GFP_KERNEL); __le32 *ref = (__le32 *)buf; ssize_t ret; unsigned int epb = info->dqi_usable_bs >> 2; unsigned int level_inc = 1; int i; if (!buf) return -ENOMEM; for (i = depth; i < info->dqi_qtree_depth - 1; i++) level_inc *= epb; ret = read_blk(info, blk, buf); if (ret < 0) { quota_error(info->dqi_sb, "Can't read quota tree block %u", blk); goto out_buf; } for (i = __get_index(info, *id, depth); i < epb; i++) { uint blk_no = le32_to_cpu(ref[i]); if (blk_no == 0) { *id += level_inc; continue; } ret = do_check_range(info->dqi_sb, "block", blk_no, 0, info->dqi_blocks - 1); if (ret) goto out_buf; if (depth == info->dqi_qtree_depth - 1) { ret = 0; goto out_buf; } ret = find_next_id(info, id, blk_no, depth + 1); if (ret != -ENOENT) break; } if (i == epb) { ret = -ENOENT; goto out_buf; } out_buf: kfree(buf); return ret; } int qtree_get_next_id(struct qtree_mem_dqinfo *info, struct kqid *qid) { qid_t id = from_kqid(&init_user_ns, *qid); int ret; ret = find_next_id(info, &id, QT_TREEOFF, 0); if (ret < 0) return ret; *qid = make_kqid(&init_user_ns, qid->type, id); return 0; } EXPORT_SYMBOL(qtree_get_next_id);
2 2 4 2 4 2 2 6 6 6 6 39 39 3 38 38 6 3 3 39 39 27 46 45 27 31 3 3 3 3 3 3 3 3 3 3 6 6 6 5 1 6 6 6 6 6 6 3 3 6 6 6 45 38 6 25 1 3 27 38 38 38 38 4 7 31 31 31 31 31 27 6 6 1 3 4 4 4 4 4 3 1 4 4 5 9 5 5 5 4 3 5 5 9 39 13 27 39 1 31 9 1 9 39 38 31 9 1 3 6 9 9 9 1 1 1 1 8 28 28 28 28 1 27 28 27 28 1 2 2 3 1 2 3 3 3 3 2 1 3 3 2 3 2 2 3 3 3 3 3 3 3 1 3 3 3 2 3 3 3 2 2 2 2 3 3 3 3 3 3 3 3 3 1 3 1 3 3 2 2 2 3 1 2 2 1 2 1 3 2 1 5 3 3 27 23 27 27 27 27 27 27 27 27 27 29 2 28 28 1 27 27 1 1 3 3 1 1 2 3 1 2 3 3 2 1 3 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. */ #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/buffer_head.h> #include <linux/blkdev.h> #include <linux/gfs2_ondisk.h> #include <linux/crc32.h> #include <linux/iomap.h> #include <linux/ktime.h> #include "gfs2.h" #include "incore.h" #include "bmap.h" #include "glock.h" #include "inode.h" #include "meta_io.h" #include "quota.h" #include "rgrp.h" #include "log.h" #include "super.h" #include "trans.h" #include "dir.h" #include "util.h" #include "aops.h" #include "trace_gfs2.h" /* This doesn't need to be that large as max 64 bit pointers in a 4k * block is 512, so __u16 is fine for that. It saves stack space to * keep it small. */ struct metapath { struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT]; __u16 mp_list[GFS2_MAX_META_HEIGHT]; int mp_fheight; /* find_metapath height */ int mp_aheight; /* actual height (lookup height) */ }; static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length); /** * gfs2_unstuffer_folio - unstuff a stuffed inode into a block cached by a folio * @ip: the inode * @dibh: the dinode buffer * @block: the block number that was allocated * @folio: The folio. * * Returns: errno */ static int gfs2_unstuffer_folio(struct gfs2_inode *ip, struct buffer_head *dibh, u64 block, struct folio *folio) { struct inode *inode = &ip->i_inode; if (!folio_test_uptodate(folio)) { void *kaddr = kmap_local_folio(folio, 0); u64 dsize = i_size_read(inode); memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); memset(kaddr + dsize, 0, folio_size(folio) - dsize); kunmap_local(kaddr); folio_mark_uptodate(folio); } if (gfs2_is_jdata(ip)) { struct buffer_head *bh = folio_buffers(folio); if (!bh) bh = create_empty_buffers(folio, BIT(inode->i_blkbits), BIT(BH_Uptodate)); if (!buffer_mapped(bh)) map_bh(bh, inode->i_sb, block); set_buffer_uptodate(bh); gfs2_trans_add_data(ip->i_gl, bh); } else { folio_mark_dirty(folio); gfs2_ordered_add_inode(ip); } return 0; } static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct folio *folio) { struct buffer_head *bh, *dibh; struct gfs2_dinode *di; u64 block = 0; int isdir = gfs2_is_dir(ip); int error; error = gfs2_meta_inode_buffer(ip, &dibh); if (error) return error; if (i_size_read(&ip->i_inode)) { /* Get a free block, fill it with the stuffed data, and write it out to disk */ unsigned int n = 1; error = gfs2_alloc_blocks(ip, &block, &n, 0); if (error) goto out_brelse; if (isdir) { gfs2_trans_remove_revoke(GFS2_SB(&ip->i_inode), block, 1); error = gfs2_dir_get_new_buffer(ip, block, &bh); if (error) goto out_brelse; gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header), dibh, sizeof(struct gfs2_dinode)); brelse(bh); } else { error = gfs2_unstuffer_folio(ip, dibh, block, folio); if (error) goto out_brelse; } } /* Set up the pointer to the new block */ gfs2_trans_add_meta(ip->i_gl, dibh); di = (struct gfs2_dinode *)dibh->b_data; gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); if (i_size_read(&ip->i_inode)) { *(__be64 *)(di + 1) = cpu_to_be64(block); gfs2_add_inode_blocks(&ip->i_inode, 1); di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); } ip->i_height = 1; di->di_height = cpu_to_be16(1); out_brelse: brelse(dibh); return error; } /** * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big * @ip: The GFS2 inode to unstuff * * This routine unstuffs a dinode and returns it to a "normal" state such * that the height can be grown in the traditional way. * * Returns: errno */ int gfs2_unstuff_dinode(struct gfs2_inode *ip) { struct inode *inode = &ip->i_inode; struct folio *folio; int error; down_write(&ip->i_rw_mutex); folio = filemap_grab_folio(inode->i_mapping, 0); error = PTR_ERR(folio); if (IS_ERR(folio)) goto out; error = __gfs2_unstuff_inode(ip, folio); folio_unlock(folio); folio_put(folio); out: up_write(&ip->i_rw_mutex); return error; } /** * find_metapath - Find path through the metadata tree * @sdp: The superblock * @block: The disk block to look up * @mp: The metapath to return the result in * @height: The pre-calculated height of the metadata tree * * This routine returns a struct metapath structure that defines a path * through the metadata of inode "ip" to get to block "block". * * Example: * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a * filesystem with a blocksize of 4096. * * find_metapath() would return a struct metapath structure set to: * mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165. * * That means that in order to get to the block containing the byte at * offset 101342453, we would load the indirect block pointed to by pointer * 0 in the dinode. We would then load the indirect block pointed to by * pointer 48 in that indirect block. We would then load the data block * pointed to by pointer 165 in that indirect block. * * ---------------------------------------- * | Dinode | | * | | 4| * | |0 1 2 3 4 5 9| * | | 6| * ---------------------------------------- * | * | * V * ---------------------------------------- * | Indirect Block | * | 5| * | 4 4 4 4 4 5 5 1| * |0 5 6 7 8 9 0 1 2| * ---------------------------------------- * | * | * V * ---------------------------------------- * | Indirect Block | * | 1 1 1 1 1 5| * | 6 6 6 6 6 1| * |0 3 4 5 6 7 2| * ---------------------------------------- * | * | * V * ---------------------------------------- * | Data block containing offset | * | 101342453 | * | | * | | * ---------------------------------------- * */ static void find_metapath(const struct gfs2_sbd *sdp, u64 block, struct metapath *mp, unsigned int height) { unsigned int i; mp->mp_fheight = height; for (i = height; i--;) mp->mp_list[i] = do_div(block, sdp->sd_inptrs); } static inline unsigned int metapath_branch_start(const struct metapath *mp) { if (mp->mp_list[0] == 0) return 2; return 1; } /** * metaptr1 - Return the first possible metadata pointer in a metapath buffer * @height: The metadata height (0 = dinode) * @mp: The metapath */ static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp) { struct buffer_head *bh = mp->mp_bh[height]; if (height == 0) return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode))); return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header))); } /** * metapointer - Return pointer to start of metadata in a buffer * @height: The metadata height (0 = dinode) * @mp: The metapath * * Return a pointer to the block number of the next height of the metadata * tree given a buffer containing the pointer to the current height of the * metadata tree. */ static inline __be64 *metapointer(unsigned int height, const struct metapath *mp) { __be64 *p = metaptr1(height, mp); return p + mp->mp_list[height]; } static inline const __be64 *metaend(unsigned int height, const struct metapath *mp) { const struct buffer_head *bh = mp->mp_bh[height]; return (const __be64 *)(bh->b_data + bh->b_size); } static void clone_metapath(struct metapath *clone, struct metapath *mp) { unsigned int hgt; *clone = *mp; for (hgt = 0; hgt < mp->mp_aheight; hgt++) get_bh(clone->mp_bh[hgt]); } static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end) { const __be64 *t; for (t = start; t < end; t++) { struct buffer_head *rabh; if (!*t) continue; rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE); if (trylock_buffer(rabh)) { if (!buffer_uptodate(rabh)) { rabh->b_end_io = end_buffer_read_sync; submit_bh(REQ_OP_READ | REQ_RAHEAD | REQ_META | REQ_PRIO, rabh); continue; } unlock_buffer(rabh); } brelse(rabh); } } static inline struct buffer_head * metapath_dibh(struct metapath *mp) { return mp->mp_bh[0]; } static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, unsigned int x, unsigned int h) { for (; x < h; x++) { __be64 *ptr = metapointer(x, mp); u64 dblock = be64_to_cpu(*ptr); int ret; if (!dblock) break; ret = gfs2_meta_buffer(ip, GFS2_METATYPE_IN, dblock, &mp->mp_bh[x + 1]); if (ret) return ret; } mp->mp_aheight = x + 1; return 0; } /** * lookup_metapath - Walk the metadata tree to a specific point * @ip: The inode * @mp: The metapath * * Assumes that the inode's buffer has already been looked up and * hooked onto mp->mp_bh[0] and that the metapath has been initialised * by find_metapath(). * * If this function encounters part of the tree which has not been * allocated, it returns the current height of the tree at the point * at which it found the unallocated block. Blocks which are found are * added to the mp->mp_bh[] list. * * Returns: error */ static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp) { return __fillup_metapath(ip, mp, 0, ip->i_height - 1); } /** * fillup_metapath - fill up buffers for the metadata path to a specific height * @ip: The inode * @mp: The metapath * @h: The height to which it should be mapped * * Similar to lookup_metapath, but does lookups for a range of heights * * Returns: error or the number of buffers filled */ static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h) { unsigned int x = 0; int ret; if (h) { /* find the first buffer we need to look up. */ for (x = h - 1; x > 0; x--) { if (mp->mp_bh[x]) break; } } ret = __fillup_metapath(ip, mp, x, h); if (ret) return ret; return mp->mp_aheight - x - 1; } static sector_t metapath_to_block(struct gfs2_sbd *sdp, struct metapath *mp) { sector_t factor = 1, block = 0; int hgt; for (hgt = mp->mp_fheight - 1; hgt >= 0; hgt--) { if (hgt < mp->mp_aheight) block += mp->mp_list[hgt] * factor; factor *= sdp->sd_inptrs; } return block; } static void release_metapath(struct metapath *mp) { int i; for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) { if (mp->mp_bh[i] == NULL) break; brelse(mp->mp_bh[i]); mp->mp_bh[i] = NULL; } } /** * gfs2_extent_length - Returns length of an extent of blocks * @bh: The metadata block * @ptr: Current position in @bh * @eob: Set to 1 if we hit "end of block" * * Returns: The length of the extent (minimum of one block) */ static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, int *eob) { const __be64 *end = (__be64 *)(bh->b_data + bh->b_size); const __be64 *first = ptr; u64 d = be64_to_cpu(*ptr); *eob = 0; do { ptr++; if (ptr >= end) break; d++; } while(be64_to_cpu(*ptr) == d); if (ptr >= end) *eob = 1; return ptr - first; } enum walker_status { WALK_STOP, WALK_FOLLOW, WALK_CONTINUE }; /* * gfs2_metadata_walker - walk an indirect block * @mp: Metapath to indirect block * @ptrs: Number of pointers to look at * * When returning WALK_FOLLOW, the walker must update @mp to point at the right * indirect block to follow. */ typedef enum walker_status (*gfs2_metadata_walker)(struct metapath *mp, unsigned int ptrs); /* * gfs2_walk_metadata - walk a tree of indirect blocks * @inode: The inode * @mp: Starting point of walk * @max_len: Maximum number of blocks to walk * @walker: Called during the walk * * Returns 1 if the walk was stopped by @walker, 0 if we went past @max_len or * past the end of metadata, and a negative error code otherwise. */ static int gfs2_walk_metadata(struct inode *inode, struct metapath *mp, u64 max_len, gfs2_metadata_walker walker) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); u64 factor = 1; unsigned int hgt; int ret; /* * The walk starts in the lowest allocated indirect block, which may be * before the position indicated by @mp. Adjust @max_len accordingly * to avoid a short walk. */ for (hgt = mp->mp_fheight - 1; hgt >= mp->mp_aheight; hgt--) { max_len += mp->mp_list[hgt] * factor; mp->mp_list[hgt] = 0; factor *= sdp->sd_inptrs; } for (;;) { u16 start = mp->mp_list[hgt]; enum walker_status status; unsigned int ptrs; u64 len; /* Walk indirect block. */ ptrs = (hgt >= 1 ? sdp->sd_inptrs : sdp->sd_diptrs) - start; len = ptrs * factor; if (len > max_len) ptrs = DIV_ROUND_UP_ULL(max_len, factor); status = walker(mp, ptrs); switch (status) { case WALK_STOP: return 1; case WALK_FOLLOW: BUG_ON(mp->mp_aheight == mp->mp_fheight); ptrs = mp->mp_list[hgt] - start; len = ptrs * factor; break; case WALK_CONTINUE: break; } if (len >= max_len) break; max_len -= len; if (status == WALK_FOLLOW) goto fill_up_metapath; lower_metapath: /* Decrease height of metapath. */ brelse(mp->mp_bh[hgt]); mp->mp_bh[hgt] = NULL; mp->mp_list[hgt] = 0; if (!hgt) break; hgt--; factor *= sdp->sd_inptrs; /* Advance in metadata tree. */ (mp->mp_list[hgt])++; if (hgt) { if (mp->mp_list[hgt] >= sdp->sd_inptrs) goto lower_metapath; } else { if (mp->mp_list[hgt] >= sdp->sd_diptrs) break; } fill_up_metapath: /* Increase height of metapath. */ ret = fillup_metapath(ip, mp, ip->i_height - 1); if (ret < 0) return ret; hgt += ret; for (; ret; ret--) do_div(factor, sdp->sd_inptrs); mp->mp_aheight = hgt + 1; } return 0; } static enum walker_status gfs2_hole_walker(struct metapath *mp, unsigned int ptrs) { const __be64 *start, *ptr, *end; unsigned int hgt; hgt = mp->mp_aheight - 1; start = metapointer(hgt, mp); end = start + ptrs; for (ptr = start; ptr < end; ptr++) { if (*ptr) { mp->mp_list[hgt] += ptr - start; if (mp->mp_aheight == mp->mp_fheight) return WALK_STOP; return WALK_FOLLOW; } } return WALK_CONTINUE; } /** * gfs2_hole_size - figure out the size of a hole * @inode: The inode * @lblock: The logical starting block number * @len: How far to look (in blocks) * @mp: The metapath at lblock * @iomap: The iomap to store the hole size in * * This function modifies @mp. * * Returns: errno on error */ static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len, struct metapath *mp, struct iomap *iomap) { struct metapath clone; u64 hole_size; int ret; clone_metapath(&clone, mp); ret = gfs2_walk_metadata(inode, &clone, len, gfs2_hole_walker); if (ret < 0) goto out; if (ret == 1) hole_size = metapath_to_block(GFS2_SB(inode), &clone) - lblock; else hole_size = len; iomap->length = hole_size << inode->i_blkbits; ret = 0; out: release_metapath(&clone); return ret; } static inline void gfs2_indirect_init(struct metapath *mp, struct gfs2_glock *gl, unsigned int i, unsigned offset, u64 bn) { __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data + ((i > 1) ? sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode))); BUG_ON(i < 1); BUG_ON(mp->mp_bh[i] != NULL); mp->mp_bh[i] = gfs2_meta_new(gl, bn); gfs2_trans_add_meta(gl, mp->mp_bh[i]); gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN); gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header)); ptr += offset; *ptr = cpu_to_be64(bn); } enum alloc_state { ALLOC_DATA = 0, ALLOC_GROW_DEPTH = 1, ALLOC_GROW_HEIGHT = 2, /* ALLOC_UNSTUFF = 3, TBD and rather complicated */ }; /** * __gfs2_iomap_alloc - Build a metadata tree of the requested height * @inode: The GFS2 inode * @iomap: The iomap structure * @mp: The metapath, with proper height information calculated * * In this routine we may have to alloc: * i) Indirect blocks to grow the metadata tree height * ii) Indirect blocks to fill in lower part of the metadata tree * iii) Data blocks * * This function is called after __gfs2_iomap_get, which works out the * total number of blocks which we need via gfs2_alloc_size. * * We then do the actual allocation asking for an extent at a time (if * enough contiguous free blocks are available, there will only be one * allocation request per call) and uses the state machine to initialise * the blocks in order. * * Right now, this function will allocate at most one indirect block * worth of data -- with a default block size of 4K, that's slightly * less than 2M. If this limitation is ever removed to allow huge * allocations, we would probably still want to limit the iomap size we * return to avoid stalling other tasks during huge writes; the next * iomap iteration would then find the blocks already allocated. * * Returns: errno on error */ static int __gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap, struct metapath *mp) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct buffer_head *dibh = metapath_dibh(mp); u64 bn; unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0; size_t dblks = iomap->length >> inode->i_blkbits; const unsigned end_of_metadata = mp->mp_fheight - 1; int ret; enum alloc_state state; __be64 *ptr; __be64 zero_bn = 0; BUG_ON(mp->mp_aheight < 1); BUG_ON(dibh == NULL); BUG_ON(dblks < 1); gfs2_trans_add_meta(ip->i_gl, dibh); down_write(&ip->i_rw_mutex); if (mp->mp_fheight == mp->mp_aheight) { /* Bottom indirect block exists */ state = ALLOC_DATA; } else { /* Need to allocate indirect blocks */ if (mp->mp_fheight == ip->i_height) { /* Writing into existing tree, extend tree down */ iblks = mp->mp_fheight - mp->mp_aheight; state = ALLOC_GROW_DEPTH; } else { /* Building up tree height */ state = ALLOC_GROW_HEIGHT; iblks = mp->mp_fheight - ip->i_height; branch_start = metapath_branch_start(mp); iblks += (mp->mp_fheight - branch_start); } } /* start of the second part of the function (state machine) */ blks = dblks + iblks; i = mp->mp_aheight; do { n = blks - alloced; ret = gfs2_alloc_blocks(ip, &bn, &n, 0); if (ret) goto out; alloced += n; if (state != ALLOC_DATA || gfs2_is_jdata(ip)) gfs2_trans_remove_revoke(sdp, bn, n); switch (state) { /* Growing height of tree */ case ALLOC_GROW_HEIGHT: if (i == 1) { ptr = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode)); zero_bn = *ptr; } for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0; i++, n--) gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++); if (i - 1 == mp->mp_fheight - ip->i_height) { i--; gfs2_buffer_copy_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header), dibh, sizeof(struct gfs2_dinode)); gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + sizeof(__be64)); ptr = (__be64 *)(mp->mp_bh[i]->b_data + sizeof(struct gfs2_meta_header)); *ptr = zero_bn; state = ALLOC_GROW_DEPTH; for(i = branch_start; i < mp->mp_fheight; i++) { if (mp->mp_bh[i] == NULL) break; brelse(mp->mp_bh[i]); mp->mp_bh[i] = NULL; } i = branch_start; } if (n == 0) break; fallthrough; /* To branching from existing tree */ case ALLOC_GROW_DEPTH: if (i > 1 && i < mp->mp_fheight) gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]); for (; i < mp->mp_fheight && n > 0; i++, n--) gfs2_indirect_init(mp, ip->i_gl, i, mp->mp_list[i-1], bn++); if (i == mp->mp_fheight) state = ALLOC_DATA; if (n == 0) break; fallthrough; /* To tree complete, adding data blocks */ case ALLOC_DATA: BUG_ON(n > dblks); BUG_ON(mp->mp_bh[end_of_metadata] == NULL); gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]); dblks = n; ptr = metapointer(end_of_metadata, mp); iomap->addr = bn << inode->i_blkbits; iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW; while (n-- > 0) *ptr++ = cpu_to_be64(bn++); break; } } while (iomap->addr == IOMAP_NULL_ADDR); iomap->type = IOMAP_MAPPED; iomap->length = (u64)dblks << inode->i_blkbits; ip->i_height = mp->mp_fheight; gfs2_add_inode_blocks(&ip->i_inode, alloced); gfs2_dinode_out(ip, dibh->b_data); out: up_write(&ip->i_rw_mutex); return ret; } #define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE /** * gfs2_alloc_size - Compute the maximum allocation size * @inode: The inode * @mp: The metapath * @size: Requested size in blocks * * Compute the maximum size of the next allocation at @mp. * * Returns: size in blocks */ static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); const __be64 *first, *ptr, *end; /* * For writes to stuffed files, this function is called twice via * __gfs2_iomap_get, before and after unstuffing. The size we return the * first time needs to be large enough to get the reservation and * allocation sizes right. The size we return the second time must * be exact or else __gfs2_iomap_alloc won't do the right thing. */ if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) { unsigned int maxsize = mp->mp_fheight > 1 ? sdp->sd_inptrs : sdp->sd_diptrs; maxsize -= mp->mp_list[mp->mp_fheight - 1]; if (size > maxsize) size = maxsize; return size; } first = metapointer(ip->i_height - 1, mp); end = metaend(ip->i_height - 1, mp); if (end - first > size) end = first + size; for (ptr = first; ptr < end; ptr++) { if (*ptr) break; } return ptr - first; } /** * __gfs2_iomap_get - Map blocks from an inode to disk blocks * @inode: The inode * @pos: Starting position in bytes * @length: Length to map, in bytes * @flags: iomap flags * @iomap: The iomap structure * @mp: The metapath * * Returns: errno */ static int __gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, unsigned flags, struct iomap *iomap, struct metapath *mp) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); loff_t size = i_size_read(inode); __be64 *ptr; sector_t lblock; sector_t lblock_stop; int ret; int eob; u64 len; struct buffer_head *dibh = NULL, *bh; u8 height; if (!length) return -EINVAL; down_read(&ip->i_rw_mutex); ret = gfs2_meta_inode_buffer(ip, &dibh); if (ret) goto unlock; mp->mp_bh[0] = dibh; if (gfs2_is_stuffed(ip)) { if (flags & IOMAP_WRITE) { loff_t max_size = gfs2_max_stuffed_size(ip); if (pos + length > max_size) goto unstuff; iomap->length = max_size; } else { if (pos >= size) { if (flags & IOMAP_REPORT) { ret = -ENOENT; goto unlock; } else { iomap->offset = pos; iomap->length = length; goto hole_found; } } iomap->length = size; } iomap->addr = (ip->i_no_addr << inode->i_blkbits) + sizeof(struct gfs2_dinode); iomap->type = IOMAP_INLINE; iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode); goto out; } unstuff: lblock = pos >> inode->i_blkbits; iomap->offset = lblock << inode->i_blkbits; lblock_stop = (pos + length - 1) >> inode->i_blkbits; len = lblock_stop - lblock + 1; iomap->length = len << inode->i_blkbits; height = ip->i_height; while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height]) height++; find_metapath(sdp, lblock, mp, height); if (height > ip->i_height || gfs2_is_stuffed(ip)) goto do_alloc; ret = lookup_metapath(ip, mp); if (ret) goto unlock; if (mp->mp_aheight != ip->i_height) goto do_alloc; ptr = metapointer(ip->i_height - 1, mp); if (*ptr == 0) goto do_alloc; bh = mp->mp_bh[ip->i_height - 1]; len = gfs2_extent_length(bh, ptr, &eob); iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits; iomap->length = len << inode->i_blkbits; iomap->type = IOMAP_MAPPED; iomap->flags |= IOMAP_F_MERGED; if (eob) iomap->flags |= IOMAP_F_GFS2_BOUNDARY; out: iomap->bdev = inode->i_sb->s_bdev; unlock: up_read(&ip->i_rw_mutex); return ret; do_alloc: if (flags & IOMAP_REPORT) { if (pos >= size) ret = -ENOENT; else if (height == ip->i_height) ret = gfs2_hole_size(inode, lblock, len, mp, iomap); else iomap->length = size - iomap->offset; } else if (flags & IOMAP_WRITE) { u64 alloc_size; if (flags & IOMAP_DIRECT) goto out; /* (see gfs2_file_direct_write) */ len = gfs2_alloc_size(inode, mp, len); alloc_size = len << inode->i_blkbits; if (alloc_size < iomap->length) iomap->length = alloc_size; } else { if (pos < size && height == ip->i_height) ret = gfs2_hole_size(inode, lblock, len, mp, iomap); } hole_found: iomap->addr = IOMAP_NULL_ADDR; iomap->type = IOMAP_HOLE; goto out; } static struct folio * gfs2_iomap_get_folio(struct iomap_iter *iter, loff_t pos, unsigned len) { struct inode *inode = iter->inode; unsigned int blockmask = i_blocksize(inode) - 1; struct gfs2_sbd *sdp = GFS2_SB(inode); unsigned int blocks; struct folio *folio; int status; blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits; status = gfs2_trans_begin(sdp, RES_DINODE + blocks, 0); if (status) return ERR_PTR(status); folio = iomap_get_folio(iter, pos, len); if (IS_ERR(folio)) gfs2_trans_end(sdp); return folio; } static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos, unsigned copied, struct folio *folio) { struct gfs2_trans *tr = current->journal_info; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); if (!gfs2_is_stuffed(ip)) gfs2_trans_add_databufs(ip, folio, offset_in_folio(folio, pos), copied); folio_unlock(folio); folio_put(folio); if (tr->tr_num_buf_new) __mark_inode_dirty(inode, I_DIRTY_DATASYNC); gfs2_trans_end(sdp); } static const struct iomap_folio_ops gfs2_iomap_folio_ops = { .get_folio = gfs2_iomap_get_folio, .put_folio = gfs2_iomap_put_folio, }; static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, loff_t length, unsigned flags, struct iomap *iomap, struct metapath *mp) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); bool unstuff; int ret; unstuff = gfs2_is_stuffed(ip) && pos + length > gfs2_max_stuffed_size(ip); if (unstuff || iomap->type == IOMAP_HOLE) { unsigned int data_blocks, ind_blocks; struct gfs2_alloc_parms ap = {}; unsigned int rblocks; struct gfs2_trans *tr; gfs2_write_calc_reserv(ip, iomap->length, &data_blocks, &ind_blocks); ap.target = data_blocks + ind_blocks; ret = gfs2_quota_lock_check(ip, &ap); if (ret) return ret; ret = gfs2_inplace_reserve(ip, &ap); if (ret) goto out_qunlock; rblocks = RES_DINODE + ind_blocks; if (gfs2_is_jdata(ip)) rblocks += data_blocks; if (ind_blocks || data_blocks) rblocks += RES_STATFS + RES_QUOTA; if (inode == sdp->sd_rindex) rblocks += 2 * RES_STATFS; rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks); ret = gfs2_trans_begin(sdp, rblocks, iomap->length >> inode->i_blkbits); if (ret) goto out_trans_fail; if (unstuff) { ret = gfs2_unstuff_dinode(ip); if (ret) goto out_trans_end; release_metapath(mp); ret = __gfs2_iomap_get(inode, iomap->offset, iomap->length, flags, iomap, mp); if (ret) goto out_trans_end; } if (iomap->type == IOMAP_HOLE) { ret = __gfs2_iomap_alloc(inode, iomap, mp); if (ret) { gfs2_trans_end(sdp); gfs2_inplace_release(ip); punch_hole(ip, iomap->offset, iomap->length); goto out_qunlock; } } tr = current->journal_info; if (tr->tr_num_buf_new) __mark_inode_dirty(inode, I_DIRTY_DATASYNC); gfs2_trans_end(sdp); } if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip)) iomap->folio_ops = &gfs2_iomap_folio_ops; return 0; out_trans_end: gfs2_trans_end(sdp); out_trans_fail: gfs2_inplace_release(ip); out_qunlock: gfs2_quota_unlock(ip); return ret; } static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, unsigned flags, struct iomap *iomap, struct iomap *srcmap) { struct gfs2_inode *ip = GFS2_I(inode); struct metapath mp = { .mp_aheight = 1, }; int ret; if (gfs2_is_jdata(ip)) iomap->flags |= IOMAP_F_BUFFER_HEAD; trace_gfs2_iomap_start(ip, pos, length, flags); ret = __gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); if (ret) goto out_unlock; switch(flags & (IOMAP_WRITE | IOMAP_ZERO)) { case IOMAP_WRITE: if (flags & IOMAP_DIRECT) { /* * Silently fall back to buffered I/O for stuffed files * or if we've got a hole (see gfs2_file_direct_write). */ if (iomap->type != IOMAP_MAPPED) ret = -ENOTBLK; goto out_unlock; } break; case IOMAP_ZERO: if (iomap->type == IOMAP_HOLE) goto out_unlock; break; default: goto out_unlock; } ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp); out_unlock: release_metapath(&mp); trace_gfs2_iomap_end(ip, iomap, ret); return ret; } static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, ssize_t written, unsigned flags, struct iomap *iomap) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); switch (flags & (IOMAP_WRITE | IOMAP_ZERO)) { case IOMAP_WRITE: if (flags & IOMAP_DIRECT) return 0; break; case IOMAP_ZERO: if (iomap->type == IOMAP_HOLE) return 0; break; default: return 0; } if (!gfs2_is_stuffed(ip)) gfs2_ordered_add_inode(ip); if (inode == sdp->sd_rindex) adjust_fs_space(inode); gfs2_inplace_release(ip); if (ip->i_qadata && ip->i_qadata->qa_qd_num) gfs2_quota_unlock(ip); if (length != written && (iomap->flags & IOMAP_F_NEW)) { /* Deallocate blocks that were just allocated. */ loff_t hstart = round_up(pos + written, i_blocksize(inode)); loff_t hend = iomap->offset + iomap->length; if (hstart < hend) { truncate_pagecache_range(inode, hstart, hend - 1); punch_hole(ip, hstart, hend - hstart); } } if (unlikely(!written)) return 0; if (iomap->flags & IOMAP_F_SIZE_CHANGED) mark_inode_dirty(inode); set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); return 0; } const struct iomap_ops gfs2_iomap_ops = { .iomap_begin = gfs2_iomap_begin, .iomap_end = gfs2_iomap_end, }; /** * gfs2_block_map - Map one or more blocks of an inode to a disk block * @inode: The inode * @lblock: The logical block number * @bh_map: The bh to be mapped * @create: True if its ok to alloc blocks to satify the request * * The size of the requested mapping is defined in bh_map->b_size. * * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged * when @lblock is not mapped. Sets buffer_mapped(bh_map) and * bh_map->b_size to indicate the size of the mapping when @lblock and * successive blocks are mapped, up to the requested size. * * Sets buffer_boundary() if a read of metadata will be required * before the next block can be mapped. Sets buffer_new() if new * blocks were allocated. * * Returns: errno */ int gfs2_block_map(struct inode *inode, sector_t lblock, struct buffer_head *bh_map, int create) { struct gfs2_inode *ip = GFS2_I(inode); loff_t pos = (loff_t)lblock << inode->i_blkbits; loff_t length = bh_map->b_size; struct iomap iomap = { }; int ret; clear_buffer_mapped(bh_map); clear_buffer_new(bh_map); clear_buffer_boundary(bh_map); trace_gfs2_bmap(ip, bh_map, lblock, create, 1); if (!create) ret = gfs2_iomap_get(inode, pos, length, &iomap); else ret = gfs2_iomap_alloc(inode, pos, length, &iomap); if (ret) goto out; if (iomap.length > bh_map->b_size) { iomap.length = bh_map->b_size; iomap.flags &= ~IOMAP_F_GFS2_BOUNDARY; } if (iomap.addr != IOMAP_NULL_ADDR) map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits); bh_map->b_size = iomap.length; if (iomap.flags & IOMAP_F_GFS2_BOUNDARY) set_buffer_boundary(bh_map); if (iomap.flags & IOMAP_F_NEW) set_buffer_new(bh_map); out: trace_gfs2_bmap(ip, bh_map, lblock, create, ret); return ret; } int gfs2_get_extent(struct inode *inode, u64 lblock, u64 *dblock, unsigned int *extlen) { unsigned int blkbits = inode->i_blkbits; struct iomap iomap = { }; unsigned int len; int ret; ret = gfs2_iomap_get(inode, lblock << blkbits, *extlen << blkbits, &iomap); if (ret) return ret; if (iomap.type != IOMAP_MAPPED) return -EIO; *dblock = iomap.addr >> blkbits; len = iomap.length >> blkbits; if (len < *extlen) *extlen = len; return 0; } int gfs2_alloc_extent(struct inode *inode, u64 lblock, u64 *dblock, unsigned int *extlen, bool *new) { unsigned int blkbits = inode->i_blkbits; struct iomap iomap = { }; unsigned int len; int ret; ret = gfs2_iomap_alloc(inode, lblock << blkbits, *extlen << blkbits, &iomap); if (ret) return ret; if (iomap.type != IOMAP_MAPPED) return -EIO; *dblock = iomap.addr >> blkbits; len = iomap.length >> blkbits; if (len < *extlen) *extlen = len; *new = iomap.flags & IOMAP_F_NEW; return 0; } /* * NOTE: Never call gfs2_block_zero_range with an open transaction because it * uses iomap write to perform its actions, which begin their own transactions * (iomap_begin, get_folio, etc.) */ static int gfs2_block_zero_range(struct inode *inode, loff_t from, unsigned int length) { BUG_ON(current->journal_info); return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops); } #define GFS2_JTRUNC_REVOKES 8192 /** * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files * @inode: The inode being truncated * @oldsize: The original (larger) size * @newsize: The new smaller size * * With jdata files, we have to journal a revoke for each block which is * truncated. As a result, we need to split this into separate transactions * if the number of pages being truncated gets too large. */ static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize) { struct gfs2_sbd *sdp = GFS2_SB(inode); u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize; u64 chunk; int error; while (oldsize != newsize) { struct gfs2_trans *tr; unsigned int offs; chunk = oldsize - newsize; if (chunk > max_chunk) chunk = max_chunk; offs = oldsize & ~PAGE_MASK; if (offs && chunk > PAGE_SIZE) chunk = offs + ((chunk - offs) & PAGE_MASK); truncate_pagecache(inode, oldsize - chunk); oldsize -= chunk; tr = current->journal_info; if (!test_bit(TR_TOUCHED, &tr->tr_flags)) continue; gfs2_trans_end(sdp); error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES); if (error) return error; } return 0; } static int trunc_start(struct inode *inode, u64 newsize) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct buffer_head *dibh = NULL; int journaled = gfs2_is_jdata(ip); u64 oldsize = inode->i_size; int error; if (!gfs2_is_stuffed(ip)) { unsigned int blocksize = i_blocksize(inode); unsigned int offs = newsize & (blocksize - 1); if (offs) { error = gfs2_block_zero_range(inode, newsize, blocksize - offs); if (error) return error; } } if (journaled) error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES); else error = gfs2_trans_begin(sdp, RES_DINODE, 0); if (error) return error; error = gfs2_meta_inode_buffer(ip, &dibh); if (error) goto out; gfs2_trans_add_meta(ip->i_gl, dibh); if (gfs2_is_stuffed(ip)) gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize); else ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG; i_size_write(inode, newsize); inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode)); gfs2_dinode_out(ip, dibh->b_data); if (journaled) error = gfs2_journaled_truncate(inode, oldsize, newsize); else truncate_pagecache(inode, newsize); out: brelse(dibh); if (current->journal_info) gfs2_trans_end(sdp); return error; } int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, struct iomap *iomap) { struct metapath mp = { .mp_aheight = 1, }; int ret; ret = __gfs2_iomap_get(inode, pos, length, 0, iomap, &mp); release_metapath(&mp); return ret; } int gfs2_iomap_alloc(struct inode *inode, loff_t pos, loff_t length, struct iomap *iomap) { struct metapath mp = { .mp_aheight = 1, }; int ret; ret = __gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp); if (!ret && iomap->type == IOMAP_HOLE) ret = __gfs2_iomap_alloc(inode, iomap, &mp); release_metapath(&mp); return ret; } /** * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein * @ip: inode * @rd_gh: holder of resource group glock * @bh: buffer head to sweep * @start: starting point in bh * @end: end point in bh * @meta: true if bh points to metadata (rather than data) * @btotal: place to keep count of total blocks freed * * We sweep a metadata buffer (provided by the metapath) for blocks we need to * free, and free them all. However, we do it one rgrp at a time. If this * block has references to multiple rgrps, we break it into individual * transactions. This allows other processes to use the rgrps while we're * focused on a single one, for better concurrency / performance. * At every transaction boundary, we rewrite the inode into the journal. * That way the bitmaps are kept consistent with the inode and we can recover * if we're interrupted by power-outages. * * Returns: 0, or return code if an error occurred. * *btotal has the total number of blocks freed */ static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh, struct buffer_head *bh, __be64 *start, __be64 *end, bool meta, u32 *btotal) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *rgd; struct gfs2_trans *tr; __be64 *p; int blks_outside_rgrp; u64 bn, bstart, isize_blks; s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */ int ret = 0; bool buf_in_tr = false; /* buffer was added to transaction */ more_rgrps: rgd = NULL; if (gfs2_holder_initialized(rd_gh)) { rgd = gfs2_glock2rgrp(rd_gh->gh_gl); gfs2_assert_withdraw(sdp, gfs2_glock_is_locked_by_me(rd_gh->gh_gl)); } blks_outside_rgrp = 0; bstart = 0; blen = 0; for (p = start; p < end; p++) { if (!*p) continue; bn = be64_to_cpu(*p); if (rgd) { if (!rgrp_contains_block(rgd, bn)) { blks_outside_rgrp++; continue; } } else { rgd = gfs2_blk2rgrpd(sdp, bn, true); if (unlikely(!rgd)) { ret = -EIO; goto out; } ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, LM_FLAG_NODE_SCOPE, rd_gh); if (ret) goto out; /* Must be done with the rgrp glock held: */ if (gfs2_rs_active(&ip->i_res) && rgd == ip->i_res.rs_rgd) gfs2_rs_deltree(&ip->i_res); } /* The size of our transactions will be unknown until we actually process all the metadata blocks that relate to the rgrp. So we estimate. We know it can't be more than the dinode's i_blocks and we don't want to exceed the journal flush threshold, sd_log_thresh2. */ if (current->journal_info == NULL) { unsigned int jblocks_rqsted, revokes; jblocks_rqsted = rgd->rd_length + RES_DINODE + RES_INDIRECT; isize_blks = gfs2_get_inode_blocks(&ip->i_inode); if (isize_blks > atomic_read(&sdp->sd_log_thresh2)) jblocks_rqsted += atomic_read(&sdp->sd_log_thresh2); else jblocks_rqsted += isize_blks; revokes = jblocks_rqsted; if (meta) revokes += end - start; else if (ip->i_depth) revokes += sdp->sd_inptrs; ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes); if (ret) goto out_unlock; down_write(&ip->i_rw_mutex); } /* check if we will exceed the transaction blocks requested */ tr = current->journal_info; if (tr->tr_num_buf_new + RES_STATFS + RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) { /* We set blks_outside_rgrp to ensure the loop will be repeated for the same rgrp, but with a new transaction. */ blks_outside_rgrp++; /* This next part is tricky. If the buffer was added to the transaction, we've already set some block pointers to 0, so we better follow through and free them, or we will introduce corruption (so break). This may be impossible, or at least rare, but I decided to cover the case regardless. If the buffer was not added to the transaction (this call), doing so would exceed our transaction size, so we need to end the transaction and start a new one (so goto). */ if (buf_in_tr) break; goto out_unlock; } gfs2_trans_add_meta(ip->i_gl, bh); buf_in_tr = true; *p = 0; if (bstart + blen == bn) { blen++; continue; } if (bstart) { __gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta); (*btotal) += blen; gfs2_add_inode_blocks(&ip->i_inode, -blen); } bstart = bn; blen = 1; } if (bstart) { __gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta); (*btotal) += blen; gfs2_add_inode_blocks(&ip->i_inode, -blen); } out_unlock: if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks outside the rgrp we just processed, do it all over again. */ if (current->journal_info) { struct buffer_head *dibh; ret = gfs2_meta_inode_buffer(ip, &dibh); if (ret) goto out; /* Every transaction boundary, we rewrite the dinode to keep its di_blocks current in case of failure. */ inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode)); gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); up_write(&ip->i_rw_mutex); gfs2_trans_end(sdp); buf_in_tr = false; } gfs2_glock_dq_uninit(rd_gh); cond_resched(); goto more_rgrps; } out: return ret; } static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h) { if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0]))) return false; return true; } /** * find_nonnull_ptr - find a non-null pointer given a metapath and height * @sdp: The superblock * @mp: starting metapath * @h: desired height to search * @end_list: See punch_hole(). * @end_aligned: See punch_hole(). * * Assumes the metapath is valid (with buffers) out to height h. * Returns: true if a non-null pointer was found in the metapath buffer * false if all remaining pointers are NULL in the buffer */ static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp, unsigned int h, __u16 *end_list, unsigned int end_aligned) { struct buffer_head *bh = mp->mp_bh[h]; __be64 *first, *ptr, *end; first = metaptr1(h, mp); ptr = first + mp->mp_list[h]; end = (__be64 *)(bh->b_data + bh->b_size); if (end_list && mp_eq_to_hgt(mp, end_list, h)) { bool keep_end = h < end_aligned; end = first + end_list[h] + keep_end; } while (ptr < end) { if (*ptr) { /* if we have a non-null pointer */ mp->mp_list[h] = ptr - first; h++; if (h < GFS2_MAX_META_HEIGHT) mp->mp_list[h] = 0; return true; } ptr++; } return false; } enum dealloc_states { DEALLOC_MP_FULL = 0, /* Strip a metapath with all buffers read in */ DEALLOC_MP_LOWER = 1, /* lower the metapath strip height */ DEALLOC_FILL_MP = 2, /* Fill in the metapath to the given height. */ DEALLOC_DONE = 3, /* process complete */ }; static inline void metapointer_range(struct metapath *mp, int height, __u16 *start_list, unsigned int start_aligned, __u16 *end_list, unsigned int end_aligned, __be64 **start, __be64 **end) { struct buffer_head *bh = mp->mp_bh[height]; __be64 *first; first = metaptr1(height, mp); *start = first; if (mp_eq_to_hgt(mp, start_list, height)) { bool keep_start = height < start_aligned; *start = first + start_list[height] + keep_start; } *end = (__be64 *)(bh->b_data + bh->b_size); if (end_list && mp_eq_to_hgt(mp, end_list, height)) { bool keep_end = height < end_aligned; *end = first + end_list[height] + keep_end; } } static inline bool walk_done(struct gfs2_sbd *sdp, struct metapath *mp, int height, __u16 *end_list, unsigned int end_aligned) { __u16 end; if (end_list) { bool keep_end = height < end_aligned; if (!mp_eq_to_hgt(mp, end_list, height)) return false; end = end_list[height] + keep_end; } else end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs; return mp->mp_list[height] >= end; } /** * punch_hole - deallocate blocks in a file * @ip: inode to truncate * @offset: the start of the hole * @length: the size of the hole (or 0 for truncate) * * Punch a hole into a file or truncate a file at a given position. This * function operates in whole blocks (@offset and @length are rounded * accordingly); partially filled blocks must be cleared otherwise. * * This function works from the bottom up, and from the right to the left. In * other words, it strips off the highest layer (data) before stripping any of * the metadata. Doing it this way is best in case the operation is interrupted * by power failure, etc. The dinode is rewritten in every transaction to * guarantee integrity. */ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); u64 maxsize = sdp->sd_heightsize[ip->i_height]; struct metapath mp = {}; struct buffer_head *dibh, *bh; struct gfs2_holder rd_gh; unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift; unsigned int bsize = 1 << bsize_shift; u64 lblock = (offset + bsize - 1) >> bsize_shift; __u16 start_list[GFS2_MAX_META_HEIGHT]; __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL; unsigned int start_aligned, end_aligned; unsigned int strip_h = ip->i_height - 1; u32 btotal = 0; int ret, state; int mp_h; /* metapath buffers are read in to this height */ u64 prev_bnr = 0; __be64 *start, *end; if (offset + bsize - 1 >= maxsize) { /* * The starting point lies beyond the allocated metadata; * there are no blocks to deallocate. */ return 0; } /* * The start position of the hole is defined by lblock, start_list, and * start_aligned. The end position of the hole is defined by lend, * end_list, and end_aligned. * * start_aligned and end_aligned define down to which height the start * and end positions are aligned to the metadata tree (i.e., the * position is a multiple of the metadata granularity at the height * above). This determines at which heights additional meta pointers * needs to be preserved for the remaining data. */ if (length) { u64 end_offset = offset + length; u64 lend; /* * Clip the end at the maximum file size for the given height: * that's how far the metadata goes; files bigger than that * will have additional layers of indirection. */ if (end_offset > maxsize) end_offset = maxsize; lend = end_offset >> bsize_shift; if (lblock >= lend) return 0; find_metapath(sdp, lend, &mp, ip->i_height); end_list = __end_list; memcpy(end_list, mp.mp_list, sizeof(mp.mp_list)); for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) { if (end_list[mp_h]) break; } end_aligned = mp_h; } find_metapath(sdp, lblock, &mp, ip->i_height); memcpy(start_list, mp.mp_list, sizeof(start_list)); for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) { if (start_list[mp_h]) break; } start_aligned = mp_h; ret = gfs2_meta_inode_buffer(ip, &dibh); if (ret) return ret; mp.mp_bh[0] = dibh; ret = lookup_metapath(ip, &mp); if (ret) goto out_metapath; /* issue read-ahead on metadata */ for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) { metapointer_range(&mp, mp_h, start_list, start_aligned, end_list, end_aligned, &start, &end); gfs2_metapath_ra(ip->i_gl, start, end); } if (mp.mp_aheight == ip->i_height) state = DEALLOC_MP_FULL; /* We have a complete metapath */ else state = DEALLOC_FILL_MP; /* deal with partial metapath */ ret = gfs2_rindex_update(sdp); if (ret) goto out_metapath; ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (ret) goto out_metapath; gfs2_holder_mark_uninitialized(&rd_gh); mp_h = strip_h; while (state != DEALLOC_DONE) { switch (state) { /* Truncate a full metapath at the given strip height. * Note that strip_h == mp_h in order to be in this state. */ case DEALLOC_MP_FULL: bh = mp.mp_bh[mp_h]; gfs2_assert_withdraw(sdp, bh); if (gfs2_assert_withdraw(sdp, prev_bnr != bh->b_blocknr)) { fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u, " "s_h:%u, mp_h:%u\n", (unsigned long long)ip->i_no_addr, prev_bnr, ip->i_height, strip_h, mp_h); } prev_bnr = bh->b_blocknr; if (gfs2_metatype_check(sdp, bh, (mp_h ? GFS2_METATYPE_IN : GFS2_METATYPE_DI))) { ret = -EIO; goto out; } /* * Below, passing end_aligned as 0 gives us the * metapointer range excluding the end point: the end * point is the first metapath we must not deallocate! */ metapointer_range(&mp, mp_h, start_list, start_aligned, end_list, 0 /* end_aligned */, &start, &end); ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h], start, end, mp_h != ip->i_height - 1, &btotal); /* If we hit an error or just swept dinode buffer, just exit. */ if (ret || !mp_h) { state = DEALLOC_DONE; break; } state = DEALLOC_MP_LOWER; break; /* lower the metapath strip height */ case DEALLOC_MP_LOWER: /* We're done with the current buffer, so release it, unless it's the dinode buffer. Then back up to the previous pointer. */ if (mp_h) { brelse(mp.mp_bh[mp_h]); mp.mp_bh[mp_h] = NULL; } /* If we can't get any lower in height, we've stripped off all we can. Next step is to back up and start stripping the previous level of metadata. */ if (mp_h == 0) { strip_h--; memcpy(mp.mp_list, start_list, sizeof(start_list)); mp_h = strip_h; state = DEALLOC_FILL_MP; break; } mp.mp_list[mp_h] = 0; mp_h--; /* search one metadata height down */ mp.mp_list[mp_h]++; if (walk_done(sdp, &mp, mp_h, end_list, end_aligned)) break; /* Here we've found a part of the metapath that is not * allocated. We need to search at that height for the * next non-null pointer. */ if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) { state = DEALLOC_FILL_MP; mp_h++; } /* No more non-null pointers at this height. Back up to the previous height and try again. */ break; /* loop around in the same state */ /* Fill the metapath with buffers to the given height. */ case DEALLOC_FILL_MP: /* Fill the buffers out to the current height. */ ret = fillup_metapath(ip, &mp, mp_h); if (ret < 0) goto out; /* On the first pass, issue read-ahead on metadata. */ if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) { unsigned int height = mp.mp_aheight - 1; /* No read-ahead for data blocks. */ if (mp.mp_aheight - 1 == strip_h) height--; for (; height >= mp.mp_aheight - ret; height--) { metapointer_range(&mp, height, start_list, start_aligned, end_list, end_aligned, &start, &end); gfs2_metapath_ra(ip->i_gl, start, end); } } /* If buffers found for the entire strip height */ if (mp.mp_aheight - 1 == strip_h) { state = DEALLOC_MP_FULL; break; } if (mp.mp_aheight < ip->i_height) /* We have a partial height */ mp_h = mp.mp_aheight - 1; /* If we find a non-null block pointer, crawl a bit higher up in the metapath and try again, otherwise we need to look lower for a new starting point. */ if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) mp_h++; else state = DEALLOC_MP_LOWER; break; } } if (btotal) { if (current->journal_info == NULL) { ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_QUOTA, 0); if (ret) goto out; down_write(&ip->i_rw_mutex); } gfs2_statfs_change(sdp, 0, +btotal, 0); gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid, ip->i_inode.i_gid); inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode)); gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); up_write(&ip->i_rw_mutex); gfs2_trans_end(sdp); } out: if (gfs2_holder_initialized(&rd_gh)) gfs2_glock_dq_uninit(&rd_gh); if (current->journal_info) { up_write(&ip->i_rw_mutex); gfs2_trans_end(sdp); cond_resched(); } gfs2_quota_unhold(ip); out_metapath: release_metapath(&mp); return ret; } static int trunc_end(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; int error; error = gfs2_trans_begin(sdp, RES_DINODE, 0); if (error) return error; down_write(&ip->i_rw_mutex); error = gfs2_meta_inode_buffer(ip, &dibh); if (error) goto out; if (!i_size_read(&ip->i_inode)) { ip->i_height = 0; ip->i_goal = ip->i_no_addr; gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); gfs2_ordered_del_inode(ip); } inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode)); ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG; gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); out: up_write(&ip->i_rw_mutex); gfs2_trans_end(sdp); return error; } /** * do_shrink - make a file smaller * @inode: the inode * @newsize: the size to make the file * * Called with an exclusive lock on @inode. The @size must * be equal to or smaller than the current inode size. * * Returns: errno */ static int do_shrink(struct inode *inode, u64 newsize) { struct gfs2_inode *ip = GFS2_I(inode); int error; error = trunc_start(inode, newsize); if (error < 0) return error; if (gfs2_is_stuffed(ip)) return 0; error = punch_hole(ip, newsize, 0); if (error == 0) error = trunc_end(ip); return error; } /** * do_grow - Touch and update inode size * @inode: The inode * @size: The new size * * This function updates the timestamps on the inode and * may also increase the size of the inode. This function * must not be called with @size any smaller than the current * inode size. * * Although it is not strictly required to unstuff files here, * earlier versions of GFS2 have a bug in the stuffed file reading * code which will result in a buffer overrun if the size is larger * than the max stuffed file size. In order to prevent this from * occurring, such files are unstuffed, but in other cases we can * just update the inode size directly. * * Returns: 0 on success, or -ve on error */ static int do_grow(struct inode *inode, u64 size) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_alloc_parms ap = { .target = 1, }; struct buffer_head *dibh; int error; int unstuff = 0; if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) { error = gfs2_quota_lock_check(ip, &ap); if (error) return error; error = gfs2_inplace_reserve(ip, &ap); if (error) goto do_grow_qunlock; unstuff = 1; } error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT + (unstuff && gfs2_is_jdata(ip) ? RES_JDATA : 0) + (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ? 0 : RES_QUOTA), 0); if (error) goto do_grow_release; if (unstuff) { error = gfs2_unstuff_dinode(ip); if (error) goto do_end_trans; } error = gfs2_meta_inode_buffer(ip, &dibh); if (error) goto do_end_trans; truncate_setsize(inode, size); inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode)); gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); do_end_trans: gfs2_trans_end(sdp); do_grow_release: if (unstuff) { gfs2_inplace_release(ip); do_grow_qunlock: gfs2_quota_unlock(ip); } return error; } /** * gfs2_setattr_size - make a file a given size * @inode: the inode * @newsize: the size to make the file * * The file size can grow, shrink, or stay the same size. This * is called holding i_rwsem and an exclusive glock on the inode * in question. * * Returns: errno */ int gfs2_setattr_size(struct inode *inode, u64 newsize) { struct gfs2_inode *ip = GFS2_I(inode); int ret; BUG_ON(!S_ISREG(inode->i_mode)); ret = inode_newsize_ok(inode, newsize); if (ret) return ret; inode_dio_wait(inode); ret = gfs2_qa_get(ip); if (ret) goto out; if (newsize >= inode->i_size) { ret = do_grow(inode, newsize); goto out; } ret = do_shrink(inode, newsize); out: gfs2_rs_delete(ip); gfs2_qa_put(ip); return ret; } int gfs2_truncatei_resume(struct gfs2_inode *ip) { int error; error = punch_hole(ip, i_size_read(&ip->i_inode), 0); if (!error) error = trunc_end(ip); return error; } int gfs2_file_dealloc(struct gfs2_inode *ip) { return punch_hole(ip, 0, 0); } /** * gfs2_free_journal_extents - Free cached journal bmap info * @jd: The journal * */ void gfs2_free_journal_extents(struct gfs2_jdesc *jd) { struct gfs2_journal_extent *jext; while(!list_empty(&jd->extent_list)) { jext = list_first_entry(&jd->extent_list, struct gfs2_journal_extent, list); list_del(&jext->list); kfree(jext); } } /** * gfs2_add_jextent - Add or merge a new extent to extent cache * @jd: The journal descriptor * @lblock: The logical block at start of new extent * @dblock: The physical block at start of new extent * @blocks: Size of extent in fs blocks * * Returns: 0 on success or -ENOMEM */ static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks) { struct gfs2_journal_extent *jext; if (!list_empty(&jd->extent_list)) { jext = list_last_entry(&jd->extent_list, struct gfs2_journal_extent, list); if ((jext->dblock + jext->blocks) == dblock) { jext->blocks += blocks; return 0; } } jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS); if (jext == NULL) return -ENOMEM; jext->dblock = dblock; jext->lblock = lblock; jext->blocks = blocks; list_add_tail(&jext->list, &jd->extent_list); jd->nr_extents++; return 0; } /** * gfs2_map_journal_extents - Cache journal bmap info * @sdp: The super block * @jd: The journal to map * * Create a reusable "extent" mapping from all logical * blocks to all physical blocks for the given journal. This will save * us time when writing journal blocks. Most journals will have only one * extent that maps all their logical blocks. That's because gfs2.mkfs * arranges the journal blocks sequentially to maximize performance. * So the extent would map the first block for the entire file length. * However, gfs2_jadd can happen while file activity is happening, so * those journals may not be sequential. Less likely is the case where * the users created their own journals by mounting the metafs and * laying it out. But it's still possible. These journals might have * several extents. * * Returns: 0 on success, or error on failure */ int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd) { u64 lblock = 0; u64 lblock_stop; struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct buffer_head bh; unsigned int shift = sdp->sd_sb.sb_bsize_shift; u64 size; int rc; ktime_t start, end; start = ktime_get(); lblock_stop = i_size_read(jd->jd_inode) >> shift; size = (lblock_stop - lblock) << shift; jd->nr_extents = 0; WARN_ON(!list_empty(&jd->extent_list)); do { bh.b_state = 0; bh.b_blocknr = 0; bh.b_size = size; rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0); if (rc || !buffer_mapped(&bh)) goto fail; rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift); if (rc) goto fail; size -= bh.b_size; lblock += (bh.b_size >> ip->i_inode.i_blkbits); } while(size > 0); end = ktime_get(); fs_info(sdp, "journal %d mapped with %u extents in %lldms\n", jd->jd_jid, jd->nr_extents, ktime_ms_delta(end, start)); return 0; fail: fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n", rc, jd->jd_jid, (unsigned long long)(i_size_read(jd->jd_inode) - size), jd->nr_extents); fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n", rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr, bh.b_state, (unsigned long long)bh.b_size); gfs2_free_journal_extents(jd); return rc; } /** * gfs2_write_alloc_required - figure out if a write will require an allocation * @ip: the file being written to * @offset: the offset to write to * @len: the number of bytes being written * * Returns: 1 if an alloc is required, 0 otherwise */ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, unsigned int len) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head bh; unsigned int shift; u64 lblock, lblock_stop, size; u64 end_of_file; if (!len) return 0; if (gfs2_is_stuffed(ip)) { if (offset + len > gfs2_max_stuffed_size(ip)) return 1; return 0; } shift = sdp->sd_sb.sb_bsize_shift; BUG_ON(gfs2_is_dir(ip)); end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift; lblock = offset >> shift; lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex)) return 1; size = (lblock_stop - lblock) << shift; do { bh.b_state = 0; bh.b_size = size; gfs2_block_map(&ip->i_inode, lblock, &bh, 0); if (!buffer_mapped(&bh)) return 1; size -= bh.b_size; lblock += (bh.b_size >> ip->i_inode.i_blkbits); } while(size > 0); return 0; } static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length) { struct gfs2_inode *ip = GFS2_I(inode); struct buffer_head *dibh; int error; if (offset >= inode->i_size) return 0; if (offset + length > inode->i_size) length = inode->i_size - offset; error = gfs2_meta_inode_buffer(ip, &dibh); if (error) return error; gfs2_trans_add_meta(ip->i_gl, dibh); memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0, length); brelse(dibh); return 0; } static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset, loff_t length) { struct gfs2_sbd *sdp = GFS2_SB(inode); loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize; int error; while (length) { struct gfs2_trans *tr; loff_t chunk; unsigned int offs; chunk = length; if (chunk > max_chunk) chunk = max_chunk; offs = offset & ~PAGE_MASK; if (offs && chunk > PAGE_SIZE) chunk = offs + ((chunk - offs) & PAGE_MASK); truncate_pagecache_range(inode, offset, chunk); offset += chunk; length -= chunk; tr = current->journal_info; if (!test_bit(TR_TOUCHED, &tr->tr_flags)) continue; gfs2_trans_end(sdp); error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES); if (error) return error; } return 0; } int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length) { struct inode *inode = file_inode(file); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); unsigned int blocksize = i_blocksize(inode); loff_t start, end; int error; if (!gfs2_is_stuffed(ip)) { unsigned int start_off, end_len; start_off = offset & (blocksize - 1); end_len = (offset + length) & (blocksize - 1); if (start_off) { unsigned int len = length; if (length > blocksize - start_off) len = blocksize - start_off; error = gfs2_block_zero_range(inode, offset, len); if (error) goto out; if (start_off + length < blocksize) end_len = 0; } if (end_len) { error = gfs2_block_zero_range(inode, offset + length - end_len, end_len); if (error) goto out; } } start = round_down(offset, blocksize); end = round_up(offset + length, blocksize) - 1; error = filemap_write_and_wait_range(inode->i_mapping, start, end); if (error) return error; if (gfs2_is_jdata(ip)) error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA, GFS2_JTRUNC_REVOKES); else error = gfs2_trans_begin(sdp, RES_DINODE, 0); if (error) return error; if (gfs2_is_stuffed(ip)) { error = stuffed_zero_range(inode, offset, length); if (error) goto out; } if (gfs2_is_jdata(ip)) { BUG_ON(!current->journal_info); gfs2_journaled_truncate_range(inode, offset, length); } else truncate_pagecache_range(inode, offset, offset + length - 1); file_update_time(file); mark_inode_dirty(inode); if (current->journal_info) gfs2_trans_end(sdp); if (!gfs2_is_stuffed(ip)) error = punch_hole(ip, offset, length); out: if (current->journal_info) gfs2_trans_end(sdp); return error; } static int gfs2_map_blocks(struct iomap_writepage_ctx *wpc, struct inode *inode, loff_t offset, unsigned int len) { int ret; if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(inode)))) return -EIO; if (offset >= wpc->iomap.offset && offset < wpc->iomap.offset + wpc->iomap.length) return 0; memset(&wpc->iomap, 0, sizeof(wpc->iomap)); ret = gfs2_iomap_get(inode, offset, INT_MAX, &wpc->iomap); return ret; } const struct iomap_writeback_ops gfs2_writeback_ops = { .map_blocks = gfs2_map_blocks, };
2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM dlm #if !defined(_TRACE_DLM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_DLM_H #include <linux/dlm.h> #include <linux/dlmconstants.h> #include <uapi/linux/dlm_plock.h> #include <linux/tracepoint.h> #include "../../../fs/dlm/dlm_internal.h" #define show_lock_flags(flags) __print_flags(flags, "|", \ { DLM_LKF_NOQUEUE, "NOQUEUE" }, \ { DLM_LKF_CANCEL, "CANCEL" }, \ { DLM_LKF_CONVERT, "CONVERT" }, \ { DLM_LKF_VALBLK, "VALBLK" }, \ { DLM_LKF_QUECVT, "QUECVT" }, \ { DLM_LKF_IVVALBLK, "IVVALBLK" }, \ { DLM_LKF_CONVDEADLK, "CONVDEADLK" }, \ { DLM_LKF_PERSISTENT, "PERSISTENT" }, \ { DLM_LKF_NODLCKWT, "NODLCKWT" }, \ { DLM_LKF_NODLCKBLK, "NODLCKBLK" }, \ { DLM_LKF_EXPEDITE, "EXPEDITE" }, \ { DLM_LKF_NOQUEUEBAST, "NOQUEUEBAST" }, \ { DLM_LKF_HEADQUE, "HEADQUE" }, \ { DLM_LKF_NOORDER, "NOORDER" }, \ { DLM_LKF_ORPHAN, "ORPHAN" }, \ { DLM_LKF_ALTPR, "ALTPR" }, \ { DLM_LKF_ALTCW, "ALTCW" }, \ { DLM_LKF_FORCEUNLOCK, "FORCEUNLOCK" }, \ { DLM_LKF_TIMEOUT, "TIMEOUT" }) #define show_lock_mode(mode) __print_symbolic(mode, \ { DLM_LOCK_IV, "IV"}, \ { DLM_LOCK_NL, "NL"}, \ { DLM_LOCK_CR, "CR"}, \ { DLM_LOCK_CW, "CW"}, \ { DLM_LOCK_PR, "PR"}, \ { DLM_LOCK_PW, "PW"}, \ { DLM_LOCK_EX, "EX"}) #define show_dlm_sb_flags(flags) __print_flags(flags, "|", \ { DLM_SBF_DEMOTED, "DEMOTED" }, \ { DLM_SBF_VALNOTVALID, "VALNOTVALID" }, \ { DLM_SBF_ALTMODE, "ALTMODE" }) #define show_lkb_flags(flags) __print_flags(flags, "|", \ { BIT(DLM_DFL_USER_BIT), "USER" }, \ { BIT(DLM_DFL_ORPHAN_BIT), "ORPHAN" }) #define show_header_cmd(cmd) __print_symbolic(cmd, \ { DLM_MSG, "MSG"}, \ { DLM_RCOM, "RCOM"}, \ { DLM_OPTS, "OPTS"}, \ { DLM_ACK, "ACK"}, \ { DLM_FIN, "FIN"}) #define show_message_version(version) __print_symbolic(version, \ { DLM_VERSION_3_1, "3.1"}, \ { DLM_VERSION_3_2, "3.2"}) #define show_message_type(type) __print_symbolic(type, \ { DLM_MSG_REQUEST, "REQUEST"}, \ { DLM_MSG_CONVERT, "CONVERT"}, \ { DLM_MSG_UNLOCK, "UNLOCK"}, \ { DLM_MSG_CANCEL, "CANCEL"}, \ { DLM_MSG_REQUEST_REPLY, "REQUEST_REPLY"}, \ { DLM_MSG_CONVERT_REPLY, "CONVERT_REPLY"}, \ { DLM_MSG_UNLOCK_REPLY, "UNLOCK_REPLY"}, \ { DLM_MSG_CANCEL_REPLY, "CANCEL_REPLY"}, \ { DLM_MSG_GRANT, "GRANT"}, \ { DLM_MSG_BAST, "BAST"}, \ { DLM_MSG_LOOKUP, "LOOKUP"}, \ { DLM_MSG_REMOVE, "REMOVE"}, \ { DLM_MSG_LOOKUP_REPLY, "LOOKUP_REPLY"}, \ { DLM_MSG_PURGE, "PURGE"}) #define show_rcom_type(type) __print_symbolic(type, \ { DLM_RCOM_STATUS, "STATUS"}, \ { DLM_RCOM_NAMES, "NAMES"}, \ { DLM_RCOM_LOOKUP, "LOOKUP"}, \ { DLM_RCOM_LOCK, "LOCK"}, \ { DLM_RCOM_STATUS_REPLY, "STATUS_REPLY"}, \ { DLM_RCOM_NAMES_REPLY, "NAMES_REPLY"}, \ { DLM_RCOM_LOOKUP_REPLY, "LOOKUP_REPLY"}, \ { DLM_RCOM_LOCK_REPLY, "LOCK_REPLY"}) /* note: we begin tracing dlm_lock_start() only if ls and lkb are found */ TRACE_EVENT(dlm_lock_start, TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, const void *name, unsigned int namelen, int mode, __u32 flags), TP_ARGS(ls, lkb, name, namelen, mode, flags), TP_STRUCT__entry( __field(__u32, ls_id) __field(__u32, lkb_id) __field(int, mode) __field(__u32, flags) __dynamic_array(unsigned char, res_name, lkb->lkb_resource ? lkb->lkb_resource->res_length : namelen) ), TP_fast_assign( struct dlm_rsb *r; __entry->ls_id = ls->ls_global_id; __entry->lkb_id = lkb->lkb_id; __entry->mode = mode; __entry->flags = flags; r = lkb->lkb_resource; if (r) memcpy(__get_dynamic_array(res_name), r->res_name, __get_dynamic_array_len(res_name)); else if (name) memcpy(__get_dynamic_array(res_name), name, __get_dynamic_array_len(res_name)); ), TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s res_name=%s", __entry->ls_id, __entry->lkb_id, show_lock_mode(__entry->mode), show_lock_flags(__entry->flags), __print_hex_str(__get_dynamic_array(res_name), __get_dynamic_array_len(res_name))) ); TRACE_EVENT(dlm_lock_end, TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, const void *name, unsigned int namelen, int mode, __u32 flags, int error, bool kernel_lock), TP_ARGS(ls, lkb, name, namelen, mode, flags, error, kernel_lock), TP_STRUCT__entry( __field(__u32, ls_id) __field(__u32, lkb_id) __field(int, mode) __field(__u32, flags) __field(int, error) __dynamic_array(unsigned char, res_name, lkb->lkb_resource ? lkb->lkb_resource->res_length : namelen) ), TP_fast_assign( struct dlm_rsb *r; __entry->ls_id = ls->ls_global_id; __entry->lkb_id = lkb->lkb_id; __entry->mode = mode; __entry->flags = flags; __entry->error = error; r = lkb->lkb_resource; if (r) memcpy(__get_dynamic_array(res_name), r->res_name, __get_dynamic_array_len(res_name)); else if (name) memcpy(__get_dynamic_array(res_name), name, __get_dynamic_array_len(res_name)); if (kernel_lock) { /* return value will be zeroed in those cases by dlm_lock() * we do it here again to not introduce more overhead if * trace isn't running and error reflects the return value. */ if (error == -EAGAIN || error == -EDEADLK) __entry->error = 0; } ), TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s error=%d res_name=%s", __entry->ls_id, __entry->lkb_id, show_lock_mode(__entry->mode), show_lock_flags(__entry->flags), __entry->error, __print_hex_str(__get_dynamic_array(res_name), __get_dynamic_array_len(res_name))) ); TRACE_EVENT(dlm_bast, TP_PROTO(__u32 ls_id, __u32 lkb_id, int mode, const char *res_name, size_t res_length), TP_ARGS(ls_id, lkb_id, mode, res_name, res_length), TP_STRUCT__entry( __field(__u32, ls_id) __field(__u32, lkb_id) __field(int, mode) __dynamic_array(unsigned char, res_name, res_length) ), TP_fast_assign( __entry->ls_id = ls_id; __entry->lkb_id = lkb_id; __entry->mode = mode; memcpy(__get_dynamic_array(res_name), res_name, __get_dynamic_array_len(res_name)); ), TP_printk("ls_id=%u lkb_id=%x mode=%s res_name=%s", __entry->ls_id, __entry->lkb_id, show_lock_mode(__entry->mode), __print_hex_str(__get_dynamic_array(res_name), __get_dynamic_array_len(res_name))) ); TRACE_EVENT(dlm_ast, TP_PROTO(__u32 ls_id, __u32 lkb_id, __u8 sb_flags, int sb_status, const char *res_name, size_t res_length), TP_ARGS(ls_id, lkb_id, sb_flags, sb_status, res_name, res_length), TP_STRUCT__entry( __field(__u32, ls_id) __field(__u32, lkb_id) __field(__u8, sb_flags) __field(int, sb_status) __dynamic_array(unsigned char, res_name, res_length) ), TP_fast_assign( __entry->ls_id = ls_id; __entry->lkb_id = lkb_id; __entry->sb_flags = sb_flags; __entry->sb_status = sb_status; memcpy(__get_dynamic_array(res_name), res_name, __get_dynamic_array_len(res_name)); ), TP_printk("ls_id=%u lkb_id=%x sb_flags=%s sb_status=%d res_name=%s", __entry->ls_id, __entry->lkb_id, show_dlm_sb_flags(__entry->sb_flags), __entry->sb_status, __print_hex_str(__get_dynamic_array(res_name), __get_dynamic_array_len(res_name))) ); /* note: we begin tracing dlm_unlock_start() only if ls and lkb are found */ TRACE_EVENT(dlm_unlock_start, TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, __u32 flags), TP_ARGS(ls, lkb, flags), TP_STRUCT__entry( __field(__u32, ls_id) __field(__u32, lkb_id) __field(__u32, flags) __dynamic_array(unsigned char, res_name, lkb->lkb_resource ? lkb->lkb_resource->res_length : 0) ), TP_fast_assign( struct dlm_rsb *r; __entry->ls_id = ls->ls_global_id; __entry->lkb_id = lkb->lkb_id; __entry->flags = flags; r = lkb->lkb_resource; if (r) memcpy(__get_dynamic_array(res_name), r->res_name, __get_dynamic_array_len(res_name)); ), TP_printk("ls_id=%u lkb_id=%x flags=%s res_name=%s", __entry->ls_id, __entry->lkb_id, show_lock_flags(__entry->flags), __print_hex_str(__get_dynamic_array(res_name), __get_dynamic_array_len(res_name))) ); TRACE_EVENT(dlm_unlock_end, TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, __u32 flags, int error), TP_ARGS(ls, lkb, flags, error), TP_STRUCT__entry( __field(__u32, ls_id) __field(__u32, lkb_id) __field(__u32, flags) __field(int, error) __dynamic_array(unsigned char, res_name, lkb->lkb_resource ? lkb->lkb_resource->res_length : 0) ), TP_fast_assign( struct dlm_rsb *r; __entry->ls_id = ls->ls_global_id; __entry->lkb_id = lkb->lkb_id; __entry->flags = flags; __entry->error = error; r = lkb->lkb_resource; if (r) memcpy(__get_dynamic_array(res_name), r->res_name, __get_dynamic_array_len(res_name)); ), TP_printk("ls_id=%u lkb_id=%x flags=%s error=%d res_name=%s", __entry->ls_id, __entry->lkb_id, show_lock_flags(__entry->flags), __entry->error, __print_hex_str(__get_dynamic_array(res_name), __get_dynamic_array_len(res_name))) ); DECLARE_EVENT_CLASS(dlm_rcom_template, TP_PROTO(uint32_t dst, uint32_t h_seq, const struct dlm_rcom *rc), TP_ARGS(dst, h_seq, rc), TP_STRUCT__entry( __field(uint32_t, dst) __field(uint32_t, h_seq) __field(uint32_t, h_version) __field(uint32_t, h_lockspace) __field(uint32_t, h_nodeid) __field(uint16_t, h_length) __field(uint8_t, h_cmd) __field(uint32_t, rc_type) __field(int32_t, rc_result) __field(uint64_t, rc_id) __field(uint64_t, rc_seq) __field(uint64_t, rc_seq_reply) __dynamic_array(unsigned char, rc_buf, le16_to_cpu(rc->rc_header.h_length) - sizeof(*rc)) ), TP_fast_assign( __entry->dst = dst; __entry->h_seq = h_seq; __entry->h_version = le32_to_cpu(rc->rc_header.h_version); __entry->h_lockspace = le32_to_cpu(rc->rc_header.u.h_lockspace); __entry->h_nodeid = le32_to_cpu(rc->rc_header.h_nodeid); __entry->h_length = le16_to_cpu(rc->rc_header.h_length); __entry->h_cmd = rc->rc_header.h_cmd; __entry->rc_type = le32_to_cpu(rc->rc_type); __entry->rc_result = le32_to_cpu(rc->rc_result); __entry->rc_id = le64_to_cpu(rc->rc_id); __entry->rc_seq = le64_to_cpu(rc->rc_seq); __entry->rc_seq_reply = le64_to_cpu(rc->rc_seq_reply); memcpy(__get_dynamic_array(rc_buf), rc->rc_buf, __get_dynamic_array_len(rc_buf)); ), TP_printk("dst=%u h_seq=%u h_version=%s h_lockspace=%u h_nodeid=%u " "h_length=%u h_cmd=%s rc_type=%s rc_result=%d " "rc_id=%llu rc_seq=%llu rc_seq_reply=%llu " "rc_buf=0x%s", __entry->dst, __entry->h_seq, show_message_version(__entry->h_version), __entry->h_lockspace, __entry->h_nodeid, __entry->h_length, show_header_cmd(__entry->h_cmd), show_rcom_type(__entry->rc_type), __entry->rc_result, __entry->rc_id, __entry->rc_seq, __entry->rc_seq_reply, __print_hex_str(__get_dynamic_array(rc_buf), __get_dynamic_array_len(rc_buf))) ); DEFINE_EVENT(dlm_rcom_template, dlm_send_rcom, TP_PROTO(uint32_t dst, uint32_t h_seq, const struct dlm_rcom *rc), TP_ARGS(dst, h_seq, rc)); DEFINE_EVENT(dlm_rcom_template, dlm_recv_rcom, TP_PROTO(uint32_t dst, uint32_t h_seq, const struct dlm_rcom *rc), TP_ARGS(dst, h_seq, rc)); TRACE_EVENT(dlm_send_message, TP_PROTO(uint32_t dst, uint32_t h_seq, const struct dlm_message *ms, const void *name, int namelen), TP_ARGS(dst, h_seq, ms, name, namelen), TP_STRUCT__entry( __field(uint32_t, dst) __field(uint32_t, h_seq) __field(uint32_t, h_version) __field(uint32_t, h_lockspace) __field(uint32_t, h_nodeid) __field(uint16_t, h_length) __field(uint8_t, h_cmd) __field(uint32_t, m_type) __field(uint32_t, m_nodeid) __field(uint32_t, m_pid) __field(uint32_t, m_lkid) __field(uint32_t, m_remid) __field(uint32_t, m_parent_lkid) __field(uint32_t, m_parent_remid) __field(uint32_t, m_exflags) __field(uint32_t, m_sbflags) __field(uint32_t, m_flags) __field(uint32_t, m_lvbseq) __field(uint32_t, m_hash) __field(int32_t, m_status) __field(int32_t, m_grmode) __field(int32_t, m_rqmode) __field(int32_t, m_bastmode) __field(int32_t, m_asts) __field(int32_t, m_result) __dynamic_array(unsigned char, m_extra, le16_to_cpu(ms->m_header.h_length) - sizeof(*ms)) __dynamic_array(unsigned char, res_name, namelen) ), TP_fast_assign( __entry->dst = dst; __entry->h_seq = h_seq; __entry->h_version = le32_to_cpu(ms->m_header.h_version); __entry->h_lockspace = le32_to_cpu(ms->m_header.u.h_lockspace); __entry->h_nodeid = le32_to_cpu(ms->m_header.h_nodeid); __entry->h_length = le16_to_cpu(ms->m_header.h_length); __entry->h_cmd = ms->m_header.h_cmd; __entry->m_type = le32_to_cpu(ms->m_type); __entry->m_nodeid = le32_to_cpu(ms->m_nodeid); __entry->m_pid = le32_to_cpu(ms->m_pid); __entry->m_lkid = le32_to_cpu(ms->m_lkid); __entry->m_remid = le32_to_cpu(ms->m_remid); __entry->m_parent_lkid = le32_to_cpu(ms->m_parent_lkid); __entry->m_parent_remid = le32_to_cpu(ms->m_parent_remid); __entry->m_exflags = le32_to_cpu(ms->m_exflags); __entry->m_sbflags = le32_to_cpu(ms->m_sbflags); __entry->m_flags = le32_to_cpu(ms->m_flags); __entry->m_lvbseq = le32_to_cpu(ms->m_lvbseq); __entry->m_hash = le32_to_cpu(ms->m_hash); __entry->m_status = le32_to_cpu(ms->m_status); __entry->m_grmode = le32_to_cpu(ms->m_grmode); __entry->m_rqmode = le32_to_cpu(ms->m_rqmode); __entry->m_bastmode = le32_to_cpu(ms->m_bastmode); __entry->m_asts = le32_to_cpu(ms->m_asts); __entry->m_result = le32_to_cpu(ms->m_result); memcpy(__get_dynamic_array(m_extra), ms->m_extra, __get_dynamic_array_len(m_extra)); memcpy(__get_dynamic_array(res_name), name, __get_dynamic_array_len(res_name)); ), TP_printk("dst=%u h_seq=%u h_version=%s h_lockspace=%u h_nodeid=%u " "h_length=%u h_cmd=%s m_type=%s m_nodeid=%u " "m_pid=%u m_lkid=%u m_remid=%u m_parent_lkid=%u " "m_parent_remid=%u m_exflags=%s m_sbflags=%s m_flags=%s " "m_lvbseq=%u m_hash=%u m_status=%d m_grmode=%s " "m_rqmode=%s m_bastmode=%s m_asts=%d m_result=%d " "m_extra=0x%s res_name=0x%s", __entry->dst, __entry->h_seq, show_message_version(__entry->h_version), __entry->h_lockspace, __entry->h_nodeid, __entry->h_length, show_header_cmd(__entry->h_cmd), show_message_type(__entry->m_type), __entry->m_nodeid, __entry->m_pid, __entry->m_lkid, __entry->m_remid, __entry->m_parent_lkid, __entry->m_parent_remid, show_lock_flags(__entry->m_exflags), show_dlm_sb_flags(__entry->m_sbflags), show_lkb_flags(__entry->m_flags), __entry->m_lvbseq, __entry->m_hash, __entry->m_status, show_lock_mode(__entry->m_grmode), show_lock_mode(__entry->m_rqmode), show_lock_mode(__entry->m_bastmode), __entry->m_asts, __entry->m_result, __print_hex_str(__get_dynamic_array(m_extra), __get_dynamic_array_len(m_extra)), __print_hex_str(__get_dynamic_array(res_name), __get_dynamic_array_len(res_name))) ); TRACE_EVENT(dlm_recv_message, TP_PROTO(uint32_t dst, uint32_t h_seq, const struct dlm_message *ms), TP_ARGS(dst, h_seq, ms), TP_STRUCT__entry( __field(uint32_t, dst) __field(uint32_t, h_seq) __field(uint32_t, h_version) __field(uint32_t, h_lockspace) __field(uint32_t, h_nodeid) __field(uint16_t, h_length) __field(uint8_t, h_cmd) __field(uint32_t, m_type) __field(uint32_t, m_nodeid) __field(uint32_t, m_pid) __field(uint32_t, m_lkid) __field(uint32_t, m_remid) __field(uint32_t, m_parent_lkid) __field(uint32_t, m_parent_remid) __field(uint32_t, m_exflags) __field(uint32_t, m_sbflags) __field(uint32_t, m_flags) __field(uint32_t, m_lvbseq) __field(uint32_t, m_hash) __field(int32_t, m_status) __field(int32_t, m_grmode) __field(int32_t, m_rqmode) __field(int32_t, m_bastmode) __field(int32_t, m_asts) __field(int32_t, m_result) __dynamic_array(unsigned char, m_extra, le16_to_cpu(ms->m_header.h_length) - sizeof(*ms)) ), TP_fast_assign( __entry->dst = dst; __entry->h_seq = h_seq; __entry->h_version = le32_to_cpu(ms->m_header.h_version); __entry->h_lockspace = le32_to_cpu(ms->m_header.u.h_lockspace); __entry->h_nodeid = le32_to_cpu(ms->m_header.h_nodeid); __entry->h_length = le16_to_cpu(ms->m_header.h_length); __entry->h_cmd = ms->m_header.h_cmd; __entry->m_type = le32_to_cpu(ms->m_type); __entry->m_nodeid = le32_to_cpu(ms->m_nodeid); __entry->m_pid = le32_to_cpu(ms->m_pid); __entry->m_lkid = le32_to_cpu(ms->m_lkid); __entry->m_remid = le32_to_cpu(ms->m_remid); __entry->m_parent_lkid = le32_to_cpu(ms->m_parent_lkid); __entry->m_parent_remid = le32_to_cpu(ms->m_parent_remid); __entry->m_exflags = le32_to_cpu(ms->m_exflags); __entry->m_sbflags = le32_to_cpu(ms->m_sbflags); __entry->m_flags = le32_to_cpu(ms->m_flags); __entry->m_lvbseq = le32_to_cpu(ms->m_lvbseq); __entry->m_hash = le32_to_cpu(ms->m_hash); __entry->m_status = le32_to_cpu(ms->m_status); __entry->m_grmode = le32_to_cpu(ms->m_grmode); __entry->m_rqmode = le32_to_cpu(ms->m_rqmode); __entry->m_bastmode = le32_to_cpu(ms->m_bastmode); __entry->m_asts = le32_to_cpu(ms->m_asts); __entry->m_result = le32_to_cpu(ms->m_result); memcpy(__get_dynamic_array(m_extra), ms->m_extra, __get_dynamic_array_len(m_extra)); ), TP_printk("dst=%u h_seq=%u h_version=%s h_lockspace=%u h_nodeid=%u " "h_length=%u h_cmd=%s m_type=%s m_nodeid=%u " "m_pid=%u m_lkid=%u m_remid=%u m_parent_lkid=%u " "m_parent_remid=%u m_exflags=%s m_sbflags=%s m_flags=%s " "m_lvbseq=%u m_hash=%u m_status=%d m_grmode=%s " "m_rqmode=%s m_bastmode=%s m_asts=%d m_result=%d " "m_extra=0x%s", __entry->dst, __entry->h_seq, show_message_version(__entry->h_version), __entry->h_lockspace, __entry->h_nodeid, __entry->h_length, show_header_cmd(__entry->h_cmd), show_message_type(__entry->m_type), __entry->m_nodeid, __entry->m_pid, __entry->m_lkid, __entry->m_remid, __entry->m_parent_lkid, __entry->m_parent_remid, show_lock_flags(__entry->m_exflags), show_dlm_sb_flags(__entry->m_sbflags), show_lkb_flags(__entry->m_flags), __entry->m_lvbseq, __entry->m_hash, __entry->m_status, show_lock_mode(__entry->m_grmode), show_lock_mode(__entry->m_rqmode), show_lock_mode(__entry->m_bastmode), __entry->m_asts, __entry->m_result, __print_hex_str(__get_dynamic_array(m_extra), __get_dynamic_array_len(m_extra))) ); DECLARE_EVENT_CLASS(dlm_plock_template, TP_PROTO(const struct dlm_plock_info *info), TP_ARGS(info), TP_STRUCT__entry( __field(uint8_t, optype) __field(uint8_t, ex) __field(uint8_t, wait) __field(uint8_t, flags) __field(uint32_t, pid) __field(int32_t, nodeid) __field(int32_t, rv) __field(uint32_t, fsid) __field(uint64_t, number) __field(uint64_t, start) __field(uint64_t, end) __field(uint64_t, owner) ), TP_fast_assign( __entry->optype = info->optype; __entry->ex = info->ex; __entry->wait = info->wait; __entry->flags = info->flags; __entry->pid = info->pid; __entry->nodeid = info->nodeid; __entry->rv = info->rv; __entry->fsid = info->fsid; __entry->number = info->number; __entry->start = info->start; __entry->end = info->end; __entry->owner = info->owner; ), TP_printk("fsid=%u number=%llx owner=%llx optype=%d ex=%d wait=%d flags=%x pid=%u nodeid=%d rv=%d start=%llx end=%llx", __entry->fsid, __entry->number, __entry->owner, __entry->optype, __entry->ex, __entry->wait, __entry->flags, __entry->pid, __entry->nodeid, __entry->rv, __entry->start, __entry->end) ); DEFINE_EVENT(dlm_plock_template, dlm_plock_read, TP_PROTO(const struct dlm_plock_info *info), TP_ARGS(info)); DEFINE_EVENT(dlm_plock_template, dlm_plock_write, TP_PROTO(const struct dlm_plock_info *info), TP_ARGS(info)); TRACE_EVENT(dlm_send, TP_PROTO(int nodeid, int ret), TP_ARGS(nodeid, ret), TP_STRUCT__entry( __field(int, nodeid) __field(int, ret) ), TP_fast_assign( __entry->nodeid = nodeid; __entry->ret = ret; ), TP_printk("nodeid=%d ret=%d", __entry->nodeid, __entry->ret) ); TRACE_EVENT(dlm_recv, TP_PROTO(int nodeid, int ret), TP_ARGS(nodeid, ret), TP_STRUCT__entry( __field(int, nodeid) __field(int, ret) ), TP_fast_assign( __entry->nodeid = nodeid; __entry->ret = ret; ), TP_printk("nodeid=%d ret=%d", __entry->nodeid, __entry->ret) ); #endif /* if !defined(_TRACE_DLM_H) || defined(TRACE_HEADER_MULTI_READ) */ /* This part must be outside protection */ #include <trace/define_trace.h>
2 3 38 38 13 13 13 13 13 13 13 13 43 6 38 38 25 13 13 38 4 4 4 4 4 4 1 2 1 6 1 1 4 2 2 2 2 39 2 5 32 19 1 11 2 35 8 42 37 6 42 37 2 31 6 1 22 12 32 32 18 31 1 5 15 4 15 13 5 3 2 1 2 2 10 1 2 8 1 5 1 1 12 9 9 1 1 22 22 22 16 5 1 13 1 1 11 11 51 38 13 48 2 46 45 42 20 21 12 1 1 1 2 1 1 1 4 1 1 3 6 24 5 1 1 1 1 1 1 1 4 1 2 1 29 29 1 13 11 2 9 1 1 2 2 18 1 5 12 1 36 4 1 3 1 1 6 1 7 4 8 3 12 12 1 21 3 1 1 4 2 4 253 254 34 255 44 211 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/ioctl.c * * Copyright (C) 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) * Laboratoire MASI - Institut Blaise Pascal * Universite Pierre et Marie Curie (Paris VI) */ #include <linux/fs.h> #include <linux/capability.h> #include <linux/time.h> #include <linux/compat.h> #include <linux/mount.h> #include <linux/file.h> #include <linux/quotaops.h> #include <linux/random.h> #include <linux/uaccess.h> #include <linux/delay.h> #include <linux/iversion.h> #include <linux/fileattr.h> #include <linux/uuid.h> #include "ext4_jbd2.h" #include "ext4.h" #include <linux/fsmap.h> #include "fsmap.h" #include <trace/events/ext4.h> typedef void ext4_update_sb_callback(struct ext4_super_block *es, const void *arg); /* * Superblock modification callback function for changing file system * label */ static void ext4_sb_setlabel(struct ext4_super_block *es, const void *arg) { /* Sanity check, this should never happen */ BUILD_BUG_ON(sizeof(es->s_volume_name) < EXT4_LABEL_MAX); memcpy(es->s_volume_name, (char *)arg, EXT4_LABEL_MAX); } /* * Superblock modification callback function for changing file system * UUID. */ static void ext4_sb_setuuid(struct ext4_super_block *es, const void *arg) { memcpy(es->s_uuid, (__u8 *)arg, UUID_SIZE); } static int ext4_update_primary_sb(struct super_block *sb, handle_t *handle, ext4_update_sb_callback func, const void *arg) { int err = 0; struct ext4_sb_info *sbi = EXT4_SB(sb); struct buffer_head *bh = sbi->s_sbh; struct ext4_super_block *es = sbi->s_es; trace_ext4_update_sb(sb, bh->b_blocknr, 1); BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE); if (err) goto out_err; lock_buffer(bh); func(es, arg); ext4_superblock_csum_set(sb); unlock_buffer(bh); if (buffer_write_io_error(bh) || !buffer_uptodate(bh)) { ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to " "superblock detected"); clear_buffer_write_io_error(bh); set_buffer_uptodate(bh); } err = ext4_handle_dirty_metadata(handle, NULL, bh); if (err) goto out_err; err = sync_dirty_buffer(bh); out_err: ext4_std_error(sb, err); return err; } /* * Update one backup superblock in the group 'grp' using the callback * function 'func' and argument 'arg'. If the handle is NULL the * modification is not journalled. * * Returns: 0 when no modification was done (no superblock in the group) * 1 when the modification was successful * <0 on error */ static int ext4_update_backup_sb(struct super_block *sb, handle_t *handle, ext4_group_t grp, ext4_update_sb_callback func, const void *arg) { int err = 0; ext4_fsblk_t sb_block; struct buffer_head *bh; unsigned long offset = 0; struct ext4_super_block *es; if (!ext4_bg_has_super(sb, grp)) return 0; /* * For the group 0 there is always 1k padding, so we have * either adjust offset, or sb_block depending on blocksize */ if (grp == 0) { sb_block = 1 * EXT4_MIN_BLOCK_SIZE; offset = do_div(sb_block, sb->s_blocksize); } else { sb_block = ext4_group_first_block_no(sb, grp); offset = 0; } trace_ext4_update_sb(sb, sb_block, handle ? 1 : 0); bh = ext4_sb_bread(sb, sb_block, 0); if (IS_ERR(bh)) return PTR_ERR(bh); if (handle) { BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE); if (err) goto out_bh; } es = (struct ext4_super_block *) (bh->b_data + offset); lock_buffer(bh); if (ext4_has_metadata_csum(sb) && es->s_checksum != ext4_superblock_csum(sb, es)) { ext4_msg(sb, KERN_ERR, "Invalid checksum for backup " "superblock %llu", sb_block); unlock_buffer(bh); goto out_bh; } func(es, arg); if (ext4_has_metadata_csum(sb)) es->s_checksum = ext4_superblock_csum(sb, es); set_buffer_uptodate(bh); unlock_buffer(bh); if (handle) { err = ext4_handle_dirty_metadata(handle, NULL, bh); if (err) goto out_bh; } else { BUFFER_TRACE(bh, "marking dirty"); mark_buffer_dirty(bh); } err = sync_dirty_buffer(bh); out_bh: brelse(bh); ext4_std_error(sb, err); return (err) ? err : 1; } /* * Update primary and backup superblocks using the provided function * func and argument arg. * * Only the primary superblock and at most two backup superblock * modifications are journalled; the rest is modified without journal. * This is safe because e2fsck will re-write them if there is a problem, * and we're very unlikely to ever need more than two backups. */ static int ext4_update_superblocks_fn(struct super_block *sb, ext4_update_sb_callback func, const void *arg) { handle_t *handle; ext4_group_t ngroups; unsigned int three = 1; unsigned int five = 5; unsigned int seven = 7; int err = 0, ret, i; ext4_group_t grp, primary_grp; struct ext4_sb_info *sbi = EXT4_SB(sb); /* * We can't update superblocks while the online resize is running */ if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING, &sbi->s_ext4_flags)) { ext4_msg(sb, KERN_ERR, "Can't modify superblock while" "performing online resize"); return -EBUSY; } /* * We're only going to update primary superblock and two * backup superblocks in this transaction. */ handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 3); if (IS_ERR(handle)) { err = PTR_ERR(handle); goto out; } /* Update primary superblock */ err = ext4_update_primary_sb(sb, handle, func, arg); if (err) { ext4_msg(sb, KERN_ERR, "Failed to update primary " "superblock"); goto out_journal; } primary_grp = ext4_get_group_number(sb, sbi->s_sbh->b_blocknr); ngroups = ext4_get_groups_count(sb); /* * Update backup superblocks. We have to start from group 0 * because it might not be where the primary superblock is * if the fs is mounted with -o sb=<backup_sb_block> */ i = 0; grp = 0; while (grp < ngroups) { /* Skip primary superblock */ if (grp == primary_grp) goto next_grp; ret = ext4_update_backup_sb(sb, handle, grp, func, arg); if (ret < 0) { /* Ignore bad checksum; try to update next sb */ if (ret == -EFSBADCRC) goto next_grp; err = ret; goto out_journal; } i += ret; if (handle && i > 1) { /* * We're only journalling primary superblock and * two backup superblocks; the rest is not * journalled. */ err = ext4_journal_stop(handle); if (err) goto out; handle = NULL; } next_grp: grp = ext4_list_backups(sb, &three, &five, &seven); } out_journal: if (handle) { ret = ext4_journal_stop(handle); if (ret && !err) err = ret; } out: clear_bit_unlock(EXT4_FLAGS_RESIZING, &sbi->s_ext4_flags); smp_mb__after_atomic(); return err ? err : 0; } /* * Swap memory between @a and @b for @len bytes. * * @a: pointer to first memory area * @b: pointer to second memory area * @len: number of bytes to swap * */ static void memswap(void *a, void *b, size_t len) { unsigned char *ap, *bp; ap = (unsigned char *)a; bp = (unsigned char *)b; while (len-- > 0) { swap(*ap, *bp); ap++; bp++; } } /* * Swap i_data and associated attributes between @inode1 and @inode2. * This function is used for the primary swap between inode1 and inode2 * and also to revert this primary swap in case of errors. * * Therefore you have to make sure, that calling this method twice * will revert all changes. * * @inode1: pointer to first inode * @inode2: pointer to second inode */ static void swap_inode_data(struct inode *inode1, struct inode *inode2) { loff_t isize; struct ext4_inode_info *ei1; struct ext4_inode_info *ei2; unsigned long tmp; struct timespec64 ts1, ts2; ei1 = EXT4_I(inode1); ei2 = EXT4_I(inode2); swap(inode1->i_version, inode2->i_version); ts1 = inode_get_atime(inode1); ts2 = inode_get_atime(inode2); inode_set_atime_to_ts(inode1, ts2); inode_set_atime_to_ts(inode2, ts1); ts1 = inode_get_mtime(inode1); ts2 = inode_get_mtime(inode2); inode_set_mtime_to_ts(inode1, ts2); inode_set_mtime_to_ts(inode2, ts1); memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data)); tmp = ei1->i_flags & EXT4_FL_SHOULD_SWAP; ei1->i_flags = (ei2->i_flags & EXT4_FL_SHOULD_SWAP) | (ei1->i_flags & ~EXT4_FL_SHOULD_SWAP); ei2->i_flags = tmp | (ei2->i_flags & ~EXT4_FL_SHOULD_SWAP); swap(ei1->i_disksize, ei2->i_disksize); ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); isize = i_size_read(inode1); i_size_write(inode1, i_size_read(inode2)); i_size_write(inode2, isize); } void ext4_reset_inode_seed(struct inode *inode) { struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); __le32 inum = cpu_to_le32(inode->i_ino); __le32 gen = cpu_to_le32(inode->i_generation); __u32 csum; if (!ext4_has_metadata_csum(inode->i_sb)) return; csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum, sizeof(inum)); ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen, sizeof(gen)); } /* * Swap the information from the given @inode and the inode * EXT4_BOOT_LOADER_INO. It will basically swap i_data and all other * important fields of the inodes. * * @sb: the super block of the filesystem * @idmap: idmap of the mount the inode was found from * @inode: the inode to swap with EXT4_BOOT_LOADER_INO * */ static long swap_inode_boot_loader(struct super_block *sb, struct mnt_idmap *idmap, struct inode *inode) { handle_t *handle; int err; struct inode *inode_bl; struct ext4_inode_info *ei_bl; qsize_t size, size_bl, diff; blkcnt_t blocks; unsigned short bytes; inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO, EXT4_IGET_SPECIAL | EXT4_IGET_BAD); if (IS_ERR(inode_bl)) return PTR_ERR(inode_bl); ei_bl = EXT4_I(inode_bl); /* Protect orig inodes against a truncate and make sure, * that only 1 swap_inode_boot_loader is running. */ lock_two_nondirectories(inode, inode_bl); if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode) || IS_SWAPFILE(inode) || IS_ENCRYPTED(inode) || (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) || ext4_has_inline_data(inode)) { err = -EINVAL; goto journal_err_out; } if (IS_RDONLY(inode) || IS_APPEND(inode) || IS_IMMUTABLE(inode) || !inode_owner_or_capable(idmap, inode) || !capable(CAP_SYS_ADMIN)) { err = -EPERM; goto journal_err_out; } filemap_invalidate_lock(inode->i_mapping); err = filemap_write_and_wait(inode->i_mapping); if (err) goto err_out; err = filemap_write_and_wait(inode_bl->i_mapping); if (err) goto err_out; /* Wait for all existing dio workers */ inode_dio_wait(inode); inode_dio_wait(inode_bl); truncate_inode_pages(&inode->i_data, 0); truncate_inode_pages(&inode_bl->i_data, 0); handle = ext4_journal_start(inode_bl, EXT4_HT_MOVE_EXTENTS, 2); if (IS_ERR(handle)) { err = -EINVAL; goto err_out; } ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT, handle); /* Protect extent tree against block allocations via delalloc */ ext4_double_down_write_data_sem(inode, inode_bl); if (is_bad_inode(inode_bl) || !S_ISREG(inode_bl->i_mode)) { /* this inode has never been used as a BOOT_LOADER */ set_nlink(inode_bl, 1); i_uid_write(inode_bl, 0); i_gid_write(inode_bl, 0); inode_bl->i_flags = 0; ei_bl->i_flags = 0; inode_set_iversion(inode_bl, 1); i_size_write(inode_bl, 0); EXT4_I(inode_bl)->i_disksize = inode_bl->i_size; inode_bl->i_mode = S_IFREG; if (ext4_has_feature_extents(sb)) { ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS); ext4_ext_tree_init(handle, inode_bl); } else memset(ei_bl->i_data, 0, sizeof(ei_bl->i_data)); } err = dquot_initialize(inode); if (err) goto err_out1; size = (qsize_t)(inode->i_blocks) * (1 << 9) + inode->i_bytes; size_bl = (qsize_t)(inode_bl->i_blocks) * (1 << 9) + inode_bl->i_bytes; diff = size - size_bl; swap_inode_data(inode, inode_bl); inode_set_ctime_current(inode); inode_set_ctime_current(inode_bl); inode_inc_iversion(inode); inode->i_generation = get_random_u32(); inode_bl->i_generation = get_random_u32(); ext4_reset_inode_seed(inode); ext4_reset_inode_seed(inode_bl); ext4_discard_preallocations(inode); err = ext4_mark_inode_dirty(handle, inode); if (err < 0) { /* No need to update quota information. */ ext4_warning(inode->i_sb, "couldn't mark inode #%lu dirty (err %d)", inode->i_ino, err); /* Revert all changes: */ swap_inode_data(inode, inode_bl); ext4_mark_inode_dirty(handle, inode); goto err_out1; } blocks = inode_bl->i_blocks; bytes = inode_bl->i_bytes; inode_bl->i_blocks = inode->i_blocks; inode_bl->i_bytes = inode->i_bytes; err = ext4_mark_inode_dirty(handle, inode_bl); if (err < 0) { /* No need to update quota information. */ ext4_warning(inode_bl->i_sb, "couldn't mark inode #%lu dirty (err %d)", inode_bl->i_ino, err); goto revert; } /* Bootloader inode should not be counted into quota information. */ if (diff > 0) dquot_free_space(inode, diff); else err = dquot_alloc_space(inode, -1 * diff); if (err < 0) { revert: /* Revert all changes: */ inode_bl->i_blocks = blocks; inode_bl->i_bytes = bytes; swap_inode_data(inode, inode_bl); ext4_mark_inode_dirty(handle, inode); ext4_mark_inode_dirty(handle, inode_bl); } err_out1: ext4_journal_stop(handle); ext4_double_up_write_data_sem(inode, inode_bl); err_out: filemap_invalidate_unlock(inode->i_mapping); journal_err_out: unlock_two_nondirectories(inode, inode_bl); iput(inode_bl); return err; } /* * If immutable is set and we are not clearing it, we're not allowed to change * anything else in the inode. Don't error out if we're only trying to set * immutable on an immutable file. */ static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid, unsigned int flags) { struct ext4_inode_info *ei = EXT4_I(inode); unsigned int oldflags = ei->i_flags; if (!(oldflags & EXT4_IMMUTABLE_FL) || !(flags & EXT4_IMMUTABLE_FL)) return 0; if ((oldflags & ~EXT4_IMMUTABLE_FL) != (flags & ~EXT4_IMMUTABLE_FL)) return -EPERM; if (ext4_has_feature_project(inode->i_sb) && __kprojid_val(ei->i_projid) != new_projid) return -EPERM; return 0; } static void ext4_dax_dontcache(struct inode *inode, unsigned int flags) { struct ext4_inode_info *ei = EXT4_I(inode); if (S_ISDIR(inode->i_mode)) return; if (test_opt2(inode->i_sb, DAX_NEVER) || test_opt(inode->i_sb, DAX_ALWAYS)) return; if ((ei->i_flags ^ flags) & EXT4_DAX_FL) d_mark_dontcache(inode); } static bool dax_compatible(struct inode *inode, unsigned int oldflags, unsigned int flags) { /* Allow the DAX flag to be changed on inline directories */ if (S_ISDIR(inode->i_mode)) { flags &= ~EXT4_INLINE_DATA_FL; oldflags &= ~EXT4_INLINE_DATA_FL; } if (flags & EXT4_DAX_FL) { if ((oldflags & EXT4_DAX_MUT_EXCL) || ext4_test_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS)) { return false; } } if ((flags & EXT4_DAX_MUT_EXCL) && (oldflags & EXT4_DAX_FL)) return false; return true; } static int ext4_ioctl_setflags(struct inode *inode, unsigned int flags) { struct ext4_inode_info *ei = EXT4_I(inode); handle_t *handle = NULL; int err = -EPERM, migrate = 0; struct ext4_iloc iloc; unsigned int oldflags, mask, i; struct super_block *sb = inode->i_sb; /* Is it quota file? Do not allow user to mess with it */ if (ext4_is_quota_file(inode)) goto flags_out; oldflags = ei->i_flags; /* * The JOURNAL_DATA flag can only be changed by * the relevant capability. */ if ((flags ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { if (!capable(CAP_SYS_RESOURCE)) goto flags_out; } if (!dax_compatible(inode, oldflags, flags)) { err = -EOPNOTSUPP; goto flags_out; } if ((flags ^ oldflags) & EXT4_EXTENTS_FL) migrate = 1; if ((flags ^ oldflags) & EXT4_CASEFOLD_FL) { if (!ext4_has_feature_casefold(sb)) { err = -EOPNOTSUPP; goto flags_out; } if (!S_ISDIR(inode->i_mode)) { err = -ENOTDIR; goto flags_out; } if (!ext4_empty_dir(inode)) { err = -ENOTEMPTY; goto flags_out; } } /* * Wait for all pending directio and then flush all the dirty pages * for this file. The flush marks all the pages readonly, so any * subsequent attempt to write to the file (particularly mmap pages) * will come through the filesystem and fail. */ if (S_ISREG(inode->i_mode) && !IS_IMMUTABLE(inode) && (flags & EXT4_IMMUTABLE_FL)) { inode_dio_wait(inode); err = filemap_write_and_wait(inode->i_mapping); if (err) goto flags_out; } handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); if (IS_ERR(handle)) { err = PTR_ERR(handle); goto flags_out; } if (IS_SYNC(inode)) ext4_handle_sync(handle); err = ext4_reserve_inode_write(handle, inode, &iloc); if (err) goto flags_err; ext4_dax_dontcache(inode, flags); for (i = 0, mask = 1; i < 32; i++, mask <<= 1) { if (!(mask & EXT4_FL_USER_MODIFIABLE)) continue; /* These flags get special treatment later */ if (mask == EXT4_JOURNAL_DATA_FL || mask == EXT4_EXTENTS_FL) continue; if (mask & flags) ext4_set_inode_flag(inode, i); else ext4_clear_inode_flag(inode, i); } ext4_set_inode_flags(inode, false); inode_set_ctime_current(inode); inode_inc_iversion(inode); err = ext4_mark_iloc_dirty(handle, inode, &iloc); flags_err: ext4_journal_stop(handle); if (err) goto flags_out; if ((flags ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { /* * Changes to the journaling mode can cause unsafe changes to * S_DAX if the inode is DAX */ if (IS_DAX(inode)) { err = -EBUSY; goto flags_out; } err = ext4_change_inode_journal_flag(inode, flags & EXT4_JOURNAL_DATA_FL); if (err) goto flags_out; } if (migrate) { if (flags & EXT4_EXTENTS_FL) err = ext4_ext_migrate(inode); else err = ext4_ind_migrate(inode); } flags_out: return err; } #ifdef CONFIG_QUOTA static int ext4_ioctl_setproject(struct inode *inode, __u32 projid) { struct super_block *sb = inode->i_sb; struct ext4_inode_info *ei = EXT4_I(inode); int err, rc; handle_t *handle; kprojid_t kprojid; struct ext4_iloc iloc; struct ext4_inode *raw_inode; struct dquot *transfer_to[MAXQUOTAS] = { }; if (!ext4_has_feature_project(sb)) { if (projid != EXT4_DEF_PROJID) return -EOPNOTSUPP; else return 0; } if (EXT4_INODE_SIZE(sb) <= EXT4_GOOD_OLD_INODE_SIZE) return -EOPNOTSUPP; kprojid = make_kprojid(&init_user_ns, (projid_t)projid); if (projid_eq(kprojid, EXT4_I(inode)->i_projid)) return 0; err = -EPERM; /* Is it quota file? Do not allow user to mess with it */ if (ext4_is_quota_file(inode)) return err; err = dquot_initialize(inode); if (err) return err; err = ext4_get_inode_loc(inode, &iloc); if (err) return err; raw_inode = ext4_raw_inode(&iloc); if (!EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) { err = ext4_expand_extra_isize(inode, EXT4_SB(sb)->s_want_extra_isize, &iloc); if (err) return err; } else { brelse(iloc.bh); } handle = ext4_journal_start(inode, EXT4_HT_QUOTA, EXT4_QUOTA_INIT_BLOCKS(sb) + EXT4_QUOTA_DEL_BLOCKS(sb) + 3); if (IS_ERR(handle)) return PTR_ERR(handle); err = ext4_reserve_inode_write(handle, inode, &iloc); if (err) goto out_stop; transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid)); if (!IS_ERR(transfer_to[PRJQUOTA])) { /* __dquot_transfer() calls back ext4_get_inode_usage() which * counts xattr inode references. */ down_read(&EXT4_I(inode)->xattr_sem); err = __dquot_transfer(inode, transfer_to); up_read(&EXT4_I(inode)->xattr_sem); dqput(transfer_to[PRJQUOTA]); if (err) goto out_dirty; } EXT4_I(inode)->i_projid = kprojid; inode_set_ctime_current(inode); inode_inc_iversion(inode); out_dirty: rc = ext4_mark_iloc_dirty(handle, inode, &iloc); if (!err) err = rc; out_stop: ext4_journal_stop(handle); return err; } #else static int ext4_ioctl_setproject(struct inode *inode, __u32 projid) { if (projid != EXT4_DEF_PROJID) return -EOPNOTSUPP; return 0; } #endif int ext4_force_shutdown(struct super_block *sb, u32 flags) { struct ext4_sb_info *sbi = EXT4_SB(sb); int ret; if (flags > EXT4_GOING_FLAGS_NOLOGFLUSH) return -EINVAL; if (ext4_forced_shutdown(sb)) return 0; ext4_msg(sb, KERN_ALERT, "shut down requested (%d)", flags); trace_ext4_shutdown(sb, flags); switch (flags) { case EXT4_GOING_FLAGS_DEFAULT: ret = bdev_freeze(sb->s_bdev); if (ret) return ret; set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); bdev_thaw(sb->s_bdev); break; case EXT4_GOING_FLAGS_LOGFLUSH: set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); if (sbi->s_journal && !is_journal_aborted(sbi->s_journal)) { (void) ext4_force_commit(sb); jbd2_journal_abort(sbi->s_journal, -ESHUTDOWN); } break; case EXT4_GOING_FLAGS_NOLOGFLUSH: set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); if (sbi->s_journal && !is_journal_aborted(sbi->s_journal)) jbd2_journal_abort(sbi->s_journal, -ESHUTDOWN); break; default: return -EINVAL; } clear_opt(sb, DISCARD); return 0; } static int ext4_ioctl_shutdown(struct super_block *sb, unsigned long arg) { u32 flags; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(flags, (__u32 __user *)arg)) return -EFAULT; return ext4_force_shutdown(sb, flags); } struct getfsmap_info { struct super_block *gi_sb; struct fsmap_head __user *gi_data; unsigned int gi_idx; __u32 gi_last_flags; }; static int ext4_getfsmap_format(struct ext4_fsmap *xfm, void *priv) { struct getfsmap_info *info = priv; struct fsmap fm; trace_ext4_getfsmap_mapping(info->gi_sb, xfm); info->gi_last_flags = xfm->fmr_flags; ext4_fsmap_from_internal(info->gi_sb, &fm, xfm); if (copy_to_user(&info->gi_data->fmh_recs[info->gi_idx++], &fm, sizeof(struct fsmap))) return -EFAULT; return 0; } static int ext4_ioc_getfsmap(struct super_block *sb, struct fsmap_head __user *arg) { struct getfsmap_info info = { NULL }; struct ext4_fsmap_head xhead = {0}; struct fsmap_head head; bool aborted = false; int error; if (copy_from_user(&head, arg, sizeof(struct fsmap_head))) return -EFAULT; if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) || memchr_inv(head.fmh_keys[0].fmr_reserved, 0, sizeof(head.fmh_keys[0].fmr_reserved)) || memchr_inv(head.fmh_keys[1].fmr_reserved, 0, sizeof(head.fmh_keys[1].fmr_reserved))) return -EINVAL; /* * ext4 doesn't report file extents at all, so the only valid * file offsets are the magic ones (all zeroes or all ones). */ if (head.fmh_keys[0].fmr_offset || (head.fmh_keys[1].fmr_offset != 0 && head.fmh_keys[1].fmr_offset != -1ULL)) return -EINVAL; xhead.fmh_iflags = head.fmh_iflags; xhead.fmh_count = head.fmh_count; ext4_fsmap_to_internal(sb, &xhead.fmh_keys[0], &head.fmh_keys[0]); ext4_fsmap_to_internal(sb, &xhead.fmh_keys[1], &head.fmh_keys[1]); trace_ext4_getfsmap_low_key(sb, &xhead.fmh_keys[0]); trace_ext4_getfsmap_high_key(sb, &xhead.fmh_keys[1]); info.gi_sb = sb; info.gi_data = arg; error = ext4_getfsmap(sb, &xhead, ext4_getfsmap_format, &info); if (error == EXT4_QUERY_RANGE_ABORT) aborted = true; else if (error) return error; /* If we didn't abort, set the "last" flag in the last fmx */ if (!aborted && info.gi_idx) { info.gi_last_flags |= FMR_OF_LAST; if (copy_to_user(&info.gi_data->fmh_recs[info.gi_idx - 1].fmr_flags, &info.gi_last_flags, sizeof(info.gi_last_flags))) return -EFAULT; } /* copy back header */ head.fmh_entries = xhead.fmh_entries; head.fmh_oflags = xhead.fmh_oflags; if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) return -EFAULT; return 0; } static long ext4_ioctl_group_add(struct file *file, struct ext4_new_group_data *input) { struct super_block *sb = file_inode(file)->i_sb; int err, err2=0; err = ext4_resize_begin(sb); if (err) return err; if (ext4_has_feature_bigalloc(sb)) { ext4_msg(sb, KERN_ERR, "Online resizing not supported with bigalloc"); err = -EOPNOTSUPP; goto group_add_out; } err = mnt_want_write_file(file); if (err) goto group_add_out; err = ext4_group_add(sb, input); if (EXT4_SB(sb)->s_journal) { jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); } if (err == 0) err = err2; mnt_drop_write_file(file); if (!err && ext4_has_group_desc_csum(sb) && test_opt(sb, INIT_INODE_TABLE)) err = ext4_register_li_request(sb, input->group); group_add_out: err2 = ext4_resize_end(sb, false); if (err == 0) err = err2; return err; } int ext4_fileattr_get(struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); struct ext4_inode_info *ei = EXT4_I(inode); u32 flags = ei->i_flags & EXT4_FL_USER_VISIBLE; if (S_ISREG(inode->i_mode)) flags &= ~FS_PROJINHERIT_FL; fileattr_fill_flags(fa, flags); if (ext4_has_feature_project(inode->i_sb)) fa->fsx_projid = from_kprojid(&init_user_ns, ei->i_projid); return 0; } int ext4_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); u32 flags = fa->flags; int err = -EOPNOTSUPP; if (flags & ~EXT4_FL_USER_VISIBLE) goto out; /* * chattr(1) grabs flags via GETFLAGS, modifies the result and * passes that to SETFLAGS. So we cannot easily make SETFLAGS * more restrictive than just silently masking off visible but * not settable flags as we always did. */ flags &= EXT4_FL_USER_MODIFIABLE; if (ext4_mask_flags(inode->i_mode, flags) != flags) goto out; err = ext4_ioctl_check_immutable(inode, fa->fsx_projid, flags); if (err) goto out; err = ext4_ioctl_setflags(inode, flags); if (err) goto out; err = ext4_ioctl_setproject(inode, fa->fsx_projid); out: return err; } /* So that the fiemap access checks can't overflow on 32 bit machines. */ #define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent)) static int ext4_ioctl_get_es_cache(struct file *filp, unsigned long arg) { struct fiemap fiemap; struct fiemap __user *ufiemap = (struct fiemap __user *) arg; struct fiemap_extent_info fieinfo = { 0, }; struct inode *inode = file_inode(filp); int error; if (copy_from_user(&fiemap, ufiemap, sizeof(fiemap))) return -EFAULT; if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS) return -EINVAL; fieinfo.fi_flags = fiemap.fm_flags; fieinfo.fi_extents_max = fiemap.fm_extent_count; fieinfo.fi_extents_start = ufiemap->fm_extents; error = ext4_get_es_cache(inode, &fieinfo, fiemap.fm_start, fiemap.fm_length); fiemap.fm_flags = fieinfo.fi_flags; fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped; if (copy_to_user(ufiemap, &fiemap, sizeof(fiemap))) error = -EFAULT; return error; } static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg) { int err = 0; __u32 flags = 0; unsigned int flush_flags = 0; struct super_block *sb = file_inode(filp)->i_sb; if (copy_from_user(&flags, (__u32 __user *)arg, sizeof(__u32))) return -EFAULT; if (!capable(CAP_SYS_ADMIN)) return -EPERM; /* check for invalid bits set */ if ((flags & ~EXT4_IOC_CHECKPOINT_FLAG_VALID) || ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && (flags & JBD2_JOURNAL_FLUSH_ZEROOUT))) return -EINVAL; if (!EXT4_SB(sb)->s_journal) return -ENODEV; if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && !bdev_max_discard_sectors(EXT4_SB(sb)->s_journal->j_dev)) return -EOPNOTSUPP; if (flags & EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN) return 0; if (flags & EXT4_IOC_CHECKPOINT_FLAG_DISCARD) flush_flags |= JBD2_JOURNAL_FLUSH_DISCARD; if (flags & EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT) { flush_flags |= JBD2_JOURNAL_FLUSH_ZEROOUT; pr_info_ratelimited("warning: checkpointing journal with EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT can be slow"); } jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); err = jbd2_journal_flush(EXT4_SB(sb)->s_journal, flush_flags); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); return err; } static int ext4_ioctl_setlabel(struct file *filp, const char __user *user_label) { size_t len; int ret = 0; char new_label[EXT4_LABEL_MAX + 1]; struct super_block *sb = file_inode(filp)->i_sb; if (!capable(CAP_SYS_ADMIN)) return -EPERM; /* * Copy the maximum length allowed for ext4 label with one more to * find the required terminating null byte in order to test the * label length. The on disk label doesn't need to be null terminated. */ if (copy_from_user(new_label, user_label, EXT4_LABEL_MAX + 1)) return -EFAULT; len = strnlen(new_label, EXT4_LABEL_MAX + 1); if (len > EXT4_LABEL_MAX) return -EINVAL; /* * Clear the buffer after the new label */ memset(new_label + len, 0, EXT4_LABEL_MAX - len); ret = mnt_want_write_file(filp); if (ret) return ret; ret = ext4_update_superblocks_fn(sb, ext4_sb_setlabel, new_label); mnt_drop_write_file(filp); return ret; } static int ext4_ioctl_getlabel(struct ext4_sb_info *sbi, char __user *user_label) { char label[EXT4_LABEL_MAX + 1]; /* * EXT4_LABEL_MAX must always be smaller than FSLABEL_MAX because * FSLABEL_MAX must include terminating null byte, while s_volume_name * does not have to. */ BUILD_BUG_ON(EXT4_LABEL_MAX >= FSLABEL_MAX); lock_buffer(sbi->s_sbh); memtostr_pad(label, sbi->s_es->s_volume_name); unlock_buffer(sbi->s_sbh); if (copy_to_user(user_label, label, sizeof(label))) return -EFAULT; return 0; } static int ext4_ioctl_getuuid(struct ext4_sb_info *sbi, struct fsuuid __user *ufsuuid) { struct fsuuid fsuuid; __u8 uuid[UUID_SIZE]; if (copy_from_user(&fsuuid, ufsuuid, sizeof(fsuuid))) return -EFAULT; if (fsuuid.fsu_len == 0) { fsuuid.fsu_len = UUID_SIZE; if (copy_to_user(&ufsuuid->fsu_len, &fsuuid.fsu_len, sizeof(fsuuid.fsu_len))) return -EFAULT; return 0; } if (fsuuid.fsu_len < UUID_SIZE || fsuuid.fsu_flags != 0) return -EINVAL; lock_buffer(sbi->s_sbh); memcpy(uuid, sbi->s_es->s_uuid, UUID_SIZE); unlock_buffer(sbi->s_sbh); fsuuid.fsu_len = UUID_SIZE; if (copy_to_user(ufsuuid, &fsuuid, sizeof(fsuuid)) || copy_to_user(&ufsuuid->fsu_uuid[0], uuid, UUID_SIZE)) return -EFAULT; return 0; } static int ext4_ioctl_setuuid(struct file *filp, const struct fsuuid __user *ufsuuid) { int ret = 0; struct super_block *sb = file_inode(filp)->i_sb; struct fsuuid fsuuid; __u8 uuid[UUID_SIZE]; if (!capable(CAP_SYS_ADMIN)) return -EPERM; /* * If any checksums (group descriptors or metadata) are being used * then the checksum seed feature is required to change the UUID. */ if (((ext4_has_feature_gdt_csum(sb) || ext4_has_metadata_csum(sb)) && !ext4_has_feature_csum_seed(sb)) || ext4_has_feature_stable_inodes(sb)) return -EOPNOTSUPP; if (copy_from_user(&fsuuid, ufsuuid, sizeof(fsuuid))) return -EFAULT; if (fsuuid.fsu_len != UUID_SIZE || fsuuid.fsu_flags != 0) return -EINVAL; if (copy_from_user(uuid, &ufsuuid->fsu_uuid[0], UUID_SIZE)) return -EFAULT; ret = mnt_want_write_file(filp); if (ret) return ret; ret = ext4_update_superblocks_fn(sb, ext4_sb_setuuid, &uuid); mnt_drop_write_file(filp); return ret; } static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); struct super_block *sb = inode->i_sb; struct mnt_idmap *idmap = file_mnt_idmap(filp); ext4_debug("cmd = %u, arg = %lu\n", cmd, arg); switch (cmd) { case FS_IOC_GETFSMAP: return ext4_ioc_getfsmap(sb, (void __user *)arg); case EXT4_IOC_GETVERSION: case EXT4_IOC_GETVERSION_OLD: return put_user(inode->i_generation, (int __user *) arg); case EXT4_IOC_SETVERSION: case EXT4_IOC_SETVERSION_OLD: { handle_t *handle; struct ext4_iloc iloc; __u32 generation; int err; if (!inode_owner_or_capable(idmap, inode)) return -EPERM; if (ext4_has_metadata_csum(inode->i_sb)) { ext4_warning(sb, "Setting inode version is not " "supported with metadata_csum enabled."); return -ENOTTY; } err = mnt_want_write_file(filp); if (err) return err; if (get_user(generation, (int __user *) arg)) { err = -EFAULT; goto setversion_out; } inode_lock(inode); handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); if (IS_ERR(handle)) { err = PTR_ERR(handle); goto unlock_out; } err = ext4_reserve_inode_write(handle, inode, &iloc); if (err == 0) { inode_set_ctime_current(inode); inode_inc_iversion(inode); inode->i_generation = generation; err = ext4_mark_iloc_dirty(handle, inode, &iloc); } ext4_journal_stop(handle); unlock_out: inode_unlock(inode); setversion_out: mnt_drop_write_file(filp); return err; } case EXT4_IOC_GROUP_EXTEND: { ext4_fsblk_t n_blocks_count; int err, err2=0; err = ext4_resize_begin(sb); if (err) return err; if (get_user(n_blocks_count, (__u32 __user *)arg)) { err = -EFAULT; goto group_extend_out; } if (ext4_has_feature_bigalloc(sb)) { ext4_msg(sb, KERN_ERR, "Online resizing not supported with bigalloc"); err = -EOPNOTSUPP; goto group_extend_out; } err = mnt_want_write_file(filp); if (err) goto group_extend_out; err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); if (EXT4_SB(sb)->s_journal) { jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); } if (err == 0) err = err2; mnt_drop_write_file(filp); group_extend_out: err2 = ext4_resize_end(sb, false); if (err == 0) err = err2; return err; } case EXT4_IOC_MOVE_EXT: { struct move_extent me; struct fd donor; int err; if (!(filp->f_mode & FMODE_READ) || !(filp->f_mode & FMODE_WRITE)) return -EBADF; if (copy_from_user(&me, (struct move_extent __user *)arg, sizeof(me))) return -EFAULT; me.moved_len = 0; donor = fdget(me.donor_fd); if (!donor.file) return -EBADF; if (!(donor.file->f_mode & FMODE_WRITE)) { err = -EBADF; goto mext_out; } if (ext4_has_feature_bigalloc(sb)) { ext4_msg(sb, KERN_ERR, "Online defrag not supported with bigalloc"); err = -EOPNOTSUPP; goto mext_out; } else if (IS_DAX(inode)) { ext4_msg(sb, KERN_ERR, "Online defrag not supported with DAX"); err = -EOPNOTSUPP; goto mext_out; } err = mnt_want_write_file(filp); if (err) goto mext_out; err = ext4_move_extents(filp, donor.file, me.orig_start, me.donor_start, me.len, &me.moved_len); mnt_drop_write_file(filp); if (copy_to_user((struct move_extent __user *)arg, &me, sizeof(me))) err = -EFAULT; mext_out: fdput(donor); return err; } case EXT4_IOC_GROUP_ADD: { struct ext4_new_group_data input; if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, sizeof(input))) return -EFAULT; return ext4_ioctl_group_add(filp, &input); } case EXT4_IOC_MIGRATE: { int err; if (!inode_owner_or_capable(idmap, inode)) return -EACCES; err = mnt_want_write_file(filp); if (err) return err; /* * inode_mutex prevent write and truncate on the file. * Read still goes through. We take i_data_sem in * ext4_ext_swap_inode_data before we switch the * inode format to prevent read. */ inode_lock((inode)); err = ext4_ext_migrate(inode); inode_unlock((inode)); mnt_drop_write_file(filp); return err; } case EXT4_IOC_ALLOC_DA_BLKS: { int err; if (!inode_owner_or_capable(idmap, inode)) return -EACCES; err = mnt_want_write_file(filp); if (err) return err; err = ext4_alloc_da_blocks(inode); mnt_drop_write_file(filp); return err; } case EXT4_IOC_SWAP_BOOT: { int err; if (!(filp->f_mode & FMODE_WRITE)) return -EBADF; err = mnt_want_write_file(filp); if (err) return err; err = swap_inode_boot_loader(sb, idmap, inode); mnt_drop_write_file(filp); return err; } case EXT4_IOC_RESIZE_FS: { ext4_fsblk_t n_blocks_count; int err = 0, err2 = 0; ext4_group_t o_group = EXT4_SB(sb)->s_groups_count; if (copy_from_user(&n_blocks_count, (__u64 __user *)arg, sizeof(__u64))) { return -EFAULT; } err = ext4_resize_begin(sb); if (err) return err; err = mnt_want_write_file(filp); if (err) goto resizefs_out; err = ext4_resize_fs(sb, n_blocks_count); if (EXT4_SB(sb)->s_journal) { ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE, NULL); jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); } if (err == 0) err = err2; mnt_drop_write_file(filp); if (!err && (o_group < EXT4_SB(sb)->s_groups_count) && ext4_has_group_desc_csum(sb) && test_opt(sb, INIT_INODE_TABLE)) err = ext4_register_li_request(sb, o_group); resizefs_out: err2 = ext4_resize_end(sb, true); if (err == 0) err = err2; return err; } case FITRIM: { struct fstrim_range range; int ret = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (!bdev_max_discard_sectors(sb->s_bdev)) return -EOPNOTSUPP; /* * We haven't replayed the journal, so we cannot use our * block-bitmap-guided storage zapping commands. */ if (test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) return -EROFS; if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) return -EFAULT; ret = ext4_trim_fs(sb, &range); if (ret < 0) return ret; if (copy_to_user((struct fstrim_range __user *)arg, &range, sizeof(range))) return -EFAULT; return 0; } case EXT4_IOC_PRECACHE_EXTENTS: return ext4_ext_precache(inode); case FS_IOC_SET_ENCRYPTION_POLICY: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; return fscrypt_ioctl_set_policy(filp, (const void __user *)arg); case FS_IOC_GET_ENCRYPTION_PWSALT: return ext4_ioctl_get_encryption_pwsalt(filp, (void __user *)arg); case FS_IOC_GET_ENCRYPTION_POLICY: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; return fscrypt_ioctl_get_policy(filp, (void __user *)arg); case FS_IOC_GET_ENCRYPTION_POLICY_EX: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; return fscrypt_ioctl_get_policy_ex(filp, (void __user *)arg); case FS_IOC_ADD_ENCRYPTION_KEY: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; return fscrypt_ioctl_add_key(filp, (void __user *)arg); case FS_IOC_REMOVE_ENCRYPTION_KEY: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; return fscrypt_ioctl_remove_key(filp, (void __user *)arg); case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; return fscrypt_ioctl_remove_key_all_users(filp, (void __user *)arg); case FS_IOC_GET_ENCRYPTION_KEY_STATUS: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; return fscrypt_ioctl_get_key_status(filp, (void __user *)arg); case FS_IOC_GET_ENCRYPTION_NONCE: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; return fscrypt_ioctl_get_nonce(filp, (void __user *)arg); case EXT4_IOC_CLEAR_ES_CACHE: { if (!inode_owner_or_capable(idmap, inode)) return -EACCES; ext4_clear_inode_es(inode); return 0; } case EXT4_IOC_GETSTATE: { __u32 state = 0; if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED)) state |= EXT4_STATE_FLAG_EXT_PRECACHED; if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) state |= EXT4_STATE_FLAG_NEW; if (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) state |= EXT4_STATE_FLAG_NEWENTRY; if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) state |= EXT4_STATE_FLAG_DA_ALLOC_CLOSE; return put_user(state, (__u32 __user *) arg); } case EXT4_IOC_GET_ES_CACHE: return ext4_ioctl_get_es_cache(filp, arg); case EXT4_IOC_SHUTDOWN: return ext4_ioctl_shutdown(sb, arg); case FS_IOC_ENABLE_VERITY: if (!ext4_has_feature_verity(sb)) return -EOPNOTSUPP; return fsverity_ioctl_enable(filp, (const void __user *)arg); case FS_IOC_MEASURE_VERITY: if (!ext4_has_feature_verity(sb)) return -EOPNOTSUPP; return fsverity_ioctl_measure(filp, (void __user *)arg); case FS_IOC_READ_VERITY_METADATA: if (!ext4_has_feature_verity(sb)) return -EOPNOTSUPP; return fsverity_ioctl_read_metadata(filp, (const void __user *)arg); case EXT4_IOC_CHECKPOINT: return ext4_ioctl_checkpoint(filp, arg); case FS_IOC_GETFSLABEL: return ext4_ioctl_getlabel(EXT4_SB(sb), (void __user *)arg); case FS_IOC_SETFSLABEL: return ext4_ioctl_setlabel(filp, (const void __user *)arg); case EXT4_IOC_GETFSUUID: return ext4_ioctl_getuuid(EXT4_SB(sb), (void __user *)arg); case EXT4_IOC_SETFSUUID: return ext4_ioctl_setuuid(filp, (const void __user *)arg); default: return -ENOTTY; } } long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { return __ext4_ioctl(filp, cmd, arg); } #ifdef CONFIG_COMPAT long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { /* These are just misnamed, they actually get/put from/to user an int */ switch (cmd) { case EXT4_IOC32_GETVERSION: cmd = EXT4_IOC_GETVERSION; break; case EXT4_IOC32_SETVERSION: cmd = EXT4_IOC_SETVERSION; break; case EXT4_IOC32_GROUP_EXTEND: cmd = EXT4_IOC_GROUP_EXTEND; break; case EXT4_IOC32_GETVERSION_OLD: cmd = EXT4_IOC_GETVERSION_OLD; break; case EXT4_IOC32_SETVERSION_OLD: cmd = EXT4_IOC_SETVERSION_OLD; break; case EXT4_IOC32_GETRSVSZ: cmd = EXT4_IOC_GETRSVSZ; break; case EXT4_IOC32_SETRSVSZ: cmd = EXT4_IOC_SETRSVSZ; break; case EXT4_IOC32_GROUP_ADD: { struct compat_ext4_new_group_input __user *uinput; struct ext4_new_group_data input; int err; uinput = compat_ptr(arg); err = get_user(input.group, &uinput->group); err |= get_user(input.block_bitmap, &uinput->block_bitmap); err |= get_user(input.inode_bitmap, &uinput->inode_bitmap); err |= get_user(input.inode_table, &uinput->inode_table); err |= get_user(input.blocks_count, &uinput->blocks_count); err |= get_user(input.reserved_blocks, &uinput->reserved_blocks); if (err) return -EFAULT; return ext4_ioctl_group_add(file, &input); } case EXT4_IOC_MOVE_EXT: case EXT4_IOC_RESIZE_FS: case FITRIM: case EXT4_IOC_PRECACHE_EXTENTS: case FS_IOC_SET_ENCRYPTION_POLICY: case FS_IOC_GET_ENCRYPTION_PWSALT: case FS_IOC_GET_ENCRYPTION_POLICY: case FS_IOC_GET_ENCRYPTION_POLICY_EX: case FS_IOC_ADD_ENCRYPTION_KEY: case FS_IOC_REMOVE_ENCRYPTION_KEY: case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS: case FS_IOC_GET_ENCRYPTION_KEY_STATUS: case FS_IOC_GET_ENCRYPTION_NONCE: case EXT4_IOC_SHUTDOWN: case FS_IOC_GETFSMAP: case FS_IOC_ENABLE_VERITY: case FS_IOC_MEASURE_VERITY: case FS_IOC_READ_VERITY_METADATA: case EXT4_IOC_CLEAR_ES_CACHE: case EXT4_IOC_GETSTATE: case EXT4_IOC_GET_ES_CACHE: case EXT4_IOC_CHECKPOINT: case FS_IOC_GETFSLABEL: case FS_IOC_SETFSLABEL: case EXT4_IOC_GETFSUUID: case EXT4_IOC_SETFSUUID: break; default: return -ENOIOCTLCMD; } return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); } #endif static void set_overhead(struct ext4_super_block *es, const void *arg) { es->s_overhead_clusters = cpu_to_le32(*((unsigned long *) arg)); } int ext4_update_overhead(struct super_block *sb, bool force) { struct ext4_sb_info *sbi = EXT4_SB(sb); if (sb_rdonly(sb)) return 0; if (!force && (sbi->s_overhead == 0 || sbi->s_overhead == le32_to_cpu(sbi->s_es->s_overhead_clusters))) return 0; return ext4_update_superblocks_fn(sb, set_overhead, &sbi->s_overhead); }
903 897 900 900 29 79 904 904 903 904 903 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 // SPDX-License-Identifier: GPL-2.0-or-later /* * Cryptographic API. * * Glue code for the SHA1 Secure Hash Algorithm assembler implementations * using SSSE3, AVX, AVX2, and SHA-NI instructions. * * This file is based on sha1_generic.c * * Copyright (c) Alan Smithee. * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> * Copyright (c) Mathias Krause <minipli@googlemail.com> * Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> #include <crypto/sha1.h> #include <crypto/sha1_base.h> #include <asm/cpu_device_id.h> #include <asm/simd.h> static const struct x86_cpu_id module_cpu_ids[] = { #ifdef CONFIG_AS_SHA1_NI X86_MATCH_FEATURE(X86_FEATURE_SHA_NI, NULL), #endif X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL), X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL), X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids); static int sha1_update(struct shash_desc *desc, const u8 *data, unsigned int len, sha1_block_fn *sha1_xform) { struct sha1_state *sctx = shash_desc_ctx(desc); if (!crypto_simd_usable() || (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE) return crypto_sha1_update(desc, data, len); /* * Make sure struct sha1_state begins directly with the SHA1 * 160-bit internal state, as this is what the asm functions expect. */ BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0); kernel_fpu_begin(); sha1_base_do_update(desc, data, len, sha1_xform); kernel_fpu_end(); return 0; } static int sha1_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out, sha1_block_fn *sha1_xform) { if (!crypto_simd_usable()) return crypto_sha1_finup(desc, data, len, out); kernel_fpu_begin(); if (len) sha1_base_do_update(desc, data, len, sha1_xform); sha1_base_do_finalize(desc, sha1_xform); kernel_fpu_end(); return sha1_base_finish(desc, out); } asmlinkage void sha1_transform_ssse3(struct sha1_state *state, const u8 *data, int blocks); static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, unsigned int len) { return sha1_update(desc, data, len, sha1_transform_ssse3); } static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { return sha1_finup(desc, data, len, out, sha1_transform_ssse3); } /* Add padding and return the message digest. */ static int sha1_ssse3_final(struct shash_desc *desc, u8 *out) { return sha1_ssse3_finup(desc, NULL, 0, out); } static struct shash_alg sha1_ssse3_alg = { .digestsize = SHA1_DIGEST_SIZE, .init = sha1_base_init, .update = sha1_ssse3_update, .final = sha1_ssse3_final, .finup = sha1_ssse3_finup, .descsize = sizeof(struct sha1_state), .base = { .cra_name = "sha1", .cra_driver_name = "sha1-ssse3", .cra_priority = 150, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_module = THIS_MODULE, } }; static int register_sha1_ssse3(void) { if (boot_cpu_has(X86_FEATURE_SSSE3)) return crypto_register_shash(&sha1_ssse3_alg); return 0; } static void unregister_sha1_ssse3(void) { if (boot_cpu_has(X86_FEATURE_SSSE3)) crypto_unregister_shash(&sha1_ssse3_alg); } asmlinkage void sha1_transform_avx(struct sha1_state *state, const u8 *data, int blocks); static int sha1_avx_update(struct shash_desc *desc, const u8 *data, unsigned int len) { return sha1_update(desc, data, len, sha1_transform_avx); } static int sha1_avx_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { return sha1_finup(desc, data, len, out, sha1_transform_avx); } static int sha1_avx_final(struct shash_desc *desc, u8 *out) { return sha1_avx_finup(desc, NULL, 0, out); } static struct shash_alg sha1_avx_alg = { .digestsize = SHA1_DIGEST_SIZE, .init = sha1_base_init, .update = sha1_avx_update, .final = sha1_avx_final, .finup = sha1_avx_finup, .descsize = sizeof(struct sha1_state), .base = { .cra_name = "sha1", .cra_driver_name = "sha1-avx", .cra_priority = 160, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_module = THIS_MODULE, } }; static bool avx_usable(void) { if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { if (boot_cpu_has(X86_FEATURE_AVX)) pr_info("AVX detected but unusable.\n"); return false; } return true; } static int register_sha1_avx(void) { if (avx_usable()) return crypto_register_shash(&sha1_avx_alg); return 0; } static void unregister_sha1_avx(void) { if (avx_usable()) crypto_unregister_shash(&sha1_avx_alg); } #define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ asmlinkage void sha1_transform_avx2(struct sha1_state *state, const u8 *data, int blocks); static bool avx2_usable(void) { if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI1) && boot_cpu_has(X86_FEATURE_BMI2)) return true; return false; } static void sha1_apply_transform_avx2(struct sha1_state *state, const u8 *data, int blocks) { /* Select the optimal transform based on data block size */ if (blocks >= SHA1_AVX2_BLOCK_OPTSIZE) sha1_transform_avx2(state, data, blocks); else sha1_transform_avx(state, data, blocks); } static int sha1_avx2_update(struct shash_desc *desc, const u8 *data, unsigned int len) { return sha1_update(desc, data, len, sha1_apply_transform_avx2); } static int sha1_avx2_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { return sha1_finup(desc, data, len, out, sha1_apply_transform_avx2); } static int sha1_avx2_final(struct shash_desc *desc, u8 *out) { return sha1_avx2_finup(desc, NULL, 0, out); } static struct shash_alg sha1_avx2_alg = { .digestsize = SHA1_DIGEST_SIZE, .init = sha1_base_init, .update = sha1_avx2_update, .final = sha1_avx2_final, .finup = sha1_avx2_finup, .descsize = sizeof(struct sha1_state), .base = { .cra_name = "sha1", .cra_driver_name = "sha1-avx2", .cra_priority = 170, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_module = THIS_MODULE, } }; static int register_sha1_avx2(void) { if (avx2_usable()) return crypto_register_shash(&sha1_avx2_alg); return 0; } static void unregister_sha1_avx2(void) { if (avx2_usable()) crypto_unregister_shash(&sha1_avx2_alg); } #ifdef CONFIG_AS_SHA1_NI asmlinkage void sha1_ni_transform(struct sha1_state *digest, const u8 *data, int rounds); static int sha1_ni_update(struct shash_desc *desc, const u8 *data, unsigned int len) { return sha1_update(desc, data, len, sha1_ni_transform); } static int sha1_ni_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { return sha1_finup(desc, data, len, out, sha1_ni_transform); } static int sha1_ni_final(struct shash_desc *desc, u8 *out) { return sha1_ni_finup(desc, NULL, 0, out); } static struct shash_alg sha1_ni_alg = { .digestsize = SHA1_DIGEST_SIZE, .init = sha1_base_init, .update = sha1_ni_update, .final = sha1_ni_final, .finup = sha1_ni_finup, .descsize = sizeof(struct sha1_state), .base = { .cra_name = "sha1", .cra_driver_name = "sha1-ni", .cra_priority = 250, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_module = THIS_MODULE, } }; static int register_sha1_ni(void) { if (boot_cpu_has(X86_FEATURE_SHA_NI)) return crypto_register_shash(&sha1_ni_alg); return 0; } static void unregister_sha1_ni(void) { if (boot_cpu_has(X86_FEATURE_SHA_NI)) crypto_unregister_shash(&sha1_ni_alg); } #else static inline int register_sha1_ni(void) { return 0; } static inline void unregister_sha1_ni(void) { } #endif static int __init sha1_ssse3_mod_init(void) { if (!x86_match_cpu(module_cpu_ids)) return -ENODEV; if (register_sha1_ssse3()) goto fail; if (register_sha1_avx()) { unregister_sha1_ssse3(); goto fail; } if (register_sha1_avx2()) { unregister_sha1_avx(); unregister_sha1_ssse3(); goto fail; } if (register_sha1_ni()) { unregister_sha1_avx2(); unregister_sha1_avx(); unregister_sha1_ssse3(); goto fail; } return 0; fail: return -ENODEV; } static void __exit sha1_ssse3_mod_fini(void) { unregister_sha1_ni(); unregister_sha1_avx2(); unregister_sha1_avx(); unregister_sha1_ssse3(); } module_init(sha1_ssse3_mod_init); module_exit(sha1_ssse3_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, Supplemental SSE3 accelerated"); MODULE_ALIAS_CRYPTO("sha1"); MODULE_ALIAS_CRYPTO("sha1-ssse3"); MODULE_ALIAS_CRYPTO("sha1-avx"); MODULE_ALIAS_CRYPTO("sha1-avx2"); #ifdef CONFIG_AS_SHA1_NI MODULE_ALIAS_CRYPTO("sha1-ni"); #endif
1 1 4 1 1 2 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 // SPDX-License-Identifier: GPL-2.0 /* * Tty port functions */ #include <linux/types.h> #include <linux/errno.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/serial.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/wait.h> #include <linux/bitops.h> #include <linux/delay.h> #include <linux/module.h> #include <linux/serdev.h> #include "tty.h" static size_t tty_port_default_receive_buf(struct tty_port *port, const u8 *p, const u8 *f, size_t count) { struct tty_struct *tty; struct tty_ldisc *ld; tty = READ_ONCE(port->itty); if (!tty) return 0; ld = tty_ldisc_ref(tty); if (!ld) return 0; count = tty_ldisc_receive_buf(ld, p, f, count); tty_ldisc_deref(ld); return count; } static void tty_port_default_lookahead_buf(struct tty_port *port, const u8 *p, const u8 *f, size_t count) { struct tty_struct *tty; struct tty_ldisc *ld; tty = READ_ONCE(port->itty); if (!tty) return; ld = tty_ldisc_ref(tty); if (!ld) return; if (ld->ops->lookahead_buf) ld->ops->lookahead_buf(ld->tty, p, f, count); tty_ldisc_deref(ld); } static void tty_port_default_wakeup(struct tty_port *port) { struct tty_struct *tty = tty_port_tty_get(port); if (tty) { tty_wakeup(tty); tty_kref_put(tty); } } const struct tty_port_client_operations tty_port_default_client_ops = { .receive_buf = tty_port_default_receive_buf, .lookahead_buf = tty_port_default_lookahead_buf, .write_wakeup = tty_port_default_wakeup, }; EXPORT_SYMBOL_GPL(tty_port_default_client_ops); /** * tty_port_init - initialize tty_port * @port: tty_port to initialize * * Initializes the state of struct tty_port. When a port was initialized using * this function, one has to destroy the port by tty_port_destroy(). Either * indirectly by using &tty_port refcounting (tty_port_put()) or directly if * refcounting is not used. */ void tty_port_init(struct tty_port *port) { memset(port, 0, sizeof(*port)); tty_buffer_init(port); init_waitqueue_head(&port->open_wait); init_waitqueue_head(&port->delta_msr_wait); mutex_init(&port->mutex); mutex_init(&port->buf_mutex); spin_lock_init(&port->lock); port->close_delay = (50 * HZ) / 100; port->closing_wait = (3000 * HZ) / 100; port->client_ops = &tty_port_default_client_ops; kref_init(&port->kref); } EXPORT_SYMBOL(tty_port_init); /** * tty_port_link_device - link tty and tty_port * @port: tty_port of the device * @driver: tty_driver for this device * @index: index of the tty * * Provide the tty layer with a link from a tty (specified by @index) to a * tty_port (@port). Use this only if neither tty_port_register_device() nor * tty_port_install() is used in the driver. If used, this has to be called * before tty_register_driver(). */ void tty_port_link_device(struct tty_port *port, struct tty_driver *driver, unsigned index) { if (WARN_ON(index >= driver->num)) return; driver->ports[index] = port; } EXPORT_SYMBOL_GPL(tty_port_link_device); /** * tty_port_register_device - register tty device * @port: tty_port of the device * @driver: tty_driver for this device * @index: index of the tty * @device: parent if exists, otherwise NULL * * It is the same as tty_register_device() except the provided @port is linked * to a concrete tty specified by @index. Use this or tty_port_install() (or * both). Call tty_port_link_device() as a last resort. */ struct device *tty_port_register_device(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device) { return tty_port_register_device_attr(port, driver, index, device, NULL, NULL); } EXPORT_SYMBOL_GPL(tty_port_register_device); /** * tty_port_register_device_attr - register tty device * @port: tty_port of the device * @driver: tty_driver for this device * @index: index of the tty * @device: parent if exists, otherwise NULL * @drvdata: Driver data to be set to device. * @attr_grp: Attribute group to be set on device. * * It is the same as tty_register_device_attr() except the provided @port is * linked to a concrete tty specified by @index. Use this or tty_port_install() * (or both). Call tty_port_link_device() as a last resort. */ struct device *tty_port_register_device_attr(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp) { tty_port_link_device(port, driver, index); return tty_register_device_attr(driver, index, device, drvdata, attr_grp); } EXPORT_SYMBOL_GPL(tty_port_register_device_attr); /** * tty_port_register_device_attr_serdev - register tty or serdev device * @port: tty_port of the device * @driver: tty_driver for this device * @index: index of the tty * @host: serial port hardware device * @parent: parent if exists, otherwise NULL * @drvdata: driver data for the device * @attr_grp: attribute group for the device * * Register a serdev or tty device depending on if the parent device has any * defined serdev clients or not. */ struct device *tty_port_register_device_attr_serdev(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *host, struct device *parent, void *drvdata, const struct attribute_group **attr_grp) { struct device *dev; tty_port_link_device(port, driver, index); dev = serdev_tty_port_register(port, host, parent, driver, index); if (PTR_ERR(dev) != -ENODEV) { /* Skip creating cdev if we registered a serdev device */ return dev; } return tty_register_device_attr(driver, index, parent, drvdata, attr_grp); } EXPORT_SYMBOL_GPL(tty_port_register_device_attr_serdev); /** * tty_port_register_device_serdev - register tty or serdev device * @port: tty_port of the device * @driver: tty_driver for this device * @index: index of the tty * @host: serial port hardware controller device * @parent: parent if exists, otherwise NULL * * Register a serdev or tty device depending on if the parent device has any * defined serdev clients or not. */ struct device *tty_port_register_device_serdev(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *host, struct device *parent) { return tty_port_register_device_attr_serdev(port, driver, index, host, parent, NULL, NULL); } EXPORT_SYMBOL_GPL(tty_port_register_device_serdev); /** * tty_port_unregister_device - deregister a tty or serdev device * @port: tty_port of the device * @driver: tty_driver for this device * @index: index of the tty * * If a tty or serdev device is registered with a call to * tty_port_register_device_serdev() then this function must be called when * the device is gone. */ void tty_port_unregister_device(struct tty_port *port, struct tty_driver *driver, unsigned index) { int ret; ret = serdev_tty_port_unregister(port); if (ret == 0) return; tty_unregister_device(driver, index); } EXPORT_SYMBOL_GPL(tty_port_unregister_device); int tty_port_alloc_xmit_buf(struct tty_port *port) { /* We may sleep in get_zeroed_page() */ mutex_lock(&port->buf_mutex); if (port->xmit_buf == NULL) { port->xmit_buf = (u8 *)get_zeroed_page(GFP_KERNEL); if (port->xmit_buf) kfifo_init(&port->xmit_fifo, port->xmit_buf, PAGE_SIZE); } mutex_unlock(&port->buf_mutex); if (port->xmit_buf == NULL) return -ENOMEM; return 0; } EXPORT_SYMBOL(tty_port_alloc_xmit_buf); void tty_port_free_xmit_buf(struct tty_port *port) { mutex_lock(&port->buf_mutex); free_page((unsigned long)port->xmit_buf); port->xmit_buf = NULL; INIT_KFIFO(port->xmit_fifo); mutex_unlock(&port->buf_mutex); } EXPORT_SYMBOL(tty_port_free_xmit_buf); /** * tty_port_destroy - destroy inited port * @port: tty port to be destroyed * * When a port was initialized using tty_port_init(), one has to destroy the * port by this function. Either indirectly by using &tty_port refcounting * (tty_port_put()) or directly if refcounting is not used. */ void tty_port_destroy(struct tty_port *port) { tty_buffer_cancel_work(port); tty_buffer_free_all(port); } EXPORT_SYMBOL(tty_port_destroy); static void tty_port_destructor(struct kref *kref) { struct tty_port *port = container_of(kref, struct tty_port, kref); /* check if last port ref was dropped before tty release */ if (WARN_ON(port->itty)) return; free_page((unsigned long)port->xmit_buf); tty_port_destroy(port); if (port->ops && port->ops->destruct) port->ops->destruct(port); else kfree(port); } /** * tty_port_put - drop a reference to tty_port * @port: port to drop a reference of (can be NULL) * * The final put will destroy and free up the @port using * @port->ops->destruct() hook, or using kfree() if not provided. */ void tty_port_put(struct tty_port *port) { if (port) kref_put(&port->kref, tty_port_destructor); } EXPORT_SYMBOL(tty_port_put); /** * tty_port_tty_get - get a tty reference * @port: tty port * * Return a refcount protected tty instance or %NULL if the port is not * associated with a tty (eg due to close or hangup). */ struct tty_struct *tty_port_tty_get(struct tty_port *port) { unsigned long flags; struct tty_struct *tty; spin_lock_irqsave(&port->lock, flags); tty = tty_kref_get(port->tty); spin_unlock_irqrestore(&port->lock, flags); return tty; } EXPORT_SYMBOL(tty_port_tty_get); /** * tty_port_tty_set - set the tty of a port * @port: tty port * @tty: the tty * * Associate the port and tty pair. Manages any internal refcounts. Pass %NULL * to deassociate a port. */ void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty) { unsigned long flags; spin_lock_irqsave(&port->lock, flags); tty_kref_put(port->tty); port->tty = tty_kref_get(tty); spin_unlock_irqrestore(&port->lock, flags); } EXPORT_SYMBOL(tty_port_tty_set); /** * tty_port_shutdown - internal helper to shutdown the device * @port: tty port to be shut down * @tty: the associated tty * * It is used by tty_port_hangup() and tty_port_close(). Its task is to * shutdown the device if it was initialized (note consoles remain * functioning). It lowers DTR/RTS (if @tty has HUPCL set) and invokes * @port->ops->shutdown(). */ static void tty_port_shutdown(struct tty_port *port, struct tty_struct *tty) { mutex_lock(&port->mutex); if (port->console) goto out; if (tty_port_initialized(port)) { tty_port_set_initialized(port, false); /* * Drop DTR/RTS if HUPCL is set. This causes any attached * modem to hang up the line. */ if (tty && C_HUPCL(tty)) tty_port_lower_dtr_rts(port); if (port->ops->shutdown) port->ops->shutdown(port); } out: mutex_unlock(&port->mutex); } /** * tty_port_hangup - hangup helper * @port: tty port * * Perform port level tty hangup flag and count changes. Drop the tty * reference. * * Caller holds tty lock. */ void tty_port_hangup(struct tty_port *port) { struct tty_struct *tty; unsigned long flags; spin_lock_irqsave(&port->lock, flags); port->count = 0; tty = port->tty; if (tty) set_bit(TTY_IO_ERROR, &tty->flags); port->tty = NULL; spin_unlock_irqrestore(&port->lock, flags); tty_port_set_active(port, false); tty_port_shutdown(port, tty); tty_kref_put(tty); wake_up_interruptible(&port->open_wait); wake_up_interruptible(&port->delta_msr_wait); } EXPORT_SYMBOL(tty_port_hangup); /** * tty_port_tty_hangup - helper to hang up a tty * @port: tty port * @check_clocal: hang only ttys with %CLOCAL unset? */ void tty_port_tty_hangup(struct tty_port *port, bool check_clocal) { struct tty_struct *tty = tty_port_tty_get(port); if (tty && (!check_clocal || !C_CLOCAL(tty))) tty_hangup(tty); tty_kref_put(tty); } EXPORT_SYMBOL_GPL(tty_port_tty_hangup); /** * tty_port_tty_wakeup - helper to wake up a tty * @port: tty port */ void tty_port_tty_wakeup(struct tty_port *port) { port->client_ops->write_wakeup(port); } EXPORT_SYMBOL_GPL(tty_port_tty_wakeup); /** * tty_port_carrier_raised - carrier raised check * @port: tty port * * Wrapper for the carrier detect logic. For the moment this is used * to hide some internal details. This will eventually become entirely * internal to the tty port. */ bool tty_port_carrier_raised(struct tty_port *port) { if (port->ops->carrier_raised == NULL) return true; return port->ops->carrier_raised(port); } EXPORT_SYMBOL(tty_port_carrier_raised); /** * tty_port_raise_dtr_rts - Raise DTR/RTS * @port: tty port * * Wrapper for the DTR/RTS raise logic. For the moment this is used to hide * some internal details. This will eventually become entirely internal to the * tty port. */ void tty_port_raise_dtr_rts(struct tty_port *port) { if (port->ops->dtr_rts) port->ops->dtr_rts(port, true); } EXPORT_SYMBOL(tty_port_raise_dtr_rts); /** * tty_port_lower_dtr_rts - Lower DTR/RTS * @port: tty port * * Wrapper for the DTR/RTS raise logic. For the moment this is used to hide * some internal details. This will eventually become entirely internal to the * tty port. */ void tty_port_lower_dtr_rts(struct tty_port *port) { if (port->ops->dtr_rts) port->ops->dtr_rts(port, false); } EXPORT_SYMBOL(tty_port_lower_dtr_rts); /** * tty_port_block_til_ready - Waiting logic for tty open * @port: the tty port being opened * @tty: the tty device being bound * @filp: the file pointer of the opener or %NULL * * Implement the core POSIX/SuS tty behaviour when opening a tty device. * Handles: * * - hangup (both before and during) * - non blocking open * - rts/dtr/dcd * - signals * - port flags and counts * * The passed @port must implement the @port->ops->carrier_raised method if it * can do carrier detect and the @port->ops->dtr_rts method if it supports * software management of these lines. Note that the dtr/rts raise is done each * iteration as a hangup may have previously dropped them while we wait. * * Caller holds tty lock. * * Note: May drop and reacquire tty lock when blocking, so @tty and @port may * have changed state (eg., may have been hung up). */ int tty_port_block_til_ready(struct tty_port *port, struct tty_struct *tty, struct file *filp) { int do_clocal = 0, retval; unsigned long flags; DEFINE_WAIT(wait); /* if non-blocking mode is set we can pass directly to open unless * the port has just hung up or is in another error state. */ if (tty_io_error(tty)) { tty_port_set_active(port, true); return 0; } if (filp == NULL || (filp->f_flags & O_NONBLOCK)) { /* Indicate we are open */ if (C_BAUD(tty)) tty_port_raise_dtr_rts(port); tty_port_set_active(port, true); return 0; } if (C_CLOCAL(tty)) do_clocal = 1; /* Block waiting until we can proceed. We may need to wait for the * carrier, but we must also wait for any close that is in progress * before the next open may complete. */ retval = 0; /* The port lock protects the port counts */ spin_lock_irqsave(&port->lock, flags); port->count--; port->blocked_open++; spin_unlock_irqrestore(&port->lock, flags); while (1) { /* Indicate we are open */ if (C_BAUD(tty) && tty_port_initialized(port)) tty_port_raise_dtr_rts(port); prepare_to_wait(&port->open_wait, &wait, TASK_INTERRUPTIBLE); /* Check for a hangup or uninitialised port. * Return accordingly. */ if (tty_hung_up_p(filp) || !tty_port_initialized(port)) { if (port->flags & ASYNC_HUP_NOTIFY) retval = -EAGAIN; else retval = -ERESTARTSYS; break; } /* * Probe the carrier. For devices with no carrier detect * tty_port_carrier_raised will always return true. * Never ask drivers if CLOCAL is set, this causes troubles * on some hardware. */ if (do_clocal || tty_port_carrier_raised(port)) break; if (signal_pending(current)) { retval = -ERESTARTSYS; break; } tty_unlock(tty); schedule(); tty_lock(tty); } finish_wait(&port->open_wait, &wait); /* Update counts. A parallel hangup will have set count to zero and * we must not mess that up further. */ spin_lock_irqsave(&port->lock, flags); if (!tty_hung_up_p(filp)) port->count++; port->blocked_open--; spin_unlock_irqrestore(&port->lock, flags); if (retval == 0) tty_port_set_active(port, true); return retval; } EXPORT_SYMBOL(tty_port_block_til_ready); static void tty_port_drain_delay(struct tty_port *port, struct tty_struct *tty) { unsigned int bps = tty_get_baud_rate(tty); long timeout; if (bps > 1200) { timeout = (HZ * 10 * port->drain_delay) / bps; timeout = max_t(long, timeout, HZ / 10); } else { timeout = 2 * HZ; } schedule_timeout_interruptible(timeout); } /** * tty_port_close_start - helper for tty->ops->close, part 1/2 * @port: tty_port of the device * @tty: tty being closed * @filp: passed file pointer * * Decrements and checks open count. Flushes the port if this is the last * close. That means, dropping the data from the outpu buffer on the device and * waiting for sending logic to finish. The rest of close handling is performed * in tty_port_close_end(). * * Locking: Caller holds tty lock. * * Return: 1 if this is the last close, otherwise 0 */ int tty_port_close_start(struct tty_port *port, struct tty_struct *tty, struct file *filp) { unsigned long flags; if (tty_hung_up_p(filp)) return 0; spin_lock_irqsave(&port->lock, flags); if (tty->count == 1 && port->count != 1) { tty_warn(tty, "%s: tty->count = 1 port count = %d\n", __func__, port->count); port->count = 1; } if (--port->count < 0) { tty_warn(tty, "%s: bad port count (%d)\n", __func__, port->count); port->count = 0; } if (port->count) { spin_unlock_irqrestore(&port->lock, flags); return 0; } spin_unlock_irqrestore(&port->lock, flags); tty->closing = 1; if (tty_port_initialized(port)) { /* Don't block on a stalled port, just pull the chain */ if (tty->flow.tco_stopped) tty_driver_flush_buffer(tty); if (port->closing_wait != ASYNC_CLOSING_WAIT_NONE) tty_wait_until_sent(tty, port->closing_wait); if (port->drain_delay) tty_port_drain_delay(port, tty); } /* Flush the ldisc buffering */ tty_ldisc_flush(tty); /* Report to caller this is the last port reference */ return 1; } EXPORT_SYMBOL(tty_port_close_start); /** * tty_port_close_end - helper for tty->ops->close, part 2/2 * @port: tty_port of the device * @tty: tty being closed * * This is a continuation of the first part: tty_port_close_start(). This * should be called after turning off the device. It flushes the data from the * line discipline and delays the close by @port->close_delay. * * Locking: Caller holds tty lock. */ void tty_port_close_end(struct tty_port *port, struct tty_struct *tty) { unsigned long flags; tty_ldisc_flush(tty); tty->closing = 0; spin_lock_irqsave(&port->lock, flags); if (port->blocked_open) { spin_unlock_irqrestore(&port->lock, flags); if (port->close_delay) msleep_interruptible(jiffies_to_msecs(port->close_delay)); spin_lock_irqsave(&port->lock, flags); wake_up_interruptible(&port->open_wait); } spin_unlock_irqrestore(&port->lock, flags); tty_port_set_active(port, false); } EXPORT_SYMBOL(tty_port_close_end); /** * tty_port_close - generic tty->ops->close handler * @port: tty_port of the device * @tty: tty being closed * @filp: passed file pointer * * It is a generic helper to be used in driver's @tty->ops->close. It wraps a * sequence of tty_port_close_start(), tty_port_shutdown(), and * tty_port_close_end(). The latter two are called only if this is the last * close. See the respective functions for the details. * * Locking: Caller holds tty lock */ void tty_port_close(struct tty_port *port, struct tty_struct *tty, struct file *filp) { if (tty_port_close_start(port, tty, filp) == 0) return; tty_port_shutdown(port, tty); if (!port->console) set_bit(TTY_IO_ERROR, &tty->flags); tty_port_close_end(port, tty); tty_port_tty_set(port, NULL); } EXPORT_SYMBOL(tty_port_close); /** * tty_port_install - generic tty->ops->install handler * @port: tty_port of the device * @driver: tty_driver for this device * @tty: tty to be installed * * It is the same as tty_standard_install() except the provided @port is linked * to a concrete tty specified by @tty. Use this or tty_port_register_device() * (or both). Call tty_port_link_device() as a last resort. */ int tty_port_install(struct tty_port *port, struct tty_driver *driver, struct tty_struct *tty) { tty->port = port; return tty_standard_install(driver, tty); } EXPORT_SYMBOL_GPL(tty_port_install); /** * tty_port_open - generic tty->ops->open handler * @port: tty_port of the device * @tty: tty to be opened * @filp: passed file pointer * * It is a generic helper to be used in driver's @tty->ops->open. It activates * the devices using @port->ops->activate if not active already. And waits for * the device to be ready using tty_port_block_til_ready() (e.g. raises * DTR/CTS and waits for carrier). * * Note that @port->ops->shutdown is not called when @port->ops->activate * returns an error (on the contrary, @tty->ops->close is). * * Locking: Caller holds tty lock. * * Note: may drop and reacquire tty lock (in tty_port_block_til_ready()) so * @tty and @port may have changed state (eg., may be hung up now). */ int tty_port_open(struct tty_port *port, struct tty_struct *tty, struct file *filp) { spin_lock_irq(&port->lock); ++port->count; spin_unlock_irq(&port->lock); tty_port_tty_set(port, tty); /* * Do the device-specific open only if the hardware isn't * already initialized. Serialize open and shutdown using the * port mutex. */ mutex_lock(&port->mutex); if (!tty_port_initialized(port)) { clear_bit(TTY_IO_ERROR, &tty->flags); if (port->ops->activate) { int retval = port->ops->activate(port, tty); if (retval) { mutex_unlock(&port->mutex); return retval; } } tty_port_set_initialized(port, true); } mutex_unlock(&port->mutex); return tty_port_block_til_ready(port, tty, filp); } EXPORT_SYMBOL(tty_port_open);
247 248 247 247 113 114 112 113 7 4 3 1 1 3 1 2 5 5 10 10 8 8 8 5 15 15 5 1 15 1 9 5 5 5 26 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 // SPDX-License-Identifier: GPL-2.0-only /* * fs/kernfs/mount.c - kernfs mount implementation * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> */ #include <linux/fs.h> #include <linux/mount.h> #include <linux/init.h> #include <linux/magic.h> #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/namei.h> #include <linux/seq_file.h> #include <linux/exportfs.h> #include <linux/uuid.h> #include <linux/statfs.h> #include "kernfs-internal.h" struct kmem_cache *kernfs_node_cache __ro_after_init; struct kmem_cache *kernfs_iattrs_cache __ro_after_init; struct kernfs_global_locks *kernfs_locks __ro_after_init; static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry) { struct kernfs_root *root = kernfs_root(kernfs_dentry_node(dentry)); struct kernfs_syscall_ops *scops = root->syscall_ops; if (scops && scops->show_options) return scops->show_options(sf, root); return 0; } static int kernfs_sop_show_path(struct seq_file *sf, struct dentry *dentry) { struct kernfs_node *node = kernfs_dentry_node(dentry); struct kernfs_root *root = kernfs_root(node); struct kernfs_syscall_ops *scops = root->syscall_ops; if (scops && scops->show_path) return scops->show_path(sf, node, root); seq_dentry(sf, dentry, " \t\n\\"); return 0; } static int kernfs_statfs(struct dentry *dentry, struct kstatfs *buf) { simple_statfs(dentry, buf); buf->f_fsid = uuid_to_fsid(dentry->d_sb->s_uuid.b); return 0; } const struct super_operations kernfs_sops = { .statfs = kernfs_statfs, .drop_inode = generic_delete_inode, .evict_inode = kernfs_evict_inode, .show_options = kernfs_sop_show_options, .show_path = kernfs_sop_show_path, }; static int kernfs_encode_fh(struct inode *inode, __u32 *fh, int *max_len, struct inode *parent) { struct kernfs_node *kn = inode->i_private; if (*max_len < 2) { *max_len = 2; return FILEID_INVALID; } *max_len = 2; *(u64 *)fh = kn->id; return FILEID_KERNFS; } static struct dentry *__kernfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type, bool get_parent) { struct kernfs_super_info *info = kernfs_info(sb); struct kernfs_node *kn; struct inode *inode; u64 id; if (fh_len < 2) return NULL; switch (fh_type) { case FILEID_KERNFS: id = *(u64 *)fid; break; case FILEID_INO32_GEN: case FILEID_INO32_GEN_PARENT: /* * blk_log_action() exposes "LOW32,HIGH32" pair without * type and userland can call us with generic fid * constructed from them. Combine it back to ID. See * blk_log_action(). */ id = ((u64)fid->i32.gen << 32) | fid->i32.ino; break; default: return NULL; } kn = kernfs_find_and_get_node_by_id(info->root, id); if (!kn) return ERR_PTR(-ESTALE); if (get_parent) { struct kernfs_node *parent; parent = kernfs_get_parent(kn); kernfs_put(kn); kn = parent; if (!kn) return ERR_PTR(-ESTALE); } inode = kernfs_get_inode(sb, kn); kernfs_put(kn); return d_obtain_alias(inode); } static struct dentry *kernfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { return __kernfs_fh_to_dentry(sb, fid, fh_len, fh_type, false); } static struct dentry *kernfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { return __kernfs_fh_to_dentry(sb, fid, fh_len, fh_type, true); } static struct dentry *kernfs_get_parent_dentry(struct dentry *child) { struct kernfs_node *kn = kernfs_dentry_node(child); return d_obtain_alias(kernfs_get_inode(child->d_sb, kn->parent)); } static const struct export_operations kernfs_export_ops = { .encode_fh = kernfs_encode_fh, .fh_to_dentry = kernfs_fh_to_dentry, .fh_to_parent = kernfs_fh_to_parent, .get_parent = kernfs_get_parent_dentry, }; /** * kernfs_root_from_sb - determine kernfs_root associated with a super_block * @sb: the super_block in question * * Return: the kernfs_root associated with @sb. If @sb is not a kernfs one, * %NULL is returned. */ struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) { if (sb->s_op == &kernfs_sops) return kernfs_info(sb)->root; return NULL; } /* * find the next ancestor in the path down to @child, where @parent was the * ancestor whose descendant we want to find. * * Say the path is /a/b/c/d. @child is d, @parent is %NULL. We return the root * node. If @parent is b, then we return the node for c. * Passing in d as @parent is not ok. */ static struct kernfs_node *find_next_ancestor(struct kernfs_node *child, struct kernfs_node *parent) { if (child == parent) { pr_crit_once("BUG in find_next_ancestor: called with parent == child"); return NULL; } while (child->parent != parent) { if (!child->parent) return NULL; child = child->parent; } return child; } /** * kernfs_node_dentry - get a dentry for the given kernfs_node * @kn: kernfs_node for which a dentry is needed * @sb: the kernfs super_block * * Return: the dentry pointer */ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, struct super_block *sb) { struct dentry *dentry; struct kernfs_node *knparent; BUG_ON(sb->s_op != &kernfs_sops); dentry = dget(sb->s_root); /* Check if this is the root kernfs_node */ if (!kn->parent) return dentry; knparent = find_next_ancestor(kn, NULL); if (WARN_ON(!knparent)) { dput(dentry); return ERR_PTR(-EINVAL); } do { struct dentry *dtmp; struct kernfs_node *kntmp; if (kn == knparent) return dentry; kntmp = find_next_ancestor(kn, knparent); if (WARN_ON(!kntmp)) { dput(dentry); return ERR_PTR(-EINVAL); } dtmp = lookup_positive_unlocked(kntmp->name, dentry, strlen(kntmp->name)); dput(dentry); if (IS_ERR(dtmp)) return dtmp; knparent = kntmp; dentry = dtmp; } while (true); } static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *kfc) { struct kernfs_super_info *info = kernfs_info(sb); struct kernfs_root *kf_root = kfc->root; struct inode *inode; struct dentry *root; info->sb = sb; /* Userspace would break if executables or devices appear on sysfs */ sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = kfc->magic; sb->s_op = &kernfs_sops; sb->s_xattr = kernfs_xattr_handlers; if (info->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP) sb->s_export_op = &kernfs_export_ops; sb->s_time_gran = 1; /* sysfs dentries and inodes don't require IO to create */ sb->s_shrink->seeks = 0; /* get root inode, initialize and unlock it */ down_read(&kf_root->kernfs_rwsem); inode = kernfs_get_inode(sb, info->root->kn); up_read(&kf_root->kernfs_rwsem); if (!inode) { pr_debug("kernfs: could not get root inode\n"); return -ENOMEM; } /* instantiate and link root dentry */ root = d_make_root(inode); if (!root) { pr_debug("%s: could not get root dentry!\n", __func__); return -ENOMEM; } sb->s_root = root; sb->s_d_op = &kernfs_dops; return 0; } static int kernfs_test_super(struct super_block *sb, struct fs_context *fc) { struct kernfs_super_info *sb_info = kernfs_info(sb); struct kernfs_super_info *info = fc->s_fs_info; return sb_info->root == info->root && sb_info->ns == info->ns; } static int kernfs_set_super(struct super_block *sb, struct fs_context *fc) { struct kernfs_fs_context *kfc = fc->fs_private; kfc->ns_tag = NULL; return set_anon_super_fc(sb, fc); } /** * kernfs_super_ns - determine the namespace tag of a kernfs super_block * @sb: super_block of interest * * Return: the namespace tag associated with kernfs super_block @sb. */ const void *kernfs_super_ns(struct super_block *sb) { struct kernfs_super_info *info = kernfs_info(sb); return info->ns; } /** * kernfs_get_tree - kernfs filesystem access/retrieval helper * @fc: The filesystem context. * * This is to be called from each kernfs user's fs_context->ops->get_tree() * implementation, which should set the specified ->@fs_type and ->@flags, and * specify the hierarchy and namespace tag to mount via ->@root and ->@ns, * respectively. * * Return: %0 on success, -errno on failure. */ int kernfs_get_tree(struct fs_context *fc) { struct kernfs_fs_context *kfc = fc->fs_private; struct super_block *sb; struct kernfs_super_info *info; int error; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; info->root = kfc->root; info->ns = kfc->ns_tag; INIT_LIST_HEAD(&info->node); fc->s_fs_info = info; sb = sget_fc(fc, kernfs_test_super, kernfs_set_super); if (IS_ERR(sb)) return PTR_ERR(sb); if (!sb->s_root) { struct kernfs_super_info *info = kernfs_info(sb); struct kernfs_root *root = kfc->root; kfc->new_sb_created = true; error = kernfs_fill_super(sb, kfc); if (error) { deactivate_locked_super(sb); return error; } sb->s_flags |= SB_ACTIVE; uuid_t uuid; uuid_gen(&uuid); super_set_uuid(sb, uuid.b, sizeof(uuid)); down_write(&root->kernfs_supers_rwsem); list_add(&info->node, &info->root->supers); up_write(&root->kernfs_supers_rwsem); } fc->root = dget(sb->s_root); return 0; } void kernfs_free_fs_context(struct fs_context *fc) { /* Note that we don't deal with kfc->ns_tag here. */ kfree(fc->s_fs_info); fc->s_fs_info = NULL; } /** * kernfs_kill_sb - kill_sb for kernfs * @sb: super_block being killed * * This can be used directly for file_system_type->kill_sb(). If a kernfs * user needs extra cleanup, it can implement its own kill_sb() and call * this function at the end. */ void kernfs_kill_sb(struct super_block *sb) { struct kernfs_super_info *info = kernfs_info(sb); struct kernfs_root *root = info->root; down_write(&root->kernfs_supers_rwsem); list_del(&info->node); up_write(&root->kernfs_supers_rwsem); /* * Remove the superblock from fs_supers/s_instances * so we can't find it, before freeing kernfs_super_info. */ kill_anon_super(sb); kfree(info); } static void __init kernfs_mutex_init(void) { int count; for (count = 0; count < NR_KERNFS_LOCKS; count++) mutex_init(&kernfs_locks->open_file_mutex[count]); } static void __init kernfs_lock_init(void) { kernfs_locks = kmalloc(sizeof(struct kernfs_global_locks), GFP_KERNEL); WARN_ON(!kernfs_locks); kernfs_mutex_init(); } void __init kernfs_init(void) { kernfs_node_cache = kmem_cache_create("kernfs_node_cache", sizeof(struct kernfs_node), 0, SLAB_PANIC, NULL); /* Creates slab cache for kernfs inode attributes */ kernfs_iattrs_cache = kmem_cache_create("kernfs_iattrs_cache", sizeof(struct kernfs_iattrs), 0, SLAB_PANIC, NULL); kernfs_lock_init(); }
1 1 1 1 1 1 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 // SPDX-License-Identifier: GPL-2.0-or-later /* * OSS compatible sequencer driver * * Timer control routines * * Copyright (C) 1998,99 Takashi Iwai <tiwai@suse.de> */ #include "seq_oss_timer.h" #include "seq_oss_event.h" #include <sound/seq_oss_legacy.h> #include <linux/slab.h> /* */ #define MIN_OSS_TEMPO 8 #define MAX_OSS_TEMPO 360 #define MIN_OSS_TIMEBASE 1 #define MAX_OSS_TIMEBASE 1000 /* */ static void calc_alsa_tempo(struct seq_oss_timer *timer); static int send_timer_event(struct seq_oss_devinfo *dp, int type, int value); /* * create and register a new timer. * if queue is not started yet, start it. */ struct seq_oss_timer * snd_seq_oss_timer_new(struct seq_oss_devinfo *dp) { struct seq_oss_timer *rec; rec = kzalloc(sizeof(*rec), GFP_KERNEL); if (rec == NULL) return NULL; rec->dp = dp; rec->cur_tick = 0; rec->realtime = 0; rec->running = 0; rec->oss_tempo = 60; rec->oss_timebase = 100; calc_alsa_tempo(rec); return rec; } /* * delete timer. * if no more timer exists, stop the queue. */ void snd_seq_oss_timer_delete(struct seq_oss_timer *rec) { if (rec) { snd_seq_oss_timer_stop(rec); kfree(rec); } } /* * process one timing event * return 1 : event proceseed -- skip this event * 0 : not a timer event -- enqueue this event */ int snd_seq_oss_process_timer_event(struct seq_oss_timer *rec, union evrec *ev) { abstime_t parm = ev->t.time; if (ev->t.code == EV_TIMING) { switch (ev->t.cmd) { case TMR_WAIT_REL: parm += rec->cur_tick; rec->realtime = 0; fallthrough; case TMR_WAIT_ABS: if (parm == 0) { rec->realtime = 1; } else if (parm >= rec->cur_tick) { rec->realtime = 0; rec->cur_tick = parm; } return 1; /* skip this event */ case TMR_START: snd_seq_oss_timer_start(rec); return 1; } } else if (ev->s.code == SEQ_WAIT) { /* time = from 1 to 3 bytes */ parm = (ev->echo >> 8) & 0xffffff; if (parm > rec->cur_tick) { /* set next event time */ rec->cur_tick = parm; rec->realtime = 0; } return 1; } return 0; } /* * convert tempo units */ static void calc_alsa_tempo(struct seq_oss_timer *timer) { timer->tempo = (60 * 1000000) / timer->oss_tempo; timer->ppq = timer->oss_timebase; } /* * dispatch a timer event */ static int send_timer_event(struct seq_oss_devinfo *dp, int type, int value) { struct snd_seq_event ev; memset(&ev, 0, sizeof(ev)); ev.type = type; ev.source.client = dp->cseq; ev.source.port = 0; ev.dest.client = SNDRV_SEQ_CLIENT_SYSTEM; ev.dest.port = SNDRV_SEQ_PORT_SYSTEM_TIMER; ev.queue = dp->queue; ev.data.queue.queue = dp->queue; ev.data.queue.param.value = value; return snd_seq_kernel_client_dispatch(dp->cseq, &ev, 1, 0); } /* * set queue tempo and start queue */ int snd_seq_oss_timer_start(struct seq_oss_timer *timer) { struct seq_oss_devinfo *dp = timer->dp; struct snd_seq_queue_tempo tmprec; if (timer->running) snd_seq_oss_timer_stop(timer); memset(&tmprec, 0, sizeof(tmprec)); tmprec.queue = dp->queue; tmprec.ppq = timer->ppq; tmprec.tempo = timer->tempo; snd_seq_set_queue_tempo(dp->cseq, &tmprec); send_timer_event(dp, SNDRV_SEQ_EVENT_START, 0); timer->running = 1; timer->cur_tick = 0; return 0; } /* * stop queue */ int snd_seq_oss_timer_stop(struct seq_oss_timer *timer) { if (! timer->running) return 0; send_timer_event(timer->dp, SNDRV_SEQ_EVENT_STOP, 0); timer->running = 0; return 0; } /* * continue queue */ int snd_seq_oss_timer_continue(struct seq_oss_timer *timer) { if (timer->running) return 0; send_timer_event(timer->dp, SNDRV_SEQ_EVENT_CONTINUE, 0); timer->running = 1; return 0; } /* * change queue tempo */ int snd_seq_oss_timer_tempo(struct seq_oss_timer *timer, int value) { if (value < MIN_OSS_TEMPO) value = MIN_OSS_TEMPO; else if (value > MAX_OSS_TEMPO) value = MAX_OSS_TEMPO; timer->oss_tempo = value; calc_alsa_tempo(timer); if (timer->running) send_timer_event(timer->dp, SNDRV_SEQ_EVENT_TEMPO, timer->tempo); return 0; } /* * ioctls */ int snd_seq_oss_timer_ioctl(struct seq_oss_timer *timer, unsigned int cmd, int __user *arg) { int value; if (cmd == SNDCTL_SEQ_CTRLRATE) { /* if *arg == 0, just return the current rate */ if (get_user(value, arg)) return -EFAULT; if (value) return -EINVAL; value = ((timer->oss_tempo * timer->oss_timebase) + 30) / 60; return put_user(value, arg) ? -EFAULT : 0; } if (timer->dp->seq_mode == SNDRV_SEQ_OSS_MODE_SYNTH) return 0; switch (cmd) { case SNDCTL_TMR_START: return snd_seq_oss_timer_start(timer); case SNDCTL_TMR_STOP: return snd_seq_oss_timer_stop(timer); case SNDCTL_TMR_CONTINUE: return snd_seq_oss_timer_continue(timer); case SNDCTL_TMR_TEMPO: if (get_user(value, arg)) return -EFAULT; return snd_seq_oss_timer_tempo(timer, value); case SNDCTL_TMR_TIMEBASE: if (get_user(value, arg)) return -EFAULT; if (value < MIN_OSS_TIMEBASE) value = MIN_OSS_TIMEBASE; else if (value > MAX_OSS_TIMEBASE) value = MAX_OSS_TIMEBASE; timer->oss_timebase = value; calc_alsa_tempo(timer); return 0; case SNDCTL_TMR_METRONOME: case SNDCTL_TMR_SELECT: case SNDCTL_TMR_SOURCE: /* not supported */ return 0; } return 0; }
64 64 63 1 2 5 55 263 263 4 4 14 2 235 8 237 247 8 7 1 2 3 35 330 331 3 26 216 30 216 216 2 2 2 11 2 2 11 11 18 26 5 17 4 17 14 1 4 12 4 7 8 13 13 13 14 3 4 1 3 35 35 4 30 29 2 30 31 31 4 29 12 26 2 28 30 58 4 49 34 21 5 1 1 1 1 1 1 1 1 440 10 10 4 10 10 9 10 10 531 532 502 128 495 100 481 487 2 308 36 300 9 3 524 466 96 457 17 446 21 21 457 13 446 449 7 38 439 2 432 309 2 437 439 438 31 202 202 445 18 99 148 3 503 7 15 6 6 6 6 6 513 2 512 15 291 269 2 4 263 1 493 11 11 1 2 8 10 10 10 10 10 10 7 9 10 11 11 1 10 10 10 1 9 10 10 8 2 3 10 208 208 189 8 199 187 197 196 49 175 197 197 178 178 16 174 168 168 1 1 14 14 2 2 2 10 8 2 10 10 1 9 10 1 9 10 10 10 223 223 223 180 6 46 182 9 6 3 186 11 2 6 6 6 30 197 3 5 2 1 1 6 6 6 85 73 6 69 27 86 55 55 6 48 6 12 91 5 41 148 97 37 17 148 2 146 19 15 113 13 134 131 145 14 1 13 3 1 9 3 10 12 13 39 1 38 7 31 4 33 7 31 11 51 11 51 83 83 37 46 1 42 31 11 42 42 91 91 1 90 6 1 9 8 71 71 58 13 5 73 87 59 42 1 4 1 3 4 3 1 4 28 21 5 5 2 3 1 4 18 24 23 1 1 1 20 1 1 10 10 10 1 9 9 9 10 47 47 1 1 44 1 44 36 8 1 42 42 1 42 2 41 2 40 49 1 49 1 1 46 48 1 3 3 31 1 4 1 25 3 4 4 10 4 4 16 1 24 20 4 21 21 1 17 3 20 13 8 3 20 22 22 1 23 1 21 18 2 1 2 1 4 2 1 3 9 10 40 14 27 3 17 23 9 7 9 15 55 1 53 26 33 2 2 2 7 7 7 39 3 3 1 32 35 4 11 7 4 4 31 14 17 2 1 3 29 81 1 81 1 1 54 26 1 54 24 60 18 76 50 29 3 75 17 49 10 17 21 20 50 42 28 49 18 42 24 1 27 46 17 47 16 4 12 1 34 46 44 2 4 29 50 78 78 78 24 3 23 1 23 1 1 21 21 9 5 16 5 11 8 4 3 4 7 6 5 22 22 22 22 2 20 109 109 9 26 82 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/namei.c * * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) * Laboratoire MASI - Institut Blaise Pascal * Universite Pierre et Marie Curie (Paris VI) * * from * * linux/fs/minix/namei.c * * Copyright (C) 1991, 1992 Linus Torvalds * * Big-endian to little-endian byte-swapping/bitmaps by * David S. Miller (davem@caip.rutgers.edu), 1995 * Directory entry file type support and forward compatibility hooks * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 * Hash Tree Directory indexing (c) * Daniel Phillips, 2001 * Hash Tree Directory indexing porting * Christopher Li, 2002 * Hash Tree Directory indexing cleanup * Theodore Ts'o, 2002 */ #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/time.h> #include <linux/fcntl.h> #include <linux/stat.h> #include <linux/string.h> #include <linux/quotaops.h> #include <linux/buffer_head.h> #include <linux/bio.h> #include <linux/iversion.h> #include <linux/unicode.h> #include "ext4.h" #include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" #include <trace/events/ext4.h> /* * define how far ahead to read directories while searching them. */ #define NAMEI_RA_CHUNKS 2 #define NAMEI_RA_BLOCKS 4 #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) static struct buffer_head *ext4_append(handle_t *handle, struct inode *inode, ext4_lblk_t *block) { struct ext4_map_blocks map; struct buffer_head *bh; int err; if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb && ((inode->i_size >> 10) >= EXT4_SB(inode->i_sb)->s_max_dir_size_kb))) return ERR_PTR(-ENOSPC); *block = inode->i_size >> inode->i_sb->s_blocksize_bits; map.m_lblk = *block; map.m_len = 1; /* * We're appending new directory block. Make sure the block is not * allocated yet, otherwise we will end up corrupting the * directory. */ err = ext4_map_blocks(NULL, inode, &map, 0); if (err < 0) return ERR_PTR(err); if (err) { EXT4_ERROR_INODE(inode, "Logical block already allocated"); return ERR_PTR(-EFSCORRUPTED); } bh = ext4_bread(handle, inode, *block, EXT4_GET_BLOCKS_CREATE); if (IS_ERR(bh)) return bh; inode->i_size += inode->i_sb->s_blocksize; EXT4_I(inode)->i_disksize = inode->i_size; err = ext4_mark_inode_dirty(handle, inode); if (err) goto out; BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, inode->i_sb, bh, EXT4_JTR_NONE); if (err) goto out; return bh; out: brelse(bh); ext4_std_error(inode->i_sb, err); return ERR_PTR(err); } static int ext4_dx_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent); /* * Hints to ext4_read_dirblock regarding whether we expect a directory * block being read to be an index block, or a block containing * directory entries (and if the latter, whether it was found via a * logical block in an htree index block). This is used to control * what sort of sanity checkinig ext4_read_dirblock() will do on the * directory block read from the storage device. EITHER will means * the caller doesn't know what kind of directory block will be read, * so no specific verification will be done. */ typedef enum { EITHER, INDEX, DIRENT, DIRENT_HTREE } dirblock_type_t; #define ext4_read_dirblock(inode, block, type) \ __ext4_read_dirblock((inode), (block), (type), __func__, __LINE__) static struct buffer_head *__ext4_read_dirblock(struct inode *inode, ext4_lblk_t block, dirblock_type_t type, const char *func, unsigned int line) { struct buffer_head *bh; struct ext4_dir_entry *dirent; int is_dx_block = 0; if (block >= inode->i_size >> inode->i_blkbits) { ext4_error_inode(inode, func, line, block, "Attempting to read directory block (%u) that is past i_size (%llu)", block, inode->i_size); return ERR_PTR(-EFSCORRUPTED); } if (ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_EIO)) bh = ERR_PTR(-EIO); else bh = ext4_bread(NULL, inode, block, 0); if (IS_ERR(bh)) { __ext4_warning(inode->i_sb, func, line, "inode #%lu: lblock %lu: comm %s: " "error %ld reading directory block", inode->i_ino, (unsigned long)block, current->comm, PTR_ERR(bh)); return bh; } /* The first directory block must not be a hole. */ if (!bh && (type == INDEX || type == DIRENT_HTREE || block == 0)) { ext4_error_inode(inode, func, line, block, "Directory hole found for htree %s block %u", (type == INDEX) ? "index" : "leaf", block); return ERR_PTR(-EFSCORRUPTED); } if (!bh) return NULL; dirent = (struct ext4_dir_entry *) bh->b_data; /* Determine whether or not we have an index block */ if (is_dx(inode)) { if (block == 0) is_dx_block = 1; else if (ext4_rec_len_from_disk(dirent->rec_len, inode->i_sb->s_blocksize) == inode->i_sb->s_blocksize) is_dx_block = 1; } if (!is_dx_block && type == INDEX) { ext4_error_inode(inode, func, line, block, "directory leaf block found instead of index block"); brelse(bh); return ERR_PTR(-EFSCORRUPTED); } if (!ext4_has_metadata_csum(inode->i_sb) || buffer_verified(bh)) return bh; /* * An empty leaf block can get mistaken for a index block; for * this reason, we can only check the index checksum when the * caller is sure it should be an index block. */ if (is_dx_block && type == INDEX) { if (ext4_dx_csum_verify(inode, dirent) && !ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_CRC)) set_buffer_verified(bh); else { ext4_error_inode_err(inode, func, line, block, EFSBADCRC, "Directory index failed checksum"); brelse(bh); return ERR_PTR(-EFSBADCRC); } } if (!is_dx_block) { if (ext4_dirblock_csum_verify(inode, bh) && !ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_CRC)) set_buffer_verified(bh); else { ext4_error_inode_err(inode, func, line, block, EFSBADCRC, "Directory block failed checksum"); brelse(bh); return ERR_PTR(-EFSBADCRC); } } return bh; } #ifdef DX_DEBUG #define dxtrace(command) command #else #define dxtrace(command) #endif struct fake_dirent { __le32 inode; __le16 rec_len; u8 name_len; u8 file_type; }; struct dx_countlimit { __le16 limit; __le16 count; }; struct dx_entry { __le32 hash; __le32 block; }; /* * dx_root_info is laid out so that if it should somehow get overlaid by a * dirent the two low bits of the hash version will be zero. Therefore, the * hash version mod 4 should never be 0. Sincerely, the paranoia department. */ struct dx_root { struct fake_dirent dot; char dot_name[4]; struct fake_dirent dotdot; char dotdot_name[4]; struct dx_root_info { __le32 reserved_zero; u8 hash_version; u8 info_length; /* 8 */ u8 indirect_levels; u8 unused_flags; } info; struct dx_entry entries[]; }; struct dx_node { struct fake_dirent fake; struct dx_entry entries[]; }; struct dx_frame { struct buffer_head *bh; struct dx_entry *entries; struct dx_entry *at; }; struct dx_map_entry { u32 hash; u16 offs; u16 size; }; /* * This goes at the end of each htree block. */ struct dx_tail { u32 dt_reserved; __le32 dt_checksum; /* crc32c(uuid+inum+dirblock) */ }; static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); static inline unsigned dx_get_hash(struct dx_entry *entry); static void dx_set_hash(struct dx_entry *entry, unsigned value); static unsigned dx_get_count(struct dx_entry *entries); static unsigned dx_get_limit(struct dx_entry *entries); static void dx_set_count(struct dx_entry *entries, unsigned value); static void dx_set_limit(struct dx_entry *entries, unsigned value); static unsigned dx_root_limit(struct inode *dir, unsigned infosize); static unsigned dx_node_limit(struct inode *dir); static struct dx_frame *dx_probe(struct ext4_filename *fname, struct inode *dir, struct dx_hash_info *hinfo, struct dx_frame *frame); static void dx_release(struct dx_frame *frames); static int dx_make_map(struct inode *dir, struct buffer_head *bh, struct dx_hash_info *hinfo, struct dx_map_entry *map_tail); static void dx_sort_map(struct dx_map_entry *map, unsigned count); static struct ext4_dir_entry_2 *dx_move_dirents(struct inode *dir, char *from, char *to, struct dx_map_entry *offsets, int count, unsigned int blocksize); static struct ext4_dir_entry_2 *dx_pack_dirents(struct inode *dir, char *base, unsigned int blocksize); static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block); static int ext4_htree_next_block(struct inode *dir, __u32 hash, struct dx_frame *frame, struct dx_frame *frames, __u32 *start_hash); static struct buffer_head * ext4_dx_find_entry(struct inode *dir, struct ext4_filename *fname, struct ext4_dir_entry_2 **res_dir); static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, struct inode *dir, struct inode *inode); /* checksumming functions */ void ext4_initialize_dirent_tail(struct buffer_head *bh, unsigned int blocksize) { struct ext4_dir_entry_tail *t = EXT4_DIRENT_TAIL(bh->b_data, blocksize); memset(t, 0, sizeof(struct ext4_dir_entry_tail)); t->det_rec_len = ext4_rec_len_to_disk( sizeof(struct ext4_dir_entry_tail), blocksize); t->det_reserved_ft = EXT4_FT_DIR_CSUM; } /* Walk through a dirent block to find a checksum "dirent" at the tail */ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode, struct buffer_head *bh) { struct ext4_dir_entry_tail *t; int blocksize = EXT4_BLOCK_SIZE(inode->i_sb); #ifdef PARANOID struct ext4_dir_entry *d, *top; d = (struct ext4_dir_entry *)bh->b_data; top = (struct ext4_dir_entry *)(bh->b_data + (blocksize - sizeof(struct ext4_dir_entry_tail))); while (d < top && ext4_rec_len_from_disk(d->rec_len, blocksize)) d = (struct ext4_dir_entry *)(((void *)d) + ext4_rec_len_from_disk(d->rec_len, blocksize)); if (d != top) return NULL; t = (struct ext4_dir_entry_tail *)d; #else t = EXT4_DIRENT_TAIL(bh->b_data, EXT4_BLOCK_SIZE(inode->i_sb)); #endif if (t->det_reserved_zero1 || (ext4_rec_len_from_disk(t->det_rec_len, blocksize) != sizeof(struct ext4_dir_entry_tail)) || t->det_reserved_zero2 || t->det_reserved_ft != EXT4_FT_DIR_CSUM) return NULL; return t; } static __le32 ext4_dirblock_csum(struct inode *inode, void *dirent, int size) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); __u32 csum; csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size); return cpu_to_le32(csum); } #define warn_no_space_for_csum(inode) \ __warn_no_space_for_csum((inode), __func__, __LINE__) static void __warn_no_space_for_csum(struct inode *inode, const char *func, unsigned int line) { __ext4_warning_inode(inode, func, line, "No space for directory leaf checksum. Please run e2fsck -D."); } int ext4_dirblock_csum_verify(struct inode *inode, struct buffer_head *bh) { struct ext4_dir_entry_tail *t; if (!ext4_has_metadata_csum(inode->i_sb)) return 1; t = get_dirent_tail(inode, bh); if (!t) { warn_no_space_for_csum(inode); return 0; } if (t->det_checksum != ext4_dirblock_csum(inode, bh->b_data, (char *)t - bh->b_data)) return 0; return 1; } static void ext4_dirblock_csum_set(struct inode *inode, struct buffer_head *bh) { struct ext4_dir_entry_tail *t; if (!ext4_has_metadata_csum(inode->i_sb)) return; t = get_dirent_tail(inode, bh); if (!t) { warn_no_space_for_csum(inode); return; } t->det_checksum = ext4_dirblock_csum(inode, bh->b_data, (char *)t - bh->b_data); } int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode, struct buffer_head *bh) { ext4_dirblock_csum_set(inode, bh); return ext4_handle_dirty_metadata(handle, inode, bh); } static struct dx_countlimit *get_dx_countlimit(struct inode *inode, struct ext4_dir_entry *dirent, int *offset) { struct ext4_dir_entry *dp; struct dx_root_info *root; int count_offset; int blocksize = EXT4_BLOCK_SIZE(inode->i_sb); unsigned int rlen = ext4_rec_len_from_disk(dirent->rec_len, blocksize); if (rlen == blocksize) count_offset = 8; else if (rlen == 12) { dp = (struct ext4_dir_entry *)(((void *)dirent) + 12); if (ext4_rec_len_from_disk(dp->rec_len, blocksize) != blocksize - 12) return NULL; root = (struct dx_root_info *)(((void *)dp + 12)); if (root->reserved_zero || root->info_length != sizeof(struct dx_root_info)) return NULL; count_offset = 32; } else return NULL; if (offset) *offset = count_offset; return (struct dx_countlimit *)(((void *)dirent) + count_offset); } static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent, int count_offset, int count, struct dx_tail *t) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); __u32 csum; int size; __u32 dummy_csum = 0; int offset = offsetof(struct dx_tail, dt_checksum); size = count_offset + (count * sizeof(struct dx_entry)); csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size); csum = ext4_chksum(sbi, csum, (__u8 *)t, offset); csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, sizeof(dummy_csum)); return cpu_to_le32(csum); } static int ext4_dx_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent) { struct dx_countlimit *c; struct dx_tail *t; int count_offset, limit, count; if (!ext4_has_metadata_csum(inode->i_sb)) return 1; c = get_dx_countlimit(inode, dirent, &count_offset); if (!c) { EXT4_ERROR_INODE(inode, "dir seems corrupt? Run e2fsck -D."); return 0; } limit = le16_to_cpu(c->limit); count = le16_to_cpu(c->count); if (count_offset + (limit * sizeof(struct dx_entry)) > EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) { warn_no_space_for_csum(inode); return 0; } t = (struct dx_tail *)(((struct dx_entry *)c) + limit); if (t->dt_checksum != ext4_dx_csum(inode, dirent, count_offset, count, t)) return 0; return 1; } static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent) { struct dx_countlimit *c; struct dx_tail *t; int count_offset, limit, count; if (!ext4_has_metadata_csum(inode->i_sb)) return; c = get_dx_countlimit(inode, dirent, &count_offset); if (!c) { EXT4_ERROR_INODE(inode, "dir seems corrupt? Run e2fsck -D."); return; } limit = le16_to_cpu(c->limit); count = le16_to_cpu(c->count); if (count_offset + (limit * sizeof(struct dx_entry)) > EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) { warn_no_space_for_csum(inode); return; } t = (struct dx_tail *)(((struct dx_entry *)c) + limit); t->dt_checksum = ext4_dx_csum(inode, dirent, count_offset, count, t); } static inline int ext4_handle_dirty_dx_node(handle_t *handle, struct inode *inode, struct buffer_head *bh) { ext4_dx_csum_set(inode, (struct ext4_dir_entry *)bh->b_data); return ext4_handle_dirty_metadata(handle, inode, bh); } /* * p is at least 6 bytes before the end of page */ static inline struct ext4_dir_entry_2 * ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize) { return (struct ext4_dir_entry_2 *)((char *)p + ext4_rec_len_from_disk(p->rec_len, blocksize)); } /* * Future: use high four bits of block for coalesce-on-delete flags * Mask them off for now. */ static inline ext4_lblk_t dx_get_block(struct dx_entry *entry) { return le32_to_cpu(entry->block) & 0x0fffffff; } static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value) { entry->block = cpu_to_le32(value); } static inline unsigned dx_get_hash(struct dx_entry *entry) { return le32_to_cpu(entry->hash); } static inline void dx_set_hash(struct dx_entry *entry, unsigned value) { entry->hash = cpu_to_le32(value); } static inline unsigned dx_get_count(struct dx_entry *entries) { return le16_to_cpu(((struct dx_countlimit *) entries)->count); } static inline unsigned dx_get_limit(struct dx_entry *entries) { return le16_to_cpu(((struct dx_countlimit *) entries)->limit); } static inline void dx_set_count(struct dx_entry *entries, unsigned value) { ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); } static inline void dx_set_limit(struct dx_entry *entries, unsigned value) { ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); } static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize) { unsigned int entry_space = dir->i_sb->s_blocksize - ext4_dir_rec_len(1, NULL) - ext4_dir_rec_len(2, NULL) - infosize; if (ext4_has_metadata_csum(dir->i_sb)) entry_space -= sizeof(struct dx_tail); return entry_space / sizeof(struct dx_entry); } static inline unsigned dx_node_limit(struct inode *dir) { unsigned int entry_space = dir->i_sb->s_blocksize - ext4_dir_rec_len(0, dir); if (ext4_has_metadata_csum(dir->i_sb)) entry_space -= sizeof(struct dx_tail); return entry_space / sizeof(struct dx_entry); } /* * Debug */ #ifdef DX_DEBUG static void dx_show_index(char * label, struct dx_entry *entries) { int i, n = dx_get_count (entries); printk(KERN_DEBUG "%s index", label); for (i = 0; i < n; i++) { printk(KERN_CONT " %x->%lu", i ? dx_get_hash(entries + i) : 0, (unsigned long)dx_get_block(entries + i)); } printk(KERN_CONT "\n"); } struct stats { unsigned names; unsigned space; unsigned bcount; }; static struct stats dx_show_leaf(struct inode *dir, struct dx_hash_info *hinfo, struct ext4_dir_entry_2 *de, int size, int show_names) { unsigned names = 0, space = 0; char *base = (char *) de; struct dx_hash_info h = *hinfo; printk("names: "); while ((char *) de < base + size) { if (de->inode) { if (show_names) { #ifdef CONFIG_FS_ENCRYPTION int len; char *name; struct fscrypt_str fname_crypto_str = FSTR_INIT(NULL, 0); int res = 0; name = de->name; len = de->name_len; if (!IS_ENCRYPTED(dir)) { /* Directory is not encrypted */ (void) ext4fs_dirhash(dir, de->name, de->name_len, &h); printk("%*.s:(U)%x.%u ", len, name, h.hash, (unsigned) ((char *) de - base)); } else { struct fscrypt_str de_name = FSTR_INIT(name, len); /* Directory is encrypted */ res = fscrypt_fname_alloc_buffer( len, &fname_crypto_str); if (res) printk(KERN_WARNING "Error " "allocating crypto " "buffer--skipping " "crypto\n"); res = fscrypt_fname_disk_to_usr(dir, 0, 0, &de_name, &fname_crypto_str); if (res) { printk(KERN_WARNING "Error " "converting filename " "from disk to usr" "\n"); name = "??"; len = 2; } else { name = fname_crypto_str.name; len = fname_crypto_str.len; } if (IS_CASEFOLDED(dir)) h.hash = EXT4_DIRENT_HASH(de); else (void) ext4fs_dirhash(dir, de->name, de->name_len, &h); printk("%*.s:(E)%x.%u ", len, name, h.hash, (unsigned) ((char *) de - base)); fscrypt_fname_free_buffer( &fname_crypto_str); } #else int len = de->name_len; char *name = de->name; (void) ext4fs_dirhash(dir, de->name, de->name_len, &h); printk("%*.s:%x.%u ", len, name, h.hash, (unsigned) ((char *) de - base)); #endif } space += ext4_dir_rec_len(de->name_len, dir); names++; } de = ext4_next_entry(de, size); } printk(KERN_CONT "(%i)\n", names); return (struct stats) { names, space, 1 }; } struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, struct dx_entry *entries, int levels) { unsigned blocksize = dir->i_sb->s_blocksize; unsigned count = dx_get_count(entries), names = 0, space = 0, i; unsigned bcount = 0; struct buffer_head *bh; printk("%i indexed blocks...\n", count); for (i = 0; i < count; i++, entries++) { ext4_lblk_t block = dx_get_block(entries); ext4_lblk_t hash = i ? dx_get_hash(entries): 0; u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; struct stats stats; printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); bh = ext4_bread(NULL,dir, block, 0); if (!bh || IS_ERR(bh)) continue; stats = levels? dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0); names += stats.names; space += stats.space; bcount += stats.bcount; brelse(bh); } if (bcount) printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n", levels ? "" : " ", names, space/bcount, (space/bcount)*100/blocksize); return (struct stats) { names, space, bcount}; } /* * Linear search cross check */ static inline void htree_rep_invariant_check(struct dx_entry *at, struct dx_entry *target, u32 hash, unsigned int n) { while (n--) { dxtrace(printk(KERN_CONT ",")); if (dx_get_hash(++at) > hash) { at--; break; } } ASSERT(at == target - 1); } #else /* DX_DEBUG */ static inline void htree_rep_invariant_check(struct dx_entry *at, struct dx_entry *target, u32 hash, unsigned int n) { } #endif /* DX_DEBUG */ /* * Probe for a directory leaf block to search. * * dx_probe can return ERR_BAD_DX_DIR, which means there was a format * error in the directory index, and the caller should fall back to * searching the directory normally. The callers of dx_probe **MUST** * check for this error code, and make sure it never gets reflected * back to userspace. */ static struct dx_frame * dx_probe(struct ext4_filename *fname, struct inode *dir, struct dx_hash_info *hinfo, struct dx_frame *frame_in) { unsigned count, indirect, level, i; struct dx_entry *at, *entries, *p, *q, *m; struct dx_root *root; struct dx_frame *frame = frame_in; struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR); u32 hash; ext4_lblk_t block; ext4_lblk_t blocks[EXT4_HTREE_LEVEL]; memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0])); frame->bh = ext4_read_dirblock(dir, 0, INDEX); if (IS_ERR(frame->bh)) return (struct dx_frame *) frame->bh; root = (struct dx_root *) frame->bh->b_data; if (root->info.hash_version != DX_HASH_TEA && root->info.hash_version != DX_HASH_HALF_MD4 && root->info.hash_version != DX_HASH_LEGACY && root->info.hash_version != DX_HASH_SIPHASH) { ext4_warning_inode(dir, "Unrecognised inode hash code %u", root->info.hash_version); goto fail; } if (ext4_hash_in_dirent(dir)) { if (root->info.hash_version != DX_HASH_SIPHASH) { ext4_warning_inode(dir, "Hash in dirent, but hash is not SIPHASH"); goto fail; } } else { if (root->info.hash_version == DX_HASH_SIPHASH) { ext4_warning_inode(dir, "Hash code is SIPHASH, but hash not in dirent"); goto fail; } } if (fname) hinfo = &fname->hinfo; hinfo->hash_version = root->info.hash_version; if (hinfo->hash_version <= DX_HASH_TEA) hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; /* hash is already computed for encrypted casefolded directory */ if (fname && fname_name(fname) && !(IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir))) { int ret = ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), hinfo); if (ret < 0) { ret_err = ERR_PTR(ret); goto fail; } } hash = hinfo->hash; if (root->info.unused_flags & 1) { ext4_warning_inode(dir, "Unimplemented hash flags: %#06x", root->info.unused_flags); goto fail; } indirect = root->info.indirect_levels; if (indirect >= ext4_dir_htree_level(dir->i_sb)) { ext4_warning(dir->i_sb, "Directory (ino: %lu) htree depth %#06x exceed" "supported value", dir->i_ino, ext4_dir_htree_level(dir->i_sb)); if (ext4_dir_htree_level(dir->i_sb) < EXT4_HTREE_LEVEL) { ext4_warning(dir->i_sb, "Enable large directory " "feature to access it"); } goto fail; } entries = (struct dx_entry *)(((char *)&root->info) + root->info.info_length); if (dx_get_limit(entries) != dx_root_limit(dir, root->info.info_length)) { ext4_warning_inode(dir, "dx entry: limit %u != root limit %u", dx_get_limit(entries), dx_root_limit(dir, root->info.info_length)); goto fail; } dxtrace(printk("Look up %x", hash)); level = 0; blocks[0] = 0; while (1) { count = dx_get_count(entries); if (!count || count > dx_get_limit(entries)) { ext4_warning_inode(dir, "dx entry: count %u beyond limit %u", count, dx_get_limit(entries)); goto fail; } p = entries + 1; q = entries + count - 1; while (p <= q) { m = p + (q - p) / 2; dxtrace(printk(KERN_CONT ".")); if (dx_get_hash(m) > hash) q = m - 1; else p = m + 1; } htree_rep_invariant_check(entries, p, hash, count - 1); at = p - 1; dxtrace(printk(KERN_CONT " %x->%u\n", at == entries ? 0 : dx_get_hash(at), dx_get_block(at))); frame->entries = entries; frame->at = at; block = dx_get_block(at); for (i = 0; i <= level; i++) { if (blocks[i] == block) { ext4_warning_inode(dir, "dx entry: tree cycle block %u points back to block %u", blocks[level], block); goto fail; } } if (++level > indirect) return frame; blocks[level] = block; frame++; frame->bh = ext4_read_dirblock(dir, block, INDEX); if (IS_ERR(frame->bh)) { ret_err = (struct dx_frame *) frame->bh; frame->bh = NULL; goto fail; } entries = ((struct dx_node *) frame->bh->b_data)->entries; if (dx_get_limit(entries) != dx_node_limit(dir)) { ext4_warning_inode(dir, "dx entry: limit %u != node limit %u", dx_get_limit(entries), dx_node_limit(dir)); goto fail; } } fail: while (frame >= frame_in) { brelse(frame->bh); frame--; } if (ret_err == ERR_PTR(ERR_BAD_DX_DIR)) ext4_warning_inode(dir, "Corrupt directory, running e2fsck is recommended"); return ret_err; } static void dx_release(struct dx_frame *frames) { struct dx_root_info *info; int i; unsigned int indirect_levels; if (frames[0].bh == NULL) return; info = &((struct dx_root *)frames[0].bh->b_data)->info; /* save local copy, "info" may be freed after brelse() */ indirect_levels = info->indirect_levels; for (i = 0; i <= indirect_levels; i++) { if (frames[i].bh == NULL) break; brelse(frames[i].bh); frames[i].bh = NULL; } } /* * This function increments the frame pointer to search the next leaf * block, and reads in the necessary intervening nodes if the search * should be necessary. Whether or not the search is necessary is * controlled by the hash parameter. If the hash value is even, then * the search is only continued if the next block starts with that * hash value. This is used if we are searching for a specific file. * * If the hash value is HASH_NB_ALWAYS, then always go to the next block. * * This function returns 1 if the caller should continue to search, * or 0 if it should not. If there is an error reading one of the * index blocks, it will a negative error code. * * If start_hash is non-null, it will be filled in with the starting * hash of the next page. */ static int ext4_htree_next_block(struct inode *dir, __u32 hash, struct dx_frame *frame, struct dx_frame *frames, __u32 *start_hash) { struct dx_frame *p; struct buffer_head *bh; int num_frames = 0; __u32 bhash; p = frame; /* * Find the next leaf page by incrementing the frame pointer. * If we run out of entries in the interior node, loop around and * increment pointer in the parent node. When we break out of * this loop, num_frames indicates the number of interior * nodes need to be read. */ while (1) { if (++(p->at) < p->entries + dx_get_count(p->entries)) break; if (p == frames) return 0; num_frames++; p--; } /* * If the hash is 1, then continue only if the next page has a * continuation hash of any value. This is used for readdir * handling. Otherwise, check to see if the hash matches the * desired continuation hash. If it doesn't, return since * there's no point to read in the successive index pages. */ bhash = dx_get_hash(p->at); if (start_hash) *start_hash = bhash; if ((hash & 1) == 0) { if ((bhash & ~1) != hash) return 0; } /* * If the hash is HASH_NB_ALWAYS, we always go to the next * block so no check is necessary */ while (num_frames--) { bh = ext4_read_dirblock(dir, dx_get_block(p->at), INDEX); if (IS_ERR(bh)) return PTR_ERR(bh); p++; brelse(p->bh); p->bh = bh; p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; } return 1; } /* * This function fills a red-black tree with information from a * directory block. It returns the number directory entries loaded * into the tree. If there is an error it is returned in err. */ static int htree_dirblock_to_tree(struct file *dir_file, struct inode *dir, ext4_lblk_t block, struct dx_hash_info *hinfo, __u32 start_hash, __u32 start_minor_hash) { struct buffer_head *bh; struct ext4_dir_entry_2 *de, *top; int err = 0, count = 0; struct fscrypt_str fname_crypto_str = FSTR_INIT(NULL, 0), tmp_str; int csum = ext4_has_metadata_csum(dir->i_sb); dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", (unsigned long)block)); bh = ext4_read_dirblock(dir, block, DIRENT_HTREE); if (IS_ERR(bh)) return PTR_ERR(bh); de = (struct ext4_dir_entry_2 *) bh->b_data; /* csum entries are not larger in the casefolded encrypted case */ top = (struct ext4_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize - ext4_dir_rec_len(0, csum ? NULL : dir)); /* Check if the directory is encrypted */ if (IS_ENCRYPTED(dir)) { err = fscrypt_prepare_readdir(dir); if (err < 0) { brelse(bh); return err; } err = fscrypt_fname_alloc_buffer(EXT4_NAME_LEN, &fname_crypto_str); if (err < 0) { brelse(bh); return err; } } for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) { if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data, bh->b_size, (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) + ((char *)de - bh->b_data))) { /* silently ignore the rest of the block */ break; } if (ext4_hash_in_dirent(dir)) { if (de->name_len && de->inode) { hinfo->hash = EXT4_DIRENT_HASH(de); hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de); } else { hinfo->hash = 0; hinfo->minor_hash = 0; } } else { err = ext4fs_dirhash(dir, de->name, de->name_len, hinfo); if (err < 0) { count = err; goto errout; } } if ((hinfo->hash < start_hash) || ((hinfo->hash == start_hash) && (hinfo->minor_hash < start_minor_hash))) continue; if (de->inode == 0) continue; if (!IS_ENCRYPTED(dir)) { tmp_str.name = de->name; tmp_str.len = de->name_len; err = ext4_htree_store_dirent(dir_file, hinfo->hash, hinfo->minor_hash, de, &tmp_str); } else { int save_len = fname_crypto_str.len; struct fscrypt_str de_name = FSTR_INIT(de->name, de->name_len); /* Directory is encrypted */ err = fscrypt_fname_disk_to_usr(dir, hinfo->hash, hinfo->minor_hash, &de_name, &fname_crypto_str); if (err) { count = err; goto errout; } err = ext4_htree_store_dirent(dir_file, hinfo->hash, hinfo->minor_hash, de, &fname_crypto_str); fname_crypto_str.len = save_len; } if (err != 0) { count = err; goto errout; } count++; } errout: brelse(bh); fscrypt_fname_free_buffer(&fname_crypto_str); return count; } /* * This function fills a red-black tree with information from a * directory. We start scanning the directory in hash order, starting * at start_hash and start_minor_hash. * * This function returns the number of entries inserted into the tree, * or a negative error code. */ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, __u32 start_minor_hash, __u32 *next_hash) { struct dx_hash_info hinfo; struct ext4_dir_entry_2 *de; struct dx_frame frames[EXT4_HTREE_LEVEL], *frame; struct inode *dir; ext4_lblk_t block; int count = 0; int ret, err; __u32 hashval; struct fscrypt_str tmp_str; dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", start_hash, start_minor_hash)); dir = file_inode(dir_file); if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) { if (ext4_hash_in_dirent(dir)) hinfo.hash_version = DX_HASH_SIPHASH; else hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; if (hinfo.hash_version <= DX_HASH_TEA) hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; if (ext4_has_inline_data(dir)) { int has_inline_data = 1; count = ext4_inlinedir_to_tree(dir_file, dir, 0, &hinfo, start_hash, start_minor_hash, &has_inline_data); if (has_inline_data) { *next_hash = ~0; return count; } } count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo, start_hash, start_minor_hash); *next_hash = ~0; return count; } hinfo.hash = start_hash; hinfo.minor_hash = 0; frame = dx_probe(NULL, dir, &hinfo, frames); if (IS_ERR(frame)) return PTR_ERR(frame); /* Add '.' and '..' from the htree header */ if (!start_hash && !start_minor_hash) { de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data; tmp_str.name = de->name; tmp_str.len = de->name_len; err = ext4_htree_store_dirent(dir_file, 0, 0, de, &tmp_str); if (err != 0) goto errout; count++; } if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) { de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data; de = ext4_next_entry(de, dir->i_sb->s_blocksize); tmp_str.name = de->name; tmp_str.len = de->name_len; err = ext4_htree_store_dirent(dir_file, 2, 0, de, &tmp_str); if (err != 0) goto errout; count++; } while (1) { if (fatal_signal_pending(current)) { err = -ERESTARTSYS; goto errout; } cond_resched(); block = dx_get_block(frame->at); ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo, start_hash, start_minor_hash); if (ret < 0) { err = ret; goto errout; } count += ret; hashval = ~0; ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS, frame, frames, &hashval); *next_hash = hashval; if (ret < 0) { err = ret; goto errout; } /* * Stop if: (a) there are no more entries, or * (b) we have inserted at least one entry and the * next hash value is not a continuation */ if ((ret == 0) || (count && ((hashval & 1) == 0))) break; } dx_release(frames); dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, " "next hash: %x\n", count, *next_hash)); return count; errout: dx_release(frames); return (err); } static inline int search_dirblock(struct buffer_head *bh, struct inode *dir, struct ext4_filename *fname, unsigned int offset, struct ext4_dir_entry_2 **res_dir) { return ext4_search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir, fname, offset, res_dir); } /* * Directory block splitting, compacting */ /* * Create map of hash values, offsets, and sizes, stored at end of block. * Returns number of entries mapped. */ static int dx_make_map(struct inode *dir, struct buffer_head *bh, struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) { int count = 0; struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)bh->b_data; unsigned int buflen = bh->b_size; char *base = bh->b_data; struct dx_hash_info h = *hinfo; int blocksize = EXT4_BLOCK_SIZE(dir->i_sb); if (ext4_has_metadata_csum(dir->i_sb)) buflen -= sizeof(struct ext4_dir_entry_tail); while ((char *) de < base + buflen) { if (ext4_check_dir_entry(dir, NULL, de, bh, base, buflen, ((char *)de) - base)) return -EFSCORRUPTED; if (de->name_len && de->inode) { if (ext4_hash_in_dirent(dir)) h.hash = EXT4_DIRENT_HASH(de); else { int err = ext4fs_dirhash(dir, de->name, de->name_len, &h); if (err < 0) return err; } map_tail--; map_tail->hash = h.hash; map_tail->offs = ((char *) de - base)>>2; map_tail->size = ext4_rec_len_from_disk(de->rec_len, blocksize); count++; cond_resched(); } de = ext4_next_entry(de, blocksize); } return count; } /* Sort map by hash value */ static void dx_sort_map (struct dx_map_entry *map, unsigned count) { struct dx_map_entry *p, *q, *top = map + count - 1; int more; /* Combsort until bubble sort doesn't suck */ while (count > 2) { count = count*10/13; if (count - 9 < 2) /* 9, 10 -> 11 */ count = 11; for (p = top, q = p - count; q >= map; p--, q--) if (p->hash < q->hash) swap(*p, *q); } /* Garden variety bubble sort */ do { more = 0; q = top; while (q-- > map) { if (q[1].hash >= q[0].hash) continue; swap(*(q+1), *q); more = 1; } } while(more); } static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) { struct dx_entry *entries = frame->entries; struct dx_entry *old = frame->at, *new = old + 1; int count = dx_get_count(entries); ASSERT(count < dx_get_limit(entries)); ASSERT(old < entries + count); memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); dx_set_hash(new, hash); dx_set_block(new, block); dx_set_count(entries, count + 1); } #if IS_ENABLED(CONFIG_UNICODE) int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, struct ext4_filename *name) { struct qstr *cf_name = &name->cf_name; unsigned char *buf; struct dx_hash_info *hinfo = &name->hinfo; int len; if (!IS_CASEFOLDED(dir) || (IS_ENCRYPTED(dir) && !fscrypt_has_encryption_key(dir))) { cf_name->name = NULL; return 0; } buf = kmalloc(EXT4_NAME_LEN, GFP_NOFS); if (!buf) return -ENOMEM; len = utf8_casefold(dir->i_sb->s_encoding, iname, buf, EXT4_NAME_LEN); if (len <= 0) { kfree(buf); buf = NULL; } cf_name->name = buf; cf_name->len = (unsigned) len; if (!IS_ENCRYPTED(dir)) return 0; hinfo->hash_version = DX_HASH_SIPHASH; hinfo->seed = NULL; if (cf_name->name) return ext4fs_dirhash(dir, cf_name->name, cf_name->len, hinfo); else return ext4fs_dirhash(dir, iname->name, iname->len, hinfo); } #endif /* * Test whether a directory entry matches the filename being searched for. * * Return: %true if the directory entry matches, otherwise %false. */ static bool ext4_match(struct inode *parent, const struct ext4_filename *fname, struct ext4_dir_entry_2 *de) { struct fscrypt_name f; if (!de->inode) return false; f.usr_fname = fname->usr_fname; f.disk_name = fname->disk_name; #ifdef CONFIG_FS_ENCRYPTION f.crypto_buf = fname->crypto_buf; #endif #if IS_ENABLED(CONFIG_UNICODE) if (IS_CASEFOLDED(parent) && (!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) { /* * Just checking IS_ENCRYPTED(parent) below is not * sufficient to decide whether one can use the hash for * skipping the string comparison, because the key might * have been added right after * ext4_fname_setup_ci_filename(). In this case, a hash * mismatch will be a false negative. Therefore, make * sure cf_name was properly initialized before * considering the calculated hash. */ if (IS_ENCRYPTED(parent) && fname->cf_name.name && (fname->hinfo.hash != EXT4_DIRENT_HASH(de) || fname->hinfo.minor_hash != EXT4_DIRENT_MINOR_HASH(de))) return false; /* * Treat comparison errors as not a match. The * only case where it happens is on a disk * corruption or ENOMEM. */ return generic_ci_match(parent, fname->usr_fname, &fname->cf_name, de->name, de->name_len) > 0; } #endif return fscrypt_match_name(&f, de->name, de->name_len); } /* * Returns 0 if not found, -1 on failure, and 1 on success */ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size, struct inode *dir, struct ext4_filename *fname, unsigned int offset, struct ext4_dir_entry_2 **res_dir) { struct ext4_dir_entry_2 * de; char * dlimit; int de_len; de = (struct ext4_dir_entry_2 *)search_buf; dlimit = search_buf + buf_size; while ((char *) de < dlimit - EXT4_BASE_DIR_LEN) { /* this code is executed quadratically often */ /* do minimal checking `by hand' */ if (de->name + de->name_len <= dlimit && ext4_match(dir, fname, de)) { /* found a match - just to be sure, do * a full check */ if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf, buf_size, offset)) return -1; *res_dir = de; return 1; } /* prevent looping on a bad block */ de_len = ext4_rec_len_from_disk(de->rec_len, dir->i_sb->s_blocksize); if (de_len <= 0) return -1; offset += de_len; de = (struct ext4_dir_entry_2 *) ((char *) de + de_len); } return 0; } static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block, struct ext4_dir_entry *de) { struct super_block *sb = dir->i_sb; if (!is_dx(dir)) return 0; if (block == 0) return 1; if (de->inode == 0 && ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) == sb->s_blocksize) return 1; return 0; } /* * __ext4_find_entry() * * finds an entry in the specified directory with the wanted name. It * returns the cache buffer in which the entry was found, and the entry * itself (as a parameter - res_dir). It does NOT read the inode of the * entry - you'll have to do that yourself if you want to. * * The returned buffer_head has ->b_count elevated. The caller is expected * to brelse() it when appropriate. */ static struct buffer_head *__ext4_find_entry(struct inode *dir, struct ext4_filename *fname, struct ext4_dir_entry_2 **res_dir, int *inlined) { struct super_block *sb; struct buffer_head *bh_use[NAMEI_RA_SIZE]; struct buffer_head *bh, *ret = NULL; ext4_lblk_t start, block; const u8 *name = fname->usr_fname->name; size_t ra_max = 0; /* Number of bh's in the readahead buffer, bh_use[] */ size_t ra_ptr = 0; /* Current index into readahead buffer */ ext4_lblk_t nblocks; int i, namelen, retval; *res_dir = NULL; sb = dir->i_sb; namelen = fname->usr_fname->len; if (namelen > EXT4_NAME_LEN) return NULL; if (ext4_has_inline_data(dir)) { int has_inline_data = 1; ret = ext4_find_inline_entry(dir, fname, res_dir, &has_inline_data); if (inlined) *inlined = has_inline_data; if (has_inline_data) goto cleanup_and_exit; } if ((namelen <= 2) && (name[0] == '.') && (name[1] == '.' || name[1] == '\0')) { /* * "." or ".." will only be in the first block * NFS may look up ".."; "." should be handled by the VFS */ block = start = 0; nblocks = 1; goto restart; } if (is_dx(dir)) { ret = ext4_dx_find_entry(dir, fname, res_dir); /* * On success, or if the error was file not found, * return. Otherwise, fall back to doing a search the * old fashioned way. */ if (!IS_ERR(ret) || PTR_ERR(ret) != ERR_BAD_DX_DIR) goto cleanup_and_exit; dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " "falling back\n")); ret = NULL; } nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); if (!nblocks) { ret = NULL; goto cleanup_and_exit; } start = EXT4_I(dir)->i_dir_start_lookup; if (start >= nblocks) start = 0; block = start; restart: do { /* * We deal with the read-ahead logic here. */ cond_resched(); if (ra_ptr >= ra_max) { /* Refill the readahead buffer */ ra_ptr = 0; if (block < start) ra_max = start - block; else ra_max = nblocks - block; ra_max = min(ra_max, ARRAY_SIZE(bh_use)); retval = ext4_bread_batch(dir, block, ra_max, false /* wait */, bh_use); if (retval) { ret = ERR_PTR(retval); ra_max = 0; goto cleanup_and_exit; } } if ((bh = bh_use[ra_ptr++]) == NULL) goto next; wait_on_buffer(bh); if (!buffer_uptodate(bh)) { EXT4_ERROR_INODE_ERR(dir, EIO, "reading directory lblock %lu", (unsigned long) block); brelse(bh); ret = ERR_PTR(-EIO); goto cleanup_and_exit; } if (!buffer_verified(bh) && !is_dx_internal_node(dir, block, (struct ext4_dir_entry *)bh->b_data) && !ext4_dirblock_csum_verify(dir, bh)) { EXT4_ERROR_INODE_ERR(dir, EFSBADCRC, "checksumming directory " "block %lu", (unsigned long)block); brelse(bh); ret = ERR_PTR(-EFSBADCRC); goto cleanup_and_exit; } set_buffer_verified(bh); i = search_dirblock(bh, dir, fname, block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); if (i == 1) { EXT4_I(dir)->i_dir_start_lookup = block; ret = bh; goto cleanup_and_exit; } else { brelse(bh); if (i < 0) goto cleanup_and_exit; } next: if (++block >= nblocks) block = 0; } while (block != start); /* * If the directory has grown while we were searching, then * search the last part of the directory before giving up. */ block = nblocks; nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); if (block < nblocks) { start = 0; goto restart; } cleanup_and_exit: /* Clean up the read-ahead blocks */ for (; ra_ptr < ra_max; ra_ptr++) brelse(bh_use[ra_ptr]); return ret; } static struct buffer_head *ext4_find_entry(struct inode *dir, const struct qstr *d_name, struct ext4_dir_entry_2 **res_dir, int *inlined) { int err; struct ext4_filename fname; struct buffer_head *bh; err = ext4_fname_setup_filename(dir, d_name, 1, &fname); if (err == -ENOENT) return NULL; if (err) return ERR_PTR(err); bh = __ext4_find_entry(dir, &fname, res_dir, inlined); ext4_fname_free_filename(&fname); return bh; } static struct buffer_head *ext4_lookup_entry(struct inode *dir, struct dentry *dentry, struct ext4_dir_entry_2 **res_dir) { int err; struct ext4_filename fname; struct buffer_head *bh; err = ext4_fname_prepare_lookup(dir, dentry, &fname); if (err == -ENOENT) return NULL; if (err) return ERR_PTR(err); bh = __ext4_find_entry(dir, &fname, res_dir, NULL); ext4_fname_free_filename(&fname); return bh; } static struct buffer_head * ext4_dx_find_entry(struct inode *dir, struct ext4_filename *fname, struct ext4_dir_entry_2 **res_dir) { struct super_block * sb = dir->i_sb; struct dx_frame frames[EXT4_HTREE_LEVEL], *frame; struct buffer_head *bh; ext4_lblk_t block; int retval; #ifdef CONFIG_FS_ENCRYPTION *res_dir = NULL; #endif frame = dx_probe(fname, dir, NULL, frames); if (IS_ERR(frame)) return (struct buffer_head *) frame; do { block = dx_get_block(frame->at); bh = ext4_read_dirblock(dir, block, DIRENT_HTREE); if (IS_ERR(bh)) goto errout; retval = search_dirblock(bh, dir, fname, block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); if (retval == 1) goto success; brelse(bh); if (retval == -1) { bh = ERR_PTR(ERR_BAD_DX_DIR); goto errout; } /* Check to see if we should continue to search */ retval = ext4_htree_next_block(dir, fname->hinfo.hash, frame, frames, NULL); if (retval < 0) { ext4_warning_inode(dir, "error %d reading directory index block", retval); bh = ERR_PTR(retval); goto errout; } } while (retval == 1); bh = NULL; errout: dxtrace(printk(KERN_DEBUG "%s not found\n", fname->usr_fname->name)); success: dx_release(frames); return bh; } static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode; struct ext4_dir_entry_2 *de; struct buffer_head *bh; if (dentry->d_name.len > EXT4_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); bh = ext4_lookup_entry(dir, dentry, &de); if (IS_ERR(bh)) return ERR_CAST(bh); inode = NULL; if (bh) { __u32 ino = le32_to_cpu(de->inode); brelse(bh); if (!ext4_valid_inum(dir->i_sb, ino)) { EXT4_ERROR_INODE(dir, "bad inode number: %u", ino); return ERR_PTR(-EFSCORRUPTED); } if (unlikely(ino == dir->i_ino)) { EXT4_ERROR_INODE(dir, "'%pd' linked to parent dir", dentry); return ERR_PTR(-EFSCORRUPTED); } inode = ext4_iget(dir->i_sb, ino, EXT4_IGET_NORMAL); if (inode == ERR_PTR(-ESTALE)) { EXT4_ERROR_INODE(dir, "deleted inode referenced: %u", ino); return ERR_PTR(-EFSCORRUPTED); } if (!IS_ERR(inode) && IS_ENCRYPTED(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && !fscrypt_has_permitted_context(dir, inode)) { ext4_warning(inode->i_sb, "Inconsistent encryption contexts: %lu/%lu", dir->i_ino, inode->i_ino); iput(inode); return ERR_PTR(-EPERM); } } if (IS_ENABLED(CONFIG_UNICODE) && !inode && IS_CASEFOLDED(dir)) { /* Eventually we want to call d_add_ci(dentry, NULL) * for negative dentries in the encoding case as * well. For now, prevent the negative dentry * from being cached. */ return NULL; } return d_splice_alias(inode, dentry); } struct dentry *ext4_get_parent(struct dentry *child) { __u32 ino; struct ext4_dir_entry_2 * de; struct buffer_head *bh; bh = ext4_find_entry(d_inode(child), &dotdot_name, &de, NULL); if (IS_ERR(bh)) return ERR_CAST(bh); if (!bh) return ERR_PTR(-ENOENT); ino = le32_to_cpu(de->inode); brelse(bh); if (!ext4_valid_inum(child->d_sb, ino)) { EXT4_ERROR_INODE(d_inode(child), "bad parent inode number: %u", ino); return ERR_PTR(-EFSCORRUPTED); } return d_obtain_alias(ext4_iget(child->d_sb, ino, EXT4_IGET_NORMAL)); } /* * Move count entries from end of map between two memory locations. * Returns pointer to last entry moved. */ static struct ext4_dir_entry_2 * dx_move_dirents(struct inode *dir, char *from, char *to, struct dx_map_entry *map, int count, unsigned blocksize) { unsigned rec_len = 0; while (count--) { struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + (map->offs<<2)); rec_len = ext4_dir_rec_len(de->name_len, dir); memcpy (to, de, rec_len); ((struct ext4_dir_entry_2 *) to)->rec_len = ext4_rec_len_to_disk(rec_len, blocksize); /* wipe dir_entry excluding the rec_len field */ de->inode = 0; memset(&de->name_len, 0, ext4_rec_len_from_disk(de->rec_len, blocksize) - offsetof(struct ext4_dir_entry_2, name_len)); map++; to += rec_len; } return (struct ext4_dir_entry_2 *) (to - rec_len); } /* * Compact each dir entry in the range to the minimal rec_len. * Returns pointer to last entry in range. */ static struct ext4_dir_entry_2 *dx_pack_dirents(struct inode *dir, char *base, unsigned int blocksize) { struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base; unsigned rec_len = 0; prev = to = de; while ((char*)de < base + blocksize) { next = ext4_next_entry(de, blocksize); if (de->inode && de->name_len) { rec_len = ext4_dir_rec_len(de->name_len, dir); if (de > to) memmove(to, de, rec_len); to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize); prev = to; to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len); } de = next; } return prev; } /* * Split a full leaf block to make room for a new dir entry. * Allocate a new block, and move entries so that they are approx. equally full. * Returns pointer to de in block into which the new entry will be inserted. */ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, struct buffer_head **bh,struct dx_frame *frame, struct dx_hash_info *hinfo) { unsigned blocksize = dir->i_sb->s_blocksize; unsigned continued; int count; struct buffer_head *bh2; ext4_lblk_t newblock; u32 hash2; struct dx_map_entry *map; char *data1 = (*bh)->b_data, *data2; unsigned split, move, size; struct ext4_dir_entry_2 *de = NULL, *de2; int csum_size = 0; int err = 0, i; if (ext4_has_metadata_csum(dir->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); bh2 = ext4_append(handle, dir, &newblock); if (IS_ERR(bh2)) { brelse(*bh); *bh = NULL; return (struct ext4_dir_entry_2 *) bh2; } BUFFER_TRACE(*bh, "get_write_access"); err = ext4_journal_get_write_access(handle, dir->i_sb, *bh, EXT4_JTR_NONE); if (err) goto journal_error; BUFFER_TRACE(frame->bh, "get_write_access"); err = ext4_journal_get_write_access(handle, dir->i_sb, frame->bh, EXT4_JTR_NONE); if (err) goto journal_error; data2 = bh2->b_data; /* create map in the end of data2 block */ map = (struct dx_map_entry *) (data2 + blocksize); count = dx_make_map(dir, *bh, hinfo, map); if (count < 0) { err = count; goto journal_error; } map -= count; dx_sort_map(map, count); /* Ensure that neither split block is over half full */ size = 0; move = 0; for (i = count-1; i >= 0; i--) { /* is more than half of this entry in 2nd half of the block? */ if (size + map[i].size/2 > blocksize/2) break; size += map[i].size; move++; } /* * map index at which we will split * * If the sum of active entries didn't exceed half the block size, just * split it in half by count; each resulting block will have at least * half the space free. */ if (i > 0) split = count - move; else split = count/2; hash2 = map[split].hash; continued = hash2 == map[split - 1].hash; dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n", (unsigned long)dx_get_block(frame->at), hash2, split, count-split)); /* Fancy dance to stay within two buffers */ de2 = dx_move_dirents(dir, data1, data2, map + split, count - split, blocksize); de = dx_pack_dirents(dir, data1, blocksize); de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) - (char *) de, blocksize); de2->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) - (char *) de2, blocksize); if (csum_size) { ext4_initialize_dirent_tail(*bh, blocksize); ext4_initialize_dirent_tail(bh2, blocksize); } dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); /* Which block gets the new entry? */ if (hinfo->hash >= hash2) { swap(*bh, bh2); de = de2; } dx_insert_block(frame, hash2 + continued, newblock); err = ext4_handle_dirty_dirblock(handle, dir, bh2); if (err) goto journal_error; err = ext4_handle_dirty_dx_node(handle, dir, frame->bh); if (err) goto journal_error; brelse(bh2); dxtrace(dx_show_index("frame", frame->entries)); return de; journal_error: brelse(*bh); brelse(bh2); *bh = NULL; ext4_std_error(dir->i_sb, err); return ERR_PTR(err); } int ext4_find_dest_de(struct inode *dir, struct inode *inode, struct buffer_head *bh, void *buf, int buf_size, struct ext4_filename *fname, struct ext4_dir_entry_2 **dest_de) { struct ext4_dir_entry_2 *de; unsigned short reclen = ext4_dir_rec_len(fname_len(fname), dir); int nlen, rlen; unsigned int offset = 0; char *top; de = buf; top = buf + buf_size - reclen; while ((char *) de <= top) { if (ext4_check_dir_entry(dir, NULL, de, bh, buf, buf_size, offset)) return -EFSCORRUPTED; if (ext4_match(dir, fname, de)) return -EEXIST; nlen = ext4_dir_rec_len(de->name_len, dir); rlen = ext4_rec_len_from_disk(de->rec_len, buf_size); if ((de->inode ? rlen - nlen : rlen) >= reclen) break; de = (struct ext4_dir_entry_2 *)((char *)de + rlen); offset += rlen; } if ((char *) de > top) return -ENOSPC; *dest_de = de; return 0; } void ext4_insert_dentry(struct inode *dir, struct inode *inode, struct ext4_dir_entry_2 *de, int buf_size, struct ext4_filename *fname) { int nlen, rlen; nlen = ext4_dir_rec_len(de->name_len, dir); rlen = ext4_rec_len_from_disk(de->rec_len, buf_size); if (de->inode) { struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen); de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, buf_size); de->rec_len = ext4_rec_len_to_disk(nlen, buf_size); de = de1; } de->file_type = EXT4_FT_UNKNOWN; de->inode = cpu_to_le32(inode->i_ino); ext4_set_de_type(inode->i_sb, de, inode->i_mode); de->name_len = fname_len(fname); memcpy(de->name, fname_name(fname), fname_len(fname)); if (ext4_hash_in_dirent(dir)) { struct dx_hash_info *hinfo = &fname->hinfo; EXT4_DIRENT_HASHES(de)->hash = cpu_to_le32(hinfo->hash); EXT4_DIRENT_HASHES(de)->minor_hash = cpu_to_le32(hinfo->minor_hash); } } /* * Add a new entry into a directory (leaf) block. If de is non-NULL, * it points to a directory entry which is guaranteed to be large * enough for new directory entry. If de is NULL, then * add_dirent_to_buf will attempt search the directory block for * space. It will return -ENOSPC if no space is available, and -EIO * and -EEXIST if directory entry already exists. */ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname, struct inode *dir, struct inode *inode, struct ext4_dir_entry_2 *de, struct buffer_head *bh) { unsigned int blocksize = dir->i_sb->s_blocksize; int csum_size = 0; int err, err2; if (ext4_has_metadata_csum(inode->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); if (!de) { err = ext4_find_dest_de(dir, inode, bh, bh->b_data, blocksize - csum_size, fname, &de); if (err) return err; } BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, dir->i_sb, bh, EXT4_JTR_NONE); if (err) { ext4_std_error(dir->i_sb, err); return err; } /* By now the buffer is marked for journaling */ ext4_insert_dentry(dir, inode, de, blocksize, fname); /* * XXX shouldn't update any times until successful * completion of syscall, but too many callers depend * on this. * * XXX similarly, too many callers depend on * ext4_new_inode() setting the times, but error * recovery deletes the inode, so the worst that can * happen is that the times are slightly out of date * and/or different from the directory change time. */ inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); ext4_update_dx_flag(dir); inode_inc_iversion(dir); err2 = ext4_mark_inode_dirty(handle, dir); BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); err = ext4_handle_dirty_dirblock(handle, dir, bh); if (err) ext4_std_error(dir->i_sb, err); return err ? err : err2; } static bool ext4_check_dx_root(struct inode *dir, struct dx_root *root) { struct fake_dirent *fde; const char *error_msg; unsigned int rlen; unsigned int blocksize = dir->i_sb->s_blocksize; char *blockend = (char *)root + dir->i_sb->s_blocksize; fde = &root->dot; if (unlikely(fde->name_len != 1)) { error_msg = "invalid name_len for '.'"; goto corrupted; } if (unlikely(strncmp(root->dot_name, ".", fde->name_len))) { error_msg = "invalid name for '.'"; goto corrupted; } rlen = ext4_rec_len_from_disk(fde->rec_len, blocksize); if (unlikely((char *)fde + rlen >= blockend)) { error_msg = "invalid rec_len for '.'"; goto corrupted; } fde = &root->dotdot; if (unlikely(fde->name_len != 2)) { error_msg = "invalid name_len for '..'"; goto corrupted; } if (unlikely(strncmp(root->dotdot_name, "..", fde->name_len))) { error_msg = "invalid name for '..'"; goto corrupted; } rlen = ext4_rec_len_from_disk(fde->rec_len, blocksize); if (unlikely((char *)fde + rlen >= blockend)) { error_msg = "invalid rec_len for '..'"; goto corrupted; } return true; corrupted: EXT4_ERROR_INODE(dir, "Corrupt dir, %s, running e2fsck is recommended", error_msg); return false; } /* * This converts a one block unindexed directory to a 3 block indexed * directory, and adds the dentry to the indexed directory. */ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, struct inode *dir, struct inode *inode, struct buffer_head *bh) { struct buffer_head *bh2; struct dx_root *root; struct dx_frame frames[EXT4_HTREE_LEVEL], *frame; struct dx_entry *entries; struct ext4_dir_entry_2 *de, *de2; char *data2, *top; unsigned len; int retval; unsigned blocksize; ext4_lblk_t block; struct fake_dirent *fde; int csum_size = 0; if (ext4_has_metadata_csum(inode->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); blocksize = dir->i_sb->s_blocksize; dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino)); BUFFER_TRACE(bh, "get_write_access"); retval = ext4_journal_get_write_access(handle, dir->i_sb, bh, EXT4_JTR_NONE); if (retval) { ext4_std_error(dir->i_sb, retval); brelse(bh); return retval; } root = (struct dx_root *) bh->b_data; if (!ext4_check_dx_root(dir, root)) { brelse(bh); return -EFSCORRUPTED; } /* The 0th block becomes the root, move the dirents out */ fde = &root->dotdot; de = (struct ext4_dir_entry_2 *)((char *)fde + ext4_rec_len_from_disk(fde->rec_len, blocksize)); len = ((char *) root) + (blocksize - csum_size) - (char *) de; /* Allocate new block for the 0th block's dirents */ bh2 = ext4_append(handle, dir, &block); if (IS_ERR(bh2)) { brelse(bh); return PTR_ERR(bh2); } ext4_set_inode_flag(dir, EXT4_INODE_INDEX); data2 = bh2->b_data; memcpy(data2, de, len); memset(de, 0, len); /* wipe old data */ de = (struct ext4_dir_entry_2 *) data2; top = data2 + len; while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) { if (ext4_check_dir_entry(dir, NULL, de, bh2, data2, len, (char *)de - data2)) { brelse(bh2); brelse(bh); return -EFSCORRUPTED; } de = de2; } de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) - (char *) de, blocksize); if (csum_size) ext4_initialize_dirent_tail(bh2, blocksize); /* Initialize the root; the dot dirents already exist */ de = (struct ext4_dir_entry_2 *) (&root->dotdot); de->rec_len = ext4_rec_len_to_disk( blocksize - ext4_dir_rec_len(2, NULL), blocksize); memset (&root->info, 0, sizeof(root->info)); root->info.info_length = sizeof(root->info); if (ext4_hash_in_dirent(dir)) root->info.hash_version = DX_HASH_SIPHASH; else root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; entries = root->entries; dx_set_block(entries, 1); dx_set_count(entries, 1); dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info))); /* Initialize as for dx_probe */ fname->hinfo.hash_version = root->info.hash_version; if (fname->hinfo.hash_version <= DX_HASH_TEA) fname->hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; /* casefolded encrypted hashes are computed on fname setup */ if (!ext4_hash_in_dirent(dir)) { int err = ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), &fname->hinfo); if (err < 0) { brelse(bh2); brelse(bh); return err; } } memset(frames, 0, sizeof(frames)); frame = frames; frame->entries = entries; frame->at = entries; frame->bh = bh; retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh); if (retval) goto out_frames; retval = ext4_handle_dirty_dirblock(handle, dir, bh2); if (retval) goto out_frames; de = do_split(handle,dir, &bh2, frame, &fname->hinfo); if (IS_ERR(de)) { retval = PTR_ERR(de); goto out_frames; } retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh2); out_frames: /* * Even if the block split failed, we have to properly write * out all the changes we did so far. Otherwise we can end up * with corrupted filesystem. */ if (retval) ext4_mark_inode_dirty(handle, dir); dx_release(frames); brelse(bh2); return retval; } /* * ext4_add_entry() * * adds a file entry to the specified directory, using the same * semantics as ext4_find_entry(). It returns NULL if it failed. * * NOTE!! The inode part of 'de' is left at 0 - which means you * may not sleep between calling this and putting something into * the entry, as someone else might have used it while you slept. */ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, struct inode *inode) { struct inode *dir = d_inode(dentry->d_parent); struct buffer_head *bh = NULL; struct ext4_dir_entry_2 *de; struct super_block *sb; struct ext4_filename fname; int retval; int dx_fallback=0; unsigned blocksize; ext4_lblk_t block, blocks; int csum_size = 0; if (ext4_has_metadata_csum(inode->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); sb = dir->i_sb; blocksize = sb->s_blocksize; if (fscrypt_is_nokey_name(dentry)) return -ENOKEY; #if IS_ENABLED(CONFIG_UNICODE) if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) && utf8_validate(sb->s_encoding, &dentry->d_name)) return -EINVAL; #endif retval = ext4_fname_setup_filename(dir, &dentry->d_name, 0, &fname); if (retval) return retval; if (ext4_has_inline_data(dir)) { retval = ext4_try_add_inline_entry(handle, &fname, dir, inode); if (retval < 0) goto out; if (retval == 1) { retval = 0; goto out; } } if (is_dx(dir)) { retval = ext4_dx_add_entry(handle, &fname, dir, inode); if (!retval || (retval != ERR_BAD_DX_DIR)) goto out; /* Can we just ignore htree data? */ if (ext4_has_metadata_csum(sb)) { EXT4_ERROR_INODE(dir, "Directory has corrupted htree index."); retval = -EFSCORRUPTED; goto out; } ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); dx_fallback++; retval = ext4_mark_inode_dirty(handle, dir); if (unlikely(retval)) goto out; } blocks = dir->i_size >> sb->s_blocksize_bits; for (block = 0; block < blocks; block++) { bh = ext4_read_dirblock(dir, block, DIRENT); if (bh == NULL) { bh = ext4_bread(handle, dir, block, EXT4_GET_BLOCKS_CREATE); goto add_to_new_block; } if (IS_ERR(bh)) { retval = PTR_ERR(bh); bh = NULL; goto out; } retval = add_dirent_to_buf(handle, &fname, dir, inode, NULL, bh); if (retval != -ENOSPC) goto out; if (blocks == 1 && !dx_fallback && ext4_has_feature_dir_index(sb)) { retval = make_indexed_dir(handle, &fname, dir, inode, bh); bh = NULL; /* make_indexed_dir releases bh */ goto out; } brelse(bh); } bh = ext4_append(handle, dir, &block); add_to_new_block: if (IS_ERR(bh)) { retval = PTR_ERR(bh); bh = NULL; goto out; } de = (struct ext4_dir_entry_2 *) bh->b_data; de->inode = 0; de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize); if (csum_size) ext4_initialize_dirent_tail(bh, blocksize); retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh); out: ext4_fname_free_filename(&fname); brelse(bh); if (retval == 0) ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY); return retval; } /* * Returns 0 for success, or a negative error value */ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, struct inode *dir, struct inode *inode) { struct dx_frame frames[EXT4_HTREE_LEVEL], *frame; struct dx_entry *entries, *at; struct buffer_head *bh; struct super_block *sb = dir->i_sb; struct ext4_dir_entry_2 *de; int restart; int err; again: restart = 0; frame = dx_probe(fname, dir, NULL, frames); if (IS_ERR(frame)) return PTR_ERR(frame); entries = frame->entries; at = frame->at; bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT_HTREE); if (IS_ERR(bh)) { err = PTR_ERR(bh); bh = NULL; goto cleanup; } BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE); if (err) goto journal_error; err = add_dirent_to_buf(handle, fname, dir, inode, NULL, bh); if (err != -ENOSPC) goto cleanup; err = 0; /* Block full, should compress but for now just split */ dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", dx_get_count(entries), dx_get_limit(entries))); /* Need to split index? */ if (dx_get_count(entries) == dx_get_limit(entries)) { ext4_lblk_t newblock; int levels = frame - frames + 1; unsigned int icount; int add_level = 1; struct dx_entry *entries2; struct dx_node *node2; struct buffer_head *bh2; while (frame > frames) { if (dx_get_count((frame - 1)->entries) < dx_get_limit((frame - 1)->entries)) { add_level = 0; break; } frame--; /* split higher index block */ at = frame->at; entries = frame->entries; restart = 1; } if (add_level && levels == ext4_dir_htree_level(sb)) { ext4_warning(sb, "Directory (ino: %lu) index full, " "reach max htree level :%d", dir->i_ino, levels); if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) { ext4_warning(sb, "Large directory feature is " "not enabled on this " "filesystem"); } err = -ENOSPC; goto cleanup; } icount = dx_get_count(entries); bh2 = ext4_append(handle, dir, &newblock); if (IS_ERR(bh2)) { err = PTR_ERR(bh2); goto cleanup; } node2 = (struct dx_node *)(bh2->b_data); entries2 = node2->entries; memset(&node2->fake, 0, sizeof(struct fake_dirent)); node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize, sb->s_blocksize); BUFFER_TRACE(frame->bh, "get_write_access"); err = ext4_journal_get_write_access(handle, sb, frame->bh, EXT4_JTR_NONE); if (err) goto journal_error; if (!add_level) { unsigned icount1 = icount/2, icount2 = icount - icount1; unsigned hash2 = dx_get_hash(entries + icount1); dxtrace(printk(KERN_DEBUG "Split index %i/%i\n", icount1, icount2)); BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ err = ext4_journal_get_write_access(handle, sb, (frame - 1)->bh, EXT4_JTR_NONE); if (err) goto journal_error; memcpy((char *) entries2, (char *) (entries + icount1), icount2 * sizeof(struct dx_entry)); dx_set_count(entries, icount1); dx_set_count(entries2, icount2); dx_set_limit(entries2, dx_node_limit(dir)); /* Which index block gets the new entry? */ if (at - entries >= icount1) { frame->at = at - entries - icount1 + entries2; frame->entries = entries = entries2; swap(frame->bh, bh2); } dx_insert_block((frame - 1), hash2, newblock); dxtrace(dx_show_index("node", frame->entries)); dxtrace(dx_show_index("node", ((struct dx_node *) bh2->b_data)->entries)); err = ext4_handle_dirty_dx_node(handle, dir, bh2); if (err) goto journal_error; brelse (bh2); err = ext4_handle_dirty_dx_node(handle, dir, (frame - 1)->bh); if (err) goto journal_error; err = ext4_handle_dirty_dx_node(handle, dir, frame->bh); if (restart || err) goto journal_error; } else { struct dx_root *dxroot; memcpy((char *) entries2, (char *) entries, icount * sizeof(struct dx_entry)); dx_set_limit(entries2, dx_node_limit(dir)); /* Set up root */ dx_set_count(entries, 1); dx_set_block(entries + 0, newblock); dxroot = (struct dx_root *)frames[0].bh->b_data; dxroot->info.indirect_levels += 1; dxtrace(printk(KERN_DEBUG "Creating %d level index...\n", dxroot->info.indirect_levels)); err = ext4_handle_dirty_dx_node(handle, dir, frame->bh); if (err) goto journal_error; err = ext4_handle_dirty_dx_node(handle, dir, bh2); brelse(bh2); restart = 1; goto journal_error; } } de = do_split(handle, dir, &bh, frame, &fname->hinfo); if (IS_ERR(de)) { err = PTR_ERR(de); goto cleanup; } err = add_dirent_to_buf(handle, fname, dir, inode, de, bh); goto cleanup; journal_error: ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */ cleanup: brelse(bh); dx_release(frames); /* @restart is true means htree-path has been changed, we need to * repeat dx_probe() to find out valid htree-path */ if (restart && err == 0) goto again; return err; } /* * ext4_generic_delete_entry deletes a directory entry by merging it * with the previous entry */ int ext4_generic_delete_entry(struct inode *dir, struct ext4_dir_entry_2 *de_del, struct buffer_head *bh, void *entry_buf, int buf_size, int csum_size) { struct ext4_dir_entry_2 *de, *pde; unsigned int blocksize = dir->i_sb->s_blocksize; int i; i = 0; pde = NULL; de = entry_buf; while (i < buf_size - csum_size) { if (ext4_check_dir_entry(dir, NULL, de, bh, entry_buf, buf_size, i)) return -EFSCORRUPTED; if (de == de_del) { if (pde) { pde->rec_len = ext4_rec_len_to_disk( ext4_rec_len_from_disk(pde->rec_len, blocksize) + ext4_rec_len_from_disk(de->rec_len, blocksize), blocksize); /* wipe entire dir_entry */ memset(de, 0, ext4_rec_len_from_disk(de->rec_len, blocksize)); } else { /* wipe dir_entry excluding the rec_len field */ de->inode = 0; memset(&de->name_len, 0, ext4_rec_len_from_disk(de->rec_len, blocksize) - offsetof(struct ext4_dir_entry_2, name_len)); } inode_inc_iversion(dir); return 0; } i += ext4_rec_len_from_disk(de->rec_len, blocksize); pde = de; de = ext4_next_entry(de, blocksize); } return -ENOENT; } static int ext4_delete_entry(handle_t *handle, struct inode *dir, struct ext4_dir_entry_2 *de_del, struct buffer_head *bh) { int err, csum_size = 0; if (ext4_has_inline_data(dir)) { int has_inline_data = 1; err = ext4_delete_inline_entry(handle, dir, de_del, bh, &has_inline_data); if (has_inline_data) return err; } if (ext4_has_metadata_csum(dir->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, dir->i_sb, bh, EXT4_JTR_NONE); if (unlikely(err)) goto out; err = ext4_generic_delete_entry(dir, de_del, bh, bh->b_data, dir->i_sb->s_blocksize, csum_size); if (err) goto out; BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); err = ext4_handle_dirty_dirblock(handle, dir, bh); if (unlikely(err)) goto out; return 0; out: if (err != -ENOENT) ext4_std_error(dir->i_sb, err); return err; } /* * Set directory link count to 1 if nlinks > EXT4_LINK_MAX, or if nlinks == 2 * since this indicates that nlinks count was previously 1 to avoid overflowing * the 16-bit i_links_count field on disk. Directories with i_nlink == 1 mean * that subdirectory link counts are not being maintained accurately. * * The caller has already checked for i_nlink overflow in case the DIR_LINK * feature is not enabled and returned -EMLINK. The is_dx() check is a proxy * for checking S_ISDIR(inode) (since the INODE_INDEX feature will not be set * on regular files) and to avoid creating huge/slow non-HTREE directories. */ static void ext4_inc_count(struct inode *inode) { inc_nlink(inode); if (is_dx(inode) && (inode->i_nlink > EXT4_LINK_MAX || inode->i_nlink == 2)) set_nlink(inode, 1); } /* * If a directory had nlink == 1, then we should let it be 1. This indicates * directory has >EXT4_LINK_MAX subdirs. */ static void ext4_dec_count(struct inode *inode) { if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) drop_nlink(inode); } /* * Add non-directory inode to a directory. On success, the inode reference is * consumed by dentry is instantiation. This is also indicated by clearing of * *inodep pointer. On failure, the caller is responsible for dropping the * inode reference in the safe context. */ static int ext4_add_nondir(handle_t *handle, struct dentry *dentry, struct inode **inodep) { struct inode *dir = d_inode(dentry->d_parent); struct inode *inode = *inodep; int err = ext4_add_entry(handle, dentry, inode); if (!err) { err = ext4_mark_inode_dirty(handle, inode); if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); d_instantiate_new(dentry, inode); *inodep = NULL; return err; } drop_nlink(inode); ext4_mark_inode_dirty(handle, inode); ext4_orphan_add(handle, inode); unlock_new_inode(inode); return err; } /* * By the time this is called, we already have created * the directory cache entry for the new file, but it * is so far negative - it has no inode. * * If the create succeeds, we fill in the inode information * with d_instantiate(). */ static int ext4_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { handle_t *handle; struct inode *inode; int err, credits, retries = 0; err = dquot_initialize(dir); if (err) return err; credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3); retry: inode = ext4_new_inode_start_handle(idmap, dir, mode, &dentry->d_name, 0, NULL, EXT4_HT_DIR, credits); handle = ext4_journal_current_handle(); err = PTR_ERR(inode); if (!IS_ERR(inode)) { inode->i_op = &ext4_file_inode_operations; inode->i_fop = &ext4_file_operations; ext4_set_aops(inode); err = ext4_add_nondir(handle, dentry, &inode); if (!err) ext4_fc_track_create(handle, dentry); } if (handle) ext4_journal_stop(handle); if (!IS_ERR_OR_NULL(inode)) iput(inode); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; } static int ext4_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { handle_t *handle; struct inode *inode; int err, credits, retries = 0; err = dquot_initialize(dir); if (err) return err; credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3); retry: inode = ext4_new_inode_start_handle(idmap, dir, mode, &dentry->d_name, 0, NULL, EXT4_HT_DIR, credits); handle = ext4_journal_current_handle(); err = PTR_ERR(inode); if (!IS_ERR(inode)) { init_special_inode(inode, inode->i_mode, rdev); inode->i_op = &ext4_special_inode_operations; err = ext4_add_nondir(handle, dentry, &inode); if (!err) ext4_fc_track_create(handle, dentry); } if (handle) ext4_journal_stop(handle); if (!IS_ERR_OR_NULL(inode)) iput(inode); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; } static int ext4_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode) { handle_t *handle; struct inode *inode; int err, retries = 0; err = dquot_initialize(dir); if (err) return err; retry: inode = ext4_new_inode_start_handle(idmap, dir, mode, NULL, 0, NULL, EXT4_HT_DIR, EXT4_MAXQUOTAS_TRANS_BLOCKS(dir->i_sb) + 4 + EXT4_XATTR_TRANS_BLOCKS); handle = ext4_journal_current_handle(); err = PTR_ERR(inode); if (!IS_ERR(inode)) { inode->i_op = &ext4_file_inode_operations; inode->i_fop = &ext4_file_operations; ext4_set_aops(inode); d_tmpfile(file, inode); err = ext4_orphan_add(handle, inode); if (err) goto err_unlock_inode; mark_inode_dirty(inode); unlock_new_inode(inode); } if (handle) ext4_journal_stop(handle); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return finish_open_simple(file, err); err_unlock_inode: ext4_journal_stop(handle); unlock_new_inode(inode); return err; } struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode, struct ext4_dir_entry_2 *de, int blocksize, int csum_size, unsigned int parent_ino, int dotdot_real_len) { de->inode = cpu_to_le32(inode->i_ino); de->name_len = 1; de->rec_len = ext4_rec_len_to_disk(ext4_dir_rec_len(de->name_len, NULL), blocksize); strcpy(de->name, "."); ext4_set_de_type(inode->i_sb, de, S_IFDIR); de = ext4_next_entry(de, blocksize); de->inode = cpu_to_le32(parent_ino); de->name_len = 2; if (!dotdot_real_len) de->rec_len = ext4_rec_len_to_disk(blocksize - (csum_size + ext4_dir_rec_len(1, NULL)), blocksize); else de->rec_len = ext4_rec_len_to_disk( ext4_dir_rec_len(de->name_len, NULL), blocksize); strcpy(de->name, ".."); ext4_set_de_type(inode->i_sb, de, S_IFDIR); return ext4_next_entry(de, blocksize); } int ext4_init_new_dir(handle_t *handle, struct inode *dir, struct inode *inode) { struct buffer_head *dir_block = NULL; struct ext4_dir_entry_2 *de; ext4_lblk_t block = 0; unsigned int blocksize = dir->i_sb->s_blocksize; int csum_size = 0; int err; if (ext4_has_metadata_csum(dir->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { err = ext4_try_create_inline_dir(handle, dir, inode); if (err < 0 && err != -ENOSPC) goto out; if (!err) goto out; } inode->i_size = 0; dir_block = ext4_append(handle, inode, &block); if (IS_ERR(dir_block)) return PTR_ERR(dir_block); de = (struct ext4_dir_entry_2 *)dir_block->b_data; ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0); set_nlink(inode, 2); if (csum_size) ext4_initialize_dirent_tail(dir_block, blocksize); BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); err = ext4_handle_dirty_dirblock(handle, inode, dir_block); if (err) goto out; set_buffer_verified(dir_block); out: brelse(dir_block); return err; } static int ext4_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { handle_t *handle; struct inode *inode; int err, err2 = 0, credits, retries = 0; if (EXT4_DIR_LINK_MAX(dir)) return -EMLINK; err = dquot_initialize(dir); if (err) return err; credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3); retry: inode = ext4_new_inode_start_handle(idmap, dir, S_IFDIR | mode, &dentry->d_name, 0, NULL, EXT4_HT_DIR, credits); handle = ext4_journal_current_handle(); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_stop; inode->i_op = &ext4_dir_inode_operations; inode->i_fop = &ext4_dir_operations; err = ext4_init_new_dir(handle, dir, inode); if (err) goto out_clear_inode; err = ext4_mark_inode_dirty(handle, inode); if (!err) err = ext4_add_entry(handle, dentry, inode); if (err) { out_clear_inode: clear_nlink(inode); ext4_orphan_add(handle, inode); unlock_new_inode(inode); err2 = ext4_mark_inode_dirty(handle, inode); if (unlikely(err2)) err = err2; ext4_journal_stop(handle); iput(inode); goto out_retry; } ext4_inc_count(dir); ext4_update_dx_flag(dir); err = ext4_mark_inode_dirty(handle, dir); if (err) goto out_clear_inode; d_instantiate_new(dentry, inode); ext4_fc_track_create(handle, dentry); if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); out_stop: if (handle) ext4_journal_stop(handle); out_retry: if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; } /* * routine to check that the specified directory is empty (for rmdir) */ bool ext4_empty_dir(struct inode *inode) { unsigned int offset; struct buffer_head *bh; struct ext4_dir_entry_2 *de; struct super_block *sb; if (ext4_has_inline_data(inode)) { int has_inline_data = 1; int ret; ret = empty_inline_dir(inode, &has_inline_data); if (has_inline_data) return ret; } sb = inode->i_sb; if (inode->i_size < ext4_dir_rec_len(1, NULL) + ext4_dir_rec_len(2, NULL)) { EXT4_ERROR_INODE(inode, "invalid size"); return false; } bh = ext4_read_dirblock(inode, 0, EITHER); if (IS_ERR(bh)) return false; de = (struct ext4_dir_entry_2 *) bh->b_data; if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, 0) || le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) { ext4_warning_inode(inode, "directory missing '.'"); brelse(bh); return false; } offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); de = ext4_next_entry(de, sb->s_blocksize); if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, offset) || le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) { ext4_warning_inode(inode, "directory missing '..'"); brelse(bh); return false; } offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); while (offset < inode->i_size) { if (!(offset & (sb->s_blocksize - 1))) { unsigned int lblock; brelse(bh); lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb); bh = ext4_read_dirblock(inode, lblock, EITHER); if (bh == NULL) { offset += sb->s_blocksize; continue; } if (IS_ERR(bh)) return false; } de = (struct ext4_dir_entry_2 *) (bh->b_data + (offset & (sb->s_blocksize - 1))); if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, offset) || le32_to_cpu(de->inode)) { brelse(bh); return false; } offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); } brelse(bh); return true; } static int ext4_rmdir(struct inode *dir, struct dentry *dentry) { int retval; struct inode *inode; struct buffer_head *bh; struct ext4_dir_entry_2 *de; handle_t *handle = NULL; if (unlikely(ext4_forced_shutdown(dir->i_sb))) return -EIO; /* Initialize quotas before so that eventual writes go in * separate transaction */ retval = dquot_initialize(dir); if (retval) return retval; retval = dquot_initialize(d_inode(dentry)); if (retval) return retval; retval = -ENOENT; bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); if (IS_ERR(bh)) return PTR_ERR(bh); if (!bh) goto end_rmdir; inode = d_inode(dentry); retval = -EFSCORRUPTED; if (le32_to_cpu(de->inode) != inode->i_ino) goto end_rmdir; retval = -ENOTEMPTY; if (!ext4_empty_dir(inode)) goto end_rmdir; handle = ext4_journal_start(dir, EXT4_HT_DIR, EXT4_DATA_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) { retval = PTR_ERR(handle); handle = NULL; goto end_rmdir; } if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); retval = ext4_delete_entry(handle, dir, de, bh); if (retval) goto end_rmdir; if (!EXT4_DIR_LINK_EMPTY(inode)) ext4_warning_inode(inode, "empty directory '%.*s' has too many links (%u)", dentry->d_name.len, dentry->d_name.name, inode->i_nlink); inode_inc_iversion(inode); clear_nlink(inode); /* There's no need to set i_disksize: the fact that i_nlink is * zero will ensure that the right thing happens during any * recovery. */ inode->i_size = 0; ext4_orphan_add(handle, inode); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); inode_set_ctime_current(inode); retval = ext4_mark_inode_dirty(handle, inode); if (retval) goto end_rmdir; ext4_dec_count(dir); ext4_update_dx_flag(dir); ext4_fc_track_unlink(handle, dentry); retval = ext4_mark_inode_dirty(handle, dir); /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid * invalidating the dentries here, alongside with returning the * negative dentries at ext4_lookup(), when it is better * supported by the VFS for the CI case. */ if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) d_invalidate(dentry); end_rmdir: brelse(bh); if (handle) ext4_journal_stop(handle); return retval; } int __ext4_unlink(struct inode *dir, const struct qstr *d_name, struct inode *inode, struct dentry *dentry /* NULL during fast_commit recovery */) { int retval = -ENOENT; struct buffer_head *bh; struct ext4_dir_entry_2 *de; handle_t *handle; int skip_remove_dentry = 0; /* * Keep this outside the transaction; it may have to set up the * directory's encryption key, which isn't GFP_NOFS-safe. */ bh = ext4_find_entry(dir, d_name, &de, NULL); if (IS_ERR(bh)) return PTR_ERR(bh); if (!bh) return -ENOENT; if (le32_to_cpu(de->inode) != inode->i_ino) { /* * It's okay if we find dont find dentry which matches * the inode. That's because it might have gotten * renamed to a different inode number */ if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) skip_remove_dentry = 1; else goto out_bh; } handle = ext4_journal_start(dir, EXT4_HT_DIR, EXT4_DATA_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) { retval = PTR_ERR(handle); goto out_bh; } if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); if (!skip_remove_dentry) { retval = ext4_delete_entry(handle, dir, de, bh); if (retval) goto out_handle; inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); ext4_update_dx_flag(dir); retval = ext4_mark_inode_dirty(handle, dir); if (retval) goto out_handle; } else { retval = 0; } if (inode->i_nlink == 0) ext4_warning_inode(inode, "Deleting file '%.*s' with no links", d_name->len, d_name->name); else drop_nlink(inode); if (!inode->i_nlink) ext4_orphan_add(handle, inode); inode_set_ctime_current(inode); retval = ext4_mark_inode_dirty(handle, inode); if (dentry && !retval) ext4_fc_track_unlink(handle, dentry); out_handle: ext4_journal_stop(handle); out_bh: brelse(bh); return retval; } static int ext4_unlink(struct inode *dir, struct dentry *dentry) { int retval; if (unlikely(ext4_forced_shutdown(dir->i_sb))) return -EIO; trace_ext4_unlink_enter(dir, dentry); /* * Initialize quotas before so that eventual writes go * in separate transaction */ retval = dquot_initialize(dir); if (retval) goto out_trace; retval = dquot_initialize(d_inode(dentry)); if (retval) goto out_trace; retval = __ext4_unlink(dir, &dentry->d_name, d_inode(dentry), dentry); /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid * invalidating the dentries here, alongside with returning the * negative dentries at ext4_lookup(), when it is better * supported by the VFS for the CI case. */ if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) d_invalidate(dentry); out_trace: trace_ext4_unlink_exit(dentry, retval); return retval; } static int ext4_init_symlink_block(handle_t *handle, struct inode *inode, struct fscrypt_str *disk_link) { struct buffer_head *bh; char *kaddr; int err = 0; bh = ext4_bread(handle, inode, 0, EXT4_GET_BLOCKS_CREATE); if (IS_ERR(bh)) return PTR_ERR(bh); BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, inode->i_sb, bh, EXT4_JTR_NONE); if (err) goto out; kaddr = (char *)bh->b_data; memcpy(kaddr, disk_link->name, disk_link->len); inode->i_size = disk_link->len - 1; EXT4_I(inode)->i_disksize = inode->i_size; err = ext4_handle_dirty_metadata(handle, inode, bh); out: brelse(bh); return err; } static int ext4_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { handle_t *handle; struct inode *inode; int err, len = strlen(symname); int credits; struct fscrypt_str disk_link; int retries = 0; if (unlikely(ext4_forced_shutdown(dir->i_sb))) return -EIO; err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize, &disk_link); if (err) return err; err = dquot_initialize(dir); if (err) return err; /* * EXT4_INDEX_EXTRA_TRANS_BLOCKS for addition of entry into the * directory. +3 for inode, inode bitmap, group descriptor allocation. * EXT4_DATA_TRANS_BLOCKS for the data block allocation and * modification. */ credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3; retry: inode = ext4_new_inode_start_handle(idmap, dir, S_IFLNK|S_IRWXUGO, &dentry->d_name, 0, NULL, EXT4_HT_DIR, credits); handle = ext4_journal_current_handle(); if (IS_ERR(inode)) { if (handle) ext4_journal_stop(handle); err = PTR_ERR(inode); goto out_retry; } if (IS_ENCRYPTED(inode)) { err = fscrypt_encrypt_symlink(inode, symname, len, &disk_link); if (err) goto err_drop_inode; inode->i_op = &ext4_encrypted_symlink_inode_operations; } else { if ((disk_link.len > EXT4_N_BLOCKS * 4)) { inode->i_op = &ext4_symlink_inode_operations; } else { inode->i_op = &ext4_fast_symlink_inode_operations; inode->i_link = (char *)&EXT4_I(inode)->i_data; } } if ((disk_link.len > EXT4_N_BLOCKS * 4)) { /* alloc symlink block and fill it */ err = ext4_init_symlink_block(handle, inode, &disk_link); if (err) goto err_drop_inode; } else { /* clear the extent format for fast symlink */ ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); memcpy((char *)&EXT4_I(inode)->i_data, disk_link.name, disk_link.len); inode->i_size = disk_link.len - 1; EXT4_I(inode)->i_disksize = inode->i_size; } err = ext4_add_nondir(handle, dentry, &inode); if (handle) ext4_journal_stop(handle); iput(inode); goto out_retry; err_drop_inode: clear_nlink(inode); ext4_mark_inode_dirty(handle, inode); ext4_orphan_add(handle, inode); unlock_new_inode(inode); if (handle) ext4_journal_stop(handle); iput(inode); out_retry: if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; if (disk_link.name != (unsigned char *)symname) kfree(disk_link.name); return err; } int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry) { handle_t *handle; int err, retries = 0; retry: handle = ext4_journal_start(dir, EXT4_HT_DIR, (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS) + 1); if (IS_ERR(handle)) return PTR_ERR(handle); if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); inode_set_ctime_current(inode); ext4_inc_count(inode); ihold(inode); err = ext4_add_entry(handle, dentry, inode); if (!err) { err = ext4_mark_inode_dirty(handle, inode); /* this can happen only for tmpfile being * linked the first time */ if (inode->i_nlink == 1) ext4_orphan_del(handle, inode); d_instantiate(dentry, inode); ext4_fc_track_link(handle, dentry); } else { drop_nlink(inode); iput(inode); } ext4_journal_stop(handle); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; } static int ext4_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(old_dentry); int err; if (inode->i_nlink >= EXT4_LINK_MAX) return -EMLINK; err = fscrypt_prepare_link(old_dentry, dir, dentry); if (err) return err; if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) && (!projid_eq(EXT4_I(dir)->i_projid, EXT4_I(old_dentry->d_inode)->i_projid))) return -EXDEV; err = dquot_initialize(dir); if (err) return err; return __ext4_link(dir, inode, dentry); } /* * Try to find buffer head where contains the parent block. * It should be the inode block if it is inlined or the 1st block * if it is a normal dir. */ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, struct inode *inode, int *retval, struct ext4_dir_entry_2 **parent_de, int *inlined) { struct buffer_head *bh; if (!ext4_has_inline_data(inode)) { struct ext4_dir_entry_2 *de; unsigned int offset; bh = ext4_read_dirblock(inode, 0, EITHER); if (IS_ERR(bh)) { *retval = PTR_ERR(bh); return NULL; } de = (struct ext4_dir_entry_2 *) bh->b_data; if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, 0) || le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) { EXT4_ERROR_INODE(inode, "directory missing '.'"); brelse(bh); *retval = -EFSCORRUPTED; return NULL; } offset = ext4_rec_len_from_disk(de->rec_len, inode->i_sb->s_blocksize); de = ext4_next_entry(de, inode->i_sb->s_blocksize); if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, offset) || le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) { EXT4_ERROR_INODE(inode, "directory missing '..'"); brelse(bh); *retval = -EFSCORRUPTED; return NULL; } *parent_de = de; return bh; } *inlined = 1; return ext4_get_first_inline_block(inode, parent_de, retval); } struct ext4_renament { struct inode *dir; struct dentry *dentry; struct inode *inode; bool is_dir; int dir_nlink_delta; /* entry for "dentry" */ struct buffer_head *bh; struct ext4_dir_entry_2 *de; int inlined; /* entry for ".." in inode if it's a directory */ struct buffer_head *dir_bh; struct ext4_dir_entry_2 *parent_de; int dir_inlined; }; static int ext4_rename_dir_prepare(handle_t *handle, struct ext4_renament *ent, bool is_cross) { int retval; ent->is_dir = true; if (!is_cross) return 0; ent->dir_bh = ext4_get_first_dir_block(handle, ent->inode, &retval, &ent->parent_de, &ent->dir_inlined); if (!ent->dir_bh) return retval; if (le32_to_cpu(ent->parent_de->inode) != ent->dir->i_ino) return -EFSCORRUPTED; BUFFER_TRACE(ent->dir_bh, "get_write_access"); return ext4_journal_get_write_access(handle, ent->dir->i_sb, ent->dir_bh, EXT4_JTR_NONE); } static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent, unsigned dir_ino) { int retval; if (!ent->dir_bh) return 0; ent->parent_de->inode = cpu_to_le32(dir_ino); BUFFER_TRACE(ent->dir_bh, "call ext4_handle_dirty_metadata"); if (!ent->dir_inlined) { if (is_dx(ent->inode)) { retval = ext4_handle_dirty_dx_node(handle, ent->inode, ent->dir_bh); } else { retval = ext4_handle_dirty_dirblock(handle, ent->inode, ent->dir_bh); } } else { retval = ext4_mark_inode_dirty(handle, ent->inode); } if (retval) { ext4_std_error(ent->dir->i_sb, retval); return retval; } return 0; } static int ext4_setent(handle_t *handle, struct ext4_renament *ent, unsigned ino, unsigned file_type) { int retval, retval2; BUFFER_TRACE(ent->bh, "get write access"); retval = ext4_journal_get_write_access(handle, ent->dir->i_sb, ent->bh, EXT4_JTR_NONE); if (retval) return retval; ent->de->inode = cpu_to_le32(ino); if (ext4_has_feature_filetype(ent->dir->i_sb)) ent->de->file_type = file_type; inode_inc_iversion(ent->dir); inode_set_mtime_to_ts(ent->dir, inode_set_ctime_current(ent->dir)); retval = ext4_mark_inode_dirty(handle, ent->dir); BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata"); if (!ent->inlined) { retval2 = ext4_handle_dirty_dirblock(handle, ent->dir, ent->bh); if (unlikely(retval2)) { ext4_std_error(ent->dir->i_sb, retval2); return retval2; } } return retval; } static void ext4_resetent(handle_t *handle, struct ext4_renament *ent, unsigned ino, unsigned file_type) { struct ext4_renament old = *ent; int retval = 0; /* * old->de could have moved from under us during make indexed dir, * so the old->de may no longer valid and need to find it again * before reset old inode info. */ old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, &old.inlined); if (IS_ERR(old.bh)) retval = PTR_ERR(old.bh); if (!old.bh) retval = -ENOENT; if (retval) { ext4_std_error(old.dir->i_sb, retval); return; } ext4_setent(handle, &old, ino, file_type); brelse(old.bh); } static int ext4_find_delete_entry(handle_t *handle, struct inode *dir, const struct qstr *d_name) { int retval = -ENOENT; struct buffer_head *bh; struct ext4_dir_entry_2 *de; bh = ext4_find_entry(dir, d_name, &de, NULL); if (IS_ERR(bh)) return PTR_ERR(bh); if (bh) { retval = ext4_delete_entry(handle, dir, de, bh); brelse(bh); } return retval; } static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent, int force_reread) { int retval; /* * ent->de could have moved from under us during htree split, so make * sure that we are deleting the right entry. We might also be pointing * to a stale entry in the unused part of ent->bh so just checking inum * and the name isn't enough. */ if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino || ent->de->name_len != ent->dentry->d_name.len || strncmp(ent->de->name, ent->dentry->d_name.name, ent->de->name_len) || force_reread) { retval = ext4_find_delete_entry(handle, ent->dir, &ent->dentry->d_name); } else { retval = ext4_delete_entry(handle, ent->dir, ent->de, ent->bh); if (retval == -ENOENT) { retval = ext4_find_delete_entry(handle, ent->dir, &ent->dentry->d_name); } } if (retval) { ext4_warning_inode(ent->dir, "Deleting old file: nlink %d, error=%d", ent->dir->i_nlink, retval); } } static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent) { if (ent->dir_nlink_delta) { if (ent->dir_nlink_delta == -1) ext4_dec_count(ent->dir); else ext4_inc_count(ent->dir); ext4_mark_inode_dirty(handle, ent->dir); } } static struct inode *ext4_whiteout_for_rename(struct mnt_idmap *idmap, struct ext4_renament *ent, int credits, handle_t **h) { struct inode *wh; handle_t *handle; int retries = 0; /* * for inode block, sb block, group summaries, * and inode bitmap */ credits += (EXT4_MAXQUOTAS_TRANS_BLOCKS(ent->dir->i_sb) + EXT4_XATTR_TRANS_BLOCKS + 4); retry: wh = ext4_new_inode_start_handle(idmap, ent->dir, S_IFCHR | WHITEOUT_MODE, &ent->dentry->d_name, 0, NULL, EXT4_HT_DIR, credits); handle = ext4_journal_current_handle(); if (IS_ERR(wh)) { if (handle) ext4_journal_stop(handle); if (PTR_ERR(wh) == -ENOSPC && ext4_should_retry_alloc(ent->dir->i_sb, &retries)) goto retry; } else { *h = handle; init_special_inode(wh, wh->i_mode, WHITEOUT_DEV); wh->i_op = &ext4_special_inode_operations; } return wh; } /* * Anybody can rename anything with this: the permission checks are left to the * higher-level routines. * * n.b. old_{dentry,inode) refers to the source dentry/inode * while new_{dentry,inode) refers to the destination dentry/inode * This comes from rename(const char *oldpath, const char *newpath) */ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { handle_t *handle = NULL; struct ext4_renament old = { .dir = old_dir, .dentry = old_dentry, .inode = d_inode(old_dentry), }; struct ext4_renament new = { .dir = new_dir, .dentry = new_dentry, .inode = d_inode(new_dentry), }; int force_reread; int retval; struct inode *whiteout = NULL; int credits; u8 old_file_type; if (new.inode && new.inode->i_nlink == 0) { EXT4_ERROR_INODE(new.inode, "target of rename is already freed"); return -EFSCORRUPTED; } if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) && (!projid_eq(EXT4_I(new_dir)->i_projid, EXT4_I(old_dentry->d_inode)->i_projid))) return -EXDEV; retval = dquot_initialize(old.dir); if (retval) return retval; retval = dquot_initialize(old.inode); if (retval) return retval; retval = dquot_initialize(new.dir); if (retval) return retval; /* Initialize quotas before so that eventual writes go * in separate transaction */ if (new.inode) { retval = dquot_initialize(new.inode); if (retval) return retval; } old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, &old.inlined); if (IS_ERR(old.bh)) return PTR_ERR(old.bh); /* * Check for inode number is _not_ due to possible IO errors. * We might rmdir the source, keep it as pwd of some process * and merrily kill the link to whatever was created under the * same name. Goodbye sticky bit ;-< */ retval = -ENOENT; if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino) goto release_bh; new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, &new.de, &new.inlined); if (IS_ERR(new.bh)) { retval = PTR_ERR(new.bh); new.bh = NULL; goto release_bh; } if (new.bh) { if (!new.inode) { brelse(new.bh); new.bh = NULL; } } if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC)) ext4_alloc_da_blocks(old.inode); credits = (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); if (!(flags & RENAME_WHITEOUT)) { handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits); if (IS_ERR(handle)) { retval = PTR_ERR(handle); goto release_bh; } } else { whiteout = ext4_whiteout_for_rename(idmap, &old, credits, &handle); if (IS_ERR(whiteout)) { retval = PTR_ERR(whiteout); goto release_bh; } } old_file_type = old.de->file_type; if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) ext4_handle_sync(handle); if (S_ISDIR(old.inode->i_mode)) { if (new.inode) { retval = -ENOTEMPTY; if (!ext4_empty_dir(new.inode)) goto end_rename; } else { retval = -EMLINK; if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir)) goto end_rename; } retval = ext4_rename_dir_prepare(handle, &old, new.dir != old.dir); if (retval) goto end_rename; } /* * If we're renaming a file within an inline_data dir and adding or * setting the new dirent causes a conversion from inline_data to * extents/blockmap, we need to force the dirent delete code to * re-read the directory, or else we end up trying to delete a dirent * from what is now the extent tree root (or a block map). */ force_reread = (new.dir->i_ino == old.dir->i_ino && ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA)); if (whiteout) { /* * Do this before adding a new entry, so the old entry is sure * to be still pointing to the valid old entry. */ retval = ext4_setent(handle, &old, whiteout->i_ino, EXT4_FT_CHRDEV); if (retval) goto end_rename; retval = ext4_mark_inode_dirty(handle, whiteout); if (unlikely(retval)) goto end_rename; } if (!new.bh) { retval = ext4_add_entry(handle, new.dentry, old.inode); if (retval) goto end_rename; } else { retval = ext4_setent(handle, &new, old.inode->i_ino, old_file_type); if (retval) goto end_rename; } if (force_reread) force_reread = !ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA); /* * Like most other Unix systems, set the ctime for inodes on a * rename. */ inode_set_ctime_current(old.inode); retval = ext4_mark_inode_dirty(handle, old.inode); if (unlikely(retval)) goto end_rename; if (!whiteout) { /* * ok, that's it */ ext4_rename_delete(handle, &old, force_reread); } if (new.inode) { ext4_dec_count(new.inode); inode_set_ctime_current(new.inode); } inode_set_mtime_to_ts(old.dir, inode_set_ctime_current(old.dir)); ext4_update_dx_flag(old.dir); if (old.is_dir) { retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino); if (retval) goto end_rename; ext4_dec_count(old.dir); if (new.inode) { /* checked ext4_empty_dir above, can't have another * parent, ext4_dec_count() won't work for many-linked * dirs */ clear_nlink(new.inode); } else { ext4_inc_count(new.dir); ext4_update_dx_flag(new.dir); retval = ext4_mark_inode_dirty(handle, new.dir); if (unlikely(retval)) goto end_rename; } } retval = ext4_mark_inode_dirty(handle, old.dir); if (unlikely(retval)) goto end_rename; if (old.is_dir) { /* * We disable fast commits here that's because the * replay code is not yet capable of changing dot dot * dirents in directories. */ ext4_fc_mark_ineligible(old.inode->i_sb, EXT4_FC_REASON_RENAME_DIR, handle); } else { struct super_block *sb = old.inode->i_sb; if (new.inode) ext4_fc_track_unlink(handle, new.dentry); if (test_opt2(sb, JOURNAL_FAST_COMMIT) && !(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) && !(ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE))) { __ext4_fc_track_link(handle, old.inode, new.dentry); __ext4_fc_track_unlink(handle, old.inode, old.dentry); if (whiteout) __ext4_fc_track_create(handle, whiteout, old.dentry); } } if (new.inode) { retval = ext4_mark_inode_dirty(handle, new.inode); if (unlikely(retval)) goto end_rename; if (!new.inode->i_nlink) ext4_orphan_add(handle, new.inode); } retval = 0; end_rename: if (whiteout) { if (retval) { ext4_resetent(handle, &old, old.inode->i_ino, old_file_type); drop_nlink(whiteout); ext4_mark_inode_dirty(handle, whiteout); ext4_orphan_add(handle, whiteout); } unlock_new_inode(whiteout); ext4_journal_stop(handle); iput(whiteout); } else { ext4_journal_stop(handle); } release_bh: brelse(old.dir_bh); brelse(old.bh); brelse(new.bh); return retval; } static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { handle_t *handle = NULL; struct ext4_renament old = { .dir = old_dir, .dentry = old_dentry, .inode = d_inode(old_dentry), }; struct ext4_renament new = { .dir = new_dir, .dentry = new_dentry, .inode = d_inode(new_dentry), }; u8 new_file_type; int retval; if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) && !projid_eq(EXT4_I(new_dir)->i_projid, EXT4_I(old_dentry->d_inode)->i_projid)) || (ext4_test_inode_flag(old_dir, EXT4_INODE_PROJINHERIT) && !projid_eq(EXT4_I(old_dir)->i_projid, EXT4_I(new_dentry->d_inode)->i_projid))) return -EXDEV; retval = dquot_initialize(old.dir); if (retval) return retval; retval = dquot_initialize(new.dir); if (retval) return retval; old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, &old.inlined); if (IS_ERR(old.bh)) return PTR_ERR(old.bh); /* * Check for inode number is _not_ due to possible IO errors. * We might rmdir the source, keep it as pwd of some process * and merrily kill the link to whatever was created under the * same name. Goodbye sticky bit ;-< */ retval = -ENOENT; if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino) goto end_rename; new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, &new.de, &new.inlined); if (IS_ERR(new.bh)) { retval = PTR_ERR(new.bh); new.bh = NULL; goto end_rename; } /* RENAME_EXCHANGE case: old *and* new must both exist */ if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino) goto end_rename; handle = ext4_journal_start(old.dir, EXT4_HT_DIR, (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + 2 * EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2)); if (IS_ERR(handle)) { retval = PTR_ERR(handle); handle = NULL; goto end_rename; } if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) ext4_handle_sync(handle); if (S_ISDIR(old.inode->i_mode)) { retval = ext4_rename_dir_prepare(handle, &old, new.dir != old.dir); if (retval) goto end_rename; } if (S_ISDIR(new.inode->i_mode)) { retval = ext4_rename_dir_prepare(handle, &new, new.dir != old.dir); if (retval) goto end_rename; } /* * Other than the special case of overwriting a directory, parents' * nlink only needs to be modified if this is a cross directory rename. */ if (old.dir != new.dir && old.is_dir != new.is_dir) { old.dir_nlink_delta = old.is_dir ? -1 : 1; new.dir_nlink_delta = -old.dir_nlink_delta; retval = -EMLINK; if ((old.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(old.dir)) || (new.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(new.dir))) goto end_rename; } new_file_type = new.de->file_type; retval = ext4_setent(handle, &new, old.inode->i_ino, old.de->file_type); if (retval) goto end_rename; retval = ext4_setent(handle, &old, new.inode->i_ino, new_file_type); if (retval) goto end_rename; /* * Like most other Unix systems, set the ctime for inodes on a * rename. */ inode_set_ctime_current(old.inode); inode_set_ctime_current(new.inode); retval = ext4_mark_inode_dirty(handle, old.inode); if (unlikely(retval)) goto end_rename; retval = ext4_mark_inode_dirty(handle, new.inode); if (unlikely(retval)) goto end_rename; ext4_fc_mark_ineligible(new.inode->i_sb, EXT4_FC_REASON_CROSS_RENAME, handle); if (old.dir_bh) { retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino); if (retval) goto end_rename; } if (new.dir_bh) { retval = ext4_rename_dir_finish(handle, &new, old.dir->i_ino); if (retval) goto end_rename; } ext4_update_dir_count(handle, &old); ext4_update_dir_count(handle, &new); retval = 0; end_rename: brelse(old.dir_bh); brelse(new.dir_bh); brelse(old.bh); brelse(new.bh); if (handle) ext4_journal_stop(handle); return retval; } static int ext4_rename2(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { int err; if (unlikely(ext4_forced_shutdown(old_dir->i_sb))) return -EIO; if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; err = fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry, flags); if (err) return err; if (flags & RENAME_EXCHANGE) { return ext4_cross_rename(old_dir, old_dentry, new_dir, new_dentry); } return ext4_rename(idmap, old_dir, old_dentry, new_dir, new_dentry, flags); } /* * directories can handle most operations... */ const struct inode_operations ext4_dir_inode_operations = { .create = ext4_create, .lookup = ext4_lookup, .link = ext4_link, .unlink = ext4_unlink, .symlink = ext4_symlink, .mkdir = ext4_mkdir, .rmdir = ext4_rmdir, .mknod = ext4_mknod, .tmpfile = ext4_tmpfile, .rename = ext4_rename2, .setattr = ext4_setattr, .getattr = ext4_getattr, .listxattr = ext4_listxattr, .get_inode_acl = ext4_get_acl, .set_acl = ext4_set_acl, .fiemap = ext4_fiemap, .fileattr_get = ext4_fileattr_get, .fileattr_set = ext4_fileattr_set, }; const struct inode_operations ext4_special_inode_operations = { .setattr = ext4_setattr, .getattr = ext4_getattr, .listxattr = ext4_listxattr, .get_inode_acl = ext4_get_acl, .set_acl = ext4_set_acl, };
2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 // SPDX-License-Identifier: GPL-2.0-or-later /* handling of writes to regular files and writing back to the server * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/backing-dev.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/writeback.h> #include <linux/pagevec.h> #include <linux/netfs.h> #include <trace/events/netfs.h> #include "internal.h" /* * completion of write to server */ static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsigned int len) { _enter("{%llx:%llu},{%x @%llx}", vnode->fid.vid, vnode->fid.vnode, len, start); afs_prune_wb_keys(vnode); _leave(""); } /* * Find a key to use for the writeback. We cached the keys used to author the * writes on the vnode. wreq->netfs_priv2 will contain the last writeback key * record used or NULL and we need to start from there if it's set. * wreq->netfs_priv will be set to the key itself or NULL. */ static void afs_get_writeback_key(struct netfs_io_request *wreq) { struct afs_wb_key *wbk, *old = wreq->netfs_priv2; struct afs_vnode *vnode = AFS_FS_I(wreq->inode); key_put(wreq->netfs_priv); wreq->netfs_priv = NULL; wreq->netfs_priv2 = NULL; spin_lock(&vnode->wb_lock); if (old) wbk = list_next_entry(old, vnode_link); else wbk = list_first_entry(&vnode->wb_keys, struct afs_wb_key, vnode_link); list_for_each_entry_from(wbk, &vnode->wb_keys, vnode_link) { _debug("wbk %u", key_serial(wbk->key)); if (key_validate(wbk->key) == 0) { refcount_inc(&wbk->usage); wreq->netfs_priv = key_get(wbk->key); wreq->netfs_priv2 = wbk; _debug("USE WB KEY %u", key_serial(wbk->key)); break; } } spin_unlock(&vnode->wb_lock); afs_put_wb_key(old); } static void afs_store_data_success(struct afs_operation *op) { struct afs_vnode *vnode = op->file[0].vnode; op->ctime = op->file[0].scb.status.mtime_client; afs_vnode_commit_status(op, &op->file[0]); if (!afs_op_error(op)) { afs_pages_written_back(vnode, op->store.pos, op->store.size); afs_stat_v(vnode, n_stores); atomic_long_add(op->store.size, &afs_v2net(vnode)->n_store_bytes); } } static const struct afs_operation_ops afs_store_data_operation = { .issue_afs_rpc = afs_fs_store_data, .issue_yfs_rpc = yfs_fs_store_data, .success = afs_store_data_success, }; /* * Prepare a subrequest to write to the server. This sets the max_len * parameter. */ void afs_prepare_write(struct netfs_io_subrequest *subreq) { //if (test_bit(NETFS_SREQ_RETRYING, &subreq->flags)) // subreq->max_len = 512 * 1024; //else subreq->max_len = 256 * 1024 * 1024; } /* * Issue a subrequest to write to the server. */ static void afs_issue_write_worker(struct work_struct *work) { struct netfs_io_subrequest *subreq = container_of(work, struct netfs_io_subrequest, work); struct netfs_io_request *wreq = subreq->rreq; struct afs_operation *op; struct afs_vnode *vnode = AFS_FS_I(wreq->inode); unsigned long long pos = subreq->start + subreq->transferred; size_t len = subreq->len - subreq->transferred; int ret = -ENOKEY; _enter("R=%x[%x],%s{%llx:%llu.%u},%llx,%zx", wreq->debug_id, subreq->debug_index, vnode->volume->name, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, pos, len); #if 0 // Error injection if (subreq->debug_index == 3) return netfs_write_subrequest_terminated(subreq, -ENOANO, false); if (!test_bit(NETFS_SREQ_RETRYING, &subreq->flags)) { set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); return netfs_write_subrequest_terminated(subreq, -EAGAIN, false); } #endif op = afs_alloc_operation(wreq->netfs_priv, vnode->volume); if (IS_ERR(op)) return netfs_write_subrequest_terminated(subreq, -EAGAIN, false); afs_op_set_vnode(op, 0, vnode); op->file[0].dv_delta = 1; op->file[0].modification = true; op->store.pos = pos; op->store.size = len; op->flags |= AFS_OPERATION_UNINTR; op->ops = &afs_store_data_operation; afs_begin_vnode_operation(op); op->store.write_iter = &subreq->io_iter; op->store.i_size = umax(pos + len, vnode->netfs.remote_i_size); op->mtime = inode_get_mtime(&vnode->netfs.inode); afs_wait_for_operation(op); ret = afs_put_operation(op); switch (ret) { case -EACCES: case -EPERM: case -ENOKEY: case -EKEYEXPIRED: case -EKEYREJECTED: case -EKEYREVOKED: /* If there are more keys we can try, use the retry algorithm * to rotate the keys. */ if (wreq->netfs_priv2) set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); break; } netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len, false); } void afs_issue_write(struct netfs_io_subrequest *subreq) { subreq->work.func = afs_issue_write_worker; if (!queue_work(system_unbound_wq, &subreq->work)) WARN_ON_ONCE(1); } /* * Writeback calls this when it finds a folio that needs uploading. This isn't * called if writeback only has copy-to-cache to deal with. */ void afs_begin_writeback(struct netfs_io_request *wreq) { afs_get_writeback_key(wreq); wreq->io_streams[0].avail = true; } /* * Prepare to retry the writes in request. Use this to try rotating the * available writeback keys. */ void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *stream) { struct netfs_io_subrequest *subreq = list_first_entry(&stream->subrequests, struct netfs_io_subrequest, rreq_link); switch (subreq->error) { case -EACCES: case -EPERM: case -ENOKEY: case -EKEYEXPIRED: case -EKEYREJECTED: case -EKEYREVOKED: afs_get_writeback_key(wreq); if (!wreq->netfs_priv) stream->failed = true; break; } } /* * write some of the pending data back to the server */ int afs_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct afs_vnode *vnode = AFS_FS_I(mapping->host); int ret; /* We have to be careful as we can end up racing with setattr() * truncating the pagecache since the caller doesn't take a lock here * to prevent it. */ if (wbc->sync_mode == WB_SYNC_ALL) down_read(&vnode->validate_lock); else if (!down_read_trylock(&vnode->validate_lock)) return 0; ret = netfs_writepages(mapping, wbc); up_read(&vnode->validate_lock); return ret; } /* * flush any dirty pages for this process, and check for write errors. * - the return status from this call provides a reliable indication of * whether any write errors occurred for this process. */ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); struct afs_file *af = file->private_data; int ret; _enter("{%llx:%llu},{n=%pD},%d", vnode->fid.vid, vnode->fid.vnode, file, datasync); ret = afs_validate(vnode, af->key); if (ret < 0) return ret; return file_write_and_wait_range(file, start, end); } /* * notification that a previously read-only page is about to become writable * - if it returns an error, the caller will deliver a bus error signal */ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf) { struct file *file = vmf->vma->vm_file; if (afs_validate(AFS_FS_I(file_inode(file)), afs_file_key(file)) < 0) return VM_FAULT_SIGBUS; return netfs_page_mkwrite(vmf, NULL); } /* * Prune the keys cached for writeback. The caller must hold vnode->wb_lock. */ void afs_prune_wb_keys(struct afs_vnode *vnode) { LIST_HEAD(graveyard); struct afs_wb_key *wbk, *tmp; /* Discard unused keys */ spin_lock(&vnode->wb_lock); if (!mapping_tagged(&vnode->netfs.inode.i_data, PAGECACHE_TAG_WRITEBACK) && !mapping_tagged(&vnode->netfs.inode.i_data, PAGECACHE_TAG_DIRTY)) { list_for_each_entry_safe(wbk, tmp, &vnode->wb_keys, vnode_link) { if (refcount_read(&wbk->usage) == 1) list_move(&wbk->vnode_link, &graveyard); } } spin_unlock(&vnode->wb_lock); while (!list_empty(&graveyard)) { wbk = list_entry(graveyard.next, struct afs_wb_key, vnode_link); list_del(&wbk->vnode_link); afs_put_wb_key(wbk); } }
145 102 15 1 32 58 7 87 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 */ #include <linux/fs.h> #include <linux/quotaops.h> #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" #include "jfs_imap.h" #include "jfs_dinode.h" #include "jfs_debug.h" void jfs_set_inode_flags(struct inode *inode) { unsigned int flags = JFS_IP(inode)->mode2; unsigned int new_fl = 0; if (flags & JFS_IMMUTABLE_FL) new_fl |= S_IMMUTABLE; if (flags & JFS_APPEND_FL) new_fl |= S_APPEND; if (flags & JFS_NOATIME_FL) new_fl |= S_NOATIME; if (flags & JFS_DIRSYNC_FL) new_fl |= S_DIRSYNC; if (flags & JFS_SYNC_FL) new_fl |= S_SYNC; inode_set_flags(inode, new_fl, S_IMMUTABLE | S_APPEND | S_NOATIME | S_DIRSYNC | S_SYNC); } /* * NAME: ialloc() * * FUNCTION: Allocate a new inode * */ struct inode *ialloc(struct inode *parent, umode_t mode) { struct super_block *sb = parent->i_sb; struct inode *inode; struct jfs_inode_info *jfs_inode; int rc; inode = new_inode(sb); if (!inode) { jfs_warn("ialloc: new_inode returned NULL!"); return ERR_PTR(-ENOMEM); } jfs_inode = JFS_IP(inode); rc = diAlloc(parent, S_ISDIR(mode), inode); if (rc) { jfs_warn("ialloc: diAlloc returned %d!", rc); goto fail_put; } if (insert_inode_locked(inode) < 0) { rc = -EINVAL; goto fail_put; } inode_init_owner(&nop_mnt_idmap, inode, parent, mode); /* * New inodes need to save sane values on disk when * uid & gid mount options are used */ jfs_inode->saved_uid = inode->i_uid; jfs_inode->saved_gid = inode->i_gid; /* * Allocate inode to quota. */ rc = dquot_initialize(inode); if (rc) goto fail_drop; rc = dquot_alloc_inode(inode); if (rc) goto fail_drop; /* inherit flags from parent */ jfs_inode->mode2 = JFS_IP(parent)->mode2 & JFS_FL_INHERIT; if (S_ISDIR(mode)) { jfs_inode->mode2 |= IDIRECTORY; jfs_inode->mode2 &= ~JFS_DIRSYNC_FL; } else { jfs_inode->mode2 |= INLINEEA | ISPARSE; if (S_ISLNK(mode)) jfs_inode->mode2 &= ~(JFS_IMMUTABLE_FL|JFS_APPEND_FL); } jfs_inode->mode2 |= inode->i_mode; inode->i_blocks = 0; simple_inode_init_ts(inode); jfs_inode->otime = inode_get_ctime_sec(inode); inode->i_generation = JFS_SBI(sb)->gengen++; jfs_inode->cflag = 0; /* Zero remaining fields */ memset(&jfs_inode->acl, 0, sizeof(dxd_t)); memset(&jfs_inode->ea, 0, sizeof(dxd_t)); jfs_inode->next_index = 0; jfs_inode->acltype = 0; jfs_inode->btorder = 0; jfs_inode->btindex = 0; jfs_inode->bxflag = 0; jfs_inode->blid = 0; jfs_inode->atlhead = 0; jfs_inode->atltail = 0; jfs_inode->xtlid = 0; jfs_set_inode_flags(inode); jfs_info("ialloc returns inode = 0x%p", inode); return inode; fail_drop: dquot_drop(inode); inode->i_flags |= S_NOQUOTA; clear_nlink(inode); discard_new_inode(inode); return ERR_PTR(rc); fail_put: iput(inode); return ERR_PTR(rc); }
8 8 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 // SPDX-License-Identifier: GPL-2.0-or-later /* * kernel/stop_machine.c * * Copyright (C) 2008, 2005 IBM Corporation. * Copyright (C) 2008, 2005 Rusty Russell rusty@rustcorp.com.au * Copyright (C) 2010 SUSE Linux Products GmbH * Copyright (C) 2010 Tejun Heo <tj@kernel.org> */ #include <linux/compiler.h> #include <linux/completion.h> #include <linux/cpu.h> #include <linux/init.h> #include <linux/kthread.h> #include <linux/export.h> #include <linux/percpu.h> #include <linux/sched.h> #include <linux/stop_machine.h> #include <linux/interrupt.h> #include <linux/kallsyms.h> #include <linux/smpboot.h> #include <linux/atomic.h> #include <linux/nmi.h> #include <linux/sched/wake_q.h> /* * Structure to determine completion condition and record errors. May * be shared by works on different cpus. */ struct cpu_stop_done { atomic_t nr_todo; /* nr left to execute */ int ret; /* collected return value */ struct completion completion; /* fired if nr_todo reaches 0 */ }; /* the actual stopper, one per every possible cpu, enabled on online cpus */ struct cpu_stopper { struct task_struct *thread; raw_spinlock_t lock; bool enabled; /* is this stopper enabled? */ struct list_head works; /* list of pending works */ struct cpu_stop_work stop_work; /* for stop_cpus */ unsigned long caller; cpu_stop_fn_t fn; }; static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper); static bool stop_machine_initialized = false; void print_stop_info(const char *log_lvl, struct task_struct *task) { /* * If @task is a stopper task, it cannot migrate and task_cpu() is * stable. */ struct cpu_stopper *stopper = per_cpu_ptr(&cpu_stopper, task_cpu(task)); if (task != stopper->thread) return; printk("%sStopper: %pS <- %pS\n", log_lvl, stopper->fn, (void *)stopper->caller); } /* static data for stop_cpus */ static DEFINE_MUTEX(stop_cpus_mutex); static bool stop_cpus_in_progress; static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo) { memset(done, 0, sizeof(*done)); atomic_set(&done->nr_todo, nr_todo); init_completion(&done->completion); } /* signal completion unless @done is NULL */ static void cpu_stop_signal_done(struct cpu_stop_done *done) { if (atomic_dec_and_test(&done->nr_todo)) complete(&done->completion); } static void __cpu_stop_queue_work(struct cpu_stopper *stopper, struct cpu_stop_work *work, struct wake_q_head *wakeq) { list_add_tail(&work->list, &stopper->works); wake_q_add(wakeq, stopper->thread); } /* queue @work to @stopper. if offline, @work is completed immediately */ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); DEFINE_WAKE_Q(wakeq); unsigned long flags; bool enabled; preempt_disable(); raw_spin_lock_irqsave(&stopper->lock, flags); enabled = stopper->enabled; if (enabled) __cpu_stop_queue_work(stopper, work, &wakeq); else if (work->done) cpu_stop_signal_done(work->done); raw_spin_unlock_irqrestore(&stopper->lock, flags); wake_up_q(&wakeq); preempt_enable(); return enabled; } /** * stop_one_cpu - stop a cpu * @cpu: cpu to stop * @fn: function to execute * @arg: argument to @fn * * Execute @fn(@arg) on @cpu. @fn is run in a process context with * the highest priority preempting any task on the cpu and * monopolizing it. This function returns after the execution is * complete. * * This function doesn't guarantee @cpu stays online till @fn * completes. If @cpu goes down in the middle, execution may happen * partially or fully on different cpus. @fn should either be ready * for that or the caller should ensure that @cpu stays online until * this function completes. * * CONTEXT: * Might sleep. * * RETURNS: * -ENOENT if @fn(@arg) was not executed because @cpu was offline; * otherwise, the return value of @fn. */ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg) { struct cpu_stop_done done; struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done, .caller = _RET_IP_ }; cpu_stop_init_done(&done, 1); if (!cpu_stop_queue_work(cpu, &work)) return -ENOENT; /* * In case @cpu == smp_proccessor_id() we can avoid a sleep+wakeup * cycle by doing a preemption: */ cond_resched(); wait_for_completion(&done.completion); return done.ret; } /* This controls the threads on each CPU. */ enum multi_stop_state { /* Dummy starting state for thread. */ MULTI_STOP_NONE, /* Awaiting everyone to be scheduled. */ MULTI_STOP_PREPARE, /* Disable interrupts. */ MULTI_STOP_DISABLE_IRQ, /* Run the function */ MULTI_STOP_RUN, /* Exit */ MULTI_STOP_EXIT, }; struct multi_stop_data { cpu_stop_fn_t fn; void *data; /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ unsigned int num_threads; const struct cpumask *active_cpus; enum multi_stop_state state; atomic_t thread_ack; }; static void set_state(struct multi_stop_data *msdata, enum multi_stop_state newstate) { /* Reset ack counter. */ atomic_set(&msdata->thread_ack, msdata->num_threads); smp_wmb(); WRITE_ONCE(msdata->state, newstate); } /* Last one to ack a state moves to the next state. */ static void ack_state(struct multi_stop_data *msdata) { if (atomic_dec_and_test(&msdata->thread_ack)) set_state(msdata, msdata->state + 1); } notrace void __weak stop_machine_yield(const struct cpumask *cpumask) { cpu_relax(); } /* This is the cpu_stop function which stops the CPU. */ static int multi_cpu_stop(void *data) { struct multi_stop_data *msdata = data; enum multi_stop_state newstate, curstate = MULTI_STOP_NONE; int cpu = smp_processor_id(), err = 0; const struct cpumask *cpumask; unsigned long flags; bool is_active; /* * When called from stop_machine_from_inactive_cpu(), irq might * already be disabled. Save the state and restore it on exit. */ local_save_flags(flags); if (!msdata->active_cpus) { cpumask = cpu_online_mask; is_active = cpu == cpumask_first(cpumask); } else { cpumask = msdata->active_cpus; is_active = cpumask_test_cpu(cpu, cpumask); } /* Simple state machine */ do { /* Chill out and ensure we re-read multi_stop_state. */ stop_machine_yield(cpumask); newstate = READ_ONCE(msdata->state); if (newstate != curstate) { curstate = newstate; switch (curstate) { case MULTI_STOP_DISABLE_IRQ: local_irq_disable(); hard_irq_disable(); break; case MULTI_STOP_RUN: if (is_active) err = msdata->fn(msdata->data); break; default: break; } ack_state(msdata); } else if (curstate > MULTI_STOP_PREPARE) { /* * At this stage all other CPUs we depend on must spin * in the same loop. Any reason for hard-lockup should * be detected and reported on their side. */ touch_nmi_watchdog(); } rcu_momentary_dyntick_idle(); } while (curstate != MULTI_STOP_EXIT); local_irq_restore(flags); return err; } static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, int cpu2, struct cpu_stop_work *work2) { struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1); struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2); DEFINE_WAKE_Q(wakeq); int err; retry: /* * The waking up of stopper threads has to happen in the same * scheduling context as the queueing. Otherwise, there is a * possibility of one of the above stoppers being woken up by another * CPU, and preempting us. This will cause us to not wake up the other * stopper forever. */ preempt_disable(); raw_spin_lock_irq(&stopper1->lock); raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); if (!stopper1->enabled || !stopper2->enabled) { err = -ENOENT; goto unlock; } /* * Ensure that if we race with __stop_cpus() the stoppers won't get * queued up in reverse order leading to system deadlock. * * We can't miss stop_cpus_in_progress if queue_stop_cpus_work() has * queued a work on cpu1 but not on cpu2, we hold both locks. * * It can be falsely true but it is safe to spin until it is cleared, * queue_stop_cpus_work() does everything under preempt_disable(). */ if (unlikely(stop_cpus_in_progress)) { err = -EDEADLK; goto unlock; } err = 0; __cpu_stop_queue_work(stopper1, work1, &wakeq); __cpu_stop_queue_work(stopper2, work2, &wakeq); unlock: raw_spin_unlock(&stopper2->lock); raw_spin_unlock_irq(&stopper1->lock); if (unlikely(err == -EDEADLK)) { preempt_enable(); while (stop_cpus_in_progress) cpu_relax(); goto retry; } wake_up_q(&wakeq); preempt_enable(); return err; } /** * stop_two_cpus - stops two cpus * @cpu1: the cpu to stop * @cpu2: the other cpu to stop * @fn: function to execute * @arg: argument to @fn * * Stops both the current and specified CPU and runs @fn on one of them. * * returns when both are completed. */ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg) { struct cpu_stop_done done; struct cpu_stop_work work1, work2; struct multi_stop_data msdata; msdata = (struct multi_stop_data){ .fn = fn, .data = arg, .num_threads = 2, .active_cpus = cpumask_of(cpu1), }; work1 = work2 = (struct cpu_stop_work){ .fn = multi_cpu_stop, .arg = &msdata, .done = &done, .caller = _RET_IP_, }; cpu_stop_init_done(&done, 2); set_state(&msdata, MULTI_STOP_PREPARE); if (cpu1 > cpu2) swap(cpu1, cpu2); if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2)) return -ENOENT; wait_for_completion(&done.completion); return done.ret; } /** * stop_one_cpu_nowait - stop a cpu but don't wait for completion * @cpu: cpu to stop * @fn: function to execute * @arg: argument to @fn * @work_buf: pointer to cpu_stop_work structure * * Similar to stop_one_cpu() but doesn't wait for completion. The * caller is responsible for ensuring @work_buf is currently unused * and will remain untouched until stopper starts executing @fn. * * CONTEXT: * Don't care. * * RETURNS: * true if cpu_stop_work was queued successfully and @fn will be called, * false otherwise. */ bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, struct cpu_stop_work *work_buf) { *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, .caller = _RET_IP_, }; return cpu_stop_queue_work(cpu, work_buf); } static bool queue_stop_cpus_work(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg, struct cpu_stop_done *done) { struct cpu_stop_work *work; unsigned int cpu; bool queued = false; /* * Disable preemption while queueing to avoid getting * preempted by a stopper which might wait for other stoppers * to enter @fn which can lead to deadlock. */ preempt_disable(); stop_cpus_in_progress = true; barrier(); for_each_cpu(cpu, cpumask) { work = &per_cpu(cpu_stopper.stop_work, cpu); work->fn = fn; work->arg = arg; work->done = done; work->caller = _RET_IP_; if (cpu_stop_queue_work(cpu, work)) queued = true; } barrier(); stop_cpus_in_progress = false; preempt_enable(); return queued; } static int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) { struct cpu_stop_done done; cpu_stop_init_done(&done, cpumask_weight(cpumask)); if (!queue_stop_cpus_work(cpumask, fn, arg, &done)) return -ENOENT; wait_for_completion(&done.completion); return done.ret; } /** * stop_cpus - stop multiple cpus * @cpumask: cpus to stop * @fn: function to execute * @arg: argument to @fn * * Execute @fn(@arg) on online cpus in @cpumask. On each target cpu, * @fn is run in a process context with the highest priority * preempting any task on the cpu and monopolizing it. This function * returns after all executions are complete. * * This function doesn't guarantee the cpus in @cpumask stay online * till @fn completes. If some cpus go down in the middle, execution * on the cpu may happen partially or fully on different cpus. @fn * should either be ready for that or the caller should ensure that * the cpus stay online until this function completes. * * All stop_cpus() calls are serialized making it safe for @fn to wait * for all cpus to start executing it. * * CONTEXT: * Might sleep. * * RETURNS: * -ENOENT if @fn(@arg) was not executed at all because all cpus in * @cpumask were offline; otherwise, 0 if all executions of @fn * returned 0, any non zero return value if any returned non zero. */ static int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) { int ret; /* static works are used, process one request at a time */ mutex_lock(&stop_cpus_mutex); ret = __stop_cpus(cpumask, fn, arg); mutex_unlock(&stop_cpus_mutex); return ret; } static int cpu_stop_should_run(unsigned int cpu) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); unsigned long flags; int run; raw_spin_lock_irqsave(&stopper->lock, flags); run = !list_empty(&stopper->works); raw_spin_unlock_irqrestore(&stopper->lock, flags); return run; } static void cpu_stopper_thread(unsigned int cpu) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); struct cpu_stop_work *work; repeat: work = NULL; raw_spin_lock_irq(&stopper->lock); if (!list_empty(&stopper->works)) { work = list_first_entry(&stopper->works, struct cpu_stop_work, list); list_del_init(&work->list); } raw_spin_unlock_irq(&stopper->lock); if (work) { cpu_stop_fn_t fn = work->fn; void *arg = work->arg; struct cpu_stop_done *done = work->done; int ret; /* cpu stop callbacks must not sleep, make in_atomic() == T */ stopper->caller = work->caller; stopper->fn = fn; preempt_count_inc(); ret = fn(arg); if (done) { if (ret) done->ret = ret; cpu_stop_signal_done(done); } preempt_count_dec(); stopper->fn = NULL; stopper->caller = 0; WARN_ONCE(preempt_count(), "cpu_stop: %ps(%p) leaked preempt count\n", fn, arg); goto repeat; } } void stop_machine_park(int cpu) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); /* * Lockless. cpu_stopper_thread() will take stopper->lock and flush * the pending works before it parks, until then it is fine to queue * the new works. */ stopper->enabled = false; kthread_park(stopper->thread); } static void cpu_stop_create(unsigned int cpu) { sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu)); } static void cpu_stop_park(unsigned int cpu) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); WARN_ON(!list_empty(&stopper->works)); } void stop_machine_unpark(int cpu) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); stopper->enabled = true; kthread_unpark(stopper->thread); } static struct smp_hotplug_thread cpu_stop_threads = { .store = &cpu_stopper.thread, .thread_should_run = cpu_stop_should_run, .thread_fn = cpu_stopper_thread, .thread_comm = "migration/%u", .create = cpu_stop_create, .park = cpu_stop_park, .selfparking = true, }; static int __init cpu_stop_init(void) { unsigned int cpu; for_each_possible_cpu(cpu) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); raw_spin_lock_init(&stopper->lock); INIT_LIST_HEAD(&stopper->works); } BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads)); stop_machine_unpark(raw_smp_processor_id()); stop_machine_initialized = true; return 0; } early_initcall(cpu_stop_init); int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { struct multi_stop_data msdata = { .fn = fn, .data = data, .num_threads = num_online_cpus(), .active_cpus = cpus, }; lockdep_assert_cpus_held(); if (!stop_machine_initialized) { /* * Handle the case where stop_machine() is called * early in boot before stop_machine() has been * initialized. */ unsigned long flags; int ret; WARN_ON_ONCE(msdata.num_threads != 1); local_irq_save(flags); hard_irq_disable(); ret = (*fn)(data); local_irq_restore(flags); return ret; } /* Set the initial state and stop all online cpus. */ set_state(&msdata, MULTI_STOP_PREPARE); return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata); } int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { int ret; /* No CPUs can come up or down during this. */ cpus_read_lock(); ret = stop_machine_cpuslocked(fn, data, cpus); cpus_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(stop_machine); #ifdef CONFIG_SCHED_SMT int stop_core_cpuslocked(unsigned int cpu, cpu_stop_fn_t fn, void *data) { const struct cpumask *smt_mask = cpu_smt_mask(cpu); struct multi_stop_data msdata = { .fn = fn, .data = data, .num_threads = cpumask_weight(smt_mask), .active_cpus = smt_mask, }; lockdep_assert_cpus_held(); /* Set the initial state and stop all online cpus. */ set_state(&msdata, MULTI_STOP_PREPARE); return stop_cpus(smt_mask, multi_cpu_stop, &msdata); } EXPORT_SYMBOL_GPL(stop_core_cpuslocked); #endif /** * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU * @fn: the function to run * @data: the data ptr for the @fn() * @cpus: the cpus to run the @fn() on (NULL = any online cpu) * * This is identical to stop_machine() but can be called from a CPU which * is not active. The local CPU is in the process of hotplug (so no other * CPU hotplug can start) and not marked active and doesn't have enough * context to sleep. * * This function provides stop_machine() functionality for such state by * using busy-wait for synchronization and executing @fn directly for local * CPU. * * CONTEXT: * Local CPU is inactive. Temporarily stops all active CPUs. * * RETURNS: * 0 if all executions of @fn returned 0, any non zero return value if any * returned non zero. */ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { struct multi_stop_data msdata = { .fn = fn, .data = data, .active_cpus = cpus }; struct cpu_stop_done done; int ret; /* Local CPU must be inactive and CPU hotplug in progress. */ BUG_ON(cpu_active(raw_smp_processor_id())); msdata.num_threads = num_active_cpus() + 1; /* +1 for local */ /* No proper task established and can't sleep - busy wait for lock. */ while (!mutex_trylock(&stop_cpus_mutex)) cpu_relax(); /* Schedule work on other CPUs and execute directly for local CPU */ set_state(&msdata, MULTI_STOP_PREPARE); cpu_stop_init_done(&done, num_active_cpus()); queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata, &done); ret = multi_cpu_stop(&msdata); /* Busy wait for completion. */ while (!completion_done(&done.completion)) cpu_relax(); mutex_unlock(&stop_cpus_mutex); return ret ?: done.ret; }
7750 7755 7776 7775 6238 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 // SPDX-License-Identifier: GPL-2.0-only /* * Access kernel or user memory without faulting. */ #include <linux/export.h> #include <linux/mm.h> #include <linux/uaccess.h> #include <asm/tlb.h> bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { return true; } #define copy_from_kernel_nofault_loop(dst, src, len, type, err_label) \ while (len >= sizeof(type)) { \ __get_kernel_nofault(dst, src, type, err_label); \ dst += sizeof(type); \ src += sizeof(type); \ len -= sizeof(type); \ } long copy_from_kernel_nofault(void *dst, const void *src, size_t size) { unsigned long align = 0; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) align = (unsigned long)dst | (unsigned long)src; if (!copy_from_kernel_nofault_allowed(src, size)) return -ERANGE; pagefault_disable(); if (!(align & 7)) copy_from_kernel_nofault_loop(dst, src, size, u64, Efault); if (!(align & 3)) copy_from_kernel_nofault_loop(dst, src, size, u32, Efault); if (!(align & 1)) copy_from_kernel_nofault_loop(dst, src, size, u16, Efault); copy_from_kernel_nofault_loop(dst, src, size, u8, Efault); pagefault_enable(); return 0; Efault: pagefault_enable(); return -EFAULT; } EXPORT_SYMBOL_GPL(copy_from_kernel_nofault); #define copy_to_kernel_nofault_loop(dst, src, len, type, err_label) \ while (len >= sizeof(type)) { \ __put_kernel_nofault(dst, src, type, err_label); \ dst += sizeof(type); \ src += sizeof(type); \ len -= sizeof(type); \ } long copy_to_kernel_nofault(void *dst, const void *src, size_t size) { unsigned long align = 0; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) align = (unsigned long)dst | (unsigned long)src; pagefault_disable(); if (!(align & 7)) copy_to_kernel_nofault_loop(dst, src, size, u64, Efault); if (!(align & 3)) copy_to_kernel_nofault_loop(dst, src, size, u32, Efault); if (!(align & 1)) copy_to_kernel_nofault_loop(dst, src, size, u16, Efault); copy_to_kernel_nofault_loop(dst, src, size, u8, Efault); pagefault_enable(); return 0; Efault: pagefault_enable(); return -EFAULT; } long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) { const void *src = unsafe_addr; if (unlikely(count <= 0)) return 0; if (!copy_from_kernel_nofault_allowed(unsafe_addr, count)) return -ERANGE; pagefault_disable(); do { __get_kernel_nofault(dst, src, u8, Efault); dst++; src++; } while (dst[-1] && src - unsafe_addr < count); pagefault_enable(); dst[-1] = '\0'; return src - unsafe_addr; Efault: pagefault_enable(); dst[0] = '\0'; return -EFAULT; } /** * copy_from_user_nofault(): safely attempt to read from a user-space location * @dst: pointer to the buffer that shall take the data * @src: address to read from. This must be a user address. * @size: size of the data chunk * * Safely read from user address @src to the buffer at @dst. If a kernel fault * happens, handle that and return -EFAULT. */ long copy_from_user_nofault(void *dst, const void __user *src, size_t size) { long ret = -EFAULT; if (!__access_ok(src, size)) return ret; if (!nmi_uaccess_okay()) return ret; pagefault_disable(); ret = __copy_from_user_inatomic(dst, src, size); pagefault_enable(); if (ret) return -EFAULT; return 0; } EXPORT_SYMBOL_GPL(copy_from_user_nofault); /** * copy_to_user_nofault(): safely attempt to write to a user-space location * @dst: address to write to * @src: pointer to the data that shall be written * @size: size of the data chunk * * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ long copy_to_user_nofault(void __user *dst, const void *src, size_t size) { long ret = -EFAULT; if (access_ok(dst, size)) { pagefault_disable(); ret = __copy_to_user_inatomic(dst, src, size); pagefault_enable(); } if (ret) return -EFAULT; return 0; } EXPORT_SYMBOL_GPL(copy_to_user_nofault); /** * strncpy_from_user_nofault: - Copy a NUL terminated string from unsafe user * address. * @dst: Destination address, in kernel space. This buffer must be at * least @count bytes long. * @unsafe_addr: Unsafe user address. * @count: Maximum number of bytes to copy, including the trailing NUL. * * Copies a NUL-terminated string from unsafe user address to kernel buffer. * * On success, returns the length of the string INCLUDING the trailing NUL. * * If access fails, returns -EFAULT (some data may have been copied * and the trailing NUL added). * * If @count is smaller than the length of the string, copies @count-1 bytes, * sets the last byte of @dst buffer to NUL and returns @count. */ long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, long count) { long ret; if (unlikely(count <= 0)) return 0; pagefault_disable(); ret = strncpy_from_user(dst, unsafe_addr, count); pagefault_enable(); if (ret >= count) { ret = count; dst[ret - 1] = '\0'; } else if (ret > 0) { ret++; } return ret; } /** * strnlen_user_nofault: - Get the size of a user string INCLUDING final NUL. * @unsafe_addr: The string to measure. * @count: Maximum count (including NUL) * * Get the size of a NUL-terminated string in user space without pagefault. * * Returns the size of the string INCLUDING the terminating NUL. * * If the string is too long, returns a number larger than @count. User * has to check the return value against "> count". * On exception (or invalid count), returns 0. * * Unlike strnlen_user, this can be used from IRQ handler etc. because * it disables pagefaults. */ long strnlen_user_nofault(const void __user *unsafe_addr, long count) { int ret; pagefault_disable(); ret = strnlen_user(unsafe_addr, count); pagefault_enable(); return ret; } void __copy_overflow(int size, unsigned long count) { WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); } EXPORT_SYMBOL(__copy_overflow);
5 5 3 3 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 // SPDX-License-Identifier: GPL-2.0 /* * f2fs shrinker support * the basic infra was copied from fs/ubifs/shrinker.c * * Copyright (c) 2015 Motorola Mobility * Copyright (c) 2015 Jaegeuk Kim <jaegeuk@kernel.org> */ #include <linux/fs.h> #include <linux/f2fs_fs.h> #include "f2fs.h" #include "node.h" static LIST_HEAD(f2fs_list); static DEFINE_SPINLOCK(f2fs_list_lock); static unsigned int shrinker_run_no; static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi) { return NM_I(sbi)->nat_cnt[RECLAIMABLE_NAT]; } static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) { long count = NM_I(sbi)->nid_cnt[FREE_NID] - MAX_FREE_NIDS; return count > 0 ? count : 0; } static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi, enum extent_type type) { struct extent_tree_info *eti = &sbi->extent_tree[type]; return atomic_read(&eti->total_zombie_tree) + atomic_read(&eti->total_ext_node); } unsigned long f2fs_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { struct f2fs_sb_info *sbi; struct list_head *p; unsigned long count = 0; spin_lock(&f2fs_list_lock); p = f2fs_list.next; while (p != &f2fs_list) { sbi = list_entry(p, struct f2fs_sb_info, s_list); /* stop f2fs_put_super */ if (!mutex_trylock(&sbi->umount_mutex)) { p = p->next; continue; } spin_unlock(&f2fs_list_lock); /* count read extent cache entries */ count += __count_extent_cache(sbi, EX_READ); /* count block age extent cache entries */ count += __count_extent_cache(sbi, EX_BLOCK_AGE); /* count clean nat cache entries */ count += __count_nat_entries(sbi); /* count free nids cache entries */ count += __count_free_nids(sbi); spin_lock(&f2fs_list_lock); p = p->next; mutex_unlock(&sbi->umount_mutex); } spin_unlock(&f2fs_list_lock); return count; } unsigned long f2fs_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { unsigned long nr = sc->nr_to_scan; struct f2fs_sb_info *sbi; struct list_head *p; unsigned int run_no; unsigned long freed = 0; spin_lock(&f2fs_list_lock); do { run_no = ++shrinker_run_no; } while (run_no == 0); p = f2fs_list.next; while (p != &f2fs_list) { sbi = list_entry(p, struct f2fs_sb_info, s_list); if (sbi->shrinker_run_no == run_no) break; /* stop f2fs_put_super */ if (!mutex_trylock(&sbi->umount_mutex)) { p = p->next; continue; } spin_unlock(&f2fs_list_lock); sbi->shrinker_run_no = run_no; /* shrink extent cache entries */ freed += f2fs_shrink_age_extent_tree(sbi, nr >> 2); /* shrink read extent cache entries */ freed += f2fs_shrink_read_extent_tree(sbi, nr >> 2); /* shrink clean nat cache entries */ if (freed < nr) freed += f2fs_try_to_free_nats(sbi, nr - freed); /* shrink free nids cache entries */ if (freed < nr) freed += f2fs_try_to_free_nids(sbi, nr - freed); spin_lock(&f2fs_list_lock); p = p->next; list_move_tail(&sbi->s_list, &f2fs_list); mutex_unlock(&sbi->umount_mutex); if (freed >= nr) break; } spin_unlock(&f2fs_list_lock); return freed; } void f2fs_join_shrinker(struct f2fs_sb_info *sbi) { spin_lock(&f2fs_list_lock); list_add_tail(&sbi->s_list, &f2fs_list); spin_unlock(&f2fs_list_lock); } void f2fs_leave_shrinker(struct f2fs_sb_info *sbi) { f2fs_shrink_read_extent_tree(sbi, __count_extent_cache(sbi, EX_READ)); f2fs_shrink_age_extent_tree(sbi, __count_extent_cache(sbi, EX_BLOCK_AGE)); spin_lock(&f2fs_list_lock); list_del_init(&sbi->s_list); spin_unlock(&f2fs_list_lock); }
1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 // SPDX-License-Identifier: GPL-2.0-or-later /* * ip6_flowlabel.c IPv6 flowlabel manager. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ #include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/net.h> #include <linux/netdevice.h> #include <linux/in6.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/export.h> #include <linux/pid_namespace.h> #include <linux/jump_label_ratelimit.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/ipv6.h> #include <net/rawv6.h> #include <net/transp_v6.h> #include <linux/uaccess.h> #define FL_MIN_LINGER 6 /* Minimal linger. It is set to 6sec specified in old IPv6 RFC. Well, it was reasonable value. */ #define FL_MAX_LINGER 150 /* Maximal linger timeout */ /* FL hash table */ #define FL_MAX_PER_SOCK 32 #define FL_MAX_SIZE 4096 #define FL_HASH_MASK 255 #define FL_HASH(l) (ntohl(l)&FL_HASH_MASK) static atomic_t fl_size = ATOMIC_INIT(0); static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1]; static void ip6_fl_gc(struct timer_list *unused); static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc); /* FL hash table lock: it protects only of GC */ static DEFINE_SPINLOCK(ip6_fl_lock); /* Big socket sock */ static DEFINE_SPINLOCK(ip6_sk_fl_lock); DEFINE_STATIC_KEY_DEFERRED_FALSE(ipv6_flowlabel_exclusive, HZ); EXPORT_SYMBOL(ipv6_flowlabel_exclusive); #define for_each_fl_rcu(hash, fl) \ for (fl = rcu_dereference(fl_ht[(hash)]); \ fl != NULL; \ fl = rcu_dereference(fl->next)) #define for_each_fl_continue_rcu(fl) \ for (fl = rcu_dereference(fl->next); \ fl != NULL; \ fl = rcu_dereference(fl->next)) #define for_each_sk_fl_rcu(np, sfl) \ for (sfl = rcu_dereference(np->ipv6_fl_list); \ sfl != NULL; \ sfl = rcu_dereference(sfl->next)) static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label) { struct ip6_flowlabel *fl; for_each_fl_rcu(FL_HASH(label), fl) { if (fl->label == label && net_eq(fl->fl_net, net)) return fl; } return NULL; } static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) { struct ip6_flowlabel *fl; rcu_read_lock(); fl = __fl_lookup(net, label); if (fl && !atomic_inc_not_zero(&fl->users)) fl = NULL; rcu_read_unlock(); return fl; } static bool fl_shared_exclusive(struct ip6_flowlabel *fl) { return fl->share == IPV6_FL_S_EXCL || fl->share == IPV6_FL_S_PROCESS || fl->share == IPV6_FL_S_USER; } static void fl_free_rcu(struct rcu_head *head) { struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu); if (fl->share == IPV6_FL_S_PROCESS) put_pid(fl->owner.pid); kfree(fl->opt); kfree(fl); } static void fl_free(struct ip6_flowlabel *fl) { if (!fl) return; if (fl_shared_exclusive(fl) || fl->opt) static_branch_slow_dec_deferred(&ipv6_flowlabel_exclusive); call_rcu(&fl->rcu, fl_free_rcu); } static void fl_release(struct ip6_flowlabel *fl) { spin_lock_bh(&ip6_fl_lock); fl->lastuse = jiffies; if (atomic_dec_and_test(&fl->users)) { unsigned long ttd = fl->lastuse + fl->linger; if (time_after(ttd, fl->expires)) fl->expires = ttd; ttd = fl->expires; if (fl->opt && fl->share == IPV6_FL_S_EXCL) { struct ipv6_txoptions *opt = fl->opt; fl->opt = NULL; kfree(opt); } if (!timer_pending(&ip6_fl_gc_timer) || time_after(ip6_fl_gc_timer.expires, ttd)) mod_timer(&ip6_fl_gc_timer, ttd); } spin_unlock_bh(&ip6_fl_lock); } static void ip6_fl_gc(struct timer_list *unused) { int i; unsigned long now = jiffies; unsigned long sched = 0; spin_lock(&ip6_fl_lock); for (i = 0; i <= FL_HASH_MASK; i++) { struct ip6_flowlabel *fl; struct ip6_flowlabel __rcu **flp; flp = &fl_ht[i]; while ((fl = rcu_dereference_protected(*flp, lockdep_is_held(&ip6_fl_lock))) != NULL) { if (atomic_read(&fl->users) == 0) { unsigned long ttd = fl->lastuse + fl->linger; if (time_after(ttd, fl->expires)) fl->expires = ttd; ttd = fl->expires; if (time_after_eq(now, ttd)) { *flp = fl->next; fl_free(fl); atomic_dec(&fl_size); continue; } if (!sched || time_before(ttd, sched)) sched = ttd; } flp = &fl->next; } } if (!sched && atomic_read(&fl_size)) sched = now + FL_MAX_LINGER; if (sched) { mod_timer(&ip6_fl_gc_timer, sched); } spin_unlock(&ip6_fl_lock); } static void __net_exit ip6_fl_purge(struct net *net) { int i; spin_lock_bh(&ip6_fl_lock); for (i = 0; i <= FL_HASH_MASK; i++) { struct ip6_flowlabel *fl; struct ip6_flowlabel __rcu **flp; flp = &fl_ht[i]; while ((fl = rcu_dereference_protected(*flp, lockdep_is_held(&ip6_fl_lock))) != NULL) { if (net_eq(fl->fl_net, net) && atomic_read(&fl->users) == 0) { *flp = fl->next; fl_free(fl); atomic_dec(&fl_size); continue; } flp = &fl->next; } } spin_unlock_bh(&ip6_fl_lock); } static struct ip6_flowlabel *fl_intern(struct net *net, struct ip6_flowlabel *fl, __be32 label) { struct ip6_flowlabel *lfl; fl->label = label & IPV6_FLOWLABEL_MASK; rcu_read_lock(); spin_lock_bh(&ip6_fl_lock); if (label == 0) { for (;;) { fl->label = htonl(get_random_u32())&IPV6_FLOWLABEL_MASK; if (fl->label) { lfl = __fl_lookup(net, fl->label); if (!lfl) break; } } } else { /* * we dropper the ip6_fl_lock, so this entry could reappear * and we need to recheck with it. * * OTOH no need to search the active socket first, like it is * done in ipv6_flowlabel_opt - sock is locked, so new entry * with the same label can only appear on another sock */ lfl = __fl_lookup(net, fl->label); if (lfl) { atomic_inc(&lfl->users); spin_unlock_bh(&ip6_fl_lock); rcu_read_unlock(); return lfl; } } fl->lastuse = jiffies; fl->next = fl_ht[FL_HASH(fl->label)]; rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl); atomic_inc(&fl_size); spin_unlock_bh(&ip6_fl_lock); rcu_read_unlock(); return NULL; } /* Socket flowlabel lists */ struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label) { struct ipv6_fl_socklist *sfl; struct ipv6_pinfo *np = inet6_sk(sk); label &= IPV6_FLOWLABEL_MASK; rcu_read_lock(); for_each_sk_fl_rcu(np, sfl) { struct ip6_flowlabel *fl = sfl->fl; if (fl->label == label && atomic_inc_not_zero(&fl->users)) { fl->lastuse = jiffies; rcu_read_unlock(); return fl; } } rcu_read_unlock(); return NULL; } EXPORT_SYMBOL_GPL(__fl6_sock_lookup); void fl6_free_socklist(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_fl_socklist *sfl; if (!rcu_access_pointer(np->ipv6_fl_list)) return; spin_lock_bh(&ip6_sk_fl_lock); while ((sfl = rcu_dereference_protected(np->ipv6_fl_list, lockdep_is_held(&ip6_sk_fl_lock))) != NULL) { np->ipv6_fl_list = sfl->next; spin_unlock_bh(&ip6_sk_fl_lock); fl_release(sfl->fl); kfree_rcu(sfl, rcu); spin_lock_bh(&ip6_sk_fl_lock); } spin_unlock_bh(&ip6_sk_fl_lock); } /* Service routines */ /* It is the only difficult place. flowlabel enforces equal headers before and including routing header, however user may supply options following rthdr. */ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, struct ip6_flowlabel *fl, struct ipv6_txoptions *fopt) { struct ipv6_txoptions *fl_opt = fl->opt; if (!fopt || fopt->opt_flen == 0) return fl_opt; if (fl_opt) { opt_space->hopopt = fl_opt->hopopt; opt_space->dst0opt = fl_opt->dst0opt; opt_space->srcrt = fl_opt->srcrt; opt_space->opt_nflen = fl_opt->opt_nflen; } else { if (fopt->opt_nflen == 0) return fopt; opt_space->hopopt = NULL; opt_space->dst0opt = NULL; opt_space->srcrt = NULL; opt_space->opt_nflen = 0; } opt_space->dst1opt = fopt->dst1opt; opt_space->opt_flen = fopt->opt_flen; opt_space->tot_len = fopt->tot_len; return opt_space; } EXPORT_SYMBOL_GPL(fl6_merge_options); static unsigned long check_linger(unsigned long ttl) { if (ttl < FL_MIN_LINGER) return FL_MIN_LINGER*HZ; if (ttl > FL_MAX_LINGER && !capable(CAP_NET_ADMIN)) return 0; return ttl*HZ; } static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires) { linger = check_linger(linger); if (!linger) return -EPERM; expires = check_linger(expires); if (!expires) return -EPERM; spin_lock_bh(&ip6_fl_lock); fl->lastuse = jiffies; if (time_before(fl->linger, linger)) fl->linger = linger; if (time_before(expires, fl->linger)) expires = fl->linger; if (time_before(fl->expires, fl->lastuse + expires)) fl->expires = fl->lastuse + expires; spin_unlock_bh(&ip6_fl_lock); return 0; } static struct ip6_flowlabel * fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, sockptr_t optval, int optlen, int *err_p) { struct ip6_flowlabel *fl = NULL; int olen; int addr_type; int err; olen = optlen - CMSG_ALIGN(sizeof(*freq)); err = -EINVAL; if (olen > 64 * 1024) goto done; err = -ENOMEM; fl = kzalloc(sizeof(*fl), GFP_KERNEL); if (!fl) goto done; if (olen > 0) { struct msghdr msg; struct flowi6 flowi6; struct ipcm6_cookie ipc6; err = -ENOMEM; fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL); if (!fl->opt) goto done; memset(fl->opt, 0, sizeof(*fl->opt)); fl->opt->tot_len = sizeof(*fl->opt) + olen; err = -EFAULT; if (copy_from_sockptr_offset(fl->opt + 1, optval, CMSG_ALIGN(sizeof(*freq)), olen)) goto done; msg.msg_controllen = olen; msg.msg_control = (void *)(fl->opt+1); memset(&flowi6, 0, sizeof(flowi6)); ipc6.opt = fl->opt; err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6); if (err) goto done; err = -EINVAL; if (fl->opt->opt_flen) goto done; if (fl->opt->opt_nflen == 0) { kfree(fl->opt); fl->opt = NULL; } } fl->fl_net = net; fl->expires = jiffies; err = fl6_renew(fl, freq->flr_linger, freq->flr_expires); if (err) goto done; fl->share = freq->flr_share; addr_type = ipv6_addr_type(&freq->flr_dst); if ((addr_type & IPV6_ADDR_MAPPED) || addr_type == IPV6_ADDR_ANY) { err = -EINVAL; goto done; } fl->dst = freq->flr_dst; atomic_set(&fl->users, 1); switch (fl->share) { case IPV6_FL_S_EXCL: case IPV6_FL_S_ANY: break; case IPV6_FL_S_PROCESS: fl->owner.pid = get_task_pid(current, PIDTYPE_PID); break; case IPV6_FL_S_USER: fl->owner.uid = current_euid(); break; default: err = -EINVAL; goto done; } if (fl_shared_exclusive(fl) || fl->opt) { WRITE_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl, 1); static_branch_deferred_inc(&ipv6_flowlabel_exclusive); } return fl; done: if (fl) { kfree(fl->opt); kfree(fl); } *err_p = err; return NULL; } static int mem_check(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_fl_socklist *sfl; int room = FL_MAX_SIZE - atomic_read(&fl_size); int count = 0; if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) return 0; rcu_read_lock(); for_each_sk_fl_rcu(np, sfl) count++; rcu_read_unlock(); if (room <= 0 || ((count >= FL_MAX_PER_SOCK || (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) && !capable(CAP_NET_ADMIN))) return -ENOBUFS; return 0; } static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, struct ip6_flowlabel *fl) { spin_lock_bh(&ip6_sk_fl_lock); sfl->fl = fl; sfl->next = np->ipv6_fl_list; rcu_assign_pointer(np->ipv6_fl_list, sfl); spin_unlock_bh(&ip6_sk_fl_lock); } int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, int flags) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_fl_socklist *sfl; if (flags & IPV6_FL_F_REMOTE) { freq->flr_label = np->rcv_flowinfo & IPV6_FLOWLABEL_MASK; return 0; } if (inet6_test_bit(REPFLOW, sk)) { freq->flr_label = np->flow_label; return 0; } rcu_read_lock(); for_each_sk_fl_rcu(np, sfl) { if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) { spin_lock_bh(&ip6_fl_lock); freq->flr_label = sfl->fl->label; freq->flr_dst = sfl->fl->dst; freq->flr_share = sfl->fl->share; freq->flr_expires = (sfl->fl->expires - jiffies) / HZ; freq->flr_linger = sfl->fl->linger / HZ; spin_unlock_bh(&ip6_fl_lock); rcu_read_unlock(); return 0; } } rcu_read_unlock(); return -ENOENT; } #define socklist_dereference(__sflp) \ rcu_dereference_protected(__sflp, lockdep_is_held(&ip6_sk_fl_lock)) static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_fl_socklist __rcu **sflp; struct ipv6_fl_socklist *sfl; if (freq->flr_flags & IPV6_FL_F_REFLECT) { if (sk->sk_protocol != IPPROTO_TCP) return -ENOPROTOOPT; if (!inet6_test_bit(REPFLOW, sk)) return -ESRCH; np->flow_label = 0; inet6_clear_bit(REPFLOW, sk); return 0; } spin_lock_bh(&ip6_sk_fl_lock); for (sflp = &np->ipv6_fl_list; (sfl = socklist_dereference(*sflp)) != NULL; sflp = &sfl->next) { if (sfl->fl->label == freq->flr_label) goto found; } spin_unlock_bh(&ip6_sk_fl_lock); return -ESRCH; found: if (freq->flr_label == (np->flow_label & IPV6_FLOWLABEL_MASK)) np->flow_label &= ~IPV6_FLOWLABEL_MASK; *sflp = sfl->next; spin_unlock_bh(&ip6_sk_fl_lock); fl_release(sfl->fl); kfree_rcu(sfl, rcu); return 0; } static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq) { struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6_fl_socklist *sfl; int err; rcu_read_lock(); for_each_sk_fl_rcu(np, sfl) { if (sfl->fl->label == freq->flr_label) { err = fl6_renew(sfl->fl, freq->flr_linger, freq->flr_expires); rcu_read_unlock(); return err; } } rcu_read_unlock(); if (freq->flr_share == IPV6_FL_S_NONE && ns_capable(net->user_ns, CAP_NET_ADMIN)) { struct ip6_flowlabel *fl = fl_lookup(net, freq->flr_label); if (fl) { err = fl6_renew(fl, freq->flr_linger, freq->flr_expires); fl_release(fl); return err; } } return -ESRCH; } static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq, sockptr_t optval, int optlen) { struct ipv6_fl_socklist *sfl, *sfl1 = NULL; struct ip6_flowlabel *fl, *fl1 = NULL; struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); int err; if (freq->flr_flags & IPV6_FL_F_REFLECT) { if (net->ipv6.sysctl.flowlabel_consistency) { net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n"); return -EPERM; } if (sk->sk_protocol != IPPROTO_TCP) return -ENOPROTOOPT; inet6_set_bit(REPFLOW, sk); return 0; } if (freq->flr_label & ~IPV6_FLOWLABEL_MASK) return -EINVAL; if (net->ipv6.sysctl.flowlabel_state_ranges && (freq->flr_label & IPV6_FLOWLABEL_STATELESS_FLAG)) return -ERANGE; fl = fl_create(net, sk, freq, optval, optlen, &err); if (!fl) return err; sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL); if (freq->flr_label) { err = -EEXIST; rcu_read_lock(); for_each_sk_fl_rcu(np, sfl) { if (sfl->fl->label == freq->flr_label) { if (freq->flr_flags & IPV6_FL_F_EXCL) { rcu_read_unlock(); goto done; } fl1 = sfl->fl; if (!atomic_inc_not_zero(&fl1->users)) fl1 = NULL; break; } } rcu_read_unlock(); if (!fl1) fl1 = fl_lookup(net, freq->flr_label); if (fl1) { recheck: err = -EEXIST; if (freq->flr_flags&IPV6_FL_F_EXCL) goto release; err = -EPERM; if (fl1->share == IPV6_FL_S_EXCL || fl1->share != fl->share || ((fl1->share == IPV6_FL_S_PROCESS) && (fl1->owner.pid != fl->owner.pid)) || ((fl1->share == IPV6_FL_S_USER) && !uid_eq(fl1->owner.uid, fl->owner.uid))) goto release; err = -ENOMEM; if (!sfl1) goto release; if (fl->linger > fl1->linger) fl1->linger = fl->linger; if ((long)(fl->expires - fl1->expires) > 0) fl1->expires = fl->expires; fl_link(np, sfl1, fl1); fl_free(fl); return 0; release: fl_release(fl1); goto done; } } err = -ENOENT; if (!(freq->flr_flags & IPV6_FL_F_CREATE)) goto done; err = -ENOMEM; if (!sfl1) goto done; err = mem_check(sk); if (err != 0) goto done; fl1 = fl_intern(net, fl, freq->flr_label); if (fl1) goto recheck; if (!freq->flr_label) { size_t offset = offsetof(struct in6_flowlabel_req, flr_label); if (copy_to_sockptr_offset(optval, offset, &fl->label, sizeof(fl->label))) { /* Intentionally ignore fault. */ } } fl_link(np, sfl1, fl); return 0; done: fl_free(fl); kfree(sfl1); return err; } int ipv6_flowlabel_opt(struct sock *sk, sockptr_t optval, int optlen) { struct in6_flowlabel_req freq; if (optlen < sizeof(freq)) return -EINVAL; if (copy_from_sockptr(&freq, optval, sizeof(freq))) return -EFAULT; switch (freq.flr_action) { case IPV6_FL_A_PUT: return ipv6_flowlabel_put(sk, &freq); case IPV6_FL_A_RENEW: return ipv6_flowlabel_renew(sk, &freq); case IPV6_FL_A_GET: return ipv6_flowlabel_get(sk, &freq, optval, optlen); default: return -EINVAL; } } #ifdef CONFIG_PROC_FS struct ip6fl_iter_state { struct seq_net_private p; struct pid_namespace *pid_ns; int bucket; }; #define ip6fl_seq_private(seq) ((struct ip6fl_iter_state *)(seq)->private) static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq) { struct ip6_flowlabel *fl = NULL; struct ip6fl_iter_state *state = ip6fl_seq_private(seq); struct net *net = seq_file_net(seq); for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) { for_each_fl_rcu(state->bucket, fl) { if (net_eq(fl->fl_net, net)) goto out; } } fl = NULL; out: return fl; } static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl) { struct ip6fl_iter_state *state = ip6fl_seq_private(seq); struct net *net = seq_file_net(seq); for_each_fl_continue_rcu(fl) { if (net_eq(fl->fl_net, net)) goto out; } try_again: if (++state->bucket <= FL_HASH_MASK) { for_each_fl_rcu(state->bucket, fl) { if (net_eq(fl->fl_net, net)) goto out; } goto try_again; } fl = NULL; out: return fl; } static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos) { struct ip6_flowlabel *fl = ip6fl_get_first(seq); if (fl) while (pos && (fl = ip6fl_get_next(seq, fl)) != NULL) --pos; return pos ? NULL : fl; } static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { struct ip6fl_iter_state *state = ip6fl_seq_private(seq); state->pid_ns = proc_pid_ns(file_inode(seq->file)->i_sb); rcu_read_lock(); return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip6_flowlabel *fl; if (v == SEQ_START_TOKEN) fl = ip6fl_get_first(seq); else fl = ip6fl_get_next(seq, v); ++*pos; return fl; } static void ip6fl_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static int ip6fl_seq_show(struct seq_file *seq, void *v) { struct ip6fl_iter_state *state = ip6fl_seq_private(seq); if (v == SEQ_START_TOKEN) { seq_puts(seq, "Label S Owner Users Linger Expires Dst Opt\n"); } else { struct ip6_flowlabel *fl = v; seq_printf(seq, "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n", (unsigned int)ntohl(fl->label), fl->share, ((fl->share == IPV6_FL_S_PROCESS) ? pid_nr_ns(fl->owner.pid, state->pid_ns) : ((fl->share == IPV6_FL_S_USER) ? from_kuid_munged(seq_user_ns(seq), fl->owner.uid) : 0)), atomic_read(&fl->users), fl->linger/HZ, (long)(fl->expires - jiffies)/HZ, &fl->dst, fl->opt ? fl->opt->opt_nflen : 0); } return 0; } static const struct seq_operations ip6fl_seq_ops = { .start = ip6fl_seq_start, .next = ip6fl_seq_next, .stop = ip6fl_seq_stop, .show = ip6fl_seq_show, }; static int __net_init ip6_flowlabel_proc_init(struct net *net) { if (!proc_create_net("ip6_flowlabel", 0444, net->proc_net, &ip6fl_seq_ops, sizeof(struct ip6fl_iter_state))) return -ENOMEM; return 0; } static void __net_exit ip6_flowlabel_proc_fini(struct net *net) { remove_proc_entry("ip6_flowlabel", net->proc_net); } #else static inline int ip6_flowlabel_proc_init(struct net *net) { return 0; } static inline void ip6_flowlabel_proc_fini(struct net *net) { } #endif static void __net_exit ip6_flowlabel_net_exit(struct net *net) { ip6_fl_purge(net); ip6_flowlabel_proc_fini(net); } static struct pernet_operations ip6_flowlabel_net_ops = { .init = ip6_flowlabel_proc_init, .exit = ip6_flowlabel_net_exit, }; int ip6_flowlabel_init(void) { return register_pernet_subsys(&ip6_flowlabel_net_ops); } void ip6_flowlabel_cleanup(void) { static_key_deferred_flush(&ipv6_flowlabel_exclusive); del_timer(&ip6_fl_gc_timer); unregister_pernet_subsys(&ip6_flowlabel_net_ops); }
28 27 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 // SPDX-License-Identifier: GPL-2.0 /* Lock down the kernel * * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public Licence * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. */ #include <linux/security.h> #include <linux/export.h> #include <linux/lsm_hooks.h> #include <uapi/linux/lsm.h> static enum lockdown_reason kernel_locked_down; static const enum lockdown_reason lockdown_levels[] = {LOCKDOWN_NONE, LOCKDOWN_INTEGRITY_MAX, LOCKDOWN_CONFIDENTIALITY_MAX}; /* * Put the kernel into lock-down mode. */ static int lock_kernel_down(const char *where, enum lockdown_reason level) { if (kernel_locked_down >= level) return -EPERM; kernel_locked_down = level; pr_notice("Kernel is locked down from %s; see man kernel_lockdown.7\n", where); return 0; } static int __init lockdown_param(char *level) { if (!level) return -EINVAL; if (strcmp(level, "integrity") == 0) lock_kernel_down("command line", LOCKDOWN_INTEGRITY_MAX); else if (strcmp(level, "confidentiality") == 0) lock_kernel_down("command line", LOCKDOWN_CONFIDENTIALITY_MAX); else return -EINVAL; return 0; } early_param("lockdown", lockdown_param); /** * lockdown_is_locked_down - Find out if the kernel is locked down * @what: Tag to use in notice generated if lockdown is in effect */ static int lockdown_is_locked_down(enum lockdown_reason what) { if (WARN(what >= LOCKDOWN_CONFIDENTIALITY_MAX, "Invalid lockdown reason")) return -EPERM; if (kernel_locked_down >= what) { if (lockdown_reasons[what]) pr_notice_ratelimited("Lockdown: %s: %s is restricted; see man kernel_lockdown.7\n", current->comm, lockdown_reasons[what]); return -EPERM; } return 0; } static struct security_hook_list lockdown_hooks[] __ro_after_init = { LSM_HOOK_INIT(locked_down, lockdown_is_locked_down), }; const struct lsm_id lockdown_lsmid = { .name = "lockdown", .id = LSM_ID_LOCKDOWN, }; static int __init lockdown_lsm_init(void) { #if defined(CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY) lock_kernel_down("Kernel configuration", LOCKDOWN_INTEGRITY_MAX); #elif defined(CONFIG_LOCK_DOWN_KERNEL_FORCE_CONFIDENTIALITY) lock_kernel_down("Kernel configuration", LOCKDOWN_CONFIDENTIALITY_MAX); #endif security_add_hooks(lockdown_hooks, ARRAY_SIZE(lockdown_hooks), &lockdown_lsmid); return 0; } static ssize_t lockdown_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { char temp[80]; int i, offset = 0; for (i = 0; i < ARRAY_SIZE(lockdown_levels); i++) { enum lockdown_reason level = lockdown_levels[i]; if (lockdown_reasons[level]) { const char *label = lockdown_reasons[level]; if (kernel_locked_down == level) offset += sprintf(temp+offset, "[%s] ", label); else offset += sprintf(temp+offset, "%s ", label); } } /* Convert the last space to a newline if needed. */ if (offset > 0) temp[offset-1] = '\n'; return simple_read_from_buffer(buf, count, ppos, temp, strlen(temp)); } static ssize_t lockdown_write(struct file *file, const char __user *buf, size_t n, loff_t *ppos) { char *state; int i, len, err = -EINVAL; state = memdup_user_nul(buf, n); if (IS_ERR(state)) return PTR_ERR(state); len = strlen(state); if (len && state[len-1] == '\n') { state[len-1] = '\0'; len--; } for (i = 0; i < ARRAY_SIZE(lockdown_levels); i++) { enum lockdown_reason level = lockdown_levels[i]; const char *label = lockdown_reasons[level]; if (label && !strcmp(state, label)) err = lock_kernel_down("securityfs", level); } kfree(state); return err ? err : n; } static const struct file_operations lockdown_ops = { .read = lockdown_read, .write = lockdown_write, }; static int __init lockdown_secfs_init(void) { struct dentry *dentry; dentry = securityfs_create_file("lockdown", 0644, NULL, NULL, &lockdown_ops); return PTR_ERR_OR_ZERO(dentry); } core_initcall(lockdown_secfs_init); #ifdef CONFIG_SECURITY_LOCKDOWN_LSM_EARLY DEFINE_EARLY_LSM(lockdown) = { #else DEFINE_LSM(lockdown) = { #endif .name = "lockdown", .init = lockdown_lsm_init, };
1526 1 2196 15 256 851 2149 2148 1 1 300 1 516 234 1 2158 2160 268 257 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PGTABLE_H #define _ASM_X86_PGTABLE_H #include <linux/mem_encrypt.h> #include <asm/page.h> #include <asm/pgtable_types.h> /* * Macro to mark a page protection value as UC- */ #define pgprot_noncached(prot) \ ((boot_cpu_data.x86 > 3) \ ? (__pgprot(pgprot_val(prot) | \ cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \ : (prot)) #ifndef __ASSEMBLY__ #include <linux/spinlock.h> #include <asm/x86_init.h> #include <asm/pkru.h> #include <asm/fpu/api.h> #include <asm/coco.h> #include <asm-generic/pgtable_uffd.h> #include <linux/page_table_check.h> extern pgd_t early_top_pgt[PTRS_PER_PGD]; bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd); struct seq_file; void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm); void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, bool user); bool ptdump_walk_pgd_level_checkwx(void); #define ptdump_check_wx ptdump_walk_pgd_level_checkwx void ptdump_walk_user_pgd_level_checkwx(void); /* * Macros to add or remove encryption attribute */ #define pgprot_encrypted(prot) __pgprot(cc_mkenc(pgprot_val(prot))) #define pgprot_decrypted(prot) __pgprot(cc_mkdec(pgprot_val(prot))) #ifdef CONFIG_DEBUG_WX #define debug_checkwx_user() ptdump_walk_user_pgd_level_checkwx() #else #define debug_checkwx_user() do { } while (0) #endif /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __visible; #define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page)) extern spinlock_t pgd_lock; extern struct list_head pgd_list; extern struct mm_struct *pgd_page_get_mm(struct page *page); extern pmdval_t early_pmd_flags; #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else /* !CONFIG_PARAVIRT_XXL */ #define set_pte(ptep, pte) native_set_pte(ptep, pte) #define set_pte_atomic(ptep, pte) \ native_set_pte_atomic(ptep, pte) #define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd) #ifndef __PAGETABLE_P4D_FOLDED #define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) #define pgd_clear(pgd) (pgtable_l5_enabled() ? native_pgd_clear(pgd) : 0) #endif #ifndef set_p4d # define set_p4d(p4dp, p4d) native_set_p4d(p4dp, p4d) #endif #ifndef __PAGETABLE_PUD_FOLDED #define p4d_clear(p4d) native_p4d_clear(p4d) #endif #ifndef set_pud # define set_pud(pudp, pud) native_set_pud(pudp, pud) #endif #ifndef __PAGETABLE_PUD_FOLDED #define pud_clear(pud) native_pud_clear(pud) #endif #define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep) #define pmd_clear(pmd) native_pmd_clear(pmd) #define pgd_val(x) native_pgd_val(x) #define __pgd(x) native_make_pgd(x) #ifndef __PAGETABLE_P4D_FOLDED #define p4d_val(x) native_p4d_val(x) #define __p4d(x) native_make_p4d(x) #endif #ifndef __PAGETABLE_PUD_FOLDED #define pud_val(x) native_pud_val(x) #define __pud(x) native_make_pud(x) #endif #ifndef __PAGETABLE_PMD_FOLDED #define pmd_val(x) native_pmd_val(x) #define __pmd(x) native_make_pmd(x) #endif #define pte_val(x) native_pte_val(x) #define __pte(x) native_make_pte(x) #define arch_end_context_switch(prev) do {} while(0) #endif /* CONFIG_PARAVIRT_XXL */ /* * The following only work if pte_present() is true. * Undefined behaviour if not.. */ static inline bool pte_dirty(pte_t pte) { return pte_flags(pte) & _PAGE_DIRTY_BITS; } static inline bool pte_shstk(pte_t pte) { return cpu_feature_enabled(X86_FEATURE_SHSTK) && (pte_flags(pte) & (_PAGE_RW | _PAGE_DIRTY)) == _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_flags(pte) & _PAGE_ACCESSED; } static inline bool pte_decrypted(pte_t pte) { return cc_mkdec(pte_val(pte)) == pte_val(pte); } #define pmd_dirty pmd_dirty static inline bool pmd_dirty(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_DIRTY_BITS; } static inline bool pmd_shstk(pmd_t pmd) { return cpu_feature_enabled(X86_FEATURE_SHSTK) && (pmd_flags(pmd) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) == (_PAGE_DIRTY | _PAGE_PSE); } #define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_ACCESSED; } static inline bool pud_dirty(pud_t pud) { return pud_flags(pud) & _PAGE_DIRTY_BITS; } static inline int pud_young(pud_t pud) { return pud_flags(pud) & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { /* * Shadow stack pages are logically writable, but do not have * _PAGE_RW. Check for them separately from _PAGE_RW itself. */ return (pte_flags(pte) & _PAGE_RW) || pte_shstk(pte); } #define pmd_write pmd_write static inline int pmd_write(pmd_t pmd) { /* * Shadow stack pages are logically writable, but do not have * _PAGE_RW. Check for them separately from _PAGE_RW itself. */ return (pmd_flags(pmd) & _PAGE_RW) || pmd_shstk(pmd); } #define pud_write pud_write static inline int pud_write(pud_t pud) { return pud_flags(pud) & _PAGE_RW; } static inline int pte_huge(pte_t pte) { return pte_flags(pte) & _PAGE_PSE; } static inline int pte_global(pte_t pte) { return pte_flags(pte) & _PAGE_GLOBAL; } static inline int pte_exec(pte_t pte) { return !(pte_flags(pte) & _PAGE_NX); } static inline int pte_special(pte_t pte) { return pte_flags(pte) & _PAGE_SPECIAL; } /* Entries that were set to PROT_NONE are inverted */ static inline u64 protnone_mask(u64 val); #define PFN_PTE_SHIFT PAGE_SHIFT static inline unsigned long pte_pfn(pte_t pte) { phys_addr_t pfn = pte_val(pte); pfn ^= protnone_mask(pfn); return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT; } static inline unsigned long pmd_pfn(pmd_t pmd) { phys_addr_t pfn = pmd_val(pmd); pfn ^= protnone_mask(pfn); return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT; } #define pud_pfn pud_pfn static inline unsigned long pud_pfn(pud_t pud) { phys_addr_t pfn = pud_val(pud); pfn ^= protnone_mask(pfn); return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT; } static inline unsigned long p4d_pfn(p4d_t p4d) { return (p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT; } static inline unsigned long pgd_pfn(pgd_t pgd) { return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT; } #define p4d_leaf p4d_leaf static inline bool p4d_leaf(p4d_t p4d) { /* No 512 GiB pages yet */ return 0; } #define pte_page(pte) pfn_to_page(pte_pfn(pte)) #define pmd_leaf pmd_leaf static inline bool pmd_leaf(pmd_t pte) { return pmd_flags(pte) & _PAGE_PSE; } #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* NOTE: when predicate huge page, consider also pmd_devmap, or use pmd_leaf */ static inline int pmd_trans_huge(pmd_t pmd) { return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static inline int pud_trans_huge(pud_t pud) { return (pud_val(pud) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; } #endif #define has_transparent_hugepage has_transparent_hugepage static inline int has_transparent_hugepage(void) { return boot_cpu_has(X86_FEATURE_PSE); } #ifdef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pmd_devmap(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_DEVMAP); } #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static inline int pud_devmap(pud_t pud) { return !!(pud_val(pud) & _PAGE_DEVMAP); } #else static inline int pud_devmap(pud_t pud) { return 0; } #endif static inline int pgd_devmap(pgd_t pgd) { return 0; } #endif #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline pte_t pte_set_flags(pte_t pte, pteval_t set) { pteval_t v = native_pte_val(pte); return native_make_pte(v | set); } static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear) { pteval_t v = native_pte_val(pte); return native_make_pte(v & ~clear); } /* * Write protection operations can result in Dirty=1,Write=0 PTEs. But in the * case of X86_FEATURE_USER_SHSTK, these PTEs denote shadow stack memory. So * when creating dirty, write-protected memory, a software bit is used: * _PAGE_BIT_SAVED_DIRTY. The following functions take a PTE and transition the * Dirty bit to SavedDirty, and vice-vesra. * * This shifting is only done if needed. In the case of shifting * Dirty->SavedDirty, the condition is if the PTE is Write=0. In the case of * shifting SavedDirty->Dirty, the condition is Write=1. */ static inline pgprotval_t mksaveddirty_shift(pgprotval_t v) { pgprotval_t cond = (~v >> _PAGE_BIT_RW) & 1; v |= ((v >> _PAGE_BIT_DIRTY) & cond) << _PAGE_BIT_SAVED_DIRTY; v &= ~(cond << _PAGE_BIT_DIRTY); return v; } static inline pgprotval_t clear_saveddirty_shift(pgprotval_t v) { pgprotval_t cond = (v >> _PAGE_BIT_RW) & 1; v |= ((v >> _PAGE_BIT_SAVED_DIRTY) & cond) << _PAGE_BIT_DIRTY; v &= ~(cond << _PAGE_BIT_SAVED_DIRTY); return v; } static inline pte_t pte_mksaveddirty(pte_t pte) { pteval_t v = native_pte_val(pte); v = mksaveddirty_shift(v); return native_make_pte(v); } static inline pte_t pte_clear_saveddirty(pte_t pte) { pteval_t v = native_pte_val(pte); v = clear_saveddirty_shift(v); return native_make_pte(v); } static inline pte_t pte_wrprotect(pte_t pte) { pte = pte_clear_flags(pte, _PAGE_RW); /* * Blindly clearing _PAGE_RW might accidentally create * a shadow stack PTE (Write=0,Dirty=1). Move the hardware * dirty value to the software bit, if present. */ return pte_mksaveddirty(pte); } #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP static inline int pte_uffd_wp(pte_t pte) { return pte_flags(pte) & _PAGE_UFFD_WP; } static inline pte_t pte_mkuffd_wp(pte_t pte) { return pte_wrprotect(pte_set_flags(pte, _PAGE_UFFD_WP)); } static inline pte_t pte_clear_uffd_wp(pte_t pte) { return pte_clear_flags(pte, _PAGE_UFFD_WP); } #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ static inline pte_t pte_mkclean(pte_t pte) { return pte_clear_flags(pte, _PAGE_DIRTY_BITS); } static inline pte_t pte_mkold(pte_t pte) { return pte_clear_flags(pte, _PAGE_ACCESSED); } static inline pte_t pte_mkexec(pte_t pte) { return pte_clear_flags(pte, _PAGE_NX); } static inline pte_t pte_mkdirty(pte_t pte) { pte = pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); return pte_mksaveddirty(pte); } static inline pte_t pte_mkwrite_shstk(pte_t pte) { pte = pte_clear_flags(pte, _PAGE_RW); return pte_set_flags(pte, _PAGE_DIRTY); } static inline pte_t pte_mkyoung(pte_t pte) { return pte_set_flags(pte, _PAGE_ACCESSED); } static inline pte_t pte_mkwrite_novma(pte_t pte) { return pte_set_flags(pte, _PAGE_RW); } struct vm_area_struct; pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma); #define pte_mkwrite pte_mkwrite static inline pte_t pte_mkhuge(pte_t pte) { return pte_set_flags(pte, _PAGE_PSE); } static inline pte_t pte_clrhuge(pte_t pte) { return pte_clear_flags(pte, _PAGE_PSE); } static inline pte_t pte_mkglobal(pte_t pte) { return pte_set_flags(pte, _PAGE_GLOBAL); } static inline pte_t pte_clrglobal(pte_t pte) { return pte_clear_flags(pte, _PAGE_GLOBAL); } static inline pte_t pte_mkspecial(pte_t pte) { return pte_set_flags(pte, _PAGE_SPECIAL); } static inline pte_t pte_mkdevmap(pte_t pte) { return pte_set_flags(pte, _PAGE_SPECIAL|_PAGE_DEVMAP); } static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) { pmdval_t v = native_pmd_val(pmd); return native_make_pmd(v | set); } static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) { pmdval_t v = native_pmd_val(pmd); return native_make_pmd(v & ~clear); } /* See comments above mksaveddirty_shift() */ static inline pmd_t pmd_mksaveddirty(pmd_t pmd) { pmdval_t v = native_pmd_val(pmd); v = mksaveddirty_shift(v); return native_make_pmd(v); } /* See comments above mksaveddirty_shift() */ static inline pmd_t pmd_clear_saveddirty(pmd_t pmd) { pmdval_t v = native_pmd_val(pmd); v = clear_saveddirty_shift(v); return native_make_pmd(v); } static inline pmd_t pmd_wrprotect(pmd_t pmd) { pmd = pmd_clear_flags(pmd, _PAGE_RW); /* * Blindly clearing _PAGE_RW might accidentally create * a shadow stack PMD (RW=0, Dirty=1). Move the hardware * dirty value to the software bit. */ return pmd_mksaveddirty(pmd); } #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP static inline int pmd_uffd_wp(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_UFFD_WP; } static inline pmd_t pmd_mkuffd_wp(pmd_t pmd) { return pmd_wrprotect(pmd_set_flags(pmd, _PAGE_UFFD_WP)); } static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_UFFD_WP); } #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ static inline pmd_t pmd_mkold(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_ACCESSED); } static inline pmd_t pmd_mkclean(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_DIRTY_BITS); } static inline pmd_t pmd_mkdirty(pmd_t pmd) { pmd = pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); return pmd_mksaveddirty(pmd); } static inline pmd_t pmd_mkwrite_shstk(pmd_t pmd) { pmd = pmd_clear_flags(pmd, _PAGE_RW); return pmd_set_flags(pmd, _PAGE_DIRTY); } static inline pmd_t pmd_mkdevmap(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_DEVMAP); } static inline pmd_t pmd_mkhuge(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_PSE); } static inline pmd_t pmd_mkyoung(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_ACCESSED); } static inline pmd_t pmd_mkwrite_novma(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_RW); } pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); #define pmd_mkwrite pmd_mkwrite static inline pud_t pud_set_flags(pud_t pud, pudval_t set) { pudval_t v = native_pud_val(pud); return native_make_pud(v | set); } static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) { pudval_t v = native_pud_val(pud); return native_make_pud(v & ~clear); } /* See comments above mksaveddirty_shift() */ static inline pud_t pud_mksaveddirty(pud_t pud) { pudval_t v = native_pud_val(pud); v = mksaveddirty_shift(v); return native_make_pud(v); } /* See comments above mksaveddirty_shift() */ static inline pud_t pud_clear_saveddirty(pud_t pud) { pudval_t v = native_pud_val(pud); v = clear_saveddirty_shift(v); return native_make_pud(v); } static inline pud_t pud_mkold(pud_t pud) { return pud_clear_flags(pud, _PAGE_ACCESSED); } static inline pud_t pud_mkclean(pud_t pud) { return pud_clear_flags(pud, _PAGE_DIRTY_BITS); } static inline pud_t pud_wrprotect(pud_t pud) { pud = pud_clear_flags(pud, _PAGE_RW); /* * Blindly clearing _PAGE_RW might accidentally create * a shadow stack PUD (RW=0, Dirty=1). Move the hardware * dirty value to the software bit. */ return pud_mksaveddirty(pud); } static inline pud_t pud_mkdirty(pud_t pud) { pud = pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); return pud_mksaveddirty(pud); } static inline pud_t pud_mkdevmap(pud_t pud) { return pud_set_flags(pud, _PAGE_DEVMAP); } static inline pud_t pud_mkhuge(pud_t pud) { return pud_set_flags(pud, _PAGE_PSE); } static inline pud_t pud_mkyoung(pud_t pud) { return pud_set_flags(pud, _PAGE_ACCESSED); } static inline pud_t pud_mkwrite(pud_t pud) { pud = pud_set_flags(pud, _PAGE_RW); return pud_clear_saveddirty(pud); } #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline int pte_soft_dirty(pte_t pte) { return pte_flags(pte) & _PAGE_SOFT_DIRTY; } static inline int pmd_soft_dirty(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_SOFT_DIRTY; } static inline int pud_soft_dirty(pud_t pud) { return pud_flags(pud) & _PAGE_SOFT_DIRTY; } static inline pte_t pte_mksoft_dirty(pte_t pte) { return pte_set_flags(pte, _PAGE_SOFT_DIRTY); } static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); } static inline pud_t pud_mksoft_dirty(pud_t pud) { return pud_set_flags(pud, _PAGE_SOFT_DIRTY); } static inline pte_t pte_clear_soft_dirty(pte_t pte) { return pte_clear_flags(pte, _PAGE_SOFT_DIRTY); } static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY); } static inline pud_t pud_clear_soft_dirty(pud_t pud) { return pud_clear_flags(pud, _PAGE_SOFT_DIRTY); } #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ /* * Mask out unsupported bits in a present pgprot. Non-present pgprots * can use those bits for other purposes, so leave them be. */ static inline pgprotval_t massage_pgprot(pgprot_t pgprot) { pgprotval_t protval = pgprot_val(pgprot); if (protval & _PAGE_PRESENT) protval &= __supported_pte_mask; return protval; } static inline pgprotval_t check_pgprot(pgprot_t pgprot) { pgprotval_t massaged_val = massage_pgprot(pgprot); /* mmdebug.h can not be included here because of dependencies */ #ifdef CONFIG_DEBUG_VM WARN_ONCE(pgprot_val(pgprot) != massaged_val, "attempted to set unsupported pgprot: %016llx " "bits: %016llx supported: %016llx\n", (u64)pgprot_val(pgprot), (u64)pgprot_val(pgprot) ^ massaged_val, (u64)__supported_pte_mask); #endif return massaged_val; } static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; pfn ^= protnone_mask(pgprot_val(pgprot)); pfn &= PTE_PFN_MASK; return __pte(pfn | check_pgprot(pgprot)); } static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; pfn ^= protnone_mask(pgprot_val(pgprot)); pfn &= PHYSICAL_PMD_PAGE_MASK; return __pmd(pfn | check_pgprot(pgprot)); } static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot) { phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; pfn ^= protnone_mask(pgprot_val(pgprot)); pfn &= PHYSICAL_PUD_PAGE_MASK; return __pud(pfn | check_pgprot(pgprot)); } static inline pmd_t pmd_mkinvalid(pmd_t pmd) { return pfn_pmd(pmd_pfn(pmd), __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); } static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask); static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { pteval_t val = pte_val(pte), oldval = val; pte_t pte_result; /* * Chop off the NX bit (if present), and add the NX portion of * the newprot (if present): */ val &= _PAGE_CHG_MASK; val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK; val = flip_protnone_guard(oldval, val, PTE_PFN_MASK); pte_result = __pte(val); /* * To avoid creating Write=0,Dirty=1 PTEs, pte_modify() needs to avoid: * 1. Marking Write=0 PTEs Dirty=1 * 2. Marking Dirty=1 PTEs Write=0 * * The first case cannot happen because the _PAGE_CHG_MASK will filter * out any Dirty bit passed in newprot. Handle the second case by * going through the mksaveddirty exercise. Only do this if the old * value was Write=1 to avoid doing this on Shadow Stack PTEs. */ if (oldval & _PAGE_RW) pte_result = pte_mksaveddirty(pte_result); else pte_result = pte_clear_saveddirty(pte_result); return pte_result; } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { pmdval_t val = pmd_val(pmd), oldval = val; pmd_t pmd_result; val &= (_HPAGE_CHG_MASK & ~_PAGE_DIRTY); val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK; val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK); pmd_result = __pmd(val); /* * To avoid creating Write=0,Dirty=1 PMDs, pte_modify() needs to avoid: * 1. Marking Write=0 PMDs Dirty=1 * 2. Marking Dirty=1 PMDs Write=0 * * The first case cannot happen because the _PAGE_CHG_MASK will filter * out any Dirty bit passed in newprot. Handle the second case by * going through the mksaveddirty exercise. Only do this if the old * value was Write=1 to avoid doing this on Shadow Stack PTEs. */ if (oldval & _PAGE_RW) pmd_result = pmd_mksaveddirty(pmd_result); else pmd_result = pmd_clear_saveddirty(pmd_result); return pmd_result; } /* * mprotect needs to preserve PAT and encryption bits when updating * vm_page_prot */ #define pgprot_modify pgprot_modify static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) { pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK; pgprotval_t addbits = pgprot_val(newprot) & ~_PAGE_CHG_MASK; return __pgprot(preservebits | addbits); } #define pte_pgprot(x) __pgprot(pte_flags(x)) #define pmd_pgprot(x) __pgprot(pmd_flags(x)) #define pud_pgprot(x) __pgprot(pud_flags(x)) #define p4d_pgprot(x) __pgprot(p4d_flags(x)) #define canon_pgprot(p) __pgprot(massage_pgprot(p)) static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, enum page_cache_mode pcm, enum page_cache_mode new_pcm) { /* * PAT type is always WB for untracked ranges, so no need to check. */ if (x86_platform.is_untracked_pat_range(paddr, paddr + size)) return 1; /* * Certain new memtypes are not allowed with certain * requested memtype: * - request is uncached, return cannot be write-back * - request is write-combine, return cannot be write-back * - request is write-through, return cannot be write-back * - request is write-through, return cannot be write-combine */ if ((pcm == _PAGE_CACHE_MODE_UC_MINUS && new_pcm == _PAGE_CACHE_MODE_WB) || (pcm == _PAGE_CACHE_MODE_WC && new_pcm == _PAGE_CACHE_MODE_WB) || (pcm == _PAGE_CACHE_MODE_WT && new_pcm == _PAGE_CACHE_MODE_WB) || (pcm == _PAGE_CACHE_MODE_WT && new_pcm == _PAGE_CACHE_MODE_WC)) { return 0; } return 1; } pmd_t *populate_extra_pmd(unsigned long vaddr); pte_t *populate_extra_pte(unsigned long vaddr); #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd); /* * Take a PGD location (pgdp) and a pgd value that needs to be set there. * Populates the user and returns the resulting PGD that must be set in * the kernel copy of the page tables. */ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) { if (!static_cpu_has(X86_FEATURE_PTI)) return pgd; return __pti_set_user_pgtbl(pgdp, pgd); } #else /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) { return pgd; } #endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ #endif /* __ASSEMBLY__ */ #ifdef CONFIG_X86_32 # include <asm/pgtable_32.h> #else # include <asm/pgtable_64.h> #endif #ifndef __ASSEMBLY__ #include <linux/mm_types.h> #include <linux/mmdebug.h> #include <linux/log2.h> #include <asm/fixmap.h> static inline int pte_none(pte_t pte) { return !(pte.pte & ~(_PAGE_KNL_ERRATUM_MASK)); } #define __HAVE_ARCH_PTE_SAME static inline int pte_same(pte_t a, pte_t b) { return a.pte == b.pte; } static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr) { if (__pte_needs_invert(pte_val(pte))) return __pte(pte_val(pte) - (nr << PFN_PTE_SHIFT)); return __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT)); } #define pte_advance_pfn pte_advance_pfn static inline int pte_present(pte_t a) { return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); } #ifdef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pte_devmap(pte_t a) { return (pte_flags(a) & _PAGE_DEVMAP) == _PAGE_DEVMAP; } #endif #define pte_accessible pte_accessible static inline bool pte_accessible(struct mm_struct *mm, pte_t a) { if (pte_flags(a) & _PAGE_PRESENT) return true; if ((pte_flags(a) & _PAGE_PROTNONE) && atomic_read(&mm->tlb_flush_pending)) return true; return false; } static inline int pmd_present(pmd_t pmd) { /* * Checking for _PAGE_PSE is needed too because * split_huge_page will temporarily clear the present bit (but * the _PAGE_PSE flag will remain set at all times while the * _PAGE_PRESENT bit is clear). */ return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE); } #ifdef CONFIG_NUMA_BALANCING /* * These work without NUMA balancing but the kernel does not care. See the * comment in include/linux/pgtable.h */ static inline int pte_protnone(pte_t pte) { return (pte_flags(pte) & (_PAGE_PROTNONE | _PAGE_PRESENT)) == _PAGE_PROTNONE; } static inline int pmd_protnone(pmd_t pmd) { return (pmd_flags(pmd) & (_PAGE_PROTNONE | _PAGE_PRESENT)) == _PAGE_PROTNONE; } #endif /* CONFIG_NUMA_BALANCING */ static inline int pmd_none(pmd_t pmd) { /* Only check low word on 32-bit platforms, since it might be out of sync with upper half. */ unsigned long val = native_pmd_val(pmd); return (val & ~_PAGE_KNL_ERRATUM_MASK) == 0; } static inline unsigned long pmd_page_vaddr(pmd_t pmd) { return (unsigned long)__va(pmd_val(pmd) & pmd_pfn_mask(pmd)); } /* * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. * * (Currently stuck as a macro because of indirect forward reference * to linux/mm.h:page_to_nid()) */ #define mk_pte(page, pgprot) \ ({ \ pgprot_t __pgprot = pgprot; \ \ WARN_ON_ONCE((pgprot_val(__pgprot) & (_PAGE_DIRTY | _PAGE_RW)) == \ _PAGE_DIRTY); \ pfn_pte(page_to_pfn(page), __pgprot); \ }) static inline int pmd_bad(pmd_t pmd) { return (pmd_flags(pmd) & ~(_PAGE_USER | _PAGE_ACCESSED)) != (_KERNPG_TABLE & ~_PAGE_ACCESSED); } static inline unsigned long pages_to_mb(unsigned long npg) { return npg >> (20 - PAGE_SHIFT); } #if CONFIG_PGTABLE_LEVELS > 2 static inline int pud_none(pud_t pud) { return (native_pud_val(pud) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0; } static inline int pud_present(pud_t pud) { return pud_flags(pud) & _PAGE_PRESENT; } static inline pmd_t *pud_pgtable(pud_t pud) { return (pmd_t *)__va(pud_val(pud) & pud_pfn_mask(pud)); } /* * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ #define pud_page(pud) pfn_to_page(pud_pfn(pud)) #define pud_leaf pud_leaf static inline bool pud_leaf(pud_t pud) { return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == (_PAGE_PSE | _PAGE_PRESENT); } static inline int pud_bad(pud_t pud) { return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; } #endif /* CONFIG_PGTABLE_LEVELS > 2 */ #if CONFIG_PGTABLE_LEVELS > 3 static inline int p4d_none(p4d_t p4d) { return (native_p4d_val(p4d) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0; } static inline int p4d_present(p4d_t p4d) { return p4d_flags(p4d) & _PAGE_PRESENT; } static inline pud_t *p4d_pgtable(p4d_t p4d) { return (pud_t *)__va(p4d_val(p4d) & p4d_pfn_mask(p4d)); } /* * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ #define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d)) static inline int p4d_bad(p4d_t p4d) { unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER; if (IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) ignore_flags |= _PAGE_NX; return (p4d_flags(p4d) & ~ignore_flags) != 0; } #endif /* CONFIG_PGTABLE_LEVELS > 3 */ static inline unsigned long p4d_index(unsigned long address) { return (address >> P4D_SHIFT) & (PTRS_PER_P4D - 1); } #if CONFIG_PGTABLE_LEVELS > 4 static inline int pgd_present(pgd_t pgd) { if (!pgtable_l5_enabled()) return 1; return pgd_flags(pgd) & _PAGE_PRESENT; } static inline unsigned long pgd_page_vaddr(pgd_t pgd) { return (unsigned long)__va((unsigned long)pgd_val(pgd) & PTE_PFN_MASK); } /* * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ #define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd)) /* to find an entry in a page-table-directory. */ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) { if (!pgtable_l5_enabled()) return (p4d_t *)pgd; return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address); } static inline int pgd_bad(pgd_t pgd) { unsigned long ignore_flags = _PAGE_USER; if (!pgtable_l5_enabled()) return 0; if (IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) ignore_flags |= _PAGE_NX; return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE; } static inline int pgd_none(pgd_t pgd) { if (!pgtable_l5_enabled()) return 0; /* * There is no need to do a workaround for the KNL stray * A/D bit erratum here. PGDs only point to page tables * except on 32-bit non-PAE which is not supported on * KNL. */ return !native_pgd_val(pgd); } #endif /* CONFIG_PGTABLE_LEVELS > 4 */ #endif /* __ASSEMBLY__ */ #define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET) #define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY) #ifndef __ASSEMBLY__ extern int direct_gbpages; void init_mem_mapping(void); void early_alloc_pgt_buf(void); void __init poking_init(void); unsigned long init_memory_mapping(unsigned long start, unsigned long end, pgprot_t prot); #ifdef CONFIG_X86_64 extern pgd_t trampoline_pgd_entry; #endif /* local pte updates need not use xchg for locking */ static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) { pte_t res = *ptep; /* Pure native function needs no input for mm, addr */ native_pte_clear(NULL, 0, ptep); return res; } static inline pmd_t native_local_pmdp_get_and_clear(pmd_t *pmdp) { pmd_t res = *pmdp; native_pmd_clear(pmdp); return res; } static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp) { pud_t res = *pudp; native_pud_clear(pudp); return res; } static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { page_table_check_pmd_set(mm, pmdp, pmd); set_pmd(pmdp, pmd); } static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, pud_t *pudp, pud_t pud) { page_table_check_pud_set(mm, pudp, pud); native_set_pud(pudp, pud); } /* * We only update the dirty/accessed state if we set * the dirty bit by hand in the kernel, since the hardware * will do the accessed bit for us, and we don't want to * race with other CPU's that might be updating the dirty * bit at the same time. */ struct vm_area_struct; #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t entry, int dirty); #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG extern int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep); #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH extern int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t pte = native_ptep_get_and_clear(ptep); page_table_check_pte_clear(mm, pte); return pte; } #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) { pte_t pte; if (full) { /* * Full address destruction in progress; paravirt does not * care about updates and native needs no locking */ pte = native_local_ptep_get_and_clear(ptep); page_table_check_pte_clear(mm, pte); } else { pte = ptep_get_and_clear(mm, addr, ptep); } return pte; } #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { /* * Avoid accidentally creating shadow stack PTEs * (Write=0,Dirty=1). Use cmpxchg() to prevent races with * the hardware setting Dirty=1. */ pte_t old_pte, new_pte; old_pte = READ_ONCE(*ptep); do { new_pte = pte_wrprotect(old_pte); } while (!try_cmpxchg((long *)&ptep->pte, (long *)&old_pte, *(long *)&new_pte)); } #define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0) #define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS extern int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty); extern int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pud_t *pudp, pud_t entry, int dirty); #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG extern int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp); extern int pudp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pud_t *pudp); #define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH extern int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { pmd_t pmd = native_pmdp_get_and_clear(pmdp); page_table_check_pmd_clear(mm, pmd); return pmd; } #define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pud_t *pudp) { pud_t pud = native_pudp_get_and_clear(pudp); page_table_check_pud_clear(mm, pud); return pud; } #define __HAVE_ARCH_PMDP_SET_WRPROTECT static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { /* * Avoid accidentally creating shadow stack PTEs * (Write=0,Dirty=1). Use cmpxchg() to prevent races with * the hardware setting Dirty=1. */ pmd_t old_pmd, new_pmd; old_pmd = READ_ONCE(*pmdp); do { new_pmd = pmd_wrprotect(old_pmd); } while (!try_cmpxchg((long *)pmdp, (long *)&old_pmd, *(long *)&new_pmd)); } #ifndef pmdp_establish #define pmdp_establish pmdp_establish static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { page_table_check_pmd_set(vma->vm_mm, pmdp, pmd); if (IS_ENABLED(CONFIG_SMP)) { return xchg(pmdp, pmd); } else { pmd_t old = *pmdp; WRITE_ONCE(*pmdp, pmd); return old; } } #endif #define __HAVE_ARCH_PMDP_INVALIDATE_AD extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); /* * Page table pages are page-aligned. The lower half of the top * level is used for userspace and the top half for the kernel. * * Returns true for parts of the PGD that map userspace and * false for the parts that map the kernel. */ static inline bool pgdp_maps_userspace(void *__ptr) { unsigned long ptr = (unsigned long)__ptr; return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START); } #define pgd_leaf pgd_leaf static inline bool pgd_leaf(pgd_t pgd) { return false; } #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* * All top-level MITIGATION_PAGE_TABLE_ISOLATION page tables are order-1 pages * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and * the user one is in the last 4k. To switch between them, you * just need to flip the 12th bit in their addresses. */ #define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT /* * This generates better code than the inline assembly in * __set_bit(). */ static inline void *ptr_set_bit(void *ptr, int bit) { unsigned long __ptr = (unsigned long)ptr; __ptr |= BIT(bit); return (void *)__ptr; } static inline void *ptr_clear_bit(void *ptr, int bit) { unsigned long __ptr = (unsigned long)ptr; __ptr &= ~BIT(bit); return (void *)__ptr; } static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp) { return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); } static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp) { return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); } static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp) { return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); } static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) { return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); } #endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ /* * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); * * dst - pointer to pgd range anywhere on a pgd page * src - "" * count - the number of pgds to copy. * * dst and src can be on the same page, but the range must not overlap, * and must not cross a page boundary. */ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) { memcpy(dst, src, count * sizeof(pgd_t)); #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION if (!static_cpu_has(X86_FEATURE_PTI)) return; /* Clone the user space pgd as well */ memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src), count * sizeof(pgd_t)); #endif } #define PTE_SHIFT ilog2(PTRS_PER_PTE) static inline int page_level_shift(enum pg_level level) { return (PAGE_SHIFT - PTE_SHIFT) + level * PTE_SHIFT; } static inline unsigned long page_level_size(enum pg_level level) { return 1UL << page_level_shift(level); } static inline unsigned long page_level_mask(enum pg_level level) { return ~(page_level_size(level) - 1); } /* * The x86 doesn't have any external MMU info: the kernel page * tables contain all the necessary information. */ static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { } static inline void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, unsigned int nr) { } static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd) { } static inline void update_mmu_cache_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pud) { } static inline pte_t pte_swp_mkexclusive(pte_t pte) { return pte_set_flags(pte, _PAGE_SWP_EXCLUSIVE); } static inline int pte_swp_exclusive(pte_t pte) { return pte_flags(pte) & _PAGE_SWP_EXCLUSIVE; } static inline pte_t pte_swp_clear_exclusive(pte_t pte) { return pte_clear_flags(pte, _PAGE_SWP_EXCLUSIVE); } #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline pte_t pte_swp_mksoft_dirty(pte_t pte) { return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY); } static inline int pte_swp_soft_dirty(pte_t pte) { return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY; } static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) { return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY); } #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_SWP_SOFT_DIRTY); } static inline int pmd_swp_soft_dirty(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_SWP_SOFT_DIRTY; } static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_SWP_SOFT_DIRTY); } #endif #endif #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP static inline pte_t pte_swp_mkuffd_wp(pte_t pte) { return pte_set_flags(pte, _PAGE_SWP_UFFD_WP); } static inline int pte_swp_uffd_wp(pte_t pte) { return pte_flags(pte) & _PAGE_SWP_UFFD_WP; } static inline pte_t pte_swp_clear_uffd_wp(pte_t pte) { return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP); } static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_SWP_UFFD_WP); } static inline int pmd_swp_uffd_wp(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_SWP_UFFD_WP; } static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_SWP_UFFD_WP); } #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ static inline u16 pte_flags_pkey(unsigned long pte_flags) { #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS /* ifdef to avoid doing 59-bit shift on 32-bit values */ return (pte_flags & _PAGE_PKEY_MASK) >> _PAGE_BIT_PKEY_BIT0; #else return 0; #endif } static inline bool __pkru_allows_pkey(u16 pkey, bool write) { u32 pkru = read_pkru(); if (!__pkru_allows_read(pkru, pkey)) return false; if (write && !__pkru_allows_write(pkru, pkey)) return false; return true; } /* * 'pteval' can come from a PTE, PMD or PUD. We only check * _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the * same value on all 3 types. */ static inline bool __pte_access_permitted(unsigned long pteval, bool write) { unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER; /* * Write=0,Dirty=1 PTEs are shadow stack, which the kernel * shouldn't generally allow access to, but since they * are already Write=0, the below logic covers both cases. */ if (write) need_pte_bits |= _PAGE_RW; if ((pteval & need_pte_bits) != need_pte_bits) return 0; return __pkru_allows_pkey(pte_flags_pkey(pteval), write); } #define pte_access_permitted pte_access_permitted static inline bool pte_access_permitted(pte_t pte, bool write) { return __pte_access_permitted(pte_val(pte), write); } #define pmd_access_permitted pmd_access_permitted static inline bool pmd_access_permitted(pmd_t pmd, bool write) { return __pte_access_permitted(pmd_val(pmd), write); } #define pud_access_permitted pud_access_permitted static inline bool pud_access_permitted(pud_t pud, bool write) { return __pte_access_permitted(pud_val(pud), write); } #define __HAVE_ARCH_PFN_MODIFY_ALLOWED 1 extern bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot); static inline bool arch_has_pfn_modify_check(void) { return boot_cpu_has_bug(X86_BUG_L1TF); } #define arch_check_zapped_pte arch_check_zapped_pte void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte); #define arch_check_zapped_pmd arch_check_zapped_pmd void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd); #ifdef CONFIG_XEN_PV #define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young static inline bool arch_has_hw_nonleaf_pmd_young(void) { return !cpu_feature_enabled(X86_FEATURE_XENPV); } #endif #ifdef CONFIG_PAGE_TABLE_CHECK static inline bool pte_user_accessible_page(pte_t pte) { return (pte_val(pte) & _PAGE_PRESENT) && (pte_val(pte) & _PAGE_USER); } static inline bool pmd_user_accessible_page(pmd_t pmd) { return pmd_leaf(pmd) && (pmd_val(pmd) & _PAGE_PRESENT) && (pmd_val(pmd) & _PAGE_USER); } static inline bool pud_user_accessible_page(pud_t pud) { return pud_leaf(pud) && (pud_val(pud) & _PAGE_PRESENT) && (pud_val(pud) & _PAGE_USER); } #endif #ifdef CONFIG_X86_SGX int arch_memory_failure(unsigned long pfn, int flags); #define arch_memory_failure arch_memory_failure bool arch_is_platform_page(u64 paddr); #define arch_is_platform_page arch_is_platform_page #endif #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_H */
361 522 9 679 62 309 3 35 156 670 127 174 19 109 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 /* SPDX-License-Identifier: GPL-2.0 */ /* * fscrypt.h: declarations for per-file encryption * * Filesystems that implement per-file encryption must include this header * file. * * Copyright (C) 2015, Google, Inc. * * Written by Michael Halcrow, 2015. * Modified by Jaegeuk Kim, 2015. */ #ifndef _LINUX_FSCRYPT_H #define _LINUX_FSCRYPT_H #include <linux/fs.h> #include <linux/mm.h> #include <linux/slab.h> #include <uapi/linux/fscrypt.h> /* * The lengths of all file contents blocks must be divisible by this value. * This is needed to ensure that all contents encryption modes will work, as * some of the supported modes don't support arbitrarily byte-aligned messages. * * Since the needed alignment is 16 bytes, most filesystems will meet this * requirement naturally, as typical block sizes are powers of 2. However, if a * filesystem can generate arbitrarily byte-aligned block lengths (e.g., via * compression), then it will need to pad to this alignment before encryption. */ #define FSCRYPT_CONTENTS_ALIGNMENT 16 union fscrypt_policy; struct fscrypt_inode_info; struct fs_parameter; struct seq_file; struct fscrypt_str { unsigned char *name; u32 len; }; struct fscrypt_name { const struct qstr *usr_fname; struct fscrypt_str disk_name; u32 hash; u32 minor_hash; struct fscrypt_str crypto_buf; bool is_nokey_name; }; #define FSTR_INIT(n, l) { .name = n, .len = l } #define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len) #define fname_name(p) ((p)->disk_name.name) #define fname_len(p) ((p)->disk_name.len) /* Maximum value for the third parameter of fscrypt_operations.set_context(). */ #define FSCRYPT_SET_CONTEXT_MAX_SIZE 40 #ifdef CONFIG_FS_ENCRYPTION /* Crypto operations for filesystems */ struct fscrypt_operations { /* * If set, then fs/crypto/ will allocate a global bounce page pool the * first time an encryption key is set up for a file. The bounce page * pool is required by the following functions: * * - fscrypt_encrypt_pagecache_blocks() * - fscrypt_zeroout_range() for files not using inline crypto * * If the filesystem doesn't use those, it doesn't need to set this. */ unsigned int needs_bounce_pages : 1; /* * If set, then fs/crypto/ will allow the use of encryption settings * that assume inode numbers fit in 32 bits (i.e. * FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64}), provided that the other * prerequisites for these settings are also met. This is only useful * if the filesystem wants to support inline encryption hardware that is * limited to 32-bit or 64-bit data unit numbers and where programming * keyslots is very slow. */ unsigned int has_32bit_inodes : 1; /* * If set, then fs/crypto/ will allow users to select a crypto data unit * size that is less than the filesystem block size. This is done via * the log2_data_unit_size field of the fscrypt policy. This flag is * not compatible with filesystems that encrypt variable-length blocks * (i.e. blocks that aren't all equal to filesystem's block size), for * example as a result of compression. It's also not compatible with * the fscrypt_encrypt_block_inplace() and * fscrypt_decrypt_block_inplace() functions. */ unsigned int supports_subblock_data_units : 1; /* * This field exists only for backwards compatibility reasons and should * only be set by the filesystems that are setting it already. It * contains the filesystem-specific key description prefix that is * accepted for "logon" keys for v1 fscrypt policies. This * functionality is deprecated in favor of the generic prefix * "fscrypt:", which itself is deprecated in favor of the filesystem * keyring ioctls such as FS_IOC_ADD_ENCRYPTION_KEY. Filesystems that * are newly adding fscrypt support should not set this field. */ const char *legacy_key_prefix; /* * Get the fscrypt context of the given inode. * * @inode: the inode whose context to get * @ctx: the buffer into which to get the context * @len: length of the @ctx buffer in bytes * * Return: On success, returns the length of the context in bytes; this * may be less than @len. On failure, returns -ENODATA if the * inode doesn't have a context, -ERANGE if the context is * longer than @len, or another -errno code. */ int (*get_context)(struct inode *inode, void *ctx, size_t len); /* * Set an fscrypt context on the given inode. * * @inode: the inode whose context to set. The inode won't already have * an fscrypt context. * @ctx: the context to set * @len: length of @ctx in bytes (at most FSCRYPT_SET_CONTEXT_MAX_SIZE) * @fs_data: If called from fscrypt_set_context(), this will be the * value the filesystem passed to fscrypt_set_context(). * Otherwise (i.e. when called from * FS_IOC_SET_ENCRYPTION_POLICY) this will be NULL. * * i_rwsem will be held for write. * * Return: 0 on success, -errno on failure. */ int (*set_context)(struct inode *inode, const void *ctx, size_t len, void *fs_data); /* * Get the dummy fscrypt policy in use on the filesystem (if any). * * Filesystems only need to implement this function if they support the * test_dummy_encryption mount option. * * Return: A pointer to the dummy fscrypt policy, if the filesystem is * mounted with test_dummy_encryption; otherwise NULL. */ const union fscrypt_policy *(*get_dummy_policy)(struct super_block *sb); /* * Check whether a directory is empty. i_rwsem will be held for write. */ bool (*empty_dir)(struct inode *inode); /* * Check whether the filesystem's inode numbers and UUID are stable, * meaning that they will never be changed even by offline operations * such as filesystem shrinking and therefore can be used in the * encryption without the possibility of files becoming unreadable. * * Filesystems only need to implement this function if they want to * support the FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64} flags. These * flags are designed to work around the limitations of UFS and eMMC * inline crypto hardware, and they shouldn't be used in scenarios where * such hardware isn't being used. * * Leaving this NULL is equivalent to always returning false. */ bool (*has_stable_inodes)(struct super_block *sb); /* * Return an array of pointers to the block devices to which the * filesystem may write encrypted file contents, NULL if the filesystem * only has a single such block device, or an ERR_PTR() on error. * * On successful non-NULL return, *num_devs is set to the number of * devices in the returned array. The caller must free the returned * array using kfree(). * * If the filesystem can use multiple block devices (other than block * devices that aren't used for encrypted file contents, such as * external journal devices), and wants to support inline encryption, * then it must implement this function. Otherwise it's not needed. */ struct block_device **(*get_devices)(struct super_block *sb, unsigned int *num_devs); }; int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); static inline struct fscrypt_inode_info * fscrypt_get_inode_info(const struct inode *inode) { /* * Pairs with the cmpxchg_release() in fscrypt_setup_encryption_info(). * I.e., another task may publish ->i_crypt_info concurrently, executing * a RELEASE barrier. We need to use smp_load_acquire() here to safely * ACQUIRE the memory the other task published. */ return smp_load_acquire(&inode->i_crypt_info); } /** * fscrypt_needs_contents_encryption() - check whether an inode needs * contents encryption * @inode: the inode to check * * Return: %true iff the inode is an encrypted regular file and the kernel was * built with fscrypt support. * * If you need to know whether the encrypt bit is set even when the kernel was * built without fscrypt support, you must use IS_ENCRYPTED() directly instead. */ static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) { return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode); } /* * When d_splice_alias() moves a directory's no-key alias to its * plaintext alias as a result of the encryption key being added, * DCACHE_NOKEY_NAME must be cleared and there might be an opportunity * to disable d_revalidate. Note that we don't have to support the * inverse operation because fscrypt doesn't allow no-key names to be * the source or target of a rename(). */ static inline void fscrypt_handle_d_move(struct dentry *dentry) { /* * VFS calls fscrypt_handle_d_move even for non-fscrypt * filesystems. */ if (dentry->d_flags & DCACHE_NOKEY_NAME) { dentry->d_flags &= ~DCACHE_NOKEY_NAME; /* * Other filesystem features might be handling dentry * revalidation, in which case it cannot be disabled. */ if (dentry->d_op->d_revalidate == fscrypt_d_revalidate) dentry->d_flags &= ~DCACHE_OP_REVALIDATE; } } /** * fscrypt_is_nokey_name() - test whether a dentry is a no-key name * @dentry: the dentry to check * * This returns true if the dentry is a no-key dentry. A no-key dentry is a * dentry that was created in an encrypted directory that hasn't had its * encryption key added yet. Such dentries may be either positive or negative. * * When a filesystem is asked to create a new filename in an encrypted directory * and the new filename's dentry is a no-key dentry, it must fail the operation * with ENOKEY. This includes ->create(), ->mkdir(), ->mknod(), ->symlink(), * ->rename(), and ->link(). (However, ->rename() and ->link() are already * handled by fscrypt_prepare_rename() and fscrypt_prepare_link().) * * This is necessary because creating a filename requires the directory's * encryption key, but just checking for the key on the directory inode during * the final filesystem operation doesn't guarantee that the key was available * during the preceding dentry lookup. And the key must have already been * available during the dentry lookup in order for it to have been checked * whether the filename already exists in the directory and for the new file's * dentry not to be invalidated due to it incorrectly having the no-key flag. * * Return: %true if the dentry is a no-key name */ static inline bool fscrypt_is_nokey_name(const struct dentry *dentry) { return dentry->d_flags & DCACHE_NOKEY_NAME; } static inline void fscrypt_prepare_dentry(struct dentry *dentry, bool is_nokey_name) { /* * This code tries to only take ->d_lock when necessary to write * to ->d_flags. We shouldn't be peeking on d_flags for * DCACHE_OP_REVALIDATE unlocked, but in the unlikely case * there is a race, the worst it can happen is that we fail to * unset DCACHE_OP_REVALIDATE and pay the cost of an extra * d_revalidate. */ if (is_nokey_name) { spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_NOKEY_NAME; spin_unlock(&dentry->d_lock); } else if (dentry->d_flags & DCACHE_OP_REVALIDATE && dentry->d_op->d_revalidate == fscrypt_d_revalidate) { /* * Unencrypted dentries and encrypted dentries where the * key is available are always valid from fscrypt * perspective. Avoid the cost of calling * fscrypt_d_revalidate unnecessarily. */ spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_OP_REVALIDATE; spin_unlock(&dentry->d_lock); } } /* crypto.c */ void fscrypt_enqueue_decrypt_work(struct work_struct *); struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, unsigned int len, unsigned int offs, gfp_t gfp_flags); int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num, gfp_t gfp_flags); int fscrypt_decrypt_pagecache_blocks(struct folio *folio, size_t len, size_t offs); int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num); static inline bool fscrypt_is_bounce_page(struct page *page) { return page->mapping == NULL; } static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) { return (struct page *)page_private(bounce_page); } static inline bool fscrypt_is_bounce_folio(struct folio *folio) { return folio->mapping == NULL; } static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio) { return bounce_folio->private; } void fscrypt_free_bounce_page(struct page *bounce_page); /* policy.c */ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg); int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg); int fscrypt_ioctl_get_policy_ex(struct file *filp, void __user *arg); int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg); int fscrypt_has_permitted_context(struct inode *parent, struct inode *child); int fscrypt_context_for_new_inode(void *ctx, struct inode *inode); int fscrypt_set_context(struct inode *inode, void *fs_data); struct fscrypt_dummy_policy { const union fscrypt_policy *policy; }; int fscrypt_parse_test_dummy_encryption(const struct fs_parameter *param, struct fscrypt_dummy_policy *dummy_policy); bool fscrypt_dummy_policies_equal(const struct fscrypt_dummy_policy *p1, const struct fscrypt_dummy_policy *p2); void fscrypt_show_test_dummy_encryption(struct seq_file *seq, char sep, struct super_block *sb); static inline bool fscrypt_is_dummy_policy_set(const struct fscrypt_dummy_policy *dummy_policy) { return dummy_policy->policy != NULL; } static inline void fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) { kfree(dummy_policy->policy); dummy_policy->policy = NULL; } /* keyring.c */ void fscrypt_destroy_keyring(struct super_block *sb); int fscrypt_ioctl_add_key(struct file *filp, void __user *arg); int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg); int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *arg); int fscrypt_ioctl_get_key_status(struct file *filp, void __user *arg); /* keysetup.c */ int fscrypt_prepare_new_inode(struct inode *dir, struct inode *inode, bool *encrypt_ret); void fscrypt_put_encryption_info(struct inode *inode); void fscrypt_free_inode(struct inode *inode); int fscrypt_drop_inode(struct inode *inode); /* fname.c */ int fscrypt_fname_encrypt(const struct inode *inode, const struct qstr *iname, u8 *out, unsigned int olen); bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len, u32 max_len, u32 *encrypted_len_ret); int fscrypt_setup_filename(struct inode *inode, const struct qstr *iname, int lookup, struct fscrypt_name *fname); static inline void fscrypt_free_filename(struct fscrypt_name *fname) { kfree(fname->crypto_buf.name); } int fscrypt_fname_alloc_buffer(u32 max_encrypted_len, struct fscrypt_str *crypto_str); void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str); int fscrypt_fname_disk_to_usr(const struct inode *inode, u32 hash, u32 minor_hash, const struct fscrypt_str *iname, struct fscrypt_str *oname); bool fscrypt_match_name(const struct fscrypt_name *fname, const u8 *de_name, u32 de_name_len); u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name); /* bio.c */ bool fscrypt_decrypt_bio(struct bio *bio); int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len); /* hooks.c */ int fscrypt_file_open(struct inode *inode, struct file *filp); int __fscrypt_prepare_link(struct inode *inode, struct inode *dir, struct dentry *dentry); int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, struct fscrypt_name *fname); int fscrypt_prepare_lookup_partial(struct inode *dir, struct dentry *dentry); int __fscrypt_prepare_readdir(struct inode *dir); int __fscrypt_prepare_setattr(struct dentry *dentry, struct iattr *attr); int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags); int fscrypt_prepare_symlink(struct inode *dir, const char *target, unsigned int len, unsigned int max_len, struct fscrypt_str *disk_link); int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, unsigned int len, struct fscrypt_str *disk_link); const char *fscrypt_get_symlink(struct inode *inode, const void *caddr, unsigned int max_size, struct delayed_call *done); int fscrypt_symlink_getattr(const struct path *path, struct kstat *stat); static inline void fscrypt_set_ops(struct super_block *sb, const struct fscrypt_operations *s_cop) { sb->s_cop = s_cop; } #else /* !CONFIG_FS_ENCRYPTION */ static inline struct fscrypt_inode_info * fscrypt_get_inode_info(const struct inode *inode) { return NULL; } static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) { return false; } static inline void fscrypt_handle_d_move(struct dentry *dentry) { } static inline bool fscrypt_is_nokey_name(const struct dentry *dentry) { return false; } static inline void fscrypt_prepare_dentry(struct dentry *dentry, bool is_nokey_name) { } /* crypto.c */ static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) { } static inline struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, unsigned int len, unsigned int offs, gfp_t gfp_flags) { return ERR_PTR(-EOPNOTSUPP); } static inline int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num, gfp_t gfp_flags) { return -EOPNOTSUPP; } static inline int fscrypt_decrypt_pagecache_blocks(struct folio *folio, size_t len, size_t offs) { return -EOPNOTSUPP; } static inline int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num) { return -EOPNOTSUPP; } static inline bool fscrypt_is_bounce_page(struct page *page) { return false; } static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) { WARN_ON_ONCE(1); return ERR_PTR(-EINVAL); } static inline bool fscrypt_is_bounce_folio(struct folio *folio) { return false; } static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio) { WARN_ON_ONCE(1); return ERR_PTR(-EINVAL); } static inline void fscrypt_free_bounce_page(struct page *bounce_page) { } /* policy.c */ static inline int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg) { return -EOPNOTSUPP; } static inline int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } static inline int fscrypt_ioctl_get_policy_ex(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } static inline int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } static inline int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) { return 0; } static inline int fscrypt_set_context(struct inode *inode, void *fs_data) { return -EOPNOTSUPP; } struct fscrypt_dummy_policy { }; static inline int fscrypt_parse_test_dummy_encryption(const struct fs_parameter *param, struct fscrypt_dummy_policy *dummy_policy) { return -EINVAL; } static inline bool fscrypt_dummy_policies_equal(const struct fscrypt_dummy_policy *p1, const struct fscrypt_dummy_policy *p2) { return true; } static inline void fscrypt_show_test_dummy_encryption(struct seq_file *seq, char sep, struct super_block *sb) { } static inline bool fscrypt_is_dummy_policy_set(const struct fscrypt_dummy_policy *dummy_policy) { return false; } static inline void fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) { } /* keyring.c */ static inline void fscrypt_destroy_keyring(struct super_block *sb) { } static inline int fscrypt_ioctl_add_key(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } static inline int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } static inline int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } static inline int fscrypt_ioctl_get_key_status(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } /* keysetup.c */ static inline int fscrypt_prepare_new_inode(struct inode *dir, struct inode *inode, bool *encrypt_ret) { if (IS_ENCRYPTED(dir)) return -EOPNOTSUPP; return 0; } static inline void fscrypt_put_encryption_info(struct inode *inode) { return; } static inline void fscrypt_free_inode(struct inode *inode) { } static inline int fscrypt_drop_inode(struct inode *inode) { return 0; } /* fname.c */ static inline int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, int lookup, struct fscrypt_name *fname) { if (IS_ENCRYPTED(dir)) return -EOPNOTSUPP; memset(fname, 0, sizeof(*fname)); fname->usr_fname = iname; fname->disk_name.name = (unsigned char *)iname->name; fname->disk_name.len = iname->len; return 0; } static inline void fscrypt_free_filename(struct fscrypt_name *fname) { return; } static inline int fscrypt_fname_alloc_buffer(u32 max_encrypted_len, struct fscrypt_str *crypto_str) { return -EOPNOTSUPP; } static inline void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str) { return; } static inline int fscrypt_fname_disk_to_usr(const struct inode *inode, u32 hash, u32 minor_hash, const struct fscrypt_str *iname, struct fscrypt_str *oname) { return -EOPNOTSUPP; } static inline bool fscrypt_match_name(const struct fscrypt_name *fname, const u8 *de_name, u32 de_name_len) { /* Encryption support disabled; use standard comparison */ if (de_name_len != fname->disk_name.len) return false; return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); } static inline u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name) { WARN_ON_ONCE(1); return 0; } static inline int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) { return 1; } /* bio.c */ static inline bool fscrypt_decrypt_bio(struct bio *bio) { return true; } static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len) { return -EOPNOTSUPP; } /* hooks.c */ static inline int fscrypt_file_open(struct inode *inode, struct file *filp) { if (IS_ENCRYPTED(inode)) return -EOPNOTSUPP; return 0; } static inline int __fscrypt_prepare_link(struct inode *inode, struct inode *dir, struct dentry *dentry) { return -EOPNOTSUPP; } static inline int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { return -EOPNOTSUPP; } static inline int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, struct fscrypt_name *fname) { return -EOPNOTSUPP; } static inline int fscrypt_prepare_lookup_partial(struct inode *dir, struct dentry *dentry) { return -EOPNOTSUPP; } static inline int __fscrypt_prepare_readdir(struct inode *dir) { return -EOPNOTSUPP; } static inline int __fscrypt_prepare_setattr(struct dentry *dentry, struct iattr *attr) { return -EOPNOTSUPP; } static inline int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags) { return 0; } static inline int fscrypt_prepare_symlink(struct inode *dir, const char *target, unsigned int len, unsigned int max_len, struct fscrypt_str *disk_link) { if (IS_ENCRYPTED(dir)) return -EOPNOTSUPP; disk_link->name = (unsigned char *)target; disk_link->len = len + 1; if (disk_link->len > max_len) return -ENAMETOOLONG; return 0; } static inline int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, unsigned int len, struct fscrypt_str *disk_link) { return -EOPNOTSUPP; } static inline const char *fscrypt_get_symlink(struct inode *inode, const void *caddr, unsigned int max_size, struct delayed_call *done) { return ERR_PTR(-EOPNOTSUPP); } static inline int fscrypt_symlink_getattr(const struct path *path, struct kstat *stat) { return -EOPNOTSUPP; } static inline void fscrypt_set_ops(struct super_block *sb, const struct fscrypt_operations *s_cop) { } #endif /* !CONFIG_FS_ENCRYPTION */ /* inline_crypt.c */ #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode); void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, u64 first_lblk, gfp_t gfp_mask); void fscrypt_set_bio_crypt_ctx_bh(struct bio *bio, const struct buffer_head *first_bh, gfp_t gfp_mask); bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, u64 next_lblk); bool fscrypt_mergeable_bio_bh(struct bio *bio, const struct buffer_head *next_bh); bool fscrypt_dio_supported(struct inode *inode); u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks); #else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ static inline bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode) { return false; } static inline void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, u64 first_lblk, gfp_t gfp_mask) { } static inline void fscrypt_set_bio_crypt_ctx_bh( struct bio *bio, const struct buffer_head *first_bh, gfp_t gfp_mask) { } static inline bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, u64 next_lblk) { return true; } static inline bool fscrypt_mergeable_bio_bh(struct bio *bio, const struct buffer_head *next_bh) { return true; } static inline bool fscrypt_dio_supported(struct inode *inode) { return !fscrypt_needs_contents_encryption(inode); } static inline u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks) { return nr_blocks; } #endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ /** * fscrypt_inode_uses_inline_crypto() - test whether an inode uses inline * encryption * @inode: an inode. If encrypted, its key must be set up. * * Return: true if the inode requires file contents encryption and if the * encryption should be done in the block layer via blk-crypto rather * than in the filesystem layer. */ static inline bool fscrypt_inode_uses_inline_crypto(const struct inode *inode) { return fscrypt_needs_contents_encryption(inode) && __fscrypt_inode_uses_inline_crypto(inode); } /** * fscrypt_inode_uses_fs_layer_crypto() - test whether an inode uses fs-layer * encryption * @inode: an inode. If encrypted, its key must be set up. * * Return: true if the inode requires file contents encryption and if the * encryption should be done in the filesystem layer rather than in the * block layer via blk-crypto. */ static inline bool fscrypt_inode_uses_fs_layer_crypto(const struct inode *inode) { return fscrypt_needs_contents_encryption(inode) && !__fscrypt_inode_uses_inline_crypto(inode); } /** * fscrypt_has_encryption_key() - check whether an inode has had its key set up * @inode: the inode to check * * Return: %true if the inode has had its encryption key set up, else %false. * * Usually this should be preceded by fscrypt_get_encryption_info() to try to * set up the key first. */ static inline bool fscrypt_has_encryption_key(const struct inode *inode) { return fscrypt_get_inode_info(inode) != NULL; } /** * fscrypt_prepare_link() - prepare to link an inode into a possibly-encrypted * directory * @old_dentry: an existing dentry for the inode being linked * @dir: the target directory * @dentry: negative dentry for the target filename * * A new link can only be added to an encrypted directory if the directory's * encryption key is available --- since otherwise we'd have no way to encrypt * the filename. * * We also verify that the link will not violate the constraint that all files * in an encrypted directory tree use the same encryption policy. * * Return: 0 on success, -ENOKEY if the directory's encryption key is missing, * -EXDEV if the link would result in an inconsistent encryption policy, or * another -errno code. */ static inline int fscrypt_prepare_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { if (IS_ENCRYPTED(dir)) return __fscrypt_prepare_link(d_inode(old_dentry), dir, dentry); return 0; } /** * fscrypt_prepare_rename() - prepare for a rename between possibly-encrypted * directories * @old_dir: source directory * @old_dentry: dentry for source file * @new_dir: target directory * @new_dentry: dentry for target location (may be negative unless exchanging) * @flags: rename flags (we care at least about %RENAME_EXCHANGE) * * Prepare for ->rename() where the source and/or target directories may be * encrypted. A new link can only be added to an encrypted directory if the * directory's encryption key is available --- since otherwise we'd have no way * to encrypt the filename. A rename to an existing name, on the other hand, * *is* cryptographically possible without the key. However, we take the more * conservative approach and just forbid all no-key renames. * * We also verify that the rename will not violate the constraint that all files * in an encrypted directory tree use the same encryption policy. * * Return: 0 on success, -ENOKEY if an encryption key is missing, -EXDEV if the * rename would cause inconsistent encryption policies, or another -errno code. */ static inline int fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { if (IS_ENCRYPTED(old_dir) || IS_ENCRYPTED(new_dir)) return __fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry, flags); return 0; } /** * fscrypt_prepare_lookup() - prepare to lookup a name in a possibly-encrypted * directory * @dir: directory being searched * @dentry: filename being looked up * @fname: (output) the name to use to search the on-disk directory * * Prepare for ->lookup() in a directory which may be encrypted by determining * the name that will actually be used to search the directory on-disk. If the * directory's encryption policy is supported by this kernel and its encryption * key is available, then the lookup is assumed to be by plaintext name; * otherwise, it is assumed to be by no-key name. * * This will set DCACHE_NOKEY_NAME on the dentry if the lookup is by no-key * name. In this case the filesystem must assign the dentry a dentry_operations * which contains fscrypt_d_revalidate (or contains a d_revalidate method that * calls fscrypt_d_revalidate), so that the dentry will be invalidated if the * directory's encryption key is later added. * * Return: 0 on success; -ENOENT if the directory's key is unavailable but the * filename isn't a valid no-key name, so a negative dentry should be created; * or another -errno code. */ static inline int fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, struct fscrypt_name *fname) { if (IS_ENCRYPTED(dir)) return __fscrypt_prepare_lookup(dir, dentry, fname); memset(fname, 0, sizeof(*fname)); fname->usr_fname = &dentry->d_name; fname->disk_name.name = (unsigned char *)dentry->d_name.name; fname->disk_name.len = dentry->d_name.len; fscrypt_prepare_dentry(dentry, false); return 0; } /** * fscrypt_prepare_readdir() - prepare to read a possibly-encrypted directory * @dir: the directory inode * * If the directory is encrypted and it doesn't already have its encryption key * set up, try to set it up so that the filenames will be listed in plaintext * form rather than in no-key form. * * Return: 0 on success; -errno on error. Note that the encryption key being * unavailable is not considered an error. It is also not an error if * the encryption policy is unsupported by this kernel; that is treated * like the key being unavailable, so that files can still be deleted. */ static inline int fscrypt_prepare_readdir(struct inode *dir) { if (IS_ENCRYPTED(dir)) return __fscrypt_prepare_readdir(dir); return 0; } /** * fscrypt_prepare_setattr() - prepare to change a possibly-encrypted inode's * attributes * @dentry: dentry through which the inode is being changed * @attr: attributes to change * * Prepare for ->setattr() on a possibly-encrypted inode. On an encrypted file, * most attribute changes are allowed even without the encryption key. However, * without the encryption key we do have to forbid truncates. This is needed * because the size being truncated to may not be a multiple of the filesystem * block size, and in that case we'd have to decrypt the final block, zero the * portion past i_size, and re-encrypt it. (We *could* allow truncating to a * filesystem block boundary, but it's simpler to just forbid all truncates --- * and we already forbid all other contents modifications without the key.) * * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code * if a problem occurred while setting up the encryption key. */ static inline int fscrypt_prepare_setattr(struct dentry *dentry, struct iattr *attr) { if (IS_ENCRYPTED(d_inode(dentry))) return __fscrypt_prepare_setattr(dentry, attr); return 0; } /** * fscrypt_encrypt_symlink() - encrypt the symlink target if needed * @inode: symlink inode * @target: plaintext symlink target * @len: length of @target excluding null terminator * @disk_link: (in/out) the on-disk symlink target being prepared * * If the symlink target needs to be encrypted, then this function encrypts it * into @disk_link->name. fscrypt_prepare_symlink() must have been called * previously to compute @disk_link->len. If the filesystem did not allocate a * buffer for @disk_link->name after calling fscrypt_prepare_link(), then one * will be kmalloc()'ed and the filesystem will be responsible for freeing it. * * Return: 0 on success, -errno on failure */ static inline int fscrypt_encrypt_symlink(struct inode *inode, const char *target, unsigned int len, struct fscrypt_str *disk_link) { if (IS_ENCRYPTED(inode)) return __fscrypt_encrypt_symlink(inode, target, len, disk_link); return 0; } /* If *pagep is a bounce page, free it and set *pagep to the pagecache page */ static inline void fscrypt_finalize_bounce_page(struct page **pagep) { struct page *page = *pagep; if (fscrypt_is_bounce_page(page)) { *pagep = fscrypt_pagecache_page(page); fscrypt_free_bounce_page(page); } } #endif /* _LINUX_FSCRYPT_H */
130 131 131 131 586 589 370 539 266 267 587 464 21 380 105 349 465 266 12 546 577 11 2 587 115 549 587 21 31 579 105 386 4 166 136 86 131 5 5 5 5 5 5 5 5 5 5 5 587 586 5 497 389 589 4 584 153 588 584 5 561 560 562 541 554 224 225 223 107 94 14 108 429 391 417 173 147 521 251 432 499 525 526 521 1 521 394 8 428 23 516 5 68 474 32 32 521 199 200 200 132 72 266 267 1 266 244 244 9 242 128 7 121 75 45 31 43 40 3 3 40 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. * Copyright (C) 2010 Red Hat, Inc. * All Rights Reserved. */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_extent_busy.h" #include "xfs_quota.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_log.h" #include "xfs_log_priv.h" #include "xfs_trace.h" #include "xfs_error.h" #include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_dquot_item.h" #include "xfs_dquot.h" #include "xfs_icache.h" #include "xfs_rtbitmap.h" struct kmem_cache *xfs_trans_cache; #if defined(CONFIG_TRACEPOINTS) static void xfs_trans_trace_reservations( struct xfs_mount *mp) { struct xfs_trans_res *res; struct xfs_trans_res *end_res; int i; res = (struct xfs_trans_res *)M_RES(mp); end_res = (struct xfs_trans_res *)(M_RES(mp) + 1); for (i = 0; res < end_res; i++, res++) trace_xfs_trans_resv_calc(mp, i, res); } #else # define xfs_trans_trace_reservations(mp) #endif /* * Initialize the precomputed transaction reservation values * in the mount structure. */ void xfs_trans_init( struct xfs_mount *mp) { xfs_trans_resv_calc(mp, M_RES(mp)); xfs_trans_trace_reservations(mp); } /* * Free the transaction structure. If there is more clean up * to do when the structure is freed, add it here. */ STATIC void xfs_trans_free( struct xfs_trans *tp) { xfs_extent_busy_sort(&tp->t_busy); xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false); trace_xfs_trans_free(tp, _RET_IP_); xfs_trans_clear_context(tp); if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT)) sb_end_intwrite(tp->t_mountp->m_super); xfs_trans_free_dqinfo(tp); kmem_cache_free(xfs_trans_cache, tp); } /* * This is called to create a new transaction which will share the * permanent log reservation of the given transaction. The remaining * unused block and rt extent reservations are also inherited. This * implies that the original transaction is no longer allowed to allocate * blocks. Locks and log items, however, are no inherited. They must * be added to the new transaction explicitly. */ STATIC struct xfs_trans * xfs_trans_dup( struct xfs_trans *tp) { struct xfs_trans *ntp; trace_xfs_trans_dup(tp, _RET_IP_); ntp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL); /* * Initialize the new transaction structure. */ ntp->t_magic = XFS_TRANS_HEADER_MAGIC; ntp->t_mountp = tp->t_mountp; INIT_LIST_HEAD(&ntp->t_items); INIT_LIST_HEAD(&ntp->t_busy); INIT_LIST_HEAD(&ntp->t_dfops); ntp->t_highest_agno = NULLAGNUMBER; ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(tp->t_ticket != NULL); ntp->t_flags = XFS_TRANS_PERM_LOG_RES | (tp->t_flags & XFS_TRANS_RESERVE) | (tp->t_flags & XFS_TRANS_NO_WRITECOUNT) | (tp->t_flags & XFS_TRANS_RES_FDBLKS); /* We gave our writer reference to the new transaction */ tp->t_flags |= XFS_TRANS_NO_WRITECOUNT; ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket); ASSERT(tp->t_blk_res >= tp->t_blk_res_used); ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used; tp->t_blk_res = tp->t_blk_res_used; ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; tp->t_rtx_res = tp->t_rtx_res_used; xfs_trans_switch_context(tp, ntp); /* move deferred ops over to the new tp */ xfs_defer_move(ntp, tp); xfs_trans_dup_dqinfo(tp, ntp); return ntp; } /* * This is called to reserve free disk blocks and log space for the * given transaction. This must be done before allocating any resources * within the transaction. * * This will return ENOSPC if there are not enough blocks available. * It will sleep waiting for available log space. * The only valid value for the flags parameter is XFS_RES_LOG_PERM, which * is used by long running transactions. If any one of the reservations * fails then they will all be backed out. * * This does not do quota reservations. That typically is done by the * caller afterwards. */ static int xfs_trans_reserve( struct xfs_trans *tp, struct xfs_trans_res *resp, uint blocks, uint rtextents) { struct xfs_mount *mp = tp->t_mountp; int error = 0; bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; /* * Attempt to reserve the needed disk blocks by decrementing * the number needed from the number available. This will * fail if the count would go below zero. */ if (blocks > 0) { error = xfs_dec_fdblocks(mp, blocks, rsvd); if (error != 0) return -ENOSPC; tp->t_blk_res += blocks; } /* * Reserve the log space needed for this transaction. */ if (resp->tr_logres > 0) { bool permanent = false; ASSERT(tp->t_log_res == 0 || tp->t_log_res == resp->tr_logres); ASSERT(tp->t_log_count == 0 || tp->t_log_count == resp->tr_logcount); if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) { tp->t_flags |= XFS_TRANS_PERM_LOG_RES; permanent = true; } else { ASSERT(tp->t_ticket == NULL); ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES)); } if (tp->t_ticket != NULL) { ASSERT(resp->tr_logflags & XFS_TRANS_PERM_LOG_RES); error = xfs_log_regrant(mp, tp->t_ticket); } else { error = xfs_log_reserve(mp, resp->tr_logres, resp->tr_logcount, &tp->t_ticket, permanent); } if (error) goto undo_blocks; tp->t_log_res = resp->tr_logres; tp->t_log_count = resp->tr_logcount; } /* * Attempt to reserve the needed realtime extents by decrementing * the number needed from the number available. This will * fail if the count would go below zero. */ if (rtextents > 0) { error = xfs_dec_frextents(mp, rtextents); if (error) { error = -ENOSPC; goto undo_log; } tp->t_rtx_res += rtextents; } return 0; /* * Error cases jump to one of these labels to undo any * reservations which have already been performed. */ undo_log: if (resp->tr_logres > 0) { xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket); tp->t_ticket = NULL; tp->t_log_res = 0; tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES; } undo_blocks: if (blocks > 0) { xfs_add_fdblocks(mp, blocks); tp->t_blk_res = 0; } return error; } int xfs_trans_alloc( struct xfs_mount *mp, struct xfs_trans_res *resp, uint blocks, uint rtextents, uint flags, struct xfs_trans **tpp) { struct xfs_trans *tp; bool want_retry = true; int error; /* * Allocate the handle before we do our freeze accounting and setting up * GFP_NOFS allocation context so that we avoid lockdep false positives * by doing GFP_KERNEL allocations inside sb_start_intwrite(). */ retry: tp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL); if (!(flags & XFS_TRANS_NO_WRITECOUNT)) sb_start_intwrite(mp->m_super); xfs_trans_set_context(tp); /* * Zero-reservation ("empty") transactions can't modify anything, so * they're allowed to run while we're frozen. */ WARN_ON(resp->tr_logres > 0 && mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) || xfs_has_lazysbcount(mp)); tp->t_magic = XFS_TRANS_HEADER_MAGIC; tp->t_flags = flags; tp->t_mountp = mp; INIT_LIST_HEAD(&tp->t_items); INIT_LIST_HEAD(&tp->t_busy); INIT_LIST_HEAD(&tp->t_dfops); tp->t_highest_agno = NULLAGNUMBER; error = xfs_trans_reserve(tp, resp, blocks, rtextents); if (error == -ENOSPC && want_retry) { xfs_trans_cancel(tp); /* * We weren't able to reserve enough space for the transaction. * Flush the other speculative space allocations to free space. * Do not perform a synchronous scan because callers can hold * other locks. */ error = xfs_blockgc_flush_all(mp); if (error) return error; want_retry = false; goto retry; } if (error) { xfs_trans_cancel(tp); return error; } trace_xfs_trans_alloc(tp, _RET_IP_); *tpp = tp; return 0; } /* * Create an empty transaction with no reservation. This is a defensive * mechanism for routines that query metadata without actually modifying them -- * if the metadata being queried is somehow cross-linked (think a btree block * pointer that points higher in the tree), we risk deadlock. However, blocks * grabbed as part of a transaction can be re-grabbed. The verifiers will * notice the corrupt block and the operation will fail back to userspace * without deadlocking. * * Note the zero-length reservation; this transaction MUST be cancelled without * any dirty data. * * Callers should obtain freeze protection to avoid a conflict with fs freezing * where we can be grabbing buffers at the same time that freeze is trying to * drain the buffer LRU list. */ int xfs_trans_alloc_empty( struct xfs_mount *mp, struct xfs_trans **tpp) { struct xfs_trans_res resv = {0}; return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp); } /* * Record the indicated change to the given field for application * to the file system's superblock when the transaction commits. * For now, just store the change in the transaction structure. * * Mark the transaction structure to indicate that the superblock * needs to be updated before committing. * * Because we may not be keeping track of allocated/free inodes and * used filesystem blocks in the superblock, we do not mark the * superblock dirty in this transaction if we modify these fields. * We still need to update the transaction deltas so that they get * applied to the incore superblock, but we don't want them to * cause the superblock to get locked and logged if these are the * only fields in the superblock that the transaction modifies. */ void xfs_trans_mod_sb( xfs_trans_t *tp, uint field, int64_t delta) { uint32_t flags = (XFS_TRANS_DIRTY|XFS_TRANS_SB_DIRTY); xfs_mount_t *mp = tp->t_mountp; switch (field) { case XFS_TRANS_SB_ICOUNT: tp->t_icount_delta += delta; if (xfs_has_lazysbcount(mp)) flags &= ~XFS_TRANS_SB_DIRTY; break; case XFS_TRANS_SB_IFREE: tp->t_ifree_delta += delta; if (xfs_has_lazysbcount(mp)) flags &= ~XFS_TRANS_SB_DIRTY; break; case XFS_TRANS_SB_FDBLOCKS: /* * Track the number of blocks allocated in the transaction. * Make sure it does not exceed the number reserved. If so, * shutdown as this can lead to accounting inconsistency. */ if (delta < 0) { tp->t_blk_res_used += (uint)-delta; if (tp->t_blk_res_used > tp->t_blk_res) xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); } else if (delta > 0 && (tp->t_flags & XFS_TRANS_RES_FDBLKS)) { int64_t blkres_delta; /* * Return freed blocks directly to the reservation * instead of the global pool, being careful not to * overflow the trans counter. This is used to preserve * reservation across chains of transaction rolls that * repeatedly free and allocate blocks. */ blkres_delta = min_t(int64_t, delta, UINT_MAX - tp->t_blk_res); tp->t_blk_res += blkres_delta; delta -= blkres_delta; } tp->t_fdblocks_delta += delta; if (xfs_has_lazysbcount(mp)) flags &= ~XFS_TRANS_SB_DIRTY; break; case XFS_TRANS_SB_RES_FDBLOCKS: /* * The allocation has already been applied to the * in-core superblock's counter. This should only * be applied to the on-disk superblock. */ tp->t_res_fdblocks_delta += delta; if (xfs_has_lazysbcount(mp)) flags &= ~XFS_TRANS_SB_DIRTY; break; case XFS_TRANS_SB_FREXTENTS: /* * Track the number of blocks allocated in the * transaction. Make sure it does not exceed the * number reserved. */ if (delta < 0) { tp->t_rtx_res_used += (uint)-delta; ASSERT(tp->t_rtx_res_used <= tp->t_rtx_res); } tp->t_frextents_delta += delta; break; case XFS_TRANS_SB_RES_FREXTENTS: /* * The allocation has already been applied to the * in-core superblock's counter. This should only * be applied to the on-disk superblock. */ ASSERT(delta < 0); tp->t_res_frextents_delta += delta; break; case XFS_TRANS_SB_DBLOCKS: tp->t_dblocks_delta += delta; break; case XFS_TRANS_SB_AGCOUNT: ASSERT(delta > 0); tp->t_agcount_delta += delta; break; case XFS_TRANS_SB_IMAXPCT: tp->t_imaxpct_delta += delta; break; case XFS_TRANS_SB_REXTSIZE: tp->t_rextsize_delta += delta; break; case XFS_TRANS_SB_RBMBLOCKS: tp->t_rbmblocks_delta += delta; break; case XFS_TRANS_SB_RBLOCKS: tp->t_rblocks_delta += delta; break; case XFS_TRANS_SB_REXTENTS: tp->t_rextents_delta += delta; break; case XFS_TRANS_SB_REXTSLOG: tp->t_rextslog_delta += delta; break; default: ASSERT(0); return; } tp->t_flags |= flags; } /* * xfs_trans_apply_sb_deltas() is called from the commit code * to bring the superblock buffer into the current transaction * and modify it as requested by earlier calls to xfs_trans_mod_sb(). * * For now we just look at each field allowed to change and change * it if necessary. */ STATIC void xfs_trans_apply_sb_deltas( xfs_trans_t *tp) { struct xfs_dsb *sbp; struct xfs_buf *bp; int whole = 0; bp = xfs_trans_getsb(tp); sbp = bp->b_addr; /* * Only update the superblock counters if we are logging them */ if (!xfs_has_lazysbcount((tp->t_mountp))) { if (tp->t_icount_delta) be64_add_cpu(&sbp->sb_icount, tp->t_icount_delta); if (tp->t_ifree_delta) be64_add_cpu(&sbp->sb_ifree, tp->t_ifree_delta); if (tp->t_fdblocks_delta) be64_add_cpu(&sbp->sb_fdblocks, tp->t_fdblocks_delta); if (tp->t_res_fdblocks_delta) be64_add_cpu(&sbp->sb_fdblocks, tp->t_res_fdblocks_delta); } /* * Updating frextents requires careful handling because it does not * behave like the lazysb counters because we cannot rely on log * recovery in older kenels to recompute the value from the rtbitmap. * This means that the ondisk frextents must be consistent with the * rtbitmap. * * Therefore, log the frextents change to the ondisk superblock and * update the incore superblock so that future calls to xfs_log_sb * write the correct value ondisk. * * Don't touch m_frextents because it includes incore reservations, * and those are handled by the unreserve function. */ if (tp->t_frextents_delta || tp->t_res_frextents_delta) { struct xfs_mount *mp = tp->t_mountp; int64_t rtxdelta; rtxdelta = tp->t_frextents_delta + tp->t_res_frextents_delta; spin_lock(&mp->m_sb_lock); be64_add_cpu(&sbp->sb_frextents, rtxdelta); mp->m_sb.sb_frextents += rtxdelta; spin_unlock(&mp->m_sb_lock); } if (tp->t_dblocks_delta) { be64_add_cpu(&sbp->sb_dblocks, tp->t_dblocks_delta); whole = 1; } if (tp->t_agcount_delta) { be32_add_cpu(&sbp->sb_agcount, tp->t_agcount_delta); whole = 1; } if (tp->t_imaxpct_delta) { sbp->sb_imax_pct += tp->t_imaxpct_delta; whole = 1; } if (tp->t_rextsize_delta) { be32_add_cpu(&sbp->sb_rextsize, tp->t_rextsize_delta); whole = 1; } if (tp->t_rbmblocks_delta) { be32_add_cpu(&sbp->sb_rbmblocks, tp->t_rbmblocks_delta); whole = 1; } if (tp->t_rblocks_delta) { be64_add_cpu(&sbp->sb_rblocks, tp->t_rblocks_delta); whole = 1; } if (tp->t_rextents_delta) { be64_add_cpu(&sbp->sb_rextents, tp->t_rextents_delta); whole = 1; } if (tp->t_rextslog_delta) { sbp->sb_rextslog += tp->t_rextslog_delta; whole = 1; } xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); if (whole) /* * Log the whole thing, the fields are noncontiguous. */ xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1); else /* * Since all the modifiable fields are contiguous, we * can get away with this. */ xfs_trans_log_buf(tp, bp, offsetof(struct xfs_dsb, sb_icount), offsetof(struct xfs_dsb, sb_frextents) + sizeof(sbp->sb_frextents) - 1); } /* * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations and * apply superblock counter changes to the in-core superblock. The * t_res_fdblocks_delta and t_res_frextents_delta fields are explicitly NOT * applied to the in-core superblock. The idea is that that has already been * done. * * If we are not logging superblock counters, then the inode allocated/free and * used block counts are not updated in the on disk superblock. In this case, * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we * still need to update the incore superblock with the changes. * * Deltas for the inode count are +/-64, hence we use a large batch size of 128 * so we don't need to take the counter lock on every update. */ #define XFS_ICOUNT_BATCH 128 void xfs_trans_unreserve_and_mod_sb( struct xfs_trans *tp) { struct xfs_mount *mp = tp->t_mountp; int64_t blkdelta = tp->t_blk_res; int64_t rtxdelta = tp->t_rtx_res; int64_t idelta = 0; int64_t ifreedelta = 0; /* * Calculate the deltas. * * t_fdblocks_delta and t_frextents_delta can be positive or negative: * * - positive values indicate blocks freed in the transaction. * - negative values indicate blocks allocated in the transaction * * Negative values can only happen if the transaction has a block * reservation that covers the allocated block. The end result is * that the calculated delta values must always be positive and we * can only put back previous allocated or reserved blocks here. */ ASSERT(tp->t_blk_res || tp->t_fdblocks_delta >= 0); if (xfs_has_lazysbcount(mp) || (tp->t_flags & XFS_TRANS_SB_DIRTY)) { blkdelta += tp->t_fdblocks_delta; ASSERT(blkdelta >= 0); } ASSERT(tp->t_rtx_res || tp->t_frextents_delta >= 0); if (tp->t_flags & XFS_TRANS_SB_DIRTY) { rtxdelta += tp->t_frextents_delta; ASSERT(rtxdelta >= 0); } if (xfs_has_lazysbcount(mp) || (tp->t_flags & XFS_TRANS_SB_DIRTY)) { idelta = tp->t_icount_delta; ifreedelta = tp->t_ifree_delta; } /* apply the per-cpu counters */ if (blkdelta) xfs_add_fdblocks(mp, blkdelta); if (idelta) percpu_counter_add_batch(&mp->m_icount, idelta, XFS_ICOUNT_BATCH); if (ifreedelta) percpu_counter_add(&mp->m_ifree, ifreedelta); if (rtxdelta) xfs_add_frextents(mp, rtxdelta); if (!(tp->t_flags & XFS_TRANS_SB_DIRTY)) return; /* apply remaining deltas */ spin_lock(&mp->m_sb_lock); mp->m_sb.sb_fdblocks += tp->t_fdblocks_delta + tp->t_res_fdblocks_delta; mp->m_sb.sb_icount += idelta; mp->m_sb.sb_ifree += ifreedelta; /* * Do not touch sb_frextents here because we are dealing with incore * reservation. sb_frextents is not part of the lazy sb counters so it * must be consistent with the ondisk rtbitmap and must never include * incore reservations. */ mp->m_sb.sb_dblocks += tp->t_dblocks_delta; mp->m_sb.sb_agcount += tp->t_agcount_delta; mp->m_sb.sb_imax_pct += tp->t_imaxpct_delta; mp->m_sb.sb_rextsize += tp->t_rextsize_delta; if (tp->t_rextsize_delta) { mp->m_rtxblklog = log2_if_power2(mp->m_sb.sb_rextsize); mp->m_rtxblkmask = mask64_if_power2(mp->m_sb.sb_rextsize); } mp->m_sb.sb_rbmblocks += tp->t_rbmblocks_delta; mp->m_sb.sb_rblocks += tp->t_rblocks_delta; mp->m_sb.sb_rextents += tp->t_rextents_delta; mp->m_sb.sb_rextslog += tp->t_rextslog_delta; spin_unlock(&mp->m_sb_lock); /* * Debug checks outside of the spinlock so they don't lock up the * machine if they fail. */ ASSERT(mp->m_sb.sb_imax_pct >= 0); ASSERT(mp->m_sb.sb_rextslog >= 0); } /* Add the given log item to the transaction's list of log items. */ void xfs_trans_add_item( struct xfs_trans *tp, struct xfs_log_item *lip) { ASSERT(lip->li_log == tp->t_mountp->m_log); ASSERT(lip->li_ailp == tp->t_mountp->m_ail); ASSERT(list_empty(&lip->li_trans)); ASSERT(!test_bit(XFS_LI_DIRTY, &lip->li_flags)); list_add_tail(&lip->li_trans, &tp->t_items); trace_xfs_trans_add_item(tp, _RET_IP_); } /* * Unlink the log item from the transaction. the log item is no longer * considered dirty in this transaction, as the linked transaction has * finished, either by abort or commit completion. */ void xfs_trans_del_item( struct xfs_log_item *lip) { clear_bit(XFS_LI_DIRTY, &lip->li_flags); list_del_init(&lip->li_trans); } /* Detach and unlock all of the items in a transaction */ static void xfs_trans_free_items( struct xfs_trans *tp, bool abort) { struct xfs_log_item *lip, *next; trace_xfs_trans_free_items(tp, _RET_IP_); list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { xfs_trans_del_item(lip); if (abort) set_bit(XFS_LI_ABORTED, &lip->li_flags); if (lip->li_ops->iop_release) lip->li_ops->iop_release(lip); } } /* * Sort transaction items prior to running precommit operations. This will * attempt to order the items such that they will always be locked in the same * order. Items that have no sort function are moved to the end of the list * and so are locked last. * * This may need refinement as different types of objects add sort functions. * * Function is more complex than it needs to be because we are comparing 64 bit * values and the function only returns 32 bit values. */ static int xfs_trans_precommit_sort( void *unused_arg, const struct list_head *a, const struct list_head *b) { struct xfs_log_item *lia = container_of(a, struct xfs_log_item, li_trans); struct xfs_log_item *lib = container_of(b, struct xfs_log_item, li_trans); int64_t diff; /* * If both items are non-sortable, leave them alone. If only one is * sortable, move the non-sortable item towards the end of the list. */ if (!lia->li_ops->iop_sort && !lib->li_ops->iop_sort) return 0; if (!lia->li_ops->iop_sort) return 1; if (!lib->li_ops->iop_sort) return -1; diff = lia->li_ops->iop_sort(lia) - lib->li_ops->iop_sort(lib); if (diff < 0) return -1; if (diff > 0) return 1; return 0; } /* * Run transaction precommit functions. * * If there is an error in any of the callouts, then stop immediately and * trigger a shutdown to abort the transaction. There is no recovery possible * from errors at this point as the transaction is dirty.... */ static int xfs_trans_run_precommits( struct xfs_trans *tp) { struct xfs_mount *mp = tp->t_mountp; struct xfs_log_item *lip, *n; int error = 0; /* * Sort the item list to avoid ABBA deadlocks with other transactions * running precommit operations that lock multiple shared items such as * inode cluster buffers. */ list_sort(NULL, &tp->t_items, xfs_trans_precommit_sort); /* * Precommit operations can remove the log item from the transaction * if the log item exists purely to delay modifications until they * can be ordered against other operations. Hence we have to use * list_for_each_entry_safe() here. */ list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) { if (!test_bit(XFS_LI_DIRTY, &lip->li_flags)) continue; if (lip->li_ops->iop_precommit) { error = lip->li_ops->iop_precommit(tp, lip); if (error) break; } } if (error) xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); return error; } /* * Commit the given transaction to the log. * * XFS disk error handling mechanism is not based on a typical * transaction abort mechanism. Logically after the filesystem * gets marked 'SHUTDOWN', we can't let any new transactions * be durable - ie. committed to disk - because some metadata might * be inconsistent. In such cases, this returns an error, and the * caller may assume that all locked objects joined to the transaction * have already been unlocked as if the commit had succeeded. * Do not reference the transaction structure after this call. */ static int __xfs_trans_commit( struct xfs_trans *tp, bool regrant) { struct xfs_mount *mp = tp->t_mountp; struct xlog *log = mp->m_log; xfs_csn_t commit_seq = 0; int error = 0; int sync = tp->t_flags & XFS_TRANS_SYNC; trace_xfs_trans_commit(tp, _RET_IP_); error = xfs_trans_run_precommits(tp); if (error) { if (tp->t_flags & XFS_TRANS_PERM_LOG_RES) xfs_defer_cancel(tp); goto out_unreserve; } /* * Finish deferred items on final commit. Only permanent transactions * should ever have deferred ops. */ WARN_ON_ONCE(!list_empty(&tp->t_dfops) && !(tp->t_flags & XFS_TRANS_PERM_LOG_RES)); if (!regrant && (tp->t_flags & XFS_TRANS_PERM_LOG_RES)) { error = xfs_defer_finish_noroll(&tp); if (error) goto out_unreserve; /* Run precommits from final tx in defer chain. */ error = xfs_trans_run_precommits(tp); if (error) goto out_unreserve; } /* * If there is nothing to be logged by the transaction, * then unlock all of the items associated with the * transaction and free the transaction structure. * Also make sure to return any reserved blocks to * the free pool. */ if (!(tp->t_flags & XFS_TRANS_DIRTY)) goto out_unreserve; /* * We must check against log shutdown here because we cannot abort log * items and leave them dirty, inconsistent and unpinned in memory while * the log is active. This leaves them open to being written back to * disk, and that will lead to on-disk corruption. */ if (xlog_is_shutdown(log)) { error = -EIO; goto out_unreserve; } ASSERT(tp->t_ticket != NULL); /* * If we need to update the superblock, then do it now. */ if (tp->t_flags & XFS_TRANS_SB_DIRTY) xfs_trans_apply_sb_deltas(tp); xfs_trans_apply_dquot_deltas(tp); xlog_cil_commit(log, tp, &commit_seq, regrant); xfs_trans_free(tp); /* * If the transaction needs to be synchronous, then force the * log out now and wait for it. */ if (sync) { error = xfs_log_force_seq(mp, commit_seq, XFS_LOG_SYNC, NULL); XFS_STATS_INC(mp, xs_trans_sync); } else { XFS_STATS_INC(mp, xs_trans_async); } return error; out_unreserve: xfs_trans_unreserve_and_mod_sb(tp); /* * It is indeed possible for the transaction to be not dirty but * the dqinfo portion to be. All that means is that we have some * (non-persistent) quota reservations that need to be unreserved. */ xfs_trans_unreserve_and_mod_dquots(tp); if (tp->t_ticket) { if (regrant && !xlog_is_shutdown(log)) xfs_log_ticket_regrant(log, tp->t_ticket); else xfs_log_ticket_ungrant(log, tp->t_ticket); tp->t_ticket = NULL; } xfs_trans_free_items(tp, !!error); xfs_trans_free(tp); XFS_STATS_INC(mp, xs_trans_empty); return error; } int xfs_trans_commit( struct xfs_trans *tp) { return __xfs_trans_commit(tp, false); } /* * Unlock all of the transaction's items and free the transaction. If the * transaction is dirty, we must shut down the filesystem because there is no * way to restore them to their previous state. * * If the transaction has made a log reservation, make sure to release it as * well. * * This is a high level function (equivalent to xfs_trans_commit()) and so can * be called after the transaction has effectively been aborted due to the mount * being shut down. However, if the mount has not been shut down and the * transaction is dirty we will shut the mount down and, in doing so, that * guarantees that the log is shut down, too. Hence we don't need to be as * careful with shutdown state and dirty items here as we need to be in * xfs_trans_commit(). */ void xfs_trans_cancel( struct xfs_trans *tp) { struct xfs_mount *mp = tp->t_mountp; struct xlog *log = mp->m_log; bool dirty = (tp->t_flags & XFS_TRANS_DIRTY); trace_xfs_trans_cancel(tp, _RET_IP_); /* * It's never valid to cancel a transaction with deferred ops attached, * because the transaction is effectively dirty. Complain about this * loudly before freeing the in-memory defer items and shutting down the * filesystem. */ if (!list_empty(&tp->t_dfops)) { ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); dirty = true; xfs_defer_cancel(tp); } /* * See if the caller is relying on us to shut down the filesystem. We * only want an error report if there isn't already a shutdown in * progress, so we only need to check against the mount shutdown state * here. */ if (dirty && !xfs_is_shutdown(mp)) { XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); } #ifdef DEBUG /* Log items need to be consistent until the log is shut down. */ if (!dirty && !xlog_is_shutdown(log)) { struct xfs_log_item *lip; list_for_each_entry(lip, &tp->t_items, li_trans) ASSERT(!xlog_item_is_intent_done(lip)); } #endif xfs_trans_unreserve_and_mod_sb(tp); xfs_trans_unreserve_and_mod_dquots(tp); if (tp->t_ticket) { xfs_log_ticket_ungrant(log, tp->t_ticket); tp->t_ticket = NULL; } xfs_trans_free_items(tp, dirty); xfs_trans_free(tp); } /* * Roll from one trans in the sequence of PERMANENT transactions to * the next: permanent transactions are only flushed out when * committed with xfs_trans_commit(), but we still want as soon * as possible to let chunks of it go to the log. So we commit the * chunk we've been working on and get a new transaction to continue. */ int xfs_trans_roll( struct xfs_trans **tpp) { struct xfs_trans *trans = *tpp; struct xfs_trans_res tres; int error; trace_xfs_trans_roll(trans, _RET_IP_); /* * Copy the critical parameters from one trans to the next. */ tres.tr_logres = trans->t_log_res; tres.tr_logcount = trans->t_log_count; *tpp = xfs_trans_dup(trans); /* * Commit the current transaction. * If this commit failed, then it'd just unlock those items that * are not marked ihold. That also means that a filesystem shutdown * is in progress. The caller takes the responsibility to cancel * the duplicate transaction that gets returned. */ error = __xfs_trans_commit(trans, true); if (error) return error; /* * Reserve space in the log for the next transaction. * This also pushes items in the "AIL", the list of logged items, * out to disk if they are taking up space at the tail of the log * that we want to use. This requires that either nothing be locked * across this call, or that anything that is locked be logged in * the prior and the next transactions. */ tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; return xfs_trans_reserve(*tpp, &tres, 0, 0); } /* * Allocate an transaction, lock and join the inode to it, and reserve quota. * * The caller must ensure that the on-disk dquots attached to this inode have * already been allocated and initialized. The caller is responsible for * releasing ILOCK_EXCL if a new transaction is returned. */ int xfs_trans_alloc_inode( struct xfs_inode *ip, struct xfs_trans_res *resv, unsigned int dblocks, unsigned int rblocks, bool force, struct xfs_trans **tpp) { struct xfs_trans *tp; struct xfs_mount *mp = ip->i_mount; bool retried = false; int error; retry: error = xfs_trans_alloc(mp, resv, dblocks, xfs_extlen_to_rtxlen(mp, rblocks), force ? XFS_TRANS_RESERVE : 0, &tp); if (error) return error; xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); error = xfs_qm_dqattach_locked(ip, false); if (error) { /* Caller should have allocated the dquots! */ ASSERT(error != -ENOENT); goto out_cancel; } error = xfs_trans_reserve_quota_nblks(tp, ip, dblocks, rblocks, force); if ((error == -EDQUOT || error == -ENOSPC) && !retried) { xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_blockgc_free_quota(ip, 0); retried = true; goto retry; } if (error) goto out_cancel; *tpp = tp; return 0; out_cancel: xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } /* * Try to reserve more blocks for a transaction. * * This is for callers that need to attach resources to a transaction, scan * those resources to determine the space reservation requirements, and then * modify the attached resources. In other words, online repair. This can * fail due to ENOSPC, so the caller must be able to cancel the transaction * without shutting down the fs. */ int xfs_trans_reserve_more( struct xfs_trans *tp, unsigned int blocks, unsigned int rtextents) { struct xfs_trans_res resv = { }; return xfs_trans_reserve(tp, &resv, blocks, rtextents); } /* * Try to reserve more blocks and file quota for a transaction. Same * conditions of usage as xfs_trans_reserve_more. */ int xfs_trans_reserve_more_inode( struct xfs_trans *tp, struct xfs_inode *ip, unsigned int dblocks, unsigned int rblocks, bool force_quota) { struct xfs_trans_res resv = { }; struct xfs_mount *mp = ip->i_mount; unsigned int rtx = xfs_extlen_to_rtxlen(mp, rblocks); int error; xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); error = xfs_trans_reserve(tp, &resv, dblocks, rtx); if (error) return error; if (!XFS_IS_QUOTA_ON(mp) || xfs_is_quota_inode(&mp->m_sb, ip->i_ino)) return 0; if (tp->t_flags & XFS_TRANS_RESERVE) force_quota = true; error = xfs_trans_reserve_quota_nblks(tp, ip, dblocks, rblocks, force_quota); if (!error) return 0; /* Quota failed, give back the new reservation. */ xfs_add_fdblocks(mp, dblocks); tp->t_blk_res -= dblocks; xfs_add_frextents(mp, rtx); tp->t_rtx_res -= rtx; return error; } /* * Allocate an transaction in preparation for inode creation by reserving quota * against the given dquots. Callers are not required to hold any inode locks. */ int xfs_trans_alloc_icreate( struct xfs_mount *mp, struct xfs_trans_res *resv, struct xfs_dquot *udqp, struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, unsigned int dblocks, struct xfs_trans **tpp) { struct xfs_trans *tp; bool retried = false; int error; retry: error = xfs_trans_alloc(mp, resv, dblocks, 0, 0, &tp); if (error) return error; error = xfs_trans_reserve_quota_icreate(tp, udqp, gdqp, pdqp, dblocks); if ((error == -EDQUOT || error == -ENOSPC) && !retried) { xfs_trans_cancel(tp); xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0); retried = true; goto retry; } if (error) { xfs_trans_cancel(tp); return error; } *tpp = tp; return 0; } /* * Allocate an transaction, lock and join the inode to it, and reserve quota * in preparation for inode attribute changes that include uid, gid, or prid * changes. * * The caller must ensure that the on-disk dquots attached to this inode have * already been allocated and initialized. The ILOCK will be dropped when the * transaction is committed or cancelled. */ int xfs_trans_alloc_ichange( struct xfs_inode *ip, struct xfs_dquot *new_udqp, struct xfs_dquot *new_gdqp, struct xfs_dquot *new_pdqp, bool force, struct xfs_trans **tpp) { struct xfs_trans *tp; struct xfs_mount *mp = ip->i_mount; struct xfs_dquot *udqp; struct xfs_dquot *gdqp; struct xfs_dquot *pdqp; bool retried = false; int error; retry: error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); if (error) return error; xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); error = xfs_qm_dqattach_locked(ip, false); if (error) { /* Caller should have allocated the dquots! */ ASSERT(error != -ENOENT); goto out_cancel; } /* * For each quota type, skip quota reservations if the inode's dquots * now match the ones that came from the caller, or the caller didn't * pass one in. The inode's dquots can change if we drop the ILOCK to * perform a blockgc scan, so we must preserve the caller's arguments. */ udqp = (new_udqp != ip->i_udquot) ? new_udqp : NULL; gdqp = (new_gdqp != ip->i_gdquot) ? new_gdqp : NULL; pdqp = (new_pdqp != ip->i_pdquot) ? new_pdqp : NULL; if (udqp || gdqp || pdqp) { unsigned int qflags = XFS_QMOPT_RES_REGBLKS; if (force) qflags |= XFS_QMOPT_FORCE_RES; /* * Reserve enough quota to handle blocks on disk and reserved * for a delayed allocation. We'll actually transfer the * delalloc reservation between dquots at chown time, even * though that part is only semi-transactional. */ error = xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp, pdqp, ip->i_nblocks + ip->i_delayed_blks, 1, qflags); if ((error == -EDQUOT || error == -ENOSPC) && !retried) { xfs_trans_cancel(tp); xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0); retried = true; goto retry; } if (error) goto out_cancel; } *tpp = tp; return 0; out_cancel: xfs_trans_cancel(tp); return error; } /* * Allocate an transaction, lock and join the directory and child inodes to it, * and reserve quota for a directory update. If there isn't sufficient space, * @dblocks will be set to zero for a reservationless directory update and * @nospace_error will be set to a negative errno describing the space * constraint we hit. * * The caller must ensure that the on-disk dquots attached to this inode have * already been allocated and initialized. The ILOCKs will be dropped when the * transaction is committed or cancelled. * * Caller is responsible for unlocking the inodes manually upon return */ int xfs_trans_alloc_dir( struct xfs_inode *dp, struct xfs_trans_res *resv, struct xfs_inode *ip, unsigned int *dblocks, struct xfs_trans **tpp, int *nospace_error) { struct xfs_trans *tp; struct xfs_mount *mp = ip->i_mount; unsigned int resblks; bool retried = false; int error; retry: *nospace_error = 0; resblks = *dblocks; error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp); if (error == -ENOSPC) { *nospace_error = error; resblks = 0; error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp); } if (error) return error; xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, dp, 0); xfs_trans_ijoin(tp, ip, 0); error = xfs_qm_dqattach_locked(dp, false); if (error) { /* Caller should have allocated the dquots! */ ASSERT(error != -ENOENT); goto out_cancel; } error = xfs_qm_dqattach_locked(ip, false); if (error) { /* Caller should have allocated the dquots! */ ASSERT(error != -ENOENT); goto out_cancel; } if (resblks == 0) goto done; error = xfs_trans_reserve_quota_nblks(tp, dp, resblks, 0, false); if (error == -EDQUOT || error == -ENOSPC) { if (!retried) { xfs_trans_cancel(tp); xfs_iunlock(dp, XFS_ILOCK_EXCL); if (dp != ip) xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_blockgc_free_quota(dp, 0); retried = true; goto retry; } *nospace_error = error; resblks = 0; error = 0; } if (error) goto out_cancel; done: *tpp = tp; *dblocks = resblks; return 0; out_cancel: xfs_trans_cancel(tp); return error; }
19 19 19 19 19 19 19 19 19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 // SPDX-License-Identifier: GPL-2.0-or-later /* * Fast Userspace Mutexes (which I call "Futexes!"). * (C) Rusty Russell, IBM 2002 * * Generalized futexes, futex requeueing, misc fixes by Ingo Molnar * (C) Copyright 2003 Red Hat Inc, All Rights Reserved * * Removed page pinning, fix privately mapped COW pages and other cleanups * (C) Copyright 2003, 2004 Jamie Lokier * * Robust futex support started by Ingo Molnar * (C) Copyright 2006 Red Hat Inc, All Rights Reserved * Thanks to Thomas Gleixner for suggestions, analysis and fixes. * * PI-futex support started by Ingo Molnar and Thomas Gleixner * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> * Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> * * PRIVATE futexes by Eric Dumazet * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com> * * Requeue-PI support by Darren Hart <dvhltc@us.ibm.com> * Copyright (C) IBM Corporation, 2009 * Thanks to Thomas Gleixner for conceptual design and careful reviews. * * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly * enough at me, Linus for the original (flawed) idea, Matthew * Kirkwood for proof-of-concept implementation. * * "The futexes are also cursed." * "But they come in a choice of three flavours!" */ #include <linux/compat.h> #include <linux/jhash.h> #include <linux/pagemap.h> #include <linux/plist.h> #include <linux/memblock.h> #include <linux/fault-inject.h> #include <linux/slab.h> #include "futex.h" #include "../locking/rtmutex_common.h" /* * The base of the bucket array and its size are always used together * (after initialization only in futex_hash()), so ensure that they * reside in the same cacheline. */ static struct { struct futex_hash_bucket *queues; unsigned long hashsize; } __futex_data __read_mostly __aligned(2*sizeof(long)); #define futex_queues (__futex_data.queues) #define futex_hashsize (__futex_data.hashsize) /* * Fault injections for futexes. */ #ifdef CONFIG_FAIL_FUTEX static struct { struct fault_attr attr; bool ignore_private; } fail_futex = { .attr = FAULT_ATTR_INITIALIZER, .ignore_private = false, }; static int __init setup_fail_futex(char *str) { return setup_fault_attr(&fail_futex.attr, str); } __setup("fail_futex=", setup_fail_futex); bool should_fail_futex(bool fshared) { if (fail_futex.ignore_private && !fshared) return false; return should_fail(&fail_futex.attr, 1); } #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS static int __init fail_futex_debugfs(void) { umode_t mode = S_IFREG | S_IRUSR | S_IWUSR; struct dentry *dir; dir = fault_create_debugfs_attr("fail_futex", NULL, &fail_futex.attr); if (IS_ERR(dir)) return PTR_ERR(dir); debugfs_create_bool("ignore-private", mode, dir, &fail_futex.ignore_private); return 0; } late_initcall(fail_futex_debugfs); #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ #endif /* CONFIG_FAIL_FUTEX */ /** * futex_hash - Return the hash bucket in the global hash * @key: Pointer to the futex key for which the hash is calculated * * We hash on the keys returned from get_futex_key (see below) and return the * corresponding hash bucket in the global hash. */ struct futex_hash_bucket *futex_hash(union futex_key *key) { u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4, key->both.offset); return &futex_queues[hash & (futex_hashsize - 1)]; } /** * futex_setup_timer - set up the sleeping hrtimer. * @time: ptr to the given timeout value * @timeout: the hrtimer_sleeper structure to be set up * @flags: futex flags * @range_ns: optional range in ns * * Return: Initialized hrtimer_sleeper structure or NULL if no timeout * value given */ struct hrtimer_sleeper * futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, int flags, u64 range_ns) { if (!time) return NULL; hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ? CLOCK_REALTIME : CLOCK_MONOTONIC, HRTIMER_MODE_ABS); /* * If range_ns is 0, calling hrtimer_set_expires_range_ns() is * effectively the same as calling hrtimer_set_expires(). */ hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns); return timeout; } /* * Generate a machine wide unique identifier for this inode. * * This relies on u64 not wrapping in the life-time of the machine; which with * 1ns resolution means almost 585 years. * * This further relies on the fact that a well formed program will not unmap * the file while it has a (shared) futex waiting on it. This mapping will have * a file reference which pins the mount and inode. * * If for some reason an inode gets evicted and read back in again, it will get * a new sequence number and will _NOT_ match, even though it is the exact same * file. * * It is important that futex_match() will never have a false-positive, esp. * for PI futexes that can mess up the state. The above argues that false-negatives * are only possible for malformed programs. */ static u64 get_inode_sequence_number(struct inode *inode) { static atomic64_t i_seq; u64 old; /* Does the inode already have a sequence number? */ old = atomic64_read(&inode->i_sequence); if (likely(old)) return old; for (;;) { u64 new = atomic64_add_return(1, &i_seq); if (WARN_ON_ONCE(!new)) continue; old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new); if (old) return old; return new; } } /** * get_futex_key() - Get parameters which are the keys for a futex * @uaddr: virtual address of the futex * @flags: FLAGS_* * @key: address where result is stored. * @rw: mapping needs to be read/write (values: FUTEX_READ, * FUTEX_WRITE) * * Return: a negative error code or 0 * * The key words are stored in @key on success. * * For shared mappings (when @fshared), the key is: * * ( inode->i_sequence, page->index, offset_within_page ) * * [ also see get_inode_sequence_number() ] * * For private mappings (or when !@fshared), the key is: * * ( current->mm, address, 0 ) * * This allows (cross process, where applicable) identification of the futex * without keeping the page pinned for the duration of the FUTEX_WAIT. * * lock_page() might sleep, the caller should not hold a spinlock. */ int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key, enum futex_access rw) { unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; struct page *page; struct folio *folio; struct address_space *mapping; int err, ro = 0; bool fshared; fshared = flags & FLAGS_SHARED; /* * The futex address must be "naturally" aligned. */ key->both.offset = address % PAGE_SIZE; if (unlikely((address % sizeof(u32)) != 0)) return -EINVAL; address -= key->both.offset; if (unlikely(!access_ok(uaddr, sizeof(u32)))) return -EFAULT; if (unlikely(should_fail_futex(fshared))) return -EFAULT; /* * PROCESS_PRIVATE futexes are fast. * As the mm cannot disappear under us and the 'key' only needs * virtual address, we dont even have to find the underlying vma. * Note : We do have to check 'uaddr' is a valid user address, * but access_ok() should be faster than find_vma() */ if (!fshared) { /* * On no-MMU, shared futexes are treated as private, therefore * we must not include the current process in the key. Since * there is only one address space, the address is a unique key * on its own. */ if (IS_ENABLED(CONFIG_MMU)) key->private.mm = mm; else key->private.mm = NULL; key->private.address = address; return 0; } again: /* Ignore any VERIFY_READ mapping (futex common case) */ if (unlikely(should_fail_futex(true))) return -EFAULT; err = get_user_pages_fast(address, 1, FOLL_WRITE, &page); /* * If write access is not required (eg. FUTEX_WAIT), try * and get read-only access. */ if (err == -EFAULT && rw == FUTEX_READ) { err = get_user_pages_fast(address, 1, 0, &page); ro = 1; } if (err < 0) return err; else err = 0; /* * The treatment of mapping from this point on is critical. The folio * lock protects many things but in this context the folio lock * stabilizes mapping, prevents inode freeing in the shared * file-backed region case and guards against movement to swap cache. * * Strictly speaking the folio lock is not needed in all cases being * considered here and folio lock forces unnecessarily serialization. * From this point on, mapping will be re-verified if necessary and * folio lock will be acquired only if it is unavoidable * * Mapping checks require the folio so it is looked up now. For * anonymous pages, it does not matter if the folio is split * in the future as the key is based on the address. For * filesystem-backed pages, the precise page is required as the * index of the page determines the key. */ folio = page_folio(page); mapping = READ_ONCE(folio->mapping); /* * If folio->mapping is NULL, then it cannot be an anonymous * page; but it might be the ZERO_PAGE or in the gate area or * in a special mapping (all cases which we are happy to fail); * or it may have been a good file page when get_user_pages_fast * found it, but truncated or holepunched or subjected to * invalidate_complete_page2 before we got the folio lock (also * cases which we are happy to fail). And we hold a reference, * so refcount care in invalidate_inode_page's remove_mapping * prevents drop_caches from setting mapping to NULL beneath us. * * The case we do have to guard against is when memory pressure made * shmem_writepage move it from filecache to swapcache beneath us: * an unlikely race, but we do need to retry for folio->mapping. */ if (unlikely(!mapping)) { int shmem_swizzled; /* * Folio lock is required to identify which special case above * applies. If this is really a shmem page then the folio lock * will prevent unexpected transitions. */ folio_lock(folio); shmem_swizzled = folio_test_swapcache(folio) || folio->mapping; folio_unlock(folio); folio_put(folio); if (shmem_swizzled) goto again; return -EFAULT; } /* * Private mappings are handled in a simple way. * * If the futex key is stored in anonymous memory, then the associated * object is the mm which is implicitly pinned by the calling process. * * NOTE: When userspace waits on a MAP_SHARED mapping, even if * it's a read-only handle, it's expected that futexes attach to * the object not the particular process. */ if (folio_test_anon(folio)) { /* * A RO anonymous page will never change and thus doesn't make * sense for futex operations. */ if (unlikely(should_fail_futex(true)) || ro) { err = -EFAULT; goto out; } key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ key->private.mm = mm; key->private.address = address; } else { struct inode *inode; /* * The associated futex object in this case is the inode and * the folio->mapping must be traversed. Ordinarily this should * be stabilised under folio lock but it's not strictly * necessary in this case as we just want to pin the inode, not * update i_pages or anything like that. * * The RCU read lock is taken as the inode is finally freed * under RCU. If the mapping still matches expectations then the * mapping->host can be safely accessed as being a valid inode. */ rcu_read_lock(); if (READ_ONCE(folio->mapping) != mapping) { rcu_read_unlock(); folio_put(folio); goto again; } inode = READ_ONCE(mapping->host); if (!inode) { rcu_read_unlock(); folio_put(folio); goto again; } key->both.offset |= FUT_OFF_INODE; /* inode-based key */ key->shared.i_seq = get_inode_sequence_number(inode); key->shared.pgoff = folio->index + folio_page_idx(folio, page); rcu_read_unlock(); } out: folio_put(folio); return err; } /** * fault_in_user_writeable() - Fault in user address and verify RW access * @uaddr: pointer to faulting user space address * * Slow path to fixup the fault we just took in the atomic write * access to @uaddr. * * We have no generic implementation of a non-destructive write to the * user address. We know that we faulted in the atomic pagefault * disabled section so we can as well avoid the #PF overhead by * calling get_user_pages() right away. */ int fault_in_user_writeable(u32 __user *uaddr) { struct mm_struct *mm = current->mm; int ret; mmap_read_lock(mm); ret = fixup_user_fault(mm, (unsigned long)uaddr, FAULT_FLAG_WRITE, NULL); mmap_read_unlock(mm); return ret < 0 ? ret : 0; } /** * futex_top_waiter() - Return the highest priority waiter on a futex * @hb: the hash bucket the futex_q's reside in * @key: the futex key (to distinguish it from other futex futex_q's) * * Must be called with the hb lock held. */ struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key) { struct futex_q *this; plist_for_each_entry(this, &hb->chain, list) { if (futex_match(&this->key, key)) return this; } return NULL; } int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval) { int ret; pagefault_disable(); ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval); pagefault_enable(); return ret; } int futex_get_value_locked(u32 *dest, u32 __user *from) { int ret; pagefault_disable(); ret = __get_user(*dest, from); pagefault_enable(); return ret ? -EFAULT : 0; } /** * wait_for_owner_exiting - Block until the owner has exited * @ret: owner's current futex lock status * @exiting: Pointer to the exiting task * * Caller must hold a refcount on @exiting. */ void wait_for_owner_exiting(int ret, struct task_struct *exiting) { if (ret != -EBUSY) { WARN_ON_ONCE(exiting); return; } if (WARN_ON_ONCE(ret == -EBUSY && !exiting)) return; mutex_lock(&exiting->futex_exit_mutex); /* * No point in doing state checking here. If the waiter got here * while the task was in exec()->exec_futex_release() then it can * have any FUTEX_STATE_* value when the waiter has acquired the * mutex. OK, if running, EXITING or DEAD if it reached exit() * already. Highly unlikely and not a problem. Just one more round * through the futex maze. */ mutex_unlock(&exiting->futex_exit_mutex); put_task_struct(exiting); } /** * __futex_unqueue() - Remove the futex_q from its futex_hash_bucket * @q: The futex_q to unqueue * * The q->lock_ptr must not be NULL and must be held by the caller. */ void __futex_unqueue(struct futex_q *q) { struct futex_hash_bucket *hb; if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list))) return; lockdep_assert_held(q->lock_ptr); hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock); plist_del(&q->list, &hb->chain); futex_hb_waiters_dec(hb); } /* The key must be already stored in q->key. */ struct futex_hash_bucket *futex_q_lock(struct futex_q *q) __acquires(&hb->lock) { struct futex_hash_bucket *hb; hb = futex_hash(&q->key); /* * Increment the counter before taking the lock so that * a potential waker won't miss a to-be-slept task that is * waiting for the spinlock. This is safe as all futex_q_lock() * users end up calling futex_queue(). Similarly, for housekeeping, * decrement the counter at futex_q_unlock() when some error has * occurred and we don't end up adding the task to the list. */ futex_hb_waiters_inc(hb); /* implies smp_mb(); (A) */ q->lock_ptr = &hb->lock; spin_lock(&hb->lock); return hb; } void futex_q_unlock(struct futex_hash_bucket *hb) __releases(&hb->lock) { spin_unlock(&hb->lock); futex_hb_waiters_dec(hb); } void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb) { int prio; /* * The priority used to register this element is * - either the real thread-priority for the real-time threads * (i.e. threads with a priority lower than MAX_RT_PRIO) * - or MAX_RT_PRIO for non-RT threads. * Thus, all RT-threads are woken first in priority order, and * the others are woken last, in FIFO order. */ prio = min(current->normal_prio, MAX_RT_PRIO); plist_node_init(&q->list, prio); plist_add(&q->list, &hb->chain); q->task = current; } /** * futex_unqueue() - Remove the futex_q from its futex_hash_bucket * @q: The futex_q to unqueue * * The q->lock_ptr must not be held by the caller. A call to futex_unqueue() must * be paired with exactly one earlier call to futex_queue(). * * Return: * - 1 - if the futex_q was still queued (and we removed unqueued it); * - 0 - if the futex_q was already removed by the waking thread */ int futex_unqueue(struct futex_q *q) { spinlock_t *lock_ptr; int ret = 0; /* In the common case we don't take the spinlock, which is nice. */ retry: /* * q->lock_ptr can change between this read and the following spin_lock. * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and * optimizing lock_ptr out of the logic below. */ lock_ptr = READ_ONCE(q->lock_ptr); if (lock_ptr != NULL) { spin_lock(lock_ptr); /* * q->lock_ptr can change between reading it and * spin_lock(), causing us to take the wrong lock. This * corrects the race condition. * * Reasoning goes like this: if we have the wrong lock, * q->lock_ptr must have changed (maybe several times) * between reading it and the spin_lock(). It can * change again after the spin_lock() but only if it was * already changed before the spin_lock(). It cannot, * however, change back to the original value. Therefore * we can detect whether we acquired the correct lock. */ if (unlikely(lock_ptr != q->lock_ptr)) { spin_unlock(lock_ptr); goto retry; } __futex_unqueue(q); BUG_ON(q->pi_state); spin_unlock(lock_ptr); ret = 1; } return ret; } /* * PI futexes can not be requeued and must remove themselves from the hash * bucket. The hash bucket lock (i.e. lock_ptr) is held. */ void futex_unqueue_pi(struct futex_q *q) { /* * If the lock was not acquired (due to timeout or signal) then the * rt_waiter is removed before futex_q is. If this is observed by * an unlocker after dropping the rtmutex wait lock and before * acquiring the hash bucket lock, then the unlocker dequeues the * futex_q from the hash bucket list to guarantee consistent state * vs. userspace. Therefore the dequeue here must be conditional. */ if (!plist_node_empty(&q->list)) __futex_unqueue(q); BUG_ON(!q->pi_state); put_pi_state(q->pi_state); q->pi_state = NULL; } /* Constants for the pending_op argument of handle_futex_death */ #define HANDLE_DEATH_PENDING true #define HANDLE_DEATH_LIST false /* * Process a futex-list entry, check whether it's owned by the * dying task, and do notification if so: */ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, bool pi, bool pending_op) { u32 uval, nval, mval; pid_t owner; int err; /* Futex address must be 32bit aligned */ if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0) return -1; retry: if (get_user(uval, uaddr)) return -1; /* * Special case for regular (non PI) futexes. The unlock path in * user space has two race scenarios: * * 1. The unlock path releases the user space futex value and * before it can execute the futex() syscall to wake up * waiters it is killed. * * 2. A woken up waiter is killed before it can acquire the * futex in user space. * * In the second case, the wake up notification could be generated * by the unlock path in user space after setting the futex value * to zero or by the kernel after setting the OWNER_DIED bit below. * * In both cases the TID validation below prevents a wakeup of * potential waiters which can cause these waiters to block * forever. * * In both cases the following conditions are met: * * 1) task->robust_list->list_op_pending != NULL * @pending_op == true * 2) The owner part of user space futex value == 0 * 3) Regular futex: @pi == false * * If these conditions are met, it is safe to attempt waking up a * potential waiter without touching the user space futex value and * trying to set the OWNER_DIED bit. If the futex value is zero, * the rest of the user space mutex state is consistent, so a woken * waiter will just take over the uncontended futex. Setting the * OWNER_DIED bit would create inconsistent state and malfunction * of the user space owner died handling. Otherwise, the OWNER_DIED * bit is already set, and the woken waiter is expected to deal with * this. */ owner = uval & FUTEX_TID_MASK; if (pending_op && !pi && !owner) { futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, 1, FUTEX_BITSET_MATCH_ANY); return 0; } if (owner != task_pid_vnr(curr)) return 0; /* * Ok, this dying thread is truly holding a futex * of interest. Set the OWNER_DIED bit atomically * via cmpxchg, and if the value had FUTEX_WAITERS * set, wake up a waiter (if any). (We have to do a * futex_wake() even if OWNER_DIED is already set - * to handle the rare but possible case of recursive * thread-death.) The rest of the cleanup is done in * userspace. */ mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; /* * We are not holding a lock here, but we want to have * the pagefault_disable/enable() protection because * we want to handle the fault gracefully. If the * access fails we try to fault in the futex with R/W * verification via get_user_pages. get_user() above * does not guarantee R/W access. If that fails we * give up and leave the futex locked. */ if ((err = futex_cmpxchg_value_locked(&nval, uaddr, uval, mval))) { switch (err) { case -EFAULT: if (fault_in_user_writeable(uaddr)) return -1; goto retry; case -EAGAIN: cond_resched(); goto retry; default: WARN_ON_ONCE(1); return err; } } if (nval != uval) goto retry; /* * Wake robust non-PI futexes here. The wakeup of * PI futexes happens in exit_pi_state(): */ if (!pi && (uval & FUTEX_WAITERS)) { futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, 1, FUTEX_BITSET_MATCH_ANY); } return 0; } /* * Fetch a robust-list pointer. Bit 0 signals PI futexes: */ static inline int fetch_robust_entry(struct robust_list __user **entry, struct robust_list __user * __user *head, unsigned int *pi) { unsigned long uentry; if (get_user(uentry, (unsigned long __user *)head)) return -EFAULT; *entry = (void __user *)(uentry & ~1UL); *pi = uentry & 1; return 0; } /* * Walk curr->robust_list (very carefully, it's a userspace list!) * and mark any locks found there dead, and notify any waiters. * * We silently return on any sign of list-walking problem. */ static void exit_robust_list(struct task_struct *curr) { struct robust_list_head __user *head = curr->robust_list; struct robust_list __user *entry, *next_entry, *pending; unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; unsigned int next_pi; unsigned long futex_offset; int rc; /* * Fetch the list head (which was registered earlier, via * sys_set_robust_list()): */ if (fetch_robust_entry(&entry, &head->list.next, &pi)) return; /* * Fetch the relative futex offset: */ if (get_user(futex_offset, &head->futex_offset)) return; /* * Fetch any possibly pending lock-add first, and handle it * if it exists: */ if (fetch_robust_entry(&pending, &head->list_op_pending, &pip)) return; next_entry = NULL; /* avoid warning with gcc */ while (entry != &head->list) { /* * Fetch the next entry in the list before calling * handle_futex_death: */ rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi); /* * A pending lock might already be on the list, so * don't process it twice: */ if (entry != pending) { if (handle_futex_death((void __user *)entry + futex_offset, curr, pi, HANDLE_DEATH_LIST)) return; } if (rc) return; entry = next_entry; pi = next_pi; /* * Avoid excessively long or circular lists: */ if (!--limit) break; cond_resched(); } if (pending) { handle_futex_death((void __user *)pending + futex_offset, curr, pip, HANDLE_DEATH_PENDING); } } #ifdef CONFIG_COMPAT static void __user *futex_uaddr(struct robust_list __user *entry, compat_long_t futex_offset) { compat_uptr_t base = ptr_to_compat(entry); void __user *uaddr = compat_ptr(base + futex_offset); return uaddr; } /* * Fetch a robust-list pointer. Bit 0 signals PI futexes: */ static inline int compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, compat_uptr_t __user *head, unsigned int *pi) { if (get_user(*uentry, head)) return -EFAULT; *entry = compat_ptr((*uentry) & ~1); *pi = (unsigned int)(*uentry) & 1; return 0; } /* * Walk curr->robust_list (very carefully, it's a userspace list!) * and mark any locks found there dead, and notify any waiters. * * We silently return on any sign of list-walking problem. */ static void compat_exit_robust_list(struct task_struct *curr) { struct compat_robust_list_head __user *head = curr->compat_robust_list; struct robust_list __user *entry, *next_entry, *pending; unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; unsigned int next_pi; compat_uptr_t uentry, next_uentry, upending; compat_long_t futex_offset; int rc; /* * Fetch the list head (which was registered earlier, via * sys_set_robust_list()): */ if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) return; /* * Fetch the relative futex offset: */ if (get_user(futex_offset, &head->futex_offset)) return; /* * Fetch any possibly pending lock-add first, and handle it * if it exists: */ if (compat_fetch_robust_entry(&upending, &pending, &head->list_op_pending, &pip)) return; next_entry = NULL; /* avoid warning with gcc */ while (entry != (struct robust_list __user *) &head->list) { /* * Fetch the next entry in the list before calling * handle_futex_death: */ rc = compat_fetch_robust_entry(&next_uentry, &next_entry, (compat_uptr_t __user *)&entry->next, &next_pi); /* * A pending lock might already be on the list, so * dont process it twice: */ if (entry != pending) { void __user *uaddr = futex_uaddr(entry, futex_offset); if (handle_futex_death(uaddr, curr, pi, HANDLE_DEATH_LIST)) return; } if (rc) return; uentry = next_uentry; entry = next_entry; pi = next_pi; /* * Avoid excessively long or circular lists: */ if (!--limit) break; cond_resched(); } if (pending) { void __user *uaddr = futex_uaddr(pending, futex_offset); handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING); } } #endif #ifdef CONFIG_FUTEX_PI /* * This task is holding PI mutexes at exit time => bad. * Kernel cleans up PI-state, but userspace is likely hosed. * (Robust-futex cleanup is separate and might save the day for userspace.) */ static void exit_pi_state_list(struct task_struct *curr) { struct list_head *next, *head = &curr->pi_state_list; struct futex_pi_state *pi_state; struct futex_hash_bucket *hb; union futex_key key = FUTEX_KEY_INIT; /* * We are a ZOMBIE and nobody can enqueue itself on * pi_state_list anymore, but we have to be careful * versus waiters unqueueing themselves: */ raw_spin_lock_irq(&curr->pi_lock); while (!list_empty(head)) { next = head->next; pi_state = list_entry(next, struct futex_pi_state, list); key = pi_state->key; hb = futex_hash(&key); /* * We can race against put_pi_state() removing itself from the * list (a waiter going away). put_pi_state() will first * decrement the reference count and then modify the list, so * its possible to see the list entry but fail this reference * acquire. * * In that case; drop the locks to let put_pi_state() make * progress and retry the loop. */ if (!refcount_inc_not_zero(&pi_state->refcount)) { raw_spin_unlock_irq(&curr->pi_lock); cpu_relax(); raw_spin_lock_irq(&curr->pi_lock); continue; } raw_spin_unlock_irq(&curr->pi_lock); spin_lock(&hb->lock); raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); raw_spin_lock(&curr->pi_lock); /* * We dropped the pi-lock, so re-check whether this * task still owns the PI-state: */ if (head->next != next) { /* retain curr->pi_lock for the loop invariant */ raw_spin_unlock(&pi_state->pi_mutex.wait_lock); spin_unlock(&hb->lock); put_pi_state(pi_state); continue; } WARN_ON(pi_state->owner != curr); WARN_ON(list_empty(&pi_state->list)); list_del_init(&pi_state->list); pi_state->owner = NULL; raw_spin_unlock(&curr->pi_lock); raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); spin_unlock(&hb->lock); rt_mutex_futex_unlock(&pi_state->pi_mutex); put_pi_state(pi_state); raw_spin_lock_irq(&curr->pi_lock); } raw_spin_unlock_irq(&curr->pi_lock); } #else static inline void exit_pi_state_list(struct task_struct *curr) { } #endif static void futex_cleanup(struct task_struct *tsk) { if (unlikely(tsk->robust_list)) { exit_robust_list(tsk); tsk->robust_list = NULL; } #ifdef CONFIG_COMPAT if (unlikely(tsk->compat_robust_list)) { compat_exit_robust_list(tsk); tsk->compat_robust_list = NULL; } #endif if (unlikely(!list_empty(&tsk->pi_state_list))) exit_pi_state_list(tsk); } /** * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD * @tsk: task to set the state on * * Set the futex exit state of the task lockless. The futex waiter code * observes that state when a task is exiting and loops until the task has * actually finished the futex cleanup. The worst case for this is that the * waiter runs through the wait loop until the state becomes visible. * * This is called from the recursive fault handling path in make_task_dead(). * * This is best effort. Either the futex exit code has run already or * not. If the OWNER_DIED bit has been set on the futex then the waiter can * take it over. If not, the problem is pushed back to user space. If the * futex exit code did not run yet, then an already queued waiter might * block forever, but there is nothing which can be done about that. */ void futex_exit_recursive(struct task_struct *tsk) { /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */ if (tsk->futex_state == FUTEX_STATE_EXITING) mutex_unlock(&tsk->futex_exit_mutex); tsk->futex_state = FUTEX_STATE_DEAD; } static void futex_cleanup_begin(struct task_struct *tsk) { /* * Prevent various race issues against a concurrent incoming waiter * including live locks by forcing the waiter to block on * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in * attach_to_pi_owner(). */ mutex_lock(&tsk->futex_exit_mutex); /* * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock. * * This ensures that all subsequent checks of tsk->futex_state in * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with * tsk->pi_lock held. * * It guarantees also that a pi_state which was queued right before * the state change under tsk->pi_lock by a concurrent waiter must * be observed in exit_pi_state_list(). */ raw_spin_lock_irq(&tsk->pi_lock); tsk->futex_state = FUTEX_STATE_EXITING; raw_spin_unlock_irq(&tsk->pi_lock); } static void futex_cleanup_end(struct task_struct *tsk, int state) { /* * Lockless store. The only side effect is that an observer might * take another loop until it becomes visible. */ tsk->futex_state = state; /* * Drop the exit protection. This unblocks waiters which observed * FUTEX_STATE_EXITING to reevaluate the state. */ mutex_unlock(&tsk->futex_exit_mutex); } void futex_exec_release(struct task_struct *tsk) { /* * The state handling is done for consistency, but in the case of * exec() there is no way to prevent further damage as the PID stays * the same. But for the unlikely and arguably buggy case that a * futex is held on exec(), this provides at least as much state * consistency protection which is possible. */ futex_cleanup_begin(tsk); futex_cleanup(tsk); /* * Reset the state to FUTEX_STATE_OK. The task is alive and about * exec a new binary. */ futex_cleanup_end(tsk, FUTEX_STATE_OK); } void futex_exit_release(struct task_struct *tsk) { futex_cleanup_begin(tsk); futex_cleanup(tsk); futex_cleanup_end(tsk, FUTEX_STATE_DEAD); } static int __init futex_init(void) { unsigned int futex_shift; unsigned long i; #ifdef CONFIG_BASE_SMALL futex_hashsize = 16; #else futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus()); #endif futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues), futex_hashsize, 0, 0, &futex_shift, NULL, futex_hashsize, futex_hashsize); futex_hashsize = 1UL << futex_shift; for (i = 0; i < futex_hashsize; i++) { atomic_set(&futex_queues[i].waiters, 0); plist_head_init(&futex_queues[i].chain); spin_lock_init(&futex_queues[i].lock); } return 0; } core_initcall(futex_init);
308 306 308 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 // SPDX-License-Identifier: GPL-2.0-only /* * Landlock LSM - Credential hooks * * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net> * Copyright © 2018-2020 ANSSI */ #include <linux/cred.h> #include <linux/lsm_hooks.h> #include "common.h" #include "cred.h" #include "ruleset.h" #include "setup.h" static void hook_cred_transfer(struct cred *const new, const struct cred *const old) { struct landlock_ruleset *const old_dom = landlock_cred(old)->domain; if (old_dom) { landlock_get_ruleset(old_dom); landlock_cred(new)->domain = old_dom; } } static int hook_cred_prepare(struct cred *const new, const struct cred *const old, const gfp_t gfp) { hook_cred_transfer(new, old); return 0; } static void hook_cred_free(struct cred *const cred) { struct landlock_ruleset *const dom = landlock_cred(cred)->domain; if (dom) landlock_put_ruleset_deferred(dom); } static struct security_hook_list landlock_hooks[] __ro_after_init = { LSM_HOOK_INIT(cred_prepare, hook_cred_prepare), LSM_HOOK_INIT(cred_transfer, hook_cred_transfer), LSM_HOOK_INIT(cred_free, hook_cred_free), }; __init void landlock_add_cred_hooks(void) { security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks), &landlock_lsmid); }
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 /* * Copyright 2016 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software") * to deal in the software without restriction, including without limitation * on the rights to use, copy, modify, merge, publish, distribute, sub * license, and/or sell copies of the Software, and to permit persons to whom * them Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTIBILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include <linux/dma-buf.h> #include <linux/dma-resv.h> #include <drm/drm_file.h> #include "vgem_drv.h" #define VGEM_FENCE_TIMEOUT (10*HZ) struct vgem_fence { struct dma_fence base; struct spinlock lock; struct timer_list timer; }; static const char *vgem_fence_get_driver_name(struct dma_fence *fence) { return "vgem"; } static const char *vgem_fence_get_timeline_name(struct dma_fence *fence) { return "unbound"; } static void vgem_fence_release(struct dma_fence *base) { struct vgem_fence *fence = container_of(base, typeof(*fence), base); del_timer_sync(&fence->timer); dma_fence_free(&fence->base); } static void vgem_fence_value_str(struct dma_fence *fence, char *str, int size) { snprintf(str, size, "%llu", fence->seqno); } static void vgem_fence_timeline_value_str(struct dma_fence *fence, char *str, int size) { snprintf(str, size, "%llu", dma_fence_is_signaled(fence) ? fence->seqno : 0); } static const struct dma_fence_ops vgem_fence_ops = { .get_driver_name = vgem_fence_get_driver_name, .get_timeline_name = vgem_fence_get_timeline_name, .release = vgem_fence_release, .fence_value_str = vgem_fence_value_str, .timeline_value_str = vgem_fence_timeline_value_str, }; static void vgem_fence_timeout(struct timer_list *t) { struct vgem_fence *fence = from_timer(fence, t, timer); dma_fence_signal(&fence->base); } static struct dma_fence *vgem_fence_create(struct vgem_file *vfile, unsigned int flags) { struct vgem_fence *fence; fence = kzalloc(sizeof(*fence), GFP_KERNEL); if (!fence) return NULL; spin_lock_init(&fence->lock); dma_fence_init(&fence->base, &vgem_fence_ops, &fence->lock, dma_fence_context_alloc(1), 1); timer_setup(&fence->timer, vgem_fence_timeout, 0); /* We force the fence to expire within 10s to prevent driver hangs */ mod_timer(&fence->timer, jiffies + VGEM_FENCE_TIMEOUT); return &fence->base; } /* * vgem_fence_attach_ioctl (DRM_IOCTL_VGEM_FENCE_ATTACH): * * Create and attach a fence to the vGEM handle. This fence is then exposed * via the dma-buf reservation object and visible to consumers of the exported * dma-buf. If the flags contain VGEM_FENCE_WRITE, the fence indicates the * vGEM buffer is being written to by the client and is exposed as an exclusive * fence, otherwise the fence indicates the client is current reading from the * buffer and all future writes should wait for the client to signal its * completion. Note that if a conflicting fence is already on the dma-buf (i.e. * an exclusive fence when adding a read, or any fence when adding a write), * -EBUSY is reported. Serialisation between operations should be handled * by waiting upon the dma-buf. * * This returns the handle for the new fence that must be signaled within 10 * seconds (or otherwise it will automatically expire). See * vgem_fence_signal_ioctl (DRM_IOCTL_VGEM_FENCE_SIGNAL). * * If the vGEM handle does not exist, vgem_fence_attach_ioctl returns -ENOENT. */ int vgem_fence_attach_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_vgem_fence_attach *arg = data; struct vgem_file *vfile = file->driver_priv; struct dma_resv *resv; struct drm_gem_object *obj; enum dma_resv_usage usage; struct dma_fence *fence; int ret; if (arg->flags & ~VGEM_FENCE_WRITE) return -EINVAL; if (arg->pad) return -EINVAL; obj = drm_gem_object_lookup(file, arg->handle); if (!obj) return -ENOENT; fence = vgem_fence_create(vfile, arg->flags); if (!fence) { ret = -ENOMEM; goto err; } /* Check for a conflicting fence */ resv = obj->resv; usage = dma_resv_usage_rw(arg->flags & VGEM_FENCE_WRITE); if (!dma_resv_test_signaled(resv, usage)) { ret = -EBUSY; goto err_fence; } /* Expose the fence via the dma-buf */ dma_resv_lock(resv, NULL); ret = dma_resv_reserve_fences(resv, 1); if (!ret) dma_resv_add_fence(resv, fence, arg->flags & VGEM_FENCE_WRITE ? DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ); dma_resv_unlock(resv); /* Record the fence in our idr for later signaling */ if (ret == 0) { mutex_lock(&vfile->fence_mutex); ret = idr_alloc(&vfile->fence_idr, fence, 1, 0, GFP_KERNEL); mutex_unlock(&vfile->fence_mutex); if (ret > 0) { arg->out_fence = ret; ret = 0; } } err_fence: if (ret) { dma_fence_signal(fence); dma_fence_put(fence); } err: drm_gem_object_put(obj); return ret; } /* * vgem_fence_signal_ioctl (DRM_IOCTL_VGEM_FENCE_SIGNAL): * * Signal and consume a fence ealier attached to a vGEM handle using * vgem_fence_attach_ioctl (DRM_IOCTL_VGEM_FENCE_ATTACH). * * All fences must be signaled within 10s of attachment or otherwise they * will automatically expire (and a vgem_fence_signal_ioctl returns -ETIMEDOUT). * * Signaling a fence indicates to all consumers of the dma-buf that the * client has completed the operation associated with the fence, and that the * buffer is then ready for consumption. * * If the fence does not exist (or has already been signaled by the client), * vgem_fence_signal_ioctl returns -ENOENT. */ int vgem_fence_signal_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct vgem_file *vfile = file->driver_priv; struct drm_vgem_fence_signal *arg = data; struct dma_fence *fence; int ret = 0; if (arg->flags) return -EINVAL; mutex_lock(&vfile->fence_mutex); fence = idr_replace(&vfile->fence_idr, NULL, arg->fence); mutex_unlock(&vfile->fence_mutex); if (!fence) return -ENOENT; if (IS_ERR(fence)) return PTR_ERR(fence); if (dma_fence_is_signaled(fence)) ret = -ETIMEDOUT; dma_fence_signal(fence); dma_fence_put(fence); return ret; } int vgem_fence_open(struct vgem_file *vfile) { mutex_init(&vfile->fence_mutex); idr_init_base(&vfile->fence_idr, 1); return 0; } static int __vgem_fence_idr_fini(int id, void *p, void *data) { dma_fence_signal(p); dma_fence_put(p); return 0; } void vgem_fence_close(struct vgem_file *vfile) { idr_for_each(&vfile->fence_idr, __vgem_fence_idr_fini, vfile); idr_destroy(&vfile->fence_idr); mutex_destroy(&vfile->fence_mutex); }
23 5 94 1 45 57 25 6 21 6 1 6 24 8 1 1 3 3 6 5 12 1 2 1 8 10 2 12 6 2 4 19 6 1 4 3 4 2 4 2 6 1 3 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/minix/namei.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include "minix.h" static int add_nondir(struct dentry *dentry, struct inode *inode) { int err = minix_add_link(dentry, inode); if (!err) { d_instantiate(dentry, inode); return 0; } inode_dec_link_count(inode); iput(inode); return err; } static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags) { struct inode * inode = NULL; ino_t ino; if (dentry->d_name.len > minix_sb(dir->i_sb)->s_namelen) return ERR_PTR(-ENAMETOOLONG); ino = minix_inode_by_name(dentry); if (ino) inode = minix_iget(dir->i_sb, ino); return d_splice_alias(inode, dentry); } static int minix_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode *inode; if (!old_valid_dev(rdev)) return -EINVAL; inode = minix_new_inode(dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); minix_set_inode(inode, rdev); mark_inode_dirty(inode); return add_nondir(dentry, inode); } static int minix_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode) { struct inode *inode = minix_new_inode(dir, mode); if (IS_ERR(inode)) return finish_open_simple(file, PTR_ERR(inode)); minix_set_inode(inode, 0); mark_inode_dirty(inode); d_tmpfile(file, inode); return finish_open_simple(file, 0); } static int minix_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { return minix_mknod(&nop_mnt_idmap, dir, dentry, mode, 0); } static int minix_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { int i = strlen(symname)+1; struct inode * inode; int err; if (i > dir->i_sb->s_blocksize) return -ENAMETOOLONG; inode = minix_new_inode(dir, S_IFLNK | 0777); if (IS_ERR(inode)) return PTR_ERR(inode); minix_set_inode(inode, 0); err = page_symlink(inode, symname, i); if (unlikely(err)) { inode_dec_link_count(inode); iput(inode); return err; } return add_nondir(dentry, inode); } static int minix_link(struct dentry * old_dentry, struct inode * dir, struct dentry *dentry) { struct inode *inode = d_inode(old_dentry); inode_set_ctime_current(inode); inode_inc_link_count(inode); ihold(inode); return add_nondir(dentry, inode); } static int minix_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode * inode; int err; inode = minix_new_inode(dir, S_IFDIR | mode); if (IS_ERR(inode)) return PTR_ERR(inode); inode_inc_link_count(dir); minix_set_inode(inode, 0); inode_inc_link_count(inode); err = minix_make_empty(inode, dir); if (err) goto out_fail; err = minix_add_link(dentry, inode); if (err) goto out_fail; d_instantiate(dentry, inode); out: return err; out_fail: inode_dec_link_count(inode); inode_dec_link_count(inode); iput(inode); inode_dec_link_count(dir); goto out; } static int minix_unlink(struct inode * dir, struct dentry *dentry) { struct inode * inode = d_inode(dentry); struct page * page; struct minix_dir_entry * de; int err; de = minix_find_entry(dentry, &page); if (!de) return -ENOENT; err = minix_delete_entry(de, page); unmap_and_put_page(page, de); if (err) return err; inode_set_ctime_to_ts(inode, inode_get_ctime(dir)); inode_dec_link_count(inode); return 0; } static int minix_rmdir(struct inode * dir, struct dentry *dentry) { struct inode * inode = d_inode(dentry); int err = -ENOTEMPTY; if (minix_empty_dir(inode)) { err = minix_unlink(dir, dentry); if (!err) { inode_dec_link_count(dir); inode_dec_link_count(inode); } } return err; } static int minix_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { struct inode * old_inode = d_inode(old_dentry); struct inode * new_inode = d_inode(new_dentry); struct page * dir_page = NULL; struct minix_dir_entry * dir_de = NULL; struct page * old_page; struct minix_dir_entry * old_de; int err = -ENOENT; if (flags & ~RENAME_NOREPLACE) return -EINVAL; old_de = minix_find_entry(old_dentry, &old_page); if (!old_de) goto out; if (S_ISDIR(old_inode->i_mode)) { err = -EIO; dir_de = minix_dotdot(old_inode, &dir_page); if (!dir_de) goto out_old; } if (new_inode) { struct page * new_page; struct minix_dir_entry * new_de; err = -ENOTEMPTY; if (dir_de && !minix_empty_dir(new_inode)) goto out_dir; err = -ENOENT; new_de = minix_find_entry(new_dentry, &new_page); if (!new_de) goto out_dir; err = minix_set_link(new_de, new_page, old_inode); unmap_and_put_page(new_page, new_de); if (err) goto out_dir; inode_set_ctime_current(new_inode); if (dir_de) drop_nlink(new_inode); inode_dec_link_count(new_inode); } else { err = minix_add_link(new_dentry, old_inode); if (err) goto out_dir; if (dir_de) inode_inc_link_count(new_dir); } err = minix_delete_entry(old_de, old_page); if (err) goto out_dir; mark_inode_dirty(old_inode); if (dir_de) { err = minix_set_link(dir_de, dir_page, new_dir); if (!err) inode_dec_link_count(old_dir); } out_dir: if (dir_de) unmap_and_put_page(dir_page, dir_de); out_old: unmap_and_put_page(old_page, old_de); out: return err; } /* * directories can handle most operations... */ const struct inode_operations minix_dir_inode_operations = { .create = minix_create, .lookup = minix_lookup, .link = minix_link, .unlink = minix_unlink, .symlink = minix_symlink, .mkdir = minix_mkdir, .rmdir = minix_rmdir, .mknod = minix_mknod, .rename = minix_rename, .getattr = minix_getattr, .tmpfile = minix_tmpfile, };
1 1 1 1 47 5 5 1 48 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 // SPDX-License-Identifier: GPL-2.0 /* * linux/drivers/base/map.c * * (C) Copyright Al Viro 2002,2003 * * NOTE: data structure needs to be changed. It works, but for large dev_t * it will be too slow. It is isolated, though, so these changes will be * local to that file. */ #include <linux/module.h> #include <linux/slab.h> #include <linux/mutex.h> #include <linux/kdev_t.h> #include <linux/kobject.h> #include <linux/kobj_map.h> struct kobj_map { struct probe { struct probe *next; dev_t dev; unsigned long range; struct module *owner; kobj_probe_t *get; int (*lock)(dev_t, void *); void *data; } *probes[255]; struct mutex *lock; }; int kobj_map(struct kobj_map *domain, dev_t dev, unsigned long range, struct module *module, kobj_probe_t *probe, int (*lock)(dev_t, void *), void *data) { unsigned int n = MAJOR(dev + range - 1) - MAJOR(dev) + 1; unsigned int index = MAJOR(dev); unsigned int i; struct probe *p; if (n > 255) n = 255; p = kmalloc_array(n, sizeof(struct probe), GFP_KERNEL); if (p == NULL) return -ENOMEM; for (i = 0; i < n; i++, p++) { p->owner = module; p->get = probe; p->lock = lock; p->dev = dev; p->range = range; p->data = data; } mutex_lock(domain->lock); for (i = 0, p -= n; i < n; i++, p++, index++) { struct probe **s = &domain->probes[index % 255]; while (*s && (*s)->range < range) s = &(*s)->next; p->next = *s; *s = p; } mutex_unlock(domain->lock); return 0; } void kobj_unmap(struct kobj_map *domain, dev_t dev, unsigned long range) { unsigned int n = MAJOR(dev + range - 1) - MAJOR(dev) + 1; unsigned int index = MAJOR(dev); unsigned int i; struct probe *found = NULL; if (n > 255) n = 255; mutex_lock(domain->lock); for (i = 0; i < n; i++, index++) { struct probe **s; for (s = &domain->probes[index % 255]; *s; s = &(*s)->next) { struct probe *p = *s; if (p->dev == dev && p->range == range) { *s = p->next; if (!found) found = p; break; } } } mutex_unlock(domain->lock); kfree(found); } struct kobject *kobj_lookup(struct kobj_map *domain, dev_t dev, int *index) { struct kobject *kobj; struct probe *p; unsigned long best = ~0UL; retry: mutex_lock(domain->lock); for (p = domain->probes[MAJOR(dev) % 255]; p; p = p->next) { struct kobject *(*probe)(dev_t, int *, void *); struct module *owner; void *data; if (p->dev > dev || p->dev + p->range - 1 < dev) continue; if (p->range - 1 >= best) break; if (!try_module_get(p->owner)) continue; owner = p->owner; data = p->data; probe = p->get; best = p->range - 1; *index = dev - p->dev; if (p->lock && p->lock(dev, data) < 0) { module_put(owner); continue; } mutex_unlock(domain->lock); kobj = probe(dev, index, data); /* Currently ->owner protects _only_ ->probe() itself. */ module_put(owner); if (kobj) return kobj; goto retry; } mutex_unlock(domain->lock); return NULL; } struct kobj_map *kobj_map_init(kobj_probe_t *base_probe, struct mutex *lock) { struct kobj_map *p = kmalloc(sizeof(struct kobj_map), GFP_KERNEL); struct probe *base = kzalloc(sizeof(*base), GFP_KERNEL); int i; if ((p == NULL) || (base == NULL)) { kfree(p); kfree(base); return NULL; } base->dev = 1; base->range = ~0; base->get = base_probe; for (i = 0; i < 255; i++) p->probes[i] = base; p->lock = lock; return p; }
1 1 1 1 3 3 1 2 1 1 2 3 3 3 2 1 1 1 1 1 1 1 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 // SPDX-License-Identifier: GPL-2.0-or-later /* * Generic address resultion entity * * Authors: * net_random Alan Cox * net_ratelimit Andi Kleen * in{4,6}_pton YOSHIFUJI Hideaki, Copyright (C)2006 USAGI/WIDE Project * * Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> */ #include <linux/module.h> #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/ctype.h> #include <linux/inet.h> #include <linux/mm.h> #include <linux/net.h> #include <linux/string.h> #include <linux/types.h> #include <linux/percpu.h> #include <linux/init.h> #include <linux/ratelimit.h> #include <linux/socket.h> #include <net/sock.h> #include <net/net_ratelimit.h> #include <net/ipv6.h> #include <asm/byteorder.h> #include <linux/uaccess.h> DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10); /* * All net warning printk()s should be guarded by this function. */ int net_ratelimit(void) { return __ratelimit(&net_ratelimit_state); } EXPORT_SYMBOL(net_ratelimit); /* * Convert an ASCII string to binary IP. * This is outside of net/ipv4/ because various code that uses IP addresses * is otherwise not dependent on the TCP/IP stack. */ __be32 in_aton(const char *str) { unsigned int l; unsigned int val; int i; l = 0; for (i = 0; i < 4; i++) { l <<= 8; if (*str != '\0') { val = 0; while (*str != '\0' && *str != '.' && *str != '\n') { val *= 10; val += *str - '0'; str++; } l |= val; if (*str != '\0') str++; } } return htonl(l); } EXPORT_SYMBOL(in_aton); #define IN6PTON_XDIGIT 0x00010000 #define IN6PTON_DIGIT 0x00020000 #define IN6PTON_COLON_MASK 0x00700000 #define IN6PTON_COLON_1 0x00100000 /* single : requested */ #define IN6PTON_COLON_2 0x00200000 /* second : requested */ #define IN6PTON_COLON_1_2 0x00400000 /* :: requested */ #define IN6PTON_DOT 0x00800000 /* . */ #define IN6PTON_DELIM 0x10000000 #define IN6PTON_NULL 0x20000000 /* first/tail */ #define IN6PTON_UNKNOWN 0x40000000 static inline int xdigit2bin(char c, int delim) { int val; if (c == delim || c == '\0') return IN6PTON_DELIM; if (c == ':') return IN6PTON_COLON_MASK; if (c == '.') return IN6PTON_DOT; val = hex_to_bin(c); if (val >= 0) return val | IN6PTON_XDIGIT | (val < 10 ? IN6PTON_DIGIT : 0); if (delim == -1) return IN6PTON_DELIM; return IN6PTON_UNKNOWN; } /** * in4_pton - convert an IPv4 address from literal to binary representation * @src: the start of the IPv4 address string * @srclen: the length of the string, -1 means strlen(src) * @dst: the binary (u8[4] array) representation of the IPv4 address * @delim: the delimiter of the IPv4 address in @src, -1 means no delimiter * @end: A pointer to the end of the parsed string will be placed here * * Return one on success, return zero when any error occurs * and @end will point to the end of the parsed string. * */ int in4_pton(const char *src, int srclen, u8 *dst, int delim, const char **end) { const char *s; u8 *d; u8 dbuf[4]; int ret = 0; int i; int w = 0; if (srclen < 0) srclen = strlen(src); s = src; d = dbuf; i = 0; while (1) { int c; c = xdigit2bin(srclen > 0 ? *s : '\0', delim); if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK))) { goto out; } if (c & (IN6PTON_DOT | IN6PTON_DELIM | IN6PTON_COLON_MASK)) { if (w == 0) goto out; *d++ = w & 0xff; w = 0; i++; if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) { if (i != 4) goto out; break; } goto cont; } w = (w * 10) + c; if ((w & 0xffff) > 255) { goto out; } cont: if (i >= 4) goto out; s++; srclen--; } ret = 1; memcpy(dst, dbuf, sizeof(dbuf)); out: if (end) *end = s; return ret; } EXPORT_SYMBOL(in4_pton); /** * in6_pton - convert an IPv6 address from literal to binary representation * @src: the start of the IPv6 address string * @srclen: the length of the string, -1 means strlen(src) * @dst: the binary (u8[16] array) representation of the IPv6 address * @delim: the delimiter of the IPv6 address in @src, -1 means no delimiter * @end: A pointer to the end of the parsed string will be placed here * * Return one on success, return zero when any error occurs * and @end will point to the end of the parsed string. * */ int in6_pton(const char *src, int srclen, u8 *dst, int delim, const char **end) { const char *s, *tok = NULL; u8 *d, *dc = NULL; u8 dbuf[16]; int ret = 0; int i; int state = IN6PTON_COLON_1_2 | IN6PTON_XDIGIT | IN6PTON_NULL; int w = 0; memset(dbuf, 0, sizeof(dbuf)); s = src; d = dbuf; if (srclen < 0) srclen = strlen(src); while (1) { int c; c = xdigit2bin(srclen > 0 ? *s : '\0', delim); if (!(c & state)) goto out; if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) { /* process one 16-bit word */ if (!(state & IN6PTON_NULL)) { *d++ = (w >> 8) & 0xff; *d++ = w & 0xff; } w = 0; if (c & IN6PTON_DELIM) { /* We've processed last word */ break; } /* * COLON_1 => XDIGIT * COLON_2 => XDIGIT|DELIM * COLON_1_2 => COLON_2 */ switch (state & IN6PTON_COLON_MASK) { case IN6PTON_COLON_2: dc = d; state = IN6PTON_XDIGIT | IN6PTON_DELIM; if (dc - dbuf >= sizeof(dbuf)) state |= IN6PTON_NULL; break; case IN6PTON_COLON_1|IN6PTON_COLON_1_2: state = IN6PTON_XDIGIT | IN6PTON_COLON_2; break; case IN6PTON_COLON_1: state = IN6PTON_XDIGIT; break; case IN6PTON_COLON_1_2: state = IN6PTON_COLON_2; break; default: state = 0; } tok = s + 1; goto cont; } if (c & IN6PTON_DOT) { ret = in4_pton(tok ? tok : s, srclen + (int)(s - tok), d, delim, &s); if (ret > 0) { d += 4; break; } goto out; } w = (w << 4) | (0xff & c); state = IN6PTON_COLON_1 | IN6PTON_DELIM; if (!(w & 0xf000)) { state |= IN6PTON_XDIGIT; } if (!dc && d + 2 < dbuf + sizeof(dbuf)) { state |= IN6PTON_COLON_1_2; state &= ~IN6PTON_DELIM; } if (d + 2 >= dbuf + sizeof(dbuf)) { state &= ~(IN6PTON_COLON_1|IN6PTON_COLON_1_2); } cont: if ((dc && d + 4 < dbuf + sizeof(dbuf)) || d + 4 == dbuf + sizeof(dbuf)) { state |= IN6PTON_DOT; } if (d >= dbuf + sizeof(dbuf)) { state &= ~(IN6PTON_XDIGIT|IN6PTON_COLON_MASK); } s++; srclen--; } i = 15; d--; if (dc) { while (d >= dc) dst[i--] = *d--; while (i >= dc - dbuf) dst[i--] = 0; while (i >= 0) dst[i--] = *d--; } else memcpy(dst, dbuf, sizeof(dbuf)); ret = 1; out: if (end) *end = s; return ret; } EXPORT_SYMBOL(in6_pton); static int inet4_pton(const char *src, u16 port_num, struct sockaddr_storage *addr) { struct sockaddr_in *addr4 = (struct sockaddr_in *)addr; size_t srclen = strlen(src); if (srclen > INET_ADDRSTRLEN) return -EINVAL; if (in4_pton(src, srclen, (u8 *)&addr4->sin_addr.s_addr, '\n', NULL) == 0) return -EINVAL; addr4->sin_family = AF_INET; addr4->sin_port = htons(port_num); return 0; } static int inet6_pton(struct net *net, const char *src, u16 port_num, struct sockaddr_storage *addr) { struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr; const char *scope_delim; size_t srclen = strlen(src); if (srclen > INET6_ADDRSTRLEN) return -EINVAL; if (in6_pton(src, srclen, (u8 *)&addr6->sin6_addr.s6_addr, '%', &scope_delim) == 0) return -EINVAL; if (ipv6_addr_type(&addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL && src + srclen != scope_delim && *scope_delim == '%') { struct net_device *dev; char scope_id[16]; size_t scope_len = min_t(size_t, sizeof(scope_id) - 1, src + srclen - scope_delim - 1); memcpy(scope_id, scope_delim + 1, scope_len); scope_id[scope_len] = '\0'; dev = dev_get_by_name(net, scope_id); if (dev) { addr6->sin6_scope_id = dev->ifindex; dev_put(dev); } else if (kstrtouint(scope_id, 0, &addr6->sin6_scope_id)) { return -EINVAL; } } addr6->sin6_family = AF_INET6; addr6->sin6_port = htons(port_num); return 0; } /** * inet_pton_with_scope - convert an IPv4/IPv6 and port to socket address * @net: net namespace (used for scope handling) * @af: address family, AF_INET, AF_INET6 or AF_UNSPEC for either * @src: the start of the address string * @port: the start of the port string (or NULL for none) * @addr: output socket address * * Return zero on success, return errno when any error occurs. */ int inet_pton_with_scope(struct net *net, __kernel_sa_family_t af, const char *src, const char *port, struct sockaddr_storage *addr) { u16 port_num; int ret = -EINVAL; if (port) { if (kstrtou16(port, 0, &port_num)) return -EINVAL; } else { port_num = 0; } switch (af) { case AF_INET: ret = inet4_pton(src, port_num, addr); break; case AF_INET6: ret = inet6_pton(net, src, port_num, addr); break; case AF_UNSPEC: ret = inet4_pton(src, port_num, addr); if (ret) ret = inet6_pton(net, src, port_num, addr); break; default: pr_err("unexpected address family %d\n", af); } return ret; } EXPORT_SYMBOL(inet_pton_with_scope); bool inet_addr_is_any(struct sockaddr *addr) { if (addr->sa_family == AF_INET6) { struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)addr; const struct sockaddr_in6 in6_any = { .sin6_addr = IN6ADDR_ANY_INIT }; if (!memcmp(in6->sin6_addr.s6_addr, in6_any.sin6_addr.s6_addr, 16)) return true; } else if (addr->sa_family == AF_INET) { struct sockaddr_in *in = (struct sockaddr_in *)addr; if (in->sin_addr.s_addr == htonl(INADDR_ANY)) return true; } else { pr_warn("unexpected address family %u\n", addr->sa_family); } return false; } EXPORT_SYMBOL(inet_addr_is_any); void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, __be32 from, __be32 to, bool pseudohdr) { if (skb->ip_summed != CHECKSUM_PARTIAL) { csum_replace4(sum, from, to); if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) skb->csum = ~csum_add(csum_sub(~(skb->csum), (__force __wsum)from), (__force __wsum)to); } else if (pseudohdr) *sum = ~csum_fold(csum_add(csum_sub(csum_unfold(*sum), (__force __wsum)from), (__force __wsum)to)); } EXPORT_SYMBOL(inet_proto_csum_replace4); /** * inet_proto_csum_replace16 - update layer 4 header checksum field * @sum: Layer 4 header checksum field * @skb: sk_buff for the packet * @from: old IPv6 address * @to: new IPv6 address * @pseudohdr: True if layer 4 header checksum includes pseudoheader * * Update layer 4 header as per the update in IPv6 src/dst address. * * There is no need to update skb->csum in this function, because update in two * fields a.) IPv6 src/dst address and b.) L4 header checksum cancels each other * for skb->csum calculation. Whereas inet_proto_csum_replace4 function needs to * update skb->csum, because update in 3 fields a.) IPv4 src/dst address, * b.) IPv4 Header checksum and c.) L4 header checksum results in same diff as * L4 Header checksum for skb->csum calculation. */ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, bool pseudohdr) { __be32 diff[] = { ~from[0], ~from[1], ~from[2], ~from[3], to[0], to[1], to[2], to[3], }; if (skb->ip_summed != CHECKSUM_PARTIAL) { *sum = csum_fold(csum_partial(diff, sizeof(diff), ~csum_unfold(*sum))); } else if (pseudohdr) *sum = ~csum_fold(csum_partial(diff, sizeof(diff), csum_unfold(*sum))); } EXPORT_SYMBOL(inet_proto_csum_replace16); void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, __wsum diff, bool pseudohdr) { if (skb->ip_summed != CHECKSUM_PARTIAL) { csum_replace_by_diff(sum, diff); if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) skb->csum = ~csum_sub(diff, skb->csum); } else if (pseudohdr) { *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum))); } } EXPORT_SYMBOL(inet_proto_csum_replace_by_diff);
8531 3408 8525 6272 7437 6272 8262 5746 8261 5745 5754 5757 5769 5768 8253 8261 8266 8294 12 12 12 12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 // SPDX-License-Identifier: GPL-2.0 #include <linux/debugfs.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/memblock.h> #include <linux/stacktrace.h> #include <linux/page_owner.h> #include <linux/jump_label.h> #include <linux/migrate.h> #include <linux/stackdepot.h> #include <linux/seq_file.h> #include <linux/memcontrol.h> #include <linux/sched/clock.h> #include "internal.h" /* * TODO: teach PAGE_OWNER_STACK_DEPTH (__dump_page_owner and save_stack) * to use off stack temporal storage */ #define PAGE_OWNER_STACK_DEPTH (16) struct page_owner { unsigned short order; short last_migrate_reason; gfp_t gfp_mask; depot_stack_handle_t handle; depot_stack_handle_t free_handle; u64 ts_nsec; u64 free_ts_nsec; char comm[TASK_COMM_LEN]; pid_t pid; pid_t tgid; pid_t free_pid; pid_t free_tgid; }; struct stack { struct stack_record *stack_record; struct stack *next; }; static struct stack dummy_stack; static struct stack failure_stack; static struct stack *stack_list; static DEFINE_SPINLOCK(stack_list_lock); static bool page_owner_enabled __initdata; DEFINE_STATIC_KEY_FALSE(page_owner_inited); static depot_stack_handle_t dummy_handle; static depot_stack_handle_t failure_handle; static depot_stack_handle_t early_handle; static void init_early_allocated_pages(void); static inline void set_current_in_page_owner(void) { /* * Avoid recursion. * * We might need to allocate more memory from page_owner code, so make * sure to signal it in order to avoid recursion. */ current->in_page_owner = 1; } static inline void unset_current_in_page_owner(void) { current->in_page_owner = 0; } static int __init early_page_owner_param(char *buf) { int ret = kstrtobool(buf, &page_owner_enabled); if (page_owner_enabled) stack_depot_request_early_init(); return ret; } early_param("page_owner", early_page_owner_param); static __init bool need_page_owner(void) { return page_owner_enabled; } static __always_inline depot_stack_handle_t create_dummy_stack(void) { unsigned long entries[4]; unsigned int nr_entries; nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0); return stack_depot_save(entries, nr_entries, GFP_KERNEL); } static noinline void register_dummy_stack(void) { dummy_handle = create_dummy_stack(); } static noinline void register_failure_stack(void) { failure_handle = create_dummy_stack(); } static noinline void register_early_stack(void) { early_handle = create_dummy_stack(); } static __init void init_page_owner(void) { if (!page_owner_enabled) return; register_dummy_stack(); register_failure_stack(); register_early_stack(); init_early_allocated_pages(); /* Initialize dummy and failure stacks and link them to stack_list */ dummy_stack.stack_record = __stack_depot_get_stack_record(dummy_handle); failure_stack.stack_record = __stack_depot_get_stack_record(failure_handle); if (dummy_stack.stack_record) refcount_set(&dummy_stack.stack_record->count, 1); if (failure_stack.stack_record) refcount_set(&failure_stack.stack_record->count, 1); dummy_stack.next = &failure_stack; stack_list = &dummy_stack; static_branch_enable(&page_owner_inited); } struct page_ext_operations page_owner_ops = { .size = sizeof(struct page_owner), .need = need_page_owner, .init = init_page_owner, .need_shared_flags = true, }; static inline struct page_owner *get_page_owner(struct page_ext *page_ext) { return page_ext_data(page_ext, &page_owner_ops); } static noinline depot_stack_handle_t save_stack(gfp_t flags) { unsigned long entries[PAGE_OWNER_STACK_DEPTH]; depot_stack_handle_t handle; unsigned int nr_entries; if (current->in_page_owner) return dummy_handle; set_current_in_page_owner(); nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2); handle = stack_depot_save(entries, nr_entries, flags); if (!handle) handle = failure_handle; unset_current_in_page_owner(); return handle; } static void add_stack_record_to_list(struct stack_record *stack_record, gfp_t gfp_mask) { unsigned long flags; struct stack *stack; set_current_in_page_owner(); stack = kmalloc(sizeof(*stack), gfp_nested_mask(gfp_mask)); if (!stack) { unset_current_in_page_owner(); return; } unset_current_in_page_owner(); stack->stack_record = stack_record; stack->next = NULL; spin_lock_irqsave(&stack_list_lock, flags); stack->next = stack_list; /* * This pairs with smp_load_acquire() from function * stack_start(). This guarantees that stack_start() * will see an updated stack_list before starting to * traverse the list. */ smp_store_release(&stack_list, stack); spin_unlock_irqrestore(&stack_list_lock, flags); } static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask, int nr_base_pages) { struct stack_record *stack_record = __stack_depot_get_stack_record(handle); if (!stack_record) return; /* * New stack_record's that do not use STACK_DEPOT_FLAG_GET start * with REFCOUNT_SATURATED to catch spurious increments of their * refcount. * Since we do not use STACK_DEPOT_FLAG_GET API, let us * set a refcount of 1 ourselves. */ if (refcount_read(&stack_record->count) == REFCOUNT_SATURATED) { int old = REFCOUNT_SATURATED; if (atomic_try_cmpxchg_relaxed(&stack_record->count.refs, &old, 1)) /* Add the new stack_record to our list */ add_stack_record_to_list(stack_record, gfp_mask); } refcount_add(nr_base_pages, &stack_record->count); } static void dec_stack_record_count(depot_stack_handle_t handle, int nr_base_pages) { struct stack_record *stack_record = __stack_depot_get_stack_record(handle); if (!stack_record) return; if (refcount_sub_and_test(nr_base_pages, &stack_record->count)) pr_warn("%s: refcount went to 0 for %u handle\n", __func__, handle); } static inline void __update_page_owner_handle(struct page_ext *page_ext, depot_stack_handle_t handle, unsigned short order, gfp_t gfp_mask, short last_migrate_reason, u64 ts_nsec, pid_t pid, pid_t tgid, char *comm) { int i; struct page_owner *page_owner; for (i = 0; i < (1 << order); i++) { page_owner = get_page_owner(page_ext); page_owner->handle = handle; page_owner->order = order; page_owner->gfp_mask = gfp_mask; page_owner->last_migrate_reason = last_migrate_reason; page_owner->pid = pid; page_owner->tgid = tgid; page_owner->ts_nsec = ts_nsec; strscpy(page_owner->comm, comm, sizeof(page_owner->comm)); __set_bit(PAGE_EXT_OWNER, &page_ext->flags); __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags); page_ext = page_ext_next(page_ext); } } static inline void __update_page_owner_free_handle(struct page_ext *page_ext, depot_stack_handle_t handle, unsigned short order, pid_t pid, pid_t tgid, u64 free_ts_nsec) { int i; struct page_owner *page_owner; for (i = 0; i < (1 << order); i++) { page_owner = get_page_owner(page_ext); /* Only __reset_page_owner() wants to clear the bit */ if (handle) { __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags); page_owner->free_handle = handle; } page_owner->free_ts_nsec = free_ts_nsec; page_owner->free_pid = current->pid; page_owner->free_tgid = current->tgid; page_ext = page_ext_next(page_ext); } } void __reset_page_owner(struct page *page, unsigned short order) { struct page_ext *page_ext; depot_stack_handle_t handle; depot_stack_handle_t alloc_handle; struct page_owner *page_owner; u64 free_ts_nsec = local_clock(); page_ext = page_ext_get(page); if (unlikely(!page_ext)) return; page_owner = get_page_owner(page_ext); alloc_handle = page_owner->handle; handle = save_stack(GFP_NOWAIT | __GFP_NOWARN); __update_page_owner_free_handle(page_ext, handle, order, current->pid, current->tgid, free_ts_nsec); page_ext_put(page_ext); if (alloc_handle != early_handle) /* * early_handle is being set as a handle for all those * early allocated pages. See init_pages_in_zone(). * Since their refcount is not being incremented because * the machinery is not ready yet, we cannot decrement * their refcount either. */ dec_stack_record_count(alloc_handle, 1 << order); } noinline void __set_page_owner(struct page *page, unsigned short order, gfp_t gfp_mask) { struct page_ext *page_ext; u64 ts_nsec = local_clock(); depot_stack_handle_t handle; handle = save_stack(gfp_mask); page_ext = page_ext_get(page); if (unlikely(!page_ext)) return; __update_page_owner_handle(page_ext, handle, order, gfp_mask, -1, ts_nsec, current->pid, current->tgid, current->comm); page_ext_put(page_ext); inc_stack_record_count(handle, gfp_mask, 1 << order); } void __set_page_owner_migrate_reason(struct page *page, int reason) { struct page_ext *page_ext = page_ext_get(page); struct page_owner *page_owner; if (unlikely(!page_ext)) return; page_owner = get_page_owner(page_ext); page_owner->last_migrate_reason = reason; page_ext_put(page_ext); } void __split_page_owner(struct page *page, int old_order, int new_order) { int i; struct page_ext *page_ext = page_ext_get(page); struct page_owner *page_owner; if (unlikely(!page_ext)) return; for (i = 0; i < (1 << old_order); i++) { page_owner = get_page_owner(page_ext); page_owner->order = new_order; page_ext = page_ext_next(page_ext); } page_ext_put(page_ext); } void __folio_copy_owner(struct folio *newfolio, struct folio *old) { int i; struct page_ext *old_ext; struct page_ext *new_ext; struct page_owner *old_page_owner; struct page_owner *new_page_owner; depot_stack_handle_t migrate_handle; old_ext = page_ext_get(&old->page); if (unlikely(!old_ext)) return; new_ext = page_ext_get(&newfolio->page); if (unlikely(!new_ext)) { page_ext_put(old_ext); return; } old_page_owner = get_page_owner(old_ext); new_page_owner = get_page_owner(new_ext); migrate_handle = new_page_owner->handle; __update_page_owner_handle(new_ext, old_page_owner->handle, old_page_owner->order, old_page_owner->gfp_mask, old_page_owner->last_migrate_reason, old_page_owner->ts_nsec, old_page_owner->pid, old_page_owner->tgid, old_page_owner->comm); /* * Do not proactively clear PAGE_EXT_OWNER{_ALLOCATED} bits as the folio * will be freed after migration. Keep them until then as they may be * useful. */ __update_page_owner_free_handle(new_ext, 0, old_page_owner->order, old_page_owner->free_pid, old_page_owner->free_tgid, old_page_owner->free_ts_nsec); /* * We linked the original stack to the new folio, we need to do the same * for the new one and the old folio otherwise there will be an imbalance * when subtracting those pages from the stack. */ for (i = 0; i < (1 << new_page_owner->order); i++) { old_page_owner->handle = migrate_handle; old_ext = page_ext_next(old_ext); old_page_owner = get_page_owner(old_ext); } page_ext_put(new_ext); page_ext_put(old_ext); } void pagetypeinfo_showmixedcount_print(struct seq_file *m, pg_data_t *pgdat, struct zone *zone) { struct page *page; struct page_ext *page_ext; struct page_owner *page_owner; unsigned long pfn, block_end_pfn; unsigned long end_pfn = zone_end_pfn(zone); unsigned long count[MIGRATE_TYPES] = { 0, }; int pageblock_mt, page_mt; int i; /* Scan block by block. First and last block may be incomplete */ pfn = zone->zone_start_pfn; /* * Walk the zone in pageblock_nr_pages steps. If a page block spans * a zone boundary, it will be double counted between zones. This does * not matter as the mixed block count will still be correct */ for (; pfn < end_pfn; ) { page = pfn_to_online_page(pfn); if (!page) { pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES); continue; } block_end_pfn = pageblock_end_pfn(pfn); block_end_pfn = min(block_end_pfn, end_pfn); pageblock_mt = get_pageblock_migratetype(page); for (; pfn < block_end_pfn; pfn++) { /* The pageblock is online, no need to recheck. */ page = pfn_to_page(pfn); if (page_zone(page) != zone) continue; if (PageBuddy(page)) { unsigned long freepage_order; freepage_order = buddy_order_unsafe(page); if (freepage_order <= MAX_PAGE_ORDER) pfn += (1UL << freepage_order) - 1; continue; } if (PageReserved(page)) continue; page_ext = page_ext_get(page); if (unlikely(!page_ext)) continue; if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags)) goto ext_put_continue; page_owner = get_page_owner(page_ext); page_mt = gfp_migratetype(page_owner->gfp_mask); if (pageblock_mt != page_mt) { if (is_migrate_cma(pageblock_mt)) count[MIGRATE_MOVABLE]++; else count[pageblock_mt]++; pfn = block_end_pfn; page_ext_put(page_ext); break; } pfn += (1UL << page_owner->order) - 1; ext_put_continue: page_ext_put(page_ext); } } /* Print counts */ seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); for (i = 0; i < MIGRATE_TYPES; i++) seq_printf(m, "%12lu ", count[i]); seq_putc(m, '\n'); } /* * Looking for memcg information and print it out */ static inline int print_page_owner_memcg(char *kbuf, size_t count, int ret, struct page *page) { #ifdef CONFIG_MEMCG unsigned long memcg_data; struct mem_cgroup *memcg; bool online; char name[80]; rcu_read_lock(); memcg_data = READ_ONCE(page->memcg_data); if (!memcg_data) goto out_unlock; if (memcg_data & MEMCG_DATA_OBJEXTS) ret += scnprintf(kbuf + ret, count - ret, "Slab cache page\n"); memcg = page_memcg_check(page); if (!memcg) goto out_unlock; online = (memcg->css.flags & CSS_ONLINE); cgroup_name(memcg->css.cgroup, name, sizeof(name)); ret += scnprintf(kbuf + ret, count - ret, "Charged %sto %smemcg %s\n", PageMemcgKmem(page) ? "(via objcg) " : "", online ? "" : "offline ", name); out_unlock: rcu_read_unlock(); #endif /* CONFIG_MEMCG */ return ret; } static ssize_t print_page_owner(char __user *buf, size_t count, unsigned long pfn, struct page *page, struct page_owner *page_owner, depot_stack_handle_t handle) { int ret, pageblock_mt, page_mt; char *kbuf; count = min_t(size_t, count, PAGE_SIZE); kbuf = kmalloc(count, GFP_KERNEL); if (!kbuf) return -ENOMEM; ret = scnprintf(kbuf, count, "Page allocated via order %u, mask %#x(%pGg), pid %d, tgid %d (%s), ts %llu ns\n", page_owner->order, page_owner->gfp_mask, &page_owner->gfp_mask, page_owner->pid, page_owner->tgid, page_owner->comm, page_owner->ts_nsec); /* Print information relevant to grouping pages by mobility */ pageblock_mt = get_pageblock_migratetype(page); page_mt = gfp_migratetype(page_owner->gfp_mask); ret += scnprintf(kbuf + ret, count - ret, "PFN 0x%lx type %s Block %lu type %s Flags %pGp\n", pfn, migratetype_names[page_mt], pfn >> pageblock_order, migratetype_names[pageblock_mt], &page->flags); ret += stack_depot_snprint(handle, kbuf + ret, count - ret, 0); if (ret >= count) goto err; if (page_owner->last_migrate_reason != -1) { ret += scnprintf(kbuf + ret, count - ret, "Page has been migrated, last migrate reason: %s\n", migrate_reason_names[page_owner->last_migrate_reason]); } ret = print_page_owner_memcg(kbuf, count, ret, page); ret += snprintf(kbuf + ret, count - ret, "\n"); if (ret >= count) goto err; if (copy_to_user(buf, kbuf, ret)) ret = -EFAULT; kfree(kbuf); return ret; err: kfree(kbuf); return -ENOMEM; } void __dump_page_owner(const struct page *page) { struct page_ext *page_ext = page_ext_get((void *)page); struct page_owner *page_owner; depot_stack_handle_t handle; gfp_t gfp_mask; int mt; if (unlikely(!page_ext)) { pr_alert("There is not page extension available.\n"); return; } page_owner = get_page_owner(page_ext); gfp_mask = page_owner->gfp_mask; mt = gfp_migratetype(gfp_mask); if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) { pr_alert("page_owner info is not present (never set?)\n"); page_ext_put(page_ext); return; } if (test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags)) pr_alert("page_owner tracks the page as allocated\n"); else pr_alert("page_owner tracks the page as freed\n"); pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg), pid %d, tgid %d (%s), ts %llu, free_ts %llu\n", page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask, page_owner->pid, page_owner->tgid, page_owner->comm, page_owner->ts_nsec, page_owner->free_ts_nsec); handle = READ_ONCE(page_owner->handle); if (!handle) pr_alert("page_owner allocation stack trace missing\n"); else stack_depot_print(handle); handle = READ_ONCE(page_owner->free_handle); if (!handle) { pr_alert("page_owner free stack trace missing\n"); } else { pr_alert("page last free pid %d tgid %d stack trace:\n", page_owner->free_pid, page_owner->free_tgid); stack_depot_print(handle); } if (page_owner->last_migrate_reason != -1) pr_alert("page has been migrated, last migrate reason: %s\n", migrate_reason_names[page_owner->last_migrate_reason]); page_ext_put(page_ext); } static ssize_t read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) { unsigned long pfn; struct page *page; struct page_ext *page_ext; struct page_owner *page_owner; depot_stack_handle_t handle; if (!static_branch_unlikely(&page_owner_inited)) return -EINVAL; page = NULL; if (*ppos == 0) pfn = min_low_pfn; else pfn = *ppos; /* Find a valid PFN or the start of a MAX_ORDER_NR_PAGES area */ while (!pfn_valid(pfn) && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0) pfn++; /* Find an allocated page */ for (; pfn < max_pfn; pfn++) { /* * This temporary page_owner is required so * that we can avoid the context switches while holding * the rcu lock and copying the page owner information to * user through copy_to_user() or GFP_KERNEL allocations. */ struct page_owner page_owner_tmp; /* * If the new page is in a new MAX_ORDER_NR_PAGES area, * validate the area as existing, skip it if not */ if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0 && !pfn_valid(pfn)) { pfn += MAX_ORDER_NR_PAGES - 1; continue; } page = pfn_to_page(pfn); if (PageBuddy(page)) { unsigned long freepage_order = buddy_order_unsafe(page); if (freepage_order <= MAX_PAGE_ORDER) pfn += (1UL << freepage_order) - 1; continue; } page_ext = page_ext_get(page); if (unlikely(!page_ext)) continue; /* * Some pages could be missed by concurrent allocation or free, * because we don't hold the zone lock. */ if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) goto ext_put_continue; /* * Although we do have the info about past allocation of free * pages, it's not relevant for current memory usage. */ if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags)) goto ext_put_continue; page_owner = get_page_owner(page_ext); /* * Don't print "tail" pages of high-order allocations as that * would inflate the stats. */ if (!IS_ALIGNED(pfn, 1 << page_owner->order)) goto ext_put_continue; /* * Access to page_ext->handle isn't synchronous so we should * be careful to access it. */ handle = READ_ONCE(page_owner->handle); if (!handle) goto ext_put_continue; /* Record the next PFN to read in the file offset */ *ppos = pfn + 1; page_owner_tmp = *page_owner; page_ext_put(page_ext); return print_page_owner(buf, count, pfn, page, &page_owner_tmp, handle); ext_put_continue: page_ext_put(page_ext); } return 0; } static loff_t lseek_page_owner(struct file *file, loff_t offset, int orig) { switch (orig) { case SEEK_SET: file->f_pos = offset; break; case SEEK_CUR: file->f_pos += offset; break; default: return -EINVAL; } return file->f_pos; } static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone) { unsigned long pfn = zone->zone_start_pfn; unsigned long end_pfn = zone_end_pfn(zone); unsigned long count = 0; /* * Walk the zone in pageblock_nr_pages steps. If a page block spans * a zone boundary, it will be double counted between zones. This does * not matter as the mixed block count will still be correct */ for (; pfn < end_pfn; ) { unsigned long block_end_pfn; if (!pfn_valid(pfn)) { pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES); continue; } block_end_pfn = pageblock_end_pfn(pfn); block_end_pfn = min(block_end_pfn, end_pfn); for (; pfn < block_end_pfn; pfn++) { struct page *page = pfn_to_page(pfn); struct page_ext *page_ext; if (page_zone(page) != zone) continue; /* * To avoid having to grab zone->lock, be a little * careful when reading buddy page order. The only * danger is that we skip too much and potentially miss * some early allocated pages, which is better than * heavy lock contention. */ if (PageBuddy(page)) { unsigned long order = buddy_order_unsafe(page); if (order > 0 && order <= MAX_PAGE_ORDER) pfn += (1UL << order) - 1; continue; } if (PageReserved(page)) continue; page_ext = page_ext_get(page); if (unlikely(!page_ext)) continue; /* Maybe overlapping zone */ if (test_bit(PAGE_EXT_OWNER, &page_ext->flags)) goto ext_put_continue; /* Found early allocated page */ __update_page_owner_handle(page_ext, early_handle, 0, 0, -1, local_clock(), current->pid, current->tgid, current->comm); count++; ext_put_continue: page_ext_put(page_ext); } cond_resched(); } pr_info("Node %d, zone %8s: page owner found early allocated %lu pages\n", pgdat->node_id, zone->name, count); } static void init_zones_in_node(pg_data_t *pgdat) { struct zone *zone; struct zone *node_zones = pgdat->node_zones; for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { if (!populated_zone(zone)) continue; init_pages_in_zone(pgdat, zone); } } static void init_early_allocated_pages(void) { pg_data_t *pgdat; for_each_online_pgdat(pgdat) init_zones_in_node(pgdat); } static const struct file_operations proc_page_owner_operations = { .read = read_page_owner, .llseek = lseek_page_owner, }; static void *stack_start(struct seq_file *m, loff_t *ppos) { struct stack *stack; if (*ppos == -1UL) return NULL; if (!*ppos) { /* * This pairs with smp_store_release() from function * add_stack_record_to_list(), so we get a consistent * value of stack_list. */ stack = smp_load_acquire(&stack_list); m->private = stack; } else { stack = m->private; } return stack; } static void *stack_next(struct seq_file *m, void *v, loff_t *ppos) { struct stack *stack = v; stack = stack->next; *ppos = stack ? *ppos + 1 : -1UL; m->private = stack; return stack; } static unsigned long page_owner_pages_threshold; static int stack_print(struct seq_file *m, void *v) { int i, nr_base_pages; struct stack *stack = v; unsigned long *entries; unsigned long nr_entries; struct stack_record *stack_record = stack->stack_record; if (!stack->stack_record) return 0; nr_entries = stack_record->size; entries = stack_record->entries; nr_base_pages = refcount_read(&stack_record->count) - 1; if (nr_base_pages < 1 || nr_base_pages < page_owner_pages_threshold) return 0; for (i = 0; i < nr_entries; i++) seq_printf(m, " %pS\n", (void *)entries[i]); seq_printf(m, "nr_base_pages: %d\n\n", nr_base_pages); return 0; } static void stack_stop(struct seq_file *m, void *v) { } static const struct seq_operations page_owner_stack_op = { .start = stack_start, .next = stack_next, .stop = stack_stop, .show = stack_print }; static int page_owner_stack_open(struct inode *inode, struct file *file) { return seq_open_private(file, &page_owner_stack_op, 0); } static const struct file_operations page_owner_stack_operations = { .open = page_owner_stack_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; static int page_owner_threshold_get(void *data, u64 *val) { *val = READ_ONCE(page_owner_pages_threshold); return 0; } static int page_owner_threshold_set(void *data, u64 val) { WRITE_ONCE(page_owner_pages_threshold, val); return 0; } DEFINE_SIMPLE_ATTRIBUTE(proc_page_owner_threshold, &page_owner_threshold_get, &page_owner_threshold_set, "%llu"); static int __init pageowner_init(void) { struct dentry *dir; if (!static_branch_unlikely(&page_owner_inited)) { pr_info("page_owner is disabled\n"); return 0; } debugfs_create_file("page_owner", 0400, NULL, NULL, &proc_page_owner_operations); dir = debugfs_create_dir("page_owner_stacks", NULL); debugfs_create_file("show_stacks", 0400, dir, NULL, &page_owner_stack_operations); debugfs_create_file("count_threshold", 0600, dir, NULL, &proc_page_owner_threshold); return 0; } late_initcall(pageowner_init)
76 2 28 30 26 77 2 1 30 45 38 2 37 40 33 9 9 8 29 40 35 18 35 18 14 8 2 22 114 93 31 187 187 185 187 26 157 13 13 13 13 13 36 36 1 20 17 44 40 8 13 13 52 49 49 4 10 17 1 27 48 3 1 1 21 38 56 2 2 52 4 17 36 17 36 4 48 39 3 2 3 229 3 57 174 4 1 4 3 3 67 1 120 186 186 120 1 183 4 24 167 19 8 12 4 4 12 9 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/file.c * * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) * Laboratoire MASI - Institut Blaise Pascal * Universite Pierre et Marie Curie (Paris VI) * * from * * linux/fs/minix/file.c * * Copyright (C) 1991, 1992 Linus Torvalds * * ext4 fs regular file handling primitives * * 64-bit file support on 64-bit platforms by Jakub Jelinek * (jj@sunsite.ms.mff.cuni.cz) */ #include <linux/time.h> #include <linux/fs.h> #include <linux/iomap.h> #include <linux/mount.h> #include <linux/path.h> #include <linux/dax.h> #include <linux/quotaops.h> #include <linux/pagevec.h> #include <linux/uio.h> #include <linux/mman.h> #include <linux/backing-dev.h> #include "ext4.h" #include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" #include "truncate.h" /* * Returns %true if the given DIO request should be attempted with DIO, or * %false if it should fall back to buffered I/O. * * DIO isn't well specified; when it's unsupported (either due to the request * being misaligned, or due to the file not supporting DIO at all), filesystems * either fall back to buffered I/O or return EINVAL. For files that don't use * any special features like encryption or verity, ext4 has traditionally * returned EINVAL for misaligned DIO. iomap_dio_rw() uses this convention too. * In this case, we should attempt the DIO, *not* fall back to buffered I/O. * * In contrast, in cases where DIO is unsupported due to ext4 features, ext4 * traditionally falls back to buffered I/O. * * This function implements the traditional ext4 behavior in all these cases. */ static bool ext4_should_use_dio(struct kiocb *iocb, struct iov_iter *iter) { struct inode *inode = file_inode(iocb->ki_filp); u32 dio_align = ext4_dio_alignment(inode); if (dio_align == 0) return false; if (dio_align == 1) return true; return IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), dio_align); } static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) { ssize_t ret; struct inode *inode = file_inode(iocb->ki_filp); if (iocb->ki_flags & IOCB_NOWAIT) { if (!inode_trylock_shared(inode)) return -EAGAIN; } else { inode_lock_shared(inode); } if (!ext4_should_use_dio(iocb, to)) { inode_unlock_shared(inode); /* * Fallback to buffered I/O if the operation being performed on * the inode is not supported by direct I/O. The IOCB_DIRECT * flag needs to be cleared here in order to ensure that the * direct I/O path within generic_file_read_iter() is not * taken. */ iocb->ki_flags &= ~IOCB_DIRECT; return generic_file_read_iter(iocb, to); } ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0, NULL, 0); inode_unlock_shared(inode); file_accessed(iocb->ki_filp); return ret; } #ifdef CONFIG_FS_DAX static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; if (iocb->ki_flags & IOCB_NOWAIT) { if (!inode_trylock_shared(inode)) return -EAGAIN; } else { inode_lock_shared(inode); } /* * Recheck under inode lock - at this point we are sure it cannot * change anymore */ if (!IS_DAX(inode)) { inode_unlock_shared(inode); /* Fallback to buffered IO in case we cannot support DAX */ return generic_file_read_iter(iocb, to); } ret = dax_iomap_rw(iocb, to, &ext4_iomap_ops); inode_unlock_shared(inode); file_accessed(iocb->ki_filp); return ret; } #endif static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct inode *inode = file_inode(iocb->ki_filp); if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; if (!iov_iter_count(to)) return 0; /* skip atime */ #ifdef CONFIG_FS_DAX if (IS_DAX(inode)) return ext4_dax_read_iter(iocb, to); #endif if (iocb->ki_flags & IOCB_DIRECT) return ext4_dio_read_iter(iocb, to); return generic_file_read_iter(iocb, to); } static ssize_t ext4_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { struct inode *inode = file_inode(in); if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; return filemap_splice_read(in, ppos, pipe, len, flags); } /* * Called when an inode is released. Note that this is different * from ext4_file_open: open gets called at every open, but release * gets called only when /all/ the files are closed. */ static int ext4_release_file(struct inode *inode, struct file *filp) { if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) { ext4_alloc_da_blocks(inode); ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); } /* if we are the last writer on the inode, drop the block reservation */ if ((filp->f_mode & FMODE_WRITE) && (atomic_read(&inode->i_writecount) == 1) && !EXT4_I(inode)->i_reserved_data_blocks) { down_write(&EXT4_I(inode)->i_data_sem); ext4_discard_preallocations(inode); up_write(&EXT4_I(inode)->i_data_sem); } if (is_dx(inode) && filp->private_data) ext4_htree_free_dir_info(filp->private_data); return 0; } /* * This tests whether the IO in question is block-aligned or not. * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they * are converted to written only after the IO is complete. Until they are * mapped, these blocks appear as holes, so dio_zero_block() will assume that * it needs to zero out portions of the start and/or end block. If 2 AIO * threads are at work on the same unwritten block, they must be synchronized * or one thread will zero the other's data, causing corruption. */ static bool ext4_unaligned_io(struct inode *inode, struct iov_iter *from, loff_t pos) { struct super_block *sb = inode->i_sb; unsigned long blockmask = sb->s_blocksize - 1; if ((pos | iov_iter_alignment(from)) & blockmask) return true; return false; } static bool ext4_extending_io(struct inode *inode, loff_t offset, size_t len) { if (offset + len > i_size_read(inode) || offset + len > EXT4_I(inode)->i_disksize) return true; return false; } /* Is IO overwriting allocated or initialized blocks? */ static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len, bool *unwritten) { struct ext4_map_blocks map; unsigned int blkbits = inode->i_blkbits; int err, blklen; if (pos + len > i_size_read(inode)) return false; map.m_lblk = pos >> blkbits; map.m_len = EXT4_MAX_BLOCKS(len, pos, blkbits); blklen = map.m_len; err = ext4_map_blocks(NULL, inode, &map, 0); if (err != blklen) return false; /* * 'err==len' means that all of the blocks have been preallocated, * regardless of whether they have been initialized or not. We need to * check m_flags to distinguish the unwritten extents. */ *unwritten = !(map.m_flags & EXT4_MAP_MAPPED); return true; } static ssize_t ext4_generic_write_checks(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; if (unlikely(IS_IMMUTABLE(inode))) return -EPERM; ret = generic_write_checks(iocb, from); if (ret <= 0) return ret; /* * If we have encountered a bitmap-format file, the size limit * is smaller than s_maxbytes, which is for extent-mapped files. */ if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); if (iocb->ki_pos >= sbi->s_bitmap_maxbytes) return -EFBIG; iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos); } return iov_iter_count(from); } static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from) { ssize_t ret, count; count = ext4_generic_write_checks(iocb, from); if (count <= 0) return count; ret = file_modified(iocb->ki_filp); if (ret) return ret; return count; } static ssize_t ext4_buffered_write_iter(struct kiocb *iocb, struct iov_iter *from) { ssize_t ret; struct inode *inode = file_inode(iocb->ki_filp); if (iocb->ki_flags & IOCB_NOWAIT) return -EOPNOTSUPP; inode_lock(inode); ret = ext4_write_checks(iocb, from); if (ret <= 0) goto out; ret = generic_perform_write(iocb, from); out: inode_unlock(inode); if (unlikely(ret <= 0)) return ret; return generic_write_sync(iocb, ret); } static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset, ssize_t count) { handle_t *handle; lockdep_assert_held_write(&inode->i_rwsem); handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); if (IS_ERR(handle)) return PTR_ERR(handle); if (ext4_update_inode_size(inode, offset + count)) { int ret = ext4_mark_inode_dirty(handle, inode); if (unlikely(ret)) { ext4_journal_stop(handle); return ret; } } if (inode->i_nlink) ext4_orphan_del(handle, inode); ext4_journal_stop(handle); return count; } /* * Clean up the inode after DIO or DAX extending write has completed and the * inode size has been updated using ext4_handle_inode_extension(). */ static void ext4_inode_extension_cleanup(struct inode *inode, ssize_t count) { lockdep_assert_held_write(&inode->i_rwsem); if (count < 0) { ext4_truncate_failed_write(inode); /* * If the truncate operation failed early, then the inode may * still be on the orphan list. In that case, we need to try * remove the inode from the in-memory linked list. */ if (inode->i_nlink) ext4_orphan_del(NULL, inode); return; } /* * If i_disksize got extended either due to writeback of delalloc * blocks or extending truncate while the DIO was running we could fail * to cleanup the orphan list in ext4_handle_inode_extension(). Do it * now. */ if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) { handle_t *handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); if (IS_ERR(handle)) { /* * The write has successfully completed. Not much to * do with the error here so just cleanup the orphan * list and hope for the best. */ ext4_orphan_del(NULL, inode); return; } ext4_orphan_del(handle, inode); ext4_journal_stop(handle); } } static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error, unsigned int flags) { loff_t pos = iocb->ki_pos; struct inode *inode = file_inode(iocb->ki_filp); if (!error && size && flags & IOMAP_DIO_UNWRITTEN) error = ext4_convert_unwritten_extents(NULL, inode, pos, size); if (error) return error; /* * Note that EXT4_I(inode)->i_disksize can get extended up to * inode->i_size while the I/O was running due to writeback of delalloc * blocks. But the code in ext4_iomap_alloc() is careful to use * zeroed/unwritten extents if this is possible; thus we won't leave * uninitialized blocks in a file even if we didn't succeed in writing * as much as we intended. Also we can race with truncate or write * expanding the file so we have to be a bit careful here. */ if (pos + size <= READ_ONCE(EXT4_I(inode)->i_disksize) && pos + size <= i_size_read(inode)) return size; return ext4_handle_inode_extension(inode, pos, size); } static const struct iomap_dio_ops ext4_dio_write_ops = { .end_io = ext4_dio_write_end_io, }; /* * The intention here is to start with shared lock acquired then see if any * condition requires an exclusive inode lock. If yes, then we restart the * whole operation by releasing the shared lock and acquiring exclusive lock. * * - For unaligned_io we never take shared lock as it may cause data corruption * when two unaligned IO tries to modify the same block e.g. while zeroing. * * - For extending writes case we don't take the shared lock, since it requires * updating inode i_disksize and/or orphan handling with exclusive lock. * * - shared locking will only be true mostly with overwrites, including * initialized blocks and unwritten blocks. For overwrite unwritten blocks * we protect splitting extents by i_data_sem in ext4_inode_info, so we can * also release exclusive i_rwsem lock. * * - Otherwise we will switch to exclusive i_rwsem lock. */ static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from, bool *ilock_shared, bool *extend, bool *unwritten, int *dio_flags) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); loff_t offset; size_t count; ssize_t ret; bool overwrite, unaligned_io; restart: ret = ext4_generic_write_checks(iocb, from); if (ret <= 0) goto out; offset = iocb->ki_pos; count = ret; unaligned_io = ext4_unaligned_io(inode, from, offset); *extend = ext4_extending_io(inode, offset, count); overwrite = ext4_overwrite_io(inode, offset, count, unwritten); /* * Determine whether we need to upgrade to an exclusive lock. This is * required to change security info in file_modified(), for extending * I/O, any form of non-overwrite I/O, and unaligned I/O to unwritten * extents (as partial block zeroing may be required). * * Note that unaligned writes are allowed under shared lock so long as * they are pure overwrites. Otherwise, concurrent unaligned writes risk * data corruption due to partial block zeroing in the dio layer, and so * the I/O must occur exclusively. */ if (*ilock_shared && ((!IS_NOSEC(inode) || *extend || !overwrite || (unaligned_io && *unwritten)))) { if (iocb->ki_flags & IOCB_NOWAIT) { ret = -EAGAIN; goto out; } inode_unlock_shared(inode); *ilock_shared = false; inode_lock(inode); goto restart; } /* * Now that locking is settled, determine dio flags and exclusivity * requirements. We don't use DIO_OVERWRITE_ONLY because we enforce * behavior already. The inode lock is already held exclusive if the * write is non-overwrite or extending, so drain all outstanding dio and * set the force wait dio flag. */ if (!*ilock_shared && (unaligned_io || *extend)) { if (iocb->ki_flags & IOCB_NOWAIT) { ret = -EAGAIN; goto out; } if (unaligned_io && (!overwrite || *unwritten)) inode_dio_wait(inode); *dio_flags = IOMAP_DIO_FORCE_WAIT; } ret = file_modified(file); if (ret < 0) goto out; return count; out: if (*ilock_shared) inode_unlock_shared(inode); else inode_unlock(inode); return ret; } static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) { ssize_t ret; handle_t *handle; struct inode *inode = file_inode(iocb->ki_filp); loff_t offset = iocb->ki_pos; size_t count = iov_iter_count(from); const struct iomap_ops *iomap_ops = &ext4_iomap_ops; bool extend = false, unwritten = false; bool ilock_shared = true; int dio_flags = 0; /* * Quick check here without any i_rwsem lock to see if it is extending * IO. A more reliable check is done in ext4_dio_write_checks() with * proper locking in place. */ if (offset + count > i_size_read(inode)) ilock_shared = false; if (iocb->ki_flags & IOCB_NOWAIT) { if (ilock_shared) { if (!inode_trylock_shared(inode)) return -EAGAIN; } else { if (!inode_trylock(inode)) return -EAGAIN; } } else { if (ilock_shared) inode_lock_shared(inode); else inode_lock(inode); } /* Fallback to buffered I/O if the inode does not support direct I/O. */ if (!ext4_should_use_dio(iocb, from)) { if (ilock_shared) inode_unlock_shared(inode); else inode_unlock(inode); return ext4_buffered_write_iter(iocb, from); } /* * Prevent inline data from being created since we are going to allocate * blocks for DIO. We know the inode does not currently have inline data * because ext4_should_use_dio() checked for it, but we have to clear * the state flag before the write checks because a lock cycle could * introduce races with other writers. */ ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend, &unwritten, &dio_flags); if (ret <= 0) return ret; offset = iocb->ki_pos; count = ret; if (extend) { handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); if (IS_ERR(handle)) { ret = PTR_ERR(handle); goto out; } ret = ext4_orphan_add(handle, inode); if (ret) { ext4_journal_stop(handle); goto out; } ext4_journal_stop(handle); } if (ilock_shared && !unwritten) iomap_ops = &ext4_iomap_overwrite_ops; ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops, dio_flags, NULL, 0); if (ret == -ENOTBLK) ret = 0; if (extend) { /* * We always perform extending DIO write synchronously so by * now the IO is completed and ext4_handle_inode_extension() * was called. Cleanup the inode in case of error or race with * writeback of delalloc blocks. */ WARN_ON_ONCE(ret == -EIOCBQUEUED); ext4_inode_extension_cleanup(inode, ret); } out: if (ilock_shared) inode_unlock_shared(inode); else inode_unlock(inode); if (ret >= 0 && iov_iter_count(from)) { ssize_t err; loff_t endbyte; offset = iocb->ki_pos; err = ext4_buffered_write_iter(iocb, from); if (err < 0) return err; /* * We need to ensure that the pages within the page cache for * the range covered by this I/O are written to disk and * invalidated. This is in attempt to preserve the expected * direct I/O semantics in the case we fallback to buffered I/O * to complete off the I/O request. */ ret += err; endbyte = offset + err - 1; err = filemap_write_and_wait_range(iocb->ki_filp->f_mapping, offset, endbyte); if (!err) invalidate_mapping_pages(iocb->ki_filp->f_mapping, offset >> PAGE_SHIFT, endbyte >> PAGE_SHIFT); } return ret; } #ifdef CONFIG_FS_DAX static ssize_t ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) { ssize_t ret; size_t count; loff_t offset; handle_t *handle; bool extend = false; struct inode *inode = file_inode(iocb->ki_filp); if (iocb->ki_flags & IOCB_NOWAIT) { if (!inode_trylock(inode)) return -EAGAIN; } else { inode_lock(inode); } ret = ext4_write_checks(iocb, from); if (ret <= 0) goto out; offset = iocb->ki_pos; count = iov_iter_count(from); if (offset + count > EXT4_I(inode)->i_disksize) { handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); if (IS_ERR(handle)) { ret = PTR_ERR(handle); goto out; } ret = ext4_orphan_add(handle, inode); if (ret) { ext4_journal_stop(handle); goto out; } extend = true; ext4_journal_stop(handle); } ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops); if (extend) { ret = ext4_handle_inode_extension(inode, offset, ret); ext4_inode_extension_cleanup(inode, ret); } out: inode_unlock(inode); if (ret > 0) ret = generic_write_sync(iocb, ret); return ret; } #endif static ssize_t ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; #ifdef CONFIG_FS_DAX if (IS_DAX(inode)) return ext4_dax_write_iter(iocb, from); #endif if (iocb->ki_flags & IOCB_DIRECT) return ext4_dio_write_iter(iocb, from); else return ext4_buffered_write_iter(iocb, from); } #ifdef CONFIG_FS_DAX static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf, unsigned int order) { int error = 0; vm_fault_t result; int retries = 0; handle_t *handle = NULL; struct inode *inode = file_inode(vmf->vma->vm_file); struct super_block *sb = inode->i_sb; /* * We have to distinguish real writes from writes which will result in a * COW page; COW writes should *not* poke the journal (the file will not * be changed). Doing so would cause unintended failures when mounted * read-only. * * We check for VM_SHARED rather than vmf->cow_page since the latter is * unset for order != 0 (i.e. only in do_cow_fault); for * other sizes, dax_iomap_fault will handle splitting / fallback so that * we eventually come back with a COW page. */ bool write = (vmf->flags & FAULT_FLAG_WRITE) && (vmf->vma->vm_flags & VM_SHARED); struct address_space *mapping = vmf->vma->vm_file->f_mapping; pfn_t pfn; if (write) { sb_start_pagefault(sb); file_update_time(vmf->vma->vm_file); filemap_invalidate_lock_shared(mapping); retry: handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, EXT4_DATA_TRANS_BLOCKS(sb)); if (IS_ERR(handle)) { filemap_invalidate_unlock_shared(mapping); sb_end_pagefault(sb); return VM_FAULT_SIGBUS; } } else { filemap_invalidate_lock_shared(mapping); } result = dax_iomap_fault(vmf, order, &pfn, &error, &ext4_iomap_ops); if (write) { ext4_journal_stop(handle); if ((result & VM_FAULT_ERROR) && error == -ENOSPC && ext4_should_retry_alloc(sb, &retries)) goto retry; /* Handling synchronous page fault? */ if (result & VM_FAULT_NEEDDSYNC) result = dax_finish_sync_fault(vmf, order, pfn); filemap_invalidate_unlock_shared(mapping); sb_end_pagefault(sb); } else { filemap_invalidate_unlock_shared(mapping); } return result; } static vm_fault_t ext4_dax_fault(struct vm_fault *vmf) { return ext4_dax_huge_fault(vmf, 0); } static const struct vm_operations_struct ext4_dax_vm_ops = { .fault = ext4_dax_fault, .huge_fault = ext4_dax_huge_fault, .page_mkwrite = ext4_dax_fault, .pfn_mkwrite = ext4_dax_fault, }; #else #define ext4_dax_vm_ops ext4_file_vm_ops #endif static const struct vm_operations_struct ext4_file_vm_ops = { .fault = filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = ext4_page_mkwrite, }; static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file->f_mapping->host; struct dax_device *dax_dev = EXT4_SB(inode->i_sb)->s_daxdev; if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; /* * We don't support synchronous mappings for non-DAX files and * for DAX files if underneath dax_device is not synchronous. */ if (!daxdev_mapping_supported(vma, dax_dev)) return -EOPNOTSUPP; file_accessed(file); if (IS_DAX(file_inode(file))) { vma->vm_ops = &ext4_dax_vm_ops; vm_flags_set(vma, VM_HUGEPAGE); } else { vma->vm_ops = &ext4_file_vm_ops; } return 0; } static int ext4_sample_last_mounted(struct super_block *sb, struct vfsmount *mnt) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct path path; char buf[64], *cp; handle_t *handle; int err; if (likely(ext4_test_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED))) return 0; if (sb_rdonly(sb) || !sb_start_intwrite_trylock(sb)) return 0; ext4_set_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED); /* * Sample where the filesystem has been mounted and * store it in the superblock for sysadmin convenience * when trying to sort through large numbers of block * devices or filesystem images. */ memset(buf, 0, sizeof(buf)); path.mnt = mnt; path.dentry = mnt->mnt_root; cp = d_path(&path, buf, sizeof(buf)); err = 0; if (IS_ERR(cp)) goto out; handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1); err = PTR_ERR(handle); if (IS_ERR(handle)) goto out; BUFFER_TRACE(sbi->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh, EXT4_JTR_NONE); if (err) goto out_journal; lock_buffer(sbi->s_sbh); strtomem_pad(sbi->s_es->s_last_mounted, cp, 0); ext4_superblock_csum_set(sb); unlock_buffer(sbi->s_sbh); ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); out_journal: ext4_journal_stop(handle); out: sb_end_intwrite(sb); return err; } static int ext4_file_open(struct inode *inode, struct file *filp) { int ret; if (unlikely(ext4_forced_shutdown(inode->i_sb))) return -EIO; ret = ext4_sample_last_mounted(inode->i_sb, filp->f_path.mnt); if (ret) return ret; ret = fscrypt_file_open(inode, filp); if (ret) return ret; ret = fsverity_file_open(inode, filp); if (ret) return ret; /* * Set up the jbd2_inode if we are opening the inode for * writing and the journal is present */ if (filp->f_mode & FMODE_WRITE) { ret = ext4_inode_attach_jinode(inode); if (ret < 0) return ret; } filp->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT; return dquot_file_open(inode, filp); } /* * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values * by calling generic_file_llseek_size() with the appropriate maxbytes * value for each. */ loff_t ext4_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; loff_t maxbytes; if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes; else maxbytes = inode->i_sb->s_maxbytes; switch (whence) { default: return generic_file_llseek_size(file, offset, whence, maxbytes, i_size_read(inode)); case SEEK_HOLE: inode_lock_shared(inode); offset = iomap_seek_hole(inode, offset, &ext4_iomap_report_ops); inode_unlock_shared(inode); break; case SEEK_DATA: inode_lock_shared(inode); offset = iomap_seek_data(inode, offset, &ext4_iomap_report_ops); inode_unlock_shared(inode); break; } if (offset < 0) return offset; return vfs_setpos(file, offset, maxbytes); } const struct file_operations ext4_file_operations = { .llseek = ext4_llseek, .read_iter = ext4_file_read_iter, .write_iter = ext4_file_write_iter, .iopoll = iocb_bio_iopoll, .unlocked_ioctl = ext4_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext4_compat_ioctl, #endif .mmap = ext4_file_mmap, .open = ext4_file_open, .release = ext4_release_file, .fsync = ext4_sync_file, .get_unmapped_area = thp_get_unmapped_area, .splice_read = ext4_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = ext4_fallocate, .fop_flags = FOP_MMAP_SYNC | FOP_BUFFER_RASYNC | FOP_DIO_PARALLEL_WRITE, }; const struct inode_operations ext4_file_inode_operations = { .setattr = ext4_setattr, .getattr = ext4_file_getattr, .listxattr = ext4_listxattr, .get_inode_acl = ext4_get_acl, .set_acl = ext4_set_acl, .fiemap = ext4_fiemap, .fileattr_get = ext4_fileattr_get, .fileattr_set = ext4_fileattr_set, };
35 2 136 2 2 2 3 3 5 5 5 5 5 2 5 5 5 5 5 2 2 2 2 2 2 2 19 19 19 19 19 19 10 16 19 19 1 1 1 5 3 5 5 5 5 5 5 5 5 5 8 2 6 5 5 5 5 5 3 3 2 3 3 3 3 6 3 2 1 4 2 2 1 2 1 2 2 2 2 2 8 8 8 6 3 3 6 6 5 5 1 1 1 2 3 6 41 1 39 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 // SPDX-License-Identifier: GPL-2.0+ /* * NILFS segment usage file. * * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. * * Written by Koji Sato. * Revised by Ryusuke Konishi. */ #include <linux/kernel.h> #include <linux/fs.h> #include <linux/string.h> #include <linux/buffer_head.h> #include <linux/errno.h> #include "mdt.h" #include "sufile.h" #include <trace/events/nilfs2.h> /** * struct nilfs_sufile_info - on-memory private data of sufile * @mi: on-memory private data of metadata file * @ncleansegs: number of clean segments * @allocmin: lower limit of allocatable segment range * @allocmax: upper limit of allocatable segment range */ struct nilfs_sufile_info { struct nilfs_mdt_info mi; unsigned long ncleansegs;/* number of clean segments */ __u64 allocmin; /* lower limit of allocatable segment range */ __u64 allocmax; /* upper limit of allocatable segment range */ }; static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile) { return (struct nilfs_sufile_info *)NILFS_MDT(sufile); } static inline unsigned long nilfs_sufile_segment_usages_per_block(const struct inode *sufile) { return NILFS_MDT(sufile)->mi_entries_per_block; } static unsigned long nilfs_sufile_get_blkoff(const struct inode *sufile, __u64 segnum) { __u64 t = segnum + NILFS_MDT(sufile)->mi_first_entry_offset; t = div64_ul(t, nilfs_sufile_segment_usages_per_block(sufile)); return (unsigned long)t; } static unsigned long nilfs_sufile_get_offset(const struct inode *sufile, __u64 segnum) { __u64 t = segnum + NILFS_MDT(sufile)->mi_first_entry_offset; return do_div(t, nilfs_sufile_segment_usages_per_block(sufile)); } static unsigned long nilfs_sufile_segment_usages_in_block(const struct inode *sufile, __u64 curr, __u64 max) { return min_t(unsigned long, nilfs_sufile_segment_usages_per_block(sufile) - nilfs_sufile_get_offset(sufile, curr), max - curr + 1); } static struct nilfs_segment_usage * nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum, struct buffer_head *bh, void *kaddr) { return kaddr + bh_offset(bh) + nilfs_sufile_get_offset(sufile, segnum) * NILFS_MDT(sufile)->mi_entry_size; } static inline int nilfs_sufile_get_header_block(struct inode *sufile, struct buffer_head **bhp) { return nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp); } static inline int nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum, int create, struct buffer_head **bhp) { return nilfs_mdt_get_block(sufile, nilfs_sufile_get_blkoff(sufile, segnum), create, NULL, bhp); } static int nilfs_sufile_delete_segment_usage_block(struct inode *sufile, __u64 segnum) { return nilfs_mdt_delete_block(sufile, nilfs_sufile_get_blkoff(sufile, segnum)); } static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, u64 ncleanadd, u64 ndirtyadd) { struct nilfs_sufile_header *header; void *kaddr; kaddr = kmap_local_page(header_bh->b_page); header = kaddr + bh_offset(header_bh); le64_add_cpu(&header->sh_ncleansegs, ncleanadd); le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); kunmap_local(kaddr); mark_buffer_dirty(header_bh); } /** * nilfs_sufile_get_ncleansegs - return the number of clean segments * @sufile: inode of segment usage file */ unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile) { return NILFS_SUI(sufile)->ncleansegs; } /** * nilfs_sufile_updatev - modify multiple segment usages at a time * @sufile: inode of segment usage file * @segnumv: array of segment numbers * @nsegs: size of @segnumv array * @create: creation flag * @ndone: place to store number of modified segments on @segnumv * @dofunc: primitive operation for the update * * Description: nilfs_sufile_updatev() repeatedly calls @dofunc * against the given array of segments. The @dofunc is called with * buffers of a header block and the sufile block in which the target * segment usage entry is contained. If @ndone is given, the number * of successfully modified segments from the head is stored in the * place @ndone points to. * * Return Value: On success, zero is returned. On error, one of the * following negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. * * %-ENOENT - Given segment usage is in hole block (may be returned if * @create is zero) * * %-EINVAL - Invalid segment usage number */ int nilfs_sufile_updatev(struct inode *sufile, __u64 *segnumv, size_t nsegs, int create, size_t *ndone, void (*dofunc)(struct inode *, __u64, struct buffer_head *, struct buffer_head *)) { struct buffer_head *header_bh, *bh; unsigned long blkoff, prev_blkoff; __u64 *seg; size_t nerr = 0, n = 0; int ret = 0; if (unlikely(nsegs == 0)) goto out; down_write(&NILFS_MDT(sufile)->mi_sem); for (seg = segnumv; seg < segnumv + nsegs; seg++) { if (unlikely(*seg >= nilfs_sufile_get_nsegments(sufile))) { nilfs_warn(sufile->i_sb, "%s: invalid segment number: %llu", __func__, (unsigned long long)*seg); nerr++; } } if (nerr > 0) { ret = -EINVAL; goto out_sem; } ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out_sem; seg = segnumv; blkoff = nilfs_sufile_get_blkoff(sufile, *seg); ret = nilfs_mdt_get_block(sufile, blkoff, create, NULL, &bh); if (ret < 0) goto out_header; for (;;) { dofunc(sufile, *seg, header_bh, bh); if (++seg >= segnumv + nsegs) break; prev_blkoff = blkoff; blkoff = nilfs_sufile_get_blkoff(sufile, *seg); if (blkoff == prev_blkoff) continue; /* get different block */ brelse(bh); ret = nilfs_mdt_get_block(sufile, blkoff, create, NULL, &bh); if (unlikely(ret < 0)) goto out_header; } brelse(bh); out_header: n = seg - segnumv; brelse(header_bh); out_sem: up_write(&NILFS_MDT(sufile)->mi_sem); out: if (ndone) *ndone = n; return ret; } int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create, void (*dofunc)(struct inode *, __u64, struct buffer_head *, struct buffer_head *)) { struct buffer_head *header_bh, *bh; int ret; if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) { nilfs_warn(sufile->i_sb, "%s: invalid segment number: %llu", __func__, (unsigned long long)segnum); return -EINVAL; } down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out_sem; ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, create, &bh); if (!ret) { dofunc(sufile, segnum, header_bh, bh); brelse(bh); } brelse(header_bh); out_sem: up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } /** * nilfs_sufile_set_alloc_range - limit range of segment to be allocated * @sufile: inode of segment usage file * @start: minimum segment number of allocatable region (inclusive) * @end: maximum segment number of allocatable region (inclusive) * * Return Value: On success, 0 is returned. On error, one of the * following negative error codes is returned. * * %-ERANGE - invalid segment region */ int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end) { struct nilfs_sufile_info *sui = NILFS_SUI(sufile); __u64 nsegs; int ret = -ERANGE; down_write(&NILFS_MDT(sufile)->mi_sem); nsegs = nilfs_sufile_get_nsegments(sufile); if (start <= end && end < nsegs) { sui->allocmin = start; sui->allocmax = end; ret = 0; } up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } /** * nilfs_sufile_alloc - allocate a segment * @sufile: inode of segment usage file * @segnump: pointer to segment number * * Description: nilfs_sufile_alloc() allocates a clean segment. * * Return Value: On success, 0 is returned and the segment number of the * allocated segment is stored in the place pointed by @segnump. On error, one * of the following negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. * * %-ENOSPC - No clean segment left. */ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) { struct buffer_head *header_bh, *su_bh; struct nilfs_sufile_header *header; struct nilfs_segment_usage *su; struct nilfs_sufile_info *sui = NILFS_SUI(sufile); size_t susz = NILFS_MDT(sufile)->mi_entry_size; __u64 segnum, maxsegnum, last_alloc; void *kaddr; unsigned long nsegments, nsus, cnt; int ret, j; down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out_sem; kaddr = kmap_local_page(header_bh->b_page); header = kaddr + bh_offset(header_bh); last_alloc = le64_to_cpu(header->sh_last_alloc); kunmap_local(kaddr); nsegments = nilfs_sufile_get_nsegments(sufile); maxsegnum = sui->allocmax; segnum = last_alloc + 1; if (segnum < sui->allocmin || segnum > sui->allocmax) segnum = sui->allocmin; for (cnt = 0; cnt < nsegments; cnt += nsus) { if (segnum > maxsegnum) { if (cnt < sui->allocmax - sui->allocmin + 1) { /* * wrap around in the limited region. * if allocation started from * sui->allocmin, this never happens. */ segnum = sui->allocmin; maxsegnum = last_alloc; } else if (segnum > sui->allocmin && sui->allocmax + 1 < nsegments) { segnum = sui->allocmax + 1; maxsegnum = nsegments - 1; } else if (sui->allocmin > 0) { segnum = 0; maxsegnum = sui->allocmin - 1; } else { break; /* never happens */ } } trace_nilfs2_segment_usage_check(sufile, segnum, cnt); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &su_bh); if (ret < 0) goto out_header; kaddr = kmap_local_page(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage( sufile, segnum, su_bh, kaddr); nsus = nilfs_sufile_segment_usages_in_block( sufile, segnum, maxsegnum); for (j = 0; j < nsus; j++, su = (void *)su + susz, segnum++) { if (!nilfs_segment_usage_clean(su)) continue; /* found a clean segment */ nilfs_segment_usage_set_dirty(su); kunmap_local(kaddr); kaddr = kmap_local_page(header_bh->b_page); header = kaddr + bh_offset(header_bh); le64_add_cpu(&header->sh_ncleansegs, -1); le64_add_cpu(&header->sh_ndirtysegs, 1); header->sh_last_alloc = cpu_to_le64(segnum); kunmap_local(kaddr); sui->ncleansegs--; mark_buffer_dirty(header_bh); mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); brelse(su_bh); *segnump = segnum; trace_nilfs2_segment_usage_allocated(sufile, segnum); goto out_header; } kunmap_local(kaddr); brelse(su_bh); } /* no segments left */ ret = -ENOSPC; out_header: brelse(header_bh); out_sem: up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum, struct buffer_head *header_bh, struct buffer_head *su_bh) { struct nilfs_segment_usage *su; void *kaddr; kaddr = kmap_local_page(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (unlikely(!nilfs_segment_usage_clean(su))) { nilfs_warn(sufile->i_sb, "%s: segment %llu must be clean", __func__, (unsigned long long)segnum); kunmap_local(kaddr); return; } nilfs_segment_usage_set_dirty(su); kunmap_local(kaddr); nilfs_sufile_mod_counter(header_bh, -1, 1); NILFS_SUI(sufile)->ncleansegs--; mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); } void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, struct buffer_head *header_bh, struct buffer_head *su_bh) { struct nilfs_segment_usage *su; void *kaddr; int clean, dirty; kaddr = kmap_local_page(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (su->su_flags == cpu_to_le32(BIT(NILFS_SEGMENT_USAGE_DIRTY)) && su->su_nblocks == cpu_to_le32(0)) { kunmap_local(kaddr); return; } clean = nilfs_segment_usage_clean(su); dirty = nilfs_segment_usage_dirty(su); /* make the segment garbage */ su->su_lastmod = cpu_to_le64(0); su->su_nblocks = cpu_to_le32(0); su->su_flags = cpu_to_le32(BIT(NILFS_SEGMENT_USAGE_DIRTY)); kunmap_local(kaddr); nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); NILFS_SUI(sufile)->ncleansegs -= clean; mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); } void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, struct buffer_head *header_bh, struct buffer_head *su_bh) { struct nilfs_segment_usage *su; void *kaddr; int sudirty; kaddr = kmap_local_page(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (nilfs_segment_usage_clean(su)) { nilfs_warn(sufile->i_sb, "%s: segment %llu is already clean", __func__, (unsigned long long)segnum); kunmap_local(kaddr); return; } if (unlikely(nilfs_segment_usage_error(su))) nilfs_warn(sufile->i_sb, "free segment %llu marked in error", (unsigned long long)segnum); sudirty = nilfs_segment_usage_dirty(su); if (unlikely(!sudirty)) nilfs_warn(sufile->i_sb, "free unallocated segment %llu", (unsigned long long)segnum); nilfs_segment_usage_set_clean(su); kunmap_local(kaddr); mark_buffer_dirty(su_bh); nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); NILFS_SUI(sufile)->ncleansegs++; nilfs_mdt_mark_dirty(sufile); trace_nilfs2_segment_usage_freed(sufile, segnum); } /** * nilfs_sufile_mark_dirty - mark the buffer having a segment usage dirty * @sufile: inode of segment usage file * @segnum: segment number */ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) { struct buffer_head *bh; void *kaddr; struct nilfs_segment_usage *su; int ret; down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); if (ret) goto out_sem; kaddr = kmap_local_page(bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); if (unlikely(nilfs_segment_usage_error(su))) { struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; kunmap_local(kaddr); brelse(bh); if (nilfs_segment_is_active(nilfs, segnum)) { nilfs_error(sufile->i_sb, "active segment %llu is erroneous", (unsigned long long)segnum); } else { /* * Segments marked erroneous are never allocated by * nilfs_sufile_alloc(); only active segments, ie, * the segments indexed by ns_segnum or ns_nextnum, * can be erroneous here. */ WARN_ON_ONCE(1); } ret = -EIO; } else { nilfs_segment_usage_set_dirty(su); kunmap_local(kaddr); mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); brelse(bh); } out_sem: up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } /** * nilfs_sufile_set_segment_usage - set usage of a segment * @sufile: inode of segment usage file * @segnum: segment number * @nblocks: number of live blocks in the segment * @modtime: modification time (option) */ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, unsigned long nblocks, time64_t modtime) { struct buffer_head *bh; struct nilfs_segment_usage *su; void *kaddr; int ret; down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); if (ret < 0) goto out_sem; kaddr = kmap_local_page(bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); if (modtime) { /* * Check segusage error and set su_lastmod only when updating * this entry with a valid timestamp, not for cancellation. */ WARN_ON_ONCE(nilfs_segment_usage_error(su)); su->su_lastmod = cpu_to_le64(modtime); } su->su_nblocks = cpu_to_le32(nblocks); kunmap_local(kaddr); mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); brelse(bh); out_sem: up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } /** * nilfs_sufile_get_stat - get segment usage statistics * @sufile: inode of segment usage file * @sustat: pointer to a structure of segment usage statistics * * Description: nilfs_sufile_get_stat() returns information about segment * usage. * * Return Value: On success, 0 is returned, and segment usage information is * stored in the place pointed by @sustat. On error, one of the following * negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. */ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) { struct buffer_head *header_bh; struct nilfs_sufile_header *header; struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; void *kaddr; int ret; down_read(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out_sem; kaddr = kmap_local_page(header_bh->b_page); header = kaddr + bh_offset(header_bh); sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile); sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs); sustat->ss_ndirtysegs = le64_to_cpu(header->sh_ndirtysegs); sustat->ss_ctime = nilfs->ns_ctime; sustat->ss_nongc_ctime = nilfs->ns_nongc_ctime; spin_lock(&nilfs->ns_last_segment_lock); sustat->ss_prot_seq = nilfs->ns_prot_seq; spin_unlock(&nilfs->ns_last_segment_lock); kunmap_local(kaddr); brelse(header_bh); out_sem: up_read(&NILFS_MDT(sufile)->mi_sem); return ret; } void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, struct buffer_head *header_bh, struct buffer_head *su_bh) { struct nilfs_segment_usage *su; void *kaddr; int suclean; kaddr = kmap_local_page(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (nilfs_segment_usage_error(su)) { kunmap_local(kaddr); return; } suclean = nilfs_segment_usage_clean(su); nilfs_segment_usage_set_error(su); kunmap_local(kaddr); if (suclean) { nilfs_sufile_mod_counter(header_bh, -1, 0); NILFS_SUI(sufile)->ncleansegs--; } mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); } /** * nilfs_sufile_truncate_range - truncate range of segment array * @sufile: inode of segment usage file * @start: start segment number (inclusive) * @end: end segment number (inclusive) * * Return Value: On success, 0 is returned. On error, one of the * following negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. * * %-EINVAL - Invalid number of segments specified * * %-EBUSY - Dirty or active segments are present in the range */ static int nilfs_sufile_truncate_range(struct inode *sufile, __u64 start, __u64 end) { struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; struct buffer_head *header_bh; struct buffer_head *su_bh; struct nilfs_segment_usage *su, *su2; size_t susz = NILFS_MDT(sufile)->mi_entry_size; unsigned long segusages_per_block; unsigned long nsegs, ncleaned; __u64 segnum; void *kaddr; ssize_t n, nc; int ret; int j; nsegs = nilfs_sufile_get_nsegments(sufile); ret = -EINVAL; if (start > end || start >= nsegs) goto out; ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out; segusages_per_block = nilfs_sufile_segment_usages_per_block(sufile); ncleaned = 0; for (segnum = start; segnum <= end; segnum += n) { n = min_t(unsigned long, segusages_per_block - nilfs_sufile_get_offset(sufile, segnum), end - segnum + 1); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh); if (ret < 0) { if (ret != -ENOENT) goto out_header; /* hole */ continue; } kaddr = kmap_local_page(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage( sufile, segnum, su_bh, kaddr); su2 = su; for (j = 0; j < n; j++, su = (void *)su + susz) { if ((le32_to_cpu(su->su_flags) & ~BIT(NILFS_SEGMENT_USAGE_ERROR)) || nilfs_segment_is_active(nilfs, segnum + j)) { ret = -EBUSY; kunmap_local(kaddr); brelse(su_bh); goto out_header; } } nc = 0; for (su = su2, j = 0; j < n; j++, su = (void *)su + susz) { if (nilfs_segment_usage_error(su)) { nilfs_segment_usage_set_clean(su); nc++; } } kunmap_local(kaddr); if (nc > 0) { mark_buffer_dirty(su_bh); ncleaned += nc; } brelse(su_bh); if (n == segusages_per_block) { /* make hole */ nilfs_sufile_delete_segment_usage_block(sufile, segnum); } } ret = 0; out_header: if (ncleaned > 0) { NILFS_SUI(sufile)->ncleansegs += ncleaned; nilfs_sufile_mod_counter(header_bh, ncleaned, 0); nilfs_mdt_mark_dirty(sufile); } brelse(header_bh); out: return ret; } /** * nilfs_sufile_resize - resize segment array * @sufile: inode of segment usage file * @newnsegs: new number of segments * * Return Value: On success, 0 is returned. On error, one of the * following negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. * * %-ENOSPC - Enough free space is not left for shrinking * * %-EBUSY - Dirty or active segments exist in the region to be truncated */ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs) { struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; struct buffer_head *header_bh; struct nilfs_sufile_header *header; struct nilfs_sufile_info *sui = NILFS_SUI(sufile); void *kaddr; unsigned long nsegs, nrsvsegs; int ret = 0; down_write(&NILFS_MDT(sufile)->mi_sem); nsegs = nilfs_sufile_get_nsegments(sufile); if (nsegs == newnsegs) goto out; ret = -ENOSPC; nrsvsegs = nilfs_nrsvsegs(nilfs, newnsegs); if (newnsegs < nsegs && nsegs - newnsegs + nrsvsegs > sui->ncleansegs) goto out; ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out; if (newnsegs > nsegs) { sui->ncleansegs += newnsegs - nsegs; } else /* newnsegs < nsegs */ { ret = nilfs_sufile_truncate_range(sufile, newnsegs, nsegs - 1); if (ret < 0) goto out_header; sui->ncleansegs -= nsegs - newnsegs; /* * If the sufile is successfully truncated, immediately adjust * the segment allocation space while locking the semaphore * "mi_sem" so that nilfs_sufile_alloc() never allocates * segments in the truncated space. */ sui->allocmax = newnsegs - 1; sui->allocmin = 0; } kaddr = kmap_local_page(header_bh->b_page); header = kaddr + bh_offset(header_bh); header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs); kunmap_local(kaddr); mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(sufile); nilfs_set_nsegments(nilfs, newnsegs); out_header: brelse(header_bh); out: up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } /** * nilfs_sufile_get_suinfo - * @sufile: inode of segment usage file * @segnum: segment number to start looking * @buf: array of suinfo * @sisz: byte size of suinfo * @nsi: size of suinfo array * * Description: * * Return Value: On success, 0 is returned and .... On error, one of the * following negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. */ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, unsigned int sisz, size_t nsi) { struct buffer_head *su_bh; struct nilfs_segment_usage *su; struct nilfs_suinfo *si = buf; size_t susz = NILFS_MDT(sufile)->mi_entry_size; struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; void *kaddr; unsigned long nsegs, segusages_per_block; ssize_t n; int ret, i, j; down_read(&NILFS_MDT(sufile)->mi_sem); segusages_per_block = nilfs_sufile_segment_usages_per_block(sufile); nsegs = min_t(unsigned long, nilfs_sufile_get_nsegments(sufile) - segnum, nsi); for (i = 0; i < nsegs; i += n, segnum += n) { n = min_t(unsigned long, segusages_per_block - nilfs_sufile_get_offset(sufile, segnum), nsegs - i); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh); if (ret < 0) { if (ret != -ENOENT) goto out; /* hole */ memset(si, 0, sisz * n); si = (void *)si + sisz * n; continue; } kaddr = kmap_local_page(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage( sufile, segnum, su_bh, kaddr); for (j = 0; j < n; j++, su = (void *)su + susz, si = (void *)si + sisz) { si->sui_lastmod = le64_to_cpu(su->su_lastmod); si->sui_nblocks = le32_to_cpu(su->su_nblocks); si->sui_flags = le32_to_cpu(su->su_flags) & ~BIT(NILFS_SEGMENT_USAGE_ACTIVE); if (nilfs_segment_is_active(nilfs, segnum + j)) si->sui_flags |= BIT(NILFS_SEGMENT_USAGE_ACTIVE); } kunmap_local(kaddr); brelse(su_bh); } ret = nsegs; out: up_read(&NILFS_MDT(sufile)->mi_sem); return ret; } /** * nilfs_sufile_set_suinfo - sets segment usage info * @sufile: inode of segment usage file * @buf: array of suinfo_update * @supsz: byte size of suinfo_update * @nsup: size of suinfo_update array * * Description: Takes an array of nilfs_suinfo_update structs and updates * segment usage accordingly. Only the fields indicated by the sup_flags * are updated. * * Return Value: On success, 0 is returned. On error, one of the * following negative error codes is returned. * * %-EIO - I/O error. * * %-ENOMEM - Insufficient amount of memory available. * * %-EINVAL - Invalid values in input (segment number, flags or nblocks) */ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf, unsigned int supsz, size_t nsup) { struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; struct buffer_head *header_bh, *bh; struct nilfs_suinfo_update *sup, *supend = buf + supsz * nsup; struct nilfs_segment_usage *su; void *kaddr; unsigned long blkoff, prev_blkoff; int cleansi, cleansu, dirtysi, dirtysu; long ncleaned = 0, ndirtied = 0; int ret = 0; if (unlikely(nsup == 0)) return ret; for (sup = buf; sup < supend; sup = (void *)sup + supsz) { if (sup->sup_segnum >= nilfs->ns_nsegments || (sup->sup_flags & (~0UL << __NR_NILFS_SUINFO_UPDATE_FIELDS)) || (nilfs_suinfo_update_nblocks(sup) && sup->sup_sui.sui_nblocks > nilfs->ns_blocks_per_segment)) return -EINVAL; } down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out_sem; sup = buf; blkoff = nilfs_sufile_get_blkoff(sufile, sup->sup_segnum); ret = nilfs_mdt_get_block(sufile, blkoff, 1, NULL, &bh); if (ret < 0) goto out_header; for (;;) { kaddr = kmap_local_page(bh->b_page); su = nilfs_sufile_block_get_segment_usage( sufile, sup->sup_segnum, bh, kaddr); if (nilfs_suinfo_update_lastmod(sup)) su->su_lastmod = cpu_to_le64(sup->sup_sui.sui_lastmod); if (nilfs_suinfo_update_nblocks(sup)) su->su_nblocks = cpu_to_le32(sup->sup_sui.sui_nblocks); if (nilfs_suinfo_update_flags(sup)) { /* * Active flag is a virtual flag projected by running * nilfs kernel code - drop it not to write it to * disk. */ sup->sup_sui.sui_flags &= ~BIT(NILFS_SEGMENT_USAGE_ACTIVE); cleansi = nilfs_suinfo_clean(&sup->sup_sui); cleansu = nilfs_segment_usage_clean(su); dirtysi = nilfs_suinfo_dirty(&sup->sup_sui); dirtysu = nilfs_segment_usage_dirty(su); if (cleansi && !cleansu) ++ncleaned; else if (!cleansi && cleansu) --ncleaned; if (dirtysi && !dirtysu) ++ndirtied; else if (!dirtysi && dirtysu) --ndirtied; su->su_flags = cpu_to_le32(sup->sup_sui.sui_flags); } kunmap_local(kaddr); sup = (void *)sup + supsz; if (sup >= supend) break; prev_blkoff = blkoff; blkoff = nilfs_sufile_get_blkoff(sufile, sup->sup_segnum); if (blkoff == prev_blkoff) continue; /* get different block */ mark_buffer_dirty(bh); put_bh(bh); ret = nilfs_mdt_get_block(sufile, blkoff, 1, NULL, &bh); if (unlikely(ret < 0)) goto out_mark; } mark_buffer_dirty(bh); put_bh(bh); out_mark: if (ncleaned || ndirtied) { nilfs_sufile_mod_counter(header_bh, (u64)ncleaned, (u64)ndirtied); NILFS_SUI(sufile)->ncleansegs += ncleaned; } nilfs_mdt_mark_dirty(sufile); out_header: put_bh(header_bh); out_sem: up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } /** * nilfs_sufile_trim_fs() - trim ioctl handle function * @sufile: inode of segment usage file * @range: fstrim_range structure * * start: First Byte to trim * len: number of Bytes to trim from start * minlen: minimum extent length in Bytes * * Decription: nilfs_sufile_trim_fs goes through all segments containing bytes * from start to start+len. start is rounded up to the next block boundary * and start+len is rounded down. For each clean segment blkdev_issue_discard * function is invoked. * * Return Value: On success, 0 is returned or negative error code, otherwise. */ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) { struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; struct buffer_head *su_bh; struct nilfs_segment_usage *su; void *kaddr; size_t n, i, susz = NILFS_MDT(sufile)->mi_entry_size; sector_t seg_start, seg_end, start_block, end_block; sector_t start = 0, nblocks = 0; u64 segnum, segnum_end, minlen, len, max_blocks, ndiscarded = 0; int ret = 0; unsigned int sects_per_block; sects_per_block = (1 << nilfs->ns_blocksize_bits) / bdev_logical_block_size(nilfs->ns_bdev); len = range->len >> nilfs->ns_blocksize_bits; minlen = range->minlen >> nilfs->ns_blocksize_bits; max_blocks = ((u64)nilfs->ns_nsegments * nilfs->ns_blocks_per_segment); if (!len || range->start >= max_blocks << nilfs->ns_blocksize_bits) return -EINVAL; start_block = (range->start + nilfs->ns_blocksize - 1) >> nilfs->ns_blocksize_bits; /* * range->len can be very large (actually, it is set to * ULLONG_MAX by default) - truncate upper end of the range * carefully so as not to overflow. */ if (max_blocks - start_block < len) end_block = max_blocks - 1; else end_block = start_block + len - 1; segnum = nilfs_get_segnum_of_block(nilfs, start_block); segnum_end = nilfs_get_segnum_of_block(nilfs, end_block); down_read(&NILFS_MDT(sufile)->mi_sem); while (segnum <= segnum_end) { n = nilfs_sufile_segment_usages_in_block(sufile, segnum, segnum_end); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh); if (ret < 0) { if (ret != -ENOENT) goto out_sem; /* hole */ segnum += n; continue; } kaddr = kmap_local_page(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); for (i = 0; i < n; ++i, ++segnum, su = (void *)su + susz) { if (!nilfs_segment_usage_clean(su)) continue; nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end); if (!nblocks) { /* start new extent */ start = seg_start; nblocks = seg_end - seg_start + 1; continue; } if (start + nblocks == seg_start) { /* add to previous extent */ nblocks += seg_end - seg_start + 1; continue; } /* discard previous extent */ if (start < start_block) { nblocks -= start_block - start; start = start_block; } if (nblocks >= minlen) { kunmap_local(kaddr); ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, GFP_NOFS); if (ret < 0) { put_bh(su_bh); goto out_sem; } ndiscarded += nblocks; kaddr = kmap_local_page(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage( sufile, segnum, su_bh, kaddr); } /* start new extent */ start = seg_start; nblocks = seg_end - seg_start + 1; } kunmap_local(kaddr); put_bh(su_bh); } if (nblocks) { /* discard last extent */ if (start < start_block) { nblocks -= start_block - start; start = start_block; } if (start + nblocks > end_block + 1) nblocks = end_block - start + 1; if (nblocks >= minlen) { ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, GFP_NOFS); if (!ret) ndiscarded += nblocks; } } out_sem: up_read(&NILFS_MDT(sufile)->mi_sem); range->len = ndiscarded << nilfs->ns_blocksize_bits; return ret; } /** * nilfs_sufile_read - read or get sufile inode * @sb: super block instance * @susize: size of a segment usage entry * @raw_inode: on-disk sufile inode * @inodep: buffer to store the inode */ int nilfs_sufile_read(struct super_block *sb, size_t susize, struct nilfs_inode *raw_inode, struct inode **inodep) { struct inode *sufile; struct nilfs_sufile_info *sui; struct buffer_head *header_bh; struct nilfs_sufile_header *header; void *kaddr; int err; if (susize > sb->s_blocksize) { nilfs_err(sb, "too large segment usage size: %zu bytes", susize); return -EINVAL; } else if (susize < NILFS_MIN_SEGMENT_USAGE_SIZE) { nilfs_err(sb, "too small segment usage size: %zu bytes", susize); return -EINVAL; } sufile = nilfs_iget_locked(sb, NULL, NILFS_SUFILE_INO); if (unlikely(!sufile)) return -ENOMEM; if (!(sufile->i_state & I_NEW)) goto out; err = nilfs_mdt_init(sufile, NILFS_MDT_GFP, sizeof(*sui)); if (err) goto failed; nilfs_mdt_set_entry_size(sufile, susize, sizeof(struct nilfs_sufile_header)); err = nilfs_read_inode_common(sufile, raw_inode); if (err) goto failed; err = nilfs_sufile_get_header_block(sufile, &header_bh); if (err) goto failed; sui = NILFS_SUI(sufile); kaddr = kmap_local_page(header_bh->b_page); header = kaddr + bh_offset(header_bh); sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs); kunmap_local(kaddr); brelse(header_bh); sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1; sui->allocmin = 0; unlock_new_inode(sufile); out: *inodep = sufile; return 0; failed: iget_failed(sufile); return err; }
256 256 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_PGTABLE_INVERT_H #define _ASM_PGTABLE_INVERT_H 1 #ifndef __ASSEMBLY__ /* * A clear pte value is special, and doesn't get inverted. * * Note that even users that only pass a pgprot_t (rather * than a full pte) won't trigger the special zero case, * because even PAGE_NONE has _PAGE_PROTNONE | _PAGE_ACCESSED * set. So the all zero case really is limited to just the * cleared page table entry case. */ static inline bool __pte_needs_invert(u64 val) { return val && !(val & _PAGE_PRESENT); } /* Get a mask to xor with the page table entry to get the correct pfn. */ static inline u64 protnone_mask(u64 val) { return __pte_needs_invert(val) ? ~0ull : 0; } static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask) { /* * When a PTE transitions from NONE to !NONE or vice-versa * invert the PFN part to stop speculation. * pte_pfn undoes this when needed. */ if (__pte_needs_invert(oldval) != __pte_needs_invert(val)) val = (val & ~mask) | (~val & mask); return val; } #endif /* __ASSEMBLY__ */ #endif
15 29 15 29 26 15 14 14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 /* * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README */ #include <linux/time.h> #include <linux/fs.h> #include "reiserfs.h" #include <linux/string.h> #include <linux/buffer_head.h> #include <linux/stdarg.h> static char error_buf[1024]; static char fmt_buf[1024]; static char off_buf[80]; static char *reiserfs_cpu_offset(struct cpu_key *key) { if (cpu_key_k_type(key) == TYPE_DIRENTRY) sprintf(off_buf, "%llu(%llu)", (unsigned long long) GET_HASH_VALUE(cpu_key_k_offset(key)), (unsigned long long) GET_GENERATION_NUMBER(cpu_key_k_offset(key))); else sprintf(off_buf, "0x%Lx", (unsigned long long)cpu_key_k_offset(key)); return off_buf; } static char *le_offset(struct reiserfs_key *key) { int version; version = le_key_version(key); if (le_key_k_type(version, key) == TYPE_DIRENTRY) sprintf(off_buf, "%llu(%llu)", (unsigned long long) GET_HASH_VALUE(le_key_k_offset(version, key)), (unsigned long long) GET_GENERATION_NUMBER(le_key_k_offset(version, key))); else sprintf(off_buf, "0x%Lx", (unsigned long long)le_key_k_offset(version, key)); return off_buf; } static char *cpu_type(struct cpu_key *key) { if (cpu_key_k_type(key) == TYPE_STAT_DATA) return "SD"; if (cpu_key_k_type(key) == TYPE_DIRENTRY) return "DIR"; if (cpu_key_k_type(key) == TYPE_DIRECT) return "DIRECT"; if (cpu_key_k_type(key) == TYPE_INDIRECT) return "IND"; return "UNKNOWN"; } static char *le_type(struct reiserfs_key *key) { int version; version = le_key_version(key); if (le_key_k_type(version, key) == TYPE_STAT_DATA) return "SD"; if (le_key_k_type(version, key) == TYPE_DIRENTRY) return "DIR"; if (le_key_k_type(version, key) == TYPE_DIRECT) return "DIRECT"; if (le_key_k_type(version, key) == TYPE_INDIRECT) return "IND"; return "UNKNOWN"; } /* %k */ static int scnprintf_le_key(char *buf, size_t size, struct reiserfs_key *key) { if (key) return scnprintf(buf, size, "[%d %d %s %s]", le32_to_cpu(key->k_dir_id), le32_to_cpu(key->k_objectid), le_offset(key), le_type(key)); else return scnprintf(buf, size, "[NULL]"); } /* %K */ static int scnprintf_cpu_key(char *buf, size_t size, struct cpu_key *key) { if (key) return scnprintf(buf, size, "[%d %d %s %s]", key->on_disk_key.k_dir_id, key->on_disk_key.k_objectid, reiserfs_cpu_offset(key), cpu_type(key)); else return scnprintf(buf, size, "[NULL]"); } static int scnprintf_de_head(char *buf, size_t size, struct reiserfs_de_head *deh) { if (deh) return scnprintf(buf, size, "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]", deh_offset(deh), deh_dir_id(deh), deh_objectid(deh), deh_location(deh), deh_state(deh)); else return scnprintf(buf, size, "[NULL]"); } static int scnprintf_item_head(char *buf, size_t size, struct item_head *ih) { if (ih) { char *p = buf; char * const end = buf + size; p += scnprintf(p, end - p, "%s", (ih_version(ih) == KEY_FORMAT_3_6) ? "*3.6* " : "*3.5*"); p += scnprintf_le_key(p, end - p, &ih->ih_key); p += scnprintf(p, end - p, ", item_len %d, item_location %d, free_space(entry_count) %d", ih_item_len(ih), ih_location(ih), ih_free_space(ih)); return p - buf; } else return scnprintf(buf, size, "[NULL]"); } static int scnprintf_direntry(char *buf, size_t size, struct reiserfs_dir_entry *de) { char name[20]; memcpy(name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen); name[de->de_namelen > 19 ? 19 : de->de_namelen] = 0; return scnprintf(buf, size, "\"%s\"==>[%d %d]", name, de->de_dir_id, de->de_objectid); } static int scnprintf_block_head(char *buf, size_t size, struct buffer_head *bh) { return scnprintf(buf, size, "level=%d, nr_items=%d, free_space=%d rdkey ", B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh)); } static int scnprintf_buffer_head(char *buf, size_t size, struct buffer_head *bh) { return scnprintf(buf, size, "dev %pg, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)", bh->b_bdev, bh->b_size, (unsigned long long)bh->b_blocknr, atomic_read(&(bh->b_count)), bh->b_state, bh->b_page, buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE", buffer_dirty(bh) ? "DIRTY" : "CLEAN", buffer_locked(bh) ? "LOCKED" : "UNLOCKED"); } static int scnprintf_disk_child(char *buf, size_t size, struct disk_child *dc) { return scnprintf(buf, size, "[dc_number=%d, dc_size=%u]", dc_block_number(dc), dc_size(dc)); } static char *is_there_reiserfs_struct(char *fmt, int *what) { char *k = fmt; while ((k = strchr(k, '%')) != NULL) { if (k[1] == 'k' || k[1] == 'K' || k[1] == 'h' || k[1] == 't' || k[1] == 'z' || k[1] == 'b' || k[1] == 'y' || k[1] == 'a') { *what = k[1]; break; } k++; } return k; } /* * debugging reiserfs we used to print out a lot of different * variables, like keys, item headers, buffer heads etc. Values of * most fields matter. So it took a long time just to write * appropriative printk. With this reiserfs_warning you can use format * specification for complex structures like you used to do with * printfs for integers, doubles and pointers. For instance, to print * out key structure you have to write just: * reiserfs_warning ("bad key %k", key); * instead of * printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid, * key->k_offset, key->k_uniqueness); */ static DEFINE_SPINLOCK(error_lock); static void prepare_error_buf(const char *fmt, va_list args) { char *fmt1 = fmt_buf; char *k; char *p = error_buf; char * const end = &error_buf[sizeof(error_buf)]; int what; spin_lock(&error_lock); if (WARN_ON(strscpy(fmt_buf, fmt, sizeof(fmt_buf)) < 0)) { strscpy(error_buf, "format string too long", end - error_buf); goto out_unlock; } while ((k = is_there_reiserfs_struct(fmt1, &what)) != NULL) { *k = 0; p += vscnprintf(p, end - p, fmt1, args); switch (what) { case 'k': p += scnprintf_le_key(p, end - p, va_arg(args, struct reiserfs_key *)); break; case 'K': p += scnprintf_cpu_key(p, end - p, va_arg(args, struct cpu_key *)); break; case 'h': p += scnprintf_item_head(p, end - p, va_arg(args, struct item_head *)); break; case 't': p += scnprintf_direntry(p, end - p, va_arg(args, struct reiserfs_dir_entry *)); break; case 'y': p += scnprintf_disk_child(p, end - p, va_arg(args, struct disk_child *)); break; case 'z': p += scnprintf_block_head(p, end - p, va_arg(args, struct buffer_head *)); break; case 'b': p += scnprintf_buffer_head(p, end - p, va_arg(args, struct buffer_head *)); break; case 'a': p += scnprintf_de_head(p, end - p, va_arg(args, struct reiserfs_de_head *)); break; } fmt1 = k + 2; } p += vscnprintf(p, end - p, fmt1, args); out_unlock: spin_unlock(&error_lock); } /* * in addition to usual conversion specifiers this accepts reiserfs * specific conversion specifiers: * %k to print little endian key, * %K to print cpu key, * %h to print item_head, * %t to print directory entry * %z to print block head (arg must be struct buffer_head * * %b to print buffer_head */ #define do_reiserfs_warning(fmt)\ {\ va_list args;\ va_start( args, fmt );\ prepare_error_buf( fmt, args );\ va_end( args );\ } void __reiserfs_warning(struct super_block *sb, const char *id, const char *function, const char *fmt, ...) { do_reiserfs_warning(fmt); if (sb) printk(KERN_WARNING "REISERFS warning (device %s): %s%s%s: " "%s\n", sb->s_id, id ? id : "", id ? " " : "", function, error_buf); else printk(KERN_WARNING "REISERFS warning: %s%s%s: %s\n", id ? id : "", id ? " " : "", function, error_buf); } /* No newline.. reiserfs_info calls can be followed by printk's */ void reiserfs_info(struct super_block *sb, const char *fmt, ...) { do_reiserfs_warning(fmt); if (sb) printk(KERN_NOTICE "REISERFS (device %s): %s", sb->s_id, error_buf); else printk(KERN_NOTICE "REISERFS %s:", error_buf); } /* No newline.. reiserfs_printk calls can be followed by printk's */ static void reiserfs_printk(const char *fmt, ...) { do_reiserfs_warning(fmt); printk(error_buf); } void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...) { #ifdef CONFIG_REISERFS_CHECK do_reiserfs_warning(fmt); if (s) printk(KERN_DEBUG "REISERFS debug (device %s): %s\n", s->s_id, error_buf); else printk(KERN_DEBUG "REISERFS debug: %s\n", error_buf); #endif } /* * The format: * * maintainer-errorid: [function-name:] message * * where errorid is unique to the maintainer and function-name is * optional, is recommended, so that anyone can easily find the bug * with a simple grep for the short to type string * maintainer-errorid. Don't bother with reusing errorids, there are * lots of numbers out there. * * Example: * * reiserfs_panic( * p_sb, "reiser-29: reiserfs_new_blocknrs: " * "one of search_start or rn(%d) is equal to MAX_B_NUM," * "which means that we are optimizing location based on the " * "bogus location of a temp buffer (%p).", * rn, bh * ); * * Regular panic()s sometimes clear the screen before the message can * be read, thus the need for the while loop. * * Numbering scheme for panic used by Vladimir and Anatoly( Hans completely * ignores this scheme, and considers it pointless complexity): * * panics in reiserfs_fs.h have numbers from 1000 to 1999 * super.c 2000 to 2999 * preserve.c (unused) 3000 to 3999 * bitmap.c 4000 to 4999 * stree.c 5000 to 5999 * prints.c 6000 to 6999 * namei.c 7000 to 7999 * fix_nodes.c 8000 to 8999 * dir.c 9000 to 9999 * lbalance.c 10000 to 10999 * ibalance.c 11000 to 11999 not ready * do_balan.c 12000 to 12999 * inode.c 13000 to 13999 * file.c 14000 to 14999 * objectid.c 15000 - 15999 * buffer.c 16000 - 16999 * symlink.c 17000 - 17999 * * . */ void __reiserfs_panic(struct super_block *sb, const char *id, const char *function, const char *fmt, ...) { do_reiserfs_warning(fmt); #ifdef CONFIG_REISERFS_CHECK dump_stack(); #endif if (sb) printk(KERN_WARNING "REISERFS panic (device %s): %s%s%s: %s\n", sb->s_id, id ? id : "", id ? " " : "", function, error_buf); else printk(KERN_WARNING "REISERFS panic: %s%s%s: %s\n", id ? id : "", id ? " " : "", function, error_buf); BUG(); } void __reiserfs_error(struct super_block *sb, const char *id, const char *function, const char *fmt, ...) { do_reiserfs_warning(fmt); BUG_ON(sb == NULL); if (reiserfs_error_panic(sb)) __reiserfs_panic(sb, id, function, error_buf); if (id && id[0]) printk(KERN_CRIT "REISERFS error (device %s): %s %s: %s\n", sb->s_id, id, function, error_buf); else printk(KERN_CRIT "REISERFS error (device %s): %s: %s\n", sb->s_id, function, error_buf); if (sb_rdonly(sb)) return; reiserfs_info(sb, "Remounting filesystem read-only\n"); sb->s_flags |= SB_RDONLY; reiserfs_abort_journal(sb, -EIO); } void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...) { do_reiserfs_warning(fmt); if (reiserfs_error_panic(sb)) { panic(KERN_CRIT "REISERFS panic (device %s): %s\n", sb->s_id, error_buf); } if (reiserfs_is_journal_aborted(SB_JOURNAL(sb))) return; printk(KERN_CRIT "REISERFS abort (device %s): %s\n", sb->s_id, error_buf); sb->s_flags |= SB_RDONLY; reiserfs_abort_journal(sb, errno); } /* * this prints internal nodes (4 keys/items in line) (dc_number, * dc_size)[k_dirid, k_objectid, k_offset, k_uniqueness](dc_number, * dc_size)... */ static int print_internal(struct buffer_head *bh, int first, int last) { struct reiserfs_key *key; struct disk_child *dc; int i; int from, to; if (!B_IS_KEYS_LEVEL(bh)) return 1; check_internal(bh); if (first == -1) { from = 0; to = B_NR_ITEMS(bh); } else { from = first; to = min_t(int, last, B_NR_ITEMS(bh)); } reiserfs_printk("INTERNAL NODE (%ld) contains %z\n", bh->b_blocknr, bh); dc = B_N_CHILD(bh, from); reiserfs_printk("PTR %d: %y ", from, dc); for (i = from, key = internal_key(bh, from), dc++; i < to; i++, key++, dc++) { reiserfs_printk("KEY %d: %k PTR %d: %y ", i, key, i + 1, dc); if (i && i % 4 == 0) printk("\n"); } printk("\n"); return 0; } static int print_leaf(struct buffer_head *bh, int print_mode, int first, int last) { struct block_head *blkh; struct item_head *ih; int i, nr; int from, to; if (!B_IS_ITEMS_LEVEL(bh)) return 1; check_leaf(bh); blkh = B_BLK_HEAD(bh); ih = item_head(bh, 0); nr = blkh_nr_item(blkh); printk ("\n===================================================================\n"); reiserfs_printk("LEAF NODE (%ld) contains %z\n", bh->b_blocknr, bh); if (!(print_mode & PRINT_LEAF_ITEMS)) { reiserfs_printk("FIRST ITEM_KEY: %k, LAST ITEM KEY: %k\n", &(ih->ih_key), &((ih + nr - 1)->ih_key)); return 0; } if (first < 0 || first > nr - 1) from = 0; else from = first; if (last < 0 || last > nr) to = nr; else to = last; ih += from; printk ("-------------------------------------------------------------------------------\n"); printk ("|##| type | key | ilen | free_space | version | loc |\n"); for (i = from; i < to; i++, ih++) { printk ("-------------------------------------------------------------------------------\n"); reiserfs_printk("|%2d| %h |\n", i, ih); if (print_mode & PRINT_LEAF_ITEMS) op_print_item(ih, ih_item_body(bh, ih)); } printk ("===================================================================\n"); return 0; } char *reiserfs_hashname(int code) { if (code == YURA_HASH) return "rupasov"; if (code == TEA_HASH) return "tea"; if (code == R5_HASH) return "r5"; return "unknown"; } /* return 1 if this is not super block */ static int print_super_block(struct buffer_head *bh) { struct reiserfs_super_block *rs = (struct reiserfs_super_block *)(bh->b_data); int skipped, data_blocks; char *version; if (is_reiserfs_3_5(rs)) { version = "3.5"; } else if (is_reiserfs_3_6(rs)) { version = "3.6"; } else if (is_reiserfs_jr(rs)) { version = ((sb_version(rs) == REISERFS_VERSION_2) ? "3.6" : "3.5"); } else { return 1; } printk("%pg\'s super block is in block %llu\n", bh->b_bdev, (unsigned long long)bh->b_blocknr); printk("Reiserfs version %s\n", version); printk("Block count %u\n", sb_block_count(rs)); printk("Blocksize %d\n", sb_blocksize(rs)); printk("Free blocks %u\n", sb_free_blocks(rs)); /* * FIXME: this would be confusing if * someone stores reiserfs super block in some data block ;) // skipped = (bh->b_blocknr * bh->b_size) / sb_blocksize(rs); */ skipped = bh->b_blocknr; data_blocks = sb_block_count(rs) - skipped - 1 - sb_bmap_nr(rs) - (!is_reiserfs_jr(rs) ? sb_jp_journal_size(rs) + 1 : sb_reserved_for_journal(rs)) - sb_free_blocks(rs); printk ("Busy blocks (skipped %d, bitmaps - %d, journal (or reserved) blocks - %d\n" "1 super block, %d data blocks\n", skipped, sb_bmap_nr(rs), (!is_reiserfs_jr(rs) ? (sb_jp_journal_size(rs) + 1) : sb_reserved_for_journal(rs)), data_blocks); printk("Root block %u\n", sb_root_block(rs)); printk("Journal block (first) %d\n", sb_jp_journal_1st_block(rs)); printk("Journal dev %d\n", sb_jp_journal_dev(rs)); printk("Journal orig size %d\n", sb_jp_journal_size(rs)); printk("FS state %d\n", sb_fs_state(rs)); printk("Hash function \"%s\"\n", reiserfs_hashname(sb_hash_function_code(rs))); printk("Tree height %d\n", sb_tree_height(rs)); return 0; } static int print_desc_block(struct buffer_head *bh) { struct reiserfs_journal_desc *desc; if (memcmp(get_journal_desc_magic(bh), JOURNAL_DESC_MAGIC, 8)) return 1; desc = (struct reiserfs_journal_desc *)(bh->b_data); printk("Desc block %llu (j_trans_id %d, j_mount_id %d, j_len %d)", (unsigned long long)bh->b_blocknr, get_desc_trans_id(desc), get_desc_mount_id(desc), get_desc_trans_len(desc)); return 0; } /* ..., int print_mode, int first, int last) */ void print_block(struct buffer_head *bh, ...) { va_list args; int mode, first, last; if (!bh) { printk("print_block: buffer is NULL\n"); return; } va_start(args, bh); mode = va_arg(args, int); first = va_arg(args, int); last = va_arg(args, int); if (print_leaf(bh, mode, first, last)) if (print_internal(bh, first, last)) if (print_super_block(bh)) if (print_desc_block(bh)) printk ("Block %llu contains unformatted data\n", (unsigned long long)bh->b_blocknr); va_end(args); } static char print_tb_buf[2048]; /* this stores initial state of tree balance in the print_tb_buf */ void store_print_tb(struct tree_balance *tb) { int h = 0; int i; struct buffer_head *tbSh, *tbFh; if (!tb) return; sprintf(print_tb_buf, "\n" "BALANCING %d\n" "MODE=%c, ITEM_POS=%d POS_IN_ITEM=%d\n" "=====================================================================\n" "* h * S * L * R * F * FL * FR * CFL * CFR *\n", REISERFS_SB(tb->tb_sb)->s_do_balance, tb->tb_mode, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item); for (h = 0; h < ARRAY_SIZE(tb->insert_size); h++) { if (PATH_H_PATH_OFFSET(tb->tb_path, h) <= tb->tb_path->path_length && PATH_H_PATH_OFFSET(tb->tb_path, h) > ILLEGAL_PATH_ELEMENT_OFFSET) { tbSh = PATH_H_PBUFFER(tb->tb_path, h); tbFh = PATH_H_PPARENT(tb->tb_path, h); } else { tbSh = NULL; tbFh = NULL; } sprintf(print_tb_buf + strlen(print_tb_buf), "* %d * %3lld(%2d) * %3lld(%2d) * %3lld(%2d) * %5lld * %5lld * %5lld * %5lld * %5lld *\n", h, (tbSh) ? (long long)(tbSh->b_blocknr) : (-1LL), (tbSh) ? atomic_read(&tbSh->b_count) : -1, (tb->L[h]) ? (long long)(tb->L[h]->b_blocknr) : (-1LL), (tb->L[h]) ? atomic_read(&tb->L[h]->b_count) : -1, (tb->R[h]) ? (long long)(tb->R[h]->b_blocknr) : (-1LL), (tb->R[h]) ? atomic_read(&tb->R[h]->b_count) : -1, (tbFh) ? (long long)(tbFh->b_blocknr) : (-1LL), (tb->FL[h]) ? (long long)(tb->FL[h]-> b_blocknr) : (-1LL), (tb->FR[h]) ? (long long)(tb->FR[h]-> b_blocknr) : (-1LL), (tb->CFL[h]) ? (long long)(tb->CFL[h]-> b_blocknr) : (-1LL), (tb->CFR[h]) ? (long long)(tb->CFR[h]-> b_blocknr) : (-1LL)); } sprintf(print_tb_buf + strlen(print_tb_buf), "=====================================================================\n" "* h * size * ln * lb * rn * rb * blkn * s0 * s1 * s1b * s2 * s2b * curb * lk * rk *\n" "* 0 * %4d * %2d * %2d * %2d * %2d * %4d * %2d * %2d * %3d * %2d * %3d * %4d * %2d * %2d *\n", tb->insert_size[0], tb->lnum[0], tb->lbytes, tb->rnum[0], tb->rbytes, tb->blknum[0], tb->s0num, tb->snum[0], tb->sbytes[0], tb->snum[1], tb->sbytes[1], tb->cur_blknum, tb->lkey[0], tb->rkey[0]); /* this prints balance parameters for non-leaf levels */ h = 0; do { h++; sprintf(print_tb_buf + strlen(print_tb_buf), "* %d * %4d * %2d * * %2d * * %2d *\n", h, tb->insert_size[h], tb->lnum[h], tb->rnum[h], tb->blknum[h]); } while (tb->insert_size[h]); sprintf(print_tb_buf + strlen(print_tb_buf), "=====================================================================\n" "FEB list: "); /* print FEB list (list of buffers in form (bh (b_blocknr, b_count), that will be used for new nodes) */ h = 0; for (i = 0; i < ARRAY_SIZE(tb->FEB); i++) sprintf(print_tb_buf + strlen(print_tb_buf), "%p (%llu %d)%s", tb->FEB[i], tb->FEB[i] ? (unsigned long long)tb->FEB[i]-> b_blocknr : 0ULL, tb->FEB[i] ? atomic_read(&tb->FEB[i]->b_count) : 0, (i == ARRAY_SIZE(tb->FEB) - 1) ? "\n" : ", "); sprintf(print_tb_buf + strlen(print_tb_buf), "======================== the end ====================================\n"); } void print_cur_tb(char *mes) { printk("%s\n%s", mes, print_tb_buf); } static void check_leaf_block_head(struct buffer_head *bh) { struct block_head *blkh; int nr; blkh = B_BLK_HEAD(bh); nr = blkh_nr_item(blkh); if (nr > (bh->b_size - BLKH_SIZE) / IH_SIZE) reiserfs_panic(NULL, "vs-6010", "invalid item number %z", bh); if (blkh_free_space(blkh) > bh->b_size - BLKH_SIZE - IH_SIZE * nr) reiserfs_panic(NULL, "vs-6020", "invalid free space %z", bh); } static void check_internal_block_head(struct buffer_head *bh) { if (!(B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL && B_LEVEL(bh) <= MAX_HEIGHT)) reiserfs_panic(NULL, "vs-6025", "invalid level %z", bh); if (B_NR_ITEMS(bh) > (bh->b_size - BLKH_SIZE) / IH_SIZE) reiserfs_panic(NULL, "vs-6030", "invalid item number %z", bh); if (B_FREE_SPACE(bh) != bh->b_size - BLKH_SIZE - KEY_SIZE * B_NR_ITEMS(bh) - DC_SIZE * (B_NR_ITEMS(bh) + 1)) reiserfs_panic(NULL, "vs-6040", "invalid free space %z", bh); } void check_leaf(struct buffer_head *bh) { int i; struct item_head *ih; if (!bh) return; check_leaf_block_head(bh); for (i = 0, ih = item_head(bh, 0); i < B_NR_ITEMS(bh); i++, ih++) op_check_item(ih, ih_item_body(bh, ih)); } void check_internal(struct buffer_head *bh) { if (!bh) return; check_internal_block_head(bh); } void print_statistics(struct super_block *s) { /* printk ("reiserfs_put_super: session statistics: balances %d, fix_nodes %d, \ bmap with search %d, without %d, dir2ind %d, ind2dir %d\n", REISERFS_SB(s)->s_do_balance, REISERFS_SB(s)->s_fix_nodes, REISERFS_SB(s)->s_bmaps, REISERFS_SB(s)->s_bmaps_without_search, REISERFS_SB(s)->s_direct2indirect, REISERFS_SB(s)->s_indirect2direct); */ }
1 2 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong <djwong@kernel.org> */ #include "xfs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_quota.h" #include "xfs_bmap_util.h" #include "xfs_reflink.h" #include "xfs_trace.h" #include "xfs_exchrange.h" #include "xfs_exchmaps.h" #include "xfs_sb.h" #include "xfs_icache.h" #include "xfs_log.h" #include "xfs_rtbitmap.h" #include <linux/fsnotify.h> /* Lock (and optionally join) two inodes for a file range exchange. */ void xfs_exchrange_ilock( struct xfs_trans *tp, struct xfs_inode *ip1, struct xfs_inode *ip2) { if (ip1 != ip2) xfs_lock_two_inodes(ip1, XFS_ILOCK_EXCL, ip2, XFS_ILOCK_EXCL); else xfs_ilock(ip1, XFS_ILOCK_EXCL); if (tp) { xfs_trans_ijoin(tp, ip1, 0); if (ip2 != ip1) xfs_trans_ijoin(tp, ip2, 0); } } /* Unlock two inodes after a file range exchange operation. */ void xfs_exchrange_iunlock( struct xfs_inode *ip1, struct xfs_inode *ip2) { if (ip2 != ip1) xfs_iunlock(ip2, XFS_ILOCK_EXCL); xfs_iunlock(ip1, XFS_ILOCK_EXCL); } /* * Estimate the resource requirements to exchange file contents between the two * files. The caller is required to hold the IOLOCK and the MMAPLOCK and to * have flushed both inodes' pagecache and active direct-ios. */ int xfs_exchrange_estimate( struct xfs_exchmaps_req *req) { int error; xfs_exchrange_ilock(NULL, req->ip1, req->ip2); error = xfs_exchmaps_estimate(req); xfs_exchrange_iunlock(req->ip1, req->ip2); return error; } #define QRETRY_IP1 (0x1) #define QRETRY_IP2 (0x2) /* * Obtain a quota reservation to make sure we don't hit EDQUOT. We can skip * this if quota enforcement is disabled or if both inodes' dquots are the * same. The qretry structure must be initialized to zeroes before the first * call to this function. */ STATIC int xfs_exchrange_reserve_quota( struct xfs_trans *tp, const struct xfs_exchmaps_req *req, unsigned int *qretry) { int64_t ddelta, rdelta; int ip1_error = 0; int error; /* * Don't bother with a quota reservation if we're not enforcing them * or the two inodes have the same dquots. */ if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 || (req->ip1->i_udquot == req->ip2->i_udquot && req->ip1->i_gdquot == req->ip2->i_gdquot && req->ip1->i_pdquot == req->ip2->i_pdquot)) return 0; *qretry = 0; /* * For each file, compute the net gain in the number of regular blocks * that will be mapped into that file and reserve that much quota. The * quota counts must be able to absorb at least that much space. */ ddelta = req->ip2_bcount - req->ip1_bcount; rdelta = req->ip2_rtbcount - req->ip1_rtbcount; if (ddelta > 0 || rdelta > 0) { error = xfs_trans_reserve_quota_nblks(tp, req->ip1, ddelta > 0 ? ddelta : 0, rdelta > 0 ? rdelta : 0, false); if (error == -EDQUOT || error == -ENOSPC) { /* * Save this error and see what happens if we try to * reserve quota for ip2. Then report both. */ *qretry |= QRETRY_IP1; ip1_error = error; error = 0; } if (error) return error; } if (ddelta < 0 || rdelta < 0) { error = xfs_trans_reserve_quota_nblks(tp, req->ip2, ddelta < 0 ? -ddelta : 0, rdelta < 0 ? -rdelta : 0, false); if (error == -EDQUOT || error == -ENOSPC) *qretry |= QRETRY_IP2; if (error) return error; } if (ip1_error) return ip1_error; /* * For each file, forcibly reserve the gross gain in mapped blocks so * that we don't trip over any quota block reservation assertions. * We must reserve the gross gain because the quota code subtracts from * bcount the number of blocks that we unmap; it does not add that * quantity back to the quota block reservation. */ error = xfs_trans_reserve_quota_nblks(tp, req->ip1, req->ip1_bcount, req->ip1_rtbcount, true); if (error) return error; return xfs_trans_reserve_quota_nblks(tp, req->ip2, req->ip2_bcount, req->ip2_rtbcount, true); } /* Exchange the mappings (and hence the contents) of two files' forks. */ STATIC int xfs_exchrange_mappings( const struct xfs_exchrange *fxr, struct xfs_inode *ip1, struct xfs_inode *ip2) { struct xfs_mount *mp = ip1->i_mount; struct xfs_exchmaps_req req = { .ip1 = ip1, .ip2 = ip2, .startoff1 = XFS_B_TO_FSBT(mp, fxr->file1_offset), .startoff2 = XFS_B_TO_FSBT(mp, fxr->file2_offset), .blockcount = XFS_B_TO_FSB(mp, fxr->length), }; struct xfs_trans *tp; unsigned int qretry; bool retried = false; int error; trace_xfs_exchrange_mappings(fxr, ip1, ip2); if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) req.flags |= XFS_EXCHMAPS_SET_SIZES; if (fxr->flags & XFS_EXCHANGE_RANGE_FILE1_WRITTEN) req.flags |= XFS_EXCHMAPS_INO1_WRITTEN; /* * Round the request length up to the nearest file allocation unit. * The prep function already checked that the request offsets and * length in @fxr are safe to round up. */ if (xfs_inode_has_bigrtalloc(ip2)) req.blockcount = xfs_rtb_roundup_rtx(mp, req.blockcount); error = xfs_exchrange_estimate(&req); if (error) return error; retry: /* Allocate the transaction, lock the inodes, and join them. */ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, req.resblks, 0, XFS_TRANS_RES_FDBLKS, &tp); if (error) return error; xfs_exchrange_ilock(tp, ip1, ip2); trace_xfs_exchrange_before(ip2, 2); trace_xfs_exchrange_before(ip1, 1); error = xfs_exchmaps_check_forks(mp, &req); if (error) goto out_trans_cancel; /* * Reserve ourselves some quota if any of them are in enforcing mode. * In theory we only need enough to satisfy the change in the number * of blocks between the two ranges being remapped. */ error = xfs_exchrange_reserve_quota(tp, &req, &qretry); if ((error == -EDQUOT || error == -ENOSPC) && !retried) { xfs_trans_cancel(tp); xfs_exchrange_iunlock(ip1, ip2); if (qretry & QRETRY_IP1) xfs_blockgc_free_quota(ip1, 0); if (qretry & QRETRY_IP2) xfs_blockgc_free_quota(ip2, 0); retried = true; goto retry; } if (error) goto out_trans_cancel; /* If we got this far on a dry run, all parameters are ok. */ if (fxr->flags & XFS_EXCHANGE_RANGE_DRY_RUN) goto out_trans_cancel; /* Update the mtime and ctime of both files. */ if (fxr->flags & __XFS_EXCHANGE_RANGE_UPD_CMTIME1) xfs_trans_ichgtime(tp, ip1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); if (fxr->flags & __XFS_EXCHANGE_RANGE_UPD_CMTIME2) xfs_trans_ichgtime(tp, ip2, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_exchange_mappings(tp, &req); /* * Force the log to persist metadata updates if the caller or the * administrator requires this. The generic prep function already * flushed the relevant parts of the page cache. */ if (xfs_has_wsync(mp) || (fxr->flags & XFS_EXCHANGE_RANGE_DSYNC)) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp); trace_xfs_exchrange_after(ip2, 2); trace_xfs_exchrange_after(ip1, 1); if (error) goto out_unlock; /* * If the caller wanted us to exchange the contents of two complete * files of unequal length, exchange the incore sizes now. This should * be safe because we flushed both files' page caches, exchanged all * the mappings, and updated the ondisk sizes. */ if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) { loff_t temp; temp = i_size_read(VFS_I(ip2)); i_size_write(VFS_I(ip2), i_size_read(VFS_I(ip1))); i_size_write(VFS_I(ip1), temp); } out_unlock: xfs_exchrange_iunlock(ip1, ip2); return error; out_trans_cancel: xfs_trans_cancel(tp); goto out_unlock; } /* * Generic code for exchanging ranges of two files via XFS_IOC_EXCHANGE_RANGE. * This part deals with struct file objects and byte ranges and does not deal * with XFS-specific data structures such as xfs_inodes and block ranges. This * separation may some day facilitate porting to another filesystem. * * The goal is to exchange fxr.length bytes starting at fxr.file1_offset in * file1 with the same number of bytes starting at fxr.file2_offset in file2. * Implementations must call xfs_exchange_range_prep to prepare the two * files prior to taking locks; and they must update the inode change and mod * times of both files as part of the metadata update. The timestamp update * and freshness checks must be done atomically as part of the data exchange * operation to ensure correctness of the freshness check. * xfs_exchange_range_finish must be called after the operation completes * successfully but before locks are dropped. */ /* Verify that we have security clearance to perform this operation. */ static int xfs_exchange_range_verify_area( struct xfs_exchrange *fxr) { int ret; ret = remap_verify_area(fxr->file1, fxr->file1_offset, fxr->length, true); if (ret) return ret; return remap_verify_area(fxr->file2, fxr->file2_offset, fxr->length, true); } /* * Performs necessary checks before doing a range exchange, having stabilized * mutable inode attributes via i_rwsem. */ static inline int xfs_exchange_range_checks( struct xfs_exchrange *fxr, unsigned int alloc_unit) { struct inode *inode1 = file_inode(fxr->file1); struct inode *inode2 = file_inode(fxr->file2); uint64_t allocmask = alloc_unit - 1; int64_t test_len; uint64_t blen; loff_t size1, size2, tmp; int error; /* Don't touch certain kinds of inodes */ if (IS_IMMUTABLE(inode1) || IS_IMMUTABLE(inode2)) return -EPERM; if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2)) return -ETXTBSY; size1 = i_size_read(inode1); size2 = i_size_read(inode2); /* Ranges cannot start after EOF. */ if (fxr->file1_offset > size1 || fxr->file2_offset > size2) return -EINVAL; /* * If the caller said to exchange to EOF, we set the length of the * request large enough to cover everything to the end of both files. */ if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) { fxr->length = max_t(int64_t, size1 - fxr->file1_offset, size2 - fxr->file2_offset); error = xfs_exchange_range_verify_area(fxr); if (error) return error; } /* * The start of both ranges must be aligned to the file allocation * unit. */ if (!IS_ALIGNED(fxr->file1_offset, alloc_unit) || !IS_ALIGNED(fxr->file2_offset, alloc_unit)) return -EINVAL; /* Ensure offsets don't wrap. */ if (check_add_overflow(fxr->file1_offset, fxr->length, &tmp) || check_add_overflow(fxr->file2_offset, fxr->length, &tmp)) return -EINVAL; /* * We require both ranges to end within EOF, unless we're exchanging * to EOF. */ if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) && (fxr->file1_offset + fxr->length > size1 || fxr->file2_offset + fxr->length > size2)) return -EINVAL; /* * Make sure we don't hit any file size limits. If we hit any size * limits such that test_length was adjusted, we abort the whole * operation. */ test_len = fxr->length; error = generic_write_check_limits(fxr->file2, fxr->file2_offset, &test_len); if (error) return error; error = generic_write_check_limits(fxr->file1, fxr->file1_offset, &test_len); if (error) return error; if (test_len != fxr->length) return -EINVAL; /* * If the user wanted us to exchange up to the infile's EOF, round up * to the next allocation unit boundary for this check. Do the same * for the outfile. * * Otherwise, reject the range length if it's not aligned to an * allocation unit. */ if (fxr->file1_offset + fxr->length == size1) blen = ALIGN(size1, alloc_unit) - fxr->file1_offset; else if (fxr->file2_offset + fxr->length == size2) blen = ALIGN(size2, alloc_unit) - fxr->file2_offset; else if (!IS_ALIGNED(fxr->length, alloc_unit)) return -EINVAL; else blen = fxr->length; /* Don't allow overlapped exchanges within the same file. */ if (inode1 == inode2 && fxr->file2_offset + blen > fxr->file1_offset && fxr->file1_offset + blen > fxr->file2_offset) return -EINVAL; /* * Ensure that we don't exchange a partial EOF block into the middle of * another file. */ if ((fxr->length & allocmask) == 0) return 0; blen = fxr->length; if (fxr->file2_offset + blen < size2) blen &= ~allocmask; if (fxr->file1_offset + blen < size1) blen &= ~allocmask; return blen == fxr->length ? 0 : -EINVAL; } /* * Check that the two inodes are eligible for range exchanges, the ranges make * sense, and then flush all dirty data. Caller must ensure that the inodes * have been locked against any other modifications. */ static inline int xfs_exchange_range_prep( struct xfs_exchrange *fxr, unsigned int alloc_unit) { struct inode *inode1 = file_inode(fxr->file1); struct inode *inode2 = file_inode(fxr->file2); bool same_inode = (inode1 == inode2); int error; /* Check that we don't violate system file offset limits. */ error = xfs_exchange_range_checks(fxr, alloc_unit); if (error || fxr->length == 0) return error; /* Wait for the completion of any pending IOs on both files */ inode_dio_wait(inode1); if (!same_inode) inode_dio_wait(inode2); error = filemap_write_and_wait_range(inode1->i_mapping, fxr->file1_offset, fxr->file1_offset + fxr->length - 1); if (error) return error; error = filemap_write_and_wait_range(inode2->i_mapping, fxr->file2_offset, fxr->file2_offset + fxr->length - 1); if (error) return error; /* * If the files or inodes involved require synchronous writes, amend * the request to force the filesystem to flush all data and metadata * to disk after the operation completes. */ if (((fxr->file1->f_flags | fxr->file2->f_flags) & O_SYNC) || IS_SYNC(inode1) || IS_SYNC(inode2)) fxr->flags |= XFS_EXCHANGE_RANGE_DSYNC; return 0; } /* * Finish a range exchange operation, if it was successful. Caller must ensure * that the inodes are still locked against any other modifications. */ static inline int xfs_exchange_range_finish( struct xfs_exchrange *fxr) { int error; error = file_remove_privs(fxr->file1); if (error) return error; if (file_inode(fxr->file1) == file_inode(fxr->file2)) return 0; return file_remove_privs(fxr->file2); } /* * Check the alignment of an exchange request when the allocation unit size * isn't a power of two. The generic file-level helpers use (fast) * bitmask-based alignment checks, but here we have to use slow long division. */ static int xfs_exchrange_check_rtalign( const struct xfs_exchrange *fxr, struct xfs_inode *ip1, struct xfs_inode *ip2, unsigned int alloc_unit) { uint64_t length = fxr->length; uint64_t blen; loff_t size1, size2; size1 = i_size_read(VFS_I(ip1)); size2 = i_size_read(VFS_I(ip2)); /* The start of both ranges must be aligned to a rt extent. */ if (!isaligned_64(fxr->file1_offset, alloc_unit) || !isaligned_64(fxr->file2_offset, alloc_unit)) return -EINVAL; if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) length = max_t(int64_t, size1 - fxr->file1_offset, size2 - fxr->file2_offset); /* * If the user wanted us to exchange up to the infile's EOF, round up * to the next rt extent boundary for this check. Do the same for the * outfile. * * Otherwise, reject the range length if it's not rt extent aligned. * We already confirmed the starting offsets' rt extent block * alignment. */ if (fxr->file1_offset + length == size1) blen = roundup_64(size1, alloc_unit) - fxr->file1_offset; else if (fxr->file2_offset + length == size2) blen = roundup_64(size2, alloc_unit) - fxr->file2_offset; else if (!isaligned_64(length, alloc_unit)) return -EINVAL; else blen = length; /* Don't allow overlapped exchanges within the same file. */ if (ip1 == ip2 && fxr->file2_offset + blen > fxr->file1_offset && fxr->file1_offset + blen > fxr->file2_offset) return -EINVAL; /* * Ensure that we don't exchange a partial EOF rt extent into the * middle of another file. */ if (isaligned_64(length, alloc_unit)) return 0; blen = length; if (fxr->file2_offset + length < size2) blen = rounddown_64(blen, alloc_unit); if (fxr->file1_offset + blen < size1) blen = rounddown_64(blen, alloc_unit); return blen == length ? 0 : -EINVAL; } /* Prepare two files to have their data exchanged. */ STATIC int xfs_exchrange_prep( struct xfs_exchrange *fxr, struct xfs_inode *ip1, struct xfs_inode *ip2) { struct xfs_mount *mp = ip2->i_mount; unsigned int alloc_unit = xfs_inode_alloc_unitsize(ip2); int error; trace_xfs_exchrange_prep(fxr, ip1, ip2); /* Verify both files are either real-time or non-realtime */ if (XFS_IS_REALTIME_INODE(ip1) != XFS_IS_REALTIME_INODE(ip2)) return -EINVAL; /* Check non-power of two alignment issues, if necessary. */ if (!is_power_of_2(alloc_unit)) { error = xfs_exchrange_check_rtalign(fxr, ip1, ip2, alloc_unit); if (error) return error; /* * Do the generic file-level checks with the regular block * alignment. */ alloc_unit = mp->m_sb.sb_blocksize; } error = xfs_exchange_range_prep(fxr, alloc_unit); if (error || fxr->length == 0) return error; /* Attach dquots to both inodes before changing block maps. */ error = xfs_qm_dqattach(ip2); if (error) return error; error = xfs_qm_dqattach(ip1); if (error) return error; trace_xfs_exchrange_flush(fxr, ip1, ip2); /* Flush the relevant ranges of both files. */ error = xfs_flush_unmap_range(ip2, fxr->file2_offset, fxr->length); if (error) return error; error = xfs_flush_unmap_range(ip1, fxr->file1_offset, fxr->length); if (error) return error; /* * Cancel CoW fork preallocations for the ranges of both files. The * prep function should have flushed all the dirty data, so the only * CoW mappings remaining should be speculative. */ if (xfs_inode_has_cow_data(ip1)) { error = xfs_reflink_cancel_cow_range(ip1, fxr->file1_offset, fxr->length, true); if (error) return error; } if (xfs_inode_has_cow_data(ip2)) { error = xfs_reflink_cancel_cow_range(ip2, fxr->file2_offset, fxr->length, true); if (error) return error; } return 0; } /* * Exchange contents of files. This is the binding between the generic * file-level concepts and the XFS inode-specific implementation. */ STATIC int xfs_exchrange_contents( struct xfs_exchrange *fxr) { struct inode *inode1 = file_inode(fxr->file1); struct inode *inode2 = file_inode(fxr->file2); struct xfs_inode *ip1 = XFS_I(inode1); struct xfs_inode *ip2 = XFS_I(inode2); struct xfs_mount *mp = ip1->i_mount; int error; if (!xfs_has_exchange_range(mp)) return -EOPNOTSUPP; if (fxr->flags & ~(XFS_EXCHANGE_RANGE_ALL_FLAGS | XFS_EXCHANGE_RANGE_PRIV_FLAGS)) return -EINVAL; if (xfs_is_shutdown(mp)) return -EIO; /* Lock both files against IO */ error = xfs_ilock2_io_mmap(ip1, ip2); if (error) goto out_err; /* Prepare and then exchange file contents. */ error = xfs_exchrange_prep(fxr, ip1, ip2); if (error) goto out_unlock; error = xfs_exchrange_mappings(fxr, ip1, ip2); if (error) goto out_unlock; /* * Finish the exchange by removing special file privileges like any * other file write would do. This may involve turning on support for * logged xattrs if either file has security capabilities. */ error = xfs_exchange_range_finish(fxr); if (error) goto out_unlock; out_unlock: xfs_iunlock2_io_mmap(ip1, ip2); out_err: if (error) trace_xfs_exchrange_error(ip2, error, _RET_IP_); return error; } /* Exchange parts of two files. */ static int xfs_exchange_range( struct xfs_exchrange *fxr) { struct inode *inode1 = file_inode(fxr->file1); struct inode *inode2 = file_inode(fxr->file2); int ret; BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS & XFS_EXCHANGE_RANGE_PRIV_FLAGS); /* Both files must be on the same mount/filesystem. */ if (fxr->file1->f_path.mnt != fxr->file2->f_path.mnt) return -EXDEV; if (fxr->flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS) return -EINVAL; /* Userspace requests only honored for regular files. */ if (S_ISDIR(inode1->i_mode) || S_ISDIR(inode2->i_mode)) return -EISDIR; if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode)) return -EINVAL; /* Both files must be opened for read and write. */ if (!(fxr->file1->f_mode & FMODE_READ) || !(fxr->file1->f_mode & FMODE_WRITE) || !(fxr->file2->f_mode & FMODE_READ) || !(fxr->file2->f_mode & FMODE_WRITE)) return -EBADF; /* Neither file can be opened append-only. */ if ((fxr->file1->f_flags & O_APPEND) || (fxr->file2->f_flags & O_APPEND)) return -EBADF; /* * If we're not exchanging to EOF, we can check the areas before * stabilizing both files' i_size. */ if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)) { ret = xfs_exchange_range_verify_area(fxr); if (ret) return ret; } /* Update cmtime if the fd/inode don't forbid it. */ if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1)) fxr->flags |= __XFS_EXCHANGE_RANGE_UPD_CMTIME1; if (!(fxr->file2->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode2)) fxr->flags |= __XFS_EXCHANGE_RANGE_UPD_CMTIME2; file_start_write(fxr->file2); ret = xfs_exchrange_contents(fxr); file_end_write(fxr->file2); if (ret) return ret; fsnotify_modify(fxr->file1); if (fxr->file2 != fxr->file1) fsnotify_modify(fxr->file2); return 0; } /* Collect exchange-range arguments from userspace. */ long xfs_ioc_exchange_range( struct file *file, struct xfs_exchange_range __user *argp) { struct xfs_exchrange fxr = { .file2 = file, }; struct xfs_exchange_range args; struct fd file1; int error; if (copy_from_user(&args, argp, sizeof(args))) return -EFAULT; if (memchr_inv(&args.pad, 0, sizeof(args.pad))) return -EINVAL; if (args.flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS) return -EINVAL; fxr.file1_offset = args.file1_offset; fxr.file2_offset = args.file2_offset; fxr.length = args.length; fxr.flags = args.flags; file1 = fdget(args.file1_fd); if (!file1.file) return -EBADF; fxr.file1 = file1.file; error = xfs_exchange_range(&fxr); fdput(file1); return error; }
3464 3464 3458 3454 3455 3454 3457 3396 3457 2197 3456 3453 127 100 3452 2133 2131 2133 2133 3447 3458 2138 2135 736 739 3454 3400 739 3452 3450 3456 6 3452 3456 3451 3445 3457 6 126 126 126 27 114 113 114 3448 3450 3447 3448 5 6 3437 3440 3437 3438 5 5 3441 14 126 127 127 127 14 125 126 126 126 126 126 3432 5 3439 3445 3450 95 3448 3458 3452 3458 3446 3449 3406 737 3458 3448 3446 3442 3449 3458 170 170 3436 3439 3457 3449 3453 3464 3437 3448 3435 3433 3457 3455 3454 3464 3453 3448 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/irqflags.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/bug.h> #include "printk_ringbuffer.h" #include "internal.h" /** * DOC: printk_ringbuffer overview * * Data Structure * -------------- * The printk_ringbuffer is made up of 3 internal ringbuffers: * * desc_ring * A ring of descriptors and their meta data (such as sequence number, * timestamp, loglevel, etc.) as well as internal state information about * the record and logical positions specifying where in the other * ringbuffer the text strings are located. * * text_data_ring * A ring of data blocks. A data block consists of an unsigned long * integer (ID) that maps to a desc_ring index followed by the text * string of the record. * * The internal state information of a descriptor is the key element to allow * readers and writers to locklessly synchronize access to the data. * * Implementation * -------------- * * Descriptor Ring * ~~~~~~~~~~~~~~~ * The descriptor ring is an array of descriptors. A descriptor contains * essential meta data to track the data of a printk record using * blk_lpos structs pointing to associated text data blocks (see * "Data Rings" below). Each descriptor is assigned an ID that maps * directly to index values of the descriptor array and has a state. The ID * and the state are bitwise combined into a single descriptor field named * @state_var, allowing ID and state to be synchronously and atomically * updated. * * Descriptors have four states: * * reserved * A writer is modifying the record. * * committed * The record and all its data are written. A writer can reopen the * descriptor (transitioning it back to reserved), but in the committed * state the data is consistent. * * finalized * The record and all its data are complete and available for reading. A * writer cannot reopen the descriptor. * * reusable * The record exists, but its text and/or meta data may no longer be * available. * * Querying the @state_var of a record requires providing the ID of the * descriptor to query. This can yield a possible fifth (pseudo) state: * * miss * The descriptor being queried has an unexpected ID. * * The descriptor ring has a @tail_id that contains the ID of the oldest * descriptor and @head_id that contains the ID of the newest descriptor. * * When a new descriptor should be created (and the ring is full), the tail * descriptor is invalidated by first transitioning to the reusable state and * then invalidating all tail data blocks up to and including the data blocks * associated with the tail descriptor (for the text ring). Then * @tail_id is advanced, followed by advancing @head_id. And finally the * @state_var of the new descriptor is initialized to the new ID and reserved * state. * * The @tail_id can only be advanced if the new @tail_id would be in the * committed or reusable queried state. This makes it possible that a valid * sequence number of the tail is always available. * * Descriptor Finalization * ~~~~~~~~~~~~~~~~~~~~~~~ * When a writer calls the commit function prb_commit(), record data is * fully stored and is consistent within the ringbuffer. However, a writer can * reopen that record, claiming exclusive access (as with prb_reserve()), and * modify that record. When finished, the writer must again commit the record. * * In order for a record to be made available to readers (and also become * recyclable for writers), it must be finalized. A finalized record cannot be * reopened and can never become "unfinalized". Record finalization can occur * in three different scenarios: * * 1) A writer can simultaneously commit and finalize its record by calling * prb_final_commit() instead of prb_commit(). * * 2) When a new record is reserved and the previous record has been * committed via prb_commit(), that previous record is automatically * finalized. * * 3) When a record is committed via prb_commit() and a newer record * already exists, the record being committed is automatically finalized. * * Data Ring * ~~~~~~~~~ * The text data ring is a byte array composed of data blocks. Data blocks are * referenced by blk_lpos structs that point to the logical position of the * beginning of a data block and the beginning of the next adjacent data * block. Logical positions are mapped directly to index values of the byte * array ringbuffer. * * Each data block consists of an ID followed by the writer data. The ID is * the identifier of a descriptor that is associated with the data block. A * given data block is considered valid if all of the following conditions * are met: * * 1) The descriptor associated with the data block is in the committed * or finalized queried state. * * 2) The blk_lpos struct within the descriptor associated with the data * block references back to the same data block. * * 3) The data block is within the head/tail logical position range. * * If the writer data of a data block would extend beyond the end of the * byte array, only the ID of the data block is stored at the logical * position and the full data block (ID and writer data) is stored at the * beginning of the byte array. The referencing blk_lpos will point to the * ID before the wrap and the next data block will be at the logical * position adjacent the full data block after the wrap. * * Data rings have a @tail_lpos that points to the beginning of the oldest * data block and a @head_lpos that points to the logical position of the * next (not yet existing) data block. * * When a new data block should be created (and the ring is full), tail data * blocks will first be invalidated by putting their associated descriptors * into the reusable state and then pushing the @tail_lpos forward beyond * them. Then the @head_lpos is pushed forward and is associated with a new * descriptor. If a data block is not valid, the @tail_lpos cannot be * advanced beyond it. * * Info Array * ~~~~~~~~~~ * The general meta data of printk records are stored in printk_info structs, * stored in an array with the same number of elements as the descriptor ring. * Each info corresponds to the descriptor of the same index in the * descriptor ring. Info validity is confirmed by evaluating the corresponding * descriptor before and after loading the info. * * Usage * ----- * Here are some simple examples demonstrating writers and readers. For the * examples a global ringbuffer (test_rb) is available (which is not the * actual ringbuffer used by printk):: * * DEFINE_PRINTKRB(test_rb, 15, 5); * * This ringbuffer allows up to 32768 records (2 ^ 15) and has a size of * 1 MiB (2 ^ (15 + 5)) for text data. * * Sample writer code:: * * const char *textstr = "message text"; * struct prb_reserved_entry e; * struct printk_record r; * * // specify how much to allocate * prb_rec_init_wr(&r, strlen(textstr) + 1); * * if (prb_reserve(&e, &test_rb, &r)) { * snprintf(r.text_buf, r.text_buf_size, "%s", textstr); * * r.info->text_len = strlen(textstr); * r.info->ts_nsec = local_clock(); * r.info->caller_id = printk_caller_id(); * * // commit and finalize the record * prb_final_commit(&e); * } * * Note that additional writer functions are available to extend a record * after it has been committed but not yet finalized. This can be done as * long as no new records have been reserved and the caller is the same. * * Sample writer code (record extending):: * * // alternate rest of previous example * * r.info->text_len = strlen(textstr); * r.info->ts_nsec = local_clock(); * r.info->caller_id = printk_caller_id(); * * // commit the record (but do not finalize yet) * prb_commit(&e); * } * * ... * * // specify additional 5 bytes text space to extend * prb_rec_init_wr(&r, 5); * * // try to extend, but only if it does not exceed 32 bytes * if (prb_reserve_in_last(&e, &test_rb, &r, printk_caller_id(), 32)) { * snprintf(&r.text_buf[r.info->text_len], * r.text_buf_size - r.info->text_len, "hello"); * * r.info->text_len += 5; * * // commit and finalize the record * prb_final_commit(&e); * } * * Sample reader code:: * * struct printk_info info; * struct printk_record r; * char text_buf[32]; * u64 seq; * * prb_rec_init_rd(&r, &info, &text_buf[0], sizeof(text_buf)); * * prb_for_each_record(0, &test_rb, &seq, &r) { * if (info.seq != seq) * pr_warn("lost %llu records\n", info.seq - seq); * * if (info.text_len > r.text_buf_size) { * pr_warn("record %llu text truncated\n", info.seq); * text_buf[r.text_buf_size - 1] = 0; * } * * pr_info("%llu: %llu: %s\n", info.seq, info.ts_nsec, * &text_buf[0]); * } * * Note that additional less convenient reader functions are available to * allow complex record access. * * ABA Issues * ~~~~~~~~~~ * To help avoid ABA issues, descriptors are referenced by IDs (array index * values combined with tagged bits counting array wraps) and data blocks are * referenced by logical positions (array index values combined with tagged * bits counting array wraps). However, on 32-bit systems the number of * tagged bits is relatively small such that an ABA incident is (at least * theoretically) possible. For example, if 4 million maximally sized (1KiB) * printk messages were to occur in NMI context on a 32-bit system, the * interrupted context would not be able to recognize that the 32-bit integer * completely wrapped and thus represents a different data block than the one * the interrupted context expects. * * To help combat this possibility, additional state checking is performed * (such as using cmpxchg() even though set() would suffice). These extra * checks are commented as such and will hopefully catch any ABA issue that * a 32-bit system might experience. * * Memory Barriers * ~~~~~~~~~~~~~~~ * Multiple memory barriers are used. To simplify proving correctness and * generating litmus tests, lines of code related to memory barriers * (loads, stores, and the associated memory barriers) are labeled:: * * LMM(function:letter) * * Comments reference the labels using only the "function:letter" part. * * The memory barrier pairs and their ordering are: * * desc_reserve:D / desc_reserve:B * push descriptor tail (id), then push descriptor head (id) * * desc_reserve:D / data_push_tail:B * push data tail (lpos), then set new descriptor reserved (state) * * desc_reserve:D / desc_push_tail:C * push descriptor tail (id), then set new descriptor reserved (state) * * desc_reserve:D / prb_first_seq:C * push descriptor tail (id), then set new descriptor reserved (state) * * desc_reserve:F / desc_read:D * set new descriptor id and reserved (state), then allow writer changes * * data_alloc:A (or data_realloc:A) / desc_read:D * set old descriptor reusable (state), then modify new data block area * * data_alloc:A (or data_realloc:A) / data_push_tail:B * push data tail (lpos), then modify new data block area * * _prb_commit:B / desc_read:B * store writer changes, then set new descriptor committed (state) * * desc_reopen_last:A / _prb_commit:B * set descriptor reserved (state), then read descriptor data * * _prb_commit:B / desc_reserve:D * set new descriptor committed (state), then check descriptor head (id) * * data_push_tail:D / data_push_tail:A * set descriptor reusable (state), then push data tail (lpos) * * desc_push_tail:B / desc_reserve:D * set descriptor reusable (state), then push descriptor tail (id) * * desc_update_last_finalized:A / desc_last_finalized_seq:A * store finalized record, then set new highest finalized sequence number */ #define DATA_SIZE(data_ring) _DATA_SIZE((data_ring)->size_bits) #define DATA_SIZE_MASK(data_ring) (DATA_SIZE(data_ring) - 1) #define DESCS_COUNT(desc_ring) _DESCS_COUNT((desc_ring)->count_bits) #define DESCS_COUNT_MASK(desc_ring) (DESCS_COUNT(desc_ring) - 1) /* Determine the data array index from a logical position. */ #define DATA_INDEX(data_ring, lpos) ((lpos) & DATA_SIZE_MASK(data_ring)) /* Determine the desc array index from an ID or sequence number. */ #define DESC_INDEX(desc_ring, n) ((n) & DESCS_COUNT_MASK(desc_ring)) /* Determine how many times the data array has wrapped. */ #define DATA_WRAPS(data_ring, lpos) ((lpos) >> (data_ring)->size_bits) /* Determine if a logical position refers to a data-less block. */ #define LPOS_DATALESS(lpos) ((lpos) & 1UL) #define BLK_DATALESS(blk) (LPOS_DATALESS((blk)->begin) && \ LPOS_DATALESS((blk)->next)) /* Get the logical position at index 0 of the current wrap. */ #define DATA_THIS_WRAP_START_LPOS(data_ring, lpos) \ ((lpos) & ~DATA_SIZE_MASK(data_ring)) /* Get the ID for the same index of the previous wrap as the given ID. */ #define DESC_ID_PREV_WRAP(desc_ring, id) \ DESC_ID((id) - DESCS_COUNT(desc_ring)) /* * A data block: mapped directly to the beginning of the data block area * specified as a logical position within the data ring. * * @id: the ID of the associated descriptor * @data: the writer data * * Note that the size of a data block is only known by its associated * descriptor. */ struct prb_data_block { unsigned long id; char data[]; }; /* * Return the descriptor associated with @n. @n can be either a * descriptor ID or a sequence number. */ static struct prb_desc *to_desc(struct prb_desc_ring *desc_ring, u64 n) { return &desc_ring->descs[DESC_INDEX(desc_ring, n)]; } /* * Return the printk_info associated with @n. @n can be either a * descriptor ID or a sequence number. */ static struct printk_info *to_info(struct prb_desc_ring *desc_ring, u64 n) { return &desc_ring->infos[DESC_INDEX(desc_ring, n)]; } static struct prb_data_block *to_block(struct prb_data_ring *data_ring, unsigned long begin_lpos) { return (void *)&data_ring->data[DATA_INDEX(data_ring, begin_lpos)]; } /* * Increase the data size to account for data block meta data plus any * padding so that the adjacent data block is aligned on the ID size. */ static unsigned int to_blk_size(unsigned int size) { struct prb_data_block *db = NULL; size += sizeof(*db); size = ALIGN(size, sizeof(db->id)); return size; } /* * Sanity checker for reserve size. The ringbuffer code assumes that a data * block does not exceed the maximum possible size that could fit within the * ringbuffer. This function provides that basic size check so that the * assumption is safe. */ static bool data_check_size(struct prb_data_ring *data_ring, unsigned int size) { struct prb_data_block *db = NULL; if (size == 0) return true; /* * Ensure the alignment padded size could possibly fit in the data * array. The largest possible data block must still leave room for * at least the ID of the next block. */ size = to_blk_size(size); if (size > DATA_SIZE(data_ring) - sizeof(db->id)) return false; return true; } /* Query the state of a descriptor. */ static enum desc_state get_desc_state(unsigned long id, unsigned long state_val) { if (id != DESC_ID(state_val)) return desc_miss; return DESC_STATE(state_val); } /* * Get a copy of a specified descriptor and return its queried state. If the * descriptor is in an inconsistent state (miss or reserved), the caller can * only expect the descriptor's @state_var field to be valid. * * The sequence number and caller_id can be optionally retrieved. Like all * non-state_var data, they are only valid if the descriptor is in a * consistent state. */ static enum desc_state desc_read(struct prb_desc_ring *desc_ring, unsigned long id, struct prb_desc *desc_out, u64 *seq_out, u32 *caller_id_out) { struct printk_info *info = to_info(desc_ring, id); struct prb_desc *desc = to_desc(desc_ring, id); atomic_long_t *state_var = &desc->state_var; enum desc_state d_state; unsigned long state_val; /* Check the descriptor state. */ state_val = atomic_long_read(state_var); /* LMM(desc_read:A) */ d_state = get_desc_state(id, state_val); if (d_state == desc_miss || d_state == desc_reserved) { /* * The descriptor is in an inconsistent state. Set at least * @state_var so that the caller can see the details of * the inconsistent state. */ goto out; } /* * Guarantee the state is loaded before copying the descriptor * content. This avoids copying obsolete descriptor content that might * not apply to the descriptor state. This pairs with _prb_commit:B. * * Memory barrier involvement: * * If desc_read:A reads from _prb_commit:B, then desc_read:C reads * from _prb_commit:A. * * Relies on: * * WMB from _prb_commit:A to _prb_commit:B * matching * RMB from desc_read:A to desc_read:C */ smp_rmb(); /* LMM(desc_read:B) */ /* * Copy the descriptor data. The data is not valid until the * state has been re-checked. A memcpy() for all of @desc * cannot be used because of the atomic_t @state_var field. */ if (desc_out) { memcpy(&desc_out->text_blk_lpos, &desc->text_blk_lpos, sizeof(desc_out->text_blk_lpos)); /* LMM(desc_read:C) */ } if (seq_out) *seq_out = info->seq; /* also part of desc_read:C */ if (caller_id_out) *caller_id_out = info->caller_id; /* also part of desc_read:C */ /* * 1. Guarantee the descriptor content is loaded before re-checking * the state. This avoids reading an obsolete descriptor state * that may not apply to the copied content. This pairs with * desc_reserve:F. * * Memory barrier involvement: * * If desc_read:C reads from desc_reserve:G, then desc_read:E * reads from desc_reserve:F. * * Relies on: * * WMB from desc_reserve:F to desc_reserve:G * matching * RMB from desc_read:C to desc_read:E * * 2. Guarantee the record data is loaded before re-checking the * state. This avoids reading an obsolete descriptor state that may * not apply to the copied data. This pairs with data_alloc:A and * data_realloc:A. * * Memory barrier involvement: * * If copy_data:A reads from data_alloc:B, then desc_read:E * reads from desc_make_reusable:A. * * Relies on: * * MB from desc_make_reusable:A to data_alloc:B * matching * RMB from desc_read:C to desc_read:E * * Note: desc_make_reusable:A and data_alloc:B can be different * CPUs. However, the data_alloc:B CPU (which performs the * full memory barrier) must have previously seen * desc_make_reusable:A. */ smp_rmb(); /* LMM(desc_read:D) */ /* * The data has been copied. Return the current descriptor state, * which may have changed since the load above. */ state_val = atomic_long_read(state_var); /* LMM(desc_read:E) */ d_state = get_desc_state(id, state_val); out: if (desc_out) atomic_long_set(&desc_out->state_var, state_val); return d_state; } /* * Take a specified descriptor out of the finalized state by attempting * the transition from finalized to reusable. Either this context or some * other context will have been successful. */ static void desc_make_reusable(struct prb_desc_ring *desc_ring, unsigned long id) { unsigned long val_finalized = DESC_SV(id, desc_finalized); unsigned long val_reusable = DESC_SV(id, desc_reusable); struct prb_desc *desc = to_desc(desc_ring, id); atomic_long_t *state_var = &desc->state_var; atomic_long_cmpxchg_relaxed(state_var, val_finalized, val_reusable); /* LMM(desc_make_reusable:A) */ } /* * Given the text data ring, put the associated descriptor of each * data block from @lpos_begin until @lpos_end into the reusable state. * * If there is any problem making the associated descriptor reusable, either * the descriptor has not yet been finalized or another writer context has * already pushed the tail lpos past the problematic data block. Regardless, * on error the caller can re-load the tail lpos to determine the situation. */ static bool data_make_reusable(struct printk_ringbuffer *rb, unsigned long lpos_begin, unsigned long lpos_end, unsigned long *lpos_out) { struct prb_data_ring *data_ring = &rb->text_data_ring; struct prb_desc_ring *desc_ring = &rb->desc_ring; struct prb_data_block *blk; enum desc_state d_state; struct prb_desc desc; struct prb_data_blk_lpos *blk_lpos = &desc.text_blk_lpos; unsigned long id; /* Loop until @lpos_begin has advanced to or beyond @lpos_end. */ while ((lpos_end - lpos_begin) - 1 < DATA_SIZE(data_ring)) { blk = to_block(data_ring, lpos_begin); /* * Load the block ID from the data block. This is a data race * against a writer that may have newly reserved this data * area. If the loaded value matches a valid descriptor ID, * the blk_lpos of that descriptor will be checked to make * sure it points back to this data block. If the check fails, * the data area has been recycled by another writer. */ id = blk->id; /* LMM(data_make_reusable:A) */ d_state = desc_read(desc_ring, id, &desc, NULL, NULL); /* LMM(data_make_reusable:B) */ switch (d_state) { case desc_miss: case desc_reserved: case desc_committed: return false; case desc_finalized: /* * This data block is invalid if the descriptor * does not point back to it. */ if (blk_lpos->begin != lpos_begin) return false; desc_make_reusable(desc_ring, id); break; case desc_reusable: /* * This data block is invalid if the descriptor * does not point back to it. */ if (blk_lpos->begin != lpos_begin) return false; break; } /* Advance @lpos_begin to the next data block. */ lpos_begin = blk_lpos->next; } *lpos_out = lpos_begin; return true; } /* * Advance the data ring tail to at least @lpos. This function puts * descriptors into the reusable state if the tail is pushed beyond * their associated data block. */ static bool data_push_tail(struct printk_ringbuffer *rb, unsigned long lpos) { struct prb_data_ring *data_ring = &rb->text_data_ring; unsigned long tail_lpos_new; unsigned long tail_lpos; unsigned long next_lpos; /* If @lpos is from a data-less block, there is nothing to do. */ if (LPOS_DATALESS(lpos)) return true; /* * Any descriptor states that have transitioned to reusable due to the * data tail being pushed to this loaded value will be visible to this * CPU. This pairs with data_push_tail:D. * * Memory barrier involvement: * * If data_push_tail:A reads from data_push_tail:D, then this CPU can * see desc_make_reusable:A. * * Relies on: * * MB from desc_make_reusable:A to data_push_tail:D * matches * READFROM from data_push_tail:D to data_push_tail:A * thus * READFROM from desc_make_reusable:A to this CPU */ tail_lpos = atomic_long_read(&data_ring->tail_lpos); /* LMM(data_push_tail:A) */ /* * Loop until the tail lpos is at or beyond @lpos. This condition * may already be satisfied, resulting in no full memory barrier * from data_push_tail:D being performed. However, since this CPU * sees the new tail lpos, any descriptor states that transitioned to * the reusable state must already be visible. */ while ((lpos - tail_lpos) - 1 < DATA_SIZE(data_ring)) { /* * Make all descriptors reusable that are associated with * data blocks before @lpos. */ if (!data_make_reusable(rb, tail_lpos, lpos, &next_lpos)) { /* * 1. Guarantee the block ID loaded in * data_make_reusable() is performed before * reloading the tail lpos. The failed * data_make_reusable() may be due to a newly * recycled data area causing the tail lpos to * have been previously pushed. This pairs with * data_alloc:A and data_realloc:A. * * Memory barrier involvement: * * If data_make_reusable:A reads from data_alloc:B, * then data_push_tail:C reads from * data_push_tail:D. * * Relies on: * * MB from data_push_tail:D to data_alloc:B * matching * RMB from data_make_reusable:A to * data_push_tail:C * * Note: data_push_tail:D and data_alloc:B can be * different CPUs. However, the data_alloc:B * CPU (which performs the full memory * barrier) must have previously seen * data_push_tail:D. * * 2. Guarantee the descriptor state loaded in * data_make_reusable() is performed before * reloading the tail lpos. The failed * data_make_reusable() may be due to a newly * recycled descriptor causing the tail lpos to * have been previously pushed. This pairs with * desc_reserve:D. * * Memory barrier involvement: * * If data_make_reusable:B reads from * desc_reserve:F, then data_push_tail:C reads * from data_push_tail:D. * * Relies on: * * MB from data_push_tail:D to desc_reserve:F * matching * RMB from data_make_reusable:B to * data_push_tail:C * * Note: data_push_tail:D and desc_reserve:F can * be different CPUs. However, the * desc_reserve:F CPU (which performs the * full memory barrier) must have previously * seen data_push_tail:D. */ smp_rmb(); /* LMM(data_push_tail:B) */ tail_lpos_new = atomic_long_read(&data_ring->tail_lpos ); /* LMM(data_push_tail:C) */ if (tail_lpos_new == tail_lpos) return false; /* Another CPU pushed the tail. Try again. */ tail_lpos = tail_lpos_new; continue; } /* * Guarantee any descriptor states that have transitioned to * reusable are stored before pushing the tail lpos. A full * memory barrier is needed since other CPUs may have made * the descriptor states reusable. This pairs with * data_push_tail:A. */ if (atomic_long_try_cmpxchg(&data_ring->tail_lpos, &tail_lpos, next_lpos)) { /* LMM(data_push_tail:D) */ break; } } return true; } /* * Advance the desc ring tail. This function advances the tail by one * descriptor, thus invalidating the oldest descriptor. Before advancing * the tail, the tail descriptor is made reusable and all data blocks up to * and including the descriptor's data block are invalidated (i.e. the data * ring tail is pushed past the data block of the descriptor being made * reusable). */ static bool desc_push_tail(struct printk_ringbuffer *rb, unsigned long tail_id) { struct prb_desc_ring *desc_ring = &rb->desc_ring; enum desc_state d_state; struct prb_desc desc; d_state = desc_read(desc_ring, tail_id, &desc, NULL, NULL); switch (d_state) { case desc_miss: /* * If the ID is exactly 1 wrap behind the expected, it is * in the process of being reserved by another writer and * must be considered reserved. */ if (DESC_ID(atomic_long_read(&desc.state_var)) == DESC_ID_PREV_WRAP(desc_ring, tail_id)) { return false; } /* * The ID has changed. Another writer must have pushed the * tail and recycled the descriptor already. Success is * returned because the caller is only interested in the * specified tail being pushed, which it was. */ return true; case desc_reserved: case desc_committed: return false; case desc_finalized: desc_make_reusable(desc_ring, tail_id); break; case desc_reusable: break; } /* * Data blocks must be invalidated before their associated * descriptor can be made available for recycling. Invalidating * them later is not possible because there is no way to trust * data blocks once their associated descriptor is gone. */ if (!data_push_tail(rb, desc.text_blk_lpos.next)) return false; /* * Check the next descriptor after @tail_id before pushing the tail * to it because the tail must always be in a finalized or reusable * state. The implementation of prb_first_seq() relies on this. * * A successful read implies that the next descriptor is less than or * equal to @head_id so there is no risk of pushing the tail past the * head. */ d_state = desc_read(desc_ring, DESC_ID(tail_id + 1), &desc, NULL, NULL); /* LMM(desc_push_tail:A) */ if (d_state == desc_finalized || d_state == desc_reusable) { /* * Guarantee any descriptor states that have transitioned to * reusable are stored before pushing the tail ID. This allows * verifying the recycled descriptor state. A full memory * barrier is needed since other CPUs may have made the * descriptor states reusable. This pairs with desc_reserve:D. */ atomic_long_cmpxchg(&desc_ring->tail_id, tail_id, DESC_ID(tail_id + 1)); /* LMM(desc_push_tail:B) */ } else { /* * Guarantee the last state load from desc_read() is before * reloading @tail_id in order to see a new tail ID in the * case that the descriptor has been recycled. This pairs * with desc_reserve:D. * * Memory barrier involvement: * * If desc_push_tail:A reads from desc_reserve:F, then * desc_push_tail:D reads from desc_push_tail:B. * * Relies on: * * MB from desc_push_tail:B to desc_reserve:F * matching * RMB from desc_push_tail:A to desc_push_tail:D * * Note: desc_push_tail:B and desc_reserve:F can be different * CPUs. However, the desc_reserve:F CPU (which performs * the full memory barrier) must have previously seen * desc_push_tail:B. */ smp_rmb(); /* LMM(desc_push_tail:C) */ /* * Re-check the tail ID. The descriptor following @tail_id is * not in an allowed tail state. But if the tail has since * been moved by another CPU, then it does not matter. */ if (atomic_long_read(&desc_ring->tail_id) == tail_id) /* LMM(desc_push_tail:D) */ return false; } return true; } /* Reserve a new descriptor, invalidating the oldest if necessary. */ static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out) { struct prb_desc_ring *desc_ring = &rb->desc_ring; unsigned long prev_state_val; unsigned long id_prev_wrap; struct prb_desc *desc; unsigned long head_id; unsigned long id; head_id = atomic_long_read(&desc_ring->head_id); /* LMM(desc_reserve:A) */ do { id = DESC_ID(head_id + 1); id_prev_wrap = DESC_ID_PREV_WRAP(desc_ring, id); /* * Guarantee the head ID is read before reading the tail ID. * Since the tail ID is updated before the head ID, this * guarantees that @id_prev_wrap is never ahead of the tail * ID. This pairs with desc_reserve:D. * * Memory barrier involvement: * * If desc_reserve:A reads from desc_reserve:D, then * desc_reserve:C reads from desc_push_tail:B. * * Relies on: * * MB from desc_push_tail:B to desc_reserve:D * matching * RMB from desc_reserve:A to desc_reserve:C * * Note: desc_push_tail:B and desc_reserve:D can be different * CPUs. However, the desc_reserve:D CPU (which performs * the full memory barrier) must have previously seen * desc_push_tail:B. */ smp_rmb(); /* LMM(desc_reserve:B) */ if (id_prev_wrap == atomic_long_read(&desc_ring->tail_id )) { /* LMM(desc_reserve:C) */ /* * Make space for the new descriptor by * advancing the tail. */ if (!desc_push_tail(rb, id_prev_wrap)) return false; } /* * 1. Guarantee the tail ID is read before validating the * recycled descriptor state. A read memory barrier is * sufficient for this. This pairs with desc_push_tail:B. * * Memory barrier involvement: * * If desc_reserve:C reads from desc_push_tail:B, then * desc_reserve:E reads from desc_make_reusable:A. * * Relies on: * * MB from desc_make_reusable:A to desc_push_tail:B * matching * RMB from desc_reserve:C to desc_reserve:E * * Note: desc_make_reusable:A and desc_push_tail:B can be * different CPUs. However, the desc_push_tail:B CPU * (which performs the full memory barrier) must have * previously seen desc_make_reusable:A. * * 2. Guarantee the tail ID is stored before storing the head * ID. This pairs with desc_reserve:B. * * 3. Guarantee any data ring tail changes are stored before * recycling the descriptor. Data ring tail changes can * happen via desc_push_tail()->data_push_tail(). A full * memory barrier is needed since another CPU may have * pushed the data ring tails. This pairs with * data_push_tail:B. * * 4. Guarantee a new tail ID is stored before recycling the * descriptor. A full memory barrier is needed since * another CPU may have pushed the tail ID. This pairs * with desc_push_tail:C and this also pairs with * prb_first_seq:C. * * 5. Guarantee the head ID is stored before trying to * finalize the previous descriptor. This pairs with * _prb_commit:B. */ } while (!atomic_long_try_cmpxchg(&desc_ring->head_id, &head_id, id)); /* LMM(desc_reserve:D) */ desc = to_desc(desc_ring, id); /* * If the descriptor has been recycled, verify the old state val. * See "ABA Issues" about why this verification is performed. */ prev_state_val = atomic_long_read(&desc->state_var); /* LMM(desc_reserve:E) */ if (prev_state_val && get_desc_state(id_prev_wrap, prev_state_val) != desc_reusable) { WARN_ON_ONCE(1); return false; } /* * Assign the descriptor a new ID and set its state to reserved. * See "ABA Issues" about why cmpxchg() instead of set() is used. * * Guarantee the new descriptor ID and state is stored before making * any other changes. A write memory barrier is sufficient for this. * This pairs with desc_read:D. */ if (!atomic_long_try_cmpxchg(&desc->state_var, &prev_state_val, DESC_SV(id, desc_reserved))) { /* LMM(desc_reserve:F) */ WARN_ON_ONCE(1); return false; } /* Now data in @desc can be modified: LMM(desc_reserve:G) */ *id_out = id; return true; } /* Determine the end of a data block. */ static unsigned long get_next_lpos(struct prb_data_ring *data_ring, unsigned long lpos, unsigned int size) { unsigned long begin_lpos; unsigned long next_lpos; begin_lpos = lpos; next_lpos = lpos + size; /* First check if the data block does not wrap. */ if (DATA_WRAPS(data_ring, begin_lpos) == DATA_WRAPS(data_ring, next_lpos)) return next_lpos; /* Wrapping data blocks store their data at the beginning. */ return (DATA_THIS_WRAP_START_LPOS(data_ring, next_lpos) + size); } /* * Allocate a new data block, invalidating the oldest data block(s) * if necessary. This function also associates the data block with * a specified descriptor. */ static char *data_alloc(struct printk_ringbuffer *rb, unsigned int size, struct prb_data_blk_lpos *blk_lpos, unsigned long id) { struct prb_data_ring *data_ring = &rb->text_data_ring; struct prb_data_block *blk; unsigned long begin_lpos; unsigned long next_lpos; if (size == 0) { /* * Data blocks are not created for empty lines. Instead, the * reader will recognize these special lpos values and handle * it appropriately. */ blk_lpos->begin = EMPTY_LINE_LPOS; blk_lpos->next = EMPTY_LINE_LPOS; return NULL; } size = to_blk_size(size); begin_lpos = atomic_long_read(&data_ring->head_lpos); do { next_lpos = get_next_lpos(data_ring, begin_lpos, size); if (!data_push_tail(rb, next_lpos - DATA_SIZE(data_ring))) { /* Failed to allocate, specify a data-less block. */ blk_lpos->begin = FAILED_LPOS; blk_lpos->next = FAILED_LPOS; return NULL; } /* * 1. Guarantee any descriptor states that have transitioned * to reusable are stored before modifying the newly * allocated data area. A full memory barrier is needed * since other CPUs may have made the descriptor states * reusable. See data_push_tail:A about why the reusable * states are visible. This pairs with desc_read:D. * * 2. Guarantee any updated tail lpos is stored before * modifying the newly allocated data area. Another CPU may * be in data_make_reusable() and is reading a block ID * from this area. data_make_reusable() can handle reading * a garbage block ID value, but then it must be able to * load a new tail lpos. A full memory barrier is needed * since other CPUs may have updated the tail lpos. This * pairs with data_push_tail:B. */ } while (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &begin_lpos, next_lpos)); /* LMM(data_alloc:A) */ blk = to_block(data_ring, begin_lpos); blk->id = id; /* LMM(data_alloc:B) */ if (DATA_WRAPS(data_ring, begin_lpos) != DATA_WRAPS(data_ring, next_lpos)) { /* Wrapping data blocks store their data at the beginning. */ blk = to_block(data_ring, 0); /* * Store the ID on the wrapped block for consistency. * The printk_ringbuffer does not actually use it. */ blk->id = id; } blk_lpos->begin = begin_lpos; blk_lpos->next = next_lpos; return &blk->data[0]; } /* * Try to resize an existing data block associated with the descriptor * specified by @id. If the resized data block should become wrapped, it * copies the old data to the new data block. If @size yields a data block * with the same or less size, the data block is left as is. * * Fail if this is not the last allocated data block or if there is not * enough space or it is not possible make enough space. * * Return a pointer to the beginning of the entire data buffer or NULL on * failure. */ static char *data_realloc(struct printk_ringbuffer *rb, unsigned int size, struct prb_data_blk_lpos *blk_lpos, unsigned long id) { struct prb_data_ring *data_ring = &rb->text_data_ring; struct prb_data_block *blk; unsigned long head_lpos; unsigned long next_lpos; bool wrapped; /* Reallocation only works if @blk_lpos is the newest data block. */ head_lpos = atomic_long_read(&data_ring->head_lpos); if (head_lpos != blk_lpos->next) return NULL; /* Keep track if @blk_lpos was a wrapping data block. */ wrapped = (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, blk_lpos->next)); size = to_blk_size(size); next_lpos = get_next_lpos(data_ring, blk_lpos->begin, size); /* If the data block does not increase, there is nothing to do. */ if (head_lpos - next_lpos < DATA_SIZE(data_ring)) { if (wrapped) blk = to_block(data_ring, 0); else blk = to_block(data_ring, blk_lpos->begin); return &blk->data[0]; } if (!data_push_tail(rb, next_lpos - DATA_SIZE(data_ring))) return NULL; /* The memory barrier involvement is the same as data_alloc:A. */ if (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &head_lpos, next_lpos)) { /* LMM(data_realloc:A) */ return NULL; } blk = to_block(data_ring, blk_lpos->begin); if (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, next_lpos)) { struct prb_data_block *old_blk = blk; /* Wrapping data blocks store their data at the beginning. */ blk = to_block(data_ring, 0); /* * Store the ID on the wrapped block for consistency. * The printk_ringbuffer does not actually use it. */ blk->id = id; if (!wrapped) { /* * Since the allocated space is now in the newly * created wrapping data block, copy the content * from the old data block. */ memcpy(&blk->data[0], &old_blk->data[0], (blk_lpos->next - blk_lpos->begin) - sizeof(blk->id)); } } blk_lpos->next = next_lpos; return &blk->data[0]; } /* Return the number of bytes used by a data block. */ static unsigned int space_used(struct prb_data_ring *data_ring, struct prb_data_blk_lpos *blk_lpos) { /* Data-less blocks take no space. */ if (BLK_DATALESS(blk_lpos)) return 0; if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next)) { /* Data block does not wrap. */ return (DATA_INDEX(data_ring, blk_lpos->next) - DATA_INDEX(data_ring, blk_lpos->begin)); } /* * For wrapping data blocks, the trailing (wasted) space is * also counted. */ return (DATA_INDEX(data_ring, blk_lpos->next) + DATA_SIZE(data_ring) - DATA_INDEX(data_ring, blk_lpos->begin)); } /* * Given @blk_lpos, return a pointer to the writer data from the data block * and calculate the size of the data part. A NULL pointer is returned if * @blk_lpos specifies values that could never be legal. * * This function (used by readers) performs strict validation on the lpos * values to possibly detect bugs in the writer code. A WARN_ON_ONCE() is * triggered if an internal error is detected. */ static const char *get_data(struct prb_data_ring *data_ring, struct prb_data_blk_lpos *blk_lpos, unsigned int *data_size) { struct prb_data_block *db; /* Data-less data block description. */ if (BLK_DATALESS(blk_lpos)) { /* * Records that are just empty lines are also valid, even * though they do not have a data block. For such records * explicitly return empty string data to signify success. */ if (blk_lpos->begin == EMPTY_LINE_LPOS && blk_lpos->next == EMPTY_LINE_LPOS) { *data_size = 0; return ""; } /* Data lost, invalid, or otherwise unavailable. */ return NULL; } /* Regular data block: @begin less than @next and in same wrap. */ if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next) && blk_lpos->begin < blk_lpos->next) { db = to_block(data_ring, blk_lpos->begin); *data_size = blk_lpos->next - blk_lpos->begin; /* Wrapping data block: @begin is one wrap behind @next. */ } else if (DATA_WRAPS(data_ring, blk_lpos->begin + DATA_SIZE(data_ring)) == DATA_WRAPS(data_ring, blk_lpos->next)) { db = to_block(data_ring, 0); *data_size = DATA_INDEX(data_ring, blk_lpos->next); /* Illegal block description. */ } else { WARN_ON_ONCE(1); return NULL; } /* A valid data block will always be aligned to the ID size. */ if (WARN_ON_ONCE(blk_lpos->begin != ALIGN(blk_lpos->begin, sizeof(db->id))) || WARN_ON_ONCE(blk_lpos->next != ALIGN(blk_lpos->next, sizeof(db->id)))) { return NULL; } /* A valid data block will always have at least an ID. */ if (WARN_ON_ONCE(*data_size < sizeof(db->id))) return NULL; /* Subtract block ID space from size to reflect data size. */ *data_size -= sizeof(db->id); return &db->data[0]; } /* * Attempt to transition the newest descriptor from committed back to reserved * so that the record can be modified by a writer again. This is only possible * if the descriptor is not yet finalized and the provided @caller_id matches. */ static struct prb_desc *desc_reopen_last(struct prb_desc_ring *desc_ring, u32 caller_id, unsigned long *id_out) { unsigned long prev_state_val; enum desc_state d_state; struct prb_desc desc; struct prb_desc *d; unsigned long id; u32 cid; id = atomic_long_read(&desc_ring->head_id); /* * To reduce unnecessarily reopening, first check if the descriptor * state and caller ID are correct. */ d_state = desc_read(desc_ring, id, &desc, NULL, &cid); if (d_state != desc_committed || cid != caller_id) return NULL; d = to_desc(desc_ring, id); prev_state_val = DESC_SV(id, desc_committed); /* * Guarantee the reserved state is stored before reading any * record data. A full memory barrier is needed because @state_var * modification is followed by reading. This pairs with _prb_commit:B. * * Memory barrier involvement: * * If desc_reopen_last:A reads from _prb_commit:B, then * prb_reserve_in_last:A reads from _prb_commit:A. * * Relies on: * * WMB from _prb_commit:A to _prb_commit:B * matching * MB If desc_reopen_last:A to prb_reserve_in_last:A */ if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val, DESC_SV(id, desc_reserved))) { /* LMM(desc_reopen_last:A) */ return NULL; } *id_out = id; return d; } /** * prb_reserve_in_last() - Re-reserve and extend the space in the ringbuffer * used by the newest record. * * @e: The entry structure to setup. * @rb: The ringbuffer to re-reserve and extend data in. * @r: The record structure to allocate buffers for. * @caller_id: The caller ID of the caller (reserving writer). * @max_size: Fail if the extended size would be greater than this. * * This is the public function available to writers to re-reserve and extend * data. * * The writer specifies the text size to extend (not the new total size) by * setting the @text_buf_size field of @r. To ensure proper initialization * of @r, prb_rec_init_wr() should be used. * * This function will fail if @caller_id does not match the caller ID of the * newest record. In that case the caller must reserve new data using * prb_reserve(). * * Context: Any context. Disables local interrupts on success. * Return: true if text data could be extended, otherwise false. * * On success: * * - @r->text_buf points to the beginning of the entire text buffer. * * - @r->text_buf_size is set to the new total size of the buffer. * * - @r->info is not touched so that @r->info->text_len could be used * to append the text. * * - prb_record_text_space() can be used on @e to query the new * actually used space. * * Important: All @r->info fields will already be set with the current values * for the record. I.e. @r->info->text_len will be less than * @text_buf_size. Writers can use @r->info->text_len to know * where concatenation begins and writers should update * @r->info->text_len after concatenating. */ bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, struct printk_record *r, u32 caller_id, unsigned int max_size) { struct prb_desc_ring *desc_ring = &rb->desc_ring; struct printk_info *info; unsigned int data_size; struct prb_desc *d; unsigned long id; local_irq_save(e->irqflags); /* Transition the newest descriptor back to the reserved state. */ d = desc_reopen_last(desc_ring, caller_id, &id); if (!d) { local_irq_restore(e->irqflags); goto fail_reopen; } /* Now the writer has exclusive access: LMM(prb_reserve_in_last:A) */ info = to_info(desc_ring, id); /* * Set the @e fields here so that prb_commit() can be used if * anything fails from now on. */ e->rb = rb; e->id = id; /* * desc_reopen_last() checked the caller_id, but there was no * exclusive access at that point. The descriptor may have * changed since then. */ if (caller_id != info->caller_id) goto fail; if (BLK_DATALESS(&d->text_blk_lpos)) { if (WARN_ON_ONCE(info->text_len != 0)) { pr_warn_once("wrong text_len value (%hu, expecting 0)\n", info->text_len); info->text_len = 0; } if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) goto fail; if (r->text_buf_size > max_size) goto fail; r->text_buf = data_alloc(rb, r->text_buf_size, &d->text_blk_lpos, id); } else { if (!get_data(&rb->text_data_ring, &d->text_blk_lpos, &data_size)) goto fail; /* * Increase the buffer size to include the original size. If * the meta data (@text_len) is not sane, use the full data * block size. */ if (WARN_ON_ONCE(info->text_len > data_size)) { pr_warn_once("wrong text_len value (%hu, expecting <=%u)\n", info->text_len, data_size); info->text_len = data_size; } r->text_buf_size += info->text_len; if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) goto fail; if (r->text_buf_size > max_size) goto fail; r->text_buf = data_realloc(rb, r->text_buf_size, &d->text_blk_lpos, id); } if (r->text_buf_size && !r->text_buf) goto fail; r->info = info; e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos); return true; fail: prb_commit(e); /* prb_commit() re-enabled interrupts. */ fail_reopen: /* Make it clear to the caller that the re-reserve failed. */ memset(r, 0, sizeof(*r)); return false; } /* * @last_finalized_seq value guarantees that all records up to and including * this sequence number are finalized and can be read. The only exception are * too old records which have already been overwritten. * * It is also guaranteed that @last_finalized_seq only increases. * * Be aware that finalized records following non-finalized records are not * reported because they are not yet available to the reader. For example, * a new record stored via printk() will not be available to a printer if * it follows a record that has not been finalized yet. However, once that * non-finalized record becomes finalized, @last_finalized_seq will be * appropriately updated and the full set of finalized records will be * available to the printer. And since each printk() caller will either * directly print or trigger deferred printing of all available unprinted * records, all printk() messages will get printed. */ static u64 desc_last_finalized_seq(struct printk_ringbuffer *rb) { struct prb_desc_ring *desc_ring = &rb->desc_ring; unsigned long ulseq; /* * Guarantee the sequence number is loaded before loading the * associated record in order to guarantee that the record can be * seen by this CPU. This pairs with desc_update_last_finalized:A. */ ulseq = atomic_long_read_acquire(&desc_ring->last_finalized_seq ); /* LMM(desc_last_finalized_seq:A) */ return __ulseq_to_u64seq(rb, ulseq); } static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, struct printk_record *r, unsigned int *line_count); /* * Check if there are records directly following @last_finalized_seq that are * finalized. If so, update @last_finalized_seq to the latest of these * records. It is not allowed to skip over records that are not yet finalized. */ static void desc_update_last_finalized(struct printk_ringbuffer *rb) { struct prb_desc_ring *desc_ring = &rb->desc_ring; u64 old_seq = desc_last_finalized_seq(rb); unsigned long oldval; unsigned long newval; u64 finalized_seq; u64 try_seq; try_again: finalized_seq = old_seq; try_seq = finalized_seq + 1; /* Try to find later finalized records. */ while (_prb_read_valid(rb, &try_seq, NULL, NULL)) { finalized_seq = try_seq; try_seq++; } /* No update needed if no later finalized record was found. */ if (finalized_seq == old_seq) return; oldval = __u64seq_to_ulseq(old_seq); newval = __u64seq_to_ulseq(finalized_seq); /* * Set the sequence number of a later finalized record that has been * seen. * * Guarantee the record data is visible to other CPUs before storing * its sequence number. This pairs with desc_last_finalized_seq:A. * * Memory barrier involvement: * * If desc_last_finalized_seq:A reads from * desc_update_last_finalized:A, then desc_read:A reads from * _prb_commit:B. * * Relies on: * * RELEASE from _prb_commit:B to desc_update_last_finalized:A * matching * ACQUIRE from desc_last_finalized_seq:A to desc_read:A * * Note: _prb_commit:B and desc_update_last_finalized:A can be * different CPUs. However, the desc_update_last_finalized:A * CPU (which performs the release) must have previously seen * _prb_commit:B. */ if (!atomic_long_try_cmpxchg_release(&desc_ring->last_finalized_seq, &oldval, newval)) { /* LMM(desc_update_last_finalized:A) */ old_seq = __ulseq_to_u64seq(rb, oldval); goto try_again; } } /* * Attempt to finalize a specified descriptor. If this fails, the descriptor * is either already final or it will finalize itself when the writer commits. */ static void desc_make_final(struct printk_ringbuffer *rb, unsigned long id) { struct prb_desc_ring *desc_ring = &rb->desc_ring; unsigned long prev_state_val = DESC_SV(id, desc_committed); struct prb_desc *d = to_desc(desc_ring, id); if (atomic_long_try_cmpxchg_relaxed(&d->state_var, &prev_state_val, DESC_SV(id, desc_finalized))) { /* LMM(desc_make_final:A) */ desc_update_last_finalized(rb); } } /** * prb_reserve() - Reserve space in the ringbuffer. * * @e: The entry structure to setup. * @rb: The ringbuffer to reserve data in. * @r: The record structure to allocate buffers for. * * This is the public function available to writers to reserve data. * * The writer specifies the text size to reserve by setting the * @text_buf_size field of @r. To ensure proper initialization of @r, * prb_rec_init_wr() should be used. * * Context: Any context. Disables local interrupts on success. * Return: true if at least text data could be allocated, otherwise false. * * On success, the fields @info and @text_buf of @r will be set by this * function and should be filled in by the writer before committing. Also * on success, prb_record_text_space() can be used on @e to query the actual * space used for the text data block. * * Important: @info->text_len needs to be set correctly by the writer in * order for data to be readable and/or extended. Its value * is initialized to 0. */ bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, struct printk_record *r) { struct prb_desc_ring *desc_ring = &rb->desc_ring; struct printk_info *info; struct prb_desc *d; unsigned long id; u64 seq; if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) goto fail; /* * Descriptors in the reserved state act as blockers to all further * reservations once the desc_ring has fully wrapped. Disable * interrupts during the reserve/commit window in order to minimize * the likelihood of this happening. */ local_irq_save(e->irqflags); if (!desc_reserve(rb, &id)) { /* Descriptor reservation failures are tracked. */ atomic_long_inc(&rb->fail); local_irq_restore(e->irqflags); goto fail; } d = to_desc(desc_ring, id); info = to_info(desc_ring, id); /* * All @info fields (except @seq) are cleared and must be filled in * by the writer. Save @seq before clearing because it is used to * determine the new sequence number. */ seq = info->seq; memset(info, 0, sizeof(*info)); /* * Set the @e fields here so that prb_commit() can be used if * text data allocation fails. */ e->rb = rb; e->id = id; /* * Initialize the sequence number if it has "never been set". * Otherwise just increment it by a full wrap. * * @seq is considered "never been set" if it has a value of 0, * _except_ for @infos[0], which was specially setup by the ringbuffer * initializer and therefore is always considered as set. * * See the "Bootstrap" comment block in printk_ringbuffer.h for * details about how the initializer bootstraps the descriptors. */ if (seq == 0 && DESC_INDEX(desc_ring, id) != 0) info->seq = DESC_INDEX(desc_ring, id); else info->seq = seq + DESCS_COUNT(desc_ring); /* * New data is about to be reserved. Once that happens, previous * descriptors are no longer able to be extended. Finalize the * previous descriptor now so that it can be made available to * readers. (For seq==0 there is no previous descriptor.) */ if (info->seq > 0) desc_make_final(rb, DESC_ID(id - 1)); r->text_buf = data_alloc(rb, r->text_buf_size, &d->text_blk_lpos, id); /* If text data allocation fails, a data-less record is committed. */ if (r->text_buf_size && !r->text_buf) { prb_commit(e); /* prb_commit() re-enabled interrupts. */ goto fail; } r->info = info; /* Record full text space used by record. */ e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos); return true; fail: /* Make it clear to the caller that the reserve failed. */ memset(r, 0, sizeof(*r)); return false; } /* Commit the data (possibly finalizing it) and restore interrupts. */ static void _prb_commit(struct prb_reserved_entry *e, unsigned long state_val) { struct prb_desc_ring *desc_ring = &e->rb->desc_ring; struct prb_desc *d = to_desc(desc_ring, e->id); unsigned long prev_state_val = DESC_SV(e->id, desc_reserved); /* Now the writer has finished all writing: LMM(_prb_commit:A) */ /* * Set the descriptor as committed. See "ABA Issues" about why * cmpxchg() instead of set() is used. * * 1 Guarantee all record data is stored before the descriptor state * is stored as committed. A write memory barrier is sufficient * for this. This pairs with desc_read:B and desc_reopen_last:A. * * 2. Guarantee the descriptor state is stored as committed before * re-checking the head ID in order to possibly finalize this * descriptor. This pairs with desc_reserve:D. * * Memory barrier involvement: * * If prb_commit:A reads from desc_reserve:D, then * desc_make_final:A reads from _prb_commit:B. * * Relies on: * * MB _prb_commit:B to prb_commit:A * matching * MB desc_reserve:D to desc_make_final:A */ if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val, DESC_SV(e->id, state_val))) { /* LMM(_prb_commit:B) */ WARN_ON_ONCE(1); } /* Restore interrupts, the reserve/commit window is finished. */ local_irq_restore(e->irqflags); } /** * prb_commit() - Commit (previously reserved) data to the ringbuffer. * * @e: The entry containing the reserved data information. * * This is the public function available to writers to commit data. * * Note that the data is not yet available to readers until it is finalized. * Finalizing happens automatically when space for the next record is * reserved. * * See prb_final_commit() for a version of this function that finalizes * immediately. * * Context: Any context. Enables local interrupts. */ void prb_commit(struct prb_reserved_entry *e) { struct prb_desc_ring *desc_ring = &e->rb->desc_ring; unsigned long head_id; _prb_commit(e, desc_committed); /* * If this descriptor is no longer the head (i.e. a new record has * been allocated), extending the data for this record is no longer * allowed and therefore it must be finalized. */ head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_commit:A) */ if (head_id != e->id) desc_make_final(e->rb, e->id); } /** * prb_final_commit() - Commit and finalize (previously reserved) data to * the ringbuffer. * * @e: The entry containing the reserved data information. * * This is the public function available to writers to commit+finalize data. * * By finalizing, the data is made immediately available to readers. * * This function should only be used if there are no intentions of extending * this data using prb_reserve_in_last(). * * Context: Any context. Enables local interrupts. */ void prb_final_commit(struct prb_reserved_entry *e) { _prb_commit(e, desc_finalized); desc_update_last_finalized(e->rb); } /* * Count the number of lines in provided text. All text has at least 1 line * (even if @text_size is 0). Each '\n' processed is counted as an additional * line. */ static unsigned int count_lines(const char *text, unsigned int text_size) { unsigned int next_size = text_size; unsigned int line_count = 1; const char *next = text; while (next_size) { next = memchr(next, '\n', next_size); if (!next) break; line_count++; next++; next_size = text_size - (next - text); } return line_count; } /* * Given @blk_lpos, copy an expected @len of data into the provided buffer. * If @line_count is provided, count the number of lines in the data. * * This function (used by readers) performs strict validation on the data * size to possibly detect bugs in the writer code. A WARN_ON_ONCE() is * triggered if an internal error is detected. */ static bool copy_data(struct prb_data_ring *data_ring, struct prb_data_blk_lpos *blk_lpos, u16 len, char *buf, unsigned int buf_size, unsigned int *line_count) { unsigned int data_size; const char *data; /* Caller might not want any data. */ if ((!buf || !buf_size) && !line_count) return true; data = get_data(data_ring, blk_lpos, &data_size); if (!data) return false; /* * Actual cannot be less than expected. It can be more than expected * because of the trailing alignment padding. * * Note that invalid @len values can occur because the caller loads * the value during an allowed data race. */ if (data_size < (unsigned int)len) return false; /* Caller interested in the line count? */ if (line_count) *line_count = count_lines(data, len); /* Caller interested in the data content? */ if (!buf || !buf_size) return true; data_size = min_t(unsigned int, buf_size, len); memcpy(&buf[0], data, data_size); /* LMM(copy_data:A) */ return true; } /* * This is an extended version of desc_read(). It gets a copy of a specified * descriptor. However, it also verifies that the record is finalized and has * the sequence number @seq. On success, 0 is returned. * * Error return values: * -EINVAL: A finalized record with sequence number @seq does not exist. * -ENOENT: A finalized record with sequence number @seq exists, but its data * is not available. This is a valid record, so readers should * continue with the next record. */ static int desc_read_finalized_seq(struct prb_desc_ring *desc_ring, unsigned long id, u64 seq, struct prb_desc *desc_out) { struct prb_data_blk_lpos *blk_lpos = &desc_out->text_blk_lpos; enum desc_state d_state; u64 s; d_state = desc_read(desc_ring, id, desc_out, &s, NULL); /* * An unexpected @id (desc_miss) or @seq mismatch means the record * does not exist. A descriptor in the reserved or committed state * means the record does not yet exist for the reader. */ if (d_state == desc_miss || d_state == desc_reserved || d_state == desc_committed || s != seq) { return -EINVAL; } /* * A descriptor in the reusable state may no longer have its data * available; report it as existing but with lost data. Or the record * may actually be a record with lost data. */ if (d_state == desc_reusable || (blk_lpos->begin == FAILED_LPOS && blk_lpos->next == FAILED_LPOS)) { return -ENOENT; } return 0; } /* * Copy the ringbuffer data from the record with @seq to the provided * @r buffer. On success, 0 is returned. * * See desc_read_finalized_seq() for error return values. */ static int prb_read(struct printk_ringbuffer *rb, u64 seq, struct printk_record *r, unsigned int *line_count) { struct prb_desc_ring *desc_ring = &rb->desc_ring; struct printk_info *info = to_info(desc_ring, seq); struct prb_desc *rdesc = to_desc(desc_ring, seq); atomic_long_t *state_var = &rdesc->state_var; struct prb_desc desc; unsigned long id; int err; /* Extract the ID, used to specify the descriptor to read. */ id = DESC_ID(atomic_long_read(state_var)); /* Get a local copy of the correct descriptor (if available). */ err = desc_read_finalized_seq(desc_ring, id, seq, &desc); /* * If @r is NULL, the caller is only interested in the availability * of the record. */ if (err || !r) return err; /* If requested, copy meta data. */ if (r->info) memcpy(r->info, info, sizeof(*(r->info))); /* Copy text data. If it fails, this is a data-less record. */ if (!copy_data(&rb->text_data_ring, &desc.text_blk_lpos, info->text_len, r->text_buf, r->text_buf_size, line_count)) { return -ENOENT; } /* Ensure the record is still finalized and has the same @seq. */ return desc_read_finalized_seq(desc_ring, id, seq, &desc); } /* Get the sequence number of the tail descriptor. */ u64 prb_first_seq(struct printk_ringbuffer *rb) { struct prb_desc_ring *desc_ring = &rb->desc_ring; enum desc_state d_state; struct prb_desc desc; unsigned long id; u64 seq; for (;;) { id = atomic_long_read(&rb->desc_ring.tail_id); /* LMM(prb_first_seq:A) */ d_state = desc_read(desc_ring, id, &desc, &seq, NULL); /* LMM(prb_first_seq:B) */ /* * This loop will not be infinite because the tail is * _always_ in the finalized or reusable state. */ if (d_state == desc_finalized || d_state == desc_reusable) break; /* * Guarantee the last state load from desc_read() is before * reloading @tail_id in order to see a new tail in the case * that the descriptor has been recycled. This pairs with * desc_reserve:D. * * Memory barrier involvement: * * If prb_first_seq:B reads from desc_reserve:F, then * prb_first_seq:A reads from desc_push_tail:B. * * Relies on: * * MB from desc_push_tail:B to desc_reserve:F * matching * RMB prb_first_seq:B to prb_first_seq:A */ smp_rmb(); /* LMM(prb_first_seq:C) */ } return seq; } /** * prb_next_reserve_seq() - Get the sequence number after the most recently * reserved record. * * @rb: The ringbuffer to get the sequence number from. * * This is the public function available to readers to see what sequence * number will be assigned to the next reserved record. * * Note that depending on the situation, this value can be equal to or * higher than the sequence number returned by prb_next_seq(). * * Context: Any context. * Return: The sequence number that will be assigned to the next record * reserved. */ u64 prb_next_reserve_seq(struct printk_ringbuffer *rb) { struct prb_desc_ring *desc_ring = &rb->desc_ring; unsigned long last_finalized_id; atomic_long_t *state_var; u64 last_finalized_seq; unsigned long head_id; struct prb_desc desc; unsigned long diff; struct prb_desc *d; int err; /* * It may not be possible to read a sequence number for @head_id. * So the ID of @last_finailzed_seq is used to calculate what the * sequence number of @head_id will be. */ try_again: last_finalized_seq = desc_last_finalized_seq(rb); /* * @head_id is loaded after @last_finalized_seq to ensure that * it points to the record with @last_finalized_seq or newer. * * Memory barrier involvement: * * If desc_last_finalized_seq:A reads from * desc_update_last_finalized:A, then * prb_next_reserve_seq:A reads from desc_reserve:D. * * Relies on: * * RELEASE from desc_reserve:D to desc_update_last_finalized:A * matching * ACQUIRE from desc_last_finalized_seq:A to prb_next_reserve_seq:A * * Note: desc_reserve:D and desc_update_last_finalized:A can be * different CPUs. However, the desc_update_last_finalized:A CPU * (which performs the release) must have previously seen * desc_read:C, which implies desc_reserve:D can be seen. */ head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_next_reserve_seq:A) */ d = to_desc(desc_ring, last_finalized_seq); state_var = &d->state_var; /* Extract the ID, used to specify the descriptor to read. */ last_finalized_id = DESC_ID(atomic_long_read(state_var)); /* Ensure @last_finalized_id is correct. */ err = desc_read_finalized_seq(desc_ring, last_finalized_id, last_finalized_seq, &desc); if (err == -EINVAL) { if (last_finalized_seq == 0) { /* * No record has been finalized or even reserved yet. * * The @head_id is initialized such that the first * increment will yield the first record (seq=0). * Handle it separately to avoid a negative @diff * below. */ if (head_id == DESC0_ID(desc_ring->count_bits)) return 0; /* * One or more descriptors are already reserved. Use * the descriptor ID of the first one (@seq=0) for * the @diff below. */ last_finalized_id = DESC0_ID(desc_ring->count_bits) + 1; } else { /* Record must have been overwritten. Try again. */ goto try_again; } } /* Diff of known descriptor IDs to compute related sequence numbers. */ diff = head_id - last_finalized_id; /* * @head_id points to the most recently reserved record, but this * function returns the sequence number that will be assigned to the * next (not yet reserved) record. Thus +1 is needed. */ return (last_finalized_seq + diff + 1); } /* * Non-blocking read of a record. * * On success @seq is updated to the record that was read and (if provided) * @r and @line_count will contain the read/calculated data. * * On failure @seq is updated to a record that is not yet available to the * reader, but it will be the next record available to the reader. * * Note: When the current CPU is in panic, this function will skip over any * non-existent/non-finalized records in order to allow the panic CPU * to print any and all records that have been finalized. */ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, struct printk_record *r, unsigned int *line_count) { u64 tail_seq; int err; while ((err = prb_read(rb, *seq, r, line_count))) { tail_seq = prb_first_seq(rb); if (*seq < tail_seq) { /* * Behind the tail. Catch up and try again. This * can happen for -ENOENT and -EINVAL cases. */ *seq = tail_seq; } else if (err == -ENOENT) { /* Record exists, but the data was lost. Skip. */ (*seq)++; } else { /* * Non-existent/non-finalized record. Must stop. * * For panic situations it cannot be expected that * non-finalized records will become finalized. But * there may be other finalized records beyond that * need to be printed for a panic situation. If this * is the panic CPU, skip this * non-existent/non-finalized record unless it is * at or beyond the head, in which case it is not * possible to continue. * * Note that new messages printed on panic CPU are * finalized when we are here. The only exception * might be the last message without trailing newline. * But it would have the sequence number returned * by "prb_next_reserve_seq() - 1". */ if (this_cpu_in_panic() && ((*seq + 1) < prb_next_reserve_seq(rb))) (*seq)++; else return false; } } return true; } /** * prb_read_valid() - Non-blocking read of a requested record or (if gone) * the next available record. * * @rb: The ringbuffer to read from. * @seq: The sequence number of the record to read. * @r: A record data buffer to store the read record to. * * This is the public function available to readers to read a record. * * The reader provides the @info and @text_buf buffers of @r to be * filled in. Any of the buffer pointers can be set to NULL if the reader * is not interested in that data. To ensure proper initialization of @r, * prb_rec_init_rd() should be used. * * Context: Any context. * Return: true if a record was read, otherwise false. * * On success, the reader must check r->info.seq to see which record was * actually read. This allows the reader to detect dropped records. * * Failure means @seq refers to a record not yet available to the reader. */ bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, struct printk_record *r) { return _prb_read_valid(rb, &seq, r, NULL); } /** * prb_read_valid_info() - Non-blocking read of meta data for a requested * record or (if gone) the next available record. * * @rb: The ringbuffer to read from. * @seq: The sequence number of the record to read. * @info: A buffer to store the read record meta data to. * @line_count: A buffer to store the number of lines in the record text. * * This is the public function available to readers to read only the * meta data of a record. * * The reader provides the @info, @line_count buffers to be filled in. * Either of the buffer pointers can be set to NULL if the reader is not * interested in that data. * * Context: Any context. * Return: true if a record's meta data was read, otherwise false. * * On success, the reader must check info->seq to see which record meta data * was actually read. This allows the reader to detect dropped records. * * Failure means @seq refers to a record not yet available to the reader. */ bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, struct printk_info *info, unsigned int *line_count) { struct printk_record r; prb_rec_init_rd(&r, info, NULL, 0); return _prb_read_valid(rb, &seq, &r, line_count); } /** * prb_first_valid_seq() - Get the sequence number of the oldest available * record. * * @rb: The ringbuffer to get the sequence number from. * * This is the public function available to readers to see what the * first/oldest valid sequence number is. * * This provides readers a starting point to begin iterating the ringbuffer. * * Context: Any context. * Return: The sequence number of the first/oldest record or, if the * ringbuffer is empty, 0 is returned. */ u64 prb_first_valid_seq(struct printk_ringbuffer *rb) { u64 seq = 0; if (!_prb_read_valid(rb, &seq, NULL, NULL)) return 0; return seq; } /** * prb_next_seq() - Get the sequence number after the last available record. * * @rb: The ringbuffer to get the sequence number from. * * This is the public function available to readers to see what the next * newest sequence number available to readers will be. * * This provides readers a sequence number to jump to if all currently * available records should be skipped. It is guaranteed that all records * previous to the returned value have been finalized and are (or were) * available to the reader. * * Context: Any context. * Return: The sequence number of the next newest (not yet available) record * for readers. */ u64 prb_next_seq(struct printk_ringbuffer *rb) { u64 seq; seq = desc_last_finalized_seq(rb); /* * Begin searching after the last finalized record. * * On 0, the search must begin at 0 because of hack#2 * of the bootstrapping phase it is not known if a * record at index 0 exists. */ if (seq != 0) seq++; /* * The information about the last finalized @seq might be inaccurate. * Search forward to find the current one. */ while (_prb_read_valid(rb, &seq, NULL, NULL)) seq++; return seq; } /** * prb_init() - Initialize a ringbuffer to use provided external buffers. * * @rb: The ringbuffer to initialize. * @text_buf: The data buffer for text data. * @textbits: The size of @text_buf as a power-of-2 value. * @descs: The descriptor buffer for ringbuffer records. * @descbits: The count of @descs items as a power-of-2 value. * @infos: The printk_info buffer for ringbuffer records. * * This is the public function available to writers to setup a ringbuffer * during runtime using provided buffers. * * This must match the initialization of DEFINE_PRINTKRB(). * * Context: Any context. */ void prb_init(struct printk_ringbuffer *rb, char *text_buf, unsigned int textbits, struct prb_desc *descs, unsigned int descbits, struct printk_info *infos) { memset(descs, 0, _DESCS_COUNT(descbits) * sizeof(descs[0])); memset(infos, 0, _DESCS_COUNT(descbits) * sizeof(infos[0])); rb->desc_ring.count_bits = descbits; rb->desc_ring.descs = descs; rb->desc_ring.infos = infos; atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits)); atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits)); atomic_long_set(&rb->desc_ring.last_finalized_seq, 0); rb->text_data_ring.size_bits = textbits; rb->text_data_ring.data = text_buf; atomic_long_set(&rb->text_data_ring.head_lpos, BLK0_LPOS(textbits)); atomic_long_set(&rb->text_data_ring.tail_lpos, BLK0_LPOS(textbits)); atomic_long_set(&rb->fail, 0); atomic_long_set(&(descs[_DESCS_COUNT(descbits) - 1].state_var), DESC0_SV(descbits)); descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.begin = FAILED_LPOS; descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.next = FAILED_LPOS; infos[0].seq = -(u64)_DESCS_COUNT(descbits); infos[_DESCS_COUNT(descbits) - 1].seq = 0; } /** * prb_record_text_space() - Query the full actual used ringbuffer space for * the text data of a reserved entry. * * @e: The successfully reserved entry to query. * * This is the public function available to writers to see how much actual * space is used in the ringbuffer to store the text data of the specified * entry. * * This function is only valid if @e has been successfully reserved using * prb_reserve(). * * Context: Any context. * Return: The size in bytes used by the text data of the associated record. */ unsigned int prb_record_text_space(struct prb_reserved_entry *e) { return e->text_space; }
4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 // SPDX-License-Identifier: GPL-2.0-or-later /* * "TEE" target extension for Xtables * Copyright © Sebastian Claßen, 2007 * Jan Engelhardt, 2007-2010 * * based on ipt_ROUTE.c from Cédric de Launois * <delaunois@info.ucl.be> */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/route.h> #include <linux/netfilter/x_tables.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/route.h> #include <net/netfilter/ipv4/nf_dup_ipv4.h> #include <net/netfilter/ipv6/nf_dup_ipv6.h> #include <linux/netfilter/xt_TEE.h> struct xt_tee_priv { struct list_head list; struct xt_tee_tginfo *tginfo; int oif; }; static unsigned int tee_net_id __read_mostly; static const union nf_inet_addr tee_zero_address; struct tee_net { struct list_head priv_list; /* lock protects the priv_list */ struct mutex lock; }; static unsigned int tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; int oif = info->priv ? info->priv->oif : 0; nf_dup_ipv4(xt_net(par), skb, xt_hooknum(par), &info->gw.in, oif); return XT_CONTINUE; } #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static unsigned int tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; int oif = info->priv ? info->priv->oif : 0; nf_dup_ipv6(xt_net(par), skb, xt_hooknum(par), &info->gw.in6, oif); return XT_CONTINUE; } #endif static int tee_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct tee_net *tn = net_generic(net, tee_net_id); struct xt_tee_priv *priv; mutex_lock(&tn->lock); list_for_each_entry(priv, &tn->priv_list, list) { switch (event) { case NETDEV_REGISTER: if (!strcmp(dev->name, priv->tginfo->oif)) priv->oif = dev->ifindex; break; case NETDEV_UNREGISTER: if (dev->ifindex == priv->oif) priv->oif = -1; break; case NETDEV_CHANGENAME: if (!strcmp(dev->name, priv->tginfo->oif)) priv->oif = dev->ifindex; else if (dev->ifindex == priv->oif) priv->oif = -1; break; } } mutex_unlock(&tn->lock); return NOTIFY_DONE; } static int tee_tg_check(const struct xt_tgchk_param *par) { struct tee_net *tn = net_generic(par->net, tee_net_id); struct xt_tee_tginfo *info = par->targinfo; struct xt_tee_priv *priv; /* 0.0.0.0 and :: not allowed */ if (memcmp(&info->gw, &tee_zero_address, sizeof(tee_zero_address)) == 0) return -EINVAL; if (info->oif[0]) { struct net_device *dev; if (info->oif[sizeof(info->oif)-1] != '\0') return -EINVAL; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (priv == NULL) return -ENOMEM; priv->tginfo = info; priv->oif = -1; info->priv = priv; dev = dev_get_by_name(par->net, info->oif); if (dev) { priv->oif = dev->ifindex; dev_put(dev); } mutex_lock(&tn->lock); list_add(&priv->list, &tn->priv_list); mutex_unlock(&tn->lock); } else info->priv = NULL; static_key_slow_inc(&xt_tee_enabled); return 0; } static void tee_tg_destroy(const struct xt_tgdtor_param *par) { struct tee_net *tn = net_generic(par->net, tee_net_id); struct xt_tee_tginfo *info = par->targinfo; if (info->priv) { mutex_lock(&tn->lock); list_del(&info->priv->list); mutex_unlock(&tn->lock); kfree(info->priv); } static_key_slow_dec(&xt_tee_enabled); } static struct xt_target tee_tg_reg[] __read_mostly = { { .name = "TEE", .revision = 1, .family = NFPROTO_IPV4, .target = tee_tg4, .targetsize = sizeof(struct xt_tee_tginfo), .usersize = offsetof(struct xt_tee_tginfo, priv), .checkentry = tee_tg_check, .destroy = tee_tg_destroy, .me = THIS_MODULE, }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "TEE", .revision = 1, .family = NFPROTO_IPV6, .target = tee_tg6, .targetsize = sizeof(struct xt_tee_tginfo), .usersize = offsetof(struct xt_tee_tginfo, priv), .checkentry = tee_tg_check, .destroy = tee_tg_destroy, .me = THIS_MODULE, }, #endif }; static int __net_init tee_net_init(struct net *net) { struct tee_net *tn = net_generic(net, tee_net_id); INIT_LIST_HEAD(&tn->priv_list); mutex_init(&tn->lock); return 0; } static struct pernet_operations tee_net_ops = { .init = tee_net_init, .id = &tee_net_id, .size = sizeof(struct tee_net), }; static struct notifier_block tee_netdev_notifier = { .notifier_call = tee_netdev_event, }; static int __init tee_tg_init(void) { int ret; ret = register_pernet_subsys(&tee_net_ops); if (ret < 0) return ret; ret = xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); if (ret < 0) goto cleanup_subsys; ret = register_netdevice_notifier(&tee_netdev_notifier); if (ret < 0) goto unregister_targets; return 0; unregister_targets: xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); cleanup_subsys: unregister_pernet_subsys(&tee_net_ops); return ret; } static void __exit tee_tg_exit(void) { unregister_netdevice_notifier(&tee_netdev_notifier); xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); unregister_pernet_subsys(&tee_net_ops); } module_init(tee_tg_init); module_exit(tee_tg_exit); MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>"); MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); MODULE_DESCRIPTION("Xtables: Reroute packet copy"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_TEE"); MODULE_ALIAS("ip6t_TEE");
2 2 2 2 2 1 1 10 10 68 20 20 166 1 1 1 1 1 1 2 2 2 2 2 6 6 6 6 6 15 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 // SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2019 Oracle. All Rights Reserved. * Author: Darrick J. Wong <darrick.wong@oracle.com> */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_trace.h" #include "xfs_health.h" #include "xfs_ag.h" #include "xfs_btree.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_quota_defs.h" /* * Warn about metadata corruption that we detected but haven't fixed, and * make sure we're not sitting on anything that would get in the way of * recovery. */ void xfs_health_unmount( struct xfs_mount *mp) { struct xfs_perag *pag; xfs_agnumber_t agno; unsigned int sick = 0; unsigned int checked = 0; bool warn = false; if (xfs_is_shutdown(mp)) return; /* Measure AG corruption levels. */ for_each_perag(mp, agno, pag) { xfs_ag_measure_sickness(pag, &sick, &checked); if (sick) { trace_xfs_ag_unfixed_corruption(mp, agno, sick); warn = true; } } /* Measure realtime volume corruption levels. */ xfs_rt_measure_sickness(mp, &sick, &checked); if (sick) { trace_xfs_rt_unfixed_corruption(mp, sick); warn = true; } /* * Measure fs corruption and keep the sample around for the warning. * See the note below for why we exempt FS_COUNTERS. */ xfs_fs_measure_sickness(mp, &sick, &checked); if (sick & ~XFS_SICK_FS_COUNTERS) { trace_xfs_fs_unfixed_corruption(mp, sick); warn = true; } if (warn) { xfs_warn(mp, "Uncorrected metadata errors detected; please run xfs_repair."); /* * We discovered uncorrected metadata problems at some point * during this filesystem mount and have advised the * administrator to run repair once the unmount completes. * * However, we must be careful -- when FSCOUNTERS are flagged * unhealthy, the unmount procedure omits writing the clean * unmount record to the log so that the next mount will run * recovery and recompute the summary counters. In other * words, we leave a dirty log to get the counters fixed. * * Unfortunately, xfs_repair cannot recover dirty logs, so if * there were filesystem problems, FSCOUNTERS was flagged, and * the administrator takes our advice to run xfs_repair, * they'll have to zap the log before repairing structures. * We don't really want to encourage this, so we mark the * FSCOUNTERS healthy so that a subsequent repair run won't see * a dirty log. */ if (sick & XFS_SICK_FS_COUNTERS) xfs_fs_mark_healthy(mp, XFS_SICK_FS_COUNTERS); } } /* Mark unhealthy per-fs metadata. */ void xfs_fs_mark_sick( struct xfs_mount *mp, unsigned int mask) { ASSERT(!(mask & ~XFS_SICK_FS_ALL)); trace_xfs_fs_mark_sick(mp, mask); spin_lock(&mp->m_sb_lock); mp->m_fs_sick |= mask; spin_unlock(&mp->m_sb_lock); } /* Mark per-fs metadata as having been checked and found unhealthy by fsck. */ void xfs_fs_mark_corrupt( struct xfs_mount *mp, unsigned int mask) { ASSERT(!(mask & ~XFS_SICK_FS_ALL)); trace_xfs_fs_mark_corrupt(mp, mask); spin_lock(&mp->m_sb_lock); mp->m_fs_sick |= mask; mp->m_fs_checked |= mask; spin_unlock(&mp->m_sb_lock); } /* Mark a per-fs metadata healed. */ void xfs_fs_mark_healthy( struct xfs_mount *mp, unsigned int mask) { ASSERT(!(mask & ~XFS_SICK_FS_ALL)); trace_xfs_fs_mark_healthy(mp, mask); spin_lock(&mp->m_sb_lock); mp->m_fs_sick &= ~mask; if (!(mp->m_fs_sick & XFS_SICK_FS_PRIMARY)) mp->m_fs_sick &= ~XFS_SICK_FS_SECONDARY; mp->m_fs_checked |= mask; spin_unlock(&mp->m_sb_lock); } /* Sample which per-fs metadata are unhealthy. */ void xfs_fs_measure_sickness( struct xfs_mount *mp, unsigned int *sick, unsigned int *checked) { spin_lock(&mp->m_sb_lock); *sick = mp->m_fs_sick; *checked = mp->m_fs_checked; spin_unlock(&mp->m_sb_lock); } /* Mark unhealthy realtime metadata. */ void xfs_rt_mark_sick( struct xfs_mount *mp, unsigned int mask) { ASSERT(!(mask & ~XFS_SICK_RT_ALL)); trace_xfs_rt_mark_sick(mp, mask); spin_lock(&mp->m_sb_lock); mp->m_rt_sick |= mask; spin_unlock(&mp->m_sb_lock); } /* Mark realtime metadata as having been checked and found unhealthy by fsck. */ void xfs_rt_mark_corrupt( struct xfs_mount *mp, unsigned int mask) { ASSERT(!(mask & ~XFS_SICK_RT_ALL)); trace_xfs_rt_mark_corrupt(mp, mask); spin_lock(&mp->m_sb_lock); mp->m_rt_sick |= mask; mp->m_rt_checked |= mask; spin_unlock(&mp->m_sb_lock); } /* Mark a realtime metadata healed. */ void xfs_rt_mark_healthy( struct xfs_mount *mp, unsigned int mask) { ASSERT(!(mask & ~XFS_SICK_RT_ALL)); trace_xfs_rt_mark_healthy(mp, mask); spin_lock(&mp->m_sb_lock); mp->m_rt_sick &= ~mask; if (!(mp->m_rt_sick & XFS_SICK_RT_PRIMARY)) mp->m_rt_sick &= ~XFS_SICK_RT_SECONDARY; mp->m_rt_checked |= mask; spin_unlock(&mp->m_sb_lock); } /* Sample which realtime metadata are unhealthy. */ void xfs_rt_measure_sickness( struct xfs_mount *mp, unsigned int *sick, unsigned int *checked) { spin_lock(&mp->m_sb_lock); *sick = mp->m_rt_sick; *checked = mp->m_rt_checked; spin_unlock(&mp->m_sb_lock); } /* Mark unhealthy per-ag metadata given a raw AG number. */ void xfs_agno_mark_sick( struct xfs_mount *mp, xfs_agnumber_t agno, unsigned int mask) { struct xfs_perag *pag = xfs_perag_get(mp, agno); /* per-ag structure not set up yet? */ if (!pag) return; xfs_ag_mark_sick(pag, mask); xfs_perag_put(pag); } /* Mark unhealthy per-ag metadata. */ void xfs_ag_mark_sick( struct xfs_perag *pag, unsigned int mask) { ASSERT(!(mask & ~XFS_SICK_AG_ALL)); trace_xfs_ag_mark_sick(pag->pag_mount, pag->pag_agno, mask); spin_lock(&pag->pag_state_lock); pag->pag_sick |= mask; spin_unlock(&pag->pag_state_lock); } /* Mark per-ag metadata as having been checked and found unhealthy by fsck. */ void xfs_ag_mark_corrupt( struct xfs_perag *pag, unsigned int mask) { ASSERT(!(mask & ~XFS_SICK_AG_ALL)); trace_xfs_ag_mark_corrupt(pag->pag_mount, pag->pag_agno, mask); spin_lock(&pag->pag_state_lock); pag->pag_sick |= mask; pag->pag_checked |= mask; spin_unlock(&pag->pag_state_lock); } /* Mark per-ag metadata ok. */ void xfs_ag_mark_healthy( struct xfs_perag *pag, unsigned int mask) { ASSERT(!(mask & ~XFS_SICK_AG_ALL)); trace_xfs_ag_mark_healthy(pag->pag_mount, pag->pag_agno, mask); spin_lock(&pag->pag_state_lock); pag->pag_sick &= ~mask; if (!(pag->pag_sick & XFS_SICK_AG_PRIMARY)) pag->pag_sick &= ~XFS_SICK_AG_SECONDARY; pag->pag_checked |= mask; spin_unlock(&pag->pag_state_lock); } /* Sample which per-ag metadata are unhealthy. */ void xfs_ag_measure_sickness( struct xfs_perag *pag, unsigned int *sick, unsigned int *checked) { spin_lock(&pag->pag_state_lock); *sick = pag->pag_sick; *checked = pag->pag_checked; spin_unlock(&pag->pag_state_lock); } /* Mark the unhealthy parts of an inode. */ void xfs_inode_mark_sick( struct xfs_inode *ip, unsigned int mask) { ASSERT(!(mask & ~XFS_SICK_INO_ALL)); trace_xfs_inode_mark_sick(ip, mask); spin_lock(&ip->i_flags_lock); ip->i_sick |= mask; spin_unlock(&ip->i_flags_lock); /* * Keep this inode around so we don't lose the sickness report. Scrub * grabs inodes with DONTCACHE assuming that most inode are ok, which * is not the case here. */ spin_lock(&VFS_I(ip)->i_lock); VFS_I(ip)->i_state &= ~I_DONTCACHE; spin_unlock(&VFS_I(ip)->i_lock); } /* Mark inode metadata as having been checked and found unhealthy by fsck. */ void xfs_inode_mark_corrupt( struct xfs_inode *ip, unsigned int mask) { ASSERT(!(mask & ~XFS_SICK_INO_ALL)); trace_xfs_inode_mark_corrupt(ip, mask); spin_lock(&ip->i_flags_lock); ip->i_sick |= mask; ip->i_checked |= mask; spin_unlock(&ip->i_flags_lock); /* * Keep this inode around so we don't lose the sickness report. Scrub * grabs inodes with DONTCACHE assuming that most inode are ok, which * is not the case here. */ spin_lock(&VFS_I(ip)->i_lock); VFS_I(ip)->i_state &= ~I_DONTCACHE; spin_unlock(&VFS_I(ip)->i_lock); } /* Mark parts of an inode healed. */ void xfs_inode_mark_healthy( struct xfs_inode *ip, unsigned int mask) { ASSERT(!(mask & ~XFS_SICK_INO_ALL)); trace_xfs_inode_mark_healthy(ip, mask); spin_lock(&ip->i_flags_lock); ip->i_sick &= ~mask; if (!(ip->i_sick & XFS_SICK_INO_PRIMARY)) ip->i_sick &= ~XFS_SICK_INO_SECONDARY; ip->i_checked |= mask; spin_unlock(&ip->i_flags_lock); } /* Sample which parts of an inode are unhealthy. */ void xfs_inode_measure_sickness( struct xfs_inode *ip, unsigned int *sick, unsigned int *checked) { spin_lock(&ip->i_flags_lock); *sick = ip->i_sick; *checked = ip->i_checked; spin_unlock(&ip->i_flags_lock); } /* Mappings between internal sick masks and ioctl sick masks. */ struct ioctl_sick_map { unsigned int sick_mask; unsigned int ioctl_mask; }; static const struct ioctl_sick_map fs_map[] = { { XFS_SICK_FS_COUNTERS, XFS_FSOP_GEOM_SICK_COUNTERS}, { XFS_SICK_FS_UQUOTA, XFS_FSOP_GEOM_SICK_UQUOTA }, { XFS_SICK_FS_GQUOTA, XFS_FSOP_GEOM_SICK_GQUOTA }, { XFS_SICK_FS_PQUOTA, XFS_FSOP_GEOM_SICK_PQUOTA }, { XFS_SICK_FS_QUOTACHECK, XFS_FSOP_GEOM_SICK_QUOTACHECK }, { XFS_SICK_FS_NLINKS, XFS_FSOP_GEOM_SICK_NLINKS }, { 0, 0 }, }; static const struct ioctl_sick_map rt_map[] = { { XFS_SICK_RT_BITMAP, XFS_FSOP_GEOM_SICK_RT_BITMAP }, { XFS_SICK_RT_SUMMARY, XFS_FSOP_GEOM_SICK_RT_SUMMARY }, { 0, 0 }, }; static inline void xfgeo_health_tick( struct xfs_fsop_geom *geo, unsigned int sick, unsigned int checked, const struct ioctl_sick_map *m) { if (checked & m->sick_mask) geo->checked |= m->ioctl_mask; if (sick & m->sick_mask) geo->sick |= m->ioctl_mask; } /* Fill out fs geometry health info. */ void xfs_fsop_geom_health( struct xfs_mount *mp, struct xfs_fsop_geom *geo) { const struct ioctl_sick_map *m; unsigned int sick; unsigned int checked; geo->sick = 0; geo->checked = 0; xfs_fs_measure_sickness(mp, &sick, &checked); for (m = fs_map; m->sick_mask; m++) xfgeo_health_tick(geo, sick, checked, m); xfs_rt_measure_sickness(mp, &sick, &checked); for (m = rt_map; m->sick_mask; m++) xfgeo_health_tick(geo, sick, checked, m); } static const struct ioctl_sick_map ag_map[] = { { XFS_SICK_AG_SB, XFS_AG_GEOM_SICK_SB }, { XFS_SICK_AG_AGF, XFS_AG_GEOM_SICK_AGF }, { XFS_SICK_AG_AGFL, XFS_AG_GEOM_SICK_AGFL }, { XFS_SICK_AG_AGI, XFS_AG_GEOM_SICK_AGI }, { XFS_SICK_AG_BNOBT, XFS_AG_GEOM_SICK_BNOBT }, { XFS_SICK_AG_CNTBT, XFS_AG_GEOM_SICK_CNTBT }, { XFS_SICK_AG_INOBT, XFS_AG_GEOM_SICK_INOBT }, { XFS_SICK_AG_FINOBT, XFS_AG_GEOM_SICK_FINOBT }, { XFS_SICK_AG_RMAPBT, XFS_AG_GEOM_SICK_RMAPBT }, { XFS_SICK_AG_REFCNTBT, XFS_AG_GEOM_SICK_REFCNTBT }, { XFS_SICK_AG_INODES, XFS_AG_GEOM_SICK_INODES }, { 0, 0 }, }; /* Fill out ag geometry health info. */ void xfs_ag_geom_health( struct xfs_perag *pag, struct xfs_ag_geometry *ageo) { const struct ioctl_sick_map *m; unsigned int sick; unsigned int checked; ageo->ag_sick = 0; ageo->ag_checked = 0; xfs_ag_measure_sickness(pag, &sick, &checked); for (m = ag_map; m->sick_mask; m++) { if (checked & m->sick_mask) ageo->ag_checked |= m->ioctl_mask; if (sick & m->sick_mask) ageo->ag_sick |= m->ioctl_mask; } } static const struct ioctl_sick_map ino_map[] = { { XFS_SICK_INO_CORE, XFS_BS_SICK_INODE }, { XFS_SICK_INO_BMBTD, XFS_BS_SICK_BMBTD }, { XFS_SICK_INO_BMBTA, XFS_BS_SICK_BMBTA }, { XFS_SICK_INO_BMBTC, XFS_BS_SICK_BMBTC }, { XFS_SICK_INO_DIR, XFS_BS_SICK_DIR }, { XFS_SICK_INO_XATTR, XFS_BS_SICK_XATTR }, { XFS_SICK_INO_SYMLINK, XFS_BS_SICK_SYMLINK }, { XFS_SICK_INO_PARENT, XFS_BS_SICK_PARENT }, { XFS_SICK_INO_BMBTD_ZAPPED, XFS_BS_SICK_BMBTD }, { XFS_SICK_INO_BMBTA_ZAPPED, XFS_BS_SICK_BMBTA }, { XFS_SICK_INO_DIR_ZAPPED, XFS_BS_SICK_DIR }, { XFS_SICK_INO_SYMLINK_ZAPPED, XFS_BS_SICK_SYMLINK }, { XFS_SICK_INO_DIRTREE, XFS_BS_SICK_DIRTREE }, { 0, 0 }, }; /* Fill out bulkstat health info. */ void xfs_bulkstat_health( struct xfs_inode *ip, struct xfs_bulkstat *bs) { const struct ioctl_sick_map *m; unsigned int sick; unsigned int checked; bs->bs_sick = 0; bs->bs_checked = 0; xfs_inode_measure_sickness(ip, &sick, &checked); for (m = ino_map; m->sick_mask; m++) { if (checked & m->sick_mask) bs->bs_checked |= m->ioctl_mask; if (sick & m->sick_mask) bs->bs_sick |= m->ioctl_mask; } } /* Mark a block mapping sick. */ void xfs_bmap_mark_sick( struct xfs_inode *ip, int whichfork) { unsigned int mask; switch (whichfork) { case XFS_DATA_FORK: mask = XFS_SICK_INO_BMBTD; break; case XFS_ATTR_FORK: mask = XFS_SICK_INO_BMBTA; break; case XFS_COW_FORK: mask = XFS_SICK_INO_BMBTC; break; default: ASSERT(0); return; } xfs_inode_mark_sick(ip, mask); } /* Record observations of btree corruption with the health tracking system. */ void xfs_btree_mark_sick( struct xfs_btree_cur *cur) { switch (cur->bc_ops->type) { case XFS_BTREE_TYPE_MEM: /* no health state tracking for ephemeral btrees */ return; case XFS_BTREE_TYPE_AG: ASSERT(cur->bc_ops->sick_mask); xfs_ag_mark_sick(cur->bc_ag.pag, cur->bc_ops->sick_mask); return; case XFS_BTREE_TYPE_INODE: if (xfs_btree_is_bmap(cur->bc_ops)) { xfs_bmap_mark_sick(cur->bc_ino.ip, cur->bc_ino.whichfork); return; } fallthrough; default: ASSERT(0); return; } } /* * Record observations of dir/attr btree corruption with the health tracking * system. */ void xfs_dirattr_mark_sick( struct xfs_inode *ip, int whichfork) { unsigned int mask; switch (whichfork) { case XFS_DATA_FORK: mask = XFS_SICK_INO_DIR; break; case XFS_ATTR_FORK: mask = XFS_SICK_INO_XATTR; break; default: ASSERT(0); return; } xfs_inode_mark_sick(ip, mask); } /* * Record observations of dir/attr btree corruption with the health tracking * system. */ void xfs_da_mark_sick( struct xfs_da_args *args) { xfs_dirattr_mark_sick(args->dp, args->whichfork); }
3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 // SPDX-License-Identifier: GPL-2.0-or-later /* * Linux network device link state notification * * Author: * Stefan Rompf <sux@loplof.de> */ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/if.h> #include <net/sock.h> #include <net/pkt_sched.h> #include <linux/rtnetlink.h> #include <linux/jiffies.h> #include <linux/spinlock.h> #include <linux/workqueue.h> #include <linux/bitops.h> #include <linux/types.h> #include "dev.h" enum lw_bits { LW_URGENT = 0, }; static unsigned long linkwatch_flags; static unsigned long linkwatch_nextevent; static void linkwatch_event(struct work_struct *dummy); static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event); static LIST_HEAD(lweventlist); static DEFINE_SPINLOCK(lweventlist_lock); static unsigned int default_operstate(const struct net_device *dev) { if (netif_testing(dev)) return IF_OPER_TESTING; /* Some uppers (DSA) have additional sources for being down, so * first check whether lower is indeed the source of its down state. */ if (!netif_carrier_ok(dev)) { int iflink = dev_get_iflink(dev); struct net_device *peer; if (iflink == dev->ifindex) return IF_OPER_DOWN; peer = __dev_get_by_index(dev_net(dev), iflink); if (!peer) return IF_OPER_DOWN; return netif_carrier_ok(peer) ? IF_OPER_DOWN : IF_OPER_LOWERLAYERDOWN; } if (netif_dormant(dev)) return IF_OPER_DORMANT; return IF_OPER_UP; } static void rfc2863_policy(struct net_device *dev) { unsigned int operstate = default_operstate(dev); if (operstate == READ_ONCE(dev->operstate)) return; switch(dev->link_mode) { case IF_LINK_MODE_TESTING: if (operstate == IF_OPER_UP) operstate = IF_OPER_TESTING; break; case IF_LINK_MODE_DORMANT: if (operstate == IF_OPER_UP) operstate = IF_OPER_DORMANT; break; case IF_LINK_MODE_DEFAULT: default: break; } WRITE_ONCE(dev->operstate, operstate); } void linkwatch_init_dev(struct net_device *dev) { /* Handle pre-registration link state changes */ if (!netif_carrier_ok(dev) || netif_dormant(dev) || netif_testing(dev)) rfc2863_policy(dev); } static bool linkwatch_urgent_event(struct net_device *dev) { if (!netif_running(dev)) return false; if (dev->ifindex != dev_get_iflink(dev)) return true; if (netif_is_lag_port(dev) || netif_is_lag_master(dev)) return true; return netif_carrier_ok(dev) && qdisc_tx_changing(dev); } static void linkwatch_add_event(struct net_device *dev) { unsigned long flags; spin_lock_irqsave(&lweventlist_lock, flags); if (list_empty(&dev->link_watch_list)) { list_add_tail(&dev->link_watch_list, &lweventlist); netdev_hold(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC); } spin_unlock_irqrestore(&lweventlist_lock, flags); } static void linkwatch_schedule_work(int urgent) { unsigned long delay = linkwatch_nextevent - jiffies; if (test_bit(LW_URGENT, &linkwatch_flags)) return; /* Minimise down-time: drop delay for up event. */ if (urgent) { if (test_and_set_bit(LW_URGENT, &linkwatch_flags)) return; delay = 0; } /* If we wrap around we'll delay it by at most HZ. */ if (delay > HZ) delay = 0; /* * If urgent, schedule immediate execution; otherwise, don't * override the existing timer. */ if (test_bit(LW_URGENT, &linkwatch_flags)) mod_delayed_work(system_unbound_wq, &linkwatch_work, 0); else queue_delayed_work(system_unbound_wq, &linkwatch_work, delay); } static void linkwatch_do_dev(struct net_device *dev) { /* * Make sure the above read is complete since it can be * rewritten as soon as we clear the bit below. */ smp_mb__before_atomic(); /* We are about to handle this device, * so new events can be accepted */ clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); rfc2863_policy(dev); if (dev->flags & IFF_UP) { if (netif_carrier_ok(dev)) dev_activate(dev); else dev_deactivate(dev); netdev_state_change(dev); } /* Note: our callers are responsible for calling netdev_tracker_free(). * This is the reason we use __dev_put() instead of dev_put(). */ __dev_put(dev); } static void __linkwatch_run_queue(int urgent_only) { #define MAX_DO_DEV_PER_LOOP 100 int do_dev = MAX_DO_DEV_PER_LOOP; /* Use a local list here since we add non-urgent * events back to the global one when called with * urgent_only=1. */ LIST_HEAD(wrk); /* Give urgent case more budget */ if (urgent_only) do_dev += MAX_DO_DEV_PER_LOOP; /* * Limit the number of linkwatch events to one * per second so that a runaway driver does not * cause a storm of messages on the netlink * socket. This limit does not apply to up events * while the device qdisc is down. */ if (!urgent_only) linkwatch_nextevent = jiffies + HZ; /* Limit wrap-around effect on delay. */ else if (time_after(linkwatch_nextevent, jiffies + HZ)) linkwatch_nextevent = jiffies; clear_bit(LW_URGENT, &linkwatch_flags); spin_lock_irq(&lweventlist_lock); list_splice_init(&lweventlist, &wrk); while (!list_empty(&wrk) && do_dev > 0) { struct net_device *dev; dev = list_first_entry(&wrk, struct net_device, link_watch_list); list_del_init(&dev->link_watch_list); if (!netif_device_present(dev) || (urgent_only && !linkwatch_urgent_event(dev))) { list_add_tail(&dev->link_watch_list, &lweventlist); continue; } /* We must free netdev tracker under * the spinlock protection. */ netdev_tracker_free(dev, &dev->linkwatch_dev_tracker); spin_unlock_irq(&lweventlist_lock); linkwatch_do_dev(dev); do_dev--; spin_lock_irq(&lweventlist_lock); } /* Add the remaining work back to lweventlist */ list_splice_init(&wrk, &lweventlist); if (!list_empty(&lweventlist)) linkwatch_schedule_work(0); spin_unlock_irq(&lweventlist_lock); } void linkwatch_sync_dev(struct net_device *dev) { unsigned long flags; int clean = 0; spin_lock_irqsave(&lweventlist_lock, flags); if (!list_empty(&dev->link_watch_list)) { list_del_init(&dev->link_watch_list); clean = 1; /* We must release netdev tracker under * the spinlock protection. */ netdev_tracker_free(dev, &dev->linkwatch_dev_tracker); } spin_unlock_irqrestore(&lweventlist_lock, flags); if (clean) linkwatch_do_dev(dev); } /* Must be called with the rtnl semaphore held */ void linkwatch_run_queue(void) { __linkwatch_run_queue(0); } static void linkwatch_event(struct work_struct *dummy) { rtnl_lock(); __linkwatch_run_queue(time_after(linkwatch_nextevent, jiffies)); rtnl_unlock(); } void linkwatch_fire_event(struct net_device *dev) { bool urgent = linkwatch_urgent_event(dev); if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { linkwatch_add_event(dev); } else if (!urgent) return; linkwatch_schedule_work(urgent); } EXPORT_SYMBOL(linkwatch_fire_event);
1530 1527 109 109 187 188 3 3 839 840 191 193 726 728 141 143 3 3 32 33 1225 439 1213 460 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 /* * Compatibility functions which bloat the callers too much to make inline. * All of the callers of these functions should be converted to use folios * eventually. */ #include <linux/migrate.h> #include <linux/pagemap.h> #include <linux/rmap.h> #include <linux/swap.h> #include "internal.h" void unlock_page(struct page *page) { return folio_unlock(page_folio(page)); } EXPORT_SYMBOL(unlock_page); void end_page_writeback(struct page *page) { return folio_end_writeback(page_folio(page)); } EXPORT_SYMBOL(end_page_writeback); void wait_on_page_writeback(struct page *page) { return folio_wait_writeback(page_folio(page)); } EXPORT_SYMBOL_GPL(wait_on_page_writeback); void wait_for_stable_page(struct page *page) { return folio_wait_stable(page_folio(page)); } EXPORT_SYMBOL_GPL(wait_for_stable_page); void mark_page_accessed(struct page *page) { folio_mark_accessed(page_folio(page)); } EXPORT_SYMBOL(mark_page_accessed); void set_page_writeback(struct page *page) { folio_start_writeback(page_folio(page)); } EXPORT_SYMBOL(set_page_writeback); bool set_page_dirty(struct page *page) { return folio_mark_dirty(page_folio(page)); } EXPORT_SYMBOL(set_page_dirty); bool clear_page_dirty_for_io(struct page *page) { return folio_clear_dirty_for_io(page_folio(page)); } EXPORT_SYMBOL(clear_page_dirty_for_io); bool redirty_page_for_writepage(struct writeback_control *wbc, struct page *page) { return folio_redirty_for_writepage(wbc, page_folio(page)); } EXPORT_SYMBOL(redirty_page_for_writepage); int add_to_page_cache_lru(struct page *page, struct address_space *mapping, pgoff_t index, gfp_t gfp) { return filemap_add_folio(mapping, page_folio(page), index, gfp); } EXPORT_SYMBOL(add_to_page_cache_lru); noinline struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, fgf_t fgp_flags, gfp_t gfp) { struct folio *folio; folio = __filemap_get_folio(mapping, index, fgp_flags, gfp); if (IS_ERR(folio)) return NULL; return folio_file_page(folio, index); } EXPORT_SYMBOL(pagecache_get_page); struct page *grab_cache_page_write_begin(struct address_space *mapping, pgoff_t index) { return pagecache_get_page(mapping, index, FGP_WRITEBEGIN, mapping_gfp_mask(mapping)); } EXPORT_SYMBOL(grab_cache_page_write_begin); bool isolate_lru_page(struct page *page) { if (WARN_RATELIMIT(PageTail(page), "trying to isolate tail page")) return false; return folio_isolate_lru((struct folio *)page); } void putback_lru_page(struct page *page) { folio_putback_lru(page_folio(page)); }
9 9 9 22 22 22 18 12 12 3 2 3 1 1 8 1 8 2 7 2 3 8 2 8 1 7 12 11 12 12 12 7 7 2 9 11 13 13 13 4 9 13 13 13 13 13 5 13 13 13 13 13 13 13 13 7 6 13 13 13 5 13 13 8 5 5 13 20 3 1 13 1 1 22 1 21 20 18 18 14 1 1 13 13 9 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 // SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2017 Oracle. All Rights Reserved. * * Author: Darrick J. Wong <darrick.wong@oracle.com> */ #include "ext4.h" #include <linux/fsmap.h> #include "fsmap.h" #include "mballoc.h" #include <linux/sort.h> #include <linux/list_sort.h> #include <trace/events/ext4.h> /* Convert an ext4_fsmap to an fsmap. */ void ext4_fsmap_from_internal(struct super_block *sb, struct fsmap *dest, struct ext4_fsmap *src) { dest->fmr_device = src->fmr_device; dest->fmr_flags = src->fmr_flags; dest->fmr_physical = src->fmr_physical << sb->s_blocksize_bits; dest->fmr_owner = src->fmr_owner; dest->fmr_offset = 0; dest->fmr_length = src->fmr_length << sb->s_blocksize_bits; dest->fmr_reserved[0] = 0; dest->fmr_reserved[1] = 0; dest->fmr_reserved[2] = 0; } /* Convert an fsmap to an ext4_fsmap. */ void ext4_fsmap_to_internal(struct super_block *sb, struct ext4_fsmap *dest, struct fsmap *src) { dest->fmr_device = src->fmr_device; dest->fmr_flags = src->fmr_flags; dest->fmr_physical = src->fmr_physical >> sb->s_blocksize_bits; dest->fmr_owner = src->fmr_owner; dest->fmr_length = src->fmr_length >> sb->s_blocksize_bits; } /* getfsmap query state */ struct ext4_getfsmap_info { struct ext4_fsmap_head *gfi_head; ext4_fsmap_format_t gfi_formatter; /* formatting fn */ void *gfi_format_arg;/* format buffer */ ext4_fsblk_t gfi_next_fsblk; /* next fsblock we expect */ u32 gfi_dev; /* device id */ ext4_group_t gfi_agno; /* bg number, if applicable */ struct ext4_fsmap gfi_low; /* low rmap key */ struct ext4_fsmap gfi_high; /* high rmap key */ struct ext4_fsmap gfi_lastfree; /* free ext at end of last bg */ struct list_head gfi_meta_list; /* fixed metadata list */ bool gfi_last; /* last extent? */ }; /* Associate a device with a getfsmap handler. */ struct ext4_getfsmap_dev { int (*gfd_fn)(struct super_block *sb, struct ext4_fsmap *keys, struct ext4_getfsmap_info *info); u32 gfd_dev; }; /* Compare two getfsmap device handlers. */ static int ext4_getfsmap_dev_compare(const void *p1, const void *p2) { const struct ext4_getfsmap_dev *d1 = p1; const struct ext4_getfsmap_dev *d2 = p2; return d1->gfd_dev - d2->gfd_dev; } /* Compare a record against our starting point */ static bool ext4_getfsmap_rec_before_low_key(struct ext4_getfsmap_info *info, struct ext4_fsmap *rec) { return rec->fmr_physical < info->gfi_low.fmr_physical; } /* * Format a reverse mapping for getfsmap, having translated rm_startblock * into the appropriate daddr units. */ static int ext4_getfsmap_helper(struct super_block *sb, struct ext4_getfsmap_info *info, struct ext4_fsmap *rec) { struct ext4_fsmap fmr; struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t rec_fsblk = rec->fmr_physical; ext4_group_t agno; ext4_grpblk_t cno; int error; if (fatal_signal_pending(current)) return -EINTR; /* * Filter out records that start before our startpoint, if the * caller requested that. */ if (ext4_getfsmap_rec_before_low_key(info, rec)) { rec_fsblk += rec->fmr_length; if (info->gfi_next_fsblk < rec_fsblk) info->gfi_next_fsblk = rec_fsblk; return EXT4_QUERY_RANGE_CONTINUE; } /* Are we just counting mappings? */ if (info->gfi_head->fmh_count == 0) { if (info->gfi_head->fmh_entries == UINT_MAX) return EXT4_QUERY_RANGE_ABORT; if (rec_fsblk > info->gfi_next_fsblk) info->gfi_head->fmh_entries++; if (info->gfi_last) return EXT4_QUERY_RANGE_CONTINUE; info->gfi_head->fmh_entries++; rec_fsblk += rec->fmr_length; if (info->gfi_next_fsblk < rec_fsblk) info->gfi_next_fsblk = rec_fsblk; return EXT4_QUERY_RANGE_CONTINUE; } /* * If the record starts past the last physical block we saw, * then we've found a gap. Report the gap as being owned by * whatever the caller specified is the missing owner. */ if (rec_fsblk > info->gfi_next_fsblk) { if (info->gfi_head->fmh_entries >= info->gfi_head->fmh_count) return EXT4_QUERY_RANGE_ABORT; ext4_get_group_no_and_offset(sb, info->gfi_next_fsblk, &agno, &cno); trace_ext4_fsmap_mapping(sb, info->gfi_dev, agno, EXT4_C2B(sbi, cno), rec_fsblk - info->gfi_next_fsblk, EXT4_FMR_OWN_UNKNOWN); fmr.fmr_device = info->gfi_dev; fmr.fmr_physical = info->gfi_next_fsblk; fmr.fmr_owner = EXT4_FMR_OWN_UNKNOWN; fmr.fmr_length = rec_fsblk - info->gfi_next_fsblk; fmr.fmr_flags = FMR_OF_SPECIAL_OWNER; error = info->gfi_formatter(&fmr, info->gfi_format_arg); if (error) return error; info->gfi_head->fmh_entries++; } if (info->gfi_last) goto out; /* Fill out the extent we found */ if (info->gfi_head->fmh_entries >= info->gfi_head->fmh_count) return EXT4_QUERY_RANGE_ABORT; ext4_get_group_no_and_offset(sb, rec_fsblk, &agno, &cno); trace_ext4_fsmap_mapping(sb, info->gfi_dev, agno, EXT4_C2B(sbi, cno), rec->fmr_length, rec->fmr_owner); fmr.fmr_device = info->gfi_dev; fmr.fmr_physical = rec_fsblk; fmr.fmr_owner = rec->fmr_owner; fmr.fmr_flags = FMR_OF_SPECIAL_OWNER; fmr.fmr_length = rec->fmr_length; error = info->gfi_formatter(&fmr, info->gfi_format_arg); if (error) return error; info->gfi_head->fmh_entries++; out: rec_fsblk += rec->fmr_length; if (info->gfi_next_fsblk < rec_fsblk) info->gfi_next_fsblk = rec_fsblk; return EXT4_QUERY_RANGE_CONTINUE; } static inline ext4_fsblk_t ext4_fsmap_next_pblk(struct ext4_fsmap *fmr) { return fmr->fmr_physical + fmr->fmr_length; } /* Transform a blockgroup's free record into a fsmap */ static int ext4_getfsmap_datadev_helper(struct super_block *sb, ext4_group_t agno, ext4_grpblk_t start, ext4_grpblk_t len, void *priv) { struct ext4_fsmap irec; struct ext4_getfsmap_info *info = priv; struct ext4_fsmap *p; struct ext4_fsmap *tmp; struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t fsb; ext4_fsblk_t fslen; int error; fsb = (EXT4_C2B(sbi, start) + ext4_group_first_block_no(sb, agno)); fslen = EXT4_C2B(sbi, len); /* If the retained free extent record is set... */ if (info->gfi_lastfree.fmr_owner) { /* ...and abuts this one, lengthen it and return. */ if (ext4_fsmap_next_pblk(&info->gfi_lastfree) == fsb) { info->gfi_lastfree.fmr_length += fslen; return 0; } /* * There's a gap between the two free extents; emit the * retained extent prior to merging the meta_list. */ error = ext4_getfsmap_helper(sb, info, &info->gfi_lastfree); if (error) return error; info->gfi_lastfree.fmr_owner = 0; } /* Merge in any relevant extents from the meta_list */ list_for_each_entry_safe(p, tmp, &info->gfi_meta_list, fmr_list) { if (p->fmr_physical + p->fmr_length <= info->gfi_next_fsblk) { list_del(&p->fmr_list); kfree(p); } else if (p->fmr_physical < fsb) { error = ext4_getfsmap_helper(sb, info, p); if (error) return error; list_del(&p->fmr_list); kfree(p); } } irec.fmr_device = 0; irec.fmr_physical = fsb; irec.fmr_length = fslen; irec.fmr_owner = EXT4_FMR_OWN_FREE; irec.fmr_flags = 0; /* If this is a free extent at the end of a bg, buffer it. */ if (ext4_fsmap_next_pblk(&irec) == ext4_group_first_block_no(sb, agno + 1)) { info->gfi_lastfree = irec; return 0; } /* Otherwise, emit it */ return ext4_getfsmap_helper(sb, info, &irec); } /* Execute a getfsmap query against the log device. */ static int ext4_getfsmap_logdev(struct super_block *sb, struct ext4_fsmap *keys, struct ext4_getfsmap_info *info) { journal_t *journal = EXT4_SB(sb)->s_journal; struct ext4_fsmap irec; /* Set up search keys */ info->gfi_low = keys[0]; info->gfi_low.fmr_length = 0; memset(&info->gfi_high, 0xFF, sizeof(info->gfi_high)); trace_ext4_fsmap_low_key(sb, info->gfi_dev, 0, info->gfi_low.fmr_physical, info->gfi_low.fmr_length, info->gfi_low.fmr_owner); trace_ext4_fsmap_high_key(sb, info->gfi_dev, 0, info->gfi_high.fmr_physical, info->gfi_high.fmr_length, info->gfi_high.fmr_owner); if (keys[0].fmr_physical > 0) return 0; /* Fabricate an rmap entry for the external log device. */ irec.fmr_physical = journal->j_blk_offset; irec.fmr_length = journal->j_total_len; irec.fmr_owner = EXT4_FMR_OWN_LOG; irec.fmr_flags = 0; return ext4_getfsmap_helper(sb, info, &irec); } /* Helper to fill out an ext4_fsmap. */ static inline int ext4_getfsmap_fill(struct list_head *meta_list, ext4_fsblk_t fsb, ext4_fsblk_t len, uint64_t owner) { struct ext4_fsmap *fsm; fsm = kmalloc(sizeof(*fsm), GFP_NOFS); if (!fsm) return -ENOMEM; fsm->fmr_device = 0; fsm->fmr_flags = 0; fsm->fmr_physical = fsb; fsm->fmr_owner = owner; fsm->fmr_length = len; list_add_tail(&fsm->fmr_list, meta_list); return 0; } /* * This function returns the number of file system metadata blocks at * the beginning of a block group, including the reserved gdt blocks. */ static unsigned int ext4_getfsmap_find_sb(struct super_block *sb, ext4_group_t agno, struct list_head *meta_list) { struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t fsb = ext4_group_first_block_no(sb, agno); ext4_fsblk_t len; unsigned long first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); unsigned long metagroup = agno / EXT4_DESC_PER_BLOCK(sb); int error; /* Record the superblock. */ if (ext4_bg_has_super(sb, agno)) { error = ext4_getfsmap_fill(meta_list, fsb, 1, EXT4_FMR_OWN_FS); if (error) return error; fsb++; } /* Record the group descriptors. */ len = ext4_bg_num_gdb(sb, agno); if (!len) return 0; error = ext4_getfsmap_fill(meta_list, fsb, len, EXT4_FMR_OWN_GDT); if (error) return error; fsb += len; /* Reserved GDT blocks */ if (!ext4_has_feature_meta_bg(sb) || metagroup < first_meta_bg) { len = le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); error = ext4_getfsmap_fill(meta_list, fsb, len, EXT4_FMR_OWN_RESV_GDT); if (error) return error; } return 0; } /* Compare two fsmap items. */ static int ext4_getfsmap_compare(void *priv, const struct list_head *a, const struct list_head *b) { struct ext4_fsmap *fa; struct ext4_fsmap *fb; fa = container_of(a, struct ext4_fsmap, fmr_list); fb = container_of(b, struct ext4_fsmap, fmr_list); if (fa->fmr_physical < fb->fmr_physical) return -1; else if (fa->fmr_physical > fb->fmr_physical) return 1; return 0; } /* Merge adjacent extents of fixed metadata. */ static void ext4_getfsmap_merge_fixed_metadata(struct list_head *meta_list) { struct ext4_fsmap *p; struct ext4_fsmap *prev = NULL; struct ext4_fsmap *tmp; list_for_each_entry_safe(p, tmp, meta_list, fmr_list) { if (!prev) { prev = p; continue; } if (prev->fmr_owner == p->fmr_owner && prev->fmr_physical + prev->fmr_length == p->fmr_physical) { prev->fmr_length += p->fmr_length; list_del(&p->fmr_list); kfree(p); } else prev = p; } } /* Free a list of fixed metadata. */ static void ext4_getfsmap_free_fixed_metadata(struct list_head *meta_list) { struct ext4_fsmap *p; struct ext4_fsmap *tmp; list_for_each_entry_safe(p, tmp, meta_list, fmr_list) { list_del(&p->fmr_list); kfree(p); } } /* Find all the fixed metadata in the filesystem. */ static int ext4_getfsmap_find_fixed_metadata(struct super_block *sb, struct list_head *meta_list) { struct ext4_group_desc *gdp; ext4_group_t agno; int error; INIT_LIST_HEAD(meta_list); /* Collect everything. */ for (agno = 0; agno < EXT4_SB(sb)->s_groups_count; agno++) { gdp = ext4_get_group_desc(sb, agno, NULL); if (!gdp) { error = -EFSCORRUPTED; goto err; } /* Superblock & GDT */ error = ext4_getfsmap_find_sb(sb, agno, meta_list); if (error) goto err; /* Block bitmap */ error = ext4_getfsmap_fill(meta_list, ext4_block_bitmap(sb, gdp), 1, EXT4_FMR_OWN_BLKBM); if (error) goto err; /* Inode bitmap */ error = ext4_getfsmap_fill(meta_list, ext4_inode_bitmap(sb, gdp), 1, EXT4_FMR_OWN_INOBM); if (error) goto err; /* Inodes */ error = ext4_getfsmap_fill(meta_list, ext4_inode_table(sb, gdp), EXT4_SB(sb)->s_itb_per_group, EXT4_FMR_OWN_INODES); if (error) goto err; } /* Sort the list */ list_sort(NULL, meta_list, ext4_getfsmap_compare); /* Merge adjacent extents */ ext4_getfsmap_merge_fixed_metadata(meta_list); return 0; err: ext4_getfsmap_free_fixed_metadata(meta_list); return error; } /* Execute a getfsmap query against the buddy bitmaps */ static int ext4_getfsmap_datadev(struct super_block *sb, struct ext4_fsmap *keys, struct ext4_getfsmap_info *info) { struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t start_fsb; ext4_fsblk_t end_fsb; ext4_fsblk_t bofs; ext4_fsblk_t eofs; ext4_group_t start_ag; ext4_group_t end_ag; ext4_grpblk_t first_cluster; ext4_grpblk_t last_cluster; int error = 0; bofs = le32_to_cpu(sbi->s_es->s_first_data_block); eofs = ext4_blocks_count(sbi->s_es); if (keys[0].fmr_physical >= eofs) return 0; else if (keys[0].fmr_physical < bofs) keys[0].fmr_physical = bofs; if (keys[1].fmr_physical >= eofs) keys[1].fmr_physical = eofs - 1; if (keys[1].fmr_physical < keys[0].fmr_physical) return 0; start_fsb = keys[0].fmr_physical; end_fsb = keys[1].fmr_physical; /* Determine first and last group to examine based on start and end */ ext4_get_group_no_and_offset(sb, start_fsb, &start_ag, &first_cluster); ext4_get_group_no_and_offset(sb, end_fsb, &end_ag, &last_cluster); /* * Convert the fsmap low/high keys to bg based keys. Initialize * low to the fsmap low key and max out the high key to the end * of the bg. */ info->gfi_low = keys[0]; info->gfi_low.fmr_physical = EXT4_C2B(sbi, first_cluster); info->gfi_low.fmr_length = 0; memset(&info->gfi_high, 0xFF, sizeof(info->gfi_high)); /* Assemble a list of all the fixed-location metadata. */ error = ext4_getfsmap_find_fixed_metadata(sb, &info->gfi_meta_list); if (error) goto err; /* Query each bg */ for (info->gfi_agno = start_ag; info->gfi_agno <= end_ag; info->gfi_agno++) { /* * Set the bg high key from the fsmap high key if this * is the last bg that we're querying. */ if (info->gfi_agno == end_ag) { info->gfi_high = keys[1]; info->gfi_high.fmr_physical = EXT4_C2B(sbi, last_cluster); info->gfi_high.fmr_length = 0; } trace_ext4_fsmap_low_key(sb, info->gfi_dev, info->gfi_agno, info->gfi_low.fmr_physical, info->gfi_low.fmr_length, info->gfi_low.fmr_owner); trace_ext4_fsmap_high_key(sb, info->gfi_dev, info->gfi_agno, info->gfi_high.fmr_physical, info->gfi_high.fmr_length, info->gfi_high.fmr_owner); error = ext4_mballoc_query_range(sb, info->gfi_agno, EXT4_B2C(sbi, info->gfi_low.fmr_physical), EXT4_B2C(sbi, info->gfi_high.fmr_physical), ext4_getfsmap_datadev_helper, info); if (error) goto err; /* * Set the bg low key to the start of the bg prior to * moving on to the next bg. */ if (info->gfi_agno == start_ag) memset(&info->gfi_low, 0, sizeof(info->gfi_low)); } /* Do we have a retained free extent? */ if (info->gfi_lastfree.fmr_owner) { error = ext4_getfsmap_helper(sb, info, &info->gfi_lastfree); if (error) goto err; } /* Report any gaps at the end of the bg */ info->gfi_last = true; error = ext4_getfsmap_datadev_helper(sb, end_ag, last_cluster, 0, info); if (error) goto err; err: ext4_getfsmap_free_fixed_metadata(&info->gfi_meta_list); return error; } /* Do we recognize the device? */ static bool ext4_getfsmap_is_valid_device(struct super_block *sb, struct ext4_fsmap *fm) { if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX || fm->fmr_device == new_encode_dev(sb->s_bdev->bd_dev)) return true; if (EXT4_SB(sb)->s_journal_bdev_file && fm->fmr_device == new_encode_dev(file_bdev(EXT4_SB(sb)->s_journal_bdev_file)->bd_dev)) return true; return false; } /* Ensure that the low key is less than the high key. */ static bool ext4_getfsmap_check_keys(struct ext4_fsmap *low_key, struct ext4_fsmap *high_key) { if (low_key->fmr_device > high_key->fmr_device) return false; if (low_key->fmr_device < high_key->fmr_device) return true; if (low_key->fmr_physical > high_key->fmr_physical) return false; if (low_key->fmr_physical < high_key->fmr_physical) return true; if (low_key->fmr_owner > high_key->fmr_owner) return false; if (low_key->fmr_owner < high_key->fmr_owner) return true; return false; } #define EXT4_GETFSMAP_DEVS 2 /* * Get filesystem's extents as described in head, and format for * output. Calls formatter to fill the user's buffer until all * extents are mapped, until the passed-in head->fmh_count slots have * been filled, or until the formatter short-circuits the loop, if it * is tracking filled-in extents on its own. * * Key to Confusion * ---------------- * There are multiple levels of keys and counters at work here: * _fsmap_head.fmh_keys -- low and high fsmap keys passed in; * these reflect fs-wide block addrs. * dkeys -- fmh_keys used to query each device; * these are fmh_keys but w/ the low key * bumped up by fmr_length. * _getfsmap_info.gfi_next_fsblk-- next fs block we expect to see; this * is how we detect gaps in the fsmap * records and report them. * _getfsmap_info.gfi_low/high -- per-bg low/high keys computed from * dkeys; used to query the free space. */ int ext4_getfsmap(struct super_block *sb, struct ext4_fsmap_head *head, ext4_fsmap_format_t formatter, void *arg) { struct ext4_fsmap dkeys[2]; /* per-dev keys */ struct ext4_getfsmap_dev handlers[EXT4_GETFSMAP_DEVS]; struct ext4_getfsmap_info info = { NULL }; int i; int error = 0; if (head->fmh_iflags & ~FMH_IF_VALID) return -EINVAL; if (!ext4_getfsmap_is_valid_device(sb, &head->fmh_keys[0]) || !ext4_getfsmap_is_valid_device(sb, &head->fmh_keys[1])) return -EINVAL; head->fmh_entries = 0; /* Set up our device handlers. */ memset(handlers, 0, sizeof(handlers)); handlers[0].gfd_dev = new_encode_dev(sb->s_bdev->bd_dev); handlers[0].gfd_fn = ext4_getfsmap_datadev; if (EXT4_SB(sb)->s_journal_bdev_file) { handlers[1].gfd_dev = new_encode_dev( file_bdev(EXT4_SB(sb)->s_journal_bdev_file)->bd_dev); handlers[1].gfd_fn = ext4_getfsmap_logdev; } sort(handlers, EXT4_GETFSMAP_DEVS, sizeof(struct ext4_getfsmap_dev), ext4_getfsmap_dev_compare, NULL); /* * To continue where we left off, we allow userspace to use the * last mapping from a previous call as the low key of the next. * This is identified by a non-zero length in the low key. We * have to increment the low key in this scenario to ensure we * don't return the same mapping again, and instead return the * very next mapping. * * Bump the physical offset as there can be no other mapping for * the same physical block range. */ dkeys[0] = head->fmh_keys[0]; dkeys[0].fmr_physical += dkeys[0].fmr_length; dkeys[0].fmr_owner = 0; dkeys[0].fmr_length = 0; memset(&dkeys[1], 0xFF, sizeof(struct ext4_fsmap)); if (!ext4_getfsmap_check_keys(dkeys, &head->fmh_keys[1])) return -EINVAL; info.gfi_next_fsblk = head->fmh_keys[0].fmr_physical + head->fmh_keys[0].fmr_length; info.gfi_formatter = formatter; info.gfi_format_arg = arg; info.gfi_head = head; /* For each device we support... */ for (i = 0; i < EXT4_GETFSMAP_DEVS; i++) { /* Is this device within the range the user asked for? */ if (!handlers[i].gfd_fn) continue; if (head->fmh_keys[0].fmr_device > handlers[i].gfd_dev) continue; if (head->fmh_keys[1].fmr_device < handlers[i].gfd_dev) break; /* * If this device number matches the high key, we have * to pass the high key to the handler to limit the * query results. If the device number exceeds the * low key, zero out the low key so that we get * everything from the beginning. */ if (handlers[i].gfd_dev == head->fmh_keys[1].fmr_device) dkeys[1] = head->fmh_keys[1]; if (handlers[i].gfd_dev > head->fmh_keys[0].fmr_device) memset(&dkeys[0], 0, sizeof(struct ext4_fsmap)); info.gfi_dev = handlers[i].gfd_dev; info.gfi_last = false; info.gfi_agno = -1; error = handlers[i].gfd_fn(sb, dkeys, &info); if (error) break; info.gfi_next_fsblk = 0; } head->fmh_oflags = FMH_OF_DEV_T; return error; }
4879 4879 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 #ifndef BLK_THROTTLE_H #define BLK_THROTTLE_H #include "blk-cgroup-rwstat.h" /* * To implement hierarchical throttling, throtl_grps form a tree and bios * are dispatched upwards level by level until they reach the top and get * issued. When dispatching bios from the children and local group at each * level, if the bios are dispatched into a single bio_list, there's a risk * of a local or child group which can queue many bios at once filling up * the list starving others. * * To avoid such starvation, dispatched bios are queued separately * according to where they came from. When they are again dispatched to * the parent, they're popped in round-robin order so that no single source * hogs the dispatch window. * * throtl_qnode is used to keep the queued bios separated by their sources. * Bios are queued to throtl_qnode which in turn is queued to * throtl_service_queue and then dispatched in round-robin order. * * It's also used to track the reference counts on blkg's. A qnode always * belongs to a throtl_grp and gets queued on itself or the parent, so * incrementing the reference of the associated throtl_grp when a qnode is * queued and decrementing when dequeued is enough to keep the whole blkg * tree pinned while bios are in flight. */ struct throtl_qnode { struct list_head node; /* service_queue->queued[] */ struct bio_list bios; /* queued bios */ struct throtl_grp *tg; /* tg this qnode belongs to */ }; struct throtl_service_queue { struct throtl_service_queue *parent_sq; /* the parent service_queue */ /* * Bios queued directly to this service_queue or dispatched from * children throtl_grp's. */ struct list_head queued[2]; /* throtl_qnode [READ/WRITE] */ unsigned int nr_queued[2]; /* number of queued bios */ /* * RB tree of active children throtl_grp's, which are sorted by * their ->disptime. */ struct rb_root_cached pending_tree; /* RB tree of active tgs */ unsigned int nr_pending; /* # queued in the tree */ unsigned long first_pending_disptime; /* disptime of the first tg */ struct timer_list pending_timer; /* fires on first_pending_disptime */ }; enum tg_state_flags { THROTL_TG_PENDING = 1 << 0, /* on parent's pending tree */ THROTL_TG_WAS_EMPTY = 1 << 1, /* bio_lists[] became non-empty */ THROTL_TG_CANCELING = 1 << 2, /* starts to cancel bio */ }; struct throtl_grp { /* must be the first member */ struct blkg_policy_data pd; /* active throtl group service_queue member */ struct rb_node rb_node; /* throtl_data this group belongs to */ struct throtl_data *td; /* this group's service queue */ struct throtl_service_queue service_queue; /* * qnode_on_self is used when bios are directly queued to this * throtl_grp so that local bios compete fairly with bios * dispatched from children. qnode_on_parent is used when bios are * dispatched from this throtl_grp into its parent and will compete * with the sibling qnode_on_parents and the parent's * qnode_on_self. */ struct throtl_qnode qnode_on_self[2]; struct throtl_qnode qnode_on_parent[2]; /* * Dispatch time in jiffies. This is the estimated time when group * will unthrottle and is ready to dispatch more bio. It is used as * key to sort active groups in service tree. */ unsigned long disptime; unsigned int flags; /* are there any throtl rules between this group and td? */ bool has_rules_bps[2]; bool has_rules_iops[2]; /* bytes per second rate limits */ uint64_t bps[2]; /* IOPS limits */ unsigned int iops[2]; /* Number of bytes dispatched in current slice */ uint64_t bytes_disp[2]; /* Number of bio's dispatched in current slice */ unsigned int io_disp[2]; unsigned long last_low_overflow_time[2]; uint64_t last_bytes_disp[2]; unsigned int last_io_disp[2]; /* * The following two fields are updated when new configuration is * submitted while some bios are still throttled, they record how many * bytes/ios are waited already in previous configuration, and they will * be used to calculate wait time under new configuration. */ long long carryover_bytes[2]; int carryover_ios[2]; unsigned long last_check_time; /* When did we start a new slice */ unsigned long slice_start[2]; unsigned long slice_end[2]; struct blkg_rwstat stat_bytes; struct blkg_rwstat stat_ios; }; extern struct blkcg_policy blkcg_policy_throtl; static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd) { return pd ? container_of(pd, struct throtl_grp, pd) : NULL; } static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg) { return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl)); } /* * Internal throttling interface */ #ifndef CONFIG_BLK_DEV_THROTTLING static inline void blk_throtl_exit(struct gendisk *disk) { } static inline bool blk_throtl_bio(struct bio *bio) { return false; } static inline void blk_throtl_cancel_bios(struct gendisk *disk) { } #else /* CONFIG_BLK_DEV_THROTTLING */ void blk_throtl_exit(struct gendisk *disk); bool __blk_throtl_bio(struct bio *bio); void blk_throtl_cancel_bios(struct gendisk *disk); static inline bool blk_throtl_activated(struct request_queue *q) { return q->td != NULL; } static inline bool blk_should_throtl(struct bio *bio) { struct throtl_grp *tg; int rw = bio_data_dir(bio); /* * This is called under bio_queue_enter(), and it's synchronized with * the activation of blk-throtl, which is protected by * blk_mq_freeze_queue(). */ if (!blk_throtl_activated(bio->bi_bdev->bd_queue)) return false; tg = blkg_to_tg(bio->bi_blkg); if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) { if (!bio_flagged(bio, BIO_CGROUP_ACCT)) { bio_set_flag(bio, BIO_CGROUP_ACCT); blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf, bio->bi_iter.bi_size); } blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1); } /* iops limit is always counted */ if (tg->has_rules_iops[rw]) return true; if (tg->has_rules_bps[rw] && !bio_flagged(bio, BIO_BPS_THROTTLED)) return true; return false; } static inline bool blk_throtl_bio(struct bio *bio) { if (!blk_should_throtl(bio)) return false; return __blk_throtl_bio(bio); } #endif /* CONFIG_BLK_DEV_THROTTLING */ #endif
7965 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __X86_KERNEL_FPU_INTERNAL_H #define __X86_KERNEL_FPU_INTERNAL_H extern struct fpstate init_fpstate; /* CPU feature check wrappers */ static __always_inline __pure bool use_xsave(void) { return cpu_feature_enabled(X86_FEATURE_XSAVE); } static __always_inline __pure bool use_fxsr(void) { return cpu_feature_enabled(X86_FEATURE_FXSR); } #ifdef CONFIG_X86_DEBUG_FPU # define WARN_ON_FPU(x) WARN_ON_ONCE(x) #else # define WARN_ON_FPU(x) ({ (void)(x); 0; }) #endif /* Used in init.c */ extern void fpstate_init_user(struct fpstate *fpstate); extern void fpstate_reset(struct fpu *fpu); #endif
2865 2872 2400 2402 2402 2876 797 126 921 919 921 922 3425 3427 3435 3429 3435 3427 21 21 21 3435 3435 3491 3281 3492 3499 3490 3489 5598 5605 2 2 2 2 2 2 1 1 1 1 1 1 1 3 3 3 2499 2504 2505 2505 2498 2505 2505 2499 3279 3027 2484 2310 2317 2304 2317 2310 2316 4085 4078 1876 4012 2137 3475 4085 6713 6715 4809 6420 6421 4426 2312 4085 4419 6731 5294 5311 5296 5312 5298 5310 776 777 1 771 254 252 254 254 24 232 232 232 232 15 15 219 27 215 3277 3278 2643 3069 3279 3274 2 2 3264 252 3278 3274 3284 351 352 351 153 254 253 254 3271 3285 3283 466 139 479 1 481 481 481 478 3671 3508 3666 3667 3529 5 2400 562 3567 2360 3509 2473 279 279 275 277 276 277 2 274 3201 1014 3198 1331 1333 1329 1331 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 // SPDX-License-Identifier: GPL-2.0-only /* * linux/mm/swap.c * * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds */ /* * This file contains the default values for the operation of the * Linux VM subsystem. Fine-tuning documentation can be found in * Documentation/admin-guide/sysctl/vm.rst. * Started 18.12.91 * Swap aging added 23.2.95, Stephen Tweedie. * Buffermem limits added 12.3.98, Rik van Riel. */ #include <linux/mm.h> #include <linux/sched.h> #include <linux/kernel_stat.h> #include <linux/swap.h> #include <linux/mman.h> #include <linux/pagemap.h> #include <linux/pagevec.h> #include <linux/init.h> #include <linux/export.h> #include <linux/mm_inline.h> #include <linux/percpu_counter.h> #include <linux/memremap.h> #include <linux/percpu.h> #include <linux/cpu.h> #include <linux/notifier.h> #include <linux/backing-dev.h> #include <linux/memcontrol.h> #include <linux/gfp.h> #include <linux/uio.h> #include <linux/hugetlb.h> #include <linux/page_idle.h> #include <linux/local_lock.h> #include <linux/buffer_head.h> #include "internal.h" #define CREATE_TRACE_POINTS #include <trace/events/pagemap.h> /* How many pages do we try to swap or page in/out together? As a power of 2 */ int page_cluster; const int page_cluster_max = 31; /* Protecting only lru_rotate.fbatch which requires disabling interrupts */ struct lru_rotate { local_lock_t lock; struct folio_batch fbatch; }; static DEFINE_PER_CPU(struct lru_rotate, lru_rotate) = { .lock = INIT_LOCAL_LOCK(lock), }; /* * The following folio batches are grouped together because they are protected * by disabling preemption (and interrupts remain enabled). */ struct cpu_fbatches { local_lock_t lock; struct folio_batch lru_add; struct folio_batch lru_deactivate_file; struct folio_batch lru_deactivate; struct folio_batch lru_lazyfree; #ifdef CONFIG_SMP struct folio_batch activate; #endif }; static DEFINE_PER_CPU(struct cpu_fbatches, cpu_fbatches) = { .lock = INIT_LOCAL_LOCK(lock), }; static void __page_cache_release(struct folio *folio, struct lruvec **lruvecp, unsigned long *flagsp) { if (folio_test_lru(folio)) { folio_lruvec_relock_irqsave(folio, lruvecp, flagsp); lruvec_del_folio(*lruvecp, folio); __folio_clear_lru_flags(folio); } /* * In rare cases, when truncation or holepunching raced with * munlock after VM_LOCKED was cleared, Mlocked may still be * found set here. This does not indicate a problem, unless * "unevictable_pgs_cleared" appears worryingly large. */ if (unlikely(folio_test_mlocked(folio))) { long nr_pages = folio_nr_pages(folio); __folio_clear_mlocked(folio); zone_stat_mod_folio(folio, NR_MLOCK, -nr_pages); count_vm_events(UNEVICTABLE_PGCLEARED, nr_pages); } } /* * This path almost never happens for VM activity - pages are normally freed * in batches. But it gets used by networking - and for compound pages. */ static void page_cache_release(struct folio *folio) { struct lruvec *lruvec = NULL; unsigned long flags; __page_cache_release(folio, &lruvec, &flags); if (lruvec) unlock_page_lruvec_irqrestore(lruvec, flags); } void __folio_put(struct folio *folio) { if (unlikely(folio_is_zone_device(folio))) { free_zone_device_folio(folio); return; } else if (folio_test_hugetlb(folio)) { free_huge_folio(folio); return; } page_cache_release(folio); folio_undo_large_rmappable(folio); mem_cgroup_uncharge(folio); free_unref_page(&folio->page, folio_order(folio)); } EXPORT_SYMBOL(__folio_put); /** * put_pages_list() - release a list of pages * @pages: list of pages threaded on page->lru * * Release a list of pages which are strung together on page.lru. */ void put_pages_list(struct list_head *pages) { struct folio_batch fbatch; struct folio *folio, *next; folio_batch_init(&fbatch); list_for_each_entry_safe(folio, next, pages, lru) { if (!folio_put_testzero(folio)) continue; if (folio_test_hugetlb(folio)) { free_huge_folio(folio); continue; } /* LRU flag must be clear because it's passed using the lru */ if (folio_batch_add(&fbatch, folio) > 0) continue; free_unref_folios(&fbatch); } if (fbatch.nr) free_unref_folios(&fbatch); INIT_LIST_HEAD(pages); } EXPORT_SYMBOL(put_pages_list); typedef void (*move_fn_t)(struct lruvec *lruvec, struct folio *folio); static void lru_add_fn(struct lruvec *lruvec, struct folio *folio) { int was_unevictable = folio_test_clear_unevictable(folio); long nr_pages = folio_nr_pages(folio); VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); /* * Is an smp_mb__after_atomic() still required here, before * folio_evictable() tests the mlocked flag, to rule out the possibility * of stranding an evictable folio on an unevictable LRU? I think * not, because __munlock_folio() only clears the mlocked flag * while the LRU lock is held. * * (That is not true of __page_cache_release(), and not necessarily * true of folios_put(): but those only clear the mlocked flag after * folio_put_testzero() has excluded any other users of the folio.) */ if (folio_evictable(folio)) { if (was_unevictable) __count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages); } else { folio_clear_active(folio); folio_set_unevictable(folio); /* * folio->mlock_count = !!folio_test_mlocked(folio)? * But that leaves __mlock_folio() in doubt whether another * actor has already counted the mlock or not. Err on the * safe side, underestimate, let page reclaim fix it, rather * than leaving a page on the unevictable LRU indefinitely. */ folio->mlock_count = 0; if (!was_unevictable) __count_vm_events(UNEVICTABLE_PGCULLED, nr_pages); } lruvec_add_folio(lruvec, folio); trace_mm_lru_insertion(folio); } static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn) { int i; struct lruvec *lruvec = NULL; unsigned long flags = 0; for (i = 0; i < folio_batch_count(fbatch); i++) { struct folio *folio = fbatch->folios[i]; folio_lruvec_relock_irqsave(folio, &lruvec, &flags); move_fn(lruvec, folio); folio_set_lru(folio); } if (lruvec) unlock_page_lruvec_irqrestore(lruvec, flags); folios_put(fbatch); } static void folio_batch_add_and_move(struct folio_batch *fbatch, struct folio *folio, move_fn_t move_fn) { if (folio_batch_add(fbatch, folio) && !folio_test_large(folio) && !lru_cache_disabled()) return; folio_batch_move_lru(fbatch, move_fn); } static void lru_move_tail_fn(struct lruvec *lruvec, struct folio *folio) { if (!folio_test_unevictable(folio)) { lruvec_del_folio(lruvec, folio); folio_clear_active(folio); lruvec_add_folio_tail(lruvec, folio); __count_vm_events(PGROTATED, folio_nr_pages(folio)); } } /* * Writeback is about to end against a folio which has been marked for * immediate reclaim. If it still appears to be reclaimable, move it * to the tail of the inactive list. * * folio_rotate_reclaimable() must disable IRQs, to prevent nasty races. */ void folio_rotate_reclaimable(struct folio *folio) { if (!folio_test_locked(folio) && !folio_test_dirty(folio) && !folio_test_unevictable(folio)) { struct folio_batch *fbatch; unsigned long flags; folio_get(folio); if (!folio_test_clear_lru(folio)) { folio_put(folio); return; } local_lock_irqsave(&lru_rotate.lock, flags); fbatch = this_cpu_ptr(&lru_rotate.fbatch); folio_batch_add_and_move(fbatch, folio, lru_move_tail_fn); local_unlock_irqrestore(&lru_rotate.lock, flags); } } void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_io, unsigned int nr_rotated) { unsigned long cost; /* * Reflect the relative cost of incurring IO and spending CPU * time on rotations. This doesn't attempt to make a precise * comparison, it just says: if reloads are about comparable * between the LRU lists, or rotations are overwhelmingly * different between them, adjust scan balance for CPU work. */ cost = nr_io * SWAP_CLUSTER_MAX + nr_rotated; do { unsigned long lrusize; /* * Hold lruvec->lru_lock is safe here, since * 1) The pinned lruvec in reclaim, or * 2) From a pre-LRU page during refault (which also holds the * rcu lock, so would be safe even if the page was on the LRU * and could move simultaneously to a new lruvec). */ spin_lock_irq(&lruvec->lru_lock); /* Record cost event */ if (file) lruvec->file_cost += cost; else lruvec->anon_cost += cost; /* * Decay previous events * * Because workloads change over time (and to avoid * overflow) we keep these statistics as a floating * average, which ends up weighing recent refaults * more than old ones. */ lrusize = lruvec_page_state(lruvec, NR_INACTIVE_ANON) + lruvec_page_state(lruvec, NR_ACTIVE_ANON) + lruvec_page_state(lruvec, NR_INACTIVE_FILE) + lruvec_page_state(lruvec, NR_ACTIVE_FILE); if (lruvec->file_cost + lruvec->anon_cost > lrusize / 4) { lruvec->file_cost /= 2; lruvec->anon_cost /= 2; } spin_unlock_irq(&lruvec->lru_lock); } while ((lruvec = parent_lruvec(lruvec))); } void lru_note_cost_refault(struct folio *folio) { lru_note_cost(folio_lruvec(folio), folio_is_file_lru(folio), folio_nr_pages(folio), 0); } static void folio_activate_fn(struct lruvec *lruvec, struct folio *folio) { if (!folio_test_active(folio) && !folio_test_unevictable(folio)) { long nr_pages = folio_nr_pages(folio); lruvec_del_folio(lruvec, folio); folio_set_active(folio); lruvec_add_folio(lruvec, folio); trace_mm_lru_activate(folio); __count_vm_events(PGACTIVATE, nr_pages); __count_memcg_events(lruvec_memcg(lruvec), PGACTIVATE, nr_pages); } } #ifdef CONFIG_SMP static void folio_activate_drain(int cpu) { struct folio_batch *fbatch = &per_cpu(cpu_fbatches.activate, cpu); if (folio_batch_count(fbatch)) folio_batch_move_lru(fbatch, folio_activate_fn); } void folio_activate(struct folio *folio) { if (!folio_test_active(folio) && !folio_test_unevictable(folio)) { struct folio_batch *fbatch; folio_get(folio); if (!folio_test_clear_lru(folio)) { folio_put(folio); return; } local_lock(&cpu_fbatches.lock); fbatch = this_cpu_ptr(&cpu_fbatches.activate); folio_batch_add_and_move(fbatch, folio, folio_activate_fn); local_unlock(&cpu_fbatches.lock); } } #else static inline void folio_activate_drain(int cpu) { } void folio_activate(struct folio *folio) { struct lruvec *lruvec; if (folio_test_clear_lru(folio)) { lruvec = folio_lruvec_lock_irq(folio); folio_activate_fn(lruvec, folio); unlock_page_lruvec_irq(lruvec); folio_set_lru(folio); } } #endif static void __lru_cache_activate_folio(struct folio *folio) { struct folio_batch *fbatch; int i; local_lock(&cpu_fbatches.lock); fbatch = this_cpu_ptr(&cpu_fbatches.lru_add); /* * Search backwards on the optimistic assumption that the folio being * activated has just been added to this batch. Note that only * the local batch is examined as a !LRU folio could be in the * process of being released, reclaimed, migrated or on a remote * batch that is currently being drained. Furthermore, marking * a remote batch's folio active potentially hits a race where * a folio is marked active just after it is added to the inactive * list causing accounting errors and BUG_ON checks to trigger. */ for (i = folio_batch_count(fbatch) - 1; i >= 0; i--) { struct folio *batch_folio = fbatch->folios[i]; if (batch_folio == folio) { folio_set_active(folio); break; } } local_unlock(&cpu_fbatches.lock); } #ifdef CONFIG_LRU_GEN static void folio_inc_refs(struct folio *folio) { unsigned long new_flags, old_flags = READ_ONCE(folio->flags); if (folio_test_unevictable(folio)) return; if (!folio_test_referenced(folio)) { folio_set_referenced(folio); return; } if (!folio_test_workingset(folio)) { folio_set_workingset(folio); return; } /* see the comment on MAX_NR_TIERS */ do { new_flags = old_flags & LRU_REFS_MASK; if (new_flags == LRU_REFS_MASK) break; new_flags += BIT(LRU_REFS_PGOFF); new_flags |= old_flags & ~LRU_REFS_MASK; } while (!try_cmpxchg(&folio->flags, &old_flags, new_flags)); } #else static void folio_inc_refs(struct folio *folio) { } #endif /* CONFIG_LRU_GEN */ /** * folio_mark_accessed - Mark a folio as having seen activity. * @folio: The folio to mark. * * This function will perform one of the following transitions: * * * inactive,unreferenced -> inactive,referenced * * inactive,referenced -> active,unreferenced * * active,unreferenced -> active,referenced * * When a newly allocated folio is not yet visible, so safe for non-atomic ops, * __folio_set_referenced() may be substituted for folio_mark_accessed(). */ void folio_mark_accessed(struct folio *folio) { if (lru_gen_enabled()) { folio_inc_refs(folio); return; } if (!folio_test_referenced(folio)) { folio_set_referenced(folio); } else if (folio_test_unevictable(folio)) { /* * Unevictable pages are on the "LRU_UNEVICTABLE" list. But, * this list is never rotated or maintained, so marking an * unevictable page accessed has no effect. */ } else if (!folio_test_active(folio)) { /* * If the folio is on the LRU, queue it for activation via * cpu_fbatches.activate. Otherwise, assume the folio is in a * folio_batch, mark it active and it'll be moved to the active * LRU on the next drain. */ if (folio_test_lru(folio)) folio_activate(folio); else __lru_cache_activate_folio(folio); folio_clear_referenced(folio); workingset_activation(folio); } if (folio_test_idle(folio)) folio_clear_idle(folio); } EXPORT_SYMBOL(folio_mark_accessed); /** * folio_add_lru - Add a folio to an LRU list. * @folio: The folio to be added to the LRU. * * Queue the folio for addition to the LRU. The decision on whether * to add the page to the [in]active [file|anon] list is deferred until the * folio_batch is drained. This gives a chance for the caller of folio_add_lru() * have the folio added to the active list using folio_mark_accessed(). */ void folio_add_lru(struct folio *folio) { struct folio_batch *fbatch; VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio); VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); /* see the comment in lru_gen_add_folio() */ if (lru_gen_enabled() && !folio_test_unevictable(folio) && lru_gen_in_fault() && !(current->flags & PF_MEMALLOC)) folio_set_active(folio); folio_get(folio); local_lock(&cpu_fbatches.lock); fbatch = this_cpu_ptr(&cpu_fbatches.lru_add); folio_batch_add_and_move(fbatch, folio, lru_add_fn); local_unlock(&cpu_fbatches.lock); } EXPORT_SYMBOL(folio_add_lru); /** * folio_add_lru_vma() - Add a folio to the appropate LRU list for this VMA. * @folio: The folio to be added to the LRU. * @vma: VMA in which the folio is mapped. * * If the VMA is mlocked, @folio is added to the unevictable list. * Otherwise, it is treated the same way as folio_add_lru(). */ void folio_add_lru_vma(struct folio *folio, struct vm_area_struct *vma) { VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); if (unlikely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED)) mlock_new_folio(folio); else folio_add_lru(folio); } /* * If the folio cannot be invalidated, it is moved to the * inactive list to speed up its reclaim. It is moved to the * head of the list, rather than the tail, to give the flusher * threads some time to write it out, as this is much more * effective than the single-page writeout from reclaim. * * If the folio isn't mapped and dirty/writeback, the folio * could be reclaimed asap using the reclaim flag. * * 1. active, mapped folio -> none * 2. active, dirty/writeback folio -> inactive, head, reclaim * 3. inactive, mapped folio -> none * 4. inactive, dirty/writeback folio -> inactive, head, reclaim * 5. inactive, clean -> inactive, tail * 6. Others -> none * * In 4, it moves to the head of the inactive list so the folio is * written out by flusher threads as this is much more efficient * than the single-page writeout from reclaim. */ static void lru_deactivate_file_fn(struct lruvec *lruvec, struct folio *folio) { bool active = folio_test_active(folio); long nr_pages = folio_nr_pages(folio); if (folio_test_unevictable(folio)) return; /* Some processes are using the folio */ if (folio_mapped(folio)) return; lruvec_del_folio(lruvec, folio); folio_clear_active(folio); folio_clear_referenced(folio); if (folio_test_writeback(folio) || folio_test_dirty(folio)) { /* * Setting the reclaim flag could race with * folio_end_writeback() and confuse readahead. But the * race window is _really_ small and it's not a critical * problem. */ lruvec_add_folio(lruvec, folio); folio_set_reclaim(folio); } else { /* * The folio's writeback ended while it was in the batch. * We move that folio to the tail of the inactive list. */ lruvec_add_folio_tail(lruvec, folio); __count_vm_events(PGROTATED, nr_pages); } if (active) { __count_vm_events(PGDEACTIVATE, nr_pages); __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_pages); } } static void lru_deactivate_fn(struct lruvec *lruvec, struct folio *folio) { if (!folio_test_unevictable(folio) && (folio_test_active(folio) || lru_gen_enabled())) { long nr_pages = folio_nr_pages(folio); lruvec_del_folio(lruvec, folio); folio_clear_active(folio); folio_clear_referenced(folio); lruvec_add_folio(lruvec, folio); __count_vm_events(PGDEACTIVATE, nr_pages); __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_pages); } } static void lru_lazyfree_fn(struct lruvec *lruvec, struct folio *folio) { if (folio_test_anon(folio) && folio_test_swapbacked(folio) && !folio_test_swapcache(folio) && !folio_test_unevictable(folio)) { long nr_pages = folio_nr_pages(folio); lruvec_del_folio(lruvec, folio); folio_clear_active(folio); folio_clear_referenced(folio); /* * Lazyfree folios are clean anonymous folios. They have * the swapbacked flag cleared, to distinguish them from normal * anonymous folios */ folio_clear_swapbacked(folio); lruvec_add_folio(lruvec, folio); __count_vm_events(PGLAZYFREE, nr_pages); __count_memcg_events(lruvec_memcg(lruvec), PGLAZYFREE, nr_pages); } } /* * Drain pages out of the cpu's folio_batch. * Either "cpu" is the current CPU, and preemption has already been * disabled; or "cpu" is being hot-unplugged, and is already dead. */ void lru_add_drain_cpu(int cpu) { struct cpu_fbatches *fbatches = &per_cpu(cpu_fbatches, cpu); struct folio_batch *fbatch = &fbatches->lru_add; if (folio_batch_count(fbatch)) folio_batch_move_lru(fbatch, lru_add_fn); fbatch = &per_cpu(lru_rotate.fbatch, cpu); /* Disabling interrupts below acts as a compiler barrier. */ if (data_race(folio_batch_count(fbatch))) { unsigned long flags; /* No harm done if a racing interrupt already did this */ local_lock_irqsave(&lru_rotate.lock, flags); folio_batch_move_lru(fbatch, lru_move_tail_fn); local_unlock_irqrestore(&lru_rotate.lock, flags); } fbatch = &fbatches->lru_deactivate_file; if (folio_batch_count(fbatch)) folio_batch_move_lru(fbatch, lru_deactivate_file_fn); fbatch = &fbatches->lru_deactivate; if (folio_batch_count(fbatch)) folio_batch_move_lru(fbatch, lru_deactivate_fn); fbatch = &fbatches->lru_lazyfree; if (folio_batch_count(fbatch)) folio_batch_move_lru(fbatch, lru_lazyfree_fn); folio_activate_drain(cpu); } /** * deactivate_file_folio() - Deactivate a file folio. * @folio: Folio to deactivate. * * This function hints to the VM that @folio is a good reclaim candidate, * for example if its invalidation fails due to the folio being dirty * or under writeback. * * Context: Caller holds a reference on the folio. */ void deactivate_file_folio(struct folio *folio) { struct folio_batch *fbatch; /* Deactivating an unevictable folio will not accelerate reclaim */ if (folio_test_unevictable(folio)) return; folio_get(folio); if (!folio_test_clear_lru(folio)) { folio_put(folio); return; } local_lock(&cpu_fbatches.lock); fbatch = this_cpu_ptr(&cpu_fbatches.lru_deactivate_file); folio_batch_add_and_move(fbatch, folio, lru_deactivate_file_fn); local_unlock(&cpu_fbatches.lock); } /* * folio_deactivate - deactivate a folio * @folio: folio to deactivate * * folio_deactivate() moves @folio to the inactive list if @folio was on the * active list and was not unevictable. This is done to accelerate the * reclaim of @folio. */ void folio_deactivate(struct folio *folio) { if (!folio_test_unevictable(folio) && (folio_test_active(folio) || lru_gen_enabled())) { struct folio_batch *fbatch; folio_get(folio); if (!folio_test_clear_lru(folio)) { folio_put(folio); return; } local_lock(&cpu_fbatches.lock); fbatch = this_cpu_ptr(&cpu_fbatches.lru_deactivate); folio_batch_add_and_move(fbatch, folio, lru_deactivate_fn); local_unlock(&cpu_fbatches.lock); } } /** * folio_mark_lazyfree - make an anon folio lazyfree * @folio: folio to deactivate * * folio_mark_lazyfree() moves @folio to the inactive file list. * This is done to accelerate the reclaim of @folio. */ void folio_mark_lazyfree(struct folio *folio) { if (folio_test_anon(folio) && folio_test_swapbacked(folio) && !folio_test_swapcache(folio) && !folio_test_unevictable(folio)) { struct folio_batch *fbatch; folio_get(folio); if (!folio_test_clear_lru(folio)) { folio_put(folio); return; } local_lock(&cpu_fbatches.lock); fbatch = this_cpu_ptr(&cpu_fbatches.lru_lazyfree); folio_batch_add_and_move(fbatch, folio, lru_lazyfree_fn); local_unlock(&cpu_fbatches.lock); } } void lru_add_drain(void) { local_lock(&cpu_fbatches.lock); lru_add_drain_cpu(smp_processor_id()); local_unlock(&cpu_fbatches.lock); mlock_drain_local(); } /* * It's called from per-cpu workqueue context in SMP case so * lru_add_drain_cpu and invalidate_bh_lrus_cpu should run on * the same cpu. It shouldn't be a problem in !SMP case since * the core is only one and the locks will disable preemption. */ static void lru_add_and_bh_lrus_drain(void) { local_lock(&cpu_fbatches.lock); lru_add_drain_cpu(smp_processor_id()); local_unlock(&cpu_fbatches.lock); invalidate_bh_lrus_cpu(); mlock_drain_local(); } void lru_add_drain_cpu_zone(struct zone *zone) { local_lock(&cpu_fbatches.lock); lru_add_drain_cpu(smp_processor_id()); drain_local_pages(zone); local_unlock(&cpu_fbatches.lock); mlock_drain_local(); } #ifdef CONFIG_SMP static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); static void lru_add_drain_per_cpu(struct work_struct *dummy) { lru_add_and_bh_lrus_drain(); } static bool cpu_needs_drain(unsigned int cpu) { struct cpu_fbatches *fbatches = &per_cpu(cpu_fbatches, cpu); /* Check these in order of likelihood that they're not zero */ return folio_batch_count(&fbatches->lru_add) || data_race(folio_batch_count(&per_cpu(lru_rotate.fbatch, cpu))) || folio_batch_count(&fbatches->lru_deactivate_file) || folio_batch_count(&fbatches->lru_deactivate) || folio_batch_count(&fbatches->lru_lazyfree) || folio_batch_count(&fbatches->activate) || need_mlock_drain(cpu) || has_bh_in_lru(cpu, NULL); } /* * Doesn't need any cpu hotplug locking because we do rely on per-cpu * kworkers being shut down before our page_alloc_cpu_dead callback is * executed on the offlined cpu. * Calling this function with cpu hotplug locks held can actually lead * to obscure indirect dependencies via WQ context. */ static inline void __lru_add_drain_all(bool force_all_cpus) { /* * lru_drain_gen - Global pages generation number * * (A) Definition: global lru_drain_gen = x implies that all generations * 0 < n <= x are already *scheduled* for draining. * * This is an optimization for the highly-contended use case where a * user space workload keeps constantly generating a flow of pages for * each CPU. */ static unsigned int lru_drain_gen; static struct cpumask has_work; static DEFINE_MUTEX(lock); unsigned cpu, this_gen; /* * Make sure nobody triggers this path before mm_percpu_wq is fully * initialized. */ if (WARN_ON(!mm_percpu_wq)) return; /* * Guarantee folio_batch counter stores visible by this CPU * are visible to other CPUs before loading the current drain * generation. */ smp_mb(); /* * (B) Locally cache global LRU draining generation number * * The read barrier ensures that the counter is loaded before the mutex * is taken. It pairs with smp_mb() inside the mutex critical section * at (D). */ this_gen = smp_load_acquire(&lru_drain_gen); mutex_lock(&lock); /* * (C) Exit the draining operation if a newer generation, from another * lru_add_drain_all(), was already scheduled for draining. Check (A). */ if (unlikely(this_gen != lru_drain_gen && !force_all_cpus)) goto done; /* * (D) Increment global generation number * * Pairs with smp_load_acquire() at (B), outside of the critical * section. Use a full memory barrier to guarantee that the * new global drain generation number is stored before loading * folio_batch counters. * * This pairing must be done here, before the for_each_online_cpu loop * below which drains the page vectors. * * Let x, y, and z represent some system CPU numbers, where x < y < z. * Assume CPU #z is in the middle of the for_each_online_cpu loop * below and has already reached CPU #y's per-cpu data. CPU #x comes * along, adds some pages to its per-cpu vectors, then calls * lru_add_drain_all(). * * If the paired barrier is done at any later step, e.g. after the * loop, CPU #x will just exit at (C) and miss flushing out all of its * added pages. */ WRITE_ONCE(lru_drain_gen, lru_drain_gen + 1); smp_mb(); cpumask_clear(&has_work); for_each_online_cpu(cpu) { struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); if (cpu_needs_drain(cpu)) { INIT_WORK(work, lru_add_drain_per_cpu); queue_work_on(cpu, mm_percpu_wq, work); __cpumask_set_cpu(cpu, &has_work); } } for_each_cpu(cpu, &has_work) flush_work(&per_cpu(lru_add_drain_work, cpu)); done: mutex_unlock(&lock); } void lru_add_drain_all(void) { __lru_add_drain_all(false); } #else void lru_add_drain_all(void) { lru_add_drain(); } #endif /* CONFIG_SMP */ atomic_t lru_disable_count = ATOMIC_INIT(0); /* * lru_cache_disable() needs to be called before we start compiling * a list of pages to be migrated using isolate_lru_page(). * It drains pages on LRU cache and then disable on all cpus until * lru_cache_enable is called. * * Must be paired with a call to lru_cache_enable(). */ void lru_cache_disable(void) { atomic_inc(&lru_disable_count); /* * Readers of lru_disable_count are protected by either disabling * preemption or rcu_read_lock: * * preempt_disable, local_irq_disable [bh_lru_lock()] * rcu_read_lock [rt_spin_lock CONFIG_PREEMPT_RT] * preempt_disable [local_lock !CONFIG_PREEMPT_RT] * * Since v5.1 kernel, synchronize_rcu() is guaranteed to wait on * preempt_disable() regions of code. So any CPU which sees * lru_disable_count = 0 will have exited the critical * section when synchronize_rcu() returns. */ synchronize_rcu_expedited(); #ifdef CONFIG_SMP __lru_add_drain_all(true); #else lru_add_and_bh_lrus_drain(); #endif } /** * folios_put_refs - Reduce the reference count on a batch of folios. * @folios: The folios. * @refs: The number of refs to subtract from each folio. * * Like folio_put(), but for a batch of folios. This is more efficient * than writing the loop yourself as it will optimise the locks which need * to be taken if the folios are freed. The folios batch is returned * empty and ready to be reused for another batch; there is no need * to reinitialise it. If @refs is NULL, we subtract one from each * folio refcount. * * Context: May be called in process or interrupt context, but not in NMI * context. May be called while holding a spinlock. */ void folios_put_refs(struct folio_batch *folios, unsigned int *refs) { int i, j; struct lruvec *lruvec = NULL; unsigned long flags = 0; for (i = 0, j = 0; i < folios->nr; i++) { struct folio *folio = folios->folios[i]; unsigned int nr_refs = refs ? refs[i] : 1; if (is_huge_zero_folio(folio)) continue; if (folio_is_zone_device(folio)) { if (lruvec) { unlock_page_lruvec_irqrestore(lruvec, flags); lruvec = NULL; } if (put_devmap_managed_folio_refs(folio, nr_refs)) continue; if (folio_ref_sub_and_test(folio, nr_refs)) free_zone_device_folio(folio); continue; } if (!folio_ref_sub_and_test(folio, nr_refs)) continue; /* hugetlb has its own memcg */ if (folio_test_hugetlb(folio)) { if (lruvec) { unlock_page_lruvec_irqrestore(lruvec, flags); lruvec = NULL; } free_huge_folio(folio); continue; } folio_undo_large_rmappable(folio); __page_cache_release(folio, &lruvec, &flags); if (j != i) folios->folios[j] = folio; j++; } if (lruvec) unlock_page_lruvec_irqrestore(lruvec, flags); if (!j) { folio_batch_reinit(folios); return; } folios->nr = j; mem_cgroup_uncharge_folios(folios); free_unref_folios(folios); } EXPORT_SYMBOL(folios_put_refs); /** * release_pages - batched put_page() * @arg: array of pages to release * @nr: number of pages * * Decrement the reference count on all the pages in @arg. If it * fell to zero, remove the page from the LRU and free it. * * Note that the argument can be an array of pages, encoded pages, * or folio pointers. We ignore any encoded bits, and turn any of * them into just a folio that gets free'd. */ void release_pages(release_pages_arg arg, int nr) { struct folio_batch fbatch; int refs[PAGEVEC_SIZE]; struct encoded_page **encoded = arg.encoded_pages; int i; folio_batch_init(&fbatch); for (i = 0; i < nr; i++) { /* Turn any of the argument types into a folio */ struct folio *folio = page_folio(encoded_page_ptr(encoded[i])); /* Is our next entry actually "nr_pages" -> "nr_refs" ? */ refs[fbatch.nr] = 1; if (unlikely(encoded_page_flags(encoded[i]) & ENCODED_PAGE_BIT_NR_PAGES_NEXT)) refs[fbatch.nr] = encoded_nr_pages(encoded[++i]); if (folio_batch_add(&fbatch, folio) > 0) continue; folios_put_refs(&fbatch, refs); } if (fbatch.nr) folios_put_refs(&fbatch, refs); } EXPORT_SYMBOL(release_pages); /* * The folios which we're about to release may be in the deferred lru-addition * queues. That would prevent them from really being freed right now. That's * OK from a correctness point of view but is inefficient - those folios may be * cache-warm and we want to give them back to the page allocator ASAP. * * So __folio_batch_release() will drain those queues here. * folio_batch_move_lru() calls folios_put() directly to avoid * mutual recursion. */ void __folio_batch_release(struct folio_batch *fbatch) { if (!fbatch->percpu_pvec_drained) { lru_add_drain(); fbatch->percpu_pvec_drained = true; } folios_put(fbatch); } EXPORT_SYMBOL(__folio_batch_release); /** * folio_batch_remove_exceptionals() - Prune non-folios from a batch. * @fbatch: The batch to prune * * find_get_entries() fills a batch with both folios and shadow/swap/DAX * entries. This function prunes all the non-folio entries from @fbatch * without leaving holes, so that it can be passed on to folio-only batch * operations. */ void folio_batch_remove_exceptionals(struct folio_batch *fbatch) { unsigned int i, j; for (i = 0, j = 0; i < folio_batch_count(fbatch); i++) { struct folio *folio = fbatch->folios[i]; if (!xa_is_value(folio)) fbatch->folios[j++] = folio; } fbatch->nr = j; } /* * Perform any setup for the swap system */ void __init swap_setup(void) { unsigned long megs = totalram_pages() >> (20 - PAGE_SHIFT); /* Use a smaller cluster for small-memory machines */ if (megs < 16) page_cluster = 2; else page_cluster = 3; /* * Right now other parts of the system means that we * _really_ don't want to cluster much more */ }
28 8 12 27 50 48 21 48 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7930 7931 7932 7933 7934 7935 7936 7937 7938 7939 7940 7941 7942 7943 7944 7945 7946 7947 7948 7949 7950 7951 7952 7953 7954 7955 7956 7957 7958 7959 7960 7961 7962 7963 7964 7965 7966 7967 7968 7969 7970 7971 7972 7973 7974 7975 7976 7977 7978 7979 7980 7981 7982 7983 7984 7985 7986 7987 7988 7989 7990 7991 7992 7993 7994 7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005 8006 8007 8008 8009 8010 8011 8012 8013 8014 8015 8016 8017 8018 8019 8020 8021 8022 8023 8024 8025 8026 8027 8028 8029 8030 8031 8032 8033 8034 8035 8036 8037 8038 8039 8040 8041 8042 8043 8044 8045 8046 8047 8048 8049 8050 8051 8052 8053 8054 8055 8056 8057 8058 8059 8060 8061 8062 8063 8064 8065 8066 8067 8068 8069 8070 8071 8072 8073 8074 8075 8076 8077 8078 8079 8080 8081 8082 8083 8084 8085 8086 8087 8088 8089 8090 8091 8092 8093 8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108 8109 8110 8111 8112 8113 8114 8115 8116 8117 8118 8119 8120 8121 8122 8123 8124 8125 8126 8127 8128 8129 8130 8131 8132 8133 8134 8135 8136 8137 8138 8139 8140 8141 8142 8143 8144 8145 8146 8147 8148 8149 8150 8151 8152 8153 8154 8155 8156 8157 8158 8159 8160 8161 8162 8163 8164 8165 8166 8167 8168 8169 8170 8171 8172 8173 8174 8175 8176 8177 8178 8179 8180 8181 8182 8183 8184 8185 8186 8187 8188 8189 8190 8191 8192 8193 8194 8195 8196 8197 8198 8199 8200 8201 8202 8203 8204 8205 8206 8207 8208 8209 8210 8211 8212 8213 8214 8215 8216 8217 8218 8219 8220 8221 8222 8223 8224 8225 8226 8227 8228 8229 8230 8231 8232 8233 8234 8235 8236 8237 8238 8239 8240 8241 8242 8243 8244 8245 8246 8247 8248 8249 8250 8251 8252 8253 8254 8255 8256 8257 8258 8259 8260 8261 8262 8263 8264 8265 8266 8267 8268 8269 8270 8271 8272 8273 8274 8275 8276 8277 8278 8279 8280 8281 8282 8283 8284 8285 8286 8287 8288 8289 8290 8291 8292 8293 8294 8295 8296 8297 8298 8299 8300 8301 8302 8303 8304 8305 8306 8307 8308 8309 8310 8311 8312 8313 8314 8315 8316 8317 8318 8319 8320 8321 8322 8323 8324 8325 8326 8327 8328 8329 8330 8331 8332 8333 8334 8335 8336 8337 8338 8339 8340 8341 8342 8343 8344 8345 8346 8347 8348 8349 8350 8351 8352 8353 8354 8355 8356 8357 8358 8359 8360 8361 8362 8363 8364 8365 8366 8367 8368 8369 8370 8371 8372 8373 8374 8375 8376 8377 8378 8379 8380 8381 8382 8383 8384 8385 8386 8387 8388 8389 8390 8391 8392 8393 8394 8395 8396 8397 8398 8399 8400 8401 8402 8403 8404 8405 8406 8407 8408 8409 8410 8411 8412 8413 8414 8415 8416 8417 8418 8419 8420 8421 8422 8423 8424 8425 8426 8427 8428 8429 8430 8431 8432 8433 8434 8435 8436 8437 8438 8439 8440 8441 8442 8443 8444 8445 8446 8447 8448 8449 8450 8451 8452 8453 8454 8455 8456 8457 8458 8459 8460 8461 8462 8463 8464 8465 8466 8467 8468 8469 8470 8471 8472 8473 8474 8475 8476 8477 8478 8479 8480 8481 8482 8483 8484 8485 8486 8487 8488 8489 8490 8491 8492 8493 8494 8495 8496 8497 8498 8499 8500 8501 8502 8503 8504 8505 8506 8507 8508 8509 8510 8511 8512 8513 8514 8515 8516 8517 8518 8519 8520 8521 8522 8523 8524 8525 8526 8527 8528 8529 8530 8531 8532 8533 8534 8535 8536 8537 8538 8539 8540 8541 8542 8543 8544 8545 8546 8547 8548 8549 8550 8551 8552 8553 8554 8555 8556 8557 8558 8559 8560 8561 8562 8563 8564 8565 8566 8567 8568 8569 8570 8571 8572 8573 8574 8575 8576 8577 8578 8579 8580 8581 8582 8583 8584 8585 8586 8587 8588 8589 8590 8591 8592 8593 8594 8595 8596 8597 8598 8599 8600 8601 8602 8603 8604 8605 8606 8607 8608 8609 8610 8611 8612 8613 8614 8615 8616 8617 8618 8619 8620 8621 8622 8623 8624 8625 8626 8627 8628 8629 8630 8631 8632 8633 8634 8635 8636 8637 8638 8639 8640 8641 8642 8643 8644 8645 8646 8647 8648 8649 8650 8651 8652 8653 8654 8655 8656 8657 8658 8659 8660 8661 8662 8663 8664 8665 8666 8667 8668 8669 8670 8671 8672 8673 8674 8675 8676 8677 8678 8679 8680 8681 8682 8683 8684 8685 8686 8687 8688 8689 8690 8691 8692 8693 8694 8695 8696 8697 8698 8699 8700 8701 8702 8703 8704 8705 8706 8707 8708 8709 8710 8711 8712 8713 8714 8715 8716 8717 8718 8719 8720 8721 8722 8723 8724 8725 8726 8727 8728 8729 8730 8731 8732 8733 8734 8735 8736 8737 8738 8739 8740 8741 8742 8743 8744 8745 8746 8747 8748 8749 8750 8751 8752 8753 8754 8755 8756 8757 8758 8759 8760 8761 8762 8763 8764 8765 8766 8767 8768 8769 8770 8771 8772 8773 8774 8775 8776 8777 8778 8779 8780 8781 8782 8783 8784 8785 8786 8787 8788 8789 8790 8791 8792 8793 8794 8795 8796 8797 8798 8799 8800 8801 8802 8803 8804 8805 8806 8807 8808 8809 8810 8811 8812 8813 8814 8815 8816 8817 8818 8819 8820 8821 8822 8823 8824 8825 8826 8827 8828 8829 8830 8831 8832 8833 8834 8835 8836 8837 8838 8839 8840 8841 8842 8843 8844 8845 8846 8847 8848 8849 8850 8851 8852 8853 8854 8855 8856 8857 8858 8859 8860 8861 8862 8863 8864 8865 8866 8867 8868 8869 8870 8871 8872 8873 8874 8875 8876 8877 8878 8879 8880 8881 8882 8883 8884 8885 8886 8887 8888 8889 8890 8891 8892 8893 8894 8895 8896 8897 8898 8899 8900 8901 8902 8903 8904 8905 8906 8907 8908 8909 8910 8911 8912 8913 8914 8915 8916 8917 8918 8919 8920 8921 8922 8923 8924 8925 8926 8927 8928 8929 8930 8931 8932 8933 8934 8935 8936 8937 8938 8939 8940 8941 8942 8943 8944 8945 8946 8947 8948 8949 8950 8951 8952 8953 8954 8955 8956 8957 8958 8959 8960 8961 8962 8963 8964 8965 8966 8967 8968 8969 8970 8971 8972 8973 8974 8975 8976 8977 8978 8979 8980 8981 8982 8983 8984 8985 8986 8987 8988 8989 8990 8991 8992 8993 8994 8995 8996 8997 8998 8999 9000 9001 9002 9003 9004 9005 9006 9007 9008 9009 9010 9011 9012 9013 9014 9015 9016 9017 9018 9019 9020 9021 9022 9023 9024 9025 9026 9027 9028 9029 9030 9031 9032 9033 9034 9035 9036 9037 9038 9039 9040 9041 9042 9043 9044 9045 9046 9047 9048 9049 9050 9051 9052 9053 9054 9055 9056 9057 9058 9059 9060 9061 9062 9063 9064 9065 9066 9067 9068 9069 9070 9071 9072 9073 9074 9075 9076 9077 9078 9079 9080 9081 9082 9083 9084 9085 9086 9087 9088 9089 9090 9091 9092 9093 9094 9095 9096 9097 9098 9099 9100 9101 9102 9103 9104 9105 9106 9107 9108 9109 9110 9111 9112 9113 9114 9115 9116 9117 9118 9119 9120 9121 9122 9123 9124 9125 9126 9127 9128 9129 9130 9131 9132 9133 9134 9135 9136 9137 9138 9139 9140 9141 9142 9143 9144 9145 9146 9147 9148 9149 9150 9151 9152 9153 9154 9155 9156 9157 9158 9159 9160 9161 9162 9163 9164 9165 9166 9167 9168 9169 9170 9171 9172 9173 9174 9175 9176 9177 9178 9179 9180 9181 9182 9183 9184 9185 9186 9187 9188 9189 9190 9191 9192 9193 9194 9195 9196 9197 9198 9199 9200 9201 9202 9203 9204 9205 9206 9207 9208 9209 9210 9211 9212 9213 9214 9215 9216 9217 9218 9219 9220 9221 9222 9223 9224 9225 9226 9227 9228 9229 9230 9231 9232 9233 9234 9235 9236 9237 9238 9239 9240 9241 9242 9243 9244 9245 9246 9247 9248 9249 9250 9251 9252 9253 9254 9255 9256 9257 9258 9259 9260 9261 9262 9263 9264 9265 9266 9267 9268 9269 9270 9271 9272 9273 9274 9275 9276 9277 9278 9279 9280 9281 9282 9283 9284 9285 9286 9287 9288 9289 9290 9291 9292 9293 9294 9295 9296 9297 9298 9299 9300 9301 9302 9303 9304 9305 9306 9307 9308 9309 9310 9311 9312 9313 9314 9315 9316 9317 9318 9319 9320 9321 9322 9323 9324 9325 9326 9327 9328 9329 9330 9331 9332 9333 9334 9335 9336 9337 9338 9339 9340 9341 9342 9343 9344 9345 9346 9347 9348 9349 9350 9351 9352 9353 9354 9355 9356 9357 9358 9359 9360 9361 9362 9363 9364 9365 9366 9367 9368 9369 9370 9371 9372 9373 9374 9375 9376 9377 9378 9379 9380 9381 9382 9383 9384 9385 9386 9387 9388 9389 9390 9391 9392 9393 9394 9395 9396 9397 9398 9399 9400 9401 9402 9403 9404 9405 9406 9407 9408 9409 9410 9411 9412 9413 9414 9415 9416 9417 9418 9419 9420 9421 9422 9423 9424 9425 9426 9427 9428 9429 9430 9431 9432 9433 9434 9435 9436 9437 9438 9439 9440 9441 9442 9443 9444 9445 9446 9447 9448 9449 9450 9451 9452 9453 9454 9455 9456 9457 9458 9459 9460 9461 9462 9463 9464 9465 9466 9467 9468 9469 9470 9471 9472 9473 9474 9475 9476 9477 9478 9479 9480 9481 9482 9483 /* * linux/fs/nls/nls_cp950.c * * Charset cp950 translation tables. * This translation table was generated automatically, the * original table can be download from the Microsoft website. * (http://www.microsoft.com/typography/unicode/unicodecp.htm) */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> static const wchar_t c2u_A1[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x3000,0xFF0C,0x3001,0x3002,0xFF0E,0x2027,0xFF1B,0xFF1A,/* 0x40-0x47 */ 0xFF1F,0xFF01,0xFE30,0x2026,0x2025,0xFE50,0xFE51,0xFE52,/* 0x48-0x4F */ 0x00B7,0xFE54,0xFE55,0xFE56,0xFE57,0xFF5C,0x2013,0xFE31,/* 0x50-0x57 */ 0x2014,0xFE33,0x2574,0xFE34,0xFE4F,0xFF08,0xFF09,0xFE35,/* 0x58-0x5F */ 0xFE36,0xFF5B,0xFF5D,0xFE37,0xFE38,0x3014,0x3015,0xFE39,/* 0x60-0x67 */ 0xFE3A,0x3010,0x3011,0xFE3B,0xFE3C,0x300A,0x300B,0xFE3D,/* 0x68-0x6F */ 0xFE3E,0x3008,0x3009,0xFE3F,0xFE40,0x300C,0x300D,0xFE41,/* 0x70-0x77 */ 0xFE42,0x300E,0x300F,0xFE43,0xFE44,0xFE59,0xFE5A,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0xFE5B,0xFE5C,0xFE5D,0xFE5E,0x2018,0x2019,0x201C,/* 0xA0-0xA7 */ 0x201D,0x301D,0x301E,0x2035,0x2032,0xFF03,0xFF06,0xFF0A,/* 0xA8-0xAF */ 0x203B,0x00A7,0x3003,0x25CB,0x25CF,0x25B3,0x25B2,0x25CE,/* 0xB0-0xB7 */ 0x2606,0x2605,0x25C7,0x25C6,0x25A1,0x25A0,0x25BD,0x25BC,/* 0xB8-0xBF */ 0x32A3,0x2105,0x00AF,0xFFE3,0xFF3F,0x02CD,0xFE49,0xFE4A,/* 0xC0-0xC7 */ 0xFE4D,0xFE4E,0xFE4B,0xFE4C,0xFE5F,0xFE60,0xFE61,0xFF0B,/* 0xC8-0xCF */ 0xFF0D,0x00D7,0x00F7,0x00B1,0x221A,0xFF1C,0xFF1E,0xFF1D,/* 0xD0-0xD7 */ 0x2266,0x2267,0x2260,0x221E,0x2252,0x2261,0xFE62,0xFE63,/* 0xD8-0xDF */ 0xFE64,0xFE65,0xFE66,0xFF5E,0x2229,0x222A,0x22A5,0x2220,/* 0xE0-0xE7 */ 0x221F,0x22BF,0x33D2,0x33D1,0x222B,0x222E,0x2235,0x2234,/* 0xE8-0xEF */ 0x2640,0x2642,0x2295,0x2299,0x2191,0x2193,0x2190,0x2192,/* 0xF0-0xF7 */ 0x2196,0x2197,0x2199,0x2198,0x2225,0x2223,0xFF0F,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_A2[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0xFF3C,0x2215,0xFE68,0xFF04,0xFFE5,0x3012,0xFFE0,0xFFE1,/* 0x40-0x47 */ 0xFF05,0xFF20,0x2103,0x2109,0xFE69,0xFE6A,0xFE6B,0x33D5,/* 0x48-0x4F */ 0x339C,0x339D,0x339E,0x33CE,0x33A1,0x338E,0x338F,0x33C4,/* 0x50-0x57 */ 0x00B0,0x5159,0x515B,0x515E,0x515D,0x5161,0x5163,0x55E7,/* 0x58-0x5F */ 0x74E9,0x7CCE,0x2581,0x2582,0x2583,0x2584,0x2585,0x2586,/* 0x60-0x67 */ 0x2587,0x2588,0x258F,0x258E,0x258D,0x258C,0x258B,0x258A,/* 0x68-0x6F */ 0x2589,0x253C,0x2534,0x252C,0x2524,0x251C,0x2594,0x2500,/* 0x70-0x77 */ 0x2502,0x2595,0x250C,0x2510,0x2514,0x2518,0x256D,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x256E,0x2570,0x256F,0x2550,0x255E,0x256A,0x2561,/* 0xA0-0xA7 */ 0x25E2,0x25E3,0x25E5,0x25E4,0x2571,0x2572,0x2573,0xFF10,/* 0xA8-0xAF */ 0xFF11,0xFF12,0xFF13,0xFF14,0xFF15,0xFF16,0xFF17,0xFF18,/* 0xB0-0xB7 */ 0xFF19,0x2160,0x2161,0x2162,0x2163,0x2164,0x2165,0x2166,/* 0xB8-0xBF */ 0x2167,0x2168,0x2169,0x3021,0x3022,0x3023,0x3024,0x3025,/* 0xC0-0xC7 */ 0x3026,0x3027,0x3028,0x3029,0x5341,0x5344,0x5345,0xFF21,/* 0xC8-0xCF */ 0xFF22,0xFF23,0xFF24,0xFF25,0xFF26,0xFF27,0xFF28,0xFF29,/* 0xD0-0xD7 */ 0xFF2A,0xFF2B,0xFF2C,0xFF2D,0xFF2E,0xFF2F,0xFF30,0xFF31,/* 0xD8-0xDF */ 0xFF32,0xFF33,0xFF34,0xFF35,0xFF36,0xFF37,0xFF38,0xFF39,/* 0xE0-0xE7 */ 0xFF3A,0xFF41,0xFF42,0xFF43,0xFF44,0xFF45,0xFF46,0xFF47,/* 0xE8-0xEF */ 0xFF48,0xFF49,0xFF4A,0xFF4B,0xFF4C,0xFF4D,0xFF4E,0xFF4F,/* 0xF0-0xF7 */ 0xFF50,0xFF51,0xFF52,0xFF53,0xFF54,0xFF55,0xFF56,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_A3[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0xFF57,0xFF58,0xFF59,0xFF5A,0x0391,0x0392,0x0393,0x0394,/* 0x40-0x47 */ 0x0395,0x0396,0x0397,0x0398,0x0399,0x039A,0x039B,0x039C,/* 0x48-0x4F */ 0x039D,0x039E,0x039F,0x03A0,0x03A1,0x03A3,0x03A4,0x03A5,/* 0x50-0x57 */ 0x03A6,0x03A7,0x03A8,0x03A9,0x03B1,0x03B2,0x03B3,0x03B4,/* 0x58-0x5F */ 0x03B5,0x03B6,0x03B7,0x03B8,0x03B9,0x03BA,0x03BB,0x03BC,/* 0x60-0x67 */ 0x03BD,0x03BE,0x03BF,0x03C0,0x03C1,0x03C3,0x03C4,0x03C5,/* 0x68-0x6F */ 0x03C6,0x03C7,0x03C8,0x03C9,0x3105,0x3106,0x3107,0x3108,/* 0x70-0x77 */ 0x3109,0x310A,0x310B,0x310C,0x310D,0x310E,0x310F,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x3110,0x3111,0x3112,0x3113,0x3114,0x3115,0x3116,/* 0xA0-0xA7 */ 0x3117,0x3118,0x3119,0x311A,0x311B,0x311C,0x311D,0x311E,/* 0xA8-0xAF */ 0x311F,0x3120,0x3121,0x3122,0x3123,0x3124,0x3125,0x3126,/* 0xB0-0xB7 */ 0x3127,0x3128,0x3129,0x02D9,0x02C9,0x02CA,0x02C7,0x02CB,/* 0xB8-0xBF */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0xC0-0xC7 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0xC8-0xCF */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0xD0-0xD7 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0xD8-0xDF */ 0x0000,0x20AC,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0xE0-0xE7 */ }; static const wchar_t c2u_A4[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x4E00,0x4E59,0x4E01,0x4E03,0x4E43,0x4E5D,0x4E86,0x4E8C,/* 0x40-0x47 */ 0x4EBA,0x513F,0x5165,0x516B,0x51E0,0x5200,0x5201,0x529B,/* 0x48-0x4F */ 0x5315,0x5341,0x535C,0x53C8,0x4E09,0x4E0B,0x4E08,0x4E0A,/* 0x50-0x57 */ 0x4E2B,0x4E38,0x51E1,0x4E45,0x4E48,0x4E5F,0x4E5E,0x4E8E,/* 0x58-0x5F */ 0x4EA1,0x5140,0x5203,0x52FA,0x5343,0x53C9,0x53E3,0x571F,/* 0x60-0x67 */ 0x58EB,0x5915,0x5927,0x5973,0x5B50,0x5B51,0x5B53,0x5BF8,/* 0x68-0x6F */ 0x5C0F,0x5C22,0x5C38,0x5C71,0x5DDD,0x5DE5,0x5DF1,0x5DF2,/* 0x70-0x77 */ 0x5DF3,0x5DFE,0x5E72,0x5EFE,0x5F0B,0x5F13,0x624D,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x4E11,0x4E10,0x4E0D,0x4E2D,0x4E30,0x4E39,0x4E4B,/* 0xA0-0xA7 */ 0x5C39,0x4E88,0x4E91,0x4E95,0x4E92,0x4E94,0x4EA2,0x4EC1,/* 0xA8-0xAF */ 0x4EC0,0x4EC3,0x4EC6,0x4EC7,0x4ECD,0x4ECA,0x4ECB,0x4EC4,/* 0xB0-0xB7 */ 0x5143,0x5141,0x5167,0x516D,0x516E,0x516C,0x5197,0x51F6,/* 0xB8-0xBF */ 0x5206,0x5207,0x5208,0x52FB,0x52FE,0x52FF,0x5316,0x5339,/* 0xC0-0xC7 */ 0x5348,0x5347,0x5345,0x535E,0x5384,0x53CB,0x53CA,0x53CD,/* 0xC8-0xCF */ 0x58EC,0x5929,0x592B,0x592A,0x592D,0x5B54,0x5C11,0x5C24,/* 0xD0-0xD7 */ 0x5C3A,0x5C6F,0x5DF4,0x5E7B,0x5EFF,0x5F14,0x5F15,0x5FC3,/* 0xD8-0xDF */ 0x6208,0x6236,0x624B,0x624E,0x652F,0x6587,0x6597,0x65A4,/* 0xE0-0xE7 */ 0x65B9,0x65E5,0x66F0,0x6708,0x6728,0x6B20,0x6B62,0x6B79,/* 0xE8-0xEF */ 0x6BCB,0x6BD4,0x6BDB,0x6C0F,0x6C34,0x706B,0x722A,0x7236,/* 0xF0-0xF7 */ 0x723B,0x7247,0x7259,0x725B,0x72AC,0x738B,0x4E19,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_A5[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x4E16,0x4E15,0x4E14,0x4E18,0x4E3B,0x4E4D,0x4E4F,0x4E4E,/* 0x40-0x47 */ 0x4EE5,0x4ED8,0x4ED4,0x4ED5,0x4ED6,0x4ED7,0x4EE3,0x4EE4,/* 0x48-0x4F */ 0x4ED9,0x4EDE,0x5145,0x5144,0x5189,0x518A,0x51AC,0x51F9,/* 0x50-0x57 */ 0x51FA,0x51F8,0x520A,0x52A0,0x529F,0x5305,0x5306,0x5317,/* 0x58-0x5F */ 0x531D,0x4EDF,0x534A,0x5349,0x5361,0x5360,0x536F,0x536E,/* 0x60-0x67 */ 0x53BB,0x53EF,0x53E4,0x53F3,0x53EC,0x53EE,0x53E9,0x53E8,/* 0x68-0x6F */ 0x53FC,0x53F8,0x53F5,0x53EB,0x53E6,0x53EA,0x53F2,0x53F1,/* 0x70-0x77 */ 0x53F0,0x53E5,0x53ED,0x53FB,0x56DB,0x56DA,0x5916,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x592E,0x5931,0x5974,0x5976,0x5B55,0x5B83,0x5C3C,/* 0xA0-0xA7 */ 0x5DE8,0x5DE7,0x5DE6,0x5E02,0x5E03,0x5E73,0x5E7C,0x5F01,/* 0xA8-0xAF */ 0x5F18,0x5F17,0x5FC5,0x620A,0x6253,0x6254,0x6252,0x6251,/* 0xB0-0xB7 */ 0x65A5,0x65E6,0x672E,0x672C,0x672A,0x672B,0x672D,0x6B63,/* 0xB8-0xBF */ 0x6BCD,0x6C11,0x6C10,0x6C38,0x6C41,0x6C40,0x6C3E,0x72AF,/* 0xC0-0xC7 */ 0x7384,0x7389,0x74DC,0x74E6,0x7518,0x751F,0x7528,0x7529,/* 0xC8-0xCF */ 0x7530,0x7531,0x7532,0x7533,0x758B,0x767D,0x76AE,0x76BF,/* 0xD0-0xD7 */ 0x76EE,0x77DB,0x77E2,0x77F3,0x793A,0x79BE,0x7A74,0x7ACB,/* 0xD8-0xDF */ 0x4E1E,0x4E1F,0x4E52,0x4E53,0x4E69,0x4E99,0x4EA4,0x4EA6,/* 0xE0-0xE7 */ 0x4EA5,0x4EFF,0x4F09,0x4F19,0x4F0A,0x4F15,0x4F0D,0x4F10,/* 0xE8-0xEF */ 0x4F11,0x4F0F,0x4EF2,0x4EF6,0x4EFB,0x4EF0,0x4EF3,0x4EFD,/* 0xF0-0xF7 */ 0x4F01,0x4F0B,0x5149,0x5147,0x5146,0x5148,0x5168,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_A6[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x5171,0x518D,0x51B0,0x5217,0x5211,0x5212,0x520E,0x5216,/* 0x40-0x47 */ 0x52A3,0x5308,0x5321,0x5320,0x5370,0x5371,0x5409,0x540F,/* 0x48-0x4F */ 0x540C,0x540A,0x5410,0x5401,0x540B,0x5404,0x5411,0x540D,/* 0x50-0x57 */ 0x5408,0x5403,0x540E,0x5406,0x5412,0x56E0,0x56DE,0x56DD,/* 0x58-0x5F */ 0x5733,0x5730,0x5728,0x572D,0x572C,0x572F,0x5729,0x5919,/* 0x60-0x67 */ 0x591A,0x5937,0x5938,0x5984,0x5978,0x5983,0x597D,0x5979,/* 0x68-0x6F */ 0x5982,0x5981,0x5B57,0x5B58,0x5B87,0x5B88,0x5B85,0x5B89,/* 0x70-0x77 */ 0x5BFA,0x5C16,0x5C79,0x5DDE,0x5E06,0x5E76,0x5E74,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x5F0F,0x5F1B,0x5FD9,0x5FD6,0x620E,0x620C,0x620D,/* 0xA0-0xA7 */ 0x6210,0x6263,0x625B,0x6258,0x6536,0x65E9,0x65E8,0x65EC,/* 0xA8-0xAF */ 0x65ED,0x66F2,0x66F3,0x6709,0x673D,0x6734,0x6731,0x6735,/* 0xB0-0xB7 */ 0x6B21,0x6B64,0x6B7B,0x6C16,0x6C5D,0x6C57,0x6C59,0x6C5F,/* 0xB8-0xBF */ 0x6C60,0x6C50,0x6C55,0x6C61,0x6C5B,0x6C4D,0x6C4E,0x7070,/* 0xC0-0xC7 */ 0x725F,0x725D,0x767E,0x7AF9,0x7C73,0x7CF8,0x7F36,0x7F8A,/* 0xC8-0xCF */ 0x7FBD,0x8001,0x8003,0x800C,0x8012,0x8033,0x807F,0x8089,/* 0xD0-0xD7 */ 0x808B,0x808C,0x81E3,0x81EA,0x81F3,0x81FC,0x820C,0x821B,/* 0xD8-0xDF */ 0x821F,0x826E,0x8272,0x827E,0x866B,0x8840,0x884C,0x8863,/* 0xE0-0xE7 */ 0x897F,0x9621,0x4E32,0x4EA8,0x4F4D,0x4F4F,0x4F47,0x4F57,/* 0xE8-0xEF */ 0x4F5E,0x4F34,0x4F5B,0x4F55,0x4F30,0x4F50,0x4F51,0x4F3D,/* 0xF0-0xF7 */ 0x4F3A,0x4F38,0x4F43,0x4F54,0x4F3C,0x4F46,0x4F63,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_A7[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x4F5C,0x4F60,0x4F2F,0x4F4E,0x4F36,0x4F59,0x4F5D,0x4F48,/* 0x40-0x47 */ 0x4F5A,0x514C,0x514B,0x514D,0x5175,0x51B6,0x51B7,0x5225,/* 0x48-0x4F */ 0x5224,0x5229,0x522A,0x5228,0x52AB,0x52A9,0x52AA,0x52AC,/* 0x50-0x57 */ 0x5323,0x5373,0x5375,0x541D,0x542D,0x541E,0x543E,0x5426,/* 0x58-0x5F */ 0x544E,0x5427,0x5446,0x5443,0x5433,0x5448,0x5442,0x541B,/* 0x60-0x67 */ 0x5429,0x544A,0x5439,0x543B,0x5438,0x542E,0x5435,0x5436,/* 0x68-0x6F */ 0x5420,0x543C,0x5440,0x5431,0x542B,0x541F,0x542C,0x56EA,/* 0x70-0x77 */ 0x56F0,0x56E4,0x56EB,0x574A,0x5751,0x5740,0x574D,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x5747,0x574E,0x573E,0x5750,0x574F,0x573B,0x58EF,/* 0xA0-0xA7 */ 0x593E,0x599D,0x5992,0x59A8,0x599E,0x59A3,0x5999,0x5996,/* 0xA8-0xAF */ 0x598D,0x59A4,0x5993,0x598A,0x59A5,0x5B5D,0x5B5C,0x5B5A,/* 0xB0-0xB7 */ 0x5B5B,0x5B8C,0x5B8B,0x5B8F,0x5C2C,0x5C40,0x5C41,0x5C3F,/* 0xB8-0xBF */ 0x5C3E,0x5C90,0x5C91,0x5C94,0x5C8C,0x5DEB,0x5E0C,0x5E8F,/* 0xC0-0xC7 */ 0x5E87,0x5E8A,0x5EF7,0x5F04,0x5F1F,0x5F64,0x5F62,0x5F77,/* 0xC8-0xCF */ 0x5F79,0x5FD8,0x5FCC,0x5FD7,0x5FCD,0x5FF1,0x5FEB,0x5FF8,/* 0xD0-0xD7 */ 0x5FEA,0x6212,0x6211,0x6284,0x6297,0x6296,0x6280,0x6276,/* 0xD8-0xDF */ 0x6289,0x626D,0x628A,0x627C,0x627E,0x6279,0x6273,0x6292,/* 0xE0-0xE7 */ 0x626F,0x6298,0x626E,0x6295,0x6293,0x6291,0x6286,0x6539,/* 0xE8-0xEF */ 0x653B,0x6538,0x65F1,0x66F4,0x675F,0x674E,0x674F,0x6750,/* 0xF0-0xF7 */ 0x6751,0x675C,0x6756,0x675E,0x6749,0x6746,0x6760,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_A8[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x6753,0x6757,0x6B65,0x6BCF,0x6C42,0x6C5E,0x6C99,0x6C81,/* 0x40-0x47 */ 0x6C88,0x6C89,0x6C85,0x6C9B,0x6C6A,0x6C7A,0x6C90,0x6C70,/* 0x48-0x4F */ 0x6C8C,0x6C68,0x6C96,0x6C92,0x6C7D,0x6C83,0x6C72,0x6C7E,/* 0x50-0x57 */ 0x6C74,0x6C86,0x6C76,0x6C8D,0x6C94,0x6C98,0x6C82,0x7076,/* 0x58-0x5F */ 0x707C,0x707D,0x7078,0x7262,0x7261,0x7260,0x72C4,0x72C2,/* 0x60-0x67 */ 0x7396,0x752C,0x752B,0x7537,0x7538,0x7682,0x76EF,0x77E3,/* 0x68-0x6F */ 0x79C1,0x79C0,0x79BF,0x7A76,0x7CFB,0x7F55,0x8096,0x8093,/* 0x70-0x77 */ 0x809D,0x8098,0x809B,0x809A,0x80B2,0x826F,0x8292,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x828B,0x828D,0x898B,0x89D2,0x8A00,0x8C37,0x8C46,/* 0xA0-0xA7 */ 0x8C55,0x8C9D,0x8D64,0x8D70,0x8DB3,0x8EAB,0x8ECA,0x8F9B,/* 0xA8-0xAF */ 0x8FB0,0x8FC2,0x8FC6,0x8FC5,0x8FC4,0x5DE1,0x9091,0x90A2,/* 0xB0-0xB7 */ 0x90AA,0x90A6,0x90A3,0x9149,0x91C6,0x91CC,0x9632,0x962E,/* 0xB8-0xBF */ 0x9631,0x962A,0x962C,0x4E26,0x4E56,0x4E73,0x4E8B,0x4E9B,/* 0xC0-0xC7 */ 0x4E9E,0x4EAB,0x4EAC,0x4F6F,0x4F9D,0x4F8D,0x4F73,0x4F7F,/* 0xC8-0xCF */ 0x4F6C,0x4F9B,0x4F8B,0x4F86,0x4F83,0x4F70,0x4F75,0x4F88,/* 0xD0-0xD7 */ 0x4F69,0x4F7B,0x4F96,0x4F7E,0x4F8F,0x4F91,0x4F7A,0x5154,/* 0xD8-0xDF */ 0x5152,0x5155,0x5169,0x5177,0x5176,0x5178,0x51BD,0x51FD,/* 0xE0-0xE7 */ 0x523B,0x5238,0x5237,0x523A,0x5230,0x522E,0x5236,0x5241,/* 0xE8-0xEF */ 0x52BE,0x52BB,0x5352,0x5354,0x5353,0x5351,0x5366,0x5377,/* 0xF0-0xF7 */ 0x5378,0x5379,0x53D6,0x53D4,0x53D7,0x5473,0x5475,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_A9[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x5496,0x5478,0x5495,0x5480,0x547B,0x5477,0x5484,0x5492,/* 0x40-0x47 */ 0x5486,0x547C,0x5490,0x5471,0x5476,0x548C,0x549A,0x5462,/* 0x48-0x4F */ 0x5468,0x548B,0x547D,0x548E,0x56FA,0x5783,0x5777,0x576A,/* 0x50-0x57 */ 0x5769,0x5761,0x5766,0x5764,0x577C,0x591C,0x5949,0x5947,/* 0x58-0x5F */ 0x5948,0x5944,0x5954,0x59BE,0x59BB,0x59D4,0x59B9,0x59AE,/* 0x60-0x67 */ 0x59D1,0x59C6,0x59D0,0x59CD,0x59CB,0x59D3,0x59CA,0x59AF,/* 0x68-0x6F */ 0x59B3,0x59D2,0x59C5,0x5B5F,0x5B64,0x5B63,0x5B97,0x5B9A,/* 0x70-0x77 */ 0x5B98,0x5B9C,0x5B99,0x5B9B,0x5C1A,0x5C48,0x5C45,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x5C46,0x5CB7,0x5CA1,0x5CB8,0x5CA9,0x5CAB,0x5CB1,/* 0xA0-0xA7 */ 0x5CB3,0x5E18,0x5E1A,0x5E16,0x5E15,0x5E1B,0x5E11,0x5E78,/* 0xA8-0xAF */ 0x5E9A,0x5E97,0x5E9C,0x5E95,0x5E96,0x5EF6,0x5F26,0x5F27,/* 0xB0-0xB7 */ 0x5F29,0x5F80,0x5F81,0x5F7F,0x5F7C,0x5FDD,0x5FE0,0x5FFD,/* 0xB8-0xBF */ 0x5FF5,0x5FFF,0x600F,0x6014,0x602F,0x6035,0x6016,0x602A,/* 0xC0-0xC7 */ 0x6015,0x6021,0x6027,0x6029,0x602B,0x601B,0x6216,0x6215,/* 0xC8-0xCF */ 0x623F,0x623E,0x6240,0x627F,0x62C9,0x62CC,0x62C4,0x62BF,/* 0xD0-0xD7 */ 0x62C2,0x62B9,0x62D2,0x62DB,0x62AB,0x62D3,0x62D4,0x62CB,/* 0xD8-0xDF */ 0x62C8,0x62A8,0x62BD,0x62BC,0x62D0,0x62D9,0x62C7,0x62CD,/* 0xE0-0xE7 */ 0x62B5,0x62DA,0x62B1,0x62D8,0x62D6,0x62D7,0x62C6,0x62AC,/* 0xE8-0xEF */ 0x62CE,0x653E,0x65A7,0x65BC,0x65FA,0x6614,0x6613,0x660C,/* 0xF0-0xF7 */ 0x6606,0x6602,0x660E,0x6600,0x660F,0x6615,0x660A,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_AA[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x6607,0x670D,0x670B,0x676D,0x678B,0x6795,0x6771,0x679C,/* 0x40-0x47 */ 0x6773,0x6777,0x6787,0x679D,0x6797,0x676F,0x6770,0x677F,/* 0x48-0x4F */ 0x6789,0x677E,0x6790,0x6775,0x679A,0x6793,0x677C,0x676A,/* 0x50-0x57 */ 0x6772,0x6B23,0x6B66,0x6B67,0x6B7F,0x6C13,0x6C1B,0x6CE3,/* 0x58-0x5F */ 0x6CE8,0x6CF3,0x6CB1,0x6CCC,0x6CE5,0x6CB3,0x6CBD,0x6CBE,/* 0x60-0x67 */ 0x6CBC,0x6CE2,0x6CAB,0x6CD5,0x6CD3,0x6CB8,0x6CC4,0x6CB9,/* 0x68-0x6F */ 0x6CC1,0x6CAE,0x6CD7,0x6CC5,0x6CF1,0x6CBF,0x6CBB,0x6CE1,/* 0x70-0x77 */ 0x6CDB,0x6CCA,0x6CAC,0x6CEF,0x6CDC,0x6CD6,0x6CE0,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x7095,0x708E,0x7092,0x708A,0x7099,0x722C,0x722D,/* 0xA0-0xA7 */ 0x7238,0x7248,0x7267,0x7269,0x72C0,0x72CE,0x72D9,0x72D7,/* 0xA8-0xAF */ 0x72D0,0x73A9,0x73A8,0x739F,0x73AB,0x73A5,0x753D,0x759D,/* 0xB0-0xB7 */ 0x7599,0x759A,0x7684,0x76C2,0x76F2,0x76F4,0x77E5,0x77FD,/* 0xB8-0xBF */ 0x793E,0x7940,0x7941,0x79C9,0x79C8,0x7A7A,0x7A79,0x7AFA,/* 0xC0-0xC7 */ 0x7CFE,0x7F54,0x7F8C,0x7F8B,0x8005,0x80BA,0x80A5,0x80A2,/* 0xC8-0xCF */ 0x80B1,0x80A1,0x80AB,0x80A9,0x80B4,0x80AA,0x80AF,0x81E5,/* 0xD0-0xD7 */ 0x81FE,0x820D,0x82B3,0x829D,0x8299,0x82AD,0x82BD,0x829F,/* 0xD8-0xDF */ 0x82B9,0x82B1,0x82AC,0x82A5,0x82AF,0x82B8,0x82A3,0x82B0,/* 0xE0-0xE7 */ 0x82BE,0x82B7,0x864E,0x8671,0x521D,0x8868,0x8ECB,0x8FCE,/* 0xE8-0xEF */ 0x8FD4,0x8FD1,0x90B5,0x90B8,0x90B1,0x90B6,0x91C7,0x91D1,/* 0xF0-0xF7 */ 0x9577,0x9580,0x961C,0x9640,0x963F,0x963B,0x9644,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_AB[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x9642,0x96B9,0x96E8,0x9752,0x975E,0x4E9F,0x4EAD,0x4EAE,/* 0x40-0x47 */ 0x4FE1,0x4FB5,0x4FAF,0x4FBF,0x4FE0,0x4FD1,0x4FCF,0x4FDD,/* 0x48-0x4F */ 0x4FC3,0x4FB6,0x4FD8,0x4FDF,0x4FCA,0x4FD7,0x4FAE,0x4FD0,/* 0x50-0x57 */ 0x4FC4,0x4FC2,0x4FDA,0x4FCE,0x4FDE,0x4FB7,0x5157,0x5192,/* 0x58-0x5F */ 0x5191,0x51A0,0x524E,0x5243,0x524A,0x524D,0x524C,0x524B,/* 0x60-0x67 */ 0x5247,0x52C7,0x52C9,0x52C3,0x52C1,0x530D,0x5357,0x537B,/* 0x68-0x6F */ 0x539A,0x53DB,0x54AC,0x54C0,0x54A8,0x54CE,0x54C9,0x54B8,/* 0x70-0x77 */ 0x54A6,0x54B3,0x54C7,0x54C2,0x54BD,0x54AA,0x54C1,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x54C4,0x54C8,0x54AF,0x54AB,0x54B1,0x54BB,0x54A9,/* 0xA0-0xA7 */ 0x54A7,0x54BF,0x56FF,0x5782,0x578B,0x57A0,0x57A3,0x57A2,/* 0xA8-0xAF */ 0x57CE,0x57AE,0x5793,0x5955,0x5951,0x594F,0x594E,0x5950,/* 0xB0-0xB7 */ 0x59DC,0x59D8,0x59FF,0x59E3,0x59E8,0x5A03,0x59E5,0x59EA,/* 0xB8-0xBF */ 0x59DA,0x59E6,0x5A01,0x59FB,0x5B69,0x5BA3,0x5BA6,0x5BA4,/* 0xC0-0xC7 */ 0x5BA2,0x5BA5,0x5C01,0x5C4E,0x5C4F,0x5C4D,0x5C4B,0x5CD9,/* 0xC8-0xCF */ 0x5CD2,0x5DF7,0x5E1D,0x5E25,0x5E1F,0x5E7D,0x5EA0,0x5EA6,/* 0xD0-0xD7 */ 0x5EFA,0x5F08,0x5F2D,0x5F65,0x5F88,0x5F85,0x5F8A,0x5F8B,/* 0xD8-0xDF */ 0x5F87,0x5F8C,0x5F89,0x6012,0x601D,0x6020,0x6025,0x600E,/* 0xE0-0xE7 */ 0x6028,0x604D,0x6070,0x6068,0x6062,0x6046,0x6043,0x606C,/* 0xE8-0xEF */ 0x606B,0x606A,0x6064,0x6241,0x62DC,0x6316,0x6309,0x62FC,/* 0xF0-0xF7 */ 0x62ED,0x6301,0x62EE,0x62FD,0x6307,0x62F1,0x62F7,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_AC[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x62EF,0x62EC,0x62FE,0x62F4,0x6311,0x6302,0x653F,0x6545,/* 0x40-0x47 */ 0x65AB,0x65BD,0x65E2,0x6625,0x662D,0x6620,0x6627,0x662F,/* 0x48-0x4F */ 0x661F,0x6628,0x6631,0x6624,0x66F7,0x67FF,0x67D3,0x67F1,/* 0x50-0x57 */ 0x67D4,0x67D0,0x67EC,0x67B6,0x67AF,0x67F5,0x67E9,0x67EF,/* 0x58-0x5F */ 0x67C4,0x67D1,0x67B4,0x67DA,0x67E5,0x67B8,0x67CF,0x67DE,/* 0x60-0x67 */ 0x67F3,0x67B0,0x67D9,0x67E2,0x67DD,0x67D2,0x6B6A,0x6B83,/* 0x68-0x6F */ 0x6B86,0x6BB5,0x6BD2,0x6BD7,0x6C1F,0x6CC9,0x6D0B,0x6D32,/* 0x70-0x77 */ 0x6D2A,0x6D41,0x6D25,0x6D0C,0x6D31,0x6D1E,0x6D17,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x6D3B,0x6D3D,0x6D3E,0x6D36,0x6D1B,0x6CF5,0x6D39,/* 0xA0-0xA7 */ 0x6D27,0x6D38,0x6D29,0x6D2E,0x6D35,0x6D0E,0x6D2B,0x70AB,/* 0xA8-0xAF */ 0x70BA,0x70B3,0x70AC,0x70AF,0x70AD,0x70B8,0x70AE,0x70A4,/* 0xB0-0xB7 */ 0x7230,0x7272,0x726F,0x7274,0x72E9,0x72E0,0x72E1,0x73B7,/* 0xB8-0xBF */ 0x73CA,0x73BB,0x73B2,0x73CD,0x73C0,0x73B3,0x751A,0x752D,/* 0xC0-0xC7 */ 0x754F,0x754C,0x754E,0x754B,0x75AB,0x75A4,0x75A5,0x75A2,/* 0xC8-0xCF */ 0x75A3,0x7678,0x7686,0x7687,0x7688,0x76C8,0x76C6,0x76C3,/* 0xD0-0xD7 */ 0x76C5,0x7701,0x76F9,0x76F8,0x7709,0x770B,0x76FE,0x76FC,/* 0xD8-0xDF */ 0x7707,0x77DC,0x7802,0x7814,0x780C,0x780D,0x7946,0x7949,/* 0xE0-0xE7 */ 0x7948,0x7947,0x79B9,0x79BA,0x79D1,0x79D2,0x79CB,0x7A7F,/* 0xE8-0xEF */ 0x7A81,0x7AFF,0x7AFD,0x7C7D,0x7D02,0x7D05,0x7D00,0x7D09,/* 0xF0-0xF7 */ 0x7D07,0x7D04,0x7D06,0x7F38,0x7F8E,0x7FBF,0x8004,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_AD[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x8010,0x800D,0x8011,0x8036,0x80D6,0x80E5,0x80DA,0x80C3,/* 0x40-0x47 */ 0x80C4,0x80CC,0x80E1,0x80DB,0x80CE,0x80DE,0x80E4,0x80DD,/* 0x48-0x4F */ 0x81F4,0x8222,0x82E7,0x8303,0x8305,0x82E3,0x82DB,0x82E6,/* 0x50-0x57 */ 0x8304,0x82E5,0x8302,0x8309,0x82D2,0x82D7,0x82F1,0x8301,/* 0x58-0x5F */ 0x82DC,0x82D4,0x82D1,0x82DE,0x82D3,0x82DF,0x82EF,0x8306,/* 0x60-0x67 */ 0x8650,0x8679,0x867B,0x867A,0x884D,0x886B,0x8981,0x89D4,/* 0x68-0x6F */ 0x8A08,0x8A02,0x8A03,0x8C9E,0x8CA0,0x8D74,0x8D73,0x8DB4,/* 0x70-0x77 */ 0x8ECD,0x8ECC,0x8FF0,0x8FE6,0x8FE2,0x8FEA,0x8FE5,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x8FED,0x8FEB,0x8FE4,0x8FE8,0x90CA,0x90CE,0x90C1,/* 0xA0-0xA7 */ 0x90C3,0x914B,0x914A,0x91CD,0x9582,0x9650,0x964B,0x964C,/* 0xA8-0xAF */ 0x964D,0x9762,0x9769,0x97CB,0x97ED,0x97F3,0x9801,0x98A8,/* 0xB0-0xB7 */ 0x98DB,0x98DF,0x9996,0x9999,0x4E58,0x4EB3,0x500C,0x500D,/* 0xB8-0xBF */ 0x5023,0x4FEF,0x5026,0x5025,0x4FF8,0x5029,0x5016,0x5006,/* 0xC0-0xC7 */ 0x503C,0x501F,0x501A,0x5012,0x5011,0x4FFA,0x5000,0x5014,/* 0xC8-0xCF */ 0x5028,0x4FF1,0x5021,0x500B,0x5019,0x5018,0x4FF3,0x4FEE,/* 0xD0-0xD7 */ 0x502D,0x502A,0x4FFE,0x502B,0x5009,0x517C,0x51A4,0x51A5,/* 0xD8-0xDF */ 0x51A2,0x51CD,0x51CC,0x51C6,0x51CB,0x5256,0x525C,0x5254,/* 0xE0-0xE7 */ 0x525B,0x525D,0x532A,0x537F,0x539F,0x539D,0x53DF,0x54E8,/* 0xE8-0xEF */ 0x5510,0x5501,0x5537,0x54FC,0x54E5,0x54F2,0x5506,0x54FA,/* 0xF0-0xF7 */ 0x5514,0x54E9,0x54ED,0x54E1,0x5509,0x54EE,0x54EA,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_AE[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x54E6,0x5527,0x5507,0x54FD,0x550F,0x5703,0x5704,0x57C2,/* 0x40-0x47 */ 0x57D4,0x57CB,0x57C3,0x5809,0x590F,0x5957,0x5958,0x595A,/* 0x48-0x4F */ 0x5A11,0x5A18,0x5A1C,0x5A1F,0x5A1B,0x5A13,0x59EC,0x5A20,/* 0x50-0x57 */ 0x5A23,0x5A29,0x5A25,0x5A0C,0x5A09,0x5B6B,0x5C58,0x5BB0,/* 0x58-0x5F */ 0x5BB3,0x5BB6,0x5BB4,0x5BAE,0x5BB5,0x5BB9,0x5BB8,0x5C04,/* 0x60-0x67 */ 0x5C51,0x5C55,0x5C50,0x5CED,0x5CFD,0x5CFB,0x5CEA,0x5CE8,/* 0x68-0x6F */ 0x5CF0,0x5CF6,0x5D01,0x5CF4,0x5DEE,0x5E2D,0x5E2B,0x5EAB,/* 0x70-0x77 */ 0x5EAD,0x5EA7,0x5F31,0x5F92,0x5F91,0x5F90,0x6059,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x6063,0x6065,0x6050,0x6055,0x606D,0x6069,0x606F,/* 0xA0-0xA7 */ 0x6084,0x609F,0x609A,0x608D,0x6094,0x608C,0x6085,0x6096,/* 0xA8-0xAF */ 0x6247,0x62F3,0x6308,0x62FF,0x634E,0x633E,0x632F,0x6355,/* 0xB0-0xB7 */ 0x6342,0x6346,0x634F,0x6349,0x633A,0x6350,0x633D,0x632A,/* 0xB8-0xBF */ 0x632B,0x6328,0x634D,0x634C,0x6548,0x6549,0x6599,0x65C1,/* 0xC0-0xC7 */ 0x65C5,0x6642,0x6649,0x664F,0x6643,0x6652,0x664C,0x6645,/* 0xC8-0xCF */ 0x6641,0x66F8,0x6714,0x6715,0x6717,0x6821,0x6838,0x6848,/* 0xD0-0xD7 */ 0x6846,0x6853,0x6839,0x6842,0x6854,0x6829,0x68B3,0x6817,/* 0xD8-0xDF */ 0x684C,0x6851,0x683D,0x67F4,0x6850,0x6840,0x683C,0x6843,/* 0xE0-0xE7 */ 0x682A,0x6845,0x6813,0x6818,0x6841,0x6B8A,0x6B89,0x6BB7,/* 0xE8-0xEF */ 0x6C23,0x6C27,0x6C28,0x6C26,0x6C24,0x6CF0,0x6D6A,0x6D95,/* 0xF0-0xF7 */ 0x6D88,0x6D87,0x6D66,0x6D78,0x6D77,0x6D59,0x6D93,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_AF[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x6D6C,0x6D89,0x6D6E,0x6D5A,0x6D74,0x6D69,0x6D8C,0x6D8A,/* 0x40-0x47 */ 0x6D79,0x6D85,0x6D65,0x6D94,0x70CA,0x70D8,0x70E4,0x70D9,/* 0x48-0x4F */ 0x70C8,0x70CF,0x7239,0x7279,0x72FC,0x72F9,0x72FD,0x72F8,/* 0x50-0x57 */ 0x72F7,0x7386,0x73ED,0x7409,0x73EE,0x73E0,0x73EA,0x73DE,/* 0x58-0x5F */ 0x7554,0x755D,0x755C,0x755A,0x7559,0x75BE,0x75C5,0x75C7,/* 0x60-0x67 */ 0x75B2,0x75B3,0x75BD,0x75BC,0x75B9,0x75C2,0x75B8,0x768B,/* 0x68-0x6F */ 0x76B0,0x76CA,0x76CD,0x76CE,0x7729,0x771F,0x7720,0x7728,/* 0x70-0x77 */ 0x77E9,0x7830,0x7827,0x7838,0x781D,0x7834,0x7837,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x7825,0x782D,0x7820,0x781F,0x7832,0x7955,0x7950,/* 0xA0-0xA7 */ 0x7960,0x795F,0x7956,0x795E,0x795D,0x7957,0x795A,0x79E4,/* 0xA8-0xAF */ 0x79E3,0x79E7,0x79DF,0x79E6,0x79E9,0x79D8,0x7A84,0x7A88,/* 0xB0-0xB7 */ 0x7AD9,0x7B06,0x7B11,0x7C89,0x7D21,0x7D17,0x7D0B,0x7D0A,/* 0xB8-0xBF */ 0x7D20,0x7D22,0x7D14,0x7D10,0x7D15,0x7D1A,0x7D1C,0x7D0D,/* 0xC0-0xC7 */ 0x7D19,0x7D1B,0x7F3A,0x7F5F,0x7F94,0x7FC5,0x7FC1,0x8006,/* 0xC8-0xCF */ 0x8018,0x8015,0x8019,0x8017,0x803D,0x803F,0x80F1,0x8102,/* 0xD0-0xD7 */ 0x80F0,0x8105,0x80ED,0x80F4,0x8106,0x80F8,0x80F3,0x8108,/* 0xD8-0xDF */ 0x80FD,0x810A,0x80FC,0x80EF,0x81ED,0x81EC,0x8200,0x8210,/* 0xE0-0xE7 */ 0x822A,0x822B,0x8228,0x822C,0x82BB,0x832B,0x8352,0x8354,/* 0xE8-0xEF */ 0x834A,0x8338,0x8350,0x8349,0x8335,0x8334,0x834F,0x8332,/* 0xF0-0xF7 */ 0x8339,0x8336,0x8317,0x8340,0x8331,0x8328,0x8343,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_B0[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x8654,0x868A,0x86AA,0x8693,0x86A4,0x86A9,0x868C,0x86A3,/* 0x40-0x47 */ 0x869C,0x8870,0x8877,0x8881,0x8882,0x887D,0x8879,0x8A18,/* 0x48-0x4F */ 0x8A10,0x8A0E,0x8A0C,0x8A15,0x8A0A,0x8A17,0x8A13,0x8A16,/* 0x50-0x57 */ 0x8A0F,0x8A11,0x8C48,0x8C7A,0x8C79,0x8CA1,0x8CA2,0x8D77,/* 0x58-0x5F */ 0x8EAC,0x8ED2,0x8ED4,0x8ECF,0x8FB1,0x9001,0x9006,0x8FF7,/* 0x60-0x67 */ 0x9000,0x8FFA,0x8FF4,0x9003,0x8FFD,0x9005,0x8FF8,0x9095,/* 0x68-0x6F */ 0x90E1,0x90DD,0x90E2,0x9152,0x914D,0x914C,0x91D8,0x91DD,/* 0x70-0x77 */ 0x91D7,0x91DC,0x91D9,0x9583,0x9662,0x9663,0x9661,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x965B,0x965D,0x9664,0x9658,0x965E,0x96BB,0x98E2,/* 0xA0-0xA7 */ 0x99AC,0x9AA8,0x9AD8,0x9B25,0x9B32,0x9B3C,0x4E7E,0x507A,/* 0xA8-0xAF */ 0x507D,0x505C,0x5047,0x5043,0x504C,0x505A,0x5049,0x5065,/* 0xB0-0xB7 */ 0x5076,0x504E,0x5055,0x5075,0x5074,0x5077,0x504F,0x500F,/* 0xB8-0xBF */ 0x506F,0x506D,0x515C,0x5195,0x51F0,0x526A,0x526F,0x52D2,/* 0xC0-0xC7 */ 0x52D9,0x52D8,0x52D5,0x5310,0x530F,0x5319,0x533F,0x5340,/* 0xC8-0xCF */ 0x533E,0x53C3,0x66FC,0x5546,0x556A,0x5566,0x5544,0x555E,/* 0xD0-0xD7 */ 0x5561,0x5543,0x554A,0x5531,0x5556,0x554F,0x5555,0x552F,/* 0xD8-0xDF */ 0x5564,0x5538,0x552E,0x555C,0x552C,0x5563,0x5533,0x5541,/* 0xE0-0xE7 */ 0x5557,0x5708,0x570B,0x5709,0x57DF,0x5805,0x580A,0x5806,/* 0xE8-0xEF */ 0x57E0,0x57E4,0x57FA,0x5802,0x5835,0x57F7,0x57F9,0x5920,/* 0xF0-0xF7 */ 0x5962,0x5A36,0x5A41,0x5A49,0x5A66,0x5A6A,0x5A40,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_B1[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x5A3C,0x5A62,0x5A5A,0x5A46,0x5A4A,0x5B70,0x5BC7,0x5BC5,/* 0x40-0x47 */ 0x5BC4,0x5BC2,0x5BBF,0x5BC6,0x5C09,0x5C08,0x5C07,0x5C60,/* 0x48-0x4F */ 0x5C5C,0x5C5D,0x5D07,0x5D06,0x5D0E,0x5D1B,0x5D16,0x5D22,/* 0x50-0x57 */ 0x5D11,0x5D29,0x5D14,0x5D19,0x5D24,0x5D27,0x5D17,0x5DE2,/* 0x58-0x5F */ 0x5E38,0x5E36,0x5E33,0x5E37,0x5EB7,0x5EB8,0x5EB6,0x5EB5,/* 0x60-0x67 */ 0x5EBE,0x5F35,0x5F37,0x5F57,0x5F6C,0x5F69,0x5F6B,0x5F97,/* 0x68-0x6F */ 0x5F99,0x5F9E,0x5F98,0x5FA1,0x5FA0,0x5F9C,0x607F,0x60A3,/* 0x70-0x77 */ 0x6089,0x60A0,0x60A8,0x60CB,0x60B4,0x60E6,0x60BD,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x60C5,0x60BB,0x60B5,0x60DC,0x60BC,0x60D8,0x60D5,/* 0xA0-0xA7 */ 0x60C6,0x60DF,0x60B8,0x60DA,0x60C7,0x621A,0x621B,0x6248,/* 0xA8-0xAF */ 0x63A0,0x63A7,0x6372,0x6396,0x63A2,0x63A5,0x6377,0x6367,/* 0xB0-0xB7 */ 0x6398,0x63AA,0x6371,0x63A9,0x6389,0x6383,0x639B,0x636B,/* 0xB8-0xBF */ 0x63A8,0x6384,0x6388,0x6399,0x63A1,0x63AC,0x6392,0x638F,/* 0xC0-0xC7 */ 0x6380,0x637B,0x6369,0x6368,0x637A,0x655D,0x6556,0x6551,/* 0xC8-0xCF */ 0x6559,0x6557,0x555F,0x654F,0x6558,0x6555,0x6554,0x659C,/* 0xD0-0xD7 */ 0x659B,0x65AC,0x65CF,0x65CB,0x65CC,0x65CE,0x665D,0x665A,/* 0xD8-0xDF */ 0x6664,0x6668,0x6666,0x665E,0x66F9,0x52D7,0x671B,0x6881,/* 0xE0-0xE7 */ 0x68AF,0x68A2,0x6893,0x68B5,0x687F,0x6876,0x68B1,0x68A7,/* 0xE8-0xEF */ 0x6897,0x68B0,0x6883,0x68C4,0x68AD,0x6886,0x6885,0x6894,/* 0xF0-0xF7 */ 0x689D,0x68A8,0x689F,0x68A1,0x6882,0x6B32,0x6BBA,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_B2[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x6BEB,0x6BEC,0x6C2B,0x6D8E,0x6DBC,0x6DF3,0x6DD9,0x6DB2,/* 0x40-0x47 */ 0x6DE1,0x6DCC,0x6DE4,0x6DFB,0x6DFA,0x6E05,0x6DC7,0x6DCB,/* 0x48-0x4F */ 0x6DAF,0x6DD1,0x6DAE,0x6DDE,0x6DF9,0x6DB8,0x6DF7,0x6DF5,/* 0x50-0x57 */ 0x6DC5,0x6DD2,0x6E1A,0x6DB5,0x6DDA,0x6DEB,0x6DD8,0x6DEA,/* 0x58-0x5F */ 0x6DF1,0x6DEE,0x6DE8,0x6DC6,0x6DC4,0x6DAA,0x6DEC,0x6DBF,/* 0x60-0x67 */ 0x6DE6,0x70F9,0x7109,0x710A,0x70FD,0x70EF,0x723D,0x727D,/* 0x68-0x6F */ 0x7281,0x731C,0x731B,0x7316,0x7313,0x7319,0x7387,0x7405,/* 0x70-0x77 */ 0x740A,0x7403,0x7406,0x73FE,0x740D,0x74E0,0x74F6,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x74F7,0x751C,0x7522,0x7565,0x7566,0x7562,0x7570,/* 0xA0-0xA7 */ 0x758F,0x75D4,0x75D5,0x75B5,0x75CA,0x75CD,0x768E,0x76D4,/* 0xA8-0xAF */ 0x76D2,0x76DB,0x7737,0x773E,0x773C,0x7736,0x7738,0x773A,/* 0xB0-0xB7 */ 0x786B,0x7843,0x784E,0x7965,0x7968,0x796D,0x79FB,0x7A92,/* 0xB8-0xBF */ 0x7A95,0x7B20,0x7B28,0x7B1B,0x7B2C,0x7B26,0x7B19,0x7B1E,/* 0xC0-0xC7 */ 0x7B2E,0x7C92,0x7C97,0x7C95,0x7D46,0x7D43,0x7D71,0x7D2E,/* 0xC8-0xCF */ 0x7D39,0x7D3C,0x7D40,0x7D30,0x7D33,0x7D44,0x7D2F,0x7D42,/* 0xD0-0xD7 */ 0x7D32,0x7D31,0x7F3D,0x7F9E,0x7F9A,0x7FCC,0x7FCE,0x7FD2,/* 0xD8-0xDF */ 0x801C,0x804A,0x8046,0x812F,0x8116,0x8123,0x812B,0x8129,/* 0xE0-0xE7 */ 0x8130,0x8124,0x8202,0x8235,0x8237,0x8236,0x8239,0x838E,/* 0xE8-0xEF */ 0x839E,0x8398,0x8378,0x83A2,0x8396,0x83BD,0x83AB,0x8392,/* 0xF0-0xF7 */ 0x838A,0x8393,0x8389,0x83A0,0x8377,0x837B,0x837C,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_B3[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x8386,0x83A7,0x8655,0x5F6A,0x86C7,0x86C0,0x86B6,0x86C4,/* 0x40-0x47 */ 0x86B5,0x86C6,0x86CB,0x86B1,0x86AF,0x86C9,0x8853,0x889E,/* 0x48-0x4F */ 0x8888,0x88AB,0x8892,0x8896,0x888D,0x888B,0x8993,0x898F,/* 0x50-0x57 */ 0x8A2A,0x8A1D,0x8A23,0x8A25,0x8A31,0x8A2D,0x8A1F,0x8A1B,/* 0x58-0x5F */ 0x8A22,0x8C49,0x8C5A,0x8CA9,0x8CAC,0x8CAB,0x8CA8,0x8CAA,/* 0x60-0x67 */ 0x8CA7,0x8D67,0x8D66,0x8DBE,0x8DBA,0x8EDB,0x8EDF,0x9019,/* 0x68-0x6F */ 0x900D,0x901A,0x9017,0x9023,0x901F,0x901D,0x9010,0x9015,/* 0x70-0x77 */ 0x901E,0x9020,0x900F,0x9022,0x9016,0x901B,0x9014,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x90E8,0x90ED,0x90FD,0x9157,0x91CE,0x91F5,0x91E6,/* 0xA0-0xA7 */ 0x91E3,0x91E7,0x91ED,0x91E9,0x9589,0x966A,0x9675,0x9673,/* 0xA8-0xAF */ 0x9678,0x9670,0x9674,0x9676,0x9677,0x966C,0x96C0,0x96EA,/* 0xB0-0xB7 */ 0x96E9,0x7AE0,0x7ADF,0x9802,0x9803,0x9B5A,0x9CE5,0x9E75,/* 0xB8-0xBF */ 0x9E7F,0x9EA5,0x9EBB,0x50A2,0x508D,0x5085,0x5099,0x5091,/* 0xC0-0xC7 */ 0x5080,0x5096,0x5098,0x509A,0x6700,0x51F1,0x5272,0x5274,/* 0xC8-0xCF */ 0x5275,0x5269,0x52DE,0x52DD,0x52DB,0x535A,0x53A5,0x557B,/* 0xD0-0xD7 */ 0x5580,0x55A7,0x557C,0x558A,0x559D,0x5598,0x5582,0x559C,/* 0xD8-0xDF */ 0x55AA,0x5594,0x5587,0x558B,0x5583,0x55B3,0x55AE,0x559F,/* 0xE0-0xE7 */ 0x553E,0x55B2,0x559A,0x55BB,0x55AC,0x55B1,0x557E,0x5589,/* 0xE8-0xEF */ 0x55AB,0x5599,0x570D,0x582F,0x582A,0x5834,0x5824,0x5830,/* 0xF0-0xF7 */ 0x5831,0x5821,0x581D,0x5820,0x58F9,0x58FA,0x5960,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_B4[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x5A77,0x5A9A,0x5A7F,0x5A92,0x5A9B,0x5AA7,0x5B73,0x5B71,/* 0x40-0x47 */ 0x5BD2,0x5BCC,0x5BD3,0x5BD0,0x5C0A,0x5C0B,0x5C31,0x5D4C,/* 0x48-0x4F */ 0x5D50,0x5D34,0x5D47,0x5DFD,0x5E45,0x5E3D,0x5E40,0x5E43,/* 0x50-0x57 */ 0x5E7E,0x5ECA,0x5EC1,0x5EC2,0x5EC4,0x5F3C,0x5F6D,0x5FA9,/* 0x58-0x5F */ 0x5FAA,0x5FA8,0x60D1,0x60E1,0x60B2,0x60B6,0x60E0,0x611C,/* 0x60-0x67 */ 0x6123,0x60FA,0x6115,0x60F0,0x60FB,0x60F4,0x6168,0x60F1,/* 0x68-0x6F */ 0x610E,0x60F6,0x6109,0x6100,0x6112,0x621F,0x6249,0x63A3,/* 0x70-0x77 */ 0x638C,0x63CF,0x63C0,0x63E9,0x63C9,0x63C6,0x63CD,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x63D2,0x63E3,0x63D0,0x63E1,0x63D6,0x63ED,0x63EE,/* 0xA0-0xA7 */ 0x6376,0x63F4,0x63EA,0x63DB,0x6452,0x63DA,0x63F9,0x655E,/* 0xA8-0xAF */ 0x6566,0x6562,0x6563,0x6591,0x6590,0x65AF,0x666E,0x6670,/* 0xB0-0xB7 */ 0x6674,0x6676,0x666F,0x6691,0x667A,0x667E,0x6677,0x66FE,/* 0xB8-0xBF */ 0x66FF,0x671F,0x671D,0x68FA,0x68D5,0x68E0,0x68D8,0x68D7,/* 0xC0-0xC7 */ 0x6905,0x68DF,0x68F5,0x68EE,0x68E7,0x68F9,0x68D2,0x68F2,/* 0xC8-0xCF */ 0x68E3,0x68CB,0x68CD,0x690D,0x6912,0x690E,0x68C9,0x68DA,/* 0xD0-0xD7 */ 0x696E,0x68FB,0x6B3E,0x6B3A,0x6B3D,0x6B98,0x6B96,0x6BBC,/* 0xD8-0xDF */ 0x6BEF,0x6C2E,0x6C2F,0x6C2C,0x6E2F,0x6E38,0x6E54,0x6E21,/* 0xE0-0xE7 */ 0x6E32,0x6E67,0x6E4A,0x6E20,0x6E25,0x6E23,0x6E1B,0x6E5B,/* 0xE8-0xEF */ 0x6E58,0x6E24,0x6E56,0x6E6E,0x6E2D,0x6E26,0x6E6F,0x6E34,/* 0xF0-0xF7 */ 0x6E4D,0x6E3A,0x6E2C,0x6E43,0x6E1D,0x6E3E,0x6ECB,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_B5[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x6E89,0x6E19,0x6E4E,0x6E63,0x6E44,0x6E72,0x6E69,0x6E5F,/* 0x40-0x47 */ 0x7119,0x711A,0x7126,0x7130,0x7121,0x7136,0x716E,0x711C,/* 0x48-0x4F */ 0x724C,0x7284,0x7280,0x7336,0x7325,0x7334,0x7329,0x743A,/* 0x50-0x57 */ 0x742A,0x7433,0x7422,0x7425,0x7435,0x7436,0x7434,0x742F,/* 0x58-0x5F */ 0x741B,0x7426,0x7428,0x7525,0x7526,0x756B,0x756A,0x75E2,/* 0x60-0x67 */ 0x75DB,0x75E3,0x75D9,0x75D8,0x75DE,0x75E0,0x767B,0x767C,/* 0x68-0x6F */ 0x7696,0x7693,0x76B4,0x76DC,0x774F,0x77ED,0x785D,0x786C,/* 0x70-0x77 */ 0x786F,0x7A0D,0x7A08,0x7A0B,0x7A05,0x7A00,0x7A98,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x7A97,0x7A96,0x7AE5,0x7AE3,0x7B49,0x7B56,0x7B46,/* 0xA0-0xA7 */ 0x7B50,0x7B52,0x7B54,0x7B4D,0x7B4B,0x7B4F,0x7B51,0x7C9F,/* 0xA8-0xAF */ 0x7CA5,0x7D5E,0x7D50,0x7D68,0x7D55,0x7D2B,0x7D6E,0x7D72,/* 0xB0-0xB7 */ 0x7D61,0x7D66,0x7D62,0x7D70,0x7D73,0x5584,0x7FD4,0x7FD5,/* 0xB8-0xBF */ 0x800B,0x8052,0x8085,0x8155,0x8154,0x814B,0x8151,0x814E,/* 0xC0-0xC7 */ 0x8139,0x8146,0x813E,0x814C,0x8153,0x8174,0x8212,0x821C,/* 0xC8-0xCF */ 0x83E9,0x8403,0x83F8,0x840D,0x83E0,0x83C5,0x840B,0x83C1,/* 0xD0-0xD7 */ 0x83EF,0x83F1,0x83F4,0x8457,0x840A,0x83F0,0x840C,0x83CC,/* 0xD8-0xDF */ 0x83FD,0x83F2,0x83CA,0x8438,0x840E,0x8404,0x83DC,0x8407,/* 0xE0-0xE7 */ 0x83D4,0x83DF,0x865B,0x86DF,0x86D9,0x86ED,0x86D4,0x86DB,/* 0xE8-0xEF */ 0x86E4,0x86D0,0x86DE,0x8857,0x88C1,0x88C2,0x88B1,0x8983,/* 0xF0-0xF7 */ 0x8996,0x8A3B,0x8A60,0x8A55,0x8A5E,0x8A3C,0x8A41,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_B6[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x8A54,0x8A5B,0x8A50,0x8A46,0x8A34,0x8A3A,0x8A36,0x8A56,/* 0x40-0x47 */ 0x8C61,0x8C82,0x8CAF,0x8CBC,0x8CB3,0x8CBD,0x8CC1,0x8CBB,/* 0x48-0x4F */ 0x8CC0,0x8CB4,0x8CB7,0x8CB6,0x8CBF,0x8CB8,0x8D8A,0x8D85,/* 0x50-0x57 */ 0x8D81,0x8DCE,0x8DDD,0x8DCB,0x8DDA,0x8DD1,0x8DCC,0x8DDB,/* 0x58-0x5F */ 0x8DC6,0x8EFB,0x8EF8,0x8EFC,0x8F9C,0x902E,0x9035,0x9031,/* 0x60-0x67 */ 0x9038,0x9032,0x9036,0x9102,0x90F5,0x9109,0x90FE,0x9163,/* 0x68-0x6F */ 0x9165,0x91CF,0x9214,0x9215,0x9223,0x9209,0x921E,0x920D,/* 0x70-0x77 */ 0x9210,0x9207,0x9211,0x9594,0x958F,0x958B,0x9591,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x9593,0x9592,0x958E,0x968A,0x968E,0x968B,0x967D,/* 0xA0-0xA7 */ 0x9685,0x9686,0x968D,0x9672,0x9684,0x96C1,0x96C5,0x96C4,/* 0xA8-0xAF */ 0x96C6,0x96C7,0x96EF,0x96F2,0x97CC,0x9805,0x9806,0x9808,/* 0xB0-0xB7 */ 0x98E7,0x98EA,0x98EF,0x98E9,0x98F2,0x98ED,0x99AE,0x99AD,/* 0xB8-0xBF */ 0x9EC3,0x9ECD,0x9ED1,0x4E82,0x50AD,0x50B5,0x50B2,0x50B3,/* 0xC0-0xC7 */ 0x50C5,0x50BE,0x50AC,0x50B7,0x50BB,0x50AF,0x50C7,0x527F,/* 0xC8-0xCF */ 0x5277,0x527D,0x52DF,0x52E6,0x52E4,0x52E2,0x52E3,0x532F,/* 0xD0-0xD7 */ 0x55DF,0x55E8,0x55D3,0x55E6,0x55CE,0x55DC,0x55C7,0x55D1,/* 0xD8-0xDF */ 0x55E3,0x55E4,0x55EF,0x55DA,0x55E1,0x55C5,0x55C6,0x55E5,/* 0xE0-0xE7 */ 0x55C9,0x5712,0x5713,0x585E,0x5851,0x5858,0x5857,0x585A,/* 0xE8-0xEF */ 0x5854,0x586B,0x584C,0x586D,0x584A,0x5862,0x5852,0x584B,/* 0xF0-0xF7 */ 0x5967,0x5AC1,0x5AC9,0x5ACC,0x5ABE,0x5ABD,0x5ABC,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_B7[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x5AB3,0x5AC2,0x5AB2,0x5D69,0x5D6F,0x5E4C,0x5E79,0x5EC9,/* 0x40-0x47 */ 0x5EC8,0x5F12,0x5F59,0x5FAC,0x5FAE,0x611A,0x610F,0x6148,/* 0x48-0x4F */ 0x611F,0x60F3,0x611B,0x60F9,0x6101,0x6108,0x614E,0x614C,/* 0x50-0x57 */ 0x6144,0x614D,0x613E,0x6134,0x6127,0x610D,0x6106,0x6137,/* 0x58-0x5F */ 0x6221,0x6222,0x6413,0x643E,0x641E,0x642A,0x642D,0x643D,/* 0x60-0x67 */ 0x642C,0x640F,0x641C,0x6414,0x640D,0x6436,0x6416,0x6417,/* 0x68-0x6F */ 0x6406,0x656C,0x659F,0x65B0,0x6697,0x6689,0x6687,0x6688,/* 0x70-0x77 */ 0x6696,0x6684,0x6698,0x668D,0x6703,0x6994,0x696D,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x695A,0x6977,0x6960,0x6954,0x6975,0x6930,0x6982,/* 0xA0-0xA7 */ 0x694A,0x6968,0x696B,0x695E,0x6953,0x6979,0x6986,0x695D,/* 0xA8-0xAF */ 0x6963,0x695B,0x6B47,0x6B72,0x6BC0,0x6BBF,0x6BD3,0x6BFD,/* 0xB0-0xB7 */ 0x6EA2,0x6EAF,0x6ED3,0x6EB6,0x6EC2,0x6E90,0x6E9D,0x6EC7,/* 0xB8-0xBF */ 0x6EC5,0x6EA5,0x6E98,0x6EBC,0x6EBA,0x6EAB,0x6ED1,0x6E96,/* 0xC0-0xC7 */ 0x6E9C,0x6EC4,0x6ED4,0x6EAA,0x6EA7,0x6EB4,0x714E,0x7159,/* 0xC8-0xCF */ 0x7169,0x7164,0x7149,0x7167,0x715C,0x716C,0x7166,0x714C,/* 0xD0-0xD7 */ 0x7165,0x715E,0x7146,0x7168,0x7156,0x723A,0x7252,0x7337,/* 0xD8-0xDF */ 0x7345,0x733F,0x733E,0x746F,0x745A,0x7455,0x745F,0x745E,/* 0xE0-0xE7 */ 0x7441,0x743F,0x7459,0x745B,0x745C,0x7576,0x7578,0x7600,/* 0xE8-0xEF */ 0x75F0,0x7601,0x75F2,0x75F1,0x75FA,0x75FF,0x75F4,0x75F3,/* 0xF0-0xF7 */ 0x76DE,0x76DF,0x775B,0x776B,0x7766,0x775E,0x7763,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_B8[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x7779,0x776A,0x776C,0x775C,0x7765,0x7768,0x7762,0x77EE,/* 0x40-0x47 */ 0x788E,0x78B0,0x7897,0x7898,0x788C,0x7889,0x787C,0x7891,/* 0x48-0x4F */ 0x7893,0x787F,0x797A,0x797F,0x7981,0x842C,0x79BD,0x7A1C,/* 0x50-0x57 */ 0x7A1A,0x7A20,0x7A14,0x7A1F,0x7A1E,0x7A9F,0x7AA0,0x7B77,/* 0x58-0x5F */ 0x7BC0,0x7B60,0x7B6E,0x7B67,0x7CB1,0x7CB3,0x7CB5,0x7D93,/* 0x60-0x67 */ 0x7D79,0x7D91,0x7D81,0x7D8F,0x7D5B,0x7F6E,0x7F69,0x7F6A,/* 0x68-0x6F */ 0x7F72,0x7FA9,0x7FA8,0x7FA4,0x8056,0x8058,0x8086,0x8084,/* 0x70-0x77 */ 0x8171,0x8170,0x8178,0x8165,0x816E,0x8173,0x816B,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x8179,0x817A,0x8166,0x8205,0x8247,0x8482,0x8477,/* 0xA0-0xA7 */ 0x843D,0x8431,0x8475,0x8466,0x846B,0x8449,0x846C,0x845B,/* 0xA8-0xAF */ 0x843C,0x8435,0x8461,0x8463,0x8469,0x846D,0x8446,0x865E,/* 0xB0-0xB7 */ 0x865C,0x865F,0x86F9,0x8713,0x8708,0x8707,0x8700,0x86FE,/* 0xB8-0xBF */ 0x86FB,0x8702,0x8703,0x8706,0x870A,0x8859,0x88DF,0x88D4,/* 0xC0-0xC7 */ 0x88D9,0x88DC,0x88D8,0x88DD,0x88E1,0x88CA,0x88D5,0x88D2,/* 0xC8-0xCF */ 0x899C,0x89E3,0x8A6B,0x8A72,0x8A73,0x8A66,0x8A69,0x8A70,/* 0xD0-0xD7 */ 0x8A87,0x8A7C,0x8A63,0x8AA0,0x8A71,0x8A85,0x8A6D,0x8A62,/* 0xD8-0xDF */ 0x8A6E,0x8A6C,0x8A79,0x8A7B,0x8A3E,0x8A68,0x8C62,0x8C8A,/* 0xE0-0xE7 */ 0x8C89,0x8CCA,0x8CC7,0x8CC8,0x8CC4,0x8CB2,0x8CC3,0x8CC2,/* 0xE8-0xEF */ 0x8CC5,0x8DE1,0x8DDF,0x8DE8,0x8DEF,0x8DF3,0x8DFA,0x8DEA,/* 0xF0-0xF7 */ 0x8DE4,0x8DE6,0x8EB2,0x8F03,0x8F09,0x8EFE,0x8F0A,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_B9[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x8F9F,0x8FB2,0x904B,0x904A,0x9053,0x9042,0x9054,0x903C,/* 0x40-0x47 */ 0x9055,0x9050,0x9047,0x904F,0x904E,0x904D,0x9051,0x903E,/* 0x48-0x4F */ 0x9041,0x9112,0x9117,0x916C,0x916A,0x9169,0x91C9,0x9237,/* 0x50-0x57 */ 0x9257,0x9238,0x923D,0x9240,0x923E,0x925B,0x924B,0x9264,/* 0x58-0x5F */ 0x9251,0x9234,0x9249,0x924D,0x9245,0x9239,0x923F,0x925A,/* 0x60-0x67 */ 0x9598,0x9698,0x9694,0x9695,0x96CD,0x96CB,0x96C9,0x96CA,/* 0x68-0x6F */ 0x96F7,0x96FB,0x96F9,0x96F6,0x9756,0x9774,0x9776,0x9810,/* 0x70-0x77 */ 0x9811,0x9813,0x980A,0x9812,0x980C,0x98FC,0x98F4,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x98FD,0x98FE,0x99B3,0x99B1,0x99B4,0x9AE1,0x9CE9,/* 0xA0-0xA7 */ 0x9E82,0x9F0E,0x9F13,0x9F20,0x50E7,0x50EE,0x50E5,0x50D6,/* 0xA8-0xAF */ 0x50ED,0x50DA,0x50D5,0x50CF,0x50D1,0x50F1,0x50CE,0x50E9,/* 0xB0-0xB7 */ 0x5162,0x51F3,0x5283,0x5282,0x5331,0x53AD,0x55FE,0x5600,/* 0xB8-0xBF */ 0x561B,0x5617,0x55FD,0x5614,0x5606,0x5609,0x560D,0x560E,/* 0xC0-0xC7 */ 0x55F7,0x5616,0x561F,0x5608,0x5610,0x55F6,0x5718,0x5716,/* 0xC8-0xCF */ 0x5875,0x587E,0x5883,0x5893,0x588A,0x5879,0x5885,0x587D,/* 0xD0-0xD7 */ 0x58FD,0x5925,0x5922,0x5924,0x596A,0x5969,0x5AE1,0x5AE6,/* 0xD8-0xDF */ 0x5AE9,0x5AD7,0x5AD6,0x5AD8,0x5AE3,0x5B75,0x5BDE,0x5BE7,/* 0xE0-0xE7 */ 0x5BE1,0x5BE5,0x5BE6,0x5BE8,0x5BE2,0x5BE4,0x5BDF,0x5C0D,/* 0xE8-0xEF */ 0x5C62,0x5D84,0x5D87,0x5E5B,0x5E63,0x5E55,0x5E57,0x5E54,/* 0xF0-0xF7 */ 0x5ED3,0x5ED6,0x5F0A,0x5F46,0x5F70,0x5FB9,0x6147,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_BA[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x613F,0x614B,0x6177,0x6162,0x6163,0x615F,0x615A,0x6158,/* 0x40-0x47 */ 0x6175,0x622A,0x6487,0x6458,0x6454,0x64A4,0x6478,0x645F,/* 0x48-0x4F */ 0x647A,0x6451,0x6467,0x6434,0x646D,0x647B,0x6572,0x65A1,/* 0x50-0x57 */ 0x65D7,0x65D6,0x66A2,0x66A8,0x669D,0x699C,0x69A8,0x6995,/* 0x58-0x5F */ 0x69C1,0x69AE,0x69D3,0x69CB,0x699B,0x69B7,0x69BB,0x69AB,/* 0x60-0x67 */ 0x69B4,0x69D0,0x69CD,0x69AD,0x69CC,0x69A6,0x69C3,0x69A3,/* 0x68-0x6F */ 0x6B49,0x6B4C,0x6C33,0x6F33,0x6F14,0x6EFE,0x6F13,0x6EF4,/* 0x70-0x77 */ 0x6F29,0x6F3E,0x6F20,0x6F2C,0x6F0F,0x6F02,0x6F22,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x6EFF,0x6EEF,0x6F06,0x6F31,0x6F38,0x6F32,0x6F23,/* 0xA0-0xA7 */ 0x6F15,0x6F2B,0x6F2F,0x6F88,0x6F2A,0x6EEC,0x6F01,0x6EF2,/* 0xA8-0xAF */ 0x6ECC,0x6EF7,0x7194,0x7199,0x717D,0x718A,0x7184,0x7192,/* 0xB0-0xB7 */ 0x723E,0x7292,0x7296,0x7344,0x7350,0x7464,0x7463,0x746A,/* 0xB8-0xBF */ 0x7470,0x746D,0x7504,0x7591,0x7627,0x760D,0x760B,0x7609,/* 0xC0-0xC7 */ 0x7613,0x76E1,0x76E3,0x7784,0x777D,0x777F,0x7761,0x78C1,/* 0xC8-0xCF */ 0x789F,0x78A7,0x78B3,0x78A9,0x78A3,0x798E,0x798F,0x798D,/* 0xD0-0xD7 */ 0x7A2E,0x7A31,0x7AAA,0x7AA9,0x7AED,0x7AEF,0x7BA1,0x7B95,/* 0xD8-0xDF */ 0x7B8B,0x7B75,0x7B97,0x7B9D,0x7B94,0x7B8F,0x7BB8,0x7B87,/* 0xE0-0xE7 */ 0x7B84,0x7CB9,0x7CBD,0x7CBE,0x7DBB,0x7DB0,0x7D9C,0x7DBD,/* 0xE8-0xEF */ 0x7DBE,0x7DA0,0x7DCA,0x7DB4,0x7DB2,0x7DB1,0x7DBA,0x7DA2,/* 0xF0-0xF7 */ 0x7DBF,0x7DB5,0x7DB8,0x7DAD,0x7DD2,0x7DC7,0x7DAC,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_BB[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x7F70,0x7FE0,0x7FE1,0x7FDF,0x805E,0x805A,0x8087,0x8150,/* 0x40-0x47 */ 0x8180,0x818F,0x8188,0x818A,0x817F,0x8182,0x81E7,0x81FA,/* 0x48-0x4F */ 0x8207,0x8214,0x821E,0x824B,0x84C9,0x84BF,0x84C6,0x84C4,/* 0x50-0x57 */ 0x8499,0x849E,0x84B2,0x849C,0x84CB,0x84B8,0x84C0,0x84D3,/* 0x58-0x5F */ 0x8490,0x84BC,0x84D1,0x84CA,0x873F,0x871C,0x873B,0x8722,/* 0x60-0x67 */ 0x8725,0x8734,0x8718,0x8755,0x8737,0x8729,0x88F3,0x8902,/* 0x68-0x6F */ 0x88F4,0x88F9,0x88F8,0x88FD,0x88E8,0x891A,0x88EF,0x8AA6,/* 0x70-0x77 */ 0x8A8C,0x8A9E,0x8AA3,0x8A8D,0x8AA1,0x8A93,0x8AA4,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x8AAA,0x8AA5,0x8AA8,0x8A98,0x8A91,0x8A9A,0x8AA7,/* 0xA0-0xA7 */ 0x8C6A,0x8C8D,0x8C8C,0x8CD3,0x8CD1,0x8CD2,0x8D6B,0x8D99,/* 0xA8-0xAF */ 0x8D95,0x8DFC,0x8F14,0x8F12,0x8F15,0x8F13,0x8FA3,0x9060,/* 0xB0-0xB7 */ 0x9058,0x905C,0x9063,0x9059,0x905E,0x9062,0x905D,0x905B,/* 0xB8-0xBF */ 0x9119,0x9118,0x911E,0x9175,0x9178,0x9177,0x9174,0x9278,/* 0xC0-0xC7 */ 0x9280,0x9285,0x9298,0x9296,0x927B,0x9293,0x929C,0x92A8,/* 0xC8-0xCF */ 0x927C,0x9291,0x95A1,0x95A8,0x95A9,0x95A3,0x95A5,0x95A4,/* 0xD0-0xD7 */ 0x9699,0x969C,0x969B,0x96CC,0x96D2,0x9700,0x977C,0x9785,/* 0xD8-0xDF */ 0x97F6,0x9817,0x9818,0x98AF,0x98B1,0x9903,0x9905,0x990C,/* 0xE0-0xE7 */ 0x9909,0x99C1,0x9AAF,0x9AB0,0x9AE6,0x9B41,0x9B42,0x9CF4,/* 0xE8-0xEF */ 0x9CF6,0x9CF3,0x9EBC,0x9F3B,0x9F4A,0x5104,0x5100,0x50FB,/* 0xF0-0xF7 */ 0x50F5,0x50F9,0x5102,0x5108,0x5109,0x5105,0x51DC,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_BC[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x5287,0x5288,0x5289,0x528D,0x528A,0x52F0,0x53B2,0x562E,/* 0x40-0x47 */ 0x563B,0x5639,0x5632,0x563F,0x5634,0x5629,0x5653,0x564E,/* 0x48-0x4F */ 0x5657,0x5674,0x5636,0x562F,0x5630,0x5880,0x589F,0x589E,/* 0x50-0x57 */ 0x58B3,0x589C,0x58AE,0x58A9,0x58A6,0x596D,0x5B09,0x5AFB,/* 0x58-0x5F */ 0x5B0B,0x5AF5,0x5B0C,0x5B08,0x5BEE,0x5BEC,0x5BE9,0x5BEB,/* 0x60-0x67 */ 0x5C64,0x5C65,0x5D9D,0x5D94,0x5E62,0x5E5F,0x5E61,0x5EE2,/* 0x68-0x6F */ 0x5EDA,0x5EDF,0x5EDD,0x5EE3,0x5EE0,0x5F48,0x5F71,0x5FB7,/* 0x70-0x77 */ 0x5FB5,0x6176,0x6167,0x616E,0x615D,0x6155,0x6182,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x617C,0x6170,0x616B,0x617E,0x61A7,0x6190,0x61AB,/* 0xA0-0xA7 */ 0x618E,0x61AC,0x619A,0x61A4,0x6194,0x61AE,0x622E,0x6469,/* 0xA8-0xAF */ 0x646F,0x6479,0x649E,0x64B2,0x6488,0x6490,0x64B0,0x64A5,/* 0xB0-0xB7 */ 0x6493,0x6495,0x64A9,0x6492,0x64AE,0x64AD,0x64AB,0x649A,/* 0xB8-0xBF */ 0x64AC,0x6499,0x64A2,0x64B3,0x6575,0x6577,0x6578,0x66AE,/* 0xC0-0xC7 */ 0x66AB,0x66B4,0x66B1,0x6A23,0x6A1F,0x69E8,0x6A01,0x6A1E,/* 0xC8-0xCF */ 0x6A19,0x69FD,0x6A21,0x6A13,0x6A0A,0x69F3,0x6A02,0x6A05,/* 0xD0-0xD7 */ 0x69ED,0x6A11,0x6B50,0x6B4E,0x6BA4,0x6BC5,0x6BC6,0x6F3F,/* 0xD8-0xDF */ 0x6F7C,0x6F84,0x6F51,0x6F66,0x6F54,0x6F86,0x6F6D,0x6F5B,/* 0xE0-0xE7 */ 0x6F78,0x6F6E,0x6F8E,0x6F7A,0x6F70,0x6F64,0x6F97,0x6F58,/* 0xE8-0xEF */ 0x6ED5,0x6F6F,0x6F60,0x6F5F,0x719F,0x71AC,0x71B1,0x71A8,/* 0xF0-0xF7 */ 0x7256,0x729B,0x734E,0x7357,0x7469,0x748B,0x7483,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_BD[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x747E,0x7480,0x757F,0x7620,0x7629,0x761F,0x7624,0x7626,/* 0x40-0x47 */ 0x7621,0x7622,0x769A,0x76BA,0x76E4,0x778E,0x7787,0x778C,/* 0x48-0x4F */ 0x7791,0x778B,0x78CB,0x78C5,0x78BA,0x78CA,0x78BE,0x78D5,/* 0x50-0x57 */ 0x78BC,0x78D0,0x7A3F,0x7A3C,0x7A40,0x7A3D,0x7A37,0x7A3B,/* 0x58-0x5F */ 0x7AAF,0x7AAE,0x7BAD,0x7BB1,0x7BC4,0x7BB4,0x7BC6,0x7BC7,/* 0x60-0x67 */ 0x7BC1,0x7BA0,0x7BCC,0x7CCA,0x7DE0,0x7DF4,0x7DEF,0x7DFB,/* 0x68-0x6F */ 0x7DD8,0x7DEC,0x7DDD,0x7DE8,0x7DE3,0x7DDA,0x7DDE,0x7DE9,/* 0x70-0x77 */ 0x7D9E,0x7DD9,0x7DF2,0x7DF9,0x7F75,0x7F77,0x7FAF,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x7FE9,0x8026,0x819B,0x819C,0x819D,0x81A0,0x819A,/* 0xA0-0xA7 */ 0x8198,0x8517,0x853D,0x851A,0x84EE,0x852C,0x852D,0x8513,/* 0xA8-0xAF */ 0x8511,0x8523,0x8521,0x8514,0x84EC,0x8525,0x84FF,0x8506,/* 0xB0-0xB7 */ 0x8782,0x8774,0x8776,0x8760,0x8766,0x8778,0x8768,0x8759,/* 0xB8-0xBF */ 0x8757,0x874C,0x8753,0x885B,0x885D,0x8910,0x8907,0x8912,/* 0xC0-0xC7 */ 0x8913,0x8915,0x890A,0x8ABC,0x8AD2,0x8AC7,0x8AC4,0x8A95,/* 0xC8-0xCF */ 0x8ACB,0x8AF8,0x8AB2,0x8AC9,0x8AC2,0x8ABF,0x8AB0,0x8AD6,/* 0xD0-0xD7 */ 0x8ACD,0x8AB6,0x8AB9,0x8ADB,0x8C4C,0x8C4E,0x8C6C,0x8CE0,/* 0xD8-0xDF */ 0x8CDE,0x8CE6,0x8CE4,0x8CEC,0x8CED,0x8CE2,0x8CE3,0x8CDC,/* 0xE0-0xE7 */ 0x8CEA,0x8CE1,0x8D6D,0x8D9F,0x8DA3,0x8E2B,0x8E10,0x8E1D,/* 0xE8-0xEF */ 0x8E22,0x8E0F,0x8E29,0x8E1F,0x8E21,0x8E1E,0x8EBA,0x8F1D,/* 0xF0-0xF7 */ 0x8F1B,0x8F1F,0x8F29,0x8F26,0x8F2A,0x8F1C,0x8F1E,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_BE[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x8F25,0x9069,0x906E,0x9068,0x906D,0x9077,0x9130,0x912D,/* 0x40-0x47 */ 0x9127,0x9131,0x9187,0x9189,0x918B,0x9183,0x92C5,0x92BB,/* 0x48-0x4F */ 0x92B7,0x92EA,0x92AC,0x92E4,0x92C1,0x92B3,0x92BC,0x92D2,/* 0x50-0x57 */ 0x92C7,0x92F0,0x92B2,0x95AD,0x95B1,0x9704,0x9706,0x9707,/* 0x58-0x5F */ 0x9709,0x9760,0x978D,0x978B,0x978F,0x9821,0x982B,0x981C,/* 0x60-0x67 */ 0x98B3,0x990A,0x9913,0x9912,0x9918,0x99DD,0x99D0,0x99DF,/* 0x68-0x6F */ 0x99DB,0x99D1,0x99D5,0x99D2,0x99D9,0x9AB7,0x9AEE,0x9AEF,/* 0x70-0x77 */ 0x9B27,0x9B45,0x9B44,0x9B77,0x9B6F,0x9D06,0x9D09,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x9D03,0x9EA9,0x9EBE,0x9ECE,0x58A8,0x9F52,0x5112,/* 0xA0-0xA7 */ 0x5118,0x5114,0x5110,0x5115,0x5180,0x51AA,0x51DD,0x5291,/* 0xA8-0xAF */ 0x5293,0x52F3,0x5659,0x566B,0x5679,0x5669,0x5664,0x5678,/* 0xB0-0xB7 */ 0x566A,0x5668,0x5665,0x5671,0x566F,0x566C,0x5662,0x5676,/* 0xB8-0xBF */ 0x58C1,0x58BE,0x58C7,0x58C5,0x596E,0x5B1D,0x5B34,0x5B78,/* 0xC0-0xC7 */ 0x5BF0,0x5C0E,0x5F4A,0x61B2,0x6191,0x61A9,0x618A,0x61CD,/* 0xC8-0xCF */ 0x61B6,0x61BE,0x61CA,0x61C8,0x6230,0x64C5,0x64C1,0x64CB,/* 0xD0-0xD7 */ 0x64BB,0x64BC,0x64DA,0x64C4,0x64C7,0x64C2,0x64CD,0x64BF,/* 0xD8-0xDF */ 0x64D2,0x64D4,0x64BE,0x6574,0x66C6,0x66C9,0x66B9,0x66C4,/* 0xE0-0xE7 */ 0x66C7,0x66B8,0x6A3D,0x6A38,0x6A3A,0x6A59,0x6A6B,0x6A58,/* 0xE8-0xEF */ 0x6A39,0x6A44,0x6A62,0x6A61,0x6A4B,0x6A47,0x6A35,0x6A5F,/* 0xF0-0xF7 */ 0x6A48,0x6B59,0x6B77,0x6C05,0x6FC2,0x6FB1,0x6FA1,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_BF[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x6FC3,0x6FA4,0x6FC1,0x6FA7,0x6FB3,0x6FC0,0x6FB9,0x6FB6,/* 0x40-0x47 */ 0x6FA6,0x6FA0,0x6FB4,0x71BE,0x71C9,0x71D0,0x71D2,0x71C8,/* 0x48-0x4F */ 0x71D5,0x71B9,0x71CE,0x71D9,0x71DC,0x71C3,0x71C4,0x7368,/* 0x50-0x57 */ 0x749C,0x74A3,0x7498,0x749F,0x749E,0x74E2,0x750C,0x750D,/* 0x58-0x5F */ 0x7634,0x7638,0x763A,0x76E7,0x76E5,0x77A0,0x779E,0x779F,/* 0x60-0x67 */ 0x77A5,0x78E8,0x78DA,0x78EC,0x78E7,0x79A6,0x7A4D,0x7A4E,/* 0x68-0x6F */ 0x7A46,0x7A4C,0x7A4B,0x7ABA,0x7BD9,0x7C11,0x7BC9,0x7BE4,/* 0x70-0x77 */ 0x7BDB,0x7BE1,0x7BE9,0x7BE6,0x7CD5,0x7CD6,0x7E0A,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x7E11,0x7E08,0x7E1B,0x7E23,0x7E1E,0x7E1D,0x7E09,/* 0xA0-0xA7 */ 0x7E10,0x7F79,0x7FB2,0x7FF0,0x7FF1,0x7FEE,0x8028,0x81B3,/* 0xA8-0xAF */ 0x81A9,0x81A8,0x81FB,0x8208,0x8258,0x8259,0x854A,0x8559,/* 0xB0-0xB7 */ 0x8548,0x8568,0x8569,0x8543,0x8549,0x856D,0x856A,0x855E,/* 0xB8-0xBF */ 0x8783,0x879F,0x879E,0x87A2,0x878D,0x8861,0x892A,0x8932,/* 0xC0-0xC7 */ 0x8925,0x892B,0x8921,0x89AA,0x89A6,0x8AE6,0x8AFA,0x8AEB,/* 0xC8-0xCF */ 0x8AF1,0x8B00,0x8ADC,0x8AE7,0x8AEE,0x8AFE,0x8B01,0x8B02,/* 0xD0-0xD7 */ 0x8AF7,0x8AED,0x8AF3,0x8AF6,0x8AFC,0x8C6B,0x8C6D,0x8C93,/* 0xD8-0xDF */ 0x8CF4,0x8E44,0x8E31,0x8E34,0x8E42,0x8E39,0x8E35,0x8F3B,/* 0xE0-0xE7 */ 0x8F2F,0x8F38,0x8F33,0x8FA8,0x8FA6,0x9075,0x9074,0x9078,/* 0xE8-0xEF */ 0x9072,0x907C,0x907A,0x9134,0x9192,0x9320,0x9336,0x92F8,/* 0xF0-0xF7 */ 0x9333,0x932F,0x9322,0x92FC,0x932B,0x9304,0x931A,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_C0[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x9310,0x9326,0x9321,0x9315,0x932E,0x9319,0x95BB,0x96A7,/* 0x40-0x47 */ 0x96A8,0x96AA,0x96D5,0x970E,0x9711,0x9716,0x970D,0x9713,/* 0x48-0x4F */ 0x970F,0x975B,0x975C,0x9766,0x9798,0x9830,0x9838,0x983B,/* 0x50-0x57 */ 0x9837,0x982D,0x9839,0x9824,0x9910,0x9928,0x991E,0x991B,/* 0x58-0x5F */ 0x9921,0x991A,0x99ED,0x99E2,0x99F1,0x9AB8,0x9ABC,0x9AFB,/* 0x60-0x67 */ 0x9AED,0x9B28,0x9B91,0x9D15,0x9D23,0x9D26,0x9D28,0x9D12,/* 0x68-0x6F */ 0x9D1B,0x9ED8,0x9ED4,0x9F8D,0x9F9C,0x512A,0x511F,0x5121,/* 0x70-0x77 */ 0x5132,0x52F5,0x568E,0x5680,0x5690,0x5685,0x5687,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x568F,0x58D5,0x58D3,0x58D1,0x58CE,0x5B30,0x5B2A,/* 0xA0-0xA7 */ 0x5B24,0x5B7A,0x5C37,0x5C68,0x5DBC,0x5DBA,0x5DBD,0x5DB8,/* 0xA8-0xAF */ 0x5E6B,0x5F4C,0x5FBD,0x61C9,0x61C2,0x61C7,0x61E6,0x61CB,/* 0xB0-0xB7 */ 0x6232,0x6234,0x64CE,0x64CA,0x64D8,0x64E0,0x64F0,0x64E6,/* 0xB8-0xBF */ 0x64EC,0x64F1,0x64E2,0x64ED,0x6582,0x6583,0x66D9,0x66D6,/* 0xC0-0xC7 */ 0x6A80,0x6A94,0x6A84,0x6AA2,0x6A9C,0x6ADB,0x6AA3,0x6A7E,/* 0xC8-0xCF */ 0x6A97,0x6A90,0x6AA0,0x6B5C,0x6BAE,0x6BDA,0x6C08,0x6FD8,/* 0xD0-0xD7 */ 0x6FF1,0x6FDF,0x6FE0,0x6FDB,0x6FE4,0x6FEB,0x6FEF,0x6F80,/* 0xD8-0xDF */ 0x6FEC,0x6FE1,0x6FE9,0x6FD5,0x6FEE,0x6FF0,0x71E7,0x71DF,/* 0xE0-0xE7 */ 0x71EE,0x71E6,0x71E5,0x71ED,0x71EC,0x71F4,0x71E0,0x7235,/* 0xE8-0xEF */ 0x7246,0x7370,0x7372,0x74A9,0x74B0,0x74A6,0x74A8,0x7646,/* 0xF0-0xF7 */ 0x7642,0x764C,0x76EA,0x77B3,0x77AA,0x77B0,0x77AC,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_C1[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x77A7,0x77AD,0x77EF,0x78F7,0x78FA,0x78F4,0x78EF,0x7901,/* 0x40-0x47 */ 0x79A7,0x79AA,0x7A57,0x7ABF,0x7C07,0x7C0D,0x7BFE,0x7BF7,/* 0x48-0x4F */ 0x7C0C,0x7BE0,0x7CE0,0x7CDC,0x7CDE,0x7CE2,0x7CDF,0x7CD9,/* 0x50-0x57 */ 0x7CDD,0x7E2E,0x7E3E,0x7E46,0x7E37,0x7E32,0x7E43,0x7E2B,/* 0x58-0x5F */ 0x7E3D,0x7E31,0x7E45,0x7E41,0x7E34,0x7E39,0x7E48,0x7E35,/* 0x60-0x67 */ 0x7E3F,0x7E2F,0x7F44,0x7FF3,0x7FFC,0x8071,0x8072,0x8070,/* 0x68-0x6F */ 0x806F,0x8073,0x81C6,0x81C3,0x81BA,0x81C2,0x81C0,0x81BF,/* 0x70-0x77 */ 0x81BD,0x81C9,0x81BE,0x81E8,0x8209,0x8271,0x85AA,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x8584,0x857E,0x859C,0x8591,0x8594,0x85AF,0x859B,/* 0xA0-0xA7 */ 0x8587,0x85A8,0x858A,0x8667,0x87C0,0x87D1,0x87B3,0x87D2,/* 0xA8-0xAF */ 0x87C6,0x87AB,0x87BB,0x87BA,0x87C8,0x87CB,0x893B,0x8936,/* 0xB0-0xB7 */ 0x8944,0x8938,0x893D,0x89AC,0x8B0E,0x8B17,0x8B19,0x8B1B,/* 0xB8-0xBF */ 0x8B0A,0x8B20,0x8B1D,0x8B04,0x8B10,0x8C41,0x8C3F,0x8C73,/* 0xC0-0xC7 */ 0x8CFA,0x8CFD,0x8CFC,0x8CF8,0x8CFB,0x8DA8,0x8E49,0x8E4B,/* 0xC8-0xCF */ 0x8E48,0x8E4A,0x8F44,0x8F3E,0x8F42,0x8F45,0x8F3F,0x907F,/* 0xD0-0xD7 */ 0x907D,0x9084,0x9081,0x9082,0x9080,0x9139,0x91A3,0x919E,/* 0xD8-0xDF */ 0x919C,0x934D,0x9382,0x9328,0x9375,0x934A,0x9365,0x934B,/* 0xE0-0xE7 */ 0x9318,0x937E,0x936C,0x935B,0x9370,0x935A,0x9354,0x95CA,/* 0xE8-0xEF */ 0x95CB,0x95CC,0x95C8,0x95C6,0x96B1,0x96B8,0x96D6,0x971C,/* 0xF0-0xF7 */ 0x971E,0x97A0,0x97D3,0x9846,0x98B6,0x9935,0x9A01,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_C2[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x99FF,0x9BAE,0x9BAB,0x9BAA,0x9BAD,0x9D3B,0x9D3F,0x9E8B,/* 0x40-0x47 */ 0x9ECF,0x9EDE,0x9EDC,0x9EDD,0x9EDB,0x9F3E,0x9F4B,0x53E2,/* 0x48-0x4F */ 0x5695,0x56AE,0x58D9,0x58D8,0x5B38,0x5F5D,0x61E3,0x6233,/* 0x50-0x57 */ 0x64F4,0x64F2,0x64FE,0x6506,0x64FA,0x64FB,0x64F7,0x65B7,/* 0x58-0x5F */ 0x66DC,0x6726,0x6AB3,0x6AAC,0x6AC3,0x6ABB,0x6AB8,0x6AC2,/* 0x60-0x67 */ 0x6AAE,0x6AAF,0x6B5F,0x6B78,0x6BAF,0x7009,0x700B,0x6FFE,/* 0x68-0x6F */ 0x7006,0x6FFA,0x7011,0x700F,0x71FB,0x71FC,0x71FE,0x71F8,/* 0x70-0x77 */ 0x7377,0x7375,0x74A7,0x74BF,0x7515,0x7656,0x7658,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x7652,0x77BD,0x77BF,0x77BB,0x77BC,0x790E,0x79AE,/* 0xA0-0xA7 */ 0x7A61,0x7A62,0x7A60,0x7AC4,0x7AC5,0x7C2B,0x7C27,0x7C2A,/* 0xA8-0xAF */ 0x7C1E,0x7C23,0x7C21,0x7CE7,0x7E54,0x7E55,0x7E5E,0x7E5A,/* 0xB0-0xB7 */ 0x7E61,0x7E52,0x7E59,0x7F48,0x7FF9,0x7FFB,0x8077,0x8076,/* 0xB8-0xBF */ 0x81CD,0x81CF,0x820A,0x85CF,0x85A9,0x85CD,0x85D0,0x85C9,/* 0xC0-0xC7 */ 0x85B0,0x85BA,0x85B9,0x85A6,0x87EF,0x87EC,0x87F2,0x87E0,/* 0xC8-0xCF */ 0x8986,0x89B2,0x89F4,0x8B28,0x8B39,0x8B2C,0x8B2B,0x8C50,/* 0xD0-0xD7 */ 0x8D05,0x8E59,0x8E63,0x8E66,0x8E64,0x8E5F,0x8E55,0x8EC0,/* 0xD8-0xDF */ 0x8F49,0x8F4D,0x9087,0x9083,0x9088,0x91AB,0x91AC,0x91D0,/* 0xE0-0xE7 */ 0x9394,0x938A,0x9396,0x93A2,0x93B3,0x93AE,0x93AC,0x93B0,/* 0xE8-0xEF */ 0x9398,0x939A,0x9397,0x95D4,0x95D6,0x95D0,0x95D5,0x96E2,/* 0xF0-0xF7 */ 0x96DC,0x96D9,0x96DB,0x96DE,0x9724,0x97A3,0x97A6,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_C3[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x97AD,0x97F9,0x984D,0x984F,0x984C,0x984E,0x9853,0x98BA,/* 0x40-0x47 */ 0x993E,0x993F,0x993D,0x992E,0x99A5,0x9A0E,0x9AC1,0x9B03,/* 0x48-0x4F */ 0x9B06,0x9B4F,0x9B4E,0x9B4D,0x9BCA,0x9BC9,0x9BFD,0x9BC8,/* 0x50-0x57 */ 0x9BC0,0x9D51,0x9D5D,0x9D60,0x9EE0,0x9F15,0x9F2C,0x5133,/* 0x58-0x5F */ 0x56A5,0x58DE,0x58DF,0x58E2,0x5BF5,0x9F90,0x5EEC,0x61F2,/* 0x60-0x67 */ 0x61F7,0x61F6,0x61F5,0x6500,0x650F,0x66E0,0x66DD,0x6AE5,/* 0x68-0x6F */ 0x6ADD,0x6ADA,0x6AD3,0x701B,0x701F,0x7028,0x701A,0x701D,/* 0x70-0x77 */ 0x7015,0x7018,0x7206,0x720D,0x7258,0x72A2,0x7378,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x737A,0x74BD,0x74CA,0x74E3,0x7587,0x7586,0x765F,/* 0xA0-0xA7 */ 0x7661,0x77C7,0x7919,0x79B1,0x7A6B,0x7A69,0x7C3E,0x7C3F,/* 0xA8-0xAF */ 0x7C38,0x7C3D,0x7C37,0x7C40,0x7E6B,0x7E6D,0x7E79,0x7E69,/* 0xB0-0xB7 */ 0x7E6A,0x7F85,0x7E73,0x7FB6,0x7FB9,0x7FB8,0x81D8,0x85E9,/* 0xB8-0xBF */ 0x85DD,0x85EA,0x85D5,0x85E4,0x85E5,0x85F7,0x87FB,0x8805,/* 0xC0-0xC7 */ 0x880D,0x87F9,0x87FE,0x8960,0x895F,0x8956,0x895E,0x8B41,/* 0xC8-0xCF */ 0x8B5C,0x8B58,0x8B49,0x8B5A,0x8B4E,0x8B4F,0x8B46,0x8B59,/* 0xD0-0xD7 */ 0x8D08,0x8D0A,0x8E7C,0x8E72,0x8E87,0x8E76,0x8E6C,0x8E7A,/* 0xD8-0xDF */ 0x8E74,0x8F54,0x8F4E,0x8FAD,0x908A,0x908B,0x91B1,0x91AE,/* 0xE0-0xE7 */ 0x93E1,0x93D1,0x93DF,0x93C3,0x93C8,0x93DC,0x93DD,0x93D6,/* 0xE8-0xEF */ 0x93E2,0x93CD,0x93D8,0x93E4,0x93D7,0x93E8,0x95DC,0x96B4,/* 0xF0-0xF7 */ 0x96E3,0x972A,0x9727,0x9761,0x97DC,0x97FB,0x985E,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_C4[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x9858,0x985B,0x98BC,0x9945,0x9949,0x9A16,0x9A19,0x9B0D,/* 0x40-0x47 */ 0x9BE8,0x9BE7,0x9BD6,0x9BDB,0x9D89,0x9D61,0x9D72,0x9D6A,/* 0x48-0x4F */ 0x9D6C,0x9E92,0x9E97,0x9E93,0x9EB4,0x52F8,0x56A8,0x56B7,/* 0x50-0x57 */ 0x56B6,0x56B4,0x56BC,0x58E4,0x5B40,0x5B43,0x5B7D,0x5BF6,/* 0x58-0x5F */ 0x5DC9,0x61F8,0x61FA,0x6518,0x6514,0x6519,0x66E6,0x6727,/* 0x60-0x67 */ 0x6AEC,0x703E,0x7030,0x7032,0x7210,0x737B,0x74CF,0x7662,/* 0x68-0x6F */ 0x7665,0x7926,0x792A,0x792C,0x792B,0x7AC7,0x7AF6,0x7C4C,/* 0x70-0x77 */ 0x7C43,0x7C4D,0x7CEF,0x7CF0,0x8FAE,0x7E7D,0x7E7C,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x7E82,0x7F4C,0x8000,0x81DA,0x8266,0x85FB,0x85F9,/* 0xA0-0xA7 */ 0x8611,0x85FA,0x8606,0x860B,0x8607,0x860A,0x8814,0x8815,/* 0xA8-0xAF */ 0x8964,0x89BA,0x89F8,0x8B70,0x8B6C,0x8B66,0x8B6F,0x8B5F,/* 0xB0-0xB7 */ 0x8B6B,0x8D0F,0x8D0D,0x8E89,0x8E81,0x8E85,0x8E82,0x91B4,/* 0xB8-0xBF */ 0x91CB,0x9418,0x9403,0x93FD,0x95E1,0x9730,0x98C4,0x9952,/* 0xC0-0xC7 */ 0x9951,0x99A8,0x9A2B,0x9A30,0x9A37,0x9A35,0x9C13,0x9C0D,/* 0xC8-0xCF */ 0x9E79,0x9EB5,0x9EE8,0x9F2F,0x9F5F,0x9F63,0x9F61,0x5137,/* 0xD0-0xD7 */ 0x5138,0x56C1,0x56C0,0x56C2,0x5914,0x5C6C,0x5DCD,0x61FC,/* 0xD8-0xDF */ 0x61FE,0x651D,0x651C,0x6595,0x66E9,0x6AFB,0x6B04,0x6AFA,/* 0xE0-0xE7 */ 0x6BB2,0x704C,0x721B,0x72A7,0x74D6,0x74D4,0x7669,0x77D3,/* 0xE8-0xEF */ 0x7C50,0x7E8F,0x7E8C,0x7FBC,0x8617,0x862D,0x861A,0x8823,/* 0xF0-0xF7 */ 0x8822,0x8821,0x881F,0x896A,0x896C,0x89BD,0x8B74,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_C5[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x8B77,0x8B7D,0x8D13,0x8E8A,0x8E8D,0x8E8B,0x8F5F,0x8FAF,/* 0x40-0x47 */ 0x91BA,0x942E,0x9433,0x9435,0x943A,0x9438,0x9432,0x942B,/* 0x48-0x4F */ 0x95E2,0x9738,0x9739,0x9732,0x97FF,0x9867,0x9865,0x9957,/* 0x50-0x57 */ 0x9A45,0x9A43,0x9A40,0x9A3E,0x9ACF,0x9B54,0x9B51,0x9C2D,/* 0x58-0x5F */ 0x9C25,0x9DAF,0x9DB4,0x9DC2,0x9DB8,0x9E9D,0x9EEF,0x9F19,/* 0x60-0x67 */ 0x9F5C,0x9F66,0x9F67,0x513C,0x513B,0x56C8,0x56CA,0x56C9,/* 0x68-0x6F */ 0x5B7F,0x5DD4,0x5DD2,0x5F4E,0x61FF,0x6524,0x6B0A,0x6B61,/* 0x70-0x77 */ 0x7051,0x7058,0x7380,0x74E4,0x758A,0x766E,0x766C,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x79B3,0x7C60,0x7C5F,0x807E,0x807D,0x81DF,0x8972,/* 0xA0-0xA7 */ 0x896F,0x89FC,0x8B80,0x8D16,0x8D17,0x8E91,0x8E93,0x8F61,/* 0xA8-0xAF */ 0x9148,0x9444,0x9451,0x9452,0x973D,0x973E,0x97C3,0x97C1,/* 0xB0-0xB7 */ 0x986B,0x9955,0x9A55,0x9A4D,0x9AD2,0x9B1A,0x9C49,0x9C31,/* 0xB8-0xBF */ 0x9C3E,0x9C3B,0x9DD3,0x9DD7,0x9F34,0x9F6C,0x9F6A,0x9F94,/* 0xC0-0xC7 */ 0x56CC,0x5DD6,0x6200,0x6523,0x652B,0x652A,0x66EC,0x6B10,/* 0xC8-0xCF */ 0x74DA,0x7ACA,0x7C64,0x7C63,0x7C65,0x7E93,0x7E96,0x7E94,/* 0xD0-0xD7 */ 0x81E2,0x8638,0x863F,0x8831,0x8B8A,0x9090,0x908F,0x9463,/* 0xD8-0xDF */ 0x9460,0x9464,0x9768,0x986F,0x995C,0x9A5A,0x9A5B,0x9A57,/* 0xE0-0xE7 */ 0x9AD3,0x9AD4,0x9AD1,0x9C54,0x9C57,0x9C56,0x9DE5,0x9E9F,/* 0xE8-0xEF */ 0x9EF4,0x56D1,0x58E9,0x652C,0x705E,0x7671,0x7672,0x77D7,/* 0xF0-0xF7 */ 0x7F50,0x7F88,0x8836,0x8839,0x8862,0x8B93,0x8B92,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_C6[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x8B96,0x8277,0x8D1B,0x91C0,0x946A,0x9742,0x9748,0x9744,/* 0x40-0x47 */ 0x97C6,0x9870,0x9A5F,0x9B22,0x9B58,0x9C5F,0x9DF9,0x9DFA,/* 0x48-0x4F */ 0x9E7C,0x9E7D,0x9F07,0x9F77,0x9F72,0x5EF3,0x6B16,0x7063,/* 0x50-0x57 */ 0x7C6C,0x7C6E,0x883B,0x89C0,0x8EA1,0x91C1,0x9472,0x9470,/* 0x58-0x5F */ 0x9871,0x995E,0x9AD6,0x9B23,0x9ECC,0x7064,0x77DA,0x8B9A,/* 0x60-0x67 */ 0x9477,0x97C9,0x9A62,0x9A65,0x7E9C,0x8B9C,0x8EAA,0x91C5,/* 0x68-0x6F */ 0x947D,0x947E,0x947C,0x9C77,0x9C78,0x9EF7,0x8C54,0x947F,/* 0x70-0x77 */ 0x9E1A,0x7228,0x9A6A,0x9B31,0x9E1B,0x9E1E,0x7C72,0x0000,/* 0x78-0x7F */ }; static const wchar_t c2u_C9[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x4E42,0x4E5C,0x51F5,0x531A,0x5382,0x4E07,0x4E0C,0x4E47,/* 0x40-0x47 */ 0x4E8D,0x56D7,0xFA0C,0x5C6E,0x5F73,0x4E0F,0x5187,0x4E0E,/* 0x48-0x4F */ 0x4E2E,0x4E93,0x4EC2,0x4EC9,0x4EC8,0x5198,0x52FC,0x536C,/* 0x50-0x57 */ 0x53B9,0x5720,0x5903,0x592C,0x5C10,0x5DFF,0x65E1,0x6BB3,/* 0x58-0x5F */ 0x6BCC,0x6C14,0x723F,0x4E31,0x4E3C,0x4EE8,0x4EDC,0x4EE9,/* 0x60-0x67 */ 0x4EE1,0x4EDD,0x4EDA,0x520C,0x531C,0x534C,0x5722,0x5723,/* 0x68-0x6F */ 0x5917,0x592F,0x5B81,0x5B84,0x5C12,0x5C3B,0x5C74,0x5C73,/* 0x70-0x77 */ 0x5E04,0x5E80,0x5E82,0x5FC9,0x6209,0x6250,0x6C15,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x6C36,0x6C43,0x6C3F,0x6C3B,0x72AE,0x72B0,0x738A,/* 0xA0-0xA7 */ 0x79B8,0x808A,0x961E,0x4F0E,0x4F18,0x4F2C,0x4EF5,0x4F14,/* 0xA8-0xAF */ 0x4EF1,0x4F00,0x4EF7,0x4F08,0x4F1D,0x4F02,0x4F05,0x4F22,/* 0xB0-0xB7 */ 0x4F13,0x4F04,0x4EF4,0x4F12,0x51B1,0x5213,0x5209,0x5210,/* 0xB8-0xBF */ 0x52A6,0x5322,0x531F,0x534D,0x538A,0x5407,0x56E1,0x56DF,/* 0xC0-0xC7 */ 0x572E,0x572A,0x5734,0x593C,0x5980,0x597C,0x5985,0x597B,/* 0xC8-0xCF */ 0x597E,0x5977,0x597F,0x5B56,0x5C15,0x5C25,0x5C7C,0x5C7A,/* 0xD0-0xD7 */ 0x5C7B,0x5C7E,0x5DDF,0x5E75,0x5E84,0x5F02,0x5F1A,0x5F74,/* 0xD8-0xDF */ 0x5FD5,0x5FD4,0x5FCF,0x625C,0x625E,0x6264,0x6261,0x6266,/* 0xE0-0xE7 */ 0x6262,0x6259,0x6260,0x625A,0x6265,0x65EF,0x65EE,0x673E,/* 0xE8-0xEF */ 0x6739,0x6738,0x673B,0x673A,0x673F,0x673C,0x6733,0x6C18,/* 0xF0-0xF7 */ 0x6C46,0x6C52,0x6C5C,0x6C4F,0x6C4A,0x6C54,0x6C4B,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_CA[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x6C4C,0x7071,0x725E,0x72B4,0x72B5,0x738E,0x752A,0x767F,/* 0x40-0x47 */ 0x7A75,0x7F51,0x8278,0x827C,0x8280,0x827D,0x827F,0x864D,/* 0x48-0x4F */ 0x897E,0x9099,0x9097,0x9098,0x909B,0x9094,0x9622,0x9624,/* 0x50-0x57 */ 0x9620,0x9623,0x4F56,0x4F3B,0x4F62,0x4F49,0x4F53,0x4F64,/* 0x58-0x5F */ 0x4F3E,0x4F67,0x4F52,0x4F5F,0x4F41,0x4F58,0x4F2D,0x4F33,/* 0x60-0x67 */ 0x4F3F,0x4F61,0x518F,0x51B9,0x521C,0x521E,0x5221,0x52AD,/* 0x68-0x6F */ 0x52AE,0x5309,0x5363,0x5372,0x538E,0x538F,0x5430,0x5437,/* 0x70-0x77 */ 0x542A,0x5454,0x5445,0x5419,0x541C,0x5425,0x5418,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x543D,0x544F,0x5441,0x5428,0x5424,0x5447,0x56EE,/* 0xA0-0xA7 */ 0x56E7,0x56E5,0x5741,0x5745,0x574C,0x5749,0x574B,0x5752,/* 0xA8-0xAF */ 0x5906,0x5940,0x59A6,0x5998,0x59A0,0x5997,0x598E,0x59A2,/* 0xB0-0xB7 */ 0x5990,0x598F,0x59A7,0x59A1,0x5B8E,0x5B92,0x5C28,0x5C2A,/* 0xB8-0xBF */ 0x5C8D,0x5C8F,0x5C88,0x5C8B,0x5C89,0x5C92,0x5C8A,0x5C86,/* 0xC0-0xC7 */ 0x5C93,0x5C95,0x5DE0,0x5E0A,0x5E0E,0x5E8B,0x5E89,0x5E8C,/* 0xC8-0xCF */ 0x5E88,0x5E8D,0x5F05,0x5F1D,0x5F78,0x5F76,0x5FD2,0x5FD1,/* 0xD0-0xD7 */ 0x5FD0,0x5FED,0x5FE8,0x5FEE,0x5FF3,0x5FE1,0x5FE4,0x5FE3,/* 0xD8-0xDF */ 0x5FFA,0x5FEF,0x5FF7,0x5FFB,0x6000,0x5FF4,0x623A,0x6283,/* 0xE0-0xE7 */ 0x628C,0x628E,0x628F,0x6294,0x6287,0x6271,0x627B,0x627A,/* 0xE8-0xEF */ 0x6270,0x6281,0x6288,0x6277,0x627D,0x6272,0x6274,0x6537,/* 0xF0-0xF7 */ 0x65F0,0x65F4,0x65F3,0x65F2,0x65F5,0x6745,0x6747,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_CB[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x6759,0x6755,0x674C,0x6748,0x675D,0x674D,0x675A,0x674B,/* 0x40-0x47 */ 0x6BD0,0x6C19,0x6C1A,0x6C78,0x6C67,0x6C6B,0x6C84,0x6C8B,/* 0x48-0x4F */ 0x6C8F,0x6C71,0x6C6F,0x6C69,0x6C9A,0x6C6D,0x6C87,0x6C95,/* 0x50-0x57 */ 0x6C9C,0x6C66,0x6C73,0x6C65,0x6C7B,0x6C8E,0x7074,0x707A,/* 0x58-0x5F */ 0x7263,0x72BF,0x72BD,0x72C3,0x72C6,0x72C1,0x72BA,0x72C5,/* 0x60-0x67 */ 0x7395,0x7397,0x7393,0x7394,0x7392,0x753A,0x7539,0x7594,/* 0x68-0x6F */ 0x7595,0x7681,0x793D,0x8034,0x8095,0x8099,0x8090,0x8092,/* 0x70-0x77 */ 0x809C,0x8290,0x828F,0x8285,0x828E,0x8291,0x8293,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x828A,0x8283,0x8284,0x8C78,0x8FC9,0x8FBF,0x909F,/* 0xA0-0xA7 */ 0x90A1,0x90A5,0x909E,0x90A7,0x90A0,0x9630,0x9628,0x962F,/* 0xA8-0xAF */ 0x962D,0x4E33,0x4F98,0x4F7C,0x4F85,0x4F7D,0x4F80,0x4F87,/* 0xB0-0xB7 */ 0x4F76,0x4F74,0x4F89,0x4F84,0x4F77,0x4F4C,0x4F97,0x4F6A,/* 0xB8-0xBF */ 0x4F9A,0x4F79,0x4F81,0x4F78,0x4F90,0x4F9C,0x4F94,0x4F9E,/* 0xC0-0xC7 */ 0x4F92,0x4F82,0x4F95,0x4F6B,0x4F6E,0x519E,0x51BC,0x51BE,/* 0xC8-0xCF */ 0x5235,0x5232,0x5233,0x5246,0x5231,0x52BC,0x530A,0x530B,/* 0xD0-0xD7 */ 0x533C,0x5392,0x5394,0x5487,0x547F,0x5481,0x5491,0x5482,/* 0xD8-0xDF */ 0x5488,0x546B,0x547A,0x547E,0x5465,0x546C,0x5474,0x5466,/* 0xE0-0xE7 */ 0x548D,0x546F,0x5461,0x5460,0x5498,0x5463,0x5467,0x5464,/* 0xE8-0xEF */ 0x56F7,0x56F9,0x576F,0x5772,0x576D,0x576B,0x5771,0x5770,/* 0xF0-0xF7 */ 0x5776,0x5780,0x5775,0x577B,0x5773,0x5774,0x5762,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_CC[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x5768,0x577D,0x590C,0x5945,0x59B5,0x59BA,0x59CF,0x59CE,/* 0x40-0x47 */ 0x59B2,0x59CC,0x59C1,0x59B6,0x59BC,0x59C3,0x59D6,0x59B1,/* 0x48-0x4F */ 0x59BD,0x59C0,0x59C8,0x59B4,0x59C7,0x5B62,0x5B65,0x5B93,/* 0x50-0x57 */ 0x5B95,0x5C44,0x5C47,0x5CAE,0x5CA4,0x5CA0,0x5CB5,0x5CAF,/* 0x58-0x5F */ 0x5CA8,0x5CAC,0x5C9F,0x5CA3,0x5CAD,0x5CA2,0x5CAA,0x5CA7,/* 0x60-0x67 */ 0x5C9D,0x5CA5,0x5CB6,0x5CB0,0x5CA6,0x5E17,0x5E14,0x5E19,/* 0x68-0x6F */ 0x5F28,0x5F22,0x5F23,0x5F24,0x5F54,0x5F82,0x5F7E,0x5F7D,/* 0x70-0x77 */ 0x5FDE,0x5FE5,0x602D,0x6026,0x6019,0x6032,0x600B,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x6034,0x600A,0x6017,0x6033,0x601A,0x601E,0x602C,/* 0xA0-0xA7 */ 0x6022,0x600D,0x6010,0x602E,0x6013,0x6011,0x600C,0x6009,/* 0xA8-0xAF */ 0x601C,0x6214,0x623D,0x62AD,0x62B4,0x62D1,0x62BE,0x62AA,/* 0xB0-0xB7 */ 0x62B6,0x62CA,0x62AE,0x62B3,0x62AF,0x62BB,0x62A9,0x62B0,/* 0xB8-0xBF */ 0x62B8,0x653D,0x65A8,0x65BB,0x6609,0x65FC,0x6604,0x6612,/* 0xC0-0xC7 */ 0x6608,0x65FB,0x6603,0x660B,0x660D,0x6605,0x65FD,0x6611,/* 0xC8-0xCF */ 0x6610,0x66F6,0x670A,0x6785,0x676C,0x678E,0x6792,0x6776,/* 0xD0-0xD7 */ 0x677B,0x6798,0x6786,0x6784,0x6774,0x678D,0x678C,0x677A,/* 0xD8-0xDF */ 0x679F,0x6791,0x6799,0x6783,0x677D,0x6781,0x6778,0x6779,/* 0xE0-0xE7 */ 0x6794,0x6B25,0x6B80,0x6B7E,0x6BDE,0x6C1D,0x6C93,0x6CEC,/* 0xE8-0xEF */ 0x6CEB,0x6CEE,0x6CD9,0x6CB6,0x6CD4,0x6CAD,0x6CE7,0x6CB7,/* 0xF0-0xF7 */ 0x6CD0,0x6CC2,0x6CBA,0x6CC3,0x6CC6,0x6CED,0x6CF2,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_CD[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x6CD2,0x6CDD,0x6CB4,0x6C8A,0x6C9D,0x6C80,0x6CDE,0x6CC0,/* 0x40-0x47 */ 0x6D30,0x6CCD,0x6CC7,0x6CB0,0x6CF9,0x6CCF,0x6CE9,0x6CD1,/* 0x48-0x4F */ 0x7094,0x7098,0x7085,0x7093,0x7086,0x7084,0x7091,0x7096,/* 0x50-0x57 */ 0x7082,0x709A,0x7083,0x726A,0x72D6,0x72CB,0x72D8,0x72C9,/* 0x58-0x5F */ 0x72DC,0x72D2,0x72D4,0x72DA,0x72CC,0x72D1,0x73A4,0x73A1,/* 0x60-0x67 */ 0x73AD,0x73A6,0x73A2,0x73A0,0x73AC,0x739D,0x74DD,0x74E8,/* 0x68-0x6F */ 0x753F,0x7540,0x753E,0x758C,0x7598,0x76AF,0x76F3,0x76F1,/* 0x70-0x77 */ 0x76F0,0x76F5,0x77F8,0x77FC,0x77F9,0x77FB,0x77FA,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x77F7,0x7942,0x793F,0x79C5,0x7A78,0x7A7B,0x7AFB,/* 0xA0-0xA7 */ 0x7C75,0x7CFD,0x8035,0x808F,0x80AE,0x80A3,0x80B8,0x80B5,/* 0xA8-0xAF */ 0x80AD,0x8220,0x82A0,0x82C0,0x82AB,0x829A,0x8298,0x829B,/* 0xB0-0xB7 */ 0x82B5,0x82A7,0x82AE,0x82BC,0x829E,0x82BA,0x82B4,0x82A8,/* 0xB8-0xBF */ 0x82A1,0x82A9,0x82C2,0x82A4,0x82C3,0x82B6,0x82A2,0x8670,/* 0xC0-0xC7 */ 0x866F,0x866D,0x866E,0x8C56,0x8FD2,0x8FCB,0x8FD3,0x8FCD,/* 0xC8-0xCF */ 0x8FD6,0x8FD5,0x8FD7,0x90B2,0x90B4,0x90AF,0x90B3,0x90B0,/* 0xD0-0xD7 */ 0x9639,0x963D,0x963C,0x963A,0x9643,0x4FCD,0x4FC5,0x4FD3,/* 0xD8-0xDF */ 0x4FB2,0x4FC9,0x4FCB,0x4FC1,0x4FD4,0x4FDC,0x4FD9,0x4FBB,/* 0xE0-0xE7 */ 0x4FB3,0x4FDB,0x4FC7,0x4FD6,0x4FBA,0x4FC0,0x4FB9,0x4FEC,/* 0xE8-0xEF */ 0x5244,0x5249,0x52C0,0x52C2,0x533D,0x537C,0x5397,0x5396,/* 0xF0-0xF7 */ 0x5399,0x5398,0x54BA,0x54A1,0x54AD,0x54A5,0x54CF,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_CE[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x54C3,0x830D,0x54B7,0x54AE,0x54D6,0x54B6,0x54C5,0x54C6,/* 0x40-0x47 */ 0x54A0,0x5470,0x54BC,0x54A2,0x54BE,0x5472,0x54DE,0x54B0,/* 0x48-0x4F */ 0x57B5,0x579E,0x579F,0x57A4,0x578C,0x5797,0x579D,0x579B,/* 0x50-0x57 */ 0x5794,0x5798,0x578F,0x5799,0x57A5,0x579A,0x5795,0x58F4,/* 0x58-0x5F */ 0x590D,0x5953,0x59E1,0x59DE,0x59EE,0x5A00,0x59F1,0x59DD,/* 0x60-0x67 */ 0x59FA,0x59FD,0x59FC,0x59F6,0x59E4,0x59F2,0x59F7,0x59DB,/* 0x68-0x6F */ 0x59E9,0x59F3,0x59F5,0x59E0,0x59FE,0x59F4,0x59ED,0x5BA8,/* 0x70-0x77 */ 0x5C4C,0x5CD0,0x5CD8,0x5CCC,0x5CD7,0x5CCB,0x5CDB,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x5CDE,0x5CDA,0x5CC9,0x5CC7,0x5CCA,0x5CD6,0x5CD3,/* 0xA0-0xA7 */ 0x5CD4,0x5CCF,0x5CC8,0x5CC6,0x5CCE,0x5CDF,0x5CF8,0x5DF9,/* 0xA8-0xAF */ 0x5E21,0x5E22,0x5E23,0x5E20,0x5E24,0x5EB0,0x5EA4,0x5EA2,/* 0xB0-0xB7 */ 0x5E9B,0x5EA3,0x5EA5,0x5F07,0x5F2E,0x5F56,0x5F86,0x6037,/* 0xB8-0xBF */ 0x6039,0x6054,0x6072,0x605E,0x6045,0x6053,0x6047,0x6049,/* 0xC0-0xC7 */ 0x605B,0x604C,0x6040,0x6042,0x605F,0x6024,0x6044,0x6058,/* 0xC8-0xCF */ 0x6066,0x606E,0x6242,0x6243,0x62CF,0x630D,0x630B,0x62F5,/* 0xD0-0xD7 */ 0x630E,0x6303,0x62EB,0x62F9,0x630F,0x630C,0x62F8,0x62F6,/* 0xD8-0xDF */ 0x6300,0x6313,0x6314,0x62FA,0x6315,0x62FB,0x62F0,0x6541,/* 0xE0-0xE7 */ 0x6543,0x65AA,0x65BF,0x6636,0x6621,0x6632,0x6635,0x661C,/* 0xE8-0xEF */ 0x6626,0x6622,0x6633,0x662B,0x663A,0x661D,0x6634,0x6639,/* 0xF0-0xF7 */ 0x662E,0x670F,0x6710,0x67C1,0x67F2,0x67C8,0x67BA,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_CF[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x67DC,0x67BB,0x67F8,0x67D8,0x67C0,0x67B7,0x67C5,0x67EB,/* 0x40-0x47 */ 0x67E4,0x67DF,0x67B5,0x67CD,0x67B3,0x67F7,0x67F6,0x67EE,/* 0x48-0x4F */ 0x67E3,0x67C2,0x67B9,0x67CE,0x67E7,0x67F0,0x67B2,0x67FC,/* 0x50-0x57 */ 0x67C6,0x67ED,0x67CC,0x67AE,0x67E6,0x67DB,0x67FA,0x67C9,/* 0x58-0x5F */ 0x67CA,0x67C3,0x67EA,0x67CB,0x6B28,0x6B82,0x6B84,0x6BB6,/* 0x60-0x67 */ 0x6BD6,0x6BD8,0x6BE0,0x6C20,0x6C21,0x6D28,0x6D34,0x6D2D,/* 0x68-0x6F */ 0x6D1F,0x6D3C,0x6D3F,0x6D12,0x6D0A,0x6CDA,0x6D33,0x6D04,/* 0x70-0x77 */ 0x6D19,0x6D3A,0x6D1A,0x6D11,0x6D00,0x6D1D,0x6D42,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x6D01,0x6D18,0x6D37,0x6D03,0x6D0F,0x6D40,0x6D07,/* 0xA0-0xA7 */ 0x6D20,0x6D2C,0x6D08,0x6D22,0x6D09,0x6D10,0x70B7,0x709F,/* 0xA8-0xAF */ 0x70BE,0x70B1,0x70B0,0x70A1,0x70B4,0x70B5,0x70A9,0x7241,/* 0xB0-0xB7 */ 0x7249,0x724A,0x726C,0x7270,0x7273,0x726E,0x72CA,0x72E4,/* 0xB8-0xBF */ 0x72E8,0x72EB,0x72DF,0x72EA,0x72E6,0x72E3,0x7385,0x73CC,/* 0xC0-0xC7 */ 0x73C2,0x73C8,0x73C5,0x73B9,0x73B6,0x73B5,0x73B4,0x73EB,/* 0xC8-0xCF */ 0x73BF,0x73C7,0x73BE,0x73C3,0x73C6,0x73B8,0x73CB,0x74EC,/* 0xD0-0xD7 */ 0x74EE,0x752E,0x7547,0x7548,0x75A7,0x75AA,0x7679,0x76C4,/* 0xD8-0xDF */ 0x7708,0x7703,0x7704,0x7705,0x770A,0x76F7,0x76FB,0x76FA,/* 0xE0-0xE7 */ 0x77E7,0x77E8,0x7806,0x7811,0x7812,0x7805,0x7810,0x780F,/* 0xE8-0xEF */ 0x780E,0x7809,0x7803,0x7813,0x794A,0x794C,0x794B,0x7945,/* 0xF0-0xF7 */ 0x7944,0x79D5,0x79CD,0x79CF,0x79D6,0x79CE,0x7A80,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_D0[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x7A7E,0x7AD1,0x7B00,0x7B01,0x7C7A,0x7C78,0x7C79,0x7C7F,/* 0x40-0x47 */ 0x7C80,0x7C81,0x7D03,0x7D08,0x7D01,0x7F58,0x7F91,0x7F8D,/* 0x48-0x4F */ 0x7FBE,0x8007,0x800E,0x800F,0x8014,0x8037,0x80D8,0x80C7,/* 0x50-0x57 */ 0x80E0,0x80D1,0x80C8,0x80C2,0x80D0,0x80C5,0x80E3,0x80D9,/* 0x58-0x5F */ 0x80DC,0x80CA,0x80D5,0x80C9,0x80CF,0x80D7,0x80E6,0x80CD,/* 0x60-0x67 */ 0x81FF,0x8221,0x8294,0x82D9,0x82FE,0x82F9,0x8307,0x82E8,/* 0x68-0x6F */ 0x8300,0x82D5,0x833A,0x82EB,0x82D6,0x82F4,0x82EC,0x82E1,/* 0x70-0x77 */ 0x82F2,0x82F5,0x830C,0x82FB,0x82F6,0x82F0,0x82EA,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x82E4,0x82E0,0x82FA,0x82F3,0x82ED,0x8677,0x8674,/* 0xA0-0xA7 */ 0x867C,0x8673,0x8841,0x884E,0x8867,0x886A,0x8869,0x89D3,/* 0xA8-0xAF */ 0x8A04,0x8A07,0x8D72,0x8FE3,0x8FE1,0x8FEE,0x8FE0,0x90F1,/* 0xB0-0xB7 */ 0x90BD,0x90BF,0x90D5,0x90C5,0x90BE,0x90C7,0x90CB,0x90C8,/* 0xB8-0xBF */ 0x91D4,0x91D3,0x9654,0x964F,0x9651,0x9653,0x964A,0x964E,/* 0xC0-0xC7 */ 0x501E,0x5005,0x5007,0x5013,0x5022,0x5030,0x501B,0x4FF5,/* 0xC8-0xCF */ 0x4FF4,0x5033,0x5037,0x502C,0x4FF6,0x4FF7,0x5017,0x501C,/* 0xD0-0xD7 */ 0x5020,0x5027,0x5035,0x502F,0x5031,0x500E,0x515A,0x5194,/* 0xD8-0xDF */ 0x5193,0x51CA,0x51C4,0x51C5,0x51C8,0x51CE,0x5261,0x525A,/* 0xE0-0xE7 */ 0x5252,0x525E,0x525F,0x5255,0x5262,0x52CD,0x530E,0x539E,/* 0xE8-0xEF */ 0x5526,0x54E2,0x5517,0x5512,0x54E7,0x54F3,0x54E4,0x551A,/* 0xF0-0xF7 */ 0x54FF,0x5504,0x5508,0x54EB,0x5511,0x5505,0x54F1,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_D1[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x550A,0x54FB,0x54F7,0x54F8,0x54E0,0x550E,0x5503,0x550B,/* 0x40-0x47 */ 0x5701,0x5702,0x57CC,0x5832,0x57D5,0x57D2,0x57BA,0x57C6,/* 0x48-0x4F */ 0x57BD,0x57BC,0x57B8,0x57B6,0x57BF,0x57C7,0x57D0,0x57B9,/* 0x50-0x57 */ 0x57C1,0x590E,0x594A,0x5A19,0x5A16,0x5A2D,0x5A2E,0x5A15,/* 0x58-0x5F */ 0x5A0F,0x5A17,0x5A0A,0x5A1E,0x5A33,0x5B6C,0x5BA7,0x5BAD,/* 0x60-0x67 */ 0x5BAC,0x5C03,0x5C56,0x5C54,0x5CEC,0x5CFF,0x5CEE,0x5CF1,/* 0x68-0x6F */ 0x5CF7,0x5D00,0x5CF9,0x5E29,0x5E28,0x5EA8,0x5EAE,0x5EAA,/* 0x70-0x77 */ 0x5EAC,0x5F33,0x5F30,0x5F67,0x605D,0x605A,0x6067,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x6041,0x60A2,0x6088,0x6080,0x6092,0x6081,0x609D,/* 0xA0-0xA7 */ 0x6083,0x6095,0x609B,0x6097,0x6087,0x609C,0x608E,0x6219,/* 0xA8-0xAF */ 0x6246,0x62F2,0x6310,0x6356,0x632C,0x6344,0x6345,0x6336,/* 0xB0-0xB7 */ 0x6343,0x63E4,0x6339,0x634B,0x634A,0x633C,0x6329,0x6341,/* 0xB8-0xBF */ 0x6334,0x6358,0x6354,0x6359,0x632D,0x6347,0x6333,0x635A,/* 0xC0-0xC7 */ 0x6351,0x6338,0x6357,0x6340,0x6348,0x654A,0x6546,0x65C6,/* 0xC8-0xCF */ 0x65C3,0x65C4,0x65C2,0x664A,0x665F,0x6647,0x6651,0x6712,/* 0xD0-0xD7 */ 0x6713,0x681F,0x681A,0x6849,0x6832,0x6833,0x683B,0x684B,/* 0xD8-0xDF */ 0x684F,0x6816,0x6831,0x681C,0x6835,0x682B,0x682D,0x682F,/* 0xE0-0xE7 */ 0x684E,0x6844,0x6834,0x681D,0x6812,0x6814,0x6826,0x6828,/* 0xE8-0xEF */ 0x682E,0x684D,0x683A,0x6825,0x6820,0x6B2C,0x6B2F,0x6B2D,/* 0xF0-0xF7 */ 0x6B31,0x6B34,0x6B6D,0x8082,0x6B88,0x6BE6,0x6BE4,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_D2[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x6BE8,0x6BE3,0x6BE2,0x6BE7,0x6C25,0x6D7A,0x6D63,0x6D64,/* 0x40-0x47 */ 0x6D76,0x6D0D,0x6D61,0x6D92,0x6D58,0x6D62,0x6D6D,0x6D6F,/* 0x48-0x4F */ 0x6D91,0x6D8D,0x6DEF,0x6D7F,0x6D86,0x6D5E,0x6D67,0x6D60,/* 0x50-0x57 */ 0x6D97,0x6D70,0x6D7C,0x6D5F,0x6D82,0x6D98,0x6D2F,0x6D68,/* 0x58-0x5F */ 0x6D8B,0x6D7E,0x6D80,0x6D84,0x6D16,0x6D83,0x6D7B,0x6D7D,/* 0x60-0x67 */ 0x6D75,0x6D90,0x70DC,0x70D3,0x70D1,0x70DD,0x70CB,0x7F39,/* 0x68-0x6F */ 0x70E2,0x70D7,0x70D2,0x70DE,0x70E0,0x70D4,0x70CD,0x70C5,/* 0x70-0x77 */ 0x70C6,0x70C7,0x70DA,0x70CE,0x70E1,0x7242,0x7278,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x7277,0x7276,0x7300,0x72FA,0x72F4,0x72FE,0x72F6,/* 0xA0-0xA7 */ 0x72F3,0x72FB,0x7301,0x73D3,0x73D9,0x73E5,0x73D6,0x73BC,/* 0xA8-0xAF */ 0x73E7,0x73E3,0x73E9,0x73DC,0x73D2,0x73DB,0x73D4,0x73DD,/* 0xB0-0xB7 */ 0x73DA,0x73D7,0x73D8,0x73E8,0x74DE,0x74DF,0x74F4,0x74F5,/* 0xB8-0xBF */ 0x7521,0x755B,0x755F,0x75B0,0x75C1,0x75BB,0x75C4,0x75C0,/* 0xC0-0xC7 */ 0x75BF,0x75B6,0x75BA,0x768A,0x76C9,0x771D,0x771B,0x7710,/* 0xC8-0xCF */ 0x7713,0x7712,0x7723,0x7711,0x7715,0x7719,0x771A,0x7722,/* 0xD0-0xD7 */ 0x7727,0x7823,0x782C,0x7822,0x7835,0x782F,0x7828,0x782E,/* 0xD8-0xDF */ 0x782B,0x7821,0x7829,0x7833,0x782A,0x7831,0x7954,0x795B,/* 0xE0-0xE7 */ 0x794F,0x795C,0x7953,0x7952,0x7951,0x79EB,0x79EC,0x79E0,/* 0xE8-0xEF */ 0x79EE,0x79ED,0x79EA,0x79DC,0x79DE,0x79DD,0x7A86,0x7A89,/* 0xF0-0xF7 */ 0x7A85,0x7A8B,0x7A8C,0x7A8A,0x7A87,0x7AD8,0x7B10,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_D3[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x7B04,0x7B13,0x7B05,0x7B0F,0x7B08,0x7B0A,0x7B0E,0x7B09,/* 0x40-0x47 */ 0x7B12,0x7C84,0x7C91,0x7C8A,0x7C8C,0x7C88,0x7C8D,0x7C85,/* 0x48-0x4F */ 0x7D1E,0x7D1D,0x7D11,0x7D0E,0x7D18,0x7D16,0x7D13,0x7D1F,/* 0x50-0x57 */ 0x7D12,0x7D0F,0x7D0C,0x7F5C,0x7F61,0x7F5E,0x7F60,0x7F5D,/* 0x58-0x5F */ 0x7F5B,0x7F96,0x7F92,0x7FC3,0x7FC2,0x7FC0,0x8016,0x803E,/* 0x60-0x67 */ 0x8039,0x80FA,0x80F2,0x80F9,0x80F5,0x8101,0x80FB,0x8100,/* 0x68-0x6F */ 0x8201,0x822F,0x8225,0x8333,0x832D,0x8344,0x8319,0x8351,/* 0x70-0x77 */ 0x8325,0x8356,0x833F,0x8341,0x8326,0x831C,0x8322,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x8342,0x834E,0x831B,0x832A,0x8308,0x833C,0x834D,/* 0xA0-0xA7 */ 0x8316,0x8324,0x8320,0x8337,0x832F,0x8329,0x8347,0x8345,/* 0xA8-0xAF */ 0x834C,0x8353,0x831E,0x832C,0x834B,0x8327,0x8348,0x8653,/* 0xB0-0xB7 */ 0x8652,0x86A2,0x86A8,0x8696,0x868D,0x8691,0x869E,0x8687,/* 0xB8-0xBF */ 0x8697,0x8686,0x868B,0x869A,0x8685,0x86A5,0x8699,0x86A1,/* 0xC0-0xC7 */ 0x86A7,0x8695,0x8698,0x868E,0x869D,0x8690,0x8694,0x8843,/* 0xC8-0xCF */ 0x8844,0x886D,0x8875,0x8876,0x8872,0x8880,0x8871,0x887F,/* 0xD0-0xD7 */ 0x886F,0x8883,0x887E,0x8874,0x887C,0x8A12,0x8C47,0x8C57,/* 0xD8-0xDF */ 0x8C7B,0x8CA4,0x8CA3,0x8D76,0x8D78,0x8DB5,0x8DB7,0x8DB6,/* 0xE0-0xE7 */ 0x8ED1,0x8ED3,0x8FFE,0x8FF5,0x9002,0x8FFF,0x8FFB,0x9004,/* 0xE8-0xEF */ 0x8FFC,0x8FF6,0x90D6,0x90E0,0x90D9,0x90DA,0x90E3,0x90DF,/* 0xF0-0xF7 */ 0x90E5,0x90D8,0x90DB,0x90D7,0x90DC,0x90E4,0x9150,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_D4[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x914E,0x914F,0x91D5,0x91E2,0x91DA,0x965C,0x965F,0x96BC,/* 0x40-0x47 */ 0x98E3,0x9ADF,0x9B2F,0x4E7F,0x5070,0x506A,0x5061,0x505E,/* 0x48-0x4F */ 0x5060,0x5053,0x504B,0x505D,0x5072,0x5048,0x504D,0x5041,/* 0x50-0x57 */ 0x505B,0x504A,0x5062,0x5015,0x5045,0x505F,0x5069,0x506B,/* 0x58-0x5F */ 0x5063,0x5064,0x5046,0x5040,0x506E,0x5073,0x5057,0x5051,/* 0x60-0x67 */ 0x51D0,0x526B,0x526D,0x526C,0x526E,0x52D6,0x52D3,0x532D,/* 0x68-0x6F */ 0x539C,0x5575,0x5576,0x553C,0x554D,0x5550,0x5534,0x552A,/* 0x70-0x77 */ 0x5551,0x5562,0x5536,0x5535,0x5530,0x5552,0x5545,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x550C,0x5532,0x5565,0x554E,0x5539,0x5548,0x552D,/* 0xA0-0xA7 */ 0x553B,0x5540,0x554B,0x570A,0x5707,0x57FB,0x5814,0x57E2,/* 0xA8-0xAF */ 0x57F6,0x57DC,0x57F4,0x5800,0x57ED,0x57FD,0x5808,0x57F8,/* 0xB0-0xB7 */ 0x580B,0x57F3,0x57CF,0x5807,0x57EE,0x57E3,0x57F2,0x57E5,/* 0xB8-0xBF */ 0x57EC,0x57E1,0x580E,0x57FC,0x5810,0x57E7,0x5801,0x580C,/* 0xC0-0xC7 */ 0x57F1,0x57E9,0x57F0,0x580D,0x5804,0x595C,0x5A60,0x5A58,/* 0xC8-0xCF */ 0x5A55,0x5A67,0x5A5E,0x5A38,0x5A35,0x5A6D,0x5A50,0x5A5F,/* 0xD0-0xD7 */ 0x5A65,0x5A6C,0x5A53,0x5A64,0x5A57,0x5A43,0x5A5D,0x5A52,/* 0xD8-0xDF */ 0x5A44,0x5A5B,0x5A48,0x5A8E,0x5A3E,0x5A4D,0x5A39,0x5A4C,/* 0xE0-0xE7 */ 0x5A70,0x5A69,0x5A47,0x5A51,0x5A56,0x5A42,0x5A5C,0x5B72,/* 0xE8-0xEF */ 0x5B6E,0x5BC1,0x5BC0,0x5C59,0x5D1E,0x5D0B,0x5D1D,0x5D1A,/* 0xF0-0xF7 */ 0x5D20,0x5D0C,0x5D28,0x5D0D,0x5D26,0x5D25,0x5D0F,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_D5[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x5D30,0x5D12,0x5D23,0x5D1F,0x5D2E,0x5E3E,0x5E34,0x5EB1,/* 0x40-0x47 */ 0x5EB4,0x5EB9,0x5EB2,0x5EB3,0x5F36,0x5F38,0x5F9B,0x5F96,/* 0x48-0x4F */ 0x5F9F,0x608A,0x6090,0x6086,0x60BE,0x60B0,0x60BA,0x60D3,/* 0x50-0x57 */ 0x60D4,0x60CF,0x60E4,0x60D9,0x60DD,0x60C8,0x60B1,0x60DB,/* 0x58-0x5F */ 0x60B7,0x60CA,0x60BF,0x60C3,0x60CD,0x60C0,0x6332,0x6365,/* 0x60-0x67 */ 0x638A,0x6382,0x637D,0x63BD,0x639E,0x63AD,0x639D,0x6397,/* 0x68-0x6F */ 0x63AB,0x638E,0x636F,0x6387,0x6390,0x636E,0x63AF,0x6375,/* 0x70-0x77 */ 0x639C,0x636D,0x63AE,0x637C,0x63A4,0x633B,0x639F,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x6378,0x6385,0x6381,0x6391,0x638D,0x6370,0x6553,/* 0xA0-0xA7 */ 0x65CD,0x6665,0x6661,0x665B,0x6659,0x665C,0x6662,0x6718,/* 0xA8-0xAF */ 0x6879,0x6887,0x6890,0x689C,0x686D,0x686E,0x68AE,0x68AB,/* 0xB0-0xB7 */ 0x6956,0x686F,0x68A3,0x68AC,0x68A9,0x6875,0x6874,0x68B2,/* 0xB8-0xBF */ 0x688F,0x6877,0x6892,0x687C,0x686B,0x6872,0x68AA,0x6880,/* 0xC0-0xC7 */ 0x6871,0x687E,0x689B,0x6896,0x688B,0x68A0,0x6889,0x68A4,/* 0xC8-0xCF */ 0x6878,0x687B,0x6891,0x688C,0x688A,0x687D,0x6B36,0x6B33,/* 0xD0-0xD7 */ 0x6B37,0x6B38,0x6B91,0x6B8F,0x6B8D,0x6B8E,0x6B8C,0x6C2A,/* 0xD8-0xDF */ 0x6DC0,0x6DAB,0x6DB4,0x6DB3,0x6E74,0x6DAC,0x6DE9,0x6DE2,/* 0xE0-0xE7 */ 0x6DB7,0x6DF6,0x6DD4,0x6E00,0x6DC8,0x6DE0,0x6DDF,0x6DD6,/* 0xE8-0xEF */ 0x6DBE,0x6DE5,0x6DDC,0x6DDD,0x6DDB,0x6DF4,0x6DCA,0x6DBD,/* 0xF0-0xF7 */ 0x6DED,0x6DF0,0x6DBA,0x6DD5,0x6DC2,0x6DCF,0x6DC9,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_D6[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x6DD0,0x6DF2,0x6DD3,0x6DFD,0x6DD7,0x6DCD,0x6DE3,0x6DBB,/* 0x40-0x47 */ 0x70FA,0x710D,0x70F7,0x7117,0x70F4,0x710C,0x70F0,0x7104,/* 0x48-0x4F */ 0x70F3,0x7110,0x70FC,0x70FF,0x7106,0x7113,0x7100,0x70F8,/* 0x50-0x57 */ 0x70F6,0x710B,0x7102,0x710E,0x727E,0x727B,0x727C,0x727F,/* 0x58-0x5F */ 0x731D,0x7317,0x7307,0x7311,0x7318,0x730A,0x7308,0x72FF,/* 0x60-0x67 */ 0x730F,0x731E,0x7388,0x73F6,0x73F8,0x73F5,0x7404,0x7401,/* 0x68-0x6F */ 0x73FD,0x7407,0x7400,0x73FA,0x73FC,0x73FF,0x740C,0x740B,/* 0x70-0x77 */ 0x73F4,0x7408,0x7564,0x7563,0x75CE,0x75D2,0x75CF,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x75CB,0x75CC,0x75D1,0x75D0,0x768F,0x7689,0x76D3,/* 0xA0-0xA7 */ 0x7739,0x772F,0x772D,0x7731,0x7732,0x7734,0x7733,0x773D,/* 0xA8-0xAF */ 0x7725,0x773B,0x7735,0x7848,0x7852,0x7849,0x784D,0x784A,/* 0xB0-0xB7 */ 0x784C,0x7826,0x7845,0x7850,0x7964,0x7967,0x7969,0x796A,/* 0xB8-0xBF */ 0x7963,0x796B,0x7961,0x79BB,0x79FA,0x79F8,0x79F6,0x79F7,/* 0xC0-0xC7 */ 0x7A8F,0x7A94,0x7A90,0x7B35,0x7B47,0x7B34,0x7B25,0x7B30,/* 0xC8-0xCF */ 0x7B22,0x7B24,0x7B33,0x7B18,0x7B2A,0x7B1D,0x7B31,0x7B2B,/* 0xD0-0xD7 */ 0x7B2D,0x7B2F,0x7B32,0x7B38,0x7B1A,0x7B23,0x7C94,0x7C98,/* 0xD8-0xDF */ 0x7C96,0x7CA3,0x7D35,0x7D3D,0x7D38,0x7D36,0x7D3A,0x7D45,/* 0xE0-0xE7 */ 0x7D2C,0x7D29,0x7D41,0x7D47,0x7D3E,0x7D3F,0x7D4A,0x7D3B,/* 0xE8-0xEF */ 0x7D28,0x7F63,0x7F95,0x7F9C,0x7F9D,0x7F9B,0x7FCA,0x7FCB,/* 0xF0-0xF7 */ 0x7FCD,0x7FD0,0x7FD1,0x7FC7,0x7FCF,0x7FC9,0x801F,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_D7[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x801E,0x801B,0x8047,0x8043,0x8048,0x8118,0x8125,0x8119,/* 0x40-0x47 */ 0x811B,0x812D,0x811F,0x812C,0x811E,0x8121,0x8115,0x8127,/* 0x48-0x4F */ 0x811D,0x8122,0x8211,0x8238,0x8233,0x823A,0x8234,0x8232,/* 0x50-0x57 */ 0x8274,0x8390,0x83A3,0x83A8,0x838D,0x837A,0x8373,0x83A4,/* 0x58-0x5F */ 0x8374,0x838F,0x8381,0x8395,0x8399,0x8375,0x8394,0x83A9,/* 0x60-0x67 */ 0x837D,0x8383,0x838C,0x839D,0x839B,0x83AA,0x838B,0x837E,/* 0x68-0x6F */ 0x83A5,0x83AF,0x8388,0x8397,0x83B0,0x837F,0x83A6,0x8387,/* 0x70-0x77 */ 0x83AE,0x8376,0x839A,0x8659,0x8656,0x86BF,0x86B7,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x86C2,0x86C1,0x86C5,0x86BA,0x86B0,0x86C8,0x86B9,/* 0xA0-0xA7 */ 0x86B3,0x86B8,0x86CC,0x86B4,0x86BB,0x86BC,0x86C3,0x86BD,/* 0xA8-0xAF */ 0x86BE,0x8852,0x8889,0x8895,0x88A8,0x88A2,0x88AA,0x889A,/* 0xB0-0xB7 */ 0x8891,0x88A1,0x889F,0x8898,0x88A7,0x8899,0x889B,0x8897,/* 0xB8-0xBF */ 0x88A4,0x88AC,0x888C,0x8893,0x888E,0x8982,0x89D6,0x89D9,/* 0xC0-0xC7 */ 0x89D5,0x8A30,0x8A27,0x8A2C,0x8A1E,0x8C39,0x8C3B,0x8C5C,/* 0xC8-0xCF */ 0x8C5D,0x8C7D,0x8CA5,0x8D7D,0x8D7B,0x8D79,0x8DBC,0x8DC2,/* 0xD0-0xD7 */ 0x8DB9,0x8DBF,0x8DC1,0x8ED8,0x8EDE,0x8EDD,0x8EDC,0x8ED7,/* 0xD8-0xDF */ 0x8EE0,0x8EE1,0x9024,0x900B,0x9011,0x901C,0x900C,0x9021,/* 0xE0-0xE7 */ 0x90EF,0x90EA,0x90F0,0x90F4,0x90F2,0x90F3,0x90D4,0x90EB,/* 0xE8-0xEF */ 0x90EC,0x90E9,0x9156,0x9158,0x915A,0x9153,0x9155,0x91EC,/* 0xF0-0xF7 */ 0x91F4,0x91F1,0x91F3,0x91F8,0x91E4,0x91F9,0x91EA,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_D8[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x91EB,0x91F7,0x91E8,0x91EE,0x957A,0x9586,0x9588,0x967C,/* 0x40-0x47 */ 0x966D,0x966B,0x9671,0x966F,0x96BF,0x976A,0x9804,0x98E5,/* 0x48-0x4F */ 0x9997,0x509B,0x5095,0x5094,0x509E,0x508B,0x50A3,0x5083,/* 0x50-0x57 */ 0x508C,0x508E,0x509D,0x5068,0x509C,0x5092,0x5082,0x5087,/* 0x58-0x5F */ 0x515F,0x51D4,0x5312,0x5311,0x53A4,0x53A7,0x5591,0x55A8,/* 0x60-0x67 */ 0x55A5,0x55AD,0x5577,0x5645,0x55A2,0x5593,0x5588,0x558F,/* 0x68-0x6F */ 0x55B5,0x5581,0x55A3,0x5592,0x55A4,0x557D,0x558C,0x55A6,/* 0x70-0x77 */ 0x557F,0x5595,0x55A1,0x558E,0x570C,0x5829,0x5837,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x5819,0x581E,0x5827,0x5823,0x5828,0x57F5,0x5848,/* 0xA0-0xA7 */ 0x5825,0x581C,0x581B,0x5833,0x583F,0x5836,0x582E,0x5839,/* 0xA8-0xAF */ 0x5838,0x582D,0x582C,0x583B,0x5961,0x5AAF,0x5A94,0x5A9F,/* 0xB0-0xB7 */ 0x5A7A,0x5AA2,0x5A9E,0x5A78,0x5AA6,0x5A7C,0x5AA5,0x5AAC,/* 0xB8-0xBF */ 0x5A95,0x5AAE,0x5A37,0x5A84,0x5A8A,0x5A97,0x5A83,0x5A8B,/* 0xC0-0xC7 */ 0x5AA9,0x5A7B,0x5A7D,0x5A8C,0x5A9C,0x5A8F,0x5A93,0x5A9D,/* 0xC8-0xCF */ 0x5BEA,0x5BCD,0x5BCB,0x5BD4,0x5BD1,0x5BCA,0x5BCE,0x5C0C,/* 0xD0-0xD7 */ 0x5C30,0x5D37,0x5D43,0x5D6B,0x5D41,0x5D4B,0x5D3F,0x5D35,/* 0xD8-0xDF */ 0x5D51,0x5D4E,0x5D55,0x5D33,0x5D3A,0x5D52,0x5D3D,0x5D31,/* 0xE0-0xE7 */ 0x5D59,0x5D42,0x5D39,0x5D49,0x5D38,0x5D3C,0x5D32,0x5D36,/* 0xE8-0xEF */ 0x5D40,0x5D45,0x5E44,0x5E41,0x5F58,0x5FA6,0x5FA5,0x5FAB,/* 0xF0-0xF7 */ 0x60C9,0x60B9,0x60CC,0x60E2,0x60CE,0x60C4,0x6114,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_D9[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x60F2,0x610A,0x6116,0x6105,0x60F5,0x6113,0x60F8,0x60FC,/* 0x40-0x47 */ 0x60FE,0x60C1,0x6103,0x6118,0x611D,0x6110,0x60FF,0x6104,/* 0x48-0x4F */ 0x610B,0x624A,0x6394,0x63B1,0x63B0,0x63CE,0x63E5,0x63E8,/* 0x50-0x57 */ 0x63EF,0x63C3,0x649D,0x63F3,0x63CA,0x63E0,0x63F6,0x63D5,/* 0x58-0x5F */ 0x63F2,0x63F5,0x6461,0x63DF,0x63BE,0x63DD,0x63DC,0x63C4,/* 0x60-0x67 */ 0x63D8,0x63D3,0x63C2,0x63C7,0x63CC,0x63CB,0x63C8,0x63F0,/* 0x68-0x6F */ 0x63D7,0x63D9,0x6532,0x6567,0x656A,0x6564,0x655C,0x6568,/* 0x70-0x77 */ 0x6565,0x658C,0x659D,0x659E,0x65AE,0x65D0,0x65D2,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x667C,0x666C,0x667B,0x6680,0x6671,0x6679,0x666A,/* 0xA0-0xA7 */ 0x6672,0x6701,0x690C,0x68D3,0x6904,0x68DC,0x692A,0x68EC,/* 0xA8-0xAF */ 0x68EA,0x68F1,0x690F,0x68D6,0x68F7,0x68EB,0x68E4,0x68F6,/* 0xB0-0xB7 */ 0x6913,0x6910,0x68F3,0x68E1,0x6907,0x68CC,0x6908,0x6970,/* 0xB8-0xBF */ 0x68B4,0x6911,0x68EF,0x68C6,0x6914,0x68F8,0x68D0,0x68FD,/* 0xC0-0xC7 */ 0x68FC,0x68E8,0x690B,0x690A,0x6917,0x68CE,0x68C8,0x68DD,/* 0xC8-0xCF */ 0x68DE,0x68E6,0x68F4,0x68D1,0x6906,0x68D4,0x68E9,0x6915,/* 0xD0-0xD7 */ 0x6925,0x68C7,0x6B39,0x6B3B,0x6B3F,0x6B3C,0x6B94,0x6B97,/* 0xD8-0xDF */ 0x6B99,0x6B95,0x6BBD,0x6BF0,0x6BF2,0x6BF3,0x6C30,0x6DFC,/* 0xE0-0xE7 */ 0x6E46,0x6E47,0x6E1F,0x6E49,0x6E88,0x6E3C,0x6E3D,0x6E45,/* 0xE8-0xEF */ 0x6E62,0x6E2B,0x6E3F,0x6E41,0x6E5D,0x6E73,0x6E1C,0x6E33,/* 0xF0-0xF7 */ 0x6E4B,0x6E40,0x6E51,0x6E3B,0x6E03,0x6E2E,0x6E5E,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_DA[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x6E68,0x6E5C,0x6E61,0x6E31,0x6E28,0x6E60,0x6E71,0x6E6B,/* 0x40-0x47 */ 0x6E39,0x6E22,0x6E30,0x6E53,0x6E65,0x6E27,0x6E78,0x6E64,/* 0x48-0x4F */ 0x6E77,0x6E55,0x6E79,0x6E52,0x6E66,0x6E35,0x6E36,0x6E5A,/* 0x50-0x57 */ 0x7120,0x711E,0x712F,0x70FB,0x712E,0x7131,0x7123,0x7125,/* 0x58-0x5F */ 0x7122,0x7132,0x711F,0x7128,0x713A,0x711B,0x724B,0x725A,/* 0x60-0x67 */ 0x7288,0x7289,0x7286,0x7285,0x728B,0x7312,0x730B,0x7330,/* 0x68-0x6F */ 0x7322,0x7331,0x7333,0x7327,0x7332,0x732D,0x7326,0x7323,/* 0x70-0x77 */ 0x7335,0x730C,0x742E,0x742C,0x7430,0x742B,0x7416,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x741A,0x7421,0x742D,0x7431,0x7424,0x7423,0x741D,/* 0xA0-0xA7 */ 0x7429,0x7420,0x7432,0x74FB,0x752F,0x756F,0x756C,0x75E7,/* 0xA8-0xAF */ 0x75DA,0x75E1,0x75E6,0x75DD,0x75DF,0x75E4,0x75D7,0x7695,/* 0xB0-0xB7 */ 0x7692,0x76DA,0x7746,0x7747,0x7744,0x774D,0x7745,0x774A,/* 0xB8-0xBF */ 0x774E,0x774B,0x774C,0x77DE,0x77EC,0x7860,0x7864,0x7865,/* 0xC0-0xC7 */ 0x785C,0x786D,0x7871,0x786A,0x786E,0x7870,0x7869,0x7868,/* 0xC8-0xCF */ 0x785E,0x7862,0x7974,0x7973,0x7972,0x7970,0x7A02,0x7A0A,/* 0xD0-0xD7 */ 0x7A03,0x7A0C,0x7A04,0x7A99,0x7AE6,0x7AE4,0x7B4A,0x7B3B,/* 0xD8-0xDF */ 0x7B44,0x7B48,0x7B4C,0x7B4E,0x7B40,0x7B58,0x7B45,0x7CA2,/* 0xE0-0xE7 */ 0x7C9E,0x7CA8,0x7CA1,0x7D58,0x7D6F,0x7D63,0x7D53,0x7D56,/* 0xE8-0xEF */ 0x7D67,0x7D6A,0x7D4F,0x7D6D,0x7D5C,0x7D6B,0x7D52,0x7D54,/* 0xF0-0xF7 */ 0x7D69,0x7D51,0x7D5F,0x7D4E,0x7F3E,0x7F3F,0x7F65,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_DB[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x7F66,0x7FA2,0x7FA0,0x7FA1,0x7FD7,0x8051,0x804F,0x8050,/* 0x40-0x47 */ 0x80FE,0x80D4,0x8143,0x814A,0x8152,0x814F,0x8147,0x813D,/* 0x48-0x4F */ 0x814D,0x813A,0x81E6,0x81EE,0x81F7,0x81F8,0x81F9,0x8204,/* 0x50-0x57 */ 0x823C,0x823D,0x823F,0x8275,0x833B,0x83CF,0x83F9,0x8423,/* 0x58-0x5F */ 0x83C0,0x83E8,0x8412,0x83E7,0x83E4,0x83FC,0x83F6,0x8410,/* 0x60-0x67 */ 0x83C6,0x83C8,0x83EB,0x83E3,0x83BF,0x8401,0x83DD,0x83E5,/* 0x68-0x6F */ 0x83D8,0x83FF,0x83E1,0x83CB,0x83CE,0x83D6,0x83F5,0x83C9,/* 0x70-0x77 */ 0x8409,0x840F,0x83DE,0x8411,0x8406,0x83C2,0x83F3,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x83D5,0x83FA,0x83C7,0x83D1,0x83EA,0x8413,0x83C3,/* 0xA0-0xA7 */ 0x83EC,0x83EE,0x83C4,0x83FB,0x83D7,0x83E2,0x841B,0x83DB,/* 0xA8-0xAF */ 0x83FE,0x86D8,0x86E2,0x86E6,0x86D3,0x86E3,0x86DA,0x86EA,/* 0xB0-0xB7 */ 0x86DD,0x86EB,0x86DC,0x86EC,0x86E9,0x86D7,0x86E8,0x86D1,/* 0xB8-0xBF */ 0x8848,0x8856,0x8855,0x88BA,0x88D7,0x88B9,0x88B8,0x88C0,/* 0xC0-0xC7 */ 0x88BE,0x88B6,0x88BC,0x88B7,0x88BD,0x88B2,0x8901,0x88C9,/* 0xC8-0xCF */ 0x8995,0x8998,0x8997,0x89DD,0x89DA,0x89DB,0x8A4E,0x8A4D,/* 0xD0-0xD7 */ 0x8A39,0x8A59,0x8A40,0x8A57,0x8A58,0x8A44,0x8A45,0x8A52,/* 0xD8-0xDF */ 0x8A48,0x8A51,0x8A4A,0x8A4C,0x8A4F,0x8C5F,0x8C81,0x8C80,/* 0xE0-0xE7 */ 0x8CBA,0x8CBE,0x8CB0,0x8CB9,0x8CB5,0x8D84,0x8D80,0x8D89,/* 0xE8-0xEF */ 0x8DD8,0x8DD3,0x8DCD,0x8DC7,0x8DD6,0x8DDC,0x8DCF,0x8DD5,/* 0xF0-0xF7 */ 0x8DD9,0x8DC8,0x8DD7,0x8DC5,0x8EEF,0x8EF7,0x8EFA,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_DC[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x8EF9,0x8EE6,0x8EEE,0x8EE5,0x8EF5,0x8EE7,0x8EE8,0x8EF6,/* 0x40-0x47 */ 0x8EEB,0x8EF1,0x8EEC,0x8EF4,0x8EE9,0x902D,0x9034,0x902F,/* 0x48-0x4F */ 0x9106,0x912C,0x9104,0x90FF,0x90FC,0x9108,0x90F9,0x90FB,/* 0x50-0x57 */ 0x9101,0x9100,0x9107,0x9105,0x9103,0x9161,0x9164,0x915F,/* 0x58-0x5F */ 0x9162,0x9160,0x9201,0x920A,0x9225,0x9203,0x921A,0x9226,/* 0x60-0x67 */ 0x920F,0x920C,0x9200,0x9212,0x91FF,0x91FD,0x9206,0x9204,/* 0x68-0x6F */ 0x9227,0x9202,0x921C,0x9224,0x9219,0x9217,0x9205,0x9216,/* 0x70-0x77 */ 0x957B,0x958D,0x958C,0x9590,0x9687,0x967E,0x9688,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x9689,0x9683,0x9680,0x96C2,0x96C8,0x96C3,0x96F1,/* 0xA0-0xA7 */ 0x96F0,0x976C,0x9770,0x976E,0x9807,0x98A9,0x98EB,0x9CE6,/* 0xA8-0xAF */ 0x9EF9,0x4E83,0x4E84,0x4EB6,0x50BD,0x50BF,0x50C6,0x50AE,/* 0xB0-0xB7 */ 0x50C4,0x50CA,0x50B4,0x50C8,0x50C2,0x50B0,0x50C1,0x50BA,/* 0xB8-0xBF */ 0x50B1,0x50CB,0x50C9,0x50B6,0x50B8,0x51D7,0x527A,0x5278,/* 0xC0-0xC7 */ 0x527B,0x527C,0x55C3,0x55DB,0x55CC,0x55D0,0x55CB,0x55CA,/* 0xC8-0xCF */ 0x55DD,0x55C0,0x55D4,0x55C4,0x55E9,0x55BF,0x55D2,0x558D,/* 0xD0-0xD7 */ 0x55CF,0x55D5,0x55E2,0x55D6,0x55C8,0x55F2,0x55CD,0x55D9,/* 0xD8-0xDF */ 0x55C2,0x5714,0x5853,0x5868,0x5864,0x584F,0x584D,0x5849,/* 0xE0-0xE7 */ 0x586F,0x5855,0x584E,0x585D,0x5859,0x5865,0x585B,0x583D,/* 0xE8-0xEF */ 0x5863,0x5871,0x58FC,0x5AC7,0x5AC4,0x5ACB,0x5ABA,0x5AB8,/* 0xF0-0xF7 */ 0x5AB1,0x5AB5,0x5AB0,0x5ABF,0x5AC8,0x5ABB,0x5AC6,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_DD[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x5AB7,0x5AC0,0x5ACA,0x5AB4,0x5AB6,0x5ACD,0x5AB9,0x5A90,/* 0x40-0x47 */ 0x5BD6,0x5BD8,0x5BD9,0x5C1F,0x5C33,0x5D71,0x5D63,0x5D4A,/* 0x48-0x4F */ 0x5D65,0x5D72,0x5D6C,0x5D5E,0x5D68,0x5D67,0x5D62,0x5DF0,/* 0x50-0x57 */ 0x5E4F,0x5E4E,0x5E4A,0x5E4D,0x5E4B,0x5EC5,0x5ECC,0x5EC6,/* 0x58-0x5F */ 0x5ECB,0x5EC7,0x5F40,0x5FAF,0x5FAD,0x60F7,0x6149,0x614A,/* 0x60-0x67 */ 0x612B,0x6145,0x6136,0x6132,0x612E,0x6146,0x612F,0x614F,/* 0x68-0x6F */ 0x6129,0x6140,0x6220,0x9168,0x6223,0x6225,0x6224,0x63C5,/* 0x70-0x77 */ 0x63F1,0x63EB,0x6410,0x6412,0x6409,0x6420,0x6424,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x6433,0x6443,0x641F,0x6415,0x6418,0x6439,0x6437,/* 0xA0-0xA7 */ 0x6422,0x6423,0x640C,0x6426,0x6430,0x6428,0x6441,0x6435,/* 0xA8-0xAF */ 0x642F,0x640A,0x641A,0x6440,0x6425,0x6427,0x640B,0x63E7,/* 0xB0-0xB7 */ 0x641B,0x642E,0x6421,0x640E,0x656F,0x6592,0x65D3,0x6686,/* 0xB8-0xBF */ 0x668C,0x6695,0x6690,0x668B,0x668A,0x6699,0x6694,0x6678,/* 0xC0-0xC7 */ 0x6720,0x6966,0x695F,0x6938,0x694E,0x6962,0x6971,0x693F,/* 0xC8-0xCF */ 0x6945,0x696A,0x6939,0x6942,0x6957,0x6959,0x697A,0x6948,/* 0xD0-0xD7 */ 0x6949,0x6935,0x696C,0x6933,0x693D,0x6965,0x68F0,0x6978,/* 0xD8-0xDF */ 0x6934,0x6969,0x6940,0x696F,0x6944,0x6976,0x6958,0x6941,/* 0xE0-0xE7 */ 0x6974,0x694C,0x693B,0x694B,0x6937,0x695C,0x694F,0x6951,/* 0xE8-0xEF */ 0x6932,0x6952,0x692F,0x697B,0x693C,0x6B46,0x6B45,0x6B43,/* 0xF0-0xF7 */ 0x6B42,0x6B48,0x6B41,0x6B9B,0xFA0D,0x6BFB,0x6BFC,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_DE[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x6BF9,0x6BF7,0x6BF8,0x6E9B,0x6ED6,0x6EC8,0x6E8F,0x6EC0,/* 0x40-0x47 */ 0x6E9F,0x6E93,0x6E94,0x6EA0,0x6EB1,0x6EB9,0x6EC6,0x6ED2,/* 0x48-0x4F */ 0x6EBD,0x6EC1,0x6E9E,0x6EC9,0x6EB7,0x6EB0,0x6ECD,0x6EA6,/* 0x50-0x57 */ 0x6ECF,0x6EB2,0x6EBE,0x6EC3,0x6EDC,0x6ED8,0x6E99,0x6E92,/* 0x58-0x5F */ 0x6E8E,0x6E8D,0x6EA4,0x6EA1,0x6EBF,0x6EB3,0x6ED0,0x6ECA,/* 0x60-0x67 */ 0x6E97,0x6EAE,0x6EA3,0x7147,0x7154,0x7152,0x7163,0x7160,/* 0x68-0x6F */ 0x7141,0x715D,0x7162,0x7172,0x7178,0x716A,0x7161,0x7142,/* 0x70-0x77 */ 0x7158,0x7143,0x714B,0x7170,0x715F,0x7150,0x7153,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x7144,0x714D,0x715A,0x724F,0x728D,0x728C,0x7291,/* 0xA0-0xA7 */ 0x7290,0x728E,0x733C,0x7342,0x733B,0x733A,0x7340,0x734A,/* 0xA8-0xAF */ 0x7349,0x7444,0x744A,0x744B,0x7452,0x7451,0x7457,0x7440,/* 0xB0-0xB7 */ 0x744F,0x7450,0x744E,0x7442,0x7446,0x744D,0x7454,0x74E1,/* 0xB8-0xBF */ 0x74FF,0x74FE,0x74FD,0x751D,0x7579,0x7577,0x6983,0x75EF,/* 0xC0-0xC7 */ 0x760F,0x7603,0x75F7,0x75FE,0x75FC,0x75F9,0x75F8,0x7610,/* 0xC8-0xCF */ 0x75FB,0x75F6,0x75ED,0x75F5,0x75FD,0x7699,0x76B5,0x76DD,/* 0xD0-0xD7 */ 0x7755,0x775F,0x7760,0x7752,0x7756,0x775A,0x7769,0x7767,/* 0xD8-0xDF */ 0x7754,0x7759,0x776D,0x77E0,0x7887,0x789A,0x7894,0x788F,/* 0xE0-0xE7 */ 0x7884,0x7895,0x7885,0x7886,0x78A1,0x7883,0x7879,0x7899,/* 0xE8-0xEF */ 0x7880,0x7896,0x787B,0x797C,0x7982,0x797D,0x7979,0x7A11,/* 0xF0-0xF7 */ 0x7A18,0x7A19,0x7A12,0x7A17,0x7A15,0x7A22,0x7A13,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_DF[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x7A1B,0x7A10,0x7AA3,0x7AA2,0x7A9E,0x7AEB,0x7B66,0x7B64,/* 0x40-0x47 */ 0x7B6D,0x7B74,0x7B69,0x7B72,0x7B65,0x7B73,0x7B71,0x7B70,/* 0x48-0x4F */ 0x7B61,0x7B78,0x7B76,0x7B63,0x7CB2,0x7CB4,0x7CAF,0x7D88,/* 0x50-0x57 */ 0x7D86,0x7D80,0x7D8D,0x7D7F,0x7D85,0x7D7A,0x7D8E,0x7D7B,/* 0x58-0x5F */ 0x7D83,0x7D7C,0x7D8C,0x7D94,0x7D84,0x7D7D,0x7D92,0x7F6D,/* 0x60-0x67 */ 0x7F6B,0x7F67,0x7F68,0x7F6C,0x7FA6,0x7FA5,0x7FA7,0x7FDB,/* 0x68-0x6F */ 0x7FDC,0x8021,0x8164,0x8160,0x8177,0x815C,0x8169,0x815B,/* 0x70-0x77 */ 0x8162,0x8172,0x6721,0x815E,0x8176,0x8167,0x816F,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x8144,0x8161,0x821D,0x8249,0x8244,0x8240,0x8242,/* 0xA0-0xA7 */ 0x8245,0x84F1,0x843F,0x8456,0x8476,0x8479,0x848F,0x848D,/* 0xA8-0xAF */ 0x8465,0x8451,0x8440,0x8486,0x8467,0x8430,0x844D,0x847D,/* 0xB0-0xB7 */ 0x845A,0x8459,0x8474,0x8473,0x845D,0x8507,0x845E,0x8437,/* 0xB8-0xBF */ 0x843A,0x8434,0x847A,0x8443,0x8478,0x8432,0x8445,0x8429,/* 0xC0-0xC7 */ 0x83D9,0x844B,0x842F,0x8442,0x842D,0x845F,0x8470,0x8439,/* 0xC8-0xCF */ 0x844E,0x844C,0x8452,0x846F,0x84C5,0x848E,0x843B,0x8447,/* 0xD0-0xD7 */ 0x8436,0x8433,0x8468,0x847E,0x8444,0x842B,0x8460,0x8454,/* 0xD8-0xDF */ 0x846E,0x8450,0x870B,0x8704,0x86F7,0x870C,0x86FA,0x86D6,/* 0xE0-0xE7 */ 0x86F5,0x874D,0x86F8,0x870E,0x8709,0x8701,0x86F6,0x870D,/* 0xE8-0xEF */ 0x8705,0x88D6,0x88CB,0x88CD,0x88CE,0x88DE,0x88DB,0x88DA,/* 0xF0-0xF7 */ 0x88CC,0x88D0,0x8985,0x899B,0x89DF,0x89E5,0x89E4,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_E0[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x89E1,0x89E0,0x89E2,0x89DC,0x89E6,0x8A76,0x8A86,0x8A7F,/* 0x40-0x47 */ 0x8A61,0x8A3F,0x8A77,0x8A82,0x8A84,0x8A75,0x8A83,0x8A81,/* 0x48-0x4F */ 0x8A74,0x8A7A,0x8C3C,0x8C4B,0x8C4A,0x8C65,0x8C64,0x8C66,/* 0x50-0x57 */ 0x8C86,0x8C84,0x8C85,0x8CCC,0x8D68,0x8D69,0x8D91,0x8D8C,/* 0x58-0x5F */ 0x8D8E,0x8D8F,0x8D8D,0x8D93,0x8D94,0x8D90,0x8D92,0x8DF0,/* 0x60-0x67 */ 0x8DE0,0x8DEC,0x8DF1,0x8DEE,0x8DD0,0x8DE9,0x8DE3,0x8DE2,/* 0x68-0x6F */ 0x8DE7,0x8DF2,0x8DEB,0x8DF4,0x8F06,0x8EFF,0x8F01,0x8F00,/* 0x70-0x77 */ 0x8F05,0x8F07,0x8F08,0x8F02,0x8F0B,0x9052,0x903F,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x9044,0x9049,0x903D,0x9110,0x910D,0x910F,0x9111,/* 0xA0-0xA7 */ 0x9116,0x9114,0x910B,0x910E,0x916E,0x916F,0x9248,0x9252,/* 0xA8-0xAF */ 0x9230,0x923A,0x9266,0x9233,0x9265,0x925E,0x9283,0x922E,/* 0xB0-0xB7 */ 0x924A,0x9246,0x926D,0x926C,0x924F,0x9260,0x9267,0x926F,/* 0xB8-0xBF */ 0x9236,0x9261,0x9270,0x9231,0x9254,0x9263,0x9250,0x9272,/* 0xC0-0xC7 */ 0x924E,0x9253,0x924C,0x9256,0x9232,0x959F,0x959C,0x959E,/* 0xC8-0xCF */ 0x959B,0x9692,0x9693,0x9691,0x9697,0x96CE,0x96FA,0x96FD,/* 0xD0-0xD7 */ 0x96F8,0x96F5,0x9773,0x9777,0x9778,0x9772,0x980F,0x980D,/* 0xD8-0xDF */ 0x980E,0x98AC,0x98F6,0x98F9,0x99AF,0x99B2,0x99B0,0x99B5,/* 0xE0-0xE7 */ 0x9AAD,0x9AAB,0x9B5B,0x9CEA,0x9CED,0x9CE7,0x9E80,0x9EFD,/* 0xE8-0xEF */ 0x50E6,0x50D4,0x50D7,0x50E8,0x50F3,0x50DB,0x50EA,0x50DD,/* 0xF0-0xF7 */ 0x50E4,0x50D3,0x50EC,0x50F0,0x50EF,0x50E3,0x50E0,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_E1[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x51D8,0x5280,0x5281,0x52E9,0x52EB,0x5330,0x53AC,0x5627,/* 0x40-0x47 */ 0x5615,0x560C,0x5612,0x55FC,0x560F,0x561C,0x5601,0x5613,/* 0x48-0x4F */ 0x5602,0x55FA,0x561D,0x5604,0x55FF,0x55F9,0x5889,0x587C,/* 0x50-0x57 */ 0x5890,0x5898,0x5886,0x5881,0x587F,0x5874,0x588B,0x587A,/* 0x58-0x5F */ 0x5887,0x5891,0x588E,0x5876,0x5882,0x5888,0x587B,0x5894,/* 0x60-0x67 */ 0x588F,0x58FE,0x596B,0x5ADC,0x5AEE,0x5AE5,0x5AD5,0x5AEA,/* 0x68-0x6F */ 0x5ADA,0x5AED,0x5AEB,0x5AF3,0x5AE2,0x5AE0,0x5ADB,0x5AEC,/* 0x70-0x77 */ 0x5ADE,0x5ADD,0x5AD9,0x5AE8,0x5ADF,0x5B77,0x5BE0,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x5BE3,0x5C63,0x5D82,0x5D80,0x5D7D,0x5D86,0x5D7A,/* 0xA0-0xA7 */ 0x5D81,0x5D77,0x5D8A,0x5D89,0x5D88,0x5D7E,0x5D7C,0x5D8D,/* 0xA8-0xAF */ 0x5D79,0x5D7F,0x5E58,0x5E59,0x5E53,0x5ED8,0x5ED1,0x5ED7,/* 0xB0-0xB7 */ 0x5ECE,0x5EDC,0x5ED5,0x5ED9,0x5ED2,0x5ED4,0x5F44,0x5F43,/* 0xB8-0xBF */ 0x5F6F,0x5FB6,0x612C,0x6128,0x6141,0x615E,0x6171,0x6173,/* 0xC0-0xC7 */ 0x6152,0x6153,0x6172,0x616C,0x6180,0x6174,0x6154,0x617A,/* 0xC8-0xCF */ 0x615B,0x6165,0x613B,0x616A,0x6161,0x6156,0x6229,0x6227,/* 0xD0-0xD7 */ 0x622B,0x642B,0x644D,0x645B,0x645D,0x6474,0x6476,0x6472,/* 0xD8-0xDF */ 0x6473,0x647D,0x6475,0x6466,0x64A6,0x644E,0x6482,0x645E,/* 0xE0-0xE7 */ 0x645C,0x644B,0x6453,0x6460,0x6450,0x647F,0x643F,0x646C,/* 0xE8-0xEF */ 0x646B,0x6459,0x6465,0x6477,0x6573,0x65A0,0x66A1,0x66A0,/* 0xF0-0xF7 */ 0x669F,0x6705,0x6704,0x6722,0x69B1,0x69B6,0x69C9,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_E2[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x69A0,0x69CE,0x6996,0x69B0,0x69AC,0x69BC,0x6991,0x6999,/* 0x40-0x47 */ 0x698E,0x69A7,0x698D,0x69A9,0x69BE,0x69AF,0x69BF,0x69C4,/* 0x48-0x4F */ 0x69BD,0x69A4,0x69D4,0x69B9,0x69CA,0x699A,0x69CF,0x69B3,/* 0x50-0x57 */ 0x6993,0x69AA,0x69A1,0x699E,0x69D9,0x6997,0x6990,0x69C2,/* 0x58-0x5F */ 0x69B5,0x69A5,0x69C6,0x6B4A,0x6B4D,0x6B4B,0x6B9E,0x6B9F,/* 0x60-0x67 */ 0x6BA0,0x6BC3,0x6BC4,0x6BFE,0x6ECE,0x6EF5,0x6EF1,0x6F03,/* 0x68-0x6F */ 0x6F25,0x6EF8,0x6F37,0x6EFB,0x6F2E,0x6F09,0x6F4E,0x6F19,/* 0x70-0x77 */ 0x6F1A,0x6F27,0x6F18,0x6F3B,0x6F12,0x6EED,0x6F0A,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x6F36,0x6F73,0x6EF9,0x6EEE,0x6F2D,0x6F40,0x6F30,/* 0xA0-0xA7 */ 0x6F3C,0x6F35,0x6EEB,0x6F07,0x6F0E,0x6F43,0x6F05,0x6EFD,/* 0xA8-0xAF */ 0x6EF6,0x6F39,0x6F1C,0x6EFC,0x6F3A,0x6F1F,0x6F0D,0x6F1E,/* 0xB0-0xB7 */ 0x6F08,0x6F21,0x7187,0x7190,0x7189,0x7180,0x7185,0x7182,/* 0xB8-0xBF */ 0x718F,0x717B,0x7186,0x7181,0x7197,0x7244,0x7253,0x7297,/* 0xC0-0xC7 */ 0x7295,0x7293,0x7343,0x734D,0x7351,0x734C,0x7462,0x7473,/* 0xC8-0xCF */ 0x7471,0x7475,0x7472,0x7467,0x746E,0x7500,0x7502,0x7503,/* 0xD0-0xD7 */ 0x757D,0x7590,0x7616,0x7608,0x760C,0x7615,0x7611,0x760A,/* 0xD8-0xDF */ 0x7614,0x76B8,0x7781,0x777C,0x7785,0x7782,0x776E,0x7780,/* 0xE0-0xE7 */ 0x776F,0x777E,0x7783,0x78B2,0x78AA,0x78B4,0x78AD,0x78A8,/* 0xE8-0xEF */ 0x787E,0x78AB,0x789E,0x78A5,0x78A0,0x78AC,0x78A2,0x78A4,/* 0xF0-0xF7 */ 0x7998,0x798A,0x798B,0x7996,0x7995,0x7994,0x7993,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_E3[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x7997,0x7988,0x7992,0x7990,0x7A2B,0x7A4A,0x7A30,0x7A2F,/* 0x40-0x47 */ 0x7A28,0x7A26,0x7AA8,0x7AAB,0x7AAC,0x7AEE,0x7B88,0x7B9C,/* 0x48-0x4F */ 0x7B8A,0x7B91,0x7B90,0x7B96,0x7B8D,0x7B8C,0x7B9B,0x7B8E,/* 0x50-0x57 */ 0x7B85,0x7B98,0x5284,0x7B99,0x7BA4,0x7B82,0x7CBB,0x7CBF,/* 0x58-0x5F */ 0x7CBC,0x7CBA,0x7DA7,0x7DB7,0x7DC2,0x7DA3,0x7DAA,0x7DC1,/* 0x60-0x67 */ 0x7DC0,0x7DC5,0x7D9D,0x7DCE,0x7DC4,0x7DC6,0x7DCB,0x7DCC,/* 0x68-0x6F */ 0x7DAF,0x7DB9,0x7D96,0x7DBC,0x7D9F,0x7DA6,0x7DAE,0x7DA9,/* 0x70-0x77 */ 0x7DA1,0x7DC9,0x7F73,0x7FE2,0x7FE3,0x7FE5,0x7FDE,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x8024,0x805D,0x805C,0x8189,0x8186,0x8183,0x8187,/* 0xA0-0xA7 */ 0x818D,0x818C,0x818B,0x8215,0x8497,0x84A4,0x84A1,0x849F,/* 0xA8-0xAF */ 0x84BA,0x84CE,0x84C2,0x84AC,0x84AE,0x84AB,0x84B9,0x84B4,/* 0xB0-0xB7 */ 0x84C1,0x84CD,0x84AA,0x849A,0x84B1,0x84D0,0x849D,0x84A7,/* 0xB8-0xBF */ 0x84BB,0x84A2,0x8494,0x84C7,0x84CC,0x849B,0x84A9,0x84AF,/* 0xC0-0xC7 */ 0x84A8,0x84D6,0x8498,0x84B6,0x84CF,0x84A0,0x84D7,0x84D4,/* 0xC8-0xCF */ 0x84D2,0x84DB,0x84B0,0x8491,0x8661,0x8733,0x8723,0x8728,/* 0xD0-0xD7 */ 0x876B,0x8740,0x872E,0x871E,0x8721,0x8719,0x871B,0x8743,/* 0xD8-0xDF */ 0x872C,0x8741,0x873E,0x8746,0x8720,0x8732,0x872A,0x872D,/* 0xE0-0xE7 */ 0x873C,0x8712,0x873A,0x8731,0x8735,0x8742,0x8726,0x8727,/* 0xE8-0xEF */ 0x8738,0x8724,0x871A,0x8730,0x8711,0x88F7,0x88E7,0x88F1,/* 0xF0-0xF7 */ 0x88F2,0x88FA,0x88FE,0x88EE,0x88FC,0x88F6,0x88FB,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_E4[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x88F0,0x88EC,0x88EB,0x899D,0x89A1,0x899F,0x899E,0x89E9,/* 0x40-0x47 */ 0x89EB,0x89E8,0x8AAB,0x8A99,0x8A8B,0x8A92,0x8A8F,0x8A96,/* 0x48-0x4F */ 0x8C3D,0x8C68,0x8C69,0x8CD5,0x8CCF,0x8CD7,0x8D96,0x8E09,/* 0x50-0x57 */ 0x8E02,0x8DFF,0x8E0D,0x8DFD,0x8E0A,0x8E03,0x8E07,0x8E06,/* 0x58-0x5F */ 0x8E05,0x8DFE,0x8E00,0x8E04,0x8F10,0x8F11,0x8F0E,0x8F0D,/* 0x60-0x67 */ 0x9123,0x911C,0x9120,0x9122,0x911F,0x911D,0x911A,0x9124,/* 0x68-0x6F */ 0x9121,0x911B,0x917A,0x9172,0x9179,0x9173,0x92A5,0x92A4,/* 0x70-0x77 */ 0x9276,0x929B,0x927A,0x92A0,0x9294,0x92AA,0x928D,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x92A6,0x929A,0x92AB,0x9279,0x9297,0x927F,0x92A3,/* 0xA0-0xA7 */ 0x92EE,0x928E,0x9282,0x9295,0x92A2,0x927D,0x9288,0x92A1,/* 0xA8-0xAF */ 0x928A,0x9286,0x928C,0x9299,0x92A7,0x927E,0x9287,0x92A9,/* 0xB0-0xB7 */ 0x929D,0x928B,0x922D,0x969E,0x96A1,0x96FF,0x9758,0x977D,/* 0xB8-0xBF */ 0x977A,0x977E,0x9783,0x9780,0x9782,0x977B,0x9784,0x9781,/* 0xC0-0xC7 */ 0x977F,0x97CE,0x97CD,0x9816,0x98AD,0x98AE,0x9902,0x9900,/* 0xC8-0xCF */ 0x9907,0x999D,0x999C,0x99C3,0x99B9,0x99BB,0x99BA,0x99C2,/* 0xD0-0xD7 */ 0x99BD,0x99C7,0x9AB1,0x9AE3,0x9AE7,0x9B3E,0x9B3F,0x9B60,/* 0xD8-0xDF */ 0x9B61,0x9B5F,0x9CF1,0x9CF2,0x9CF5,0x9EA7,0x50FF,0x5103,/* 0xE0-0xE7 */ 0x5130,0x50F8,0x5106,0x5107,0x50F6,0x50FE,0x510B,0x510C,/* 0xE8-0xEF */ 0x50FD,0x510A,0x528B,0x528C,0x52F1,0x52EF,0x5648,0x5642,/* 0xF0-0xF7 */ 0x564C,0x5635,0x5641,0x564A,0x5649,0x5646,0x5658,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_E5[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x565A,0x5640,0x5633,0x563D,0x562C,0x563E,0x5638,0x562A,/* 0x40-0x47 */ 0x563A,0x571A,0x58AB,0x589D,0x58B1,0x58A0,0x58A3,0x58AF,/* 0x48-0x4F */ 0x58AC,0x58A5,0x58A1,0x58FF,0x5AFF,0x5AF4,0x5AFD,0x5AF7,/* 0x50-0x57 */ 0x5AF6,0x5B03,0x5AF8,0x5B02,0x5AF9,0x5B01,0x5B07,0x5B05,/* 0x58-0x5F */ 0x5B0F,0x5C67,0x5D99,0x5D97,0x5D9F,0x5D92,0x5DA2,0x5D93,/* 0x60-0x67 */ 0x5D95,0x5DA0,0x5D9C,0x5DA1,0x5D9A,0x5D9E,0x5E69,0x5E5D,/* 0x68-0x6F */ 0x5E60,0x5E5C,0x7DF3,0x5EDB,0x5EDE,0x5EE1,0x5F49,0x5FB2,/* 0x70-0x77 */ 0x618B,0x6183,0x6179,0x61B1,0x61B0,0x61A2,0x6189,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x619B,0x6193,0x61AF,0x61AD,0x619F,0x6192,0x61AA,/* 0xA0-0xA7 */ 0x61A1,0x618D,0x6166,0x61B3,0x622D,0x646E,0x6470,0x6496,/* 0xA8-0xAF */ 0x64A0,0x6485,0x6497,0x649C,0x648F,0x648B,0x648A,0x648C,/* 0xB0-0xB7 */ 0x64A3,0x649F,0x6468,0x64B1,0x6498,0x6576,0x657A,0x6579,/* 0xB8-0xBF */ 0x657B,0x65B2,0x65B3,0x66B5,0x66B0,0x66A9,0x66B2,0x66B7,/* 0xC0-0xC7 */ 0x66AA,0x66AF,0x6A00,0x6A06,0x6A17,0x69E5,0x69F8,0x6A15,/* 0xC8-0xCF */ 0x69F1,0x69E4,0x6A20,0x69FF,0x69EC,0x69E2,0x6A1B,0x6A1D,/* 0xD0-0xD7 */ 0x69FE,0x6A27,0x69F2,0x69EE,0x6A14,0x69F7,0x69E7,0x6A40,/* 0xD8-0xDF */ 0x6A08,0x69E6,0x69FB,0x6A0D,0x69FC,0x69EB,0x6A09,0x6A04,/* 0xE0-0xE7 */ 0x6A18,0x6A25,0x6A0F,0x69F6,0x6A26,0x6A07,0x69F4,0x6A16,/* 0xE8-0xEF */ 0x6B51,0x6BA5,0x6BA3,0x6BA2,0x6BA6,0x6C01,0x6C00,0x6BFF,/* 0xF0-0xF7 */ 0x6C02,0x6F41,0x6F26,0x6F7E,0x6F87,0x6FC6,0x6F92,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_E6[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x6F8D,0x6F89,0x6F8C,0x6F62,0x6F4F,0x6F85,0x6F5A,0x6F96,/* 0x40-0x47 */ 0x6F76,0x6F6C,0x6F82,0x6F55,0x6F72,0x6F52,0x6F50,0x6F57,/* 0x48-0x4F */ 0x6F94,0x6F93,0x6F5D,0x6F00,0x6F61,0x6F6B,0x6F7D,0x6F67,/* 0x50-0x57 */ 0x6F90,0x6F53,0x6F8B,0x6F69,0x6F7F,0x6F95,0x6F63,0x6F77,/* 0x58-0x5F */ 0x6F6A,0x6F7B,0x71B2,0x71AF,0x719B,0x71B0,0x71A0,0x719A,/* 0x60-0x67 */ 0x71A9,0x71B5,0x719D,0x71A5,0x719E,0x71A4,0x71A1,0x71AA,/* 0x68-0x6F */ 0x719C,0x71A7,0x71B3,0x7298,0x729A,0x7358,0x7352,0x735E,/* 0x70-0x77 */ 0x735F,0x7360,0x735D,0x735B,0x7361,0x735A,0x7359,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x7362,0x7487,0x7489,0x748A,0x7486,0x7481,0x747D,/* 0xA0-0xA7 */ 0x7485,0x7488,0x747C,0x7479,0x7508,0x7507,0x757E,0x7625,/* 0xA8-0xAF */ 0x761E,0x7619,0x761D,0x761C,0x7623,0x761A,0x7628,0x761B,/* 0xB0-0xB7 */ 0x769C,0x769D,0x769E,0x769B,0x778D,0x778F,0x7789,0x7788,/* 0xB8-0xBF */ 0x78CD,0x78BB,0x78CF,0x78CC,0x78D1,0x78CE,0x78D4,0x78C8,/* 0xC0-0xC7 */ 0x78C3,0x78C4,0x78C9,0x799A,0x79A1,0x79A0,0x799C,0x79A2,/* 0xC8-0xCF */ 0x799B,0x6B76,0x7A39,0x7AB2,0x7AB4,0x7AB3,0x7BB7,0x7BCB,/* 0xD0-0xD7 */ 0x7BBE,0x7BAC,0x7BCE,0x7BAF,0x7BB9,0x7BCA,0x7BB5,0x7CC5,/* 0xD8-0xDF */ 0x7CC8,0x7CCC,0x7CCB,0x7DF7,0x7DDB,0x7DEA,0x7DE7,0x7DD7,/* 0xE0-0xE7 */ 0x7DE1,0x7E03,0x7DFA,0x7DE6,0x7DF6,0x7DF1,0x7DF0,0x7DEE,/* 0xE8-0xEF */ 0x7DDF,0x7F76,0x7FAC,0x7FB0,0x7FAD,0x7FED,0x7FEB,0x7FEA,/* 0xF0-0xF7 */ 0x7FEC,0x7FE6,0x7FE8,0x8064,0x8067,0x81A3,0x819F,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_E7[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x819E,0x8195,0x81A2,0x8199,0x8197,0x8216,0x824F,0x8253,/* 0x40-0x47 */ 0x8252,0x8250,0x824E,0x8251,0x8524,0x853B,0x850F,0x8500,/* 0x48-0x4F */ 0x8529,0x850E,0x8509,0x850D,0x851F,0x850A,0x8527,0x851C,/* 0x50-0x57 */ 0x84FB,0x852B,0x84FA,0x8508,0x850C,0x84F4,0x852A,0x84F2,/* 0x58-0x5F */ 0x8515,0x84F7,0x84EB,0x84F3,0x84FC,0x8512,0x84EA,0x84E9,/* 0x60-0x67 */ 0x8516,0x84FE,0x8528,0x851D,0x852E,0x8502,0x84FD,0x851E,/* 0x68-0x6F */ 0x84F6,0x8531,0x8526,0x84E7,0x84E8,0x84F0,0x84EF,0x84F9,/* 0x70-0x77 */ 0x8518,0x8520,0x8530,0x850B,0x8519,0x852F,0x8662,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x8756,0x8763,0x8764,0x8777,0x87E1,0x8773,0x8758,/* 0xA0-0xA7 */ 0x8754,0x875B,0x8752,0x8761,0x875A,0x8751,0x875E,0x876D,/* 0xA8-0xAF */ 0x876A,0x8750,0x874E,0x875F,0x875D,0x876F,0x876C,0x877A,/* 0xB0-0xB7 */ 0x876E,0x875C,0x8765,0x874F,0x877B,0x8775,0x8762,0x8767,/* 0xB8-0xBF */ 0x8769,0x885A,0x8905,0x890C,0x8914,0x890B,0x8917,0x8918,/* 0xC0-0xC7 */ 0x8919,0x8906,0x8916,0x8911,0x890E,0x8909,0x89A2,0x89A4,/* 0xC8-0xCF */ 0x89A3,0x89ED,0x89F0,0x89EC,0x8ACF,0x8AC6,0x8AB8,0x8AD3,/* 0xD0-0xD7 */ 0x8AD1,0x8AD4,0x8AD5,0x8ABB,0x8AD7,0x8ABE,0x8AC0,0x8AC5,/* 0xD8-0xDF */ 0x8AD8,0x8AC3,0x8ABA,0x8ABD,0x8AD9,0x8C3E,0x8C4D,0x8C8F,/* 0xE0-0xE7 */ 0x8CE5,0x8CDF,0x8CD9,0x8CE8,0x8CDA,0x8CDD,0x8CE7,0x8DA0,/* 0xE8-0xEF */ 0x8D9C,0x8DA1,0x8D9B,0x8E20,0x8E23,0x8E25,0x8E24,0x8E2E,/* 0xF0-0xF7 */ 0x8E15,0x8E1B,0x8E16,0x8E11,0x8E19,0x8E26,0x8E27,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_E8[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x8E14,0x8E12,0x8E18,0x8E13,0x8E1C,0x8E17,0x8E1A,0x8F2C,/* 0x40-0x47 */ 0x8F24,0x8F18,0x8F1A,0x8F20,0x8F23,0x8F16,0x8F17,0x9073,/* 0x48-0x4F */ 0x9070,0x906F,0x9067,0x906B,0x912F,0x912B,0x9129,0x912A,/* 0x50-0x57 */ 0x9132,0x9126,0x912E,0x9185,0x9186,0x918A,0x9181,0x9182,/* 0x58-0x5F */ 0x9184,0x9180,0x92D0,0x92C3,0x92C4,0x92C0,0x92D9,0x92B6,/* 0x60-0x67 */ 0x92CF,0x92F1,0x92DF,0x92D8,0x92E9,0x92D7,0x92DD,0x92CC,/* 0x68-0x6F */ 0x92EF,0x92C2,0x92E8,0x92CA,0x92C8,0x92CE,0x92E6,0x92CD,/* 0x70-0x77 */ 0x92D5,0x92C9,0x92E0,0x92DE,0x92E7,0x92D1,0x92D3,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x92B5,0x92E1,0x92C6,0x92B4,0x957C,0x95AC,0x95AB,/* 0xA0-0xA7 */ 0x95AE,0x95B0,0x96A4,0x96A2,0x96D3,0x9705,0x9708,0x9702,/* 0xA8-0xAF */ 0x975A,0x978A,0x978E,0x9788,0x97D0,0x97CF,0x981E,0x981D,/* 0xB0-0xB7 */ 0x9826,0x9829,0x9828,0x9820,0x981B,0x9827,0x98B2,0x9908,/* 0xB8-0xBF */ 0x98FA,0x9911,0x9914,0x9916,0x9917,0x9915,0x99DC,0x99CD,/* 0xC0-0xC7 */ 0x99CF,0x99D3,0x99D4,0x99CE,0x99C9,0x99D6,0x99D8,0x99CB,/* 0xC8-0xCF */ 0x99D7,0x99CC,0x9AB3,0x9AEC,0x9AEB,0x9AF3,0x9AF2,0x9AF1,/* 0xD0-0xD7 */ 0x9B46,0x9B43,0x9B67,0x9B74,0x9B71,0x9B66,0x9B76,0x9B75,/* 0xD8-0xDF */ 0x9B70,0x9B68,0x9B64,0x9B6C,0x9CFC,0x9CFA,0x9CFD,0x9CFF,/* 0xE0-0xE7 */ 0x9CF7,0x9D07,0x9D00,0x9CF9,0x9CFB,0x9D08,0x9D05,0x9D04,/* 0xE8-0xEF */ 0x9E83,0x9ED3,0x9F0F,0x9F10,0x511C,0x5113,0x5117,0x511A,/* 0xF0-0xF7 */ 0x5111,0x51DE,0x5334,0x53E1,0x5670,0x5660,0x566E,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_E9[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x5673,0x5666,0x5663,0x566D,0x5672,0x565E,0x5677,0x571C,/* 0x40-0x47 */ 0x571B,0x58C8,0x58BD,0x58C9,0x58BF,0x58BA,0x58C2,0x58BC,/* 0x48-0x4F */ 0x58C6,0x5B17,0x5B19,0x5B1B,0x5B21,0x5B14,0x5B13,0x5B10,/* 0x50-0x57 */ 0x5B16,0x5B28,0x5B1A,0x5B20,0x5B1E,0x5BEF,0x5DAC,0x5DB1,/* 0x58-0x5F */ 0x5DA9,0x5DA7,0x5DB5,0x5DB0,0x5DAE,0x5DAA,0x5DA8,0x5DB2,/* 0x60-0x67 */ 0x5DAD,0x5DAF,0x5DB4,0x5E67,0x5E68,0x5E66,0x5E6F,0x5EE9,/* 0x68-0x6F */ 0x5EE7,0x5EE6,0x5EE8,0x5EE5,0x5F4B,0x5FBC,0x619D,0x61A8,/* 0x70-0x77 */ 0x6196,0x61C5,0x61B4,0x61C6,0x61C1,0x61CC,0x61BA,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x61BF,0x61B8,0x618C,0x64D7,0x64D6,0x64D0,0x64CF,/* 0xA0-0xA7 */ 0x64C9,0x64BD,0x6489,0x64C3,0x64DB,0x64F3,0x64D9,0x6533,/* 0xA8-0xAF */ 0x657F,0x657C,0x65A2,0x66C8,0x66BE,0x66C0,0x66CA,0x66CB,/* 0xB0-0xB7 */ 0x66CF,0x66BD,0x66BB,0x66BA,0x66CC,0x6723,0x6A34,0x6A66,/* 0xB8-0xBF */ 0x6A49,0x6A67,0x6A32,0x6A68,0x6A3E,0x6A5D,0x6A6D,0x6A76,/* 0xC0-0xC7 */ 0x6A5B,0x6A51,0x6A28,0x6A5A,0x6A3B,0x6A3F,0x6A41,0x6A6A,/* 0xC8-0xCF */ 0x6A64,0x6A50,0x6A4F,0x6A54,0x6A6F,0x6A69,0x6A60,0x6A3C,/* 0xD0-0xD7 */ 0x6A5E,0x6A56,0x6A55,0x6A4D,0x6A4E,0x6A46,0x6B55,0x6B54,/* 0xD8-0xDF */ 0x6B56,0x6BA7,0x6BAA,0x6BAB,0x6BC8,0x6BC7,0x6C04,0x6C03,/* 0xE0-0xE7 */ 0x6C06,0x6FAD,0x6FCB,0x6FA3,0x6FC7,0x6FBC,0x6FCE,0x6FC8,/* 0xE8-0xEF */ 0x6F5E,0x6FC4,0x6FBD,0x6F9E,0x6FCA,0x6FA8,0x7004,0x6FA5,/* 0xF0-0xF7 */ 0x6FAE,0x6FBA,0x6FAC,0x6FAA,0x6FCF,0x6FBF,0x6FB8,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_EA[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x6FA2,0x6FC9,0x6FAB,0x6FCD,0x6FAF,0x6FB2,0x6FB0,0x71C5,/* 0x40-0x47 */ 0x71C2,0x71BF,0x71B8,0x71D6,0x71C0,0x71C1,0x71CB,0x71D4,/* 0x48-0x4F */ 0x71CA,0x71C7,0x71CF,0x71BD,0x71D8,0x71BC,0x71C6,0x71DA,/* 0x50-0x57 */ 0x71DB,0x729D,0x729E,0x7369,0x7366,0x7367,0x736C,0x7365,/* 0x58-0x5F */ 0x736B,0x736A,0x747F,0x749A,0x74A0,0x7494,0x7492,0x7495,/* 0x60-0x67 */ 0x74A1,0x750B,0x7580,0x762F,0x762D,0x7631,0x763D,0x7633,/* 0x68-0x6F */ 0x763C,0x7635,0x7632,0x7630,0x76BB,0x76E6,0x779A,0x779D,/* 0x70-0x77 */ 0x77A1,0x779C,0x779B,0x77A2,0x77A3,0x7795,0x7799,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x7797,0x78DD,0x78E9,0x78E5,0x78EA,0x78DE,0x78E3,/* 0xA0-0xA7 */ 0x78DB,0x78E1,0x78E2,0x78ED,0x78DF,0x78E0,0x79A4,0x7A44,/* 0xA8-0xAF */ 0x7A48,0x7A47,0x7AB6,0x7AB8,0x7AB5,0x7AB1,0x7AB7,0x7BDE,/* 0xB0-0xB7 */ 0x7BE3,0x7BE7,0x7BDD,0x7BD5,0x7BE5,0x7BDA,0x7BE8,0x7BF9,/* 0xB8-0xBF */ 0x7BD4,0x7BEA,0x7BE2,0x7BDC,0x7BEB,0x7BD8,0x7BDF,0x7CD2,/* 0xC0-0xC7 */ 0x7CD4,0x7CD7,0x7CD0,0x7CD1,0x7E12,0x7E21,0x7E17,0x7E0C,/* 0xC8-0xCF */ 0x7E1F,0x7E20,0x7E13,0x7E0E,0x7E1C,0x7E15,0x7E1A,0x7E22,/* 0xD0-0xD7 */ 0x7E0B,0x7E0F,0x7E16,0x7E0D,0x7E14,0x7E25,0x7E24,0x7F43,/* 0xD8-0xDF */ 0x7F7B,0x7F7C,0x7F7A,0x7FB1,0x7FEF,0x802A,0x8029,0x806C,/* 0xE0-0xE7 */ 0x81B1,0x81A6,0x81AE,0x81B9,0x81B5,0x81AB,0x81B0,0x81AC,/* 0xE8-0xEF */ 0x81B4,0x81B2,0x81B7,0x81A7,0x81F2,0x8255,0x8256,0x8257,/* 0xF0-0xF7 */ 0x8556,0x8545,0x856B,0x854D,0x8553,0x8561,0x8558,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_EB[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x8540,0x8546,0x8564,0x8541,0x8562,0x8544,0x8551,0x8547,/* 0x40-0x47 */ 0x8563,0x853E,0x855B,0x8571,0x854E,0x856E,0x8575,0x8555,/* 0x48-0x4F */ 0x8567,0x8560,0x858C,0x8566,0x855D,0x8554,0x8565,0x856C,/* 0x50-0x57 */ 0x8663,0x8665,0x8664,0x879B,0x878F,0x8797,0x8793,0x8792,/* 0x58-0x5F */ 0x8788,0x8781,0x8796,0x8798,0x8779,0x8787,0x87A3,0x8785,/* 0x60-0x67 */ 0x8790,0x8791,0x879D,0x8784,0x8794,0x879C,0x879A,0x8789,/* 0x68-0x6F */ 0x891E,0x8926,0x8930,0x892D,0x892E,0x8927,0x8931,0x8922,/* 0x70-0x77 */ 0x8929,0x8923,0x892F,0x892C,0x891F,0x89F1,0x8AE0,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x8AE2,0x8AF2,0x8AF4,0x8AF5,0x8ADD,0x8B14,0x8AE4,/* 0xA0-0xA7 */ 0x8ADF,0x8AF0,0x8AC8,0x8ADE,0x8AE1,0x8AE8,0x8AFF,0x8AEF,/* 0xA8-0xAF */ 0x8AFB,0x8C91,0x8C92,0x8C90,0x8CF5,0x8CEE,0x8CF1,0x8CF0,/* 0xB0-0xB7 */ 0x8CF3,0x8D6C,0x8D6E,0x8DA5,0x8DA7,0x8E33,0x8E3E,0x8E38,/* 0xB8-0xBF */ 0x8E40,0x8E45,0x8E36,0x8E3C,0x8E3D,0x8E41,0x8E30,0x8E3F,/* 0xC0-0xC7 */ 0x8EBD,0x8F36,0x8F2E,0x8F35,0x8F32,0x8F39,0x8F37,0x8F34,/* 0xC8-0xCF */ 0x9076,0x9079,0x907B,0x9086,0x90FA,0x9133,0x9135,0x9136,/* 0xD0-0xD7 */ 0x9193,0x9190,0x9191,0x918D,0x918F,0x9327,0x931E,0x9308,/* 0xD8-0xDF */ 0x931F,0x9306,0x930F,0x937A,0x9338,0x933C,0x931B,0x9323,/* 0xE0-0xE7 */ 0x9312,0x9301,0x9346,0x932D,0x930E,0x930D,0x92CB,0x931D,/* 0xE8-0xEF */ 0x92FA,0x9325,0x9313,0x92F9,0x92F7,0x9334,0x9302,0x9324,/* 0xF0-0xF7 */ 0x92FF,0x9329,0x9339,0x9335,0x932A,0x9314,0x930C,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_EC[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x930B,0x92FE,0x9309,0x9300,0x92FB,0x9316,0x95BC,0x95CD,/* 0x40-0x47 */ 0x95BE,0x95B9,0x95BA,0x95B6,0x95BF,0x95B5,0x95BD,0x96A9,/* 0x48-0x4F */ 0x96D4,0x970B,0x9712,0x9710,0x9799,0x9797,0x9794,0x97F0,/* 0x50-0x57 */ 0x97F8,0x9835,0x982F,0x9832,0x9924,0x991F,0x9927,0x9929,/* 0x58-0x5F */ 0x999E,0x99EE,0x99EC,0x99E5,0x99E4,0x99F0,0x99E3,0x99EA,/* 0x60-0x67 */ 0x99E9,0x99E7,0x9AB9,0x9ABF,0x9AB4,0x9ABB,0x9AF6,0x9AFA,/* 0x68-0x6F */ 0x9AF9,0x9AF7,0x9B33,0x9B80,0x9B85,0x9B87,0x9B7C,0x9B7E,/* 0x70-0x77 */ 0x9B7B,0x9B82,0x9B93,0x9B92,0x9B90,0x9B7A,0x9B95,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x9B7D,0x9B88,0x9D25,0x9D17,0x9D20,0x9D1E,0x9D14,/* 0xA0-0xA7 */ 0x9D29,0x9D1D,0x9D18,0x9D22,0x9D10,0x9D19,0x9D1F,0x9E88,/* 0xA8-0xAF */ 0x9E86,0x9E87,0x9EAE,0x9EAD,0x9ED5,0x9ED6,0x9EFA,0x9F12,/* 0xB0-0xB7 */ 0x9F3D,0x5126,0x5125,0x5122,0x5124,0x5120,0x5129,0x52F4,/* 0xB8-0xBF */ 0x5693,0x568C,0x568D,0x5686,0x5684,0x5683,0x567E,0x5682,/* 0xC0-0xC7 */ 0x567F,0x5681,0x58D6,0x58D4,0x58CF,0x58D2,0x5B2D,0x5B25,/* 0xC8-0xCF */ 0x5B32,0x5B23,0x5B2C,0x5B27,0x5B26,0x5B2F,0x5B2E,0x5B7B,/* 0xD0-0xD7 */ 0x5BF1,0x5BF2,0x5DB7,0x5E6C,0x5E6A,0x5FBE,0x5FBB,0x61C3,/* 0xD8-0xDF */ 0x61B5,0x61BC,0x61E7,0x61E0,0x61E5,0x61E4,0x61E8,0x61DE,/* 0xE0-0xE7 */ 0x64EF,0x64E9,0x64E3,0x64EB,0x64E4,0x64E8,0x6581,0x6580,/* 0xE8-0xEF */ 0x65B6,0x65DA,0x66D2,0x6A8D,0x6A96,0x6A81,0x6AA5,0x6A89,/* 0xF0-0xF7 */ 0x6A9F,0x6A9B,0x6AA1,0x6A9E,0x6A87,0x6A93,0x6A8E,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_ED[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x6A95,0x6A83,0x6AA8,0x6AA4,0x6A91,0x6A7F,0x6AA6,0x6A9A,/* 0x40-0x47 */ 0x6A85,0x6A8C,0x6A92,0x6B5B,0x6BAD,0x6C09,0x6FCC,0x6FA9,/* 0x48-0x4F */ 0x6FF4,0x6FD4,0x6FE3,0x6FDC,0x6FED,0x6FE7,0x6FE6,0x6FDE,/* 0x50-0x57 */ 0x6FF2,0x6FDD,0x6FE2,0x6FE8,0x71E1,0x71F1,0x71E8,0x71F2,/* 0x58-0x5F */ 0x71E4,0x71F0,0x71E2,0x7373,0x736E,0x736F,0x7497,0x74B2,/* 0x60-0x67 */ 0x74AB,0x7490,0x74AA,0x74AD,0x74B1,0x74A5,0x74AF,0x7510,/* 0x68-0x6F */ 0x7511,0x7512,0x750F,0x7584,0x7643,0x7648,0x7649,0x7647,/* 0x70-0x77 */ 0x76A4,0x76E9,0x77B5,0x77AB,0x77B2,0x77B7,0x77B6,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x77B4,0x77B1,0x77A8,0x77F0,0x78F3,0x78FD,0x7902,/* 0xA0-0xA7 */ 0x78FB,0x78FC,0x78F2,0x7905,0x78F9,0x78FE,0x7904,0x79AB,/* 0xA8-0xAF */ 0x79A8,0x7A5C,0x7A5B,0x7A56,0x7A58,0x7A54,0x7A5A,0x7ABE,/* 0xB0-0xB7 */ 0x7AC0,0x7AC1,0x7C05,0x7C0F,0x7BF2,0x7C00,0x7BFF,0x7BFB,/* 0xB8-0xBF */ 0x7C0E,0x7BF4,0x7C0B,0x7BF3,0x7C02,0x7C09,0x7C03,0x7C01,/* 0xC0-0xC7 */ 0x7BF8,0x7BFD,0x7C06,0x7BF0,0x7BF1,0x7C10,0x7C0A,0x7CE8,/* 0xC8-0xCF */ 0x7E2D,0x7E3C,0x7E42,0x7E33,0x9848,0x7E38,0x7E2A,0x7E49,/* 0xD0-0xD7 */ 0x7E40,0x7E47,0x7E29,0x7E4C,0x7E30,0x7E3B,0x7E36,0x7E44,/* 0xD8-0xDF */ 0x7E3A,0x7F45,0x7F7F,0x7F7E,0x7F7D,0x7FF4,0x7FF2,0x802C,/* 0xE0-0xE7 */ 0x81BB,0x81C4,0x81CC,0x81CA,0x81C5,0x81C7,0x81BC,0x81E9,/* 0xE8-0xEF */ 0x825B,0x825A,0x825C,0x8583,0x8580,0x858F,0x85A7,0x8595,/* 0xF0-0xF7 */ 0x85A0,0x858B,0x85A3,0x857B,0x85A4,0x859A,0x859E,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_EE[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x8577,0x857C,0x8589,0x85A1,0x857A,0x8578,0x8557,0x858E,/* 0x40-0x47 */ 0x8596,0x8586,0x858D,0x8599,0x859D,0x8581,0x85A2,0x8582,/* 0x48-0x4F */ 0x8588,0x8585,0x8579,0x8576,0x8598,0x8590,0x859F,0x8668,/* 0x50-0x57 */ 0x87BE,0x87AA,0x87AD,0x87C5,0x87B0,0x87AC,0x87B9,0x87B5,/* 0x58-0x5F */ 0x87BC,0x87AE,0x87C9,0x87C3,0x87C2,0x87CC,0x87B7,0x87AF,/* 0x60-0x67 */ 0x87C4,0x87CA,0x87B4,0x87B6,0x87BF,0x87B8,0x87BD,0x87DE,/* 0x68-0x6F */ 0x87B2,0x8935,0x8933,0x893C,0x893E,0x8941,0x8952,0x8937,/* 0x70-0x77 */ 0x8942,0x89AD,0x89AF,0x89AE,0x89F2,0x89F3,0x8B1E,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x8B18,0x8B16,0x8B11,0x8B05,0x8B0B,0x8B22,0x8B0F,/* 0xA0-0xA7 */ 0x8B12,0x8B15,0x8B07,0x8B0D,0x8B08,0x8B06,0x8B1C,0x8B13,/* 0xA8-0xAF */ 0x8B1A,0x8C4F,0x8C70,0x8C72,0x8C71,0x8C6F,0x8C95,0x8C94,/* 0xB0-0xB7 */ 0x8CF9,0x8D6F,0x8E4E,0x8E4D,0x8E53,0x8E50,0x8E4C,0x8E47,/* 0xB8-0xBF */ 0x8F43,0x8F40,0x9085,0x907E,0x9138,0x919A,0x91A2,0x919B,/* 0xC0-0xC7 */ 0x9199,0x919F,0x91A1,0x919D,0x91A0,0x93A1,0x9383,0x93AF,/* 0xC8-0xCF */ 0x9364,0x9356,0x9347,0x937C,0x9358,0x935C,0x9376,0x9349,/* 0xD0-0xD7 */ 0x9350,0x9351,0x9360,0x936D,0x938F,0x934C,0x936A,0x9379,/* 0xD8-0xDF */ 0x9357,0x9355,0x9352,0x934F,0x9371,0x9377,0x937B,0x9361,/* 0xE0-0xE7 */ 0x935E,0x9363,0x9367,0x9380,0x934E,0x9359,0x95C7,0x95C0,/* 0xE8-0xEF */ 0x95C9,0x95C3,0x95C5,0x95B7,0x96AE,0x96B0,0x96AC,0x9720,/* 0xF0-0xF7 */ 0x971F,0x9718,0x971D,0x9719,0x979A,0x97A1,0x979C,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_EF[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x979E,0x979D,0x97D5,0x97D4,0x97F1,0x9841,0x9844,0x984A,/* 0x40-0x47 */ 0x9849,0x9845,0x9843,0x9925,0x992B,0x992C,0x992A,0x9933,/* 0x48-0x4F */ 0x9932,0x992F,0x992D,0x9931,0x9930,0x9998,0x99A3,0x99A1,/* 0x50-0x57 */ 0x9A02,0x99FA,0x99F4,0x99F7,0x99F9,0x99F8,0x99F6,0x99FB,/* 0x58-0x5F */ 0x99FD,0x99FE,0x99FC,0x9A03,0x9ABE,0x9AFE,0x9AFD,0x9B01,/* 0x60-0x67 */ 0x9AFC,0x9B48,0x9B9A,0x9BA8,0x9B9E,0x9B9B,0x9BA6,0x9BA1,/* 0x68-0x6F */ 0x9BA5,0x9BA4,0x9B86,0x9BA2,0x9BA0,0x9BAF,0x9D33,0x9D41,/* 0x70-0x77 */ 0x9D67,0x9D36,0x9D2E,0x9D2F,0x9D31,0x9D38,0x9D30,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x9D45,0x9D42,0x9D43,0x9D3E,0x9D37,0x9D40,0x9D3D,/* 0xA0-0xA7 */ 0x7FF5,0x9D2D,0x9E8A,0x9E89,0x9E8D,0x9EB0,0x9EC8,0x9EDA,/* 0xA8-0xAF */ 0x9EFB,0x9EFF,0x9F24,0x9F23,0x9F22,0x9F54,0x9FA0,0x5131,/* 0xB0-0xB7 */ 0x512D,0x512E,0x5698,0x569C,0x5697,0x569A,0x569D,0x5699,/* 0xB8-0xBF */ 0x5970,0x5B3C,0x5C69,0x5C6A,0x5DC0,0x5E6D,0x5E6E,0x61D8,/* 0xC0-0xC7 */ 0x61DF,0x61ED,0x61EE,0x61F1,0x61EA,0x61F0,0x61EB,0x61D6,/* 0xC8-0xCF */ 0x61E9,0x64FF,0x6504,0x64FD,0x64F8,0x6501,0x6503,0x64FC,/* 0xD0-0xD7 */ 0x6594,0x65DB,0x66DA,0x66DB,0x66D8,0x6AC5,0x6AB9,0x6ABD,/* 0xD8-0xDF */ 0x6AE1,0x6AC6,0x6ABA,0x6AB6,0x6AB7,0x6AC7,0x6AB4,0x6AAD,/* 0xE0-0xE7 */ 0x6B5E,0x6BC9,0x6C0B,0x7007,0x700C,0x700D,0x7001,0x7005,/* 0xE8-0xEF */ 0x7014,0x700E,0x6FFF,0x7000,0x6FFB,0x7026,0x6FFC,0x6FF7,/* 0xF0-0xF7 */ 0x700A,0x7201,0x71FF,0x71F9,0x7203,0x71FD,0x7376,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_F0[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x74B8,0x74C0,0x74B5,0x74C1,0x74BE,0x74B6,0x74BB,0x74C2,/* 0x40-0x47 */ 0x7514,0x7513,0x765C,0x7664,0x7659,0x7650,0x7653,0x7657,/* 0x48-0x4F */ 0x765A,0x76A6,0x76BD,0x76EC,0x77C2,0x77BA,0x78FF,0x790C,/* 0x50-0x57 */ 0x7913,0x7914,0x7909,0x7910,0x7912,0x7911,0x79AD,0x79AC,/* 0x58-0x5F */ 0x7A5F,0x7C1C,0x7C29,0x7C19,0x7C20,0x7C1F,0x7C2D,0x7C1D,/* 0x60-0x67 */ 0x7C26,0x7C28,0x7C22,0x7C25,0x7C30,0x7E5C,0x7E50,0x7E56,/* 0x68-0x6F */ 0x7E63,0x7E58,0x7E62,0x7E5F,0x7E51,0x7E60,0x7E57,0x7E53,/* 0x70-0x77 */ 0x7FB5,0x7FB3,0x7FF7,0x7FF8,0x8075,0x81D1,0x81D2,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x81D0,0x825F,0x825E,0x85B4,0x85C6,0x85C0,0x85C3,/* 0xA0-0xA7 */ 0x85C2,0x85B3,0x85B5,0x85BD,0x85C7,0x85C4,0x85BF,0x85CB,/* 0xA8-0xAF */ 0x85CE,0x85C8,0x85C5,0x85B1,0x85B6,0x85D2,0x8624,0x85B8,/* 0xB0-0xB7 */ 0x85B7,0x85BE,0x8669,0x87E7,0x87E6,0x87E2,0x87DB,0x87EB,/* 0xB8-0xBF */ 0x87EA,0x87E5,0x87DF,0x87F3,0x87E4,0x87D4,0x87DC,0x87D3,/* 0xC0-0xC7 */ 0x87ED,0x87D8,0x87E3,0x87A4,0x87D7,0x87D9,0x8801,0x87F4,/* 0xC8-0xCF */ 0x87E8,0x87DD,0x8953,0x894B,0x894F,0x894C,0x8946,0x8950,/* 0xD0-0xD7 */ 0x8951,0x8949,0x8B2A,0x8B27,0x8B23,0x8B33,0x8B30,0x8B35,/* 0xD8-0xDF */ 0x8B47,0x8B2F,0x8B3C,0x8B3E,0x8B31,0x8B25,0x8B37,0x8B26,/* 0xE0-0xE7 */ 0x8B36,0x8B2E,0x8B24,0x8B3B,0x8B3D,0x8B3A,0x8C42,0x8C75,/* 0xE8-0xEF */ 0x8C99,0x8C98,0x8C97,0x8CFE,0x8D04,0x8D02,0x8D00,0x8E5C,/* 0xF0-0xF7 */ 0x8E62,0x8E60,0x8E57,0x8E56,0x8E5E,0x8E65,0x8E67,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_F1[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x8E5B,0x8E5A,0x8E61,0x8E5D,0x8E69,0x8E54,0x8F46,0x8F47,/* 0x40-0x47 */ 0x8F48,0x8F4B,0x9128,0x913A,0x913B,0x913E,0x91A8,0x91A5,/* 0x48-0x4F */ 0x91A7,0x91AF,0x91AA,0x93B5,0x938C,0x9392,0x93B7,0x939B,/* 0x50-0x57 */ 0x939D,0x9389,0x93A7,0x938E,0x93AA,0x939E,0x93A6,0x9395,/* 0x58-0x5F */ 0x9388,0x9399,0x939F,0x938D,0x93B1,0x9391,0x93B2,0x93A4,/* 0x60-0x67 */ 0x93A8,0x93B4,0x93A3,0x93A5,0x95D2,0x95D3,0x95D1,0x96B3,/* 0x68-0x6F */ 0x96D7,0x96DA,0x5DC2,0x96DF,0x96D8,0x96DD,0x9723,0x9722,/* 0x70-0x77 */ 0x9725,0x97AC,0x97AE,0x97A8,0x97AB,0x97A4,0x97AA,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x97A2,0x97A5,0x97D7,0x97D9,0x97D6,0x97D8,0x97FA,/* 0xA0-0xA7 */ 0x9850,0x9851,0x9852,0x98B8,0x9941,0x993C,0x993A,0x9A0F,/* 0xA8-0xAF */ 0x9A0B,0x9A09,0x9A0D,0x9A04,0x9A11,0x9A0A,0x9A05,0x9A07,/* 0xB0-0xB7 */ 0x9A06,0x9AC0,0x9ADC,0x9B08,0x9B04,0x9B05,0x9B29,0x9B35,/* 0xB8-0xBF */ 0x9B4A,0x9B4C,0x9B4B,0x9BC7,0x9BC6,0x9BC3,0x9BBF,0x9BC1,/* 0xC0-0xC7 */ 0x9BB5,0x9BB8,0x9BD3,0x9BB6,0x9BC4,0x9BB9,0x9BBD,0x9D5C,/* 0xC8-0xCF */ 0x9D53,0x9D4F,0x9D4A,0x9D5B,0x9D4B,0x9D59,0x9D56,0x9D4C,/* 0xD0-0xD7 */ 0x9D57,0x9D52,0x9D54,0x9D5F,0x9D58,0x9D5A,0x9E8E,0x9E8C,/* 0xD8-0xDF */ 0x9EDF,0x9F01,0x9F00,0x9F16,0x9F25,0x9F2B,0x9F2A,0x9F29,/* 0xE0-0xE7 */ 0x9F28,0x9F4C,0x9F55,0x5134,0x5135,0x5296,0x52F7,0x53B4,/* 0xE8-0xEF */ 0x56AB,0x56AD,0x56A6,0x56A7,0x56AA,0x56AC,0x58DA,0x58DD,/* 0xF0-0xF7 */ 0x58DB,0x5912,0x5B3D,0x5B3E,0x5B3F,0x5DC3,0x5E70,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_F2[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x5FBF,0x61FB,0x6507,0x6510,0x650D,0x6509,0x650C,0x650E,/* 0x40-0x47 */ 0x6584,0x65DE,0x65DD,0x66DE,0x6AE7,0x6AE0,0x6ACC,0x6AD1,/* 0x48-0x4F */ 0x6AD9,0x6ACB,0x6ADF,0x6ADC,0x6AD0,0x6AEB,0x6ACF,0x6ACD,/* 0x50-0x57 */ 0x6ADE,0x6B60,0x6BB0,0x6C0C,0x7019,0x7027,0x7020,0x7016,/* 0x58-0x5F */ 0x702B,0x7021,0x7022,0x7023,0x7029,0x7017,0x7024,0x701C,/* 0x60-0x67 */ 0x702A,0x720C,0x720A,0x7207,0x7202,0x7205,0x72A5,0x72A6,/* 0x68-0x6F */ 0x72A4,0x72A3,0x72A1,0x74CB,0x74C5,0x74B7,0x74C3,0x7516,/* 0x70-0x77 */ 0x7660,0x77C9,0x77CA,0x77C4,0x77F1,0x791D,0x791B,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x7921,0x791C,0x7917,0x791E,0x79B0,0x7A67,0x7A68,/* 0xA0-0xA7 */ 0x7C33,0x7C3C,0x7C39,0x7C2C,0x7C3B,0x7CEC,0x7CEA,0x7E76,/* 0xA8-0xAF */ 0x7E75,0x7E78,0x7E70,0x7E77,0x7E6F,0x7E7A,0x7E72,0x7E74,/* 0xB0-0xB7 */ 0x7E68,0x7F4B,0x7F4A,0x7F83,0x7F86,0x7FB7,0x7FFD,0x7FFE,/* 0xB8-0xBF */ 0x8078,0x81D7,0x81D5,0x8264,0x8261,0x8263,0x85EB,0x85F1,/* 0xC0-0xC7 */ 0x85ED,0x85D9,0x85E1,0x85E8,0x85DA,0x85D7,0x85EC,0x85F2,/* 0xC8-0xCF */ 0x85F8,0x85D8,0x85DF,0x85E3,0x85DC,0x85D1,0x85F0,0x85E6,/* 0xD0-0xD7 */ 0x85EF,0x85DE,0x85E2,0x8800,0x87FA,0x8803,0x87F6,0x87F7,/* 0xD8-0xDF */ 0x8809,0x880C,0x880B,0x8806,0x87FC,0x8808,0x87FF,0x880A,/* 0xE0-0xE7 */ 0x8802,0x8962,0x895A,0x895B,0x8957,0x8961,0x895C,0x8958,/* 0xE8-0xEF */ 0x895D,0x8959,0x8988,0x89B7,0x89B6,0x89F6,0x8B50,0x8B48,/* 0xF0-0xF7 */ 0x8B4A,0x8B40,0x8B53,0x8B56,0x8B54,0x8B4B,0x8B55,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_F3[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x8B51,0x8B42,0x8B52,0x8B57,0x8C43,0x8C77,0x8C76,0x8C9A,/* 0x40-0x47 */ 0x8D06,0x8D07,0x8D09,0x8DAC,0x8DAA,0x8DAD,0x8DAB,0x8E6D,/* 0x48-0x4F */ 0x8E78,0x8E73,0x8E6A,0x8E6F,0x8E7B,0x8EC2,0x8F52,0x8F51,/* 0x50-0x57 */ 0x8F4F,0x8F50,0x8F53,0x8FB4,0x9140,0x913F,0x91B0,0x91AD,/* 0x58-0x5F */ 0x93DE,0x93C7,0x93CF,0x93C2,0x93DA,0x93D0,0x93F9,0x93EC,/* 0x60-0x67 */ 0x93CC,0x93D9,0x93A9,0x93E6,0x93CA,0x93D4,0x93EE,0x93E3,/* 0x68-0x6F */ 0x93D5,0x93C4,0x93CE,0x93C0,0x93D2,0x93E7,0x957D,0x95DA,/* 0x70-0x77 */ 0x95DB,0x96E1,0x9729,0x972B,0x972C,0x9728,0x9726,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x97B3,0x97B7,0x97B6,0x97DD,0x97DE,0x97DF,0x985C,/* 0xA0-0xA7 */ 0x9859,0x985D,0x9857,0x98BF,0x98BD,0x98BB,0x98BE,0x9948,/* 0xA8-0xAF */ 0x9947,0x9943,0x99A6,0x99A7,0x9A1A,0x9A15,0x9A25,0x9A1D,/* 0xB0-0xB7 */ 0x9A24,0x9A1B,0x9A22,0x9A20,0x9A27,0x9A23,0x9A1E,0x9A1C,/* 0xB8-0xBF */ 0x9A14,0x9AC2,0x9B0B,0x9B0A,0x9B0E,0x9B0C,0x9B37,0x9BEA,/* 0xC0-0xC7 */ 0x9BEB,0x9BE0,0x9BDE,0x9BE4,0x9BE6,0x9BE2,0x9BF0,0x9BD4,/* 0xC8-0xCF */ 0x9BD7,0x9BEC,0x9BDC,0x9BD9,0x9BE5,0x9BD5,0x9BE1,0x9BDA,/* 0xD0-0xD7 */ 0x9D77,0x9D81,0x9D8A,0x9D84,0x9D88,0x9D71,0x9D80,0x9D78,/* 0xD8-0xDF */ 0x9D86,0x9D8B,0x9D8C,0x9D7D,0x9D6B,0x9D74,0x9D75,0x9D70,/* 0xE0-0xE7 */ 0x9D69,0x9D85,0x9D73,0x9D7B,0x9D82,0x9D6F,0x9D79,0x9D7F,/* 0xE8-0xEF */ 0x9D87,0x9D68,0x9E94,0x9E91,0x9EC0,0x9EFC,0x9F2D,0x9F40,/* 0xF0-0xF7 */ 0x9F41,0x9F4D,0x9F56,0x9F57,0x9F58,0x5337,0x56B2,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_F4[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x56B5,0x56B3,0x58E3,0x5B45,0x5DC6,0x5DC7,0x5EEE,0x5EEF,/* 0x40-0x47 */ 0x5FC0,0x5FC1,0x61F9,0x6517,0x6516,0x6515,0x6513,0x65DF,/* 0x48-0x4F */ 0x66E8,0x66E3,0x66E4,0x6AF3,0x6AF0,0x6AEA,0x6AE8,0x6AF9,/* 0x50-0x57 */ 0x6AF1,0x6AEE,0x6AEF,0x703C,0x7035,0x702F,0x7037,0x7034,/* 0x58-0x5F */ 0x7031,0x7042,0x7038,0x703F,0x703A,0x7039,0x7040,0x703B,/* 0x60-0x67 */ 0x7033,0x7041,0x7213,0x7214,0x72A8,0x737D,0x737C,0x74BA,/* 0x68-0x6F */ 0x76AB,0x76AA,0x76BE,0x76ED,0x77CC,0x77CE,0x77CF,0x77CD,/* 0x70-0x77 */ 0x77F2,0x7925,0x7923,0x7927,0x7928,0x7924,0x7929,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x79B2,0x7A6E,0x7A6C,0x7A6D,0x7AF7,0x7C49,0x7C48,/* 0xA0-0xA7 */ 0x7C4A,0x7C47,0x7C45,0x7CEE,0x7E7B,0x7E7E,0x7E81,0x7E80,/* 0xA8-0xAF */ 0x7FBA,0x7FFF,0x8079,0x81DB,0x81D9,0x820B,0x8268,0x8269,/* 0xB0-0xB7 */ 0x8622,0x85FF,0x8601,0x85FE,0x861B,0x8600,0x85F6,0x8604,/* 0xB8-0xBF */ 0x8609,0x8605,0x860C,0x85FD,0x8819,0x8810,0x8811,0x8817,/* 0xC0-0xC7 */ 0x8813,0x8816,0x8963,0x8966,0x89B9,0x89F7,0x8B60,0x8B6A,/* 0xC8-0xCF */ 0x8B5D,0x8B68,0x8B63,0x8B65,0x8B67,0x8B6D,0x8DAE,0x8E86,/* 0xD0-0xD7 */ 0x8E88,0x8E84,0x8F59,0x8F56,0x8F57,0x8F55,0x8F58,0x8F5A,/* 0xD8-0xDF */ 0x908D,0x9143,0x9141,0x91B7,0x91B5,0x91B2,0x91B3,0x940B,/* 0xE0-0xE7 */ 0x9413,0x93FB,0x9420,0x940F,0x9414,0x93FE,0x9415,0x9410,/* 0xE8-0xEF */ 0x9428,0x9419,0x940D,0x93F5,0x9400,0x93F7,0x9407,0x940E,/* 0xF0-0xF7 */ 0x9416,0x9412,0x93FA,0x9409,0x93F8,0x940A,0x93FF,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_F5[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x93FC,0x940C,0x93F6,0x9411,0x9406,0x95DE,0x95E0,0x95DF,/* 0x40-0x47 */ 0x972E,0x972F,0x97B9,0x97BB,0x97FD,0x97FE,0x9860,0x9862,/* 0x48-0x4F */ 0x9863,0x985F,0x98C1,0x98C2,0x9950,0x994E,0x9959,0x994C,/* 0x50-0x57 */ 0x994B,0x9953,0x9A32,0x9A34,0x9A31,0x9A2C,0x9A2A,0x9A36,/* 0x58-0x5F */ 0x9A29,0x9A2E,0x9A38,0x9A2D,0x9AC7,0x9ACA,0x9AC6,0x9B10,/* 0x60-0x67 */ 0x9B12,0x9B11,0x9C0B,0x9C08,0x9BF7,0x9C05,0x9C12,0x9BF8,/* 0x68-0x6F */ 0x9C40,0x9C07,0x9C0E,0x9C06,0x9C17,0x9C14,0x9C09,0x9D9F,/* 0x70-0x77 */ 0x9D99,0x9DA4,0x9D9D,0x9D92,0x9D98,0x9D90,0x9D9B,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x9DA0,0x9D94,0x9D9C,0x9DAA,0x9D97,0x9DA1,0x9D9A,/* 0xA0-0xA7 */ 0x9DA2,0x9DA8,0x9D9E,0x9DA3,0x9DBF,0x9DA9,0x9D96,0x9DA6,/* 0xA8-0xAF */ 0x9DA7,0x9E99,0x9E9B,0x9E9A,0x9EE5,0x9EE4,0x9EE7,0x9EE6,/* 0xB0-0xB7 */ 0x9F30,0x9F2E,0x9F5B,0x9F60,0x9F5E,0x9F5D,0x9F59,0x9F91,/* 0xB8-0xBF */ 0x513A,0x5139,0x5298,0x5297,0x56C3,0x56BD,0x56BE,0x5B48,/* 0xC0-0xC7 */ 0x5B47,0x5DCB,0x5DCF,0x5EF1,0x61FD,0x651B,0x6B02,0x6AFC,/* 0xC8-0xCF */ 0x6B03,0x6AF8,0x6B00,0x7043,0x7044,0x704A,0x7048,0x7049,/* 0xD0-0xD7 */ 0x7045,0x7046,0x721D,0x721A,0x7219,0x737E,0x7517,0x766A,/* 0xD8-0xDF */ 0x77D0,0x792D,0x7931,0x792F,0x7C54,0x7C53,0x7CF2,0x7E8A,/* 0xE0-0xE7 */ 0x7E87,0x7E88,0x7E8B,0x7E86,0x7E8D,0x7F4D,0x7FBB,0x8030,/* 0xE8-0xEF */ 0x81DD,0x8618,0x862A,0x8626,0x861F,0x8623,0x861C,0x8619,/* 0xF0-0xF7 */ 0x8627,0x862E,0x8621,0x8620,0x8629,0x861E,0x8625,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_F6[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x8829,0x881D,0x881B,0x8820,0x8824,0x881C,0x882B,0x884A,/* 0x40-0x47 */ 0x896D,0x8969,0x896E,0x896B,0x89FA,0x8B79,0x8B78,0x8B45,/* 0x48-0x4F */ 0x8B7A,0x8B7B,0x8D10,0x8D14,0x8DAF,0x8E8E,0x8E8C,0x8F5E,/* 0x50-0x57 */ 0x8F5B,0x8F5D,0x9146,0x9144,0x9145,0x91B9,0x943F,0x943B,/* 0x58-0x5F */ 0x9436,0x9429,0x943D,0x943C,0x9430,0x9439,0x942A,0x9437,/* 0x60-0x67 */ 0x942C,0x9440,0x9431,0x95E5,0x95E4,0x95E3,0x9735,0x973A,/* 0x68-0x6F */ 0x97BF,0x97E1,0x9864,0x98C9,0x98C6,0x98C0,0x9958,0x9956,/* 0x70-0x77 */ 0x9A39,0x9A3D,0x9A46,0x9A44,0x9A42,0x9A41,0x9A3A,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x9A3F,0x9ACD,0x9B15,0x9B17,0x9B18,0x9B16,0x9B3A,/* 0xA0-0xA7 */ 0x9B52,0x9C2B,0x9C1D,0x9C1C,0x9C2C,0x9C23,0x9C28,0x9C29,/* 0xA8-0xAF */ 0x9C24,0x9C21,0x9DB7,0x9DB6,0x9DBC,0x9DC1,0x9DC7,0x9DCA,/* 0xB0-0xB7 */ 0x9DCF,0x9DBE,0x9DC5,0x9DC3,0x9DBB,0x9DB5,0x9DCE,0x9DB9,/* 0xB8-0xBF */ 0x9DBA,0x9DAC,0x9DC8,0x9DB1,0x9DAD,0x9DCC,0x9DB3,0x9DCD,/* 0xC0-0xC7 */ 0x9DB2,0x9E7A,0x9E9C,0x9EEB,0x9EEE,0x9EED,0x9F1B,0x9F18,/* 0xC8-0xCF */ 0x9F1A,0x9F31,0x9F4E,0x9F65,0x9F64,0x9F92,0x4EB9,0x56C6,/* 0xD0-0xD7 */ 0x56C5,0x56CB,0x5971,0x5B4B,0x5B4C,0x5DD5,0x5DD1,0x5EF2,/* 0xD8-0xDF */ 0x6521,0x6520,0x6526,0x6522,0x6B0B,0x6B08,0x6B09,0x6C0D,/* 0xE0-0xE7 */ 0x7055,0x7056,0x7057,0x7052,0x721E,0x721F,0x72A9,0x737F,/* 0xE8-0xEF */ 0x74D8,0x74D5,0x74D9,0x74D7,0x766D,0x76AD,0x7935,0x79B4,/* 0xF0-0xF7 */ 0x7A70,0x7A71,0x7C57,0x7C5C,0x7C59,0x7C5B,0x7C5A,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_F7[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x7CF4,0x7CF1,0x7E91,0x7F4F,0x7F87,0x81DE,0x826B,0x8634,/* 0x40-0x47 */ 0x8635,0x8633,0x862C,0x8632,0x8636,0x882C,0x8828,0x8826,/* 0x48-0x4F */ 0x882A,0x8825,0x8971,0x89BF,0x89BE,0x89FB,0x8B7E,0x8B84,/* 0x50-0x57 */ 0x8B82,0x8B86,0x8B85,0x8B7F,0x8D15,0x8E95,0x8E94,0x8E9A,/* 0x58-0x5F */ 0x8E92,0x8E90,0x8E96,0x8E97,0x8F60,0x8F62,0x9147,0x944C,/* 0x60-0x67 */ 0x9450,0x944A,0x944B,0x944F,0x9447,0x9445,0x9448,0x9449,/* 0x68-0x6F */ 0x9446,0x973F,0x97E3,0x986A,0x9869,0x98CB,0x9954,0x995B,/* 0x70-0x77 */ 0x9A4E,0x9A53,0x9A54,0x9A4C,0x9A4F,0x9A48,0x9A4A,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x9A49,0x9A52,0x9A50,0x9AD0,0x9B19,0x9B2B,0x9B3B,/* 0xA0-0xA7 */ 0x9B56,0x9B55,0x9C46,0x9C48,0x9C3F,0x9C44,0x9C39,0x9C33,/* 0xA8-0xAF */ 0x9C41,0x9C3C,0x9C37,0x9C34,0x9C32,0x9C3D,0x9C36,0x9DDB,/* 0xB0-0xB7 */ 0x9DD2,0x9DDE,0x9DDA,0x9DCB,0x9DD0,0x9DDC,0x9DD1,0x9DDF,/* 0xB8-0xBF */ 0x9DE9,0x9DD9,0x9DD8,0x9DD6,0x9DF5,0x9DD5,0x9DDD,0x9EB6,/* 0xC0-0xC7 */ 0x9EF0,0x9F35,0x9F33,0x9F32,0x9F42,0x9F6B,0x9F95,0x9FA2,/* 0xC8-0xCF */ 0x513D,0x5299,0x58E8,0x58E7,0x5972,0x5B4D,0x5DD8,0x882F,/* 0xD0-0xD7 */ 0x5F4F,0x6201,0x6203,0x6204,0x6529,0x6525,0x6596,0x66EB,/* 0xD8-0xDF */ 0x6B11,0x6B12,0x6B0F,0x6BCA,0x705B,0x705A,0x7222,0x7382,/* 0xE0-0xE7 */ 0x7381,0x7383,0x7670,0x77D4,0x7C67,0x7C66,0x7E95,0x826C,/* 0xE8-0xEF */ 0x863A,0x8640,0x8639,0x863C,0x8631,0x863B,0x863E,0x8830,/* 0xF0-0xF7 */ 0x8832,0x882E,0x8833,0x8976,0x8974,0x8973,0x89FE,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_F8[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x8B8C,0x8B8E,0x8B8B,0x8B88,0x8C45,0x8D19,0x8E98,0x8F64,/* 0x40-0x47 */ 0x8F63,0x91BC,0x9462,0x9455,0x945D,0x9457,0x945E,0x97C4,/* 0x48-0x4F */ 0x97C5,0x9800,0x9A56,0x9A59,0x9B1E,0x9B1F,0x9B20,0x9C52,/* 0x50-0x57 */ 0x9C58,0x9C50,0x9C4A,0x9C4D,0x9C4B,0x9C55,0x9C59,0x9C4C,/* 0x58-0x5F */ 0x9C4E,0x9DFB,0x9DF7,0x9DEF,0x9DE3,0x9DEB,0x9DF8,0x9DE4,/* 0x60-0x67 */ 0x9DF6,0x9DE1,0x9DEE,0x9DE6,0x9DF2,0x9DF0,0x9DE2,0x9DEC,/* 0x68-0x6F */ 0x9DF4,0x9DF3,0x9DE8,0x9DED,0x9EC2,0x9ED0,0x9EF2,0x9EF3,/* 0x70-0x77 */ 0x9F06,0x9F1C,0x9F38,0x9F37,0x9F36,0x9F43,0x9F4F,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x9F71,0x9F70,0x9F6E,0x9F6F,0x56D3,0x56CD,0x5B4E,/* 0xA0-0xA7 */ 0x5C6D,0x652D,0x66ED,0x66EE,0x6B13,0x705F,0x7061,0x705D,/* 0xA8-0xAF */ 0x7060,0x7223,0x74DB,0x74E5,0x77D5,0x7938,0x79B7,0x79B6,/* 0xB0-0xB7 */ 0x7C6A,0x7E97,0x7F89,0x826D,0x8643,0x8838,0x8837,0x8835,/* 0xB8-0xBF */ 0x884B,0x8B94,0x8B95,0x8E9E,0x8E9F,0x8EA0,0x8E9D,0x91BE,/* 0xC0-0xC7 */ 0x91BD,0x91C2,0x946B,0x9468,0x9469,0x96E5,0x9746,0x9743,/* 0xC8-0xCF */ 0x9747,0x97C7,0x97E5,0x9A5E,0x9AD5,0x9B59,0x9C63,0x9C67,/* 0xD0-0xD7 */ 0x9C66,0x9C62,0x9C5E,0x9C60,0x9E02,0x9DFE,0x9E07,0x9E03,/* 0xD8-0xDF */ 0x9E06,0x9E05,0x9E00,0x9E01,0x9E09,0x9DFF,0x9DFD,0x9E04,/* 0xE0-0xE7 */ 0x9EA0,0x9F1E,0x9F46,0x9F74,0x9F75,0x9F76,0x56D4,0x652E,/* 0xE8-0xEF */ 0x65B8,0x6B18,0x6B19,0x6B17,0x6B1A,0x7062,0x7226,0x72AA,/* 0xF0-0xF7 */ 0x77D8,0x77D9,0x7939,0x7C69,0x7C6B,0x7CF6,0x7E9A,0x0000,/* 0xF8-0xFF */ }; static const wchar_t c2u_F9[256] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x00-0x07 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x08-0x0F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x10-0x17 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x18-0x1F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x20-0x27 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x28-0x2F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x30-0x37 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x38-0x3F */ 0x7E98,0x7E9B,0x7E99,0x81E0,0x81E1,0x8646,0x8647,0x8648,/* 0x40-0x47 */ 0x8979,0x897A,0x897C,0x897B,0x89FF,0x8B98,0x8B99,0x8EA5,/* 0x48-0x4F */ 0x8EA4,0x8EA3,0x946E,0x946D,0x946F,0x9471,0x9473,0x9749,/* 0x50-0x57 */ 0x9872,0x995F,0x9C68,0x9C6E,0x9C6D,0x9E0B,0x9E0D,0x9E10,/* 0x58-0x5F */ 0x9E0F,0x9E12,0x9E11,0x9EA1,0x9EF5,0x9F09,0x9F47,0x9F78,/* 0x60-0x67 */ 0x9F7B,0x9F7A,0x9F79,0x571E,0x7066,0x7C6F,0x883C,0x8DB2,/* 0x68-0x6F */ 0x8EA6,0x91C3,0x9474,0x9478,0x9476,0x9475,0x9A60,0x9C74,/* 0x70-0x77 */ 0x9C73,0x9C71,0x9C75,0x9E14,0x9E13,0x9EF6,0x9F0A,0x0000,/* 0x78-0x7F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x80-0x87 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x88-0x8F */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x90-0x97 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,/* 0x98-0x9F */ 0x0000,0x9FA4,0x7068,0x7065,0x7CF7,0x866A,0x883E,0x883D,/* 0xA0-0xA7 */ 0x883F,0x8B9E,0x8C9C,0x8EA9,0x8EC9,0x974B,0x9873,0x9874,/* 0xA8-0xAF */ 0x98CC,0x9961,0x99AB,0x9A64,0x9A66,0x9A67,0x9B24,0x9E15,/* 0xB0-0xB7 */ 0x9E17,0x9F48,0x6207,0x6B1E,0x7227,0x864C,0x8EA8,0x9482,/* 0xB8-0xBF */ 0x9480,0x9481,0x9A69,0x9A68,0x9B2E,0x9E19,0x7229,0x864B,/* 0xC0-0xC7 */ 0x8B9F,0x9483,0x9C79,0x9EB7,0x7675,0x9A6B,0x9C7A,0x9E1D,/* 0xC8-0xCF */ 0x7069,0x706A,0x9EA4,0x9F7E,0x9F49,0x9F98,0x7881,0x92B9,/* 0xD0-0xD7 */ 0x88CF,0x58BB,0x6052,0x7CA7,0x5AFA,0x2554,0x2566,0x2557,/* 0xD8-0xDF */ 0x2560,0x256C,0x2563,0x255A,0x2569,0x255D,0x2552,0x2564,/* 0xE0-0xE7 */ 0x2555,0x255E,0x256A,0x2561,0x2558,0x2567,0x255B,0x2553,/* 0xE8-0xEF */ 0x2565,0x2556,0x255F,0x256B,0x2562,0x2559,0x2568,0x255C,/* 0xF0-0xF7 */ 0x2551,0x2550,0x256D,0x256E,0x2570,0x256F,0x2593,0x0000,/* 0xF8-0xFF */ }; static const wchar_t *page_charset2uni[256] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, c2u_A1, c2u_A2, c2u_A3, c2u_A4, c2u_A5, c2u_A6, c2u_A7, c2u_A8, c2u_A9, c2u_AA, c2u_AB, c2u_AC, c2u_AD, c2u_AE, c2u_AF, c2u_B0, c2u_B1, c2u_B2, c2u_B3, c2u_B4, c2u_B5, c2u_B6, c2u_B7, c2u_B8, c2u_B9, c2u_BA, c2u_BB, c2u_BC, c2u_BD, c2u_BE, c2u_BF, c2u_C0, c2u_C1, c2u_C2, c2u_C3, c2u_C4, c2u_C5, c2u_C6, NULL, NULL, c2u_C9, c2u_CA, c2u_CB, c2u_CC, c2u_CD, c2u_CE, c2u_CF, c2u_D0, c2u_D1, c2u_D2, c2u_D3, c2u_D4, c2u_D5, c2u_D6, c2u_D7, c2u_D8, c2u_D9, c2u_DA, c2u_DB, c2u_DC, c2u_DD, c2u_DE, c2u_DF, c2u_E0, c2u_E1, c2u_E2, c2u_E3, c2u_E4, c2u_E5, c2u_E6, c2u_E7, c2u_E8, c2u_E9, c2u_EA, c2u_EB, c2u_EC, c2u_ED, c2u_EE, c2u_EF, c2u_F0, c2u_F1, c2u_F2, c2u_F3, c2u_F4, c2u_F5, c2u_F6, c2u_F7, c2u_F8, c2u_F9, NULL, NULL, NULL, NULL, NULL, NULL, }; static const unsigned char u2c_02[512] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0C-0x0F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x13 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x14-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1C-0x1F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x23 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2C-0x2F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x33 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x34-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3C-0x3F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x43 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x44-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x4C-0x4F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x53 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x54-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x5C-0x5F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x63 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x64-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x6C-0x6F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x73 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x74-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x7C-0x7F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x83 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x84-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x8C-0x8F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x93 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x94-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9C-0x9F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA0-0xA3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA4-0xA7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA8-0xAB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xAC-0xAF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB0-0xB3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB4-0xB7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB8-0xBB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xBC-0xBF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC0-0xC3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA3, 0xBE, /* 0xC4-0xC7 */ 0x00, 0x00, 0xA3, 0xBC, 0xA3, 0xBD, 0xA3, 0xBF, /* 0xC8-0xCB */ 0x00, 0x00, 0xA1, 0xC5, 0x00, 0x00, 0x00, 0x00, /* 0xCC-0xCF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD0-0xD3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD4-0xD7 */ 0x00, 0x00, 0xA3, 0xBB, 0x00, 0x00, 0x00, 0x00, /* 0xD8-0xDB */ }; static const unsigned char u2c_03[512] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ 0x00, 0x00, 0xA1, 0xC2, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0C-0x0F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x13 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x14-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1C-0x1F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x23 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2C-0x2F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x33 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x34-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3C-0x3F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x43 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x44-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x4C-0x4F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x53 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x54-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x5C-0x5F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x63 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x64-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x6C-0x6F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x73 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x74-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x7C-0x7F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x83 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x84-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x8C-0x8F */ 0x00, 0x00, 0xA3, 0x44, 0xA3, 0x45, 0xA3, 0x46, /* 0x90-0x93 */ 0xA3, 0x47, 0xA3, 0x48, 0xA3, 0x49, 0xA3, 0x4A, /* 0x94-0x97 */ 0xA3, 0x4B, 0xA3, 0x4C, 0xA3, 0x4D, 0xA3, 0x4E, /* 0x98-0x9B */ 0xA3, 0x4F, 0xA3, 0x50, 0xA3, 0x51, 0xA3, 0x52, /* 0x9C-0x9F */ 0xA3, 0x53, 0xA3, 0x54, 0x00, 0x00, 0xA3, 0x55, /* 0xA0-0xA3 */ 0xA3, 0x56, 0xA3, 0x57, 0xA3, 0x58, 0xA3, 0x59, /* 0xA4-0xA7 */ 0xA3, 0x5A, 0xA3, 0x5B, 0x00, 0x00, 0x00, 0x00, /* 0xA8-0xAB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xAC-0xAF */ 0x00, 0x00, 0xA3, 0x5C, 0xA3, 0x5D, 0xA3, 0x5E, /* 0xB0-0xB3 */ 0xA3, 0x5F, 0xA3, 0x60, 0xA3, 0x61, 0xA3, 0x62, /* 0xB4-0xB7 */ 0xA3, 0x63, 0xA3, 0x64, 0xA3, 0x65, 0xA3, 0x66, /* 0xB8-0xBB */ 0xA3, 0x67, 0xA3, 0x68, 0xA3, 0x69, 0xA3, 0x6A, /* 0xBC-0xBF */ 0xA3, 0x6B, 0xA3, 0x6C, 0x00, 0x00, 0xA3, 0x6D, /* 0xC0-0xC3 */ 0xA3, 0x6E, 0xA3, 0x6F, 0xA3, 0x70, 0xA3, 0x71, /* 0xC4-0xC7 */ 0xA3, 0x72, 0xA3, 0x73, 0x00, 0x00, 0x00, 0x00, /* 0xC8-0xCB */ }; static const unsigned char u2c_20[512] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0C-0x0F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0x56, /* 0x10-0x13 */ 0xA1, 0x58, 0xA2, 0x77, 0xA1, 0xFC, 0x00, 0x00, /* 0x14-0x17 */ 0xA1, 0xA5, 0xA1, 0xA6, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1B */ 0xA1, 0xA7, 0xA1, 0xA8, 0x00, 0x00, 0x00, 0x00, /* 0x1C-0x1F */ 0x00, 0x00, 0x00, 0x00, 0xA1, 0x45, 0x00, 0x00, /* 0x20-0x23 */ 0xA3, 0xBB, 0xA1, 0x4C, 0xA1, 0x4B, 0xA1, 0x45, /* 0x24-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2C-0x2F */ 0x00, 0x00, 0x00, 0x00, 0xA1, 0xAC, 0xA1, 0xB2, /* 0x30-0x33 */ 0x00, 0x00, 0xA1, 0xAB, 0x00, 0x00, 0x00, 0x00, /* 0x34-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xB0, /* 0x38-0x3B */ 0x00, 0x00, 0x00, 0x00, 0xA1, 0xC3, 0x00, 0x00, /* 0x3C-0x3F */ }; static const unsigned char u2c_21[512] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA2, 0x4A, /* 0x00-0x03 */ 0x00, 0x00, 0xA1, 0xC1, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */ 0x00, 0x00, 0xA2, 0x4B, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0C-0x0F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x13 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x14-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1C-0x1F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x23 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2C-0x2F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x33 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x34-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3C-0x3F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x43 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x44-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x4C-0x4F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x53 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x54-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x5C-0x5F */ 0xA2, 0xB9, 0xA2, 0xBA, 0xA2, 0xBB, 0xA2, 0xBC, /* 0x60-0x63 */ 0xA2, 0xBD, 0xA2, 0xBE, 0xA2, 0xBF, 0xA2, 0xC0, /* 0x64-0x67 */ 0xA2, 0xC1, 0xA2, 0xC2, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x6C-0x6F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x73 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x74-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x7C-0x7F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x83 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x84-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x8C-0x8F */ 0xA1, 0xF6, 0xA1, 0xF4, 0xA1, 0xF7, 0xA1, 0xF5, /* 0x90-0x93 */ 0x00, 0x00, 0x00, 0x00, 0xA1, 0xF8, 0xA1, 0xF9, /* 0x94-0x97 */ 0xA1, 0xFB, 0xA1, 0xFA, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9B */ }; static const unsigned char u2c_22[512] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0C-0x0F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x13 */ 0x00, 0x00, 0xA2, 0x41, 0xA2, 0x42, 0x00, 0x00, /* 0x14-0x17 */ 0xA2, 0x58, 0x00, 0x00, 0xA1, 0xD4, 0x00, 0x00, /* 0x18-0x1B */ 0x00, 0x00, 0x00, 0x00, 0xA1, 0xDB, 0xA1, 0xE8, /* 0x1C-0x1F */ 0xA1, 0xE7, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xFD, /* 0x20-0x23 */ 0x00, 0x00, 0xA1, 0xFC, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */ 0x00, 0x00, 0xA1, 0xE4, 0xA1, 0xE5, 0xA1, 0xEC, /* 0x28-0x2B */ 0x00, 0x00, 0x00, 0x00, 0xA1, 0xED, 0x00, 0x00, /* 0x2C-0x2F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x33 */ 0xA1, 0xEF, 0xA1, 0xEE, 0x00, 0x00, 0x00, 0x00, /* 0x34-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3C-0x3F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x43 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x44-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x4C-0x4F */ 0x00, 0x00, 0x00, 0x00, 0xA1, 0xDC, 0x00, 0x00, /* 0x50-0x53 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x54-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x5C-0x5F */ 0xA1, 0xDA, 0xA1, 0xDD, 0x00, 0x00, 0xA1, 0xDD, /* 0x60-0x63 */ 0x00, 0x00, 0x00, 0x00, 0xA1, 0xD8, 0xA1, 0xD9, /* 0x64-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x6C-0x6F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x73 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x74-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x7C-0x7F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x83 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x84-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x8C-0x8F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x93 */ 0x00, 0x00, 0xA1, 0xF2, 0x00, 0x00, 0x00, 0x00, /* 0x94-0x97 */ 0x00, 0x00, 0xA1, 0xF3, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9C-0x9F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA0-0xA3 */ 0x00, 0x00, 0xA1, 0xE6, 0x00, 0x00, 0x00, 0x00, /* 0xA4-0xA7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA8-0xAB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xAC-0xAF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB0-0xB3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB4-0xB7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB8-0xBB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xE9, /* 0xBC-0xBF */ }; static const unsigned char u2c_23[512] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0x5B, /* 0x04-0x07 */ }; static const unsigned char u2c_25[512] = { 0xA2, 0x77, 0x00, 0x00, 0xA2, 0x78, 0x00, 0x00, /* 0x00-0x03 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0B */ 0xA2, 0x7A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0C-0x0F */ 0xA2, 0x7B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x13 */ 0xA2, 0x7C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x14-0x17 */ 0xA2, 0x7D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1B */ 0xA2, 0x75, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1C-0x1F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x23 */ 0xA2, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2B */ 0xA2, 0x73, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2C-0x2F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x33 */ 0xA2, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x34-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */ 0xA2, 0x71, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3C-0x3F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x43 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x44-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x4C-0x4F */ 0xF9, 0xF9, 0xF9, 0xF8, 0xF9, 0xE6, 0xF9, 0xEF, /* 0x50-0x53 */ 0xF9, 0xDD, 0xF9, 0xE8, 0xF9, 0xF1, 0xF9, 0xDF, /* 0x54-0x57 */ 0xF9, 0xEC, 0xF9, 0xF5, 0xF9, 0xE3, 0xF9, 0xEE, /* 0x58-0x5B */ 0xF9, 0xF7, 0xF9, 0xE5, 0xF9, 0xE9, 0xF9, 0xF2, /* 0x5C-0x5F */ 0xF9, 0xE0, 0xF9, 0xEB, 0xF9, 0xF4, 0xF9, 0xE2, /* 0x60-0x63 */ 0xF9, 0xE7, 0xF9, 0xF0, 0xF9, 0xDE, 0xF9, 0xED, /* 0x64-0x67 */ 0xF9, 0xF6, 0xF9, 0xE4, 0xF9, 0xEA, 0xF9, 0xF3, /* 0x68-0x6B */ 0xF9, 0xE1, 0xA2, 0x7E, 0xA2, 0xA1, 0xA2, 0xA3, /* 0x6C-0x6F */ 0xA2, 0xA2, 0xA2, 0xAC, 0xA2, 0xAD, 0xA2, 0xAE, /* 0x70-0x73 */ 0xA1, 0x5A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x74-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x7C-0x7F */ 0x00, 0x00, 0xA2, 0x62, 0xA2, 0x63, 0xA2, 0x64, /* 0x80-0x83 */ 0xA2, 0x65, 0xA2, 0x66, 0xA2, 0x67, 0xA2, 0x68, /* 0x84-0x87 */ 0xA2, 0x69, 0xA2, 0x70, 0xA2, 0x6F, 0xA2, 0x6E, /* 0x88-0x8B */ 0xA2, 0x6D, 0xA2, 0x6C, 0xA2, 0x6B, 0xA2, 0x6A, /* 0x8C-0x8F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF9, 0xFE, /* 0x90-0x93 */ 0xA2, 0x76, 0xA2, 0x79, 0x00, 0x00, 0x00, 0x00, /* 0x94-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9C-0x9F */ 0xA1, 0xBD, 0xA1, 0xBC, 0x00, 0x00, 0x00, 0x00, /* 0xA0-0xA3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA4-0xA7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA8-0xAB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xAC-0xAF */ 0x00, 0x00, 0x00, 0x00, 0xA1, 0xB6, 0xA1, 0xB5, /* 0xB0-0xB3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB4-0xB7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB8-0xBB */ 0xA1, 0xBF, 0xA1, 0xBE, 0x00, 0x00, 0x00, 0x00, /* 0xBC-0xBF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC0-0xC3 */ 0x00, 0x00, 0x00, 0x00, 0xA1, 0xBB, 0xA1, 0xBA, /* 0xC4-0xC7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xB3, /* 0xC8-0xCB */ 0x00, 0x00, 0x00, 0x00, 0xA1, 0xB7, 0xA1, 0xB4, /* 0xCC-0xCF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD0-0xD3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD4-0xD7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD8-0xDB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xDC-0xDF */ 0x00, 0x00, 0x00, 0x00, 0xA2, 0xA8, 0xA2, 0xA9, /* 0xE0-0xE3 */ 0xA2, 0xAB, 0xA2, 0xAA, 0x00, 0x00, 0x00, 0x00, /* 0xE4-0xE7 */ }; static const unsigned char u2c_26[512] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ 0x00, 0x00, 0xA1, 0xB9, 0xA1, 0xB8, 0x00, 0x00, /* 0x04-0x07 */ 0x00, 0x00, 0xA1, 0xF3, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0C-0x0F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x13 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x14-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1C-0x1F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x23 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2C-0x2F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x33 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x34-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3C-0x3F */ 0xA1, 0xF0, 0xA1, 0xF2, 0xA1, 0xF1, 0x00, 0x00, /* 0x40-0x43 */ }; static const unsigned char u2c_30[512] = { 0xA1, 0x40, 0xA1, 0x42, 0xA1, 0x43, 0xA1, 0xB2, /* 0x00-0x03 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */ 0xA1, 0x71, 0xA1, 0x72, 0xA1, 0x6D, 0xA1, 0x6E, /* 0x08-0x0B */ 0xA1, 0x75, 0xA1, 0x76, 0xA1, 0x79, 0xA1, 0x7A, /* 0x0C-0x0F */ 0xA1, 0x69, 0xA1, 0x6A, 0xA2, 0x45, 0x00, 0x00, /* 0x10-0x13 */ 0xA1, 0x65, 0xA1, 0x66, 0x00, 0x00, 0x00, 0x00, /* 0x14-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1B */ 0xA1, 0xE3, 0xA1, 0xA9, 0xA1, 0xAA, 0x00, 0x00, /* 0x1C-0x1F */ 0x00, 0x00, 0xA2, 0xC3, 0xA2, 0xC4, 0xA2, 0xC5, /* 0x20-0x23 */ 0xA2, 0xC6, 0xA2, 0xC7, 0xA2, 0xC8, 0xA2, 0xC9, /* 0x24-0x27 */ 0xA2, 0xCA, 0xA2, 0xCB, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2C-0x2F */ 0xA1, 0xCA, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x33 */ }; static const unsigned char u2c_31[512] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ 0x00, 0x00, 0xA3, 0x74, 0xA3, 0x75, 0xA3, 0x76, /* 0x04-0x07 */ 0xA3, 0x77, 0xA3, 0x78, 0xA3, 0x79, 0xA3, 0x7A, /* 0x08-0x0B */ 0xA3, 0x7B, 0xA3, 0x7C, 0xA3, 0x7D, 0xA3, 0x7E, /* 0x0C-0x0F */ 0xA3, 0xA1, 0xA3, 0xA2, 0xA3, 0xA3, 0xA3, 0xA4, /* 0x10-0x13 */ 0xA3, 0xA5, 0xA3, 0xA6, 0xA3, 0xA7, 0xA3, 0xA8, /* 0x14-0x17 */ 0xA3, 0xA9, 0xA3, 0xAA, 0xA3, 0xAB, 0xA3, 0xAC, /* 0x18-0x1B */ 0xA3, 0xAD, 0xA3, 0xAE, 0xA3, 0xAF, 0xA3, 0xB0, /* 0x1C-0x1F */ 0xA3, 0xB1, 0xA3, 0xB2, 0xA3, 0xB3, 0xA3, 0xB4, /* 0x20-0x23 */ 0xA3, 0xB5, 0xA3, 0xB6, 0xA3, 0xB7, 0xA3, 0xB8, /* 0x24-0x27 */ 0xA3, 0xB9, 0xA3, 0xBA, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2C-0x2F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x33 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x34-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3C-0x3F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x43 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x44-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x4C-0x4F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x53 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x54-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x5C-0x5F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x63 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x64-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x6C-0x6F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x73 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x74-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x7C-0x7F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x83 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x84-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x8C-0x8F */ 0x00, 0x00, 0x00, 0x00, 0xA4, 0x40, 0xA4, 0x47, /* 0x90-0x93 */ 0xA4, 0x54, 0xA5, 0x7C, 0xA4, 0x57, 0xA4, 0xA4, /* 0x94-0x97 */ 0xA4, 0x55, 0xA5, 0xD2, 0xA4, 0x41, 0xA4, 0xFE, /* 0x98-0x9B */ 0xA4, 0x42, 0xA4, 0xD1, 0xA6, 0x61, 0xA4, 0x48, /* 0x9C-0x9F */ }; static const unsigned char u2c_32[512] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0C-0x0F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x13 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x14-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1C-0x1F */ 0xA4, 0x40, 0xA4, 0x47, 0xA4, 0x54, 0xA5, 0x7C, /* 0x20-0x23 */ 0xA4, 0xAD, 0xA4, 0xBB, 0xA4, 0x43, 0xA4, 0x4B, /* 0x24-0x27 */ 0xA4, 0x45, 0xA4, 0x51, 0xA4, 0xEB, 0xA4, 0xF5, /* 0x28-0x2B */ 0xA4, 0xF4, 0xA4, 0xEC, 0xAA, 0xF7, 0xA4, 0x67, /* 0x2C-0x2F */ 0xA4, 0xE9, 0xAE, 0xE8, 0xA6, 0xB3, 0xAA, 0xC0, /* 0x30-0x33 */ 0xA6, 0x57, 0xAF, 0x53, 0xB0, 0x5D, 0xAF, 0xAC, /* 0x34-0x37 */ 0xB3, 0xD2, 0xA5, 0x4E, 0xA9, 0x49, 0xBE, 0xC7, /* 0x38-0x3B */ 0xBA, 0xCA, 0xA5, 0xF8, 0xB8, 0xEA, 0xA8, 0xF3, /* 0x3C-0x3F */ 0xB2, 0xBD, 0xA5, 0xF0, 0xA6, 0xDB, 0xA6, 0xDC, /* 0x40-0x43 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x44-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x4C-0x4F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x53 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x54-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x5C-0x5F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x63 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x64-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x6C-0x6F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x73 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x74-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x7C-0x7F */ 0xA4, 0x40, 0xA4, 0x47, 0xA4, 0x54, 0xA5, 0x7C, /* 0x80-0x83 */ 0xA4, 0xAD, 0xA4, 0xBB, 0xA4, 0x43, 0xA4, 0x4B, /* 0x84-0x87 */ 0xA4, 0x45, 0xA4, 0x51, 0xA4, 0xEB, 0xA4, 0xF5, /* 0x88-0x8B */ 0xA4, 0xF4, 0xA4, 0xEC, 0xAA, 0xF7, 0xA4, 0x67, /* 0x8C-0x8F */ 0xA4, 0xE9, 0xAE, 0xE8, 0xA6, 0xB3, 0xAA, 0xC0, /* 0x90-0x93 */ 0xA6, 0x57, 0xAF, 0x53, 0xB0, 0x5D, 0xAF, 0xAC, /* 0x94-0x97 */ 0xB3, 0xD2, 0xAF, 0xB5, 0xA8, 0x6B, 0xA4, 0x6B, /* 0x98-0x9B */ 0xBE, 0x41, 0xC0, 0x75, 0xA6, 0x4C, 0xAA, 0x60, /* 0x9C-0x9F */ 0xB6, 0xB5, 0xA5, 0xF0, 0xBC, 0x67, 0xA1, 0xC0, /* 0xA0-0xA3 */ 0xA4, 0x57, 0xA4, 0xA4, 0xA4, 0x55, 0xA5, 0xAA, /* 0xA4-0xA7 */ 0xA5, 0x6B, 0xC2, 0xE5, 0xA9, 0x76, 0xBE, 0xC7, /* 0xA8-0xAB */ 0xBA, 0xCA, 0xA5, 0xF8, 0xB8, 0xEA, 0xA8, 0xF3, /* 0xAC-0xAF */ 0xA9, 0x5D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB0-0xB3 */ }; static const unsigned char u2c_33[512] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0C-0x0F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x13 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x14-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1C-0x1F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x23 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2C-0x2F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x33 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x34-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3C-0x3F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x43 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x44-0x47 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x4C-0x4F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x53 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x54-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x5C-0x5F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x63 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x64-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x6C-0x6F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x73 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x74-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x7C-0x7F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x83 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x84-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8B */ 0x00, 0x00, 0x00, 0x00, 0xA2, 0x55, 0xA2, 0x56, /* 0x8C-0x8F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x93 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x94-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9B */ 0xA2, 0x50, 0xA2, 0x51, 0xA2, 0x52, 0x00, 0x00, /* 0x9C-0x9F */ 0x00, 0x00, 0xA2, 0x54, 0x00, 0x00, 0x00, 0x00, /* 0xA0-0xA3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA4-0xA7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA8-0xAB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xAC-0xAF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB0-0xB3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB4-0xB7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB8-0xBB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xBC-0xBF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC0-0xC3 */ 0xA2, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC4-0xC7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC8-0xCB */ 0x00, 0x00, 0x00, 0x00, 0xA2, 0x53, 0x00, 0x00, /* 0xCC-0xCF */ 0x00, 0x00, 0xA1, 0xEB, 0xA1, 0xEA, 0x00, 0x00, /* 0xD0-0xD3 */ 0x00, 0x00, 0xA2, 0x4F, 0x00, 0x00, 0x00, 0x00, /* 0xD4-0xD7 */ }; static const unsigned char u2c_4E[512] = { 0xA4, 0x40, 0xA4, 0x42, 0x00, 0x00, 0xA4, 0x43, /* 0x00-0x03 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC9, 0x45, /* 0x04-0x07 */ 0xA4, 0x56, 0xA4, 0x54, 0xA4, 0x57, 0xA4, 0x55, /* 0x08-0x0B */ 0xC9, 0x46, 0xA4, 0xA3, 0xC9, 0x4F, 0xC9, 0x4D, /* 0x0C-0x0F */ 0xA4, 0xA2, 0xA4, 0xA1, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x13 */ 0xA5, 0x42, 0xA5, 0x41, 0xA5, 0x40, 0x00, 0x00, /* 0x14-0x17 */ 0xA5, 0x43, 0xA4, 0xFE, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1B */ 0x00, 0x00, 0x00, 0x00, 0xA5, 0xE0, 0xA5, 0xE1, /* 0x1C-0x1F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x23 */ 0x00, 0x00, 0x00, 0x00, 0xA8, 0xC3, 0x00, 0x00, /* 0x24-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA4, 0x58, /* 0x28-0x2B */ 0x00, 0x00, 0xA4, 0xA4, 0xC9, 0x50, 0x00, 0x00, /* 0x2C-0x2F */ 0xA4, 0xA5, 0xC9, 0x63, 0xA6, 0xEA, 0xCB, 0xB1, /* 0x30-0x33 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x34-0x37 */ 0xA4, 0x59, 0xA4, 0xA6, 0x00, 0x00, 0xA5, 0x44, /* 0x38-0x3B */ 0xC9, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3C-0x3F */ 0x00, 0x00, 0x00, 0x00, 0xC9, 0x40, 0xA4, 0x44, /* 0x40-0x43 */ 0x00, 0x00, 0xA4, 0x5B, 0x00, 0x00, 0xC9, 0x47, /* 0x44-0x47 */ 0xA4, 0x5C, 0x00, 0x00, 0x00, 0x00, 0xA4, 0xA7, /* 0x48-0x4B */ 0x00, 0x00, 0xA5, 0x45, 0xA5, 0x47, 0xA5, 0x46, /* 0x4C-0x4F */ 0x00, 0x00, 0x00, 0x00, 0xA5, 0xE2, 0xA5, 0xE3, /* 0x50-0x53 */ 0x00, 0x00, 0x00, 0x00, 0xA8, 0xC4, 0x00, 0x00, /* 0x54-0x57 */ 0xAD, 0xBC, 0xA4, 0x41, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5B */ 0xC9, 0x41, 0xA4, 0x45, 0xA4, 0x5E, 0xA4, 0x5D, /* 0x5C-0x5F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x63 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x64-0x67 */ 0x00, 0x00, 0xA5, 0xE4, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x6C-0x6F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA8, 0xC5, /* 0x70-0x73 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x74-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7B */ 0x00, 0x00, 0x00, 0x00, 0xB0, 0xAE, 0xD4, 0x4B, /* 0x7C-0x7F */ 0x00, 0x00, 0x00, 0x00, 0xB6, 0xC3, 0xDC, 0xB1, /* 0x80-0x83 */ 0xDC, 0xB2, 0x00, 0x00, 0xA4, 0x46, 0x00, 0x00, /* 0x84-0x87 */ 0xA4, 0xA9, 0x00, 0x00, 0x00, 0x00, 0xA8, 0xC6, /* 0x88-0x8B */ 0xA4, 0x47, 0xC9, 0x48, 0xA4, 0x5F, 0x00, 0x00, /* 0x8C-0x8F */ 0x00, 0x00, 0xA4, 0xAA, 0xA4, 0xAC, 0xC9, 0x51, /* 0x90-0x93 */ 0xA4, 0xAD, 0xA4, 0xAB, 0x00, 0x00, 0x00, 0x00, /* 0x94-0x97 */ 0x00, 0x00, 0xA5, 0xE5, 0x00, 0x00, 0xA8, 0xC7, /* 0x98-0x9B */ 0x00, 0x00, 0x00, 0x00, 0xA8, 0xC8, 0xAB, 0x45, /* 0x9C-0x9F */ 0x00, 0x00, 0xA4, 0x60, 0xA4, 0xAE, 0x00, 0x00, /* 0xA0-0xA3 */ 0xA5, 0xE6, 0xA5, 0xE8, 0xA5, 0xE7, 0x00, 0x00, /* 0xA4-0xA7 */ 0xA6, 0xEB, 0x00, 0x00, 0x00, 0x00, 0xA8, 0xC9, /* 0xA8-0xAB */ 0xA8, 0xCA, 0xAB, 0x46, 0xAB, 0x47, 0x00, 0x00, /* 0xAC-0xAF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xAD, 0xBD, /* 0xB0-0xB3 */ 0x00, 0x00, 0x00, 0x00, 0xDC, 0xB3, 0x00, 0x00, /* 0xB4-0xB7 */ 0x00, 0x00, 0xF6, 0xD6, 0xA4, 0x48, 0x00, 0x00, /* 0xB8-0xBB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xBC-0xBF */ 0xA4, 0xB0, 0xA4, 0xAF, 0xC9, 0x52, 0xA4, 0xB1, /* 0xC0-0xC3 */ 0xA4, 0xB7, 0x00, 0x00, 0xA4, 0xB2, 0xA4, 0xB3, /* 0xC4-0xC7 */ 0xC9, 0x54, 0xC9, 0x53, 0xA4, 0xB5, 0xA4, 0xB6, /* 0xC8-0xCB */ 0x00, 0x00, 0xA4, 0xB4, 0x00, 0x00, 0x00, 0x00, /* 0xCC-0xCF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD0-0xD3 */ 0xA5, 0x4A, 0xA5, 0x4B, 0xA5, 0x4C, 0xA5, 0x4D, /* 0xD4-0xD7 */ 0xA5, 0x49, 0xA5, 0x50, 0xC9, 0x6A, 0x00, 0x00, /* 0xD8-0xDB */ 0xC9, 0x66, 0xC9, 0x69, 0xA5, 0x51, 0xA5, 0x61, /* 0xDC-0xDF */ 0x00, 0x00, 0xC9, 0x68, 0x00, 0x00, 0xA5, 0x4E, /* 0xE0-0xE3 */ 0xA5, 0x4F, 0xA5, 0x48, 0x00, 0x00, 0x00, 0x00, /* 0xE4-0xE7 */ 0xC9, 0x65, 0xC9, 0x67, 0x00, 0x00, 0x00, 0x00, /* 0xE8-0xEB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xEC-0xEF */ 0xA5, 0xF5, 0xC9, 0xB0, 0xA5, 0xF2, 0xA5, 0xF6, /* 0xF0-0xF3 */ 0xC9, 0xBA, 0xC9, 0xAE, 0xA5, 0xF3, 0xC9, 0xB2, /* 0xF4-0xF7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA5, 0xF4, /* 0xF8-0xFB */ 0x00, 0x00, 0xA5, 0xF7, 0x00, 0x00, 0xA5, 0xE9, /* 0xFC-0xFF */ }; static const unsigned char u2c_4F[512] = { 0xC9, 0xB1, 0xA5, 0xF8, 0xC9, 0xB5, 0x00, 0x00, /* 0x00-0x03 */ 0xC9, 0xB9, 0xC9, 0xB6, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */ 0xC9, 0xB3, 0xA5, 0xEA, 0xA5, 0xEC, 0xA5, 0xF9, /* 0x08-0x0B */ 0x00, 0x00, 0xA5, 0xEE, 0xC9, 0xAB, 0xA5, 0xF1, /* 0x0C-0x0F */ 0xA5, 0xEF, 0xA5, 0xF0, 0xC9, 0xBB, 0xC9, 0xB8, /* 0x10-0x13 */ 0xC9, 0xAF, 0xA5, 0xED, 0x00, 0x00, 0x00, 0x00, /* 0x14-0x17 */ 0xC9, 0xAC, 0xA5, 0xEB, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1B */ 0x00, 0x00, 0xC9, 0xB4, 0x00, 0x00, 0x00, 0x00, /* 0x1C-0x1F */ 0x00, 0x00, 0x00, 0x00, 0xC9, 0xB7, 0x00, 0x00, /* 0x20-0x23 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2B */ 0xC9, 0xAD, 0xCA, 0x66, 0x00, 0x00, 0xA7, 0x42, /* 0x2C-0x2F */ 0xA6, 0xF4, 0x00, 0x00, 0x00, 0x00, 0xCA, 0x67, /* 0x30-0x33 */ 0xA6, 0xF1, 0x00, 0x00, 0xA7, 0x44, 0x00, 0x00, /* 0x34-0x37 */ 0xA6, 0xF9, 0x00, 0x00, 0xA6, 0xF8, 0xCA, 0x5B, /* 0x38-0x3B */ 0xA6, 0xFC, 0xA6, 0xF7, 0xCA, 0x60, 0xCA, 0x68, /* 0x3C-0x3F */ 0x00, 0x00, 0xCA, 0x64, 0x00, 0x00, 0xA6, 0xFA, /* 0x40-0x43 */ 0x00, 0x00, 0x00, 0x00, 0xA6, 0xFD, 0xA6, 0xEE, /* 0x44-0x47 */ 0xA7, 0x47, 0xCA, 0x5D, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4B */ 0xCB, 0xBD, 0xA6, 0xEC, 0xA7, 0x43, 0xA6, 0xED, /* 0x4C-0x4F */ 0xA6, 0xF5, 0xA6, 0xF6, 0xCA, 0x62, 0xCA, 0x5E, /* 0x50-0x53 */ 0xA6, 0xFB, 0xA6, 0xF3, 0xCA, 0x5A, 0xA6, 0xEF, /* 0x54-0x57 */ 0xCA, 0x65, 0xA7, 0x45, 0xA7, 0x48, 0xA6, 0xF2, /* 0x58-0x5B */ 0xA7, 0x40, 0xA7, 0x46, 0xA6, 0xF0, 0xCA, 0x63, /* 0x5C-0x5F */ 0xA7, 0x41, 0xCA, 0x69, 0xCA, 0x5C, 0xA6, 0xFE, /* 0x60-0x63 */ 0xCA, 0x5F, 0x00, 0x00, 0x00, 0x00, 0xCA, 0x61, /* 0x64-0x67 */ 0x00, 0x00, 0xA8, 0xD8, 0xCB, 0xBF, 0xCB, 0xCB, /* 0x68-0x6B */ 0xA8, 0xD0, 0x00, 0x00, 0xCB, 0xCC, 0xA8, 0xCB, /* 0x6C-0x6F */ 0xA8, 0xD5, 0x00, 0x00, 0x00, 0x00, 0xA8, 0xCE, /* 0x70-0x73 */ 0xCB, 0xB9, 0xA8, 0xD6, 0xCB, 0xB8, 0xCB, 0xBC, /* 0x74-0x77 */ 0xCB, 0xC3, 0xCB, 0xC1, 0xA8, 0xDE, 0xA8, 0xD9, /* 0x78-0x7B */ 0xCB, 0xB3, 0xCB, 0xB5, 0xA8, 0xDB, 0xA8, 0xCF, /* 0x7C-0x7F */ 0xCB, 0xB6, 0xCB, 0xC2, 0xCB, 0xC9, 0xA8, 0xD4, /* 0x80-0x83 */ 0xCB, 0xBB, 0xCB, 0xB4, 0xA8, 0xD3, 0xCB, 0xB7, /* 0x84-0x87 */ 0xA8, 0xD7, 0xCB, 0xBA, 0x00, 0x00, 0xA8, 0xD2, /* 0x88-0x8B */ 0x00, 0x00, 0xA8, 0xCD, 0x00, 0x00, 0xA8, 0xDC, /* 0x8C-0x8F */ 0xCB, 0xC4, 0xA8, 0xDD, 0xCB, 0xC8, 0x00, 0x00, /* 0x90-0x93 */ 0xCB, 0xC6, 0xCB, 0xCA, 0xA8, 0xDA, 0xCB, 0xBE, /* 0x94-0x97 */ 0xCB, 0xB2, 0x00, 0x00, 0xCB, 0xC0, 0xA8, 0xD1, /* 0x98-0x9B */ 0xCB, 0xC5, 0xA8, 0xCC, 0xCB, 0xC7, 0x00, 0x00, /* 0x9C-0x9F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA0-0xA3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA4-0xA7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA8-0xAB */ 0x00, 0x00, 0x00, 0x00, 0xAB, 0x56, 0xAB, 0x4A, /* 0xAC-0xAF */ 0x00, 0x00, 0x00, 0x00, 0xCD, 0xE0, 0xCD, 0xE8, /* 0xB0-0xB3 */ 0x00, 0x00, 0xAB, 0x49, 0xAB, 0x51, 0xAB, 0x5D, /* 0xB4-0xB7 */ 0x00, 0x00, 0xCD, 0xEE, 0xCD, 0xEC, 0xCD, 0xE7, /* 0xB8-0xBB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xAB, 0x4B, /* 0xBC-0xBF */ 0xCD, 0xED, 0xCD, 0xE3, 0xAB, 0x59, 0xAB, 0x50, /* 0xC0-0xC3 */ 0xAB, 0x58, 0xCD, 0xDE, 0x00, 0x00, 0xCD, 0xEA, /* 0xC4-0xC7 */ 0x00, 0x00, 0xCD, 0xE1, 0xAB, 0x54, 0xCD, 0xE2, /* 0xC8-0xCB */ 0x00, 0x00, 0xCD, 0xDD, 0xAB, 0x5B, 0xAB, 0x4E, /* 0xCC-0xCF */ 0xAB, 0x57, 0xAB, 0x4D, 0x00, 0x00, 0xCD, 0xDF, /* 0xD0-0xD3 */ 0xCD, 0xE4, 0x00, 0x00, 0xCD, 0xEB, 0xAB, 0x55, /* 0xD4-0xD7 */ 0xAB, 0x52, 0xCD, 0xE6, 0xAB, 0x5A, 0xCD, 0xE9, /* 0xD8-0xDB */ 0xCD, 0xE5, 0xAB, 0x4F, 0xAB, 0x5C, 0xAB, 0x53, /* 0xDC-0xDF */ 0xAB, 0x4C, 0xAB, 0x48, 0x00, 0x00, 0x00, 0x00, /* 0xE0-0xE3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE4-0xE7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE8-0xEB */ 0xCD, 0xEF, 0x00, 0x00, 0xAD, 0xD7, 0xAD, 0xC1, /* 0xEC-0xEF */ 0x00, 0x00, 0xAD, 0xD1, 0x00, 0x00, 0xAD, 0xD6, /* 0xF0-0xF3 */ 0xD0, 0xD0, 0xD0, 0xCF, 0xD0, 0xD4, 0xD0, 0xD5, /* 0xF4-0xF7 */ 0xAD, 0xC4, 0x00, 0x00, 0xAD, 0xCD, 0x00, 0x00, /* 0xF8-0xFB */ 0x00, 0x00, 0x00, 0x00, 0xAD, 0xDA, 0x00, 0x00, /* 0xFC-0xFF */ }; static const unsigned char u2c_50[512] = { 0xAD, 0xCE, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ 0x00, 0x00, 0xD0, 0xC9, 0xAD, 0xC7, 0xD0, 0xCA, /* 0x04-0x07 */ 0x00, 0x00, 0xAD, 0xDC, 0x00, 0x00, 0xAD, 0xD3, /* 0x08-0x0B */ 0xAD, 0xBE, 0xAD, 0xBF, 0xD0, 0xDD, 0xB0, 0xBF, /* 0x0C-0x0F */ 0x00, 0x00, 0xAD, 0xCC, 0xAD, 0xCB, 0xD0, 0xCB, /* 0x10-0x13 */ 0xAD, 0xCF, 0xD4, 0x5B, 0xAD, 0xC6, 0xD0, 0xD6, /* 0x14-0x17 */ 0xAD, 0xD5, 0xAD, 0xD4, 0xAD, 0xCA, 0xD0, 0xCE, /* 0x18-0x1B */ 0xD0, 0xD7, 0x00, 0x00, 0xD0, 0xC8, 0xAD, 0xC9, /* 0x1C-0x1F */ 0xD0, 0xD8, 0xAD, 0xD2, 0xD0, 0xCC, 0xAD, 0xC0, /* 0x20-0x23 */ 0x00, 0x00, 0xAD, 0xC3, 0xAD, 0xC2, 0xD0, 0xD9, /* 0x24-0x27 */ 0xAD, 0xD0, 0xAD, 0xC5, 0xAD, 0xD9, 0xAD, 0xDB, /* 0x28-0x2B */ 0xD0, 0xD3, 0xAD, 0xD8, 0x00, 0x00, 0xD0, 0xDB, /* 0x2C-0x2F */ 0xD0, 0xCD, 0xD0, 0xDC, 0x00, 0x00, 0xD0, 0xD1, /* 0x30-0x33 */ 0x00, 0x00, 0xD0, 0xDA, 0x00, 0x00, 0xD0, 0xD2, /* 0x34-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */ 0xAD, 0xC8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3C-0x3F */ 0xD4, 0x63, 0xD4, 0x57, 0x00, 0x00, 0xB0, 0xB3, /* 0x40-0x43 */ 0x00, 0x00, 0xD4, 0x5C, 0xD4, 0x62, 0xB0, 0xB2, /* 0x44-0x47 */ 0xD4, 0x55, 0xB0, 0xB6, 0xD4, 0x59, 0xD4, 0x52, /* 0x48-0x4B */ 0xB0, 0xB4, 0xD4, 0x56, 0xB0, 0xB9, 0xB0, 0xBE, /* 0x4C-0x4F */ 0x00, 0x00, 0xD4, 0x67, 0x00, 0x00, 0xD4, 0x51, /* 0x50-0x53 */ 0x00, 0x00, 0xB0, 0xBA, 0x00, 0x00, 0xD4, 0x66, /* 0x54-0x57 */ 0x00, 0x00, 0x00, 0x00, 0xB0, 0xB5, 0xD4, 0x58, /* 0x58-0x5B */ 0xB0, 0xB1, 0xD4, 0x53, 0xD4, 0x4F, 0xD4, 0x5D, /* 0x5C-0x5F */ 0xD4, 0x50, 0xD4, 0x4E, 0xD4, 0x5A, 0xD4, 0x60, /* 0x60-0x63 */ 0xD4, 0x61, 0xB0, 0xB7, 0x00, 0x00, 0x00, 0x00, /* 0x64-0x67 */ 0xD8, 0x5B, 0xD4, 0x5E, 0xD4, 0x4D, 0xD4, 0x5F, /* 0x68-0x6B */ 0x00, 0x00, 0xB0, 0xC1, 0xD4, 0x64, 0xB0, 0xC0, /* 0x6C-0x6F */ 0xD4, 0x4C, 0x00, 0x00, 0xD4, 0x54, 0xD4, 0x65, /* 0x70-0x73 */ 0xB0, 0xBC, 0xB0, 0xBB, 0xB0, 0xB8, 0xB0, 0xBD, /* 0x74-0x77 */ 0x00, 0x00, 0x00, 0x00, 0xB0, 0xAF, 0x00, 0x00, /* 0x78-0x7B */ 0x00, 0x00, 0xB0, 0xB0, 0x00, 0x00, 0x00, 0x00, /* 0x7C-0x7F */ 0xB3, 0xC8, 0x00, 0x00, 0xD8, 0x5E, 0xD8, 0x57, /* 0x80-0x83 */ 0x00, 0x00, 0xB3, 0xC5, 0x00, 0x00, 0xD8, 0x5F, /* 0x84-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xD8, 0x55, /* 0x88-0x8B */ 0xD8, 0x58, 0xB3, 0xC4, 0xD8, 0x59, 0x00, 0x00, /* 0x8C-0x8F */ 0x00, 0x00, 0xB3, 0xC7, 0xD8, 0x5D, 0x00, 0x00, /* 0x90-0x93 */ 0xD8, 0x53, 0xD8, 0x52, 0xB3, 0xC9, 0x00, 0x00, /* 0x94-0x97 */ 0xB3, 0xCA, 0xB3, 0xC6, 0xB3, 0xCB, 0xD8, 0x51, /* 0x98-0x9B */ 0xD8, 0x5C, 0xD8, 0x5A, 0xD8, 0x54, 0x00, 0x00, /* 0x9C-0x9F */ 0x00, 0x00, 0x00, 0x00, 0xB3, 0xC3, 0xD8, 0x56, /* 0xA0-0xA3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA4-0xA7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA8-0xAB */ 0xB6, 0xCA, 0xB6, 0xC4, 0xDC, 0xB7, 0xB6, 0xCD, /* 0xAC-0xAF */ 0xDC, 0xBD, 0xDC, 0xC0, 0xB6, 0xC6, 0xB6, 0xC7, /* 0xB0-0xB3 */ 0xDC, 0xBA, 0xB6, 0xC5, 0xDC, 0xC3, 0xB6, 0xCB, /* 0xB4-0xB7 */ 0xDC, 0xC4, 0x00, 0x00, 0xDC, 0xBF, 0xB6, 0xCC, /* 0xB8-0xBB */ 0x00, 0x00, 0xDC, 0xB4, 0xB6, 0xC9, 0xDC, 0xB5, /* 0xBC-0xBF */ 0x00, 0x00, 0xDC, 0xBE, 0xDC, 0xBC, 0x00, 0x00, /* 0xC0-0xC3 */ 0xDC, 0xB8, 0xB6, 0xC8, 0xDC, 0xB6, 0xB6, 0xCE, /* 0xC4-0xC7 */ 0xDC, 0xBB, 0xDC, 0xC2, 0xDC, 0xB9, 0xDC, 0xC1, /* 0xC8-0xCB */ 0x00, 0x00, 0x00, 0x00, 0xB9, 0xB6, 0xB9, 0xB3, /* 0xCC-0xCF */ 0x00, 0x00, 0xB9, 0xB4, 0x00, 0x00, 0xE0, 0xF9, /* 0xD0-0xD3 */ 0xE0, 0xF1, 0xB9, 0xB2, 0xB9, 0xAF, 0xE0, 0xF2, /* 0xD4-0xD7 */ 0x00, 0x00, 0x00, 0x00, 0xB9, 0xB1, 0xE0, 0xF5, /* 0xD8-0xDB */ 0x00, 0x00, 0xE0, 0xF7, 0x00, 0x00, 0x00, 0x00, /* 0xDC-0xDF */ 0xE0, 0xFE, 0x00, 0x00, 0x00, 0x00, 0xE0, 0xFD, /* 0xE0-0xE3 */ 0xE0, 0xF8, 0xB9, 0xAE, 0xE0, 0xF0, 0xB9, 0xAC, /* 0xE4-0xE7 */ 0xE0, 0xF3, 0xB9, 0xB7, 0xE0, 0xF6, 0x00, 0x00, /* 0xE8-0xEB */ 0xE0, 0xFA, 0xB9, 0xB0, 0xB9, 0xAD, 0xE0, 0xFC, /* 0xEC-0xEF */ 0xE0, 0xFB, 0xB9, 0xB5, 0x00, 0x00, 0xE0, 0xF4, /* 0xF0-0xF3 */ 0x00, 0x00, 0xBB, 0xF8, 0xE4, 0xEC, 0x00, 0x00, /* 0xF4-0xF7 */ 0xE4, 0xE9, 0xBB, 0xF9, 0x00, 0x00, 0xBB, 0xF7, /* 0xF8-0xFB */ 0x00, 0x00, 0xE4, 0xF0, 0xE4, 0xED, 0xE4, 0xE6, /* 0xFC-0xFF */ }; static const unsigned char u2c_51[512] = { 0xBB, 0xF6, 0x00, 0x00, 0xBB, 0xFA, 0xE4, 0xE7, /* 0x00-0x03 */ 0xBB, 0xF5, 0xBB, 0xFD, 0xE4, 0xEA, 0xE4, 0xEB, /* 0x04-0x07 */ 0xBB, 0xFB, 0xBB, 0xFC, 0xE4, 0xF1, 0xE4, 0xEE, /* 0x08-0x0B */ 0xE4, 0xEF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0C-0x0F */ 0xBE, 0xAA, 0xE8, 0xF8, 0xBE, 0xA7, 0xE8, 0xF5, /* 0x10-0x13 */ 0xBE, 0xA9, 0xBE, 0xAB, 0x00, 0x00, 0xE8, 0xF6, /* 0x14-0x17 */ 0xBE, 0xA8, 0x00, 0x00, 0xE8, 0xF7, 0x00, 0x00, /* 0x18-0x1B */ 0xE8, 0xF4, 0x00, 0x00, 0x00, 0x00, 0xC0, 0x76, /* 0x1C-0x1F */ 0xEC, 0xBD, 0xC0, 0x77, 0xEC, 0xBB, 0x00, 0x00, /* 0x20-0x23 */ 0xEC, 0xBC, 0xEC, 0xBA, 0xEC, 0xB9, 0x00, 0x00, /* 0x24-0x27 */ 0x00, 0x00, 0xEC, 0xBE, 0xC0, 0x75, 0x00, 0x00, /* 0x28-0x2B */ 0x00, 0x00, 0xEF, 0xB8, 0xEF, 0xB9, 0x00, 0x00, /* 0x2C-0x2F */ 0xE4, 0xE8, 0xEF, 0xB7, 0xC0, 0x78, 0xC3, 0x5F, /* 0x30-0x33 */ 0xF1, 0xEB, 0xF1, 0xEC, 0x00, 0x00, 0xC4, 0xD7, /* 0x34-0x37 */ 0xC4, 0xD8, 0xF5, 0xC1, 0xF5, 0xC0, 0xC5, 0x6C, /* 0x38-0x3B */ 0xC5, 0x6B, 0xF7, 0xD0, 0x00, 0x00, 0xA4, 0x49, /* 0x3C-0x3F */ 0xA4, 0x61, 0xA4, 0xB9, 0x00, 0x00, 0xA4, 0xB8, /* 0x40-0x43 */ 0xA5, 0x53, 0xA5, 0x52, 0xA5, 0xFC, 0xA5, 0xFB, /* 0x44-0x47 */ 0xA5, 0xFD, 0xA5, 0xFA, 0x00, 0x00, 0xA7, 0x4A, /* 0x48-0x4B */ 0xA7, 0x49, 0xA7, 0x4B, 0x00, 0x00, 0x00, 0x00, /* 0x4C-0x4F */ 0x00, 0x00, 0x00, 0x00, 0xA8, 0xE0, 0x00, 0x00, /* 0x50-0x53 */ 0xA8, 0xDF, 0xA8, 0xE1, 0x00, 0x00, 0xAB, 0x5E, /* 0x54-0x57 */ 0x00, 0x00, 0xA2, 0x59, 0xD0, 0xDE, 0xA2, 0x5A, /* 0x58-0x5B */ 0xB0, 0xC2, 0xA2, 0x5C, 0xA2, 0x5B, 0xD8, 0x60, /* 0x5C-0x5F */ 0x00, 0x00, 0xA2, 0x5D, 0xB9, 0xB8, 0xA2, 0x5E, /* 0x60-0x63 */ 0x00, 0x00, 0xA4, 0x4A, 0x00, 0x00, 0xA4, 0xBA, /* 0x64-0x67 */ 0xA5, 0xFE, 0xA8, 0xE2, 0x00, 0x00, 0xA4, 0x4B, /* 0x68-0x6B */ 0xA4, 0xBD, 0xA4, 0xBB, 0xA4, 0xBC, 0x00, 0x00, /* 0x6C-0x6F */ 0x00, 0x00, 0xA6, 0x40, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x73 */ 0x00, 0x00, 0xA7, 0x4C, 0xA8, 0xE4, 0xA8, 0xE3, /* 0x74-0x77 */ 0xA8, 0xE5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7B */ 0xAD, 0xDD, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x7C-0x7F */ 0xBE, 0xAC, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x83 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC9, 0x4E, /* 0x84-0x87 */ 0x00, 0x00, 0xA5, 0x54, 0xA5, 0x55, 0x00, 0x00, /* 0x88-0x8B */ 0x00, 0x00, 0xA6, 0x41, 0x00, 0x00, 0xCA, 0x6A, /* 0x8C-0x8F */ 0x00, 0x00, 0xAB, 0x60, 0xAB, 0x5F, 0xD0, 0xE0, /* 0x90-0x93 */ 0xD0, 0xDF, 0xB0, 0xC3, 0x00, 0x00, 0xA4, 0xBE, /* 0x94-0x97 */ 0xC9, 0x55, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9B */ 0x00, 0x00, 0x00, 0x00, 0xCB, 0xCD, 0x00, 0x00, /* 0x9C-0x9F */ 0xAB, 0x61, 0x00, 0x00, 0xAD, 0xE0, 0x00, 0x00, /* 0xA0-0xA3 */ 0xAD, 0xDE, 0xAD, 0xDF, 0x00, 0x00, 0x00, 0x00, /* 0xA4-0xA7 */ 0x00, 0x00, 0x00, 0x00, 0xBE, 0xAD, 0x00, 0x00, /* 0xA8-0xAB */ 0xA5, 0x56, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xAC-0xAF */ 0xA6, 0x42, 0xC9, 0xBC, 0x00, 0x00, 0x00, 0x00, /* 0xB0-0xB3 */ 0x00, 0x00, 0x00, 0x00, 0xA7, 0x4D, 0xA7, 0x4E, /* 0xB4-0xB7 */ 0x00, 0x00, 0xCA, 0x6B, 0x00, 0x00, 0x00, 0x00, /* 0xB8-0xBB */ 0xCB, 0xCE, 0xA8, 0xE6, 0xCB, 0xCF, 0x00, 0x00, /* 0xBC-0xBF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC0-0xC3 */ 0xD0, 0xE2, 0xD0, 0xE3, 0xAD, 0xE3, 0x00, 0x00, /* 0xC4-0xC7 */ 0xD0, 0xE4, 0x00, 0x00, 0xD0, 0xE1, 0xAD, 0xE4, /* 0xC8-0xCB */ 0xAD, 0xE2, 0xAD, 0xE1, 0xD0, 0xE5, 0x00, 0x00, /* 0xCC-0xCF */ 0xD4, 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD0-0xD3 */ 0xD8, 0x61, 0x00, 0x00, 0x00, 0x00, 0xDC, 0xC5, /* 0xD4-0xD7 */ 0xE1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD8-0xDB */ 0xBB, 0xFE, 0xBE, 0xAE, 0xE8, 0xF9, 0x00, 0x00, /* 0xDC-0xDF */ 0xA4, 0x4C, 0xA4, 0x5A, 0x00, 0x00, 0x00, 0x00, /* 0xE0-0xE3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE4-0xE7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE8-0xEB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xEC-0xEF */ 0xB0, 0xC4, 0xB3, 0xCD, 0x00, 0x00, 0xB9, 0xB9, /* 0xF0-0xF3 */ 0x00, 0x00, 0xC9, 0x42, 0xA4, 0xBF, 0x00, 0x00, /* 0xF4-0xF7 */ 0xA5, 0x59, 0xA5, 0x57, 0xA5, 0x58, 0x00, 0x00, /* 0xF8-0xFB */ 0x00, 0x00, 0xA8, 0xE7, 0x00, 0x00, 0x00, 0x00, /* 0xFC-0xFF */ }; static const unsigned char u2c_52[512] = { 0xA4, 0x4D, 0xA4, 0x4E, 0x00, 0x00, 0xA4, 0x62, /* 0x00-0x03 */ 0x00, 0x00, 0x00, 0x00, 0xA4, 0xC0, 0xA4, 0xC1, /* 0x04-0x07 */ 0xA4, 0xC2, 0xC9, 0xBE, 0xA5, 0x5A, 0x00, 0x00, /* 0x08-0x0B */ 0xC9, 0x6B, 0x00, 0x00, 0xA6, 0x46, 0x00, 0x00, /* 0x0C-0x0F */ 0xC9, 0xBF, 0xA6, 0x44, 0xA6, 0x45, 0xC9, 0xBD, /* 0x10-0x13 */ 0x00, 0x00, 0x00, 0x00, 0xA6, 0x47, 0xA6, 0x43, /* 0x14-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1B */ 0xCA, 0x6C, 0xAA, 0xEC, 0xCA, 0x6D, 0x00, 0x00, /* 0x1C-0x1F */ 0x00, 0x00, 0xCA, 0x6E, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x23 */ 0xA7, 0x50, 0xA7, 0x4F, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */ 0xA7, 0x53, 0xA7, 0x51, 0xA7, 0x52, 0x00, 0x00, /* 0x28-0x2B */ 0x00, 0x00, 0x00, 0x00, 0xA8, 0xED, 0x00, 0x00, /* 0x2C-0x2F */ 0xA8, 0xEC, 0xCB, 0xD4, 0xCB, 0xD1, 0xCB, 0xD2, /* 0x30-0x33 */ 0x00, 0x00, 0xCB, 0xD0, 0xA8, 0xEE, 0xA8, 0xEA, /* 0x34-0x37 */ 0xA8, 0xE9, 0x00, 0x00, 0xA8, 0xEB, 0xA8, 0xE8, /* 0x38-0x3B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3C-0x3F */ 0x00, 0x00, 0xA8, 0xEF, 0x00, 0x00, 0xAB, 0x63, /* 0x40-0x43 */ 0xCD, 0xF0, 0x00, 0x00, 0xCB, 0xD3, 0xAB, 0x68, /* 0x44-0x47 */ 0x00, 0x00, 0xCD, 0xF1, 0xAB, 0x64, 0xAB, 0x67, /* 0x48-0x4B */ 0xAB, 0x66, 0xAB, 0x65, 0xAB, 0x62, 0x00, 0x00, /* 0x4C-0x4F */ 0x00, 0x00, 0x00, 0x00, 0xD0, 0xE8, 0x00, 0x00, /* 0x50-0x53 */ 0xAD, 0xE7, 0xD0, 0xEB, 0xAD, 0xE5, 0x00, 0x00, /* 0x54-0x57 */ 0x00, 0x00, 0x00, 0x00, 0xD0, 0xE7, 0xAD, 0xE8, /* 0x58-0x5B */ 0xAD, 0xE6, 0xAD, 0xE9, 0xD0, 0xE9, 0xD0, 0xEA, /* 0x5C-0x5F */ 0x00, 0x00, 0xD0, 0xE6, 0xD0, 0xEC, 0x00, 0x00, /* 0x60-0x63 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x64-0x67 */ 0x00, 0x00, 0xB3, 0xD1, 0xB0, 0xC5, 0xD4, 0x69, /* 0x68-0x6B */ 0xD4, 0x6B, 0xD4, 0x6A, 0xD4, 0x6C, 0xB0, 0xC6, /* 0x6C-0x6F */ 0x00, 0x00, 0x00, 0x00, 0xB3, 0xCE, 0x00, 0x00, /* 0x70-0x73 */ 0xB3, 0xCF, 0xB3, 0xD0, 0x00, 0x00, 0xB6, 0xD0, /* 0x74-0x77 */ 0xDC, 0xC7, 0x00, 0x00, 0xDC, 0xC6, 0xDC, 0xC8, /* 0x78-0x7B */ 0xDC, 0xC9, 0xB6, 0xD1, 0x00, 0x00, 0xB6, 0xCF, /* 0x7C-0x7F */ 0xE1, 0x41, 0xE1, 0x42, 0xB9, 0xBB, 0xB9, 0xBA, /* 0x80-0x83 */ 0xE3, 0x5A, 0x00, 0x00, 0x00, 0x00, 0xBC, 0x40, /* 0x84-0x87 */ 0xBC, 0x41, 0xBC, 0x42, 0xBC, 0x44, 0xE4, 0xF2, /* 0x88-0x8B */ 0xE4, 0xF3, 0xBC, 0x43, 0x00, 0x00, 0x00, 0x00, /* 0x8C-0x8F */ 0x00, 0x00, 0xBE, 0xAF, 0x00, 0x00, 0xBE, 0xB0, /* 0x90-0x93 */ 0x00, 0x00, 0x00, 0x00, 0xF1, 0xED, 0xF5, 0xC3, /* 0x94-0x97 */ 0xF5, 0xC2, 0xF7, 0xD1, 0x00, 0x00, 0xA4, 0x4F, /* 0x98-0x9B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA5, 0x5C, /* 0x9C-0x9F */ 0xA5, 0x5B, 0x00, 0x00, 0x00, 0x00, 0xA6, 0x48, /* 0xA0-0xA3 */ 0x00, 0x00, 0x00, 0x00, 0xC9, 0xC0, 0x00, 0x00, /* 0xA4-0xA7 */ 0x00, 0x00, 0xA7, 0x55, 0xA7, 0x56, 0xA7, 0x54, /* 0xA8-0xAB */ 0xA7, 0x57, 0xCA, 0x6F, 0xCA, 0x70, 0x00, 0x00, /* 0xAC-0xAF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB0-0xB3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB4-0xB7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA8, 0xF1, /* 0xB8-0xBB */ 0xCB, 0xD5, 0x00, 0x00, 0xA8, 0xF0, 0x00, 0x00, /* 0xBC-0xBF */ 0xCD, 0xF2, 0xAB, 0x6C, 0xCD, 0xF3, 0xAB, 0x6B, /* 0xC0-0xC3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xAB, 0x69, /* 0xC4-0xC7 */ 0x00, 0x00, 0xAB, 0x6A, 0x00, 0x00, 0x00, 0x00, /* 0xC8-0xCB */ 0x00, 0x00, 0xD0, 0xED, 0x00, 0x00, 0x00, 0x00, /* 0xCC-0xCF */ 0x00, 0x00, 0x00, 0x00, 0xB0, 0xC7, 0xD4, 0x6E, /* 0xD0-0xD3 */ 0x00, 0x00, 0xB0, 0xCA, 0xD4, 0x6D, 0xB1, 0xE5, /* 0xD4-0xD7 */ 0xB0, 0xC9, 0xB0, 0xC8, 0x00, 0x00, 0xB3, 0xD4, /* 0xD8-0xDB */ 0x00, 0x00, 0xB3, 0xD3, 0xB3, 0xD2, 0xB6, 0xD2, /* 0xDC-0xDF */ 0x00, 0x00, 0x00, 0x00, 0xB6, 0xD5, 0xB6, 0xD6, /* 0xE0-0xE3 */ 0xB6, 0xD4, 0x00, 0x00, 0xB6, 0xD3, 0x00, 0x00, /* 0xE4-0xE7 */ 0x00, 0x00, 0xE1, 0x43, 0x00, 0x00, 0xE1, 0x44, /* 0xE8-0xEB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE4, 0xF5, /* 0xEC-0xEF */ 0xBC, 0x45, 0xE4, 0xF4, 0x00, 0x00, 0xBE, 0xB1, /* 0xF0-0xF3 */ 0xEC, 0xBF, 0xC0, 0x79, 0x00, 0x00, 0xF1, 0xEE, /* 0xF4-0xF7 */ 0xC4, 0x55, 0x00, 0x00, 0xA4, 0x63, 0xA4, 0xC3, /* 0xF8-0xFB */ 0xC9, 0x56, 0x00, 0x00, 0xA4, 0xC4, 0xA4, 0xC5, /* 0xFC-0xFF */ }; static const unsigned char u2c_53[512] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ 0x00, 0x00, 0xA5, 0x5D, 0xA5, 0x5E, 0x00, 0x00, /* 0x04-0x07 */ 0xA6, 0x49, 0xCA, 0x71, 0xCB, 0xD6, 0xCB, 0xD7, /* 0x08-0x0B */ 0x00, 0x00, 0xAB, 0x6D, 0xD0, 0xEE, 0xB0, 0xCC, /* 0x0C-0x0F */ 0xB0, 0xCB, 0xD8, 0x63, 0xD8, 0x62, 0x00, 0x00, /* 0x10-0x13 */ 0x00, 0x00, 0xA4, 0x50, 0xA4, 0xC6, 0xA5, 0x5F, /* 0x14-0x17 */ 0x00, 0x00, 0xB0, 0xCD, 0xC9, 0x43, 0x00, 0x00, /* 0x18-0x1B */ 0xC9, 0x6C, 0xA5, 0x60, 0x00, 0x00, 0xC9, 0xC2, /* 0x1C-0x1F */ 0xA6, 0x4B, 0xA6, 0x4A, 0xC9, 0xC1, 0xA7, 0x58, /* 0x20-0x23 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */ 0x00, 0x00, 0x00, 0x00, 0xAD, 0xEA, 0x00, 0x00, /* 0x28-0x2B */ 0x00, 0x00, 0xD4, 0x6F, 0x00, 0x00, 0xB6, 0xD7, /* 0x2C-0x2F */ 0xE1, 0x45, 0xB9, 0xBC, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x33 */ 0xE8, 0xFA, 0x00, 0x00, 0x00, 0x00, 0xF3, 0xFD, /* 0x34-0x37 */ 0x00, 0x00, 0xA4, 0xC7, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */ 0xCB, 0xD8, 0xCD, 0xF4, 0xB0, 0xD0, 0xB0, 0xCE, /* 0x3C-0x3F */ 0xB0, 0xCF, 0xA4, 0x51, 0x00, 0x00, 0xA4, 0x64, /* 0x40-0x43 */ 0xA2, 0xCD, 0xA4, 0xCA, 0x00, 0x00, 0xA4, 0xC9, /* 0x44-0x47 */ 0xA4, 0xC8, 0xA5, 0x63, 0xA5, 0x62, 0x00, 0x00, /* 0x48-0x4B */ 0xC9, 0x6D, 0xC9, 0xC3, 0x00, 0x00, 0x00, 0x00, /* 0x4C-0x4F */ 0x00, 0x00, 0xA8, 0xF5, 0xA8, 0xF2, 0xA8, 0xF4, /* 0x50-0x53 */ 0xA8, 0xF3, 0x00, 0x00, 0x00, 0x00, 0xAB, 0x6E, /* 0x54-0x57 */ 0x00, 0x00, 0x00, 0x00, 0xB3, 0xD5, 0x00, 0x00, /* 0x58-0x5B */ 0xA4, 0x52, 0x00, 0x00, 0xA4, 0xCB, 0x00, 0x00, /* 0x5C-0x5F */ 0xA5, 0x65, 0xA5, 0x64, 0x00, 0x00, 0xCA, 0x72, /* 0x60-0x63 */ 0x00, 0x00, 0x00, 0x00, 0xA8, 0xF6, 0x00, 0x00, /* 0x64-0x67 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6B */ 0xC9, 0x57, 0x00, 0x00, 0xA5, 0x67, 0xA5, 0x66, /* 0x6C-0x6F */ 0xA6, 0x4C, 0xA6, 0x4D, 0xCA, 0x73, 0xA7, 0x59, /* 0x70-0x73 */ 0x00, 0x00, 0xA7, 0x5A, 0x00, 0x00, 0xA8, 0xF7, /* 0x74-0x77 */ 0xA8, 0xF8, 0xA8, 0xF9, 0x00, 0x00, 0xAB, 0x6F, /* 0x78-0x7B */ 0xCD, 0xF5, 0x00, 0x00, 0x00, 0x00, 0xAD, 0xEB, /* 0x7C-0x7F */ 0x00, 0x00, 0x00, 0x00, 0xC9, 0x44, 0x00, 0x00, /* 0x80-0x83 */ 0xA4, 0xCC, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x84-0x87 */ 0x00, 0x00, 0x00, 0x00, 0xC9, 0xC4, 0x00, 0x00, /* 0x88-0x8B */ 0x00, 0x00, 0x00, 0x00, 0xCA, 0x74, 0xCA, 0x75, /* 0x8C-0x8F */ 0x00, 0x00, 0x00, 0x00, 0xCB, 0xD9, 0x00, 0x00, /* 0x90-0x93 */ 0xCB, 0xDA, 0x00, 0x00, 0xCD, 0xF7, 0xCD, 0xF6, /* 0x94-0x97 */ 0xCD, 0xF9, 0xCD, 0xF8, 0xAB, 0x70, 0x00, 0x00, /* 0x98-0x9B */ 0xD4, 0x70, 0xAD, 0xED, 0xD0, 0xEF, 0xAD, 0xEC, /* 0x9C-0x9F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA0-0xA3 */ 0xD8, 0x64, 0xB3, 0xD6, 0x00, 0x00, 0xD8, 0x65, /* 0xA4-0xA7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA8-0xAB */ 0xE1, 0x46, 0xB9, 0xBD, 0x00, 0x00, 0x00, 0x00, /* 0xAC-0xAF */ 0x00, 0x00, 0x00, 0x00, 0xBC, 0x46, 0x00, 0x00, /* 0xB0-0xB3 */ 0xF1, 0xEF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB4-0xB7 */ 0x00, 0x00, 0xC9, 0x58, 0x00, 0x00, 0xA5, 0x68, /* 0xB8-0xBB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xBC-0xBF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xB0, 0xD1, /* 0xC0-0xC3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC4-0xC7 */ 0xA4, 0x53, 0xA4, 0x65, 0xA4, 0xCE, 0xA4, 0xCD, /* 0xC8-0xCB */ 0x00, 0x00, 0xA4, 0xCF, 0x00, 0x00, 0x00, 0x00, /* 0xCC-0xCF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD0-0xD3 */ 0xA8, 0xFB, 0x00, 0x00, 0xA8, 0xFA, 0xA8, 0xFC, /* 0xD4-0xD7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xAB, 0x71, /* 0xD8-0xDB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xAD, 0xEE, /* 0xDC-0xDF */ 0x00, 0x00, 0xE8, 0xFB, 0xC2, 0x4F, 0xA4, 0x66, /* 0xE0-0xE3 */ 0xA5, 0x6A, 0xA5, 0x79, 0xA5, 0x74, 0x00, 0x00, /* 0xE4-0xE7 */ 0xA5, 0x6F, 0xA5, 0x6E, 0xA5, 0x75, 0xA5, 0x73, /* 0xE8-0xEB */ 0xA5, 0x6C, 0xA5, 0x7A, 0xA5, 0x6D, 0xA5, 0x69, /* 0xEC-0xEF */ 0xA5, 0x78, 0xA5, 0x77, 0xA5, 0x76, 0xA5, 0x6B, /* 0xF0-0xF3 */ 0x00, 0x00, 0xA5, 0x72, 0x00, 0x00, 0x00, 0x00, /* 0xF4-0xF7 */ 0xA5, 0x71, 0x00, 0x00, 0x00, 0x00, 0xA5, 0x7B, /* 0xF8-0xFB */ 0xA5, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xFC-0xFF */ }; static const unsigned char u2c_54[512] = { 0x00, 0x00, 0xA6, 0x53, 0x00, 0x00, 0xA6, 0x59, /* 0x00-0x03 */ 0xA6, 0x55, 0x00, 0x00, 0xA6, 0x5B, 0xC9, 0xC5, /* 0x04-0x07 */ 0xA6, 0x58, 0xA6, 0x4E, 0xA6, 0x51, 0xA6, 0x54, /* 0x08-0x0B */ 0xA6, 0x50, 0xA6, 0x57, 0xA6, 0x5A, 0xA6, 0x4F, /* 0x0C-0x0F */ 0xA6, 0x52, 0xA6, 0x56, 0xA6, 0x5C, 0x00, 0x00, /* 0x10-0x13 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x14-0x17 */ 0xCA, 0x7E, 0xCA, 0x7B, 0x00, 0x00, 0xA7, 0x67, /* 0x18-0x1B */ 0xCA, 0x7C, 0xA7, 0x5B, 0xA7, 0x5D, 0xA7, 0x75, /* 0x1C-0x1F */ 0xA7, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x23 */ 0xCA, 0xA5, 0xCA, 0x7D, 0xA7, 0x5F, 0xA7, 0x61, /* 0x24-0x27 */ 0xCA, 0xA4, 0xA7, 0x68, 0xCA, 0x78, 0xA7, 0x74, /* 0x28-0x2B */ 0xA7, 0x76, 0xA7, 0x5C, 0xA7, 0x6D, 0x00, 0x00, /* 0x2C-0x2F */ 0xCA, 0x76, 0xA7, 0x73, 0x00, 0x00, 0xA7, 0x64, /* 0x30-0x33 */ 0x00, 0x00, 0xA7, 0x6E, 0xA7, 0x6F, 0xCA, 0x77, /* 0x34-0x37 */ 0xA7, 0x6C, 0xA7, 0x6A, 0x00, 0x00, 0xA7, 0x6B, /* 0x38-0x3B */ 0xA7, 0x71, 0xCA, 0xA1, 0xA7, 0x5E, 0x00, 0x00, /* 0x3C-0x3F */ 0xA7, 0x72, 0xCA, 0xA3, 0xA7, 0x66, 0xA7, 0x63, /* 0x40-0x43 */ 0x00, 0x00, 0xCA, 0x7A, 0xA7, 0x62, 0xCA, 0xA6, /* 0x44-0x47 */ 0xA7, 0x65, 0x00, 0x00, 0xA7, 0x69, 0x00, 0x00, /* 0x48-0x4B */ 0x00, 0x00, 0x00, 0x00, 0xA7, 0x60, 0xCA, 0xA2, /* 0x4C-0x4F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x53 */ 0xCA, 0x79, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x54-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x5C-0x5F */ 0xCB, 0xEB, 0xCB, 0xEA, 0xA9, 0x4F, 0xCB, 0xED, /* 0x60-0x63 */ 0xCB, 0xEF, 0xCB, 0xE4, 0xCB, 0xE7, 0xCB, 0xEE, /* 0x64-0x67 */ 0xA9, 0x50, 0x00, 0x00, 0x00, 0x00, 0xCB, 0xE1, /* 0x68-0x6B */ 0xCB, 0xE5, 0x00, 0x00, 0x00, 0x00, 0xCB, 0xE9, /* 0x6C-0x6F */ 0xCE, 0x49, 0xA9, 0x4B, 0xCE, 0x4D, 0xA8, 0xFD, /* 0x70-0x73 */ 0xCB, 0xE6, 0xA8, 0xFE, 0xA9, 0x4C, 0xA9, 0x45, /* 0x74-0x77 */ 0xA9, 0x41, 0x00, 0x00, 0xCB, 0xE2, 0xA9, 0x44, /* 0x78-0x7B */ 0xA9, 0x49, 0xA9, 0x52, 0xCB, 0xE3, 0xCB, 0xDC, /* 0x7C-0x7F */ 0xA9, 0x43, 0xCB, 0xDD, 0xCB, 0xDF, 0x00, 0x00, /* 0x80-0x83 */ 0xA9, 0x46, 0x00, 0x00, 0xA9, 0x48, 0xCB, 0xDB, /* 0x84-0x87 */ 0xCB, 0xE0, 0x00, 0x00, 0x00, 0x00, 0xA9, 0x51, /* 0x88-0x8B */ 0xA9, 0x4D, 0xCB, 0xE8, 0xA9, 0x53, 0x00, 0x00, /* 0x8C-0x8F */ 0xA9, 0x4A, 0xCB, 0xDE, 0xA9, 0x47, 0x00, 0x00, /* 0x90-0x93 */ 0x00, 0x00, 0xA9, 0x42, 0xA9, 0x40, 0x00, 0x00, /* 0x94-0x97 */ 0xCB, 0xEC, 0x00, 0x00, 0xA9, 0x4E, 0x00, 0x00, /* 0x98-0x9B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9C-0x9F */ 0xCE, 0x48, 0xCD, 0xFB, 0xCE, 0x4B, 0x00, 0x00, /* 0xA0-0xA3 */ 0x00, 0x00, 0xCD, 0xFD, 0xAB, 0x78, 0xAB, 0xA8, /* 0xA4-0xA7 */ 0xAB, 0x74, 0xAB, 0xA7, 0xAB, 0x7D, 0xAB, 0xA4, /* 0xA8-0xAB */ 0xAB, 0x72, 0xCD, 0xFC, 0xCE, 0x43, 0xAB, 0xA3, /* 0xAC-0xAF */ 0xCE, 0x4F, 0xAB, 0xA5, 0x00, 0x00, 0xAB, 0x79, /* 0xB0-0xB3 */ 0x00, 0x00, 0x00, 0x00, 0xCE, 0x45, 0xCE, 0x42, /* 0xB4-0xB7 */ 0xAB, 0x77, 0x00, 0x00, 0xCD, 0xFA, 0xAB, 0xA6, /* 0xB8-0xBB */ 0xCE, 0x4A, 0xAB, 0x7C, 0xCE, 0x4C, 0xAB, 0xA9, /* 0xBC-0xBF */ 0xAB, 0x73, 0xAB, 0x7E, 0xAB, 0x7B, 0xCE, 0x40, /* 0xC0-0xC3 */ 0xAB, 0xA1, 0xCE, 0x46, 0xCE, 0x47, 0xAB, 0x7A, /* 0xC4-0xC7 */ 0xAB, 0xA2, 0xAB, 0x76, 0x00, 0x00, 0x00, 0x00, /* 0xC8-0xCB */ 0x00, 0x00, 0x00, 0x00, 0xAB, 0x75, 0xCD, 0xFE, /* 0xCC-0xCF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD0-0xD3 */ 0x00, 0x00, 0x00, 0x00, 0xCE, 0x44, 0x00, 0x00, /* 0xD4-0xD7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD8-0xDB */ 0x00, 0x00, 0x00, 0x00, 0xCE, 0x4E, 0x00, 0x00, /* 0xDC-0xDF */ 0xD1, 0x44, 0xAD, 0xFB, 0xD0, 0xF1, 0x00, 0x00, /* 0xE0-0xE3 */ 0xD0, 0xF6, 0xAD, 0xF4, 0xAE, 0x40, 0xD0, 0xF4, /* 0xE4-0xE7 */ 0xAD, 0xEF, 0xAD, 0xF9, 0xAD, 0xFE, 0xD0, 0xFB, /* 0xE8-0xEB */ 0x00, 0x00, 0xAD, 0xFA, 0xAD, 0xFD, 0x00, 0x00, /* 0xEC-0xEF */ 0x00, 0x00, 0xD0, 0xFE, 0xAD, 0xF5, 0xD0, 0xF5, /* 0xF0-0xF3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xD1, 0x42, /* 0xF4-0xF7 */ 0xD1, 0x43, 0x00, 0x00, 0xAD, 0xF7, 0xD1, 0x41, /* 0xF8-0xFB */ 0xAD, 0xF3, 0xAE, 0x43, 0x00, 0x00, 0xD0, 0xF8, /* 0xFC-0xFF */ }; static const unsigned char u2c_55[512] = { 0x00, 0x00, 0xAD, 0xF1, 0x00, 0x00, 0xD1, 0x46, /* 0x00-0x03 */ 0xD0, 0xF9, 0xD0, 0xFD, 0xAD, 0xF6, 0xAE, 0x42, /* 0x04-0x07 */ 0xD0, 0xFA, 0xAD, 0xFC, 0xD1, 0x40, 0xD1, 0x47, /* 0x08-0x0B */ 0xD4, 0xA1, 0x00, 0x00, 0xD1, 0x45, 0xAE, 0x44, /* 0x0C-0x0F */ 0xAD, 0xF0, 0xD0, 0xFC, 0xD0, 0xF3, 0x00, 0x00, /* 0x10-0x13 */ 0xAD, 0xF8, 0x00, 0x00, 0x00, 0x00, 0xD0, 0xF2, /* 0x14-0x17 */ 0x00, 0x00, 0x00, 0x00, 0xD0, 0xF7, 0x00, 0x00, /* 0x18-0x1B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1C-0x1F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x23 */ 0x00, 0x00, 0x00, 0x00, 0xD0, 0xF0, 0xAE, 0x41, /* 0x24-0x27 */ 0x00, 0x00, 0x00, 0x00, 0xD4, 0x77, 0x00, 0x00, /* 0x28-0x2B */ 0xB0, 0xE4, 0xD4, 0xA7, 0xB0, 0xE2, 0xB0, 0xDF, /* 0x2C-0x2F */ 0xD4, 0x7C, 0xB0, 0xDB, 0xD4, 0xA2, 0xB0, 0xE6, /* 0x30-0x33 */ 0xD4, 0x76, 0xD4, 0x7B, 0xD4, 0x7A, 0xAD, 0xF2, /* 0x34-0x37 */ 0xB0, 0xE1, 0xD4, 0xA5, 0x00, 0x00, 0xD4, 0xA8, /* 0x38-0x3B */ 0xD4, 0x73, 0x00, 0x00, 0xB3, 0xE8, 0x00, 0x00, /* 0x3C-0x3F */ 0xD4, 0xA9, 0xB0, 0xE7, 0x00, 0x00, 0xB0, 0xD9, /* 0x40-0x43 */ 0xB0, 0xD6, 0xD4, 0x7E, 0xB0, 0xD3, 0x00, 0x00, /* 0x44-0x47 */ 0xD4, 0xA6, 0x00, 0x00, 0xB0, 0xDA, 0xD4, 0xAA, /* 0x48-0x4B */ 0x00, 0x00, 0xD4, 0x74, 0xD4, 0xA4, 0xB0, 0xDD, /* 0x4C-0x4F */ 0xD4, 0x75, 0xD4, 0x78, 0xD4, 0x7D, 0x00, 0x00, /* 0x50-0x53 */ 0x00, 0x00, 0xB0, 0xDE, 0xB0, 0xDC, 0xB0, 0xE8, /* 0x54-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5B */ 0xB0, 0xE3, 0x00, 0x00, 0xB0, 0xD7, 0xB1, 0xD2, /* 0x5C-0x5F */ 0x00, 0x00, 0xB0, 0xD8, 0xD4, 0x79, 0xB0, 0xE5, /* 0x60-0x63 */ 0xB0, 0xE0, 0xD4, 0xA3, 0xB0, 0xD5, 0x00, 0x00, /* 0x64-0x67 */ 0x00, 0x00, 0x00, 0x00, 0xB0, 0xD4, 0x00, 0x00, /* 0x68-0x6B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x6C-0x6F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x73 */ 0x00, 0x00, 0xD4, 0x71, 0xD4, 0x72, 0xD8, 0x6A, /* 0x74-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xB3, 0xD7, /* 0x78-0x7B */ 0xB3, 0xDA, 0xD8, 0x75, 0xB3, 0xEE, 0xD8, 0x78, /* 0x7C-0x7F */ 0xB3, 0xD8, 0xD8, 0x71, 0xB3, 0xDE, 0xB3, 0xE4, /* 0x80-0x83 */ 0xB5, 0xBD, 0x00, 0x00, 0x00, 0x00, 0xB3, 0xE2, /* 0x84-0x87 */ 0xD8, 0x6E, 0xB3, 0xEF, 0xB3, 0xDB, 0xB3, 0xE3, /* 0x88-0x8B */ 0xD8, 0x76, 0xDC, 0xD7, 0xD8, 0x7B, 0xD8, 0x6F, /* 0x8C-0x8F */ 0x00, 0x00, 0xD8, 0x66, 0xD8, 0x73, 0xD8, 0x6D, /* 0x90-0x93 */ 0xB3, 0xE1, 0xD8, 0x79, 0x00, 0x00, 0x00, 0x00, /* 0x94-0x97 */ 0xB3, 0xDD, 0xB3, 0xF1, 0xB3, 0xEA, 0x00, 0x00, /* 0x98-0x9B */ 0xB3, 0xDF, 0xB3, 0xDC, 0x00, 0x00, 0xB3, 0xE7, /* 0x9C-0x9F */ 0x00, 0x00, 0xD8, 0x7A, 0xD8, 0x6C, 0xD8, 0x72, /* 0xA0-0xA3 */ 0xD8, 0x74, 0xD8, 0x68, 0xD8, 0x77, 0xB3, 0xD9, /* 0xA4-0xA7 */ 0xD8, 0x67, 0x00, 0x00, 0xB3, 0xE0, 0xB3, 0xF0, /* 0xA8-0xAB */ 0xB3, 0xEC, 0xD8, 0x69, 0xB3, 0xE6, 0x00, 0x00, /* 0xAC-0xAF */ 0x00, 0x00, 0xB3, 0xED, 0xB3, 0xE9, 0xB3, 0xE5, /* 0xB0-0xB3 */ 0x00, 0x00, 0xD8, 0x70, 0x00, 0x00, 0x00, 0x00, /* 0xB4-0xB7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xB3, 0xEB, /* 0xB8-0xBB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xDC, 0xD5, /* 0xBC-0xBF */ 0xDC, 0xD1, 0x00, 0x00, 0xDC, 0xE0, 0xDC, 0xCA, /* 0xC0-0xC3 */ 0xDC, 0xD3, 0xB6, 0xE5, 0xB6, 0xE6, 0xB6, 0xDE, /* 0xC4-0xC7 */ 0xDC, 0xDC, 0xB6, 0xE8, 0xDC, 0xCF, 0xDC, 0xCE, /* 0xC8-0xCB */ 0xDC, 0xCC, 0xDC, 0xDE, 0xB6, 0xDC, 0xDC, 0xD8, /* 0xCC-0xCF */ 0xDC, 0xCD, 0xB6, 0xDF, 0xDC, 0xD6, 0xB6, 0xDA, /* 0xD0-0xD3 */ 0xDC, 0xD2, 0xDC, 0xD9, 0xDC, 0xDB, 0x00, 0x00, /* 0xD4-0xD7 */ 0x00, 0x00, 0xDC, 0xDF, 0xB6, 0xE3, 0xDC, 0xCB, /* 0xD8-0xDB */ 0xB6, 0xDD, 0xDC, 0xD0, 0x00, 0x00, 0xB6, 0xD8, /* 0xDC-0xDF */ 0x00, 0x00, 0xB6, 0xE4, 0xDC, 0xDA, 0xB6, 0xE0, /* 0xE0-0xE3 */ 0xB6, 0xE1, 0xB6, 0xE7, 0xB6, 0xDB, 0xA2, 0x5F, /* 0xE4-0xE7 */ 0xB6, 0xD9, 0xDC, 0xD4, 0x00, 0x00, 0x00, 0x00, /* 0xE8-0xEB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xB6, 0xE2, /* 0xEC-0xEF */ 0x00, 0x00, 0x00, 0x00, 0xDC, 0xDD, 0x00, 0x00, /* 0xF0-0xF3 */ 0x00, 0x00, 0x00, 0x00, 0xB9, 0xCD, 0xB9, 0xC8, /* 0xF4-0xF7 */ 0x00, 0x00, 0xE1, 0x55, 0xE1, 0x51, 0x00, 0x00, /* 0xF8-0xFB */ 0xE1, 0x4B, 0xB9, 0xC2, 0xB9, 0xBE, 0xE1, 0x54, /* 0xFC-0xFF */ }; static const unsigned char u2c_56[512] = { 0xB9, 0xBF, 0xE1, 0x4E, 0xE1, 0x50, 0x00, 0x00, /* 0x00-0x03 */ 0xE1, 0x53, 0x00, 0x00, 0xB9, 0xC4, 0x00, 0x00, /* 0x04-0x07 */ 0xB9, 0xCB, 0xB9, 0xC5, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0B */ 0xE1, 0x49, 0xB9, 0xC6, 0xB9, 0xC7, 0xE1, 0x4C, /* 0x0C-0x0F */ 0xB9, 0xCC, 0x00, 0x00, 0xE1, 0x4A, 0xE1, 0x4F, /* 0x10-0x13 */ 0xB9, 0xC3, 0xE1, 0x48, 0xB9, 0xC9, 0xB9, 0xC1, /* 0x14-0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xB9, 0xC0, /* 0x18-0x1B */ 0xE1, 0x4D, 0xE1, 0x52, 0x00, 0x00, 0xB9, 0xCA, /* 0x1C-0x1F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x23 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE1, 0x47, /* 0x24-0x27 */ 0x00, 0x00, 0xBC, 0x4D, 0xE5, 0x47, 0x00, 0x00, /* 0x28-0x2B */ 0xE5, 0x44, 0x00, 0x00, 0xBC, 0x47, 0xBC, 0x53, /* 0x2C-0x2F */ 0xBC, 0x54, 0x00, 0x00, 0xBC, 0x4A, 0xE5, 0x42, /* 0x30-0x33 */ 0xBC, 0x4C, 0xE4, 0xF9, 0xBC, 0x52, 0x00, 0x00, /* 0x34-0x37 */ 0xE5, 0x46, 0xBC, 0x49, 0xE5, 0x48, 0xBC, 0x48, /* 0x38-0x3B */ 0x00, 0x00, 0xE5, 0x43, 0xE5, 0x45, 0xBC, 0x4B, /* 0x3C-0x3F */ 0xE5, 0x41, 0xE4, 0xFA, 0xE4, 0xF7, 0x00, 0x00, /* 0x40-0x43 */ 0x00, 0x00, 0xD8, 0x6B, 0xE4, 0xFD, 0x00, 0x00, /* 0x44-0x47 */ 0xE4, 0xF6, 0xE4, 0xFC, 0xE4, 0xFB, 0x00, 0x00, /* 0x48-0x4B */ 0xE4, 0xF8, 0x00, 0x00, 0xBC, 0x4F, 0x00, 0x00, /* 0x4C-0x4F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xBC, 0x4E, /* 0x50-0x53 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xBC, 0x50, /* 0x54-0x57 */ 0xE4, 0xFE, 0xBE, 0xB2, 0xE5, 0x40, 0x00, 0x00, /* 0x58-0x5B */ 0x00, 0x00, 0x00, 0x00, 0xE9, 0x45, 0x00, 0x00, /* 0x5C-0x5F */ 0xE8, 0xFD, 0x00, 0x00, 0xBE, 0xBE, 0xE9, 0x42, /* 0x60-0x63 */ 0xBE, 0xB6, 0xBE, 0xBA, 0xE9, 0x41, 0x00, 0x00, /* 0x64-0x67 */ 0xBE, 0xB9, 0xBE, 0xB5, 0xBE, 0xB8, 0xBE, 0xB3, /* 0x68-0x6B */ 0xBE, 0xBD, 0xE9, 0x43, 0xE8, 0xFE, 0xBE, 0xBC, /* 0x6C-0x6F */ 0xE8, 0xFC, 0xBE, 0xBB, 0xE9, 0x44, 0xE9, 0x40, /* 0x70-0x73 */ 0xBC, 0x51, 0x00, 0x00, 0xBE, 0xBF, 0xE9, 0x46, /* 0x74-0x77 */ 0xBE, 0xB7, 0xBE, 0xB4, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7B */ 0x00, 0x00, 0x00, 0x00, 0xEC, 0xC6, 0xEC, 0xC8, /* 0x7C-0x7F */ 0xC0, 0x7B, 0xEC, 0xC9, 0xEC, 0xC7, 0xEC, 0xC5, /* 0x80-0x83 */ 0xEC, 0xC4, 0xC0, 0x7D, 0xEC, 0xC3, 0xC0, 0x7E, /* 0x84-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8B */ 0xEC, 0xC1, 0xEC, 0xC2, 0xC0, 0x7A, 0xC0, 0xA1, /* 0x8C-0x8F */ 0xC0, 0x7C, 0x00, 0x00, 0x00, 0x00, 0xEC, 0xC0, /* 0x90-0x93 */ 0x00, 0x00, 0xC2, 0x50, 0x00, 0x00, 0xEF, 0xBC, /* 0x94-0x97 */ 0xEF, 0xBA, 0xEF, 0xBF, 0xEF, 0xBD, 0x00, 0x00, /* 0x98-0x9B */ 0xEF, 0xBB, 0xEF, 0xBE, 0x00, 0x00, 0x00, 0x00, /* 0x9C-0x9F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA0-0xA3 */ 0x00, 0x00, 0xC3, 0x60, 0xF1, 0xF2, 0xF1, 0xF3, /* 0xA4-0xA7 */ 0xC4, 0x56, 0x00, 0x00, 0xF1, 0xF4, 0xF1, 0xF0, /* 0xA8-0xAB */ 0xF1, 0xF5, 0xF1, 0xF1, 0xC2, 0x51, 0x00, 0x00, /* 0xAC-0xAF */ 0x00, 0x00, 0x00, 0x00, 0xF3, 0xFE, 0xF4, 0x41, /* 0xB0-0xB3 */ 0xC4, 0x59, 0xF4, 0x40, 0xC4, 0x58, 0xC4, 0x57, /* 0xB4-0xB7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB8-0xBB */ 0xC4, 0x5A, 0xF5, 0xC5, 0xF5, 0xC6, 0x00, 0x00, /* 0xBC-0xBF */ 0xC4, 0xDA, 0xC4, 0xD9, 0xC4, 0xDB, 0xF5, 0xC4, /* 0xC0-0xC3 */ 0x00, 0x00, 0xF6, 0xD8, 0xF6, 0xD7, 0x00, 0x00, /* 0xC4-0xC7 */ 0xC5, 0x6D, 0xC5, 0x6F, 0xC5, 0x6E, 0xF6, 0xD9, /* 0xC8-0xCB */ 0xC5, 0xC8, 0xF8, 0xA6, 0x00, 0x00, 0x00, 0x00, /* 0xCC-0xCF */ 0x00, 0x00, 0xC5, 0xF1, 0x00, 0x00, 0xF8, 0xA5, /* 0xD0-0xD3 */ 0xF8, 0xEE, 0x00, 0x00, 0x00, 0x00, 0xC9, 0x49, /* 0xD4-0xD7 */ 0x00, 0x00, 0x00, 0x00, 0xA5, 0x7D, 0xA5, 0x7C, /* 0xD8-0xDB */ 0x00, 0x00, 0xA6, 0x5F, 0xA6, 0x5E, 0xC9, 0xC7, /* 0xDC-0xDF */ 0xA6, 0x5D, 0xC9, 0xC6, 0x00, 0x00, 0x00, 0x00, /* 0xE0-0xE3 */ 0xA7, 0x79, 0xCA, 0xA9, 0x00, 0x00, 0xCA, 0xA8, /* 0xE4-0xE7 */ 0x00, 0x00, 0x00, 0x00, 0xA7, 0x77, 0xA7, 0x7A, /* 0xE8-0xEB */ 0x00, 0x00, 0x00, 0x00, 0xCA, 0xA7, 0x00, 0x00, /* 0xEC-0xEF */ 0xA7, 0x78, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xF0-0xF3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xCB, 0xF0, /* 0xF4-0xF7 */ 0x00, 0x00, 0xCB, 0xF1, 0xA9, 0x54, 0x00, 0x00, /* 0xF8-0xFB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xAB, 0xAA, /* 0xFC-0xFF */ }; static const unsigned char u2c_57[512] = { 0x00, 0x00, 0xD1, 0x48, 0xD1, 0x49, 0xAE, 0x45, /* 0x00-0x03 */ 0xAE, 0x46, 0x00, 0x00, 0x00, 0x00, 0xD4, 0xAC, /* 0x04-0x07 */ 0xB0, 0xE9, 0xB0, 0xEB, 0xD4, 0xAB, 0xB0, 0xEA, /* 0x08-0x0B */ 0xD8, 0x7C, 0xB3, 0xF2, 0x00, 0x00, 0x00, 0x00, /* 0x0C-0x0F */ 0x00, 0x00, 0x00, 0x00, 0xB6, 0xE9, 0xB6, 0xEA, /* 0x10-0x13 */ 0xDC, 0xE1, 0x00, 0x00, 0xB9, 0xCF, 0x00, 0x00, /* 0x14-0x17 */ 0xB9, 0xCE, 0x00, 0x00, 0xE5, 0x49, 0xE9, 0x48, /* 0x18-0x1B */ 0xE9, 0x47, 0x00, 0x00, 0xF9, 0x6B, 0xA4, 0x67, /* 0x1C-0x1F */ 0xC9, 0x59, 0x00, 0x00, 0xC9, 0x6E, 0xC9, 0x6F, /* 0x20-0x23 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */ 0xA6, 0x62, 0xA6, 0x66, 0xC9, 0xC9, 0x00, 0x00, /* 0x28-0x2B */ 0xA6, 0x64, 0xA6, 0x63, 0xC9, 0xC8, 0xA6, 0x65, /* 0x2C-0x2F */ 0xA6, 0x61, 0x00, 0x00, 0x00, 0x00, 0xA6, 0x60, /* 0x30-0x33 */ 0xC9, 0xCA, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x34-0x37 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA7, 0xA6, /* 0x38-0x3B */ 0x00, 0x00, 0x00, 0x00, 0xA7, 0xA3, 0x00, 0x00, /* 0x3C-0x3F */ 0xA7, 0x7D, 0xCA, 0xAA, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x43 */ 0x00, 0x00, 0xCA, 0xAB, 0x00, 0x00, 0xA7, 0xA1, /* 0x44-0x47 */ 0x00, 0x00, 0xCA, 0xAD, 0xA7, 0x7B, 0xCA, 0xAE, /* 0x48-0x4B */ 0xCA, 0xAC, 0xA7, 0x7E, 0xA7, 0xA2, 0xA7, 0xA5, /* 0x4C-0x4F */ 0xA7, 0xA4, 0xA7, 0x7C, 0xCA, 0xAF, 0x00, 0x00, /* 0x50-0x53 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x54-0x57 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x5C-0x5F */ 0x00, 0x00, 0xA9, 0x59, 0xCB, 0xFE, 0x00, 0x00, /* 0x60-0x63 */ 0xA9, 0x5B, 0x00, 0x00, 0xA9, 0x5A, 0x00, 0x00, /* 0x64-0x67 */ 0xCC, 0x40, 0xA9, 0x58, 0xA9, 0x57, 0xCB, 0xF5, /* 0x68-0x6B */ 0x00, 0x00, 0xCB, 0xF4, 0x00, 0x00, 0xCB, 0xF2, /* 0x6C-0x6F */ 0xCB, 0xF7, 0xCB, 0xF6, 0xCB, 0xF3, 0xCB, 0xFC, /* 0x70-0x73 */ 0xCB, 0xFD, 0xCB, 0xFA, 0xCB, 0xF8, 0xA9, 0x56, /* 0x74-0x77 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xCB, 0xFB, /* 0x78-0x7B */ 0xA9, 0x5C, 0xCC, 0x41, 0x00, 0x00, 0x00, 0x00, /* 0x7C-0x7F */ 0xCB, 0xF9, 0x00, 0x00, 0xAB, 0xAB, 0xA9, 0x55, /* 0x80-0x83 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x84-0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xAB, 0xAC, /* 0x88-0x8B */ 0xCE, 0x54, 0x00, 0x00, 0x00, 0x00, 0xCE, 0x5A, /* 0x8C-0x8F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xAB, 0xB2, /* 0x90-0x93 */ 0xCE, 0x58, 0xCE, 0x5E, 0x00, 0x00, 0xCE, 0x55, /* 0x94-0x97 */ 0xCE, 0x59, 0xCE, 0x5B, 0xCE, 0x5D, 0xCE, 0x57, /* 0x98-0x9B */ 0x00, 0x00, 0xCE, 0x56, 0xCE, 0x51, 0xCE, 0x52, /* 0x9C-0x9F */ 0xAB, 0xAD, 0x00, 0x00, 0xAB, 0xAF, 0xAB, 0xAE, /* 0xA0-0xA3 */ 0xCE, 0x53, 0xCE, 0x5C, 0x00, 0x00, 0x00, 0x00, /* 0xA4-0xA7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA8-0xAB */ 0x00, 0x00, 0x00, 0x00, 0xAB, 0xB1, 0x00, 0x00, /* 0xAC-0xAF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB0-0xB3 */ 0x00, 0x00, 0xCE, 0x50, 0xD1, 0x53, 0x00, 0x00, /* 0xB4-0xB7 */ 0xD1, 0x52, 0xD1, 0x57, 0xD1, 0x4E, 0x00, 0x00, /* 0xB8-0xBB */ 0xD1, 0x51, 0xD1, 0x50, 0x00, 0x00, 0xD1, 0x54, /* 0xBC-0xBF */ 0x00, 0x00, 0xD1, 0x58, 0xAE, 0x47, 0xAE, 0x4A, /* 0xC0-0xC3 */ 0x00, 0x00, 0x00, 0x00, 0xD1, 0x4F, 0xD1, 0x55, /* 0xC4-0xC7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xAE, 0x49, /* 0xC8-0xCB */ 0xD1, 0x4A, 0x00, 0x00, 0xAB, 0xB0, 0xD4, 0xBA, /* 0xCC-0xCF */ 0xD1, 0x56, 0x00, 0x00, 0xD1, 0x4D, 0x00, 0x00, /* 0xD0-0xD3 */ 0xAE, 0x48, 0xD1, 0x4C, 0x00, 0x00, 0x00, 0x00, /* 0xD4-0xD7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD8-0xDB */ 0xD4, 0xB1, 0x00, 0x00, 0x00, 0x00, 0xB0, 0xEC, /* 0xDC-0xDF */ 0xB0, 0xF0, 0xD4, 0xC1, 0xD4, 0xAF, 0xD4, 0xBD, /* 0xE0-0xE3 */ 0xB0, 0xF1, 0xD4, 0xBF, 0x00, 0x00, 0xD4, 0xC5, /* 0xE4-0xE7 */ 0x00, 0x00, 0xD4, 0xC9, 0x00, 0x00, 0x00, 0x00, /* 0xE8-0xEB */ 0xD4, 0xC0, 0xD4, 0xB4, 0xD4, 0xBC, 0x00, 0x00, /* 0xEC-0xEF */ 0xD4, 0xCA, 0xD4, 0xC8, 0xD4, 0xBE, 0xD4, 0xB9, /* 0xF0-0xF3 */ 0xD4, 0xB2, 0xD8, 0xA6, 0xD4, 0xB0, 0xB0, 0xF5, /* 0xF4-0xF7 */ 0xD4, 0xB7, 0xB0, 0xF6, 0xB0, 0xF2, 0xD4, 0xAD, /* 0xF8-0xFB */ 0xD4, 0xC3, 0xD4, 0xB5, 0x00, 0x00, 0x00, 0x00, /* 0xFC-0xFF */ }; static const unsigned char u2c_58[512] = { 0xD4, 0xB3, 0xD4, 0xC6, 0xB0, 0xF3, 0x00, 0x00, /* 0x00-0x03 */ 0xD4, 0xCC, 0xB0, 0xED, 0xB0, 0xEF, 0xD4, 0xBB, /* 0x04-0x07 */ 0xD4, 0xB6, 0xAE, 0x4B, 0xB0, 0xEE, 0xD4, 0xB8, /* 0x08-0x0B */ 0xD4, 0xC7, 0xD4, 0xCB, 0xD4, 0xC2, 0x00, 0x00, /* 0x0C-0x0F */ 0xD4, 0xC4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x13 */ 0xD4, 0xAE, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x14-0x17 */ 0x00, 0x00, 0xD8, 0xA1, 0x00, 0x00, 0xD8, 0xAA, /* 0x18-0x1B */ 0xD8, 0xA9, 0xB3, 0xFA, 0xD8, 0xA2, 0x00, 0x00, /* 0x1C-0x1F */ 0xB3, 0xFB, 0xB3, 0xF9, 0x00, 0x00, 0xD8, 0xA4, /* 0x20-0x23 */ 0xB3, 0xF6, 0xD8, 0xA8, 0x00, 0x00, 0xD8, 0xA3, /* 0x24-0x27 */ 0xD8, 0xA5, 0xD8, 0x7D, 0xB3, 0xF4, 0x00, 0x00, /* 0x28-0x2B */ 0xD8, 0xB2, 0xD8, 0xB1, 0xD8, 0xAE, 0xB3, 0xF3, /* 0x2C-0x2F */ 0xB3, 0xF7, 0xB3, 0xF8, 0xD1, 0x4B, 0xD8, 0xAB, /* 0x30-0x33 */ 0xB3, 0xF5, 0xB0, 0xF4, 0xD8, 0xAD, 0xD8, 0x7E, /* 0x34-0x37 */ 0xD8, 0xB0, 0xD8, 0xAF, 0x00, 0x00, 0xD8, 0xB3, /* 0x38-0x3B */ 0x00, 0x00, 0xDC, 0xEF, 0x00, 0x00, 0xD8, 0xAC, /* 0x3C-0x3F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x43 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x44-0x47 */ 0xD8, 0xA7, 0xDC, 0xE7, 0xB6, 0xF4, 0xB6, 0xF7, /* 0x48-0x4B */ 0xB6, 0xF2, 0xDC, 0xE6, 0xDC, 0xEA, 0xDC, 0xE5, /* 0x4C-0x4F */ 0x00, 0x00, 0xB6, 0xEC, 0xB6, 0xF6, 0xDC, 0xE2, /* 0x50-0x53 */ 0xB6, 0xF0, 0xDC, 0xE9, 0x00, 0x00, 0xB6, 0xEE, /* 0x54-0x57 */ 0xB6, 0xED, 0xDC, 0xEC, 0xB6, 0xEF, 0xDC, 0xEE, /* 0x58-0x5B */ 0x00, 0x00, 0xDC, 0xEB, 0xB6, 0xEB, 0x00, 0x00, /* 0x5C-0x5F */ 0x00, 0x00, 0x00, 0x00, 0xB6, 0xF5, 0xDC, 0xF0, /* 0x60-0x63 */ 0xDC, 0xE4, 0xDC, 0xED, 0x00, 0x00, 0x00, 0x00, /* 0x64-0x67 */ 0xDC, 0xE3, 0x00, 0x00, 0x00, 0x00, 0xB6, 0xF1, /* 0x68-0x6B */ 0x00, 0x00, 0xB6, 0xF3, 0x00, 0x00, 0xDC, 0xE8, /* 0x6C-0x6F */ 0x00, 0x00, 0xDC, 0xF1, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x73 */ 0xE1, 0x5D, 0xB9, 0xD0, 0xE1, 0x63, 0x00, 0x00, /* 0x74-0x77 */ 0x00, 0x00, 0xB9, 0xD5, 0xE1, 0x5F, 0xE1, 0x66, /* 0x78-0x7B */ 0xE1, 0x57, 0xB9, 0xD7, 0xB9, 0xD1, 0xE1, 0x5C, /* 0x7C-0x7F */ 0xBC, 0x55, 0xE1, 0x5B, 0xE1, 0x64, 0xB9, 0xD2, /* 0x80-0x83 */ 0x00, 0x00, 0xB9, 0xD6, 0xE1, 0x5A, 0xE1, 0x60, /* 0x84-0x87 */ 0xE1, 0x65, 0xE1, 0x56, 0xB9, 0xD4, 0xE1, 0x5E, /* 0x88-0x8B */ 0x00, 0x00, 0x00, 0x00, 0xE1, 0x62, 0xE1, 0x68, /* 0x8C-0x8F */ 0xE1, 0x58, 0xE1, 0x61, 0x00, 0x00, 0xB9, 0xD3, /* 0x90-0x93 */ 0xE1, 0x67, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x94-0x97 */ 0xE1, 0x59, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9B */ 0xBC, 0x59, 0xE5, 0x4B, 0xBC, 0x57, 0xBC, 0x56, /* 0x9C-0x9F */ 0xE5, 0x4D, 0xE5, 0x52, 0x00, 0x00, 0xE5, 0x4E, /* 0xA0-0xA3 */ 0x00, 0x00, 0xE5, 0x51, 0xBC, 0x5C, 0x00, 0x00, /* 0xA4-0xA7 */ 0xBE, 0xA5, 0xBC, 0x5B, 0x00, 0x00, 0xE5, 0x4A, /* 0xA8-0xAB */ 0xE5, 0x50, 0x00, 0x00, 0xBC, 0x5A, 0xE5, 0x4F, /* 0xAC-0xAF */ 0x00, 0x00, 0xE5, 0x4C, 0x00, 0x00, 0xBC, 0x58, /* 0xB0-0xB3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB4-0xB7 */ 0x00, 0x00, 0x00, 0x00, 0xE9, 0x4D, 0xF9, 0xD9, /* 0xB8-0xBB */ 0xE9, 0x4F, 0xE9, 0x4A, 0xBE, 0xC1, 0xE9, 0x4C, /* 0xBC-0xBF */ 0x00, 0x00, 0xBE, 0xC0, 0xE9, 0x4E, 0x00, 0x00, /* 0xC0-0xC3 */ 0x00, 0x00, 0xBE, 0xC3, 0xE9, 0x50, 0xBE, 0xC2, /* 0xC4-0xC7 */ 0xE9, 0x49, 0xE9, 0x4B, 0x00, 0x00, 0x00, 0x00, /* 0xC8-0xCB */ 0x00, 0x00, 0x00, 0x00, 0xC0, 0xA5, 0xEC, 0xCC, /* 0xCC-0xCF */ 0x00, 0x00, 0xC0, 0xA4, 0xEC, 0xCD, 0xC0, 0xA3, /* 0xD0-0xD3 */ 0xEC, 0xCB, 0xC0, 0xA2, 0xEC, 0xCA, 0x00, 0x00, /* 0xD4-0xD7 */ 0xC2, 0x53, 0xC2, 0x52, 0xF1, 0xF6, 0xF1, 0xF8, /* 0xD8-0xDB */ 0x00, 0x00, 0xF1, 0xF7, 0xC3, 0x61, 0xC3, 0x62, /* 0xDC-0xDF */ 0x00, 0x00, 0x00, 0x00, 0xC3, 0x63, 0xF4, 0x42, /* 0xE0-0xE3 */ 0xC4, 0x5B, 0x00, 0x00, 0x00, 0x00, 0xF7, 0xD3, /* 0xE4-0xE7 */ 0xF7, 0xD2, 0xC5, 0xF2, 0x00, 0x00, 0xA4, 0x68, /* 0xE8-0xEB */ 0xA4, 0xD0, 0x00, 0x00, 0x00, 0x00, 0xA7, 0xA7, /* 0xEC-0xEF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xF0-0xF3 */ 0xCE, 0x5F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xF4-0xF7 */ 0x00, 0x00, 0xB3, 0xFC, 0xB3, 0xFD, 0x00, 0x00, /* 0xF8-0xFB */ 0xDC, 0xF2, 0xB9, 0xD8, 0xE1, 0x69, 0xE5, 0x53, /* 0xFC-0xFF */ }; static const unsigned char u2c_59[512] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC9, 0x5A, /* 0x00-0x03 */ 0x00, 0x00, 0x00, 0x00, 0xCA, 0xB0, 0x00, 0x00, /* 0x04-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0B */ 0xCC, 0x42, 0xCE, 0x60, 0xD1, 0x59, 0xAE, 0x4C, /* 0x0C-0x0F */ 0x00, 0x00, 0x00, 0x00, 0xF1, 0xF9, 0x00, 0x00, /* 0x10-0x13 */ 0xC4, 0xDC, 0xA4, 0x69, 0xA5, 0x7E, 0xC9, 0x70, /* 0x14-0x17 */ 0x00, 0x00, 0xA6, 0x67, 0xA6, 0x68, 0x00, 0x00, /* 0x18-0x1B */ 0xA9, 0x5D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1C-0x1F */ 0xB0, 0xF7, 0x00, 0x00, 0xB9, 0xDA, 0x00, 0x00, /* 0x20-0x23 */ 0xB9, 0xDB, 0xB9, 0xD9, 0x00, 0x00, 0xA4, 0x6A, /* 0x24-0x27 */ 0x00, 0x00, 0xA4, 0xD1, 0xA4, 0xD3, 0xA4, 0xD2, /* 0x28-0x2B */ 0xC9, 0x5B, 0xA4, 0xD4, 0xA5, 0xA1, 0xC9, 0x71, /* 0x2C-0x2F */ 0x00, 0x00, 0xA5, 0xA2, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x33 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA6, 0x69, /* 0x34-0x37 */ 0xA6, 0x6A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */ 0xC9, 0xCB, 0x00, 0x00, 0xA7, 0xA8, 0x00, 0x00, /* 0x3C-0x3F */ 0xCA, 0xB1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x43 */ 0xA9, 0x61, 0xCC, 0x43, 0x00, 0x00, 0xA9, 0x5F, /* 0x44-0x47 */ 0xA9, 0x60, 0xA9, 0x5E, 0xD1, 0x5A, 0x00, 0x00, /* 0x48-0x4B */ 0x00, 0x00, 0x00, 0x00, 0xAB, 0xB6, 0xAB, 0xB5, /* 0x4C-0x4F */ 0xAB, 0xB7, 0xAB, 0xB4, 0x00, 0x00, 0xCE, 0x61, /* 0x50-0x53 */ 0xA9, 0x62, 0xAB, 0xB3, 0x00, 0x00, 0xAE, 0x4D, /* 0x54-0x57 */ 0xAE, 0x4E, 0x00, 0x00, 0xAE, 0x4F, 0x00, 0x00, /* 0x58-0x5B */ 0xD4, 0xCD, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x5C-0x5F */ 0xB3, 0xFE, 0xD8, 0xB4, 0xB0, 0xF8, 0x00, 0x00, /* 0x60-0x63 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xB6, 0xF8, /* 0x64-0x67 */ 0x00, 0x00, 0xB9, 0xDD, 0xB9, 0xDC, 0xE1, 0x6A, /* 0x68-0x6B */ 0x00, 0x00, 0xBC, 0x5D, 0xBE, 0xC4, 0x00, 0x00, /* 0x6C-0x6F */ 0xEF, 0xC0, 0xF6, 0xDA, 0xF7, 0xD4, 0xA4, 0x6B, /* 0x70-0x73 */ 0xA5, 0xA3, 0x00, 0x00, 0xA5, 0xA4, 0xC9, 0xD1, /* 0x74-0x77 */ 0xA6, 0x6C, 0xA6, 0x6F, 0x00, 0x00, 0xC9, 0xCF, /* 0x78-0x7B */ 0xC9, 0xCD, 0xA6, 0x6E, 0xC9, 0xD0, 0xC9, 0xD2, /* 0x7C-0x7F */ 0xC9, 0xCC, 0xA6, 0x71, 0xA6, 0x70, 0xA6, 0x6D, /* 0x80-0x83 */ 0xA6, 0x6B, 0xC9, 0xCE, 0x00, 0x00, 0x00, 0x00, /* 0x84-0x87 */ 0x00, 0x00, 0x00, 0x00, 0xA7, 0xB3, 0x00, 0x00, /* 0x88-0x8B */ 0x00, 0x00, 0xA7, 0xB0, 0xCA, 0xB6, 0xCA, 0xB9, /* 0x8C-0x8F */ 0xCA, 0xB8, 0x00, 0x00, 0xA7, 0xAA, 0xA7, 0xB2, /* 0x90-0x93 */ 0x00, 0x00, 0x00, 0x00, 0xA7, 0xAF, 0xCA, 0xB5, /* 0x94-0x97 */ 0xCA, 0xB3, 0xA7, 0xAE, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9B */ 0x00, 0x00, 0xA7, 0xA9, 0xA7, 0xAC, 0x00, 0x00, /* 0x9C-0x9F */ 0xCA, 0xB4, 0xCA, 0xBB, 0xCA, 0xB7, 0xA7, 0xAD, /* 0xA0-0xA3 */ 0xA7, 0xB1, 0xA7, 0xB4, 0xCA, 0xB2, 0xCA, 0xBA, /* 0xA4-0xA7 */ 0xA7, 0xAB, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA8-0xAB */ 0x00, 0x00, 0x00, 0x00, 0xA9, 0x67, 0xA9, 0x6F, /* 0xAC-0xAF */ 0x00, 0x00, 0xCC, 0x4F, 0xCC, 0x48, 0xA9, 0x70, /* 0xB0-0xB3 */ 0xCC, 0x53, 0xCC, 0x44, 0xCC, 0x4B, 0x00, 0x00, /* 0xB4-0xB7 */ 0x00, 0x00, 0xA9, 0x66, 0xCC, 0x45, 0xA9, 0x64, /* 0xB8-0xBB */ 0xCC, 0x4C, 0xCC, 0x50, 0xA9, 0x63, 0x00, 0x00, /* 0xBC-0xBF */ 0xCC, 0x51, 0xCC, 0x4A, 0x00, 0x00, 0xCC, 0x4D, /* 0xC0-0xC3 */ 0x00, 0x00, 0xA9, 0x72, 0xA9, 0x69, 0xCC, 0x54, /* 0xC4-0xC7 */ 0xCC, 0x52, 0x00, 0x00, 0xA9, 0x6E, 0xA9, 0x6C, /* 0xC8-0xCB */ 0xCC, 0x49, 0xA9, 0x6B, 0xCC, 0x47, 0xCC, 0x46, /* 0xCC-0xCF */ 0xA9, 0x6A, 0xA9, 0x68, 0xA9, 0x71, 0xA9, 0x6D, /* 0xD0-0xD3 */ 0xA9, 0x65, 0x00, 0x00, 0xCC, 0x4E, 0x00, 0x00, /* 0xD4-0xD7 */ 0xAB, 0xB9, 0x00, 0x00, 0xAB, 0xC0, 0xCE, 0x6F, /* 0xD8-0xDB */ 0xAB, 0xB8, 0xCE, 0x67, 0xCE, 0x63, 0x00, 0x00, /* 0xDC-0xDF */ 0xCE, 0x73, 0xCE, 0x62, 0x00, 0x00, 0xAB, 0xBB, /* 0xE0-0xE3 */ 0xCE, 0x6C, 0xAB, 0xBE, 0xAB, 0xC1, 0x00, 0x00, /* 0xE4-0xE7 */ 0xAB, 0xBC, 0xCE, 0x70, 0xAB, 0xBF, 0x00, 0x00, /* 0xE8-0xEB */ 0xAE, 0x56, 0xCE, 0x76, 0xCE, 0x64, 0x00, 0x00, /* 0xEC-0xEF */ 0x00, 0x00, 0xCE, 0x66, 0xCE, 0x6D, 0xCE, 0x71, /* 0xF0-0xF3 */ 0xCE, 0x75, 0xCE, 0x72, 0xCE, 0x6B, 0xCE, 0x6E, /* 0xF4-0xF7 */ 0x00, 0x00, 0x00, 0x00, 0xCE, 0x68, 0xAB, 0xC3, /* 0xF8-0xFB */ 0xCE, 0x6A, 0xCE, 0x69, 0xCE, 0x74, 0xAB, 0xBA, /* 0xFC-0xFF */ }; static const unsigned char u2c_5A[512] = { 0xCE, 0x65, 0xAB, 0xC2, 0x00, 0x00, 0xAB, 0xBD, /* 0x00-0x03 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */ 0x00, 0x00, 0xAE, 0x5C, 0xD1, 0x62, 0x00, 0x00, /* 0x08-0x0B */ 0xAE, 0x5B, 0x00, 0x00, 0x00, 0x00, 0xD1, 0x60, /* 0x0C-0x0F */ 0x00, 0x00, 0xAE, 0x50, 0x00, 0x00, 0xAE, 0x55, /* 0x10-0x13 */ 0x00, 0x00, 0xD1, 0x5F, 0xD1, 0x5C, 0xD1, 0x61, /* 0x14-0x17 */ 0xAE, 0x51, 0xD1, 0x5B, 0x00, 0x00, 0xAE, 0x54, /* 0x18-0x1B */ 0xAE, 0x52, 0x00, 0x00, 0xD1, 0x63, 0xAE, 0x53, /* 0x1C-0x1F */ 0xAE, 0x57, 0x00, 0x00, 0x00, 0x00, 0xAE, 0x58, /* 0x20-0x23 */ 0x00, 0x00, 0xAE, 0x5A, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */ 0x00, 0x00, 0xAE, 0x59, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2B */ 0x00, 0x00, 0xD1, 0x5D, 0xD1, 0x5E, 0x00, 0x00, /* 0x2C-0x2F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xD1, 0x64, /* 0x30-0x33 */ 0x00, 0x00, 0xD4, 0xD4, 0xB0, 0xF9, 0xD8, 0xC2, /* 0x34-0x37 */ 0xD4, 0xD3, 0xD4, 0xE6, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */ 0xB1, 0x40, 0x00, 0x00, 0xD4, 0xE4, 0x00, 0x00, /* 0x3C-0x3F */ 0xB0, 0xFE, 0xB0, 0xFA, 0xD4, 0xED, 0xD4, 0xDD, /* 0x40-0x43 */ 0xD4, 0xE0, 0x00, 0x00, 0xB1, 0x43, 0xD4, 0xEA, /* 0x44-0x47 */ 0xD4, 0xE2, 0xB0, 0xFB, 0xB1, 0x44, 0x00, 0x00, /* 0x48-0x4B */ 0xD4, 0xE7, 0xD4, 0xE5, 0x00, 0x00, 0x00, 0x00, /* 0x4C-0x4F */ 0xD4, 0xD6, 0xD4, 0xEB, 0xD4, 0xDF, 0xD4, 0xDA, /* 0x50-0x53 */ 0x00, 0x00, 0xD4, 0xD0, 0xD4, 0xEC, 0xD4, 0xDC, /* 0x54-0x57 */ 0xD4, 0xCF, 0x00, 0x00, 0xB1, 0x42, 0xD4, 0xE1, /* 0x58-0x5B */ 0xD4, 0xEE, 0xD4, 0xDE, 0xD4, 0xD2, 0xD4, 0xD7, /* 0x5C-0x5F */ 0xD4, 0xCE, 0x00, 0x00, 0xB1, 0x41, 0x00, 0x00, /* 0x60-0x63 */ 0xD4, 0xDB, 0xD4, 0xD8, 0xB0, 0xFC, 0xD4, 0xD1, /* 0x64-0x67 */ 0x00, 0x00, 0xD4, 0xE9, 0xB0, 0xFD, 0x00, 0x00, /* 0x68-0x6B */ 0xD4, 0xD9, 0xD4, 0xD5, 0x00, 0x00, 0x00, 0x00, /* 0x6C-0x6F */ 0xD4, 0xE8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x73 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xB4, 0x40, /* 0x74-0x77 */ 0xD8, 0xBB, 0x00, 0x00, 0xD8, 0xB8, 0xD8, 0xC9, /* 0x78-0x7B */ 0xD8, 0xBD, 0xD8, 0xCA, 0x00, 0x00, 0xB4, 0x42, /* 0x7C-0x7F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xD8, 0xC6, /* 0x80-0x83 */ 0xD8, 0xC3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x84-0x87 */ 0x00, 0x00, 0x00, 0x00, 0xD8, 0xC4, 0xD8, 0xC7, /* 0x88-0x8B */ 0xD8, 0xCB, 0x00, 0x00, 0xD4, 0xE3, 0xD8, 0xCD, /* 0x8C-0x8F */ 0xDD, 0x47, 0x00, 0x00, 0xB4, 0x43, 0xD8, 0xCE, /* 0x90-0x93 */ 0xD8, 0xB6, 0xD8, 0xC0, 0x00, 0x00, 0xD8, 0xC5, /* 0x94-0x97 */ 0x00, 0x00, 0x00, 0x00, 0xB4, 0x41, 0xB4, 0x44, /* 0x98-0x9B */ 0xD8, 0xCC, 0xD8, 0xCF, 0xD8, 0xBA, 0xD8, 0xB7, /* 0x9C-0x9F */ 0x00, 0x00, 0x00, 0x00, 0xD8, 0xB9, 0x00, 0x00, /* 0xA0-0xA3 */ 0x00, 0x00, 0xD8, 0xBE, 0xD8, 0xBC, 0xB4, 0x45, /* 0xA4-0xA7 */ 0x00, 0x00, 0xD8, 0xC8, 0x00, 0x00, 0x00, 0x00, /* 0xA8-0xAB */ 0xD8, 0xBF, 0x00, 0x00, 0xD8, 0xC1, 0xD8, 0xB5, /* 0xAC-0xAF */ 0xDC, 0xFA, 0xDC, 0xF8, 0xB7, 0x42, 0xB7, 0x40, /* 0xB0-0xB3 */ 0xDD, 0x43, 0xDC, 0xF9, 0xDD, 0x44, 0xDD, 0x40, /* 0xB4-0xB7 */ 0xDC, 0xF7, 0xDD, 0x46, 0xDC, 0xF6, 0xDC, 0xFD, /* 0xB8-0xBB */ 0xB6, 0xFE, 0xB6, 0xFD, 0xB6, 0xFC, 0xDC, 0xFB, /* 0xBC-0xBF */ 0xDD, 0x41, 0xB6, 0xF9, 0xB7, 0x41, 0x00, 0x00, /* 0xC0-0xC3 */ 0xDC, 0xF4, 0x00, 0x00, 0xDC, 0xFE, 0xDC, 0xF3, /* 0xC4-0xC7 */ 0xDC, 0xFC, 0xB6, 0xFA, 0xDD, 0x42, 0xDC, 0xF5, /* 0xC8-0xCB */ 0xB6, 0xFB, 0xDD, 0x45, 0x00, 0x00, 0x00, 0x00, /* 0xCC-0xCF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD0-0xD3 */ 0x00, 0x00, 0xE1, 0x6E, 0xB9, 0xE2, 0xB9, 0xE1, /* 0xD4-0xD7 */ 0xB9, 0xE3, 0xE1, 0x7A, 0xE1, 0x70, 0xE1, 0x76, /* 0xD8-0xDB */ 0xE1, 0x6B, 0xE1, 0x79, 0xE1, 0x78, 0xE1, 0x7C, /* 0xDC-0xDF */ 0xE1, 0x75, 0xB9, 0xDE, 0xE1, 0x74, 0xB9, 0xE4, /* 0xE0-0xE3 */ 0x00, 0x00, 0xE1, 0x6D, 0xB9, 0xDF, 0x00, 0x00, /* 0xE4-0xE7 */ 0xE1, 0x7B, 0xB9, 0xE0, 0xE1, 0x6F, 0xE1, 0x72, /* 0xE8-0xEB */ 0xE1, 0x77, 0xE1, 0x71, 0xE1, 0x6C, 0x00, 0x00, /* 0xEC-0xEF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE1, 0x73, /* 0xF0-0xF3 */ 0xE5, 0x55, 0xBC, 0x61, 0xE5, 0x58, 0xE5, 0x57, /* 0xF4-0xF7 */ 0xE5, 0x5A, 0xE5, 0x5C, 0xF9, 0xDC, 0xBC, 0x5F, /* 0xF8-0xFB */ 0x00, 0x00, 0xE5, 0x56, 0x00, 0x00, 0xE5, 0x54, /* 0xFC-0xFF */ }; static const unsigned char u2c_5B[512] = { 0x00, 0x00, 0xE5, 0x5D, 0xE5, 0x5B, 0xE5, 0x59, /* 0x00-0x03 */ 0x00, 0x00, 0xE5, 0x5F, 0x00, 0x00, 0xE5, 0x5E, /* 0x04-0x07 */ 0xBC, 0x63, 0xBC, 0x5E, 0x00, 0x00, 0xBC, 0x60, /* 0x08-0x0B */ 0xBC, 0x62, 0x00, 0x00, 0x00, 0x00, 0xE5, 0x60, /* 0x0C-0x0F */ 0xE9, 0x57, 0x00, 0x00, 0x00, 0x00, 0xE9, 0x56, /* 0x10-0x13 */ 0xE9, 0x55, 0x00, 0x00, 0xE9, 0x58, 0xE9, 0x51, /* 0x14-0x17 */ 0x00, 0x00, 0xE9, 0x52, 0xE9, 0x5A, 0xE9, 0x53, /* 0x18-0x1B */ 0x00, 0x00, 0xBE, 0xC5, 0xE9, 0x5C, 0x00, 0x00, /* 0x1C-0x1F */ 0xE9, 0x5B, 0xE9, 0x54, 0x00, 0x00, 0xEC, 0xD1, /* 0x20-0x23 */ 0xC0, 0xA8, 0xEC, 0xCF, 0xEC, 0xD4, 0xEC, 0xD3, /* 0x24-0x27 */ 0xE9, 0x59, 0x00, 0x00, 0xC0, 0xA7, 0x00, 0x00, /* 0x28-0x2B */ 0xEC, 0xD2, 0xEC, 0xCE, 0xEC, 0xD6, 0xEC, 0xD5, /* 0x2C-0x2F */ 0xC0, 0xA6, 0x00, 0x00, 0xEC, 0xD0, 0x00, 0x00, /* 0x30-0x33 */ 0xBE, 0xC6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x34-0x37 */ 0xC2, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */ 0xEF, 0xC1, 0xF1, 0xFA, 0xF1, 0xFB, 0xF1, 0xFC, /* 0x3C-0x3F */ 0xC4, 0x5C, 0x00, 0x00, 0x00, 0x00, 0xC4, 0x5D, /* 0x40-0x43 */ 0x00, 0x00, 0xF4, 0x43, 0x00, 0x00, 0xF5, 0xC8, /* 0x44-0x47 */ 0xF5, 0xC7, 0x00, 0x00, 0x00, 0x00, 0xF6, 0xDB, /* 0x48-0x4B */ 0xF6, 0xDC, 0xF7, 0xD5, 0xF8, 0xA7, 0x00, 0x00, /* 0x4C-0x4F */ 0xA4, 0x6C, 0xA4, 0x6D, 0x00, 0x00, 0xA4, 0x6E, /* 0x50-0x53 */ 0xA4, 0xD5, 0xA5, 0xA5, 0xC9, 0xD3, 0xA6, 0x72, /* 0x54-0x57 */ 0xA6, 0x73, 0x00, 0x00, 0xA7, 0xB7, 0xA7, 0xB8, /* 0x58-0x5B */ 0xA7, 0xB6, 0xA7, 0xB5, 0x00, 0x00, 0xA9, 0x73, /* 0x5C-0x5F */ 0x00, 0x00, 0x00, 0x00, 0xCC, 0x55, 0xA9, 0x75, /* 0x60-0x63 */ 0xA9, 0x74, 0xCC, 0x56, 0x00, 0x00, 0x00, 0x00, /* 0x64-0x67 */ 0x00, 0x00, 0xAB, 0xC4, 0x00, 0x00, 0xAE, 0x5D, /* 0x68-0x6B */ 0xD1, 0x65, 0x00, 0x00, 0xD4, 0xF0, 0x00, 0x00, /* 0x6C-0x6F */ 0xB1, 0x45, 0xB4, 0x47, 0xD4, 0xEF, 0xB4, 0x46, /* 0x70-0x73 */ 0x00, 0x00, 0xB9, 0xE5, 0x00, 0x00, 0xE1, 0x7D, /* 0x74-0x77 */ 0xBE, 0xC7, 0x00, 0x00, 0xC0, 0xA9, 0xEC, 0xD7, /* 0x78-0x7B */ 0x00, 0x00, 0xC4, 0x5E, 0x00, 0x00, 0xC5, 0x70, /* 0x7C-0x7F */ 0x00, 0x00, 0xC9, 0x72, 0x00, 0x00, 0xA5, 0xA6, /* 0x80-0x83 */ 0xC9, 0x73, 0xA6, 0x76, 0x00, 0x00, 0xA6, 0x74, /* 0x84-0x87 */ 0xA6, 0x75, 0xA6, 0x77, 0x00, 0x00, 0xA7, 0xBA, /* 0x88-0x8B */ 0xA7, 0xB9, 0x00, 0x00, 0xCA, 0xBC, 0xA7, 0xBB, /* 0x8C-0x8F */ 0x00, 0x00, 0x00, 0x00, 0xCA, 0xBD, 0xCC, 0x57, /* 0x90-0x93 */ 0x00, 0x00, 0xCC, 0x58, 0x00, 0x00, 0xA9, 0x76, /* 0x94-0x97 */ 0xA9, 0x78, 0xA9, 0x7A, 0xA9, 0x77, 0xA9, 0x7B, /* 0x98-0x9B */ 0xA9, 0x79, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9C-0x9F */ 0x00, 0x00, 0x00, 0x00, 0xAB, 0xC8, 0xAB, 0xC5, /* 0xA0-0xA3 */ 0xAB, 0xC7, 0xAB, 0xC9, 0xAB, 0xC6, 0xD1, 0x66, /* 0xA4-0xA7 */ 0xCE, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA8-0xAB */ 0xD1, 0x68, 0xD1, 0x67, 0xAE, 0x63, 0x00, 0x00, /* 0xAC-0xAF */ 0xAE, 0x5F, 0x00, 0x00, 0x00, 0x00, 0xAE, 0x60, /* 0xB0-0xB3 */ 0xAE, 0x62, 0xAE, 0x64, 0xAE, 0x61, 0x00, 0x00, /* 0xB4-0xB7 */ 0xAE, 0x66, 0xAE, 0x65, 0x00, 0x00, 0x00, 0x00, /* 0xB8-0xBB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xB1, 0x4A, /* 0xBC-0xBF */ 0xD4, 0xF2, 0xD4, 0xF1, 0xB1, 0x49, 0x00, 0x00, /* 0xC0-0xC3 */ 0xB1, 0x48, 0xB1, 0x47, 0xB1, 0x4B, 0xB1, 0x46, /* 0xC4-0xC7 */ 0x00, 0x00, 0x00, 0x00, 0xD8, 0xD5, 0xD8, 0xD2, /* 0xC8-0xCB */ 0xB4, 0x49, 0xD8, 0xD1, 0xD8, 0xD6, 0x00, 0x00, /* 0xCC-0xCF */ 0xB4, 0x4B, 0xD8, 0xD4, 0xB4, 0x48, 0xB4, 0x4A, /* 0xD0-0xD3 */ 0xD8, 0xD3, 0x00, 0x00, 0xDD, 0x48, 0x00, 0x00, /* 0xD4-0xD7 */ 0xDD, 0x49, 0xDD, 0x4A, 0x00, 0x00, 0x00, 0x00, /* 0xD8-0xDB */ 0x00, 0x00, 0x00, 0x00, 0xB9, 0xE6, 0xB9, 0xEE, /* 0xDC-0xDF */ 0xE1, 0x7E, 0xB9, 0xE8, 0xB9, 0xEC, 0xE1, 0xA1, /* 0xE0-0xE3 */ 0xB9, 0xED, 0xB9, 0xE9, 0xB9, 0xEA, 0xB9, 0xE7, /* 0xE4-0xE7 */ 0xB9, 0xEB, 0xBC, 0x66, 0xD8, 0xD0, 0xBC, 0x67, /* 0xE8-0xEB */ 0xBC, 0x65, 0x00, 0x00, 0xBC, 0x64, 0xE9, 0x5D, /* 0xEC-0xEF */ 0xBE, 0xC8, 0xEC, 0xD8, 0xEC, 0xD9, 0x00, 0x00, /* 0xF0-0xF3 */ 0x00, 0x00, 0xC3, 0x64, 0xC4, 0x5F, 0x00, 0x00, /* 0xF4-0xF7 */ 0xA4, 0x6F, 0x00, 0x00, 0xA6, 0x78, 0x00, 0x00, /* 0xF8-0xFB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xFC-0xFF */ }; static const unsigned char u2c_5C[512] = { 0x00, 0x00, 0xAB, 0xCA, 0x00, 0x00, 0xD1, 0x69, /* 0x00-0x03 */ 0xAE, 0x67, 0x00, 0x00, 0x00, 0x00, 0xB1, 0x4E, /* 0x04-0x07 */ 0xB1, 0x4D, 0xB1, 0x4C, 0xB4, 0x4C, 0xB4, 0x4D, /* 0x08-0x0B */ 0xD8, 0xD7, 0xB9, 0xEF, 0xBE, 0xC9, 0xA4, 0x70, /* 0x0C-0x0F */ 0xC9, 0x5C, 0xA4, 0xD6, 0xC9, 0x74, 0x00, 0x00, /* 0x10-0x13 */ 0x00, 0x00, 0xC9, 0xD4, 0xA6, 0x79, 0x00, 0x00, /* 0x14-0x17 */ 0x00, 0x00, 0x00, 0x00, 0xA9, 0x7C, 0x00, 0x00, /* 0x18-0x1B */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xDD, 0x4B, /* 0x1C-0x1F */ 0x00, 0x00, 0x00, 0x00, 0xA4, 0x71, 0x00, 0x00, /* 0x20-0x23 */ 0xA4, 0xD7, 0xC9, 0xD5, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */ 0xCA, 0xBE, 0x00, 0x00, 0xCA, 0xBF, 0x00, 0x00, /* 0x28-0x2B */ 0xA7, 0xBC, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2C-0x2F */ 0xD8, 0xD8, 0xB4, 0x4E, 0x00, 0x00, 0xDD, 0x4C, /* 0x30-0x33 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0, 0xAA, /* 0x34-0x37 */ 0xA4, 0x72, 0xA4, 0xA8, 0xA4, 0xD8, 0xC9, 0x75, /* 0x38-0x3B */ 0xA5, 0xA7, 0x00, 0x00, 0xA7, 0xC0, 0xA7, 0xBF, /* 0x3C-0x3F */ 0xA7, 0xBD, 0xA7, 0xBE, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x43 */ 0xCC, 0x59, 0xA9, 0x7E, 0xA9, 0xA1, 0xCC, 0x5A, /* 0x44-0x47 */ 0xA9, 0x7D, 0x00, 0x00, 0x00, 0x00, 0xAB, 0xCE, /* 0x48-0x4B */ 0xCE, 0x78, 0xAB, 0xCD, 0xAB, 0xCB, 0xAB, 0xCC, /* 0x4C-0x4F */ 0xAE, 0x6A, 0xAE, 0x68, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x53 */ 0xD1, 0x6B, 0xAE, 0x69, 0xD1, 0x6A, 0x00, 0x00, /* 0x54-0x57 */ 0xAE, 0x5E, 0xD4, 0xF3, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5B */ 0xB1, 0x50, 0xB1, 0x51, 0x00, 0x00, 0x00, 0x00, /* 0x5C-0x5F */ 0xB1, 0x4F, 0x00, 0x00, 0xB9, 0xF0, 0xE1, 0xA2, /* 0x60-0x63 */ 0xBC, 0x68, 0xBC, 0x69, 0x00, 0x00, 0xE5, 0x61, /* 0x64-0x67 */ 0xC0, 0xAB, 0xEF, 0xC2, 0xEF, 0xC3, 0x00, 0x00, /* 0x68-0x6B */ 0xC4, 0xDD, 0xF8, 0xA8, 0xC9, 0x4B, 0xA4, 0xD9, /* 0x6C-0x6F */ 0x00, 0x00, 0xA4, 0x73, 0x00, 0x00, 0xC9, 0x77, /* 0x70-0x73 */ 0xC9, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x74-0x77 */ 0x00, 0x00, 0xA6, 0x7A, 0xC9, 0xD7, 0xC9, 0xD8, /* 0x78-0x7B */ 0xC9, 0xD6, 0x00, 0x00, 0xC9, 0xD9, 0x00, 0x00, /* 0x7C-0x7F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x83 */ 0x00, 0x00, 0x00, 0x00, 0xCA, 0xC7, 0x00, 0x00, /* 0x84-0x87 */ 0xCA, 0xC2, 0xCA, 0xC4, 0xCA, 0xC6, 0xCA, 0xC3, /* 0x88-0x8B */ 0xA7, 0xC4, 0xCA, 0xC0, 0x00, 0x00, 0xCA, 0xC1, /* 0x8C-0x8F */ 0xA7, 0xC1, 0xA7, 0xC2, 0xCA, 0xC5, 0xCA, 0xC8, /* 0x90-0x93 */ 0xA7, 0xC3, 0xCA, 0xC9, 0x00, 0x00, 0x00, 0x00, /* 0x94-0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9B */ 0x00, 0x00, 0xCC, 0x68, 0x00, 0x00, 0xCC, 0x62, /* 0x9C-0x9F */ 0xCC, 0x5D, 0xA9, 0xA3, 0xCC, 0x65, 0xCC, 0x63, /* 0xA0-0xA3 */ 0xCC, 0x5C, 0xCC, 0x69, 0xCC, 0x6C, 0xCC, 0x67, /* 0xA4-0xA7 */ 0xCC, 0x60, 0xA9, 0xA5, 0xCC, 0x66, 0xA9, 0xA6, /* 0xA8-0xAB */ 0xCC, 0x61, 0xCC, 0x64, 0xCC, 0x5B, 0xCC, 0x5F, /* 0xAC-0xAF */ 0xCC, 0x6B, 0xA9, 0xA7, 0x00, 0x00, 0xA9, 0xA8, /* 0xB0-0xB3 */ 0x00, 0x00, 0xCC, 0x5E, 0xCC, 0x6A, 0xA9, 0xA2, /* 0xB4-0xB7 */ 0xA9, 0xA4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB8-0xBB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xBC-0xBF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC0-0xC3 */ 0x00, 0x00, 0x00, 0x00, 0xCE, 0xAB, 0xCE, 0xA4, /* 0xC4-0xC7 */ 0xCE, 0xAA, 0xCE, 0xA3, 0xCE, 0xA5, 0xCE, 0x7D, /* 0xC8-0xCB */ 0xCE, 0x7B, 0x00, 0x00, 0xCE, 0xAC, 0xCE, 0xA9, /* 0xCC-0xCF */ 0xCE, 0x79, 0x00, 0x00, 0xAB, 0xD0, 0xCE, 0xA7, /* 0xD0-0xD3 */ 0xCE, 0xA8, 0x00, 0x00, 0xCE, 0xA6, 0xCE, 0x7C, /* 0xD4-0xD7 */ 0xCE, 0x7A, 0xAB, 0xCF, 0xCE, 0xA2, 0xCE, 0x7E, /* 0xD8-0xDB */ 0x00, 0x00, 0x00, 0x00, 0xCE, 0xA1, 0xCE, 0xAD, /* 0xDC-0xDF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE0-0xE3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE4-0xE7 */ 0xAE, 0x6F, 0x00, 0x00, 0xAE, 0x6E, 0x00, 0x00, /* 0xE8-0xEB */ 0xD1, 0x6C, 0xAE, 0x6B, 0xD1, 0x6E, 0x00, 0x00, /* 0xEC-0xEF */ 0xAE, 0x70, 0xD1, 0x6F, 0x00, 0x00, 0x00, 0x00, /* 0xF0-0xF3 */ 0xAE, 0x73, 0x00, 0x00, 0xAE, 0x71, 0xD1, 0x70, /* 0xF4-0xF7 */ 0xCE, 0xAE, 0xD1, 0x72, 0x00, 0x00, 0xAE, 0x6D, /* 0xF8-0xFB */ 0x00, 0x00, 0xAE, 0x6C, 0x00, 0x00, 0xD1, 0x6D, /* 0xFC-0xFF */ }; static const unsigned char u2c_5D[512] = { 0xD1, 0x71, 0xAE, 0x72, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ 0x00, 0x00, 0x00, 0x00, 0xB1, 0x53, 0xB1, 0x52, /* 0x04-0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xD4, 0xF5, /* 0x08-0x0B */ 0xD4, 0xF9, 0xD4, 0xFB, 0xB1, 0x54, 0xD4, 0xFE, /* 0x0C-0x0F */ 0x00, 0x00, 0xB1, 0x58, 0xD5, 0x41, 0x00, 0x00, /* 0x10-0x13 */ 0xB1, 0x5A, 0x00, 0x00, 0xB1, 0x56, 0xB1, 0x5E, /* 0x14-0x17 */ 0x00, 0x00, 0xB1, 0x5B, 0xD4, 0xF7, 0xB1, 0x55, /* 0x18-0x1B */ 0x00, 0x00, 0xD4, 0xF6, 0xD4, 0xF4, 0xD5, 0x43, /* 0x1C-0x1F */ 0xD4, 0xF8, 0x00, 0x00, 0xB1, 0x57, 0xD5, 0x42, /* 0x20-0x23 */ 0xB1, 0x5C, 0xD4, 0xFD, 0xD4, 0xFC, 0xB1, 0x5D, /* 0x24-0x27 */ 0xD4, 0xFA, 0xB1, 0x59, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2B */ 0x00, 0x00, 0x00, 0x00, 0xD5, 0x44, 0x00, 0x00, /* 0x2C-0x2F */ 0xD5, 0x40, 0xD8, 0xE7, 0xD8, 0xEE, 0xD8, 0xE3, /* 0x30-0x33 */ 0xB4, 0x51, 0xD8, 0xDF, 0xD8, 0xEF, 0xD8, 0xD9, /* 0x34-0x37 */ 0xD8, 0xEC, 0xD8, 0xEA, 0xD8, 0xE4, 0x00, 0x00, /* 0x38-0x3B */ 0xD8, 0xED, 0xD8, 0xE6, 0x00, 0x00, 0xD8, 0xDE, /* 0x3C-0x3F */ 0xD8, 0xF0, 0xD8, 0xDC, 0xD8, 0xE9, 0xD8, 0xDA, /* 0x40-0x43 */ 0x00, 0x00, 0xD8, 0xF1, 0x00, 0x00, 0xB4, 0x52, /* 0x44-0x47 */ 0x00, 0x00, 0xD8, 0xEB, 0xDD, 0x4F, 0xD8, 0xDD, /* 0x48-0x4B */ 0xB4, 0x4F, 0x00, 0x00, 0xD8, 0xE1, 0x00, 0x00, /* 0x4C-0x4F */ 0xB4, 0x50, 0xD8, 0xE0, 0xD8, 0xE5, 0x00, 0x00, /* 0x50-0x53 */ 0x00, 0x00, 0xD8, 0xE2, 0x00, 0x00, 0x00, 0x00, /* 0x54-0x57 */ 0x00, 0x00, 0xD8, 0xE8, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5B */ 0x00, 0x00, 0x00, 0x00, 0xDD, 0x53, 0x00, 0x00, /* 0x5C-0x5F */ 0x00, 0x00, 0x00, 0x00, 0xDD, 0x56, 0xDD, 0x4E, /* 0x60-0x63 */ 0x00, 0x00, 0xDD, 0x50, 0x00, 0x00, 0xDD, 0x55, /* 0x64-0x67 */ 0xDD, 0x54, 0xB7, 0x43, 0x00, 0x00, 0xD8, 0xDB, /* 0x68-0x6B */ 0xDD, 0x52, 0x00, 0x00, 0x00, 0x00, 0xB7, 0x44, /* 0x6C-0x6F */ 0x00, 0x00, 0xDD, 0x4D, 0xDD, 0x51, 0x00, 0x00, /* 0x70-0x73 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE1, 0xA9, /* 0x74-0x77 */ 0x00, 0x00, 0xE1, 0xB0, 0xE1, 0xA7, 0x00, 0x00, /* 0x78-0x7B */ 0xE1, 0xAE, 0xE1, 0xA5, 0xE1, 0xAD, 0xE1, 0xB1, /* 0x7C-0x7F */ 0xE1, 0xA4, 0xE1, 0xA8, 0xE1, 0xA3, 0x00, 0x00, /* 0x80-0x83 */ 0xB9, 0xF1, 0x00, 0x00, 0xE1, 0xA6, 0xB9, 0xF2, /* 0x84-0x87 */ 0xE1, 0xAC, 0xE1, 0xAB, 0xE1, 0xAA, 0x00, 0x00, /* 0x88-0x8B */ 0x00, 0x00, 0xE1, 0xAF, 0x00, 0x00, 0x00, 0x00, /* 0x8C-0x8F */ 0x00, 0x00, 0x00, 0x00, 0xE5, 0x65, 0xE5, 0x67, /* 0x90-0x93 */ 0xBC, 0x6B, 0xE5, 0x68, 0x00, 0x00, 0xE5, 0x63, /* 0x94-0x97 */ 0x00, 0x00, 0xE5, 0x62, 0xE5, 0x6C, 0x00, 0x00, /* 0x98-0x9B */ 0xE5, 0x6A, 0xBC, 0x6A, 0xE5, 0x6D, 0xE5, 0x64, /* 0x9C-0x9F */ 0xE5, 0x69, 0xE5, 0x6B, 0xE5, 0x66, 0x00, 0x00, /* 0xA0-0xA3 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE9, 0x61, /* 0xA4-0xA7 */ 0xE9, 0x66, 0xE9, 0x60, 0xE9, 0x65, 0x00, 0x00, /* 0xA8-0xAB */ 0xE9, 0x5E, 0xE9, 0x68, 0xE9, 0x64, 0xE9, 0x69, /* 0xAC-0xAF */ 0xE9, 0x63, 0xE9, 0x5F, 0xE9, 0x67, 0x00, 0x00, /* 0xB0-0xB3 */ 0xE9, 0x6A, 0xE9, 0x62, 0x00, 0x00, 0xEC, 0xDA, /* 0xB4-0xB7 */ 0xC0, 0xAF, 0x00, 0x00, 0xC0, 0xAD, 0x00, 0x00, /* 0xB8-0xBB */ 0xC0, 0xAC, 0xC0, 0xAE, 0x00, 0x00, 0x00, 0x00, /* 0xBC-0xBF */ 0xEF, 0xC4, 0x00, 0x00, 0xF1, 0x72, 0xF1, 0xFD, /* 0xC0-0xC3 */ 0x00, 0x00, 0x00, 0x00, 0xF4, 0x44, 0xF4, 0x45, /* 0xC4-0xC7 */ 0x00, 0x00, 0xC4, 0x60, 0x00, 0x00, 0xF5, 0xC9, /* 0xC8-0xCB */ 0x00, 0x00, 0xC4, 0xDE, 0x00, 0x00, 0xF5, 0xCA, /* 0xCC-0xCF */ 0x00, 0x00, 0xF6, 0xDE, 0xC5, 0x72, 0x00, 0x00, /* 0xD0-0xD3 */ 0xC5, 0x71, 0xF6, 0xDD, 0xC5, 0xC9, 0x00, 0x00, /* 0xD4-0xD7 */ 0xF7, 0xD6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD8-0xDB */ 0x00, 0x00, 0xA4, 0x74, 0xA6, 0x7B, 0xC9, 0xDA, /* 0xDC-0xDF */ 0xCA, 0xCA, 0xA8, 0xB5, 0xB1, 0x5F, 0x00, 0x00, /* 0xE0-0xE3 */ 0x00, 0x00, 0xA4, 0x75, 0xA5, 0xAA, 0xA5, 0xA9, /* 0xE4-0xE7 */ 0xA5, 0xA8, 0x00, 0x00, 0x00, 0x00, 0xA7, 0xC5, /* 0xE8-0xEB */ 0x00, 0x00, 0x00, 0x00, 0xAE, 0x74, 0x00, 0x00, /* 0xEC-0xEF */ 0xDD, 0x57, 0xA4, 0x76, 0xA4, 0x77, 0xA4, 0x78, /* 0xF0-0xF3 */ 0xA4, 0xDA, 0x00, 0x00, 0x00, 0x00, 0xAB, 0xD1, /* 0xF4-0xF7 */ 0x00, 0x00, 0xCE, 0xAF, 0x00, 0x00, 0x00, 0x00, /* 0xF8-0xFB */ 0x00, 0x00, 0xB4, 0x53, 0xA4, 0x79, 0xC9, 0x5D, /* 0xFC-0xFF */ }; static const unsigned char u2c_5E[512] = { 0x00, 0x00, 0x00, 0x00, 0xA5, 0xAB, 0xA5, 0xAC, /* 0x00-0x03 */ 0xC9, 0x78, 0x00, 0x00, 0xA6, 0x7C, 0x00, 0x00, /* 0x04-0x07 */ 0x00, 0x00, 0x00, 0x00, 0xCA, 0xCB, 0x00, 0x00, /* 0x08-0x0B */ 0xA7, 0xC6, 0x00, 0x00, 0xCA, 0xCC, 0x00, 0x00, /* 0x0C-0x0F */ 0x00, 0x00, 0xA9, 0xAE, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x13 */ 0xCC, 0x6E, 0xA9, 0xAC, 0xA9, 0xAB, 0xCC, 0x6D, /* 0x14-0x17 */ 0xA9, 0xA9, 0xCC, 0x6F, 0xA9, 0xAA, 0xA9, 0xAD, /* 0x18-0x1B */ 0x00, 0x00, 0xAB, 0xD2, 0x00, 0x00, 0xAB, 0xD4, /* 0x1C-0x1F */ 0xCE, 0xB3, 0xCE, 0xB0, 0xCE, 0xB1, 0xCE, 0xB2, /* 0x20-0x23 */ 0xCE, 0xB4, 0xAB, 0xD3, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */ 0xD1, 0x74, 0xD1, 0x73, 0x00, 0x00, 0xAE, 0x76, /* 0x28-0x2B */ 0x00, 0x00, 0xAE, 0x75, 0x00, 0x00, 0x00, 0x00, /* 0x2C-0x2F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xB1, 0x62, /* 0x30-0x33 */ 0xD5, 0x46, 0x00, 0x00, 0xB1, 0x61, 0xB1, 0x63, /* 0x34-0x37 */ 0xB1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */ 0x00, 0x00, 0xB4, 0x55, 0xD5, 0x45, 0x00, 0x00, /* 0x3C-0x3F */ 0xB4, 0x56, 0xD8, 0xF3, 0x00, 0x00, 0xB4, 0x57, /* 0x40-0x43 */ 0xD8, 0xF2, 0xB4, 0x54, 0x00, 0x00, 0x00, 0x00, /* 0x44-0x47 */ 0x00, 0x00, 0x00, 0x00, 0xDD, 0x5A, 0xDD, 0x5C, /* 0x48-0x4B */ 0xB7, 0x45, 0xDD, 0x5B, 0xDD, 0x59, 0xDD, 0x58, /* 0x4C-0x4F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE1, 0xB4, /* 0x50-0x53 */ 0xB9, 0xF7, 0xB9, 0xF5, 0x00, 0x00, 0xB9, 0xF6, /* 0x54-0x57 */ 0xE1, 0xB2, 0xE1, 0xB3, 0x00, 0x00, 0xB9, 0xF3, /* 0x58-0x5B */ 0xE5, 0x71, 0xE5, 0x6F, 0x00, 0x00, 0xBC, 0x6D, /* 0x5C-0x5F */ 0xE5, 0x70, 0xBC, 0x6E, 0xBC, 0x6C, 0xB9, 0xF4, /* 0x60-0x63 */ 0x00, 0x00, 0x00, 0x00, 0xE9, 0x6D, 0xE9, 0x6B, /* 0x64-0x67 */ 0xE9, 0x6C, 0xE5, 0x6E, 0xEC, 0xDC, 0xC0, 0xB0, /* 0x68-0x6B */ 0xEC, 0xDB, 0xEF, 0xC5, 0xEF, 0xC6, 0xE9, 0x6E, /* 0x6C-0x6F */ 0xF1, 0xFE, 0x00, 0x00, 0xA4, 0x7A, 0xA5, 0xAD, /* 0x70-0x73 */ 0xA6, 0x7E, 0xC9, 0xDB, 0xA6, 0x7D, 0x00, 0x00, /* 0x74-0x77 */ 0xA9, 0xAF, 0xB7, 0x46, 0x00, 0x00, 0xA4, 0xDB, /* 0x78-0x7B */ 0xA5, 0xAE, 0xAB, 0xD5, 0xB4, 0x58, 0x00, 0x00, /* 0x7C-0x7F */ 0xC9, 0x79, 0x00, 0x00, 0xC9, 0x7A, 0x00, 0x00, /* 0x80-0x83 */ 0xC9, 0xDC, 0x00, 0x00, 0x00, 0x00, 0xA7, 0xC8, /* 0x84-0x87 */ 0xCA, 0xD0, 0xCA, 0xCE, 0xA7, 0xC9, 0xCA, 0xCD, /* 0x88-0x8B */ 0xCA, 0xCF, 0xCA, 0xD1, 0x00, 0x00, 0xA7, 0xC7, /* 0x8C-0x8F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x93 */ 0x00, 0x00, 0xA9, 0xB3, 0xA9, 0xB4, 0xA9, 0xB1, /* 0x94-0x97 */ 0x00, 0x00, 0x00, 0x00, 0xA9, 0xB0, 0xCE, 0xB8, /* 0x98-0x9B */ 0xA9, 0xB2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9C-0x9F */ 0xAB, 0xD6, 0x00, 0x00, 0xCE, 0xB7, 0xCE, 0xB9, /* 0xA0-0xA3 */ 0xCE