/src/crosvm/hypervisor/src/kvm/mod.rs
// Copyright 2020 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
mod aarch64;
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
pub use aarch64::*;

mod cap;
pub use cap::KvmCap;

#[cfg(target_arch = "riscv64")]
mod riscv64;

#[cfg(target_arch = "x86_64")]
mod x86_64;

use std::cmp::min;
use std::cmp::Reverse;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::convert::TryFrom;
use std::ffi::CString;
use std::fs::File;
use std::os::raw::c_ulong;
use std::os::raw::c_void;
use std::os::unix::prelude::OsStrExt;
use std::path::Path;
use std::ptr::copy_nonoverlapping;
use std::sync::Arc;

use base::errno_result;
use base::error;
use base::ioctl;
use base::ioctl_with_mut_ref;
use base::ioctl_with_ref;
use base::ioctl_with_val;
use base::pagesize;
use base::AsRawDescriptor;
use base::Error;
use base::Event;
use base::FromRawDescriptor;
use base::MappedRegion;
use base::MemoryMapping;
use base::MemoryMappingBuilder;
use base::MmapError;
use base::Protection;
use base::RawDescriptor;
use base::Result;
use base::SafeDescriptor;
use data_model::vec_with_array_field;
use kvm_sys::*;
use libc::open64;
use libc::EFAULT;
use libc::EINVAL;
use libc::EIO;
use libc::ENOENT;
use libc::ENOSPC;
use libc::ENOSYS;
use libc::EOVERFLOW;
use libc::O_CLOEXEC;
use libc::O_RDWR;
#[cfg(target_arch = "riscv64")]
use riscv64::*;
use sync::Mutex;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
#[cfg(target_arch = "x86_64")]
pub use x86_64::*;

use crate::BalloonEvent;
use crate::ClockState;
use crate::Config;
use crate::Datamatch;
use crate::DeviceKind;
use crate::Hypervisor;
use crate::HypervisorCap;
use crate::IoEventAddress;
use crate::IoOperation;
use crate::IoParams;
use crate::IrqRoute;
use crate::IrqSource;
use crate::MPState;
use crate::MemCacheType;
use crate::MemSlot;
use crate::Vcpu;
use crate::VcpuExit;
use crate::VcpuSignalHandle;
use crate::VcpuSignalHandleInner;
use crate::Vm;
use crate::VmCap;

// Wrapper around KVM_SET_USER_MEMORY_REGION ioctl, which creates, modifies, or deletes a mapping
// from guest physical to host user pages.
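// Passing a `memory_size` of zero deletes any existing mapping in `slot`; `remove_memory_region`
// below relies on this to tear a slot down.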
//
// SAFETY:
// Safe when the guest regions are guaranteed not to overlap.
unsafe fn set_user_memory_region(
    descriptor: &SafeDescriptor,
    slot: MemSlot,
    read_only: bool,
    log_dirty_pages: bool,
    cache: MemCacheType,
    guest_addr: u64,
    memory_size: u64,
    userspace_addr: *mut u8,
) -> Result<()> {
    let mut flags = if read_only { KVM_MEM_READONLY } else { 0 };
    if log_dirty_pages {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    if cache == MemCacheType::CacheNonCoherent {
        flags |= KVM_MEM_NON_COHERENT_DMA;
    }
    let region = kvm_userspace_memory_region {
        slot,
        flags,
        guest_phys_addr: guest_addr,
        memory_size,
        userspace_addr: userspace_addr as u64,
    };

    let ret = ioctl_with_ref(descriptor, KVM_SET_USER_MEMORY_REGION, &region);
    if ret == 0 {
        Ok(())
    } else {
        errno_result()
    }
}

/// Helper function to determine the size in bytes of a dirty log bitmap for the given memory
/// region size.
///
/// # Arguments
///
/// * `size` - Number of bytes in the memory region being queried.
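///
/// # Example
///
/// A minimal sketch, assuming a 4 KiB host page size:
///
/// ```ignore
/// // A 1 MiB region spans 256 pages; one dirty bit per page rounds up to 32 bytes.
/// assert_eq!(dirty_log_bitmap_size(1024 * 1024), 32);
/// ```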
pub fn dirty_log_bitmap_size(size: usize) -> usize {
    let page_size = pagesize();
    (((size + page_size - 1) / page_size) + 7) / 8
}

pub struct Kvm {
    kvm: SafeDescriptor,
    vcpu_mmap_size: usize,
}

impl Kvm {
    pub fn new_with_path(device_path: &Path) -> Result<Kvm> {
        let c_path = CString::new(device_path.as_os_str().as_bytes()).unwrap();
        // SAFETY:
        // Open calls are safe because we give a nul-terminated string and verify the result.
        let ret = unsafe { open64(c_path.as_ptr(), O_RDWR | O_CLOEXEC) };
        if ret < 0 {
            return errno_result();
        }
        // SAFETY:
        // Safe because we verify that ret is valid and we own the fd.
        let kvm = unsafe { SafeDescriptor::from_raw_descriptor(ret) };

        // SAFETY:
        // Safe because we know that the descriptor is valid and we verify the return result.
        let version = unsafe { ioctl(&kvm, KVM_GET_API_VERSION) };
        if version < 0 {
            return errno_result();
        }

        // Per the kernel KVM API documentation: "Applications should refuse to run if
        // KVM_GET_API_VERSION returns a value other than 12."
        if version as u32 != KVM_API_VERSION {
            error!(
                "KVM_GET_API_VERSION: expected {}, got {}",
                KVM_API_VERSION, version,
            );
            return Err(Error::new(ENOSYS));
        }

        // SAFETY:
        // Safe because we know that our file is a KVM fd and we verify the return result.
        let res = unsafe { ioctl(&kvm, KVM_GET_VCPU_MMAP_SIZE) };
        if res <= 0 {
            return errno_result();
        }
        let vcpu_mmap_size = res as usize;

        Ok(Kvm {
            kvm,
            vcpu_mmap_size,
        })
    }

    /// Opens `/dev/kvm` and returns a Kvm object on success.
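    ///
    /// # Example
    ///
    /// A minimal sketch; marked `ignore` because it requires access to `/dev/kvm`:
    ///
    /// ```ignore
    /// let kvm = Kvm::new().expect("failed to open /dev/kvm");
    /// ```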
    pub fn new() -> Result<Kvm> {
        Kvm::new_with_path(Path::new("/dev/kvm"))
    }
}

impl AsRawDescriptor for Kvm {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.kvm.as_raw_descriptor()
    }
}

impl Hypervisor for Kvm {
    fn try_clone(&self) -> Result<Self> {
        Ok(Kvm {
            kvm: self.kvm.try_clone()?,
            vcpu_mmap_size: self.vcpu_mmap_size,
        })
    }

    fn check_capability(&self, cap: HypervisorCap) -> bool {
        if let Ok(kvm_cap) = KvmCap::try_from(cap) {
            // SAFETY:
            // This ioctl is safe because we know this kvm descriptor is valid,
            // and we are copying over the kvm capability (u32) as a c_ulong value.
            unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, kvm_cap as c_ulong) == 1 }
        } else {
            // This capability cannot be converted on this platform, so return false.
            false
        }
    }
}

/// A wrapper around creating and using a KVM VM.
pub struct KvmVm {
    kvm: Kvm,
    vm: SafeDescriptor,
    guest_mem: GuestMemory,
    mem_regions: Arc<Mutex<BTreeMap<MemSlot, Box<dyn MappedRegion>>>>,
    /// A min-heap of MemSlot numbers that were used and then removed and can now be re-used.
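    ///
    /// The slot numbers are wrapped in `Reverse` because `BinaryHeap` is a max-heap; reversing
    /// the ordering makes `pop()` yield the lowest freed slot first, so slots are re-used in
    /// ascending order.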
    mem_slot_gaps: Arc<Mutex<BinaryHeap<Reverse<MemSlot>>>>,
    cap_kvmclock_ctrl: bool,
}

impl KvmVm {
    /// Constructs a new `KvmVm` using the given `Kvm` instance.
    pub fn new(kvm: &Kvm, guest_mem: GuestMemory, cfg: Config) -> Result<KvmVm> {
        // SAFETY:
        // Safe because we know kvm is a real kvm fd as this module is the only one that can make
        // Kvm objects.
        let ret = unsafe {
            ioctl_with_val(
                kvm,
                KVM_CREATE_VM,
                kvm.get_vm_type(cfg.protection_type)? as c_ulong,
            )
        };
        if ret < 0 {
            return errno_result();
        }
        // SAFETY:
        // Safe because we verify that ret is valid and we own the fd.
        let vm_descriptor = unsafe { SafeDescriptor::from_raw_descriptor(ret) };
        for region in guest_mem.regions() {
            // SAFETY:
            // Safe because the guest regions are guaranteed not to overlap.
            unsafe {
                set_user_memory_region(
                    &vm_descriptor,
                    region.index as MemSlot,
                    false,
                    false,
                    MemCacheType::CacheCoherent,
                    region.guest_addr.offset(),
                    region.size as u64,
                    region.host_addr as *mut u8,
                )
            }?;
        }

        let mut vm = KvmVm {
            kvm: kvm.try_clone()?,
            vm: vm_descriptor,
            guest_mem,
            mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
            mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
            cap_kvmclock_ctrl: false,
        };
        vm.cap_kvmclock_ctrl = vm.check_raw_capability(KvmCap::KvmclockCtrl);
        vm.init_arch(&cfg)?;
        Ok(vm)
    }

    pub fn create_kvm_vcpu(&self, id: usize) -> Result<KvmVcpu> {
        // SAFETY:
        // Safe because we know that our file is a VM fd and we verify the return result.
        let fd = unsafe { ioctl_with_val(self, KVM_CREATE_VCPU, c_ulong::try_from(id).unwrap()) };
        if fd < 0 {
            return errno_result();
        }

        // SAFETY:
        // Wrap the vcpu now in case the following ? returns early. This is safe because we
        // verified the value of the fd and we own the fd.
        let vcpu = unsafe { File::from_raw_descriptor(fd) };

        // The VCPU mapping is held by an `Arc` inside `KvmVcpu`, and it can also be cloned by
        // `signal_handle()` for use in `KvmVcpuSignalHandle`. The mapping will not be destroyed
        // until all references are dropped, so it is safe to reference `kvm_run` fields via the
        // `as_ptr()` function during either type's lifetime.
        let run_mmap = MemoryMappingBuilder::new(self.kvm.vcpu_mmap_size)
            .from_file(&vcpu)
            .build()
            .map_err(|_| Error::new(ENOSPC))?;

        Ok(KvmVcpu {
            kvm: self.kvm.try_clone()?,
            vm: self.vm.try_clone()?,
            vcpu,
            id,
            cap_kvmclock_ctrl: self.cap_kvmclock_ctrl,
            run_mmap: Arc::new(run_mmap),
        })
    }

    /// Creates an in-kernel interrupt controller.
    ///
    /// See the documentation on the KVM_CREATE_IRQCHIP ioctl.
    pub fn create_irq_chip(&self) -> Result<()> {
        // SAFETY:
        // Safe because we know that our file is a VM fd and we verify the return result.
        let ret = unsafe { ioctl(self, KVM_CREATE_IRQCHIP) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Sets the level on the given irq to 1 if `active` is true, and 0 otherwise.
    pub fn set_irq_line(&self, irq: u32, active: bool) -> Result<()> {
        let mut irq_level = kvm_irq_level::default();
        irq_level.__bindgen_anon_1.irq = irq;
        irq_level.level = active.into();

        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQ_LINE, &irq_level) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Registers an event that will, when signalled, trigger the `gsi` irq. If `resample_evt`
    /// is not `None`, it will be triggered when the irqchip is resampled.
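    ///
    /// # Example
    ///
    /// A minimal sketch, assuming `vm` is a `KvmVm` with an in-kernel irqchip; marked `ignore`
    /// because it needs a live VM:
    ///
    /// ```ignore
    /// let evt = Event::new()?;
    /// vm.register_irqfd(1, &evt, None)?;
    /// evt.signal()?; // KVM injects GSI 1 into the guest.
    /// ```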
    pub fn register_irqfd(
        &self,
        gsi: u32,
        evt: &Event,
        resample_evt: Option<&Event>,
    ) -> Result<()> {
        let mut irqfd = kvm_irqfd {
            fd: evt.as_raw_descriptor() as u32,
            gsi,
            ..Default::default()
        };

        if let Some(r_evt) = resample_evt {
            irqfd.flags = KVM_IRQFD_FLAG_RESAMPLE;
            irqfd.resamplefd = r_evt.as_raw_descriptor() as u32;
        }

        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD, &irqfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Unregisters an event that was previously registered with
    /// `register_irqfd`.
    ///
    /// The `evt` and `gsi` pair must be the same as the ones passed into
    /// `register_irqfd`.
    pub fn unregister_irqfd(&self, gsi: u32, evt: &Event) -> Result<()> {
        let irqfd = kvm_irqfd {
            fd: evt.as_raw_descriptor() as u32,
            gsi,
            flags: KVM_IRQFD_FLAG_DEASSIGN,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD, &irqfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Sets the GSI routing table, replacing any table set with previous calls to
    /// `set_gsi_routing`.
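    ///
    /// # Example
    ///
    /// A minimal sketch routing GSI 5 to an MSI (the address and data are made-up values);
    /// marked `ignore` because it needs a live VM:
    ///
    /// ```ignore
    /// vm.set_gsi_routing(&[IrqRoute {
    ///     gsi: 5,
    ///     source: IrqSource::Msi {
    ///         address: 0xfee0_0000,
    ///         data: 0x20,
    ///     },
    /// }])?;
    /// ```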
    pub fn set_gsi_routing(&self, routes: &[IrqRoute]) -> Result<()> {
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(routes.len());
        irq_routing[0].nr = routes.len() as u32;

        // SAFETY:
        // Safe because we ensured there is enough space in irq_routing to hold the number of
        // route entries.
        let irq_routes = unsafe { irq_routing[0].entries.as_mut_slice(routes.len()) };
        for (route, irq_route) in routes.iter().zip(irq_routes.iter_mut()) {
            *irq_route = kvm_irq_routing_entry::from(route);
        }

        // TODO(b/315998194): Add safety comment
        #[allow(clippy::undocumented_unsafe_blocks)]
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_GSI_ROUTING, &irq_routing[0]) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn ioeventfd(
        &self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
        deassign: bool,
    ) -> Result<()> {
        let (do_datamatch, datamatch_value, datamatch_len) = match datamatch {
            Datamatch::AnyLength => (false, 0, 0),
            Datamatch::U8(v) => match v {
                Some(u) => (true, u as u64, 1),
                None => (false, 0, 1),
            },
            Datamatch::U16(v) => match v {
                Some(u) => (true, u as u64, 2),
                None => (false, 0, 2),
            },
            Datamatch::U32(v) => match v {
                Some(u) => (true, u as u64, 4),
                None => (false, 0, 4),
            },
            Datamatch::U64(v) => match v {
                Some(u) => (true, u, 8),
                None => (false, 0, 8),
            },
        };
        let mut flags = 0;
        if deassign {
            flags |= 1 << kvm_ioeventfd_flag_nr_deassign;
        }
        if do_datamatch {
            flags |= 1 << kvm_ioeventfd_flag_nr_datamatch;
        }
        if let IoEventAddress::Pio(_) = addr {
            flags |= 1 << kvm_ioeventfd_flag_nr_pio;
        }
        let ioeventfd = kvm_ioeventfd {
            datamatch: datamatch_value,
            len: datamatch_len,
            addr: match addr {
                IoEventAddress::Pio(p) => p,
                IoEventAddress::Mmio(m) => m,
            },
            fd: evt.as_raw_descriptor(),
            flags,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IOEVENTFD, &ioeventfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Checks whether a particular KVM-specific capability is available for this VM.
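    ///
    /// # Example
    ///
    /// A minimal sketch; marked `ignore` because it needs a live VM:
    ///
    /// ```ignore
    /// if vm.check_raw_capability(KvmCap::KvmclockCtrl) {
    ///     // KVM_KVMCLOCK_CTRL is usable on this VM's vcpus.
    /// }
    /// ```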
    pub fn check_raw_capability(&self, capability: KvmCap) -> bool {
        // SAFETY:
        // Safe because we know that our file is a KVM fd, and if the cap is invalid KVM assumes
        // it's an unavailable extension and returns 0.
        let ret = unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, capability as c_ulong) };
        match capability {
            #[cfg(target_arch = "x86_64")]
            KvmCap::BusLockDetect => {
                if ret > 0 {
                    ret as u32 & KVM_BUS_LOCK_DETECTION_EXIT == KVM_BUS_LOCK_DETECTION_EXIT
                } else {
                    false
                }
            }
            _ => ret == 1,
        }
    }

    // Currently only used on aarch64, but works on any architecture.
    #[allow(dead_code)]
    /// Enables a KVM-specific capability for this VM, with the given arguments.
    ///
    /// # Safety
    /// This function is marked as unsafe because `args` may be interpreted as pointers for some
    /// capabilities. The caller must ensure that any pointers passed in the `args` array are
    /// allocated as the kernel expects, and that mutable pointers are owned.
    unsafe fn enable_raw_capability(
        &self,
        capability: KvmCap,
        flags: u32,
        args: &[u64; 4],
    ) -> Result<()> {
        let kvm_cap = kvm_enable_cap {
            cap: capability as u32,
            args: *args,
            flags,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we allocated the struct and we know the kernel will read exactly the size
        // of the struct, and because we assume the caller has allocated the args appropriately.
        let ret = ioctl_with_ref(self, KVM_ENABLE_CAP, &kvm_cap);
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn handle_inflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
        match self.guest_mem.remove_range(guest_address, size) {
            Ok(_) => Ok(()),
            Err(vm_memory::Error::MemoryAccess(_, MmapError::SystemCallFailed(e))) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn handle_deflate(&mut self, _guest_address: GuestAddress, _size: u64) -> Result<()> {
        // No-op; when the guest attempts to access the pages again, Linux/KVM will provide them.
        Ok(())
    }
}

impl Vm for KvmVm {
    fn try_clone(&self) -> Result<Self> {
        Ok(KvmVm {
            kvm: self.kvm.try_clone()?,
            vm: self.vm.try_clone()?,
            guest_mem: self.guest_mem.clone(),
            mem_regions: self.mem_regions.clone(),
            mem_slot_gaps: self.mem_slot_gaps.clone(),
            cap_kvmclock_ctrl: self.cap_kvmclock_ctrl,
        })
    }

    fn check_capability(&self, c: VmCap) -> bool {
        if let Some(val) = self.check_capability_arch(c) {
            return val;
        }
        match c {
            VmCap::DirtyLog => true,
            VmCap::PvClock => false,
            VmCap::Protected => self.check_raw_capability(KvmCap::ArmProtectedVm),
            VmCap::EarlyInitCpuid => false,
            #[cfg(target_arch = "x86_64")]
            VmCap::BusLockDetect => self.check_raw_capability(KvmCap::BusLockDetect),
            // When pKVM is the hypervisor, read-only memslots aren't supported, even for
            // non-protected VMs.
            VmCap::ReadOnlyMemoryRegion => !self.is_pkvm(),
            VmCap::MemNoncoherentDma => {
                cfg!(feature = "noncoherent-dma")
                    && self.check_raw_capability(KvmCap::MemNoncoherentDma)
            }
        }
    }

    fn enable_capability(&self, c: VmCap, _flags: u32) -> Result<bool> {
        match c {
            #[cfg(target_arch = "x86_64")]
            VmCap::BusLockDetect => {
                let args = [KVM_BUS_LOCK_DETECTION_EXIT as u64, 0, 0, 0];
                Ok(
                    // TODO(b/315998194): Add safety comment
                    #[allow(clippy::undocumented_unsafe_blocks)]
                    unsafe {
                        self.enable_raw_capability(KvmCap::BusLockDetect, _flags, &args) == Ok(())
                    },
                )
            }
            _ => Ok(false),
        }
    }

    fn get_guest_phys_addr_bits(&self) -> u8 {
        self.kvm.get_guest_phys_addr_bits()
    }

    fn get_memory(&self) -> &GuestMemory {
        &self.guest_mem
    }

    fn add_memory_region(
        &mut self,
        guest_addr: GuestAddress,
        mem: Box<dyn MappedRegion>,
        read_only: bool,
        log_dirty_pages: bool,
        cache: MemCacheType,
    ) -> Result<MemSlot> {
        let pgsz = pagesize() as u64;
        // KVM requires the user memory region size to be page-size aligned. It is safe to round
        // mem.size() up to the nearest page boundary because mmap rounds a mapping's size up to
        // page alignment anyway.
        let size = (mem.size() as u64 + pgsz - 1) / pgsz * pgsz;
        let end_addr = guest_addr
            .checked_add(size)
            .ok_or_else(|| Error::new(EOVERFLOW))?;
        if self.guest_mem.range_overlap(guest_addr, end_addr) {
            return Err(Error::new(ENOSPC));
        }
        let mut regions = self.mem_regions.lock();
        let mut gaps = self.mem_slot_gaps.lock();
        let slot = match gaps.pop() {
            Some(gap) => gap.0,
            None => (regions.len() + self.guest_mem.num_regions() as usize) as MemSlot,
        };

        let cache_type = if self.check_capability(VmCap::MemNoncoherentDma) {
            cache
        } else {
            MemCacheType::CacheCoherent
        };

        // SAFETY:
        // Safe because we check that the given guest address is valid and has no overlaps. We
        // also know that the pointer and size are correct because the MemoryMapping interface
        // ensures this. We take ownership of the memory mapping so that it won't be unmapped
        // until the slot is removed.
        let res = unsafe {
            set_user_memory_region(
                &self.vm,
                slot,
                read_only,
                log_dirty_pages,
                cache_type,
                guest_addr.offset(),
                size,
                mem.as_ptr(),
            )
        };

        if let Err(e) = res {
            gaps.push(Reverse(slot));
            return Err(e);
        }
        regions.insert(slot, mem);
        Ok(slot)
    }

    fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;

        mem.msync(offset, size).map_err(|err| match err {
            MmapError::InvalidAddress => Error::new(EFAULT),
            MmapError::NotPageAligned => Error::new(EINVAL),
            MmapError::SystemCallFailed(e) => e,
            _ => Error::new(EIO),
        })
    }

    fn madvise_pageout_memory_region(
        &mut self,
        slot: MemSlot,
        offset: usize,
        size: usize,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;

        mem.madvise(offset, size, libc::MADV_PAGEOUT)
            .map_err(|err| match err {
                MmapError::InvalidAddress => Error::new(EFAULT),
                MmapError::NotPageAligned => Error::new(EINVAL),
                MmapError::SystemCallFailed(e) => e,
                _ => Error::new(EIO),
            })
    }

    fn madvise_remove_memory_region(
        &mut self,
        slot: MemSlot,
        offset: usize,
        size: usize,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;

        mem.madvise(offset, size, libc::MADV_REMOVE)
            .map_err(|err| match err {
                MmapError::InvalidAddress => Error::new(EFAULT),
                MmapError::NotPageAligned => Error::new(EINVAL),
                MmapError::SystemCallFailed(e) => e,
                _ => Error::new(EIO),
            })
    }

    fn remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>> {
        let mut regions = self.mem_regions.lock();
        if !regions.contains_key(&slot) {
            return Err(Error::new(ENOENT));
        }
        // SAFETY:
        // Safe because the slot is checked against the list of memory slots.
        unsafe {
            set_user_memory_region(
                &self.vm,
                slot,
                false,
                false,
                MemCacheType::CacheCoherent,
                0,
                0,
                std::ptr::null_mut(),
            )?;
        }
        self.mem_slot_gaps.lock().push(Reverse(slot));
        // This remove will always succeed because of the contains_key check above.
        Ok(regions.remove(&slot).unwrap())
    }

    fn create_device(&self, kind: DeviceKind) -> Result<SafeDescriptor> {
        let mut device = if let Some(dev) = self.get_device_params_arch(kind) {
            dev
        } else {
            match kind {
                DeviceKind::Vfio => kvm_create_device {
                    type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
                    fd: 0,
                    flags: 0,
                },

                // ARM and RISC-V have additional DeviceKinds, so they need the catch-all pattern.
                #[cfg(any(target_arch = "arm", target_arch = "aarch64", target_arch = "riscv64"))]
                _ => return Err(Error::new(libc::ENXIO)),
            }
        };

        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { base::ioctl_with_mut_ref(self, KVM_CREATE_DEVICE, &mut device) };
        if ret == 0 {
            Ok(
                // SAFETY:
                // Safe because we verify that ret is valid and we own the fd.
                unsafe { SafeDescriptor::from_raw_descriptor(device.fd as i32) },
            )
        } else {
            errno_result()
        }
    }

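    // A hypothetical usage sketch (not from this file): callers size the bitmap with
    // `dirty_log_bitmap_size` before asking for the log, e.g.
    //
    //     let mut dirty_log = vec![0u8; dirty_log_bitmap_size(region_size)];
    //     vm.get_dirty_log(slot, &mut dirty_log)?;
    //
    // where `slot` and `region_size` describe a region previously added with
    // `log_dirty_pages` set.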
    fn get_dirty_log(&self, slot: MemSlot, dirty_log: &mut [u8]) -> Result<()> {
        let regions = self.mem_regions.lock();
        let mmap = regions.get(&slot).ok_or_else(|| Error::new(ENOENT))?;
        // Ensures that there are as many bytes in dirty_log as there are pages in the mmap.
        if dirty_log_bitmap_size(mmap.size()) > dirty_log.len() {
            return Err(Error::new(EINVAL));
        }

        let mut dirty_log_kvm = kvm_dirty_log {
            slot,
            ..Default::default()
        };
        dirty_log_kvm.__bindgen_anon_1.dirty_bitmap = dirty_log.as_ptr() as *mut c_void;
        // SAFETY:
        // Safe because the `dirty_bitmap` pointer assigned above is guaranteed to be valid
        // (because it's from a slice) and we checked that it will be large enough to hold the
        // entire log.
        let ret = unsafe { ioctl_with_ref(self, KVM_GET_DIRTY_LOG, &dirty_log_kvm) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

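    // Registering an ioeventfd tells KVM to signal `evt` in-kernel whenever the guest writes the
    // matching address (and datamatch value, if any), avoiding a userspace exit. A hypothetical
    // sketch:
    //
    //     let evt = Event::new()?;
    //     vm.register_ioevent(&evt, IoEventAddress::Mmio(0x1000_0000), Datamatch::AnyLength)?;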
    fn register_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        self.ioeventfd(evt, addr, datamatch, false)
    }

    fn unregister_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        self.ioeventfd(evt, addr, datamatch, true)
    }

    fn handle_io_events(&self, _addr: IoEventAddress, _data: &[u8]) -> Result<()> {
        // KVM delivers IO events in-kernel with ioeventfds, so this is a no-op.
        Ok(())
    }

    fn get_pvclock(&self) -> Result<ClockState> {
        self.get_pvclock_arch()
    }

    fn set_pvclock(&self, state: &ClockState) -> Result<()> {
        self.set_pvclock_arch(state)
    }

    fn add_fd_mapping(
        &mut self,
        slot: u32,
        offset: usize,
        size: usize,
        fd: &dyn AsRawDescriptor,
        fd_offset: u64,
        prot: Protection,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let region = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;

        match region.add_fd_mapping(offset, size, fd, fd_offset, prot) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let region = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;

        match region.remove_mapping(offset, size) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn handle_balloon_event(&mut self, event: BalloonEvent) -> Result<()> {
        match event {
            BalloonEvent::Inflate(m) => self.handle_inflate(m.guest_address, m.size),
            BalloonEvent::Deflate(m) => self.handle_deflate(m.guest_address, m.size),
            BalloonEvent::BalloonTargetReached(_) => Ok(()),
        }
    }
}

impl AsRawDescriptor for KvmVm {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vm.as_raw_descriptor()
    }
}

struct KvmVcpuSignalHandle {
    run_mmap: Arc<MemoryMapping>,
}

impl VcpuSignalHandleInner for KvmVcpuSignalHandle {
    fn signal_immediate_exit(&self) {
        // SAFETY: we ensure `run_mmap` is a valid mapping of `kvm_run` at creation time, and the
        // `Arc` ensures the mapping still exists while we hold a reference to it.
        unsafe {
            let run = self.run_mmap.as_ptr() as *mut kvm_run;
            (*run).immediate_exit = 1;
        }
    }
}

/// A wrapper around using a KVM Vcpu.
pub struct KvmVcpu {
    kvm: Kvm,
    vm: SafeDescriptor,
    vcpu: File,
    id: usize,
    cap_kvmclock_ctrl: bool,
    run_mmap: Arc<MemoryMapping>,
}

impl Vcpu for KvmVcpu {
    fn try_clone(&self) -> Result<Self> {
        let vm = self.vm.try_clone()?;
        let vcpu = self.vcpu.try_clone()?;

        Ok(KvmVcpu {
            kvm: self.kvm.try_clone()?,
            vm,
            vcpu,
            cap_kvmclock_ctrl: self.cap_kvmclock_ctrl,
            id: self.id,
            run_mmap: self.run_mmap.clone(),
        })
    }

    fn as_vcpu(&self) -> &dyn Vcpu {
        self
    }

    fn id(&self) -> usize {
        self.id
    }

    #[allow(clippy::cast_ptr_alignment)]
    fn set_immediate_exit(&self, exit: bool) {
        // SAFETY:
        // Safe because we know we mapped enough memory to hold the kvm_run struct because the
        // kernel told us how large it was. The pointer is page aligned so casting to a different
        // type is well defined, hence the clippy allow attribute.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        run.immediate_exit = exit.into();
    }

    fn signal_handle(&self) -> VcpuSignalHandle {
        VcpuSignalHandle {
            inner: Box::new(KvmVcpuSignalHandle {
                run_mmap: self.run_mmap.clone(),
            }),
        }
    }

    fn on_suspend(&self) -> Result<()> {
        // On KVM implementations that use a paravirtualized clock (e.g. x86), a flag must be set
        // to indicate to the guest kernel that a vCPU was suspended. The guest kernel will use
        // this flag to prevent the soft lockup detection from triggering when this vCPU resumes,
        // which could happen days later in realtime.
        if self.cap_kvmclock_ctrl {
            // SAFETY:
            // The ioctl is safe because it does not read or write memory in this process.
            if unsafe { ioctl(self, KVM_KVMCLOCK_CTRL) } != 0 {
                // Even if the host kernel supports the capability, it may not be configured by
                // the guest - for example, when the guest kernel offlines a CPU.
                if Error::last().errno() != libc::EINVAL {
                    return errno_result();
                }
            }
        }

        Ok(())
    }

    unsafe fn enable_raw_capability(&self, cap: u32, args: &[u64; 4]) -> Result<()> {
        let kvm_cap = kvm_enable_cap {
            cap,
            args: *args,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we allocated the struct and we know the kernel will read exactly the size
        // of the struct, and because we assume the caller has allocated the args appropriately.
        let ret = ioctl_with_ref(self, KVM_ENABLE_CAP, &kvm_cap);
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    #[allow(clippy::cast_ptr_alignment)]
    // The pointer is page aligned so casting to a different type is well defined, hence the
    // clippy allow attribute.
    fn run(&mut self) -> Result<VcpuExit> {
        // SAFETY:
        // Safe because we know that our file is a VCPU fd and we verify the return result.
        let ret = unsafe { ioctl(self, KVM_RUN) };
        if ret != 0 {
            return errno_result();
        }

        // SAFETY:
        // Safe because we know we mapped enough memory to hold the kvm_run struct because the
        // kernel told us how large it was.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };

        // Check for architecture-specific VM exit reasons first in case the architecture wants
        // to override the default handling.
        if let Some(vcpu_exit) = self.handle_vm_exit_arch(run) {
            return Ok(vcpu_exit);
        }

        match run.exit_reason {
            KVM_EXIT_MMIO => Ok(VcpuExit::Mmio),
            KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception),
            KVM_EXIT_HYPERCALL => Ok(VcpuExit::Hypercall),
            KVM_EXIT_DEBUG => Ok(VcpuExit::Debug),
            KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen),
            KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown(Ok(()))),
            KVM_EXIT_FAIL_ENTRY => {
                // SAFETY:
                // Safe because the exit_reason (which comes from the kernel) told us which
                // union field to use.
                let hardware_entry_failure_reason = unsafe {
                    run.__bindgen_anon_1
                        .fail_entry
                        .hardware_entry_failure_reason
                };
                Ok(VcpuExit::FailEntry {
                    hardware_entry_failure_reason,
                })
            }
            KVM_EXIT_INTR => Ok(VcpuExit::Intr),
            KVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError),
            KVM_EXIT_SYSTEM_EVENT => {
                // SAFETY:
                // Safe because we know the exit reason told us this union
                // field is valid
                let event_type = unsafe { run.__bindgen_anon_1.system_event.type_ };
                // SAFETY:
                // Safe because we know the exit reason told us this union
                // field is valid
                let event_flags =
                    unsafe { run.__bindgen_anon_1.system_event.__bindgen_anon_1.flags };
                match event_type {
                    KVM_SYSTEM_EVENT_SHUTDOWN => Ok(VcpuExit::SystemEventShutdown),
                    KVM_SYSTEM_EVENT_RESET => self.system_event_reset(event_flags),
                    KVM_SYSTEM_EVENT_CRASH => Ok(VcpuExit::SystemEventCrash),
                    _ => {
                        error!(
                            "Unknown KVM system event {} with flags {}",
                            event_type, event_flags
                        );
                        Err(Error::new(EINVAL))
                    }
                }
            }
            r => panic!("unknown kvm exit reason: {}", r),
        }
    }

    fn handle_mmio(
        &self,
        handle_fn: &mut dyn FnMut(IoParams) -> Result<Option<[u8; 8]>>,
    ) -> Result<()> {
        // SAFETY:
        // Safe because we know we mapped enough memory to hold the kvm_run struct because the
        // kernel told us how large it was. The pointer is page aligned so casting to a different
        // type is well defined, hence the clippy allow attribute.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        // Verify that the handler is called in the right context.
        assert!(run.exit_reason == KVM_EXIT_MMIO);
        // SAFETY:
        // Safe because the exit_reason (which comes from the kernel) told us which
        // union field to use.
        let mmio = unsafe { &mut run.__bindgen_anon_1.mmio };
        let address = mmio.phys_addr;
        let size = min(mmio.len as usize, mmio.data.len());
        if mmio.is_write != 0 {
            handle_fn(IoParams {
                address,
                size,
                operation: IoOperation::Write { data: mmio.data },
            })?;
            Ok(())
        } else if let Some(data) = handle_fn(IoParams {
            address,
            size,
            operation: IoOperation::Read,
        })? {
            mmio.data[..size].copy_from_slice(&data[..size]);
            Ok(())
        } else {
            Err(Error::new(EINVAL))
        }
    }

    fn handle_io(&self, handle_fn: &mut dyn FnMut(IoParams) -> Option<[u8; 8]>) -> Result<()> {
        // SAFETY:
        // Safe because we know we mapped enough memory to hold the kvm_run struct because the
        // kernel told us how large it was. The pointer is page aligned so casting to a different
        // type is well defined, hence the clippy allow attribute.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        // Verify that the handler is called in the right context.
        assert!(run.exit_reason == KVM_EXIT_IO);
        // SAFETY:
        // Safe because the exit_reason (which comes from the kernel) told us which
        // union field to use.
        let io = unsafe { run.__bindgen_anon_1.io };
        let size = usize::from(io.size);

        // SAFETY:
        // The data_offset is defined by the kernel to be some number of bytes into the kvm_run
        // structure, which we have fully mmap'd.
        let mut data_ptr = unsafe { (run as *mut kvm_run as *mut u8).add(io.data_offset as usize) };

        match io.direction as u32 {
            KVM_EXIT_IO_IN => {
                for _ in 0..io.count {
                    if let Some(data) = handle_fn(IoParams {
                        address: io.port.into(),
                        size,
                        operation: IoOperation::Read,
                    }) {
                        // TODO(b/315998194): Add safety comment
                        #[allow(clippy::undocumented_unsafe_blocks)]
                        unsafe {
                            copy_nonoverlapping(data.as_ptr(), data_ptr, size);
                            data_ptr = data_ptr.add(size);
                        }
                    } else {
                        return Err(Error::new(EINVAL));
                    }
                }
                Ok(())
            }
            KVM_EXIT_IO_OUT => {
                for _ in 0..io.count {
                    let mut data = [0; 8];
                    // TODO(b/315998194): Add safety comment
                    #[allow(clippy::undocumented_unsafe_blocks)]
                    unsafe {
                        copy_nonoverlapping(data_ptr, data.as_mut_ptr(), min(size, data.len()));
                        data_ptr = data_ptr.add(size);
                    }
                    handle_fn(IoParams {
                        address: io.port.into(),
                        size,
                        operation: IoOperation::Write { data },
                    });
                }
                Ok(())
            }
            _ => Err(Error::new(EINVAL)),
        }
    }
}

impl KvmVcpu {
    /// Gets the vcpu's current "multiprocessing state".
    ///
    /// See the documentation for KVM_GET_MP_STATE. This call can only succeed after
    /// a call to `Vm::create_irq_chip`.
    ///
    /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
    /// to run crosvm on s390.
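    ///
    /// # Example
    ///
    /// A minimal save/restore sketch, assuming `vcpu` is a `KvmVcpu`; marked `ignore` because it
    /// needs a live vcpu:
    ///
    /// ```ignore
    /// let state = vcpu.get_mp_state()?;
    /// vcpu.set_mp_state(&state)?;
    /// ```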
    pub fn get_mp_state(&self) -> Result<kvm_mp_state> {
        // SAFETY: trivially safe
        let mut state: kvm_mp_state = unsafe { std::mem::zeroed() };
        let ret = {
            // SAFETY:
            // Safe because we know that our file is a VCPU fd, we know the kernel will only
            // write the correct amount of memory to our pointer, and we verify the return
            // result.
            unsafe { ioctl_with_mut_ref(self, KVM_GET_MP_STATE, &mut state) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(state)
    }

    /// Sets the vcpu's current "multiprocessing state".
    ///
    /// See the documentation for KVM_SET_MP_STATE. This call can only succeed after
    /// a call to `Vm::create_irq_chip`.
    ///
    /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
    /// to run crosvm on s390.
    pub fn set_mp_state(&self, state: &kvm_mp_state) -> Result<()> {
        let ret = {
            // SAFETY:
            // The ioctl is safe because the kernel will only read from the kvm_mp_state struct.
            unsafe { ioctl_with_ref(self, KVM_SET_MP_STATE, state) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }
}

impl AsRawDescriptor for KvmVcpu {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vcpu.as_raw_descriptor()
    }
}

impl TryFrom<HypervisorCap> for KvmCap {
    type Error = Error;

    fn try_from(cap: HypervisorCap) -> Result<KvmCap> {
        match cap {
            HypervisorCap::ArmPmuV3 => Ok(KvmCap::ArmPmuV3),
            HypervisorCap::ImmediateExit => Ok(KvmCap::ImmediateExit),
            HypervisorCap::S390UserSigp => Ok(KvmCap::S390UserSigp),
            HypervisorCap::TscDeadlineTimer => Ok(KvmCap::TscDeadlineTimer),
            HypervisorCap::UserMemory => Ok(KvmCap::UserMemory),
            #[cfg(target_arch = "x86_64")]
            HypervisorCap::Xcrs => Ok(KvmCap::Xcrs),
            #[cfg(target_arch = "x86_64")]
            HypervisorCap::CalibratedTscLeafRequired => Err(Error::new(libc::EINVAL)),
            HypervisorCap::StaticSwiotlbAllocationRequired => Err(Error::new(libc::EINVAL)),
            HypervisorCap::HypervisorInitializedBootContext => Err(Error::new(libc::EINVAL)),
        }
    }
}

impl From<&IrqRoute> for kvm_irq_routing_entry {
    fn from(item: &IrqRoute) -> Self {
        match &item.source {
            IrqSource::Irqchip { chip, pin } => kvm_irq_routing_entry {
                gsi: item.gsi,
                type_: KVM_IRQ_ROUTING_IRQCHIP,
                u: kvm_irq_routing_entry__bindgen_ty_1 {
                    irqchip: kvm_irq_routing_irqchip {
                        irqchip: chip_to_kvm_chip(*chip),
                        pin: *pin,
                    },
                },
                ..Default::default()
            },
            IrqSource::Msi { address, data } => kvm_irq_routing_entry {
                gsi: item.gsi,
                type_: KVM_IRQ_ROUTING_MSI,
                u: kvm_irq_routing_entry__bindgen_ty_1 {
                    msi: kvm_irq_routing_msi {
                        address_lo: *address as u32,
                        address_hi: (*address >> 32) as u32,
                        data: *data,
                        ..Default::default()
                    },
                },
                ..Default::default()
            },
        }
    }
}

impl From<&kvm_mp_state> for MPState {
    fn from(item: &kvm_mp_state) -> Self {
        match item.mp_state {
            KVM_MP_STATE_RUNNABLE => MPState::Runnable,
            KVM_MP_STATE_UNINITIALIZED => MPState::Uninitialized,
            KVM_MP_STATE_INIT_RECEIVED => MPState::InitReceived,
            KVM_MP_STATE_HALTED => MPState::Halted,
            KVM_MP_STATE_SIPI_RECEIVED => MPState::SipiReceived,
            KVM_MP_STATE_STOPPED => MPState::Stopped,
            state => {
                error!(
                    "unrecognized kvm_mp_state {}, setting to KVM_MP_STATE_RUNNABLE",
                    state
                );
                MPState::Runnable
            }
        }
    }
}

impl From<&MPState> for kvm_mp_state {
    fn from(item: &MPState) -> Self {
        kvm_mp_state {
            mp_state: match item {
                MPState::Runnable => KVM_MP_STATE_RUNNABLE,
                MPState::Uninitialized => KVM_MP_STATE_UNINITIALIZED,
                MPState::InitReceived => KVM_MP_STATE_INIT_RECEIVED,
                MPState::Halted => KVM_MP_STATE_HALTED,
                MPState::SipiReceived => KVM_MP_STATE_SIPI_RECEIVED,
                MPState::Stopped => KVM_MP_STATE_STOPPED,
            },
        }
    }
}