Coverage Report

Created: 2026-06-07 06:19

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cloud-hypervisor/pci/src/vfio_user.rs
Line
Count
Source
1
// Copyright © 2021 Intel Corporation
2
//
3
// SPDX-License-Identifier: Apache-2.0
4
//
5
6
use std::any::Any;
7
use std::os::fd::AsFd;
8
use std::os::unix::prelude::AsRawFd;
9
use std::sync::{Arc, Barrier, Mutex};
10
11
use hypervisor::HypervisorVmError;
12
use log::{error, info};
13
use thiserror::Error;
14
use vfio_bindings::bindings::vfio::*;
15
use vfio_ioctls::VfioIrq;
16
use vfio_user::{Client, Error as VfioUserError};
17
use vm_allocator::{AddressAllocator, MemorySlotAllocator, SystemAllocator};
18
use vm_device::dma_mapping::ExternalDmaMapping;
19
use vm_device::interrupt::{InterruptManager, InterruptSourceGroup, MsiIrqGroupConfig};
20
use vm_device::{BusDevice, Resource};
21
use vm_memory::bitmap::AtomicBitmap;
22
use vm_memory::{
23
    Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryRegion, GuestRegionMmap,
24
};
25
use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable};
26
use vmm_sys_util::eventfd::EventFd;
27
28
use crate::mmap::MmapRegion;
29
use crate::vfio::{
30
    UserMemoryRegion, VFIO_COMMON_ID, Vfio, VfioCommon, VfioCommonConfig, VfioError,
31
};
32
use crate::{
33
    BarReprogrammingParams, PciBarConfiguration, PciBdf, PciDevice, PciDeviceError, PciSubclass,
34
    VfioPciError,
35
};
36
37
pub struct VfioUserPciDevice {
38
    id: String,
39
    vm: Arc<dyn hypervisor::Vm>,
40
    client: Arc<Mutex<Client>>,
41
    common: VfioCommon,
42
    memory_slot_allocator: MemorySlotAllocator,
43
}
44
45
#[derive(Error, Debug)]
46
pub enum VfioUserPciDeviceError {
47
    #[error("Client error")]
48
    Client(#[source] VfioUserError),
49
    #[error("Failed to map VFIO PCI region into guest")]
50
    MapRegionGuest(#[source] HypervisorVmError),
51
    #[error("Failed to DMA map")]
52
    DmaMap(#[source] VfioUserError),
53
    #[error("Failed to DMA unmap")]
54
    DmaUnmap(#[source] VfioUserError),
55
    #[error("Failed to initialize legacy interrupts")]
56
    InitializeLegacyInterrupts(#[source] VfioPciError),
57
    #[error("Failed to create VfioCommon")]
58
    CreateVfioCommon(#[source] VfioPciError),
59
    #[error("Other OS error")]
60
    Other(#[source] std::io::Error),
61
}
62
63
/// PCI subclass value passed to `VfioCommon::new` for vfio-user devices.
#[derive(Copy, Clone)]
enum PciVfioUserSubclass {
    /// The only subclass used by this module; its register value is `0xff`.
    VfioUserSubclass = 0xff,
}
67
68
impl PciSubclass for PciVfioUserSubclass {
    /// Returns the enum discriminant as the subclass register byte.
    fn get_register_value(&self) -> u8 {
        let subclass = *self;
        subclass as u8
    }
}
73
74
impl VfioUserPciDevice {
75
    #[allow(clippy::too_many_arguments)]
76
0
    pub fn new(
77
0
        id: String,
78
0
        vm: Arc<dyn hypervisor::Vm>,
79
0
        client: Arc<Mutex<Client>>,
80
0
        msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
81
0
        legacy_interrupt_group: Option<Arc<dyn InterruptSourceGroup>>,
82
0
        bdf: PciBdf,
83
0
        memory_slot_allocator: MemorySlotAllocator,
84
0
        snapshot: Option<&Snapshot>,
85
0
    ) -> Result<Self, VfioUserPciDeviceError> {
86
0
        let resettable = client.lock().unwrap().resettable();
87
0
        if resettable {
88
0
            client
89
0
                .lock()
90
0
                .unwrap()
91
0
                .reset()
92
0
                .map_err(VfioUserPciDeviceError::Client)?;
93
0
        }
94
95
0
        let vfio_wrapper = VfioUserClientWrapper {
96
0
            client: client.clone(),
97
0
        };
98
99
0
        let common = VfioCommon::new(
100
0
            msi_interrupt_manager,
101
0
            legacy_interrupt_group,
102
0
            Arc::new(vfio_wrapper) as Arc<dyn Vfio>,
103
0
            &PciVfioUserSubclass::VfioUserSubclass,
104
0
            bdf,
105
0
            vm_migration::snapshot_from_id(snapshot, VFIO_COMMON_ID),
106
0
            VfioCommonConfig::default(),
107
        )
108
0
        .map_err(VfioUserPciDeviceError::CreateVfioCommon)?;
109
110
0
        Ok(Self {
111
0
            id,
112
0
            vm,
113
0
            client,
114
0
            common,
115
0
            memory_slot_allocator,
116
0
        })
117
0
    }
118
119
    /// Map all of the MMIO regions.
120
0
    pub fn map_mmio_regions(&mut self) -> Result<(), VfioUserPciDeviceError> {
121
0
        for mmio_region in &mut self.common.mmio_regions {
122
0
            let region_flags = self
123
0
                .client
124
0
                .lock()
125
0
                .unwrap()
126
0
                .region(mmio_region.index)
127
0
                .unwrap()
128
0
                .flags;
129
0
            let file_offset = self
130
0
                .client
131
0
                .lock()
132
0
                .unwrap()
133
0
                .region(mmio_region.index)
134
0
                .unwrap()
135
0
                .file_offset
136
0
                .clone();
137
138
0
            let sparse_areas = self
139
0
                .client
140
0
                .lock()
141
0
                .unwrap()
142
0
                .region(mmio_region.index)
143
0
                .unwrap()
144
0
                .sparse_areas
145
0
                .clone();
146
147
0
            if region_flags & VFIO_REGION_INFO_FLAG_MMAP != 0 {
148
0
                let mut prot = 0;
149
0
                if region_flags & VFIO_REGION_INFO_FLAG_READ != 0 {
150
0
                    prot |= libc::PROT_READ;
151
0
                }
152
0
                if region_flags & VFIO_REGION_INFO_FLAG_WRITE != 0 {
153
0
                    prot |= libc::PROT_WRITE;
154
0
                }
155
156
0
                let mmaps = if sparse_areas.is_empty() {
157
0
                    vec![vfio_region_sparse_mmap_area {
158
0
                        offset: 0,
159
0
                        size: mmio_region.length,
160
0
                    }]
161
                } else {
162
0
                    sparse_areas
163
                };
164
165
0
                let file_offset = file_offset.as_ref().unwrap();
166
167
0
                for s in mmaps.iter() {
168
0
                    let mapping = match MmapRegion::mmap(
169
0
                        s.size,
170
0
                        prot,
171
0
                        file_offset.file().as_fd(),
172
0
                        file_offset.start(),
173
0
                        s.offset,
174
0
                    ) {
175
0
                        Ok(mapping) => Arc::new(mapping),
176
0
                        Err(e) => {
177
0
                            error!(
178
                                "Could not mmap sparse area (offset = 0x{:x}, size = 0x{:x}): {}",
179
                                s.offset, s.size, e
180
                            );
181
0
                            return Err(VfioUserPciDeviceError::Other(e));
182
                        }
183
                    };
184
185
0
                    let user_memory_region = UserMemoryRegion {
186
0
                        slot: self.memory_slot_allocator.next_memory_slot(),
187
0
                        start: mmio_region.start.0 + s.offset,
188
0
                        mapping,
189
0
                    };
190
191
                    // SAFETY: validity of len and host_addr guaranteed by hypervisor::mmap::MmapRegion
192
                    unsafe {
193
0
                        self.vm.create_user_memory_region(
194
0
                            user_memory_region.slot,
195
0
                            user_memory_region.start,
196
0
                            user_memory_region.mapping.len(),
197
0
                            user_memory_region.mapping.addr(),
198
0
                            false,
199
0
                            false,
200
0
                        )
201
                    }
202
0
                    .map_err(VfioUserPciDeviceError::MapRegionGuest)?;
203
204
0
                    mmio_region.user_memory_regions.push(user_memory_region);
205
                }
206
0
            }
207
        }
208
209
0
        Ok(())
210
0
    }
211
212
0
    fn unmap_mmio_regions(&mut self) {
213
0
        for mmio_region in self.common.mmio_regions.iter_mut() {
214
0
            for user_memory_region in mmio_region.user_memory_regions.drain(..) {
215
                // Remove region
216
                // SAFETY: guaranteed by hypervisor::mmap::MmapRegion invariants
217
0
                if let Err(e) = unsafe {
218
0
                    self.vm.remove_user_memory_region(
219
0
                        user_memory_region.slot,
220
0
                        user_memory_region.start,
221
0
                        user_memory_region.mapping.len(),
222
0
                        user_memory_region.mapping.addr(),
223
0
                        false,
224
0
                        false,
225
0
                    )
226
0
                } {
227
0
                    error!("Could not remove the userspace memory region: {e}");
228
0
                }
229
230
0
                self.memory_slot_allocator
231
0
                    .free_memory_slot(user_memory_region.slot);
232
                // memory will be unmapped on drop
233
            }
234
        }
235
0
    }
236
237
0
    pub fn dma_map(
238
0
        &mut self,
239
0
        region: &GuestRegionMmap<AtomicBitmap>,
240
0
    ) -> Result<(), VfioUserPciDeviceError> {
241
0
        let (fd, offset) = match region.file_offset() {
242
0
            Some(_file_offset) => (_file_offset.file().as_raw_fd(), _file_offset.start()),
243
0
            None => return Ok(()),
244
        };
245
246
0
        self.client
247
0
            .lock()
248
0
            .unwrap()
249
0
            .dma_map(offset, region.start_addr().raw_value(), region.len(), fd)
250
0
            .map_err(VfioUserPciDeviceError::DmaMap)
251
0
    }
252
253
0
    pub fn dma_unmap(
254
0
        &mut self,
255
0
        region: &GuestRegionMmap<AtomicBitmap>,
256
0
    ) -> Result<(), VfioUserPciDeviceError> {
257
0
        self.client
258
0
            .lock()
259
0
            .unwrap()
260
0
            .dma_unmap(region.start_addr().raw_value(), region.len())
261
0
            .map_err(VfioUserPciDeviceError::DmaUnmap)
262
0
    }
263
}
264
265
impl BusDevice for VfioUserPciDevice {
266
0
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
267
0
        self.read_bar(base, offset, data);
268
0
    }
269
270
0
    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
271
0
        self.write_bar(base, offset, data)
272
0
    }
273
}
274
275
struct VfioUserClientWrapper {
276
    client: Arc<Mutex<Client>>,
277
}
278
279
impl Vfio for VfioUserClientWrapper {
280
0
    fn region_read(&self, index: u32, offset: u64, data: &mut [u8]) {
281
0
        self.client
282
0
            .lock()
283
0
            .unwrap()
284
0
            .region_read(index, offset, data)
285
0
            .ok();
286
0
    }
287
288
0
    fn region_write(&self, index: u32, offset: u64, data: &[u8]) {
289
0
        self.client
290
0
            .lock()
291
0
            .unwrap()
292
0
            .region_write(index, offset, data)
293
0
            .ok();
294
0
    }
295
296
0
    fn get_irq_info(&self, irq_index: u32) -> Option<VfioIrq> {
297
0
        self.client
298
0
            .lock()
299
0
            .unwrap()
300
0
            .get_irq_info(irq_index)
301
0
            .ok()
302
0
            .map(|i| VfioIrq {
303
0
                index: i.index,
304
0
                flags: i.flags,
305
0
                count: i.count,
306
0
            })
307
0
    }
308
309
0
    fn enable_irq(&self, irq_index: u32, event_fds: Vec<&EventFd>) -> Result<(), VfioError> {
310
0
        info!(
311
            "Enabling IRQ {:x} number of fds = {:?}",
312
            irq_index,
313
0
            event_fds.len()
314
        );
315
0
        let fds: Vec<i32> = event_fds.iter().map(|e| e.as_raw_fd()).collect();
316
317
        // Batch into blocks of 16 fds as sendmsg() has a size limit
318
0
        let mut sent_fds = 0;
319
0
        let num_fds = event_fds.len() as u32;
320
0
        while sent_fds < num_fds {
321
0
            let remaining_fds = num_fds - sent_fds;
322
0
            let count = if remaining_fds > 16 {
323
0
                16
324
            } else {
325
0
                remaining_fds
326
            };
327
328
0
            self.client
329
0
                .lock()
330
0
                .unwrap()
331
0
                .set_irqs(
332
0
                    irq_index,
333
0
                    VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
334
0
                    sent_fds,
335
0
                    count,
336
0
                    &fds[sent_fds as usize..(sent_fds + count) as usize],
337
0
                )
338
0
                .map_err(VfioError::VfioUser)?;
339
340
0
            sent_fds += count;
341
        }
342
343
0
        Ok(())
344
0
    }
345
346
0
    fn disable_irq(&self, irq_index: u32) -> Result<(), VfioError> {
347
0
        info!("Disabling IRQ {irq_index:x}");
348
0
        self.client
349
0
            .lock()
350
0
            .unwrap()
351
0
            .set_irqs(
352
0
                irq_index,
353
0
                VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER,
354
0
                0,
355
0
                0,
356
0
                &[],
357
0
            )
358
0
            .map_err(VfioError::VfioUser)
359
0
    }
360
361
0
    fn unmask_irq(&self, irq_index: u32) -> Result<(), VfioError> {
362
0
        info!("Unmasking IRQ {irq_index:x}");
363
0
        self.client
364
0
            .lock()
365
0
            .unwrap()
366
0
            .set_irqs(
367
0
                irq_index,
368
0
                VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
369
0
                0,
370
0
                1,
371
0
                &[],
372
0
            )
373
0
            .map_err(VfioError::VfioUser)
374
0
    }
375
}
376
377
impl PciDevice for VfioUserPciDevice {
378
0
    fn allocate_bars(
379
0
        &mut self,
380
0
        allocator: &mut SystemAllocator,
381
0
        mmio32_allocator: &mut AddressAllocator,
382
0
        mmio64_allocator: &mut AddressAllocator,
383
0
        resources: Option<Vec<Resource>>,
384
0
    ) -> Result<Vec<PciBarConfiguration>, PciDeviceError> {
385
0
        self.common.allocate_bars(
386
0
            allocator,
387
0
            mmio32_allocator,
388
0
            mmio64_allocator,
389
0
            resources.as_deref(),
390
        )
391
0
    }
392
393
0
    fn free_bars(
394
0
        &mut self,
395
0
        allocator: &mut SystemAllocator,
396
0
        mmio32_allocator: &mut AddressAllocator,
397
0
        mmio64_allocator: &mut AddressAllocator,
398
0
    ) -> Result<(), PciDeviceError> {
399
0
        self.common
400
0
            .free_bars(allocator, mmio32_allocator, mmio64_allocator)
401
0
    }
402
403
0
    fn restore_bar_addr(&mut self, params: &BarReprogrammingParams) {
404
0
        self.common.configuration.restore_bar_addr(params);
405
0
    }
406
407
0
    fn as_any_mut(&mut self) -> &mut dyn Any {
408
0
        self
409
0
    }
410
411
0
    fn write_config_register(
412
0
        &mut self,
413
0
        reg_idx: usize,
414
0
        offset: u64,
415
0
        data: &[u8],
416
0
    ) -> (Vec<BarReprogrammingParams>, Option<Arc<Barrier>>) {
417
0
        self.common.write_config_register(reg_idx, offset, data)
418
0
    }
419
420
0
    fn read_config_register(&mut self, reg_idx: usize) -> u32 {
421
0
        self.common.read_config_register(reg_idx)
422
0
    }
423
424
0
    fn read_bar(&mut self, base: u64, offset: u64, data: &mut [u8]) {
425
0
        self.common.read_bar(base, offset, data);
426
0
    }
427
428
0
    fn write_bar(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
429
0
        self.common.write_bar(base, offset, data)
430
0
    }
431
432
0
    fn move_bar(&mut self, old_base: u64, new_base: u64) -> Result<(), std::io::Error> {
433
0
        info!("Moving BAR 0x{old_base:x} -> 0x{new_base:x}");
434
0
        for mmio_region in self.common.mmio_regions.iter_mut() {
435
0
            if mmio_region.start.raw_value() == old_base {
436
0
                mmio_region.start = GuestAddress(new_base);
437
438
0
                for user_memory_region in mmio_region.user_memory_regions.iter_mut() {
439
                    // Remove old region
440
                    // SAFETY: only valid regions are in user_memory_regions
441
                    unsafe {
442
0
                        self.vm.remove_user_memory_region(
443
0
                            user_memory_region.slot,
444
0
                            user_memory_region.start,
445
0
                            user_memory_region.mapping.len(),
446
0
                            user_memory_region.mapping.addr(),
447
0
                            false,
448
0
                            false,
449
0
                        )
450
                    }
451
0
                    .map_err(std::io::Error::other)?;
452
453
                    // Update the user memory region with the correct start address.
454
0
                    if new_base > old_base {
455
0
                        user_memory_region.start += new_base - old_base;
456
0
                    } else {
457
0
                        user_memory_region.start -= old_base - new_base;
458
0
                    }
459
460
                    // Insert new region
461
                    // SAFETY: only valid regions are in user_memory_regions
462
                    unsafe {
463
0
                        self.vm.create_user_memory_region(
464
0
                            user_memory_region.slot,
465
0
                            user_memory_region.start,
466
0
                            user_memory_region.mapping.len(),
467
0
                            user_memory_region.mapping.addr(),
468
0
                            false,
469
0
                            false,
470
0
                        )
471
                    }
472
0
                    .map_err(std::io::Error::other)?;
473
                }
474
0
                info!("Moved bar 0x{old_base:x} -> 0x{new_base:x}");
475
0
            }
476
        }
477
478
0
        Ok(())
479
0
    }
480
481
0
    fn id(&self) -> Option<String> {
482
0
        Some(self.id.clone())
483
0
    }
484
}
485
486
impl Drop for VfioUserPciDevice {
487
0
    fn drop(&mut self) {
488
0
        self.unmap_mmio_regions();
489
490
0
        if let Some(msix) = &self.common.interrupt.msix
491
0
            && msix.bar.enabled()
492
0
        {
493
0
            self.common.disable_msix();
494
0
        }
495
496
0
        if let Some(msi) = &self.common.interrupt.msi
497
0
            && msi.cfg.enabled()
498
0
        {
499
0
            self.common.disable_msi();
500
0
        }
501
502
0
        if self.common.interrupt.intx_in_use() {
503
0
            self.common.disable_intx();
504
0
        }
505
506
0
        if let Err(e) = self.client.lock().unwrap().shutdown() {
507
0
            error!("Failed shutting down vfio-user client: {e}");
508
0
        }
509
0
    }
510
}
511
512
// Empty impl: the device adds no pause/resume logic of its own and relies on
// whatever defaults the `Pausable` trait provides.
impl Pausable for VfioUserPciDevice {}
513
514
impl Snapshottable for VfioUserPciDevice {
515
0
    fn id(&self) -> String {
516
0
        self.id.clone()
517
0
    }
518
519
0
    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
520
0
        let mut vfio_pci_dev_snapshot = Snapshot::default();
521
522
        // Snapshot VfioCommon
523
0
        vfio_pci_dev_snapshot.add_snapshot(self.common.id(), self.common.snapshot()?);
524
525
0
        Ok(vfio_pci_dev_snapshot)
526
0
    }
527
}
528
// Empty impl: relies on whatever defaults the `Transportable` trait provides.
impl Transportable for VfioUserPciDevice {}
529
// Empty impl: relies on whatever defaults the `Migratable` trait provides.
impl Migratable for VfioUserPciDevice {}
530
531
pub struct VfioUserDmaMapping<M: GuestAddressSpace> {
532
    client: Arc<Mutex<Client>>,
533
    memory: Arc<M>,
534
}
535
536
impl<M: GuestAddressSpace> VfioUserDmaMapping<M> {
537
0
    pub fn new(client: Arc<Mutex<Client>>, memory: Arc<M>) -> Self {
538
0
        Self { client, memory }
539
0
    }
540
}
541
542
impl<M: GuestAddressSpace + Sync + Send> ExternalDmaMapping for VfioUserDmaMapping<M> {
543
0
    fn map(&self, iova: u64, gpa: u64, size: u64) -> std::result::Result<(), std::io::Error> {
544
0
        let mem = self.memory.memory();
545
0
        let guest_addr = GuestAddress(gpa);
546
0
        let Some(region) = mem.find_region(guest_addr) else {
547
0
            return Err(std::io::Error::other(format!(
548
0
                "Region not found for 0x{gpa:x}"
549
0
            )));
550
        };
551
552
        // Check that the range fits in the region.
553
0
        let region_offset = guest_addr
554
0
            .checked_offset_from(region.start_addr())
555
0
            .ok_or_else(|| std::io::Error::other(format!("gpa 0x{gpa:x} below region start")))?;
556
0
        let region_remaining = (region.len())
557
0
            .checked_sub(region_offset)
558
0
            .ok_or_else(|| std::io::Error::other(format!("gpa 0x{gpa:x} past region end")))?;
559
0
        if size > region_remaining {
560
0
            return Err(std::io::Error::other(format!(
561
0
                "DMA map (gpa 0x{gpa:x}, size 0x{size:x}) extends past region end"
562
0
            )));
563
0
        }
564
565
0
        let file_offset = region.file_offset().ok_or_else(|| {
566
0
            std::io::Error::other(format!("region for gpa 0x{gpa:x} has no backing file"))
567
0
        })?;
568
0
        let offset = region_offset
569
0
            .checked_add(file_offset.start())
570
0
            .ok_or_else(|| std::io::Error::other("offset overflow in DMA map"))?;
571
572
0
        self.client
573
0
            .lock()
574
0
            .unwrap()
575
0
            .dma_map(offset, iova, size, file_offset.file().as_raw_fd())
576
0
            .map_err(|e| std::io::Error::other(format!("Error mapping region: {e}")))
577
0
    }
578
579
0
    fn unmap(&self, iova: u64, size: u64) -> std::result::Result<(), std::io::Error> {
580
0
        self.client
581
0
            .lock()
582
0
            .unwrap()
583
0
            .dma_unmap(iova, size)
584
0
            .map_err(|e| std::io::Error::other(format!("Error unmapping region: {e}")))
585
0
    }
586
}