// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.

//! Virtual Machine Monitor that leverages the Linux Kernel-based Virtual Machine (KVM),
//! and other virtualization features to run a single lightweight micro-virtual
//! machine (microVM).
#![warn(missing_docs)]
extern crate chrono;
extern crate epoll;
extern crate futures;
extern crate libc;
extern crate serde;
#[macro_use]
extern crate serde_derive;
extern crate serde_json;
extern crate time;
extern crate timerfd;

#[cfg(target_arch = "x86_64")]
extern crate cpuid;
extern crate devices;
extern crate fc_util;
extern crate kernel;
extern crate kvm;
#[macro_use]
extern crate logger;
extern crate arch;
extern crate memory_model;
extern crate net_util;
extern crate rate_limiter;
extern crate seccomp;
extern crate sys_util;

/// Syscalls allowed through the seccomp filter.
pub mod default_syscalls;
mod device_manager;
/// Signal handling utilities for seccomp violations.
mod sigsys_handler;
mod vm_control;
/// Wrappers over structures used to configure the VMM.
pub mod vmm_config;
mod vstate;

use futures::sync::oneshot;
use std::collections::HashMap;
use std::ffi::CString;
use std::fmt::{Display, Formatter};
use std::fs::{metadata, File, OpenOptions};
use std::os::unix::io::{AsRawFd, RawFd};
use std::path::PathBuf;
use std::result;
use std::sync::atomic::{AtomicUsize, Ordering, ATOMIC_USIZE_INIT};
use std::sync::mpsc::{channel, Receiver, Sender, TryRecvError};
use std::sync::{Arc, Barrier, RwLock};
use std::thread;
use std::time::Duration;

use libc::{c_void, siginfo_t};
use timerfd::{ClockId, SetTimeFlags, TimerFd, TimerState};

use device_manager::legacy::LegacyDeviceManager;
use device_manager::mmio::MMIODeviceManager;
use devices::virtio;
use devices::{DeviceEventT, EpollHandler, EpollHandlerPayload};
use fc_util::now_cputime_us;
use kernel::cmdline as kernel_cmdline;
use kernel::loader as kernel_loader;
use kvm::*;
use logger::error::LoggerError;
use logger::{AppInfo, Level, LogOption, Metric, LOGGER, METRICS};
use memory_model::{GuestAddress, GuestMemory};
use serde_json::Value;
pub use sigsys_handler::setup_sigsys_handler;
use sys_util::{register_signal_handler, EventFd, Terminal};
use vm_control::VmResponse;
use vmm_config::boot_source::{BootSourceConfig, BootSourceConfigError};
use vmm_config::drive::{BlockDeviceConfig, BlockDeviceConfigs, DriveError};
use vmm_config::instance_info::{InstanceInfo, InstanceState, StartMicrovmError};
use vmm_config::logger::{LoggerConfig, LoggerConfigError, LoggerLevel};
use vmm_config::machine_config::{VmConfig, VmConfigError};
use vmm_config::net::{NetworkInterfaceConfig, NetworkInterfaceConfigs, NetworkInterfaceError};
#[cfg(feature = "vsock")]
use vmm_config::vsock::{VsockDeviceConfig, VsockDeviceConfigs, VsockError};
use vstate::{Vcpu, Vm};

// Kernel command line kept as the default value. Its use site is outside this section
// (presumably applied when the boot source does not provide a command line — TODO confirm).
const DEFAULT_KERNEL_CMDLINE: &str = "reboot=k panic=1 pci=off nomodules 8250.nr_uarts=0";
// I/O port and byte value which, judging by their names, the guest uses to signal that boot
// has completed. The code handling them is not in this section — TODO confirm.
const MAGIC_IOPORT_SIGNAL_GUEST_BOOT_COMPLETE: u16 = 0x03f0;
const MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE: u8 = 123;
// NOTE(review): presumably the offset of the real-time signal used for vcpu threads — confirm.
const VCPU_RTSIG_OFFSET: i32 = 0;
// NOTE(review): presumably the period, in seconds, at which the metrics timer fires — confirm
// against the code that arms `write_metrics_event`.
const WRITE_METRICS_PERIOD_SECONDS: u64 = 60;

// Process-wide atomics, initialized to zero. The names suggest they hold the wall-clock and CPU
// timestamps of the start-instance request; the writers/readers are outside this section.
static START_INSTANCE_REQUEST_TS: AtomicUsize = ATOMIC_USIZE_INIT;
static START_INSTANCE_REQUEST_CPU_TS: AtomicUsize = ATOMIC_USIZE_INIT;

/// Success exit code.
pub const FC_EXIT_CODE_OK: u8 = 0;
/// Generic error exit code.
pub const FC_EXIT_CODE_GENERIC_ERROR: u8 = 1;
/// Generic exit code for an error considered not possible to occur if the program logic is sound.
pub const FC_EXIT_CODE_UNEXPECTED_ERROR: u8 = 2;
/// Firecracker was shut down after intercepting a restricted system call.
pub const FC_EXIT_CODE_BAD_SYSCALL: u8 = 148;

/// Errors associated with the VMM internal logic. These errors cannot be generated by direct user
/// input, but can result from bad configuration of the host (for example if Firecracker doesn't
/// have permissions to open the KVM fd).
#[derive(Debug)]
enum Error {
    /// Cannot receive message from the API.
    ApiChannel,
    /// Legacy devices work with Event file descriptors and the creation can fail because
    /// of resource exhaustion. Produced when `LegacyDeviceManager::new()` fails in `Vmm::new`.
    CreateLegacyDevice(device_manager::legacy::Error),
    /// An operation on the epoll instance failed due to resource exhaustion or bad configuration.
    /// Produced when creating the epoll fd or when adding an fd to it fails.
    EpollFd(std::io::Error),
    /// Cannot read from an Event file descriptor.
    EventFd(sys_util::Error),
    /// Describes a logical problem. Currently returned by `EpollContext::get_device_handler`
    /// when no handler has been received for a device.
    GeneralFailure, // TODO: there are some cases in which this error should be replaced.
    /// Cannot open /dev/kvm. Either the host does not have KVM or Firecracker does not have
    /// permission to open the file descriptor.
    Kvm(sys_util::Error),
    /// The host kernel reports an invalid KVM API version. Carries the reported version.
    KvmApiVersion(i32),
    /// Cannot initialize the KVM context due to missing capabilities. Carries the capability
    /// that the host did not report.
    KvmCap(kvm::Cap),
    /// Epoll wait failed.
    Poll(std::io::Error),
    /// Write to the serial console failed.
    Serial(sys_util::Error),
    /// Cannot create Timer file descriptor.
    TimerFd(std::io::Error),
    /// Cannot open the VM file descriptor. Produced when `Vm::new` fails in `Vmm::new`.
    Vm(vstate::Error),
}

/// Types of errors associated with vmm actions. Every `VmmActionError` variant carries one of
/// these as its first field; it can be read back with `VmmActionError::get_kind`.
#[derive(Debug)]
pub enum ErrorKind {
    /// User Errors describe bad configuration (user input).
    User,
    /// Internal Errors are unrelated to the user and usually refer to logical errors
    /// or bad management of resources (memory, file descriptors & others).
    Internal,
}

/// Wrapper for all errors associated with VMM actions. Each variant pairs an `ErrorKind`
/// (who is at fault) with the action-specific error that describes what went wrong.
#[derive(Debug)]
pub enum VmmActionError {
    /// The action `ConfigureBootSource` failed either because of bad user input (`ErrorKind::User`)
    /// or an internal error (`ErrorKind::Internal`).
    BootSource(ErrorKind, BootSourceConfigError),
    /// One of the actions `InsertBlockDevice`, `RescanBlockDevice` or `UpdateBlockDevicePath`
    /// failed either because of bad user input (`ErrorKind::User`) or an
    /// internal error (`ErrorKind::Internal`).
    DriveConfig(ErrorKind, DriveError),
    /// The action `ConfigureLogger` failed either because of bad user input (`ErrorKind::User`) or
    /// an internal error (`ErrorKind::Internal`). Also used by `FlushMetrics` failures.
    Logger(ErrorKind, LoggerConfigError),
    /// One of the actions `GetVmConfiguration` or `SetVmConfiguration` failed either because of bad
    /// input (`ErrorKind::User`) or an internal error (`ErrorKind::Internal`).
    MachineConfig(ErrorKind, VmConfigError),
    /// The action `InsertNetworkDevice` failed either because of bad user input (`ErrorKind::User`)
    /// or an internal error (`ErrorKind::Internal`).
    NetworkConfig(ErrorKind, NetworkInterfaceError),
    /// The action `StartMicroVm` failed either because of bad user input (`ErrorKind::User`) or
    /// an internal error (`ErrorKind::Internal`).
    StartMicrovm(ErrorKind, StartMicrovmError),
    #[cfg(feature = "vsock")]
    /// The action `InsertVsockDevice` failed either because of bad user input (`ErrorKind::User`)
    /// or an internal error (`ErrorKind::Internal`).
    VsockConfig(ErrorKind, VsockError),
}

impl VmmActionError {
    /// Returns the error type.
    pub fn get_kind(&self) -> &ErrorKind {
        use self::VmmActionError::*;

        match *self {
            BootSource(ref kind, _) => kind,
            DriveConfig(ref kind, _) => kind,
            Logger(ref kind, _) => kind,
            MachineConfig(ref kind, _) => kind,
            NetworkConfig(ref kind, _) => kind,
            StartMicrovm(ref kind, _) => kind,
            #[cfg(feature = "vsock")]
            VsockConfig(ref kind, _) => kind,
        }
    }
}

impl Display for VmmActionError {
    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
        use self::VmmActionError::*;

        match *self {
            BootSource(_, ref err) => write!(f, "{}", err.to_string()),
            DriveConfig(_, ref err) => write!(f, "{}", err.to_string()),
            Logger(_, ref err) => write!(f, "{}", err.to_string()),
            MachineConfig(_, ref err) => write!(f, "{}", err.to_string()),
            NetworkConfig(_, ref err) => write!(f, "{}", err.to_string()),
            StartMicrovm(_, ref err) => write!(f, "{}", err.to_string()),
            #[cfg(feature = "vsock")]
            VsockConfig(_, ref err) => write!(f, "{}", err.to_string()),
        }
    }
}

/// This enum represents the public interface of the VMM. Each action contains various
/// bits of information (ids, paths, etc.), together with an OutcomeSender, which is always present
/// and is the last field of every variant.
pub enum VmmAction {
    /// Configure the boot source of the microVM using as input the `BootSourceConfig`. This
    /// action can only be called before the microVM has booted. The response is sent using the
    /// `OutcomeSender`.
    ConfigureBootSource(BootSourceConfig, OutcomeSender),
    /// Configure the logger using as input the `LoggerConfig`. This action can only be called
    /// before the microVM has booted. The response is sent using the `OutcomeSender`.
    ConfigureLogger(LoggerConfig, OutcomeSender),
    /// Get the configuration of the microVM. The action response is sent using the `OutcomeSender`.
    GetVmConfiguration(OutcomeSender),
    /// Flush the metrics. This action can only be called after the logger has been configured.
    /// The response is sent using the `OutcomeSender`.
    FlushMetrics(OutcomeSender),
    /// Add a new block device or update one that already exists using the `BlockDeviceConfig` as
    /// input. This action can only be called before the microVM has booted. The response
    /// is sent using the `OutcomeSender`.
    InsertBlockDevice(BlockDeviceConfig, OutcomeSender),
    /// Add a new network interface config or update one that already exists using the
    /// `NetworkInterfaceConfig` as input. This action can only be called before the microVM has
    /// booted. The response is sent using the `OutcomeSender`.
    InsertNetworkDevice(NetworkInterfaceConfig, OutcomeSender),
    #[cfg(feature = "vsock")]
    /// Add a new vsock device or update one that already exists using the
    /// `VsockDeviceConfig` as input. This action can only be called before the microVM has
    /// booted. The response is sent using the `OutcomeSender`.
    InsertVsockDevice(VsockDeviceConfig, OutcomeSender),
    /// Update the size of an existing block device specified by an ID. The ID is the first data
    /// associated with this enum variant. This action can only be called after the microVM is
    /// started. The response is sent using the `OutcomeSender`.
    RescanBlockDevice(String, OutcomeSender),
    /// Set the microVM configuration (memory & vcpu) using `VmConfig` as input. This
    /// action can only be called before the microVM has booted. The action
    /// response is sent using the `OutcomeSender`.
    SetVmConfiguration(VmConfig, OutcomeSender),
    /// Launch the microVM. This action can only be called before the microVM has booted.
    /// The response is sent using the `OutcomeSender`.
    StartMicroVm(OutcomeSender),
    /// Update the path of an existing block device. The data associated with this variant
    /// represents the `drive_id` and the `path_on_host`, in that order. The response is sent
    /// using the `OutcomeSender`.
    UpdateBlockDevicePath(String, String, OutcomeSender),
}

/// The enum represents the response sent by the VMM in case of success. The response is either
/// empty, when no data needs to be sent, or an internal VMM structure. It is the `Ok` payload of
/// `VmmRequestOutcome`.
#[derive(Debug)]
pub enum VmmData {
    /// No data is sent on the channel.
    Empty,
    /// The microVM configuration represented by `VmConfig`.
    MachineConfiguration(VmConfig),
}

/// Data type used to communicate between the API and the VMM: the outcome of a `VmmAction`,
/// either the resulting `VmmData` or the `VmmActionError` describing the failure.
pub type VmmRequestOutcome = std::result::Result<VmmData, VmmActionError>;
/// Sending half of the one shot channel on which the outcome of a request is delivered.
pub type OutcomeSender = oneshot::Sender<VmmRequestOutcome>;
/// Receiving half of the one shot channel on which the outcome of a request is delivered.
pub type OutcomeReceiver = oneshot::Receiver<VmmRequestOutcome>;

// Crate-internal result type whose error is the VMM-internal `Error` enum.
type Result<T> = std::result::Result<T, Error>;

/// Describes a KVM context that gets attached to the micro vm instance.
/// It gives access to the functionality of the KVM wrapper as long as every required
/// KVM capability is present on the host.
pub struct KvmContext {
    // Handle to the KVM wrapper, either opened here or built from an fd supplied by the caller.
    kvm: Kvm,
    // Value returned by `Kvm::get_nr_memslots()` when the context was created.
    max_memslots: usize,
}

impl KvmContext {
    fn new(kvm_fd: Option<RawFd>) -> Result<Self> {
        fn check_cap(kvm: &Kvm, cap: Cap) -> std::result::Result<(), Error> {
            if !kvm.check_extension(cap) {
                return Err(Error::KvmCap(cap));
            }
            Ok(())
        }

        let kvm = if let Some(fd) = kvm_fd {
            // Safe because we expect kvm_fd to contain a valid fd number when is_some() == true.
            unsafe { Kvm::new_with_fd_number(fd) }
        } else {
            Kvm::new().map_err(Error::Kvm)?
        };

        if kvm.get_api_version() != kvm::KVM_API_VERSION as i32 {
            return Err(Error::KvmApiVersion(kvm.get_api_version()));
        }

        check_cap(&kvm, Cap::Irqchip)?;
        check_cap(&kvm, Cap::Ioeventfd)?;
        check_cap(&kvm, Cap::Irqfd)?;
        // check_cap(&kvm, Cap::ImmediateExit)?;
        #[cfg(target_arch = "x86_64")]
        check_cap(&kvm, Cap::SetTssAddr)?;
        check_cap(&kvm, Cap::UserMemory)?;

        let max_memslots = kvm.get_nr_memslots();
        Ok(KvmContext { kvm, max_memslots })
    }

    fn fd(&self) -> &Kvm {
        &self.kvm
    }

    /// Get the maximum number of memory slots reported by this KVM context.
    pub fn max_memslots(&self) -> usize {
        self.max_memslots
    }
}

// Identifies what an epoll event refers to. Values are stored in `EpollContext::dispatch_table`,
// indexed by the u64 data registered together with the fd.
#[derive(Debug, Clone, Copy, PartialEq)]
enum EpollDispatch {
    Exit,
    // Event on the standard input fd.
    Stdin,
    // Event for a device: index into `EpollContext::device_handlers`, followed by the
    // device-specific event number (0-based within the tokens allocated for that device).
    DeviceHandler(usize, DeviceEventT),
    // Event on the fd registered for requests coming from the API.
    VmmActionRequest,
    // Event on the timer fd registered for writing metrics.
    WriteMetrics,
}

// Slot for a device's epoll handler. The handler is not known when the slot is created; it is
// taken from `receiver` the first time it is needed (see `EpollContext::get_device_handler`).
struct MaybeHandler {
    // The handler, once it has been received.
    handler: Option<Box<EpollHandler>>,
    // Channel end on which the handler is expected to arrive.
    receiver: Receiver<Box<EpollHandler>>,
}

impl MaybeHandler {
    // Creates an empty slot: no handler yet, only the channel on which one will be delivered.
    fn new(receiver: Receiver<Box<EpollHandler>>) -> Self {
        let handler = None;
        MaybeHandler { handler, receiver }
    }
}

// Returned by `EpollContext::add_event`: takes ownership of the fd-backed object that was
// registered with epoll, so the object lives as long as this value is kept around.
struct EpollEvent<T: AsRawFd> {
    fd: T,
}

// Handles epoll related business.
// A glaring shortcoming of the current design is the liberal passing around of raw_fds,
// and duping of file descriptors. This issue will be solved when we also implement device removal.
struct EpollContext {
    // Raw fd of the epoll instance; closed in the `Drop` implementation.
    epoll_raw_fd: RawFd,
    // Index in `dispatch_table` reserved for stdin.
    stdin_index: u64,
    // Maps the u64 data registered with each epoll event to what the event means. An entry is
    // `None` when the slot is reserved but currently not active (used for stdin).
    // FIXME: find a different design as this does not scale. This Vec can only grow.
    dispatch_table: Vec<Option<EpollDispatch>>,
    // One slot per device that had tokens allocated, in allocation order.
    device_handlers: Vec<MaybeHandler>,
}

impl EpollContext {
    // Creates the epoll instance and reserves the first dispatch table slot for stdin.
    // Fails with `Error::EpollFd` if the epoll fd cannot be created.
    fn new() -> Result<Self> {
        let epoll_raw_fd = epoll::create(true).map_err(Error::EpollFd)?;

        // Initial capacity needs to be large enough to hold:
        // * 1 exit event
        // * 1 stdin event
        // * 2 queue events for virtio block
        // * 4 for virtio net
        // The total is 8 elements; allowing spare capacity to avoid reallocations.
        let mut dispatch_table = Vec::with_capacity(20);

        // The stdin slot stays empty until `enable_stdin_event` is called.
        dispatch_table.push(None);
        let stdin_index = (dispatch_table.len() - 1) as u64;

        let device_handlers = Vec::with_capacity(6);

        Ok(EpollContext {
            epoll_raw_fd,
            stdin_index,
            dispatch_table,
            device_handlers,
        })
    }

    // Registers stdin with epoll and, on success, activates its dispatch table slot.
    // A registration failure is only logged; the function returns `Ok(())` in either case.
    fn enable_stdin_event(&mut self) -> Result<()> {
        let registration = epoll::ctl(
            self.epoll_raw_fd,
            epoll::ControlOptions::EPOLL_CTL_ADD,
            libc::STDIN_FILENO,
            epoll::Event::new(epoll::Events::EPOLLIN, self.stdin_index),
        );

        match registration {
            Ok(_) => {
                self.dispatch_table[self.stdin_index as usize] = Some(EpollDispatch::Stdin);
            }
            Err(e) => {
                // TODO: We just log this message, and still return Ok, instead of returning the
                // actual error because this operation always fails with EPERM when adding a fd
                // which has been redirected to /dev/null via dup2 (this may happen inside the
                // jailer). Find a better solution to this (and think about the state of the
                // serial device while we're at it). This also led to commenting out parts of the
                // enable_disable_stdin_test() unit test function.
                warn!("Could not add stdin event to epoll. {:?}", e);
            }
        }

        Ok(())
    }

    // Unregisters stdin from epoll (best effort) and clears its dispatch table slot.
    // Always returns `Ok(())`.
    fn disable_stdin_event(&mut self) -> Result<()> {
        let stdin_slot = self.stdin_index as usize;

        // Ignore failure to remove from epoll. The only reason for failure is
        // that stdin has closed or changed in which case we won't get
        // any more events on the original event_fd anyway.
        let _ = epoll::ctl(
            self.epoll_raw_fd,
            epoll::ControlOptions::EPOLL_CTL_DEL,
            libc::STDIN_FILENO,
            epoll::Event::new(epoll::Events::EPOLLIN, self.stdin_index),
        );

        self.dispatch_table[stdin_slot] = None;
        Ok(())
    }

    // Registers `fd` for EPOLLIN, tagging it with the next free dispatch table index, and records
    // `token` at that index. Takes ownership of `fd` and hands it back wrapped in an `EpollEvent`.
    // Fails with `Error::EpollFd` if the registration fails; the table is left untouched then.
    fn add_event<T>(&mut self, fd: T, token: EpollDispatch) -> Result<EpollEvent<T>>
    where
        T: AsRawFd,
    {
        let dispatch_index = self.dispatch_table.len() as u64;
        let interest = epoll::Event::new(epoll::Events::EPOLLIN, dispatch_index);

        epoll::ctl(
            self.epoll_raw_fd,
            epoll::ControlOptions::EPOLL_CTL_ADD,
            fd.as_raw_fd(),
            interest,
        )
        .map_err(Error::EpollFd)?;

        // Only record the token after the fd has actually been added to epoll.
        self.dispatch_table.push(Some(token));

        Ok(EpollEvent { fd })
    }

    // Reserves `count` consecutive dispatch table entries for a new device and creates its handler
    // slot. Returns the index of the first reserved entry and the sending end of the channel on
    // which the device is expected to deliver its handler.
    fn allocate_tokens(&mut self, count: usize) -> (u64, Sender<Box<EpollHandler>>) {
        let dispatch_base = self.dispatch_table.len() as u64;
        // The new device's handler slot will be appended at this index below.
        let device_idx = self.device_handlers.len();

        self.dispatch_table.extend((0..count).map(|event| {
            Some(EpollDispatch::DeviceHandler(
                device_idx,
                event as DeviceEventT,
            ))
        }));

        let (sender, receiver) = channel();
        self.device_handlers.push(MaybeHandler::new(receiver));

        (dispatch_base, sender)
    }

    // Allocates the tokens for a virtio block device. Besides the epoll config, returns the number
    // of device handler slots after the allocation, i.e. the new device's slot index plus one.
    fn allocate_virtio_block_tokens(&mut self) -> (virtio::block::EpollConfig, usize) {
        let (first_token, handler_tx) = self.allocate_tokens(virtio::block::BLOCK_EVENTS_COUNT);
        let epoll_config =
            virtio::block::EpollConfig::new(first_token, self.epoll_raw_fd, handler_tx);
        let handler_slots = self.device_handlers.len();

        (epoll_config, handler_slots)
    }

    // Allocates the tokens for a virtio net device and builds its epoll config.
    fn allocate_virtio_net_tokens(&mut self) -> virtio::net::EpollConfig {
        let (first_token, handler_tx) = self.allocate_tokens(virtio::net::NET_EVENTS_COUNT);
        let epoll_fd = self.epoll_raw_fd;
        virtio::net::EpollConfig::new(first_token, epoll_fd, handler_tx)
    }

    // Allocates the tokens for a vhost-backed vsock device and builds its epoll config.
    #[cfg(feature = "vsock")]
    fn allocate_virtio_vsock_tokens(&mut self) -> virtio::vhost::handle::VhostEpollConfig {
        let event_count = virtio::vhost::handle::VHOST_EVENTS_COUNT;
        let (first_token, handler_tx) = self.allocate_tokens(event_count);
        virtio::vhost::handle::VhostEpollConfig::new(first_token, self.epoll_raw_fd, handler_tx)
    }

    fn get_device_handler(&mut self, device_idx: usize) -> Result<&mut EpollHandler> {
        let ref mut maybe = self.device_handlers[device_idx];
        match maybe.handler {
            Some(ref mut v) => Ok(v.as_mut()),
            None => {
                // This should only be called in response to an epoll trigger.
                // Moreover, this branch of the match should only be active on the first call
                // (the first epoll event for this device), therefore the channel is guaranteed
                // to contain a message for the first epoll event since both epoll event
                // registration and channel send() happen in the device activate() function.
                let received = maybe
                    .receiver
                    .try_recv()
                    .map_err(|_| Error::GeneralFailure)?;
                Ok(maybe.handler.get_or_insert(received).as_mut())
            }
        }
    }
}

impl Drop for EpollContext {
    // Closes the epoll fd; a failure is only logged.
    fn drop(&mut self) {
        // The fd was obtained from `epoll::create` in `new` and is closed only here.
        let close_failed = unsafe { libc::close(self.epoll_raw_fd) } != 0;
        if close_failed {
            warn!("Cannot close epoll.");
        }
    }
}

// Everything the VMM keeps about the guest kernel that is going to be booted.
struct KernelConfig {
    // Kernel command line; the device attach functions append to it before boot.
    cmdline: kernel_cmdline::Cmdline,
    // Open file holding the kernel image.
    kernel_file: File,
    // Guest address associated with the command line (presumably where it is loaded in guest
    // memory — the code using it is outside this section).
    cmdline_addr: GuestAddress,
}

// State of the virtual machine monitor: the KVM handles, the microVM configuration, the
// configured devices and the channels/events used to talk to the API.
struct Vmm {
    kvm: KvmContext,

    // Machine configuration; starts as `VmConfig::default()`.
    vm_config: VmConfig,
    // Instance information handed over by the API side at construction.
    shared_info: Arc<RwLock<InstanceInfo>>,

    // Guest VM core resources.
    // `None` until `init_guest_memory` succeeds.
    guest_memory: Option<GuestMemory>,
    // `None` until `configure_kernel` is called.
    kernel_config: Option<KernelConfig>,
    vcpu_handles: Option<Vec<thread::JoinHandle<()>>>,
    exit_evt: Option<EpollEvent<EventFd>>,
    vm: Vm,

    // Guest VM devices.
    // `None` until `init_devices` succeeds.
    mmio_device_manager: Option<MMIODeviceManager>,
    legacy_device_manager: LegacyDeviceManager,
    // Maps a drive id to the index of its handler slot in `epoll_context.device_handlers`.
    drive_handler_id_map: HashMap<String, usize>,

    // Device configurations.
    // If there is a Root Block Device, this should be added as the first element of the list.
    // This is necessary because we want the root to always be mounted on /dev/vda.
    block_device_configs: BlockDeviceConfigs,
    network_interface_configs: NetworkInterfaceConfigs,
    #[cfg(feature = "vsock")]
    vsock_device_configs: VsockDeviceConfigs,

    epoll_context: EpollContext,

    // API resources.
    // Event fd registered with epoll as `EpollDispatch::VmmActionRequest`.
    api_event: EpollEvent<EventFd>,
    // Receiving end of the channel carrying the actions requested through the API.
    from_api: Receiver<Box<VmmAction>>,

    // Timer fd registered with epoll as `EpollDispatch::WriteMetrics`.
    write_metrics_event: EpollEvent<TimerFd>,

    // The level of seccomp filtering used. Seccomp filters are loaded before executing guest code.
    // See `seccomp::SeccompLevel` for more information about seccomp levels.
    seccomp_level: u32,
}

impl Vmm {
    fn new(
        api_shared_info: Arc<RwLock<InstanceInfo>>,
        api_event_fd: EventFd,
        from_api: Receiver<Box<VmmAction>>,
        seccomp_level: u32,
        kvm_fd: Option<RawFd>,
    ) -> Result<Self> {
        let mut epoll_context = EpollContext::new()?;
        // If this fails, it's fatal; using expect() to crash.
        let api_event = epoll_context
            .add_event(api_event_fd, EpollDispatch::VmmActionRequest)
            .expect("Cannot add API eventfd to epoll.");

        let write_metrics_event = epoll_context
            .add_event(
                // non-blocking & close on exec
                TimerFd::new_custom(ClockId::Monotonic, true, true).map_err(Error::TimerFd)?,
                EpollDispatch::WriteMetrics,
            )
            .expect("Cannot add write metrics TimerFd to epoll.");

        let block_device_configs = BlockDeviceConfigs::new();
        let kvm = KvmContext::new(kvm_fd)?;
        let vm = Vm::new(kvm.fd()).map_err(Error::Vm)?;

        Ok(Vmm {
            kvm,
            vm_config: VmConfig::default(),
            shared_info: api_shared_info,
            guest_memory: None,
            kernel_config: None,
            vcpu_handles: None,
            exit_evt: None,
            vm,
            mmio_device_manager: None,
            legacy_device_manager: LegacyDeviceManager::new().map_err(Error::CreateLegacyDevice)?,
            block_device_configs,
            drive_handler_id_map: HashMap::new(),
            network_interface_configs: NetworkInterfaceConfigs::new(),
            #[cfg(feature = "vsock")]
            vsock_device_configs: VsockDeviceConfigs::new(),
            epoll_context,
            api_event,
            from_api,
            write_metrics_event,
            seccomp_level,
        })
    }

    // Hands a new backing file to the handler of the block device identified by `drive_id`, by
    // sending it a `FS_UPDATE_EVENT` carrying `disk_image`.
    //
    // Returns `DriveError::BlockDeviceUpdateFailed` if the drive id is unknown or its handler
    // cannot be obtained. Panics if the handler reports `PayloadExpected` or `UnknownEvent`;
    // any other result of the handler is treated as success.
    fn update_drive_handler(
        &mut self,
        drive_id: &String,
        disk_image: File,
    ) -> result::Result<(), DriveError> {
        let device_idx = match self.drive_handler_id_map.get(drive_id) {
            Some(&idx) => idx,
            None => return Err(DriveError::BlockDeviceUpdateFailed),
        };

        let handler = match self.epoll_context.get_device_handler(device_idx) {
            Ok(handler) => handler,
            Err(e) => {
                warn!("invalid handler for device {}: {:?}", device_idx, e);
                return Err(DriveError::BlockDeviceUpdateFailed);
            }
        };

        let outcome = handler.handle_event(
            virtio::block::FS_UPDATE_EVENT,
            device_idx as u32,
            EpollHandlerPayload::DrivePayload(disk_image),
        );
        match outcome {
            Err(devices::Error::PayloadExpected) => {
                panic!("Received update disk image event with empty payload.")
            }
            Err(devices::Error::UnknownEvent { device, event }) => {
                panic!("Unknown event: {:?} {:?}", device, event)
            }
            _ => Ok(()),
        }
    }

    // Attaches all block devices from the BlockDevicesConfig.
    // Attaches all block devices from the BlockDevicesConfig.
    //
    // For every configured drive: opens the backing file, allocates epoll tokens, records the
    // drive's handler slot in `drive_handler_id_map`, creates the virtio block device and
    // registers it with `device_manager`. Along the way the kernel command line receives the
    // root device arguments (`root=/dev/vda` or `root=PARTUUID=...`, plus `ro` when read-only).
    //
    // Stops at the first failure and returns the corresponding `StartMicrovmError`.
    fn attach_block_devices(
        &mut self,
        device_manager: &mut MMIODeviceManager,
    ) -> std::result::Result<(), StartMicrovmError> {
        // We rely on check_health function for making sure kernel_config is not None.
        let kernel_config = self
            .kernel_config
            .as_mut()
            .ok_or(StartMicrovmError::MissingKernelConfig)?;

        // If no PARTUUID was specified for the root device, try with the /dev/vda.
        let root_by_device_path = self.block_device_configs.has_root_block_device()
            && !self.block_device_configs.has_partuuid_root();
        if root_by_device_path {
            kernel_config
                .cmdline
                .insert_str(" root=/dev/vda")
                .map_err(|e| StartMicrovmError::KernelCmdline(e.to_string()))?;

            if self.block_device_configs.has_read_only_root() {
                kernel_config
                    .cmdline
                    .insert_str(" ro")
                    .map_err(|e| StartMicrovmError::KernelCmdline(e.to_string()))?;
            }
        }

        let epoll_context = &mut self.epoll_context;
        for drive_config in self.block_device_configs.config_list.iter_mut() {
            // Add the block device from file.
            let block_file = OpenOptions::new()
                .read(true)
                .write(!drive_config.is_read_only)
                .open(&drive_config.path_on_host)
                .map_err(StartMicrovmError::OpenBlockDevice)?;

            // A root device identified by PARTUUID gets its own root argument.
            if drive_config.is_root_device {
                if let Some(partuuid) = drive_config.get_partuuid() {
                    kernel_config
                        .cmdline
                        .insert_str(format!(" root=PARTUUID={}", partuuid))
                        .map_err(|e| StartMicrovmError::KernelCmdline(e.to_string()))?;
                    if drive_config.is_read_only {
                        kernel_config
                            .cmdline
                            .insert_str(" ro")
                            .map_err(|e| StartMicrovmError::KernelCmdline(e.to_string()))?;
                    }
                }
            }

            // The second value is the handler slot count after allocation, so this drive's
            // slot index is one less.
            let (epoll_config, handler_slots) = epoll_context.allocate_virtio_block_tokens();
            self.drive_handler_id_map
                .insert(drive_config.drive_id.clone(), handler_slots - 1);

            let block_device = devices::virtio::Block::new(
                block_file,
                drive_config.is_read_only,
                epoll_config,
                drive_config.rate_limiter.take(),
            )
            .map_err(StartMicrovmError::CreateBlockDevice)?;

            device_manager
                .register_device(
                    Box::new(block_device),
                    &mut kernel_config.cmdline,
                    Some(drive_config.drive_id.clone()),
                )
                .map_err(StartMicrovmError::RegisterBlockDevice)?;
        }

        Ok(())
    }

    // Creates a virtio net device for every configured network interface and registers it with
    // `device_manager`.
    //
    // Epoll tokens are allocated and the rate limiters are taken out of the config before the
    // tap device is checked. Returns `StartMicrovmError::NetDeviceNotConfigured` for the first
    // interface without a tap device, or the error of the first failing creation/registration.
    fn attach_net_devices(
        &mut self,
        device_manager: &mut MMIODeviceManager,
    ) -> std::result::Result<(), StartMicrovmError> {
        // We rely on check_health function for making sure kernel_config is not None.
        let kernel_config = self
            .kernel_config
            .as_mut()
            .ok_or(StartMicrovmError::MissingKernelConfig)?;

        for cfg in self.network_interface_configs.iter_mut() {
            let epoll_config = self.epoll_context.allocate_virtio_net_tokens();

            let allow_mmds_requests = cfg.allow_mmds_requests();
            let rx_rate_limiter = cfg.rx_rate_limiter.take();
            let tx_rate_limiter = cfg.tx_rate_limiter.take();

            let tap = match cfg.take_tap() {
                Some(tap) => tap,
                None => return Err(StartMicrovmError::NetDeviceNotConfigured),
            };

            let net_device = devices::virtio::Net::new_with_tap(
                tap,
                cfg.guest_mac(),
                epoll_config,
                rx_rate_limiter,
                tx_rate_limiter,
                allow_mmds_requests,
            )
            .map_err(StartMicrovmError::CreateNetDevice)?;

            device_manager
                .register_device(Box::new(net_device), &mut kernel_config.cmdline, None)
                .map_err(StartMicrovmError::RegisterNetDevice)?;
        }

        Ok(())
    }

    // Creates a virtio vsock device for every configured vsock and registers it with
    // `device_manager`. Returns the error of the first failing creation/registration, or
    // `StartMicrovmError::MissingKernelConfig` if no kernel has been configured.
    #[cfg(feature = "vsock")]
    fn attach_vsock_devices(
        &mut self,
        device_manager: &mut MMIODeviceManager,
        guest_mem: &GuestMemory,
    ) -> std::result::Result<(), StartMicrovmError> {
        let kernel_config = self
            .kernel_config
            .as_mut()
            .ok_or(StartMicrovmError::MissingKernelConfig)?;

        for cfg in self.vsock_device_configs.iter() {
            let epoll_config = self.epoll_context.allocate_virtio_vsock_tokens();
            let guest_cid = cfg.guest_cid as u64;

            let vsock_device = devices::virtio::Vsock::new(guest_cid, guest_mem, epoll_config)
                .map_err(StartMicrovmError::CreateVsockDevice)?;

            device_manager
                .register_device(Box::new(vsock_device), &mut kernel_config.cmdline, None)
                .map_err(StartMicrovmError::RegisterVsockDevice)?;
        }

        Ok(())
    }

    // Stores the kernel configuration, replacing any previously stored one.
    fn configure_kernel(&mut self, kernel_config: KernelConfig) {
        self.kernel_config = Some(kernel_config);
    }

    // Writes the metrics and translates the result into an API outcome.
    //
    // A logger that was never initialized is reported as a user error; every other logger failure
    // is reported as an internal error. Both are wrapped in `LoggerConfigError::FlushMetrics`.
    fn flush_metrics(&mut self) -> std::result::Result<VmmData, VmmActionError> {
        match self.write_metrics() {
            Ok(()) => Ok(VmmData::Empty),
            Err(LoggerError::NeverInitialized(s)) => Err(VmmActionError::Logger(
                ErrorKind::User,
                LoggerConfigError::FlushMetrics(s),
            )),
            Err(e) => Err(VmmActionError::Logger(
                ErrorKind::Internal,
                LoggerConfigError::FlushMetrics(e.to_string()),
            )),
        }
    }

    // Logs the metrics through the global logger.
    //
    // When the `LogOption::LogDirtyPages` flag is set on the logger, the current dirty page count
    // is first added to the `memory.dirty_pages` metric.
    //
    // Returns whatever `LOGGER.log_metrics()` returns.
    fn write_metrics(&mut self) -> result::Result<(), LoggerError> {
        // If we're logging dirty pages, post the metrics on how many dirty pages there are.
        // The flag has to be tested with a bitwise AND: OR-ing it into the flags yields a
        // non-zero value no matter which flags are set, which made this branch unconditional.
        if LOGGER.flags() & (LogOption::LogDirtyPages as usize) != 0 {
            METRICS.memory.dirty_pages.add(self.get_dirty_page_count());
        }
        LOGGER.log_metrics()
    }

    // Allocates the guest memory according to the configured size and stores it in
    // `self.guest_memory`.
    //
    // Returns `StartMicrovmError::GuestMemory` if no memory size is configured or if the memory
    // cannot be created.
    fn init_guest_memory(&mut self) -> std::result::Result<(), StartMicrovmError> {
        let mem_size_mib = self
            .vm_config
            .mem_size_mib
            .ok_or(StartMicrovmError::GuestMemory(
                memory_model::GuestMemoryError::MemoryNotInitialized,
            ))?;

        // The configuration is expressed in MiB; the memory layout works with bytes.
        let mem_size = mem_size_mib << 20;
        let arch_mem_regions = arch::arch_memory_regions(mem_size);

        let guest_memory =
            GuestMemory::new(&arch_mem_regions).map_err(StartMicrovmError::GuestMemory)?;
        self.guest_memory = Some(guest_memory);

        Ok(())
    }

    /// Verifies that the prerequisites for starting the microVM are met.
    ///
    /// Currently the only requirement is that a kernel configuration has been set;
    /// otherwise `StartMicrovmError::MissingKernelConfig` is returned.
    fn check_health(&self) -> std::result::Result<(), StartMicrovmError> {
        match self.kernel_config {
            Some(_) => Ok(()),
            None => Err(StartMicrovmError::MissingKernelConfig),
        }
    }

    fn init_devices(&mut self) -> std::result::Result<(), StartMicrovmError> {
        let guest_mem = self
            .guest_memory
            .clone()
            .ok_or(StartMicrovmError::GuestMemory(
                memory_model::GuestMemoryError::MemoryNotInitialized,
            ))?;

        // Instantiate the MMIO device manager.
        // 'mmio_base' address has to be an address which is protected by the kernel.
        let mut device_manager =
            MMIODeviceManager::new(guest_mem.clone(), arch::get_reserved_mem_addr() as u64);

        self.attach_block_devices(&mut device_manager)?;
        self.attach_net_devices(&mut device_manager)?;
        #[cfg(feature = "vsock")]
        self.attach_vsock_devices(&mut device_manager, &guest_mem)?;

        self.mmio_device_manager = Some(device_manager);
        Ok(())
    }

    fn init_microvm(&mut self) -> std::result::Result<(), StartMicrovmError> {
        self.vm
            .memory_init(
                self.guest_memory
                    .clone()
                    .ok_or(StartMicrovmError::GuestMemory(
                        memory_model::GuestMemoryError::MemoryNotInitialized,
                    ))?,
                &self.kvm,
            )
            .map_err(|e| StartMicrovmError::ConfigureVm(e))?;
        self.vm
            .setup_irqchip(
                &self.legacy_device_manager.com_evt_1_3,
                &self.legacy_device_manager.com_evt_2_4,
            )
            .map_err(|e| StartMicrovmError::ConfigureVm(e))?;
        #[cfg(target_arch = "x86_64")]
        self.vm
            .create_pit()
            .map_err(|e| StartMicrovmError::ConfigureVm(e))?;

        // mmio_device_manager is instantiated in init_devices, which is called before init_microvm.
        let device_manager = self
            .mmio_device_manager
            .as_ref()
            .ok_or(StartMicrovmError::DeviceManager)?;
        for request in &device_manager.vm_requests {
            if let VmResponse::Err(e) = request.execute(self.vm.get_fd()) {
                return Err(StartMicrovmError::DeviceVmRequest(e))?;
            }
        }

        self.legacy_device_manager
            .register_devices()
            .map_err(StartMicrovmError::LegacyIOBus)?;

        Ok(())
    }

    /// Creates, configures and starts one thread per configured vCPU.
    ///
    /// For each vCPU this clones the legacy I/O bus and the MMIO bus, obtains a clone of the
    /// i8042 event fd (used to signal that the vCPU exited), creates and configures the
    /// `Vcpu` with `entry_addr`, and spawns a thread named `fc_vcpu<N>` that runs the vCPU
    /// exit loop. All vCPU threads and the calling thread rendezvous on a shared barrier, so
    /// no vCPU starts running before the calling thread reaches the barrier at the end of
    /// this function.
    ///
    /// # Errors
    ///
    /// * `VcpusNotConfigured` if no vCPU count is set.
    /// * `DeviceManager` if the MMIO device manager has not been created.
    /// * `EventFd` if the i8042 event fd cannot be cloned.
    /// * `Vcpu` / `VcpuConfigure` if a vCPU cannot be created or configured.
    /// * `VcpuSpawn` if a vCPU thread cannot be spawned.
    /// * `SeccompFilters` if the seccomp filters cannot be installed on the calling thread.
    ///
    /// # Panics
    ///
    /// Panics if the i8042 lock is poisoned. Each vCPU thread panics if it cannot register
    /// its signal handler or install its seccomp filters.
    fn start_vcpus(
        &mut self,
        entry_addr: GuestAddress,
    ) -> std::result::Result<(), StartMicrovmError> {
        // vm_config has a default value for vcpu_count.
        let vcpu_count = self
            .vm_config
            .vcpu_count
            .ok_or(StartMicrovmError::VcpusNotConfigured)?;
        self.vcpu_handles = Some(Vec::with_capacity(vcpu_count as usize));
        // It is safe to unwrap since it's set just above.
        let vcpu_handles = self.vcpu_handles.as_mut().unwrap();

        // One participant per vCPU thread plus the calling (VMM) thread.
        let vcpu_thread_barrier = Arc::new(Barrier::new((vcpu_count + 1) as usize));

        for cpu_id in 0..vcpu_count {
            let io_bus = self.legacy_device_manager.io_bus.clone();
            // mmio_device_manager is instantiated in init_devices, which is called before
            // start_vcpus.
            let device_manager = self
                .mmio_device_manager
                .as_ref()
                .ok_or(StartMicrovmError::DeviceManager)?;
            let mmio_bus = device_manager.bus.clone();
            let vcpu_thread_barrier = vcpu_thread_barrier.clone();
            // If the lock is poisoned, it's OK to panic.
            let vcpu_exit_evt = self
                .legacy_device_manager
                .i8042
                .lock()
                .expect("Failed to start VCPUs due to poisoned i8042 lock")
                .get_eventfd_clone()
                .map_err(|_| StartMicrovmError::EventFd)?;

            let mut vcpu = Vcpu::new(cpu_id, &self.vm).map_err(StartMicrovmError::Vcpu)?;
            // Copy the level so the spawned closure does not need to borrow `self`.
            let seccomp_level = self.seccomp_level;
            // NOTE(review): no unwrap is performed here; presumably the machine configuration
            // defaults are relied upon inside `Vcpu::configure` - confirm.
            vcpu.configure(&self.vm_config, entry_addr, &self.vm)
                .map_err(StartMicrovmError::VcpuConfigure)?;
            vcpu_handles.push(
                thread::Builder::new()
                    .name(format!("fc_vcpu{}", cpu_id))
                    .spawn(move || {
                        unsafe {
                            // Intentionally empty: the handler itself performs no work.
                            extern "C" fn handle_signal(_: i32, _: *mut siginfo_t, _: *mut c_void) {
                            }
                            // Register the no-op handler for the VCPU_RTSIG_OFFSET signal.
                            // Doing nothing in the handler keeps it async-signal-safe.
                            register_signal_handler(
                                VCPU_RTSIG_OFFSET,
                                sys_util::SignalHandler::Siginfo(handle_signal),
                                true,
                            )
                            .expect("Failed to register vcpu signal handler");
                        }

                        // Load seccomp filters for this vCPU thread.
                        // Execution panics if filters cannot be loaded, use --seccomp-level=0 if skipping filters
                        // altogether is the desired behaviour.
                        if let Err(e) = default_syscalls::set_seccomp_level(seccomp_level) {
                            panic!(
                                "Failed to set the requested seccomp filters on vCPU {}:\
                                 Error: {:?}",
                                cpu_id, e
                            );
                        }

                        // Wait until every vCPU thread and the VMM thread are ready.
                        vcpu_thread_barrier.wait();

                        // Run the vCPU until an exit that terminates it is encountered.
                        loop {
                            match vcpu.run() {
                                Ok(run) => match run {
                                    VcpuExit::IoIn(addr, data) => {
                                        io_bus.read(addr as u64, data);
                                        METRICS.vcpu.exit_io_in.inc();
                                    }
                                    VcpuExit::IoOut(addr, data) => {
                                        // A write of the magic value to the magic port is the
                                        // guest's "boot complete" signal: log the wall-clock
                                        // and CPU time elapsed since the start request.
                                        if addr == MAGIC_IOPORT_SIGNAL_GUEST_BOOT_COMPLETE
                                            && data[0] == MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE
                                        {
                                            let now_cpu_us = now_cputime_us();
                                            let now_us =
                                                chrono::Utc::now().timestamp_nanos() / 1000;

                                            let boot_time_us = now_us as usize
                                                - START_INSTANCE_REQUEST_TS.load(Ordering::Acquire);
                                            let boot_time_cpu_us = now_cpu_us as usize
                                                - START_INSTANCE_REQUEST_CPU_TS
                                                    .load(Ordering::Acquire);
                                            warn!(
                                                "Guest-boot-time = {:>6} us {} ms, \
                                                 {:>6} CPU us {} CPU ms",
                                                boot_time_us,
                                                boot_time_us / 1000,
                                                boot_time_cpu_us,
                                                boot_time_cpu_us / 1000
                                            );
                                        }
                                        // The write is forwarded to the I/O bus in all cases.
                                        io_bus.write(addr as u64, data);
                                        METRICS.vcpu.exit_io_out.inc();
                                    }
                                    VcpuExit::MmioRead(addr, data) => {
                                        mmio_bus.read(addr, data);
                                        METRICS.vcpu.exit_mmio_read.inc();
                                    }
                                    VcpuExit::MmioWrite(addr, data) => {
                                        mmio_bus.write(addr, data);
                                        METRICS.vcpu.exit_mmio_write.inc();
                                    }
                                    VcpuExit::Hlt => {
                                        info!("Received KVM_EXIT_HLT signal");
                                        break;
                                    }
                                    VcpuExit::Shutdown => {
                                        info!("Received KVM_EXIT_SHUTDOWN signal");
                                        break;
                                    }
                                    // Documentation specifies that below kvm exits are considered
                                    // errors.
                                    VcpuExit::FailEntry => {
                                        METRICS.vcpu.failures.inc();
                                        error!("Received KVM_EXIT_FAIL_ENTRY signal");
                                        break;
                                    }
                                    VcpuExit::InternalError => {
                                        METRICS.vcpu.failures.inc();
                                        error!("Received KVM_EXIT_INTERNAL_ERROR signal");
                                        break;
                                    }
                                    r => {
                                        METRICS.vcpu.failures.inc();
                                        // TODO: Are we sure we want to finish running a vcpu upon
                                        // receiving a vm exit that is not necessarily an error?
                                        error!("Unexpected exit reason on vcpu run: {:?}", r);
                                        break;
                                    }
                                },
                                Err(vstate::Error::VcpuRun(ref e)) => match e.errno() {
                                    // Why do we check for these if we only return EINVAL?
                                    // EAGAIN and EINTR are treated as transient: run again.
                                    libc::EAGAIN | libc::EINTR => {}
                                    _ => {
                                        METRICS.vcpu.failures.inc();
                                        error!("Failure during vcpu run: {:?}", e);
                                        break;
                                    }
                                },
                                // Any other error kind is ignored and the vCPU is run again.
                                _ => (),
                            }
                        }

                        // The loop ended: notify the VMM through the exit event fd.
                        // Nothing we need do for the success case.
                        if let Err(e) = vcpu_exit_evt.write(1) {
                            METRICS.vcpu.failures.inc();
                            error!("Failed signaling vcpu exit event: {:?}", e);
                        }
                    })
                    .map_err(StartMicrovmError::VcpuSpawn)?,
            );
        }

        // Load seccomp filters for the VMM thread.
        // Unlike on the vCPU threads, a failure here is returned as an error rather than
        // causing a panic; use --seccomp-level=0 if skipping filters altogether is the
        // desired behaviour.
        // NOTE(review): on this error path the barrier below is never reached by this thread,
        // so already spawned vCPU threads stay blocked on it - confirm this is intended.
        default_syscalls::set_seccomp_level(self.seccomp_level)
            .map_err(|e| StartMicrovmError::SeccompFilters(e))?;

        // Release the vCPU threads.
        vcpu_thread_barrier.wait();

        Ok(())
    }

    /// Loads the kernel image and its command line into guest memory and configures the
    /// system for boot.
    ///
    /// Returns the guest address of the kernel entry point.
    ///
    /// # Errors
    ///
    /// * `MissingKernelConfig` if no kernel configuration was set.
    /// * `KernelCmdline` if the command line cannot be turned into a C string.
    /// * `GuestMemory` if the VM has no memory attached.
    /// * `Loader` if loading the kernel or the command line fails.
    /// * `VcpusNotConfigured` if no vCPU count is set.
    /// * `ConfigureSystem` if the architecture-specific system configuration fails.
    fn load_kernel(&mut self) -> std::result::Result<GuestAddress, StartMicrovmError> {
        let kernel_config = match self.kernel_config.as_mut() {
            Some(config) => config,
            None => return Err(StartMicrovmError::MissingKernelConfig),
        };
        // This is the easy way out of consuming the value of the kernel_cmdline.
        // TODO: refactor the kernel_cmdline struct in order to have a CString instead of a String.
        let cmdline_cstring = CString::new(kernel_config.cmdline.clone()).map_err(|_| {
            StartMicrovmError::KernelCmdline(kernel_cmdline::Error::InvalidAscii.to_string())
        })?;

        // The VM memory is expected to have been attached earlier through vm.memory_init();
        // if it is missing, report it instead of panicking.
        let vm_memory = self.vm.get_memory().ok_or(StartMicrovmError::GuestMemory(
            memory_model::GuestMemoryError::MemoryNotInitialized,
        ))?;

        let kernel_file = &mut kernel_config.kernel_file;
        let entry_addr = kernel_loader::load_kernel(vm_memory, kernel_file, arch::HIMEM_START)
            .map_err(StartMicrovmError::Loader)?;
        kernel_loader::load_cmdline(vm_memory, kernel_config.cmdline_addr, &cmdline_cstring)
            .map_err(StartMicrovmError::Loader)?;

        // The vcpu_count has a default value. We shouldn't have gotten to this point without
        // having set the vcpu count.
        let vcpu_count = self
            .vm_config
            .vcpu_count
            .ok_or(StartMicrovmError::VcpusNotConfigured)?;
        // The command line size handed over includes the terminating NUL byte.
        let cmdline_size = cmdline_cstring.to_bytes_with_nul().len();
        arch::configure_system(
            vm_memory,
            kernel_config.cmdline_addr,
            cmdline_size,
            vcpu_count,
        )
        .map_err(StartMicrovmError::ConfigureSystem)?;

        Ok(entry_addr)
    }

    /// Registers the exit event and the stdin event with the epoll context.
    ///
    /// The exit event is a clone of the i8042 device's event fd; the resulting epoll event is
    /// kept in `self.exit_evt`.
    ///
    /// # Errors
    ///
    /// Returns `EventFd` if the i8042 event fd cannot be cloned and `RegisterEvent` if either
    /// event cannot be registered.
    ///
    /// # Panics
    ///
    /// Panics if the i8042 lock is poisoned.
    fn register_events(&mut self) -> std::result::Result<(), StartMicrovmError> {
        // If the lock is poisoned, it's OK to panic.
        let exit_fd = self
            .legacy_device_manager
            .i8042
            .lock()
            .expect("Failed to register events on the event fd due to poisoned lock")
            .get_eventfd_clone()
            .map_err(|_| StartMicrovmError::EventFd)?;

        self.exit_evt = Some(
            self.epoll_context
                .add_event(exit_fd, EpollDispatch::Exit)
                .map_err(|_| StartMicrovmError::RegisterEvent)?,
        );

        self.epoll_context
            .enable_stdin_event()
            .map_err(|_| StartMicrovmError::RegisterEvent)
    }

    fn start_microvm(&mut self) -> std::result::Result<VmmData, VmmActionError> {
        START_INSTANCE_REQUEST_CPU_TS.store(now_cputime_us() as usize, Ordering::Release);
        START_INSTANCE_REQUEST_TS.store(
            (chrono::Utc::now().timestamp_nanos() / 1000) as usize,
            Ordering::Release,
        );
        info!("VMM received instance start command");
        if self.is_instance_initialized() {
            return Err(VmmActionError::StartMicrovm(
                ErrorKind::User,
                StartMicrovmError::MicroVMAlreadyRunning,
            ));
        }

        self.check_health()
            .map_err(|e| VmmActionError::StartMicrovm(ErrorKind::User, e))?;
        // Use expect() to crash if the other thread poisoned this lock.
        self.shared_info
            .write()
            .expect("Failed to start microVM because shared info couldn't be written due to poisoned lock")
            .state = InstanceState::Starting;

        self.init_guest_memory()
            .map_err(|e| VmmActionError::StartMicrovm(ErrorKind::Internal, e))?;

        self.init_devices()
            .map_err(|e| VmmActionError::StartMicrovm(ErrorKind::Internal, e))?;
        self.init_microvm()
            .map_err(|e| VmmActionError::StartMicrovm(ErrorKind::Internal, e))?;

        let entry_addr = self
            .load_kernel()
            .map_err(|e| VmmActionError::StartMicrovm(ErrorKind::Internal, e))?;

        self.register_events()
            .map_err(|e| VmmActionError::StartMicrovm(ErrorKind::Internal, e))?;
        self.start_vcpus(entry_addr)
            .map_err(|e| VmmActionError::StartMicrovm(ErrorKind::Internal, e))?;

        // Use expect() to crash if the other thread poisoned this lock.
        self.shared_info
            .write()
            .expect("Failed to start microVM because shared info couldn't be written due to poisoned lock")
            .state = InstanceState::Running;

        // Arm the log write timer.
        // TODO: the timer does not stop on InstanceStop.
        let timer_state = TimerState::Periodic {
            current: Duration::from_secs(WRITE_METRICS_PERIOD_SECONDS),
            interval: Duration::from_secs(WRITE_METRICS_PERIOD_SECONDS),
        };
        self.write_metrics_event
            .fd
            .set_state(timer_state, SetTimeFlags::Default);

        // Log the metrics straight away to check the process startup time.
        if let Err(_) = LOGGER.log_metrics() {
            METRICS.logger.missed_metrics_count.inc();
        }

        Ok(VmmData::Empty)
    }

    /// Waits for all vCPUs to exit and terminates the Firecracker process.
    fn stop(&mut self, exit_code: i32) {
        info!("Vmm is stopping.");

        if let Err(e) = self.epoll_context.disable_stdin_event() {
            warn!("Cannot disable the STDIN event. {:?}", e);
        }

        if let Err(e) = self
            .legacy_device_manager
            .stdin_handle
            .lock()
            .set_canon_mode()
        {
            warn!("Cannot set canonical mode for the terminal. {:?}", e);
        }

        // Log the metrics before exiting.
        if let Err(e) = LOGGER.log_metrics() {
            error!("Failed to log metrics while stopping: {}", e);
        }

        // Exit from Firecracker using the provided exit code.
        std::process::exit(exit_code);
    }

    /// Tells whether the instance has left the `Uninitialized` state.
    ///
    /// Any state other than `InstanceState::Uninitialized` counts as initialized.
    ///
    /// # Panics
    ///
    /// Panics if the shared info lock is poisoned.
    fn is_instance_initialized(&self) -> bool {
        // Use expect() to crash if the other thread poisoned this lock.
        let shared_info = self.shared_info.read()
            .expect("Failed to determine if instance is initialized because shared info couldn't be read due to poisoned lock");
        match shared_info.state {
            InstanceState::Uninitialized => false,
            _ => true,
        }
    }

    /// Runs the VMM event loop.
    ///
    /// Blocks in `epoll::wait` and dispatches every returned event according to the entry of
    /// the epoll context's dispatch table selected by the event's data field:
    ///
    /// * `Exit` - drains the exit event fd and stops the VMM.
    /// * `Stdin` - forwards the bytes read from stdin to the serial device.
    /// * `DeviceHandler` - lets the matching device handler process the event.
    /// * `VmmActionRequest` - drains the API event fd and runs the pending VMM action.
    /// * `WriteMetrics` - writes the metrics.
    ///
    /// The loop has no regular exit: the function only returns when one of the fallible
    /// operations propagated with `?` fails.
    ///
    /// # Panics
    ///
    /// Panics if the serial device lock is poisoned, or if a device handler reports
    /// `PayloadExpected` or `UnknownEvent`.
    fn run_control(&mut self) -> Result<()> {
        // Upper bound on the number of events fetched by a single epoll wait.
        const EPOLL_EVENTS_LEN: usize = 100;

        let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); EPOLL_EVENTS_LEN];

        let epoll_raw_fd = self.epoll_context.epoll_raw_fd;

        // TODO: try handling of errors/failures without breaking this main loop.
        'poll: loop {
            // A timeout of -1 is passed, i.e. wait without a time limit.
            let num_events = epoll::wait(epoll_raw_fd, -1, &mut events[..]).map_err(Error::Poll)?;

            for i in 0..num_events {
                // The event data carries the index into the dispatch table.
                let dispatch_idx = events[i].data as usize;

                // Events whose dispatch table entry is empty are silently skipped.
                if let Some(dispatch_type) = self.epoll_context.dispatch_table[dispatch_idx] {
                    match dispatch_type {
                        EpollDispatch::Exit => {
                            match self.exit_evt {
                                Some(ref ev) => {
                                    ev.fd.read().map_err(Error::EventFd)?;
                                }
                                None => warn!("leftover exit-evt in epollcontext!"),
                            }
                            // `stop` terminates the process, so control does not come back.
                            self.stop(FC_EXIT_CODE_OK as i32);
                        }
                        EpollDispatch::Stdin => {
                            let mut out = [0u8; 64];
                            let stdin_lock = self.legacy_device_manager.stdin_handle.lock();
                            match stdin_lock.read_raw(&mut out[..]) {
                                Ok(0) => {
                                    // Zero-length read indicates EOF. Remove from pollables.
                                    self.epoll_context.disable_stdin_event()?;
                                }
                                Err(e) => {
                                    // A failed read also removes stdin from the pollables.
                                    warn!("error while reading stdin: {:?}", e);
                                    self.epoll_context.disable_stdin_event()?;
                                }
                                Ok(count) => {
                                    // Use expect() to panic if another thread panicked
                                    // while holding the lock.
                                    self.legacy_device_manager
                                        .stdio_serial
                                        .lock()
                                        .expect(
                                            "Failed to process stdin event due to poisoned lock",
                                        )
                                        .queue_input_bytes(&out[..count])
                                        .map_err(Error::Serial)?;
                                }
                            }
                        }
                        EpollDispatch::DeviceHandler(device_idx, device_token) => {
                            METRICS.vmm.device_events.inc();
                            match self.epoll_context.get_device_handler(device_idx) {
                                Ok(handler) => {
                                    // Only the two error kinds below are fatal; every other
                                    // outcome, including other errors, is ignored.
                                    match handler.handle_event(
                                        device_token,
                                        events[i].events,
                                        EpollHandlerPayload::Empty,
                                    ) {
                                        Err(devices::Error::PayloadExpected) => panic!(
                                            "Received update disk image event with empty payload."
                                        ),
                                        Err(devices::Error::UnknownEvent { device, event }) => {
                                            panic!("Unknown event: {:?} {:?}", device, event)
                                        }
                                        _ => (),
                                    }
                                }
                                Err(e) => {
                                    warn!("invalid handler for device {}: {:?}", device_idx, e)
                                }
                            }
                        }
                        EpollDispatch::VmmActionRequest => {
                            self.api_event.fd.read().map_err(Error::EventFd)?;
                            // A failure to run the action is only logged as a warning.
                            self.run_vmm_action().unwrap_or_else(|_| {
                                warn!("got spurious notification from api thread");
                                ()
                            });
                        }
                        EpollDispatch::WriteMetrics => {
                            // The value read from the timer fd is deliberately discarded;
                            // presumably the read is needed to consume the expiration - confirm.
                            self.write_metrics_event.fd.read();
                            // Please note that, since LOGGER has no output file configured yet, it will write to
                            // stdout, so logging will interfere with console output.
                            if let Err(e) = self.write_metrics() {
                                error!("Failed to log metrics: {}", e);
                            }
                        }
                    }
                }
            }
        }
    }

    // Returns the number of guest pages dirtied since this function was last called.
    // The dirty bitmap of every memory region is fetched and reset, and its set bits are
    // counted. Since the result only feeds metrics, a failing KVM operation is not reported:
    // the affected region simply contributes zero pages. Without guest memory the count is 0.
    #[cfg(target_arch = "x86_64")]
    fn get_dirty_page_count(&mut self) -> usize {
        match self.guest_memory {
            Some(ref mem) => mem.map_and_fold(
                0,
                |(slot, memory_region)| {
                    self.vm
                        .get_fd()
                        .get_and_reset_dirty_page_bitmap(slot as u32, memory_region.size())
                        .map(|bitmap| {
                            bitmap
                                .iter()
                                .map(|word| word.count_ones() as usize)
                                .sum::<usize>()
                        })
                        .unwrap_or(0)
                },
                |total, region_total| total + region_total,
            ),
            None => 0,
        }
    }

    /// Configures the kernel image and the kernel command line used to boot the guest.
    ///
    /// When `kernel_cmdline` is `None`, `DEFAULT_KERNEL_CMDLINE` is used instead.
    ///
    /// # Errors
    ///
    /// All errors are user errors:
    /// * `UpdateNotAllowedPostBoot` if the instance was already started.
    /// * `InvalidKernelPath` if the kernel image cannot be opened.
    /// * `InvalidKernelCommandLine` if the command line is rejected.
    fn configure_boot_source(
        &mut self,
        kernel_image_path: String,
        kernel_cmdline: Option<String>,
    ) -> std::result::Result<VmmData, VmmActionError> {
        if self.is_instance_initialized() {
            return Err(VmmActionError::BootSource(
                ErrorKind::User,
                BootSourceConfigError::UpdateNotAllowedPostBoot,
            ));
        }

        let kernel_file = match File::open(kernel_image_path) {
            Ok(file) => file,
            Err(_) => {
                return Err(VmmActionError::BootSource(
                    ErrorKind::User,
                    BootSourceConfigError::InvalidKernelPath,
                ));
            }
        };

        let cmdline_str = kernel_cmdline.unwrap_or_else(|| String::from(DEFAULT_KERNEL_CMDLINE));
        let mut cmdline = kernel_cmdline::Cmdline::new(arch::CMDLINE_MAX_SIZE);
        if cmdline.insert_str(cmdline_str).is_err() {
            return Err(VmmActionError::BootSource(
                ErrorKind::User,
                BootSourceConfigError::InvalidKernelCommandLine,
            ));
        }

        self.configure_kernel(KernelConfig {
            kernel_file,
            cmdline,
            cmdline_addr: GuestAddress(arch::CMDLINE_START),
        });

        Ok(VmmData::Empty)
    }

    /// Applies a (possibly partial) machine configuration.
    ///
    /// Only the fields that are set in `machine_config` are updated; for validation, unset
    /// `vcpu_count` and `ht_enabled` fall back to the currently stored values. Nothing is
    /// modified unless all checks pass.
    ///
    /// # Errors
    ///
    /// All errors are user errors:
    /// * `UpdateNotAllowedPostBoot` if the instance was already started.
    /// * `InvalidVcpuCount` if the vCPU count is zero, or if hyperthreading is enabled and
    ///   the vCPU count is odd and greater than one.
    /// * `InvalidMemorySize` if the memory size is zero.
    ///
    /// # Panics
    ///
    /// Panics if a value has to be taken from the stored configuration and that value is
    /// not set there.
    fn set_vm_configuration(
        &mut self,
        machine_config: VmConfig,
    ) -> std::result::Result<VmmData, VmmActionError> {
        if self.is_instance_initialized() {
            return Err(VmmActionError::MachineConfig(
                ErrorKind::User,
                VmConfigError::UpdateNotAllowedPostBoot,
            ));
        }

        // Check that the vcpu_count value is >=1.
        match machine_config.vcpu_count {
            Some(requested_vcpus) if requested_vcpus <= 0 => {
                return Err(VmmActionError::MachineConfig(
                    ErrorKind::User,
                    VmConfigError::InvalidVcpuCount,
                ));
            }
            _ => {}
        }

        // TODO: add other memory checks
        match machine_config.mem_size_mib {
            Some(requested_mem) if requested_mem <= 0 => {
                return Err(VmmActionError::MachineConfig(
                    ErrorKind::User,
                    VmConfigError::InvalidMemorySize,
                ));
            }
            _ => {}
        }

        // Effective values: the requested ones, or else the ones currently stored.
        let ht_enabled = machine_config
            .ht_enabled
            .unwrap_or_else(|| self.vm_config.ht_enabled.unwrap());
        let vcpu_count_value = machine_config
            .vcpu_count
            .unwrap_or_else(|| self.vm_config.vcpu_count.unwrap());

        // If hyperthreading is enabled or is to be enabled in this call
        // only allow vcpu count to be 1 or even.
        let odd_and_multiple = vcpu_count_value > 1 && vcpu_count_value % 2 == 1;
        if ht_enabled && odd_and_multiple {
            return Err(VmmActionError::MachineConfig(
                ErrorKind::User,
                VmConfigError::InvalidVcpuCount,
            ));
        }

        // Update all the fields that have a new value.
        self.vm_config.vcpu_count = Some(vcpu_count_value);
        self.vm_config.ht_enabled = Some(ht_enabled);

        if machine_config.mem_size_mib.is_some() {
            self.vm_config.mem_size_mib = machine_config.mem_size_mib;
        }

        if machine_config.cpu_template.is_some() {
            self.vm_config.cpu_template = machine_config.cpu_template;
        }

        Ok(VmmData::Empty)
    }

    /// Adds a network interface configuration.
    ///
    /// Fails with a user error if the instance was already started or if the configuration
    /// list rejects `body`.
    fn insert_net_device(
        &mut self,
        body: NetworkInterfaceConfig,
    ) -> std::result::Result<VmmData, VmmActionError> {
        if self.is_instance_initialized() {
            return Err(VmmActionError::NetworkConfig(
                ErrorKind::User,
                NetworkInterfaceError::UpdateNotAllowedPostBoot,
            ));
        }

        match self.network_interface_configs.insert(body) {
            Ok(_) => Ok(VmmData::Empty),
            Err(e) => Err(VmmActionError::NetworkConfig(ErrorKind::User, e)),
        }
    }

    /// Adds a vsock device configuration.
    ///
    /// Fails with a user error if the instance was already started or if the configuration
    /// list rejects `body`.
    #[cfg(feature = "vsock")]
    fn insert_vsock_device(
        &mut self,
        body: VsockDeviceConfig,
    ) -> std::result::Result<VmmData, VmmActionError> {
        if self.is_instance_initialized() {
            return Err(VmmActionError::VsockConfig(
                ErrorKind::User,
                VsockError::UpdateNotAllowedPostBoot,
            ));
        }

        match self.vsock_device_configs.add(body) {
            Ok(_) => Ok(VmmData::Empty),
            Err(e) => Err(VmmActionError::VsockConfig(ErrorKind::User, e)),
        }
    }

    /// Points the block device identified by `drive_id` to the file at `path_on_host`.
    ///
    /// The file is opened first (read-only or read-write, matching the device
    /// configuration) to validate the path. If the instance was already started, the drive
    /// handler is updated with the opened file and the device is rescanned.
    ///
    /// # Errors
    ///
    /// * `InvalidBlockDeviceID` if no device with `drive_id` is configured.
    /// * `CannotOpenBlockDevice` if the file cannot be opened.
    /// * Any error reported while updating the drive handler or rescanning the device.
    fn set_block_device_path(
        &mut self,
        drive_id: String,
        path_on_host: String,
    ) -> std::result::Result<VmmData, VmmActionError> {
        // Get the block device configuration specified by drive_id.
        let block_device_index = match self.block_device_configs.get_index_of_drive_id(&drive_id)
        {
            Some(index) => index,
            None => {
                return Err(VmmActionError::DriveConfig(
                    ErrorKind::User,
                    DriveError::InvalidBlockDeviceID,
                ));
            }
        };

        let file_path = PathBuf::from(path_on_host);
        // Try to open the file specified by path_on_host using the permissions of the block_device.
        let writable = !self.block_device_configs.config_list[block_device_index].is_read_only();
        let disk_file = OpenOptions::new()
            .read(true)
            .write(writable)
            .open(&file_path)
            .map_err(|_| {
                VmmActionError::DriveConfig(ErrorKind::User, DriveError::CannotOpenBlockDevice)
            })?;

        // Update the path of the block device with the specified path_on_host.
        self.block_device_configs.config_list[block_device_index].path_on_host = file_path;

        // When the microvm is running, we also need to update the drive handler and send a
        // rescan command to the drive.
        if self.is_instance_initialized() {
            if let Err(e) = self.update_drive_handler(&drive_id, disk_file) {
                return Err(VmmActionError::DriveConfig(ErrorKind::User, e));
            }
            self.rescan_block_device(&drive_id)?;
        }
        Ok(VmmData::Empty)
    }

    /// Notifies the guest-visible block device `drive_id` of the current size of its
    /// backing file.
    ///
    /// A warning is logged when the file size is not a multiple of the virtio block sector
    /// size.
    ///
    /// # Errors
    ///
    /// All errors are user errors:
    /// * `OperationNotAllowedPreBoot` if the instance was not started yet.
    /// * `InvalidBlockDeviceID` if the device manager knows no address for `drive_id`.
    /// * `BlockDeviceUpdateFailed` if no matching drive configuration exists, the backing
    ///   file's metadata cannot be read, or the device update fails.
    ///
    /// # Panics
    ///
    /// Panics if the MMIO device manager is not set.
    fn rescan_block_device(
        &mut self,
        drive_id: &String,
    ) -> std::result::Result<VmmData, VmmActionError> {
        // Rescan can only happen after the guest is booted.
        if !self.is_instance_initialized() {
            return Err(VmmActionError::DriveConfig(
                ErrorKind::User,
                DriveError::OperationNotAllowedPreBoot,
            ));
        }

        // Safe to unwrap() because mmio_device_manager is initialized in init_devices(), which is
        // called before the guest boots, and this function is called after boot.
        let device_manager = self.mmio_device_manager.as_ref().unwrap();
        let address = match device_manager.get_address(drive_id) {
            Some(&address) => address,
            _ => {
                return Err(VmmActionError::DriveConfig(
                    ErrorKind::User,
                    DriveError::InvalidBlockDeviceID,
                ));
            }
        };

        // Only the first configuration carrying this drive id is considered.
        let drive_config = self
            .block_device_configs
            .config_list
            .iter()
            .find(|config| config.drive_id == *drive_id)
            .ok_or_else(|| {
                VmmActionError::DriveConfig(ErrorKind::User, DriveError::BlockDeviceUpdateFailed)
            })?;

        let new_size = metadata(&drive_config.path_on_host)
            .map_err(|_| {
                VmmActionError::DriveConfig(ErrorKind::User, DriveError::BlockDeviceUpdateFailed)
            })?
            .len();
        if new_size % virtio::block::SECTOR_SIZE != 0 {
            warn!(
                "Disk size {} is not a multiple of sector size {}; \
                 the remainder will not be visible to the guest.",
                new_size,
                virtio::block::SECTOR_SIZE
            );
        }

        match device_manager.update_drive(address, new_size) {
            Ok(_) => Ok(VmmData::Empty),
            Err(_) => Err(VmmActionError::DriveConfig(
                ErrorKind::User,
                DriveError::BlockDeviceUpdateFailed,
            )),
        }
    }

    // Only call this function as part of the API.
    // Hands `block_device_config` over to the list of block device configurations; if the
    // drive_id does not exist, a new Block Device Config is added to the list.
    // Fails with `UpdateNotAllowedPostBoot` once the instance has been initialized, and wraps
    // any error reported by the configuration list in `VmmActionError::DriveConfig`.
    fn insert_block_device(
        &mut self,
        block_device_config: BlockDeviceConfig,
    ) -> std::result::Result<VmmData, VmmActionError> {
        if !self.is_instance_initialized() {
            return match self.block_device_configs.insert(block_device_config) {
                Ok(_) => Ok(VmmData::Empty),
                Err(e) => Err(VmmActionError::DriveConfig(ErrorKind::User, e)),
            };
        }

        Err(VmmActionError::DriveConfig(
            ErrorKind::User,
            DriveError::UpdateNotAllowedPostBoot,
        ))
    }

    fn init_logger(
        &self,
        api_logger: LoggerConfig,
    ) -> std::result::Result<VmmData, VmmActionError> {
        if self.is_instance_initialized() {
            return Err(VmmActionError::Logger(
                ErrorKind::User,
                LoggerConfigError::InitializationFailure(
                    "Cannot initialize logger after boot.".to_string(),
                ),
            ));
        }

        let instance_id;
        let firecracker_version;
        {
            let guard = self.shared_info.read().unwrap();
            instance_id = guard.id.clone();
            firecracker_version = guard.vmm_version.clone();
        }

        match api_logger.level {
            LoggerLevel::Error => LOGGER.set_level(Level::Error),
            LoggerLevel::Warning => LOGGER.set_level(Level::Warn),
            LoggerLevel::Info => LOGGER.set_level(Level::Info),
            LoggerLevel::Debug => LOGGER.set_level(Level::Debug),
        }

        LOGGER.set_include_origin(api_logger.show_log_origin, api_logger.show_log_origin);
        LOGGER.set_include_level(api_logger.show_level);

        let options = match api_logger.options {
            Value::Array(options) => options,
            _ => vec![],
        };

        LOGGER
            .init(
                &AppInfo::new("Firecracker", &firecracker_version),
                &instance_id,
                api_logger.log_fifo,
                api_logger.metrics_fifo,
                options,
            )
            .map(|_| VmmData::Empty)
            .map_err(|e| {
                VmmActionError::Logger(
                    ErrorKind::User,
                    LoggerConfigError::InitializationFailure(e.to_string()),
                )
            })
    }

    // Delivers `outcome` to the requester through `sender`.
    // Panics with "one-shot channel closed" when the send fails.
    fn send_response(outcome: VmmRequestOutcome, sender: OutcomeSender) {
        // The error payload is replaced with `()` before expect() is called on the result.
        let delivery = sender.send(outcome).map_err(|_| ());
        delivery.expect("one-shot channel closed");
    }

    // Takes at most one pending request from the API channel, executes it and sends the
    // outcome back through the sender carried by the request.
    //
    // Returns `Error::ApiChannel` when no request is queued and panics when the sending half
    // of the channel has been disconnected.
    fn run_vmm_action(&mut self) -> Result<()> {
        let request = match self.from_api.try_recv() {
            Err(TryRecvError::Disconnected) => {
                panic!("The channel's sending half was disconnected. Cannot receive data.");
            }
            Err(TryRecvError::Empty) => {
                return Err(Error::ApiChannel)?;
            }
            Ok(boxed_action) => *boxed_action,
        };

        // Each arm runs the requested operation and yields its outcome together with the
        // sender on which that outcome has to be reported.
        let (outcome, sender) = match request {
            VmmAction::ConfigureBootSource(boot_source_body, sender) => (
                self.configure_boot_source(
                    boot_source_body.kernel_image_path,
                    boot_source_body.boot_args,
                ),
                sender,
            ),
            VmmAction::ConfigureLogger(logger_description, sender) => {
                (self.init_logger(logger_description), sender)
            }
            VmmAction::FlushMetrics(sender) => (self.flush_metrics(), sender),
            VmmAction::GetVmConfiguration(sender) => (
                Ok(VmmData::MachineConfiguration(self.vm_config.clone())),
                sender,
            ),
            VmmAction::InsertBlockDevice(block_device_config, sender) => {
                (self.insert_block_device(block_device_config), sender)
            }
            VmmAction::InsertNetworkDevice(netif_body, sender) => {
                (self.insert_net_device(netif_body), sender)
            }
            #[cfg(feature = "vsock")]
            VmmAction::InsertVsockDevice(vsock_cfg, sender) => {
                (self.insert_vsock_device(vsock_cfg), sender)
            }
            VmmAction::RescanBlockDevice(drive_id, sender) => {
                (self.rescan_block_device(&drive_id), sender)
            }
            VmmAction::StartMicroVm(sender) => (self.start_microvm(), sender),
            VmmAction::SetVmConfiguration(machine_config_body, sender) => {
                (self.set_vm_configuration(machine_config_body), sender)
            }
            VmmAction::UpdateBlockDevicePath(drive_id, path_on_host, sender) => {
                (self.set_block_device_path(drive_id, path_on_host), sender)
            }
        };

        Vmm::send_response(outcome, sender);
        Ok(())
    }
}

// Can't derive PartialEq directly because the sender members can't be compared.
// This implementation is only used in tests, but cannot be moved to mod tests,
// because it is used in tests outside of the vmm crate (api_server).
//
// Two actions are equal when they are the same variant and their payloads compare equal; the
// response senders are always ignored. Variants that carry nothing besides the sender
// (StartMicroVm, FlushMetrics, GetVmConfiguration) are equal whenever the variant matches.
// NOTE: any variant without an arm below (e.g. InsertVsockDevice, which only exists with the
// `vsock` feature) never compares equal, not even to itself.
impl PartialEq for VmmAction {
    fn eq(&self, other: &VmmAction) -> bool {
        match (self, other) {
            (
                &VmmAction::UpdateBlockDevicePath(ref drive_id, ref path_on_host, _),
                &VmmAction::UpdateBlockDevicePath(ref other_drive_id, ref other_path_on_host, _),
            ) => drive_id == other_drive_id && path_on_host == other_path_on_host,
            (
                &VmmAction::ConfigureBootSource(ref boot_source, _),
                &VmmAction::ConfigureBootSource(ref other_boot_source, _),
            ) => boot_source == other_boot_source,
            (
                &VmmAction::InsertBlockDevice(ref block_device, _),
                &VmmAction::InsertBlockDevice(ref other_block_device, _),
            ) => block_device == other_block_device,
            (
                &VmmAction::ConfigureLogger(ref log, _),
                &VmmAction::ConfigureLogger(ref other_log, _),
            ) => log == other_log,
            (
                &VmmAction::SetVmConfiguration(ref vm_config, _),
                &VmmAction::SetVmConfiguration(ref other_vm_config, _),
            ) => vm_config == other_vm_config,
            (
                &VmmAction::InsertNetworkDevice(ref net_dev, _),
                &VmmAction::InsertNetworkDevice(ref other_net_dev, _),
            ) => net_dev == other_net_dev,
            (
                &VmmAction::RescanBlockDevice(ref req, _),
                &VmmAction::RescanBlockDevice(ref other_req, _),
            ) => req == other_req,
            (&VmmAction::StartMicroVm(_), &VmmAction::StartMicroVm(_)) => true,
            (&VmmAction::FlushMetrics(_), &VmmAction::FlushMetrics(_)) => true,
            // Previously missing: without this arm two GetVmConfiguration actions fell through
            // to the catch-all and were reported as different.
            (&VmmAction::GetVmConfiguration(_), &VmmAction::GetVmConfiguration(_)) => true,
            _ => false,
        }
    }
}

/// Spawns the "fc_vmm" thread, which builds a `Vmm` and runs its control loop to service API
/// requests.
///
/// When the control loop returns, the thread logs how it ended and calls `Vmm::stop()` with
/// `FC_EXIT_CODE_OK` on success or `FC_EXIT_CODE_GENERIC_ERROR` on failure.
///
/// # Arguments
///
/// * `api_shared_info` - A parameter for storing information on the VMM (e.g the current state).
/// * `api_event_fd` - An event fd used for receiving API associated events.
/// * `from_api` - The receiver end point of the communication channel.
/// * `seccomp_level` - The level of seccomp filtering used. Filters are loaded before executing
///                     guest code.
///                     See `seccomp::SeccompLevel` for more information about seccomp levels.
/// * `kvm_fd` - Provides the option of supplying an already existing raw file descriptor
///              associated with `/dev/kvm`.
///
/// # Panics
///
/// Panics if the thread cannot be spawned. Inside the spawned thread, failing to create the
/// `Vmm` panics as well.
pub fn start_vmm_thread(
    api_shared_info: Arc<RwLock<InstanceInfo>>,
    api_event_fd: EventFd,
    from_api: Receiver<Box<VmmAction>>,
    seccomp_level: u32,
    kvm_fd: Option<RawFd>,
) -> thread::JoinHandle<()> {
    let builder = thread::Builder::new().name("fc_vmm".to_string());
    builder
        .spawn(move || {
            // If this fails, consider it fatal. Use expect().
            let mut vmm = Vmm::new(
                api_shared_info,
                api_event_fd,
                from_api,
                seccomp_level,
                kvm_fd,
            )
            .expect("Cannot create VMM.");

            // Log the way the control loop ended and pick the matching exit code.
            let exit_code = match vmm.run_control() {
                Ok(()) => {
                    info!("Gracefully terminated VMM control loop");
                    FC_EXIT_CODE_OK as i32
                }
                Err(e) => {
                    error!("Abruptly exited VMM control loop: {:?}", e);
                    FC_EXIT_CODE_GENERIC_ERROR as i32
                }
            };
            vmm.stop(exit_code);
        })
        .expect("VMM thread spawn failed.")
}

#[cfg(test)]
mod tests {
    extern crate tempfile;

    use super::*;

    use std::fs::File;
    use std::io::BufRead;
    use std::io::BufReader;
    use std::sync::atomic::AtomicUsize;

    use self::tempfile::NamedTempFile;
    use devices::virtio::ActivateResult;
    use net_util::MacAddr;
    use vmm_config::machine_config::CpuFeaturesTemplate;

    // Test-only helpers that read or tweak the internal state of a Vmm.
    impl Vmm {
        // Returns the configured kernel command line, or "" when no kernel is configured.
        fn get_kernel_cmdline_str(&self) -> &str {
            match self.kernel_config {
                Some(ref kernel) => kernel.cmdline.as_str(),
                None => "",
            }
        }

        // Removes the address registered for `id` from the MMIO device manager.
        // Panics if the MMIO device manager has not been created.
        fn remove_addr(&mut self, id: &String) {
            let device_manager = self.mmio_device_manager.as_mut().unwrap();
            device_manager.remove_address(id);
        }

        // Configures a kernel backed by a freshly created temporary file, using the default
        // kernel command line.
        fn default_kernel_config(&mut self) {
            let kernel_file_temp =
                NamedTempFile::new().expect("Failed to create temporary kernel file.");
            let kernel_path = kernel_file_temp.path().to_str().unwrap().to_string();
            let kernel_file = File::open(kernel_path).unwrap();

            let mut cmdline = kernel_cmdline::Cmdline::new(arch::CMDLINE_MAX_SIZE);
            assert!(cmdline.insert_str(DEFAULT_KERNEL_CMDLINE).is_ok());

            self.configure_kernel(KernelConfig {
                cmdline,
                kernel_file,
                cmdline_addr: GuestAddress(arch::CMDLINE_START),
            });
        }

        // Overwrites the instance state stored in the shared info.
        fn set_instance_state(&mut self, instance_state: InstanceState) {
            let mut info = self.shared_info.write().unwrap();
            info.state = instance_state;
        }

        // Points the first drive configuration whose ID is `block_device_id` at `new_path`.
        // Does nothing when no configuration matches.
        fn update_block_device_path(&mut self, block_device_id: &str, new_path: PathBuf) {
            let matching = self
                .block_device_configs
                .config_list
                .iter_mut()
                .find(|entry| entry.drive_id == block_device_id);
            if let Some(config) = matching {
                config.path_on_host = new_path;
            }
        }

        // Renames the first drive configuration whose ID is `prev_id` to `new_id`.
        // Does nothing when no configuration matches.
        fn change_id(&mut self, prev_id: &str, new_id: &str) {
            let matching = self
                .block_device_configs
                .config_list
                .iter_mut()
                .find(|entry| entry.drive_id == prev_id);
            if let Some(config) = matching {
                config.drive_id = String::from(new_id);
            }
        }
    }

    // Epoll handler stub for tests: its handle_event() implementation stores the arguments of
    // the most recent call in the fields below. All fields start out as None in the tests.
    struct DummyEpollHandler {
        // Device event passed to the last handle_event() call.
        evt: Option<DeviceEventT>,
        // Event flags passed to the last handle_event() call.
        flags: Option<u32>,
        // Payload passed to the last handle_event() call.
        payload: Option<EpollHandlerPayload>,
    }

    impl EpollHandler for DummyEpollHandler {
        // Remembers the arguments of this call in the handler's fields and always succeeds.
        fn handle_event(
            &mut self,
            device_event: DeviceEventT,
            event_flags: u32,
            payload: EpollHandlerPayload,
        ) -> std::result::Result<(), devices::Error> {
            self.payload = Some(payload);
            self.flags = Some(event_flags);
            self.evt = Some(device_event);
            Ok(())
        }
    }

    // Placeholder virtio device for tests; see its VirtioDevice implementation below.
    // `dead_code` is allowed presumably because `dummy` is never read - TODO confirm.
    #[allow(dead_code)]
    #[derive(Clone)]
    struct DummyDevice {
        // Arbitrary payload; not interpreted by the VirtioDevice implementation.
        dummy: u32,
    }

    // No-op VirtioDevice implementation: it reports device type 0 and a single queue with a
    // maximum size of 10, ignores feature acknowledgements and config space accesses, and
    // always activates successfully.
    impl devices::virtio::VirtioDevice for DummyDevice {
        fn device_type(&self) -> u32 {
            0
        }

        fn queue_max_sizes(&self) -> &[u16] {
            &[10]
        }

        // Acknowledged features are ignored.
        fn ack_features(&mut self, _page: u32, _value: u32) {}

        // Leaves `_data` untouched.
        fn read_config(&self, _offset: u64, _data: &mut [u8]) {}

        // Config writes are ignored.
        fn write_config(&mut self, _offset: u64, _data: &[u8]) {}

        // Accepts every activation request without using any of its arguments.
        fn activate(
            &mut self,
            _mem: GuestMemory,
            _interrupt_evt: EventFd,
            _status: Arc<AtomicUsize>,
            _queues: Vec<devices::virtio::Queue>,
            _queue_evts: Vec<EventFd>,
        ) -> ActivateResult {
            Ok(())
        }
    }

    // Builds a Vmm for the tests: the instance starts in `state`, with ID "TEST_ID" and VMM
    // version "1.0", the advanced seccomp level and no pre-opened KVM fd.
    // NOTE: the sending half of the API channel is dropped when this function returns.
    fn create_vmm_object(state: InstanceState) -> Vmm {
        let instance_info = InstanceInfo {
            state,
            id: String::from("TEST_ID"),
            vmm_version: String::from("1.0"),
        };
        let shared_info = Arc::new(RwLock::new(instance_info));

        let (_to_vmm, from_api) = channel();
        let api_event_fd = EventFd::new().expect("cannot create eventFD");

        Vmm::new(
            shared_info,
            api_event_fd,
            from_api,
            seccomp::SECCOMP_LEVEL_ADVANCED,
            None,
        )
        .expect("Cannot Create VMM")
    }

    #[test]
    fn test_device_handler() {
        let mut epoll_context = EpollContext::new().unwrap();

        // Reserving one token yields base index 1 and leaves one registered handler slot.
        let (first_token, handler_sender) = epoll_context.allocate_tokens(1);
        assert_eq!(epoll_context.device_handlers.len(), 1);
        assert_eq!(first_token, 1);

        // Once a handler has been sent, looking it up must succeed.
        let send_result = handler_sender.send(Box::new(DummyEpollHandler {
            evt: None,
            flags: None,
            payload: None,
        }));
        assert!(send_result.is_ok());
        assert!(epoll_context.get_device_handler(0).is_ok());
    }

    #[test]
    fn test_insert_block_device() {
        // Builds a drive configuration without partuuid and rate limiter, backed by `file`.
        let drive = |drive_id: &str,
                     file: &NamedTempFile,
                     is_root_device: bool,
                     is_read_only: bool| BlockDeviceConfig {
            drive_id: String::from(drive_id),
            path_on_host: file.path().to_path_buf(),
            is_root_device,
            partuuid: None,
            is_read_only,
            rate_limiter: None,
        };

        let mut vmm = create_vmm_object(InstanceState::Uninitialized);
        let first_file = NamedTempFile::new().unwrap();

        // A new root drive is accepted and shows up in the configuration list.
        let root_block_device = drive("root", &first_file, true, false);
        assert!(vmm.insert_block_device(root_block_device.clone()).is_ok());
        assert!(vmm
            .block_device_configs
            .config_list
            .contains(&root_block_device));

        // Re-inserting the same ID with a different read-only flag updates the stored entry.
        let root_block_device = drive("root", &first_file, true, true);
        assert!(vmm.insert_block_device(root_block_device.clone()).is_ok());
        assert!(vmm
            .block_device_configs
            .config_list
            .contains(&root_block_device));

        // A second drive that reuses the path of the first one is rejected.
        let same_path_device = drive("dummy_dev", &first_file, false, true);
        assert!(vmm.insert_block_device(same_path_device).is_err());

        // A second drive with its own backing file is accepted.
        let second_file = NamedTempFile::new().unwrap();
        let non_root = drive("non_root", &second_file, false, false);
        assert!(vmm.insert_block_device(non_root).is_ok());

        // Turning the second drive into a root device fails (it would result in 2 root block
        // devices).
        let non_root = drive("non_root", &second_file, true, false);
        assert!(vmm.insert_block_device(non_root).is_err());

        // After boot no drive configuration can be changed anymore.
        vmm.set_instance_state(InstanceState::Running);
        let post_boot_update = drive("root", &second_file, false, true);
        assert!(vmm.insert_block_device(post_boot_update).is_err())
    }

    #[test]
    fn test_insert_net_device() {
        // Builds an interface configuration without rate limiters, MMDS access or tap.
        let net_config = |iface_id: &str, host_dev_name: &str, guest_mac: Option<MacAddr>| {
            NetworkInterfaceConfig {
                iface_id: String::from(iface_id),
                host_dev_name: String::from(host_dev_name),
                guest_mac,
                rx_rate_limiter: None,
                tx_rate_limiter: None,
                allow_mmds_requests: false,
                tap: None,
            }
        };

        let mut vmm = create_vmm_object(InstanceState::Uninitialized);

        // Creating a network interface works.
        assert!(vmm
            .insert_net_device(net_config("netif", "hostname", None))
            .is_ok());

        // Updating the same interface with a new host device name and a MAC works as well.
        let mac = MacAddr::parse_str("01:23:45:67:89:0A").unwrap();
        assert!(vmm
            .insert_net_device(net_config("netif", "hostname2", Some(mac.clone())))
            .is_ok());

        // A different interface that reuses the same MAC is rejected.
        assert!(vmm
            .insert_net_device(net_config("netif2", "hostname3", Some(mac)))
            .is_err());

        // Updates are refused once the instance is running.
        vmm.set_instance_state(InstanceState::Running);
        assert!(vmm
            .insert_net_device(net_config("netif", "hostname2", None))
            .is_err());
    }

    #[test]
    fn test_machine_configuration() {
        // Shorthand for building the (partial) machine configuration sent in one request.
        let machine_config = |vcpu_count, mem_size_mib, ht_enabled, cpu_template| VmConfig {
            vcpu_count,
            mem_size_mib,
            ht_enabled,
            cpu_template,
        };

        let mut vmm = create_vmm_object(InstanceState::Uninitialized);

        // Defaults: 1 vCPU, 128 MiB of memory, hyperthreading off, no CPU template.
        assert_eq!(vmm.vm_config.vcpu_count, Some(1));
        assert_eq!(vmm.vm_config.mem_size_mib, Some(128));
        assert_eq!(vmm.vm_config.ht_enabled, Some(false));
        assert!(vmm.vm_config.cpu_template.is_none());

        // 1. Hyperthreading disabled.
        // A valid vCPU count is applied and nothing else changes.
        let request = machine_config(Some(3), None, None, None);
        assert!(vmm.set_vm_configuration(request).is_ok());
        assert_eq!(vmm.vm_config.vcpu_count, Some(3));
        assert_eq!(vmm.vm_config.mem_size_mib, Some(128));
        assert_eq!(vmm.vm_config.ht_enabled, Some(false));

        // A valid memory size is applied and nothing else changes.
        let request = machine_config(None, Some(256), None, None);
        assert!(vmm.set_vm_configuration(request).is_ok());
        assert_eq!(vmm.vm_config.vcpu_count, Some(3));
        assert_eq!(vmm.vm_config.mem_size_mib, Some(256));
        assert_eq!(vmm.vm_config.ht_enabled, Some(false));

        // A vCPU count of 0 is rejected and the stored count is left alone.
        let request = machine_config(Some(0), None, None, None);
        assert!(vmm.set_vm_configuration(request).is_err());
        assert_eq!(vmm.vm_config.vcpu_count, Some(3));

        // A memory size of 0 is rejected and none of the other values in the same request
        // are applied.
        let request = machine_config(
            Some(1),
            Some(0),
            Some(false),
            Some(CpuFeaturesTemplate::T2),
        );
        assert!(vmm.set_vm_configuration(request).is_err());
        assert_eq!(vmm.vm_config.vcpu_count, Some(3));
        assert_eq!(vmm.vm_config.mem_size_mib, Some(256));
        assert_eq!(vmm.vm_config.ht_enabled, Some(false));
        assert!(vmm.vm_config.cpu_template.is_none());

        // 2. Hyperthreading enabled.
        // Hyperthreading cannot be turned on while the vCPU count is odd.
        let request = machine_config(None, None, Some(true), None);
        assert!(vmm.set_vm_configuration(request).is_err());
        assert_eq!(vmm.vm_config.ht_enabled, Some(false));

        // With an even vCPU count the flag can be set; the CPU template is set in the same
        // request.
        let request = machine_config(Some(2), None, Some(true), Some(CpuFeaturesTemplate::T2));
        assert!(vmm.set_vm_configuration(request).is_ok());
        assert_eq!(vmm.vm_config.vcpu_count, Some(2));
        assert_eq!(vmm.vm_config.ht_enabled, Some(true));
        assert_eq!(vmm.vm_config.cpu_template, Some(CpuFeaturesTemplate::T2));

        // 3. The configuration cannot be updated after boot.
        vmm.set_instance_state(InstanceState::Running);
        let request = machine_config(Some(2), None, Some(true), Some(CpuFeaturesTemplate::T2));
        assert!(vmm.set_vm_configuration(request).is_err());
    }

    #[test]
    fn new_epoll_context_test() {
        // Creating an epoll context must succeed.
        let context = EpollContext::new();
        assert!(context.is_ok());
    }

    #[test]
    fn enable_disable_stdin_test() {
        let mut epoll_context = EpollContext::new().unwrap();

        // First round: enable, then disable.
        assert!(epoll_context.enable_stdin_event().is_ok());

        // Enabling twice in a row should fail.
        // TODO: commented out because stdin & /dev/null related issues, as mentioned in another
        // comment from enable_stdin_event().
        // assert!(epoll_context.enable_stdin_event().is_err());

        assert!(epoll_context.disable_stdin_event().is_ok());

        // Second round: after a disable, both operations must work again.
        assert!(epoll_context.enable_stdin_event().is_ok());
        assert!(epoll_context.disable_stdin_event().is_ok());
    }

    #[test]
    fn add_event_test() {
        let mut epoll_context = EpollContext::new().unwrap();
        let exit_fd = EventFd::new().unwrap();

        // Registering a brand new event must succeed.
        let registration = epoll_context.add_event(exit_fd, EpollDispatch::Exit);
        assert!(registration.is_ok());
    }

    #[test]
    fn epoll_event_test() {
        let mut epoll_context = EpollContext::new().unwrap();
        let exit_fd = EventFd::new().unwrap();

        // Registering a brand new event must succeed.
        let registration = epoll_context.add_event(exit_fd, EpollDispatch::Exit);
        assert!(registration.is_ok());
        let registered_event = registration.unwrap();

        let max_events = 10;
        let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); max_events];

        // Nothing has been signalled yet, so a non-blocking wait reports no events.
        let pending = epoll::wait(epoll_context.epoll_raw_fd, 0, &mut events[..]).unwrap();
        assert_eq!(pending, 0);

        // Signal the event fd...
        assert!(registered_event.fd.write(1).is_ok());

        // ...after which exactly one event is reported.
        let pending = epoll::wait(epoll_context.epoll_raw_fd, 0, &mut events[..]).unwrap();
        assert_eq!(pending, 1);

        // The event data indexes the dispatch table entry registered above.
        let idx = events[0].data as usize;
        let dispatch = &epoll_context.dispatch_table[idx];
        assert!(dispatch.is_some());
        assert_eq!(*dispatch.as_ref().unwrap(), EpollDispatch::Exit);
    }

    #[test]
    fn test_kvm_context() {
        use std::mem::ManuallyDrop;
        use std::os::unix::fs::MetadataExt;
        use std::os::unix::io::FromRawFd;

        let c = KvmContext::new(None).unwrap();

        assert!(c.max_memslots >= 32);

        let kvm = Kvm::new().unwrap();
        // `as_raw_fd()` does not hand over ownership of the descriptor, while
        // `File::from_raw_fd()` assumes it. Letting the `File` drop would close the fd, and a
        // second close would follow if `kvm` also closes it on drop (assumed - confirm against
        // the kvm crate); with tests running in parallel that second close could hit a
        // descriptor opened by another test in the meantime. `ManuallyDrop` keeps the `File`
        // from ever closing the fd, leaving cleanup to `kvm`.
        let f = ManuallyDrop::new(unsafe { File::from_raw_fd(kvm.as_raw_fd()) });
        let m1 = f.metadata().unwrap();
        let m2 = File::open("/dev/kvm").unwrap().metadata().unwrap();

        // Same device and inode means the `Kvm` object really refers to /dev/kvm.
        assert_eq!(m1.dev(), m2.dev());
        assert_eq!(m1.ino(), m2.ino());
    }

    #[test]
    fn test_check_health() {
        let mut vmm = create_vmm_object(InstanceState::Uninitialized);

        // The health check is expected to fail before a kernel has been configured.
        assert!(vmm.check_health().is_err());

        // With a (dummy) kernel configuration in place it is expected to pass.
        let kernel_config = KernelConfig {
            cmdline_addr: GuestAddress(0x1000),
            cmdline: kernel_cmdline::Cmdline::new(10),
            kernel_file: tempfile::tempfile().unwrap(),
        };
        vmm.configure_kernel(kernel_config);
        assert!(vmm.check_health().is_ok());
    }

    #[test]
    fn test_is_instance_initialized() {
        // Only the Uninitialized state counts as "not initialized".
        let expectations = vec![
            (InstanceState::Uninitialized, false),
            (InstanceState::Starting, true),
            (InstanceState::Halting, true),
            (InstanceState::Halted, true),
            (InstanceState::Running, true),
        ];

        for (state, expected) in expectations {
            let vmm = create_vmm_object(state);
            assert_eq!(vmm.is_instance_initialized(), expected);
        }
    }

    #[test]
    fn test_attach_block_devices() {
        let block_file = NamedTempFile::new().unwrap();

        // Builds a writable drive configuration without rate limiter, backed by `block_file`.
        let drive = |drive_id: &str, is_root_device: bool, partuuid: Option<&str>| {
            BlockDeviceConfig {
                drive_id: String::from(drive_id),
                path_on_host: block_file.path().to_path_buf(),
                is_root_device,
                partuuid: partuuid.map(|uuid| uuid.to_string()),
                is_read_only: false,
                rate_limiter: None,
            }
        };

        // Inserts `config` into a fresh VMM, sets up guest memory and the default kernel, and
        // attaches the block devices to a new MMIO device manager. Every step must succeed.
        let attach = |config: BlockDeviceConfig| {
            let mut vmm = create_vmm_object(InstanceState::Uninitialized);
            assert!(vmm.insert_block_device(config).is_ok());
            assert!(vmm.init_guest_memory().is_ok());
            assert!(vmm.guest_memory.is_some());

            vmm.default_kernel_config();

            let guest_mem = vmm.guest_memory.clone().unwrap();
            let mut device_manager =
                MMIODeviceManager::new(guest_mem.clone(), arch::get_reserved_mem_addr() as u64);
            assert!(vmm.attach_block_devices(&mut device_manager).is_ok());
            (vmm, device_manager)
        };

        // Use Case 1: Root Block Device is not specified through PARTUUID.
        let (vmm, _device_manager) = attach(drive("root", true, None));
        assert!(vmm.get_kernel_cmdline_str().contains("root=/dev/vda"));

        // Use Case 2: Root Block Device is specified through PARTUUID.
        let (vmm, _device_manager) = attach(drive("root", true, Some("0eaa91a0-01")));
        assert!(vmm
            .get_kernel_cmdline_str()
            .contains("root=PARTUUID=0eaa91a0-01"));

        // Use Case 3: Root Block Device is not added at all.
        let non_root_block_device = drive("not_root", false, Some("0eaa91a0-01"));
        let (mut vmm, device_manager) = attach(non_root_block_device.clone());

        // The kernel command line mentions neither /dev/vda nor a PARTUUID root.
        assert!(!vmm.get_kernel_cmdline_str().contains("root=PARTUUID="));
        assert!(!vmm.get_kernel_cmdline_str().contains("root=/dev/vda"));

        // The non root device is attached nevertheless.
        assert!(device_manager
            .get_address(&non_root_block_device.drive_id)
            .is_some());

        // Partial update of the drive: pointing it at another existing file works.
        let new_block = NamedTempFile::new().unwrap();
        let path = String::from(new_block.path().to_path_buf().to_str().unwrap());
        assert!(vmm
            .set_block_device_path("not_root".to_string(), path)
            .is_ok());

        // Partial update fails when the new path is not a valid file.
        assert!(vmm
            .set_block_device_path("not_root".to_string(), String::from("dummy_path"))
            .is_err());
    }

    /// Checks that a configured network interface can be attached through the MMIO device
    /// manager once, and that a repeated attach attempt is rejected.
    #[test]
    fn test_attach_net_devices() {
        let mut vmm = create_vmm_object(InstanceState::Uninitialized);
        let memory_ready = vmm.init_guest_memory().is_ok();
        assert!(memory_ready);
        assert!(vmm.guest_memory.is_some());

        vmm.default_kernel_config();

        // Build the MMIO device manager on top of the guest memory set up above.
        let memory = vmm.guest_memory.clone().unwrap();
        let mmio_base = arch::get_reserved_mem_addr() as u64;
        let mut mmio_devices = MMIODeviceManager::new(memory.clone(), mmio_base);

        // Describe the network interface and hand it over to the VMM.
        let iface_config = NetworkInterfaceConfig {
            iface_id: "netif".to_string(),
            host_dev_name: "hostname3".to_string(),
            guest_mac: None,
            rx_rate_limiter: None,
            tx_rate_limiter: None,
            allow_mmds_requests: false,
            tap: None,
        };
        let inserted = vmm.insert_net_device(iface_config).is_ok();
        assert!(inserted);

        // The first attach is expected to succeed.
        let first_attach_ok = vmm.attach_net_devices(&mut mmio_devices).is_ok();
        assert!(first_attach_ok);

        // A second attach must fail: creating the virtio::Net object takes the tap, so it is
        // no longer available the second time around.
        let second_attach_failed = vmm.attach_net_devices(&mut mmio_devices).is_err();
        assert!(second_attach_failed);
    }

    /// Checks that device initialization succeeds after the default kernel configuration is
    /// applied and the guest memory is initialized.
    #[test]
    fn test_init_devices() {
        let mut vmm = create_vmm_object(InstanceState::Uninitialized);
        vmm.default_kernel_config();

        let memory_ready = vmm.init_guest_memory().is_ok();
        assert!(memory_ready);

        let devices_ready = vmm.init_devices().is_ok();
        assert!(devices_ready);
    }

    /// Exercises `configure_boot_source` with an invalid kernel path, an invalid command line,
    /// valid configurations, and a valid configuration submitted once the instance is running.
    #[test]
    fn test_configure_boot_source() {
        let mut vmm = create_vmm_object(InstanceState::Uninitialized);

        // An invalid kernel path must be rejected.
        let bogus_kernel = "dummy-path".to_string();
        assert!(vmm.configure_boot_source(bogus_kernel, None).is_err());

        // A valid kernel path paired with a command line one byte over the limit must be
        // rejected as well.
        let kernel_image = NamedTempFile::new().expect("Failed to create temporary kernel file.");
        let kernel_path = kernel_image.path().to_str().unwrap().to_string();
        let oversized_cmdline = "X".repeat(arch::CMDLINE_MAX_SIZE + 1);
        let oversized_outcome =
            vmm.configure_boot_source(kernel_path.clone(), Some(oversized_cmdline));
        assert!(oversized_outcome.is_err());

        // Valid configurations: without a command line, then with a short one.
        let without_cmdline = vmm.configure_boot_source(kernel_path.clone(), None);
        assert!(without_cmdline.is_ok());
        let with_cmdline =
            vmm.configure_boot_source(kernel_path.clone(), Some("reboot=k".to_string()));
        assert!(with_cmdline.is_ok());

        // The same valid configuration must be refused once the instance is running.
        vmm.set_instance_state(InstanceState::Running);
        let after_boot = vmm.configure_boot_source(kernel_path, None);
        assert!(after_boot.is_err());
    }

    /// Exercises `rescan_block_device`: successful rescans on a running VMM, each error the
    /// assertions below expect, and the rejection of a rescan requested before boot.
    ///
    /// Every unexpected outcome panics with the error that was actually returned (or `None`
    /// when the call unexpectedly succeeded), so a failure is diagnosable from the test output.
    #[test]
    fn test_rescan() {
        let mut vmm = create_vmm_object(InstanceState::Uninitialized);
        vmm.default_kernel_config();

        let root_file = NamedTempFile::new().unwrap();
        let scratch_file = NamedTempFile::new().unwrap();
        let scratch_id = "not_root".to_string();

        let root_block_device = BlockDeviceConfig {
            drive_id: String::from("root"),
            path_on_host: root_file.path().to_path_buf(),
            is_root_device: true,
            partuuid: None,
            is_read_only: false,
            rate_limiter: None,
        };
        let non_root_block_device = BlockDeviceConfig {
            drive_id: scratch_id.clone(),
            path_on_host: scratch_file.path().to_path_buf(),
            is_root_device: false,
            partuuid: None,
            is_read_only: true,
            rate_limiter: None,
        };

        // The root config is not needed afterwards, so it is moved rather than cloned.
        assert!(vmm.insert_block_device(root_block_device).is_ok());
        assert!(vmm
            .insert_block_device(non_root_block_device.clone())
            .is_ok());

        assert!(vmm.init_guest_memory().is_ok());
        assert!(vmm.guest_memory.is_some());

        let guest_mem = vmm.guest_memory.clone().unwrap();
        let mut device_manager =
            MMIODeviceManager::new(guest_mem, arch::get_reserved_mem_addr() as u64);

        // Register a dummy device under the scratch drive ID; the rescans below target this ID.
        let dummy_box = Box::new(DummyDevice { dummy: 0 });
        // Use a dummy command line as it is not used in this test.
        let _addr = device_manager
            .register_device(
                dummy_box,
                &mut kernel_cmdline::Cmdline::new(arch::CMDLINE_MAX_SIZE),
                Some(scratch_id.clone()),
            )
            .unwrap();

        vmm.mmio_device_manager = Some(device_manager);
        vmm.set_instance_state(InstanceState::Running);

        // Test valid rescan_block_device.
        assert!(vmm.rescan_block_device(&scratch_id).is_ok());

        // Test rescan block device with size not a multiple of sector size.
        let new_size = 10 * virtio::block::SECTOR_SIZE + 1;
        scratch_file.as_file().set_len(new_size).unwrap();
        assert!(vmm.rescan_block_device(&scratch_id).is_ok());

        // Test rescan block device with invalid path.
        let prev_path = non_root_block_device.path_on_host().clone();
        vmm.update_block_device_path(&scratch_id, PathBuf::from("foo"));
        match vmm.rescan_block_device(&scratch_id) {
            Err(VmmActionError::DriveConfig(
                ErrorKind::User,
                DriveError::BlockDeviceUpdateFailed,
            )) => (),
            other => panic!(
                "expected BlockDeviceUpdateFailed for an invalid path, got error: {:?}",
                other.err()
            ),
        }
        // Restore the original path before moving on to the next scenario.
        vmm.update_block_device_path(&scratch_id, prev_path);

        // Test rescan_block_device with invalid ID.
        match vmm.rescan_block_device(&"foo".to_string()) {
            Err(VmmActionError::DriveConfig(
                ErrorKind::User,
                DriveError::InvalidBlockDeviceID,
            )) => (),
            other => panic!(
                "expected InvalidBlockDeviceID for an unknown drive ID, got error: {:?}",
                other.err()
            ),
        }
        // After the ID is changed through `change_id`, a rescan with the original ID is
        // expected to report an update failure.
        vmm.change_id(&scratch_id, "scratch");
        match vmm.rescan_block_device(&scratch_id) {
            Err(VmmActionError::DriveConfig(
                ErrorKind::User,
                DriveError::BlockDeviceUpdateFailed,
            )) => (),
            other => panic!(
                "expected BlockDeviceUpdateFailed after changing the ID, got error: {:?}",
                other.err()
            ),
        }

        // Test rescan_block_device with invalid device address.
        vmm.remove_addr(&scratch_id);
        match vmm.rescan_block_device(&scratch_id) {
            Err(VmmActionError::DriveConfig(
                ErrorKind::User,
                DriveError::InvalidBlockDeviceID,
            )) => (),
            other => panic!(
                "expected InvalidBlockDeviceID after removing the address, got error: {:?}",
                other.err()
            ),
        }

        // Test rescan not allowed.
        let mut vmm = create_vmm_object(InstanceState::Uninitialized);
        assert!(vmm.insert_block_device(non_root_block_device).is_ok());
        match vmm.rescan_block_device(&scratch_id) {
            Err(VmmActionError::DriveConfig(
                ErrorKind::User,
                DriveError::OperationNotAllowedPreBoot,
            )) => (),
            other => panic!(
                "expected OperationNotAllowedPreBoot before boot, got error: {:?}",
                other.err()
            ),
        }
    }

    /// Exercises `init_logger` and `flush_metrics`: the rejected configurations first, then a
    /// valid configuration followed by two metrics flushes read back from the metrics file.
    #[test]
    fn test_init_logger_from_api() {
        // Error case: update after instance is running
        let log_file = NamedTempFile::new().unwrap();
        let metrics_file = NamedTempFile::new().unwrap();
        let desc = LoggerConfig {
            log_fifo: log_file.path().to_str().unwrap().to_string(),
            metrics_fifo: metrics_file.path().to_str().unwrap().to_string(),
            level: LoggerLevel::Warning,
            show_level: true,
            show_log_origin: true,
            options: Value::Array(vec![]),
        };

        let mut vmm = create_vmm_object(InstanceState::Running);
        assert!(vmm.init_logger(desc).is_err());

        // Reset vmm state to test the other scenarios.
        vmm.set_instance_state(InstanceState::Uninitialized);

        // Error case: initializing logger with invalid pipes returns error.
        let desc = LoggerConfig {
            log_fifo: String::from("not_found_file_log"),
            metrics_fifo: String::from("not_found_file_metrics"),
            level: LoggerLevel::Warning,
            show_level: false,
            show_log_origin: false,
            options: Value::Array(vec![]),
        };
        assert!(vmm.init_logger(desc).is_err());

        // Error case: initializing logger with invalid option flags returns error.
        let desc = LoggerConfig {
            log_fifo: String::from("not_found_file_log"),
            metrics_fifo: String::from("not_found_file_metrics"),
            level: LoggerLevel::Warning,
            show_level: false,
            show_log_origin: false,
            options: Value::Array(vec![Value::String("foobar".to_string())]),
        };
        assert!(vmm.init_logger(desc).is_err());

        // Initializing logger with valid pipes is ok.
        let log_file = NamedTempFile::new().unwrap();
        let metrics_file = NamedTempFile::new().unwrap();
        let desc = LoggerConfig {
            log_fifo: log_file.path().to_str().unwrap().to_string(),
            metrics_fifo: metrics_file.path().to_str().unwrap().to_string(),
            level: LoggerLevel::Warning,
            show_level: true,
            show_log_origin: true,
            options: Value::Array(vec![Value::String("LogDirtyPages".to_string())]),
        };
        // Flushing metrics before initializing logger is erroneous.
        let err = vmm.flush_metrics();
        assert!(err.is_err());
        assert_eq!(
            format!("{:?}", err.unwrap_err()),
            "Logger(Internal, FlushMetrics(\"Logger was not initialized.\"))"
        );

        assert!(vmm.init_logger(desc).is_ok());

        assert!(vmm.flush_metrics().is_ok());
        // Borrow the path instead of moving `metrics_file` into `File::open`, so the temporary
        // file handle stays owned by this test until the end of the function.
        let f = File::open(metrics_file.path()).unwrap();
        let mut reader = BufReader::new(f);

        let mut line = String::new();
        reader.read_line(&mut line).unwrap();
        assert!(line.contains("utc_timestamp_ms"));

        // It is safe to do that because the tests are run sequentially (so no other test may be
        // writing to the same file).
        assert!(vmm.flush_metrics().is_ok());
        // `read_line` appends to the buffer; clear it so the assertion below checks the newly
        // flushed line rather than the one that was already verified.
        line.clear();
        reader.read_line(&mut line).unwrap();
        assert!(line.contains("utc_timestamp_ms"));
    }

    /// Checks that a VMM in the `Uninitialized` state reports a dirty page count of zero.
    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_dirty_page_count() {
        // Booting an actual guest and getting real data is covered by
        // `kvm::tests::run_code_test`, so only the initial count is checked here.
        let mut vmm = create_vmm_object(InstanceState::Uninitialized);
        let dirty_pages = vmm.get_dirty_page_count();
        assert_eq!(dirty_pages, 0);
    }
}
