From dbc8f7aa69ad72b1a24e7fbbe6898c7231270886 Mon Sep 17 00:00:00 2001 From: Adrian Catangiu Date: Thu, 30 Sep 2021 18:05:40 +0300 Subject: [PATCH 01/22] vmm: memory::restore doesn't require file Memory::restore now takes an optional &File and creates file-backed memory or anonymous memory depending on the option. Signed-off-by: Adrian Catangiu --- src/vmm/src/memory_snapshot.rs | 53 +++++++++++++++--------------- src/vmm/src/persist.rs | 3 +- src/vmm/tests/integration_tests.rs | 8 +++-- 3 files changed, 35 insertions(+), 29 deletions(-) diff --git a/src/vmm/src/memory_snapshot.rs b/src/vmm/src/memory_snapshot.rs index 64a212a06dc..b219c424eb2 100644 --- a/src/vmm/src/memory_snapshot.rs +++ b/src/vmm/src/memory_snapshot.rs @@ -7,6 +7,8 @@ use std::fmt::{Display, Formatter}; use std::fs::File; use std::io::SeekFrom; +use crate::DirtyBitmap; +use utils::{errno, get_page_size}; use versionize::{VersionMap, Versionize, VersionizeResult}; use versionize_derive::Versionize; use vm_memory::{ @@ -14,14 +16,13 @@ use vm_memory::{ GuestMemoryRegion, MemoryRegionAddress, }; -use crate::DirtyBitmap; -use utils::{errno, get_page_size}; - /// State of a guest memory region saved to file/buffer. #[derive(Debug, PartialEq, Versionize)] // NOTICE: Any changes to this structure require a snapshot version bump. pub struct GuestMemoryRegionState { - /// Base address. + // This should have been named `base_guest_addr` since it's _guest_ addr, but for + // backward compatibility we have to keep this name. At least this comment should help. + /// Base GuestAddress. pub base_address: u64, /// Region size. pub size: usize, @@ -29,7 +30,7 @@ pub struct GuestMemoryRegionState { pub offset: u64, } -/// Guest memory state. +/// Describes guest memory regions and their snapshot file mappings. #[derive(Debug, Default, PartialEq, Versionize)] // NOTICE: Any changes to this structure require a snapshot version bump. 
pub struct GuestMemoryState { @@ -55,7 +56,7 @@ where /// Creates a GuestMemoryMmap given a `file` containing the data /// and a `state` containing mapping information. fn restore( - file: &File, + file: Option<&File>, state: &GuestMemoryState, track_dirty_pages: bool, ) -> std::result::Result; @@ -176,28 +177,27 @@ impl SnapshotMemory for GuestMemoryMmap { .map_err(Error::WriteMemory) } - /// Creates a GuestMemoryMmap given a `file` containing the data - /// and a `state` containing mapping information. + /// Creates a GuestMemoryMmap backed by a `file` if present, otherwise backed + /// by anonymous memory. Memory layout and ranges are described in `state` param. fn restore( - file: &File, + file: Option<&File>, state: &GuestMemoryState, track_dirty_pages: bool, ) -> std::result::Result { - vm_memory::create_guest_memory( - &state - .regions - .iter() - .map(|r| { - ( - Some(FileOffset::new(file.try_clone().unwrap(), r.offset)), - GuestAddress(r.base_address), - r.size, - ) - }) - .collect::>(), - track_dirty_pages, - ) - .map_err(Error::CreateMemory) + let mut regions = vec![]; + for region in state.regions.iter() { + let f = match file { + Some(f) => Some(FileOffset::new( + f.try_clone().map_err(Error::FileHandle)?, + region.offset, + )), + None => None, + }; + + regions.push((f, GuestAddress(region.base_address), region.size)); + } + + vm_memory::create_guest_memory(&regions, track_dirty_pages).map_err(Error::CreateMemory) } } @@ -302,7 +302,8 @@ mod tests { guest_memory.dump(&mut memory_file.as_file()).unwrap(); let restored_guest_memory = - GuestMemoryMmap::restore(&memory_file.as_file(), &memory_state, false).unwrap(); + GuestMemoryMmap::restore(Some(memory_file.as_file()), &memory_state, false) + .unwrap(); // Check that the region contents are the same. let mut actual_region = vec![0u8; page_size * 2]; @@ -336,7 +337,7 @@ mod tests { // We can restore from this because this is the first dirty dump.
let restored_guest_memory = - GuestMemoryMmap::restore(&file.as_file(), &memory_state, false).unwrap(); + GuestMemoryMmap::restore(Some(file.as_file()), &memory_state, false).unwrap(); // Check that the region contents are the same. let mut actual_region = vec![0u8; page_size * 2]; diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 75fa29494b1..8ddc1246148 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -487,7 +487,8 @@ fn guest_memory_from_file( ) -> std::result::Result { use self::LoadSnapshotError::{DeserializeMemory, MemoryBackingFile}; let mem_file = File::open(mem_file_path).map_err(MemoryBackingFile)?; - GuestMemoryMmap::restore(&mem_file, mem_state, track_dirty_pages).map_err(DeserializeMemory) + GuestMemoryMmap::restore(Some(&mem_file), mem_state, track_dirty_pages) + .map_err(DeserializeMemory) } #[cfg(target_arch = "x86_64")] diff --git a/src/vmm/tests/integration_tests.rs b/src/vmm/tests/integration_tests.rs index 413018aa93c..fafa18dcaf7 100644 --- a/src/vmm/tests/integration_tests.rs +++ b/src/vmm/tests/integration_tests.rs @@ -217,8 +217,12 @@ fn verify_load_snapshot(snapshot_file: TempFile, memory_file: TempFile) { VERSION_MAP.clone(), ) .unwrap(); - let mem = GuestMemoryMmap::restore(memory_file.as_file(), &microvm_state.memory_state, false) - .unwrap(); + let mem = GuestMemoryMmap::restore( + Some(memory_file.as_file()), + &microvm_state.memory_state, + false, + ) + .unwrap(); let vm_resources = &mut VmResources::default(); From dafbc9913ae3d30b1580ec9862811d5700d46093 Mon Sep 17 00:00:00 2001 From: Adrian Catangiu Date: Thu, 30 Sep 2021 17:53:50 +0300 Subject: [PATCH 02/22] enhance /snapshot/load API Deprecate 'mem_file_path' field and add a new 'mem_backend' optional object. At least one of 'mem_file_path' and `mem_backend` fields are required and it is forbidden to specify both at the same time.
`mem_backend` object contains: - 'backend_type': required String parameter that can take either: - File - Uffd as valid values. - 'backend_path': required String parameter. Interpretation of this field depends on the value of 'backend_type': - Path to file that contains the guest memory to be loaded if type is 'File', - Path to UDS where a custom page-fault handler process is listening and expecting a Uffd to be sent by Firecracker. The Uffd is used to handle Firecracker's guest memory page faults in this separate process. If `mem_file_path` is specified instead of `mem_backend`, we construct the `MemBackendConfig` object from the path specified, through `mem_file_path`, with `File` as backend type. Add plumbing for the new API, the Uffd path is only a stub for now. Signed-off-by: Adrian Catangiu Signed-off-by: Luminita Voicu --- CHANGELOG.md | 13 ++ src/api_server/src/parsed_request.rs | 19 +- src/api_server/src/request/snapshot.rs | 220 ++++++++++++++++++++++-- src/api_server/swagger/firecracker.yaml | 34 +++- src/vmm/src/persist.rs | 30 +++- src/vmm/src/rpc_interface.rs | 21 ++- src/vmm/src/vmm_config/snapshot.rs | 54 +++++- 7 files changed, 353 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 790b81eb52d..821e516fd79 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,18 @@ piggy-backing on `--http-api-max-payload-size`. If left unconfigured it defaults to the value of `--http-api-max-payload-size`, to provide backwards compatibility. +- Added optional `mem_backend` body field in `PUT` requests on `/snapshot/load`. + This new parameter is an object that defines the configuration of the backend + responsible for handling memory loading during snapshot restore. The + `mem_backend` parameter contains `backend_type` and `backend_path` required + fields. `backend_type` is an enum that can take either `File` or `Uffd` as + value. Interpretation of `backend_path` field depends on the value of + `backend_type`. 
If `File`, then the user must provide the path to file that + contains the guest memory to be loaded. Otherwise, if `backend_type` is `Uffd`, + then `backend_path` is the path to a unix domain socket where a custom page + fault handler process is listening and expecting a UFFD to be sent by + Firecracker. The UFFD is used to handle the guest memory page faults in the + separate process. ### Changed @@ -25,6 +37,7 @@ - MmdsV2 is now Generally Available. - MmdsV1 is now deprecated and will be removed in Firecracker v2.0.0. Use MmdsV2 instead. +- Deprecated `mem_file_path` body field in `PUT` on `/snapshot/load` request. ### Fixed diff --git a/src/api_server/src/parsed_request.rs b/src/api_server/src/parsed_request.rs index a52ea4c5d2e..17a0660ff68 100644 --- a/src/api_server/src/parsed_request.rs +++ b/src/api_server/src/parsed_request.rs @@ -919,9 +919,13 @@ pub(crate) mod tests { assert!(connection.try_read().is_ok()); let req = connection.pop_parsed_request().unwrap(); assert!(ParsedRequest::try_from_request(&req).is_ok()); + let body = "{ \ \"snapshot_path\": \"foo\", \ - \"mem_file_path\": \"bar\", \ + \"mem_backend\": { \ + \"backend_type\": \"File\", \ + \"backend_path\": \"bar\" \ + }, \ \"enable_diff_snapshots\": true \ }"; sender @@ -931,6 +935,19 @@ pub(crate) mod tests { assert!(connection.try_read().is_ok()); let req = connection.pop_parsed_request().unwrap(); assert!(ParsedRequest::try_from_request(&req).is_ok()); + + let body = "{ \ + \"snapshot_path\": \"foo\", \ + \"mem_file_path\": \"bar\", \ + \"resume_vm\": true \ + }"; + sender + .write_all(http_request("PUT", "/snapshot/load", Some(&body)).as_bytes()) + .unwrap(); + + assert!(connection.try_read().is_ok()); + let req = connection.pop_parsed_request().unwrap(); + assert!(ParsedRequest::try_from_request(&req).is_ok()); } #[test] diff --git a/src/api_server/src/request/snapshot.rs b/src/api_server/src/request/snapshot.rs index 7124c0c0d09..6d1250b52d8 100644 --- 
a/src/api_server/src/request/snapshot.rs +++ b/src/api_server/src/request/snapshot.rs @@ -5,9 +5,23 @@ use super::super::VmmAction; use crate::parsed_request::{Error, ParsedRequest}; use crate::request::Body; use crate::request::{Method, StatusCode}; -use vmm::vmm_config::snapshot::{CreateSnapshotParams, LoadSnapshotParams}; +use logger::{IncMetric, METRICS}; +use serde::de::Error as DeserializeError; +use vmm::vmm_config::snapshot::{ + CreateSnapshotParams, LoadSnapshotConfig, LoadSnapshotParams, MemBackendConfig, MemBackendType, +}; use vmm::vmm_config::snapshot::{Vm, VmState}; +/// Deprecation message for the `mem_file_path` field. +const LOAD_DEPRECATION_MESSAGE: &str = "PUT /snapshot/load: mem_file_path field is deprecated."; +/// None of the `mem_backend` or `mem_file_path` fields has been specified. +pub const MISSING_FIELD: &str = + "missing field: either `mem_backend` or `mem_file_path` is required"; +/// Both the `mem_backend` and `mem_file_path` fields have been specified. +/// Only specifying one of them is allowed. 
+pub const TOO_MANY_FIELDS: &str = + "too many fields: either `mem_backend` or `mem_file_path` exclusively is required"; + pub(crate) fn parse_put_snapshot( body: &Body, request_type_from_path: Option<&&str>, @@ -18,10 +32,7 @@ pub(crate) fn parse_put_snapshot( serde_json::from_slice::(body.raw()) .map_err(Error::SerdeJson)?, ))), - "load" => Ok(ParsedRequest::new_sync(VmmAction::LoadSnapshot( - serde_json::from_slice::(body.raw()) - .map_err(Error::SerdeJson)?, - ))), + "load" => parse_put_snapshot_load(body), _ => Err(Error::InvalidPathMethod( format!("/snapshot/{}", request_type), Method::Put, @@ -43,10 +54,64 @@ pub(crate) fn parse_patch_vm_state(body: &Body) -> Result } } +fn parse_put_snapshot_load(body: &Body) -> Result { + let snapshot_config = + serde_json::from_slice::(body.raw()).map_err(Error::SerdeJson)?; + + match (&snapshot_config.mem_backend, &snapshot_config.mem_file_path) { + // Ensure `mem_file_path` and `mem_backend` fields are not present at the same time. + (Some(_), Some(_)) => return Err(Error::SerdeJson(serde_json::Error::custom(TOO_MANY_FIELDS))), + // Ensure that one of `mem_file_path` or `mem_backend` fields is always specified. + (None, None) => return Err(Error::SerdeJson(serde_json::Error::custom(MISSING_FIELD))), + _ => {} + } + + // Check for the presence of deprecated `mem_file_path` field and create + // deprecation message if found. + let mut deprecation_message = None; + if snapshot_config.mem_file_path.is_some() { + // `mem_file_path` field in request is deprecated. + METRICS.deprecated_api.deprecated_http_api_calls.inc(); + deprecation_message = Some(LOAD_DEPRECATION_MESSAGE); + } + + // If `mem_file_path` is specified instead of `mem_backend`, we construct the + // `MemBackendConfig` object from the path specified, with `File` as backend type. 
+ let mem_backend = match snapshot_config.mem_backend { + Some(backend_cfg) => backend_cfg, + None => { + MemBackendConfig { + // This is safe to unwrap() because we ensure above that one of the two: + // either `mem_file_path` or `mem_backend` field is always specified. + backend_path: snapshot_config.mem_file_path.unwrap(), + backend_type: MemBackendType::File, + } + } + }; + + let snapshot_params = LoadSnapshotParams { + snapshot_path: snapshot_config.snapshot_path, + mem_backend, + enable_diff_snapshots: snapshot_config.enable_diff_snapshots, + resume_vm: snapshot_config.resume_vm, + }; + + // Construct the `ParsedRequest` object. + let mut parsed_req = ParsedRequest::new_sync(VmmAction::LoadSnapshot(snapshot_params)); + + // If `mem_file_path` was present, set the deprecation message in `parsing_info`. + if let Some(msg) = deprecation_message { + parsed_req.parsing_info().append_deprecation_message(msg); + } + + Ok(parsed_req) +} + #[cfg(test)] mod tests { use super::*; - use crate::parsed_request::tests::vmm_action_from_request; + use crate::parsed_request::tests::{depr_action_from_req, vmm_action_from_request}; + use vmm::vmm_config::snapshot::{MemBackendConfig, MemBackendType}; #[test] fn test_parse_put_snapshot() { @@ -102,36 +167,87 @@ mod tests { body = r#"{ "snapshot_path": "foo", - "mem_file_path": "bar" + "mem_backend": { + "backend_path": "bar", + "backend_type": "File" + } }"#; let mut expected_cfg = LoadSnapshotParams { snapshot_path: PathBuf::from("foo"), - mem_file_path: PathBuf::from("bar"), + mem_backend: MemBackendConfig { + backend_path: PathBuf::from("bar"), + backend_type: MemBackendType::File, + }, enable_diff_snapshots: false, resume_vm: false, }; - match vmm_action_from_request(parse_put_snapshot(&Body::new(body), Some(&"load")).unwrap()) - { + + let mut parsed_request = parse_put_snapshot(&Body::new(body), Some(&"load")).unwrap(); + assert!(parsed_request + .parsing_info() + .take_deprecation_message() + .is_none()); + + match 
vmm_action_from_request(parsed_request) { VmmAction::LoadSnapshot(cfg) => assert_eq!(cfg, expected_cfg), _ => panic!("Test failed."), } body = r#"{ "snapshot_path": "foo", - "mem_file_path": "bar", + "mem_backend": { + "backend_path": "bar", + "backend_type": "File" + }, "enable_diff_snapshots": true }"#; expected_cfg = LoadSnapshotParams { snapshot_path: PathBuf::from("foo"), - mem_file_path: PathBuf::from("bar"), + mem_backend: MemBackendConfig { + backend_path: PathBuf::from("bar"), + backend_type: MemBackendType::File, + }, enable_diff_snapshots: true, resume_vm: false, }; - match vmm_action_from_request(parse_put_snapshot(&Body::new(body), Some(&"load")).unwrap()) - { + let mut parsed_request = parse_put_snapshot(&Body::new(body), Some(&"load")).unwrap(); + assert!(parsed_request + .parsing_info() + .take_deprecation_message() + .is_none()); + match vmm_action_from_request(parsed_request) { + VmmAction::LoadSnapshot(cfg) => assert_eq!(cfg, expected_cfg), + _ => panic!("Test failed."), + } + + body = r#"{ + "snapshot_path": "foo", + "mem_backend": { + "backend_path": "bar", + "backend_type": "Uffd" + }, + "resume_vm": true + }"#; + + expected_cfg = LoadSnapshotParams { + snapshot_path: PathBuf::from("foo"), + mem_backend: MemBackendConfig { + backend_path: PathBuf::from("bar"), + backend_type: MemBackendType::Uffd, + }, + enable_diff_snapshots: false, + resume_vm: true, + }; + + let mut parsed_request = parse_put_snapshot(&Body::new(body), Some(&"load")).unwrap(); + assert!(parsed_request + .parsing_info() + .take_deprecation_message() + .is_none()); + match vmm_action_from_request(parsed_request) { VmmAction::LoadSnapshot(cfg) => assert_eq!(cfg, expected_cfg), _ => panic!("Test failed."), } @@ -144,17 +260,87 @@ mod tests { expected_cfg = LoadSnapshotParams { snapshot_path: PathBuf::from("foo"), - mem_file_path: PathBuf::from("bar"), + mem_backend: MemBackendConfig { + backend_path: PathBuf::from("bar"), + backend_type: MemBackendType::File, + }, 
enable_diff_snapshots: false, resume_vm: true, }; - match vmm_action_from_request(parse_put_snapshot(&Body::new(body), Some(&"load")).unwrap()) - { + let parsed_request = parse_put_snapshot(&Body::new(body), Some(&"load")).unwrap(); + match depr_action_from_req(parsed_request, Some(LOAD_DEPRECATION_MESSAGE.to_string())) { VmmAction::LoadSnapshot(cfg) => assert_eq!(cfg, expected_cfg), _ => panic!("Test failed."), } + body = r#"{ + "snapshot_path": "foo", + "mem_backend": { + "backend_path": "bar" + } + }"#; + + assert_eq!( + parse_put_snapshot(&Body::new(body), Some(&"load")).err().unwrap().to_string(), + "An error occurred when deserializing the json body of a request: missing field `backend_type` at line 5 column 17." + ); + + body = r#"{ + "snapshot_path": "foo", + "mem_backend": { + "backend_type": "File", + } + }"#; + + assert_eq!( + parse_put_snapshot(&Body::new(body), Some(&"load")) + .err().unwrap().to_string(), + "An error occurred when deserializing the json body of a request: trailing comma at line 5 column 17." + ); + + body = r#"{ + "snapshot_path": "foo", + "mem_file_path": "bar", + "mem_backend": { + "backend_path": "bar", + "backend_type": "Uffd" + } + }"#; + + assert_eq!( + parse_put_snapshot(&Body::new(body), Some(&"load")) + .err() + .unwrap() + .to_string(), + Error::SerdeJson(serde_json::Error::custom(TOO_MANY_FIELDS.to_string())).to_string() + ); + + body = r#"{ + "snapshot_path": "foo" + }"#; + + assert_eq!( + parse_put_snapshot(&Body::new(body), Some(&"load")) + .err() + .unwrap() + .to_string(), + Error::SerdeJson(serde_json::Error::custom(MISSING_FIELD.to_string())).to_string() + ); + + body = r#"{ + "mem_backend": { + "backend_path": "bar", + "backend_type": "Uffd" + } + }"#; + + assert_eq!( + parse_put_snapshot(&Body::new(body), Some(&"load")) + .err().unwrap().to_string(), + "An error occurred when deserializing the json body of a request: missing field `snapshot_path` at line 6 column 15." 
+ ); + assert!(parse_put_snapshot(&Body::new(body), Some(&"invalid")).is_err()); assert!(parse_put_snapshot(&Body::new(body), None).is_err()); } diff --git a/src/api_server/swagger/firecracker.yaml b/src/api_server/swagger/firecracker.yaml index 5bb7a852155..07d242532e6 100644 --- a/src/api_server/swagger/firecracker.yaml +++ b/src/api_server/swagger/firecracker.yaml @@ -960,6 +960,25 @@ definitions: maximum: 32 description: Number of vCPUs (either 1 or an even number) + MemoryBackend: + type: object + required: + - backend_type + - backend_path + properties: + backend_type: + type: string + enum: + - File + - Uffd + backend_path: + type: string + description: Based on 'backend_type' it is either + 1) Path to the file that contains the guest memory to be loaded + 2) Path to the UDS where a process is listening for a UFFD initialization + control payload and open file descriptor that it can use to serve this + process's guest memory page faults + Metrics: type: object description: @@ -1097,8 +1116,10 @@ definitions: SnapshotLoadParams: type: object + description: + Defines the configuration used for handling snapshot resume. Exactly one of + the two `mem_*` fields must be present in the body of the request. required: - - mem_file_path - snapshot_path properties: enable_diff_snapshots: @@ -1107,7 +1128,16 @@ definitions: Enable support for incremental (diff) snapshots by tracking dirty guest pages. mem_file_path: type: string - description: Path to the file that contains the guest memory to be loaded. + description: + Path to the file that contains the guest memory to be loaded. + This parameter has been deprecated and is only allowed if + `mem_backend` is not present. + mem_backend: + $ref: "#/definitions/MemoryBackend" + description: + Configuration for the backend that handles memory load. If this field + is specified, `mem_file_path` is forbidden. Either `mem_backend` or + `mem_file_path` must be present at a time. 
snapshot_path: type: string description: Path to the file that contains the microVM state to be loaded. diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 8ddc1246148..aec23ce1825 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -13,7 +13,9 @@ use crate::builder::{self, StartMicrovmError}; use crate::device_manager::persist::Error as DevicePersistError; use crate::mem_size_mib; use crate::vmm_config::machine_config::MAX_SUPPORTED_VCPUS; -use crate::vmm_config::snapshot::{CreateSnapshotParams, LoadSnapshotParams, SnapshotType}; +use crate::vmm_config::snapshot::{ + CreateSnapshotParams, LoadSnapshotParams, MemBackendType, SnapshotType, +}; use crate::vstate::{self, vcpu::VcpuState, vm::VmState}; use crate::device_manager::persist::DeviceStates; @@ -443,18 +445,22 @@ pub fn restore_from_snapshot( vm_resources: &mut VmResources, ) -> std::result::Result>, LoadSnapshotError> { use self::LoadSnapshotError::*; - let track_dirty_pages = params.enable_diff_snapshots; let microvm_state = snapshot_state_from_file(&params.snapshot_path, version_map)?; // Some sanity checks before building the microvm.
snapshot_state_sanity_check(&microvm_state)?; - let guest_memory = guest_memory_from_file( - &params.mem_file_path, - &microvm_state.memory_state, - track_dirty_pages, - )?; - + let mem_backend_path = &params.mem_backend.backend_path; + let mem_state = &microvm_state.memory_state; + let track_dirty_pages = params.enable_diff_snapshots; + let guest_memory = match params.mem_backend.backend_type { + MemBackendType::File => { + guest_memory_from_file(mem_backend_path, mem_state, track_dirty_pages) + } + MemBackendType::Uffd => { + guest_memory_from_uffd(mem_backend_path, mem_state, track_dirty_pages) + } + }?; builder::build_microvm_from_snapshot( instance_info, event_manager, @@ -491,6 +497,14 @@ fn guest_memory_from_file( .map_err(DeserializeMemory) } +fn guest_memory_from_uffd( + _mem_file_path: &Path, + _mem_state: &GuestMemoryState, + _track_dirty_pages: bool, +) -> std::result::Result { + unimplemented!() +} + #[cfg(target_arch = "x86_64")] fn validate_devices_number(device_number: usize) -> std::result::Result<(), CreateSnapshotError> { use self::CreateSnapshotError::TooManyDevices; diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index d008b7e4024..72cde8d4a6b 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -803,6 +803,7 @@ mod tests { use devices::virtio::VsockError; use seccompiler::BpfThreadMap; + use crate::vmm_config::snapshot::{MemBackendConfig, MemBackendType}; use mmds::data_store::MmdsVersion; use std::path::PathBuf; @@ -1604,7 +1605,10 @@ mod tests { // Without resume. let req = VmmAction::LoadSnapshot(LoadSnapshotParams { snapshot_path: PathBuf::new(), - mem_file_path: PathBuf::new(), + mem_backend: MemBackendConfig { + backend_type: MemBackendType::File, + backend_path: PathBuf::new(), + }, enable_diff_snapshots: false, resume_vm: false, }); @@ -1617,7 +1621,10 @@ mod tests { // With resume.
let req = VmmAction::LoadSnapshot(LoadSnapshotParams { snapshot_path: PathBuf::new(), - mem_file_path: PathBuf::new(), + mem_backend: MemBackendConfig { + backend_type: MemBackendType::File, + backend_path: PathBuf::new(), + }, enable_diff_snapshots: false, resume_vm: true, }); @@ -2025,7 +2032,10 @@ mod tests { check_runtime_request_err( VmmAction::LoadSnapshot(LoadSnapshotParams { snapshot_path: PathBuf::new(), - mem_file_path: PathBuf::new(), + mem_backend: MemBackendConfig { + backend_type: MemBackendType::File, + backend_path: PathBuf::new(), + }, enable_diff_snapshots: false, resume_vm: false, }), @@ -2044,7 +2054,10 @@ mod tests { // Load snapshot should no longer be allowed. let req = VmmAction::LoadSnapshot(LoadSnapshotParams { snapshot_path: PathBuf::new(), - mem_file_path: PathBuf::new(), + mem_backend: MemBackendConfig { + backend_type: MemBackendType::File, + backend_path: PathBuf::new(), + }, enable_diff_snapshots: false, resume_vm: false, }); diff --git a/src/vmm/src/vmm_config/snapshot.rs b/src/vmm/src/vmm_config/snapshot.rs index 9662de08cd3..c8634b66980 100644 --- a/src/vmm/src/vmm_config/snapshot.rs +++ b/src/vmm/src/vmm_config/snapshot.rs @@ -18,11 +18,24 @@ pub enum SnapshotType { } impl Default for SnapshotType { - fn default() -> SnapshotType { + fn default() -> Self { SnapshotType::Full } } +/// Specifies the method through which guest memory will get populated when +/// resuming from a snapshot: +/// 1) A file that contains the guest memory to be loaded, +/// 2) An UDS where a custom page-fault handler process is listening for +/// the UFFD set up by Firecracker to handle its guest memory page faults. +#[derive(Debug, Deserialize, PartialEq)] +pub enum MemBackendType { + /// Guest memory contents will be loaded from a file. + File, + /// Guest memory will be served through UFFD by a separate process. + Uffd, +} + /// Stores the configuration that will be used for creating a snapshot. 
#[derive(Debug, Deserialize, PartialEq, Serialize)] #[serde(deny_unknown_fields)] @@ -41,23 +54,52 @@ pub struct CreateSnapshotParams { } /// Stores the configuration that will be used for loading a snapshot. -#[derive(Debug, Deserialize, PartialEq, Serialize)] -#[serde(deny_unknown_fields)] +#[derive(Debug, PartialEq)] pub struct LoadSnapshotParams { /// Path to the file that contains the microVM state to be loaded. pub snapshot_path: PathBuf, - /// Path to the file that contains the guest memory to be loaded. - pub mem_file_path: PathBuf, + /// Specifies guest memory backend configuration. + pub mem_backend: MemBackendConfig, /// Setting this flag will enable KVM dirty page tracking and will /// allow taking subsequent incremental snapshots. - #[serde(default)] pub enable_diff_snapshots: bool, /// When set to true, the vm is also resumed if the snapshot load /// is successful. + pub resume_vm: bool, +} + +/// Stores the configuration for loading a snapshot that is provided by the user. +#[derive(Deserialize)] +#[serde(deny_unknown_fields)] +pub struct LoadSnapshotConfig { + /// Path to the file that contains the microVM state to be loaded. + pub snapshot_path: PathBuf, + /// Path to the file that contains the guest memory to be loaded. To be used only if + /// `mem_backend` is not specified. + #[serde(skip_serializing_if = "Option::is_none")] + pub mem_file_path: Option, + /// Guest memory backend configuration. Is not to be used in conjunction with `mem_file_path`. + /// None value is allowed only if `mem_file_path` is present. + #[serde(skip_serializing_if = "Option::is_none")] + pub mem_backend: Option, + /// Whether or not to enable KVM dirty page tracking. + #[serde(default)] + pub enable_diff_snapshots: bool, + /// Whether or not to resume the vm post snapshot load. #[serde(default)] pub resume_vm: bool, } +/// Stores the configuration used for managing snapshot memory. 
+#[derive(Debug, Deserialize, PartialEq)] +#[serde(deny_unknown_fields)] +pub struct MemBackendConfig { + /// Path to the backend used to handle the guest memory. + pub backend_path: PathBuf, + /// Specifies the guest memory backend type. + pub backend_type: MemBackendType, +} + /// The microVM state options. #[derive(Debug, Deserialize, Serialize)] pub enum VmState { From e8bbb3d6bcd35bfe7680273ea110d9e7480614ee Mon Sep 17 00:00:00 2001 From: Adrian Catangiu Date: Thu, 30 Sep 2021 18:35:08 +0300 Subject: [PATCH 03/22] add Uffd support for memory snapshots When /snapshot/load specifies memory backend type as 'UffdOverUDS', Firecracker doesn't handle the memory file itself anymore and expects an external process to handle its guest memory page faults. To do this, anonymous memory is mmapped as guest memory while keeping the original memory regions shape. Then a Uffd is created and each guest memory range is registered with the Uffd so that any page faults won't be handled by the kernel, but will come up as events on the Uffd. Firecracker then sends the memory ranges descriptions/mappings along with the Uffd over a UnixDomainSocket specified in 'mem_backend_path' parameter on the API call. It is expected that on the other side there is already a process listening for incoming connections. Once Firecracker's connection is accepted, Firecracker sends the mappings and Uffd. The receiving process is from now responsible for handling any pagefaults on the Uffd. The communication medium is a UDS, the protocol is SOCK_STREAM and the encoding is JSON. 
Signed-off-by: Adrian Catangiu --- Cargo.lock | 185 +++++++++++++++++++++++-- src/api_server/src/request/snapshot.rs | 4 +- src/utils/src/lib.rs | 4 +- src/vmm/Cargo.toml | 1 + src/vmm/src/builder.rs | 8 ++ src/vmm/src/lib.rs | 5 + src/vmm/src/persist.rs | 151 +++++++++++++++++--- src/vmm/tests/integration_tests.rs | 1 + 8 files changed, 329 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7588d046f0d..121a99469dc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,7 +17,7 @@ version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e8b47f52ea9bae42228d07ec09eb676433d7c4ed1ebdf0f1d1c29ed446f1ab8" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "cipher", "cpufeatures", "opaque-debug", @@ -107,6 +107,25 @@ dependencies = [ "serde", ] +[[package]] +name = "bindgen" +version = "0.59.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bd2a9a458e8f4304c52c43ebb0cfbd520289f8379a52e329a38afda99bf8eb8" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "peeking_take_while", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -146,6 +165,27 @@ dependencies = [ "rustc_version", ] +[[package]] +name = "cc" +version = "1.0.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + [[package]] name = "cfg-if" version = "1.0.0" @@ -161,6 +201,17 @@ dependencies = [ 
"generic-array", ] +[[package]] +name = "clang-sys" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cc00842eed744b858222c4c9faf7243aafc6d33f92f96935263ef4d8a41ce21" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "clap" version = "2.34.0" @@ -238,7 +289,7 @@ version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5aaa7bd5fb665c6864b5f963dd9097905c54125909c7aa94c9e18507cdbe6c53" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "crossbeam-utils", ] @@ -248,7 +299,7 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "crossbeam-epoch", "crossbeam-utils", ] @@ -259,7 +310,7 @@ version = "0.9.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c00d6d2ea26e8b151d99093005cb442fb9a37aeaca582a03ec70946f49ab5ed9" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "crossbeam-utils", "lazy_static", "memoffset", @@ -272,7 +323,7 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e5bed1f1c269533fa816a0a5492b3545209a205ca1a54842be180eb63a16a6" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "lazy_static", ] @@ -397,7 +448,7 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "418d37c8b1d42553c93648be529cb70f920d3baf8ef469b74b9638df426e0b4c" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "libc", "wasi", ] @@ -412,6 +463,12 @@ dependencies = [ "polyval", ] +[[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" + [[package]] name = "half" version = "1.8.2" @@ -503,12 +560,28 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "libc" version = "0.2.117" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e74d72e0f9b65b5b4ca49a346af3976df0f9c61d550727f349ecd559f251a26c" +[[package]] +name = "libloading" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efbc0f03f9a775e9f6aed295c6a1ba2253c5757a9e03d55c6caa46a681abcddd" +dependencies = [ + "cfg-if 1.0.0", + "winapi", +] + [[package]] name = "linux-loader" version = "0.4.0" @@ -524,7 +597,7 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", ] [[package]] @@ -564,6 +637,12 @@ dependencies = [ "vmm-sys-util", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "mmds" version = "0.1.0" @@ -586,6 +665,30 @@ dependencies = [ name = "net_gen" version = "0.1.0" +[[package]] +name = "nix" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f866317acbd3a240710c63f065ffb1e4fd466259045ccb504130b7f668f35c6" +dependencies = [ + "bitflags", + "cc", + "cfg-if 1.0.0", + "libc", + "memoffset", +] + +[[package]] +name = "nom" +version = "7.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b1d11e1ef389c76fe5b81bcaf2ea32cf88b62bc494e19f493d0b30e7a930109" +dependencies = [ + "memchr", + "minimal-lexical", + "version_check", +] + [[package]] name = "num-traits" version = "0.2.14" @@ -617,6 
+720,12 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + [[package]] name = "plotters" version = "0.3.1" @@ -651,7 +760,7 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8419d2b623c7c0896ff2d5d96e2cb4ede590fed28fcc34934f4c33c036e620a1" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "cpufeatures", "opaque-debug", "universal-hash", @@ -809,6 +918,12 @@ version = "0.6.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc_version" version = "0.4.0" @@ -897,6 +1012,12 @@ dependencies = [ "serde", ] +[[package]] +name = "shlex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" + [[package]] name = "snapshot" version = "0.1.0" @@ -933,6 +1054,26 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "thiserror" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + 
[[package]] name = "timerfd" version = "1.2.0" @@ -980,6 +1121,31 @@ dependencies = [ "subtle", ] +[[package]] +name = "userfaultfd" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b738009e099b4ded1ecf19dfb7631f69c24f16e0af6d29fd9b3f54a092aca46" +dependencies = [ + "bitflags", + "cfg-if 1.0.0", + "libc", + "nix", + "thiserror", + "userfaultfd-sys", +] + +[[package]] +name = "userfaultfd-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a4be003c705d2c8dc1234d473856945e291bb998ac2e2d83e70328d964d7458" +dependencies = [ + "bindgen", + "cc", + "cfg-if 0.1.10", +] + [[package]] name = "utils" version = "0.1.0" @@ -1081,6 +1247,7 @@ dependencies = [ "serde", "serde_json", "snapshot", + "userfaultfd", "utils", "versionize", "versionize_derive", @@ -1122,7 +1289,7 @@ version = "0.2.79" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25f1af7423d8588a3d840681122e72e6a24ddbcb3f0ec385cac0d12d24256c06" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "wasm-bindgen-macro", ] diff --git a/src/api_server/src/request/snapshot.rs b/src/api_server/src/request/snapshot.rs index 6d1250b52d8..62c581ad744 100644 --- a/src/api_server/src/request/snapshot.rs +++ b/src/api_server/src/request/snapshot.rs @@ -60,7 +60,9 @@ fn parse_put_snapshot_load(body: &Body) -> Result { match (&snapshot_config.mem_backend, &snapshot_config.mem_file_path) { // Ensure `mem_file_path` and `mem_backend` fields are not present at the same time. - (Some(_), Some(_)) => return Err(Error::SerdeJson(serde_json::Error::custom(TOO_MANY_FIELDS))), + (Some(_), Some(_)) => { + return Err(Error::SerdeJson(serde_json::Error::custom(TOO_MANY_FIELDS))) + } // Ensure that one of `mem_file_path` or `mem_backend` fields is always specified. 
(None, None) => return Err(Error::SerdeJson(serde_json::Error::custom(MISSING_FIELD))), _ => {} diff --git a/src/utils/src/lib.rs b/src/utils/src/lib.rs index 64d2a40e6f9..5bfa7f74a87 100644 --- a/src/utils/src/lib.rs +++ b/src/utils/src/lib.rs @@ -6,8 +6,8 @@ // More specifically, we are re-exporting modules from `vmm_sys_util` as part // of the `utils` crate. pub use vmm_sys_util::{ - epoll, errno, eventfd, fam, generate_fam_struct_impl, ioctl, rand, seek_hole, syscall, tempdir, - tempfile, terminal, + epoll, errno, eventfd, fam, generate_fam_struct_impl, ioctl, rand, seek_hole, sock_ctrl_msg, + syscall, tempdir, tempfile, terminal, }; pub use vmm_sys_util::{ioctl_expr, ioctl_ioc_nr, ioctl_iow_nr}; diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 6f37220c6e9..c1dfb8ed379 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -14,6 +14,7 @@ libc = ">=0.2.39" linux-loader = ">=0.4.0" serde = { version = ">=1.0.27", features = ["derive"] } serde_json = ">=1.0.9" +userfaultfd = ">=0.4.0" versionize = ">=0.1.6" versionize_derive = ">=0.1.3" vm-superio = ">=0.4.0" diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index bfebbd96324..f7b275c4246 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -53,6 +53,7 @@ use linux_loader::loader::KernelLoader; use logger::{error, warn}; use seccompiler::BpfThreadMap; use snapshot::Persist; +use userfaultfd::Uffd; use utils::eventfd::EventFd; use utils::terminal::Terminal; use utils::time::TimestampUs; @@ -237,6 +238,7 @@ fn create_vmm_and_vcpus( instance_info: &InstanceInfo, event_manager: &mut EventManager, guest_memory: GuestMemoryMmap, + uffd: Option, track_dirty_pages: bool, vcpu_count: u8, ) -> std::result::Result<(Vmm, Vec), StartMicrovmError> { @@ -298,6 +300,7 @@ fn create_vmm_and_vcpus( shutdown_exit_code: None, vm, guest_memory, + uffd, vcpus_handles: Vec::new(), vcpus_exit_evt, mmio_device_manager, @@ -362,6 +365,7 @@ pub fn build_microvm_for_boot( instance_info, event_manager, 
guest_memory, + None, track_dirty_pages, vcpu_config.vcpu_count, )?; @@ -444,11 +448,13 @@ pub fn build_microvm_for_boot( /// /// An `Arc` reference of the built `Vmm` is also plugged in the `EventManager`, while another /// is returned. +#[allow(clippy::too_many_arguments)] pub fn build_microvm_from_snapshot( instance_info: &InstanceInfo, event_manager: &mut EventManager, microvm_state: MicrovmState, guest_memory: GuestMemoryMmap, + uffd: Option, track_dirty_pages: bool, seccomp_filters: &BpfThreadMap, vm_resources: &mut VmResources, @@ -463,6 +469,7 @@ pub fn build_microvm_from_snapshot( instance_info, event_manager, guest_memory.clone(), + uffd, track_dirty_pages, vcpu_count, )?; @@ -1088,6 +1095,7 @@ pub mod tests { shutdown_exit_code: None, vm, guest_memory, + uffd: None, vcpus_handles: Vec::new(), vcpus_exit_evt, mmio_device_manager, diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 3ad2db8ecfc..5d28f556d88 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -64,6 +64,7 @@ use logger::{error, info, warn, LoggerError, MetricsError, METRICS}; use rate_limiter::BucketUpdate; use seccompiler::BpfProgram; use snapshot::Persist; +use userfaultfd::Uffd; use utils::epoll::EventSet; use utils::eventfd::EventFd; use vm_memory::{GuestMemory, GuestMemoryMmap, GuestMemoryRegion}; @@ -251,6 +252,10 @@ pub struct Vmm { // Guest VM core resources. vm: Vm, guest_memory: GuestMemoryMmap, + // Save UFFD in order to keep it open in the Firecracker process, as well. + // Since this field is never read again, we need to allow `dead_code`. + #[allow(dead_code)] + uffd: Option, vcpus_handles: Vec, // Used by Vcpus and devices to initiate teardown; Vmm should never write here. 
vcpus_exit_evt: EventFd, diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index aec23ce1825..f3068cfeab9 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -6,7 +6,9 @@ use std::fmt::{Display, Formatter}; use std::fs::{File, OpenOptions}; use std::io::{self, Write}; +use std::os::unix::{io::AsRawFd, net::UnixStream}; use std::path::Path; +use std::process; use std::sync::{Arc, Mutex}; use crate::builder::{self, StartMicrovmError}; @@ -34,11 +36,14 @@ use crate::vmm_config::instance_info::InstanceInfo; use arch::regs::{get_manufacturer_id_from_host, get_manufacturer_id_from_state}; use logger::{error, info}; use seccompiler::BpfThreadMap; +use serde::Serialize; use snapshot::Snapshot; +use userfaultfd::{Uffd, UffdBuilder}; +use utils::sock_ctrl_msg::ScmSocket; use versionize::{VersionMap, Versionize, VersionizeResult}; use versionize_derive::Versionize; use virtio_gen::virtio_ring::VIRTIO_RING_F_EVENT_IDX; -use vm_memory::GuestMemoryMmap; +use vm_memory::{GuestMemory, GuestMemoryMmap}; #[cfg(target_arch = "x86_64")] const FC_V0_23_MAX_DEVICES: u32 = 11; @@ -67,6 +72,35 @@ pub struct MicrovmState { pub device_states: DeviceStates, } +/// This describes the mapping between Firecracker base virtual address and offset in the +/// buffer or file backend for a guest memory region. It is used to tell an external +/// process/thread where to populate the guest memory data for this range. +/// +/// E.g. Guest memory contents for a region of `size` bytes can be found in the backend +/// at `offset` bytes from the beginning, and should be copied/populated into `base_host_address`. +#[derive(Clone, Debug, Serialize)] +pub struct GuestRegionUffdMapping { + /// Base host virtual address where the guest memory contents for this region + /// should be copied/populated. + pub base_host_virt_addr: u64, + /// Region size. + pub size: usize, + /// Offset in the backend file/buffer where the region contents are. 
+ pub offset: u64, +} + +#[derive(Clone, Debug, Serialize)] +/// This describes the information sent by Firecracker to the +/// page fault handler process through the UDS socket. +pub struct UffdInfo { + /// List of guest region mappings between Firecracker base virtual address + /// and offset in the buffer or file backend for a guest memory region. + pub guest_region_uffd_mappings: Vec, + /// Firecracker's PID which can be used by the page fault handler process to + /// bring down Firecracker at the end of the execution or in case an error occurs. + pub pid: u32, +} + /// Errors related to saving and restoring Microvm state. #[derive(Debug)] pub enum MicrovmStateError { @@ -183,20 +217,31 @@ impl Display for CreateSnapshotError { pub enum LoadSnapshotError { /// Failed to build a microVM from snapshot. BuildMicroVm(StartMicrovmError), + /// Snapshot cpu vendor differs than host cpu vendor. + CpuVendorCheck(String), + /// Failed to create an UFFD Builder. + CreateUffdBuilder(userfaultfd::Error), /// Failed to deserialize memory. DeserializeMemory(memory_snapshot::Error), /// Failed to deserialize microVM state. DeserializeMicrovmState(snapshot::Error), + /// Snapshot failed sanity checks. + InvalidSnapshot(String), /// Failed to open memory backing file. MemoryBackingFile(io::Error), /// Failed to resume Vm after loading snapshot. ResumeMicroVm(VmmError), /// Failed to open the snapshot backing file. SnapshotBackingFile(&'static str, io::Error), - /// Snapshot cpu vendor differs than host cpu vendor. - CpuVendorCheck(String), - /// Snapshot failed sanity checks. - InvalidSnapshot(String), + /// Unable to connect to UDS in order to send information regarding + /// handling guest memory page-fault events. + UdsConnection(io::Error), + /// Failed to register guest memory regions to UFFD. + UffdMemoryRegionsRegister(userfaultfd::Error), + /// Failed to send guest memory layout and path to user fault FD used to handle + /// guest memory page faults. 
This information is sent to a UDS where a custom + /// page-fault handler process is listening. + UffdSend(kvm_ioctls::Error), } impl Display for LoadSnapshotError { @@ -204,10 +249,13 @@ impl Display for LoadSnapshotError { use self::LoadSnapshotError::*; match self { BuildMicroVm(err) => write!(f, "Cannot build a microVM from snapshot: {}", err), + CreateUffdBuilder(err) => write!(f, "Cannot create UFFD builder: {:?}", err), + CpuVendorCheck(err) => write!(f, "CPU vendor check failed: {}", err), DeserializeMemory(err) => write!(f, "Cannot deserialize memory: {}", err), DeserializeMicrovmState(err) => { write!(f, "Cannot deserialize the microVM state: {:?}", err) } + InvalidSnapshot(err) => write!(f, "Snapshot sanity check failed: {}", err), MemoryBackingFile(err) => write!(f, "Cannot open the memory file: {}", err), ResumeMicroVm(err) => write!( f, @@ -219,8 +267,16 @@ impl Display for LoadSnapshotError { "Cannot perform {} on the snapshot backing file: {}", action, err ), - CpuVendorCheck(err) => write!(f, "CPU vendor check failed: {}", err), - InvalidSnapshot(err) => write!(f, "Snapshot sanity check failed: {}", err), + UdsConnection(err) => write!( + f, + "Cannot connect to UDS in order to send information on \ + handling guest memory page-faults due to: {}", + err + ), + UffdMemoryRegionsRegister(err) => { + write!(f, "Cannot register memory regions to UFFD: {:?}.", err) + } + UffdSend(err) => write!(f, "Cannot send FD and memory layout to UFFD: {}", err), } } } @@ -453,19 +509,21 @@ pub fn restore_from_snapshot( let mem_backend_path = ¶ms.mem_backend.backend_path; let mem_state = µvm_state.memory_state; let track_dirty_pages = params.enable_diff_snapshots; - let guest_memory = match params.mem_backend.backend_type { - MemBackendType::File => { - guest_memory_from_file(mem_backend_path, mem_state, track_dirty_pages) - } + let (guest_memory, uffd) = match params.mem_backend.backend_type { + MemBackendType::File => ( + guest_memory_from_file(mem_backend_path, 
mem_state, track_dirty_pages)?, + None, + ), MemBackendType::Uffd => { - guest_memory_from_uffd(mem_backend_path, mem_state, track_dirty_pages) + guest_memory_from_uffd(mem_backend_path, mem_state, track_dirty_pages)? } - }?; + }; builder::build_microvm_from_snapshot( instance_info, event_manager, microvm_state, guest_memory, + uffd, track_dirty_pages, seccomp_filters, vm_resources, @@ -498,11 +556,68 @@ fn guest_memory_from_file( } fn guest_memory_from_uffd( - _mem_file_path: &Path, - _mem_state: &GuestMemoryState, - _track_dirty_pages: bool, -) -> std::result::Result { - unimplemented!() + mem_uds_path: &Path, + mem_state: &GuestMemoryState, + track_dirty_pages: bool, +) -> std::result::Result<(GuestMemoryMmap, Option), LoadSnapshotError> { + use self::LoadSnapshotError::{ + CreateUffdBuilder, DeserializeMemory, UdsConnection, UffdMemoryRegionsRegister, UffdSend, + }; + + let guest_memory = + GuestMemoryMmap::restore(None, mem_state, track_dirty_pages).map_err(DeserializeMemory)?; + + let uffd = UffdBuilder::new() + .close_on_exec(true) + .non_blocking(true) + .create() + .map_err(CreateUffdBuilder)?; + + let mut backend_mappings = Vec::with_capacity(guest_memory.num_regions()); + for (mem_region, state_region) in guest_memory.iter().zip(mem_state.regions.iter()) { + let host_base_addr = mem_region.as_ptr(); + let size = mem_region.size(); + + uffd.register(host_base_addr as _, size as _) + .map_err(UffdMemoryRegionsRegister)?; + backend_mappings.push(GuestRegionUffdMapping { + base_host_virt_addr: host_base_addr as u64, + size, + offset: state_region.offset, + }); + } + + // Wrapp backend mappings and PID into a structure to be sent through the UDS socket. + let uffd_info = UffdInfo { + guest_region_uffd_mappings: backend_mappings, + pid: process::id(), + }; + + // This is safe to unwrap() because we control the contents of the structure + // (i.e RegionBackendMapping entries and PID). 
+ let uffd_info = serde_json::to_string(&uffd_info).unwrap(); + + let socket = UnixStream::connect(mem_uds_path).map_err(UdsConnection)?; + socket + .send_with_fd( + uffd_info.as_bytes(), + // In the happy case, we can close the fd since the other process has it open and is + // using it to serve us pages. + // + // The problem is that if other process crashes/exits, firecracker guest memory + // will simply revert to anon-mem behavior which would lead to silent errors and + // undefined behavior. + // + // To tackle this scenario, we send Firecracker's PID to the page fault handler, + // so that the handler is able to notify Firecracker of any crashes/exits. + // Moreover, Firecracker holds a copy of the UFFD fd as well, so that even if the + // page fault handler process does not tear down Firecracker when necessary, the + // uffd will still be alive but with no one to serve faults, leading to guest freeze. + uffd.as_raw_fd(), + ) + .map_err(UffdSend)?; + + Ok((guest_memory, Some(uffd))) } #[cfg(target_arch = "x86_64")] diff --git a/src/vmm/tests/integration_tests.rs b/src/vmm/tests/integration_tests.rs index fafa18dcaf7..e5a3162bd2e 100644 --- a/src/vmm/tests/integration_tests.rs +++ b/src/vmm/tests/integration_tests.rs @@ -232,6 +232,7 @@ fn verify_load_snapshot(snapshot_file: TempFile, memory_file: TempFile) { &mut event_manager, microvm_state, mem, + None, false, &mut empty_seccomp_filters, vm_resources, From 703a945b8e8f19e000b0f2da597a9cca6c658a7e Mon Sep 17 00:00:00 2001 From: Diana Popa Date: Fri, 18 Mar 2022 15:27:37 +0200 Subject: [PATCH 04/22] update dockerfiles and image tag Signed-off-by: Luminita Voicu Signed-off-by: Diana Popa --- tools/devctr/Dockerfile.aarch64 | 11 +++++++++++ tools/devctr/Dockerfile.x86_64 | 11 ++++++++++- tools/devtool | 2 +- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/tools/devctr/Dockerfile.aarch64 b/tools/devctr/Dockerfile.aarch64 index 94fcf5fabde..80b8256fc70 100644 --- a/tools/devctr/Dockerfile.aarch64 
+++ b/tools/devctr/Dockerfile.aarch64 @@ -26,6 +26,8 @@ ENV LC_ALL=C.UTF-8 RUN apt-get update \ && apt-get -y install --no-install-recommends \ binutils-dev \ + # Needed in order to be able to compile `userfaultfd-sys`. + clang \ cmake \ curl \ file \ @@ -43,6 +45,7 @@ RUN apt-get update \ libssl-dev \ lsof \ make \ + musl-tools \ net-tools \ openssh-client \ pkgconf \ @@ -87,6 +90,14 @@ RUN mkdir "$TMP_BUILD_DIR" \ && cd / \ && rm -rf "$TMP_BUILD_DIR" +RUN ln -s /usr/bin/musl-gcc /usr/bin/aarch64-linux-musl-gcc + +# help musl-gcc find linux headers +RUN cd /usr/include/aarch64-linux-musl \ + && ln -s ../aarch64-linux-gnu/asm asm \ + && ln -s ../linux linux \ + && ln -s ../asm-generic asm-generic + # Build iperf3-vsock RUN mkdir "$TMP_BUILD_DIR" && cd "$TMP_BUILD_DIR" \ && git clone https://github.com/stefano-garzarella/iperf-vsock \ diff --git a/tools/devctr/Dockerfile.x86_64 b/tools/devctr/Dockerfile.x86_64 index 1e58b406681..74240787c4c 100644 --- a/tools/devctr/Dockerfile.x86_64 +++ b/tools/devctr/Dockerfile.x86_64 @@ -26,6 +26,8 @@ ENV LC_ALL=C.UTF-8 RUN apt-get update \ && apt-get -y install --no-install-recommends \ binutils-dev \ + # Needed in order to be able to compile `userfaultfd-sys`. + clang \ cmake \ curl \ file \ @@ -42,6 +44,7 @@ RUN apt-get update \ libcurl4-openssl-dev \ lsof \ make \ + musl-tools \ net-tools \ openssh-client \ pkgconf \ @@ -91,7 +94,7 @@ RUN mkdir "$TMP_BUILD_DIR" \ && cargo install cargo-kcov \ && cargo +"stable" install cargo-audit \ # Fix a version that does not require cargo edition 2021. 
- && cargo install cargo-deny --version '^0.9.1' \ + && cargo install --locked cargo-deny --version '^0.9.1' \ && cargo kcov --print-install-kcov-sh | sh \ && rm -rf "$CARGO_HOME/registry" \ && ln -s "$CARGO_REGISTRY_DIR" "$CARGO_HOME/registry" \ @@ -100,6 +103,12 @@ RUN mkdir "$TMP_BUILD_DIR" \ && cd / \ && rm -rf "$TMP_BUILD_DIR" +# help musl-gcc find linux headers +RUN cd /usr/include/x86_64-linux-musl \ + && ln -s ../x86_64-linux-gnu/asm asm \ + && ln -s ../linux linux \ + && ln -s ../asm-generic asm-generic + # Build iperf3-vsock RUN mkdir "$TMP_BUILD_DIR" && cd "$TMP_BUILD_DIR" \ && git clone https://github.com/stefano-garzarella/iperf-vsock \ diff --git a/tools/devtool b/tools/devtool index cd19d5dac28..c6963536a20 100755 --- a/tools/devtool +++ b/tools/devtool @@ -72,7 +72,7 @@ DEVCTR_IMAGE_NO_TAG="public.ecr.aws/firecracker/fcuvm" # Development container tag -DEVCTR_IMAGE_TAG="v34" +DEVCTR_IMAGE_TAG="v34_uffd" # Development container image (name:tag) # This should be updated whenever we upgrade the development container. 
From 4278282ff0123a9afe9ed4ed0dff9b172ed6eb7e Mon Sep 17 00:00:00 2001 From: Luminita Voicu Date: Mon, 7 Mar 2022 17:40:11 +0200 Subject: [PATCH 05/22] tests: update dependencies Signed-off-by: Luminita Voicu --- tests/framework/dependencies.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/framework/dependencies.txt b/tests/framework/dependencies.txt index e6ff40a8e5f..64ebeda576e 100644 --- a/tests/framework/dependencies.txt +++ b/tests/framework/dependencies.txt @@ -1 +1 @@ -{'jailer v1.0.0 (/firecracker/src/jailer)', 'logger v0.1.0 (/firecracker/src/logger)', 'version_check v0.9.4', 'devices v0.1.0 (/firecracker/src/devices)', 'typenum v1.15.0', 'arch v0.1.0 (/firecracker/src/arch)', 'versionize v0.1.6', 'arch_gen v0.1.0 (/firecracker/src/arch_gen)', 'libc v0.2.117', 'serde_json v1.0.78', 'ryu v1.0.9', 'syn v1.0.86', 'versionize_derive v0.1.4 (proc-macro)', 'log v0.4.14', 'ctr v0.8.0', 'kvm-ioctls v0.11.0', 'vmm-sys-util v0.9.0', 'vm-superio v0.5.0', 'quote v1.0.15', 'vm-fdt v0.1.0', 'aead v0.4.3', 'serde v1.0.136', 'serde_derive v1.0.136 (proc-macro)', 'utils v0.1.0 (/firecracker/src/utils)', 'vm-memory v0.1.0 (/firecracker/src/vm-memory)', 'dumbo v0.1.0 (/firecracker/src/dumbo)', 'cfg-if v1.0.0', 'seccompiler v1.0.0 (/firecracker/src/seccompiler)', 'aes v0.7.5', 'snapshot v0.1.0 (/firecracker/src/snapshot)', 'itoa v1.0.1', 'event-manager v0.2.1', 'unicode-xid v0.2.2', 'cipher v0.3.0', 'net_gen v0.1.0 (/firecracker/src/net_gen)', 'micro_http v0.1.0 (https://github.com/firecracker-microvm/micro-http?rev=0a58eb1#0a58eb1e)', 'subtle v2.4.1', 'api_server v0.1.0 (/firecracker/src/api_server)', 'generic-array v0.14.5', 'crc64 v1.0.0', 'universal-hash v0.4.1', 'cpufeatures v0.2.1', 'polyval v0.5.3', 'io_uring v0.1.0 (/firecracker/src/io_uring)', 'proc-macro2 v1.0.36', 'rate_limiter v0.1.0 (/firecracker/src/rate_limiter)', 'timerfd v1.2.0', 'regex-syntax v0.6.25', 'regex v1.5.4', 'opaque-debug v0.3.0', 'linux-loader v0.4.0', 'bitflags 
v1.3.2', 'aes-gcm v0.9.4', 'bincode v1.3.3', 'firecracker v1.0.0 (/firecracker/src/firecracker)', 'ghash v0.4.4', 'mmds v0.1.0 (/firecracker/src/mmds)', 'kvm-bindings v0.5.0 (https://github.com/firecracker-microvm/kvm-bindings?tag=v0.5.0-1#4569d3f5)', 'cpuid v0.1.0 (/firecracker/src/cpuid)', 'rebase-snap v0.1.0 (/firecracker/src/rebase-snap)', 'vm-memory v0.7.0', 'base64 v0.13.0', 'virtio_gen v0.1.0 (/firecracker/src/virtio_gen)', 'lazy_static v1.4.0', 'vmm v0.1.0 (/firecracker/src/vmm)'} \ No newline at end of file +{'serde v1.0.136', 'lazy_static v1.4.0', 'syn v1.0.86', 'micro_http v0.1.0 (https://github.com/firecracker-microvm/micro-http?rev=0a58eb1#0a58eb1e)', 'ghash v0.4.4', 'serde_derive v1.0.136 (proc-macro)', 'bincode v1.3.3', 'serde_json v1.0.78', 'lazycell v1.3.0', 'thiserror-impl v1.0.30 (proc-macro)', 'minimal-lexical v0.2.1', 'glob v0.3.0', 'unicode-xid v0.2.2', 'ctr v0.8.0', 'vm-memory v0.1.0 (/firecracker/src/vm-memory)', 'kvm-ioctls v0.11.0', 'itoa v1.0.1', 'libc v0.2.117', 'bitflags v1.3.2', 'vm-memory v0.7.0', 'vmm v0.1.0 (/firecracker/src/vmm)', 'virtio_gen v0.1.0 (/firecracker/src/virtio_gen)', 'proc-macro2 v1.0.36', 'ryu v1.0.9', 'generic-array v0.14.5', 'subtle v2.4.1', 'timerfd v1.2.0', 'libloading v0.7.3', 'io_uring v0.1.0 (/firecracker/src/io_uring)', 'rate_limiter v0.1.0 (/firecracker/src/rate_limiter)', 'universal-hash v0.4.1', 'peeking_take_while v0.1.2', 'shlex v1.1.0', 'memchr v2.4.1', 'regex v1.5.4', 'bindgen v0.59.2', 'vm-fdt v0.1.0', 'vm-superio v0.5.0', 'linux-loader v0.4.0', 'nix v0.23.1', 'clang-sys v1.3.1', 'version_check v0.9.4', 'thiserror v1.0.30', 'autocfg v1.0.1', 'mmds v0.1.0 (/firecracker/src/mmds)', 'api_server v0.1.0 (/firecracker/src/api_server)', 'rebase-snap v0.1.0 (/firecracker/src/rebase-snap)', 'seccompiler v1.0.0 (/firecracker/src/seccompiler)', 'cc v1.0.73', 'typenum v1.15.0', 'kvm-bindings v0.5.0 (https://github.com/firecracker-microvm/kvm-bindings?tag=v0.5.0-1#4569d3f5)', 'cfg-if v0.1.10', 'vmm-sys-util 
v0.9.0', 'opaque-debug v0.3.0', 'quote v1.0.15', 'dumbo v0.1.0 (/firecracker/src/dumbo)', 'versionize v0.1.6', 'event-manager v0.2.1', 'regex-syntax v0.6.25', 'userfaultfd-sys v0.4.1', 'polyval v0.5.3', 'userfaultfd v0.4.2', 'cpufeatures v0.2.1', 'logger v0.1.0 (/firecracker/src/logger)', 'cfg-if v1.0.0', 'snapshot v0.1.0 (/firecracker/src/snapshot)', 'utils v0.1.0 (/firecracker/src/utils)', 'aes-gcm v0.9.4', 'log v0.4.14', 'cpuid v0.1.0 (/firecracker/src/cpuid)', 'versionize_derive v0.1.4 (proc-macro)', 'memoffset v0.6.5', 'jailer v1.0.0 (/firecracker/src/jailer)', 'firecracker v1.0.0 (/firecracker/src/firecracker)', 'crc64 v1.0.0', 'nom v7.1.0', 'base64 v0.13.0', 'aead v0.4.3', 'devices v0.1.0 (/firecracker/src/devices)', 'cipher v0.3.0', 'arch v0.1.0 (/firecracker/src/arch)', 'rustc-hash v1.1.0', 'cexpr v0.6.0', 'net_gen v0.1.0 (/firecracker/src/net_gen)', 'arch_gen v0.1.0 (/firecracker/src/arch_gen)', 'aes v0.7.5'} \ No newline at end of file From cdf59b78ab7a38cd2184501a7d25506286bdd3db Mon Sep 17 00:00:00 2001 From: Luminita Voicu Date: Tue, 8 Mar 2022 17:52:26 +0200 Subject: [PATCH 06/22] cargo: update licence allow-list Signed-off-by: Luminita Voicu --- deny.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/deny.toml b/deny.toml index 031ac2d4e44..fbd8a421886 100644 --- a/deny.toml +++ b/deny.toml @@ -2,5 +2,6 @@ allow = [ "MIT", "Apache-2.0", - "BSD-3-Clause" + "BSD-3-Clause", + "ISC" ] From 5ab553978e98aef1d74bbf4e039340f5051e875f Mon Sep 17 00:00:00 2001 From: Luminita Voicu Date: Mon, 4 Apr 2022 09:53:18 +0300 Subject: [PATCH 07/22] uffd: don't send fc's PID to PF handler over UDS The page fault handler process requires FC's PID in order to be able to notify FC of any crashes/exists. The way we initially intended to make the handler process aware of FC's PID was to have FC sent its PID explicitly through the UDS. But there is a problem with this approach: Firecracker's PID depends on the way the process was started. 
When normally started through the jailer, Firecracker will see its global PID. However, if using `--new-pid-ns` flag when running the jailer, the jailer will spawn FC in a new PID namespace, which would cause Firecracker to not be aware of its PID and see it as being 1. Moreover, when not using `--new-pid-ns` flag, we recommend to the users to spawn the jailer process into a new PID namespace when starting it. This is why sending FC's PID from within the FC process is not reliable. To tackle this, we no longer send FC's PID explicitly, but advise the page fault handler process to use `getsockopt` call with `SO_PEERCRED` option in order to fetch the global Firecracker PID at the moment of connecting. Signed-off-by: Luminita Voicu --- src/vmm/src/persist.rs | 51 +++++++++++++++--------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index f3068cfeab9..9358690724b 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -8,7 +8,6 @@ use std::fs::{File, OpenOptions}; use std::io::{self, Write}; use std::os::unix::{io::AsRawFd, net::UnixStream}; use std::path::Path; -use std::process; use std::sync::{Arc, Mutex}; use crate::builder::{self, StartMicrovmError}; @@ -89,18 +88,6 @@ pub struct GuestRegionUffdMapping { pub offset: u64, } -#[derive(Clone, Debug, Serialize)] -/// This describes the information sent by Firecracker to the -/// page fault handler process through the UDS socket. -pub struct UffdInfo { - /// List of guest region mappings between Firecracker base virtual address - /// and offset in the buffer or file backend for a guest memory region. - pub guest_region_uffd_mappings: Vec, - /// Firecracker's PID which can be used by the page fault handler process to - /// bring down Firecracker at the end of the execution or in case an error occurs. - pub pid: u32, -} - /// Errors related to saving and restoring Microvm state. 
#[derive(Debug)] pub enum MicrovmStateError { @@ -587,29 +574,41 @@ fn guest_memory_from_uffd( }); } - // Wrapp backend mappings and PID into a structure to be sent through the UDS socket. - let uffd_info = UffdInfo { - guest_region_uffd_mappings: backend_mappings, - pid: process::id(), - }; - - // This is safe to unwrap() because we control the contents of the structure - // (i.e RegionBackendMapping entries and PID). - let uffd_info = serde_json::to_string(&uffd_info).unwrap(); + // This is safe to unwrap() because we control the contents of the vector + // (i.e GuestRegionUffdMapping entries). + let backend_mappings = serde_json::to_string(&backend_mappings).unwrap(); let socket = UnixStream::connect(mem_uds_path).map_err(UdsConnection)?; socket .send_with_fd( - uffd_info.as_bytes(), - // In the happy case, we can close the fd since the other process has it open and is + backend_mappings.as_bytes(), + // In the happy case we can close the fd since the other process has it open and is // using it to serve us pages. // // The problem is that if other process crashes/exits, firecracker guest memory // will simply revert to anon-mem behavior which would lead to silent errors and // undefined behavior. // - // To tackle this scenario, we send Firecracker's PID to the page fault handler, - // so that the handler is able to notify Firecracker of any crashes/exits. + // To tackle this scenario, the page fault handler can notify Firecracker of any + // crashes/exits. There is no need for Firecracker to explicitly send its process ID. 
+ // The external process can obtain Firecracker's PID by calling `getsockopt` with + // `libc::SO_PEERCRED` option like so: + // + // let mut val = libc::ucred { pid: 0, gid: 0, uid: 0 }; + // let mut ucred_size: u32 = mem::size_of::() as u32; + // libc::getsockopt( + // socket.as_raw_fd(), + // libc::SOL_SOCKET, + // libc::SO_PEERCRED, + // &mut val as *mut _ as *mut _, + // &mut ucred_size as *mut libc::socklen_t, + // ); + // + // Per this linux man page: https://man7.org/linux/man-pages/man7/unix.7.html, + // `SO_PEERCRED` returns the credentials (PID, UID and GID) of the peer process + // connected to this socket. The returned credentials are those that were in effect + // at the time of the `connect` call. + // // Moreover, Firecracker holds a copy of the UFFD fd as well, so that even if the // page fault handler process does not tear down Firecracker when necessary, the // uffd will still be alive but with no one to serve faults, leading to guest freeze. From 5e3c096daa432184296cc240180e6d07aa5e0957 Mon Sep 17 00:00:00 2001 From: Diana Popa Date: Wed, 13 Apr 2022 15:14:48 +0300 Subject: [PATCH 08/22] uffd: enable EVENT_REMOVE for ballooning The feature will get enabled only when the microVM uses a balloon device. 
Signed-off-by: Diana Popa --- src/vmm/src/persist.rs | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 9358690724b..e7e271e5ee1 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -37,7 +37,7 @@ use logger::{error, info}; use seccompiler::BpfThreadMap; use serde::Serialize; use snapshot::Snapshot; -use userfaultfd::{Uffd, UffdBuilder}; +use userfaultfd::{FeatureFlags, Uffd, UffdBuilder}; use utils::sock_ctrl_msg::ScmSocket; use versionize::{VersionMap, Versionize, VersionizeResult}; use versionize_derive::Versionize; @@ -501,9 +501,14 @@ pub fn restore_from_snapshot( guest_memory_from_file(mem_backend_path, mem_state, track_dirty_pages)?, None, ), - MemBackendType::Uffd => { - guest_memory_from_uffd(mem_backend_path, mem_state, track_dirty_pages)? - } + MemBackendType::Uffd => guest_memory_from_uffd( + mem_backend_path, + mem_state, + track_dirty_pages, + // We enable the UFFD_FEATURE_EVENT_REMOVE feature only if a balloon device + // is present in the microVM state. + microvm_state.device_states.balloon_device.is_some(), + )?, }; builder::build_microvm_from_snapshot( instance_info, @@ -546,6 +551,7 @@ fn guest_memory_from_uffd( mem_uds_path: &Path, mem_state: &GuestMemoryState, track_dirty_pages: bool, + enable_balloon: bool, ) -> std::result::Result<(GuestMemoryMmap, Option<Uffd>), LoadSnapshotError> { use self::LoadSnapshotError::{ CreateUffdBuilder, DeserializeMemory, UdsConnection, UffdMemoryRegionsRegister, UffdSend, @@ -554,7 +560,15 @@ fn guest_memory_from_uffd( let guest_memory = GuestMemoryMmap::restore(None, mem_state, track_dirty_pages).map_err(DeserializeMemory)?; - let uffd = UffdBuilder::new() + let mut uffd_builder = UffdBuilder::new(); + + if enable_balloon { + // We enable this so that the page fault handler can add logic + // for treating madvise(MADV_DONTNEED) events triggered by balloon inflation.
+ uffd_builder.require_features(FeatureFlags::EVENT_REMOVE); + } + + let uffd = uffd_builder .close_on_exec(true) .non_blocking(true) .create() From 9cc14518096771ba5d60166c4f7c203e738d35cf Mon Sep 17 00:00:00 2001 From: Diana Popa Date: Mon, 21 Mar 2022 16:34:20 +0200 Subject: [PATCH 09/22] CI: add support for new mem_backend object Signed-off-by: Diana Popa CI: enable uffd testing when building from snapshot Signed-off-by: Diana Popa Signed-off-by: Luminita Voicu --- tests/framework/artifacts.py | 13 +++++++++++ tests/framework/builder.py | 41 ++++++++++++++++++++++++++++------ tests/framework/resources.py | 43 +++++++++++++++++++++++++++--------- tests/framework/utils.py | 43 ++++++++++++++++++++++++++++++++++++ 4 files changed, 122 insertions(+), 18 deletions(-) diff --git a/tests/framework/artifacts.py b/tests/framework/artifacts.py index c88c4704be0..b2933b7711a 100644 --- a/tests/framework/artifacts.py +++ b/tests/framework/artifacts.py @@ -479,6 +479,19 @@ class SnapshotType(Enum): DIFF = 1 +class SnapshotMemBackendType(Enum): + """ + Supported guest memory backend types used for snapshot load. + + - `FILE`: establishes if the guest memory is backed by a file. + - `UFFD`: indicates that the guest memory page faults are handled by + a dedicated UFFD page-fault handler process. 
+ """ + + FILE = 'File' + UFFD = 'Uffd' + + class Snapshot: """Manages Firecracker snapshots.""" diff --git a/tests/framework/builder.py b/tests/framework/builder.py index 60f6ae8cbdf..b0ae378aba6 100644 --- a/tests/framework/builder.py +++ b/tests/framework/builder.py @@ -10,8 +10,8 @@ from conftest import init_microvm, _test_images_s3_bucket from framework.defs import DEFAULT_TEST_SESSION_ROOT_PATH from framework.artifacts import ( - ArtifactCollection, Artifact, DiskArtifact, Snapshot, - SnapshotType, NetIfaceConfig + ArtifactCollection, Artifact, DiskArtifact, NetIfaceConfig, + Snapshot, SnapshotMemBackendType, SnapshotType ) from framework import utils import host_tools.logging as log_tools @@ -175,7 +175,13 @@ def build_from_snapshot(self, diff_snapshots=False, use_ramdisk=False, fc_binary=None, jailer_binary=None, - daemonize=True): + daemonize=True, + # If None, it means that the guest memory is + # backed by a file. + # If specified, establishes that page-faults + # resulted when loading the guest memory + # are handled by a dedicated UFFD PF handler. 
+ uffd_path=None): """Build a microvm from a snapshot artifact.""" vm = init_microvm(self.root_path, self.bin_cloner_path, fc_binary, jailer_binary,) @@ -210,10 +216,31 @@ def build_from_snapshot(self, guest_ip=iface.guest_ip, netmask_len=iface.netmask, tapname=iface.tap_name) - response = vm.snapshot.load(mem_file_path=jailed_mem, - snapshot_path=jailed_vmstate, - diff=diff_snapshots, - resume=resume) + + full_fc_version = \ + vm.version.get_from_api().json()['firecracker_version'] + if utils.compare_dirty_versions(full_fc_version, '1.0.0') > 0: + if uffd_path: + jailed_uffd = vm.copy_to_jail_ramfs(uffd_path) if use_ramdisk \ + else vm.create_jailed_resource(uffd_path) + mem_backend = { + 'type': SnapshotMemBackendType.UFFD, + 'path': jailed_uffd + } + else: + mem_backend = { + 'type': SnapshotMemBackendType.FILE, + 'path': jailed_mem + } + response = vm.snapshot.load(mem_backend=mem_backend, + snapshot_path=jailed_vmstate, + diff=diff_snapshots, + resume=resume) + else: + response = vm.snapshot.load(mem_file_path=jailed_mem, + snapshot_path=jailed_vmstate, + diff=diff_snapshots, + resume=resume) status_ok = vm.api_session.is_status_no_content(response.status_code) # Verify response status and cleanup if needed before assert. 
diff --git a/tests/framework/resources.py b/tests/framework/resources.py index afe1fd5bd89..6f42240d047 100644 --- a/tests/framework/resources.py +++ b/tests/framework/resources.py @@ -463,12 +463,27 @@ def put(self, **args): ) @staticmethod - def create_json(mem_file_path, snapshot_path, diff=False, resume=False): + def create_json( + snapshot_path, + diff=False, + resume=False, + mem_backend=None, + mem_file_path=None + ): """Compose the json associated to this type of API request.""" - datax = { - 'mem_file_path': mem_file_path, - 'snapshot_path': snapshot_path, - } + if mem_file_path: + datax = { + 'mem_file_path': mem_file_path, + 'snapshot_path': snapshot_path, + } + else: + datax = { + 'mem_backend': { + 'backend_type': str(mem_backend['type'].value), + 'backend_path': mem_backend['path'] + }, + 'snapshot_path': snapshot_path, + } if diff: datax['enable_diff_snapshots'] = True if resume: @@ -494,13 +509,21 @@ def create(self, mem_file_path, snapshot_path, diff=False, version=None): version=version ) - def load(self, mem_file_path, snapshot_path, diff=False, resume=False): + def load( + self, + snapshot_path, + diff=False, + resume=False, + mem_file_path=None, + mem_backend=None + ): """Load a snapshot of the microvm.""" response = self._load.put( - mem_file_path=mem_file_path, snapshot_path=snapshot_path, diff=diff, - resume=resume + resume=resume, + mem_file_path=mem_file_path, + mem_backend=mem_backend ) if resume and "unknown field `resume_vm`" in response.text: @@ -548,9 +571,7 @@ def patch(self, **args): ) @staticmethod - def create_json( - metrics_path=None, - ): + def create_json(metrics_path=None): """Compose the json associated to this type of API request.""" datax = {} if metrics_path is not None: diff --git a/tests/framework/utils.py b/tests/framework/utils.py index ad9065f800d..c0a9c4e7b4f 100644 --- a/tests/framework/utils.py +++ b/tests/framework/utils.py @@ -611,6 +611,49 @@ def compare_versions(first, second): return 0 +def 
sanitize_version(version): + """ + Get rid of dirty version information. + + Transform version from format `vX.Y.Z-W` to `X.Y.Z`. + """ + if version[0].isalpha(): + version = version[1:] + + return version.split("-", 1)[0] + + +def compare_dirty_versions(first, second): + """ + Compare two versions out of which one is dirty. + + We do not allow both versions to be dirty, because dirty info + does not reveal any ordering information. + + :param first: first version string + :param second: second version string + :returns: 0 if equal, <0 if first < second, >0 if second < first + """ + is_first_dirty = "-" in first + first = sanitize_version(first) + + is_second_dirty = "-" in second + second = sanitize_version(second) + + if is_first_dirty and is_second_dirty: + raise ValueError + + diff = compare_versions(first, second) + if diff != 0: + return diff + if is_first_dirty: + return 1 + if is_second_dirty: + return -1 + + return diff + + def get_kernel_version(level=2): """Return the current kernel version in format `major.minor.patch`.""" linux_version = platform.release() From 7c4cdaef33d3b0448e29fa7b39b3e7c27d05abc3 Mon Sep 17 00:00:00 2001 From: Diana Popa Date: Mon, 21 Mar 2022 17:41:31 +0200 Subject: [PATCH 10/22] CI: test uffd bad socket path scenarios Signed-off-by: Diana Popa Signed-off-by: Luminita Voicu --- .../integration_tests/functional/test_uffd.py | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 tests/integration_tests/functional/test_uffd.py diff --git a/tests/integration_tests/functional/test_uffd.py b/tests/integration_tests/functional/test_uffd.py new file mode 100644 index 00000000000..3fd282f9ddd --- /dev/null +++ b/tests/integration_tests/functional/test_uffd.py @@ -0,0 +1,102 @@ +# Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# SPDX-License-Identifier: Apache-2.0 +"""Test UFFD related functionality when resuming from snapshot.""" +import logging +import os + +from framework.artifacts import SnapshotMemBackendType +from framework.builder import MicrovmBuilder, SnapshotBuilder +from framework.utils import run_cmd + +import host_tools.network as net_tools + + +def create_snapshot(bin_cloner_path): + """Create a snapshot of a microVM.""" + vm_builder = MicrovmBuilder(bin_cloner_path) + vm_instance = vm_builder.build_vm_nano() + basevm = vm_instance.vm + root_disk = vm_instance.disks[0] + ssh_key = vm_instance.ssh_key + + basevm.start() + ssh_connection = net_tools.SSHConnection(basevm.ssh_config) + + # Verify if guest can run commands. + exit_code, _, _ = ssh_connection.execute_command("sync") + assert exit_code == 0 + + # Create a snapshot builder from a microvm. + snapshot_builder = SnapshotBuilder(basevm) + + # Create base snapshot. + snapshot = snapshot_builder.create([root_disk.local_path()], + ssh_key) + + basevm.kill() + + return snapshot + + +def test_bad_socket_path(bin_cloner_path, test_microvm_with_api): + """ + Test error scenario when socket path does not exist. 
+ + @type: negative + """ + logger = logging.getLogger("uffd_bad_socket_path") + + logger.info("Create snapshot") + snapshot = create_snapshot(bin_cloner_path) + + logger.info("Load snapshot, mem %s", snapshot.mem) + vm = test_microvm_with_api + vm.spawn() + jailed_vmstate = vm.create_jailed_resource(snapshot.vmstate) + + response = vm.snapshot.load( + mem_backend={ + 'type': SnapshotMemBackendType.UFFD, + 'path': 'inexsistent' + }, + snapshot_path=jailed_vmstate + ) + + assert vm.api_session.is_status_bad_request(response.status_code) + assert "Load microVM snapshot error: Cannot connect to UDS in order to " \ + "send information on handling guest memory page-faults due to: " \ + "No such file or directory (os error 2)" in response.text + + +def test_unbinded_socket(bin_cloner_path, test_microvm_with_api): + """ + Test error scenario when PF handler has not yet called bind on socket. + + @type: negative + """ + logger = logging.getLogger("uffd_unbinded_socket") + + logger.info("Create snapshot") + snapshot = create_snapshot(bin_cloner_path) + + logger.info("Load snapshot, mem %s", snapshot.mem) + vm = test_microvm_with_api + vm.spawn() + jailed_vmstate = vm.create_jailed_resource(snapshot.vmstate) + + socket_path = os.path.join(vm.path, "firecracker-uffd.sock") + run_cmd("touch {}".format(socket_path)) + jailed_sock_path = vm.create_jailed_resource(socket_path) + + response = vm.snapshot.load( + mem_backend={ + 'type': SnapshotMemBackendType.UFFD, + 'path': jailed_sock_path + }, + snapshot_path=jailed_vmstate + ) + + assert vm.api_session.is_status_bad_request(response.status_code) + assert "Load microVM snapshot error: Cannot connect to UDS in order to" \ + " send information on handling guest memory page-faults due to: " \ + "Connection refused (os error 111)" in response.text From e887e23909ef80e40a66e26ef3a06a26e0000e71 Mon Sep 17 00:00:00 2001 From: Luminita Voicu Date: Tue, 29 Mar 2022 09:08:33 +0300 Subject: [PATCH 11/22] CI: negative tests for 
/snapshot/load API Signed-off-by: Luminita Voicu --- .../integration_tests/functional/test_api.py | 102 +++++++++++++++++- 1 file changed, 99 insertions(+), 3 deletions(-) diff --git a/tests/integration_tests/functional/test_api.py b/tests/integration_tests/functional/test_api.py index b190502e724..0325e92c71f 100644 --- a/tests/integration_tests/functional/test_api.py +++ b/tests/integration_tests/functional/test_api.py @@ -16,11 +16,11 @@ import host_tools.drive as drive_tools import host_tools.network as net_tools -from conftest import _test_images_s3_bucket +from conftest import _test_images_s3_bucket, init_microvm from framework.utils import is_io_uring_supported -from framework.artifacts import ArtifactCollection, SnapshotType, \ - NetIfaceConfig, DEFAULT_DEV_NAME, DEFAULT_TAP_NAME +from framework.artifacts import ArtifactCollection, NetIfaceConfig, \ + DEFAULT_DEV_NAME, DEFAULT_TAP_NAME, SnapshotType from framework.builder import MicrovmBuilder, SnapshotBuilder MEM_LIMIT = 1000000000 @@ -1640,3 +1640,99 @@ def test_map_private_seccomp_regression(test_microvm_with_ssh): response = test_microvm.mmds.put(json=data_store) assert test_microvm.api_session.is_status_no_content(response.status_code) + + +# pylint: disable=protected-access +def test_negative_snapshot_load_api(bin_cloner_path): + """ + Test snapshot load API. + + @type: negative + """ + vm_builder = MicrovmBuilder(bin_cloner_path) + vm = init_microvm(vm_builder.root_path, vm_builder.bin_cloner_path) + vm.spawn() + + # Specifying both `mem_backend` and 'mem_file_path` should fail. + datax = { + 'snapshot_path': 'foo', + 'mem_backend': { + 'backend_type': 'File', + 'backend_path': 'bar' + }, + 'mem_file_path': 'bar', + } + response = vm.snapshot._load._api_session.put( + "{}".format(vm.snapshot._load._snapshot_cfg_url), + json=datax + ) + err_msg = "too many fields: either `mem_backend` or " \ + "`mem_file_path` exclusively is required." 
+ assert err_msg in response.text, response.text + + # API request with `mem_backend` but no `backend_type` should fail. + datax = { + 'snapshot_path': 'foo', + 'mem_backend': { + 'backend_path': 'bar' + } + } + response = vm.snapshot._load._api_session.put( + "{}".format(vm.snapshot._load._snapshot_cfg_url), + json=datax + ) + err_msg = "missing field `backend_type`" + assert err_msg in response.text, response.text + + # API request with `mem_backend` but no `backend_path` should fail. + datax = { + 'snapshot_path': 'foo', + 'mem_backend': { + 'backend_type': 'File' + } + } + response = vm.snapshot._load._api_session.put( + "{}".format(vm.snapshot._load._snapshot_cfg_url), + json=datax + ) + err_msg = "missing field `backend_path`" + assert err_msg in response.text, response.text + + # API request with invalid `backend_type` should fail. + datax = { + 'snapshot_path': 'foo', + 'mem_backend': { + 'backend_type': 'foo', + 'backend_path': 'foo' + } + } + response = vm.snapshot._load._api_session.put( + "{}".format(vm.snapshot._load._snapshot_cfg_url), + json=datax + ) + err_msg = "unknown variant `foo`, expected `File` or `Uffd`" + assert err_msg in response.text, response.text + + # API request without `snapshot_path` should fail. + datax = { + 'mem_backend': { + 'backend_type': 'File', + 'backend_path': 'foo' + } + } + response = vm.snapshot._load._api_session.put( + "{}".format(vm.snapshot._load._snapshot_cfg_url), + json=datax + ) + err_msg = "missing field `snapshot_path`" + assert err_msg in response.text, response.text + + # API request without `mem_backend` or `mem_file_path` should fail. 
+ datax = {'snapshot_path': 'foo'} + response = vm.snapshot._load._api_session.put( + "{}".format(vm.snapshot._load._snapshot_cfg_url), + json=datax + ) + err_msg = "missing field: either `mem_backend` or " \ + "`mem_file_path` is required" + assert err_msg in response.text, response.text From de1696e7320b3bf4cd1ac6633d191cecf527e540 Mon Sep 17 00:00:00 2001 From: Diana Popa Date: Mon, 21 Mar 2022 18:19:49 +0200 Subject: [PATCH 12/22] CI: add an example of uffd handler Signed-off-by: Diana Popa Signed-off-by: Luminita Voicu --- tests/host_tools/uffd/Cargo.lock | 362 ++++++++++++++++++ tests/host_tools/uffd/Cargo.toml | 22 ++ .../host_tools/uffd/src/bin/valid_handler.rs | 291 ++++++++++++++ 3 files changed, 675 insertions(+) create mode 100644 tests/host_tools/uffd/Cargo.lock create mode 100644 tests/host_tools/uffd/Cargo.toml create mode 100644 tests/host_tools/uffd/src/bin/valid_handler.rs diff --git a/tests/host_tools/uffd/Cargo.lock b/tests/host_tools/uffd/Cargo.lock new file mode 100644 index 00000000000..0dbdc9f612a --- /dev/null +++ b/tests/host_tools/uffd/Cargo.lock @@ -0,0 +1,362 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bindgen" +version = "0.59.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bd2a9a458e8f4304c52c43ebb0cfbd520289f8379a52e329a38afda99bf8eb8" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "peeking_take_while", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "cc" +version = "1.0.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clang-sys" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cc00842eed744b858222c4c9faf7243aafc6d33f92f96935263ef4d8a41ce21" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" + +[[package]] +name = "itoa" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "libc" +version = "0.2.121" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f" + +[[package]] +name = "libloading" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efbc0f03f9a775e9f6aed295c6a1ba2253c5757a9e03d55c6caa46a681abcddd" +dependencies = [ + "cfg-if 1.0.0", + "winapi", +] + +[[package]] +name = "memchr" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" + +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "net_gen" +version = "0.1.0" + +[[package]] +name = "nix" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f866317acbd3a240710c63f065ffb1e4fd466259045ccb504130b7f668f35c6" 
+dependencies = [ + "bitflags", + "cc", + "cfg-if 1.0.0", + "libc", + "memoffset", +] + +[[package]] +name = "nom" +version = "7.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + +[[package]] +name = "proc-macro2" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "632d02bff7f874a36f33ea8bb416cd484b90cc66c1194b1a1110d067a7013f58" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" +dependencies = [ + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "ryu" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f" + +[[package]] +name = "serde" +version = "1.0.136" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.136" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "shlex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" + +[[package]] +name = "syn" +version = "1.0.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704df27628939572cd88d33f171cd6f896f4eaca85252c6e0a72d8d8287ee86f" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "thiserror" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "uffd" +version = "1.1.0" +dependencies = [ + "libc", + "nix", + "serde", + "serde_json", + "userfaultfd", + "utils", +] + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" + +[[package]] +name = "userfaultfd" +version = "0.4.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b738009e099b4ded1ecf19dfb7631f69c24f16e0af6d29fd9b3f54a092aca46" +dependencies = [ + "bitflags", + "cfg-if 1.0.0", + "libc", + "nix", + "thiserror", + "userfaultfd-sys", +] + +[[package]] +name = "userfaultfd-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a4be003c705d2c8dc1234d473856945e291bb998ac2e2d83e70328d964d7458" +dependencies = [ + "bindgen", + "cc", + "cfg-if 0.1.10", +] + +[[package]] +name = "utils" +version = "0.1.0" +dependencies = [ + "libc", + "net_gen", + "serde", + "vmm-sys-util", +] + +[[package]] +name = "vmm-sys-util" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "733537bded03aaa93543f785ae997727b30d1d9f4a03b7861d23290474242e11" +dependencies = [ + "bitflags", + "libc", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/tests/host_tools/uffd/Cargo.toml b/tests/host_tools/uffd/Cargo.toml new file mode 100644 index 00000000000..f83ed0f9c3f --- /dev/null +++ b/tests/host_tools/uffd/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "uffd" +version = "1.1.0" +authors = ["Amazon Firecracker team "] +edition = "2018" + +[dependencies] +utils = { path = "../../../src/utils" } + +libc = ">=0.2.39" +nix = "0.23.0" 
+serde = { version = ">=1.0.27", features = ["derive"] } +serde_json = ">=1.0.9" +userfaultfd = ">=0.4.0" + +[workspace] + +[profile.dev] +panic = "abort" + +[profile.release] +panic = "abort" diff --git a/tests/host_tools/uffd/src/bin/valid_handler.rs b/tests/host_tools/uffd/src/bin/valid_handler.rs new file mode 100644 index 00000000000..7750041fc03 --- /dev/null +++ b/tests/host_tools/uffd/src/bin/valid_handler.rs @@ -0,0 +1,291 @@ +// Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Provides functionality for a userspace page fault handler +//! which loads the whole region from the backing memory file +//! when a page fault occurs. + +use std::collections::HashMap; +use std::fs::File; +use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd}; +use std::os::unix::net::{UnixListener, UnixStream}; +use std::{mem, ptr}; + +use libc::c_void; +use nix::poll::{poll, PollFd, PollFlags}; +use nix::sys::mman::{mmap, MapFlags, ProtFlags}; +use serde::Deserialize; +use userfaultfd::Uffd; +use utils::get_page_size; + +use utils::sock_ctrl_msg::ScmSocket; + +// This is the same as the one used in src/vmm. +/// This describes the mapping between Firecracker base virtual address and offset in the +/// buffer or file backend for a guest memory region. It is used to tell an external +/// process/thread where to populate the guest memory data for this range. +/// +/// E.g. Guest memory contents for a region of `size` bytes can be found in the backend +/// at `offset` bytes from the beginning, and should be copied/populated into `base_host_virt_addr`. +#[derive(Clone, Debug, Deserialize)] +pub struct GuestRegionUffdMapping { + /// Base host virtual address where the guest memory contents for this region + /// should be copied/populated. + pub base_host_virt_addr: u64, + /// Region size. + pub size: usize, + /// Offset in the backend file/buffer where the region contents are.
+ pub offset: u64, +} + +struct MemRegion { + mapping: GuestRegionUffdMapping, + page_states: HashMap<u64, MemPageState>, +} + +struct UffdPfHandler { + mem_regions: Vec<MemRegion>, + backing_buffer: *const u8, + uffd: Uffd, + // Not currently used but included to demonstrate how a page fault handler can + // fetch Firecracker's PID in order to make it aware of any crashes/exits. + _firecracker_pid: u32, +} + +#[derive(Clone)] +enum MemPageState { + Uninitialized, + FromFile, + Removed, + Anonymous, +} + +impl UffdPfHandler { + pub fn from_unix_stream(stream: UnixStream, data: *const u8, size: usize) -> Self { + let mut message_buf = vec![0u8; 1024]; + let (bytes_read, file) = stream + .recv_with_fd(&mut message_buf[..]) + .expect("Cannot recv_with_fd"); + message_buf.resize(bytes_read, 0); + + let body = String::from_utf8(message_buf).unwrap(); + let file = file.expect("Uffd not passed through UDS!"); + + let mappings = serde_json::from_str::<Vec<GuestRegionUffdMapping>>(&body) + .expect("Cannot deserialize memory mappings."); + let memsize: usize = mappings.iter().map(|r| r.size).sum(); + + // Make sure memory size matches backing data size. + assert_eq!(memsize, size); + + let uffd = unsafe { Uffd::from_raw_fd(file.into_raw_fd()) }; + + let creds: libc::ucred = get_peer_process_credentials(stream); + + let mem_regions = create_mem_regions(&mappings); + + Self { + mem_regions, + backing_buffer: data, + uffd, + _firecracker_pid: creds.pid as u32, + } + } + + fn update_mem_state_mappings(&mut self, start: u64, end: u64, state: &MemPageState) { + for region in self.mem_regions.iter_mut() { + for (key, value) in region.page_states.iter_mut() { + if key >= &start && key < &end { + *value = state.clone(); + } + } + } + } + + fn populate_from_file(&self, region: &MemRegion) -> (u64, u64) { + let src = self.backing_buffer as u64 + region.mapping.offset; + let start_addr = region.mapping.base_host_virt_addr; + let len = region.mapping.size; + // Populate whole region from backing mem-file.
+ // This offers an example of how memory can be loaded in RAM, + // however this can be adjusted to accommodate use case needs. + let ret = unsafe { + self.uffd + .copy(src as *const _, start_addr as *mut _, len, true) + .expect("Uffd copy failed") + }; + + // Make sure the UFFD copied some bytes. + assert!(ret > 0); + + return (start_addr, start_addr + len as u64); + } + + fn zero_out(&mut self, addr: u64) -> (u64, u64) { + let page_size = get_page_size().unwrap(); + + let ret = unsafe { + self.uffd + .zeropage(addr as *mut _, page_size, true) + .expect("Uffd zeropage failed") + }; + // Make sure the UFFD zeroed out some bytes. + assert!(ret > 0); + + return (addr, addr + page_size as u64); + } + + fn serve_pf(&mut self, addr: *mut u8) { + let page_size = get_page_size().unwrap(); + + // Find the start of the page that the current faulting address belongs to. + let dst = (addr as usize & !(page_size as usize - 1)) as *mut c_void; + let fault_page_addr = dst as u64; + + // Get the state of the current faulting page. + for region in self.mem_regions.iter() { + match region.page_states.get(&fault_page_addr) { + // Our simple PF handler has a simple strategy: + // There exist 4 states in which a memory page can be in: + // 1. Uninitialized - page was never touched + // 2. FromFile - the page is populated with content from snapshotted memory file + // 3. Removed - MADV_DONTNEED was called due to balloon inflation + // 4. Anonymous - page was zeroed out -> this implies that more than one page fault + // event was received. 
This can be a consequence of guest reclaiming back its + // memory from the host (through balloon device) + Some(MemPageState::Uninitialized) | Some(MemPageState::FromFile) => { + let (start, end) = self.populate_from_file(region); + self.update_mem_state_mappings(start, end, &MemPageState::FromFile); + return; + } + Some(MemPageState::Removed) | Some(MemPageState::Anonymous) => { + let (start, end) = self.zero_out(fault_page_addr); + self.update_mem_state_mappings(start, end, &MemPageState::Anonymous); + return; + } + None => { + (); + } + } + } + + panic!( + "Could not find addr: {:?} within guest region mappings.", + addr + ); + } + + fn run_loop(&mut self) { + let pollfd = PollFd::new(self.uffd.as_raw_fd(), PollFlags::POLLIN); + + // Loop, handling incoming events on the userfaultfd file descriptor. + loop { + // See what poll() tells us about the userfaultfd. + let nready = poll(&mut [pollfd], -1).expect("Failed to poll"); + + let revents = pollfd.revents().unwrap(); + println!( + "poll() returns: nready = {}; POLLIN = {}; POLLERR = {}", + nready, + revents.contains(PollFlags::POLLIN), + revents.contains(PollFlags::POLLERR), + ); + + // Read an event from the userfaultfd. + let event = self + .uffd + .read_event() + .expect("Failed to read uffd_msg") + .expect("uffd_msg not ready"); + + // We expect to receive either a Page Fault or Removed + // event (if the balloon device is enabled). + match event { + userfaultfd::Event::Pagefault { addr, .. 
} => self.serve_pf(addr as *mut u8), + userfaultfd::Event::Remove { start, end } => self.update_mem_state_mappings( + start as *mut u8 as u64, + end as *mut u8 as u64, + &MemPageState::Removed, + ), + _ => panic!("Unexpected event on userfaultfd"), + } + } + } +} + +fn get_peer_process_credentials(stream: UnixStream) -> libc::ucred { + let mut creds: libc::ucred = libc::ucred { + pid: 0, + gid: 0, + uid: 0, + }; + let mut creds_size = mem::size_of::() as u32; + + let ret = unsafe { + libc::getsockopt( + stream.as_raw_fd(), + libc::SOL_SOCKET, + libc::SO_PEERCRED, + &mut creds as *mut _ as *mut _, + &mut creds_size as *mut libc::socklen_t, + ) + }; + if ret != 0 { + panic!("Failed to get peer process credentials"); + } + + creds +} + +fn create_mem_regions(mappings: &Vec) -> Vec { + let page_size = get_page_size().unwrap(); + let mut mem_regions: Vec = Vec::with_capacity(mappings.len()); + + for r in mappings.iter() { + let mapping = r.clone(); + let mut addr = r.base_host_virt_addr; + let end_addr = r.base_host_virt_addr + r.size as u64; + let mut page_states = HashMap::new(); + + while addr < end_addr { + page_states.insert(addr, MemPageState::Uninitialized); + addr += page_size as u64; + } + mem_regions.push(MemRegion { + mapping, + page_states, + }); + } + + mem_regions +} + +fn main() { + let uffd_sock_path = std::env::args().nth(1).expect("No socket path given"); + let mem_file_path = std::env::args().nth(2).expect("No memory file given"); + + let file = File::open(mem_file_path).expect("Cannot open memfile"); + let size = file.metadata().unwrap().len() as usize; + + // mmap a memory area used to bring in the faulting regions. + let memfile_buffer = unsafe { + mmap( + ptr::null_mut(), + size, + ProtFlags::PROT_READ, + MapFlags::MAP_PRIVATE, + file.as_raw_fd(), + 0, + ) + .expect("mmap failed") + } as *const u8; + + // Get Uffd from UDS. We'll use the uffd to handle PFs for Firecracker. 
+ let listener = UnixListener::bind(&uffd_sock_path).expect("Cannot bind to socket path"); + + let (stream, _) = listener.accept().expect("Cannot listen on UDS socket"); + let mut uffd_handler = UffdPfHandler::from_unix_stream(stream, memfile_buffer, size); + + uffd_handler.run_loop(); + println!("Uffd thread done!"); +} From ec5159f40fc5c1c943d218a90a80b74fc6484b66 Mon Sep 17 00:00:00 2001 From: Diana Popa Date: Thu, 24 Mar 2022 12:26:11 +0200 Subject: [PATCH 13/22] CI: compile uffd handler example as a fixture Signed-off-by: Diana Popa Signed-off-by: Luminita Voicu --- tests/conftest.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index c07a208c45c..8aec6d60729 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -394,6 +394,40 @@ def bin_seccomp_paths(test_fc_session_root_path): } +@pytest.fixture(scope='session') +def uffd_handler_paths(test_fc_session_root_path): + """Build UFFD handler binaries.""" + # pylint: disable=redefined-outer-name + # The fixture pattern causes a pylint false positive for that rule. 
+ uffd_build_path = os.path.join( + test_fc_session_root_path, + build_tools.CARGO_RELEASE_REL_PATH + ) + + extra_args = '--release --target {}-unknown-linux-musl' + extra_args = extra_args.format(platform.machine()) + build_tools.cargo_build(uffd_build_path, + extra_args=extra_args, + src_dir='host_tools/uffd') + + release_binaries_path = os.path.join( + test_fc_session_root_path, + build_tools.CARGO_RELEASE_REL_PATH, + build_tools.RELEASE_BINARIES_REL_PATH + ) + + valid_handler = os.path.normpath( + os.path.join( + release_binaries_path, + 'valid_handler' + ) + ) + + yield { + 'valid_handler': valid_handler, + } + + @pytest.fixture() def microvm(test_fc_session_root_path, bin_cloner_path): """Instantiate a microvm.""" From 0e5eee55af4245476930505562d7d74288fe4ead Mon Sep 17 00:00:00 2001 From: Diana Popa Date: Thu, 24 Mar 2022 12:27:07 +0200 Subject: [PATCH 14/22] add util fn for starting process in screen Signed-off-by: Diana Popa Signed-off-by: Luminita Voicu --- tests/framework/microvm.py | 44 +++++-------------------------------- tests/framework/utils.py | 45 +++++++++++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 39 deletions(-) diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index 6b25f5e9964..14779d1cf43 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -21,7 +21,6 @@ from threading import Lock from retry import retry -from retry.api import retry_call import host_tools.logging as log_tools import host_tools.cpu_load as cpu_tools @@ -552,7 +551,7 @@ def spawn(self, create_logger=True, # to be run by customers (together with CLONE_NEWPID flag). # # We have to use an external tool for CLONE_NEWPID, because - # 1) Python doesn't provide a os.clone() interface, and + # 1) Python doesn't provide os.clone() interface, and # 2) Python's ctypes libc interface appears to be broken, causing # our clone / exec to deadlock at some point. 
if self._jailer.daemonize: @@ -560,44 +559,13 @@ def spawn(self, create_logger=True, else: # This file will collect any output from 'screen'ed Firecracker. self._screen_log = self.SCREEN_LOGFILE.format(self._session_name) - start_cmd = 'screen -L -Logfile {logfile} '\ - '-dmS {session} {binary} {params}' - start_cmd = start_cmd.format( - logfile=self.screen_log, - session=self._session_name, - binary=self._jailer_binary_path, - params=' '.join(jailer_param_list) + screen_pid, binary_pid = utils.start_screen_process( + self._screen_log, self._session_name, + self._jailer_binary_path, + jailer_param_list ) - - utils.run_cmd(start_cmd) - - # Build a regex object to match (number).session_name - regex_object = re.compile( - r'([0-9]+)\.{}'.format(self._session_name)) - - # Run 'screen -ls' in a retry_call loop, 30 times with a one - # second delay between calls. - # If the output of 'screen -ls' matches the regex object, it will - # return the PID. Otherwise a RuntimeError will be raised. - screen_pid = retry_call( - utils.search_output_from_cmd, - fkwargs={ - "cmd": 'screen -ls', - "find_regex": regex_object - }, - exceptions=RuntimeError, - tries=30, - delay=1).group(1) - self._screen_pid = screen_pid - - self.jailer_clone_pid = int(open('/proc/{0}/task/{0}/children' - .format(screen_pid), - encoding='utf-8').read().strip()) - - # Configure screen to flush stdout to file. - flush_cmd = 'screen -S {session} -X colon "logfile flush 0^M"' - utils.run_cmd(flush_cmd.format(session=self._session_name)) + self.jailer_clone_pid = binary_pid # Wait for the jailer to create resources needed, and Firecracker to # create its API socket. 
diff --git a/tests/framework/utils.py b/tests/framework/utils.py index c0a9c4e7b4f..33e05856255 100644 --- a/tests/framework/utils.py +++ b/tests/framework/utils.py @@ -16,7 +16,7 @@ from collections import namedtuple, defaultdict import psutil from retry import retry - +from retry.api import retry_call from framework.defs import MIN_KERNEL_VERSION_FOR_IO_URING CommandReturn = namedtuple("CommandReturn", "returncode stdout stderr") @@ -729,3 +729,46 @@ def configure_mmds(test_microvm, iface_ids, version=None, ipv4_address=None, assert test_microvm.api_session.is_status_no_content(response.status_code) return response + + +def start_screen_process(screen_log, session_name, binary_path, binary_params): + """Start binary process into a screen session.""" + start_cmd = 'screen -L -Logfile {logfile} ' \ + '-dmS {session} {binary} {params}' + start_cmd = start_cmd.format( + logfile=screen_log, + session=session_name, + binary=binary_path, + params=' '.join(binary_params) + ) + + run_cmd(start_cmd) + + # Build a regex object to match (number).session_name + regex_object = re.compile( + r'([0-9]+)\.{}'.format(session_name)) + + # Run 'screen -ls' in a retry_call loop, 30 times with a 1s + # delay between calls. + # If the output of 'screen -ls' matches the regex object, it will + # return the PID. Otherwise, a RuntimeError will be raised. + screen_pid = retry_call( + search_output_from_cmd, + fkwargs={ + "cmd": 'screen -ls', + "find_regex": regex_object + }, + exceptions=RuntimeError, + tries=30, + delay=1).group(1) + + binary_clone_pid = int(open( + '/proc/{0}/task/{0}/children'.format(screen_pid), + encoding='utf-8' + ).read().strip()) + + # Configure screen to flush stdout to file. 
+ flush_cmd = 'screen -S {session} -X colon "logfile flush 0^M"' + run_cmd(flush_cmd.format(session=session_name)) + + return screen_pid, binary_clone_pid From 3d0c1407cb4accadd5a7b4492709883f0e59a8df Mon Sep 17 00:00:00 2001 From: Diana Popa Date: Thu, 24 Mar 2022 12:38:42 +0200 Subject: [PATCH 15/22] CI: integration test for valid uffd scenario Signed-off-by: Diana Popa Signed-off-by: Luminita Voicu --- tests/framework/builder.py | 16 +-- tests/framework/utils.py | 27 +++++ .../integration_tests/functional/test_api.py | 6 +- .../functional/test_balloon.py | 8 +- .../functional/test_snapshot_basic.py | 42 +++++--- .../integration_tests/functional/test_uffd.py | 100 +++++++++++++++++- .../functional/test_vsock.py | 8 +- .../performance/test_snapshot_perf.py | 4 +- 8 files changed, 181 insertions(+), 30 deletions(-) diff --git a/tests/framework/builder.py b/tests/framework/builder.py index b0ae378aba6..895f261f13f 100644 --- a/tests/framework/builder.py +++ b/tests/framework/builder.py @@ -170,6 +170,7 @@ def build(self, # so we do not need to move it around polluting the code. def build_from_snapshot(self, snapshot: Snapshot, + vm=None, resume=False, # Enable incremental snapshot capability. diff_snapshots=False, @@ -183,11 +184,12 @@ def build_from_snapshot(self, # are handled by a dedicated UFFD PF handler. 
uffd_path=None): """Build a microvm from a snapshot artifact.""" - vm = init_microvm(self.root_path, self.bin_cloner_path, - fc_binary, jailer_binary,) - vm.jailer.daemonize = daemonize - vm.spawn(log_level='Error', use_ramdisk=use_ramdisk) - vm.api_session.untime() + if vm is None: + vm = init_microvm(self.root_path, self.bin_cloner_path, + fc_binary, jailer_binary,) + vm.jailer.daemonize = daemonize + vm.spawn(log_level='Error', use_ramdisk=use_ramdisk) + vm.api_session.untime() metrics_file_path = os.path.join(vm.path, 'metrics.log') metrics_fifo = log_tools.Fifo(metrics_file_path) @@ -221,11 +223,9 @@ def build_from_snapshot(self, vm.version.get_from_api().json()['firecracker_version'] if utils.compare_dirty_versions(full_fc_version, '1.0.0') > 0: if uffd_path: - jailed_uffd = vm.copy_to_jail_ramfs(uffd_path) if use_ramdisk \ - else vm.create_jailed_resource(uffd_path) mem_backend = { 'type': SnapshotMemBackendType.UFFD, - 'path': jailed_uffd + 'path': uffd_path } else: mem_backend = { diff --git a/tests/framework/utils.py b/tests/framework/utils.py index 33e05856255..158e5c97ed1 100644 --- a/tests/framework/utils.py +++ b/tests/framework/utils.py @@ -78,6 +78,33 @@ def get_cpu_percent(pid: int) -> float: return cpu_percentages +class UffdHandler: + """Describe the UFFD page fault handler process.""" + + def __init__(self, name, args): + """Instantiate the handler process with arguments.""" + self._proc = None + self._args = [f"/{name}"] + self._args.extend(args) + + def spawn(self): + """Spawn handler process using arguments provided.""" + self._proc = subprocess.Popen( + self._args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + bufsize=1 + ) + + def proc(self): + """Return UFFD handler process.""" + return self._proc + + def __del__(self): + """Tear down the UFFD handler process.""" + self._proc.kill() + + # pylint: disable=R0903 class CpuMap: """Cpu map from real cpu cores to containers visible cores. 
diff --git a/tests/integration_tests/functional/test_api.py b/tests/integration_tests/functional/test_api.py index 0325e92c71f..d527272351d 100644 --- a/tests/integration_tests/functional/test_api.py +++ b/tests/integration_tests/functional/test_api.py @@ -1467,7 +1467,11 @@ def test_get_full_config_after_restoring_snapshot(bin_cloner_path): ssh_key, SnapshotType.FULL) - microvm, _ = microvm_builder.build_from_snapshot(snapshot, True, False) + microvm, _ = microvm_builder.build_from_snapshot( + snapshot, + resume=True, + diff_snapshots=False + ) expected_cfg = setup_cfg.copy() diff --git a/tests/integration_tests/functional/test_balloon.py b/tests/integration_tests/functional/test_balloon.py index 81afa8c910d..58d4bec8931 100644 --- a/tests/integration_tests/functional/test_balloon.py +++ b/tests/integration_tests/functional/test_balloon.py @@ -646,9 +646,11 @@ def _test_balloon_snapshot(context): basevm.kill() logger.info("Load snapshot #{}, mem {}".format(1, snapshot.mem)) - microvm, _ = vm_builder.build_from_snapshot(snapshot, - True, - diff_snapshots) + microvm, _ = vm_builder.build_from_snapshot( + snapshot, + resume=True, + diff_snapshots=diff_snapshots + ) # Attempt to connect to resumed microvm. ssh_connection = net_tools.SSHConnection(microvm.ssh_config) diff --git a/tests/integration_tests/functional/test_snapshot_basic.py b/tests/integration_tests/functional/test_snapshot_basic.py index 04a8c6bc679..8cd5b2d4933 100644 --- a/tests/integration_tests/functional/test_snapshot_basic.py +++ b/tests/integration_tests/functional/test_snapshot_basic.py @@ -102,9 +102,11 @@ def _test_seq_snapshots(context): for i in range(seq_len): logger.info("Load snapshot #{}, mem {}".format(i, snapshot.mem)) - microvm, _ = vm_builder.build_from_snapshot(snapshot, - True, - diff_snapshots) + microvm, _ = vm_builder.build_from_snapshot( + snapshot, + resume=True, + diff_snapshots=diff_snapshots + ) # Attempt to connect to resumed microvm. 
ssh_connection = net_tools.SSHConnection(microvm.ssh_config) @@ -236,9 +238,11 @@ def test_patch_drive_snapshot(bin_cloner_path): # Load snapshot in a new Firecracker microVM. logger.info("Load snapshot, mem %s", snapshot.mem) - microvm, _ = vm_builder.build_from_snapshot(snapshot, - True, - diff_snapshots) + microvm, _ = vm_builder.build_from_snapshot( + snapshot, + resume=True, + diff_snapshots=diff_snapshots + ) # Attempt to connect to resumed microvm. ssh_connection = net_tools.SSHConnection(microvm.ssh_config) @@ -446,9 +450,11 @@ def test_negative_postload_api(bin_cloner_path): logger.info("Load snapshot, mem %s", snapshot.mem) # Do not resume, just load, so we can still call APIs that work. - microvm, _ = vm_builder.build_from_snapshot(snapshot, - False, - True) + microvm, _ = vm_builder.build_from_snapshot( + snapshot, + resume=False, + diff_snapshots=True + ) fail_msg = "The requested operation is not supported after starting " \ "the microVM" @@ -517,7 +523,11 @@ def test_negative_snapshot_permissions(bin_cloner_path): os.chmod(snapshot.mem, 0o000) try: - _, _ = vm_builder.build_from_snapshot(snapshot, True, True) + _, _ = vm_builder.build_from_snapshot( + snapshot, + resume=True, + diff_snapshots=True + ) except AssertionError as error: # Check if proper error is returned. assert "Cannot open the memory file: Permission denied" in str(error) @@ -528,7 +538,11 @@ def test_negative_snapshot_permissions(bin_cloner_path): os.chmod(snapshot.vmstate, 0o000) try: - _, _ = vm_builder.build_from_snapshot(snapshot, True, True) + _, _ = vm_builder.build_from_snapshot( + snapshot, + resume=True, + diff_snapshots=True + ) except AssertionError as error: # Check if proper error is returned. 
assert "Cannot perform open on the snapshot backing file:" \ @@ -544,7 +558,11 @@ def test_negative_snapshot_permissions(bin_cloner_path): os.chmod(snapshot.disks[0], 0o000) try: - _, _ = vm_builder.build_from_snapshot(snapshot, True, True) + _, _ = vm_builder.build_from_snapshot( + snapshot, + resume=True, + diff_snapshots=True + ) except AssertionError as error: # Check if proper error is returned. assert "Block(BackingFile(Os { code: 13, kind: PermissionDenied" \ diff --git a/tests/integration_tests/functional/test_uffd.py b/tests/integration_tests/functional/test_uffd.py index 3fd282f9ddd..49ab5aed92d 100644 --- a/tests/integration_tests/functional/test_uffd.py +++ b/tests/integration_tests/functional/test_uffd.py @@ -1,15 +1,21 @@ # Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Test UFFD related functionality when resuming from snapshot.""" + import logging import os +from subprocess import TimeoutExpired + +import stat from framework.artifacts import SnapshotMemBackendType from framework.builder import MicrovmBuilder, SnapshotBuilder -from framework.utils import run_cmd +from framework.utils import run_cmd, UffdHandler import host_tools.network as net_tools +SOCKET_PATH = "/firecracker-uffd.sock" + def create_snapshot(bin_cloner_path): """Create a snapshot of a microVM.""" @@ -19,6 +25,14 @@ def create_snapshot(bin_cloner_path): root_disk = vm_instance.disks[0] ssh_key = vm_instance.ssh_key + # Add a memory balloon. + response = basevm.balloon.put( + amount_mib=0, + deflate_on_oom=True, + stats_polling_interval_s=0 + ) + assert basevm.api_session.is_status_no_content(response.status_code) + basevm.start() ssh_connection = net_tools.SSHConnection(basevm.ssh_config) @@ -38,6 +52,43 @@ def create_snapshot(bin_cloner_path): return snapshot +def spawn_pf_handler(vm, handler_path, mem_path): + """Spawn page fault handler process.""" + # Copy snapshot memory file into chroot of microVM. 
+ jailed_mem = vm.create_jailed_resource(mem_path) + # Copy the valid page fault binary into chroot of microVM. + jailed_handler = vm.create_jailed_resource(handler_path) + + handler_name = os.path.basename(jailed_handler) + args = [SOCKET_PATH, jailed_mem] + + uffd_handler = UffdHandler(handler_name, args) + real_root = os.open("/", os.O_RDONLY) + + os.chroot(vm.chroot()) + os.chdir('/') + st = os.stat(handler_name) + os.chmod(handler_name, st.st_mode | stat.S_IEXEC) + + uffd_handler.spawn() + try: + outs, errs = uffd_handler.proc().communicate(timeout=1) + print(outs) + print(errs) + assert False, "Could not start PF handler!" + except TimeoutExpired: + print("This is the good case!") + + # The page fault handler will create the socket path with root rights. + # Change rights to the jailer's. + os.chown(SOCKET_PATH, vm.jailer.uid, vm.jailer.gid) + + os.fchdir(real_root) + os.chroot(".") + + return uffd_handler + + def test_bad_socket_path(bin_cloner_path, test_microvm_with_api): """ Test error scenario when socket path does not exist. @@ -100,3 +151,50 @@ def test_unbinded_socket(bin_cloner_path, test_microvm_with_api): assert "Load microVM snapshot error: Cannot connect to UDS in order to" \ " send information on handling guest memory page-faults due to: " \ "Connection refused (os error 111)" in response.text + + +def test_valid_handler(bin_cloner_path, + test_microvm_with_api, + uffd_handler_paths): + """ + Test valid uffd handler scenario. + + @type: functional + """ + logger = logging.getLogger("uffd_unbinded_socket") + + logger.info("Create snapshot") + snapshot = create_snapshot(bin_cloner_path) + + logger.info("Load snapshot, mem %s", snapshot.mem) + vm_builder = MicrovmBuilder(bin_cloner_path) + vm = test_microvm_with_api + vm.spawn() + + # Spawn page fault handler process. 
+ _pf_handler = spawn_pf_handler( + vm, + uffd_handler_paths['valid_handler'], + snapshot.mem + ) + + vm, _ = vm_builder.build_from_snapshot(snapshot, vm=vm, + resume=True, + uffd_path=SOCKET_PATH) + + # Inflate balloon. + response = vm.balloon.patch(amount_mib=200) + assert vm.api_session.is_status_no_content( + response.status_code + ) + + # Deflate balloon. + response = vm.balloon.patch(amount_mib=0) + assert vm.api_session.is_status_no_content( + response.status_code + ) + + # Verify if guest can run commands. + ssh_connection = net_tools.SSHConnection(vm.ssh_config) + exit_code, _, _ = ssh_connection.execute_command("sync") + assert exit_code == 0 diff --git a/tests/integration_tests/functional/test_vsock.py b/tests/integration_tests/functional/test_vsock.py index 2984ae065ca..b7642154679 100644 --- a/tests/integration_tests/functional/test_vsock.py +++ b/tests/integration_tests/functional/test_vsock.py @@ -246,9 +246,11 @@ def test_vsock_transport_reset( test_vm.kill() # Load snapshot. - test_vm, _ = vm_builder.build_from_snapshot(snapshot, - True, - False) + test_vm, _ = vm_builder.build_from_snapshot( + snapshot, + resume=True, + diff_snapshots=False + ) # Check that vsock device still works. # Test guest-initiated connections. diff --git a/tests/integration_tests/performance/test_snapshot_perf.py b/tests/integration_tests/performance/test_snapshot_perf.py index f2860139f2b..49c9f23ace0 100644 --- a/tests/integration_tests/performance/test_snapshot_perf.py +++ b/tests/integration_tests/performance/test_snapshot_perf.py @@ -172,8 +172,8 @@ def snapshot_resume_producer( """Produce results for snapshot resume tests.""" microvm, metrics_fifo = vm_builder.build_from_snapshot( snapshot, - True, - snapshot_type == SnapshotType.DIFF, + resume=True, + diff_snapshots=snapshot_type == SnapshotType.DIFF, use_ramdisk=use_ramdisk) # Attempt to connect to resumed microvm. 
From 2a0c0d01e580b5d044f7d028f0400e794009f0de Mon Sep 17 00:00:00 2001 From: Luminita Voicu Date: Wed, 13 Apr 2022 14:58:01 +0300 Subject: [PATCH 16/22] temporary change artifact root bucket to dev Signed-off-by: Luminita Voicu --- tests/framework/artifacts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/framework/artifacts.py b/tests/framework/artifacts.py index b2933b7711a..501c2285ead 100644 --- a/tests/framework/artifacts.py +++ b/tests/framework/artifacts.py @@ -304,7 +304,7 @@ class ArtifactCollection: PLATFORM = platform.machine() # S3 bucket structure. - ARTIFACTS_ROOT = 'ci-artifacts' + ARTIFACTS_ROOT = 'ci-artifacts-uffd' ARTIFACTS_DISKS = '/disks/' + PLATFORM + "/" ARTIFACTS_KERNELS = '/kernels/' + PLATFORM + "/" ARTIFACTS_MICROVMS = '/microvms/' From a3354479b33db8c7e4431f61d715fed5658a561a Mon Sep 17 00:00:00 2001 From: Luminita Voicu Date: Mon, 18 Apr 2022 15:10:00 +0300 Subject: [PATCH 17/22] CI: add malicious uffd page fault handler Signed-off-by: Luminita Voicu --- tests/conftest.py | 8 + tests/framework/builder.py | 9 +- tests/framework/resources.py | 11 +- .../uffd/src/bin/malicious_handler.rs | 32 ++ .../host_tools/uffd/src/bin/valid_handler.rs | 305 ++---------------- tests/host_tools/uffd/src/lib.rs | 4 + tests/host_tools/uffd/src/uffd_utils.rs | 247 ++++++++++++++ .../integration_tests/functional/test_uffd.py | 52 +++ 8 files changed, 388 insertions(+), 280 deletions(-) create mode 100644 tests/host_tools/uffd/src/bin/malicious_handler.rs create mode 100644 tests/host_tools/uffd/src/lib.rs create mode 100644 tests/host_tools/uffd/src/uffd_utils.rs diff --git a/tests/conftest.py b/tests/conftest.py index 8aec6d60729..86788f3a8a9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -423,8 +423,16 @@ def uffd_handler_paths(test_fc_session_root_path): ) ) + malicious_handler = os.path.normpath( + os.path.join( + release_binaries_path, + 'malicious_handler' + ) + ) + yield { 'valid_handler': valid_handler, + 
'malicious_handler': malicious_handler, } diff --git a/tests/framework/builder.py b/tests/framework/builder.py index 895f261f13f..cc2d08e0d6f 100644 --- a/tests/framework/builder.py +++ b/tests/framework/builder.py @@ -182,7 +182,8 @@ def build_from_snapshot(self, # If specified, establishes that page-faults # resulted when loading the guest memory # are handled by a dedicated UFFD PF handler. - uffd_path=None): + uffd_path=None, + timeout=None): """Build a microvm from a snapshot artifact.""" if vm is None: vm = init_microvm(self.root_path, self.bin_cloner_path, @@ -235,12 +236,14 @@ def build_from_snapshot(self, response = vm.snapshot.load(mem_backend=mem_backend, snapshot_path=jailed_vmstate, diff=diff_snapshots, - resume=resume) + resume=resume, + timeout=timeout) else: response = vm.snapshot.load(mem_file_path=jailed_mem, snapshot_path=jailed_vmstate, diff=diff_snapshots, - resume=resume) + resume=resume, + timeout=timeout) status_ok = vm.api_session.is_status_no_content(response.status_code) # Verify response status and cleanup if needed before assert. 
diff --git a/tests/framework/resources.py b/tests/framework/resources.py index 6f42240d047..67a7af70a54 100644 --- a/tests/framework/resources.py +++ b/tests/framework/resources.py @@ -454,12 +454,13 @@ def __init__(self, api_usocket_full_name, api_session): self._snapshot_cfg_url = api_url + self.SNAPSHOT_LOAD_URL self._api_session = api_session - def put(self, **args): + def put(self, timeout=None, **args): """Load a snapshot of the microvm.""" datax = self.create_json(**args) return self._api_session.put( "{}".format(self._snapshot_cfg_url), - json=datax + json=datax, + timeout=timeout ) @staticmethod @@ -515,7 +516,8 @@ def load( diff=False, resume=False, mem_file_path=None, - mem_backend=None + mem_backend=None, + timeout=None ): """Load a snapshot of the microvm.""" response = self._load.put( @@ -523,7 +525,8 @@ def load( diff=diff, resume=resume, mem_file_path=mem_file_path, - mem_backend=mem_backend + mem_backend=mem_backend, + timeout=timeout ) if resume and "unknown field `resume_vm`" in response.text: diff --git a/tests/host_tools/uffd/src/bin/malicious_handler.rs b/tests/host_tools/uffd/src/bin/malicious_handler.rs new file mode 100644 index 00000000000..0cc3790ad4d --- /dev/null +++ b/tests/host_tools/uffd/src/bin/malicious_handler.rs @@ -0,0 +1,32 @@ +// Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Provides functionality for a malicious page fault handler +//! which panics when a page fault occurs. + +use nix::poll::{poll, PollFd, PollFlags}; +use std::os::unix::io::AsRawFd; +use uffd::uffd_utils::create_pf_handler; + +fn main() { + let uffd_handler = create_pf_handler(); + let pollfd = PollFd::new(uffd_handler.uffd.as_raw_fd(), PollFlags::POLLIN); + + // Loop, handling incoming events on the userfaultfd file descriptor. + loop { + let _ = poll(&mut [pollfd], -1).expect("Failed to poll"); + + // Read an event from the userfaultfd. 
+ let event = uffd_handler + .uffd + .read_event() + .expect("Failed to read uffd_msg") + .expect("uffd_msg not ready"); + + // We expect to receive either a Page Fault or Removed + // event (if the balloon device is enabled). + if let userfaultfd::Event::Pagefault { .. } = event { + panic!("Fear me! I am the malicious page fault handler.") + } + } +} diff --git a/tests/host_tools/uffd/src/bin/valid_handler.rs b/tests/host_tools/uffd/src/bin/valid_handler.rs index 7750041fc03..fc11d10c28c 100644 --- a/tests/host_tools/uffd/src/bin/valid_handler.rs +++ b/tests/host_tools/uffd/src/bin/valid_handler.rs @@ -5,287 +5,46 @@ //! which loads the whole region from the backing memory file //! when a page fault occurs. -use std::collections::HashMap; -use std::fs::File; -use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd}; -use std::os::unix::net::{UnixListener, UnixStream}; -use std::{mem, ptr}; - -use libc::c_void; use nix::poll::{poll, PollFd, PollFlags}; -use nix::sys::mman::{mmap, MapFlags, ProtFlags}; -use serde::Deserialize; -use userfaultfd::Uffd; -use utils::get_page_size; - -use utils::sock_ctrl_msg::ScmSocket; - -// This is the same with the one used in src/vmm. -/// This describes the mapping between Firecracker base virtual address and offset in the -/// buffer or file backend for a guest memory region. It is used to tell an external -/// process/thread where to populate the guest memory data for this range. -/// -/// E.g. Guest memory contents for a region of `size` bytes can be found in the backend -/// at `offset` bytes from the beginning, and should be copied/populated into `base_host_address`. -#[derive(Clone, Debug, Deserialize)] -pub struct GuestRegionUffdMapping { - /// Base host virtual address where the guest memory contents for this region - /// should be copied/populated. - pub base_host_virt_addr: u64, - /// Region size. - pub size: usize, - /// Offset in the backend file/buffer where the region contents are. 
- pub offset: u64, -} - -struct MemRegion { - mapping: GuestRegionUffdMapping, - page_states: HashMap, -} - -struct UffdPfHandler { - mem_regions: Vec, - backing_buffer: *const u8, - uffd: Uffd, - // Not currently used but included to demonstrate how a page fault handler can - // fetch Firecracker's PID in order to make it aware of any crashes/exits. - _firecracker_pid: u32, -} - -#[derive(Clone)] -enum MemPageState { - Uninitialized, - FromFile, - Removed, - Anonymous, -} - -impl UffdPfHandler { - pub fn from_unix_stream(stream: UnixStream, data: *const u8, size: usize) -> Self { - let mut message_buf = vec![0u8; 1024]; - let (bytes_read, file) = stream - .recv_with_fd(&mut message_buf[..]) - .expect("Cannot recv_with_fd"); - message_buf.resize(bytes_read, 0); - - let body = String::from_utf8(message_buf).unwrap(); - let file = file.expect("Uffd not passed through UDS!"); - - let mappings = serde_json::from_str::>(&body) - .expect("Cannot deserialize memory mappings."); - let memsize: usize = mappings.iter().map(|r| r.size).sum(); - - // Make sure memory size matches backing data size. 
- assert_eq!(memsize, size); - - let uffd = unsafe { Uffd::from_raw_fd(file.into_raw_fd()) }; +use std::os::unix::io::AsRawFd; - let creds: libc::ucred = get_peer_process_credentials(stream); +use uffd::uffd_utils::{create_pf_handler, MemPageState}; - let mem_regions = create_mem_regions(&mappings); - - Self { - mem_regions, - backing_buffer: data, - uffd, - _firecracker_pid: creds.pid as u32, - } - } - - fn update_mem_state_mappings(&mut self, start: u64, end: u64, state: &MemPageState) { - for region in self.mem_regions.iter_mut() { - for (key, value) in region.page_states.iter_mut() { - if key >= &start && key < &end { - *value = state.clone(); - } - } - } - } - - fn populate_from_file(&self, region: &MemRegion) -> (u64, u64) { - let src = self.backing_buffer as u64 + region.mapping.offset; - let start_addr = region.mapping.base_host_virt_addr; - let len = region.mapping.size; - // Populate whole region from backing mem-file. - // This offers an example of how memory can be loaded in RAM, - // however this can be adjusted to accommodate use case needs. - let ret = unsafe { - self.uffd - .copy(src as *const _, start_addr as *mut _, len, true) - .expect("Uffd copy failed") - }; - - // Make sure the UFFD copied some bytes. - assert!(ret > 0); - - return (start_addr, start_addr + len as u64); - } - - fn zero_out(&mut self, addr: u64) -> (u64, u64) { - let page_size = get_page_size().unwrap(); - - let ret = unsafe { - self.uffd - .zeropage(addr as *mut _, page_size, true) - .expect("Uffd zeropage failed") - }; - // Make sure the UFFD zeroed out some bytes. - assert!(ret > 0); - - return (addr, addr + page_size as u64); - } - - fn serve_pf(&mut self, addr: *mut u8) { - let page_size = get_page_size().unwrap(); +fn main() { + let mut uffd_handler = create_pf_handler(); - // Find the start of the page that the current faulting address belongs to. 
- let dst = (addr as usize & !(page_size as usize - 1)) as *mut c_void; - let fault_page_addr = dst as u64; + let pollfd = PollFd::new(uffd_handler.uffd.as_raw_fd(), PollFlags::POLLIN); - // Get the state of the current faulting page. - for region in self.mem_regions.iter() { - match region.page_states.get(&fault_page_addr) { - // Our simple PF handler has a simple strategy: - // There exist 4 states in which a memory page can be in: - // 1. Uninitialized - page was never touched - // 2. FromFile - the page is populated with content from snapshotted memory file - // 3. Removed - MADV_DONTNEED was called due to balloon inflation - // 4. Anonymous - page was zeroed out -> this implies that more than one page fault - // event was received. This can be a consequence of guest reclaiming back its - // memory from the host (through balloon device) - Some(MemPageState::Uninitialized) | Some(MemPageState::FromFile) => { - let (start, end) = self.populate_from_file(region); - self.update_mem_state_mappings(start, end, &MemPageState::FromFile); - return; - } - Some(MemPageState::Removed) | Some(MemPageState::Anonymous) => { - let (start, end) = self.zero_out(fault_page_addr); - self.update_mem_state_mappings(start, end, &MemPageState::Anonymous); - return; - } - None => { - (); - } - } - } + // Loop, handling incoming events on the userfaultfd file descriptor. + loop { + // See what poll() tells us about the userfaultfd. + let nready = poll(&mut [pollfd], -1).expect("Failed to poll"); - panic!( - "Could not find addr: {:?} within guest region mappings.", - addr + let revents = pollfd.revents().unwrap(); + println!( + "poll() returns: nready = {}; POLLIN = {}; POLLERR = {}", + nready, + revents.contains(PollFlags::POLLIN), + revents.contains(PollFlags::POLLERR), ); - } - - fn run_loop(&mut self) { - let pollfd = PollFd::new(self.uffd.as_raw_fd(), PollFlags::POLLIN); - - // Loop, handling incoming events on the userfaultfd file descriptor. 
- loop { - // See what poll() tells us about the userfaultfd. - let nready = poll(&mut [pollfd], -1).expect("Failed to poll"); - - let revents = pollfd.revents().unwrap(); - println!( - "poll() returns: nready = {}; POLLIN = {}; POLLERR = {}", - nready, - revents.contains(PollFlags::POLLIN), - revents.contains(PollFlags::POLLERR), - ); - - // Read an event from the userfaultfd. - let event = self - .uffd - .read_event() - .expect("Failed to read uffd_msg") - .expect("uffd_msg not ready"); - - // We expect to receive either a Page Fault or Removed - // event (if the balloon device is enabled). - match event { - userfaultfd::Event::Pagefault { addr, .. } => self.serve_pf(addr as *mut u8), - userfaultfd::Event::Remove { start, end } => self.update_mem_state_mappings( - start as *mut u8 as u64, - end as *mut u8 as u64, - &MemPageState::Removed, - ), - _ => panic!("Unexpected event on userfaultfd"), - } - } - } -} - -fn get_peer_process_credentials(stream: UnixStream) -> libc::ucred { - let mut creds: libc::ucred = libc::ucred { - pid: 0, - gid: 0, - uid: 0, - }; - let mut creds_size = mem::size_of::() as u32; - - let ret = unsafe { - libc::getsockopt( - stream.as_raw_fd(), - libc::SOL_SOCKET, - libc::SO_PEERCRED, - &mut creds as *mut _ as *mut _, - &mut creds_size as *mut libc::socklen_t, - ) - }; - if ret != 0 { - panic!("Failed to get peer process credentials"); - } - - creds -} - -fn create_mem_regions(mappings: &Vec) -> Vec { - let page_size = get_page_size().unwrap(); - let mut mem_regions: Vec = Vec::with_capacity(mappings.len()); - for r in mappings.iter() { - let mapping = r.clone(); - let mut addr = r.base_host_virt_addr; - let end_addr = r.base_host_virt_addr + r.size as u64; - let mut page_states = HashMap::new(); - - while addr < end_addr { - page_states.insert(addr, MemPageState::Uninitialized); - addr += page_size as u64; + // Read an event from the userfaultfd. 
+ let event = uffd_handler + .uffd + .read_event() + .expect("Failed to read uffd_msg") + .expect("uffd_msg not ready"); + + // We expect to receive either a Page Fault or Removed + // event (if the balloon device is enabled). + match event { + userfaultfd::Event::Pagefault { addr, .. } => uffd_handler.serve_pf(addr as *mut u8), + userfaultfd::Event::Remove { start, end } => uffd_handler.update_mem_state_mappings( + start as *mut u8 as u64, + end as *mut u8 as u64, + &MemPageState::Removed, + ), + _ => panic!("Unexpected event on userfaultfd"), } - mem_regions.push(MemRegion { - mapping, - page_states, - }); } - - mem_regions -} - -fn main() { - let uffd_sock_path = std::env::args().nth(1).expect("No socket path given"); - let mem_file_path = std::env::args().nth(2).expect("No memory file given"); - - let file = File::open(mem_file_path).expect("Cannot open memfile"); - let size = file.metadata().unwrap().len() as usize; - - // mmap a memory area used to bring in the faulting regions. - let memfile_buffer = unsafe { - mmap( - ptr::null_mut(), - size, - ProtFlags::PROT_READ, - MapFlags::MAP_PRIVATE, - file.as_raw_fd(), - 0, - ) - .expect("mmap failed") - } as *const u8; - - // Get Uffd from UDS. We'll use the uffd to handle PFs for Firecracker. - let listener = UnixListener::bind(&uffd_sock_path).expect("Cannot bind to socket path"); - - let (stream, _) = listener.accept().expect("Cannot listen on UDS socket"); - let mut uffd_handler = UffdPfHandler::from_unix_stream(stream, memfile_buffer, size); - - uffd_handler.run_loop(); - println!("Uffd thread done!"); } diff --git a/tests/host_tools/uffd/src/lib.rs b/tests/host_tools/uffd/src/lib.rs new file mode 100644 index 00000000000..a27e955e4c8 --- /dev/null +++ b/tests/host_tools/uffd/src/lib.rs @@ -0,0 +1,4 @@ +// Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +pub mod uffd_utils; diff --git a/tests/host_tools/uffd/src/uffd_utils.rs b/tests/host_tools/uffd/src/uffd_utils.rs new file mode 100644 index 00000000000..12795c8eb1c --- /dev/null +++ b/tests/host_tools/uffd/src/uffd_utils.rs @@ -0,0 +1,247 @@ +// Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::HashMap; +use std::fs::File; +use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd}; +use std::os::unix::net::{UnixListener, UnixStream}; +use std::{mem, ptr}; + +use libc::c_void; +use nix::sys::mman::{mmap, MapFlags, ProtFlags}; +use serde::Deserialize; +use userfaultfd::Uffd; +use utils::get_page_size; + +use utils::sock_ctrl_msg::ScmSocket; + +// This is the same as the one used in src/vmm. +/// This describes the mapping between Firecracker base virtual address and offset in the +/// buffer or file backend for a guest memory region. It is used to tell an external +/// process/thread where to populate the guest memory data for this range. +/// +/// E.g. Guest memory contents for a region of `size` bytes can be found in the backend +/// at `offset` bytes from the beginning, and should be copied/populated into `base_host_virt_addr`. +#[derive(Clone, Debug, Deserialize)] +pub struct GuestRegionUffdMapping { + /// Base host virtual address where the guest memory contents for this region + /// should be copied/populated. + pub base_host_virt_addr: u64, + /// Region size. + pub size: usize, + /// Offset in the backend file/buffer where the region contents are. + pub offset: u64, +} + +struct MemRegion { + mapping: GuestRegionUffdMapping, + page_states: HashMap<u64, MemPageState>, +} + +pub struct UffdPfHandler { + mem_regions: Vec<MemRegion>, + backing_buffer: *const u8, + pub uffd: Uffd, + // Not currently used but included to demonstrate how a page fault handler can + // fetch Firecracker's PID in order to make it aware of any crashes/exits. 
+ _firecracker_pid: u32, +} + +#[derive(Clone)] +pub enum MemPageState { + Uninitialized, + FromFile, + Removed, + Anonymous, +} + +impl UffdPfHandler { + pub fn from_unix_stream(stream: UnixStream, data: *const u8, size: usize) -> Self { + let mut message_buf = vec![0u8; 1024]; + let (bytes_read, file) = stream + .recv_with_fd(&mut message_buf[..]) + .expect("Cannot recv_with_fd"); + message_buf.resize(bytes_read, 0); + + let body = String::from_utf8(message_buf).unwrap(); + let file = file.expect("Uffd not passed through UDS!"); + + let mappings = serde_json::from_str::<Vec<GuestRegionUffdMapping>>(&body) + .expect("Cannot deserialize memory mappings."); + let memsize: usize = mappings.iter().map(|r| r.size).sum(); + + // Make sure memory size matches backing data size. + assert_eq!(memsize, size); + + let uffd = unsafe { Uffd::from_raw_fd(file.into_raw_fd()) }; + + let creds: libc::ucred = get_peer_process_credentials(stream); + + let mem_regions = create_mem_regions(&mappings); + + Self { + mem_regions, + backing_buffer: data, + uffd, + _firecracker_pid: creds.pid as u32, + } + } + + pub fn update_mem_state_mappings(&mut self, start: u64, end: u64, state: &MemPageState) { + for region in self.mem_regions.iter_mut() { + for (key, value) in region.page_states.iter_mut() { + if key >= &start && key < &end { + *value = state.clone(); + } + } + } + } + + fn populate_from_file(&self, region: &MemRegion) -> (u64, u64) { + let src = self.backing_buffer as u64 + region.mapping.offset; + let start_addr = region.mapping.base_host_virt_addr; + let len = region.mapping.size; + // Populate whole region from backing mem-file. + // This offers an example of how memory can be loaded in RAM, + // however this can be adjusted to accommodate use case needs. + let ret = unsafe { + self.uffd + .copy(src as *const _, start_addr as *mut _, len, true) + .expect("Uffd copy failed") + }; + + // Make sure the UFFD copied some bytes. 
+ assert!(ret > 0); + + return (start_addr, start_addr + len as u64); + } + + fn zero_out(&mut self, addr: u64) -> (u64, u64) { + let page_size = get_page_size().unwrap(); + + let ret = unsafe { + self.uffd + .zeropage(addr as *mut _, page_size, true) + .expect("Uffd zeropage failed") + }; + // Make sure the UFFD zeroed out some bytes. + assert!(ret > 0); + + return (addr, addr + page_size as u64); + } + + pub fn serve_pf(&mut self, addr: *mut u8) { + let page_size = get_page_size().unwrap(); + + // Find the start of the page that the current faulting address belongs to. + let dst = (addr as usize & !(page_size as usize - 1)) as *mut c_void; + let fault_page_addr = dst as u64; + + // Get the state of the current faulting page. + for region in self.mem_regions.iter() { + match region.page_states.get(&fault_page_addr) { + // Our simple PF handler has a simple strategy: + // There exist 4 states in which a memory page can be in: + // 1. Uninitialized - page was never touched + // 2. FromFile - the page is populated with content from snapshotted memory file + // 3. Removed - MADV_DONTNEED was called due to balloon inflation + // 4. Anonymous - page was zeroed out -> this implies that more than one page fault + // event was received. 
This can be a consequence of guest reclaiming back its + // memory from the host (through balloon device) + Some(MemPageState::Uninitialized) | Some(MemPageState::FromFile) => { + let (start, end) = self.populate_from_file(region); + self.update_mem_state_mappings(start, end, &MemPageState::FromFile); + return; + } + Some(MemPageState::Removed) | Some(MemPageState::Anonymous) => { + let (start, end) = self.zero_out(fault_page_addr); + self.update_mem_state_mappings(start, end, &MemPageState::Anonymous); + return; + } + None => { + (); + } + } + } + + panic!( + "Could not find addr: {:?} within guest region mappings.", + addr + ); + } +} + +fn get_peer_process_credentials(stream: UnixStream) -> libc::ucred { + let mut creds: libc::ucred = libc::ucred { + pid: 0, + gid: 0, + uid: 0, + }; + let mut creds_size = mem::size_of::<libc::ucred>() as u32; + + let ret = unsafe { + libc::getsockopt( + stream.as_raw_fd(), + libc::SOL_SOCKET, + libc::SO_PEERCRED, + &mut creds as *mut _ as *mut _, + &mut creds_size as *mut libc::socklen_t, + ) + }; + if ret != 0 { + panic!("Failed to get peer process credentials"); + } + + creds +} + +fn create_mem_regions(mappings: &Vec<GuestRegionUffdMapping>) -> Vec<MemRegion> { + let page_size = get_page_size().unwrap(); + let mut mem_regions: Vec<MemRegion> = Vec::with_capacity(mappings.len()); + + for r in mappings.iter() { + let mapping = r.clone(); + let mut addr = r.base_host_virt_addr; + let end_addr = r.base_host_virt_addr + r.size as u64; + let mut page_states = HashMap::new(); + + while addr < end_addr { + page_states.insert(addr, MemPageState::Uninitialized); + addr += page_size as u64; + } + mem_regions.push(MemRegion { + mapping, + page_states, + }); + } + + mem_regions +} + +pub fn create_pf_handler() -> UffdPfHandler { + let uffd_sock_path = std::env::args().nth(1).expect("No socket path given"); + let mem_file_path = std::env::args().nth(2).expect("No memory file given"); + + let file = File::open(mem_file_path).expect("Cannot open memfile"); + let size = file.metadata().unwrap().len() 
as usize; + + // mmap a memory area used to bring in the faulting regions. + let memfile_buffer = unsafe { + mmap( + ptr::null_mut(), + size, + ProtFlags::PROT_READ, + MapFlags::MAP_PRIVATE, + file.as_raw_fd(), + 0, + ) + .expect("mmap failed") + } as *const u8; + + // Get Uffd from UDS. We'll use the uffd to handle PFs for Firecracker. + let listener = UnixListener::bind(&uffd_sock_path).expect("Cannot bind to socket path"); + + let (stream, _) = listener.accept().expect("Cannot listen on UDS socket"); + + UffdPfHandler::from_unix_stream(stream, memfile_buffer, size) +} diff --git a/tests/integration_tests/functional/test_uffd.py b/tests/integration_tests/functional/test_uffd.py index 49ab5aed92d..482fffe12e7 100644 --- a/tests/integration_tests/functional/test_uffd.py +++ b/tests/integration_tests/functional/test_uffd.py @@ -4,10 +4,14 @@ import logging import os +import socket from subprocess import TimeoutExpired import stat +import requests +import urllib3 + from framework.artifacts import SnapshotMemBackendType from framework.builder import MicrovmBuilder, SnapshotBuilder from framework.utils import run_cmd, UffdHandler @@ -198,3 +202,51 @@ def test_valid_handler(bin_cloner_path, ssh_connection = net_tools.SSHConnection(vm.ssh_config) exit_code, _, _ = ssh_connection.execute_command("sync") assert exit_code == 0 + + +def test_malicious_handler(bin_cloner_path, + test_microvm_with_api, + uffd_handler_paths): + """ + Test malicious uffd handler scenario. + + The page fault handler panics when receiving a page fault, + so no events are handled and snapshot memory regions cannot be + loaded into memory. In this case, Firecracker is designed to freeze, + instead of silently switching to having the kernel handle page + faults, so that it becomes obvious that something went wrong. 
+ + @type: negative + """ + logger = logging.getLogger("uffd_unbinded_socket") + + logger.info("Create snapshot") + snapshot = create_snapshot(bin_cloner_path) + + logger.info("Load snapshot, mem %s", snapshot.mem) + vm_builder = MicrovmBuilder(bin_cloner_path) + vm = test_microvm_with_api + vm.spawn() + + # Spawn page fault handler process. + _pf_handler = spawn_pf_handler( + vm, + uffd_handler_paths['malicious_handler'], + snapshot.mem + ) + + # We expect Firecracker to freeze while resuming from a snapshot + # due to the malicious handler's unavailability. + try: + vm_builder.build_from_snapshot( + snapshot, vm=vm, + resume=True, + uffd_path=SOCKET_PATH, + timeout=30 + ) + assert False + except (socket.timeout, + urllib3.exceptions.ReadTimeoutError, + requests.exceptions.ReadTimeout) \ + as _err: + assert True, _err From d93430b8fae99d5e211f146a05a72bfc344dbfa1 Mon Sep 17 00:00:00 2001 From: Luminita Voicu Date: Thu, 21 Apr 2022 09:37:55 +0300 Subject: [PATCH 18/22] CI: check old snap/load API for deprecation msg The PUT snapshot/load `mem_file_path` field is under the deprecation policy. Check for the presence of deprecation header in the response when using it. Signed-off-by: Luminita Voicu --- tests/integration_tests/functional/test_api.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/integration_tests/functional/test_api.py b/tests/integration_tests/functional/test_api.py index d527272351d..6a7f89cb597 100644 --- a/tests/integration_tests/functional/test_api.py +++ b/tests/integration_tests/functional/test_api.py @@ -1740,3 +1740,14 @@ def test_negative_snapshot_load_api(bin_cloner_path): err_msg = "missing field: either `mem_backend` or " \ "`mem_file_path` is required" assert err_msg in response.text, response.text + + # Deprecated API should return deprecation response header. 
+ datax = { + 'snapshot_path': 'foo', + 'mem_file_path': 'bar' + } + response = vm.snapshot._load._api_session.put( + "{}".format(vm.snapshot._load._snapshot_cfg_url), + json=datax + ) + assert response.headers['deprecation'] From 7c296125bca58114d33a881d3b268a0b63930580 Mon Sep 17 00:00:00 2001 From: Luminita Voicu Date: Wed, 20 Apr 2022 18:01:31 +0300 Subject: [PATCH 19/22] docs: update with UFFD details Signed-off-by: Luminita Voicu --- docs/device-api.md | 2 + docs/images/uffd_flow1.png | Bin 0 -> 40049 bytes docs/images/uffd_flow2.png | Bin 0 -> 33307 bytes docs/images/uffd_flow3.png | Bin 0 -> 33602 bytes docs/images/uffd_flow4.png | Bin 0 -> 32662 bytes ...handling-page-faults-on-snapshot-resume.md | 133 ++++++++++++++++++ docs/snapshotting/snapshot-support.md | 53 ++++++- 7 files changed, 181 insertions(+), 7 deletions(-) create mode 100644 docs/images/uffd_flow1.png create mode 100644 docs/images/uffd_flow2.png create mode 100644 docs/images/uffd_flow3.png create mode 100644 docs/images/uffd_flow4.png create mode 100644 docs/snapshotting/handling-page-faults-on-snapshot-resume.md diff --git a/docs/device-api.md b/docs/device-api.md index 295b26ada2e..a95c9bbc98c 100644 --- a/docs/device-api.md +++ b/docs/device-api.md @@ -59,7 +59,9 @@ specification: [firecracker.yaml](./../src/api_server/swagger/firecracker.yaml). 
| `InstanceActionInfo` | action_type | O | O | O | O | O | | `LoadSnapshotParams` | enable_diff_snapshots | O | O | O | O | O | | | mem_file_path | O | O | O | O | O | +| | mem_backend | O | O | O | O | O | | | snapshot_path | O | O | O | O | O | +| | resume_vm | O | O | O | O | O | | `Logger` | level | O | O | O | O | O | | | log_path | O | O | O | O | O | | | show_level | O | O | O | O | O | diff --git a/docs/images/uffd_flow1.png b/docs/images/uffd_flow1.png new file mode 100644 index 0000000000000000000000000000000000000000..a8150769a50aeb5246c72c6bceab663730652b54 GIT binary patch literal 40049 zcmce-c|4Tg`#&COQCi57C>lc)GJ_dY$TG}k9s5$k7zQ(#v2R(5Cc=!RP-JPbmVIkq zEF~h@B}Gl3Bzt89WeLTK@{T@6XbME^-_c_wHu3M;BV{hAX*wF?^+!(b90ZqqnyriQ?!=ccd~DX)eCt7I;qeB2mZ`7t%j{;7V|~f|7~? z9A*xKODk)GF3Q>}N{Xs*nC(CP9bH{${~AzLQ3(tnYYl@*E9-!k0fpul2z~?A5Do|x zIndJf_V#qKa&aP1z-;=e%1A|J1m_h3*4)rQ8mYN<(e)qF8HVm^p85g$HaIkutZU=}^Cx-# zL$bdMok8)U{nHJmq6q%}dEgu5?efoEl8ZmZ8O#cns|UFEFE)YkDgR=OzMD4!@2i9N zv0^x&&{%U-y0P*P^(?InP_}*yRZl$zmdcC57f}=}fmKVO5RsfiypJHL{zh5zIKi9X!_|d8jD|5d&P^b%Fz}4e>@ahN+*K zs+F;u0haD=s27ZOaj~Tv8hUH{dbqn;1#v{h%+WB|+!(1(Q1dYJ(zT>II$_B`RCQ2J zYDfki=cq(iwInDhTf^v1re-t>Rn;4Z0gv@a`T>3}K|x9kB`0GWa091mZA~}xrRmuE zV1t$YtSti73~c?am2nJ|flYuC9boeI^=;5qT|6_VTu%U<~j<*5)>b zIGUb{wShIx%ikYu6O48b_VIJ_vvqXycgN7Ze2h)-Mjmc1CMsS;ge8_?g)vql6HVc8 zUuQj%XMl&Rs|CXs5kU08GCXWyL3kq^gN`HHqUpx2Zb%!VF#^M&nt8)8E+%LqJ}AJ} zj7|FT+;y1W>&jc^uZnj3gxg2P$j5N2+{7P?+R zNF{xDECESyg<1QlI@%bZeaKeI<^*$71h5)97G~O5?;!9_ptGel9!d3sTUzU4mHe%U z*19G%8&eBqBOPtHwRNC}av&iPW`rb>d{xabhJHx6yIz2=kpUP)9}(d)kHFtK@2XA^=xOzMJ7})qL zqbRBvWp67#AF?Uj)R#i=hI?augFL-paCcKMpa&Ud<%=*Tcw1T+IRyq1JrNim4?T0F zBi%&Jji?-KqwDDD=4jxqZ0$`BFw@7vf$h?F^Ym5?GH~&QnK8@|BsCbr$x`2#W{gty zgPWN9n){QnI%=L+3nUq1j_{(Y`sllRbJ)kx2rfbX1b;mxW3q=S9!^C%x&$FG)@}rM z9gkp74EN`zX^bEXhjB7UmXCI4@_Sd$6_%co<+xaCAc9om4QM za9vwtQ*5w_ZGfI9THl0hLBkP|+Cjv?AVM(G8b`OJ(7|x}hImhJPdd`T+}wg}MfY&S z87Ud!5O4#yrK`1H0L9cjnBnH-3P;-b>v&RaNv=8`XgyU!Qyp7$keewc&{@aZ&D`DB 
z-9^<=KbVZfV2sg7KTk6(M=3fZZ2d3@e}=NLiWAisWA0=fXA(v3;x0m?px0S2lx3lkDTn@lh@M|eB?GxUtST#SfVeYA(4vo_KI zqm8yQu*8}n^bLc+X6abFFpQB#2$;T)t~HEARYC!>gG`Xd0seX(u0&gHqL;Q0+{fSF zJ3xi(Z)0c#_Q~4>kJ9sV_6gbYP=&A+;8$*<%7me(y>TYi78texTWWdZ|HZ+D0o#?FZNcHh^3D7pidIq`&`H@K` zf!=;bMu9;82g1EAdGk&SksbD%HY5@kmB;YbX^9PUr#oH=j9U?(>s5}{&dtmZ+csyPw8j4Z%i ziWvfJ;G<%Q4%RaRFhn13YisCa8PiSKm}E&M zgTB55X9PI8ZmuMT5eX5US zda5QaXf+#sHJmlw+B29;Fy_1jM_LnMNNbN^nzyB+u@A$@RhdjOrxU<4BXd0?TbLf1 z9XYcG`zdQ1fgVa&D-&;XS8#`l(Q)@PQL*vESOEZ}Z*ETIY_6*_ z2b3uTh^hY{XznkxuvHZjr4FxN}^9Ab(Nei)fjv-ILMG{4ngrgiY_ z@;Q&Xh*pj10i>zsa^HyySS)sTFE)bb-~UVQZ? zA5q=&^yyQ@3JKa}&SgO;&O8A&fD80fJcy=6a@e>Kw870DBidtNH=-SOh?1dV--pKkmz-c1jFt!tFH zBAX$vNvwOLECVa;e*}2ICw4~h7UxYt4sUk-pu=r|&3lYaJ5bSez?hHJ%MQtkVxTJoN zV9*tIdgX(NT_^A3FD{PuMe-CM2YlV9ph@6xXiZ9sng8sjw;qR(i>Clhb}eNRylOWh z$O7LdKVCFP{Sv2NexLR&8J5Gwc|tFq2CH9!yz}Ag?{gSfkoc?gioZJ}1(=44f=@_t zP4l`zSfeMCbZYci`C$>L(MzD;3gU0CG9W0TXT$9^I4ih(0&s7c!#&A10USK>+R?z$ zYpxz0vh2z`&9Gbu=V=s&S@DKo-M#Z;Q=Dld1emkicn@c5-I68_D4#A{`Q~AWYc4~u z0DX6)!4ecGbosw$MIBr;DNn@zUEFEzR&=?DO#*>?N&4UkCbX#+Om}Z}k6GQDSE=Q= z(UnxL=i(9X5SFky9rvX;i7k#n`%C&yzYBKs4YC(XY(x$U#=vVWt znYs6=&6_-G(G<`rU8ZYL7Q7J05x0)#bUQHlJ&q{9eZ=^vsP&x8gjOXHXM^0Wcn+MD zaYnGu^JaQ@c0r`f!`S?HkuaQsP|9;bSpFWcm@cI^K^*rJn zA#i+v;^%e4gW)QdXp%>=0&~7QK@K$bWBuQF)W48p-IAcQu;F6$XD%JL=K|#1%N3933}T-RWdBFg&9lCS+z3Mc1rctg=~*lPyTo%PV%@?&WYD|PX9Z!{3aN}eOa9<${k_tEse4|Hcm zBMzhWs_xcSKN;YEm_KJAJpB2icjzIYOX%TA9R6%Cjo_)|Y>dWx(Sg{}k%flt0MY@# z_>cV9o6nC%ySIDyg_%L>Jl*PGA}20Zpns{)@ik@b95~vZ;Dj?lS#USBzv8vej>v*` zjx{Cv%nYLJ9|!VVmD<0k^62p%Yuq>ZG)!!HX6RKOCUNd(pYr3-RRL{nuY~JwpScYU z4Y8m&fqX2ByTI(ee!cJaB$%^4>3xO}iaxG2Gg!w$dR-v3XPv*nbr;>~VqGgM@F*Ry zW^qzSnpiNXS~A;ujKlTgb4_30107{pTW2buB*>L1&6aY9@Z#`L-6zKN7m6#OL@ljt zOEW`pVe3mHUtUD31^<=>!$*g%{yJ`)rQV!+N~GN7T}*4bvQ*gGZ)~#6KH@zCWINEuapmu3fJxep}TCQGP zE}AJIYf&ipHcg>uZyB1I|HF%4F@dL81~6F_ei@%{s~4K2V!G$Yu8SN#-#BqlQ2c6D zEEzC4G`~(;KuP8={WRA+H2fRkZjZ%TB7`a1Ur9MsO&hv4)DVzoUq_cUyT^x5lV>`; 
zxh5AxYWLsT3====&lkJ@=*!KG_0-n_FpCmf)vjWjLtq#5qj(b9=kn0yu3abuuUZy| zgROF`V%NrF(2EG5%E)nQ%LML52-sfR&;`~0tRTwrQ2C?kA^uDL+cyp4t(Av+6cJH;{l+^dfG4Y{cVe=gr37aZQyJJfW3uZ$x`^3mT z%MYJz9#OEZe6$i|753qR0HuX3ZC&ZczqvXg5f~DZ=QGwg_xpF3YGdpH38(iNsw*=M zvy$|drlwuf12tBvsXzN1*mOx7|`KLBFnRD2xuQHxNFUkX50qrA4 zA3FVLhIVJje-PLudBrb|pH>o*uu}T*02}-oZ*ixJDB7Ehet90b9h)M{viMQ8DdK0$&F5!lLga5S2eWRngkhEnhXm8YxyMM7_-cl=LYn`$xMHiYEbg{B$h8l|O9zq)^Z>|ZQJX8=c`*}AX zpE>t&`XSTf9!Zd9;+I;jNbZ{&A=UMcMT8rQCqrm6KY!7F^qInL6CCJ;Ru2dvg=!JlErJsle$iz2b&=YKE#u+R4hn0<#l? z^?rn*$;J6537{s9t&ArdKcWqt@$<#O$*qHxY%C=4&&I|lm=eF@?6a+>gp2!lc}-{Y ze?FvteS7$f?}KkunE+UgT8zkGCF42beR})^?m?CT`#t%Rdz3+>8+7pJb=LNN**^aioj9lIn#?_5saEzZKykA_6=+}EXG zLtN0`WUV3Tm4@&u;Twxq7UnjuS=f_u0dqU_W5z0ZFHZVy)=7Hu#EWV;)_QP=TpC5<~`KaF!=1!B|ZsF`wA)% zLWyB-XGJIi!(}6}6A%57Ubc((=5@fAK1kaJJECm&Ya7VXup`Cj&b;Rb4FjI$SqqpO zxok|EcHp3O7tX`4hHq^IRJDN}D>$P6tB7>Gv1RA3HQR8LeIz2KC*x2p0p^G^qcB>U{|AU-h)0#MLsuAgsN*Uk9w6g#p^1HyI0w8^790X+cA9LpP^B6`f*2d+x^#GQkd6SAGrb?=5pq-NqKSU9p8GiW?{Y zgw|F4xSt(P6eZBVu1|L4eX8g*CZ(vL%Kca792bk4_pWXLm6c-8E*h22lq;OqgS2ew zNVBg>^*MCp>dCP-ud@{da4P|mAGlIaTD(k=GmBpwD(P_7Bl`d7Hgth94dMuS=xcj5 zmZZ5hIEq12CI#EA?0i5#Nep1V3cXwF(p=8`AJCANw6KRf7rQ=L`FwsN?mS>4&r7V^ zIN??-s&a44b9G_Ds;Li}hk=LfKc@a5q0Db)&;|%X8U8XGEMrU^axs5~CGMJR3j@Lq zW$%_MI0p^8+j5I&zk0(V-1MdF$rdSY+|=QgKsws6JY!t;c0)tMTR7Y2?*8OKHrPX$ zr%h3p3R?!#hbCV>Mu?Ly7(cxRc}yCj-!h!~jrC6^>EWi2wM4)}Dl8$mR@)j{NrGmz z`=_^EE1Q27j4P=F!dLc+SlZyK5gcg#ro^ErQ8O~Y!z;8m|3a^pt8vy9V=0NBXP>p- zd$CpjOJwHr)40*QanC-T`xPqEc0ntad=%!gIWC)BL*at?o=mRc>z=laC zaXh>=tEhjc=QD87K7V^MdJ3?o1nkK8{_yL0_%F$`r&2p3DW7KW~rAyA`4xuAC)fbnEcf zl}?BJG!m|*;|$9bay!4-AGpRAM;{zL>woyibXQ#)ys`K5!QH7D zIT%>UR~;rv0Y=TLiACF<2`E?-?aK*?JuKZhomtZ{^~zZ3V%N4m4~fZ@hiN= z4c5nf0Va=wHLYrcGsI5><}h9Z``iRVoZgs-f-On) z2p(F*qI4cdFrinrT#arr-=CwJE(raSzdSM)bKK?^skeb~T4u!vt)&%lrhmYTF<}%H#_wn)I1Jgo|w{sRON-TC{cX^eR-+4TsvNz=0)ZL=F>4_qEIvZ*jb=*1T z`NwyM2;byitSXCme;XGyUmM;Pc!GN5*153#TwD*mr>9hTcIrkvf%i#RP 
z3{Gp7N96`KM=UFNTIk;9#u>|WbKp~iGNhwi;RBnA6`#49sx|TXy}K#PbyPGRugel2@2)@a z^O&QuVIfj)0rAC zVYcx{l#Ub-@1xsaXu!f>2ddX@ZQE4-Ge0>ZrBXC*aux0$g z>kN+CMU`Vb&z+{)>{UimbI3EADcElb=K_qmgHK$nLI)o{9OZ00OcHIGujq9bX_=;a z%s#ekC#11e>iiU3M_1gEx)r)NgLgGAipFYr3gD(NxZ}QcdJkg=@#%u56F zVls*8QK|!$MYj4V#)MX;A~i|DwffPOH!>{RpD0I zD`YYmGAD7;)Xqs9mJQ|AoL-ibW>-Hnjt7X&=0GM#=Op~(&qO~L)gQ5-$xR}{N1e11^xxBD;wm!I(dY>rWM|CC#X zO3VJKj_O_25`78l0kdQ8S{QTh+6R*4gXnx~p$FeuoD}nrA(pYTCcBE1GrHZb8EKr& z>1gt5)b#r)s9CxIgOlDKu1*tacewca^2k)+Ku2_wlH*d~VmcOzDn*2M*Vb&>!)ru{ z`34C*yn|h?#Own94O{oHR&?j)(Q7m`kPsU)pj28}8elQ4gT5q{*0Ung>>%g%+sP69 zyI-GVL6aZ6WlbDp37RGU%$3-4q8nO){boyFP(8Mjn+mT#em@qKST8!VqjT)Y_H2!4 zyUu(s1qB(V@1GSPzj(uIcL^hYhG@ zdE6I%5TV0}1Ude`u2+f&bhI_Hk1-N?xXid>(_vdKJ zUQnYOn#BYI0UWLRUGj+oJNJvbwFriObKs`iIYx5t=u(KHS6uYG&WWKet*RrKmEW7* z-#izlY5Av8e1!lYcB$H{n~@2vT(MNRL_WHGvk*S$=6Y!d{M5IpX%v^kyC9bh&uX5#Vli ze;BO*97vEkSb`QvH~aZqIkQ{J1_uc*elK}V?gy@MVT6Us9Cd6oYB)E2StMXalJK?A z5iWOz`TO^Gvs6QXOONIo-R^g?U!EPtZ2np8xTC$jH4}NdI9rhL&@^x7;Lvchd0k7P zK|1^QOB6D zUN?ojxz?Tc zd8c}q?>`$Zykr#mn_17AekHT`a!L8i$H>5yFUP+XuL#i1SYBJEwX5xh79s_P6No9Y zst24k?q3blZTvDPfk^=_*b{-B*N;7*#RIu`=OOs@4v5rcsdD@n@jj{Q>>L04e4yo6 z<7zgg!`W|wFDq2vrOJNc&0_fdaNlCU42k0nTFHYETV{Nf)3BXbzehEn~ z-M_HZo&!r6q{LpRfHLx^=)Q$EJtpaMFJl6Fk%!w94=sok3{K7v&P-~B$~`@Qu9H=H zbB{kp^&tsybRM6J8ImAmd; zeCo@Z;c$+to9F(iaBgYIazly$UG0Ard%z;?Bk(J|*O#W!ILq$#8NZhxQ1OXhMGktQ*4y#%*5*3?TB_oC+vyjb{ zVD3zKa<^c1Bfzk#YGEO0;o#L<*9F5TMiT45-t7e;7xZ+0l|l>yL`I~Ypbc*ASsI6t zG;xGyFXx1HuocgrQ4jjXgQN{!wS)+rG~W%JUF|D-guH+D!m!mJIWYlpvbb`_*#O03 z=fe5;t$`aKS^UJUBR458c%K;JHSjPWg{}Li8Va6dg|AN|rR%}!O7cws@Jvl&pE23B zU)-c=zSO=hO5)NV{=LHTkIo#zA#O_(gnte{JJ+4ZNWls}a|%u3;R6BsmNg|{{Qeh` zl#D`mP94sC!Y^;ry-PG}uxeRFe3*v^EmLLl=-`5|ycSbHTK@BV1lM-+ZsP}oEt$xN z0(en4De%-GoV(&aQ8?OV2lETFK!!bd@PzZ?_m3Et<;#ecl(msq36pXXZc!TLFVlx- z>q!g9sqCEum~{8<-6rK&XeWr@S&%=r-9DQ-$Wl~B6 zT0VX>bJ1$Cb#niITM0V@omcv_b7OyD_PS8mrPYT?WiukJJEqdKzP+{hQgM-GAs{C! 
zySuN#HMTh?aDKGl3G;*KS^r}8Io4i}kiftPFF#hewp%uRb7EuZUDFHg=d51Jy1Y%f zdB*P~S{~;!1D+N-yLXP?%p{$}?X*aiF`S#ozV!5sHI&GP;*!*Z=;9i|g3LoF8o_C{ zV9n0Gsm|62i8=l34{`B6x^ued?G+Fq;6%72ggHR%e?|z(h4zZhtTO(Fei81ZAjNRk4aS+Li!e&cH$8PYA=6Q-s595<% zuG=mQU)n0kN-PZT0A4mV4XfTpzT?HGn98H_H&Siz?+Ct(gNF3zbRKz^%Y7_q09a7T zGu)=a-A0T1k0f5VbEL}MIe!-~ol`Yv7he&H!-A9}>gHSY0K5C$3?mDy3R_YHe5 zsY~vpW%13Eg+nEFNbJs(E;M_zht5O&{5Z({C{eJ%;aT|H{Jd!{cHC?w1IxY+ZExP^ zn|jL1vT1326$A{dk2)rM%835pxW!esLBLro&C#W0;LXeaG?P(f=$8!OITa_K`ggIZ zmr!;$vb;|$dhmC3wYO7<@vHfLcuBf2n{jbwpNj4iTV2z{AT}FSuRO(RdAA!cCYIkI9gPi z74M&-AUOXwr?K=cMh+pcTUycvyKH=$uP8`xZ4qp0sA9TC(64dRs+eB+6OuJggk(*& z)zF5B$d_-rWm!&AuiH7XXU={A#QDF2(kcIBw`flh-EX-JT9S7^DX$Zw4cR{mU*D!Gld#|6TAUCJh^P2Bf~6`6t|O>pvG@LgVb}=+bM;vx(Qmkhvx~I!1O6FPxYG zb~-95>JgC7q8nIFj>TV+>pVz+musqXaL4})hh0Vf`t>f!)XG8B^4-d z`mHrR8U%B```PWc>B+vd$po4<*L&pUO$0ULJtGDW&D?bn19C~{IVpX0`5b|kU&lh( zARk!FiPRhkn#tt(ZKx&G-MuVOcShhtOhlCcj;s;7qBOo@31E|w%UeOJu+t;c@jwS~ zLX<*Rynj!2NEsE0Fe}$zHmY5J(FWl9npD!s`Lh6%D#k+i}8mhknEYcW2;V0K0 zc=naw>qR1Q+|WA?+@;>P44?Vd!NEcVyyXE>YWnpJDF&l;Jh6A-E`Zvy~bYR2j+MFkACz`LnmFr%susoiIM8 zaQ}-nSSTmsn}YRu*~DQ6^v)@8QNL%Gh%UD9>0j%;eG(pcunXOx%r8A2Nmp$#6+%F3!wXaeB2jQsvc=<-m2 zn$IXtXlUq%r(q%EZMW@uetZLEEIh58(f@{|K)RbVy2qR32G{{G`oin}-)F0Cp!e(u z0p2o4=z-bQ7;Q>S1U4o||N4;!wpAww2M2GpIgmPl#61dHAph*E*aZL~`r|#rPi5+V z*{#H>Z;?VXJ^oh4yyJx28=1zeh|i$)T>)aBIJCRh4syj|r6UhhM)@e?IaDvy5HPD> zNB?Sa&pwXMK5g4200td?ybs%?>9%30Z~!#Ph(i!RzBZ5rEi_Pbj&zgQx62{O>@z_DJLyJj5bYg>1W@*L#Gr|IK>73t#ZRY z7BK$DYxY@~2w)cD%|KU!UmE(_o_g#Z;LMJI6j3ex>mJ~3+#`JeIUpb4I;aQ#-VJd4 zt9Lg}^9u^*$0S|qsBdVH!m*)P7GwwUzR`7%DLL-k7!RD;Qvey{xF7lYKJ&)Q<}R-@ zNMr6&0@!vx=XbX`LX`++Uv+9b+OU%&R9B?}9J#nOaL~PJVoS~Sem~4dW3o*+=63V_;IwOT8v*bz4xPJ|_Mf#@Luz10;}4xG z{eNKIT{PTpr7j2zDk2HS;np4(1`Nl3aR7%hR0#Y0CTA#7WNH6Sa08*`EW)Ah13y^( z(H+r*A}5K%mi&K+rkCwM~>GUs?okVcR+W_vC-E2N!K$+xu|O zEkUcA?R@SP2aeb`*Iwj|d{qlncmW2>P|` zh-y~g6ahVd28dianG^v7OcGV5*;nlYH#{TmRVccOy#CmC&E6SBiCc$I=UEw4^81lh zwT*V|_l;v(-G{KLP=$J{qt0QN+sj}Bf+A|wIb=NISZAR}E3&+kS3xTw>c!pO8=w)k 
zddA6@gHz@OIHCu6Shr&N-_@Jm%*sgDJy)ulN?Fibb2Y&$Ejqy z{rrr#H6A+cXa=FQclAjTA*i$5x3W2BixVG9!Ttr+cg}&kGFVxEGRHJhWP(sLA}8C zkb+K*{>)Ns&X5bcW%elADu6@ge9_@>B!b5YoQ2mt(V&nFh}lh_u|g`i zLE5fz+mBgnT~VRe5QM3{zDqhgvShDnkOa_0PSiV}fL|5pXckCQ-tL27ys z!+{-6dim?Sa+9$I3s#ZC&r4TECT`aTE*`m;FcY_ExBO+ij)0x~7L#pdfGN+%9n4^+ zNIRd2zp1wCUC*ERJ9i|1|D1!Z`BSd6Fs#13?u%2SkBU!iKIGPv_@Jk;JY+l|D0aGi z88&d~AQ5VOEoY28)_)FJ=DRkjM5Bfz5^apK4@_Q*nO=Lbu<=;U`=^T0?S}a3&k~R_ z48(JAQ;arPAD7D5Fs;1!d3Wo>3+uZ|LgLM{ckS4f@t4z`citg&M0sfhZ2oeX>>56R^cpr>&z|X#_=*>1DxFYf z`EK>wJ<;MMwM27YP5d}3=5*U0%3BMMEL;`K4b8GGeXJKdb6ub%rxumYyIY-)??u`D z7qx4|O&9oPOKc6+N2A7K(Y?MkIfs&9 z=a2CH_;f7s*g3uRr+M?AM5H8}CWJ$uAmk4fpx;~KL#t@EBE!W)@um)~W=+2>A8tv% zWW4dHME4*JQ{n~YyY5gO>{rJywI>V?BTh>bf6m_S$O1ao_O-m-Lu`KI1r7O-o5sMp zxfy*K9}Zzjy8ypqUkfOH*qyAKp7FdbR{N&^v{3iweer3!(Uml}q_%xj_~!ERHoF{9 zP4iz>x*;AfV7UDA!|hG$t90Fj6hDbG{PzRG?ym*>oQRDIV&nzf7j_h&tXw{An*Tgx z%A@nxxuBSQoWR(-yG0f|%_hsrjO!D%I-+KC9D0nq7?}Z^3EIx&FU{Q#KV`qW)U`i* zO~s<>8d~kWZ6%)f1O>t>OgY7D??*(7UuV3t#Pi<&O&NZ1ZT3=Qp+t?-&}a+)k{Y>R z$8+e(fkQEi=$70aFB4wtrRhTMqj_)|GA*`mO$(1**r+=lgK5G zO7d&xH1(Z1a5!3Ku`s7(8N)A=UgsthelAe_6nVAaVb7mhu?*w`{-s-_g=SdQvjbk{ z+3gPV+n)~C>K?sO)Kf2Vq`Nd~NuuXKGInv(%GBi;F78GhG|RrXBq5dQZTpm*_m;S4 zIA3WgAN6i-@j#~JV&=~9Z+U1d5>xPG@r7MGb=Jf+{PsuoY+jukeBpfG^j_A1a`M}Q zh&L4zr{})bzHpz-xt@+~+eU-JiUsYeC5L6DGQf@() zTSBf37mMF0>a5)xZVwl|vGw?vzrpm{`+;V&YOeiWHaSa@9_==TpX~&2PcJ;cE32zF zC^tSggNSaeg9?R3r^BBTfJ?{kIz!~Zhswl#9Fv90MeTlVA$f&ZZUHrgq>pKgDRKc! 
zv8DO}55ZNlhPxhibtubSJYU{@0xy2_OgJJ6&6aZAR2Ckokq~()#X9@AfzNW3?7OAt zsALz8;H#M##N6YflzBE@(yOMjuo3}BqY&?>ax7EnfueJJh#%d)|2C}+U)bOH-ah`x zIYo25`XZI@#pOAJ0=15hwhRo`=%QKY*M#i+r}u^!ZN4&aKY`oX-L%hrwC%~RLd&kl zOkE3vxf=t-Sj5zKGSvlgwwlBFFT9?T_PF?`*hlt4y;^t48ozUF!j%9qug96oLyv}+ zMZ3@HWZzv#8yW4}dst)j+~=GFe|{~TUZMXIA6%Qbw?;hP{_^nDIYFBW*Rt(%4=?z_d&{vo>r_bm*p}bd z(}PdKtbEpjM|wDE|50HJXBqaJ9C%lB3N~hsh{%s94$}{SydHq7a1`z!Zt6m?9ms15lIK{xW;~)+@g3i`&7@RV_&>@ z;aF4A!jMRji=kvMn=K~m3{-1ex>LV=f2zCET?l_AY)F_X82qFE$*#nM=fc%PJM$Go z*Zqk#MX99&`kyQXg1YWaV1KS{D1FAI>U>iBwc(Ui??QVY#pKUQ9_SmN{9JX3c0By? z%H+p7>H12Vcw|`?l=yJz$rp>Zy$S<0=#^nixEwIiTYokV9I^6@7g!VTQ4=ChG~L_^o>k5qN8s}%cU&lxwF3xA@1#bS? zVcSKGbh?n`h0Av}&K}7-Fjdq(71D#5C%#F&$x9XM*^IwZA?(ywe|HZ1Gcq!JpH8EJ z=m*z8iFi@C<&ZyeazR_oXPoB@V)pX+rhr_BiOnfByBw!0A6gm?J++641^xQ^VugQf z{>$d}mG-%&yx7DK#W6^-d{)Bm^v=9OS5@)VA9HpuuHhjXp>qd?_E^4-BwV_<^>7{5 z=e6hQKs|nA*k;!DeMcDv(?v0{YblCPSHA25d4Ef3@4HL3F@v*T^|wm5clkWYiVgwR zeoG-YlV6VIduWg1>-@i`Q~jUQ>2YZ8KR!>8M!@f%1+_QjyCpf5J1a@2fHj1N3v!&8 z9DXSAS=)w5ejtD4%%6J)*HU0+^Itx1$a+AAo-J>q|MuwVzFiwSzXxw6dtFmEMCww` zpTqyHn#3pkzp6>{Vyp}e?9_yI>=7}^)(HQ7SdMtl#EAfBqe=V3HB2@x*uE#K!hG7$ zRf?2kePbPvshRU>NK{C}wr6WtHBBqtdJ>`MA*_?|@=4&Pj*AX%Vfq~2MZ|BaZ(f1< zd!X&f%_`q}!L-#VA+@kSf{jKJ1ZMt5%~hH{*l|PI+Jni!r|s07`^U(a613cp-~GJM zRVo&00!!EH19dt6`?l}uIRucThTf~{D&YAvD%`t}z{Wbqt`!+b@WV#zgxU*lu6+^| zR4`TPH^@);eAKkYbm)=Or@gMHUw(?S%1yzJGzsTaI=!GvM}(su?;9FeGO^L1Lk%*- z(|`P{mPDWq?=)2E=Oy$-nCbgvd3(~R4cNKT@}&IK*6AyPa>-KK+{i`O0xR=|a0TC) zA(TE-@aZFe$bkUC)91OR8V^`CeI`9g);E0lb?*%ena)Lzo{rq}dwstTtt$NOIj%FH z-ga+enR$$}a$UjNoys5v*QvSoA1l8&J*1_gKL4^*tg;dT^z!*v&tEKpIzOCo$0Mo& z@ZfzuMx)oZ*_S-<$e_4!Bj~bw6Czwp`XWU=6)^Tg_b{ zOFj5c8u|<MU{c;8*-3Zi93F1-k9xmCWCaliFN$c|sR8?Mt`O(E5@FMPK|r&ex~ zAYm{sfjuIpiZvVP-%pm4-tnc|mEQzKGZ7X9%hAm1!;+D9++8Ql#LPqtOtY_e(ORhWTMLO&>Kh`@^N469XUZcVB(1w`E=pP)fIxE+4z!m_Uv3t9i zP#e{KMJg|qwj3YKJML3|yruQ&INR>4&KsgHs5NmOsP+*P4qp}mg^f|des(X5JX>m@ zoy)_2B7#1CbL_V=W6f@a?hZYAbvEr%Y@OMJ z;YpCeyr+k+RNQaN4NKnN8x)GMHq1Ye*`<3w@$9P#v}tJntXaGSN_(hO#s;k&slD>j 
z3qGY<8v7Gfkrx_C@4e)BP9V17jj_`~PLi?g`=^|mBg6NL4z%4qbH6;_J=}aRs9&*E zy(v)NP&ABbQ+JYKFAa+Xi?al9iKZhNtsDthcjH78Ql#n?Bi~}OoO@nLHqcD=c)Vj( z_a2cZdR|_sd-qp4$S%sTF6?~#a@W4IUw&ox_|Th9j?49Ib6qZeF||10{p+W+{EAcU zOwQRUwaf@0MhBlB)({1is1|;g3O^OZgiEa*54wijD!*m~!QH;|VrbV`b7Wp~iga=z z!*`ZwgO|5^i7FskMyt{Q@!uS6^mb`QYK^HLfss;qpMAc?>aKU`iOl(jac9 zpodvg-4!5c?$~_uxU5y3{bkjmKNB*bdPnYC$*nl?I{L-dB$%3fP5zFZImAW$VU^%J z_1cCB!?!iVcQT%C!sRPk1em_RpPTYcX|BAlj}y|b>bHPZ_9(??jvQDF?L!MsVWXqUS4*0=4P-mjC>X2q`H0!`=e6 zx+MtbltD(fmvTPFTw!%(qR;qzgKSQ0$Zev^ai7-f{uO)p_-rjrphaq5`P~-RKR4PU z&R#TxZr}MnN*WY_ZDS@TMH~gTj&k4zh!fot@9^Wrno{LTsk8pCPsyn#ze)4~RiYu7 z$gv-sC=^Y{aY5%dlyyuqJc?bV`J(Da&D@0<&lQ(X;*agBH|^4~##x$8uDXqm!#T;= zn;(Ft3d$q=XhbM~sLzv9W>sD|+Z2}p2J(0+N>0Jjc~nAS!KfJ9d^A7bvP8_!y7B>y z4<(vwJWczL>4Gns?)Urn?&{1ZI4}O8t1JTyIwd>fV72*jXduA!vG06MBP-~MU5IW% zM+~Cn%WukfblAkQjfR)+(YNCYd5VI)e?F!?@}nSX_wEOu1cjOWCl4ngUDcLUdS}E2 zP33vP=ZV1O2DpVqA#;3Li7{a`oNo58mH zJu_UGqu{$F`cS~tvlWqmdHX*2cE7nQAQzCHIQRZ%ne6$;Nx6Mn3lwY`6sEs4oxisp zWJz{M--`q4S@1q>XC6BE(6tjFuwwAE92{o}whZAKuhfZ)>-VO2@82J3g*qLo8B|%X zZF+6>z~q^2%QK%b%ueZ)h(hZU?jwQ7=bs2xBLnSLF5I{Y_3B#4 zVR?UBP&2%A@jrK6ispJKI0aj}zTe07pD%ke+UgIRLU8`XLcx}iZ%Z-?DqlXG3cO^# z-3+t*X~%gy^!#P*k9uGqu9+dDO(9QsuQX@w*>P(0`Q3Amwt6;h{zUE=rk>xi`tCtM z$+yYgJgF&_2iL=IuOBBY72`kr1aSDgU3g)_|6%Mq!T2KNOzirq$QE+hx>M{tZS z3#OblTQ&2KcztKD6_+CBm5ai6kNnLkzIBbSJX#2R8~(GLifPz0tl0WypVt%NTn7+;*5Y7ggr3OD z?6JiXFz1T%o+(m1`!zGRaow^o`oE<6)DL%OWnYFT2SD_4t3AO#GL)^XO1qFi{=LTO z|H()q{aR}{lGIH2$kDm)$)7iQ$~qnt2SkS#5U?8x=UD(fnibuOB_|!-NUEzb3>_T zqi8roz%-T8>THUC8gFp<#5p733IHw?X8(zFl!9&5og6%h)0#)eG7UNmGj5s98PYFO z7$7+OHjY&ry}FnjI(hcy=trj~^eilXk1bxN1B@)7P4+!QDKEdT@ElQ@&Bs$Iqd3t-ioD=5Y=fOS~=~+{!KExX4;fc|O-f+n!f13gfY8 zuPUv(5OW(gnPc!u$f!XlExwgBB_EBQoOw0>GuAXY-|2%%pbM}J) z;f;=m!UYMsuO2CDEY79hwf%ZI9f>@PZtMp>_k~sQ1S2TYzV9PiaL22@qP&^Kb6scI z8k<7jlq3=Y6s~cHMY@nkep8OFyqo@X?PUhm(L%rcH)E!a?|(GX0QJps=AiXlTd6XS zly{!^TQRpj7G5Iu3r=j|{>hFJ;Hs$!&0Q$o|6GbF+>8yLax0EoV^78k-OOfzY>vLz 
zv3YvwQAsBpdWCdZ&!cOu?v~v2BI1VaxBIxyjZOX7IK^5=;*^7#ReGv#6l^^m{5e>C&XYe(M~axB z?-&4-{ZybH3*|KW#3`z1E*+C&Vm^NjxX^wrl`+c;-o(E)?yNr23*8KHkJ4qjn(XW< z<~bl5y_1@|X^)HG{^oSwg9_Bf|pnYaT9A0ui*Zq(Go=S0KTw;r=z@gA;pcR`B7415}GmbA3y8kC6s zup{065}^Nazi15{5Vpdp)Tvw=I3OsXtn)1EjVc-I>_#;!p~&0OZxX>F6MS*-P|jc9 z{Qpzb79rgGCbPenlJe$rjSn5Au4BdLV|g!O2fuXtoVdH|a_u&%Xlj6O_F&<{n|chf z+Gty3;!0w~xF9LV9wWF><@V!xu3^okL3?S}SzXH(5;dpp);{%#Gw)h7PZ#{o_&n_< z-Hh$4n0erOA>p>UaH?y}Nl*(jmTqX_JzcP! zaeL#(R~0IEIwew?S=O>NdwZygtT?{Orz4Y%!l6D0@TFs@YjPO(x|w>T#7%AbHPKgu>S5An zCR?#}Cq@E7B{+IgoC*7W9Ut)7wk(sCA*^$~Vww!z-3wS*Od z8amJDjSBViovrnB$KmRS8w=dko71OxGqJaBl;5WY;;Rw}NIQG%x^t;bO-*^jfqk3S z%|A0x@!%}*m<}4LBbveSs5SbDpfz;rs-}bdn{tb3kG&gc`u=jkKLofNNFcp<84m&J zQ6w~(o&_W&iJfBW(-ndxdk-Om2F{+l;j1BENzX;ZX|z7u6nGg3i3?Qs2Z2iQ0dBmj=Ld_)OO1Hl?x>ZhNl-mx zD)hfL+s~(&;0Izi8%~pOn+KHD29%=H(>XG&JdNMnnAHE$y}KyP5rS$q*Y#}siNp#V zh$+Yx3t&b6E+VhRr=Xh8=daHX)YY~g%u4J>xnnMs?4~@wn_k|1?0p8Fiw#!goarGR zb@GA~k*uYip_SbB{PLj#f}RuFnS#p5_QID~R= z8gAME_#=k^zSM*m#8t<=bA@+K--k-K3weuQ~&zatC1}x=N~SluD$yaM9Y1= zClq5$MY0b?HFJ;rz&wK5Ejdu2-r*RKj>eev9N~K6&wGGo##*&jVHwcDz|*R{6zY&M z8L71_O`0g99fqsfMU0EqIr;gwnK-75kJTf`5!93>p5&@ z%b$PT)jO#3lPVgg|9qlO1tExx|Ew~SgMCa4V4454kCD7eopn*KqhY?iAPf6^(Zi5t z4X5dVV*ReQmCn>>N6M=8Ss47OH#or8_^~BgFg**amMAR(o&veS_s^PEv!!^goz%}a z$+Bf@UX&7izw_=8r@&+0UUeM?{1In$v%9j@vA}u;8ORmJuC>L*f(tgDIwe#uZ+iG= z`P78#@pWC9IXG-W_#Qv2`B}-Bm6s&a`K>V$NwZEC9}am5K~(?}7W|8w;OjM!fi0vn z2v?(wMObyw(LY*^c!-oK@^yLpw}>yC^*-{y{u%{j`m3sSa1Jm^KRKKLq^|6KK zV2U-7cxf8K*+Y2MV>2JAxi3$Ac<)a`ly$L^BaXhle*EG8D48p2$wG1cTQe7%8;@#6 zsd+Z=BGm}RP&Dyd5r1TH>Ddk@&aB>AJKEO9DW8YcHziV~k46jOuit-lz9Hb*<~#~q zlcaopx2@+38?*D0ZYOP=p#Fxi#?~xFe#@R&`q>&b zyPWrjPrmlrMF*j0w1IG6hwihXGZ*}A1}@vE@gph2f`l9SLEKhwD!;#RYwd!1fYgoz zjE$~;^$*VG^9dK+#X+ub_2`TW1y?*)FZ@Da5IHWvl6mK4k2fKHU+ch$BwtCn*`RFf-k; z9g-O1J8x1(kl{aLX(EbngRroqv4na@YScYM2r}0k_!QUP%o87DgzI}V8MMj6C7$u^ zv9+{_Mj;8iT_1b#g!@8V;WNt%DK_Dspfr1e&rKW+Z{$%4vjf{0uE%@u>DuVUt;bYJ z%CN_IETM@v6}?#Xm%&F@SER~75Yrm3zI6aAX8z3TGrM>Mk-w~T+L-68>6^F1ieBPh 
zEUK=(j$|Eks`Sewy$s%zI-^N$Z8DiJXd$;HsKdT$o-`k1&>`UH1acnr=x;q&k=4R@ z=1eTYHW>n14{|p&|J+Rowy*0g&&`3d^que0;fBijLN66s zFI!i}eF*~=5<$SxPlH3nF^p#!xcm&DPzhRyUl}ZO;lCg+?~owpoN@3NYbx;nY`U@; zFZIMo+3ANgTJd$9(!G8g?e&YuxIc>dZCuS;x)z7KI+M8Y{15uMa@hqhhB`XeQOX-o zS?&{9aIb@%Lfrmm={;mVBZCDOo~QKBg*QnN)1vT%uP+)8y_vMF@wHJ2)tackP4T>N z0(kc<7aodAJM>xN2q)Oswqs2~XqexuGiF82tXoSrv#SyPsb(=paryL@$Ks z72+=gW|66v!-Nn4gFF z{4prR($r#vfP2gev0FG8Kp+M&DsJW1}KR> zKym_X#dqMd!{w9(q)xso&yGHL@W6AWT~$Cg^Ekq9UAtc4<}hb^Fe+R?ilB1C>&4b! zNpsxs5SoX6c~L0*I`(Y#;a( zAO1-t7?0jr`Fe##(&jiV`+2gZdX~-$T`OWx`Zo-T&6-g9pf?3+wBRAgcNPt%QaA!)ij{SfQ9ySY@Ipe&4(aWbDXrDBMPd(?Vu(3;ynVDjPzOS88yFEqA05*%j! zvEIRax25`YhS6OvS}^2&81hp6MPv5)y<* zEAvJzl{iEQ$K>>R&7g@G8h+*A6Z8{(mF^P3Cj|epZha=$j?RPAU#MS|DwGK&nC-6o zolU{hunYTWan!$sRxOf?;_g7{^9gT+u8cD-xvO_l2kMY1`PiqHba?icKr=rHg2-zr zF(SrHaP}He5hTs)508I*N{7gJTJ68RrT}fgd)R)XuL>Czl1bL~ee{NZ+XrP0`nqx4 z(WanlnsWp)#q80xa`RhtItVE< z4S~7NHHxPpoam}VKLF183rq(h2Q+$Ph2+oEO?{9TbiIPH`gjNIS!_SUgzuOF>049$ z^5=@viq=0K8gFP|noUT%&z*%qn*|YeEilz%u-Qg$_|Ch{+60!=R$kTzD>ymMgC`IA zB08OgL=8zbOr2jCO&GYwkxNmQKFHF)cZNbj9WzNP*_+i-#fEDY-TB(E8|8Em{_V!` z*D7dP=XYZ4)={6*0RZw`?J^KLm3cXkR_zOplPH0afAA|myxgCuJN%GyQjan&4UU{rFE&*S#j zfLf5@WTzGr69yJQ?t=hlQu$I}U}ySckb*pkYjEPpGS=MSckC|~0WTJEa7Q7+8Ghmm z>fOVPvb5bLj;4sW6hixcWA$0|?tskw=}u}`)qyD~oM&!U27&37P%l#brk8Nnx20>| zttUOXSqU#>xhZB}3kBSl0Q-gF8ZO7ffv4EXv^#2sbv zEz~zGV0)hamb9$_0om?WyNzn^C2F=swIi*Ti}A3)PrsvIg}o2|ia1jKhXyJRXLqcV z_~^B0oJuHbj|Qa^&n>_OmsEqVhuk8j^`ULXCwmZq-=b~vB@&MrqV7{2hAhPo)@Q(N zG+~>V*gIYt*TiLqAz7mE#V^?$JSLMLL@SYwqED(%glVChwWZ6#G;IZ91h1(y=YTJe z;KdUO+{h6T$}Xqhb(NIv{C~x1Fbk?CwAa><+VkD&tOGV=Y2GX7m6i!Df+acCDbERf zq)wdkp4I%bfkDs={M2ub-fEbwf*(zoR0GOKF}O!RKDd_AQpmQzGYjojnYbV5n2&`U zq+#o^LwJDwy#@Tp&i@mE;5pK6rAQUo_0Te&*4N!rO$h^XEE_ywge0`&K)L94K@1bC zmW8@yU4Q{rMJNZe15yD_d|kls=QI8;rab?BNq{J++CqU>T8B5^=R}`rqoo>GkuHn> z90X8BkoH)@7}U=MwKl%RBgcP|792rS+^_p7VSNY0?vL`l3o~g-TGAwrotni6O&uF4 z)ErN+Y+QeGc?R)h`a=2s$Dg$y1D~HuWtH04U*h;Gzm6%s9)u*fXyHLl1W7zNN%Z{{ 
zvoExKv*oqn-$s!GOz=;|S1mY^rPYQg7k26|&BQ>UrWrj(^xZ|iR#C;|`5G05;m+g} zrMW|HZ9&4hb6jmnDx6f17a`jQs0}&}>E{3OQDg)2#OBAZ*)PQ4$Y3CK(fc-{&p8Ai z>{1_JabKQt@e=%J;%GcTPhF{5i$i5WJW+aYgc7#LaNwp2UoSd*Ekny6qIvySH^X3e zNe3azQ3+%?qY0m6UEVm_1#4W+YDxr_g|o2MuVi_-2aceTUdBKQ5O zT&BHQ?Md)hpXc`8-pJI=LV?+v?S2Y3ZiKgN0dE&~+cv)r#$PiHv2r|*e?sK9IQXBu zOj+>g=Cvb?LVj>*%HQsQa<#yrUmEbchJ%!|myp@kfTU=%8vx>r4?Ccv5WCh-2xElJ zY2wTcgn2#lAzCgtz_%S%0uTQB9@xIsx#@x<8SM3s#gK@T;u?e(&V#QX%zmQ>$Fi63 z^Klnz%mNb5R-eG!(tsz`5PAv$wbH>lXB0N7h_4w}at6g|ngdeb(7=A9jTP6hunZ^Sswf2)vl*J%IURKh`(vVQLGv#2 zXClv`-Q6w65awr!KHm0yMbxq`6Gf$ToS6AO2~|SjYv8?h++Mu;2>qJ@&a4v&${&=tbQrD z(nWUBV-%`*^5CS8__1quLEum;b@=G3b3MmT0pMpHhed~-lsE{1RRcgKM6~h|5aXzI zEdheV2&RHZ2!kLpr)Ogeg*-C-Zv!@IIJ+MM%2)^fgdlQ!2a#RD?T<`W=qZU#6GpJa zggx?F5|~{6W0OgILoK6&W^B^w>+KJ7+C`*jchXK|>eZ|E%#0&Cq1h|9K0ls<;8D@q z;Mqlt4xn&d?K9x)HWqb7;UZ|S+wzP{L?B<$w5D_AAx4qq1&Uc?FZLDIyA{Hp~J09O8yqZU9`% z@nCJ&*=JZVJ|XpabwE%Y0!E9YAJ5i=auF%_?;n|&$$7T8vQiJ)ofsQd(Odhn4PNC} zpK-ne_L85l&i038Pf^SKJEaWzC&yEPC~<{P2lh{Qi!v?Ly`%8F8t=LNFv6mLZ*7es zTHSt-^C-}=$)two_;Xk@G-DjTy*4vLO@WD>9_kKQ_?e*u)nkP>=1PWPGXC}-8Tucz zCZIN{l!n!I<_fHt@5xxZj3raAN%3m6q*&OexE%fXWDUr65R$Q-k8 z3}?SwVE#e(1}}&88;@@?6x1?E4)pbM?SV*NO_=}o?;EsKV1&SNsD3>bl0Ofm7JtsH zhtp?y;&Ysg6B-lpb8~ej8&?c&IJSe@=o>EgZnGFiw^j(6i&8;*W_>eKVqC7*;pN8vam372+>}= zh!Q;j-*n9{KJkytY!IQv-@uh}pZ7By)ZXE!B!JvC>Yek=cdc?A@nVSIaNm>C87htwz9@dIFcQW&Cu= z4)fZ`?%)<^CF?5QbK`3@;LV>-IBd~hDzGp#mFipD+Z^Ubp~jl2{Lh}cQ{^<1SM5eHD` z`&^?pvlkcw5g+ISaQ*qmmFM4uBKOGR8Z~beffAGvVChoZrb7pge(+$3^L_fN%?@{^ zXsTl(?|mQP!T=B^s6$y4*-lDWldgS$5Op_TQj<`_kH^4oIUMBCz%7( zr802S%yh4lOCGGx-ylQ2@EK)f^1D5E z2{}{hH6|^+S3teQ`S7{J7~QG%nwfpVhEklj6$L`{Ilau=U0AukA9q7&=&GfBESpv9 zLv`SoAbO_*gShH~v*tzcjzjdiNpcUgc4VP2z{mAY1XO&6hyw4rx#u&g6$Jg}bowsD zrljNu6?mf@lOl2HxnaNiC(*ly)=EdlB5OY%W6X{QxU;T;Uhmz~!q=v}2U$aS|Bc1H zC=KToQE$wYx65F>Ba3S?WD^zTq=l z#&}y;jpMD3B!Yl`w&6F4!M^*ASr!?hZkutU1j}ZZ*!iedStJ{DDXnP9rC~mU?a9AE zgjBh-tSrmnz`=u~aIQe@<`f|Rqe8|%6yS7{gwTa7c9m9@k`@4faR}`iep9XR5tEe1 
zjcP6I&sVeH8D1#mp%uPd54biNriu2qLOWb9p`L6tsf~}azB=W@nL7FXS)80Jhxg{Z zDvmx!obltW?>{MUaiZMVBP}8xjkN5SNn|`?^8?0LA+6UR)Oq|Kt}EA@5q-2Iuw7HN zyzkYb=ibITfBjJQ+DofU`H(&j%v!9C)G_5q7H()cblUt?pYGhZysDBTY7W^i^0*rI z$~onK^W&VIohv83T7N6_bmb_@wr&snYCoMayfffPYz;|GG4L@tRu+8j)o|)g&s=+V zde|tvvmn)zN5>j~TBKXBN*|@lrC$6gNC3RBes=se6?1)eLXY8NO^G7MkWXg!EN2_% z2Q|jJzkV@Smi!8;?U%Hn>FLh5>C47%MA*e-g>jE;SM@tmLmoHz0B-9g4G9yixAqr;c&i?3Mgxav`R>gXIg%VEoA(gDb-%3p=x$6}`$kI`9pdCOCA#6!5h=%`Vl^*f-8boDRBZWmDj z#&(e|;@R~1G+e6hE>GvG%&=Lt447+`7i}N$(E1e+kwLfnaO3csr>!b!SZc8bAK_i^ zZK^uTz^qRx2N_S9e|-K~=#-|oU-jIWKRBD2eJLuFSy|@RJNpqs4ChpMEPHM8)=2pQ zHP*5&n}AsM>$Q1#r@lP95OSBj!lIv;XIOAgne0`wPz#a$CPeck( zLkFkr$ZApNI~b;><_NLu#(q86z5@3{ZjQqYH#a(&EuTnd_lrLQ?f3k(Wmd#uIvM zA1my7-TVFi6>)86zGptAX76H5nqJ+E!m%m26L&gZs+>!?n++ANoBJP`uZ?pRO%I?< zb+;~cMJ`Rx_!B4|6EP&&9s7;9lO0wFk8(e%q8`TuenX{RTgB6j!ekbwg8h&vVY0S- z9xhKDGlHCn1xXLzQ8vt7hsb0~M9vCb1vX2wa}h62J;gpsC97Obt0$oGC%{tCnKSMd@<>@``w>;>|aAYijf#*&k*|5suE$XUHVy51pK8#Ah!tV1c zm&I&CQ_6>!oQ63!yH{3M-dP`5&YHQeXOeB+`|?FotVH6pEeTiRIMgu2zV%(M^N{m_ zyUUL?e|{s6d7uPeddK?M$?DL+O1gfvc<+yg1Ju^CYMA5wE%z2DsPVO|FdcF)5?vyF z*Wjlj>o1HBkYP`WJOpk2t2cN)5-z>{tBRl->;;#eZM(Pc=D7Wm|3D&O94|yAsCMMv z_8ea^U1$aOsk@CL@D6Y$+k>q3$}94tIaQ7 zzZM;+^k$G<9@mY>U7NV_E}J)pDCDZ9=QuT#>|vu@ru}e2r~a5oa)o3^wc<)lf6e5+ zhGXo3zdS2=Y^K?Suh)oHCgRa}{g_U*&FP9)(Q(u5dlhtQDx$i@&{T7CqbNj@ZaL>`96^5!-@K`v0>nS-!C=t4dVyMDH2y= z^{nN{^)ee?lnlQTcvp%IJdlBOEu4ES@H-HK?mbwpo&Q?#eL2q&L(qM$r2Of+0*$}P z5%aq26w%7zd%P(^x>>#rdI_mf)~Zs&X}uu@j+50mDI!K)Z4+muo3-?!Emy^*tLAQ0 z`NNgQ6c*WgWTEh?yT8j>cuAFl`6|r`AP170mday3^VW}uM>C$i#^zPN_6`&x3p*Y0 zO$J=Q4)N+}>WqhJW|0b-+ji=)b+oQgNA@pKiYxjIGgD8XFwzcZc;V&Y0vLJ{ZcjF!Yw6Cfu_9xD`0c5-_#anhAMBMQh^1 zM|)i+YZq$Ga4FnX*^ecgJYEawBtrpg^<~kLUQ12_j$VQo`+1Mz4gY7Ar4!9DrN3ud zYob?(=7TJ`2adyfz3S}M^S(^% z33W+O`~5hs9aV5^;?k`A8&$m6wGaWKn`3`ituMi_#yWX(<8n_gaNK%Y(+m$rDpPpEOP$#OZ7l-=Hmu) z+$JzHT$*f+Kr08Qf(zEj>cG)s0Z4z5*Kx2VeE2C7No9|eI$my+mBv!pa}%)#OZ8pq zW`Df7T54#fCtx%*&zpA9wk@t7V~52&ejzdnYe!wsnT`4zKZJi

`%$T*2hb9xCJD zIne<2S815YkPw<9PyniQUq*YZ%|@E{0hXx>w*3mFN!#bHV4_Y8Fil~W@R;bPOh{h{OB5dGK^m44b;>=6jz`nKiz3%YJ1-YxR$bR+{8~dZ z{{r4C{*%!#)2(N+PE69d+uAp^9~Z{7ytlS7UEQ1T?jNFYU!K%6I{Ud#s%|Z%*laHU z?Z~p^??S=Kokt_fIw?7)wRI}^0S~en8&bKQjys*4akTvY`qft;E1U`F^U0Y|%B zznqG>#Q}G_EF*jAEWIiiMl?btrqi_u{*P2@G7SsRIvL^H(jQBg4j((F=d6Z_Y05rz z?r!RxuP+fxag;hTL)Va$b=Tq(L*L&+clydI1!Zw0Zf|$_EUS0>;gqLB0V*P_&mT^W z67!GE4HRCx&DX5=66hM4Iagksj#e>_kp4L`wBP4~CwXY#$M^}lm?L`cv%hyuvkrVd zRnk^z<>Gqm&i3oYjpg=~X~uW6$++^zXU2TpoGvFLl6Wu1^YK1}SbYJSjg2wn&d1tAF!p z8I>k*_=Z6KRuA-Uom`?50HugC!kPEAnl=|}nNavIyaytfznBEc_dgi3ZIX)4GCWvy za0R>j{iE?RW)8KRFeW@!W=xHWCEB?HOw%tuIoN&kecEsE5J@L@LrHLCZZ2%7%EMTa z?hxF~&qR1*^;q2?QArmzoeg`LqZGz?a*(gGfC%;VP|k_7jp1YOes;ZhS{@LKnd(3H zEfY;nA%6&VI6cH<3d5wgGAqo{0`cO)^73V{ej6DZOYeV# z)%oNBZ43}X7O+i%!9?+!9@|3})V=%gj__Tog>4lio$G9J8ZO|wMMJm`{J-t9Lwi?W z_L9uZ7u^%VV=MkS=9I5Z-HhV(1zj|imM$u{;o zvLp?$UavB90RGE8H#g~lcVMg-1a<3+`6Kvuj;an*i9$V@M~9y)1f_5(@SO=`=Hl9Sqrxq0?EP^EpV}2rT&rewOW!mnVo8*m@$#*aA1U)^^3C<_!0FNohB?bSn2_$pdpSf*J z2}f+P{OVTD8$Qwy`u$F%uLC45y|H_ZHxY)Q%|NLb7;E_hbkO#$XIPprM zKjK`Nnwq+<5OrNc{0s&|^~$uS0nC45zL6;V8Y4Ijq!K>eL4Z53LK^w?D+1h7<0b0z zH{sLylE8Ot66!r(FO!H=MF@Iv;CB9VBouV4gTI6{5)-*71|d8K)n0BjxUhz&LLDM< zHW`RS@lrMVD8W&yZ2i3K!bA*D^8lZ632zP_M1&++opfxWW)V0Fw7F1htD7FFGYO9)2PSvy=CKn4Qo_?+ooH%|8{! 
zc&hE`R>?OR4i(DV9!WKxo-$0p7!Ekm2q9^;!;=?_(@2O4k3%GT|BFTau}U_e&(L3& zqyQxOTN-xukMXpFU0`3bI?0z2p12VOIz#jz%rJ|3LIr}oKhA_# zmG|tM&Mr`+Fjtjdvll?siVosQEY$AjHaF;bJIJZGW!&ctF8J(HTA#gfv0Ec$G;!^2 zoU{+Gchw{I-67NTWaAqjcy4u=2w#6ozZBx}R_%q@yBhu^28>eFmn!Yts|swgt{jtR zZYXCk9gCFFzGUC`n`VB%Bj|^OYMRYE&xb2L44z|hDE==dmnt9bU7q24E0$@%EcGDR z(=^qzM2khjfxGWqPN2anQwv)qQ;;7-q;M@yy51Z6slsco;n4r=$pQx|$h!4rZLcgv z!=_gtmO8n8_B1C3cbqf%x@3Kj_i)-%AFgi4!LS~H=G1H;T@~mIcl=5BI?+W240lW7^50jy3L5BfE~pz%HXQ>z5B)4Y<&U3{Jcol z9f>djOAQ+4K$J~DBm<5^Xu^yYa)qNwP5>Rd!6;%|8n$O_lJ$)J>L%3^QormS~k6z zR%Gw?G&AUA--@DFov2R5Qer4@M9-20N+(=%ZJIzET4X<`qG(>cJ+ zuhu%nD**aT^|ogzct$_q?Rd7o@wIceO;?$Y1zn8n*GjWv1LYo^2Tq@>8&mgaE1SIP z8Otx{Sv{A!>2P%SZAQypBJaQ?CM*uv#x77Uf#Ta=e6Z$tK1DI zx$f2!-(22l^SJ>h0i&YSg!JK{tVq_&k1s#szVX~v?NW@0Hr7l3JIy%_w&^@S{V}Z} z8{LY+7UAG;wnYor>Ve+&nH*`bXj0!$1u9PEhhM`P&(%EfTjjqFTegxYb}Al(V~A;T zru9;viMX;B>H5Mco~th(nn@M4zQ!X0%nLJtaE$)$htTkLYEn3rB*LAew>e|s0pUS$ z1jPd=@F`I`L3#9qL>1EeE3FUCMMBxK>a-XbqsM^n;0epMBi!8Fr5@{Nfb?p!78k_c z13vFiHP4)}UmF^V!Kq$JZrmp9PJh!gsk!tkf9`6H3a?($^&_f#425*7{p+>wEUe$( z4mEM#UWqD_DZfo5;Z}1DbO06DeVT>Yt*$g!-7(vd7QR`9r%v#<)Y!;W=1*JzH;j&Nj|-k%I9&6Gvq3!`*i z;PMMI$#}1qOvmiZl@ftwqrdA6!*)_2gNGsvIr69yK7K=b=FM5uvLnZfk_o)}LBkO$ zuSj~aHrLPWG8xfXha#L{VF`i`fptASg*lfXY*mUEzx{n`>NipAMamr!B93FI?y8K> z-Ibpmn^jNZP7{Hz;@V~yy;~WXP-neiS?RfYWbDwoE^6v~!|$f3u{mRkAD{2=+P>B6 z=^7X64><6+opPYkMX}FQ?l@iRf%J;uQi!KW?iiHymyL_r$BUDARJ%2P;mJS0FdIYn z5QnZ=v1~2f`i2BJO=$n$(**7b$%+3QnIB-We7ekk;3Zst#L|zCiRZr8+=T434tgST zA1IfwY(m^iv(6`?Gu;$GA+id@UkS~d0Pj;H&KDsU!f|;``JJ5Wv>yb(4YtEjT(}56yZ4}#AvAY5 zuWu3dtrtaD87r<5lEdY{;=K|}^@iz{w(?uvttJz~c&}r0`8*$JGYs9b&d-V2ecI<~ zRM6>x!k4y8k{nKapCJ_IJBD~9_I+)$-Y7IO;I$*~w!b}9vgrP~`{8|W{>nfN?4?O6 z7OB+4KI1afnT7d2{Vq)s`upcoomW%vbLeX|e}=L1qYtH}4@r4->Bs2}Dh-vym#&c_ zBU%8?{ieD6zb-6Gjw%3I%pgyQJbiPb7c%K8gBYRvMu#6F4RX`px z0yTOyErhNFxYRrFLM?y*-7g^E{f7?+z#scP&=I>_iyIK)<@gvWkK4pgXd*<4FI^ZITbZvagtZEaB!;GL0lrraTP@A1KMVsc& z#+{Nm9IvobZx8b&GpBX0COJ<%kxy+N=W&h!@69A*CkO{hut!D-kMa`vm%r8S66DnG 
z95<^U5ikS>Gs9uWHnNtU{jkU3d=9U+)CT5kt!rEJPoft{M>8AV=A>SW;5L-GeHg{7 z!7}97Tr{Hn>})X#KlEh+9E7}Ooz9Z>^|(ivITGTQf|)_o6k+$Fcfo%&MUsv*>Yl>x|+2NYGSmv($TMQ%D^TRw`MeUsAk- zp{vDp4DtQ@_gs$YGlLc90FbfxLO+)lL67M`4)G5S^KsJe55=^2-epEo7fuxjpCx{T zVp5TOPDH@v*bOAXQ)Vfp{=;Pg^gw5}O*27~`m}y-EnrYYQL{^L4uvGqcV}El2{=(1 zt3ih3m*-~XLt9$GTk`k5E4kjt2Tl^w?r^iLQ&A1N95J0S2?DlgWBZqNz}8C<X14VQ|49|aa;J|^bPkdASdrc<5+|%pnAkYBHZ_p~|PhwnTqXNX6`&$Y}!exwM}^Z#uUd~74&NHY%T zAPmpX%bEa;_q+#M&N0YUEkDK41JJAj*3fmsU-&+h!2HM+_p$BFd_FYFcM`u;EJc5Q z$*owyYZLoOF0qL>>v<>mN<&klV@S8rQPsj^a^yPf9eN^sWtS$Wr?ZDPi=l8UK(`4E zcU@gYfEykMJ?d^N9aVq;RGI=A-3R~cL;(Cf`WsOr9e8O%+7|VEy*f+9rRQCzGmrAC zf)=uv07gsyU3U0ij}>8+jDu>$5g8d7Lia>234)2UQ{vUSb_xm#q5ZZnVQKZSTbBVJ zmGoRd3q!2pHUGa4W=Oj~-O*#m{9)l{6M>V)ybCnG)@>)N$k9Qa2lyioBRJ$)jS4M) z%g~&((TC)n3KW6=ee#a<3;#QzGKa8;Dc!^IJG9W{?v+OlB!Cw5ob2S^B7#%s=?eB) zvfw}esxaf*5F)hr90g#-|KIg0%U;bd)r6Khz%Nn)8EB_o0g%s)eYhp8pb+s=XjjBB z0MrKTm6es>Cnl^KaE;T@t}pQ+l70WTKnQqz02Bk8q9F)Wo6YZw(O&Ra%YF2*4-^)W z0DQ#!zLuje0y~U&LxI!Oz1?kh#CaRiw0ck_win)3U*$XJo_?x75ZPcSn&EE#V@|Yx zlyKg+?|3a9dr|j)+$dmvm`mLQ?uOd5eSh&W7^-WRG9Y&mxLzcH@K8a~+hD73Z4x%% zi{y4_@C>f<+JFJ>gLEHJnM`-*eLQVvr2}Tp4AC(ae`2nE|39-*m_uN662K(2&7f_1 z>iaF<9Jjoz8GpEYVj%qOcxcGm*h z1NWaz2qEQmsBVLrHvk4zKz0SHoxCAP6KLo*m3fR0XuY5mQ2{8IDD7|^S>qr@&_fYK z=Bcpgs-soR>jar6%2XXE7K?4z4pl+*g>dD-zp0Wpj1jMBN*lI6%-!=yj(ccOyku&4i>7gr;qe5P^!f%g$39B84;^ zXEtjYcvAq)$#Z`?mJ{K=79an)p)JHRLa|^R&xQ#8EI#xL20RqV|7be;DlxyNm!6N~#DlcjD+>f8+vyTZ$44-zS?yWe>&eW|i+{x&KtV z|M;-}a zLMYCCD7SJ)kjQp^`vSu2VuwSm@Cnz_VdPgAIFWZ9`8PZHfdd7+K$Gfp7+F34SY_uS zVXzV#ahshD#tC=Q{t;Q$Ql*dW_hrY$zdv;Jbs2 zUW(xoCLE!#6pYTbWxT*T|6EvJfX{J5<|XZ4O+3i2*7_jt68JaUdEo5GQF z@eDqP7I}&nkGoSLzq$b&cNFJ=e>1%+ijhfupiRaP8Tk7!J@KmVzUw0wz!eT(J9;>> z4qvN+Tng2*kG8Nv@g5Tc9c7*FMlNx~;`KD(ax zp=RCf(m&=4gbm$nUA#O4eC*pso+S;q`-6ZIS$_ZSG((HDc6eU*%P=qlSpmyMm|JJ5 zpp!Qx9bJSCFDWPz3 zI1q9hp<6g$cuxR=9R)m~;9398L$He<%+Jq9l_2Lo<(LlQwzEOSx3O|8| zpF+-<70T1>wh2Ah0sPLJ*#7af`+Yoy4N#Z%jQRoQu zv+wO`kSM0EF&sa>ulYWSFHLQCD8`;8-U1q>G$qMO!0AI?Glk|7REG|!p3DLCX9S4s 
z57%Z99yTTYZIWJ;ke(3W`$s_77Qe|4fkGS*DO#SMeN~v#{mL`~3GpDkht>*q8{3=- zjv%j^HqmyHcbz_Q{{By22!d{b(S+?V(&gjUEiR}$93^}Pl~*S#90j`e5FI}TxXBE( zX=*k*YyFNk!`1Ro=1q@to9a+rJq2d2Ae(#ng|!es0Ba+i_7gNPKtq0cDC;=&GYBa?>RZW%ht>3 zGy$5uJ|&_on^wC4*BrwI%KN$Ph7(V)q$^YbEWrut_0We7i)W#i z_*G2znSYY#qWnRx`;2~W917kN&6!t`rha~1P&hcjK#xU_f~Ca% zvuDq~-Fv+M&?p@0_Z~bj5C#QA;PcCYhyWGo8Q=EC`U-CVI0>--@CNm1-kJ+E+U9K< zA3$XKJ?m-LO=S-I(7QP+lm3546YY_doZR&m(`-)^W^MKq<7dE={Y`E&6maK3^-7J6 zP#MWb$p>N^p|4g@IoyB-_Fb*KA63Pzn1l^e^FJc;)9mW-CNw>(xRy74Bp4XTZftIj zj?@JTdEhtI7_y>(`0D!OF0>}JQpZf4e&19S0U-&ZdeGGwM-o-E>UEH|a}4kH5ju6U=^gO2 z(>TahktECK5!|HL(6X(ach=}C=^f^z89Et}Md6gr!tz={QO`f2GKUC6(5>=Ye=WRp zlvuA`}Hu6E=8C4%Y%(mQS%bI1BWpLcykJocQT0 zl)m1Ee|n0+qj=@gNFr$LxI}NDAOqf#kkwS&DNC|5MyrpP54_Y&>>NO!#iH@u9dLcU8!9y@3AG%-h?q{oB$$VRZtCSa7Gz0i-e?7~%{d!NkWZTI5? zq7M_1(%z+hwAQ##tjS_ujZaPaLH?fCm?ya4tE%>#EkPs=GK_~1HIKOF8kf-(=VKcA zrOmR?6Z0@1QC|!;kLZ~P+D?qxR$6?dPJSV%G6%<7-ofibN1l*QJ*6yW-VbGPg5>3w z;I&=c)t_}Hk@n8O2TVj)KE4ucr#_qB*9s4*Ge+KwU-<*?gib5o@0RWi&HmfjM;3Qn+@TL>_0fD9ic%O&m>Nk;cFjKVU}@zgid-^Y5O=KD-lgg zKg(pmnfSJkL&+7Uq+M0Kii4nIis-RRmmcd zq6*Q#9Jy)B*#R=6tol&P8QfZImhyxA%u==PSb(^xlBVYDNv<&n;P?f*FJWk#?`^c@ ztEE`U5p+-QxLPWnvZd=nSpy%C`T8PoLRs%HPFs*FaT^)^s!pC8T>Y{AUQ@gl!}o{9 zn92-uRb^xJu{; zsCjHB6ydiTdfQC73#d`OQ8B?EcMCJ{RCq#&p%>NKU&4cLJ0CaLeivl2g9|9j$)PL;1&%#9JQe_%1G}gwS`pMRhh9{~r_Lx1EP*!(R_Lv^ zW&HWnId1hC@C9rt2{BBczW)IBhZXnNYyfqmK$bOJd6f*>)e72~nS8W+w<@UjfQe>< zn);k&dR`#kV4`E7min5C!tIz+Adv)Npf}78o^yrV{R0YnWCXrmu}4(@9r(Nu6e+N> z9MEyGYBzRjfSinpBtYHJjgOAr1>H)8i5h%CZF?5rec@RA1@vjM;1B!LDXSOp?~Sow O00K`}KbLh*2~7axAt}@V literal 0 HcmV?d00001 diff --git a/docs/images/uffd_flow2.png b/docs/images/uffd_flow2.png new file mode 100644 index 0000000000000000000000000000000000000000..9486db6354637cf95018d6c65658602e2a91fdf3 GIT binary patch literal 33307 zcmeFYc|4Tw8$W7EiY#T#-XcYoF_;;$jh(?T_I(V-SZBRrHF@Gcj@WkPIA1fnh$l0Bzt5DFl$GU`!1BtfHcBvQur62y 
z_@@L?04YJ7{-sA_y@US4NL4`zU~thKjll(xf{DJ;U^sAX5=8P20FJ{tvA&hEDMgA>sfz{3DhB@?|{mCZVza-VHPg^Fx7+jDj7ER4ID)M2eMX=)ZCx zh2y>drSbBn;4nZ(l%>H2fW*CUXlw{N5MYD*k5E_R*-B%-nbQVC5}11P-VmQV|02h%_$Xs}wKC&?I0u+=m1C!!5Z0vt_=WIJF8K5(l5 zvtR>#CqJC6D%uaF|_g}TL=1EqX4BcM3_@8NO)6(fe~=w z4+*dZI0iy60ghxi1*sH-4ktqcaDjoIYE)mOb$FPCnnPd!DUbvUB!DgK?2*=J2U9%A zj!H5Dg~RL#UM5f{D?_{?DLgRD3*kleRCXZSLn)SOK}wh)RXwV*s-2SqLRST0V`3U^ zYp(BU8|IC5)c3>^9qkB2plRS|Vc|;-vo&)7rVdU)gn|QuDc0tJ-of7Z5S)dz87a&u zJkZeu=4b-(L#rskd@TZl2#y8-v6=zF3kh;UfK}mGh)NjB808nL3NwZSDhv&`4RVC) z;)u3TV{eB5Rr4^oEtG8LWrj1u+CltHNf>)lm@&q|&el7a(FbKyA~hU=KzatL*;=aE z!wrK?%phoch^@YL7#R!@1Y1&cRsC(LmipcxFxHo9)X z1{9E~A)12lgpeT+6LX?IPE`#bh=w5T@qV^u=42yFZ#`8%AE5Joq!2tQkW7T@D;r?R zkRUHl2pX+yu4|?5WnqX6CJ+J%#z++lKb!;HTTRzDl#B`hqe)gI8#N3p1V>N?qY>7| z2qacj4M8#rv|%WYlcRx62t$Plo=Tp^j^TDjjOB0V3--s_n<`lvz=BXdUj9aUCQ1gX zhE7z206i-lgk*-c)-?(-wLzgQEYV^9CZT4bfhNG>g!={u7VTMp; zb5&akVDsP>PACk<6J>+(GSc_cB@*qN0&D`I79_N#sVYJ*OyAJg2|^5kds>plO=1f>^hgTufL3}C@wHe@}0oTa5B%F7h0M23*5R%D32j}jE(PqB2=H?lUFTOrXXT@!r*V9c>#eSf7eFF3&w=mF8pU&+eO2Tu&qqX3uTDkKwBn600hwY8s` z71cq-&d<@;(bOamVS=@?wm=yWgAtBKex@dbfS@pfp^u4$wJAKn&)hZ`WffrP1M#=E z4l`$%BOg<90~Ezq#mm9Y+8!7-@CznVO?-o`tqctfAOx~efInId12w^0Bd7t!dRR2Y zQw`!6s^@DGfYP(V=vf)S{ms36i8yF524SMBOEy4Zp*~h1yd%U?&(YdR$ts)x_taH2 zv1QmJWh5GeGw>!D6UgC!l;Ms+NV16`-Wmh8@iwQT_58gY&Cn!0%Met!kE6M<56sJw zFUis9~(?~xUQ{npm``T1%@G1m_9-uWQtb_Q8B{l`4X*c%nZ!*Yy09>C84Tbvp;8A3wP=t*G!dew@-hcrm*+RWY1Rp&xAqeDP3xSzZQOal&Ytt~I z712?d=;&pKv~`5pg&26*sj3DT>V<~l02c_S;DVqAHblk@n_2{$nuZ2ho0^2d9qo|D zXso9dhGb@=4_7e&Bj6-hkO35sRJaPrlwg6-^8$Lsuq;R{SjonaVid0Li&hRttB@&V zB-96HiNRUkUHb`NPb}MkJ7~vVpOhKapT;;YCsQwtx*V4f;G?a?4bwoKCDj9)|;GST(g|eeH)>IX33qzU$#6+ww zoT5xbtA^_t6YXq)TZSYPGgBl8hJibfV0y+7V{0`Pq+zHn$rojArVR29!cs6GPdj6K zh-Vl@S=9?+6c%a(GPY(+yS|#Wai|}X7_1lSK=2Lmgist+yp=6Oz(Ll5jwpgL4jkY^ z_MyO3b$yUdUg7?}U}Gmesv#K#(S?xxl)ZzAHa;j2I0ys@R`OJ(0HGV)+A7dd-_*y` z3NY|EG{qZ2B||+!jD3{JNW8zXD%eoTQrQYHl16@N*uXGEs2M~L1`9&td^{l`0a&n_ 
zg>MMMMCs|NIT{eWF$hyT5*Vcn)3rwEnR+Wjph12X<`lruz>%S3WPp*Aow<>xrLCEQkY?4l43H^*R=_A`<25+;{P!GPI3A5 zm^YfR{cdbbFvCNa^|HnFrl#ulY3`;lsrt*8gOU}Om)CNfBXj<~l{x(Zv0X-lp1-$z zci*q{>cN7oZT-qb+tv8wrJT_U3q=#treOr=G-Il;TKW`JV@CsqEt7 zxll_>%QU@z-)c`3n1n7*O`=@7ltiU-7be!RSz z4%5MY=#SHGPM81DB@2FDoe6C*0_HsYE)+-hw8q6<^l)KY)^1A`T9MFW(iCgu%oE*C zmXa3&w5Qk}c>OPP3n@L$A{c>-x#&S+qvX0P#4JxSbN^zY8~#-@-Zx;jYR?ql)e+JO z{dp)y7GjIMBo)IQ1_k9+{>8R#`2I7~pX~DNmcRJ8F9ry4!Cyl3-B}keIZ>#5il< znrRFv?j|C~A&dn2z$y$}Ga05|5HSU=m`Qr%FklWmbO0t?9>6uLu;U}&e_3)3O%+x> ziD3o=1c~Wq1D<=~p5z0wAfyAaiO=XvVmWG~!(awn|KHwNN9TJ14mMADS_ZH04 zRE_Kqjw+|q&KYqN!heqc-u!4kW1Pn4%pIireFOKY#k_*E5D+#CJ^z<_NERSuXd-ZF z;d#-l^ioVvA~nA)BWC=2xxfOXY=wumojY+mY*mk&j$yyu(X}n9Xg#w|czvwoZRqu^y$O}d+NC?;jjfwVHgrR{xqyp2r1<;3Oxgt*eX zQ)7ymk;~@smdkj37iRW*tH=3-gtl3B6!{KV#_wd~3|Rv|BXcke49QsZthq9UdxZ0nS;il0=4d&{-L_4GIUpWJTn zNJA0M`>&uTow}AkoW_P zM!YGLF(mP(tf?dpZkqSj0N(X1LgaFc#D$k5bx3B?{zCZ!!BHvqDh~s;mO{C7yE?IN zSC5o0*1Xedg#bi5o(F?vvji_%CQ(VA(s+*ak(eDQ)im!L1X1&Ya$m?3XMIIVdT^l{&OM zX|%sW-&({nR^!%UV5gjkkM=Dr(I}vbsqviCyKeF+ow}EKK0ZQXc}-1?1@M|R-@%t@ z-Gp~f9{m2kTzF+71=4q4m9N6Bu@sJ^1drX){2PXzPB8J2KNYpL0o!58%ZvWvAoe}t zSOwRJ=Y(VM+l1444~#BaK015aPSJ6Rhj#k5me(6z_ZW-5@K(|;C~8%OU)Ij}VCk@h zJ%LxNd99l>FY6oVIsP9J3N)s9olIa+-oND9aT2hIp-1Gt)^oGa)dZmlx>_Z!E{87L zGPTdguCG>iogut@+a2Ln8{()E(9hK7b*ZTRQA||W1ez2$Wd3?HWfe@=-f~UJ1^2o? 
zi>`AVZ2o!gr@4OX*Lmw~ek(mY_^-!&R`~B{latnf;seLZ$tgENqc1**}l&Cb{&GjxG$$pFnkBYCG8?}-r!`LcpJxBE3TX4nv0BNIBEh(=fZ>5TYG+*&g<#wC&gJj+qa=B( zXF{?&&)zrGoDL1^Is#daiQkqi^>0WZ|M|6gOh_U7S8PyM(1jRriT8iS)Yy^vc25A% z!`7m^1s8eK|1s?G-hc1CWX?Re@8;nT@D410!8)TDv&>|8M>c`Q4sefvw@n%|dC3T9 z*%{uF`IhIB?D0HjXR8mE{KuUC=}$*3yXia-68!%x02TdzLKeXsbd%|)DWiL~Y2n&;PsmJde9yATqpiAwqHivB=$-E~-YHT#S z$p{ytIb50}_|wkn8|Ip#mh4M%%%-6dy#oWNTQg!`Z{1hXfmmnV`u^ouZghM1@cSu} zwC;6(N#5B_`zs%K1tc+sG~1EOxD=*FB5R{`ZAQdqvCe&nyLaS3uA_WV8dKxDQ|duS zk8H^C^g)Bz3VrNQlOLDw$yO$KuQ|wD8oj3v{3+j|97vq4tGMxd{w@suK3P(we-besO;^=w*=vHaUDaTR|4uF=1* zx1acYL{1J*9^!CuZaFo-sHzjU$5o!)xVxxbO2%<3Bgjc%vz+B(i~$X zwKR*h!T;urZ}qR`9SzguS_9ZGy4;YSav@S5`Z z=pvA?1i5NTL)eizqLaseT={y|>5cwZx61TV$D2cotB0%Sev81Hs(FCEbJM28o^k+3 z&tuOFO9W`ya%oSDjP~}p(we<+qod6yGlUf3kNi~cJagn69b3HVUuHMC-W2*$ky`Q~ zT{K4*7$x?_-NF6#P> z0}9r1BXWQ=B5oBP0i*#K8-l7IIFf~3dmkZ=%XxhAF9D^aRj~eM5kTC^DkEyva zxz-guh+``NxfkhkBfWfJ?1&J!gR5%Zqb^vt=9_S;MmGKthE7@<&TKxq^ zbtNavRH|PEB+Dv5M$7@`AD`CFX!tmO2m$iRQI7ZEPPHvQp2k4Z1D9B2dpge>Aw@SS zi)^Ufz~BAtm9WhP0ne_~CAmID_f{$RjboES35mn{sbY=?xflEYZn+N!y(BSJ>x0cc z);@Z8H_e=({s=d-R!sVX%c1+WlLN(;lfwSuVzrr?`BeYE{Dn2+(UgE})`A4e$)6?&g>qBNu6I7%IWh@`NyjERlAukHb z+C5Lmf&`adN#?(F{WRETryCqR-t6$ggE{2qr)Lu93(Yb*fwyTj{`P;iMuOkxjBH;w zH)B`wOP6<68?7gF*lWt#SENjJ#P7>0`h0pgIrpRMwo!A~s@LxmR>Ao3WHy?7b&hyI=cgtPwE?HqDZfgXBD(Ki+F7h@`&2QrG*Z`J zYAg3^w1H2Ik2u?XGnSoaZ}jT+va8pphbK1|hB}@YBqIw9xqf}G;rx5Boht&Vc}vL1 z`}=!JFSif+r8>3Dt{nbA)^>1idwHzGQu|=0Ec%v+>YEs5);kx>AL%TPl{-T|c%mE**joRymEHWc~w!Y8ySl3p6`+bzE)w_1r^;=Zizxlui+1$6{J_&`XEs(QBqO$dt&(pSvOYXY7yGrVoF~m)8;QqH&gz)m zJ}Rlyoa!V7#WR6C-yTbo2&p163{B=(07${nuHeo@3u)ducU5jCRHIY8g}Q0o-gStG zKaM?rTMTx}Qzv+=QMJ^ztSI~Pcj2(T4V>5eJG!c*FV7w6{RQ#bI&*X8S!%Y!zp3iJ z$99ThEFR8HBnimHNt>mY_^aw*b70B)e|CuvEMH$|1NjY>=)9B8BZ^6?_}&TG`ptF| zAAPV#7So`dU}fjt2(JqJd#Fj>UM^?aA8jDEGDXe2Pg`i{n9E4t$@cs9Qd*t7@O15o z1fY>t+w1RsY%UJ(>}{6$aptItJm>4(-&~9@u_|~oHa2EI_M`JwGLNK|6d;$K{gt+% z6E_R{AYJtERC`Xq;#4Ot`_D{GeLyCiA9eXA6%)4=uwAhp(}oUH2+=uB+(I;B_yvlh1vf 
zI?ls=9ne<$&E2`&tH{>Ka6E6oNUi*rO83g{nV9#wuOhaWzqVb~P7;(aJql;JqDdEv zDK^iF`?I@Vwg&xu@;eOb_u(@$f|b;Igw_=NXh%mXZ{6UW^;6>o%fE;Fm21NN5jM2} zj$_-Ga8mpiTJM7DUwEh85;?*Etp` zZJB0O|1T#rBeraQNNRRxzdP6d`CcYaths*)Wq8Z|hKpNq;c>nTB|@RjOg(h@ z`5PwluKBl#dV|%5Vy3e9S|M0Z$+`$J5591U_6+;suC%~?|32~Sp?`K(qpr|_Hc(f8fNvzKNq=3- zJxUK<9<`bc{Qm0rP?)4ba!sQ$n~kndV%pXT>z zz$h&QQn^ zQF;gs3sIVil{HA>UkJ>NRnU;hkLA(ket(lw$S!y@QmWrsr=$W^oJtj5HP6-Qe!c&> z&}>1sn_YsPTT35G{(v*Hn8(HPYD#CH@;Ut}Iaa+`{rKMcTwfPpz+O<6oc^qKNiIya z$ID830oKc&$5$%>CjKGPoZVY%2B!4V%?WTc157!rW%Hy#DmWL7FnEZMHot$Zy%{z24DMG2CEl4QDNB~Hg1l2I~hOGWh*WrLdmAV6X2~ z=SOPcBe7XELZ|%7G$CQ)?#PEiU6F1D(ydqetDal~<$?g$%D!1Ve&Q;+>6!GD4JRSL zBVG|n`g9)g5ZS`?LjIE(utIlW)hXUOa`muRNqmcQsZc6UqMJgCJ0XYO z|26@5wPH7p(-v=p#M3{#&5+Z|DiPhkgG=E2YteK1)If!+#^&>Ba8guKRMt}dT()9t zz&TylTPpJEK*@dL|XIs@`Ocnd7zquM`U_i`?EgG zN528-rc9X2pO3j?j<0ZUYqfm0b0Dr3@NlgeEJ?i>%R*u~t@+}J=Y>`!ny5N;!`D`j z`LQC)r;OC(cDTzix>()Lw64wtFFq+)^hxEpPniX8#8_mLtJ4Q>U_g9md(O1U=V#r0 zk2PhZX>XW!PrW&fT<}u2K!LwxQBS4AEY_|L=szrEjo+>gEQ;x^%2j09_#QGkXU4gy zb@i}Wv5Rd)um1dHf3gCswp1PWC z;Bf#;{;>K$@mOS#k~SA@-gXXgfjs?DY-LCXs-!b_5LtzOqIvAwGwVX@$C14bFSM1b z{`TYa*#*g~ob(%^Uov)Ay_3P9<_twFDYB}SM90U)f-eThpgdxFg><&i@~f~;EH_`^ zM|IvE`KVUMt)8KkF<`r)ro5x}6F;p65Xke&B}W^q`2$M#ooo2h0_3WQwxqSM!r~Miy;@vB(Yv# zCs41wdiz`CXPLSEI?;^%#htkND^YtkT)!jnt7p!e`~`w&XCDj~twAZZ-9%@OQ!D=q zKkf9uCw(M38`SbyByXVFVh4TpD{s(gc(HU`7%x-JZ^)sA$HtkGaGdihX{Gb@S3S;yN-h~@duF`2Ma z-Oxk8M6cetDsx2{#2@#g@+kc~w5JUhh`CxQWXBoYzaYw=x6XDwU#>Z0O2lb8%(6H! 
z9ok_!($ClPR-JfxCWXDSfa{fTXd5u|k9q~v6QE@}5&jm!tzo>4HCj3zbDUyJU2~_f zP;)G*XFe@j&jqoPA9WB{59q;)<`P|qRzt|G_4tc%kQwO}5|8`%VnwOSolu*w2flgX z?M=0!(A;pxy-2W;%sIDnF+uue@r10?>8~L~Zf>y{;Z(Rzan}u=Q(7}Sv>*K9=p6i~ zOJ>v;P;(O=UiN40OgW(X>^D!hZ)k|{BIkAIND7h(&AF@s>|>F;YrCwZFNtnM6i=gI z#hYun}f;HtJ%FAn^89nY=l$FSO9cG~>Z)JG0U-bmhcY_IA2v;9iTzbUdn zX0M#z!%R|Vp*MIZuws8e6?pwrf$7r-?eTC??h>R1!hZTDg7GIT^E<9#MD?J@99^=z zRxWq($9?aw(z$4nqh7OS{h9E31>BN%#3yCoY&DM!cD7ea3ReKXddZAYw{J&!L% zwfK&^4%QXM*{)_=YGcxJ=eOf~?0vzkUVmZ34l=KyUHbR{+Rrjaa=*`<;HCLvba9Si3dOyn}4@w z?gC&v$kHz0jq8NQXiYjv>B?sj|NmkEem7Z$p&zmeW$~rY+epg5pufa?#t5m0X6)c6sz@EDy)_R^8L-6^?>~$Wy{^@nW45S*Zih zd9dOR*toYlLq@D5E!LfvI(?NBfr7_pZ-lX^k&DiClH#s~)0p3$MztzgnSIH`XsCMZ zoFGek4VcUb(N4R;>BZ;b$I%P=I>XbSpRr@DYxGB|$Fdt9Boe@Wd&q zL_=s=`A^c-0m@lS*qwA?5&b6G)VmDvp9WnICk&o22Ufq2?i4bw=$L+Vr1uGsQ@^Z| zc>#F9z=Ey`m3IQF-+z-`n$19Vtj6@RE%U=mX1;ohT+bg$X_6(J_#q5O&h~v@Han3a zg+EH7ofa2t9$$(6E*1=}=s%rb*xso~96J8|GI1Gu(9pp-f>n>s8vocvY~gk1KbDwp zegZv}%;kQJ#q6YetfN%|ESR%-^dcAzX6kY^h=4e@0!%)x^y~N#3fd$?;)*3 z_>U$F{k8vjdk4pBcdi^OcX?(0vH+VruGo1^+3gO`QK|ys9*_@8KXP=cHP^p~D6asj z8{T)F*2FO%PTo|iLY<%I8g zHY(QFd{lyHQ^$6%&q4@iWZmtu<*OvuXYW*BK&+)y-D3NnQ<1K9Ox((adH}m+%A+CV zx@(@!?UC;5gZq|RQaAXa4bc1aO%9-ZaXAyzn|*TT`iHzzLJrEL>*ILp8P;_b=Px2% z%~|dx68j83j9l4eWU*nEg-)%l zcqPx+c`v*wY|5<3yeA~_@bfQw{S-kiZ$U-yeeuGoLx#0+%QpguHhpbz_h5kc%qqC+ zQXtBX${0xsZNRbdv

}}rs;+^$&jeBF0}LL1a@jy-BVam+Hh29=1po*2TkT32MqRhraJNTW_N1(?U2m+)RC#*^ z5N^+aNkxbPaFX-U&Dwbp0EGl;%1g`0Altt&AAH*-LYHAm$ig4qACmsW#>1{jKTY}q z058VACz(d4GOCZ~9CR0W1%b(lzqw%T^YGH-uNaG|)QA)9)tqxt{_io>ZC8z=`|=Yq zRGlBr%dy4h0R2Q4x7-iV0NM;JE}!}J6?j>rHtH$G&Q6CXrq^MeI8eNYy5RW1`u*~k zcil-Ql-?&mL)8wZ3#U&3FX=r=cPWzrPBy*Fab_<#=*+Zl@1w*=rVoB}wSNEDLTiv| zJda^$8=`5kWeCu=x)-k$m4Futg8^-8VQO?&q%L&Z4oS@4O(_F769Oye(mC@MzcqfY z%>#BgGY-)g*)~|{S5n?LVVonww=Du296US4^ixiDAZzb;{)0TB-jDz^bhyB_(EKN( z&Y5Z&aw~|R#+f%DWp7{JUNa<@*6POURWgBusNIdbVZqG2L7Tr23abUipBgCnEN2O5 zsV=AXZZ)fHo{Y}iYlG4L!uMYjr!_;{TYXl5eX|i~!lgb5KsY0M-j_N~3eW`dBHX`C zCy4BqqFM*vojjM6+%b%*6d2^o8!)^W@bh!dj^`W++x{Nc7G*67|h?l06x0w6)LviQmFqc-oY zEt$r2moPnkCux17nyFWC-40;q4`%#I*(`I*V(_)Em)L0KzTvJc)SF^GC&8uxa@}6x zcFq<(d^&Y5h97kKUo-&nRLl5^<^Xo4g;B-^*)D+MFNy+~lf6er9~J-*h8yXg`plfb zWYr;cWEz1@%ET0DTe2^p59PrU0Iff;ODtnvC~VcvP~#D}ZPzCh(->&!Y)y#PKSbdF zmx(%9_o`+3ke8ha_bHe}d9;bhGrRVnD?Qfv zj#|`j(e;mKtrkPwK3MU;xCzLl7C8i3=hq2EG#Qs?GZ0yeBcFc9v_|%GAjH&822V7< zJ6zvCsTl22xHc6P-(S=!*WCBZVCVPL>hf5`r^isvh)^nK<(Xy}{x5U)TqU642U?e4 zNi676U?1XPcku8VU~^fvu7YEKZ&vQ?``0E~Y63cS(c5ZQ@`mN?CCMVguQvz-{e_{} zL4T({p}gFdP(x<*fd`!S$TCvk^KGYi;VMrcA!^e$O?{&C5ZI%T_<7-lGuy@q`s*H+ z5vQ38Ls`?gn7=!6K3`sDUbJg;NJ!2FhbvV4jo>OMy7gwBuXuH75i+B2XT)9WKk%uj z$Ft3oZ@zI(Rv)(CJV+2-iRbJ6Y$K(od-+?vFtDw^JNWVQm7LvZKp4(_w3z6FiNzAT z=Z$OGijS&zWp4kCEzC;aVcGaGoO(&;>ej~TpO#s-#hoc{_BY%{+d|y|l{&Br;b(gT zsMv-Q)vs73yy;-VgE^6v5a ztv|!Ex2k^ZP|jOsj>^1!XS0^#s4S}(@r@g$lr zuhEYiatq(Y#HUWpJbHI%-T6DwTU7Fur26avn9^(^#Twe|xodR{T0zfV+iG?3m!tq5~(g)DGpVbU1bqwO{-~rS#5b~L^khk@=O=7jcxliG2-uX{r-gJ z#{3|Aghlunrw#hW`&GOE-XxX(+1($TABQjAfA(T;WKTep|Je2#J<*EQ=m$_+TbQ%K zp>|m}034dsZIhjku@O8~0k4%+oqL5++P|0IoK;Hsxm!Iatm&%)K)y;A5Wi(?P_fd) zjMCrYZ82Ktulj}dx34XS{aucwrcEA%bIjb)nSBi+3+rCY}FqZ}R)B ze~EL2gy8c`26*r&C;jJ*di@mPlb2A%8<#S`EZNdBd2L}^S7;PIT5|fu>)e{^B@v=F z0VST_)`fcGyA4Wr-o9|Z5@9v;jh|*@E}Ho3Lr976t{=Iqbbp_+oa~^WK-b)Xu+ckY zcu>`BR0Ocz>PpefIVi_Ux0cSQ;e1gxL(_v_Rvd2^-_8XvQAcR8%OqX-BkIJp%gf`f 
z$tO{9&TVsXXS)(RWTr-jE9syTql?R1S2-m;1xnN7_jN*ZwL_AseSf;sDk3yg<$v`L z@C9jVl*T_!*NWUpXvVF{26tDL8CChri&R%sn-)sn^8h>=kKNfZ7mp)4JOP3`uUpt3 zUI}@*mP)+r>DS*1D;+MYU^@6ctA|fqdZHC8AZb-CiO%Muxv66*q5W>o zGWU*}p3~y?D01fgyiuo9h|{1*A}FCOqvG}Qy(SxB{(H4b{v&DSlL{PiXckm~cC|mnEd-1sy_QSa_z@f(u8!OraI>{Ol2a5c$kq8!sL_Fu1 zI3I?6e_sOdTL$x^g+oE!8}(q@OHKmv8nyZ_XH2HPzmVU`+1$EDjmtT2p7sfw>{%Xs zlk){Ub* zB0FL2syZ?@>(aBIat8c5T_X;&sI=3v_Z3~EYl?0W_7U4FnmRXrPYpGyvFxR@W{Lj{ z_gz|f!PHnIaC15d|M>CaiOib#`1nn;7noE&%GR5a+m*;OM#Q%%r$$nx+1>BE@yK#? zo4urbQx{m7``&P?jZJyu>ycWTZ&$iFpdm?4B{t6tr8k6UQWxzjFk3S)KXuCbqn?GS zshS?nWS@gnQTyOL-w$W$;}hbE_b#w(7w4$r)a))h<<|~QzsYuYKHxy<6wk#WN1tVg zCf&b$ol6cfx&1A9=*d%H!Q|_0Z!LU0Irv5*P$=)gh+WAukD~mjxasL{W)53(k{r#a z%igm;obRZ(I?P^v>rJ_l5gYAYH$SRsPz($60lh6m6n^e5=&5lscM<(@XB(}Qduv>@y*_w=|7-OW zu;+9!y#^X*A$C2)wi`XLU@rSNyzHm!3+d-|OAASz<)URbOCwWv*hs0f**W6c*zC4c z!O^q*%&zh#p~p6NSPKOUJcS)5e> zuxn0TgKXO&1XNx_h-@<=eR_ zFYNyED0QoR#PRw}Ua&($KemnVC0X$(srffshYA}TeU$xRIy3oNZG`UxnwdqtF6j^E zP|}Ye$G7cXd@p`PnJ6R_c$E_UrIo-g@Ur7A+shx=+xEQ@Y;>Q;SC&&IbXN16C;R|g zJ*?6$w)xM50ASAVESUi8_(&s-K03d?GqBm(nzp?E1E$KBMS>?~GbZQn^ z8XLK^qWME;S6S)#3Vs$nCBn`{>uKn$v=Na`*I zmkq6&s)@S&olUyP41s^`UHIk!i1Jf-_5^yGk`Nj+VYT|hDZ3Fio9Wg8%H~A=kV)Z* z%g~BgLmYhC*c%evyo%JwX%0U)npU812*3mMoL+rrzK};DYP`!kg_)jcrWU*rta)MB zHauWO-2WMUb^6OIVXSZGgH)R=Y(y*bX?0~l~?bn z^N72~OC0U0+(j`Gqh!iLX`G@VP(^WVr^iSChe?O2_CUxoBDzQC}O7xq*ST@b5}4;hu{ZHvu#pTfy8|OSaSDycLG$w@l4y2@w2zcwew$bL0sj z%RNom+|)hhaR2$l(DWIF))^aIli||IOCjfLpv2W*@4B-zaj*4N1)K}?&Hs?;mgJV& z_`U1ajh+mhn>Q8-1Sb8x21QhlAlYJBFMD zV1b-`^u;$02ph^4>l5N-zJqGBMnX@1Z2&W)Gar+vJ*TJF4n`m~031BwQdXVVqq^W! 
zp9=H-8Zv~f2QixF{rOMg0{<1F;FeP5o6Mw=@`huZPSb}!N8{3(nw8w{Hu2%Fx-Dn* zvbnfAQ8vG^1J3LD-^~m$Rz@WddH`n>@zT@3+c6UE#WH7|wvdHKEKrH)JHR&)E)8L9 zK<4spK&Fph>6^y908PFhRC3c$@o^jA@K62`mke~05E48HUDvMZy1ZR9`|5HFeIpu7 zkk-UnoCOdK0jilo+qDN9^We`<&BW;}c^=@qoY`(bTg8z4Z(P8)OU|+lNd9BijMhVW z$5{3%xFT%6UYp1Hrf84X60-W1C773@%xR)uun##Z0f0Sc=1Z?PyWqJhd=CrTHJALc z#%R&{%@DncNEnE`p(4=Az`p@s#n}D<@p%4jy1MnvUSoB2UBjkl=&03MgSu8}X(QlU zpBG6$#lbGE|B~`aolp+2iFA$*qYI)Ybuot<%bYd})&@&it~C^anqx}X)@&^T{n}Dg zPfKY7{q_|1=A$pD{Z_SSUu@*HX2y%VfP6}@#;JsMGo7rtZg znM-%NCJw)i58E?xwZDv9ZHQ6XX?)6^l@Gwy9~sL2^a4^~=LwHSHqzts)Xg>9nqt+K zpC>{+u7vzOc_2VyuiaMNF1?VquA;O&((_i2tc>k@GCkZ-|9f`;D-7~J0(?}G@0of! z;?rOq(fw@*L3ibK+wO?AGu-9*L^v?q|I0Gb7z=erG0n!<^!`Qd>9pgE;I6(mwdNy( zy2>e*JiaUb)XwAd6Sh-{_Yfy;X2Q_v>gS8~Q||&z)^kK%dQkRCJK5&B+ewU1%2wS~ z;MhpeZN4w+`&L_TcNfu*AjY^w?m^GcW_u$E?c65<@YRLJ!mZpW@WHnKn&^-P0H1s9xo zj7422Q5ke)c6q4@2wPz;o=rP<--txG6Gz^SK!VRvQ|ObI(ymD}lXfMNSbFn;86!9*S!`Mw6uEIug9OO#o2#~t(TYTTlVUV7YAK}rgtHJsRED^qey(w!<<4$d)Z+Dn5F;NSUI{O33v7{&jaEZs|t?<*K}W*;-}dQPBB=M}v zE`ddmQ5MgS2?0dT@vs~q|7Lx984f(nbYU8^fSZAy#N1>opNn~;3P3wP3^QG86W8=V z3j6A?s=jVrK(?fGNVftC2uSCq5$RM?5KxegO?OGRNK1os*QQHEN7hl5uCqhhB>ZL{p zoXR=EOvi{45*5fbO}g{>1XJ(c;RZT=vL^sva~+Zzm$`W5CydIl%yApOYj4 zHP|2zGGumIQ~^#f$dVTT_W{>IQ0ODg@C&1u@tVPAyXD)lC#b^_7OwB>z~S}((C+cT z=?{$8Ri3LOi4bLdWBXsuCeI{I;l&y;CH?dp!aYokvVNbd_PHh~s69r^fnHWm*c{j) z^Kf_bBTU&S_NMkJy_IL|;z%Ol4`{pp*3ue1K#YSgSu1pyI*3H%D)|c>l_tBuBS5da zva@q%m;Za$5R|`u;%MrnMX@qC!&qCJi3~f-QJ;Y-CwsgGTw0I$XSE|O6`wxIRs43ISo7B*O5Fj()!(I3?+GsTzb!emS@ z;@fI0C!+z$v)S`X3ltCEy!=-=GU6c-=}laB3IJxlY6mc0#y$vX5Qr5hg_Ryi;_Pg( zSG(85^2~gYve>Yd+H$f~({y*C)zGzvM|`0P#=&JiSbQlDc;cG;*znD3oTmEv^RX%e zxLf~`C~Q}6WhjG&&w5s6krUL$ahoQQF#1sE9V#sT?RgzUYH(})p&olc4!HF)um&Ko zV_MHQe-wi4{swG(kOjDB+!f;67sJ5dalYSA*gu4>fkD8iGCu$4T5>OsZK@{a*(c0F5|b;x+GwUzx`J8_@jxX27U@H(gzL z2?s=H%ylf1r_4UP=a1^-AAr0!{nxjwf5)9CVmgnQ2WWL~B$ZIP`4BZAT#LJQHxxu+ zopG5B6o`i(^q`jMR2V~aFx-A-h;>ISmvwRn&r*xcXZ9&3tXzhTyIe9o#- 
z;KQt%^(&7v_NzY8Z+caBJX35zb7g`@FGJXLYiR*%xhR8CF`1}2sA5wg@hSA;aFsaz zY<=V&Y;V3BAPAaZiFA1q^F*6{AcYSX998G=--azg>%z!7oALgp0W|)ljqn}Oa6L&j zG>PY=8+L!NQ3#Pk_w|}x>~dZt^t(ZByp?*RXoBnN_?&m9AsieWV?~-+*fyE6nz2VL znO0SD!(K;g57hFNO3XBp(AQ@R1Dne6W*MYISHAjsxGwXw2TFfqXCIv8ka%r^d>qaC zX!5T+`0aym*VgfKO)wMivSG$Iz^UjO-hPWsG^g~V1Op<_X=|d8G(bIHsZb^ObZ3SE zyhn6>((DfuAb!(m`T#j5TPMs^HaaR^NTmG&Jk)#QQC#e?{PJg>;H$lI&aorgls zPt(P`OO29L27&>FE2!PJeR1G3_XuEJBFYz7g(YH5p5PH>a=#4Kef%2pSK|EMFU%J^ zhzaTV-ruVsBqH4UB0#BzCicT;NWhh7p!Ez}?uLBiz%l6wZwLN_^KBm? zn_INO`|kF9uk~(h{+Av<^EX!_7tf>n#<}uaUpst}FRyx|#+*h79@(egf1y}Xuk4~}Q6fvN{LTe1|Z>{CH9zAxDp81w=PzBFiR z1FR>-T*=z=7bvEfZGz$c%v0l$E^xX#Qs?>pWb5zQ7lZA2C75$K@Q!QT~<%iwCohJE}Ki zOK$@WxioCwi4W;`;dTT@QRWHCFNzV>H09JGpr-CEo$5N?oArtc9vmyx zpZQ~AJ*1N(hhOuU_e`4a@j?7Iz`4x^)e{c^$yA`OkLOMC-y*uX3bvMJ_k5(ZOfRnq z>}GdcnhP}CPPm9cebc1EI9H0UIiNfUhFd=E4*r+sz?-SN>jXBRU6nQ4^M3TqyCEr* zbtp`m`jJM1y{QvkxBwj+s^e}}rPU0tx6hzDy=yr(a++-Q(L>nJt~5cJDC&|^Wy9JJ zpUYaiKN8CB6dL3!Q3-pTtK3Am8#$D1{Yc^ax;$6)_M7npEGi5i<8qe59njXz<`_r= zOHhup40Yk7`2_g%vOSZ^86KLfuR8*kL~wPv1*A3*#ltuw_%$y8b{8)|*}{d`vF11$ zMI%<^?^9pKuk7^v@n@`cK$KGr9z9n~{l@QcUhe1E`f-MtVEAUxo`_vfr=9oB+-#R{ zy|R0Sarc;fLM){&vO0&G5fAZRgUQUWK@B00k*L!){#_SHtZHb_XYHf7Hr^u zps|ed&%hk%z^*-+gtZQs_v&a^??2Aa+V44{T!Ipi-y0hnM|`5T6uch_P6cq$hXt?> zK~2OGC}}a9j3X6wKg9+VZB05J?8TUsUzq17St%1*&3_`Tgxn{T5JkD^H*CKu0q&h3 z$NmhwMfyl0^&DNGE<`g4qw|GC>;S~Er!Zw#agb29!A1Z%O_v_j8;#`uLXB?59~ zn3gL?8OHnRI-87Sx_aA16aq#$ywRCeHz8KMcvAi5KhDns{7IQWY4ffMqSo!pz~X)a zK6qx_qBt-slYo*OO)L?~MkYF3_UU>%2AA^i#QJ;KQD#6~r!`dUkC2=g_pv^Az1tk_1s`i(t;d=j1mJd`TArTO{4P;`O@WK5GYQf z1xss0ZOfYt)x$4EKC^FYQIK6ny4|g7J%Z|05ElF**a$F)xhvd|m|N0o_je*(Ir8sg z6mOf(@Cpw#+Y`IvRC2dDL|xNBy2|y*^Y+CbSgOo6&SCw};1?5VSNx|#u47-dc>9EZ zz}Xpz5yJtus1?s;12P0?sB(=7jis6vN9DEu={o_y)7CLD!efgZF+%_vX%2TB0M~~ksh27RU2@qMBw{Tu)cA#l| zKptfo%y<8De#2y7jxpkW@I4{5v9WCS&1v7q#rV+FtPOW563&b zCiC+nOp^wb$Mfy^=9ltu9^whuI;=-|`Yicw$WJ_!mq)9^sZxc5hUXoQdkT;^mi;KOIG%n$xJ^6iHGBt~~5>U|kcu-uw>WJ7a4-sqqG9#MCsdXD3cnM4|`wQ4%&HA 
z3Zl2}Z9T!ZdDGVms;&lhl#mU2t*|#ZqwpDm%DOX2Of8mE8mh1jU#jI*f8ZVL8LyT< zEh}a3hQk%DV(ZwrJnc@*o8g}@zlB{bME)dMo#Gk_xR9my8WgR4+M(i#g%I-52%eN{a6c}Y&E2TG7{5TPv>wy0!QPgt8Rt5}K=SdMi*S5e zBEB7A^vS3X9hE0D&y0?-HJD%NK=;N|?pV-irs@bQ%?EvYtnUqHL3yxmO75i#yZZ)A z^)Hp>s;d+_;-|u0hz0tecfBf0=sR_AyH?BQ!J!t5a9u`Q8f*BTRBKBdN<0vePL*u@ zE624jg@M~F@(^BkRR39_-13eSd+#Uy%qgXoldJOxRuxHEcTU6iijydozPKCnx0kuR zFG#+jP4@5*57?Xt2AFu?M6w)dpXdzm!pN!hCBaEqcAQTZYPnXCRb8-zE!pr`-wt-xI!Jh$O2sN2bK-n*^J z+lEdxGFsqqM^-5`;o|(zF(4{M6{sBqA-r6jF|1Pf2^@fJn%pXDpHOrv*EpVUaiN!$ zZmZ89Z~mB5rG;tL*I91gTSwg~G7V!<79vFESLmtkXHe{edt0Pns#w z`1cOAdq4J~R8%_8&riV<{V?iLJl~9!5984GX2|@+W}pIpK76IPce$&{t$O+CI_Wu# zjMw_CC!ME-bfV#eB7))ZAnTsn#LEq$--LZI8veB>U4beF4UVc#v)AAS8OQ3mq6jW` zSisNNQc7(I@gPoF>KRd#i{tt%Sk5Frh-snalkVaW2VAY2k!PTL4EI359R(YY%{--P ze>}9x>__;7BR@svR<}4J37&&+$tUg!5P?nN;hOG@fVV>j4z^xuN05Lbf(q2 zP0fh#MLOn--;#kd)xZ*nNPJyqU4p}IF*haENC-8Eb0s}{WXBNa3*=z)G z=OY|0qQb)F?US#cIuo|)qN`pqc>A<;UZJx--wZ&@hZTHAebnOd>^9Bm86O645oivc9ENMm3EOP042bsYUxgb|k)`a9n`o&nS^~`C6 zD85vO-11XN=Q=cd1c(@Q$cFqra)9TZc+6EDo@oL>7rZ#qm#60Kt5}~*E;@+oomYEn z^#>+B#bG%=qIoqYtW_%L1W(s}2v)sZH2oDk3V&6eDr=O!l83)d%44{S%5;MBwunA? 
z)xwxuXBmVdjfML81hBux!xVN=8aS=C9fce3@FX8$W0VTwi5mv93E>)TOt+Rh&p&MT zJVTy(F|J*)@s)Sy5V9WkhfO@8@mRQQ z5X}g_KTO6I$M9YG;jfYh2CbgK=dWHYuSAO3?zNj#+tc_pJ+3Q)WQDV}UJYq=MNv?s zT}5ePX)PEC`G}z1oi^-tf1PBN@!D=HT_4S(b9SSV9ygn_wH{(67B#UZKurF{W&6W4 z>HAY(wK*J`;;c)*C$dr8o-aqKSaED@(*p((2X@+)HMj?-~;-n zfu2~fZdoL)TuPh)NNv)Kk#?u68_2~o#+2#e>TU9>v1e^bqvOK<@^P7f@Y89Tbj{k_!Jkr^e2zm?-!R?pBu{)Q z{2MRzU>9wI#k1A-z$<@L_%o!%+Jf(MG$TT^tL^f8f5Hbe6yHygKa$fC8u4 zz{9Gqwkv)3PPmd95^*je1a%f32TRiLtYMe#KnS5H0Ff&O@p1fLZeY@LV=lEEC11S@ z`#0aG**5Qs-}!i*&XZ+$RtbFlH@z}{_p^{GyABQwbN6==?VYpf7POnjYos#gx#^{wE8Ke9 zOW`c(s7OHbqqB@Ge#qm=xYTvx{V;f5cslXa*h zG6;n=9l`S6Y{A5BvG`S$?e8|^rvTB>I1{>31Qc=;C?dlK{PPR(p=g$Sf_3YDTnR6- zbv_&KZwu($%mOOrwdV?|u)AB9ItL$Q0{at-ZbP`3c&pP52`g!#-}8;1nHD>&ecJpq z_G{la2!mSywJQ98GHmyzX)%95Y_SdpuKM+jqT}uynaK2MSK170f9AKruhuK1TI<=+ zlHgkuNxgOG-QfjRW9@DbAeLm=9&{Ap1RPP{n>)CjLxT*5O|W%=L?oaNl@#fIwC~3M zhI*ezT+C}H3FX*aIpfW3^~J9W)5p!qd)?ztCWjjhEU~HoV0_|dU!U)xqBaqZ=C;8J zC+C0Ny~2HM{xkVUl7QEvX=f+twe8R23qYtZ3eZL_wno61y^GxvkVP0a^O6t7`7KR| zuMxIKomGYm-&N0hshaP0i1YwS&>F6fh_PdS!}|%r=ASZ z3>h$QHPEpp*;Oij8@76Muc)dI*E^XwT1;SkuDUB;T7Eo~5m2ew?-!;~_21fXtQ-@Z ziD6-W7r!O+UN%v|*Yw?BGJrYT_9{S|!+=gUof5E|eNm2q)LXX6P~<+vQisba-~tyE zpI5d?R~&C6M%+J*e0H$hJ1+5!g3r336FwlXoGz@Iex~_Vi_Uts-a^byqADeYg6nOc z&-PSRwqdK+uNF_CN6P8p0HZ2;M&8p1R7&(*w;RNdE>E_SMBHorU;rorn606{B;G=! 
z(-WZlQEY$$Fy3&W22x~sUZz`TeSUtBeb`vl9GzqOJ+Y9>HMB@I4$1A84k%B&Yy{Ic z$~u7lk&y+KL3;+chJLNgi>eobV5?y{1e5IS4C^2#G zv(vQDNrsZmEanNZ z*mXJV(ZNTGt<;6x_R(|gFBWtgjZZ=!J~uNe_h~uVH)M76b5cyC|E$cx_*R)(fGkDJ zMvH^wCEGL~F5d9o4Gfnd4_kY3;Il9(csa241Es+e7}|^cVMat^dUttzDfvFh6lV5U zr&c*iY2*O81n>si{`M3%vAslJnMNT20@l>2@S@Cn8pgw!&`pP+&j9&^ zc;B3mBGej`PCg80`l8t9ZH|9s002M|pH2FuC(zG$DB|{<-LNHmIH(@L+sLn~%(_zr z9LA{EWk5eC>ZYTu1U_E@DKGoEn>D~oN6&QMJC^Cqv*vgBQ$1uX=NA5z_-pC!&${&Y z*Pu3Pa28gLuReLDVyCI6>l+_QhtjX^jf$pS7DUH>Aq>9!wYuvKQx)}w6i-)Rnuzpf z0YN|34?*w$CFd6?fAomn`w~S5?m!F_Dzk)Fo{*HNaG+9=!=!55#_>dC99@q0eI~^W z5Paf`(beAL?w=|JI&=w>H}1H2bRI5UkrYBY;(QTMz8&$|_O*ZW8Bt)cN$S367Ed1^ z>&CD!5Tm#D$^20L3;whW;l=7T*cS#SuUQN6gA@v+|0M$8LfQq297sTA_5*^Dk$2?EnC>Rz!G-arXt{=!PKM*_?*S*)|Gn zEu7%23qRf6shp}A?q>3ixwc?+ydAr+n2a;gjw%`^Wfw)ziwPbp6xRwr)omy~3~l1s z${}&Q<*nxf(usjA*?Go)4yfMg6vklcvS>9XT?%MUYZOtpYP}E7`4-pVP>e<`;(AZR&#F)*%L5X)O9y)p=kn%lw(MZ^j-)HmZ#Oh6Cdl0vS=hXJT&5+sTMT& z+~x0`l)8TM*<90!%2#qJq7k^6la^}A-A;rPH5oKw9>qqlXJ%6Ered;XRMTyKcIm&C zIi~Sl4ZE$&r59OB^KwwXN)mHDmSX$8GRX3Bw#0O8l&s&~8%Sl=KG4^ut%<{ugo~VJ9u;i7t}G z+E_Ddh?StVHSTiDZ`Ah^T%?ir6P411KgKdCER7YaBh*Du9N95VQw2oEKxaUdHvxj0 zkE#0x9UE|)Ec>TxEDuh1bZu$u-TF2(C!OsUTeg9}*Lw3*crl)H7uy$TQlPN{Po~&i zUKNR*zsMW2#yAmthCkRKKED2qXxNE3EWaU2o#?n);#>aB`^=~EJz^^sp!D~*O)@y1S9EOx-^UWv7AKbpa>KP>F z#+zC7ZhDZ`Vk_-?pjGDo7nh0H&8KZ})yCCaxYgIquG4i0Pi@3gn6%cs3AZ8>DyQJx z**}Hp8}0nO#H#zwHrb|_pbGGx0qUPL5p(z3EKi<>r{6Z=W_n*=19OB^u*W?P4>Zy$Vb$xf%x54j5vgzI|1(@mj9BkIv+yqN~B)sf0M|0+eA06{P zR!*2TZ<5}5Eg=lE{757~5X-2rWJw{C2(_tS7`pdFc6?*aiuhKdfKRd5zji!(Dgt#3 zyFD8%Dn{6pOS2AyT?|U8zc$B)`N|e(?c@9C1gSKUL>M>GJWH77@wUc0Y6TFh;ozKL zwpW!>JL?_Y@w4(w9OT6)@V@~TIQmuf-th5B&BKwrs?{eA4Y{CL)@|_ zn$HxXj($Ps##jbhowuWQzUO~&9JvmOFUp+ut2MvM>q{57XHmBssI7fkIHV(%u5D+u z>|vP4tXIPHf~epBb;64`qqC-)w)G7rXqhR!E*{tCKwPhJ@#ZRuc_x!#Eqo`czQW{- z!Hj4UPa_BB>bKpkM(?sa(sx(Y5)D+l+f0_6wgWG>bwsJ`mg~<0>XT61G8WZ|Q=@hl z0Cm_~g#*QXVnAl%TozB%TBd7gO4df7bzmAu9b#t&e>TN_4M^}#*fxcHU4k{`p6m!{ 
znr_E*78=-8jAxDU-8UD6Ni1kx2vM>vg55H&KZ8`O`T*Q(ENeY*kN_H3*AAOfNJu2V z5Wf)+<6lCOCic`{s1_)njeI%^tXQ8F7M~GsvoV`}p9fZqn70?B?b7A3Ud+EqhhckS zYxi0|I>gKNt~o7^WV8JjboR4^q2T8lpVJyDUF!SxP{oHLR`@QNbWRi`FH7XSFd_jnvr2$EZ z3}Ft{P~9huH%6AqVS&s26!$@mxo05`y*=`}j;oq_%iHvM>c`tP_#fNjfX}+=?tH>$ zGd8=94 zTJ%SOT%Kn7R1mZA2aN=#AY^Wh$+QG~;_-5pmbB1>&!k5NG?0J;A`A-Hvb0OTq|l$K zNnJAuGdRg?QcK<0=`q+ZMW~z5k2`C<>opvXi9DQ7>8#$$EfQnTHr)PV+-x$0%wkv< z$Vg?tLA+TTeG}}Z$SEvUhrb#GbgIXtR>^&%{D>)v+UNez=~HS7I^_D@)|=R21q zdbm1RiiFJH)th8IO}|LSyxg@RwQ!`)oyic4two`AB(X*Fdy?736i&7%^Hj zgD21L^m>7SYNjqOulzie&j3BxF%6f5O~FfEuXWH$4MVWUinm1tmCDAy=lljsE%?z1 zixjl(;+_OFKXaxzr&v=KTGzu7Z&>S_MR#7r_k;*xm3~1gF|THs(U?%byZeL}wWH|? z?jS=|%poZu_&_PBP&GR!VN=h&tTmHHC_S2zEVv+&Y*an@I$sMDS~gc)l@~{w3xN#n zrOKTv{?w*j*H5=7!Z(o|CUh0fz;#Zz~)o@mPT zZY*adSn8@P^3wxdSYLA?0}l2jO4k0w-ZcVdu#lqtQeQJw6YEoAT~a9oMvdq1gc z9XiOcY<%^u#bw~$4~^z0U9+EG3MV`-`&<6Qxf{sMTII>x-S`-C9>W^v zY~lG^SAhzvz_ntU$S~6DNU!t4d3)JK6fQd{@2N(UvN4Nkc{kVxrcT3k`aWX5Qb~h` zsdI@7K9j)}ZAhhIj*-`AQRnim47vFGY)#*XMX6RQ`g>W{dA*;ptP?q=e2TBxWL}>* z4?O(NtMVM^3v`@1F^2P~4zsKSb(B;a9Q+;y|9!u#i4t9fAIUsBMWQ)w(VrsYUPT*5 zHamYHcRSrCQ=>9~^NT=fgsMM1x84`GN4>d@IU$dcp%$c{VU}Zg_1vU5^xbRxXjH``Lm1}Wim$dIYCqy(FRj2Dd6c=*1Od#PO!!5-U)w*qz(@9d#xen$oSVoOK z{$OA1ghVqWJleQ+B@@c|By%WcSfkY4?B29?D0h=QT5RGCe}I1zys7asSjYSINepF# zvRun<07-0aNYWD47eiINd;-&Jl{OQy6} z98C3dOgbmb&v~~vQ$575#wDn1ttfN=AB;R8+gYaYxhK1HOsA6?>EB^O;;+xtZYT=| zHd^azdRr#QoM@j`7M+0o+q(Z6)ws)tu^^h?s0el;8|jIl$bDU7p_coCSVWe}JZ(85 z)8)Nt0mM9iZ>mbR)@GqpmYY5T!ok|{2q^`*&vm}pRk9>;@+xgFF!=^O>~I?=HssXQY`W~OZHX+rI2XpmG3DVb#qc&~+5sBNeUhm-{@!{V`i+e8^73wGg7YeFK#vbP zDCA|(==k2V=^Y^8#pa3~8yw(xqmV^p%7!v#P1`h!$o^DkO?X08Xx2H81DW&cB67rR zy!qNM5kcgLwJ+@Q1ax%z=kN3U-zhg(4o}SCHLkD*O5yO1C^MMQ9z37-F2<6;-M*K;}?{apzwEJGzz_^LFVi%eZcAQDqzoP87mf#+Av$&(eDyO8__q@?<>5E&H5osY+u!K%qz_PHLqxbYs;mfTkqOE_B0=UJMHdI5 z%K4yDiu?HO`p6rRf^E4rR$vuPzvc|YkyJ@=B(=dv8F7TR6P0POKf)cb(0 zfW-CDf$eA{NSYrss4-z&-tH{3@YZW95< z+x@H%8GtzR;lFR_i-!T=G8y}|0hUR9n0_)0_yc+Gbik*?vm88MB{~kTQrH^=h|qVd 
zy1B%8(`$Fc9m*p>Yk1>#zxI73SQ70YB#TL5%8>}f90ho@>u$rJm%e4MYhHllD;tne zjt4(v)1NNlzT%H7D(B>>rWQNvFeFmxabeE_6kL-}x_f&1D@=M<(qQu#-%;-lzjIXD zEz7treS93dRi`k_p;u4qur?4w!uC4JTD*gc8BKc{fZ49w6}>AXvNTS%98)}?O?>#k zaPN5Gv~? zwpf?u8J>^e?q`WWYt>NN3NLN=K33dNmfBtDcc zwVGnn64SKTqMLnCe|CGdH@RitedBa-{8PCm5>RnBP5^@^k#F8b5+O=VSPis`$pG28 ztGio1P0;Br5Rg6uIRZJLW4r?Rm7~?~)n8S=i!n&iLIwQa4c6&kT$)7UCtB~s=DqJc z8!pyToZ`=L0EqZK2Aoc`QLvK58=i7CH#Pb_a#E~GZieX*`kCGL&7Qelm^p; z9*f-^$9Vv8>e2Eyl5_m4{Rs*Km;TOe*j7ur9gj0S)OsSRo~Wv-=Itz}0}S&8NUr7r za144Kw8#WHfzb#t5Xr@4=XM|%@>u*#j5QzBB-BcY7KUVi*kq1Y$?H*&At4scDFQOt z>OeC!Ep^G51S)2~5;AMoONHRxSzMw`%wush(>nYa)cgo^iAzF{-1p#*f$Fg~M&R-3 z#!p!w$NLm$F~?I1*!M;7%@@#5=PHmAPlC?fr~wqr+~$)BU3-wz5^&m#pLLqlXRVUs zb6ri))?l`Kz za$GA9DH3EN$lsU3RR=cRH*!BQ;!UE_c&P9ZM!)%BL_k1LeaHNehL`qtteCgg1`wNm zZaMKX_W~p*DgkylH(*rd<9Y%500>gk+23Bf!eP_Z?;Gu)S4P;2y*-`9VolCFYV$@q zm2c(YB*Me8Ri`7$dcj_PSXc-z6OaN1^=trxPAe%RBU@^G^jM`l1}CYb+=V2r2;=!` z-P>;j2^K2}Ac?`Q50xb%pJY&`0-*!z;eIDB7Ai5M5*1+_e~0*q`cUcb}|VC*W~I^8<3?70VR* zfXr^LLN~u@_gNJ(282e+E{ym|9{!#YgevyFyj1*D^ae<}<+?p&(oh?&cIhD>(oN^aX05QO3drk3~gA-$8>tjDc@|o=D?$ z4prw#2HB3y5S$e|9N-sx=S|ek7}9Nns>>2wA_q zx#GDK)PZ_m?bPv5b%bPQi{@C<-XKG=LgV!J@$&ar*l+zJ361fnz>mV=yW}L&#)iv* z__TqC{(4;7@V#|Sj0ujav;n`G5Z*W3H9FP1V9f_mS&IjwI?=(_Qo9xJ>u*kHYG`~> z=U{p=o@MaU*W#rvNMsAN5B(w)Ln{v2`0nF5eyEIT@xO)HqfgS^ps1NsR=rE~kF?xJ%8x1aW!?{_@nWX&zOJ~%j?wC4^dbq*lw=>l zMQ2wN(<;>~be(nyaRuD`oU-=0R#hWFOz*^3mFl zudTfYVN{hikJf*^`@N`KjWs3R^ov#PXS6WC0)V138#) ze74Pb4~j__ALg%ABZq)siFRy>2s>K%{S;C;F<$+Ua)w6h37QZQ>U!y0QC@j~f=)X5 z5LymQ%XZONE$fm>t-V4-5VW%(f>?Y+*>eeJrsEHOc!z|>@Go2B;HcqzlWuoi#1jZ1 zQM&W_okfdz119H7{Q9JtsK${;N>{!E zRfw2NydDEY&5HctC+?~ec!~jdxsTW5@m`=$pl9{t>E_g*ljP`1$;xhVhRBh%)AU(C zgUHg;`(fepVX86G#+Hd9kClR^Bp@ zM-|kUW#&(!N!=Vo=i>96!KvT#;oYf(5|_#dffaMIM1TgPT$O6{Rm24<+5iP6D%DX? 
zu%lhZU062C3EBpi+@FJtv+8LyteiU|lkTcT$yfNziJ;@MLC29X{mHy)W~6hWV59~= zAgCh2O!uYu({X8cA$^E3!yM|x&(He%PchZ>0C@>_=LNH0`jltgbPL^jwoF9jYt6PR zRWSqWpsHX08VUs9iZPj~kQv*efZs0sPhWEozX&-pqrim$NL`0U&+&!)mbA3ehHm|( zuduz;R17uYpx<+boXHFCCj3F(S9}=JK6SvNaFIJ?**z(YgC$2*iDv$-FHuure$;|Y z^H4-w|L-7+&|Mz!2^oC9LHz%YW!n%74(fCdoHP*QI-$lw(r{brt*r%Ufj5i7Z`-q5 zrc;f2kk-kBh>a)l>o>V@W=-k-2FicqV}#SQMFkNvL!NKifA%0Ye4l_T>tP_`jQ{-q zrbQugL{q%q^mpCypZ!Gt4!!?#zAYZOqW>d*{r`C&3(`Fu~5 zsWECF_YrOu7M6VmXgwSY3+p%w%Ptt_9`H+rz@ZKBXBP#B(qSp-5c$Qz!d*c{+E9Ih z+&qZ{7HO#NUr*8yMGrECDh<_>hCq-+f-BXZOahPKcONp|)6J8B|Emq62!Y5eLggVa z3q>_)C<3MoJ|M~pYHCoszxq476G(p(f+;8h0w;}}T|G%uGQ~q0ssp|okf;PA_zQjp zpC;zu!v_4Pr0ApsbAro)M})60kzh@5!FW=oA^I>VTmh;Aem;poTNq)bA-dq1=;=)W ze^3NhZ!&WT-rqS;!IKQSE5Q{Y3QFK7T@Qb<59p+?rlO#zpsb>zte^x}1`YqkpT3fc zg3`_+&7Iwx{XPF}HYV!<1b>Prki(8t{zaGBK%hFi|MhEtFARru3m~HqiV&0@!`#@5 z$`m2!L#6u?{_2A#1bDiFg+QdCNFZ^%r?b1ivk&Ov`L|GwP*y}!D-&H+6x0m?^K{l% zwS)fE9d708;tiuf^i7qONZ#IrK$@qnA=<+g4smv&Qc0FZ45}eS#nniM>f?hT;5uNeOCh=gfmh}6{Z5WhPYT*+nVEb&G2NVG+gz9;5a(ef?*k?Ownh!`gv1)49F;V z2ASyUW#*-WHVvSYFmA3^Ff*u600wKM%5bOq!JUzSDSb~DVso z-RL$z+B$wnKokWv4pi17`0E4K{c$*5Us{l}VSon|M^~czl1aW8W0)&lk>F)T)3<>8 zliZAqnM1AVDuK#&2oqmpQlPQ7zctJUELYhdD2fhTFUUqk#mdewz*yIXZ0oM)=c455 z0~Adk;iF8`@g#d#nfn1n($`V3Mj@S%x@5eSk8&UwX=~sDbX6CMvvb7+LPu2+`FR?Q86g@ij5Drda9w>tWnIoDtSWWLHCf z%fKLru>~ATHc<5PG*nac@-R>iAh~(RP?q}(|0#Aa3feiZK=*^KSLtf+zhA6AYvKj3_Jr43|0s4?4pY? 
zqq_#6oh`vvV}GKhrz=^NAa>Es=8PZ%>(_2igaMk`Y2N~51gTwny;=R z)*1=XfmuSya1%Qi3T=xvc6Ro(H3OY+1P=p@9mUw)%aeu;H1`Vd)72$-8#2cuR6H1H z12}l{)4`xIsyK!*#sClTwL+L!+4xwys@MeBdP8A$Znz)@GRVu&Q;{6tYG59yX5?pS z<_uFsnydLLLC6*^SPwwo#LZk;744-@a(A_H;el~<4 z4@H^*)fuLOrugdO^u3MLh>GS|J2HhxM0!{RDbe9jTbP~_)|jS3bMdi7nAms*LR8&I z?xsqLo?a+3HxHtXzoDOso2`-}RMk|~)}4WcS?DQR+xQvTxvJR^5w;MRp{E@TV;R6O z4j?)EP*jXOnd^lsS*y94yU+uaTmun?MnqTu)y_}}=7IG=dYRxD7@~<46sl&zR76WU zgXZk-<3&)laxgIILn2%nh!qk0uzolh8;MScwOfu7XoTSbDlJuvEC8 zUyutykxX-j=mZh1q1Lv_E>YULbAb}{!i zcQv5;TOzHY9WaA6Fw)cMFKA3f#gBMpuR-otfK#0ju@L2ha#8h++_g zWdeID=!35yo)4>59(xJiw z-EF*}Xn(v1-p50U3fe3%0luIGX-oD8e@snO;W!-%9%2qLG4Zyd>AD%{S-PSiK^TY^ z$=S_@=Bf-qnK1Cs00`DA01Pl;8XMft#|)+l9y}qIFbf9W&RU6tBs0*q;H1>i4+5I# zY;8j%5E;&1z5&h{8#@}!%f}A__riD)l&wwBmWsA817!nqQV^hFhV}3_K-;N!+Spo9 z{VB}#`>R1zbQB>r;M7vF^8@1XWKh(odWvWyoZ^f|`RRGmAwGesz)kAoQI=#AsyoTr z!yOuEX@@d0!4U{>hJh~7QbPl*swr3zuqz#kg};>&+!pB!BbnJi=w@WBtrEct&GhRu ze?^LwHIx7Y&v2xtIRWcqZWyRyiN|Ag(Rw=GUJN9XXzr_nr4juMtx+aO9hj0gUe6*( z$H>~n7j!i8)q_wdstl~IJKl=q=5B>l#!`%Es$LMXrwhr2V4&h^lROXx&Z=f;q$%AD=iw8;R5PLq z%*_p}>qYcLGnFC;YV8Iic!LH*GlnY7C{WRpsG^1@nyXT%UK9+4N^ph1{TM{Lt~h2x=T%*5!{rU9a!-0PE6*lm15u;t-tf`^FciKjA(>PR8W|x5Y zE&)@v(5S37-uX8jH<_K#+97OfD>J2geE1hjoj&!Pg)=*e?-CV=*W1hP5OH-l-T?G! zyvM?xKDL`Ho`p*&f`xrR_3=+E7>&hV2_^mbMwyV(W(}9rM!?Ub)sBPFP=TzhRHcbD zC|CRyGcMDvn@4Orq5%~Pwv*#|+;ABTtJDQw)`QeAj+76Qy95+Dc5zp${di}4i-jvC z43HP`8$AuC$p`Zfm(Ue35ewqyL8cVGodex7xWPP97g)gK zZngyT9XW8H?c5RDKWRY6H8_~V8d<=MdpITX!8mQ!cUcP}JA;P#!1`p8(zuxOD28=V zKv%$xHsI!pxc|;9jXeLi5&l*pyr^ld0#RiODeDv~yLr;y`8ixymf8l-*%!kmMKPYanAF# z!Fn_df|s*;>NZnY?=X!%>~&tt%cD2B-HpM7ds&NPjk2<86mbXY7{il2r3zCC4hdjl z?M8uu%o^x}@a@(Sj{8|PBC_gIV9)o0J@=NB58aw-KhP8Kc5r7N7OtnKASt0@S1$I# zn0uoN^mkej5)xR($taB3cf41U#N_T<3EQ%x-SwUsC(vBN|Km9rjKzTGuU+Pm5)}Bta1&!6xUSKAh)M8y3{Wb}VUI&9zdds@rr89)zM9RH>eU%e(=LZ#9QbIvy)#$@3~qc(=-lDxAn0noTHkj! 
zblNUs3Yfn5H_(4~tbFhZCLj6WOZrO`B2ZpyS)WykFB0hWRnF8131%Y-G>YEO9Y_+U z`sfo8Kt5uid77UiHk7$l^}z50e;XjcV3VD}Of%fooTPah?9LT)@WuH1I>{Yu2fZA4 zm#bGme_qvRQ@~=cu%!XB4&*+@K2Wjx29uPC1IMpPABqI^HcJ*79pOe{N(!5Z{sTr{>Z^{+TOUy207_Ker~H*!Pg{lddz+ z$x5Op-W`qWy05E#Q6o5Tb>kNla-!whO%4?agYwtQ(@grHo(E5x2vYK7Su9|?Ywj2EqJ)rRMcL^*Om}OsRx^~y5{(_p;<&8*x%eMQvrTaOLs_-ae zHpB=IySrreXyvr092S$*^!+^#+xWZ%hov4CP0cpwJE7&~)|!=%P32xX^%*tITBU^F zxNwc<5UwJ4ew-_OSvWkKBTj-=Z86@T4}FhEhruvC-#UKR1W~t->s;fBdMf<>+xIZk z^WgRU`=_)DAhWtB_L~VFgmgr`l5y?}_lXoygB+>wyaq`$3=0+0ld%;SO#-ZRF6)~5&v^ViQ)jdh+Lf_Q{(iwqz zHMVIJN^H4)GoI@HE~mxI2dW%AWqxq4`{~z{)}`Gc)DWIYb&+FQ7q#JWC97(! zokLuVn|oko#4U(ktbp**m|T93W(`L(U;#Xo$9 zI~_u~UY!spf3Gc)8XJG%nsSyQ_cg#f3}1N5`9de6V4&cz9nJH)QdpCm+RXb(=wLlP zUqNW}QAYG>wepLn{G9g;JnC#cCMv?diE8~*$5cs;fyeMiZ2M-is$D;-o@fAOKz9_1 zy>bx|!6PidbwGMCqc3{wj5zJ5#Q39$r#kg7>51`e&*oPwQy^a4wZ_xerc&jtRj3<} zcE1eY85}vNv(nmt-o`M})M@)~kDBOxzjfVmXsq+R_L9J-$D3bNUYiIx zaRp!HVecl}IGt}`?2dMmf0+TT;nxWA#U4=;M)8Wg39VT8R6iUZ(_E*%qDXviijPd@ ze|2J??jNVN!cIoT*%819;Pu8WyecKe#O?!?JrW- zzwWxm502GsPT2JDXSSzob(AL^CvEH5ykJGSqmQYBkRYE0++A`Z+$2%qP>Ex8k)de- zsW-|&rcN=SMYTpuU~?riKYlNZ^wJ0YAK#|KJglCmEoTgbaq>$R3`gB*Hsa$wb+}~J zxGPueQe(<4Ej9%^YTdq`6edjXVonI8vpq1-<(;sD|ttZ z-%~DMV*yH2;CHO@M3(e;sulTVljTt*#@fl(v4Zx5&gewh@O%5b#ht!x7(PUn@&4Al zw*RKOTvY~yAvZl(AKeYrzXT5)YCCOLK054n6ZuS|*CNwlorWB6qxt%F9)DWWcVu$; z;hPS5#{jOiRkdMDpNsvge&5yJ?~W7G(s^>Ey}O8lu_&~>9yNNNK17TSg>G}UU?Qgb>%&a%>mE0*RW+o__bs@0&dm<$N@4Eu9M=h--P$ZpF6BFm z9Y4mH>*|A(Kh3!`-+W}n_>*9yaGEq!dvlimW%1?b!kOT3u25p0z5mS$4pO(|!6Dkk z&kP#Ekj&#CaO!5D>41`)$cH$-3sG%MPscn+`8dIOR&pK7>{7Sfq+YOxR#VU9Xgmc`5s_2I?+qA@Y(#u4V8 zBMcFN`%ud|KnG^ourlMpB}AT}B8|{roEN{+b=uYN-KXWGnyJ)Qe>ju1M(JhY#ih+sR*%B&iF^tk%hA-c%fF`}p*h zW%1RYpS+Lw+(_f)YD9;2_<~$2{)!_wvz7#8V?R9Nv?(8(59yT(?l@g-e?2nHkAIgW z=h{N$_VdU+HOH9&WTPaX^m(4-V==E6Dhb_>Su0+%%$@4tLr8Jx`OvI+I>ZV!$X@&oafq<$g=gsqCF z08Lv-@;ADD`Iy9T`6&H)%Hy&}b_Ce)ty7=B{T@cet4RDl-~FahCbTtA9|5;*RC54I zqk914Zf9#pQ`7LB$od0`r%GStX%;DqF(LE{n7_~F{u{sXyLsp?y9DVpp9JZ7UJKn(A6 
zfEbd{sTVU2Qg3InoJi4PA7Dd_9t2HTUC?yTG&T6aK`Nzri04UuR_gJ$X_7z!wqQ%! z1w3ze-VPzz$80hxXxB93uC5v;hxeLywet}=5`eCs2F$xfJ#8H`K*0ZST?qYU-etsx zH)b{|gQh$#T9@%+dnz2E&H_KnSe^Yo;GryX!wjtbrhq|57}N4uK-wLuef)w2B4l|= zI75RT`a0@>B)%m+;Bih?-}R`QB2vIL@Cl{vq!>rRmeu(A#wb7i@g~}}IaYLaeWrn5 zlR=#+%r(gri#v9%=~V=;lG{6>#ov8`#a54W3URr$eblKh{80Cf*K@4RC}8wU;H&q zgD&uAe&p5c_gCG|eC%g7CG;Y%0IK4)<@o26vHcUDiX*a7aS|@CBZHPE4xObc zNKbq!vV8T}wb^35B|)-S>x6MyF}tbDcKQ3u>mc;m^8NK*S_+$D zy6MQ~@byNPdrSN-HOlDg=YGR{vN)-%eR5p zE3N_utqMHAw78he>MZi_t*kkyg4S>&%z4H93AsQNZGZ=5;*{K{1Cli}sxeIC8#(r*(_MFg)* zJOBP#QS_+gEc?K;dH6oj|D{1F^HXIV63XLru5%x!jJ_-SNNBw!ebVY-3NJ#iYOs=g z%{e@Qm;DX)8W@qVL@|0WR<`|%H%X3 zf4pj+Vpl$UOEy^r5H;)dZ~bNu8Nc<(shTRwSe`2MYR+}yCsql?zZWb_tQ^dFCk>S4 z`Q^>(>M}k=sqK@zx6d$nPO-o}ytn=BNC7n)YYo*SI|p_+aQ?!K{vHzRVMKCyqO5tI zI4V|zQ}pc9TVoH9(QABESx$b9=(6S;hxS?)TOptOe*ZXE>C+c} zTsJ%`;P+Rj#C=`2t|t$-otWPMPT>^4pqZebAR>7PbF}q`y`jZ^F^x!IKZTvS zmj}xYG^_6nXavo&d@8oK4prdIdixk3d&@Gp-ZqT1;`u6kKZrimS)z@0N;U1^AR5rn z))&q|cI8^=y|vdP22S@E=8f5RA>ZcKBE3Fv@CaQwEN>UIxBB4?35Y_kSDT7Qmw2Qy zCScGJkvvs%UL3tVP;PL-$*Y&}5U|^sBZXQ^G=hA)WsuB)JPPw-OL(B@{IVdXdP;KS zti%j1jKojvEyW}&#B<?hj!r-iNANO;r;j&?KFx1NF z*L`cHSKgVfK9-+_Sw0Wxz{QEdNkAuOK0iv8Gf1!tXghHUE!+ycnrS)Spq;1sVXJ~5 zm;crHQ`QPImveI5Kw(3U;#B2N=&z-JSckS|U<#uNcads&_-5q0+%xn7+U&O%UWWw# zV5CD81cjF-d#o>^M>WjzuoJ9T+D?DH^tJ5%Zs5)KH@cYb>@Wo^5zZTL7ViF0@Q{I8 zTU+v(1Stc7`hu2|GR4MY2t<$PH&r6D>nx}Qr{^QH_!Nsk2B=YQu*giY}K-U#XNXPI&ZF?1@lkyXigV%fSEM)qFlkLEWT?UQ2ssq+Ul%~Hll~l zqIS^;v>NFV0bAOv)xbmb7!|0Rx`STn_nR**fkUDs;-p^;lkE$~0Od<*Ukt|pmuGp! zBT7NQRT34skEtZuaPVU_*CCDx9?%QA_`FmM&{*P=js++56}Bl*ovVyHVE-!(a5=W? 
z=PV4c4dZ{ji^kMLZko+dUbRS*E7Tc54v|$^_jvbivyOgWlYRv>PI?9{Hq?|(0cS< z?_u(jbrk$KaP7u!+TZ@cnitSuL<%&6085*78RT4j%~6l2;JTn>fyICI|N70nV2Bu^ z5VGy}6kK@gPf}Bmw_MF-N8vp2O@r3k7?780_si{VSoyh@P{QljC;RR@J!1 zK?3i9Qp(AiJZU2$2NI-R+-KPxCO_WK^5*|BCY2Qe)I{`6d}vAb1-}BR*)X6a2dR(D zBSK4%k(sxzY%vY^pe}cHZ0MW&w;FkYEt+>#7>jaOA3lEBn01Y5Xl6X3hZYkyQO<)O z={EH~!tV)&W11hT4^4dk*?oI3BvurzBG>cqTHfz_66aTaXU8X^cxQ%O$6w!9`89jo zOf+3pq^Ja#6O+%=><-a<=VW$mGOgiccr52yeq3g}jFBU(=EzcUHl{&Ai%rwMgjg_C zD^`Db^TyF0QWhdNTu9N4rRZ}>=;ieF?;vayQt=c5OpkFyc<})CxT`KNjUX{{9aVa0&|f7`I9)?H3oAyMZG{eycl zDjyDTKOTOgzipN=7)o*oQ)qbDTNeKI87fxbF4#Frs4h(GuYG-(_m6$;azr>iq3&Cw zPDQgt2^91WRM9y2CG;ByDZ%_IM4?1PXNt6>7wlo7R@|03b*YNry01Fcn0x;x=6wXv zgWLE1sRzOAb(eGZpwpE3-&)BQyRH2B;I3TUD{^2o7S;V8x%m)L*#s1#+u+6hBcRfd z9$G@f0fBMOQ3Sfq0lyaO;t`iMRCP&EnsVp$kO)T4DCW31Y>9$1evHJA7nm?;^ z*$n;r$o#{R0@CGnE>DblK6?yQR0O65&J0R=|9Ci|(AHgyD)a9)tof=2wSvxI5cyf& z>DzxFii_aZp(I=AtK|3-nf9g$Y45UMjrzx+_6;t(nKfa_5K;ZMvybZS-@K`QjQ$Er zd^}a}9&j@99^N%AHj_Vg@$wDf-6mq&VR6T_#0}=9NPbo^!pc^fulw4wi!n`5+M>ln zo2LTmCR5sF7hYIoMl6fze3)o2TbeyAG8ib7F|bzrzQ70djB$+8c`UfA+VSYp*aB~d z^P8840xBr2Sw>&Db_Dr#N01AR;)Hx?ZKW>0_fFE-wbb#$`$ z*&}fKuv_ur(x%%eldgvj<{#zk?KM@^1K(<3)8Gg~T*QA|c4P+()cdJ7@upu9>~zYa zzIi|Xz7$&GR2BPc*uB7Rd*3N|f%*|g`#MGP*Ngqn8N2CwM-cgg>HO-X)0UyiKK<0w z>6Hh&AE3;e9}PTLdC0!`_WCKq8-3s&LF4-Th2P~BDd#Tj8mzq>Hs6qhDmIo+q^^qv zKFEoxHHn%GaX|gqI8;WQ#N}*D*R5?fX6*Kxo02{~xL6}Fa~&%9{cEDP)^y|IbQkL8 zpT)TkK>bZ>8qW&$$Mt0W3kz^FKxD_Kr7?AXZF|h_%;=K_nPz*#_smlK(C?E&4fC2) z<0rPQ@Yr~+qr61DjcapdexFX_+pxA0L8@8RmZfD1Xh7aYhOnEHpVZ(^^z5N0@e zt#8}}v{un$N_>eRi5o1i)cxpbC)n<#to!q1!pHY_B349SJdAqcyE=bd1~w~?wR*DoO_X0hpUHfUNpLBCt_%CS}Th&$zY#7(67wrw}FI>pN3@H?JXH^R;UxG6JIMI*Q;L?|=+xC6&$YnxTo<{kL;jY#-OzoW!y3;6 zIqvI5=sZ+?EHd3?-?c(EjaL6@kS%#h zha(=tq`0dWUI-wPiBHj0gtoh9IM(*ftfXi0ik;(3^s;#Q2P$Y8=s7EYiGRME7y| zo7tIjCdWrk2zENWez3IHEZf59pq!&A*JI+XM0Apz?DF)N{PJGlf6VUE`Tcvpek4Z= zokLoaFDDuq-phIFxtT#T?2;c|j!weYj~U-BetSOlm|Dr8c+E_|;}YRaX#K?(+)tH=zU&_B%Vwbk|jSpR7i51nz%YdcUxu`s7jvUqW3}zenT-%wU5luHA 
zlcX2ni=-e;CQ4oIHe|~qa?Mov3?_D&*nKqvX<0+$w0@2 zLmkBeW@g#Q>kk!WI$JfXxB0-`g2i@;?-L`YQ+)ywt&3$Fr~Ky95kqC#pf<5=-Dp&eFdxxP`2xCHTk3GQT2l&oDT>Q?vrA<&M)gCt%=4;${%nuq?Pa(~#K{ zD|+SD2xe_uSIYLa{WBeDZs++>gNdFLt`XHnR56#+bP9#L| z#f@`GuCLzSd`|m)BJcFQ2lwpQF1@J!(~_jD{(0Il>^k(YgjQK4d5p?jhqqaP&b1z{OlxS6kJKx5*F4K+m*o!^??GIWWUEkB+xaKX zMp!Wz(&0Q>;H|_K&P#SLPs{V1^uVaLt$h;?a<>|bKIH5-ymKa2?e^%mPsKf6Ayr@Q zDiT@e4|hc8`J{@Y21S-R-d~dk;S6w?1Ai{4NsQmhjpxwkoz^O-=UmAc7_M`y#XlQ9 z5g9>PNGgIqHcYQ3jv1%F3Jd1tm)EyA7FzGRU`+Ms-_=%m26c~G_?YT|9iE`8y@<#u77t#I_8ONX6S_Xj~5c=xI(5 zp|7uGc@I0K7C=%hqpb_iTr6~cdYGE9qR?I=;a9FV5$ZBMQ5E!O<4s^_$)}D#9(SK` z?0EZk;0))VmmS}#OEJlLqTRhQAuLw^u`o8O)M`aqX_7^ z19m7H3d&8m36Hv)>1z6Q8eN|qEQ8q`ieDqyG(!?Lv|@vQjB{B$GPmTaenihy_1EEA zd+d$k*U|}^8me%TF2E0#cj-M9>i%TkOt>BL0srLi;w8uR;}1_q^Y=dwI50Aw^a@-# zAMd2g->X1xPW6bguV;J3CNU%I-pZ`D`M}OhEo3`7 zjVQ=_Rp=FT-T(ON?%upJroB^sl%}G-ltX*1UXF&PS_HSrDZJ9uztFelGqD1ddH4GB zQlH}QmBNWogw;TTps7JplmGe$V9fSUaFq(F7k}9~;s~u-(LwgpN6>jr^&W9k=YHg8 zt@kLDbsA)g#O*lyrq*}6KZJ9r)1lt&ub+Gk7`ot zkG)+ief_4Prl_9Xp?>>Z{q{G>uoCHUJJ|TMQTfopIvTI#+R45`*L>6biw?9Ei|OV4 zN8o&*UT2!wD#4wLNsQBnt+%Zt zYZRj`l`b9v{%ToYi9Ej@9t)v)#)fIU9R3cdizY^m$}OQAWFYm)H>N$ zm|29nV+83h|Ge?XeC(5dQqq|z6tCEQ$2V(1Yn|qwjFaehIE-@lm^g|_zf^!-m1qN1 zriWFpS0|ItOdKwL$Y*}q+fUh}mt_f@rV%GZ6P4ReoMMeh)wI&w@S?vQYLNMrTiRbZ zpS4jsa`AMo>|`pY{r*FR*@=-;Kd}jWjlL>dZELssbS~dK8&nzw;u>vr;e7$(JKBAR z8Os3euJ`Ve&)mASK)&B-a{e96wkobm@{p>ch{{R;^LD{l6#n)nYnSGz5eTAmpdhK84ywtl&67VnX4o92 zwK1}JBcxKPh@6AXXT$3KKIu;l(O4FG-kGn5Y^-uwpV_>=c=v;A$ImKI)iH4>`SF9L zSe)#|zX)yHdQ&V|`cXIR{;O**rhnIb@~QfT$vf`!{o+-KaUCd?_)67W8e0~%Jf}bl z_?-IjR5+&?Yr+G)aylpdxy2SDp2KLw`9bm-$HLRq_y$67(DwZz_sSCKr=RXF(q=j| zFRsp-)xkcOmsCL)hU=#hf2B$Xm^wIhfJ#q!41g_O(sv5Fvt4G%x*wyz#15?<=!0)v zMbzzaZo2hL?ukO@!GUT&4AVfjdYDXL1KP(EPAZ-crp2tm(7QW ztnGsfGCvIy?FU*D550H@e-xpY`>^|SEunk2eJH7_?-cLbx1uan!D#@)bS#uqcr&vh zK{|Pz^YDaGi-R~k+*l|Sz@bi7-I*7p zmziztOMD-f-08GN7le{Fo@?!Kv*j2lwLHI3Q+%%!t@TmX+FII2>bR63r>uGYYD3ns zFPv2vFWcdP65q@7^H#yGu~NC<okC$7A 
z-Z*^90OXfyukTlbe3DtnIGqa&#eZr6ps2(RKc3_i)JkKlyov}sHv6;nT+sZiQu)u8 z2h*k?oe{I|@p<+1jd|VnBigb;9x#sFi}Q1@WFimAB<{Ix$R;@o}*tw@j~%E>0f@3cb9?M$3I`;@ry zw+3(KJcgKdg)Kh(_TuK>)th{#hNo?x|2(O1H#^5JTElD6LoRi&J^QGf#i^N#V^dez zra&pMa`&woWq!o={KzT8JkrlgZ#JfXt1ofW>HBJH{UGL_V^n&kj5E&OjY~xSPIi;)G)_GKtf?To&V{Un zMzvcNN7#V)KRPl&qKHGquE#s<#wWQft6@99S<+JFqal*4d2)o~WqC&5d#7mfPEx1bsTC7GFi**`OVJFl+W9A^p~rNqxzl-1((2laj_l^+9_bo=bR1KQT| zK6_K_^AyfFL~c_qR;1k_d|+_b&>PMIHSVr@b$_)Lj`{u zdFnq)FaMho95bdn9AU_M4%{a)%Sp8VFYABup&2JG8&?V~y{SRPD_iZs8(VylZ+eA@ zzrz1lVQ)<7pM|}{z=-43ECg1Mv(5ob(BRDU8UD)!yqYr>;sp9ZtgUgUd0Ft(=K6}Y z&);bLi<^dso>0_3#P&Vo)X@R|r@Q0@G94S5UXaog+OKHpEn9V9taJu1r zD(Sdw#Sye)og2%sn*$uupbRy<2o|iU@6{YQDh5io)#9_Gn|o`wXIT>=@Xv38fe8E zsISlpT&jPcIrG?li>Q{gl<8AZ5u;wPvg9pdx3Kr^s+vD>=<#;}F+l*5tYHw4DnK8?c4@ zE-S1iYPugD_=c#w2e8{3kMpupWj?3Lg6o_)P$J$Z=4#(daqHeEwz{2%LcmdJ1Q4Dr zyuJsO1~qjpAoHuPQ%=iiw$$Am8NyTwTTjnv+3`Fcbr~aY^SKuz+G1ssGDbhA?vhqgZg$6a#8^q%I)eE#gBtX4h!6|H+8Lkz6(h|Re?Bm!>oP>p%5Hp~RAC^tzA zl}Dx6tJs&1THA-YC=EP4IX=pz_Rk46guJ<{qxe!tMS!PkX`!SH8%iEfoF-8HhF=Dt zmJ>G~&lCbE$N`Wu(p}o`f}0&7CU9;Q0OwLqHi1!lKuj?Bg)+(<#eC_*5$n?t*=uot zeZ8R&GCCUdA2UM3NDqX-$KVBk5;+>&0gC>OLBKOKMax3Le1)Ga?gCrDr*+DXH$_Ri z1BjJ2km7;#-FILI*i)qy^7g8*4^WSfT?10$VZL(m?OS>O3%wT=N|ugy?v9Comred{ z|85?0oe{=;#vUZPbr}f)=ln~jg9`vfgi-((jV}m$2K1O8i!T5_TL;Bifc$oYV))UM zDR$=0dmN(U=d|44-4ryphd%i4bd#Q^oNTJ~cKJ_63fqO1QoSBr*oIp+^=l{rOwRvY z(KxPasFv=!(c&A9Fcb$-AcAqxS~M+mTJF~L5HI_{rt53|o$IqYAd=-!I1qd~QfxCE82AF7=9Rz*f zl}l@jp30ZLBKANO&u#I`tPtn`CMg&RydXem^n{F??qmHa&0t z%11pXtH#GeRgr0ezA0A}*f(2m%Z&F`o&i_i*HV;R9~dUe&Cbp?0>Dpqftln}@1Fgy z!uP#e{PiJ08gm$%p&`65+WrdM0-NPwg%ka59+VXrtiKe!YxkZDZAtQcy{AmGSoZ8Y zlJgaSRro_Tmjo~W84-MF`()3#&Bfd8mp8I6&;39G00`^8Bg&02!q7X|fM4&~1r^RW zhO+PFlebgm5r(iKqYqmSY|J$u6VkelOZ<6KBA6VQ-(K*soCj#ZAs&vM%*ZiYVq zYNsei!|0y!_&^O@LO&)F?8hl3$J!tz{aBGgy(*(r6~k{K>%ZaB#PqYYmtdvwGA3fu z$s%IdH^;t`g}E& z3$$M6tcZO2s6BzM8N7^*@?Bs4ZT7^?D2PtK4^(gBr8srGYXTY-rML}lN#x|czLXtj zEKLv=pjD9x0)kP6Ewvl9v`MSToE%KD@$XbcXC2~~vR$XF9-3}O1)-t@kQK?UnE=Ja 
zL?a3M`SYL1#R9M*Kuo0z>VG*ot{?f}66LR(k@VHvk_NAWX6nu;PzV7 zQFuvSi(z>6`6~Y}Pu$~<^Z_7GV$Mu|)qz8D7N)t%kL)Y0T)#ecjhHZjZ1|t&@5v0= z0!1dxO^YIn*5Gr$P+wk0^6N%1!g+)*DheQs($7}BEsFklWA*J^M}}r`hWKXzYH*We zpbXv^5@IW{usS#F)zSCN$6V2y;`W!f={JFu^=)7klrQeB#y1nD%3HSq z^r}b!%|wN{Xps6>8Ow_z)kRc z08Ti?)L&vp0~;*Q>pjffXe{L^#YZB$$V2syt^xbbmpeMq6R|V4{6a}h)c*eB&)3sN-dnS_2H)m zukrs%jNVtJ_?S4aUX?B0T;?W~*_OVG--q8yHjLlwaMcp|3BK#Ma2!4_R|IKjkduRK zl2`zBH(5+;hGxy#oI9E5-spa{c$@X5yXU)F99SoLs+V?W`*&OA8;jOn-n1LAO^D{j zw8nA<&HrrubTK#N4-w#ACca3FZmunvD=EDM@KBwDzf*VbKopS7uJfqKRF^`maYG0ygS=6y`1>oxcn9Fvp6J4&Qea={Z?Vp>6(^m zGJ^Mz+@1Ezz`KM4h>r(%HRtp5Y&L8I^X}e=!iZd5{_ptKTZQkcIe{jiYb202 z3f_zM=HzJ^rvVK7h{WrA2^UF)6`5+3yjwwqXOd&;G($<9K{+LkIZipYhCzDorM*e= zE7|+;>thVd7^7HeLJOfV`NB*$x<&Ap0Vauke7s*#t zCxSA(33jDfK^nPhZr1Zch%9(jEH$W0G%i$(Iz`9t_HbCy54 z{PTs*wqD0A&u1un?r1G4POM1F@wL-iWvN$ZBxG~p%iPtuSfrRvBJ5=@UT>o0dKkKt ztpC#Dwe9HX=J;W6pV)U8!`QFSe2S-uXcdPh2|?s7+0GQD>{~Qjr7@si)_#faDnD+< z2`RlCb)Rp{2Nr1`sxX&We?1@nVMIbtA?8}~dBs-te&E7Bv1gPCBs2uCDExehojJq@kMb~Zvn$UlZ{3{( zjLe!>`v#x=TkW4K^RvHzfA|zK|Kw>x1Xu21JMG?y<_2+lz2)v~RD2@p^p6>9fLxq| zh$R?+f?Gn1?(Nt`N!kc*%ejz{T%UB>_Q6EGJVE<&%uh8+`GsRsk*W+4hoHAP-4o=d#M%?M&EZ>^rZ=a<^ z-SCo0W|}3lpT6{-}hd>5=K{8hUbqs{RQVuH9q;PQJo9c7Q}X zaQsDW?c`7ef}F&qTM3Ep4?Ys)iYj~i>IzOw64v}#m|SR<#F))7xGl$(dKL;562N90SU7J8CmeIEQFx?z5yRU z@m&K=}* z3i|XzU=5&5WV`8cTBUzhwU{fW>v+NOp?ThI%NXUDf zqpSCA;=?2n^>a6l%^(!)@CJy&x)OlaWL&^KYrL0SaSgMlpRq6lMY$2HR^#6vqMKTa z-ALREtFph~4c@WQN%0u~NU;IY2aQTQU@&hGNIASRtE@xkoj26_c^wJPTQh>{)vDIH z5j7UNqQ$h34SDO*Xp4|ERH0%7H!pd~@KVkBNc3_>=rpInGnzYdv9bG`!7G^Btg-xE zxmoHeNXoOzmzU)BuS|rWY&93}>Ea)~)@U0B8y{8U0`Ywm%-s;A9T%BtM^g$&AR5oA z%5ztTHB$T2LQ96f`HeGu^fyrH7|6AV)d<#BPp00nG#;U5XRsfn zX6t|6rrTaSZ+AUfcyEG$%!_PS6yK@f{k6691LR}paH3MhR=NfnaOV5z96qNrHG{)J zXvcBJv9@u2Wo8awF+g-L08mFLfVk#|x?!(z9XR2^(V7h;51@Zg02@qn-3^qosqN$= zdD}%3^5B;=T+8hDyP9yr#z&(F1aDdvc%M}WfS{YA4#@Z}PuYT?haCjc+L3(6`6Tsu z^kPMXonM6`0cv#a*9VzekP}PH#EEI@ERJ_4oVJAk2eJnsu63RfdjSTuq)}Kk4r~>8 z-TWZc;dmq(`oy7nQR`Aik 
z0?gYAyfUy0puP3q=NRw<414>o{)P~z-yz>$vNPfA;JA5{i%B+p>S1Hf|I6=-FsL}x z0}EL?bfyM{K~x@1Jbv`++Xv8KQj0G%Z2EuH_0>^PMeo{*ASoRJ(j_e*-2&3x-Jz6p zgMgHDgEWG4ccVzx&<#p=cg@}7@2l0uG z5s0|$O3iq^F85gk0-*ZL`ps(kFQt;&Y!~WbemKB)<5~4?`GZxs2-8mxT>W#Z3~W)9 znkLC@qJ-t8o+gE}d*x6zf5cEPdV}54uTKAG>h!V&3K4Pu$y4k4dgm}4%yoEm;5 z;JTmE{0Vqb$iWz7QDA3jxklQL0!@*Cyb2)GvAmq98BG3^r^qTUpFzZnSp9qnLCR$& z$OYD&OF^T5pQ=Jg-UoxQ={pEWJ|ZN=Qb`t1sV}gvn08;HhkDDF1D1sKb3Lb?Md#d6Vp4xWr^Ee^Z4VM9&VTa*UN|vNGv4Dlyv^ zHput1gAk)suSun>1g4ZE=(((HlsGfJwV692)PG<%KpdqMT=U zJ2jHzzhd*J2jAaMi7xu(=9Fb?^vhrU25^2??HfN3A4Off#lcSdGflvQy(fw!97Ii2 zracjt2kXIQJ(0wSse`bHjNM#@`;{>SZKr!mqOqaH0P-qbQer)?4zr@qB<|Bw@H82i z5)jqohl!DC(Utv)mMiy@t7l)(2%g8{{6@ZgI>#Fzn^ygE>7THI>qzjhZ$f9WDGunx z2vP+7wB&>!cTdmaeJ4P!sCD~vh7V%M-@uXoyRi`j=3vu})iXM$ zjLNMK-Kj|y#8z4C`IhQ!(@gn^YB>=Ex8XpUiP;M@BA`#PZNJXwapmBUY-1m>jCma(v!e*FrK?}7S zAQlDzW2pDd>0hu!Rxar7Uhz7@C#7g{$J{zJi4>~jf4!=plK7gpOwQ-Lt;DGy*#Oq{ z+E$idPo;6!tEH(=2O#xLpaOWLfD(1EzJmg*XrNw_2?p7uj#GL!R=SeTxz@lXrcr3G z!l&?0ceTX!7Vf0fn333#EQA7Qk>3YR7LoXIHazO#{n~tv%j4w-dyuFZd}rDh1+uIm zi2-(Lspj+&9w*UlX-Lhj>S_bV-l(_WS(J)YXlS;Z95+;CId$v-yGX-22?S##;RJW3 zy^%zm46w{av3kiWu<#!nfG1@-U8)UB4vl09M}xG`Jipx3A)ul3#ZWxk9LWd=gbTjf ziZLQpNFYltuyoZ0*$e%k#_o6Z_zXqO?kk_VjOj?+MJO(tz^#j2aFgsz*IsAV>o{)W z&4>#TVBwWo-xD)svcGgN>FLZqsbO<@l)1+hWYFQf#?_D(Cb6C3+Tx^3P0m{bj zgd9OO=T$v9!$uS`A>Vfy4Eb*wEs4LN5GRjUuTF99i~B;^8A&ZhYXX z&7>mLZi=&&GHt9h3gY5!m<-BG51ShzVNnHAU07pGDZ;>MH}kbC23^mkymaHXBhKyR+3dIyil`F+&T4x~wf z#9owOj75{FW1x1B7A~F396K=+3LU zmiUDjnHGsh?E2??gl3lm>)Zsvu{ivBzpt#T&(>+t>o`{ao?*QRmxW8C8JR5Exw;!$ z9&|F{lIp!36&m#b)_3hrP-QY27Rn9tuj}fpLs|{C4b7eIUTfr!vtF|;OD7xzv1Mw4 zi#*k&VK_aibGbaJGQ}-$abX47XZC3@+Ds;KIXQVg6=g}Bpqq1zCL~`$KX0)bk6r~{ zwLn1~nu-4MO=mB5sH3KujFt`ZEpWvq+s8L0xjb&hVeZ01i#9ikCqx3oDms@#FjGJ( z+8}UGAGgoz`F!Napxm=!IhSrcU+$}2*`60_vp^rOmvhGvY4a-qUvw6|rl=J<43)e0 z3ffOl>CnZy;}E8@Boe~a?ISC$+L>=&_kJ8Vf0CKL5;oMmm+df&vOQj1-jK~*rY~*2 zcqHf@ah)o=_BZt)^g`8HUFYm>mwC3st{wxP(Je&8sqZ`7SNsZ2ji-dr6321z%wZWEWPDK(~T((4~ 
zo5OcqKHDCWu+E8wbq=mGzXyO|tK`P5Z@J5StAlTo-tuP%U(ETy%o#A1)^_Z4&of(p zSBW9YdT)(-uT%ee{%vvje3+K(rq|wT*zRb>N~^^0^&2*0CGOfI4i&*y3=e&@gw9xh zo7D&1Q2`UfkK7`{iVNXkG2i+lI#j@?Jc`Q4V7BI6;=C3qcAM)NdfZk~UiJPIBW`cv zUPDZgRoM}|&s}EMYwZ$d()8u>L+UdtD>Pc42)*8idK7J>i`1bHj!!21bB;lH3Nv3oBa zPaA2?haO;HXFLBH#0KQbsgAB4c$%cs34E~30H1(K|NCTCLsBwfU$xO^{jMN$fAOS1 zV3B?=szn82o}UU>zH1FDzM%L(@`|*=9FacW1CVCd;kqJ-0mQFRcFwl;bu0;ND~E|T zfn06>#8g20N%VtO?MDucVIuz-`(+BBzq`d;J~y`{LhwHKr8Eo^ZO*$&)S(2j=A$JE z^bt|RAHAYmjU^R_4BM5bl8^K`Cho3;DYsSFgJ<+mML%^yYdEeJg0%`vse_yqQ zG%$4(>eP}{a!S~Sa$9Db<*@M!Pfk~0suy>>TRJ-Ij5b@Uh*GQ0oiq4~ILeMxA4@jm%^6!Zahgt`T;af$*TZ6zRR)G;FP5x6(=ri&O2@Ct)H+sGtyEX0x9{5_ap|7%v2gqit(=iK6 zM!X!W8d=J_D440JDsXWk*Lp+EqHCZ!xv?tFzD=i)ew{=K=MqXxE&*hX|H! zm4MruL4x(+%>EUgxrLEt9IUg`3;w@D-5sB72H)k47~+xHll=^(KXW*#NYmmO+q$Yf zdJ2M_JI#;)J32*61VNu-v)#XPgM8buRPRN0_zn{HWn8t{cT_A{7{6;lC1TW-s8N~y zjLqWlRLu^Y=vAw1lH}?pbGVDt$L_9tk(*1eDLMDz!VIfj(=c+9NhB)6)vR|iPRb41 z8;k9eu?gvO!iab&Sq^lAPG9gDPej=__->eplG-sR2_wG0-21b1c=|Y{RqxCHvHpJ8 zOHRzQPPwFicK$p@lt8q@`4QWK=|sDOH9*Mt&>fdqANNJc+jozYH;_DA95Ug%%m!vr zVTX12UG>sv821J+gc==BP-xM!Lj5xD!_9S*=RKxxZ8XZ!W}TuV0pj{f4j*2?Ey))UTd zmwgoQ5v6$ymP^^AaHc}5t}H0_n{Kag}&*0B5iRf6#iz=!@0C47bxqWG)+ z#R4X;quJY!6H?~G8JoA&AOID2oNYmR_Uj}g0Z4Acw0SdHcWU21XLo+!WojQSq~N$dWdpxi;> zdw<>NvX|sZt}KHKBGwR47Az(h?<<)-xY6q&Pu1?v8xZpaWm9pWzi_-)(cy`qXr>B( zH&~;nKxw6Gc}s5JZcWN>4V7e=TS;?6D@zHj zaz@D&e}pcTO%Rm7W?No$5h-irvc0yKi6eHtu1}FXLYJSEv9Cru%T?DUAC>GYcA&a* z{e64j)oyKR?VEHa7Wb(dt@f3eMo^kiZi5czh;{c{v@C%YU5K#veW)+h59970lHC zKi;1=e9!|-uX~Sc9)T9mT-ydNnbKY~qHsX_@uk~tC|`M(DqtSVqivEt|1% zk!o%DhuOOCG7#v)mf=w5TVfdaRj|NqZ8y={N~H|>Hu_Y2y)bsH=!2zu#IeYaQbM-U zqfayqDvve38_(rP1o4LX-->-h`!H!awWldLnNp^e=6#}qfwm2Hu zU>KOe=k5|zgW+JZ2|X&`-@mwU0_{MmOeGWh6T_d(Q4jJ1ia&5UpPSvo>dvP1BZaaz zUqWww1)-{IXjLo#Bo7bw%>oBcJGZrRatL?+Z;40ydU=-R^@H3~zZ%==02bCm;x`sQ zbeNe%7qzUGZ7ZAa1bmG&flu+qm5dK;L3D%g$wdnWf`f^)08PKUU9_%qS>x7Z606$XOj`x@Efzzc_=AkG@yo}mZZFW3% zSh44~ke;=1A48qiYM*};S~S~k#-1w?xv#%!!n`ersHo;B)avHdhdtr=lf2|7-a!0C 
zT6Ra{XLku8q;%~iDcTSZ)PNL}6u3A7vSq_$A7ow}TW1(5=jeEl315AjK^-#@mvkGo z`>GbwvqJj(tmOH_?na4Ln_1+*H{n(&CZK(a98t6>S9v1PEZm@MslQ`c7+BFX9w?;O z>Tn}*{LvTGRI*v9a|}2NruUV|g@8*kKW!iWmQbbS_WGozH%DvEBql_SVrWc zvB=V});DKYh6R5;XOJ{%~wFl+OX;y2we$sZlyv9aMq6qafYba)Y?0`6u1{Md9L_l>tS}xm7WlrmB)|0EVUHF%?o0G=*Tw!=} zu}AP{&<~m{e0NJ|*x`-iL)HO3aw~pFhTg_UXVMD)_U^_B?@lxmmX`b9f=Em2VVUobtMJNmutjfm=`lwi! zp+J&Nt_kF?PINWY?ygU8O!63W@wG8;6f_I@=HHeEPrvXH%Ay{mB-5c8~8wkPfI zK1k9jpKEsBST*E6KEIJ&A4(?$@t=6zrl3;2l@O=!o@@1FP1%ih7NTQvjz!7J0s<`2Oj9y51izdB8}?ti6fP@km0s3^{ znSP)_t~fG4p*)jK^7f(&Fdu|T=Zwn|4%L5z`ZYzZnX=m!!>E9q6^+}(MJ4td&4b8S@)@0fsuZ9FNxiO*A_0i7V z#aptPUhK!pK+HMc_KyrsH_yCLXPGY$!>ObE#?*+(>MBRpn@p2v-EjCli9N}3RDh3L zlZI(}mD&7j&j4e$ix)g>P9GqIK~4nQZYJ-+b&<_-bwDl|Y`l10QUX zaQWE_$OHWro|xxZnda~IJZ(R1j(j)@p`bC<6Y@i#8Lg(yEkpBMLb*_?_LiQszAXFQ z`MrcL^WpBCU-W3MzJP(AkjFKfd#uH8MIvmUrpo)yG2;AH7-01kRuj%8jp#nFur+we zZOv3oH|US&$0RwA_CY<^d#5#jHy~R*WBRpWnZPeeB#dYl(qq1zpgv_v3f=MjlvX2b z#|>!GO_Sfz(|7O~QLo2x0XoFxMYGGcIYQb*@2Jt_d92JLKlfh1dQsdB{q}T8qZX>C zk{JY}4H|lTu_vZ+9CJ}q#!desb`-=}c>_vjUeHHD;B1D2v@fFv5Wi6uW49bB6-t(C zoy^lk1w&7R!%AtV@5JIGHLd&ck3^|Ng@1ZY@2QY21(PN81WV1E3Aj6)T9S<(Fr3S@(0msWyFdW14zT(!2H2 z8@kLMe0O*GXHl(#>>ig*(V*=|v4mw9s;r)P+|jb%dIlkZGRbm*AVVUb4O~yzG8-+bY516;8r+Uu7=^=F|nx_QPtXis>XPNR{(h0dQn{=Q}39 zflb}IaTR;FnntVqL;bG*$l-&R(N+>k${QQXN1l2TpzW(-&Uh2Pt|<%!ykO3#dGkGT zz>GP5wc{^|L6)N(xoI`kL_-~($*688rD3qUTgKf%ie2~E>asKqNC9v_(lc#fp1X7r z>EhdrSj(ZUI^iJTd{0&Ru+~^%ox_#IAxOVyHY?rWJg6QBE-j`=Gy9qULw9$0gQGuh zaBmHWe8$<#RTqtdY}rvlwyaM}LRa6jY|4%5e(Mzg6AS}|t76orSQnsRSU74HgsH8d zIK7wTNU(=Z(4Q?U+-^w7YR<0WN`;ZT>vFb7p={08Ff^!+5w;tPLx!y%?4By1j=w-E zN+#gTl~e(JmPR12lxkI^jzPeMmLx}FlAJAz9Jx`p{9TZ*$n&HY!B2fPgWh%kP{Ht3 zLNn{%7_@tS-|fv`Uh)KBIGV4?a)i0noBWp~5{b0>Q~VNn77sluS86>vNz4jyZ%Ap; z56d;aFsjux>8RU^G$h67ILPuBF*a}tOTz0LWWTOvJ<>@PTIH_0uP$YuX;4*tTcHb7 zS65k4LAF|CNga}*-!M6{gQ&wHh~zo7e^6z;z;bRifm31V!`Kz0uJeOAGnl%+-CRl> zQZiNcW4F1?&-ebgD(=JJ{H97W2_KNLxT#+@OH86@3o&BAhHO50@{$1d8v*^TP}ZW? 
zgeUb&3xzI2SpuH9q<<0;0j6h4=LgHT-0CYB!&{boU>3$RHhAWh>)x~qWSIsZ>7p~g zn4MWxqy%lV<$gEM&?p|!=kl;}IX@pmB4o(>NM-B8T4zOHRFx0IglNBwE$UHthlhs$KA~%GvMAnb^dx(x zNVS*S%*0ldXY?EAAeY_Zw)WMk83bz$$?twIbPz<8Ef|>$)MnfL%ZjP&-6f4NIa_XK zi#Mq}4!KZLMS%{}iPz?jl-yw|8No8tv8WMw4_#5Jk_+uO^P5X;Gjww6bVGqF-QR~y z975zd$%|jUAQj*_FDqOb;q2x`Rx`){j(Yy{+1+*jqJOvfO!24yRU}5n5SM@cdR=Uw zerPbW=FrZm}*a z-X|n{zc0Jz`XHivw=wA>upR3HDdKZ_^t)nXfMCrAX)z_xtLnfBo_CyndO-u}a;SF3 z+wLyYB|1HcpJq~p=M%Z^YH^*?DXH{BC`sxS*1~*qziX{gQMVhSHh5tvX|3p*Y2!4k z!|62041OEpM9rFBwLXGGXAR;rU_XvHcifsKx!2oU`!RCC5c4AT=gR<<=qhh4%$77R zyZUdElgGg2y{*|;SG!adbGxta8c1R!zHpvYBUKaEcqPoEfgixYq@`Y27gM*{nf1pF z2n(JvaTA}yMSJ80At*;ngW2V!=ge41S@?yFw(I=GrUD@L?4CFc?*?TOzd) zQS{}P^g@V6#kW0)Xs+k%iwRubf5Wyzpf-2rE%uK#&T9IsH!tDyV+Io$O*aOTII9o# zxb3Q_qIrgT<)r%=coUagvFBJwM+weHiJvGCSmH2EIXL#_Gq6;7Z=l#R`Ww8?ZF@1U zoZb7~?n7U%qE3w*_O^7O3LL*oz6c`svQH_^t9>!v?mnZxKR?9nV-*7B+C#SElW(*N zrQ+hIl11tz(;ar<+q7Pak(YfUlP6-AWXgTUdTCwajDdxck`(RCK z>Z8oZycUtsO^;ZS=%am^?2z@;BDl87=6nh=;Sa;qkoaJ}GX6Lb#l4;W!k-lBSijYp zkcrbN!Nc*$u2PPSrk5btMcAGSiwLvhb)T45E+d)(RXFOx!J@+R9uTqTS>HTJ@ z7#n~4HtaXStq79ZS8uytp@swKd$T-(HCkH0-+t&CndJUii%Px>?+S=^7Xb2P$$A;& zR`w<=kNP{)nFb%9D)^Y6z0tNsZ-19fH3>jfcF>zqKSz$k#dsN!$#T|Wk%Bl3Vp&2b zX>{cC$>DZ*PwzR||6gB3`*>1AzH@X5M-`$o(ISlHqhR1U;JY2RoHP|O1RYuVDb)0Mr)1sTM;@N3@QxT zGIZ~AxDeQcWmPRzzn9Lz!*$lN|@SZuLlOpiqBJkr`pW!`ioPOsB zy%L}7@biQAe*p-bz>CABg@@f)7oOMB6mXO=8SvDQ8YhDbHA-?gnh0>lK2AFjwMCVA zqasuV;FDD=)Y6)*$G=bws4{k0eum{s@9Hdten!gs48Dt!T_THx`4lUZ`zi8yE)EGh z_-P37b<57rPjiYZaoZYZ%hS2-Gm~u=tIfl+Waa@NXx1)!8eEA`S_Dpzsz5#v|0x!x zzYnrhHo+0GXusH(H`B@OK6jo!noYXDcDHcuFIi)cAT!={vjm z!0)}w=4hl)kzmXikUm$Rv61P8M;Q58w({B}j+@Sk2mev>h$zKW)%|9M%?i!ZR@fpxhfEr5Fta{(q904n-38??@X-N^#rS8IVa z(*|14ZRf^7CEB**;j#u4PZo|kpmrVir{TbL=?7rDRhPjc{E0ki9P9boB2MdRCf;Y7 ztgr!*00Z*hHmTki!{+5edo3;{tCfV>ZDpPk0Bs75FzZ{J}k=RZgn?n~{m&L-R;P4-M?3DjZ-6 zGE&PyzhU~0oLDgHLPZ>4hmy0)31Elb zl7mCf#asNifnZ_;!^6e{8f8mCKc@Wg>NE{+`|qDZa3^sj|DanOQv_EaPixD++)p|HtIP{FtM zDLac|0S4KbwPjvqM%HJ;8{Nb?XNx`*lWk3UFg8pD` 
z&L(Oae3&utvYoW_IcmEq1p?2P0O2tA3`4+Z#rLi(l1%u~@Bn(fh5;KafKnX>U7mRK zaM--Zat9cO2RB<;ibx9pz_~zR=xq$hbOUXEt?+!klZxp)V=QgwMR?e#j zS!z&OTOj~Q7}N885(4_9It)An*YJ-_33zdGLpWpj+fiYt&5F-e2ACE@haFH~-D12b6wcxrI$vuxDe2?ko-wyK4B|3G*tZzO}7UtL|q&nG+ zcRDpEKp<<-Ht}B%4E~OE&^)>GgMqEu#SGYlleYT?sQ`MNV%dH-8_60~ zCK7jG$sFXxE1cR0%y8K+FD?k)Zhwa@*N$G!8NIaw;AI+tD>l~I%#FvkflIpH9W*Qe zIORcU%}8}Yrsu%p?H;T3#E-}_t*T#quKQLMZI_@n%NqE&Uo+YaF=%qkA5LNG>SY7e zHBeUk#>#$szO%L#INF6Fyg%x^nQD1!*-PfvdZOW9FN*8jVWro9-WA&J@%@PtK7re! zLx>Ob;mWn^1@~-*+wzV+q#k(I?MfdP>dR`3sybJ94qJ}u0jPy{D}>auZ@$6aK3ecH z1@yFiJs@R*(qa_wy?A;mPRierSQtGsa)g}_@&z5Vgl+6&y86En*g z`!;y8YEXHd1Zh{dYcwgsr*wicWI=2C!Jzb~NHRo&_ zmMnmheHGx|Het5i?w05`f;f40p&bkum(A=OT@WJK&Q*6u30`Uu)Xd5swp|$zB-Dz2 z0VZ~`*LO9H?mgZ%6YfjI)i&UR+MWEh1dL(V-J*iqfE@4}8jt&-2ANM|BX}%w93OAC zT4sbot**dm#^C+F03&nR4R4ZyFa#1m-uLjMy>Rd>p#KW>0eRPTOoE`v3RIq3hBE~h z2soJVQ<4EuaN+aQXErnC`o0IDSXADB-$CEJKdw0r<;t{gzv=PlIxhSkby|OH6ft&d znO_tWyyuvg>RLItiZfEf^H;H_FeYaBJ{go_CAv*n7SSizFCid2fTQz`88EK z1>OzZmOlZgX#O(f_*lP?4162u{NG3}#|D%x)u61ovYhXpa4;KQGkckKK~Ta;9^c<7^wt_4(z-tHZa zQGqqWb1VF0tE(;)HB8Ol_LYS@z=yQ#Gwvcr<{CeJ9}vM!cmkI5^&%vg8^s1<=D-d% zj;-F@4Pa+3g8GNKSN2DrNCvQVMp;I2k>WGzk-ZVUo}hEWEc2TccsQTn0;~of8wq-- ziS-jOowQ+g)Ve?egBFe^xf5(HhZtmP+F`icp-SRJ+Ka&F^z7j;6_L!N!0u;9H1P!7 zjys#_b*C>(?^5LW{9T@-4l`D?uELS{@tY8{Nz*2m+F=%CxPG&Y!P*43>8vdE=6z(y}R1;Wqbt}O)Q^ZP?d4PG=&K58+8X6FwL74T?P zx2-2V#F@yW#7t^^=V$m&jfc}u-UrADXi%h(mm-S`;8WU&RSw%{0$XASEM@~;mbYV8 z`Af~($7ue;#>MVe?+pHJWnr{n->~}C+yU(MI5K%XX00e*q8K{wZ{}A8S-#Cv6^1Jk zT<@Ics?85O&>10FUWdF5V(^5{haoP{?wlJ}u0Z1t@_FfKl$WVnzKRTZRtYAQ@a> zrm;DfJhiZ16T)Mzh0<4dBfVi_Hp{zOuou2Epkl|$LGqY4?hu(o0C_17$cL`yBc0`< z`0jwe%H?rt291zoG>IGih2;F3W=-Ue$wLaDi{8}&Y@pQ`<<)><2H#X)agBC@96zz- zipLho-ds(=mFTbw`wH0L__oPMm(M~-s>tNCYam8vbPtrSJIO(C0V$<|SAU^Z@#61m z?~MP_hg!b`kDey+%#}c#*K{osVYH7J0iTX3Y8DrT1D%S< z(z1*B(Fb38E8Qu|52H{Er_7K2@qAd9fc*^R-{$ldJheE^^aJRDIE5$5A+IIZu_sX_ z`zUG)AN@ON`rdVbiD!U^fTgWPa^7hXKrS9ZHG3RGh2Jk8FWt+Uq1gvAZZF!czEJx* za`o;|^v?5geK2vg7f3@1hC>`btg3jL(=&zU}itUsOLJ 
zIaF_G>A6pI=7x4Aqf?>dedQWen=od>osG5%xd>%o8N1zDK9{Z|``!8u<^W;KgM~Zx z#l;VJED2cTB@va~tJs9I%rKA89J4CoIB@m_PJ%dfU^pfQTsV9Hd-bYxiWh=y;m9w+ zzQ;|$3|zLTPZgRKnXGRD$k&Dfo{oK`OS2KsK9TY_n0?uIThq_p_f4|C*ER4sn&xX4 z=e8wqQtc>yef?sE4tK zk_t&AUUHPr(v$)I^YOx!D%NLr8Ma@hfm8c4{RG><@=wBi9R?>T`$Hl6pJf31sed`^ zzblwM1=EHpMO-idcD;Myr^wDec%Ylb$x(q1YWb6T7wg}r3Xz#j0w216?`x2c-2`T- zzs)J;j={V^aNdkC?xH$ugZW>tJOt2mZOZc(|Hch==D*h^a7JgiMgRNLfBrWf3T~E2 zJ~)^X=J5W{3;3Am|GULd5h5($$$syLDE&`!|K7mquK+*yf4!urfP?zsEg{j@|8q~`izW@nE4fOy3 literal 0 HcmV?d00001 diff --git a/docs/images/uffd_flow4.png b/docs/images/uffd_flow4.png new file mode 100644 index 0000000000000000000000000000000000000000..3400dfe8f7eabebb7d17ce5a055644590e459730 GIT binary patch literal 32662 zcmdpecQ~Bi+b5z%LJ%R*Ta<|2TMTCOIz}&1MjveqhG-#(5(FWUAS8OUM6XG-5WV;4 zg6Kpf*faV5e*3=f?zQ{pUb~x%neja5oaa9Gxy$D+lgD~Gs#h=Hx{Qa1cU4_Y#Q+bF zU>y$+pNNzgT&ZxP)(3C+XaiLxywX0#?|67*&OT5RAC$j?vl|kROH}!vPh28GP9A6< zE>RUO5fP{x($2@*!ySAC*HIq!&JNB<`+x3<2#JW?6%xHGB5o)o%_RyE$Gr$i3rGnI zoBcE1#u4fMuZ6?~guntk+BSC1?mix9CoWMX@VC0V57G_1gUjGm7Y<%bz^|~7wXnFg zBro^~@$_^<8Y68roqf1OVB(^Z0-_S&GLNR3p|%#6h%)%>=In|DZ>mT;R}b72dv6;* z0cQ^|Tv$>-L_io^Qg-t8K!HIpX$b)#0Wk>)F#%yoF>vEQ_6HM|5D@-LB;3Zq#@qQn zJq@>YU!*tM8SvpRSN^>&?grAw#_^x48b-=?+Flw$0Z?y8PjR@2GD;Wh5jVyM>xukl zj6KrV*$xmQ!X*j?9JhD2arCx9fg#TSVk!h7>f!9*sVoVxSGLD`I_XHG9R3+DY2@hx zxTK_wg6fG0yQ5qIg$CNXT23}_J*2UyjS0#W3$7`-*@G7el!}Hi)?35R)kMtB(9;_$ zt&WANtNKeyni~3eiHmFN+WTljlmoDmT4*drPfFET9L#cc*Y|h8>Y7RTnW~#9`xy8- zc{wRTT-9M};8`|yZULGBC>19yI2>i?7$78pHqp~H1b4KRwV?*y-rCy2rh2-r-nz=- zuD(uaO@xuZk(M|@N!1Cdq~`~u#Kh6x-NYVZX6L7`rwJFflQ6TB!tFxGSk+6{!%Z4c zfOSx@^)>Ubg-U8UyP6my16)Ntb+y45aaU&t6=7wV6d=Jw(nwPbV{D^lAR(&c=`Sse zwlNa&HPlkllyuRC_~>gRZET@J$N)WWS2UPvZ-zoDYx&!`IOwUVJ8AnFDPinkdR{&V zAu$6jEW+2w6bAP&^R~tMA*9^h;ogo~j(`%3j)bV4lY=T8?x2dsI61+Tq4r855HYN; zhlZn#mcEpbR-9DjTSKYe|_R#h@CF zdLn8nQudyb+A7N0?nbTw%4iciw79#krWVW|=!=rHDn!c!uny)91%KITL8J|khEhH# zI|oTGgagD8WehHxVC@5hal(Uek(BmuFtYPTU{RhJ6QHLsJ-D}s0YtCE48rkeyt(@4}4t)&l40u19Q0e3O=Hb%KR+NujX 
zh}a6Nm`FRLMPQybUK-A32uJ`z%h=J^7#e_(^pn>1R2M->xTF2Our?a55VW$ArvzHd zz*yhZNep7^gwYgJ7Pb|!v(r&ihU!7>Otl<5ee}?dHuerE+W-@LDQ{6zK)VpyObsd` z?dB?J;G~Oo);2Kkh6t8WF-(CV&gjw%pSM`AU%vh&ia)i>hj%#a)f{V7emix|&96-mZ>*T0XvdlJ4Rr zj*g~oYA*I@Cuc2DGckLtj+lcp6soNuZ741+2}QW+02*B-%n;%#x_-KVOhW`h$pPza zq9LXw?u#`=t9irB+%+YnQ828TzYWU37HemqtBQg9x!4G4hMG?47S|VZz&N9PoDtqgTWzGPFcRV-Waw;U>JHU$(Gf-I8z6KY zy!2I-4K##QO`P1Er6FPvA7zBTvYso8WU_A`F~WG}N)q+K2#sZK#eUVx`5^ zMTN|SO{Bd1bVM;oFNhFa%LrI*n1_cS!a>(XTnM~~0n@MKrmv+XEa7Wr=3t{K?h*h2 zV?2HJT;Wb``Ur@pv2K8bva^Q0wi!eZZOT>xvD$sOS*Z8 z+oKUUJLBw$@%MsChyg9u@Ul@gag_-0_mZ#`F#`8&fx(3Nn+c(fl+8>~!e&Sl4I?KV zQ7pnhRl`Qe4JPViC!*mif&}hR35j$v)kVU+pt?|Jb!lUSE(WN(w5~o>UEdw-O$k^m zdk97qEGQ}BCFLisuH&yHEh3>FVCRZa)AB;1(e8TEKmhEGAwcyd4MZh0+-=4DOeIjd zTDD3!Ne8Z5Nm^P}%2-IkUDU-?PfyJ>KvfEh#X36$NMLkqb)}^IwEg`8z)&4cV<$0x zQC}QPR>bV6v2r zmanKUkSmxHz!)$oW#G`k`@enqUk4BV{uineg+B8OX2io|#Zy;-82Vdny(02ARHch- zxkjYq>BlSZw)S@dyri}P{L}Qzuce9kP%8_KZII<``*QJ;tfB4w7l-6jkIX#IhM!Lw z9esEF`sL9b-TnRjl>D5E1poO525F5UnC01O)k(x7`;VVc_WR;4BRnnoxiBh{zt=;_ zXcBk!k~ma9b$541sD3O>2IE6lsOe>Vcsk-4e!{bRJoelBl>V-MKY$RH!2C|;9o2fA zg_t};WhZ#%Gwu6-PWC+g{PGoRwwzlB!2;vjlNp*`DH&ncM&qdMYJVHNk{nFrl|UkC6&YcmyWY?2@%E;8v9b zgOVlLNd9g$j7%>Z4a03M)ET!b>UYIY90#t&;StGj6X7`Zh!L zb;NyR2>CNZUzc9G%`t#}BVfCi9Eu0V#)Y@OTXRIHUO8$n+2QF$WFN?6?|l9C3-DJT z+Bz_fpG>_j4cC98I@)F{4=yHgmmmFMynNWT6ubMJdXb&khibi%LWQ11mldB{2oGP` znuKTI>U}XIb#Oj&gjdemO9bW7$Jy}gO<7rg1+qj%!)ro^m{0KmdPC=9AWnW+H3IB* zf>bYCwG8Ud6m2!<7Eh?g`hXy^nPMQEx%sL;Ax<<1+3&9lIs|zQhyV9y9A*!P_vzvT znNz3AR!p!$4~8>0HBR0F1fCFH31Z47k$=FjOA2^S4L%aD;SmK~`Q$hZIKW1ot>}1e z9xR6AUff^enFN$A4*x|NtJ^;h@4KmnlT|8C@aR=iU+#&8>+7e~%Q(hzDuyxN##fjU z>rYTyVMh#(W@jX8A*2fv0NkaOcNGbo!$6hQp?D#DUzPvY4_trbD zqx`Y5ew_pIXl{ulVX(vqz1*EajyCFxeD%sQVtV+9(3-`1Sv#3hfZH-r7)@-cyW|Da z0?Z1C|GcSqxc{Vd3~)(c$gT6*KbxGrW@PtjfjUuewc2tz8xl+Wk??enp)-lctBvS% zWN`S~_w6v720aQTp?AxT&h7F4uY2UEhG(kVqU zvriD&{-1T$)=fnpJqLzJ!1_%*qWahTodKUYug%e+9}@0W`1et}nKhBk&CM0)mk|e0 zY?G0Y1kN;jF~m9we%w>;OcM3biF+wS{nKNQxRm+jd5{Oa9c 
zk1*S}u{NK!_R1@&^gEjZ4=8A&v_I9s*AnH8B@S7LCtAx4?)?4>N0`FLlJlAD|R!}ipzQ5Rz(qWWJ!^nayS z`11c|N3`pvmB=vv3h|ZE*m0AZ2TX0@!A?66t()SsO;N|61f_#NhW1_3&(}x!uuL#% z7fC`t+7jtkm=TqjHWIs+y3@-CiQT@MbwyB6@Q`Ja^1DbL`RxIQYPab4Vu??@%{0C& z(~nHXDn^8ZgM*z6tAt-Jb}DC-qC8hmO!;3mTz7QO>ZqsP6m}e9OyMzBdbIpf7Q;-Q zC#YC+ZkZ%)?hwZ$Mxkvc30rG^C2TjN#ntzlV&~FN59WEmuQM1x6l8`iK1l0sEK1O;d1O`M+dRCm|jYQ4K{WT*gU3ACm9ul^-8~kS1nj#M)l(#1QeJ0O{+Rzp|vA)D8)xuIZoM-f~$tP2UUf5ZRc_uKrN^^X1eUu}b z_Rft}?Npc^l_GiBQu$X_z5#m zvh6sbcbEPi;?@7%Ap)_??d5|O@eGZtRweS4`Pk=2G^=6BtsSI9D7i=rPp7|fR)m1Lc zh5R%m?f%$A;&rmm_VuNZWo)W4_i`Z#;uy=W^!j`v8j;J0R#QK+ucmK`B zYnh5`7*?5*Rta`ZoRa|lz$|j@JCwCia8-W#xD{Cpgz!b=a^cfEJJ@y?+Q9;Za>`dz z4Ki1%iTs3DuzrcAlv>}+X1x9^xz5A(;fF_eonAoW#~+LhJ?AlL%r1IAA06y8*37HJ zBp)x(TYrCTkV&fcXQB>4(DD8xpM(N(BZXcZ!#UR&9DaONHXEI#Q!rgCKvZw_zWqc; z;*jorGe_RPWf{095@fXv56&*@Wixbpmq-pV`+Nx(c&^)49JIE|gh*(6B51N1r=m`6 zbF?U)E@xYLIy?NCCMfDB^z*Z;o`+d>8YYs?!-UG*CyqXYDcJ;;Ry&h<+&V0>;b&{d zg^ymJ$s7+e3-0~+5%|G)(w2Fqsm!+Lty;9E9_4A(opClL>(i-1XUE3iUuH~qVomY| zUr^k+b7vJI`My6K=lt5Cf1Mu-6X7RnoHvaknut{RYoHa!J7wg?vkC2oUpwk%ejMw& z!E5kVPstdr5ONwfE@;Jp9{XBeT`|_YeL70(d+eJu<==Id(cmJ8?Ox^^*0hhAF_FU_ z5_&Cm=pU7*bL0P&wD`Z0Hg5tX?LPmr`}ZDF|Ko4bPyUzw_8+eXa2hedc!`RHWb~)8 z(hp}}D~{9A_jBV@vxbrWvxxjd;)MmAkNfL!$^MJSjZ*)%&P2!9@SZ{1fDY1U2$ix1 zK2(jh0{}qPGP3`Z{`-o|y<31v80;UfmBuFUZ{K`P0*7JNA!}DS|Gh0bG+_hHmgisL zfAAL(C{iZlv7q)jaH!Y~fY97=c5rNh9@Q`aTNQ8%Xmy7(D+n+{a2Ozx+jH}m>1fyP zt=KE`CY)wMj+6nMp1ap^OGSI*#4VM)MV*W0FKj0vNrsMYmmRt`|nQw3-neL!XfqRe}!QRFiCql zcuO8P9f%PHcuNd@4xpE$Xt=i&Zj2(O%)eIuFJS-h|L(!{D__GZ1eoqRmNW8_(MKy< z1aco1ij68(sg{}ntLU|4`gi{y!2aXM#PsKcsn(A!A4Ts6n6i?#yQGmTqc2LEzw5P(lfvtbo%ClESD-a8*~Jig)J zNs)#-YugwAQ?-&3&V6|YaFg|LKMb-}@1fxyd%u6~?Ec;+ab<9YvuLCoW)aA12vt(!B#4Tc@&Z3-58)!Se(uW6VZ@`{DbI*UJ;Ct)Gtq+~eqJcKFcb*+{%F3E^Y9?vbXb>>I zrCsOt<SJ8L37&J^TjELObtIToAFM{#Z=sTDjm|~=tme^s6B=-l zC4X^$u%x{_0hORJ%D1@j)-}D^|3!dDX9tr>m6LdLU669`srX=>U;h2q%(udVf3zdo zF0&4ZGf98gxn?9_@hIvibB@a7;=23?%BT5SkwRQ-3kwjM^M8yC=VXkFb737;`8lUix%J 
z{A!PhC%`{0_4bSdZ}_pGkR#WVxbp$x`ns^krDI&YNXY*Xh$J-K^y&~eqrIW_fjn1+ zlhf11)hhG#sm43RaT^Jzy&Jg!k8kl9HeNe-GGbwOOJI?E@=De3+i<5Y&-3@!PoJ#E zMm(mz-%ibgElU#UmAH7JhkjU9nGUY%D_?qALwDds4An751q#vhF^GTZg zCl>+gqFXsIR!nPtmGrZwFYls|e0k-lsXu z7hG}>%!(ID)%ZL%CXuMPGhcY`^vXdC)nE#0vnJQ4eTc7(>w}l3$rJyxJ4VH!uy?m? z&cz%t=!$?&zIIi)gV|HRM>RDy8)=OBSG}Yw7B;Ec6hlp4zbG~(>sh-SRa~4Q zW9{y{s`VnAdPO7KvZk8HBB$Ims1+ntx?eJ0!6_%bAh7gHRoaF>zXGho9v#8#RJTP;76tMCz)ibkMk?gSaZatgV zQx!A(yPy0f8<7*}Aug4$Og1%(U|aGcwO5uf3oLA*$D(0kZauZ*N1c4}g?_mZgIzks2}hRHfxC-fRn+1^$vdV}&G^ zV3xnP8-l|Qp?f$(u8=`rq`JyK5Z;mPO0N)fcpVXA)E*&Cqqp+2+yB9Z@J32mG2v-S z5`1^>jQgvJh$T~S>k3}v(XCPy|KtsA{c;lsZ6-T(v3Z~XB&Dq<1b#LX9J#yW5Vu@zUKAy%Jd5T(GETu zjfW~^Lcv{xZl(=HbSze__-QS|?z>X_RxN7SvG&08lLb4jfi<1X*r0}EyM-+B6iKUy zqVRC}Jb{-*?-i}9JX0_Knh17cz=7^3@v=A=zIptIY|mXYJ(~ag&;Bpb^MVmYd3;j( z@Jvo;>g&BmTJ1fWTLG=qbL{%+z4TMHf>|w$MT*uL`?nG7i%!yn+q(AY2XLsWHNPu)20FnS2x~tW9AZoZS z`=~eYki!zrmstAP3h6DYe?TgR+J(IOK$fq`uZToqg=wccpG5=qn-#||iWFYd37GKgI?5h*lZ6!baI)C)iBvn&$HNa44p)^-*9 zyqQ*pSdX1yweLOe(G3e8B6{j!k-Q}DRMft^pRIWX>$LQYaql&DV7@uHc)wmVJ?1)A z#AfY&!gum~qV8}7*Zs27T{F zsd;wMb+3!XNpgQImP0x3a3ekXMxpX2H-mibcLrj?XNyIR>x=PeDUlx5Ey|VG)jyZ~ z?pYIbsc^N|P+NLJch4#vp)PRDosh+Dy8kB3_7^xma_jk)peUc4!yo776hvl4>(BtY z_StU<;pj}i4d@*HU~V6P`a&SjY4xbj}8kYELF1UqU*fiNVT`?+lusmPKqan z3SR>{NPI^{mxX5_9NiqeQr{iBy}ccl!xgKzpY}dc(Fs5zX-OPj4<*cb1Yq2U`w}rQ z02)7C*@_^;rNwQQDmMQ@F%2+O`-|D>Q7VDuy%&ouXM78joc9+9 zOYA1Ib>9}kpf>*$D>`evmJC};VDYzBxLe{FrH2Y+y8Cco8INoo7D`|#sk7=iA}MH$ zrjML_vT(50PcIoDm)DnWTWu$n7S>*Y7AtxU%uA~D#7!KS8^{8t|NM&SB{CJFu&_%c zwqqrx=?XzI^_a~Yw1`Rya$sQG*&w@VFF3@l2o~japYh*pY-~&vb2ezi z9(4H2_GI2<0^*V@N+zpiO>_DF8a!N}aNnLH2}E_3xG?ZpleF0q z`|i=Eadgd0M+Xe^_4DUT6}Ig-kIu-%w72k?WNAYv&@KhYSNstF)^!|5RWgBSE>G@h zQ4gdk_Gij3Y;DC+alo?CYkg{23i6q9*oTj4xcSQJA1Fj!Wytdom7D(x;=PQzFljl! 
z-5wm-Sc$dh_}JFY1ePQr8>Om2t;M#-dq1WnM zAeR~IE0;`|^HfnUj5!taUf|$1SZeNgWgT#UKcdyIWoLKyoY>q|Rx4cy5+Bf&$QI2Y zDKIlLL!iHVd}Pr1$aq<55~lDBQL;<1@*6D526(*oA(|hTyn4EX#i>>{H$IEC$Abf3 z#HXo?PNqzS?Onr*fYzRI8Y>0nYp=IfmVPaH==%Q#mpV1ie%!sMV6}U6@Te*N(u3x} z&*S69M_;F=Zp~TEl~=t#+9_ngyTj_-IO6~N2wU4!^B7rtYJ);vHD{2WFq{6|ciPz^ zK~erbspZFL3;W5BWpUTQQOdySiSnAqS4qr&J7 zjF95uG`VRIcnO&k%r{!>wJ&ir2RwL~u|{{aW1P_Md6beRc(VG7);liRHIS-IqT;Om zU3@t8M|NLS+h-J#i0aE&gip>Z)I4frbH5qGe-TF&$P+yWc~iAkLtO46=51VA*7fxcN1Eek zAo*%!a={@ay7%Tf-&0cV?jLZd&G`M1Qoq*EM)lH`8`d~MM%B;udY}6Jx~gL1j`a#} z#BbFHULgr(%q+XVre$dVzDm5h z3qGZTy<^0T@iH)kN>j87w0Uwo@%+iA$h`G_lkOs)27;5eQC&m*F_;2>fT z7D{{VOv*`vKQa7Yyj49MCvK5ND2Ecff^${^1?13tvW8=vco?O7nb-2MyWB5NI@6~Y zF^B5jx5WCUOKfg=dQ!7ooXl#^1O^d(r#^dFIqAZ#KeJ5imV`(S-6oWimrr7a5TsJR zdPK;s1WH2eAN=`&scFA9;Tf#J=*E;Jj)cVCd|ya;*w;h8yXye(hQq z^S$5)(cJ&jRQk;!DhxR$Ds`F5UYLv^c=vPl0b01RgFSg&P}cbz5Z*iA5qXOZ)PK4v zaY?7-+pGkZBBl1arh1QR%jqTY$-I8;tPrK%j3k(?MIqIjausVhT~&XWfCR=fZdZ_w z5~2ach*)IdlX2_JZ@p>xd?fzto!9xArEO;Hqgl8Mr=p_(GrM3qSuIWNsx^{{7y=dx z>q4K@FE5AK>)b)@|iSqNk zK>nkei_NDr7QT~*A-%9sy5V-vXwV$2a8ep^M3?&8@`u#MTCvr^e9(w76GuzP#dfT5 z77MTb-I+F;?TfN<@2C-O*_@85gnX$lFepXk$7IVH7D>6ik*kTGHjpYd4Y`^OdZ_7m=S}!bus?p^`U?Jn-E`mOYFUDAiU|wmY?Bw(>SYa@jAYOr{@= zzq-E@>uCAO!YVi*)RASI4f!z6|9p=bh=KY(p)xK=x^8+{l!(%jI%jM=h-=~VQQ8H89iWX5P<&TjJkDsvz$B;UPlpX3W@P`Ec_Wv{` z^G6|i%}Q=^fi`PruKay9%YBq_-}eqDER9PHoTNx4ehrd||G z-LCz5j8v!%N>?OzyB;S3bj}B^QGgV;ZYrSnD)?aoL;M;KK14%-f#(x2YpmtdF?&>B zTQq^DePP3(+MX@juD;LF2wl2+esk6Kam9CUjy=&v=qHOm$`;FOJvDxO#Fk1CoQ7(j z;Y#Ks2~Ei{-~D`|DeHKt$MDIA`C%X19BK76TBoNOyp}p1kz#I8N0S22=;_4Nv_wsn z3B$TQFAtjS(qm^kJIQ^kDHd$c){k$IVJ1-U4vyE{_nMB* z*(A>^*6sygZ0E{NKYcnhpL=mY|LZJ2ZJtDj;<2HcM(e;0S=!Y_mTmS}4=ydJ75Zea z{+0I0QWCysuWhZv8on^-MUf}Y&QMRjzAi(0Xy@mf@wiU<3bl|9K}ULxjLZ;!ePCUo z9#yZBMz3vmR>*I13#zK8?^I~Ww^?G(;jFH0l@Z}>m6a6ZUEVJR7Nn>(>~foUPwGMu zw*6I-G!<^&R`=A&f-hqK_+cMo zo~aoB&Ul&i5?D>jZ<|rTsxi9QxNH&RQoCMCiw@=MCKwhPusILCQA^>W0{Cv>+c!K{ zSJzZa)lZ_$LBAvWv+ga;Hu(7;ZO~>a1jR&AvJ_>_)S)>-&j0Wj7F>&=y^|O0xCek( 
z-}7sxz?D;7ThHV4@o1JJ2ly$N-`~oIT8K7`nVJSrQYsc}%VGCQcktKqp%0kVuQ#_Q z-;?}4{sGV2f6Aqqqn5^GMLNWqI!h} zT!zI)SF2n`4Ia)L#3m$A+IBv97a>DVrV;`Af`|fCR>>2%!g#pvmptSvAhFB}6Yb3j z(da7Bc~AdTI}W4pT{MKY_-vokXzy$@%^8bdCa zU&;Exz&3Lmne5&r63m+1wfl3HqnNKKd2ZUx{6ZYxj@Gb)f1o!&X>cwF(Y~Ima<>=x$G00 zamZNiMI~eefrhzT9H@-X(QiH&s=BCWUi<$2@yBoyyWu=-&=gjnPkxYX_Y_)0b6czR zy<`7K3oW1F0A}L0-%I;X6*V|Jq?pjD&4i*V75 z*!l|>{rz{E1wFZ{3FY1^8gDey1d8--LglZo!)%C?w+M;q!-(n=YA9pYcwU=1Pvqc( zBr$_pHXZ1GxGN)OT;~AF@X^nz%x%8L2^WmN5r7@ue7)bV^FosMQ*tbU{HHE|r7O6z zLKYF6b&O^>S=NZpSj-9qA4`q6o5&75oBa?#1EU0arSQNU<^JPZ^Z`YwRdZxx%L;Ui z@uOq`SsCh|mLJ`twT|M=q-|pp?pvD=Vq!kVjQ=_QG41m4t=o+^np9!IE8VoIF2eVz z!hp8>j+9!OlO5(jV~Nchc~MS(5b0j$ zwM6W_`n6D#Zd6#*;cF|f3>Cz%Y7C>9-OhisQAz|+K!-D?|cwPA}Y-st-LHA`m zhcU(Hsg98Rdwbbwg(kN^b$t9d$GBl`nBdGRVtcBg}z{5#?9^U|W!pn!Fu|~OL zEaHRJ=H~Wao^cHs&DOfmbX;K!XATd@wkxA3vMLN&k6?2e%2O{7`0ScNAU~9+*!hHM z@B1*f+f1#nQtCS205E%{wo~EHKr7_p;rC&UHzaqbt5I+QOGYjHSU01TJH`TBV;lzO ze-3mHw`a{~wdj?JUqm0uf3QP}@|c^OSKYbukC6dKdm1WmDM<`#@L^|=g3CLlZU1hM z-kUcUeVG80rU^t!SWXvE?)19W3{+u_uh5UO9A%8=D<*VltybLDY@H!G1r10QL7yo= z+MO`f)u^MBl4bC{w%{AJR05EvEuLkRES-x`4y0UcVW`os^|u-EFrsYNpo2@u2QKp8 z_-YdU+i#mADDTi|s9v~ebg!R#Hpf?!Q#a{K3+=C~o5pU6&oz+cc1siw{D0#Ay1Wd} zG5fUP@FeRIxuZZUXw+%B@rPD>N&-^aC{1NGPjLD0B^G^uX!i?f$e|Lk@5^qkSys== z zJS&j5ntdod9&*{|(&fXltd<<7YTK!a108J{4s6jfy?pOQtbo&%`bJ!hVFl$Ev!fEm zjLssjW{fBNYMCvV-i&vU*Vma{5kAluB-)5QB3=o;Tc)y|nJggX{f)G~dOeZ=O3rHD z=Z5RVqUKVMICK2D@I5!b!2ES)WuzRc!E-g|vgPw?r&~qmRz!6M%M|v7+m{+_hBtcs z&-SZinvwa>G$MB979ak3bifh(@T5q-@Z#jijt+_Y?O>x!u zwZDQWX^37Cw(DSZ9V?moxeU`)GEt&gf2yJ1mukljzYVfBcAZa%Vlxu7EQNz8KuOoU z)n^)bifqL+@}+XC&q=p~ZjE-o@9e)eE-diG@*Xkhp5q$ediLI{7`72q=hXfC#V2~7 z;j{Y{YxOkcK4A(r>neZhwsb*{7{O7-X{;C7%`R)6d(Xa_77;c?6p1+v;&D@E8M1M+ zSC4O-<Jjw)aW7T>7pnac_x@^mlPWtd`}m$y+ReS*9}IkmvWRI(cV!xw-`*e; zB(3GXW<*6t))Jx`f<7Nhf$ALblbZ!&Q*Ku8Z|0u<7LW;zdb8!-=AsO?xQ)O zW*-y%8+b&J`k|Q`=32>$u#8SKTkK!q=2NyfJ^X&@`|pj>Vvb=X=86BoTkD|T=N2`z 
z-@9#hRX|QW!edbzp3R__-LtfI<@r!iHm9rZ}^WL@NwSgN2KB7Y#9qVv@LoOJPW4CZRLu~d9q{Jn5f)i*G@R@b7)h0pat?$aYg>m|DTENM=+6f|pu<)CfN@tTw0O%ifH}3t zlMg&Lv_c0NT&u6G&35R0v3q=M#HM3l59cs#<&Bk1=F+_JmSA>vmg%O5eQP3{vf%}X zxxH{n+v|JSeL~>npSXz&2WOJ{EC^gw{u2lSM{+N4pD~r(xZ_xPI#GPBUp0Kz{75e| z_NHcDMuZ|I?-kX=yB0NgptJ1r0e8Q!P2WlMSLtx3J6_fEK|z<0-GliR-O!?J@A!+? z{hk-9hX%{++^%FY$J@~-d#?=nzLtW%HqDkS*+s0se~w}#!G?2JU7#c4N~u8S_hHaw zG!AT-_Zn@wf`d#w<$9z}kj%o+q^8q(AUP_!C-uOX^);(Osb$?0(5}cQEbl&MVj&fz z-=5NgrQW$~`g*P?{1#TLIuz0JjzOJfaSJrN%)CP3 zYxDOz0~er*@~92hf`+^>HaZh1Q-4h&zoq*FVCW=aJ6Kl>8O`muLsbhxcOW9dwr!+W z=!NiD?ggtvQ7{=wqywy59B_1$t@)tw`rZpoEA;pGZZOX0R)@Uu0f&k?Dvt@;TQ=d= zS}*VDelB_Uj(5P0p!so)dKmK){De8LkDz@zi33BmthLfo3s0+bn-g&}Z?SAoC+2UE z*y#YZMX%%Sn?5d6(k3_3LOWv$4i0yt2gPF92apw}OJnhu;yKiZ+WdbW%QrSPCW5wD z%S8G=={J`girTBsRF7kyR&y%&9ZcPo^8e0)VqPEa)XB9BKc4V&5bR^Tm&-h^Nt)b7 z?%F-rl8~c$P-;e%gDqj1^U@!3rYUKgA*y#!=(?K0zn$`n6-pDX0E$_(9qd+UstRvq zZW}f@omN=Lb>|8|2?IX! z#e1H0V@hV4sUOAA#yy>MEji#_|7M@e{3Vpy?70N$7RWS5JtC*u=tTmU>oit!vpM*z zXal&3T3+aND1oC-~P{S1nB`@t82RHL^)aM4toq^^+S+UYNmAjsn3ysuh z<5TNIZiXbGkF@|V{-^54Ai-gEcqGZY%0HSqy(7Ea>Fe1CAOedA$E zV2}IJ^1?u~vPqpME%RJpG&M3d_ve6uTBgYJ%d}=vu2d2wGohg3U*yZRVy+k_@aqLs zE;gFSs=Ah%tN6w{ql#Mb&2g2rxmPLc6jL5wp1$U#+*VNRHK;20nkTN}(dnLm*m-SE zg_zW=>2iG-<2Nm?bR~RXAo5{NXml+^@VrN*=gsa*m3ecW&x#b3yb!VCT)BqL4_hK9YqGHeJG0GEV@Fdwlr5{b*}gRnlKN;* zGjB8V^=_Td{vNeiDNZrYa^>Bh9PppGtDUXsOEWpZ?J?I#FX?U(%|MZQ$5;oOTWnZJ z&BdZk6|*Ruz#!?K-JY#)P=|l#OPXMo#yC#~`*YjZijvUco2ps71H1#DIavwhP514u z>1Xpkl`s%EeENLxSF)o9f7dDpd$+lt`=h)cFCaIfq}vnn4Juwq2j9?K6KH=voxiuS za%V)qoJ6^U(uj9SrOj_+maK3nPbGHVoNsWbUeoo@T=osl;401eK8+9kFN8E#(9hZ( z*l*>gRM$N8PoB^7#gVkT$!;(h3qv%!+) zeKq2(8iyoHMOJoJm2W?pr!;5julHCn{;T^lWp@s=HQS$ZaSS2VqWXelKMxPny!B2s zI(x=sFHT^YwiLH?hwU`bW}CoL+0b3>ty*MWVvPSY=*ex(q$`h;iJ#PAKramFPAN=z zM@9{kb5oqUQNfRiGW@h@-ilsE@2>RFwLM`n|6y39neu4G!} zi(z`SVgl_QqjHPAyrGa|(N&OAf@eqF8E9v?`$-WF>acNC$*P1j>n|+Hk&+*YFI+<} zt^MS{OpR>B7XGXIy7VIOQd+-t#|BfeA%SZjyRkguQuIRU2b|+saMHVLRb*4)G-gsNwEh_lXBIZrG=1b0` 
zsn>{@2xfKCq0(I0R7FbH8t7hPkCF@-y3YSNXX-m*Xjr`~jMPWndAv|pb)_lI^LE`$ zdP#pdm^M?Pl8qRS<+j_dtPbW@6@Kg+$ep~a97Aq*SPrefBb|0`E@IX=IP|1-$y+tf z59T+l8djP{6O6@n3TxioUkORS*n8cSFRuo7-Sr}pGlr*C)8TKN{jws(5N`D+NQ4~U zQ)4-kWh$8u+n|)lOPq?Euz1>^H&dWlt7AwDr&^~Iuu%Ly^p-7_4`^tA5=RvEMrA1c zlMZL_%uo*qHue_V$-7$e%$Ct~KBC-kCi34*K_Dag*{1he+P8*RE{Q}(*{&kuT}xXE zKf?1nsPzSf%h=tK+@C_BZ60fRmLoYDyjE6LCSvHM%WO%F{a@g>Rf|3prYH)&d`sHV z$CxV-L}sGG8N2(8y`r&FNL%BGTAX0T+K5iMvsUVCo|kJt{EWhx&#BVP?L08Ko9pc4 zoI|P%V-)2=STENnm|l3zmvQomc6q-9UAHb)2HQ-XuK=kN=7NW{rhLOgAi-2n4WmXiLf!pR_gxQAL9 zk<+GQ#=U9EL$iT^w;qt+uY(}d2Y6?sg>!hAKlI8Is148eRJ$5ai47_=+?K~GW2przg~Y}?zm#!M(+ zql-6HXCHLHPc?2YPU<|rO)F74lkKaODuv;#RNMkz-%v}Jg0KP)H5pL-{joNQ1e# zxSIU3VRMN6uuN`v-Tma@{>kpbC5;rG*RurRYc*EVw`F{{qQUq6a#7ROwIILc-_?5_ zG@2ko;Wcx-!sx-oqt@gByMVP zJK<%H2&`*UbCv&Nv*quK-F~8tNJ!;U7fe5SReMN1yN3Y&*gTD$Ik?xs0H3ISwJ^;8 zl#3Bo?MS%+p$lUk^P+lR{)-XR;1sIGB9lPY>vA_t;^6tckmmivSfMnNO_2P;Wm->; ze`II~t}iUuTv4<8Kv}70G^Ex2q=29EzB*vQ(Y1BZeX^e*KUb|f%1+L>DN&m1dlvhh z+LGX`eVC43)-JAPK>qu~xaXn;uXkqaD9>chwfrVr)pS9Uz-r|o(_CPL#ArrDj+)L1 zgRy(ThvXO6=u8c7*X7&;9X1=uVhNQ4hPDmJWNxahr*PA@O8=pDSApu?-{+PqkTj?V zExDn=^V?=0Jf38t0(l4dp(2%E%_QJ=gRf5QByQct(DoQH0@y*CvTrc7pnePXWq%R$ z{zHR#W58Fq3L3vZeORHonK>o5Ue8E3jJ};WB?&TtlmS}3q-Rs&AZh1m_*6j%WOX-w zmxE-M#)B9kN)vmVU83r~+j zuCBGe0GSFmD9_7-76V($mfE8BXy4;w5P?T`^He&WDW&imi^%+-FL_r$y?%N{DH*HO z*bNaiesFq1HdaxBD5@QZk-PQabiL5b$pvK>UuJEQN@v+PZ@{_vsO^1EhwcJx;U@TM z&+h)dQA0!I^(=ZTrU&yvgmDNx{cMJgxff>8m-ThQ;qmd}px2iP8GjQTv`nj6mCMJ; z$(ta-P3E<=XhSD>T5fZ$yJK6~bu|Jck2xh9N}(!M{GqA&gh_WR3k~*{zaY+i$?$F2 z@HIjAa%q%e8q=>AAKXiNZ?hxbS=T|@wJyxojZ-<~-dKDCjZ~LY4rGx<*9*)VUhEHO zB<)`PLM+mb6)ClB&Ko*Xcf)+~<}!Q%QGZ_@^x3z|{jCQ`(pAOB@AH%Jcd!oJ{T|*{ z0m}@nxXcH#<EzZ-k^tG0o+z}fMk2)7Af}N#B*z|iQ%q!wc%N!b}T3OKUbg|XFumd5DY;2 zo&S;kMM#=Z)CQ(FS7nl#M{ilOs`$y$yGI1gSuJh2C4!6nXNTBA~G`Ur#s%Pgz3#gh`;LR89{4cII$3i-`};$uSGt z(=}z(y_OBW&}aOnhVEXb{=Ad64*i+$eDBgi=37ESN~~L$7n|JY8l{=#{6nF!H+c8v znu7wXY!VqclpgHNHMt~n)sZD$Px4k~>+I4kvuYY?zP5--J7$NAIDC!1${>-gMm6iV 
z_e4upE{ZTN33T@`6IeDssD3vc+4A#5@N$Ylqn2=0SRbg$D)ka&@;K)xO1G4~YOhLOVTC9kzgA{}`tuf-%u{n;wpX%9=HR<0cTTTt zGHs~DI?dbfEe=U*?)|jBD0sAsC0~Y)&KDP4&#xr52WiA^>D}*Vs*Lv@5PTPT11)1~ zO1xuT>Ny{>ze^T*#QbEduJXjf7B)!v;#;&F@X##l?oXKa@oSGx6y_;0?0%Ra~%$2KoF6t;TNjV3yh^fY>tGyXi zbLDE@1^K#J%%JTET5R-=)zpr2d3oV;l#4Rwq#K`=+{HEJy`orBx&#bJND6^O!O`ib zN8|{dCFzup~74 z-qlAEPSJKs*u9{7x~VpQEOrJ|l4sqFlxnvB)k8VSj+IHNIYh9@q4)B%`m=c(w5J#t z3e?`4!US^bx%3qytqcFn1wej%7qbtqgoak8S5K07VtOBs2r`ofl_aul2V!@qjPJ?b zza_^(a*?~EOFQ(4k;v96Qicjcy|EX|qF1BY;28&8%FF!|wu>zw7kEy*?P?YkJ~9KA zKXZCh(dmVf9y>~s%{`i@>Xyb)ABBZCNCR?mNOU&efA71d^o9w~hN$M7VI+B}?_y#w zAs10Mv@?C0`~tKHt9^NevR?-7oaP%v*3>-~8g)!n3KO1C#HA(L8Yo7&X2xyAvoLo1 zmhD5%Os6&%bYB7nt2)8g{9bLdoZ54}Zl;;fB=f%Ln|Hs-jKk%wGzG@pi@v0LIxz&W zm;(`Ck$N5`?x1#&ooK$_4EMpvgVe6?-_?Vvpl*|n{+M2^L%elqLt#%t^M*lUKDU&^ zFqog8pkR(K5A@2kt0;c2#sBpcr;{r-Y;DF|X6=zMUTSm{)624rq~iKU@katZYD&;J z0UZxxG1C2Ssx`bieKpK3$RTRQ7ns6iRf~pwp_EDM1V6yoYG}* z`(T#V-}NkcdfzQC`nxCa`P(sNwFg2kUZ=&x3jO@Z z<_HE=O1y5DtNHT|TnaQ|t*R)AdXQ0qtSU(vO~7&b1UU$Zh)Ld=+O zH^s00&Yx%UyLKEvt4w!(m;>?KZ81lUBVn2_B*Zy!mRV{f`ub>}^1jLYJ7!lkU-^2K zQKMbJY`m z#opzq!fc2Z{F*0{1veNcWL-A8j%R(y@8On1Jioza3Kb+rhj$2t zf7gj`8#Xyf2w-3aFgxq5{=uSEj4oCyEcqMXsrTkRBPKNE%3GVM@-6^IuWW9H0tk>1 zt;u5oMDA2Af55V=fjP>FfT-1es##N`|1xc`EEi0f_z(fRd>DRsX& zR4J3PIS9J|gOfxifVuF&`^4zgHmkCMK1bSAFDitxF!qz}6p=3Tv799fT>s7Yoj_Ze zEM5caXTttmDM-wVavmg5;5}I(7nbxaKYIoQwOqk$Y@l*uhc;@T_RQ9H+gXD>ThZY{ zm6B%_^`(xe;(F)<2wEzgE46ge+22#GL>ObF8R)sFWqQtV%XNM$`*`!F^{G6lCGO}q zkZmOiPkBspOeP(X%mj&amigfEfKBuY=li5^1eYDX8r{E1Q(gdwEYfMm<>lIo9@t{0 z1mMgOwc~4P26ciceiR_=uZ@%fyOP;E&$BH=m2CUz781|XEg-Zme3KfR!G%E%{ zD^^%x=YshyKl|CQPhqH-k6HSZBduSXLV~{?GxGiJ#z>yjuGOdD-0f7aDnRX(m6uER z2!uscPmu?y28~^@zCnsyT~++Z7m$#m({Yj2F^TyB?^PMRil*?7h+SX!w^ykl({32M z)#&Q{hpEGFb%Yrf-A(k(j}2@EA^|#UJmk-%%O&|0fJ|D&fEM zeLQcPMPwdh`5O3U`V}KyV>y--u28H09V&1t7~{cXf@>~!@zj^cddm^OOwL^WnD5>q zDC^_4JtGn!b5bX?9rnIuoabT z8~#5gLOfUN-?3Igoy5hn<(70O?F#sBI%rX}qRa~8zLtNBdJCK$M!Hkviel#Td$C^9 
zW8Tt|d#B=G#4kd)AHf75M|AOIB|{ePdPjb8s8ek@BKQj(KwpEfAde^|pux0MOUC;k zD2yn8k$!cwq(x%pOov%d5hY2@)yT9v4t{$SY}}p?u(ZR99s@j?-}_R&3cxao*;nhX z7#FJ_kSe6Zqie~zl67hWf*40N62JNG_%s z)PQ$tO>oIb2W5(}oj9gLa%Cf`ADP4w%ofH1i|mNusQbg#ks4Qm-C7oCQ6Ze@AF}t;FlwIh7J%+~0o5Twl@z!AS z-out09E_R7=64ibfR)CQ)ha&w$_mC)iUV37w%RN%!Q;Y=-rvih!&`{LH{P_1?`UJ^ zyXuTAL;nocmhsMf0sh;Pb}uLLjdncC5jsA%z z50XJ~1yc#1!}?4AB&*dZ)#qJ4J~`+S5x~8L7U10^m?H~NM#L`_emXx8h+S~~l`-|S z3R%BNV0Ltyo_mQ37fxRzi+VEnx0%kq$|J37k4ZxOaePEg>_8KMLJ8D*>zpx&x>mwa z%q08aZKNu0i6$zUtkT*EW+fx}I{FRnD69GB9Sc*C1P)KZFOLN;0U?lRWLM$RqbYnq zsZcRPUB0o?pU^qM0axZKg+}3Z-Hghwz(;}@ZNM^y2AuFh;VsKRSd%YcbLoQeMe4|Y z?XuKAN%`vGFv?8PDq;E^bz-Mn$h6xdVXL@1Mu1@cmU5l2`lUf{3eRn8A!aVC_b5@_ z2Ln;tz~tk_I6Ahbo9QY?+QCfTAiSY0a-CW6UM+Gq)=l}mV9$E&n|Si79hR3dba*_5 zZYUWXHp%_=hJ}j6b$Nwm4)2mFbUtW)*Db)!>o3o(zE&6QfBur9fHbBUFgOH{*a>C= z-Zs<<|I<18kf5Nl5R0Gypsj0BMWzwOq9UTRxKVUEOBeZ>%<**7sX2d`Ul_nK5)iz7 zYV`k9+C%H(;2qO=c@obc@-km0Mdl%2F@A(7l#tpqK7&sJHVQvkqSbY8QX`F{D;T1S zTMJ;=qS{z^G!pT%&A$^dFl&GEI>*a8q5tK=^?tH%p~rmvM2~o$O(fLf|C4VD;&F6~ zbhNOzoUzB~b9;$;>P5sS)mi7PYO?T)vm7Zdfi#R%=#sPQy(J-`W+kh|nYh)`{W3g@ zy}QS4V@Jnb+U?2x$=b@nONeES~KKb0K=upMoHaTxOoUY3d@i>WZ7(o`~IEu+X z+S=w;^Gr%WTPOipi&TJ}DY0KOy=mGT!8Su_5+UM5z5=fL@+u*!Y-)##9miJ{Cu~>k zJlQ9w7oA$}Cy#!CrCWH7UVhzxLAq;nqsM=AXlS%@co3?s$>~rTgTjZg&v^;xs$t*j z1lIX_Xp-L;o5SsC*H>26tN7nN*%7WDf%HutQC9H9wck}}-2Jr|{L_Ng;+aZC$Iwxa zqal!41Unp`CXiyRA6_4B1x?pGYJ70`k-f6zkPU7FIZYi2aulz5iN^>l^Wx3fdpOtzU*Uayv7wx2UU3@I<@Ry@<;o-sC!CD|urdHAHclOO8D&8Xea;NT_0s=;YWyT^&>Mhqk-oa!MbEG!IjJ8?_@ z$5JAr0VkhqA{C-)zH~sX)fk0ocQnJ5=U_U|MEkQboQf1+w-O~g1+ZUG;K}0x?)n++ z@xl;ZRax0Puk#Na+B1s=l+_e$UJAYmJdmvvK`5Cy`ZFoN=V_P=0ehhHQwy7CrHiox zv(aQ(8dP(}>{dWlsqS@hETc#Oe2&d*ZwL)^`4 zP}rXgkQ-^zOIdq1jNk?b;95GNJ{v~-0SBag75V2@2D}Qki2kY6$DYMHvzMkv&n^^X z_|NA;`gg96;LvX$g)TFnb#MQzWBdOU0-xdhPZlSi2F__U7uWjiHv;ALU!u4F`{DmX zS|c~>(@+CmWBWU`bFM$QV${YTA?P}(ZR|3l?`VdX3|_~$LnOaav8(O6T)O#5PAvde zjhJ$!+!qB~7;o_q;it{RfX1(KtB1u0y@})>sk~SHXUFg=63UHT8_P07c5&pa_nKV} 
zgs_X|UlqDn2#-K}o?BBc_r+zkb~X)g9ZUUx{&aJK|)=*?+vl_lil0y(@GN)hR6R0cIRut(bBFY50s2w5#VFB#%uI z%*SqXJfw5!SxTmW!)a7m|qKeV*K-RQwPGg0Ex4{{6KvSlN^`TZaiMwHC=i3X?H3=XCa$~L$$K?wEx2DwLq;ws&_v%Zi z4u*9)h@+PZdqu~eXTRT>bo@pv(kZS*a%^&TB1Fw-{PbvQ`M&iGHk{6G(HE8^eI~?p z?&|%tnWZ|jo_xfjcU&d|hM=PZ*Lq(9oI(q#SZOH7J2zp=A9u%yGR|6^MmAg7);Ft- zZhIuBnjeB~!Z<11FaOZpch=+^iYITgZRCjf{ZKz|e(&sgLM@#N#05h|Q4yIt2AI^u z$p6o=IxLa>RMSU*Ba7Re`o=#;A`au)X)ERQ+m!VTe8}0BXjZI39kfBPTTEoZmqLR` zgk{j03~{yL^AN4eV)Ufn6S{O&%z!%J(JrNA6kUk-r0O4aji=NQehljO{_N2I_~_Tg zt^me1jvSr}7s~1b?$PZp1!UzmGjdQjRK6?Hu9Y`Y&=@HLJ@8np6wjQuF+Ch0xjRob9jDeKJ7--1jq`YkXki&S;J zv@8=ToHi58!OtVmO3$Mr@BcL_Nb1}$ENy;YD<+hG0Uy~0&X}oPSrmy<3P#a{Qk=z; zuU&|O^>x^C{2{oy0nEre5Cu{U+mVgynE}VML700U|8(9ddK;^k4Njqj4ZxBGr;v>+^Q0D6O&}DMNAFH3yZye-gCnuIz5!| zyqGjdUutZx^#@YJq2TY-CR3Nx7eIpZ9?37B84!ZiS-;5@h&y{gw|>m3 z>W`YQF)*E&u5U*rH`v9V_FtDL3otDAlNzrugfh_MQtpYU#2vb7b9J6rvRj&8Nh!eI zjNXeQ>G)RCm7^MmeUcr8Zl_t1s+ry1bjg)SqljN3-2S-a%Mp4-Dpcy0KbR)PRjN$* zmty2DE|XmDWVU}`Fo zL#x4FI=zOUnfMxu#jsT(P0;lCe$)_b0!;?8j25Fh5!dqWhKsF#mCQY0ar>MGUN$Nj zwUyxC6nA*%O0`H{-)-xRe;7fYLbF$0VcVn_{;!PX`-M!Qgo>X-EoXo{VLUMVX4=)3 z7GyRfA2Z;|>>v5@1wFZ&*^eUmxQ>?RTWbQ|&OaP6TSLAF5muC!PEA#4$6YXTEcB-&scEz03sTJvv!oyM%leNOa2id(FzjtE z$K02-gM+)c-Bjk-MiaPGlkNN)>ufg3i;-{chE0xK7R-k5Q}#<=*pzDlz+5*d!yrx@ zPU+CoM%nMkRiwvQ=QlKU#TPkYTY&jNUau0j?osQtk^Vw0wBC=2M1-{?l)><$^&*a+ z?OaE6?|)nk%hV0pt*n#Fs@3i9WF6Rj7^=7S&7xY;ZgFMt@$nhg{)T!KVgMiel8?Fe z#On2zZCIH{@{hf`KPA*cOX>o{I9WQ^Z%S*u1_$N{DZepwT0s2AU-L&4eK@~+f>r=F zywZNL=Q!IN+GfO?q{qc4!c@Wa<=RVvfSbwS8m z9EA~&5^B`ek84YH6=77v)N4_lWl^+#53vscu&wEIq}`|fE~8#Q2*=CvSLe(zWf2sh zX{GMneHg9l_hST}(v?5<{S9QY`>tK`_FL?BvB2SF#K-Bzy%(=tU*{f-1^+V8;QSgN zS?o0|hO-@|X*!Ulmn3Y&?Tk^L-VVK0cjgFud}_-Uknl(if`fv5$*`f|ZVPH4!}nLX z+HK|pNVspp`5F$557(PKo(^F+59iQ}zurp~yxg8`NA7>DC{x^errHBc@h3>+#{+uA zpEO=l%h;@6=^Hf@H>5&7;zw@2@oe^}@<3!W&Koo-7Eq4m7sV3}yjNCU=v5JJTm7SdsD@yNQ3kz5{gl@l9i^US&`tje7yhq1XB#F*# z-#~PkUw~Hu-bcx*egQu@B=s9saTXo&3EFq~x=A)&iDVWgQCr*&-wefg 
zW)-fMEf8FFNMnn7gGf}zFwZiWWJITDWf07W1zb^h19Cc13moenH893?qSEtug=|nbI2MJ%`+Vq8D_FH=(|1Nz}9x>*R#qaS7~)YjQlLmT>6`p{E;P}5S#yXle z_KuH27`3ZUwm!c;IUOPrLbg203_v(}_KjU$43f^G7qI3ki@8Gu3Hx*$uNa7)>ViNaiZBX`DgEh1Umk3M)(*OIzLp9$ zRU55>1RR{zC||gC{Nm$Y!;R9SLa&0Ion7yrb@xe=2#2jdJWGA%zLrCR-m&yi-EVx% zji-gp(X~ur1$f#|=$E+Q1U9BVQA+#5u41{U8?dc&xqFiC+*mLoA|s*aU3X-t8Og~# z8DHSRo($tM&5<1`#9_m3>C>xL`K^fe>E`X{Em078_O)T>sLY* zeT+{`jFv3=iIY&|t9nNo8Ch#v%F7)y_eA!QKP_5F#LlBh4=vZT2?Jw^&RyV2J}~oU zvS#PRBlsYu5$353FWjTjK(bqSi1Z9Cv_y;=y#nNkIAr|pF1U7tYO{P=OdBMBZ!u?4 zHaTr!9?1%hjw~t3LRcu*!2YUKtGpyTi`P%~^}~P`5)OSnv|h0;b+K}>s0#`2sAYF( z+~>!fhkWf~6U=!^aYOql{oCO4gzQgHPM7C8h>usoGxe@5wz};yoCo*n@vDIdx2-K# z`Sw$OtoB{{#hNv#v1m>t>`cmbi;dJ`iv$!2Hn{x@eC`E$Eib-S8%K_xmebeJ4kduq zgSIC5&v@3aFkQl{r;ngh($ zTXO=Kg3R)V3+*gdfT^^KdEyr7=vACDxUrCY2pbCUpLm`_H1;f4k zfE1PILD?AYLaaW|1|2K1z$ZwvozhTqLoGR+p!11>xQb9UEP3d)n26G@RzP5;dLW{a zy_Sen>qz5()yF|xLv2zp4xq<=cOJK>DFTe?8xT^vdjjWxX;yDcU_#2F=!aq_ciF9t+Klo);mS2UDli+vivDCu&2(eiVaae+ zq4AOKNRc!TUTValqWt@V`GfzI41hB8&bo>F#DXzaB#{X)C^DcI;|K6q?9y zV^e0=)9>RDZwH_Dzg$`H_jC1xLQR-Qhq|PM=a`b+nBqSozaJuWbuSp%RE5@N;f8_KPebVTxOK!@#VAUYU<)jCec&BDYZYsgXe`)yMalM6)6M+{=Z5b{;Jaw%g z>P<81u&h(Bj*#Q#56Lo>vemA@8?)JMV*z;W7V}S;1s#Ob35<3=sGb{xn_C`B!K(Vo zoP^bJNZvI)s3WI%-?z!qo*GwuDIL8f+ zu%R9jQ1CceXkoa~Y=?W0&ihM$lTe%h#Lz`L^46Ed*WKYp<9+SNK+4n`<#ysS1k zLn6QvRD8{T5|KP>o-jbYdq($_-#$^DSye*QPKUf;AZZfern=XBb!=M7>-ypg&G$6v zZ-s0!+R7>72N8)(I&5+Uzmv?nS9eATqmj2*WSeWPsBP-Y;t;*|iCj!=dm}20`t)`= z%4mH>y#QhMD@S5nQx}eid)X=&FP(T@p`-=ba}Fvz)MHrE(b;jtlUhqXCi#u?yiID9 zqrGBMW`rp8)TNuT8W^s1(r}ASqr(+(vRto`49G6vHjpqxq*Nj?B@DwYA|hGFaGBx~ zaMuuC!;4{%_vB2bHQ4n{=lIsmvq$O*|2Z*)&FEq7BnJj1)Ci^)Y`ko+U--texMmSE){#62`>;jokGpQFWW;{(bk-YW zzUA*;oK1;h?y>9;BdqnPW&Q@9!++N2dw$?)w6t%)uu_VEq0`pD_mQ*Pm|Yw7A6kWr zi2_obh>z~!whQMW^1kk=wll3dvmCY+R*c-)kxQm!Sewxh)Hc7=)WG!P0SL2PLp{_e zn7V*rQUex}kXh{Q=E(|ZQV+!Q!ZMk2#4 zU@ejDU4jX4IP17RNxdAwyNzzOUb@ugtQCppxA>g`myR{ys&w6xS;~dEnXM$;A+Kb9 zwRq6z(fJ~-{ruQ+$0Vd~J#R$$1*NGZOJ53k5ZSEX<0=ll`rC?F0&40Sp)`##MIxcC 
zh#P{d9;_EX)Lw=es}R$=nZhpoJxbJpTTrvFbi#^~VyUUlUTXG~gKmMdp1{t#xjLDG zN&kR;j=aU(8y-Pf%Yc--uHs#9?ZWq#OuF87J~vCOS$5n~g<(5fS}ItwceM8@NnKq0 zhr0vIcT>NOQyc-02p3Ftb(^m@D|e|*f$9WLoD1u>OH&*ZX2-oEs|(Y5&aE+fj?!4U z(c|g+Jk&aNqs%yjb5~!pa;a843jrtyXUJqiOXmE_j=$J38Kzr|`L!YdVVT z4#*`LCDvMvTINY3+%haBy8xa_-!gl<*D^*YPGbzV6W3);(x5)X6RXpB% zR{Fj<;}j*G*)Ej1XhpI?OL^4aNPF781|9AyC2|N!i8O3BJC0A%7*mT#se6y6Q>SLC zR_V*w1i7<%AvLEBSWAjrtrM2bcdm{Mfa^O6-0#6$MCIXwhYn$phO1jAw@oT=C>O2B zP;6x2A&n9nQZwz6@dgcUoFO=pPEpTQQlw$RNsyBOskwCuQo{zw*v(CoA%TaRUMVWW z?~;Q1?;3CatLjM2QjW7Fpos6up9AfVHrDr7yoCy{@2Oz12Z`n zgqT=O?$FMDJ=xdgU{Ip9 zP~Qy?l9$tHo!b(&i%o@*Xr3hwBpBi3Nap7mEZH{N;li;`&;C0ZSXMLo)cifQT@xfP z*#v*+tAcDHs9xMwIJkve~CC2f3epx;KA!yT653PJMuR5Z0(T>@cCnl*p1>syW}t_0mK!ota&Ij==* z7d;jhS}AC%cqD}Ul}fj9!t(TMEx8*-uk&MiQ~U*ng)d0#7!z;bQ}z zak6=8VG(x-Q3b%>fTI>>c6OS}dv2y@Jk0^{92X>%@9_Z#g9nk{=UN%qH&A(=t~>Jn z1P+>!5D0{aHvUy!bqxp-9K{Zo+F1a(R`I3N{y(qyKO$gg#JVoiNs~>WUth1q0iP(P z$J@OMkFERN(u&CT_I8*3X_bxJ;T$nlpr`FA*ZJX51sLUe-xShtE&b7(3$y%mV@ZXW zS=z2msvQhl+`fNdF*1zAre8gI1l(I2oKYa_Zkcb$fO|Mz=@@Ntwgd#gw0xzkMRS^a zvgbz7jsR7VE$ymG{DguaJeCovoF?o527;^ybC<}lPREgINy|);CD}XGp?r82RQHKM?cPD%k&x)xln>$q9lUj zt&BWyoCBcYYTIuw_IjFR8|{HbO@a5-A*NhC|xtNeEC#%na9UFdtd8M ze!q7AaxIOwqoJTA#mC2Qx2^4=1;!$r0cq~~&j6TF;P3UJywPaocSzD4kP*1P0vwY> z&$G=P3rpZ%b4&H&j8xF4u}r^7HPiP}hk)B5yVhn(@hFb=CBWp90I}9EmCK%atWas{ zrq+5wR=4&8Ih*C+_>cndU3|^ur`|s{!)y9^qQ+9`X8p5LzSBlOuM{b1md8f;giQ1;d2qaa_@ncXKp_R92(W5tlwE}w`4PPN0^EV8dbv2E&^v&DOjcPr z-me^t7>4a_NG zfL=(l=N-*?Kvgn>hU}u(-kT)<``xLEta)&?e><5g@pRR{dwq-i>J@hWBa=?GINWO@ zqNc;?DzT7fjX{4dFvAFx7T|o%(tcl!#K}n(e|EL#t>nP+2W^cB@K^1W-};pic&r=& z#uz0WGOl{x%e=p7TzmZfcL!VwN>25IK(aNVfNACjjC!hlZk%|W*2C?jSDXD0X6y2X zh>Y(&yncZjO$bA~QGzSRT%?mAm9Z0aY+&p-wz#s9=LSy9rsy!n4JtCSv&=Bi>*lJy ziU^$-aUvi3%4)8<6Ld|9i6XL8Z5}1Nn&tmkjPZWn`(k&3p(~udP$5lonAD6|H1=gg z$ofjV6whxcdm%{t*9JwB($DjN`jtUIqr)-SJ{dFrxs0zIvNGFepeP*HNez zRHq1^a%om4QqJdr0OMmciqcE6zp?tipp>xVA#opga(4hlC#mlpAD4sB z*i++xrwHvv=!vl!^v%mF;NY@L0~1ZY-QsqT2!u)2NLZZM9wjew;%qW7X5h7g5HoX) 
zJAw;ohlHMd(yw~FatdTVOC~cLye6LDx``7A18b`=bOeR4h!QrlAD|_WRl$;3OQQwM ziI zs!UJ5mQj*he$$8QGK&2DDL6=Lv6Jvin% zA}!AAVs|H{@~fyivDF)VQZt{e!Hj0jYXOSKYnDv?<;>|%a+BwmSPLhO&#zXu3oAtpAJubCwER@d=)6V~H9uiLljeSnFvepIq9T7u# z7P_R8i~}m{75Sg*8YQ6z^|LOOTl5`Fq5IfzvV0|Q=Yby zNA^a9-jiQI|K2LeR{F{+LD@U~c6pdKcZ>Tx{2e)j@PX1R`uq#nn(uvkff-{21qJ2x z;=Ht?Z8Y2gc$=AKN0iRzk#B<%M~gU8gtbTTC`P4MgqgYX@e*u$aE<(@`IjTQxR!kK zl->*%QceZK5}TlvOSAp@j2OL1Sx{Mo1tXlzDs^Zz`E_>_?>cUI`9#A zaqVuxV7S;KRrbAQSRuoV3k_;nj{b|w`GWf=BJ(r?*$|OwNH!&AEO-g7msZNp*Jm%f zqZLS7c@=FVb|6S{(^DA?gO!(kF|z-eR>_*AaZOJ^ zB?lHmH=NxJxjF^V%OL$*2k=~%jGx}yi92PH*K!nzQ=6nX$>`Zl-6iD zQsWu_={jl9bv+sz>C$947@uB=k)0T^v4`{I_DYs}$wt#t8>*Wj@U9bC!X%cyuL<<= z;?EI=&HA553U?A&k@cP6DwUeZBZ^3g6W%;>Uq=<9^-UbZ!tgwL>|hZ-hgs7}Aj~Rf z9PkA0lKHyO^yJ&cxSX_#&+BHN@Tb7cSIRTg|6)=q5R=M|ZngenZtZ{;ma*(5`Pba) z;K?^#RMdYrm%&sZQxxf8%$|9E>l195+>58!yjjEnbX|889w%_)mT`p__QoWDWh*Lu zhJMlbugCs<^gjp4KZl-?=F4RNdHP?!mocD_c>O`j{khEldi)dz=cUnog&OkTul#$9 zZu_3#AJ^kcCqIkKeap3<0rRGSA literal 0 HcmV?d00001 diff --git a/docs/snapshotting/handling-page-faults-on-snapshot-resume.md b/docs/snapshotting/handling-page-faults-on-snapshot-resume.md new file mode 100644 index 00000000000..0372a3d02c7 --- /dev/null +++ b/docs/snapshotting/handling-page-faults-on-snapshot-resume.md @@ -0,0 +1,133 @@ +# Handling snapshot memory loading + +Firecracker allows for a better management of the microVM's memory loading +by letting users choose between relying on host OS to handle the page faults +when resuming from a snapshot, or having a dedicated userspace process for +dealing with page faults, with the help of +[Userfaultfd](https://www.kernel.org/doc/html/v4.18/admin-guide/mm/userfaultfd.html). + +## Kernel + +When resuming a microVM from a snapshot, loading the snapshotted guest's memory +(which is file-backed) into RAM is usually kernel's responsibility and is handled +on a per-page-fault basis. 
Each time the guest touches a page that is not already +in Firecracker's process memory, a page fault occurs, which triggers a context +switch and IO operation in order to bring that page into RAM. Depending on the +use case, doing this for every page can be time-consuming. + +## Userfaultfd + +Userfaultfd is a mechanism that passes that responsibility of handling page +fault events from kernel space to user space. In order to be able to interact +with this mechanism, userspace first needs to obtain a userfault object +(i.e. a file descriptor) by calling the [`userfaultfd` +syscall](https://man7.org/linux/man-pages/man2/userfaultfd.2.html). +Next, the memory address range must be registered with the userfault file +descriptor so that the userfault object can monitor page faults occurring for +those addresses. After this, the user space process can start reading and serving +events via the userfault file descriptor. These events will contain the address +that triggered the fault. The fault-handling thread can choose to handle these +events using these [operations](https://www.kernel.org/doc/html/latest/admin-guide/mm/userfaultfd.html#resolving-userfaults). + +In the flow described above, there are two userspace processes that interact +with each other in order to handle page faults: the Firecracker process and the +page fault handler. Please note that users are responsible for writing the page +fault handler process to monitor userfaultfd events and handle those events. + +Below is the interaction flow between Firecracker and the page fault handler +(designed by the users): + +- Page fault handler binds and listens on a unix domain socket in order + to be able to communicate with the Firecracker process. + +![](../images/uffd_flow1.png) + +Please note that when using the Jailer, the page fault handler process, UDS and +memory file must reside inside the jail. The UDS must only be accessible to +Firecracker and the page fault handler. 
+ +- PUT snapshot/load API call is issued towards Firecracker's API thread. + The request encapsulates in its body the path to the unix domain socket that + page fault handler listens to in order to communicate with Firecracker. +- Firecracker process creates the userfault object and obtains the userfault + file descriptor. +- The page fault handler privately mmaps the contents of the guest memory file. + +![](../images/uffd_flow2.png) + +- Firecracker anonymously mmaps memory based on the memory description found + in the microVM state file and registers the memory regions with the userfault + object in order for the userfaultfd to be aware of page fault events on these + addresses. Firecracker then connects to the socket previously opened by the page + fault handler process. + +![](../images/uffd_flow3.png) + +- Firecracker passes the userfault file descriptor and the guest memory layout + to the page fault handler process through the socket. + +![](../images/uffd_flow4.png) + +- After sending the necessary information to the page fault handler, Firecracker + continues with the normal cycle to restore from snapshot. It reads from the microVM + state file the relevant serialized components and loads them into memory. + +- Page faults that occur while Firecracker is touching guest memory are handled + by the page fault handler process, which listens for events on the userfault file + descriptor that Firecracker previously sent. When a page fault event happens, + the page fault handler issues `UFFDIO_COPY` to load the previously mmapped file + contents into the corresponding memory region. + +After Firecracker sends the payload (i.e. memory mappings and file descriptor), no +other communication happens on the UDS (or otherwise) between Firecracker +and the page fault handler process. + +### Userfaultfd interaction with balloon + +The balloon device allows the host to reclaim memory from a microVM. For more +details on balloon, please refer to [this doc](../ballooning.md). 
+ +When the balloon device asks for removal of a memory range, Firecracker calls +`madvise` with the `MADV_DONTNEED` flag in order to let the kernel know that it +can free up memory found in that specific area. On such a system call, the +userfaultfd interface sends `UFFD_EVENT_REMOVE`. + +When implementing the logic for the page fault handler, users must identify events +of type `UFFD_EVENT_REMOVE` and handle them by zeroing out those pages. This is +because the memory is removed, but the area still remains monitored by userfaultfd. +After a cycle of inflation and deflation, page faults might happen again for memory +ranges that have been removed by balloon (and subsequently zeroed out by the page +fault handler). In such a case, the page fault handler process must zero out the +faulted page (instead of bringing it from file), as recommended by [the userfaultfd +documentation](https://www.kernel.org/doc/html/latest/admin-guide/mm/userfaultfd.html#non-cooperative-userfaultfd). + +In case of a compromised balloon driver, the page fault handler can get flooded with +`UFFD_EVENT_REMOVE` events. We recommend using the jailer's built-in cgroup functionality +as defense in depth, in order to limit resource usage of the Firecracker process. + +### Caveats + +If the handler process crashes while Firecracker is resuming the snapshot, Firecracker +will hang when a page fault occurs. This is because Firecracker is designed to +wait for the requested page to be made available. If the page fault handler process +is no longer around when this happens, Firecracker will wait forever. Users are +expected to monitor the page fault handler's status or gather metrics of hung +Firecracker processes and implement a recycle mechanism if necessary. + +It is the page fault handler process's responsibility to handle any errors that +might occur and also send signals to the Firecracker process to inform it of any +crashes/exits. 
The page fault handler can fetch Firecracker's PID through a `getsockopt` +call with the `SO_PEERCRED` option, which fetches credentials of the peer process that +is connected to the socket. The returned credentials contain: PID, GID and UID of +the peer process (Firecracker in the page fault handler's case). + +We recommend that the page fault handler includes timeouts for waiting on Firecracker +to connect to the UDS or send information over the UDS, in order to account for +unexpected cases when Firecracker crashes before being able to connect/send data. + +### Example + +An example of a handler process can be found [here](../../tests/host_tools/uffd/src/bin/valid_handler.rs). +The process is designed to tackle faults on a certain address by loading into +memory the entire region that the address belongs to, but users can choose any +other behavior that suits their use case best. \ No newline at end of file diff --git a/docs/snapshotting/snapshot-support.md b/docs/snapshotting/snapshot-support.md index 95ae3d668ff..9cf769a2e85 100644 --- a/docs/snapshotting/snapshot-support.md +++ b/docs/snapshotting/snapshot-support.md @@ -244,7 +244,7 @@ curl --unix-socket /tmp/firecracker.socket -i \ "snapshot_type": "Full", "snapshot_path": "./snapshot_file", "mem_file_path": "./mem_file", - "version": "0.23.0" + "version": "1.0.0" }' ``` @@ -299,7 +299,7 @@ curl --unix-socket /tmp/firecracker.socket -i \ "snapshot_type": "Diff", "snapshot_path": "./snapshot_file", "mem_file_path": "./mem_file", - "version": "0.23.0" + "version": "1.0.0" }' ``` @@ -376,6 +376,44 @@ If you want to load a snapshot, you can do that only **before** the microVM is c (the only resources that can be configured prior are the Logger and the Metrics systems) by sending the following API command: +```bash +curl --unix-socket /tmp/firecracker.socket -i \ + -X PUT 'http://localhost/snapshot/load' \ + -H 'Accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "snapshot_path": 
"./snapshot_file", + "mem_backend": { + "backend_path": "./mem_file", + "backend_type": "File" + }, + "enable_diff_snapshots": true, + "resume_vm": false + }' +``` + +The `backend_type` field represents the memory backend type used for loading the +snapshot. Accepted values are: + +- `File` - rely on the kernel to handle page faults when loading the contents of + the guest memory file into memory. +- `Uffd` - use a dedicated user space process to handle page faults that occur + for the guest memory range. Please refer to [this document](handling-page-faults-on-snapshot-resume.md) + for more details on handling page faults in the user space. + +The meaning of `backend_path` depends on the `backend_type` chosen: + +- if using `File`, then `backend_path` should contain the path to the snapshot's + memory file to be loaded. +- when using `Uffd`, `backend_path` refers to the path of the unix domain socket + used for communication between Firecracker and the user space process that handles + page faults. + +When relying on the OS to handle page faults, the command below is also accepted. +Note that the `mem_file_path` field is currently under the deprecation policy. +`mem_file_path` and `mem_backend` are mutually exclusive, therefore specifying them +both at the same time will return an error. + ```bash curl --unix-socket /tmp/firecracker.socket -i \ -X PUT 'http://localhost/snapshot/load' \ @@ -409,10 +447,11 @@ as they were to the original one. diff snapshot point of view). - The loaded microVM is now in the `Paused` state, so it needs to be resumed for it to run. - - The memory file pointed by `mem_file_path` **must** be considered immutable - from Firecracker and host point of view. It backs the guest OS memory for - read access through the page cache. External modification to this file - corrupts the guest memory and leads to undefined behavior. 
+ - The memory file (pointed to by `backend_path` when using the `File` backend type, + or pointed to by `mem_file_path`) **must** be considered immutable from the Firecracker + and host point of view. It backs the guest OS memory for read access through + the page cache. External modification to this file corrupts the guest memory + and leads to undefined behavior. - The file indicated by `snapshot_path`, that is used to load from, is released and no longer used by this process. - If `enable_diff_snapshots` is set, then diff snapshots can be taken @@ -463,7 +502,7 @@ function abnormally. ## Ensure continued network connectivity for clones -For recomandations related to continued network connectivity for multiple +For recommendations related to continued network connectivity for multiple clones created from a single Firecracker microVM snapshot please see [this doc](network-for-clones.md). ## Snapshot security and uniqueness From f4f2414e5e168248428d3146cef29f61423e92f1 Mon Sep 17 00:00:00 2001 From: Luminita Voicu Date: Wed, 27 Apr 2022 16:37:09 +0300 Subject: [PATCH 20/22] CI: update coverage Signed-off-by: Luminita Voicu --- tests/integration_tests/build/test_coverage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration_tests/build/test_coverage.py b/tests/integration_tests/build/test_coverage.py index 54693cabbf5..ef466670a25 100644 --- a/tests/integration_tests/build/test_coverage.py +++ b/tests/integration_tests/build/test_coverage.py @@ -29,9 +29,9 @@ # Checkout the cpuid crate. In the future other # differences may appear. 
if utils.is_io_uring_supported(): - COVERAGE_DICT = {"Intel": 85.12, "AMD": 84.60, "ARM": 84.17} + COVERAGE_DICT = {"Intel": 84.89, "AMD": 84.38, "ARM": 83.96} else: - COVERAGE_DICT = {"Intel": 82.14, "AMD": 81.62, "ARM": 81.17} + COVERAGE_DICT = {"Intel": 81.94, "AMD": 81.43, "ARM": 80.96} PROC_MODEL = proc.proc_type() From e69921226ba6f149ebc6d42c901898c1af0168c6 Mon Sep 17 00:00:00 2001 From: Luminita Voicu Date: Thu, 28 Apr 2022 09:24:11 +0300 Subject: [PATCH 21/22] uffd: fix container ID Signed-off-by: Luminita Voicu --- tools/devtool | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/devtool b/tools/devtool index c6963536a20..6e5269d022a 100755 --- a/tools/devtool +++ b/tools/devtool @@ -72,7 +72,7 @@ DEVCTR_IMAGE_NO_TAG="public.ecr.aws/firecracker/fcuvm" # Development container tag -DEVCTR_IMAGE_TAG="v34_uffd" +DEVCTR_IMAGE_TAG="v35" # Development container image (name:tag) # This should be updated whenever we upgrade the development container. From bb4dd75b5e520a0a3809b479c5c5b9332186061c Mon Sep 17 00:00:00 2001 From: Luminita Voicu Date: Thu, 28 Apr 2022 15:18:58 +0300 Subject: [PATCH 22/22] CI: change dev S3 bucket for fetching artifacts Signed-off-by: Luminita Voicu --- tests/framework/artifacts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/framework/artifacts.py b/tests/framework/artifacts.py index 501c2285ead..b2933b7711a 100644 --- a/tests/framework/artifacts.py +++ b/tests/framework/artifacts.py @@ -304,7 +304,7 @@ class ArtifactCollection: PLATFORM = platform.machine() # S3 bucket structure. - ARTIFACTS_ROOT = 'ci-artifacts-uffd' + ARTIFACTS_ROOT = 'ci-artifacts' ARTIFACTS_DISKS = '/disks/' + PLATFORM + "/" ARTIFACTS_KERNELS = '/kernels/' + PLATFORM + "/" ARTIFACTS_MICROVMS = '/microvms/'