Skip to content

sled-agent: move instance configuration generation to Nexus #8002

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
204 changes: 72 additions & 132 deletions Cargo.lock

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -387,10 +387,10 @@ crossterm = { version = "0.28.1", features = ["event-stream"] }
# NOTE: if you change the pinned revision of the `crucible` dependencies, you
# must also update the references in package-manifest.toml to match the new
# revision.
crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "da3cf198a0e000bb89efc3a1c77d7ba09340a600" }
crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "da3cf198a0e000bb89efc3a1c77d7ba09340a600" }
crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "da3cf198a0e000bb89efc3a1c77d7ba09340a600" }
crucible-common = { git = "https://github.com/oxidecomputer/crucible", rev = "da3cf198a0e000bb89efc3a1c77d7ba09340a600" }
crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "45801597f410685015ac2704d044919a41e3ff75" }
crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "45801597f410685015ac2704d044919a41e3ff75" }
crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "45801597f410685015ac2704d044919a41e3ff75" }
crucible-common = { git = "https://github.com/oxidecomputer/crucible", rev = "45801597f410685015ac2704d044919a41e3ff75" }
# NOTE: See above!
csv = "1.3.1"
curve25519-dalek = "4"
Expand Down Expand Up @@ -604,10 +604,10 @@ progenitor-client = "0.9.1"
# NOTE: if you change the pinned revision of the `bhyve_api` and propolis
# dependencies, you must also update the references in package-manifest.toml to
# match the new revision.
bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "e5c85d84b0a51803caffb335a1063612edb02f6d" }
propolis_api_types = { git = "https://github.com/oxidecomputer/propolis", rev = "e5c85d84b0a51803caffb335a1063612edb02f6d" }
propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "e5c85d84b0a51803caffb335a1063612edb02f6d" }
propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "e5c85d84b0a51803caffb335a1063612edb02f6d" }
bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "060a204d91e401a368c700a09d24510b7cd2b0e4" }
propolis_api_types = { git = "https://github.com/oxidecomputer/propolis", rev = "060a204d91e401a368c700a09d24510b7cd2b0e4" }
propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "060a204d91e401a368c700a09d24510b7cd2b0e4" }
propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "060a204d91e401a368c700a09d24510b7cd2b0e4" }
# NOTE: see above!
proptest = "1.6.0"
qorb = "0.3.1"
Expand Down
1 change: 1 addition & 0 deletions clients/sled-agent-client/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,6 @@ reqwest = { workspace = true, features = [ "json", "rustls-tls", "stream" ] }
schemars.workspace = true
serde.workspace = true
serde_json.workspace = true
sled-agent-types.workspace = true
slog.workspace = true
uuid.workspace = true
17 changes: 1 addition & 16 deletions clients/sled-agent-client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ progenitor::generate_api!(
DiskVariant = omicron_common::disk::DiskVariant,
ExternalIpGatewayMap = omicron_common::api::internal::shared::ExternalIpGatewayMap,
Generation = omicron_common::api::external::Generation,
Hostname = omicron_common::api::external::Hostname,
ImportExportPolicy = omicron_common::api::external::ImportExportPolicy,
Inventory = nexus_sled_agent_shared::inventory::Inventory,
InventoryDisk = nexus_sled_agent_shared::inventory::InventoryDisk,
Expand Down Expand Up @@ -117,14 +118,6 @@ impl From<omicron_common::api::internal::nexus::VmmState> for types::VmmState {
}
}

impl From<omicron_common::api::external::InstanceCpuCount>
for types::InstanceCpuCount
{
fn from(s: omicron_common::api::external::InstanceCpuCount) -> Self {
Self(s.0)
}
}

impl From<types::VmmState> for omicron_common::api::internal::nexus::VmmState {
fn from(s: types::VmmState) -> Self {
use omicron_common::api::internal::nexus::VmmState as Output;
Expand Down Expand Up @@ -188,14 +181,6 @@ impl From<types::MigrationState>
}
}

impl From<types::InstanceCpuCount>
for omicron_common::api::external::InstanceCpuCount
{
fn from(s: types::InstanceCpuCount) -> Self {
Self(s.0)
}
}

impl From<omicron_common::api::internal::nexus::DiskRuntimeState>
for types::DiskRuntimeState
{
Expand Down
2 changes: 1 addition & 1 deletion nexus/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ serde_urlencoded.workspace = true
serde_with.workspace = true
sha2.workspace = true
sled-agent-client.workspace = true
sled-agent-types.workspace = true
slog.workspace = true
slog-async.workspace = true
slog-dtrace.workspace = true
Expand Down Expand Up @@ -162,7 +163,6 @@ pretty_assertions.workspace = true
rcgen.workspace = true
regex.workspace = true
similar-asserts.workspace = true
sled-agent-types.workspace = true
sp-sim.workspace = true
rustls.workspace = true
subprocess.workspace = true
Expand Down
6 changes: 0 additions & 6 deletions nexus/db-model/src/instance_cpu_count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,3 @@ where
.map_err(|e| e.into())
}
}

impl From<InstanceCpuCount> for sled_agent_client::types::InstanceCpuCount {
fn from(i: InstanceCpuCount) -> Self {
Self(i.0.0)
}
}
93 changes: 16 additions & 77 deletions nexus/src/app/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ use super::MAX_VCPU_PER_INSTANCE;
use super::MIN_MEMORY_BYTES_PER_INSTANCE;
use crate::app::sagas;
use crate::app::sagas::NexusSaga;
use crate::cidata::InstanceCiData;
use crate::external_api::params;
use cancel_safe_futures::prelude::*;
use futures::future::Fuse;
Expand All @@ -41,6 +40,7 @@ use omicron_common::api::external::CreateResult;
use omicron_common::api::external::DataPageParams;
use omicron_common::api::external::DeleteResult;
use omicron_common::api::external::Error;
use omicron_common::api::external::Hostname;
use omicron_common::api::external::InstanceCpuCount;
use omicron_common::api::external::InstanceState;
use omicron_common::api::external::InternalContext;
Expand All @@ -64,9 +64,7 @@ use propolis_client::support::tungstenite::protocol::frame::coding::CloseCode;
use sagas::instance_common::ExternalIpAttach;
use sagas::instance_start;
use sagas::instance_update;
use sled_agent_client::types::InstanceBootSettings;
use sled_agent_client::types::InstanceMigrationTargetParams;
use sled_agent_client::types::InstanceProperties;
use sled_agent_client::types::VmmPutStateBody;
use std::matches;
use std::net::SocketAddr;
Expand Down Expand Up @@ -1080,7 +1078,7 @@ impl super::Nexus {
// TODO-cleanup: This can be removed when we are confident that no
// instances exist prior to the addition of strict hostname validation
// in the API.
let Ok(hostname) = db_instance.hostname.parse() else {
let Ok(hostname) = db_instance.hostname.parse::<Hostname>() else {
let msg = format!(
"The instance hostname '{}' is no longer valid. \
To access the data on its disks, this instance \
Expand All @@ -1107,57 +1105,6 @@ impl super::Nexus {
)
.await?;

let mut disk_reqs = vec![];
for disk in &disks {
// Disks that are attached to an instance should always have a slot
// assignment, but if for some reason this one doesn't, return an
// error instead of taking down the whole process.
let slot = match disk.slot {
Some(s) => s,
None => {
error!(self.log, "attached disk has no PCI slot assignment";
"disk_id" => %disk.id(),
"disk_name" => disk.name().to_string(),
"instance_id" => ?disk.runtime_state.attach_instance_id);

return Err(Error::internal_error(&format!(
"disk {} is attached but has no PCI slot assignment",
disk.id()
))
.into());
}
};

let volume = self
.db_datastore
.volume_checkout(
disk.volume_id(),
match operation {
InstanceRegisterReason::Start { vmm_id } =>
db::datastore::VolumeCheckoutReason::InstanceStart { vmm_id },
InstanceRegisterReason::Migrate { vmm_id, target_vmm_id } =>
db::datastore::VolumeCheckoutReason::InstanceMigrate { vmm_id, target_vmm_id },
}
)
.await?;

disk_reqs.push(sled_agent_client::types::InstanceDisk {
disk_id: disk.id(),
name: disk.name().to_string(),
slot: slot.0,
read_only: false,
vcr_json: volume.data().to_owned(),
});
}

// The routines that maintain an instance's boot options are supposed to
// guarantee that the boot disk ID, if present, is the ID of an attached
// disk. If this invariant isn't upheld, Propolis will catch the failure
// when it processes its received VM configuration.
let boot_settings = db_instance
.boot_disk_id
.map(|id| InstanceBootSettings { order: vec![id] });

let nics = self
.db_datastore
.derive_guest_network_interface_info(&opctx, &authz_instance)
Expand Down Expand Up @@ -1271,28 +1218,25 @@ impl super::Nexus {
.unwrap(),
}),
)
.await?
.into_iter();
.await?;

let ssh_keys: Vec<String> =
ssh_keys.map(|ssh_key| ssh_key.public_key).collect();
let vmm_spec = self
.generate_vmm_spec(
&operation,
db_instance,
&disks,
&nics,
&ssh_keys,
)
.await?;

let metadata = sled_agent_client::types::InstanceMetadata {
silo_id: authz_silo.id(),
project_id: authz_project.id(),
};

// Ask the sled agent to begin the state change. Then update the
// database to reflect the new intermediate state. If this update is
// not the newest one, that's fine. That might just mean the sled agent
// beat us to it.

let instance_hardware = sled_agent_client::types::InstanceHardware {
properties: InstanceProperties {
ncpus: db_instance.ncpus.into(),
memory: db_instance.memory.into(),
hostname,
},
let local_config = sled_agent_client::types::InstanceSledLocalConfig {
hostname,
nics,
source_nat,
ephemeral_ip,
Expand All @@ -1304,12 +1248,6 @@ impl super::Nexus {
host_domain: None,
search_domains: Vec::new(),
},
disks: disk_reqs,
boot_settings,
cloud_init_bytes: Some(base64::Engine::encode(
&base64::engine::general_purpose::STANDARD,
db_instance.generate_cidata(&ssh_keys)?,
)),
};

let instance_id = InstanceUuid::from_untyped_uuid(db_instance.id());
Expand Down Expand Up @@ -1338,7 +1276,8 @@ impl super::Nexus {
.vmm_register(
propolis_id,
&sled_agent_client::types::InstanceEnsureBody {
hardware: instance_hardware,
vmm_spec,
local_config,
migration_id: db_instance.runtime().migration_id,
vmm_runtime,
instance_id,
Expand Down
Loading
Loading