Skip to content

Commit 8fc8312

Browse files
authored
[dns-server] convert DNS server API into a trait (#6079)
Straightforward, and resulted in some nice cleanup.
1 parent d993746 commit 8fc8312

File tree

15 files changed

+248
-243
lines changed

15 files changed

+248
-243
lines changed

Cargo.lock

Lines changed: 13 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ members = [
2626
"dev-tools/releng",
2727
"dev-tools/xtask",
2828
"dns-server",
29+
"dns-server-api",
2930
"end-to-end-tests",
3031
"gateway-cli",
3132
"gateway-test-utils",
@@ -119,6 +120,7 @@ default-members = [
119120
# hakari to not work as well and build times to be longer.
120121
# See omicron#4392.
121122
"dns-server",
123+
"dns-server-api",
122124
# Do not include end-to-end-tests in the list of default members, as its
123125
# tests only work on a deployed control plane.
124126
"gateway-cli",
@@ -279,6 +281,7 @@ derive-where = "1.2.7"
279281
diesel = { version = "2.1.6", features = ["postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] }
280282
diesel-dtrace = { git = "https://github.com/oxidecomputer/diesel-dtrace", branch = "main" }
281283
dns-server = { path = "dns-server" }
284+
dns-server-api = { path = "dns-server-api" }
282285
dns-service-client = { path = "clients/dns-service-client" }
283286
dpd-client = { path = "clients/dpd-client" }
284287
dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] }

dev-tools/openapi-manager/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ anyhow.workspace = true
1212
atomicwrites.workspace = true
1313
camino.workspace = true
1414
clap.workspace = true
15+
dns-server-api.workspace = true
1516
dropshot.workspace = true
1617
fs-err.workspace = true
1718
indent_write.workspace = true

dev-tools/openapi-manager/src/spec.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,16 @@ use openapiv3::OpenAPI;
1414
/// All APIs managed by openapi-manager.
1515
pub fn all_apis() -> Vec<ApiSpec> {
1616
vec![
17+
ApiSpec {
18+
title: "Internal DNS".to_string(),
19+
version: "0.0.1".to_string(),
20+
description: "API for the internal DNS server".to_string(),
21+
boundary: ApiBoundary::Internal,
22+
api_description:
23+
dns_server_api::dns_server_api::stub_api_description,
24+
filename: "dns-server.json".to_string(),
25+
extra_validation: None,
26+
},
1727
ApiSpec {
1828
title: "Nexus internal API".to_string(),
1929
version: "0.0.1".to_string(),

dns-server-api/Cargo.toml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
[package]
2+
name = "dns-server-api"
3+
version = "0.1.0"
4+
edition = "2021"
5+
license = "MPL-2.0"
6+
7+
[lints]
8+
workspace = true
9+
10+
[dependencies]
11+
chrono.workspace = true
12+
dropshot.workspace = true
13+
omicron-workspace-hack.workspace = true
14+
schemars.workspace = true
15+
serde.workspace = true

dns-server-api/src/lib.rs

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
// This Source Code Form is subject to the terms of the Mozilla Public
2+
// License, v. 2.0. If a copy of the MPL was not distributed with this
3+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4+
5+
//! Dropshot API for configuring DNS namespace.
6+
//!
7+
//! ## Shape of the API
8+
//!
9+
//! The DNS configuration API has just two endpoints: PUT and GET of the entire
10+
//! DNS configuration. This is pretty anti-REST. But it's important to think
11+
//! about how this server fits into the rest of the system. When changes are
12+
//! made to DNS data, they're grouped together and assigned a monotonically
13+
//! increasing generation number. The DNS data is first stored into CockroachDB
14+
//! and then propagated from a distributed fleet of Nexus instances to a
15+
//! distributed fleet of these DNS servers. If we accepted individual updates to
16+
//! DNS names, then propagating a particular change would be non-atomic, and
17+
//! Nexus would have to do a lot more work to ensure (1) that all changes were
18+
//! propagated (even if it crashes) and (2) that they were propagated in the
19+
//! correct order (even if two Nexus instances concurrently propagate separate
20+
//! changes).
21+
//!
22+
//! This DNS server supports hosting multiple zones. We could imagine supporting
23+
//! separate endpoints to update the DNS data for a particular zone. That feels
24+
//! nicer (although it's not clear what it would buy us). But as with updates to
25+
//! multiple names, Nexus's job is potentially much easier if the entire state
26+
//! for all zones is updated at once. (Otherwise, imagine how Nexus would
27+
//! implement _renaming_ one zone to another without loss of service. With
28+
//! a combined endpoint and generation number for all zones, all that's necessary
29+
//! is to configure a new zone with all the same names, and then remove the old
30+
//! zone later in another update. That can be managed by the same mechanism in
31+
//! Nexus that manages regular name updates. On the other hand, if there were
32+
//! separate endpoints with separate generation numbers, then Nexus has more to
33+
//! keep track of in order to do the rename safely.)
34+
//!
35+
//! See RFD 367 for more on DNS propagation.
36+
//!
37+
//! ## ETags and Conditional Requests
38+
//!
39+
//! It's idiomatic in HTTP to use ETags and conditional requests to provide
40+
//! synchronization. We could define an ETag to be just the current generation
41+
//! number of the server and honor standard `if-match` headers to fail requests
42+
//! where the generation number doesn't match what the client expects. This
43+
//! would be fine, but it's rather annoying:
44+
//!
45+
//! 1. When the client wants to propagate generation X, the client would have to
46+
//! make an extra request just to fetch the current ETag, just so it can put
47+
//! it into the conditional request.
48+
//!
49+
//! 2. If some other client changes the configuration in the meantime, the
50+
//! conditional request would fail and the client would have to take another
51+
//! lap (fetching the current config and potentially making another
52+
//! conditional PUT).
53+
//!
54+
//! 3. This approach would make synchronization opt-in. If a client (or just
55+
//! one errant code path) neglected to set the if-match header, we could do
56+
//! the wrong thing and cause the system to come to rest with the wrong DNS
57+
//! data.
58+
//!
59+
//! Since the semantics here are so simple (we only ever want to move the
60+
//! generation number forward), we don't bother with ETags or conditional
61+
//! requests. Instead we have the server implement the behavior we want, which
62+
//! is that when a request comes in to update DNS data to generation X, the
63+
//! server replies with one of:
64+
//!
65+
//! (1) the update has been applied and the server is now running generation X
66+
//! (client treats this as success)
67+
//!
68+
//! (2) the update was not applied because the server is already at generation X
69+
//! (client treats this as success)
70+
//!
71+
//! (3) the update was not applied because the server is already at a newer
72+
//! generation
73+
//! (client probably starts the whole propagation process over because its
74+
//! current view of the world is out of date)
75+
//!
76+
//! This way, the DNS data can never move backwards and the client only ever has
77+
//! to make one request.
78+
//!
79+
//! ## Concurrent updates
80+
//!
81+
//! Given that we've got just one API to update all the DNS zones, and given
82+
//! that it might therefore take a minute for a large zone, and also that there may
83+
//! be multiple Nexus instances trying to do it at the same time, we need to
84+
//! think a bit about what should happen if two Nexus instances do try to do it at the same
85+
//! time. Spoiler: we immediately fail any request to update the DNS data if
86+
//! there's already an update in progress.
87+
//!
88+
//! What else could we do? We could queue the incoming request behind the
89+
//! in-progress one. How large do we allow that queue to grow? At some point
90+
//! we'll need to stop queueing them. So why bother at all?
91+
92+
use std::{
93+
collections::HashMap,
94+
net::{Ipv4Addr, Ipv6Addr},
95+
};
96+
97+
use dropshot::{HttpError, HttpResponseOk, RequestContext};
98+
use schemars::JsonSchema;
99+
use serde::{Deserialize, Serialize};
100+
101+
#[dropshot::api_description]
102+
pub trait DnsServerApi {
103+
type Context;
104+
105+
#[endpoint(
106+
method = GET,
107+
path = "/config",
108+
)]
109+
async fn dns_config_get(
110+
rqctx: RequestContext<Self::Context>,
111+
) -> Result<HttpResponseOk<DnsConfig>, HttpError>;
112+
113+
#[endpoint(
114+
method = PUT,
115+
path = "/config",
116+
)]
117+
async fn dns_config_put(
118+
rqctx: RequestContext<Self::Context>,
119+
rq: dropshot::TypedBody<DnsConfigParams>,
120+
) -> Result<dropshot::HttpResponseUpdatedNoContent, dropshot::HttpError>;
121+
}
122+
123+
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
124+
pub struct DnsConfigParams {
125+
pub generation: u64,
126+
pub time_created: chrono::DateTime<chrono::Utc>,
127+
pub zones: Vec<DnsConfigZone>,
128+
}
129+
130+
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
131+
pub struct DnsConfig {
132+
pub generation: u64,
133+
pub time_created: chrono::DateTime<chrono::Utc>,
134+
pub time_applied: chrono::DateTime<chrono::Utc>,
135+
pub zones: Vec<DnsConfigZone>,
136+
}
137+
138+
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
139+
pub struct DnsConfigZone {
140+
pub zone_name: String,
141+
pub records: HashMap<String, Vec<DnsRecord>>,
142+
}
143+
144+
#[allow(clippy::upper_case_acronyms)]
145+
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)]
146+
#[serde(tag = "type", content = "data")]
147+
pub enum DnsRecord {
148+
A(Ipv4Addr),
149+
AAAA(Ipv6Addr),
150+
SRV(SRV),
151+
}
152+
153+
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)]
154+
#[serde(rename = "Srv")]
155+
pub struct SRV {
156+
pub prio: u16,
157+
pub weight: u16,
158+
pub port: u16,
159+
pub target: String,
160+
}

dns-server/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ anyhow.workspace = true
1212
camino.workspace = true
1313
chrono.workspace = true
1414
clap.workspace = true
15+
dns-server-api.workspace = true
1516
dns-service-client.workspace = true
1617
dropshot.workspace = true
1718
http.workspace = true

dns-server/src/bin/apigen.rs

Lines changed: 0 additions & 29 deletions
This file was deleted.

dns-server/src/dns_server.rs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@
77
//! The facilities here handle binding a UDP socket, receiving DNS messages on
88
//! that socket, and replying to them.
99
10-
use crate::dns_types::DnsRecord;
1110
use crate::storage;
1211
use crate::storage::QueryError;
1312
use crate::storage::Store;
1413
use anyhow::anyhow;
1514
use anyhow::Context;
15+
use dns_server_api::DnsRecord;
1616
use pretty_hex::*;
1717
use serde::Deserialize;
1818
use slog::{debug, error, info, o, trace, Logger};
@@ -234,12 +234,7 @@ fn dns_record_to_record(
234234
Ok(aaaa)
235235
}
236236

237-
DnsRecord::SRV(crate::dns_types::SRV {
238-
prio,
239-
weight,
240-
port,
241-
target,
242-
}) => {
237+
DnsRecord::SRV(dns_server_api::SRV { prio, weight, port, target }) => {
243238
let tgt = Name::from_str(&target).map_err(|error| {
244239
RequestError::ServFail(anyhow!(
245240
"serialization failed due to bad SRV target {:?}: {:#}",

dns-server/src/dns_types.rs

Lines changed: 0 additions & 50 deletions
This file was deleted.

0 commit comments

Comments
 (0)