Skip to content

Commit 2f0f151

Browse files
committed
Introduce cosmo sequencer interrupts
Instead of polling for our state changes, rely on the sequencer to deliver interrupts to us
1 parent d601eaf commit 2f0f151

File tree

6 files changed

+173
-33
lines changed

6 files changed

+173
-33
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

app/cosmo/base.toml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,12 @@ port = "F"
7474
pin = 2
7575
owner = {name = "sprot", notification = "rot_irq"}
7676

77+
[tasks.sys.config.gpio-irqs.seq_irq]
78+
# FPGA1_TO_SP_IRQ1_L
79+
port = "F"
80+
pin = 5
81+
owner = {name = "cosmo_seq", notification = "seq_irq"}
82+
7783
[tasks.spi2_driver]
7884
name = "drv-stm32h7-spi-server"
7985
priority = 3
@@ -175,7 +181,7 @@ stacksize = 2600
175181
start = true
176182
task-slots = ["sys", "i2c_driver", {spi_front = "spi3_driver"}, "jefe", "packrat", "auxflash", "spartan7_loader", "hf"]
177183
uses = ["mmio_sequencer", "mmio_info"]
178-
notifications = ["timer", "vcore"]
184+
notifications = ["timer", "vcore", "seq-irq"]
179185

180186
[tasks.ignition_flash]
181187
name = "drv-ignition-flash"

drv/cosmo-seq-server/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ zerocopy-derive = { workspace = true }
2828
[build-dependencies]
2929
build-fpga-regmap = { path = "../../build/fpga-regmap" }
3030
build-util = { path = "../../build/util" }
31+
build-stm32xx-sys = { path = "../../build/stm32xx-sys" }
3132
idol = { workspace = true }
3233

3334
[features]

drv/cosmo-seq-server/build.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use std::{fs, io::Write};
77
fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
88
build_util::expose_target_board();
99
build_util::build_notifications()?;
10+
build_stm32xx_sys::build_gpio_irq_pins()?;
1011

1112
let out_dir = build_util::out_dir();
1213
let out_file = out_dir.join("cosmo_fpga.rs");

drv/cosmo-seq-server/src/main.rs

Lines changed: 157 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,11 @@ enum Trace {
7070
our_state: PowerState,
7171
seq_state: Result<fmc_periph::A0Sm, u8>,
7272
},
73+
SequencerInterrupt {
74+
our_state: PowerState,
75+
seq_state: Result<fmc_periph::A0Sm, u8>,
76+
ifr: fmc_periph::IfrView,
77+
},
7378
PowerDownError(drv_cpu_seq_api::SeqError),
7479
Coretype {
7580
coretype0: bool,
@@ -85,6 +90,8 @@ enum Trace {
8590
pwrokn: u8,
8691
},
8792
Thermtrip,
93+
A0MapoInterrupt,
94+
SmerrInterrupt,
8895
}
8996
counted_ringbuf!(Trace, 128, Trace::None);
9097

@@ -133,6 +140,8 @@ const SP5R2_PULL: sys_api::Pull = sys_api::Pull::None;
133140
const SP5R3_PULL: sys_api::Pull = sys_api::Pull::None;
134141
const SP5R4_PULL: sys_api::Pull = sys_api::Pull::None;
135142

143+
use gpio_irq_pins::SEQ_IRQ;
144+
136145
////////////////////////////////////////////////////////////////////////////////
137146

138147
/// Helper type which includes both sequencer and NIC state machine states
@@ -223,6 +232,10 @@ fn init() -> Result<ServerImpl, SeqError> {
223232
sys.gpio_configure_input(SP5_TO_SP_SP5R3, SP5R3_PULL);
224233
sys.gpio_configure_input(SP5_TO_SP_SP5R4, SP5R4_PULL);
225234

235+
// Sequencer interrupt
236+
sys.gpio_configure_input(SEQ_IRQ, sys_api::Pull::None);
237+
sys.gpio_irq_configure(notifications::SEQ_IRQ_MASK, sys_api::Edge::Falling);
238+
226239
let spi_front = drv_spi_api::Spi::from(SPI_FRONT.get_task_id());
227240
let aux = drv_auxflash_api::AuxFlash::from(AUXFLASH.get_task_id());
228241

@@ -390,6 +403,7 @@ impl ServerImpl {
390403
fn log_state_registers(&self) -> StateMachineStates {
391404
let seq_api_status = (&self.seq.seq_api_status).into();
392405
let nic_api_status = (&self.seq.nic_api_status).into();
406+
393407
ringbuf_entry!(Trace::RegStateValues {
394408
seq_api_status,
395409
seq_raw_status: (&self.seq.seq_raw_status).into(),
@@ -513,6 +527,7 @@ impl ServerImpl {
513527
}
514528
};
515529

530+
self.enable_sequencer_interrupts();
516531
// Flip the host flash mux so the CPU can read from it
517532
// (this is secretly infallible on Cosmo, so we can unwrap it)
518533
self.hf.set_mux(drv_hf_api::HfMuxState::HostCPU).unwrap();
@@ -521,6 +536,8 @@ impl ServerImpl {
521536
| (PowerState::A0PlusHP, PowerState::A2)
522537
| (PowerState::A0Thermtrip, PowerState::A2)
523538
| (PowerState::A0Reset, PowerState::A2) => {
539+
// Disable our interrupts before we shut down
540+
self.disable_sequencer_interrupts();
524541
self.seq.power_ctrl.modify(|m| m.set_a0_en(false));
525542
let mut okay = false;
526543
for _ in 0..200 {
@@ -543,7 +560,6 @@ impl ServerImpl {
543560
// We can't do much else here, since we already cleared the
544561
// a0_en flag to disable the sequencer.
545562
}
546-
547563
// Flip the host flash mux so the SP can read from it
548564
// (this is secretly infallible on Cosmo, so we can unwrap it)
549565
self.hf.set_mux(drv_hf_api::HfMuxState::SP).unwrap();
@@ -571,13 +587,14 @@ impl ServerImpl {
571587

572588
/// Returns the current timer interval, in milliseconds
573589
///
574-
/// If we are in `A0`, then we are waiting for the NIC to come up; if we are
575-
/// in `A0PlusHP`, we're polling for a thermtrip or for someone disabling
576-
/// the NIC. In other states, there's no need to poll.
590+
/// If we are in `A0`, then we are waiting for the NIC to come up.
591+
/// Once we are in `A0PlusHP` we rely on sequencer interrupts for
592+
/// all our state transitions. We still want to catch an unexpected
593+
/// case of sequencer failure so poll for that case specifically.
577594
fn poll_interval(&self) -> Option<u32> {
578595
match self.state {
579596
PowerState::A0 => Some(10),
580-
PowerState::A0PlusHP => Some(100),
597+
PowerState::A0PlusHP => Some(1000),
581598
_ => None,
582599
}
583600
}
@@ -588,6 +605,134 @@ impl ServerImpl {
588605
set_timer_relative(interval, notifications::TIMER_MASK);
589606
}
590607
}
608+
609+
/// Powers down to A2, if that fails for some reason just
610+
/// log an error
611+
fn emergency_a2(&mut self, reason: StateChangeReason) {
612+
// Power down to A2, updating our internal state. We can't
613+
// handle errors here, so log them and continue.
614+
if let Err(e) = self.set_state_impl(PowerState::A2, reason) {
615+
ringbuf_entry!(Trace::PowerDownError(e))
616+
}
617+
}
618+
619+
fn enable_sequencer_interrupts(&mut self) {
620+
let _ = self.sys.gpio_irq_control(
621+
notifications::SEQ_IRQ_MASK,
622+
sys_api::IrqControl::Enable,
623+
);
624+
self.seq.ier.modify(|m| {
625+
m.set_fanfault(true);
626+
m.set_thermtrip(true);
627+
m.set_smerr_assert(true);
628+
m.set_a0mapo(true);
629+
m.set_nicmapo(true);
630+
m.set_amd_pwrok_fedge(true);
631+
m.set_amd_rstn_fedge(true);
632+
});
633+
}
634+
635+
fn disable_sequencer_interrupts(&mut self) {
636+
self.seq.ier.modify(|m| {
637+
m.set_fanfault(false);
638+
m.set_thermtrip(false);
639+
m.set_smerr_assert(false);
640+
m.set_a0mapo(false);
641+
m.set_nicmapo(false);
642+
m.set_amd_pwrok_fedge(false);
643+
m.set_amd_rstn_fedge(false);
644+
});
645+
let _ = self.sys.gpio_irq_control(
646+
notifications::SEQ_IRQ_MASK,
647+
sys_api::IrqControl::Disable,
648+
);
649+
}
650+
651+
fn handle_sequencer_interrupt(&mut self) {
652+
let ifr = self.seq.ifr.view();
653+
654+
let state = self.log_state_registers();
655+
ringbuf_entry!(Trace::SequencerInterrupt {
656+
our_state: self.state,
657+
seq_state: state.seq,
658+
ifr,
659+
});
660+
661+
enum InternalAction {
662+
Reset,
663+
ThermTrip,
664+
Smerr,
665+
Mapo,
666+
}
667+
668+
// We check these in lowest to highest priority. We start with
669+
// reset since we expect the CPU to handle that nicely.
670+
// Thermal trip is a terminal state in that we log it but don't
671+
// actually make any changes to the sequencer.
672+
// SMERR is treated as a higher priority than MAPO arbitrarily.
673+
// we probably(?) won't see multiple of these set at a time but
674+
// it's important to account for that case;
675+
676+
let mut action = InternalAction::Reset;
677+
678+
if ifr.amd_pwrok_fedge || ifr.amd_rstn_fedge {
679+
let rstn = self.seq.amd_reset_fedges.counts();
680+
let pwrokn = self.seq.amd_pwrok_fedges.counts();
681+
682+
// counters and ifr are cleared in the A2 -> A0 transition
683+
// host_sp_comms will be notified of this change and will
684+
// call back into this task to reboot the system (going to
685+
// A2 then back into A0)
686+
ringbuf_entry!(Trace::ResetCounts { rstn, pwrokn });
687+
action = InternalAction::Reset;
688+
}
689+
690+
if ifr.thermtrip {
691+
self.seq.ifr.modify(|h| h.set_thermtrip(false));
692+
ringbuf_entry!(Trace::Thermtrip);
693+
action = InternalAction::ThermTrip;
694+
// Great place for an ereport?
695+
}
696+
697+
if ifr.a0mapo {
698+
self.log_pg_registers();
699+
self.seq.ifr.modify(|h| h.set_a0mapo(false));
700+
ringbuf_entry!(Trace::A0MapoInterrupt);
701+
action = InternalAction::Mapo;
702+
// Great place for an ereport?
703+
}
704+
705+
if ifr.smerr_assert {
706+
self.seq.ifr.modify(|h| h.set_smerr_assert(false));
707+
ringbuf_entry!(Trace::SmerrInterrupt);
708+
action = InternalAction::Smerr;
709+
// Great place for an ereport?
710+
}
711+
// Fan Fault is unconnected
712+
// NIC MAPO is unconnected
713+
714+
match action {
715+
InternalAction::Reset => {
716+
// host_sp_comms will be notified of this change and will
717+
// call back into this task to reboot the system (going to
718+
// A2 then back into A0)
719+
self.set_state_internal(PowerState::A0Reset);
720+
}
721+
InternalAction::ThermTrip => {
722+
// This is a terminal state; we set our state to `A0Thermtrip`
723+
// but do not expect any other task to take action right now
724+
self.set_state_internal(PowerState::A0Thermtrip);
725+
}
726+
InternalAction::Mapo => {
727+
// This is a terminal state (for now)
728+
self.emergency_a2(StateChangeReason::A0Mapo);
729+
}
730+
InternalAction::Smerr => {
731+
// This is a terminal state (for now)
732+
self.emergency_a2(StateChangeReason::SmerrAssert);
733+
}
734+
};
735+
}
591736
}
592737

593738
impl idl::InOrderSequencerImpl for ServerImpl {
@@ -641,10 +786,14 @@ impl idl::InOrderSequencerImpl for ServerImpl {
641786

642787
impl NotificationHandler for ServerImpl {
643788
fn current_notification_mask(&self) -> u32 {
644-
notifications::TIMER_MASK
789+
notifications::TIMER_MASK | notifications::SEQ_IRQ_MASK
645790
}
646791

647792
fn handle_notification(&mut self, bits: u32) {
793+
if (bits & notifications::SEQ_IRQ_MASK) != 0 {
794+
self.handle_sequencer_interrupt();
795+
}
796+
648797
if (bits & notifications::TIMER_MASK) == 0 {
649798
return;
650799
}
@@ -672,32 +821,7 @@ impl NotificationHandler for ServerImpl {
672821
});
673822
self.log_pg_registers();
674823

675-
// Power down to A2, updating our internal state. We can't
676-
// handle errors here, so log them and continue.
677-
if let Err(e) = self
678-
.set_state_impl(PowerState::A2, StateChangeReason::Other)
679-
{
680-
ringbuf_entry!(Trace::PowerDownError(e))
681-
}
682-
} else {
683-
// Check that the FPGA has not logged any reset conditions from
684-
// the CPU.
685-
let ifr = self.seq.ifr.view();
686-
if ifr.thermtrip {
687-
self.seq.ifr.modify(|h| h.set_thermtrip(false));
688-
ringbuf_entry!(Trace::Thermtrip);
689-
self.set_state_internal(PowerState::A0Thermtrip)
690-
// this is a terminal state (for now)
691-
} else if ifr.amd_pwrok_fedge || ifr.amd_rstn_fedge {
692-
let rstn = self.seq.amd_reset_fedges.counts();
693-
let pwrokn = self.seq.amd_pwrok_fedges.counts();
694-
ringbuf_entry!(Trace::ResetCounts { rstn, pwrokn });
695-
// counters are cleared in the A2 -> A0 transition
696-
self.set_state_internal(PowerState::A0Reset);
697-
// host_sp_comms will be notified of this change and will
698-
// call back into this task to reboot the system (going to
699-
// A2 then back into A0)
700-
}
824+
self.emergency_a2(StateChangeReason::Unknown);
701825
}
702826
}
703827

@@ -721,3 +845,4 @@ mod fmc_periph {
721845
}
722846

723847
include!(concat!(env!("OUT_DIR"), "/notifications.rs"));
848+
include!(concat!(env!("OUT_DIR"), "/gpio_irq_pins.rs"));

drv/cpu-seq-api/src/lib.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,12 @@ pub enum StateChangeReason {
7171
HostReboot,
7272
/// The system powered off because a component has overheated.
7373
Overheat,
74+
/// A0 MAPO fault from the sequencer
75+
A0Mapo,
76+
/// System Management Error
77+
SmerrAssert,
78+
/// The system powered off for reasons we can't explain
79+
Unknown,
7480
}
7581

7682
/// Indicates the result of a power state transition.

0 commit comments

Comments
 (0)