Skip to content

Commit efe6e28

Browse files
committed
handle port change clock class update
Signed-off-by: Aneesh Puttur <[email protected]>
1 parent e8ce938 commit efe6e28

File tree

4 files changed

+102
-31
lines changed

4 files changed

+102
-31
lines changed

pkg/daemon/daemon.go

Lines changed: 75 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"strconv"
1313
"strings"
1414
"sync"
15+
"sync/atomic"
1516
"syscall"
1617
"time"
1718

@@ -57,6 +58,8 @@ var (
5758
clockIDRegEx = regexp.MustCompile(`\/dev\/ptp\d+`)
5859
)
5960

61+
// A per-process guard (ptpProcess.clockClassRunning) is sufficient for clock class updates because each process owns a unique config.
62+
6063
// ProcessManager manages a set of ptpProcess
6164
// which could be ptp4l, phc2sys or timemaster.
6265
// Processes in ProcessManager will be started
@@ -150,6 +153,7 @@ type ptpProcess struct {
150153
nodeProfile ptpv1.PtpProfile
151154
parentClockClass float64
152155
pmcCheck bool
156+
clockClassRunning atomic.Bool
153157
clockType event.ClockType
154158
ptpClockThreshold *ptpv1.PtpClockThreshold
155159
haProfile map[string][]string // stores list of interface name for each profile
@@ -170,6 +174,23 @@ func (p *ptpProcess) setStopped(val bool) {
170174
p.execMutex.Unlock()
171175
}
172176

177+
// TriggerPmcCheck sets the pmcCheck flag to true while holding execMutex, the same lock ConsumePmcCheck takes to read and clear it.
178+
func (p *ptpProcess) TriggerPmcCheck() {
179+
p.execMutex.Lock()
180+
p.pmcCheck = true
181+
p.execMutex.Unlock()
182+
}
183+
184+
// ConsumePmcCheck reads and clears the pmcCheck flag in a single critical section guarded by execMutex.
185+
// It returns the value the flag held before it was cleared; true means a PMC check should be performed.
186+
func (p *ptpProcess) ConsumePmcCheck() bool {
187+
p.execMutex.Lock()
188+
val := p.pmcCheck
189+
p.pmcCheck = false
190+
p.execMutex.Unlock()
191+
return val
192+
}
193+
173194
// Daemon is the main structure for linuxptp instance.
174195
// It contains all the necessary data to run linuxptp instance.
175196
type Daemon struct {
@@ -694,7 +715,7 @@ func (dn *Daemon) GetPhaseOffsetPinFilter(nodeProfile *ptpv1.PtpProfile) map[str
694715
// HandlePmcTicker flags every managed process whose name is ptp4lProcessName for a PMC check.
// NOTE(review): dn.processManager.process is ranged here without a visible lock — confirm callers serialize access.
func (dn *Daemon) HandlePmcTicker() {
695716
for _, p := range dn.processManager.process {
696717
if p.name == ptp4lProcessName {
697-
p.pmcCheck = true
718+
p.TriggerPmcCheck()
698719
}
699720
}
700721
}
@@ -737,6 +758,12 @@ func processStatus(c *net.Conn, processName, messageTag string, status int64) {
737758
}
738759

739760
func (p *ptpProcess) updateClockClass(c *net.Conn) {
761+
// Per-process single-flight guard
762+
if !p.clockClassRunning.CompareAndSwap(false, true) {
763+
glog.Infof("clock class update already running for %s, skipping this run", p.configName)
764+
return
765+
}
766+
defer p.clockClassRunning.Store(false)
740767
defer func() {
741768
if r := recover(); r != nil {
742769
glog.Errorf("updateClockClass Recovered in f %#v", r)
@@ -760,7 +787,7 @@ func (p *ptpProcess) updateClockClass(c *net.Conn) {
760787
// changed to print every minute or when the clock class changes
761788
clockClassOut = fmt.Sprintf("%s[%d]:[%s] CLOCK_CLASS_CHANGE %f\n", p.name, time.Now().Unix(), p.configName, p.parentClockClass)
762789
if c == nil {
763-
UpdateClockClassMetrics(clockClass) // no socket then update metrics
790+
UpdateClockClassMetrics(p.configName, clockClass) // no socket then update metrics
764791
} else {
765792
_, err := (*c).Write([]byte(clockClassOut))
766793
if err != nil {
@@ -850,12 +877,26 @@ func (p *ptpProcess) cmdRun(stdoutToSocket bool) {
850877
d.ProcessStatus(p.c, PtpProcessUp)
851878
}
852879
}
880+
// Run the clock class update check in its own goroutine, outside the scanner loop,
881+
// so that it keeps running even if the process hangs and produces no output.
882+
go func() {
883+
for {
884+
select {
885+
case <-p.exitCh:
886+
glog.Infof("Exiting pmcCheck%s...", p.name)
887+
return
888+
default:
889+
if p.ConsumePmcCheck() {
890+
p.updateClockClass(p.c)
891+
}
892+
// Add a small sleep to avoid a tight CPU loop
893+
time.Sleep(100 * time.Millisecond)
894+
}
895+
}
896+
}()
897+
853898
for scanner.Scan() {
854899
output := scanner.Text()
855-
if p.pmcCheck {
856-
p.pmcCheck = false
857-
go p.updateClockClass(p.c)
858-
}
859900

860901
if regexErr != nil || !logFilterRegex.MatchString(output) {
861902
fmt.Printf("%s\n", output)
@@ -929,7 +970,7 @@ func (p *ptpProcess) processPTPMetrics(output string) {
929970
logEntry := synce.ParseLog(output)
930971
p.ProcessSynceEvents(logEntry)
931972
} else {
932-
configName, source, ptpOffset, clockState, iface := extractMetrics(p.messageTag, p.name, p.ifaces, output)
973+
configName, source, ptpOffset, clockState, iface := extractMetrics(p.messageTag, p.name, p.ifaces, output, p.c == nil)
933974
if iface != "" { // for ptp4l/phc2sys this function only update metrics
934975
var values map[event.ValueType]interface{}
935976
ifaceName := masterOffsetIface.getByAlias(configName, iface).name
@@ -950,6 +991,17 @@ func (p *ptpProcess) processPTPMetrics(output string) {
950991
state = event.PTP_HOLDOVER // consider s1 state as holdover,this passed to event to create metrics and events
951992
}
952993
p.ProcessTs2PhcEvents(ptpOffset, source, ifaceName, state, values)
994+
} else if clockState == HOLDOVER || clockState == LOCKED {
995+
// in case of HOLDOVER or LOCKED state without an iface, the clock class still needs to be updated for T_G
996+
if p.name != ts2phcProcessName && p.name != syncEProcessName { // TGM announce clock class via events
997+
p.ConsumePmcCheck() // reset pmc check since we are updating clock class here
998+
// on faulty port or recovery of slave port there might be a clock class change
999+
go func() {
1000+
time.Sleep(50 * time.Millisecond)
1001+
p.updateClockClass(p.c)
1002+
glog.Infof("clock class updated %f", p.parentClockClass)
1003+
}()
1004+
}
9531005
}
9541006
}
9551007
}
@@ -961,6 +1013,9 @@ func (p *ptpProcess) cmdStop() {
9611013
return
9621014
}
9631015
p.setStopped(true)
1016+
// reset runtime flags
1017+
p.ConsumePmcCheck()
1018+
p.clockClassRunning.Store(false)
9641019
if p.cmd.Process != nil {
9651020
glog.Infof("Sending TERM to (%s) PID: %d", p.name, p.cmd.Process.Pid)
9661021
err := p.cmd.Process.Signal(syscall.SIGTERM)
@@ -1042,6 +1097,9 @@ func (p *ptpProcess) ProcessTs2PhcEvents(ptpOffset float64, source string, iface
10421097
if iface != "" && iface != clockRealTime {
10431098
iface = utils.GetAlias(iface)
10441099
}
1100+
if p.c != nil {
1101+
return // no metrics when socket is used
1102+
}
10451103
switch ptpState {
10461104
case event.PTP_LOCKED:
10471105
updateClockStateMetrics(p.name, iface, LOCKED)
@@ -1257,10 +1315,11 @@ func (p *ptpProcess) ProcessSynceEvents(logEntry synce.LogEntry) {
12571315
ExtendedSSM: 0,
12581316
})
12591317
state = sDeviceConfig.LastClockState
1260-
UpdateSynceQLMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, "SSM", logEntry.QL)
1261-
UpdateSynceQLMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, "Extended SSM", synce.QL_DEFAULT_ENHSSM)
1262-
UpdateSynceClockQlMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, int(logEntry.QL)+int(synce.QL_DEFAULT_ENHSSM))
1263-
1318+
if p.c == nil { // only update metrics if no socket is used
1319+
UpdateSynceQLMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, "SSM", logEntry.QL)
1320+
UpdateSynceQLMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, "Extended SSM", synce.QL_DEFAULT_ENHSSM)
1321+
UpdateSynceClockQlMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, int(logEntry.QL)+int(synce.QL_DEFAULT_ENHSSM))
1322+
}
12641323
} else if sDeviceConfig.ExtendedTlv == synce.ExtendedTLV_ENABLED {
12651324
var lastQLState *synce.QualityLevelInfo
12661325
var ok bool
@@ -1284,9 +1343,11 @@ func (p *ptpProcess) ProcessSynceEvents(logEntry synce.LogEntry) {
12841343
ExtendedSSM: lastQLState.ExtendedSSM,
12851344
Priority: 0,
12861345
})
1287-
UpdateSynceQLMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, "SSM", lastQLState.SSM)
1288-
UpdateSynceQLMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, "Extended SSM", logEntry.ExtQl)
1289-
UpdateSynceClockQlMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, int(lastQLState.SSM)+int(logEntry.ExtQl))
1346+
if p.c == nil {
1347+
UpdateSynceQLMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, "SSM", lastQLState.SSM)
1348+
UpdateSynceQLMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, "Extended SSM", logEntry.ExtQl)
1349+
UpdateSynceClockQlMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, int(lastQLState.SSM)+int(logEntry.ExtQl))
1350+
}
12901351

12911352
state = sDeviceConfig.LastClockState
12921353
} else if logEntry.QL != synce.QL_DEFAULT_SSM { //else we have only QL

pkg/daemon/daemon_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ func (tc *TestCase) cleanupMetrics() {
8383
daemon.FrequencyAdjustment.With(map[string]string{"from": tc.from, "process": tc.process, "node": tc.node, "iface": tc.iface}).Set(CLEANUP)
8484
daemon.Delay.With(map[string]string{"from": tc.from, "process": tc.process, "node": tc.node, "iface": tc.iface}).Set(CLEANUP)
8585
daemon.ClockState.With(map[string]string{"process": tc.process, "node": tc.node, "iface": tc.iface}).Set(CLEANUP)
86-
daemon.ClockClassMetrics.With(map[string]string{"process": tc.process, "node": tc.node}).Set(CLEANUP)
86+
daemon.ClockClassMetrics.With(map[string]string{"process": tc.process, "config": "ptp4l.0.config", "node": tc.node}).Set(CLEANUP)
8787
daemon.InterfaceRole.With(map[string]string{"process": tc.process, "node": tc.node, "iface": tc.iface}).Set(CLEANUP)
8888
}
8989

@@ -267,7 +267,7 @@ func Test_ProcessPTPMetrics(t *testing.T) {
267267
assert.Equal(tc.expectedClockState, testutil.ToFloat64(clockState), "ClockState does not match\n%s", tc.String())
268268
}
269269
if tc.expectedClockClassMetrics != SKIP {
270-
clockClassMetrics := daemon.ClockClassMetrics.With(map[string]string{"process": tc.process, "node": tc.node})
270+
clockClassMetrics := daemon.ClockClassMetrics.With(map[string]string{"process": tc.process, "config": "ptp4l.0.config", "node": tc.node})
271271
assert.Equal(tc.expectedClockClassMetrics, testutil.ToFloat64(clockClassMetrics), "ClockClassMetrics does not match\n%s", tc.String())
272272
}
273273
if tc.expectedInterfaceRole != SKIP {

pkg/daemon/metrics.go

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ var (
142142
Subsystem: PTPSubsystem,
143143
Name: "clock_class",
144144
Help: "6 = Locked, 7 = PRC unlocked in-spec, 52/187 = PRC unlocked out-of-spec, 135 = T-BC holdover in-spec, 165 = T-BC holdover out-of-spec, 248 = Default, 255 = Slave Only Clock",
145-
}, []string{"process", "node"})
145+
}, []string{"process", "node", "config"})
146146

147147
// InterfaceRole metrics to show current interface role
148148
InterfaceRole = prometheus.NewGaugeVec(
@@ -256,7 +256,7 @@ func updatePTPMetrics(from, process, iface string, ptpOffset, maxPtpOffset, freq
256256
}
257257

258258
// extractMetrics ...
259-
func extractMetrics(messageTag string, processName string, ifaces config.IFaces, output string) (configName, source string, offset float64, state string, iface string) {
259+
func extractMetrics(messageTag string, processName string, ifaces config.IFaces, output string, updateMetrics bool) (configName, source string, offset float64, state string, iface string) {
260260
configName = strings.Replace(strings.Replace(messageTag, "]", "", 1), "[", "", 1)
261261
if configName != "" {
262262
configName = strings.Split(configName, MessageTagSuffixSeperator)[0] // remove any suffix added to the configName
@@ -287,8 +287,10 @@ func extractMetrics(messageTag string, processName string, ifaces config.IFaces,
287287
if offsetSource == master {
288288
masterOffsetSource.set(configName, processName)
289289
}
290-
updatePTPMetrics(offsetSource, processName, ifaceName, ptpOffset, maxPtpOffset, frequencyAdjustment, delay)
291-
updateClockStateMetrics(processName, ifaceName, clockstate)
290+
if updateMetrics {
291+
updatePTPMetrics(offsetSource, processName, ifaceName, ptpOffset, maxPtpOffset, frequencyAdjustment, delay)
292+
updateClockStateMetrics(processName, ifaceName, clockstate)
293+
}
292294
}
293295
source = processName
294296
offset = ptpOffset
@@ -302,14 +304,18 @@ func extractMetrics(messageTag string, processName string, ifaces config.IFaces,
302304
if role == SLAVE {
303305
masterOffsetIface.set(configName, ifaces[portId-1].Name)
304306
slaveIface.set(configName, ifaces[portId-1].Name)
307+
state = LOCKED // initial state to indicate we are locked when slave is back for clockclass to trigger
305308
} else if role == FAULTY {
306309
if slaveIface.isFaulty(configName, ifaces[portId-1].Name) &&
307310
masterOffsetSource.get(configName) == ptp4lProcessName {
308-
updatePTPMetrics(master, processName, masterOffsetIface.get(configName).alias, faultyOffset, faultyOffset, 0, 0)
309-
updatePTPMetrics(phc, phc2sysProcessName, clockRealTime, faultyOffset, faultyOffset, 0, 0)
310-
updateClockStateMetrics(processName, masterOffsetIface.get(configName).alias, FREERUN)
311+
if updateMetrics {
312+
updatePTPMetrics(master, processName, masterOffsetIface.get(configName).alias, faultyOffset, faultyOffset, 0, 0)
313+
updatePTPMetrics(phc, phc2sysProcessName, clockRealTime, faultyOffset, faultyOffset, 0, 0)
314+
updateClockStateMetrics(processName, masterOffsetIface.get(configName).alias, FREERUN)
315+
}
311316
masterOffsetIface.set(configName, "")
312317
slaveIface.set(configName, "")
318+
state = HOLDOVER
313319
}
314320
}
315321
}
@@ -528,9 +534,9 @@ func UpdateInterfaceRoleMetrics(process string, iface string, role ptpPortRole)
528534
}
529535

530536
// UpdateClockClassMetrics sets the ClockClassMetrics gauge labelled with process ptp4lProcessName and node NodeName to clockClass; the added signature also labels the series with cfgName as "config".
531-
func UpdateClockClassMetrics(clockClass float64) {
537+
func UpdateClockClassMetrics(cfgName string, clockClass float64) {
532538
ClockClassMetrics.With(prometheus.Labels{
533-
"process": ptp4lProcessName, "node": NodeName}).Set(float64(clockClass))
539+
"process": ptp4lProcessName, "config": cfgName, "node": NodeName}).Set(float64(clockClass))
534540
}
535541

536542
func UpdateProcessStatusMetrics(process, cfgName string, status int64) {

pkg/event/event.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -216,10 +216,6 @@ func Init(nodeName string, stdOutToSocket bool, socketName string, processChanne
216216
frequencyTraceable: false,
217217
ReduceLog: true,
218218
}
219-
if clockClassMetric != nil {
220-
clockClassMetric.With(prometheus.Labels{
221-
"process": PTP4lProcessName, "node": nodeName}).Set(248)
222-
}
223219
StateRegisterer = NewStateNotifier()
224220
return ptpEvent
225221

@@ -683,7 +679,9 @@ connect:
683679
if event.WriteToLog && logDataValues != "" {
684680
logOut = append(logOut, logDataValues)
685681
}
686-
e.UpdateClockStateMetrics(event.State, string(event.ProcessName), event.IFace)
682+
if !e.stdoutToSocket {
683+
e.UpdateClockStateMetrics(event.State, string(event.ProcessName), event.IFace)
684+
}
687685
} else {
688686
// Update the in MemData
689687
dataDetails := e.addEvent(event)
@@ -884,6 +882,9 @@ func (e *EventHandler) GetPTPState(source EventSource, cfgName string) PTPState
884882

885883
// UpdateClockStateMetrics ...
886884
func (e *EventHandler) UpdateClockStateMetrics(state PTPState, process, iFace string) {
885+
if e.stdoutToSocket {
886+
return
887+
}
887888
labels := prometheus.Labels{
888889
"process": process, "node": e.nodeName, "iface": iFace}
889890
if state == PTP_LOCKED {
@@ -978,6 +979,9 @@ func registerMetrics(m *prometheus.GaugeVec) {
978979
}
979980

980981
func (e *EventHandler) unregisterMetrics(configName string, processName string) {
982+
if e.stdoutToSocket {
983+
return // no need to unregister metrics if events are going to socket
984+
}
981985
if data, ok := e.data[configName]; ok {
982986
for _, v := range data {
983987
if string(v.ProcessName) == processName || processName == "" {
@@ -1046,7 +1050,7 @@ func (e *EventHandler) UpdateClockClass(c net.Conn, clk ClockClassRequest) {
10461050
}
10471051
} else {
10481052
e.clockClassMetric.With(prometheus.Labels{
1049-
"process": PTP4lProcessName, "node": e.nodeName}).Set(float64(clockClass))
1053+
"process": PTP4lProcessName, "node": e.nodeName, "config": clk.cfgName}).Set(float64(clockClass))
10501054
}
10511055
fmt.Printf("%s", clockClassOut)
10521056
}

0 commit comments

Comments
 (0)