Skip to content

Commit c5380f8

Browse files
committed
handle port change clock class update
Signed-off-by: Aneesh Puttur <[email protected]>
1 parent e8ce938 commit c5380f8

File tree

4 files changed

+102
-29
lines changed

4 files changed

+102
-29
lines changed

pkg/daemon/daemon.go

Lines changed: 73 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"strconv"
1313
"strings"
1414
"sync"
15+
"sync/atomic"
1516
"syscall"
1617
"time"
1718

@@ -150,6 +151,7 @@ type ptpProcess struct {
150151
nodeProfile ptpv1.PtpProfile
151152
parentClockClass float64
152153
pmcCheck bool
154+
clockClassRunning int32 // 0 = not running, 1 = running
153155
clockType event.ClockType
154156
ptpClockThreshold *ptpv1.PtpClockThreshold
155157
haProfile map[string][]string // stores list of interface name for each profile
@@ -170,6 +172,23 @@ func (p *ptpProcess) setStopped(val bool) {
170172
p.execMutex.Unlock()
171173
}
172174

175+
// SetPmcCheck updates the pmcCheck flag in a thread-safe way
176+
func (p *ptpProcess) SetPmcCheck(val bool) {
177+
p.execMutex.Lock()
178+
p.pmcCheck = val
179+
p.execMutex.Unlock()
180+
}
181+
182+
// ConsumePmcCheck atomically reads and resets the pmcCheck flag.
183+
// It returns true if a PMC check should be performed.
184+
func (p *ptpProcess) ConsumePmcCheck() bool {
185+
p.execMutex.Lock()
186+
val := p.pmcCheck
187+
p.pmcCheck = false
188+
p.execMutex.Unlock()
189+
return val
190+
}
191+
173192
// Daemon is the main structure for linuxptp instance.
174193
// It contains all the necessary data to run linuxptp instance.
175194
type Daemon struct {
@@ -694,7 +713,7 @@ func (dn *Daemon) GetPhaseOffsetPinFilter(nodeProfile *ptpv1.PtpProfile) map[str
694713
func (dn *Daemon) HandlePmcTicker() {
695714
for _, p := range dn.processManager.process {
696715
if p.name == ptp4lProcessName {
697-
p.pmcCheck = true
716+
p.SetPmcCheck(true)
698717
}
699718
}
700719
}
@@ -737,6 +756,12 @@ func processStatus(c *net.Conn, processName, messageTag string, status int64) {
737756
}
738757

739758
func (p *ptpProcess) updateClockClass(c *net.Conn) {
759+
// Single-flight guard to prevent concurrent executions per ptpProcess
760+
if !atomic.CompareAndSwapInt32(&p.clockClassRunning, 0, 1) {
761+
glog.Infof("clock class update already running, skipping this run")
762+
return
763+
}
764+
defer atomic.StoreInt32(&p.clockClassRunning, 0)
740765
defer func() {
741766
if r := recover(); r != nil {
742767
glog.Errorf("updateClockClass Recovered in f %#v", r)
@@ -760,7 +785,7 @@ func (p *ptpProcess) updateClockClass(c *net.Conn) {
760785
// change to print every minute or when the clock class changes
761786
clockClassOut = fmt.Sprintf("%s[%d]:[%s] CLOCK_CLASS_CHANGE %f\n", p.name, time.Now().Unix(), p.configName, p.parentClockClass)
762787
if c == nil {
763-
UpdateClockClassMetrics(clockClass) // no socket then update metrics
788+
UpdateClockClassMetrics(p.configName, clockClass) // no socket then update metrics
764789
} else {
765790
_, err := (*c).Write([]byte(clockClassOut))
766791
if err != nil {
@@ -850,12 +875,26 @@ func (p *ptpProcess) cmdRun(stdoutToSocket bool) {
850875
d.ProcessStatus(p.c, PtpProcessUp)
851876
}
852877
}
878+
// moved outside the scanner loop so the clock class update routine still runs
879+
// even if process hangs
880+
go func() {
881+
for {
882+
select {
883+
case <-p.exitCh:
884+
glog.Infof("Exiting pmcCheck%s...", p.name)
885+
return
886+
default:
887+
if p.ConsumePmcCheck() {
888+
p.updateClockClass(p.c)
889+
}
890+
// Add a short sleep to avoid a tight CPU loop
891+
time.Sleep(100 * time.Millisecond)
892+
}
893+
}
894+
}()
895+
853896
for scanner.Scan() {
854897
output := scanner.Text()
855-
if p.pmcCheck {
856-
p.pmcCheck = false
857-
go p.updateClockClass(p.c)
858-
}
859898

860899
if regexErr != nil || !logFilterRegex.MatchString(output) {
861900
fmt.Printf("%s\n", output)
@@ -929,7 +968,7 @@ func (p *ptpProcess) processPTPMetrics(output string) {
929968
logEntry := synce.ParseLog(output)
930969
p.ProcessSynceEvents(logEntry)
931970
} else {
932-
configName, source, ptpOffset, clockState, iface := extractMetrics(p.messageTag, p.name, p.ifaces, output)
971+
configName, source, ptpOffset, clockState, iface := extractMetrics(p.messageTag, p.name, p.ifaces, output, p.c == nil)
933972
if iface != "" { // for ptp4l/phc2sys this function only update metrics
934973
var values map[event.ValueType]interface{}
935974
ifaceName := masterOffsetIface.getByAlias(configName, iface).name
@@ -950,6 +989,17 @@ func (p *ptpProcess) processPTPMetrics(output string) {
950989
state = event.PTP_HOLDOVER // consider s1 state as holdover,this passed to event to create metrics and events
951990
}
952991
p.ProcessTs2PhcEvents(ptpOffset, source, ifaceName, state, values)
992+
} else if clockState == HOLDOVER || clockState == LOCKED {
993+
// in case of holdover without iface, still need to update clock class for T_G
994+
if p.name != ts2phcProcessName && p.name != syncEProcessName { // TGM announce clock class via events
995+
p.SetPmcCheck(false) // reset pmc check since we are updating clock class here
996+
// on faulty port or recovery of slave port there might be a clock class change
997+
go func() {
998+
time.Sleep(50 * time.Millisecond)
999+
p.updateClockClass(p.c)
1000+
glog.Infof("clock class updated %f", p.parentClockClass)
1001+
}()
1002+
}
9531003
}
9541004
}
9551005
}
@@ -961,6 +1011,9 @@ func (p *ptpProcess) cmdStop() {
9611011
return
9621012
}
9631013
p.setStopped(true)
1014+
// reset runtime flags
1015+
p.SetPmcCheck(false)
1016+
atomic.StoreInt32(&p.clockClassRunning, 0)
9641017
if p.cmd.Process != nil {
9651018
glog.Infof("Sending TERM to (%s) PID: %d", p.name, p.cmd.Process.Pid)
9661019
err := p.cmd.Process.Signal(syscall.SIGTERM)
@@ -1042,6 +1095,9 @@ func (p *ptpProcess) ProcessTs2PhcEvents(ptpOffset float64, source string, iface
10421095
if iface != "" && iface != clockRealTime {
10431096
iface = utils.GetAlias(iface)
10441097
}
1098+
if p.c != nil {
1099+
return // no metrics when socket is used
1100+
}
10451101
switch ptpState {
10461102
case event.PTP_LOCKED:
10471103
updateClockStateMetrics(p.name, iface, LOCKED)
@@ -1257,10 +1313,11 @@ func (p *ptpProcess) ProcessSynceEvents(logEntry synce.LogEntry) {
12571313
ExtendedSSM: 0,
12581314
})
12591315
state = sDeviceConfig.LastClockState
1260-
UpdateSynceQLMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, "SSM", logEntry.QL)
1261-
UpdateSynceQLMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, "Extended SSM", synce.QL_DEFAULT_ENHSSM)
1262-
UpdateSynceClockQlMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, int(logEntry.QL)+int(synce.QL_DEFAULT_ENHSSM))
1263-
1316+
if p.c == nil { // only update metrics if no socket is used
1317+
UpdateSynceQLMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, "SSM", logEntry.QL)
1318+
UpdateSynceQLMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, "Extended SSM", synce.QL_DEFAULT_ENHSSM)
1319+
UpdateSynceClockQlMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, int(logEntry.QL)+int(synce.QL_DEFAULT_ENHSSM))
1320+
}
12641321
} else if sDeviceConfig.ExtendedTlv == synce.ExtendedTLV_ENABLED {
12651322
var lastQLState *synce.QualityLevelInfo
12661323
var ok bool
@@ -1284,9 +1341,11 @@ func (p *ptpProcess) ProcessSynceEvents(logEntry synce.LogEntry) {
12841341
ExtendedSSM: lastQLState.ExtendedSSM,
12851342
Priority: 0,
12861343
})
1287-
UpdateSynceQLMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, "SSM", lastQLState.SSM)
1288-
UpdateSynceQLMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, "Extended SSM", logEntry.ExtQl)
1289-
UpdateSynceClockQlMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, int(lastQLState.SSM)+int(logEntry.ExtQl))
1344+
if p.c == nil {
1345+
UpdateSynceQLMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, "SSM", lastQLState.SSM)
1346+
UpdateSynceQLMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, "Extended SSM", logEntry.ExtQl)
1347+
UpdateSynceClockQlMetrics(syncEProcessName, p.configName, iface, sDeviceConfig.NetworkOption, sDeviceConfig.Name, int(lastQLState.SSM)+int(logEntry.ExtQl))
1348+
}
12901349

12911350
state = sDeviceConfig.LastClockState
12921351
} else if logEntry.QL != synce.QL_DEFAULT_SSM { //else we have only QL

pkg/daemon/daemon_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ func (tc *TestCase) cleanupMetrics() {
8383
daemon.FrequencyAdjustment.With(map[string]string{"from": tc.from, "process": tc.process, "node": tc.node, "iface": tc.iface}).Set(CLEANUP)
8484
daemon.Delay.With(map[string]string{"from": tc.from, "process": tc.process, "node": tc.node, "iface": tc.iface}).Set(CLEANUP)
8585
daemon.ClockState.With(map[string]string{"process": tc.process, "node": tc.node, "iface": tc.iface}).Set(CLEANUP)
86-
daemon.ClockClassMetrics.With(map[string]string{"process": tc.process, "node": tc.node}).Set(CLEANUP)
86+
daemon.ClockClassMetrics.With(map[string]string{"process": tc.process, "config": "ptp4l.0.config", "node": tc.node}).Set(CLEANUP)
8787
daemon.InterfaceRole.With(map[string]string{"process": tc.process, "node": tc.node, "iface": tc.iface}).Set(CLEANUP)
8888
}
8989

@@ -267,7 +267,7 @@ func Test_ProcessPTPMetrics(t *testing.T) {
267267
assert.Equal(tc.expectedClockState, testutil.ToFloat64(clockState), "ClockState does not match\n%s", tc.String())
268268
}
269269
if tc.expectedClockClassMetrics != SKIP {
270-
clockClassMetrics := daemon.ClockClassMetrics.With(map[string]string{"process": tc.process, "node": tc.node})
270+
clockClassMetrics := daemon.ClockClassMetrics.With(map[string]string{"process": tc.process, "config": "ptp4l.0.config", "node": tc.node})
271271
assert.Equal(tc.expectedClockClassMetrics, testutil.ToFloat64(clockClassMetrics), "ClockClassMetrics does not match\n%s", tc.String())
272272
}
273273
if tc.expectedInterfaceRole != SKIP {

pkg/daemon/metrics.go

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ var (
142142
Subsystem: PTPSubsystem,
143143
Name: "clock_class",
144144
Help: "6 = Locked, 7 = PRC unlocked in-spec, 52/187 = PRC unlocked out-of-spec, 135 = T-BC holdover in-spec, 165 = T-BC holdover out-of-spec, 248 = Default, 255 = Slave Only Clock",
145-
}, []string{"process", "node"})
145+
}, []string{"process", "node", "config"})
146146

147147
// InterfaceRole metrics to show current interface role
148148
InterfaceRole = prometheus.NewGaugeVec(
@@ -256,7 +256,7 @@ func updatePTPMetrics(from, process, iface string, ptpOffset, maxPtpOffset, freq
256256
}
257257

258258
// extractMetrics ...
259-
func extractMetrics(messageTag string, processName string, ifaces config.IFaces, output string) (configName, source string, offset float64, state string, iface string) {
259+
func extractMetrics(messageTag string, processName string, ifaces config.IFaces, output string, updateMetrics bool) (configName, source string, offset float64, state string, iface string) {
260260
configName = strings.Replace(strings.Replace(messageTag, "]", "", 1), "[", "", 1)
261261
if configName != "" {
262262
configName = strings.Split(configName, MessageTagSuffixSeperator)[0] // remove any suffix added to the configName
@@ -287,8 +287,10 @@ func extractMetrics(messageTag string, processName string, ifaces config.IFaces,
287287
if offsetSource == master {
288288
masterOffsetSource.set(configName, processName)
289289
}
290-
updatePTPMetrics(offsetSource, processName, ifaceName, ptpOffset, maxPtpOffset, frequencyAdjustment, delay)
291-
updateClockStateMetrics(processName, ifaceName, clockstate)
290+
if updateMetrics {
291+
updatePTPMetrics(offsetSource, processName, ifaceName, ptpOffset, maxPtpOffset, frequencyAdjustment, delay)
292+
updateClockStateMetrics(processName, ifaceName, clockstate)
293+
}
292294
}
293295
source = processName
294296
offset = ptpOffset
@@ -302,14 +304,18 @@ func extractMetrics(messageTag string, processName string, ifaces config.IFaces,
302304
if role == SLAVE {
303305
masterOffsetIface.set(configName, ifaces[portId-1].Name)
304306
slaveIface.set(configName, ifaces[portId-1].Name)
307+
state = LOCKED // initial state to indicate we are locked when slave is back for clockclass to trigger
305308
} else if role == FAULTY {
306309
if slaveIface.isFaulty(configName, ifaces[portId-1].Name) &&
307310
masterOffsetSource.get(configName) == ptp4lProcessName {
308-
updatePTPMetrics(master, processName, masterOffsetIface.get(configName).alias, faultyOffset, faultyOffset, 0, 0)
309-
updatePTPMetrics(phc, phc2sysProcessName, clockRealTime, faultyOffset, faultyOffset, 0, 0)
310-
updateClockStateMetrics(processName, masterOffsetIface.get(configName).alias, FREERUN)
311+
if updateMetrics {
312+
updatePTPMetrics(master, processName, masterOffsetIface.get(configName).alias, faultyOffset, faultyOffset, 0, 0)
313+
updatePTPMetrics(phc, phc2sysProcessName, clockRealTime, faultyOffset, faultyOffset, 0, 0)
314+
updateClockStateMetrics(processName, masterOffsetIface.get(configName).alias, FREERUN)
315+
}
311316
masterOffsetIface.set(configName, "")
312317
slaveIface.set(configName, "")
318+
state = HOLDOVER
313319
}
314320
}
315321
}
@@ -528,9 +534,9 @@ func UpdateInterfaceRoleMetrics(process string, iface string, role ptpPortRole)
528534
}
529535

530536
// UpdateClockClassMetrics ... update clock class metrics
531-
func UpdateClockClassMetrics(clockClass float64) {
537+
func UpdateClockClassMetrics(cfgName string, clockClass float64) {
532538
ClockClassMetrics.With(prometheus.Labels{
533-
"process": ptp4lProcessName, "node": NodeName}).Set(float64(clockClass))
539+
"process": ptp4lProcessName, "config": cfgName, "node": NodeName}).Set(float64(clockClass))
534540
}
535541

536542
func UpdateProcessStatusMetrics(process, cfgName string, status int64) {

pkg/event/event.go

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -216,9 +216,9 @@ func Init(nodeName string, stdOutToSocket bool, socketName string, processChanne
216216
frequencyTraceable: false,
217217
ReduceLog: true,
218218
}
219-
if clockClassMetric != nil {
219+
if !stdOutToSocket && clockClassMetric != nil {
220220
clockClassMetric.With(prometheus.Labels{
221-
"process": PTP4lProcessName, "node": nodeName}).Set(248)
221+
"process": PTP4lProcessName, "node": nodeName, "config": "ptp4l.0.config"}).Set(248)
222222
}
223223
StateRegisterer = NewStateNotifier()
224224
return ptpEvent
@@ -683,7 +683,9 @@ connect:
683683
if event.WriteToLog && logDataValues != "" {
684684
logOut = append(logOut, logDataValues)
685685
}
686-
e.UpdateClockStateMetrics(event.State, string(event.ProcessName), event.IFace)
686+
if !e.stdoutToSocket {
687+
e.UpdateClockStateMetrics(event.State, string(event.ProcessName), event.IFace)
688+
}
687689
} else {
688690
// Update the in MemData
689691
dataDetails := e.addEvent(event)
@@ -884,6 +886,9 @@ func (e *EventHandler) GetPTPState(source EventSource, cfgName string) PTPState
884886

885887
// UpdateClockStateMetrics ...
886888
func (e *EventHandler) UpdateClockStateMetrics(state PTPState, process, iFace string) {
889+
if e.stdoutToSocket {
890+
return
891+
}
887892
labels := prometheus.Labels{
888893
"process": process, "node": e.nodeName, "iface": iFace}
889894
if state == PTP_LOCKED {
@@ -978,6 +983,9 @@ func registerMetrics(m *prometheus.GaugeVec) {
978983
}
979984

980985
func (e *EventHandler) unregisterMetrics(configName string, processName string) {
986+
if e.stdoutToSocket {
987+
return // no need to unregister metrics if events are going to socket
988+
}
981989
if data, ok := e.data[configName]; ok {
982990
for _, v := range data {
983991
if string(v.ProcessName) == processName || processName == "" {
@@ -1046,7 +1054,7 @@ func (e *EventHandler) UpdateClockClass(c net.Conn, clk ClockClassRequest) {
10461054
}
10471055
} else {
10481056
e.clockClassMetric.With(prometheus.Labels{
1049-
"process": PTP4lProcessName, "node": e.nodeName}).Set(float64(clockClass))
1057+
"process": PTP4lProcessName, "node": e.nodeName, "config": clk.cfgName}).Set(float64(clockClass))
10501058
}
10511059
fmt.Printf("%s", clockClassOut)
10521060
}

0 commit comments

Comments
 (0)