@@ -29,7 +29,6 @@ import (
29
29
type OsqueryInstance struct {
30
30
* osqueryInstanceFields
31
31
instanceLock sync.Mutex
32
- logger log.Logger
33
32
}
34
33
35
34
// osqueryInstanceFields is a type which is embedded in OsqueryInstance so that
@@ -48,6 +47,7 @@ type osqueryInstanceFields struct {
48
47
stdout io.Writer
49
48
stderr io.Writer
50
49
retries uint
50
+ logger log.Logger
51
51
52
52
// the following are instance artifacts that are created and held as a result
53
53
// of launching an osqueryd process
@@ -129,8 +129,12 @@ func createOsquerydCommand(osquerydBinary string, paths *osqueryFilePaths, confi
129
129
"--force=true" ,
130
130
"--disable_watchdog" ,
131
131
)
132
- cmd .Stdout = stdout
133
- cmd .Stderr = stderr
132
+ if stdout != nil {
133
+ cmd .Stdout = stdout
134
+ }
135
+ if stderr != nil {
136
+ cmd .Stderr = stderr
137
+ }
134
138
135
139
return cmd , nil
136
140
}
@@ -152,6 +156,14 @@ func osqueryTempDir() (string, func(), error) {
152
156
// https://dave.cheney.net/2014/10/17/functional-options-for-friendly-apis
153
157
type OsqueryInstanceOption func (* OsqueryInstance )
154
158
159
+ // WithLogger is a functional option which allows the user to pass a log.Logger
160
+ // to be used for logging osquery instance status.
161
+ func WithLogger (logger log.Logger ) OsqueryInstanceOption {
162
+ return func (i * OsqueryInstance ) {
163
+ i .logger = logger
164
+ }
165
+ }
166
+
155
167
// WithOsqueryExtensionPlugin is a functional option which allows the user to
156
168
// declare a number of osquery plugins (ie: config plugin, logger plugin, tables,
157
169
// etc) which can be loaded when calling LaunchOsqueryInstance. You can load as
@@ -267,11 +279,10 @@ func LaunchOsqueryInstance(opts ...OsqueryInstanceOption) (*OsqueryInstance, err
267
279
// caller.
268
280
o := & OsqueryInstance {
269
281
osqueryInstanceFields : & osqueryInstanceFields {
270
- stdout : ioutil .Discard ,
271
- stderr : ioutil .Discard ,
272
282
rmRootDirectory : func () {},
273
283
errs : make (chan error ),
274
284
clientLock : new (sync.Mutex ),
285
+ logger : log .NewNopLogger (),
275
286
},
276
287
}
277
288
@@ -436,21 +447,47 @@ func launchOsqueryInstance(o *OsqueryInstance) (*OsqueryInstance, error) {
436
447
// Launch a long-running recovery goroutine which can handle various errors
437
448
// that can occur
438
449
go func () {
439
- // Block until an error is generated by the osqueryd process itself or the
440
- // extension manager server. We don't select, because if one element of the
441
- // runtime produces an error, it's likely that all of the other components
442
- // will produce errors as well since everything is so interconnected. For
443
- // this reason, when any error occurs, we attempt a total recovery.
444
- runtimeError , done := <- errChannel
445
- if done {
446
- return
447
- }
448
- if recoveryError := o .Recover (runtimeError ); recoveryError != nil {
449
- // If we were not able to recover the osqueryd process for some reason,
450
- // kill the process and hope that the operating system scheduling
451
- // mechanism (launchd, etc) can relaunch the tool cleanly.
452
- level .Info (o .logger ).Log ("err" , errors .Wrap (recoveryError , "could not recover the osqueryd process" ))
453
- os .Exit (1 )
450
+ ticker := time .NewTicker (60 * time .Second )
451
+ defer ticker .Stop ()
452
+
453
+ for {
454
+ needsRecovery := false
455
+ select {
456
+ case <- ticker .C :
457
+ healthy , err := o .Healthy ()
458
+ if err != nil {
459
+ needsRecovery = true
460
+ level .Error (o .logger ).Log ("err" , errors .Wrap (err , "checking instance health" ))
461
+ }
462
+ if ! healthy {
463
+ needsRecovery = true
464
+ level .Error (o .logger ).Log ("msg" , "instance not healthy" )
465
+ }
466
+
467
+ // Receive an error generated by the osqueryd process itself or the
467
+ // extension manager server. We don't distinguish between error sources: if one
468
+ // element of the runtime produces an error, it's likely that all of the other
469
+ // components will produce errors as well since everything is so interconnected.
470
+ // For this reason, when any error occurs, we attempt a total recovery.
472
+ case runtimeError , open := <- errChannel :
473
+ if ! open {
474
+ return
475
+ }
476
+ needsRecovery = true
477
+ level .Error (o .logger ).Log ("err" , errors .Wrap (runtimeError , "osquery runtime error" ))
478
+ }
479
+
480
+ if needsRecovery {
481
+ level .Info (o .logger ).Log ("msg" , "recovering osquery instance" )
482
+ if recoveryError := o .Recover (); recoveryError != nil {
483
+ // If we were not able to recover the osqueryd process for some reason,
484
+ // kill the process and hope that the operating system scheduling
485
+ // mechanism (launchd, etc) can relaunch the tool cleanly.
486
+ level .Error (o .logger ).Log ("err" , errors .Wrap (recoveryError , "could not recover the osqueryd process" ))
487
+ os .Exit (1 )
488
+ }
489
+ return
490
+ }
454
491
}
455
492
}()
456
493
@@ -471,7 +508,7 @@ func (o *OsqueryInstance) beginTeardown() bool {
471
508
// release resources because Kill() expects the osquery instance to be healthy,
472
509
// whereas Recover() expects a hostile environment and is slightly more
473
510
// defensive in its actions.
474
- func (o * OsqueryInstance ) Recover (runtimeError error ) error {
511
+ func (o * OsqueryInstance ) Recover () error {
475
512
// If the user explicitly calls o.Kill(), as the components are shutdown, they
476
513
// may exit with errors. In this case, we shouldn't recover the
477
514
// instance.
@@ -545,11 +582,16 @@ func (o *OsqueryInstance) Restart() error {
545
582
// being managed by the current instantiation of this OsqueryInstance is
546
583
// healthy.
547
584
func (o * OsqueryInstance ) Healthy () (bool , error ) {
548
- status , err := o .extensionManagerServer .Ping ()
585
+ serverStatus , err := o .extensionManagerServer .Ping ()
586
+ if err != nil {
587
+ return false , errors .Wrap (err , "could not ping extension server" )
588
+ }
589
+
590
+ clientStatus , err := o .extensionManagerClient .Ping ()
549
591
if err != nil {
550
- return false , errors .Wrap (err , "could not ping osquery through extension interface " )
592
+ return false , errors .Wrap (err , "could not ping osquery extension client " )
551
593
}
552
- return status .Code == 0 , nil
594
+ return serverStatus . Code == 0 && clientStatus .Code == 0 , nil
553
595
}
554
596
555
597
func (o * OsqueryInstance ) Query (query string ) ([]map [string ]string , error ) {
@@ -579,6 +621,7 @@ func (o *OsqueryInstance) relaunchAndReplace() error {
579
621
WithLoggerPluginFlag (o .loggerPluginFlag ),
580
622
WithDistributedPluginFlag (o .distributedPluginFlag ),
581
623
WithRetries (o .retries ),
624
+ WithLogger (o .logger ),
582
625
}
583
626
if ! o .usingTempDir {
584
627
opts = append (opts , WithRootDirectory (o .rootDirectory ))
0 commit comments