Skip to content

Commit 1787d38

Browse files
iQQBot and roboquat
authored and committed
[ws-proxy] add some ssh metrics
1 2 [debug] 1
1 parent 95f9c2d commit 1787d38

File tree

1 file changed

+73
-8
lines changed

1 file changed

+73
-8
lines changed

components/ws-proxy/pkg/sshproxy/server.go

Lines changed: 73 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ package sshproxy
66

77
import (
88
"context"
9-
"errors"
109
"fmt"
1110
"net"
1211
"strings"
@@ -17,16 +16,52 @@ import (
1716
supervisor "github.com/gitpod-io/gitpod/supervisor/api"
1817
tracker "github.com/gitpod-io/gitpod/ws-proxy/pkg/analytics"
1918
p "github.com/gitpod-io/gitpod/ws-proxy/pkg/proxy"
19+
"github.com/prometheus/client_golang/prometheus"
2020
"golang.org/x/crypto/ssh"
2121
"golang.org/x/xerrors"
2222
"google.golang.org/grpc"
23+
"sigs.k8s.io/controller-runtime/pkg/metrics"
2324
)
2425

2526
const GitpodUsername = "gitpod"
2627

27-
var ErrWorkspaceNotFound = errors.New("not found workspace")
28-
var ErrAuthFailed = errors.New("auth failed")
29-
var ErrUsernameFormat = errors.New("username format is not correct")
28+
var (
29+
SSHConnectionCount = prometheus.NewGauge(prometheus.GaugeOpts{
30+
Name: "gitpod_ws_proxy_ssh_connection_count",
31+
Help: "Current number of SSH connection",
32+
})
33+
34+
SSHAttemptTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
35+
Name: "gitpod_ws_proxy_ssh_attempt_total",
36+
Help: "Total number of SSH attempt",
37+
}, []string{"status", "error_type"})
38+
)
39+
40+
var (
41+
ErrWorkspaceNotFound = NewSSHError("WS_NOTFOUND", "not found workspace")
42+
ErrAuthFailed = NewSSHError("AUTH_FAILED", "auth failed")
43+
ErrUsernameFormat = NewSSHError("USER_FORMAT", "username format is not correct")
44+
ErrMissPrivateKey = NewSSHError("MISS_KEY", "missing privateKey")
45+
ErrConnFailed = NewSSHError("CONN_FAILED", "cannot to connect with workspace")
46+
ErrCreateSSHKey = NewSSHError("CREATE_KEY_FAILED", "cannot create private pair in workspace")
47+
)
48+
49+
// SSHError is an error value that pairs a human-readable description with a
// short identifier. It is a comparable value type, so package-level instances
// can be used as sentinels and matched with a type assertion on SSHError.
type SSHError struct {
	// shortName is the compact identifier returned by ShortName.
	shortName string
	// description is the message returned by Error.
	description string
}

// NewSSHError builds an SSHError from the given short name and description.
func NewSSHError(shortName string, description string) SSHError {
	var sshErr SSHError
	sshErr.shortName = shortName
	sshErr.description = description
	return sshErr
}

// Error implements the error interface and returns the description.
func (s SSHError) Error() string {
	return s.description
}

// ShortName returns the short identifier given at construction time.
func (s SSHError) ShortName() string {
	return s.shortName
}
3065

3166
type Session struct {
3267
Conn *ssh.ServerConn
@@ -45,6 +80,13 @@ type Server struct {
4580
workspaceInfoProvider p.WorkspaceInfoProvider
4681
}
4782

83+
func init() {
84+
metrics.Registry.MustRegister(
85+
SSHConnectionCount,
86+
SSHAttemptTotal,
87+
)
88+
}
89+
4890
// New creates a new SSH proxy server
4991

5092
func New(signers []ssh.Signer, workspaceInfoProvider p.WorkspaceInfoProvider, heartbeat Heartbeat) *Server {
@@ -75,7 +117,7 @@ func New(signers []ssh.Signer, workspaceInfoProvider p.WorkspaceInfoProvider, he
75117
workspaceId, ownerToken = args[0], args[1]
76118
wsInfo, err = server.Authenticator(workspaceId, ownerToken)
77119
if err == nil {
78-
err = errors.New("miss private key")
120+
err = ErrMissPrivateKey
79121
}
80122
return
81123
}
@@ -112,10 +154,25 @@ func New(signers []ssh.Signer, workspaceInfoProvider p.WorkspaceInfoProvider, he
112154
return server
113155
}
114156

157+
func ReportSSHAttemptMetrics(err error) {
158+
if err == nil {
159+
SSHAttemptTotal.WithLabelValues("success").Inc()
160+
return
161+
}
162+
errorType := "OTHERS"
163+
if serverAuthErr, ok := err.(*ssh.ServerAuthError); ok && len(serverAuthErr.Errors) > 0 {
164+
if authErr, ok := serverAuthErr.Errors[len(serverAuthErr.Errors)-1].(SSHError); ok {
165+
errorType = authErr.ShortName()
166+
}
167+
}
168+
SSHAttemptTotal.WithLabelValues("failed", errorType).Inc()
169+
}
170+
115171
func (s *Server) HandleConn(c net.Conn) {
116172
sshConn, chans, reqs, err := ssh.NewServerConn(c, s.sshConfig)
117173
if err != nil {
118174
c.Close()
175+
ReportSSHAttemptMetrics(err)
119176
return
120177
}
121178
defer sshConn.Close()
@@ -127,13 +184,16 @@ func (s *Server) HandleConn(c net.Conn) {
127184
workspaceId := sshConn.Permissions.Extensions["workspaceId"]
128185
wsInfo := s.workspaceInfoProvider.WorkspaceInfo(workspaceId)
129186
if wsInfo == nil {
187+
ReportSSHAttemptMetrics(ErrWorkspaceNotFound)
130188
return
131189
}
132190
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
133191
key, err := s.GetWorkspaceSSHKey(ctx, wsInfo.IPAddress)
134192
if err != nil {
135193
cancel()
136-
s.TrackSSHConnection(wsInfo, "connect", err)
194+
s.TrackSSHConnection(wsInfo, "connect", ErrCreateSSHKey)
195+
ReportSSHAttemptMetrics(ErrCreateSSHKey)
196+
log.WithField("instanceId", wsInfo.InstanceID).WithError(err).Error("failed to create private pair in workspace")
137197
return
138198
}
139199
cancel()
@@ -147,7 +207,8 @@ func (s *Server) HandleConn(c net.Conn) {
147207
remoteAddr := wsInfo.IPAddress + ":23001"
148208
conn, err := net.Dial("tcp", remoteAddr)
149209
if err != nil {
150-
s.TrackSSHConnection(wsInfo, "connect", err)
210+
s.TrackSSHConnection(wsInfo, "connect", ErrConnFailed)
211+
ReportSSHAttemptMetrics(ErrConnFailed)
151212
log.WithField("instanceId", wsInfo.InstanceID).WithField("workspaceIP", wsInfo.IPAddress).WithError(err).Error("dail failed")
152213
return
153214
}
@@ -164,7 +225,8 @@ func (s *Server) HandleConn(c net.Conn) {
164225
Timeout: 10 * time.Second,
165226
})
166227
if err != nil {
167-
s.TrackSSHConnection(wsInfo, "connect", err)
228+
s.TrackSSHConnection(wsInfo, "connect", ErrConnFailed)
229+
ReportSSHAttemptMetrics(ErrConnFailed)
168230
log.WithField("instanceId", wsInfo.InstanceID).WithField("workspaceIP", wsInfo.IPAddress).WithError(err).Error("connect failed")
169231
return
170232
}
@@ -173,10 +235,13 @@ func (s *Server) HandleConn(c net.Conn) {
173235
ctx, cancel = context.WithCancel(context.Background())
174236

175237
s.TrackSSHConnection(wsInfo, "connect", nil)
238+
SSHConnectionCount.Inc()
239+
ReportSSHAttemptMetrics(nil)
176240

177241
go func() {
178242
client.Wait()
179243
cancel()
244+
defer SSHConnectionCount.Dec()
180245
}()
181246

182247
for newChannel := range chans {

0 commit comments

Comments
 (0)