diff options
| author | Paul Buetow <paul@buetow.org> | 2026-02-15 08:28:43 +0200 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2026-02-15 08:29:45 +0200 |
| commit | bbbb7461d19e611e6fab3f24edd5f8e0d2d45b1e (patch) | |
| tree | bee4b9e07bafb2810f0e2cc2db4fb34e7154b2d4 | |
| parent | d89b9e6760e2aadf9779faa6f23678f67c731e1e (diff) | |
refactor: implement context-aware network dialing
Modernize network dialing to use Go's context-aware patterns for better
cancellation support and connection reliability.
Changes:
- Update Go version from 1.24 to 1.25 in go.mod
- Replace ssh.Dial with net.Dialer.DialContext + ssh.NewClientConn
for SSH client connections in serverconnection.go
- Add TCP KeepAlive (30s) for SSH connection health monitoring
- Implement context-aware dialing for SSH agent connections in ssh.go
- Improve error messages to distinguish dial vs SSH handshake failures
- Update AGENTS.md with integration test requirements
Benefits:
- Context cancellation now properly affects connection establishment
- TCP KeepAlive prevents silent connection failures
- Better integration with Go's cancellation patterns
- Improved reliability for distributed systems
All integration tests pass with race detection enabled.
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
| -rw-r--r-- | AGENTS.md | 17 | ||||
| -rw-r--r-- | go.mod | 8 | ||||
| -rw-r--r-- | go.sum | 14 | ||||
| -rw-r--r-- | internal/clients/connectors/serverconnection.go | 23 | ||||
| -rw-r--r-- | internal/ssh/ssh.go | 19 |
5 files changed, 53 insertions, 28 deletions
@@ -43,15 +43,20 @@ make pgo make test # Run all tests including integration tests +# IMPORTANT: Always rebuild binaries before running integration tests +make clean && make build DTAIL_INTEGRATION_TEST_RUN_MODE=yes make test +# Quick integration test workflow (recommended) +make build && DTAIL_INTEGRATION_TEST_RUN_MODE=yes make test + # Run linting make lint # Run go vet make vet -# Run integration tests individually (requires binaries built) +# Run integration tests individually (requires binaries built first) cd integrationtests && go test ``` @@ -153,7 +158,15 @@ make profile-help ## Test Execution Details -- Integration tests are run by setting DTAIL_INTEGRATION_TEST_RUN_MODE to yes, and by running 'make test'. +- Integration tests require binaries to be built before execution +- **IMPORTANT:** Always recompile binaries after code changes before running integration tests: + ```bash + make clean && make build + DTAIL_INTEGRATION_TEST_RUN_MODE=yes make test + ``` +- Integration tests are run by setting DTAIL_INTEGRATION_TEST_RUN_MODE to yes, and by running 'make test' +- Integration tests verify: DCat, DGrep, DMap (MapReduce), DServer, DTail, DTailHealth functionality +- All tests run with race detection enabled (`--race` flag) ## Known Limitations @@ -1,6 +1,6 @@ module github.com/mimecast/dtail -go 1.24 +go 1.25 require ( github.com/DataDog/zstd v1.5.7 @@ -8,8 +8,4 @@ require ( golang.org/x/term v0.32.0 ) -require ( - golang.org/x/lint v0.0.0-20241112194109-818c5a804067 // indirect - golang.org/x/sys v0.33.0 // indirect - golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7 // indirect -) +require golang.org/x/sys v0.33.0 // indirect @@ -1,22 +1,8 @@ github.com/DataDog/zstd v1.5.7 h1:ybO8RBeh29qrxIhCA9E8gKY6xfONU9T6G6aP9DTKfLE= github.com/DataDog/zstd v1.5.7/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.39.0 h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM= golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U= -golang.org/x/lint v0.0.0-20241112194109-818c5a804067 h1:adDmSQyFTCiv19j015EGKJBoaa7ElV0Q1Wovb/4G7NA= -golang.org/x/lint v0.0.0-20241112194109-818c5a804067/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= -golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7 h1:EBZoQjiKKPaLbPrbpssUfuHtwM6KV/vb4U85g/cigFY= -golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/internal/clients/connectors/serverconnection.go b/internal/clients/connectors/serverconnection.go index 34d3997..d114d06 100644 --- a/internal/clients/connectors/serverconnection.go +++ b/internal/clients/connectors/serverconnection.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "io" + "net" "strconv" "strings" "time" @@ -135,10 +136,28 @@ func (c *ServerConnection) dial(ctx context.Context, cancel context.CancelFunc, address := fmt.Sprintf("%s:%d", c.hostname, c.port) dlog.Client.Debug(c.server, "Dialing into the connection", address) - client, err := ssh.Dial("tcp", address, c.config) + // Use context-aware dialing to enable proper cancellation during connection establishment. + // TCP KeepAlive (30s) prevents silent connection failures on long-lived connections. + dialer := &net.Dialer{ + Timeout: c.config.Timeout, // Use the SSH config timeout (2 seconds) + KeepAlive: 30 * time.Second, // Standard Go default for connection health monitoring + } + + // Establish TCP connection with context support for cancellation + conn, err := dialer.DialContext(ctx, "tcp", address) + if err != nil { + return fmt.Errorf("failed to dial TCP connection to %s: %w", address, err) + } + + // Perform SSH handshake over the established TCP connection + sshConn, chans, reqs, err := ssh.NewClientConn(conn, address, c.config) if err != nil { - return fmt.Errorf("failed to dial SSH connection to %s: %w", address, err) + conn.Close() + return fmt.Errorf("SSH handshake failed for %s: %w", address, err) } + + // Create SSH client from the connection components + client := ssh.NewClient(sshConn, chans, reqs) defer client.Close() return c.session(ctx, cancel, client, throttleCh) diff --git a/internal/ssh/ssh.go b/internal/ssh/ssh.go index 41cce05..7088e89 100644 --- a/internal/ssh/ssh.go +++ b/internal/ssh/ssh.go @@ -1,6 +1,7 @@ package ssh import ( + "context" "crypto/rand" "crypto/rsa" "crypto/x509" @@ -9,6 +10,7 @@ import ( "net" "os" "syscall" + "time" "github.com/mimecast/dtail/internal/io/dlog" @@ -49,7 +51,16 @@ func Agent() (gossh.AuthMethod, error) { // AgentWithKeyIndex used for SSH auth with a specific key index from the agent. // If keyIndex is -1, all keys are used. Otherwise, only the specified key is used. func AgentWithKeyIndex(keyIndex int) (gossh.AuthMethod, error) { - sshAgent, err := net.Dial("unix", os.Getenv("SSH_AUTH_SOCK")) + // Use context-aware dialing for SSH agent connection (local Unix socket). + // 2-second timeout is reasonable for local socket connections. + dialer := &net.Dialer{ + Timeout: 2 * time.Second, + } + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + sshAgent, err := dialer.DialContext(ctx, "unix", os.Getenv("SSH_AUTH_SOCK")) if err != nil { return nil, fmt.Errorf("failed to connect to SSH agent: %w", err) } @@ -61,17 +72,17 @@ func AgentWithKeyIndex(keyIndex int) (gossh.AuthMethod, error) { for i, key := range keys { dlog.Common.Debug("Public key", i, key) } - + // If no specific key index requested, use all keys (backwards compatible default) if keyIndex < 0 { return gossh.PublicKeysCallback(agentClient.Signers), nil } - + // Use only the specified key index (0-based) if keyIndex >= len(keys) { return nil, fmt.Errorf("key index %d out of range (agent has %d keys)", keyIndex, len(keys)) } - + dlog.Common.Debug("Using SSH agent key at index", keyIndex) return gossh.PublicKeysCallback(func() ([]gossh.Signer, error) { signers, err := agentClient.Signers() |
