From c2522ffb59514443816a96386c16bb7527cbe57c Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Sat, 21 Aug 2021 14:54:24 +0300 Subject: read files bytewise for more control of whats happening - change transport protocol for more control over newlines --- internal/io/fs/readfile.go | 71 ++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 43 deletions(-) (limited to 'internal/io/fs') diff --git a/internal/io/fs/readfile.go b/internal/io/fs/readfile.go index 6757bd6..8a365a1 100644 --- a/internal/io/fs/readfile.go +++ b/internal/io/fs/readfile.go @@ -14,6 +14,7 @@ import ( "github.com/mimecast/dtail/internal/io/line" "github.com/mimecast/dtail/internal/io/logger" + "github.com/mimecast/dtail/internal/protocol" "github.com/mimecast/dtail/internal/regex" "github.com/DataDog/zstd" @@ -148,80 +149,64 @@ func (f readFile) read(ctx context.Context, fd *os.File, rawLines chan []byte, t if err != nil { return err } - rawLine := make([]byte, 0, 512) lineLengthThreshold := 1024 * 1024 // 1mb - longLineWarning := false + warnedAboutLongLine := false + message := make([]byte, 0, 512) for { select { case <-ctx.Done(): return nil - default: - } - - select { case <-truncate: if isTruncated, err := f.truncated(fd); isTruncated { return err } - logger.Info(f.filePath, "Current offset", offset) default: } - // Read some bytes (max 4k at once as of go 1.12). isPrefix will - // be set if line does not fit into 4k buffer. - bytes, isPrefix, err := reader.ReadLine() + b, err := reader.ReadByte() if err != nil { - // If EOF, sleep a couple of ms and return with nil error. - // If other error, return with non-nil error. if err != io.EOF { return err } if !f.seekEOF { - logger.Debug(f.FilePath(), "End of file reached") + logger.Info(f.FilePath(), "End of file reached") return nil } time.Sleep(time.Millisecond * 100) continue } + offset++ - rawLine = append(rawLine, bytes...) - offset += uint64(len(bytes)) - - if !isPrefix { - // last LineRead call returned contend until end of line. - rawLine = append(rawLine, '\n') - select { - case rawLines <- rawLine: - case <-ctx.Done(): - return nil + switch b { + case '\n': + if len(message) == 0 { + time.Sleep(time.Millisecond * 100) + continue } - rawLine = make([]byte, 0, 512) - if longLineWarning { - longLineWarning = false - } - continue - } - - // Last LineRead call could not read content until end of line, buffer - // was too small. Determine whether we exceed the max line length we - // want dtail to send to the client at once. Possibly split up log line - // into multiple log lines. - if len(rawLine) >= lineLengthThreshold { - if !longLineWarning { - f.serverMessages <- logger.Warn(f.filePath, "Long log line, splitting into multiple lines") - // Only print out one warning per long log line. - longLineWarning = true - } - rawLine = append(rawLine, '\n') select { - case rawLines <- rawLine: + case rawLines <- append(message, protocol.MessageDelimiter): + message = make([]byte, 0, 512) + warnedAboutLongLine = false case <-ctx.Done(): return nil } - rawLine = make([]byte, 0, 512) + default: + if len(message) >= lineLengthThreshold { + if !warnedAboutLongLine { + f.serverMessages <- logger.Warn(f.filePath, "Long log line, splitting into multiple lines") + warnedAboutLongLine = true + } + select { + case <-ctx.Done(): + return nil + case rawLines <- append(message, protocol.MessageDelimiter): + message = make([]byte, 0, 512) + } + } + message = append(message, b) } } } -- cgit v1.2.3 From 6d727b9bdbc387c8a5c34406a2c4de9140face38 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Sat, 28 Aug 2021 19:36:46 +0100 Subject: use a byte.Buffer in the file reader --- internal/io/fs/readfile.go | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'internal/io/fs') diff --git a/internal/io/fs/readfile.go b/internal/io/fs/readfile.go index 8a365a1..e44f30e 100644 --- a/internal/io/fs/readfile.go +++ b/internal/io/fs/readfile.go @@ -2,6 +2,7 @@ package fs import ( "bufio" + "bytes" "compress/gzip" "context" "errors" @@ -14,6 +15,7 @@ import ( "github.com/mimecast/dtail/internal/io/line" "github.com/mimecast/dtail/internal/io/logger" + "github.com/mimecast/dtail/internal/io/pool" "github.com/mimecast/dtail/internal/protocol" "github.com/mimecast/dtail/internal/regex" @@ -90,7 +92,7 @@ func (f readFile) Start(ctx context.Context, lines chan<- line.Line, re regex.Re fd.Seek(0, io.SeekEnd) } - rawLines := make(chan []byte, 100) + rawLines := make(chan *bytes.Buffer, 100) truncate := make(chan struct{}) var wg sync.WaitGroup @@ -142,7 +144,7 @@ func (f readFile) makeReader(fd *os.File) (reader *bufio.Reader, err error) { return } -func (f readFile) read(ctx context.Context, fd *os.File, rawLines chan []byte, truncate <-chan struct{}) error { +func (f readFile) read(ctx context.Context, fd *os.File, rawLines chan *bytes.Buffer, truncate <-chan struct{}) error { var offset uint64 reader, err := f.makeReader(fd) @@ -152,7 +154,7 @@ func (f readFile) read(ctx context.Context, fd *os.File, rawLines chan []byte, t lineLengthThreshold := 1024 * 1024 // 1mb warnedAboutLongLine := false - message := make([]byte, 0, 512) + message := pool.BytesBuffer.Get().(*bytes.Buffer) for { select { @@ -182,37 +184,41 @@ func (f readFile) read(ctx context.Context, fd *os.File, rawLines chan []byte, t switch b { case '\n': - if len(message) == 0 { + if message.Len() == 0 { time.Sleep(time.Millisecond * 100) continue } + message.WriteByte(protocol.MessageDelimiter) select { - case rawLines <- append(message, protocol.MessageDelimiter): - message = make([]byte, 0, 512) + case rawLines <- message: + message = pool.BytesBuffer.Get().(*bytes.Buffer) + //fmt.Printf("%d %d %p\n", message.Len(), message.Cap(), message) warnedAboutLongLine = false case <-ctx.Done(): return nil } default: - if len(message) >= lineLengthThreshold { + if message.Len() >= lineLengthThreshold { if !warnedAboutLongLine { f.serverMessages <- logger.Warn(f.filePath, "Long log line, splitting into multiple lines") warnedAboutLongLine = true } + message.WriteByte(protocol.MessageDelimiter) select { + case rawLines <- message: + message = pool.BytesBuffer.Get().(*bytes.Buffer) + //fmt.Printf("%d %d %p\n", message.Len(), message.Cap(), message) case <-ctx.Done(): return nil - case rawLines <- append(message, protocol.MessageDelimiter): - message = make([]byte, 0, 512) } } - message = append(message, b) + message.WriteByte(b) } } } // Filter log lines matching a given regular expression. -func (f readFile) filter(ctx context.Context, wg *sync.WaitGroup, rawLines <-chan []byte, lines chan<- line.Line, re regex.Regex) { +func (f readFile) filter(ctx context.Context, wg *sync.WaitGroup, rawLines <-chan *bytes.Buffer, lines chan<- line.Line, re regex.Regex) { defer wg.Done() for { @@ -233,10 +239,10 @@ func (f readFile) filter(ctx context.Context, wg *sync.WaitGroup, rawLines <-cha } } -func (f readFile) transmittable(lineBytes []byte, length, capacity int, re regex.Regex) (line.Line, bool) { +func (f readFile) transmittable(lineBytes *bytes.Buffer, length, capacity int, re regex.Regex) (line.Line, bool) { var read line.Line - if !re.Match(lineBytes) { + if !re.Match(lineBytes.Bytes()) { f.updateLineNotMatched() f.updateLineNotTransmitted() return read, false -- cgit v1.2.3 From 23982f331c2154a66b86d596226c24454fd06be5 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Sat, 28 Aug 2021 20:26:32 +0100 Subject: 1. Major performance gain by not checking for file truncation aftter each bytes read. 2. Introduce field separator to the protocol package. --- internal/io/fs/readfile.go | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'internal/io/fs') diff --git a/internal/io/fs/readfile.go b/internal/io/fs/readfile.go index e44f30e..f2f672a 100644 --- a/internal/io/fs/readfile.go +++ b/internal/io/fs/readfile.go @@ -157,22 +157,21 @@ func (f readFile) read(ctx context.Context, fd *os.File, rawLines chan *bytes.Bu message := pool.BytesBuffer.Get().(*bytes.Buffer) for { - select { - case <-ctx.Done(): - return nil - case <-truncate: - if isTruncated, err := f.truncated(fd); isTruncated { - return err - } - default: - } - b, err := reader.ReadByte() if err != nil { if err != io.EOF { return err } + select { + case <-truncate: + if isTruncated, err := f.truncated(fd); isTruncated { + return err + } + case <-ctx.Done(): + return nil + default: + } if !f.seekEOF { logger.Info(f.FilePath(), "End of file reached") return nil @@ -207,7 +206,6 @@ func (f readFile) read(ctx context.Context, fd *os.File, rawLines chan *bytes.Bu select { case rawLines <- message: message = pool.BytesBuffer.Get().(*bytes.Buffer) - //fmt.Printf("%d %d %p\n", message.Len(), message.Cap(), message) case <-ctx.Done(): return nil } -- cgit v1.2.3 From 16dc57e1e1c28e9d762424e596223a980770e059 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Wed, 8 Sep 2021 19:10:50 +0300 Subject: mapreduce tables are in colors now too --- internal/io/fs/readfile.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'internal/io/fs') diff --git a/internal/io/fs/readfile.go b/internal/io/fs/readfile.go index f2f672a..c0d44dd 100644 --- a/internal/io/fs/readfile.go +++ b/internal/io/fs/readfile.go @@ -16,7 +16,6 @@ import ( "github.com/mimecast/dtail/internal/io/line" "github.com/mimecast/dtail/internal/io/logger" "github.com/mimecast/dtail/internal/io/pool" - "github.com/mimecast/dtail/internal/protocol" "github.com/mimecast/dtail/internal/regex" "github.com/DataDog/zstd" @@ -187,7 +186,7 @@ func (f readFile) read(ctx context.Context, fd *os.File, rawLines chan *bytes.Bu time.Sleep(time.Millisecond * 100) continue } - message.WriteByte(protocol.MessageDelimiter) + message.WriteString("\n") select { case rawLines <- message: message = pool.BytesBuffer.Get().(*bytes.Buffer) @@ -202,7 +201,7 @@ func (f readFile) read(ctx context.Context, fd *os.File, rawLines chan *bytes.Bu f.serverMessages <- logger.Warn(f.filePath, "Long log line, splitting into multiple lines") warnedAboutLongLine = true } - message.WriteByte(protocol.MessageDelimiter) + message.WriteString("\n") select { case rawLines <- message: message = pool.BytesBuffer.Get().(*bytes.Buffer) -- cgit v1.2.3 From 6506e20f6c80f4acb7434eb9dd14f784a67189cd Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Sat, 18 Sep 2021 14:41:25 +0300 Subject: add spartan mode --- internal/io/fs/readfile.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'internal/io/fs') diff --git a/internal/io/fs/readfile.go b/internal/io/fs/readfile.go index c0d44dd..ec33c60 100644 --- a/internal/io/fs/readfile.go +++ b/internal/io/fs/readfile.go @@ -182,11 +182,14 @@ func (f readFile) read(ctx context.Context, fd *os.File, rawLines chan *bytes.Bu switch b { case '\n': - if message.Len() == 0 { - time.Sleep(time.Millisecond * 100) - continue - } - message.WriteString("\n") + /* + // dcat/dgrep should actually transfer empty lines + if message.Len() == 0 { + time.Sleep(time.Millisecond * 100) + continue + } + */ + //message.WriteString("\n") select { case rawLines <- message: message = pool.BytesBuffer.Get().(*bytes.Buffer) -- cgit v1.2.3 From fe3e68afd99d8ea246be52893730f987e138ec24 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Sun, 19 Sep 2021 13:22:59 +0300 Subject: move args to config package logger package rewrite as dlog --- internal/io/fs/permissions/permission.go | 4 ++-- internal/io/fs/readfile.go | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'internal/io/fs') diff --git a/internal/io/fs/permissions/permission.go b/internal/io/fs/permissions/permission.go index cc5dd9b..bbcb74e 100644 --- a/internal/io/fs/permissions/permission.go +++ b/internal/io/fs/permissions/permission.go @@ -3,12 +3,12 @@ package permissions import ( - "github.com/mimecast/dtail/internal/io/logger" + "github.com/mimecast/dtail/internal/io/dlog" ) // ToRead is to check whether user has read permissions to a given file. func ToRead(user, filePath string) (bool, error) { // Only implemented for Linux, always expect true - logger.Warn(user, filePath, "Not performing ACL check, not supported on this platform") + dlog.Common.Warn(user, filePath, "Not performing ACL check, not supported on this platform") return true, nil } diff --git a/internal/io/fs/readfile.go b/internal/io/fs/readfile.go index ec33c60..07486a1 100644 --- a/internal/io/fs/readfile.go +++ b/internal/io/fs/readfile.go @@ -14,7 +14,7 @@ import ( "time" "github.com/mimecast/dtail/internal/io/line" - "github.com/mimecast/dtail/internal/io/logger" + "github.com/mimecast/dtail/internal/io/dlog" "github.com/mimecast/dtail/internal/io/pool" "github.com/mimecast/dtail/internal/regex" @@ -62,7 +62,7 @@ func (f readFile) Retry() bool { // Start tailing a log file. func (f readFile) Start(ctx context.Context, lines chan<- line.Line, re regex.Regex) error { - logger.Debug("readFile", f) + dlog.Common.Debug("readFile", f) defer func() { select { case <-f.limiter: @@ -74,7 +74,7 @@ func (f readFile) Start(ctx context.Context, lines chan<- line.Line, re regex.Re case f.limiter <- struct{}{}: default: select { - case f.serverMessages <- logger.Warn(f.filePath, f.globID, "Server limit reached. Queuing file..."): + case f.serverMessages <- dlog.Common.Warn(f.filePath, f.globID, "Server limit reached. Queuing file..."): case <-ctx.Done(): return nil } @@ -126,7 +126,7 @@ func (f readFile) makeReader(fd *os.File) (reader *bufio.Reader, err error) { case strings.HasSuffix(f.FilePath(), ".gz"): fallthrough case strings.HasSuffix(f.FilePath(), ".gzip"): - logger.Info(f.FilePath(), "Detected gzip compression format") + dlog.Common.Info(f.FilePath(), "Detected gzip compression format") var gzipReader *gzip.Reader gzipReader, err = gzip.NewReader(fd) if err != nil { @@ -134,7 +134,7 @@ func (f readFile) makeReader(fd *os.File) (reader *bufio.Reader, err error) { } reader = bufio.NewReader(gzipReader) case strings.HasSuffix(f.FilePath(), ".zst"): - logger.Info(f.FilePath(), "Detected zstd compression format") + dlog.Common.Info(f.FilePath(), "Detected zstd compression format") reader = bufio.NewReader(zstd.NewReader(fd)) default: reader = bufio.NewReader(fd) @@ -172,7 +172,7 @@ func (f readFile) read(ctx context.Context, fd *os.File, rawLines chan *bytes.Bu default: } if !f.seekEOF { - logger.Info(f.FilePath(), "End of file reached") + dlog.Common.Info(f.FilePath(), "End of file reached") return nil } time.Sleep(time.Millisecond * 100) @@ -201,7 +201,7 @@ func (f readFile) read(ctx context.Context, fd *os.File, rawLines chan *bytes.Bu default: if message.Len() >= lineLengthThreshold { if !warnedAboutLongLine { - f.serverMessages <- logger.Warn(f.filePath, "Long log line, splitting into multiple lines") + f.serverMessages <- dlog.Common.Warn(f.filePath, "Long log line, splitting into multiple lines") warnedAboutLongLine = true } message.WriteString("\n") @@ -268,7 +268,7 @@ func (f readFile) transmittable(lineBytes *bytes.Buffer, length, capacity int, r // Check wether log file is truncated. Returns nil if not. func (f readFile) truncated(fd *os.File) (bool, error) { - logger.Debug(f.filePath, "File truncation check") + dlog.Common.Debug(f.filePath, "File truncation check") // Can not seek currently open FD. curPos, err := fd.Seek(0, os.SEEK_CUR) -- cgit v1.2.3 From 86ec83754e0ee7153ad55091f7b6da448bc529c5 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Sat, 2 Oct 2021 13:44:27 +0300 Subject: add dcat test --- internal/io/fs/readfile.go | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'internal/io/fs') diff --git a/internal/io/fs/readfile.go b/internal/io/fs/readfile.go index 07486a1..f128c07 100644 --- a/internal/io/fs/readfile.go +++ b/internal/io/fs/readfile.go @@ -13,8 +13,8 @@ import ( "sync" "time" - "github.com/mimecast/dtail/internal/io/line" "github.com/mimecast/dtail/internal/io/dlog" + "github.com/mimecast/dtail/internal/io/line" "github.com/mimecast/dtail/internal/io/pool" "github.com/mimecast/dtail/internal/regex" @@ -182,14 +182,6 @@ func (f readFile) read(ctx context.Context, fd *os.File, rawLines chan *bytes.Bu switch b { case '\n': - /* - // dcat/dgrep should actually transfer empty lines - if message.Len() == 0 { - time.Sleep(time.Millisecond * 100) - continue - } - */ - //message.WriteString("\n") select { case rawLines <- message: message = pool.BytesBuffer.Get().(*bytes.Buffer) -- cgit v1.2.3 From fab5dc3e70434ea0abc7a0976487a1973b662331 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Wed, 6 Oct 2021 09:50:41 +0300 Subject: enable faster shutdown - useful for dgrep/dmap and dcat commands --- internal/io/fs/permissions/permission.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'internal/io/fs') diff --git a/internal/io/fs/permissions/permission.go b/internal/io/fs/permissions/permission.go index bbcb74e..e80dbb2 100644 --- a/internal/io/fs/permissions/permission.go +++ b/internal/io/fs/permissions/permission.go @@ -9,6 +9,6 @@ import ( // ToRead is to check whether user has read permissions to a given file. func ToRead(user, filePath string) (bool, error) { // Only implemented for Linux, always expect true - dlog.Common.Warn(user, filePath, "Not performing ACL check, not supported on this platform") + dlog.Common.Warn(user, filePath, "Not performing ACL check as not compiled in") return true, nil } -- cgit v1.2.3 From 97747ea0f3178f7f5890512d483fdccaa82846b0 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Sat, 9 Oct 2021 21:10:29 +0300 Subject: vetting and linting and some code restyling --- internal/io/fs/catfile.go | 4 +++- internal/io/fs/filereader.go | 3 ++- internal/io/fs/permissions/permission_linuxacl.go | 2 +- internal/io/fs/readfile.go | 29 ++++++++++++----------- internal/io/fs/tailfile.go | 4 +++- 5 files changed, 24 insertions(+), 18 deletions(-) (limited to 'internal/io/fs') diff --git a/internal/io/fs/catfile.go b/internal/io/fs/catfile.go index 7f387bc..01c15ba 100644 --- a/internal/io/fs/catfile.go +++ b/internal/io/fs/catfile.go @@ -6,7 +6,9 @@ type CatFile struct { } // NewCatFile returns a new file catter. -func NewCatFile(filePath string, globID string, serverMessages chan<- string, limiter chan struct{}) CatFile { +func NewCatFile(filePath string, globID string, serverMessages chan<- string, + limiter chan struct{}) CatFile { + return CatFile{ readFile: readFile{ filePath: filePath, diff --git a/internal/io/fs/filereader.go b/internal/io/fs/filereader.go index 0774837..7773142 100644 --- a/internal/io/fs/filereader.go +++ b/internal/io/fs/filereader.go @@ -7,7 +7,8 @@ import ( "github.com/mimecast/dtail/internal/regex" ) -// FileReader is the interface used on the dtail server to read/cat/grep/mapr... a file. +// FileReader is the interface used on the dtail server to read/cat/grep/mapr... +// a file. type FileReader interface { Start(ctx context.Context, lines chan<- line.Line, re regex.Regex) error FilePath() string diff --git a/internal/io/fs/permissions/permission_linuxacl.go b/internal/io/fs/permissions/permission_linuxacl.go index 7d2d7ca..904b90f 100644 --- a/internal/io/fs/permissions/permission_linuxacl.go +++ b/internal/io/fs/permissions/permission_linuxacl.go @@ -13,7 +13,7 @@ import ( "unsafe" ) -// ToRead checks whether user has Linux file system permissions to read a given file. +// ToRead checks whether user has Linux file system permissions to read a file. func ToRead(user, filePath string) (bool, error) { cUser := C.CString(user) cFilePath := C.CString(filePath) diff --git a/internal/io/fs/readfile.go b/internal/io/fs/readfile.go index f128c07..92f85b6 100644 --- a/internal/io/fs/readfile.go +++ b/internal/io/fs/readfile.go @@ -42,7 +42,8 @@ type readFile struct { // String returns the string representation of the readFile func (f readFile) String() string { - return fmt.Sprintf("readFile(filePath:%s,globID:%s,retry:%v,canSkipLines:%v,seekEOF:%v)", + return fmt.Sprintf( + "readFile(filePath:%s,globID:%s,retry:%v,canSkipLines:%v,seekEOF:%v)", f.filePath, f.globID, f.retry, @@ -61,7 +62,9 @@ func (f readFile) Retry() bool { } // Start tailing a log file. -func (f readFile) Start(ctx context.Context, lines chan<- line.Line, re regex.Regex) error { +func (f readFile) Start(ctx context.Context, lines chan<- line.Line, + re regex.Regex) error { + dlog.Common.Debug("readFile", f) defer func() { select { @@ -74,7 +77,8 @@ func (f readFile) Start(ctx context.Context, lines chan<- line.Line, re regex.Re case f.limiter <- struct{}{}: default: select { - case f.serverMessages <- dlog.Common.Warn(f.filePath, f.globID, "Server limit reached. Queuing file..."): + case f.serverMessages <- dlog.Common.Warn(f.filePath, f.globID, + "Server limit reached. Queuing file..."): case <-ctx.Done(): return nil } @@ -139,13 +143,11 @@ func (f readFile) makeReader(fd *os.File) (reader *bufio.Reader, err error) { default: reader = bufio.NewReader(fd) } - return } func (f readFile) read(ctx context.Context, fd *os.File, rawLines chan *bytes.Buffer, truncate <-chan struct{}) error { var offset uint64 - reader, err := f.makeReader(fd) if err != nil { return err @@ -193,7 +195,8 @@ func (f readFile) read(ctx context.Context, fd *os.File, rawLines chan *bytes.Bu default: if message.Len() >= lineLengthThreshold { if !warnedAboutLongLine { - f.serverMessages <- dlog.Common.Warn(f.filePath, "Long log line, splitting into multiple lines") + f.serverMessages <- dlog.Common.Warn(f.filePath, + "Long log line, splitting into multiple lines") warnedAboutLongLine = true } message.WriteString("\n") @@ -210,9 +213,10 @@ func (f readFile) read(ctx context.Context, fd *os.File, rawLines chan *bytes.Bu } // Filter log lines matching a given regular expression. -func (f readFile) filter(ctx context.Context, wg *sync.WaitGroup, rawLines <-chan *bytes.Buffer, lines chan<- line.Line, re regex.Regex) { - defer wg.Done() +func (f readFile) filter(ctx context.Context, wg *sync.WaitGroup, + rawLines <-chan *bytes.Buffer, lines chan<- line.Line, re regex.Regex) { + defer wg.Done() for { select { case line, ok := <-rawLines: @@ -231,9 +235,10 @@ func (f readFile) filter(ctx context.Context, wg *sync.WaitGroup, rawLines <-cha } } -func (f readFile) transmittable(lineBytes *bytes.Buffer, length, capacity int, re regex.Regex) (line.Line, bool) { - var read line.Line +func (f readFile) transmittable(lineBytes *bytes.Buffer, length, capacity int, + re regex.Regex) (line.Line, bool) { + var read line.Line if !re.Match(lineBytes.Bytes()) { f.updateLineNotMatched() f.updateLineNotTransmitted() @@ -254,7 +259,6 @@ func (f readFile) transmittable(lineBytes *bytes.Buffer, length, capacity int, r Count: f.totalLineCount(), TransmittedPerc: f.transmittedPerc(), } - return read, true } @@ -267,7 +271,6 @@ func (f readFile) truncated(fd *os.File) (bool, error) { if err != nil { return true, err } - // Can not open file at original path. pathFd, err := os.Open(f.filePath) if err != nil { @@ -280,10 +283,8 @@ func (f readFile) truncated(fd *os.File) (bool, error) { if err != nil { return true, err } - if curPos > pathPos { return true, errors.New("File got truncated") } - return false, nil } diff --git a/internal/io/fs/tailfile.go b/internal/io/fs/tailfile.go index 14994e5..b03b45d 100644 --- a/internal/io/fs/tailfile.go +++ b/internal/io/fs/tailfile.go @@ -6,7 +6,9 @@ type TailFile struct { } // NewTailFile returns a new file tailer. -func NewTailFile(filePath string, globID string, serverMessages chan<- string, limiter chan struct{}) TailFile { +func NewTailFile(filePath string, globID string, serverMessages chan<- string, + limiter chan struct{}) TailFile { + return TailFile{ readFile: readFile{ filePath: filePath, -- cgit v1.2.3 From f44792c9102488774c9993b080f35c65287a64b1 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Sun, 10 Oct 2021 14:02:12 +0300 Subject: add another dmap test - reading 100 source files at once fix a data race when reading multiple files on one server from the same session at once --- internal/io/fs/permissions/permission.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'internal/io/fs') diff --git a/internal/io/fs/permissions/permission.go b/internal/io/fs/permissions/permission.go index e80dbb2..d621c09 100644 --- a/internal/io/fs/permissions/permission.go +++ b/internal/io/fs/permissions/permission.go @@ -9,6 +9,6 @@ import ( // ToRead is to check whether user has read permissions to a given file. func ToRead(user, filePath string) (bool, error) { // Only implemented for Linux, always expect true - dlog.Common.Warn(user, filePath, "Not performing ACL check as not compiled in") + dlog.Common.Debug(user, filePath, "Not performing ACL check as not compiled in") return true, nil } -- cgit v1.2.3 From 1dead22129a26e4f532e68c2c63fe4122b519506 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Wed, 13 Oct 2021 21:10:28 +0300 Subject: Merging grep context from master --- internal/io/fs/filereader.go | 4 +- internal/io/fs/readfile.go | 149 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 144 insertions(+), 9 deletions(-) (limited to 'internal/io/fs') diff --git a/internal/io/fs/filereader.go b/internal/io/fs/filereader.go index 7773142..b05fd39 100644 --- a/internal/io/fs/filereader.go +++ b/internal/io/fs/filereader.go @@ -4,13 +4,15 @@ import ( "context" "github.com/mimecast/dtail/internal/io/line" + "github.com/mimecast/dtail/internal/lcontext" "github.com/mimecast/dtail/internal/regex" ) // FileReader is the interface used on the dtail server to read/cat/grep/mapr... // a file. type FileReader interface { - Start(ctx context.Context, lines chan<- line.Line, re regex.Regex) error + Start(ctx context.Context, ltx lcontext.LContext, lines chan<- line.Line, + re regex.Regex) error FilePath() string Retry() bool } diff --git a/internal/io/fs/readfile.go b/internal/io/fs/readfile.go index 92f85b6..88d467e 100644 --- a/internal/io/fs/readfile.go +++ b/internal/io/fs/readfile.go @@ -16,6 +16,7 @@ import ( "github.com/mimecast/dtail/internal/io/dlog" "github.com/mimecast/dtail/internal/io/line" "github.com/mimecast/dtail/internal/io/pool" + "github.com/mimecast/dtail/internal/lcontext" "github.com/mimecast/dtail/internal/regex" "github.com/DataDog/zstd" @@ -62,8 +63,8 @@ func (f readFile) Retry() bool { } // Start tailing a log file. -func (f readFile) Start(ctx context.Context, lines chan<- line.Line, - re regex.Regex) error { +func (f readFile) Start(ctx context.Context, ltx lcontext.LContext, + lines chan<- line.Line, re regex.Regex) error { dlog.Common.Debug("readFile", f) defer func() { @@ -102,7 +103,7 @@ func (f readFile) Start(ctx context.Context, lines chan<- line.Line, wg.Add(1) go f.periodicTruncateCheck(ctx, truncate) - go f.filter(ctx, &wg, rawLines, lines, re) + go f.filter(ctx, ltx, &wg, rawLines, lines, re) err = f.read(ctx, fd, rawLines, truncate) close(rawLines) @@ -213,10 +214,27 @@ func (f readFile) read(ctx context.Context, fd *os.File, rawLines chan *bytes.Bu } // Filter log lines matching a given regular expression. -func (f readFile) filter(ctx context.Context, wg *sync.WaitGroup, - rawLines <-chan *bytes.Buffer, lines chan<- line.Line, re regex.Regex) { +func (f readFile) filter(ctx context.Context, ltx lcontext.LContext, + wg *sync.WaitGroup, rawLines <-chan *bytes.Buffer, lines chan<- line.Line, + re regex.Regex) { defer wg.Done() + // Do we have any kind of local context settings? If so then run the more complex + // filterWithLContext method. + if ltx.Has() { + // We can not skip transmitting any lines to the client with a local + // grep context specified. + f.canSkipLines = false + f.filterWithLContext(ctx, ltx, rawLines, lines, re) + return + } + + f.filterWithoutLContext(ctx, rawLines, lines, re) +} + +func (f readFile) filterWithoutLContext(ctx context.Context, rawLines <-chan *bytes.Buffer, + lines chan<- line.Line, re regex.Regex) { + for { select { case line, ok := <-rawLines: @@ -235,11 +253,126 @@ func (f readFile) filter(ctx context.Context, wg *sync.WaitGroup, } } -func (f readFile) transmittable(lineBytes *bytes.Buffer, length, capacity int, +// Filter log lines matching a given regular expression, however with local grep context. +func (f readFile) filterWithLContext(ctx context.Context, ltx lcontext.LContext, + rawLines <-chan *bytes.Buffer, lines chan<- line.Line, re regex.Regex) { + + // Scenario 1: Finish once maxCount hits found + maxCount := ltx.MaxCount + processMaxCount := maxCount > 0 + maxReached := false + + // Scenario 2: Print prev. N lines when current line matches. + before := ltx.BeforeContext + processBefore := before > 0 + var beforeBuf chan *bytes.Buffer + if processBefore { + beforeBuf = make(chan *bytes.Buffer, before) + } + + // Screnario 3: Print next N lines when current line matches. + after := 0 + processAfter := ltx.AfterContext > 0 + + for lineBytesBuffer := range rawLines { + f.updatePosition() + + if !re.Match(lineBytesBuffer.Bytes()) { + f.updateLineNotMatched() + + if processAfter && after > 0 { + after-- + myLine := line.Line{ + Content: lineBytesBuffer, + SourceID: f.globID, + Count: f.totalLineCount(), + TransmittedPerc: 100, + } + + select { + case lines <- myLine: + case <-ctx.Done(): + return + } + + } else if processBefore { + // Keep last num BeforeContext raw messages. + select { + case beforeBuf <- lineBytesBuffer: + default: + pool.RecycleBytesBuffer(<-beforeBuf) + beforeBuf <- lineBytesBuffer + } + } + continue + } + + f.updateLineMatched() + + if processAfter { + if maxReached { + return + } + after = ltx.AfterContext + } + + if processBefore { + i := uint64(len(beforeBuf)) + for { + select { + case lineBytesBuffer := <-beforeBuf: + myLine := line.Line{ + Content: lineBytesBuffer, + SourceID: f.globID, + Count: f.totalLineCount() - i, + TransmittedPerc: 100, + } + i-- + + select { + case lines <- myLine: + case <-ctx.Done(): + return + } + default: + // beforeBuf is now empty. + } + if len(beforeBuf) == 0 { + break + } + } + } + + line := line.Line{ + Content: lineBytesBuffer, + SourceID: f.globID, + Count: f.totalLineCount(), + TransmittedPerc: 100, + } + + select { + case lines <- line: + if processMaxCount { + maxCount-- + if maxCount == 0 { + if !processAfter || after == 0 { + return + } + // Unfortunatley we have to continue filter, as there might be more lines to print + maxReached = true + } + } + case <-ctx.Done(): + return + } + } +} + +func (f readFile) transmittable(lineBytesBuffer *bytes.Buffer, length, capacity int, re regex.Regex) (line.Line, bool) { var read line.Line - if !re.Match(lineBytes.Bytes()) { + if !re.Match(lineBytesBuffer.Bytes()) { f.updateLineNotMatched() f.updateLineNotTransmitted() return read, false @@ -254,7 +387,7 @@ func (f readFile) transmittable(lineBytes *bytes.Buffer, length, capacity int, f.updateLineTransmitted() read = line.Line{ - Content: lineBytes, + Content: lineBytesBuffer, SourceID: f.globID, Count: f.totalLineCount(), TransmittedPerc: f.transmittedPerc(), -- cgit v1.2.3 From 06ece112c0dd20c0c211c538216fe64ebe4045c9 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Thu, 14 Oct 2021 20:10:55 +0300 Subject: add dgrep context integration tests --- internal/io/fs/readfile.go | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'internal/io/fs') diff --git a/internal/io/fs/readfile.go b/internal/io/fs/readfile.go index 88d467e..28cbe58 100644 --- a/internal/io/fs/readfile.go +++ b/internal/io/fs/readfile.go @@ -99,15 +99,24 @@ func (f readFile) Start(ctx context.Context, ltx lcontext.LContext, rawLines := make(chan *bytes.Buffer, 100) truncate := make(chan struct{}) - var wg sync.WaitGroup - wg.Add(1) + readCtx, readCancel := context.WithCancel(ctx) + var filterWg sync.WaitGroup + filterWg.Add(1) go f.periodicTruncateCheck(ctx, truncate) - go f.filter(ctx, ltx, &wg, rawLines, lines, re) + go func() { + f.filter(ctx, ltx, rawLines, lines, re) + filterWg.Done() + // If the filter stopped, make the reader stop too, no need to read + // more data if there is nothing more the filter wants to filter for! + // E.g. it could be that we only want to filter N matches but not more. + readCancel() + }() - err = f.read(ctx, fd, rawLines, truncate) + err = f.read(readCtx, fd, rawLines, truncate) close(rawLines) - wg.Wait() + // Filter may sends some data still. So wait until it is done here. + filterWg.Wait() return err } @@ -215,10 +224,8 @@ func (f readFile) read(ctx context.Context, fd *os.File, rawLines chan *bytes.Bu // Filter log lines matching a given regular expression. func (f readFile) filter(ctx context.Context, ltx lcontext.LContext, - wg *sync.WaitGroup, rawLines <-chan *bytes.Buffer, lines chan<- line.Line, - re regex.Regex) { + rawLines <-chan *bytes.Buffer, lines chan<- line.Line, re regex.Regex) { - defer wg.Done() // Do we have any kind of local context settings? If so then run the more complex // filterWithLContext method. if ltx.Has() { -- cgit v1.2.3