diff options
| author | Paul Buetow <paul@buetow.org> | 2021-10-24 12:59:08 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2021-10-24 12:59:08 +0300 |
| commit | 995c850d1f07f6221558d1c01924f2da6294f4ec (patch) | |
| tree | 80e5c86086ce7157e43a6fba08fb8fe9edae9707 /internal | |
| parent | 1ff2e424cc99a979ceac461c28b03605c46a669f (diff) | |
Fix deadlock around aggregating data + server max concurrent file read limiter
Diffstat (limited to 'internal')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | internal/clients/connectors/serverless.go | 1 |
| -rw-r--r-- | internal/io/fs/catfile.go | 5 |
| -rw-r--r-- | internal/io/fs/readfile.go | 26 |
| -rw-r--r-- | internal/io/fs/tailfile.go | 5 |
| -rw-r--r-- | internal/mapr/server/aggregate.go | 93 |
| -rw-r--r-- | internal/server/handlers/basehandler.go | 1 |
| -rw-r--r-- | internal/server/handlers/readcommand.go | 39 |
7 files changed, 103 insertions, 67 deletions
diff --git a/internal/clients/connectors/serverless.go b/internal/clients/connectors/serverless.go index 2ff490a..431247a 100644 --- a/internal/clients/connectors/serverless.go +++ b/internal/clients/connectors/serverless.go @@ -47,7 +47,6 @@ func (s *Serverless) Start(ctx context.Context, cancel context.CancelFunc, dlog.Client.Debug("Starting serverless connector") go func() { defer cancel() - if err := s.handle(ctx, cancel); err != nil { dlog.Client.Warn(err) } diff --git a/internal/io/fs/catfile.go b/internal/io/fs/catfile.go index 01c15ba..e4676f3 100644 --- a/internal/io/fs/catfile.go +++ b/internal/io/fs/catfile.go @@ -6,9 +6,7 @@ type CatFile struct { } // NewCatFile returns a new file catter. -func NewCatFile(filePath string, globID string, serverMessages chan<- string, - limiter chan struct{}) CatFile { - +func NewCatFile(filePath string, globID string, serverMessages chan<- string) CatFile { return CatFile{ readFile: readFile{ filePath: filePath, @@ -17,7 +15,6 @@ func NewCatFile(filePath string, globID string, serverMessages chan<- string, retry: false, canSkipLines: false, seekEOF: false, - limiter: limiter, }, } } diff --git a/internal/io/fs/readfile.go b/internal/io/fs/readfile.go index 28cbe58..5815aa3 100644 --- a/internal/io/fs/readfile.go +++ b/internal/io/fs/readfile.go @@ -38,7 +38,6 @@ type readFile struct { canSkipLines bool // Seek to the EOF before processing file? seekEOF bool - limiter chan struct{} } // String returns the string representation of the readFile @@ -66,25 +65,7 @@ func (f readFile) Retry() bool { func (f readFile) Start(ctx context.Context, ltx lcontext.LContext, lines chan<- line.Line, re regex.Regex) error { - dlog.Common.Debug("readFile", f) - defer func() { - select { - case <-f.limiter: - default: - } - }() - - select { - case f.limiter <- struct{}{}: - default: - select { - case f.serverMessages <- dlog.Common.Warn(f.filePath, f.globID, - "Server limit reached. 
Queuing file..."): - case <-ctx.Done(): - return nil - } - f.limiter <- struct{}{} - } + dlog.Common.Trace("readFile", f) fd, err := os.Open(f.filePath) if err != nil { @@ -156,7 +137,9 @@ func (f readFile) makeReader(fd *os.File) (reader *bufio.Reader, err error) { return } -func (f readFile) read(ctx context.Context, fd *os.File, rawLines chan *bytes.Buffer, truncate <-chan struct{}) error { +func (f readFile) read(ctx context.Context, fd *os.File, rawLines chan *bytes.Buffer, + truncate <-chan struct{}) error { + var offset uint64 reader, err := f.makeReader(fd) if err != nil { @@ -250,6 +233,7 @@ func (f readFile) filterWithoutLContext(ctx context.Context, rawLines <-chan *by return } if filteredLine, ok := f.transmittable(line, len(lines), cap(lines), re); ok { + //dlog.Common.Trace("TODO", "lines", lines, len(lines), cap(lines)) select { case lines <- filteredLine: case <-ctx.Done(): diff --git a/internal/io/fs/tailfile.go b/internal/io/fs/tailfile.go index b03b45d..7a40ac4 100644 --- a/internal/io/fs/tailfile.go +++ b/internal/io/fs/tailfile.go @@ -6,9 +6,7 @@ type TailFile struct { } // NewTailFile returns a new file tailer. -func NewTailFile(filePath string, globID string, serverMessages chan<- string, - limiter chan struct{}) TailFile { - +func NewTailFile(filePath string, globID string, serverMessages chan<- string) TailFile { return TailFile{ readFile: readFile{ filePath: filePath, @@ -17,7 +15,6 @@ func NewTailFile(filePath string, globID string, serverMessages chan<- string, retry: true, canSkipLines: true, seekEOF: true, - limiter: limiter, }, } } diff --git a/internal/mapr/server/aggregate.go b/internal/mapr/server/aggregate.go index 97fee11..11c9ee5 100644 --- a/internal/mapr/server/aggregate.go +++ b/internal/mapr/server/aggregate.go @@ -20,6 +20,7 @@ type Aggregate struct { done *internal.Done // NextLinesCh can be used to use a new line ch. 
NextLinesCh chan chan line.Line + linesCh chan line.Line // Hostname of the current server (used to populate $hostname field). hostname string // Signals to serialize data. @@ -113,58 +114,84 @@ func (a *Aggregate) aggregateTimer(ctx context.Context) { } } +func (a *Aggregate) nextLine() (line line.Line, ok bool, noMoreChannels bool) { + + dlog.Common.Trace("nextLine", "entry", line, ok, noMoreChannels) + select { + case line, ok = <-a.linesCh: + if !ok { + // Channel is closed, go to next channel. + select { + case a.linesCh = <-a.NextLinesCh: + default: + noMoreChannels = true + } + } + default: + // No new line from current lines channel. Try next one. + select { + case newLinesCh := <-a.NextLinesCh: + oldLinesCh := a.linesCh + go func() { a.NextLinesCh <- oldLinesCh }() + a.linesCh = newLinesCh + default: + // No new lines channel found. + } + } + dlog.Common.Trace("nextLine", "exit", line, ok, noMoreChannels) + + return +} + func (a *Aggregate) fieldsFromLines(ctx context.Context) <-chan map[string]string { fieldsCh := make(chan map[string]string) go func() { defer close(fieldsCh) - var lines chan line.Line // Gather first lines channel (first input file) select { - case lines = <-a.NextLinesCh: + case a.linesCh = <-a.NextLinesCh: case <-ctx.Done(): return } for { select { - case line, ok := <-lines: - if !ok { - select { - case lines = <-a.NextLinesCh: - // Have a new lines channel (e.g. new input file) - case <-ctx.Done(): - default: - // No new lines channel found. - return - } - } + case <-ctx.Done(): + return + default: + } - maprLine := strings.TrimSpace(line.Content.String()) - fields, err := a.parser.MakeFields(maprLine) - // Can't recycle it here yet, as field slices are still - // TODO: Add unit test reading from multiple mapreduce files lines. - // TODO: Add capability to recycle this bytes buffer. - //pool.RecycleBytesBuffer(line.Content) - - if err != nil { - // Should fields be ignored anyway? 
- if err != logformat.ErrIgnoreFields { - dlog.Common.Error(fields, err) - } - continue - } - if !a.query.WhereClause(fields) { - continue + // Gather first lines channel (first input file) + line, ok, noMoreChannels := a.nextLine() + if !ok { + if noMoreChannels { + break } + time.Sleep(time.Millisecond * 100) + } + + maprLine := strings.TrimSpace(line.Content.String()) + fields, err := a.parser.MakeFields(maprLine) + // Can't recycle it here yet, as field slices are still + // MAYBETODO: Add capability to recycle this bytes buffer. + //pool.RecycleBytesBuffer(line.Content) - select { - case fieldsCh <- fields: - case <-ctx.Done(): + if err != nil { + // Should fields be ignored anyway? + if err != logformat.ErrIgnoreFields { + dlog.Common.Error(fields, err) } + continue + } + if !a.query.WhereClause(fields) { + continue + } + + select { + case fieldsCh <- fields: case <-ctx.Done(): - return } } }() diff --git a/internal/server/handlers/basehandler.go b/internal/server/handlers/basehandler.go index 6d10d17..53bf375 100644 --- a/internal/server/handlers/basehandler.go +++ b/internal/server/handlers/basehandler.go @@ -114,7 +114,6 @@ func (h *baseHandler) Read(p []byte) (n int, err error) { pool.RecycleBytesBuffer(line.Content) case <-time.After(time.Second): - // Once in a while check whether we are done. 
select { case <-h.done.Done(): err = io.EOF diff --git a/internal/server/handlers/readcommand.go b/internal/server/handlers/readcommand.go index 4728a55..51077fc 100644 --- a/internal/server/handlers/readcommand.go +++ b/internal/server/handlers/readcommand.go @@ -109,18 +109,51 @@ func (r *readCommand) readFileIfPermissions(ctx context.Context, ltx lcontext.LC r.readFile(ctx, ltx, path, globID, re) } +func (*readCommand) limit(ctx context.Context, limiter chan struct{}, message string) { + select { + case <-ctx.Done(): + return + } +} + func (r *readCommand) readFile(ctx context.Context, ltx lcontext.LContext, path, globID string, re regex.Regex) { dlog.Server.Info(r.server.user, "Start reading file", path, globID) var reader fs.FileReader + var limiter chan struct{} + switch r.mode { case omode.TailClient: - reader = fs.NewTailFile(path, globID, r.server.serverMessages, r.server.tailLimiter) + reader = fs.NewTailFile(path, globID, r.server.serverMessages) + limiter = r.server.tailLimiter case omode.GrepClient, omode.CatClient: - reader = fs.NewCatFile(path, globID, r.server.serverMessages, r.server.catLimiter) + reader = fs.NewCatFile(path, globID, r.server.serverMessages) + limiter = r.server.catLimiter default: - reader = fs.NewTailFile(path, globID, r.server.serverMessages, r.server.tailLimiter) + reader = fs.NewTailFile(path, globID, r.server.serverMessages) + limiter = r.server.tailLimiter + } + + defer func() { + select { + case <-limiter: + default: + } + }() + + select { + case limiter <- struct{}{}: + case <-ctx.Done(): + return + default: + dlog.Server.Info("Server limit hit, queueing file", len(limiter), path) + select { + case limiter <- struct{}{}: + dlog.Server.Info("Server limit OK now, processing file", len(limiter), path) + case <-ctx.Done(): + return + } } lines := r.server.lines |
