From 6d727b9bdbc387c8a5c34406a2c4de9140face38 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Sat, 28 Aug 2021 19:36:46 +0100 Subject: use a byte.Buffer in the file reader --- internal/mapr/server/aggregate.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'internal/mapr/server') diff --git a/internal/mapr/server/aggregate.go b/internal/mapr/server/aggregate.go index 28bb074..9106f52 100644 --- a/internal/mapr/server/aggregate.go +++ b/internal/mapr/server/aggregate.go @@ -10,6 +10,7 @@ import ( "github.com/mimecast/dtail/internal/config" "github.com/mimecast/dtail/internal/io/line" "github.com/mimecast/dtail/internal/io/logger" + "github.com/mimecast/dtail/internal/io/pool" "github.com/mimecast/dtail/internal/mapr" "github.com/mimecast/dtail/internal/mapr/logformat" ) @@ -136,7 +137,8 @@ func (a *Aggregate) makeFields(ctx context.Context) <-chan map[string]string { return } - maprLine := strings.TrimSpace(string(line.Content)) + maprLine := strings.TrimSpace(line.Content.String()) + pool.RecycleBytesBuffer(line.Content) fields, err := a.parser.MakeFields(maprLine) logger.Debug(fields, err) -- cgit v1.2.3 From 16dc57e1e1c28e9d762424e596223a980770e059 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Wed, 8 Sep 2021 19:10:50 +0300 Subject: mapreduce tables are in colors now too --- internal/mapr/server/aggregate.go | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'internal/mapr/server') diff --git a/internal/mapr/server/aggregate.go b/internal/mapr/server/aggregate.go index 9106f52..a6d6bb1 100644 --- a/internal/mapr/server/aggregate.go +++ b/internal/mapr/server/aggregate.go @@ -13,6 +13,7 @@ import ( "github.com/mimecast/dtail/internal/io/pool" "github.com/mimecast/dtail/internal/mapr" "github.com/mimecast/dtail/internal/mapr/logformat" + "github.com/mimecast/dtail/internal/protocol" ) // Aggregate is for aggregating mapreduce data on the DTail server side. @@ -89,7 +90,6 @@ func (a *Aggregate) Shutdown() { // Start an aggregation. func (a *Aggregate) Start(ctx context.Context, maprLines chan<- string) { - myCtx, cancel := context.WithCancel(ctx) defer cancel() @@ -109,6 +109,7 @@ func (a *Aggregate) Start(ctx context.Context, maprLines chan<- string) { fieldsCh = a.addFields(myCtx, fieldsCh) } + // Periodically pre-aggregate data every a.query.Interval seconds. go a.aggregateTimer(myCtx) a.makeMaprLines(myCtx, fieldsCh, maprLines) } @@ -139,13 +140,16 @@ func (a *Aggregate) makeFields(ctx context.Context) <-chan map[string]string { maprLine := strings.TrimSpace(line.Content.String()) pool.RecycleBytesBuffer(line.Content) - fields, err := a.parser.MakeFields(maprLine) - logger.Debug(fields, err) + fields, err := a.parser.MakeFields(maprLine) if err != nil { - logger.Error(err) + // Should fields be ignored anyway? + if err != logformat.IgnoreFieldsErr { + logger.Error(fields, err) + } continue } + if !a.query.WhereClause(fields) { continue } @@ -170,7 +174,7 @@ func (a *Aggregate) addFields(ctx context.Context, fieldsCh <-chan map[string]st defer close(ch) for { - // fieldsCh will be closed via 'makeFields' if ctx is done + // fieldsCh will be closed via 'makeFields' when ctx is done fields, ok := <-fieldsCh if !ok { return @@ -219,12 +223,11 @@ func (a *Aggregate) makeMaprLines(ctx context.Context, fieldsCh <-chan map[strin } func (a *Aggregate) aggregate(group *mapr.GroupSet, fields map[string]string) { - //logger.Trace("Aggregating", group, fields) var sb strings.Builder for i, field := range a.query.GroupBy { if i > 0 { - sb.WriteString(" ") + sb.WriteString(protocol.AggregateGroupKeyCombinator) } if val, ok := fields[field]; ok { sb.WriteString(val) -- cgit v1.2.3 From 2ebe7e9d63ba62c6f19749c39fe0a577d86ca775 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Sun, 12 Sep 2021 19:04:42 +0300 Subject: bugfix: dmap skipped the last couple of mapreduce lines --- internal/mapr/server/aggregate.go | 102 ++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 60 deletions(-) (limited to 'internal/mapr/server') diff --git a/internal/mapr/server/aggregate.go b/internal/mapr/server/aggregate.go index a6d6bb1..d11ed7d 100644 --- a/internal/mapr/server/aggregate.go +++ b/internal/mapr/server/aggregate.go @@ -19,16 +19,12 @@ import ( // Aggregate is for aggregating mapreduce data on the DTail server side. type Aggregate struct { done *internal.Done - // Log lines to process (parsing MAPREDUCE lines). - Lines chan line.Line + // NextLinesCh can be used to use a new line ch. + NextLinesCh chan chan line.Line // Hostname of the current server (used to populate $hostname field). hostname string // Signals to serialize data. serialize chan struct{} - // Signals to flush data. - flush chan struct{} - // Signals that data has been flushed - flushed chan struct{} // The mapr query query *mapr.Query // The mapr log format parser @@ -69,14 +65,12 @@ func NewAggregate(queryStr string) (*Aggregate, error) { } a := Aggregate{ - done: internal.NewDone(), - Lines: make(chan line.Line, 100), - serialize: make(chan struct{}), - flush: make(chan struct{}), - flushed: make(chan struct{}), - hostname: s[0], - query: query, - parser: logParser, + done: internal.NewDone(), + NextLinesCh: make(chan chan line.Line, 10), + serialize: make(chan struct{}), + hostname: s[0], + query: query, + parser: logParser, } return &a, nil @@ -84,12 +78,11 @@ func NewAggregate(queryStr string) (*Aggregate, error) { // Shutdown the aggregation engine. func (a *Aggregate) Shutdown() { - a.Flush() a.done.Shutdown() } // Start an aggregation. -func (a *Aggregate) Start(ctx context.Context, maprLines chan<- string) { +func (a *Aggregate) Start(ctx context.Context, maprMessages chan<- string) { myCtx, cancel := context.WithCancel(ctx) defer cancel() @@ -102,16 +95,16 @@ func (a *Aggregate) Start(ctx context.Context, maprLines chan<- string) { } }() - fieldsCh := a.makeFields(myCtx) + fieldsCh := a.fieldsFromLines(myCtx) // Add fields (e.g. via 'set' clause) if len(a.query.Set) > 0 { - fieldsCh = a.addFields(myCtx, fieldsCh) + fieldsCh = a.setAdditionalFields(myCtx, fieldsCh) } // Periodically pre-aggregate data every a.query.Interval seconds. go a.aggregateTimer(myCtx) - a.makeMaprLines(myCtx, fieldsCh, maprLines) + a.aggregateAndSerialize(myCtx, fieldsCh, maprMessages) } func (a *Aggregate) aggregateTimer(ctx context.Context) { @@ -125,23 +118,38 @@ func (a *Aggregate) aggregateTimer(ctx context.Context) { } } -func (a *Aggregate) makeFields(ctx context.Context) <-chan map[string]string { - ch := make(chan map[string]string) +func (a *Aggregate) fieldsFromLines(ctx context.Context) <-chan map[string]string { + fieldsCh := make(chan map[string]string) go func() { - defer close(ch) + defer close(fieldsCh) + var lines chan line.Line + + // Gather first lines channel (first input file) + select { + case lines = <-a.NextLinesCh: + case <-ctx.Done(): + return + } for { select { - case line, ok := <-a.Lines: + case line, ok := <-lines: if !ok { - return + select { + case lines = <-a.NextLinesCh: + // Have a new lines channel (e.g. new input file) + case <-ctx.Done(): + default: + // No new lines channel found. + return + } } maprLine := strings.TrimSpace(line.Content.String()) + fields, err := a.parser.MakeFields(maprLine) pool.RecycleBytesBuffer(line.Content) - fields, err := a.parser.MakeFields(maprLine) if err != nil { // Should fields be ignored anyway? if err != logformat.IgnoreFieldsErr { @@ -155,7 +163,7 @@ func (a *Aggregate) makeFields(ctx context.Context) <-chan map[string]string { } select { - case ch <- fields: + case fieldsCh <- fields: case <-ctx.Done(): } case <-ctx.Done(): @@ -164,17 +172,16 @@ func (a *Aggregate) makeFields(ctx context.Context) <-chan map[string]string { } }() - return ch + return fieldsCh } -func (a *Aggregate) addFields(ctx context.Context, fieldsCh <-chan map[string]string) <-chan map[string]string { - ch := make(chan map[string]string) +func (a *Aggregate) setAdditionalFields(ctx context.Context, fieldsCh <-chan map[string]string) <-chan map[string]string { + newFieldsCh := make(chan map[string]string) go func() { - defer close(ch) + defer close(newFieldsCh) for { - // fieldsCh will be closed via 'makeFields' when ctx is done fields, ok := <-fieldsCh if !ok { return @@ -184,23 +191,22 @@ func (a *Aggregate) addFields(ctx context.Context, fieldsCh <-chan map[string]st } select { - case ch <- fields: + case newFieldsCh <- fields: case <-ctx.Done(): } } }() - return ch + return newFieldsCh } -func (a *Aggregate) makeMaprLines(ctx context.Context, fieldsCh <-chan map[string]string, maprLines chan<- string) { +func (a *Aggregate) aggregateAndSerialize(ctx context.Context, fieldsCh <-chan map[string]string, maprMessages chan<- string) { group := mapr.NewGroupSet() serialize := func() { logger.Info("Serializing mapreduce result") - group.Serialize(ctx, maprLines) + group.Serialize(ctx, maprMessages) group = mapr.NewGroupSet() - logger.Info("Done serializing mapreduce result") } for { @@ -213,9 +219,6 @@ func (a *Aggregate) makeMaprLines(ctx context.Context, fieldsCh <-chan map[strin a.aggregate(group, fields) case <-a.serialize: serialize() - case <-a.flush: - serialize() - a.flushed <- struct{}{} case <-ctx.Done(): return } @@ -264,24 +267,3 @@ func (a *Aggregate) Serialize(ctx context.Context) { case <-ctx.Done(): } } - -// Flush all data. -func (a *Aggregate) Flush() { - select { - case a.flush <- struct{}{}: - logger.Info("Flushing mapreduce data") - case <-time.After(time.Minute): - logger.Warn("Starting to flush mapreduce data takes over a minute") - return - case <-a.done.Done(): - return - } - - select { - case <-a.flushed: - logger.Info("Done flushing") - case <-time.After(time.Minute): - logger.Warn("Waiting for data to be flushed takes over a minute") - case <-a.done.Done(): - } -} -- cgit v1.2.3 From fe3e68afd99d8ea246be52893730f987e138ec24 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Sun, 19 Sep 2021 13:22:59 +0300 Subject: move args to config package logger package rewrite as dlog --- internal/mapr/server/aggregate.go | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'internal/mapr/server') diff --git a/internal/mapr/server/aggregate.go b/internal/mapr/server/aggregate.go index d11ed7d..767aada 100644 --- a/internal/mapr/server/aggregate.go +++ b/internal/mapr/server/aggregate.go @@ -9,7 +9,7 @@ import ( "github.com/mimecast/dtail/internal" "github.com/mimecast/dtail/internal/config" "github.com/mimecast/dtail/internal/io/line" - "github.com/mimecast/dtail/internal/io/logger" + "github.com/mimecast/dtail/internal/io/dlog" "github.com/mimecast/dtail/internal/io/pool" "github.com/mimecast/dtail/internal/mapr" "github.com/mimecast/dtail/internal/mapr/logformat" @@ -40,7 +40,7 @@ func NewAggregate(queryStr string) (*Aggregate, error) { fqdn, err := os.Hostname() if err != nil { - logger.Error(err) + dlog.Common.Error(err) } s := strings.Split(fqdn, ".") @@ -55,12 +55,12 @@ func NewAggregate(queryStr string) (*Aggregate, error) { parserName = query.LogFormat } - logger.Info("Creating log format parser", parserName) + dlog.Common.Info("Creating log format parser", parserName) logParser, err := logformat.NewParser(parserName, query) if err != nil { - logger.Error("Could not create log format parser. Falling back to 'generic'", err) + dlog.Common.Error("Could not create log format parser. Falling back to 'generic'", err) if logParser, err = logformat.NewParser("generic", query); err != nil { - logger.FatalExit("Could not create log format parser", err) + dlog.Common.FatalPanic("Could not create log format parser", err) } } @@ -153,7 +153,7 @@ func (a *Aggregate) fieldsFromLines(ctx context.Context) <-chan map[string]strin if err != nil { // Should fields be ignored anyway? if err != logformat.IgnoreFieldsErr { - logger.Error(fields, err) + dlog.Common.Error(fields, err) } continue } @@ -187,7 +187,7 @@ func (a *Aggregate) setAdditionalFields(ctx context.Context, fieldsCh <-chan map return } if err := a.query.SetClause(fields); err != nil { - logger.Error(err) + dlog.Common.Error(err) } select { @@ -204,7 +204,7 @@ func (a *Aggregate) aggregateAndSerialize(ctx context.Context, fieldsCh <-chan m group := mapr.NewGroupSet() serialize := func() { - logger.Info("Serializing mapreduce result") + dlog.Common.Info("Serializing mapreduce result") group.Serialize(ctx, maprMessages) group = mapr.NewGroupSet() } @@ -243,7 +243,7 @@ func (a *Aggregate) aggregate(group *mapr.GroupSet, fields map[string]string) { for _, sc := range a.query.Select { if val, ok := fields[sc.Field]; ok { if err := set.Aggregate(sc.FieldStorage, sc.Operation, val, false); err != nil { - logger.Error(err) + dlog.Common.Error(err) continue } addedSample = true @@ -255,7 +255,7 @@ func (a *Aggregate) aggregate(group *mapr.GroupSet, fields map[string]string) { return } - logger.Trace("Aggregated data locally without adding new samples") + dlog.Common.Trace("Aggregated data locally without adding new samples") } // Serialize all the aggregated data. @@ -263,7 +263,7 @@ func (a *Aggregate) Serialize(ctx context.Context) { select { case a.serialize <- struct{}{}: case <-time.After(time.Minute): - logger.Warn("Starting to serialize mapredice data takes over a minute") + dlog.Common.Warn("Starting to serialize mapredice data takes over a minute") case <-ctx.Done(): } } -- cgit v1.2.3 From 7a7169791a64190e1002e38bc9c04ad0d5c1ce1f Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Sat, 9 Oct 2021 16:44:28 +0300 Subject: add dtail health check unit test. --- internal/mapr/server/aggregate.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'internal/mapr/server') diff --git a/internal/mapr/server/aggregate.go b/internal/mapr/server/aggregate.go index 767aada..1f5d1c3 100644 --- a/internal/mapr/server/aggregate.go +++ b/internal/mapr/server/aggregate.go @@ -8,9 +8,8 @@ import ( "github.com/mimecast/dtail/internal" "github.com/mimecast/dtail/internal/config" - "github.com/mimecast/dtail/internal/io/line" "github.com/mimecast/dtail/internal/io/dlog" - "github.com/mimecast/dtail/internal/io/pool" + "github.com/mimecast/dtail/internal/io/line" "github.com/mimecast/dtail/internal/mapr" "github.com/mimecast/dtail/internal/mapr/logformat" "github.com/mimecast/dtail/internal/protocol" @@ -148,7 +147,8 @@ func (a *Aggregate) fieldsFromLines(ctx context.Context) <-chan map[string]strin maprLine := strings.TrimSpace(line.Content.String()) fields, err := a.parser.MakeFields(maprLine) - pool.RecycleBytesBuffer(line.Content) + // Can not recycle here for some rason. + //pool.RecycleBytesBuffer(line.Content) if err != nil { // Should fields be ignored anyway? -- cgit v1.2.3 From 97747ea0f3178f7f5890512d483fdccaa82846b0 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Sat, 9 Oct 2021 21:10:29 +0300 Subject: vetting and linting and some code restyling --- internal/mapr/server/aggregate.go | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) (limited to 'internal/mapr/server') diff --git a/internal/mapr/server/aggregate.go b/internal/mapr/server/aggregate.go index 1f5d1c3..97fee11 100644 --- a/internal/mapr/server/aggregate.go +++ b/internal/mapr/server/aggregate.go @@ -63,16 +63,14 @@ func NewAggregate(queryStr string) (*Aggregate, error) { } } - a := Aggregate{ + return &Aggregate{ done: internal.NewDone(), NextLinesCh: make(chan chan line.Line, 10), serialize: make(chan struct{}), hostname: s[0], query: query, parser: logParser, - } - - return &a, nil + }, nil } // Shutdown the aggregation engine. @@ -95,12 +93,10 @@ func (a *Aggregate) Start(ctx context.Context, maprMessages chan<- string) { }() fieldsCh := a.fieldsFromLines(myCtx) - // Add fields (e.g. via 'set' clause) if len(a.query.Set) > 0 { fieldsCh = a.setAdditionalFields(myCtx, fieldsCh) } - // Periodically pre-aggregate data every a.query.Interval seconds. go a.aggregateTimer(myCtx) a.aggregateAndSerialize(myCtx, fieldsCh, maprMessages) @@ -147,17 +143,18 @@ func (a *Aggregate) fieldsFromLines(ctx context.Context) <-chan map[string]strin maprLine := strings.TrimSpace(line.Content.String()) fields, err := a.parser.MakeFields(maprLine) - // Can not recycle here for some rason. + // Can't recycle it here yet, as field slices are still + // TODO: Add unit test reading from multiple mapreduce files lines. + // TODO: Add capability to recycle this bytes buffer. //pool.RecycleBytesBuffer(line.Content) if err != nil { // Should fields be ignored anyway? - if err != logformat.IgnoreFieldsErr { + if err != logformat.ErrIgnoreFields { dlog.Common.Error(fields, err) } continue } - if !a.query.WhereClause(fields) { continue } @@ -175,12 +172,12 @@ func (a *Aggregate) fieldsFromLines(ctx context.Context) <-chan map[string]strin return fieldsCh } -func (a *Aggregate) setAdditionalFields(ctx context.Context, fieldsCh <-chan map[string]string) <-chan map[string]string { - newFieldsCh := make(chan map[string]string) +func (a *Aggregate) setAdditionalFields(ctx context.Context, + fieldsCh <-chan map[string]string) <-chan map[string]string { + newFieldsCh := make(chan map[string]string) go func() { defer close(newFieldsCh) - for { fields, ok := <-fieldsCh if !ok { @@ -196,19 +193,18 @@ func (a *Aggregate) setAdditionalFields(ctx context.Context, fieldsCh <-chan map } } }() - return newFieldsCh } -func (a *Aggregate) aggregateAndSerialize(ctx context.Context, fieldsCh <-chan map[string]string, maprMessages chan<- string) { - group := mapr.NewGroupSet() +func (a *Aggregate) aggregateAndSerialize(ctx context.Context, + fieldsCh <-chan map[string]string, maprMessages chan<- string) { + group := mapr.NewGroupSet() serialize := func() { dlog.Common.Info("Serializing mapreduce result") group.Serialize(ctx, maprMessages) group = mapr.NewGroupSet() } - for { select { case fields, ok := <-fieldsCh: @@ -227,7 +223,6 @@ func (a *Aggregate) aggregateAndSerialize(ctx context.Context, fieldsCh <-chan m func (a *Aggregate) aggregate(group *mapr.GroupSet, fields map[string]string) { var sb strings.Builder - for i, field := range a.query.GroupBy { if i > 0 { sb.WriteString(protocol.AggregateGroupKeyCombinator) @@ -254,7 +249,6 @@ func (a *Aggregate) aggregate(group *mapr.GroupSet, fields map[string]string) { set.Samples++ return } - dlog.Common.Trace("Aggregated data locally without adding new samples") } -- cgit v1.2.3