diff options
Diffstat (limited to 'internal')
| -rw-r--r-- | internal/mapr/logformat/csv.go | 53 | ||||
| -rw-r--r-- | internal/mapr/logformat/csv_test.go | 54 | ||||
| -rw-r--r-- | internal/mapr/logformat/custom1.go | 16 | ||||
| -rw-r--r-- | internal/mapr/logformat/custom2.go | 16 | ||||
| -rw-r--r-- | internal/mapr/logformat/default.go | 17 | ||||
| -rw-r--r-- | internal/mapr/logformat/default_test.go | 4 | ||||
| -rw-r--r-- | internal/mapr/logformat/generic.go | 15 | ||||
| -rw-r--r-- | internal/mapr/logformat/generickv.go | 15 | ||||
| -rw-r--r-- | internal/mapr/logformat/mimecast.go | 23 | ||||
| -rw-r--r-- | internal/mapr/logformat/parser.go | 82 | ||||
| -rw-r--r-- | internal/mapr/query.go | 10 | ||||
| -rw-r--r-- | internal/mapr/query_test.go | 40 | ||||
| -rw-r--r-- | internal/mapr/selectcondition.go | 19 | ||||
| -rw-r--r-- | internal/mapr/server/aggregate.go | 2 | ||||
| -rw-r--r-- | internal/mapr/setcondition.go | 7 | ||||
| -rw-r--r-- | internal/mapr/token.go | 10 | ||||
| -rw-r--r-- | internal/version/version.go | 2 |
17 files changed, 311 insertions, 74 deletions
diff --git a/internal/mapr/logformat/csv.go b/internal/mapr/logformat/csv.go new file mode 100644 index 0000000..ea85ca9 --- /dev/null +++ b/internal/mapr/logformat/csv.go @@ -0,0 +1,53 @@ +package logformat + +import ( + "fmt" + "strings" + + "github.com/mimecast/dtail/internal/protocol" +) + +type csvParser struct { + defaultParser + header []string + hasHeader bool +} + +func newCSVParser(hostname, timeZoneName string, timeZoneOffset int) (*csvParser, error) { + defaultParser, err := newDefaultParser(hostname, timeZoneName, timeZoneOffset) + if err != nil { + return &csvParser{}, err + } + return &csvParser{defaultParser: *defaultParser}, nil +} + +func (p *csvParser) MakeFields(maprLine string) (map[string]string, error) { + if !p.hasHeader { + p.parseHeader(maprLine) + return nil, ErrIgnoreFields + } + + fields := make(map[string]string, 7+len(p.header)) + fields["*"] = "*" + fields["$hostname"] = p.hostname + fields["$server"] = p.hostname + fields["$line"] = maprLine + fields["$empty"] = "" + fields["$timezone"] = p.timeZoneName + fields["$timeoffset"] = p.timeZoneOffset + + splitted := strings.Split(maprLine, protocol.CSVDelimiter) + for i, value := range splitted { + if i >= len(p.header) { + return fields, fmt.Errorf("CSV file seems corrupted, more fields than header values?") + } + fields[p.header[i]] = value + } + + return fields, nil +} + +func (p *csvParser) parseHeader(maprLine string) { + p.header = strings.Split(maprLine, protocol.CSVDelimiter) + p.hasHeader = true +} diff --git a/internal/mapr/logformat/csv_test.go b/internal/mapr/logformat/csv_test.go new file mode 100644 index 0000000..1baf032 --- /dev/null +++ b/internal/mapr/logformat/csv_test.go @@ -0,0 +1,54 @@ +package logformat + +import ( + "strings" + "testing" + + "github.com/mimecast/dtail/internal/protocol" +) + +func TestCSVLogFormat(t *testing.T) { + parser, err := NewParser("csv", nil) + if err != nil { + t.Errorf("Unable to create parser: %s", err.Error()) + } + + headers := []string{"name", "last_name", "color", "profession", "employee_number"} + dataLine1 := []string{"Paul", "Buetow", "Orange", "Site Reliability Engineer", "4242"} + dataLine2 := []string{"Peter", "Bauer", "Black", "CEO", "1"} + + inputs := []string{ + strings.Join(headers, protocol.CSVDelimiter), + strings.Join(dataLine1, protocol.CSVDelimiter), + strings.Join(dataLine2, protocol.CSVDelimiter), + } + + // First line is the header! + if _, err := parser.MakeFields(inputs[0]); err != ErrIgnoreFields { + t.Errorf("Unable to parse the CSV header") + } + + // First data line + fields, err := parser.MakeFields(inputs[1]) + if err != nil { + t.Errorf("Unable to parse first CSV data line: %s", err.Error()) + } + if val := fields["name"]; val != "Paul" { + t.Errorf("Expected 'name' to be 'Paul' but got '%s'", val) + } + if val := fields["employee_number"]; val != "4242" { + t.Errorf("Expected 'employee_number' to be '4242' but got '%s'", val) + } + + // Second data line + fields, err = parser.MakeFields(inputs[2]) + if err != nil { + t.Errorf("Unable to parse first CSV data line: %s", err.Error()) + } + if val := fields["last_name"]; val != "Bauer" { + t.Errorf("Expected 'last_name' to be 'Bauer' but got '%s'", val) + } + if val := fields["color"]; val != "Black" { + t.Errorf("Expected 'color' to be 'Black' but got '%s'", val) + } +} diff --git a/internal/mapr/logformat/custom1.go b/internal/mapr/logformat/custom1.go new file mode 100644 index 0000000..7229f3e --- /dev/null +++ b/internal/mapr/logformat/custom1.go @@ -0,0 +1,16 @@ +package logformat + +import "errors" + +var ErrCustom1NotImplemented error = errors.New("custom1 log format is not implemented") + +// Template for creating a custom log format. +type custom1Parser struct{} + +func newCustom1Parser(hostname, timeZoneName string, timeZoneOffset int) (*custom1Parser, error) { + return &custom1Parser{}, ErrCustom1NotImplemented +} + +func (p *custom1Parser) MakeFields(maprLine string) (map[string]string, error) { + return nil, ErrCustom1NotImplemented +} diff --git a/internal/mapr/logformat/custom2.go b/internal/mapr/logformat/custom2.go new file mode 100644 index 0000000..262c721 --- /dev/null +++ b/internal/mapr/logformat/custom2.go @@ -0,0 +1,16 @@ +package logformat + +import "errors" + +var ErrCustom2NotImplemented error = errors.New("custom2 log format is not implemented") + +// Template for creating a custom log format. +type custom2Parser struct{} + +func newCustom2Parser(hostname, timeZoneName string, timeZoneOffset int) (*custom2Parser, error) { + return &custom2Parser{}, ErrCustom2NotImplemented +} + +func (p *custom2Parser) MakeFields(maprLine string) (map[string]string, error) { + return nil, ErrCustom2NotImplemented +} diff --git a/internal/mapr/logformat/default.go b/internal/mapr/logformat/default.go index a44b49a..a499bc5 100644 --- a/internal/mapr/logformat/default.go +++ b/internal/mapr/logformat/default.go @@ -7,8 +7,21 @@ import ( "github.com/mimecast/dtail/internal/protocol" ) -// MakeFieldsDEFAULT is the default DTail log file key-value parser. -func (p *Parser) MakeFieldsDEFAULT(maprLine string) (map[string]string, error) { +type defaultParser struct { + hostname string + timeZoneName string + timeZoneOffset string +} + +func newDefaultParser(hostname, timeZoneName string, timeZoneOffset int) (*defaultParser, error) { + return &defaultParser{ + hostname: hostname, + timeZoneName: timeZoneName, + timeZoneOffset: fmt.Sprintf("%d", timeZoneOffset), + }, nil +} + +func (p *defaultParser) MakeFields(maprLine string) (map[string]string, error) { splitted := strings.Split(maprLine, protocol.FieldDelimiter) if len(splitted) < 11 || !strings.HasPrefix(splitted[9], "MAPREDUCE:") || diff --git a/internal/mapr/logformat/default_test.go b/internal/mapr/logformat/default_test.go index 28e1acc..4eae81b 100644 --- a/internal/mapr/logformat/default_test.go +++ b/internal/mapr/logformat/default_test.go @@ -87,6 +87,10 @@ func TestDefaultLogFormat(t *testing.T) { } fields, err := parser.MakeFields("foozoo=bar|bazbay") + if err != nil && err != ErrIgnoreFields { + t.Errorf(err.Error()) + } + if _, ok := fields["foo"]; ok { t.Errorf("Expected fiending field 'foo', but found it\n") } diff --git a/internal/mapr/logformat/generic.go b/internal/mapr/logformat/generic.go index 14ac2a9..32d9b4a 100644 --- a/internal/mapr/logformat/generic.go +++ b/internal/mapr/logformat/generic.go @@ -1,7 +1,18 @@ package logformat -// MakeFieldsGENERIC is the generic log line parser. -func (p *Parser) MakeFieldsGENERIC(maprLine string) (map[string]string, error) { +type genericParser struct { + defaultParser +} + +func newGenericParser(hostname, timeZoneName string, timeZoneOffset int) (*genericParser, error) { + defaultParser, err := newDefaultParser(hostname, timeZoneName, timeZoneOffset) + if err != nil { + return &genericParser{}, err + } + return &genericParser{defaultParser: *defaultParser}, nil +} + +func (p *genericParser) MakeFields(maprLine string) (map[string]string, error) { fields := make(map[string]string, 3) fields["*"] = "*" diff --git a/internal/mapr/logformat/generickv.go b/internal/mapr/logformat/generickv.go index 3452e97..9c3de92 100644 --- a/internal/mapr/logformat/generickv.go +++ b/internal/mapr/logformat/generickv.go @@ -6,8 +6,19 @@ import ( "github.com/mimecast/dtail/internal/protocol" ) -// MakeFieldsGENERIGKV is the generic key-value logfile parser. -func (p *Parser) MakeFieldsGENERIGKV(maprLine string) (map[string]string, error) { +type genericKVParser struct { + defaultParser +} + +func newGenericKVParser(hostname, timeZoneName string, timeZoneOffset int) (*genericKVParser, error) { + defaultParser, err := newDefaultParser(hostname, timeZoneName, timeZoneOffset) + if err != nil { + return &genericKVParser{}, err + } + return &genericKVParser{defaultParser: *defaultParser}, nil +} + +func (p *genericKVParser) MakeFields(maprLine string) (map[string]string, error) { splitted := strings.Split(maprLine, protocol.FieldDelimiter) fields := make(map[string]string, len(splitted)) diff --git a/internal/mapr/logformat/mimecast.go b/internal/mapr/logformat/mimecast.go new file mode 100644 index 0000000..cf6b333 --- /dev/null +++ b/internal/mapr/logformat/mimecast.go @@ -0,0 +1,23 @@ +//go:build !proprietary +// +build !proprietary + +package logformat + +import "errors" + +// ErrMimecastNotAvailable is thrown in the open source version of DTail +var ErrMimecastNotAvailable error = errors.New("The mimecast logformat is not available in this build of DTail") + +type mimecastParser struct{} + +func newMimecastParser(hostname, timeZoneName string, timeZoneOffset int) (*mimecastParser, error) { + return &mimecastParser{}, ErrMimecastNotAvailable +} + +func newMimecastGenericParser(hostname, timeZoneName string, timeZoneOffset int) (*mimecastParser, error) { + return &mimecastParser{}, ErrMimecastNotAvailable +} + +func (p *mimecastParser) MakeFields(maprLine string) (map[string]string, error) { + return nil, ErrMimecastNotAvailable +} diff --git a/internal/mapr/logformat/parser.go b/internal/mapr/logformat/parser.go index d6aac8c..37d7a63 100644 --- a/internal/mapr/logformat/parser.go +++ b/internal/mapr/logformat/parser.go @@ -3,8 +3,6 @@ package logformat import ( "errors" "fmt" - "reflect" - "strings" "time" "github.com/mimecast/dtail/internal/config" @@ -15,62 +13,44 @@ import ( var ErrIgnoreFields error = errors.New("Ignore this field set") // Parser is used to parse the mapreduce information from the server log files. -type Parser struct { - hostname string - logFormatName string - makeFieldsFunc reflect.Value - makeFieldsReceiver reflect.Value - timeZoneName string - timeZoneOffset string +type Parser interface { + // MakeFields creates a field map from an input log line. + MakeFields(string) (map[string]string, error) } // NewParser returns a new log parser. -func NewParser(logFormatName string, query *mapr.Query) (*Parser, error) { +func NewParser(logFormatName string, query *mapr.Query) (Parser, error) { hostname, err := config.Hostname() if err != nil { return nil, err } now := time.Now() - zone, offset := now.Zone() - - p := Parser{ - hostname: hostname, - timeZoneName: zone, - timeZoneOffset: fmt.Sprintf("%d", offset), - } - - err = p.reflectLogFormat(logFormatName) - if err != nil { - return nil, err - } - return &p, nil -} - -// The aim of this is that everyone can plug in their own mapr log format -// parsing method to DTail. Just add a method MakeFieldsMODULENAME to type -// Parser. Whereas MODULENAME must be a upeprcase string. -func (p *Parser) reflectLogFormat(logFormatName string) error { - methodName := fmt.Sprintf("MakeFields%s", strings.ToUpper(logFormatName)) - rt := reflect.TypeOf(p) - method, ok := rt.MethodByName(methodName) - if !ok { - return errors.New("No such mapr log format module: " + methodName) - } - - p.makeFieldsFunc = method.Func - p.makeFieldsReceiver = reflect.ValueOf(p) - return nil -} - -// MakeFields is for returning the fields from a given log line. -func (p *Parser) MakeFields(maprLine string) (fields map[string]string, err error) { - inputValues := []reflect.Value{p.makeFieldsReceiver, reflect.ValueOf(maprLine)} - returnValues := p.makeFieldsFunc.Call(inputValues) - errInterface := returnValues[1].Interface() - if errInterface == nil { - fields, err = returnValues[0].Interface().(map[string]string), nil - return + timeZoneName, timeZoneOffset := now.Zone() + + // Extend this for adding more log formats! + switch logFormatName { + case "generic": + return newGenericParser(hostname, timeZoneName, timeZoneOffset) + case "generickv": + return newGenericKVParser(hostname, timeZoneName, timeZoneOffset) + case "csv": + return newCSVParser(hostname, timeZoneName, timeZoneOffset) + case "mimecast": + return newMimecastParser(hostname, timeZoneName, timeZoneOffset) + case "mimecastgeneric": + return newMimecastGenericParser(hostname, timeZoneName, timeZoneOffset) + case "default": + return newDefaultParser(hostname, timeZoneName, timeZoneOffset) + case "custom1": + return newCustom1Parser(hostname, timeZoneName, timeZoneOffset) + case "custom2": + return newCustom2Parser(hostname, timeZoneName, timeZoneOffset) + default: + p, err := newDefaultParser(hostname, timeZoneName, timeZoneOffset) + if err != nil { + return p, fmt.Errorf("No '%s' mapr log format and problem creating default one: %v", + logFormatName, err) + } + return p, fmt.Errorf("No '%s' mapr log format", logFormatName) } - fields, err = returnValues[0].Interface().(map[string]string), errInterface.(error) - return } diff --git a/internal/mapr/query.go b/internal/mapr/query.go index 247cdaf..ddcbc90 100644 --- a/internal/mapr/query.go +++ b/internal/mapr/query.go @@ -73,6 +73,13 @@ func NewQuery(queryStr string) (*Query, error) { Interval: time.Second * 5, Limit: -1, } + + // If log format is CSV, then use "." as the table. It means, that + // we don't do any file filtering, we process all lines of the CSV. + if q.LogFormat == "csv" { + q.Table = "." + } + return &q, q.parse(tokens) } @@ -87,8 +94,7 @@ func (q *Query) Has(what string) bool { } func (q *Query) parse(tokens []token) error { - tokens, err := q.parseTokens(tokens) - if err != nil { + if _, err := q.parseTokens(tokens); err != nil { return err } diff --git a/internal/mapr/query_test.go b/internal/mapr/query_test.go index f03ccba..f37b8d4 100644 --- a/internal/mapr/query_test.go +++ b/internal/mapr/query_test.go @@ -252,3 +252,43 @@ func TestParseQueryDeep(t *testing.T) { } } } + +func TestQuotedSelectCondition(t *testing.T) { + queryStr := "select `count($foo)`, foo, $foo, count($foo) logformat csv" + + q, err := NewQuery(queryStr) + if err != nil { + t.Errorf("Query parse error: %s\n%v: %v", queryStr, q, err) + } + if len(q.Select) != 4 { + t.Errorf("Expected three elements in 'select' clause but got '%v': %s\n%v", + q.Select, queryStr, q) + } + + if q.Select[0].Field != "count($foo)" { + t.Errorf("Expected 'num($foo)' as first element in 'select' clause but got '%v': %s\n%v", + q.Select[0].Field, queryStr, q) + } + if q.Select[0].Operation != Last { + t.Errorf("Expected 'Last' as aggregation function of first element in "+ + "'select' clause but got '%v': %s\n%v", q.Select[0].Operation, queryStr, q) + } + + if q.Select[1].Field != "foo" { + t.Errorf("Expected 'foo' as first element in 'select' clause but got '%v': %s\n%v", + q.Select[1].Field, queryStr, q) + } + if q.Select[2].Field != "$foo" { + t.Errorf("Expected '$foo' as first element in 'select' clause but got '%v': %s\n%v", + q.Select[2].Field, queryStr, q) + } + + if q.Select[3].Field != "$foo" { + t.Errorf("Expected '$foo' as first element in 'select' clause but got '%v': %s\n%v", + q.Select[3].Field, queryStr, q) + } + if q.Select[3].Operation != Count { + t.Errorf("Expected 'count' as aggregation function of thourth element in "+ + "'select' clause but got '%v': %s\n%v", q.Select[3].Operation, queryStr, q) + } +} diff --git a/internal/mapr/selectcondition.go b/internal/mapr/selectcondition.go index 45fc16b..78359c7 100644 --- a/internal/mapr/selectcondition.go +++ b/internal/mapr/selectcondition.go @@ -40,16 +40,18 @@ func makeSelectConditions(tokens []token) ([]selectCondition, error) { // Parse select aggregation, e.g. sum(foo) parse := func(token token) (selectCondition, error) { var sc selectCondition - tokenStr := token.str - if !strings.Contains(tokenStr, "(") && !strings.Contains(tokenStr, ")") { - sc.Field = tokenStr - sc.FieldStorage = tokenStr + // With quotes stripped: We got a quoted select expression, e.g. + // "select `count($foo)` ...", which will literaly look for field + // "count($foo)" without performing the count aggregation. + if token.quotesStripped || (!strings.Contains(token.str, "(") && !strings.Contains(token.str, ")")) { + sc.Field = token.str + sc.FieldStorage = token.str sc.Operation = Last return sc, nil } - a := strings.Split(tokenStr, "(") + a := strings.Split(token.str, "(") if len(a) != 2 { return sc, errors.New(invalidQuery + "Can't parse 'select' aggregation: " + token.str) @@ -61,8 +63,8 @@ func makeSelectConditions(tokens []token) ([]selectCondition, error) { return sc, errors.New(invalidQuery + "Can't parse 'select' field name " + "from aggregation: " + token.str) } - sc.Field = b[0] // Field name, e.g. 'foo' - sc.FieldStorage = tokenStr // e.g. 'sum(foo)' + sc.Field = b[0] // Field name, e.g. 'foo' + sc.FieldStorage = token.str // e.g. 'sum(foo)' switch agg { case "count": @@ -80,8 +82,7 @@ func makeSelectConditions(tokens []token) ([]selectCondition, error) { case "len": sc.Operation = Len default: - return sc, errors.New(invalidQuery + - "Unknown aggregation in 'select' clause: " + agg) + return sc, errors.New(invalidQuery + "Unknown aggregation in 'select' clause: " + agg) } return sc, nil } diff --git a/internal/mapr/server/aggregate.go b/internal/mapr/server/aggregate.go index ed32f8f..4f14751 100644 --- a/internal/mapr/server/aggregate.go +++ b/internal/mapr/server/aggregate.go @@ -27,7 +27,7 @@ type Aggregate struct { // The mapr query query *mapr.Query // The mapr log format parser - parser *logformat.Parser + parser logformat.Parser } // NewAggregate return a new server side aggregator. diff --git a/internal/mapr/setcondition.go b/internal/mapr/setcondition.go index 9dcd690..308a0f4 100644 --- a/internal/mapr/setcondition.go +++ b/internal/mapr/setcondition.go @@ -37,6 +37,13 @@ func makeSetConditions(tokens []token) (set []setCondition, err error) { return sc, nil, err } + // Seems like a quoted string? E.g.: "set $foo = `count(bar)`" + // So don't interpret `count` as a function! + if tokens[2].quotesStripped { + sc.rType = Field + return sc, tokens[3:], nil + } + // Seems like a function call? if strings.HasSuffix(sc.rString, ")") { functionStack, functionArg, err := funcs.NewFunctionStack(tokens[2].str) diff --git a/internal/mapr/token.go b/internal/mapr/token.go index 6ac7631..48d1192 100644 --- a/internal/mapr/token.go +++ b/internal/mapr/token.go @@ -9,8 +9,9 @@ var keywords = [...]string{"select", "from", "where", "set", "group", "rorder", // Represents a parsed token, used to parse the mapr query. type token struct { - str string - isBareword bool + str string + isBareword bool + quotesStripped bool } func (t token) isKeyword() bool { @@ -71,8 +72,9 @@ func tokensConsume(tokens []token) ([]token, []token) { stripped := t.str[1 : length-1] //dlog.Common.Trace("stripped", stripped) t := token{ - str: stripped, - isBareword: t.isBareword, + str: stripped, + isBareword: t.isBareword, + quotesStripped: true, } consumed = append(consumed, t) continue diff --git a/internal/version/version.go b/internal/version/version.go index 90073b5..15ea50f 100644 --- a/internal/version/version.go +++ b/internal/version/version.go @@ -13,7 +13,7 @@ const ( // Name of DTail. Name string = "DTail" // Version of DTail. - Version string = "4.2.0" + Version string = "4.3.0" // Additional information for DTail Additional string = "Have a lot of fun!" ) |
