From 1f10cafab36d6db860c2a684e0f6e27dce35034a Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Mon, 14 Mar 2022 13:09:31 +0000 Subject: Add "append" modifier for "outfile" keyword to the mapreduce language --- internal/mapr/groupsetresult.go | 8 +++---- internal/mapr/query.go | 27 +++++++++++++++++++----- internal/mapr/query_test.go | 46 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 71 insertions(+), 10 deletions(-) (limited to 'internal') diff --git a/internal/mapr/groupsetresult.go b/internal/mapr/groupsetresult.go index 6d0ac1f..915b342 100644 --- a/internal/mapr/groupsetresult.go +++ b/internal/mapr/groupsetresult.go @@ -159,7 +159,7 @@ func (g *GroupSet) resultWriteFormattedDataEntry(query *Query, sb *strings.Build } func (*GroupSet) writeQueryFile(query *Query) error { - queryFile := fmt.Sprintf("%s.query", query.Outfile) + queryFile := fmt.Sprintf("%s.query", query.Outfile.FilePath) tmpQueryFile := fmt.Sprintf("%s.tmp", queryFile) dlog.Common.Debug("Writing query file", queryFile) @@ -187,8 +187,8 @@ func (g *GroupSet) WriteResult(query *Query) error { return err } - dlog.Common.Info("Writing outfile", query.Outfile) - tmpOutfile := fmt.Sprintf("%s.tmp", query.Outfile) + dlog.Common.Info("Writing outfile", query.Outfile.FilePath) + tmpOutfile := fmt.Sprintf("%s.tmp", query.Outfile.FilePath) fd, err := os.Create(tmpOutfile) if err != nil { @@ -228,7 +228,7 @@ func (g *GroupSet) resultWriteUnformatted(query *Query, rows []result, tmpOutfil fd.WriteString("\n") } - if err := os.Rename(tmpOutfile, query.Outfile); err != nil { + if err := os.Rename(tmpOutfile, query.Outfile.FilePath); err != nil { os.Remove(tmpOutfile) return err } diff --git a/internal/mapr/query.go b/internal/mapr/query.go index d70675f..4eeb7b6 100644 --- a/internal/mapr/query.go +++ b/internal/mapr/query.go @@ -13,6 +13,15 @@ const ( unexpectedEnd string = "Unexpected end of query" ) +type Outfile struct { + FilePath string + AppendMode bool +} + +func (o Outfile) String() string { + return fmt.Sprintf("Outfile(FilePath:%v,AppendMode:%v)", o.FilePath, o.AppendMode) +} + // Query represents a parsed mapr query. type Query struct { Select []selectCondition @@ -25,7 +34,7 @@ type Query struct { GroupKey string Interval time.Duration Limit int - Outfile string + Outfile *Outfile RawQuery string tokens []token LogFormat string @@ -68,7 +77,7 @@ func NewQuery(queryStr string) (*Query, error) { // HasOutfile returns true if query result will be written to a CVS output file. func (q *Query) HasOutfile() bool { - return q.Outfile != "" + return q.Outfile != nil } // Has is a helper to determine whether a query contains a substring @@ -193,10 +202,18 @@ func (q *Query) parseTokens(tokens []token) ([]token, error) { q.Limit = i case "outfile": tokens, found = tokensConsume(tokens[1:]) - if len(found) == 0 { - return tokens, errors.New(invalidQuery + unexpectedEnd) + switch len(found) { + case 1: + q.Outfile = &Outfile{FilePath: found[0].str, AppendMode: false} + case 2: + if found[0].str == "append" { + q.Outfile = &Outfile{FilePath: found[1].str, AppendMode: true} + } else { + return tokens, errors.New(invalidQuery + invalidQuery) + } + default: + return tokens, errors.New(invalidQuery + invalidQuery) } - q.Outfile = found[0].str case "logformat": tokens, found = tokensConsume(tokens[1:]) if len(found) == 0 { diff --git a/internal/mapr/query_test.go b/internal/mapr/query_test.go index a0913fd..f03ccba 100644 --- a/internal/mapr/query_test.go +++ b/internal/mapr/query_test.go @@ -5,6 +5,48 @@ import ( "time" ) +func TestParseQueryOutfile(t *testing.T) { + queryStr := "select foo from bar outfile \"baz.csv\"" + + q, err := NewQuery(queryStr) + if err != nil { + t.Errorf("Query parse error: %s\n%v: %v", queryStr, q, err) + } + + if q.Outfile == nil { + t.Errorf("Expected non-nil outfile: %s\n%v", queryStr, q) + } + + if q.Outfile.FilePath != "baz.csv" { + t.Errorf("Expected \"baz.csv\" as outfile file path: %s\n%v", queryStr, q) + } + + if q.Outfile.AppendMode { + t.Errorf("Expected append mode of outfile to be false: %s\n%v", queryStr, q) + } +} + +func TestParseQueryOutfileAppend(t *testing.T) { + queryStr := "select foo from bar outfile append \"baz.csv\"" + + q, err := NewQuery(queryStr) + if err != nil { + t.Errorf("Query parse error: %s\n%v: %v", queryStr, q, err) + } + + if q.Outfile == nil { + t.Errorf("Expected non-nil outfile: %s\n%v", queryStr, q) + } + + if q.Outfile.FilePath != "baz.csv" { + t.Errorf("Expected \"baz.csv\" as outfile file path: %s\n%v", queryStr, q) + } + + if !q.Outfile.AppendMode { + t.Errorf("Expected append mode of outfile to be true: %s\n%v", queryStr, q) + } +} + func TestParseQuerySimple(t *testing.T) { errorQueries := []string{ "select", @@ -30,7 +72,9 @@ func TestParseQuerySimple(t *testing.T) { "select foo from bar where baz < 100 bay eq 12 group by foo, bar, baz " + "order by foo limit 23 outfile \"result.csv\"", "select foo from bar where baz < 100 bay eq 12 group by foo, bar, baz " + - "order by foo limit 23 outfile \"result.csv\" " + + "order by foo limit 23 outfile append \"result.csv\"", + "select foo from bar where baz < 100 bay eq 12 group by foo, bar, baz " + + "order by foo limit 23 outfile append \"result.csv\" " + "set $foo = maskdigits(bar), $baz = 12, $bay = $foo;", } -- cgit v1.2.3 From bee83cd299b3259790d62b9f22347498f70206b7 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Mon, 14 Mar 2022 13:59:51 +0000 Subject: "append" now actually will append to an outfile now. previously we only added the syntax to the mapr query --- internal/mapr/groupsetresult.go | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) (limited to 'internal') diff --git a/internal/mapr/groupsetresult.go b/internal/mapr/groupsetresult.go index 915b342..40c10fe 100644 --- a/internal/mapr/groupsetresult.go +++ b/internal/mapr/groupsetresult.go @@ -163,7 +163,7 @@ func (*GroupSet) writeQueryFile(query *Query) error { tmpQueryFile := fmt.Sprintf("%s.tmp", queryFile) dlog.Common.Debug("Writing query file", queryFile) - fd, err := os.Create(tmpQueryFile) + fd, err := os.OpenFile(tmpQueryFile, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) if err != nil { return err } @@ -187,21 +187,28 @@ func (g *GroupSet) WriteResult(query *Query) error { return err } - dlog.Common.Info("Writing outfile", query.Outfile.FilePath) - tmpOutfile := fmt.Sprintf("%s.tmp", query.Outfile.FilePath) - - fd, err := os.Create(tmpOutfile) + fd, err := g.getOutfileFD(query) if err != nil { return err } defer fd.Close() - return g.resultWriteUnformatted(query, rows, tmpOutfile, fd) + return g.resultWriteUnformatted(query, rows, fd) } -func (g *GroupSet) resultWriteUnformatted(query *Query, rows []result, tmpOutfile string, - fd *os.File) error { +func (g *GroupSet) getOutfileFD(query *Query) (*os.File, error) { + if !query.Outfile.AppendMode { + dlog.Common.Info("Writing to outfile", query.Outfile.FilePath) + tmpOutfile := fmt.Sprintf("%s.tmp", query.Outfile.FilePath) + return os.OpenFile(tmpOutfile, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) + } + + dlog.Common.Info("Appending to outfile", query.Outfile.FilePath) + // TODO: Make umask configurable. + return os.OpenFile(query.Outfile.FilePath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) +} +func (g *GroupSet) resultWriteUnformatted(query *Query, rows []result, fd *os.File) error { // Generate header now lastColumn := len(query.Select) - 1 for i, sc := range query.Select { @@ -228,9 +235,12 @@ func (g *GroupSet) resultWriteUnformatted(query *Query, rows []result, tmpOutfil fd.WriteString("\n") } - if err := os.Rename(tmpOutfile, query.Outfile.FilePath); err != nil { - os.Remove(tmpOutfile) - return err + if !query.Outfile.AppendMode { + tmpOutfile := fmt.Sprintf("%s.tmp", query.Outfile.FilePath) + if err := os.Rename(tmpOutfile, query.Outfile.FilePath); err != nil { + os.Remove(tmpOutfile) + return err + } } return nil -- cgit v1.2.3 From e085a61ca70932a3670381e2d6b5919e9108441d Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Mon, 14 Mar 2022 16:31:19 +0000 Subject: add integration test for "outfile append.." --- internal/mapr/groupsetresult.go | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) (limited to 'internal') diff --git a/internal/mapr/groupsetresult.go b/internal/mapr/groupsetresult.go index 40c10fe..4b86025 100644 --- a/internal/mapr/groupsetresult.go +++ b/internal/mapr/groupsetresult.go @@ -187,13 +187,23 @@ func (g *GroupSet) WriteResult(query *Query) error { return err } + // By default, also write the CSV header. + writeHeader := true + + // In append mode, only write CSV header when file doesn't exist yet or is empty. + if query.Outfile.AppendMode { + if info, err := os.Stat(query.Outfile.FilePath); err == nil && info.Size() > 0 { + writeHeader = false + } + } + fd, err := g.getOutfileFD(query) if err != nil { return err } defer fd.Close() - return g.resultWriteUnformatted(query, rows, fd) + return g.resultWriteUnformatted(query, rows, fd, writeHeader) } func (g *GroupSet) getOutfileFD(query *Query) (*os.File, error) { @@ -208,17 +218,19 @@ func (g *GroupSet) getOutfileFD(query *Query) (*os.File, error) { return os.OpenFile(query.Outfile.FilePath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) } -func (g *GroupSet) resultWriteUnformatted(query *Query, rows []result, fd *os.File) error { - // Generate header now +func (g *GroupSet) resultWriteUnformatted(query *Query, rows []result, fd *os.File, writeHeader bool) error { lastColumn := len(query.Select) - 1 - for i, sc := range query.Select { - fd.WriteString(sc.FieldStorage) - if i == lastColumn { - continue + + if writeHeader { + for i, sc := range query.Select { + fd.WriteString(sc.FieldStorage) + if i == lastColumn { + continue + } + fd.WriteString(protocol.CSVDelimiter) } - fd.WriteString(protocol.CSVDelimiter) + fd.WriteString("\n") } - fd.WriteString("\n") // And now write the data for i, r := range rows { -- cgit v1.2.3 From 789c88458b7b2e7827b6a2e0a7a753d7252acdf7 Mon Sep 17 00:00:00 2001 From: Paul Buetow Date: Mon, 14 Mar 2022 17:09:06 +0000 Subject: a 0666 to OpenFile will respect the user's default umask --- internal/io/dlog/loggers/file.go | 2 +- internal/mapr/groupsetresult.go | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'internal') diff --git a/internal/io/dlog/loggers/file.go b/internal/io/dlog/loggers/file.go index 9dce251..6a09353 100644 --- a/internal/io/dlog/loggers/file.go +++ b/internal/io/dlog/loggers/file.go @@ -141,7 +141,7 @@ func (f *file) getWriter(name string) *bufio.Writer { } logFile := fmt.Sprintf("%s/%s.log", config.Common.LogDir, name) - newFd, err := os.OpenFile(logFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0644) + newFd, err := os.OpenFile(logFile, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0666) if err != nil { panic(err) } diff --git a/internal/mapr/groupsetresult.go b/internal/mapr/groupsetresult.go index 4b86025..9c3c134 100644 --- a/internal/mapr/groupsetresult.go +++ b/internal/mapr/groupsetresult.go @@ -163,7 +163,7 @@ func (*GroupSet) writeQueryFile(query *Query) error { tmpQueryFile := fmt.Sprintf("%s.tmp", queryFile) dlog.Common.Debug("Writing query file", queryFile) - fd, err := os.OpenFile(tmpQueryFile, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) + fd, err := os.OpenFile(tmpQueryFile, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666) if err != nil { return err } @@ -210,12 +210,11 @@ func (g *GroupSet) getOutfileFD(query *Query) (*os.File, error) { if !query.Outfile.AppendMode { dlog.Common.Info("Writing to outfile", query.Outfile.FilePath) tmpOutfile := fmt.Sprintf("%s.tmp", query.Outfile.FilePath) - return os.OpenFile(tmpOutfile, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) + return os.OpenFile(tmpOutfile, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666) } dlog.Common.Info("Appending to outfile", query.Outfile.FilePath) - // TODO: Make umask configurable. - return os.OpenFile(query.Outfile.FilePath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + return os.OpenFile(query.Outfile.FilePath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) } func (g *GroupSet) resultWriteUnformatted(query *Query, rows []result, fd *os.File, writeHeader bool) error { -- cgit v1.2.3