1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
|
package clients
import (
"context"
"errors"
"fmt"
"runtime"
"strings"
"time"
"github.com/mimecast/dtail/internal/clients/handlers"
"github.com/mimecast/dtail/internal/io/logger"
"github.com/mimecast/dtail/internal/mapr"
"github.com/mimecast/dtail/internal/omode"
)
// MaprClient is used for running mapreduce aggregations on remote files.
// It embeds baseClient and adds the parsed query together with the group set
// that is handed to every per-server handler.
type MaprClient struct {
	baseClient
	// Query string for mapr aggregations
	queryStr string
	// Global group set for merged mapr aggregation results
	globalGroup *mapr.GlobalGroupSet
	// The query object (constructed from queryStr)
	query *mapr.Query
	// Additive result or new result every run? When true, results are read
	// from the global group in place; when false, the global group is swapped
	// out before each result is printed.
	additative bool
}
// NewMaprClient returns a new mapreduce client for the given query string.
//
// An error is returned when queryStr is empty or when mapr.NewQuery cannot
// parse it. The line filter regex is derived from the query's table: "*"
// yields `\|MAPREDUCE:\|`, "." yields "." and any other table name yields
// `\|MAPREDUCE:<table>\|`.
func NewMaprClient(args Args, queryStr string) (*MaprClient, error) {
	if queryStr == "" {
		return nil, errors.New("No mapreduce query specified, use '-query' flag")
	}

	query, err := mapr.NewQuery(queryStr)
	if err != nil {
		// This constructor already has an error return, so hand the parse
		// failure to the caller instead of terminating the process here.
		return nil, fmt.Errorf("can't parse mapr query %q: %w", queryStr, err)
	}

	c := MaprClient{
		baseClient: baseClient{
			Args:       args,
			throttleCh: make(chan struct{}, args.ConnectionsPerCPU*runtime.NumCPU()),
			retry:      args.Mode == omode.TailClient,
		},
		queryStr:    queryStr,
		query:       query,
		globalGroup: mapr.NewGlobalGroupSet(),
		additative:  args.Mode == omode.MapClient,
	}

	switch query.Table {
	case "*":
		c.Regex = "\\|MAPREDUCE:\\|"
	case ".":
		c.Regex = "."
	default:
		c.Regex = fmt.Sprintf("\\|MAPREDUCE:%s\\|", query.Table)
	}

	// init receives a copy of c, so every field must be populated before
	// this call.
	c.baseClient.init(c)
	return &c, nil
}
// Start starts the mapreduce client and returns the status reported by the
// embedded base client. Interim results are printed in the background only
// when the query has no outfile; in additive mode the final result is
// emitted once the base client has returned.
func (c *MaprClient) Start(ctx context.Context) (status int) {
	if printInterim := c.query.Outfile == ""; printInterim {
		// Only print out periodic results if we don't write an outfile.
		go c.periodicPrintResults(ctx)
	}

	status = c.baseClient.Start(ctx)
	if !c.additative {
		return status
	}

	c.recievedFinalResult()
	return status
}
// makeHandler creates the mapreduce handler for the given server. Each
// handler is given the client's query and its global group set.
func (c MaprClient) makeHandler(server string) handlers.Handler {
	query, group := c.query, c.globalGroup
	return handlers.NewMaprHandler(server, query, group)
}
// makeCommands builds the command list for this client: first the "map"
// command carrying the raw query, then one "cat" (additive mode) or "tail"
// command per comma separated entry of c.What, each with the client's regex.
func (c MaprClient) makeCommands() (commands []string) {
	verb := "tail"
	if c.additative {
		verb = "cat"
	}

	files := strings.Split(c.What, ",")
	commands = make([]string, 0, len(files)+1)
	commands = append(commands, fmt.Sprintf("map %s", c.query.RawQuery))
	for i := range files {
		commands = append(commands, fmt.Sprintf("%s %s regex %s", verb, files[i], c.Regex))
	}
	return commands
}
// recievedFinalResult emits the final mapreduce result. When the query names
// an outfile the result is written through the global group set; otherwise it
// is printed via printResults.
func (c *MaprClient) recievedFinalResult() {
	logger.Info("Received final mapreduce result")

	if c.query.Outfile != "" {
		logger.Info(fmt.Sprintf("Writing final mapreduce result to '%s'", c.query.Outfile))
		if err := c.globalGroup.WriteResult(c.query); err != nil {
			logger.FatalExit(err)
			return
		}
		logger.Info(fmt.Sprintf("Wrote final mapreduce result to '%s'", c.query.Outfile))
		return
	}

	c.printResults()
}
// periodicPrintResults prints an interim mapreduce result each time the
// query's interval elapses and returns once ctx is done. The interval is
// re-armed after a result has been printed, so the wait is measured from the
// end of one print to the start of the next.
func (c *MaprClient) periodicPrintResults(ctx context.Context) {
	// A single reusable timer replaces time.After inside the loop: no new
	// timer is allocated per iteration and the pending one is stopped as
	// soon as the context is cancelled.
	timer := time.NewTimer(c.query.Interval)
	defer timer.Stop()

	for {
		select {
		case <-timer.C:
			logger.Info("Gathering interim mapreduce result")
			c.printResults()
			// The channel was just drained, so Reset is safe here.
			timer.Reset(c.query.Interval)
		case <-ctx.Done():
			return
		}
	}
}
// printResults renders the current mapreduce result and writes it to the raw
// log output, preceded by the raw query. In additive mode the global group is
// read in place; otherwise it is swapped out first and the swapped-out group
// is rendered. Nothing is printed when the result has no lines.
func (c *MaprClient) printResults() {
	var (
		output string
		lines  int
		err    error
	)

	switch {
	case c.additative:
		output, lines, err = c.globalGroup.Result(c.query)
	default:
		output, lines, err = c.globalGroup.SwapOut().Result(c.query)
	}

	if err != nil {
		logger.FatalExit(err)
	}
	if lines <= 0 {
		return
	}

	logger.Raw(fmt.Sprintf("%s\n", c.query.RawQuery))
	logger.Raw(output)
}
|