summaryrefslogtreecommitdiff
path: root/ioreplay/src/generate/generate.c
diff options
context:
space:
mode:
Diffstat (limited to 'ioreplay/src/generate/generate.c')
-rw-r--r--ioreplay/src/generate/generate.c235
1 files changed, 235 insertions, 0 deletions
diff --git a/ioreplay/src/generate/generate.c b/ioreplay/src/generate/generate.c
new file mode 100644
index 0000000..ff1be94
--- /dev/null
+++ b/ioreplay/src/generate/generate.c
@@ -0,0 +1,235 @@
+// Copyright 2018 Mimecast Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "generate.h"
+
+#include "../meta/meta.h"
+#include "gtask.h"
+#include "gwriter.h"
+#include "gparser.h"
+
+#include <fcntl.h>
+
+#define _MAX_PROCESSES 1024*1024*10
+
+#define _Perc_filtered (g->num_lines_filtered / (g->lineno/100.0))
+
+generate_s* generate_new(options_s *opts)
+{
+ generate_s *g = Malloc(generate_s);
+
+ g->writer = NULL;
+ g->lineno = 0;
+ g->name = opts->name;
+ g->replay_fd = NULL;
+ g->mps = mounts_new(opts);
+ g->num_lines_filtered = 0;
+ g->num_vsizes = 0;
+ g->start_time = -1;
+ g->pid_map = amap_new(_MAX_PROCESSES);
+ g->vsize_map = hmap_new(_MAX_PROCESSES);
+ g->mmap_map = hmap_new(1024*1024);
+ g->vfd_buffer = rbuffer_new(1024);
+ g->num_mapped_pids = 0;
+ g->num_mapped_fds = 10;
+ g->opts = opts;
+ g->reuse_queue = rbuffer_new(1024);
+ g->replay_fd = Fopen(opts->replay_file, "w");
+
+ return g;
+}
+
+void generate_destroy(generate_s *g)
+{
+ // TODO: Also clean the contets of these maps
+ amap_destroy(g->pid_map);
+ hmap_destroy(g->vsize_map);
+ hmap_destroy(g->mmap_map);
+ rbuffer_destroy(g->vfd_buffer);
+ mounts_destroy(g->mps);
+
+ gtask_s *task = NULL;
+ while (NULL != (task = rbuffer_get_next(g->reuse_queue)))
+ gtask_destroy(task);
+ rbuffer_destroy(g->reuse_queue);
+
+ fclose(g->replay_fd);
+ free(g);
+}
+
+
+status_e generate_run(options_s *opts)
+{
+ generate_s *g = generate_new(opts);
+ Put("Parsing file %s, writing output to %s", opts->capture_file,
+ opts->replay_file);
+ FILE *capture_fd = Fopen(opts->capture_file, "r");
+
+ size_t len = 0;
+ ssize_t read;
+ char *line = NULL;
+
+ drop_root(opts->user);
+
+ // Reserve first few bytes for meta information
+ meta_s *meta = meta_new(g->replay_fd);
+ meta_reserve(meta);
+
+ // The writer will write the .replay file
+ gwriter_s *writer = gwriter_new(g);
+
+ // The parser will parse every line of the .capture file
+ gparser_s *parser = gparser_new(g);
+
+ g->writer = writer;
+
+ // Start one writer and one parser thread!
+ gparser_start(parser);
+ gwriter_start(writer);
+
+ Out("Processing, it may take a while: ");
+
+ // Process each line of the .capture file. Determine line by line whether
+ // the I/O operation makes sense or not. It might be that SystemTap skipped
+ // some I/O ops due to system overload or other issues. The result is that
+ // some lines may be corrupt or contain I/O operations on unknown file
+ // handles. It could also be that there are operations on unknown
+ // file handles such as sockets etc. These will be all filtered out by
+ // either the parser or the writer thread!
+
+ while ((read = getline(&line, &len, capture_fd)) != -1) {
+ if (0 > ++g->lineno) {
+ Error("lineno:%lu Line number overflow", g->lineno);
+ }
+ if (strlen(line) >= MAX_LINE_LEN) {
+ Error("lineno:%lu Exceeded max line length", g->lineno);
+ }
+
+ // Create a new generate task (try to reuse a task object)...
+ gtask_s *t = rbuffer_get_next(g->reuse_queue);
+ if (!t) {
+ t = gtask_new(g);
+ } else if (t->ret != 0) {
+ g->num_lines_filtered++;
+ }
+ gtask_init(t, line, g->lineno);
+
+ // ...pass it to the parser queue
+ while (!rbuffer_insert(parser->queue, t))
+ usleep(100);
+
+ if (g->lineno % 1000000 == 0) {
+ Out(" %lu (filtered:%.2lf%%)", g->lineno, _Perc_filtered);
+ }
+ }
+
+ Put("\nDone reading input file!");
+
+ Put("Waiting for parser thread...");
+ gparser_terminate(parser);
+ gparser_destroy(parser);
+
+ Put("Waiting for writer thread...");
+ gwriter_terminate(writer);
+ gwriter_destroy(writer);
+
+ // Retrieve all left over processed tasks to collect the
+ // statistics!
+ gtask_s *t;
+ while (NULL != (t = rbuffer_get_next(g->reuse_queue))) {
+ if (t->ret != 0)
+ g->num_lines_filtered++;
+ gtask_destroy(t);
+ }
+
+ Put("Processed %lu lines in total, had to filter out %.2lf%%",
+ g->lineno, _Perc_filtered);
+
+ Put("Writing init section to '%s'...", opts->replay_file);
+ fprintf(g->replay_fd, "#INIT\n");
+ off_t init_offset = ftello(g->replay_fd);
+ hmap_run_cb(g->vsize_map, generate_write_init_cb);
+
+ Put("Writing meta header to '%s'...", opts->replay_file);
+ meta_write_start(meta);
+
+ // The meta header is being written to the first line of the .replay
+ // file and used by ioreplay to do various things (e.g. initializing
+ // the test correctly, creating the internal data structures with the
+ // correct sizes etc.
+
+ meta_write_l(meta, "replay_version", REPLAY_VERSION);
+ meta_write_l(meta, "init_offset", init_offset);
+
+ meta_write_s(meta, "user", opts->user);
+ meta_write_s(meta, "name", opts->name);
+
+ meta_write_l(meta, "num_vsizes", g->num_vsizes);
+ meta_write_l(meta, "num_mapped_pids", g->num_mapped_pids);
+ meta_write_l(meta, "num_mapped_fds", g->num_mapped_fds);
+ meta_write_l(meta, "num_lines", g->lineno - g->num_lines_filtered);
+
+ meta_destroy(meta);
+ fclose(capture_fd);
+
+ Put("Generating '%s' done", opts->replay_file);
+ generate_destroy(g);
+
+ return SUCCESS;
+}
+
+void generate_write_init_cb(void *data)
+{
+ vsize_s *l = data;
+ generate_s *g = l->generate;
+
+ if (l->required && strlen(l->path) > 0) {
+ fprintf(g->replay_fd, "%d|%d|%ld|%s|\n",
+ l->is_dir, l->is_file, -l->vsize_deficit, l->path);
+ }
+}
+
+vsize_s* generate_vsize_by_path(generate_s *g, gtask_s *t,
+ char *path)
+{
+ vsize_s *v = NULL;
+
+ if (!path && t)
+ path = t->path;
+
+ Error_if(!path, "No path specified");
+ v = hmap_get(g->vsize_map, path);
+
+ if (!v) {
+ v = vsize_new(path, ++g->num_vsizes, g);
+ hmap_insert(g->vsize_map, path, v);
+ }
+
+ if (t)
+ t->vsize = v;
+
+ return v;
+}
+
+void generate_gprocess_by_realpid(generate_s *g, gtask_s *t)
+{
+ // Get the virtual process data object from the virtual PID space.
+ t->gprocess = amap_get(g->pid_map, t->pid);
+ if (t->gprocess == NULL) {
+ t->gprocess = gprocess_new(t->pid, ++g->num_mapped_pids);
+ if (amap_set(g->pid_map, t->pid, t->gprocess)) {
+ Error("lineno:%lu Can not insert PID %ld", t->lineno, t->pid);
+ }
+ }
+}