feat: Add 30-day summary report and fix syntax errors

author: Paul Buetow <paul@buetow.org> 2025-06-30 00:31:31 +0300
committer: Paul Buetow <paul@buetow.org> 2025-06-30 00:31:31 +0300
commit: aa80c07f9a9a208fdb74a5ed907d663d05f1c5e1 (patch)
tree: 81c41f871889b203c825d7e4246a0d88342074c5
parent: 70e7c7397657385b274f2d2421e9310ada89bd5b (diff)
3 files changed, 202 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1269488
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+data
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..537caf1
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,69 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Project Overview
+
+This is a Perl-based log analysis tool for OpenBSD that processes web (httpd) and Gemini server logs. The script anonymizes IP addresses, filters suspicious requests, and generates aggregated statistics.
+
+## Key Commands
+
+### Code Formatting
+```bash
+# Format code with perltidy (use default settings)
+perltidy foostats.pl
+```
+
+### Running the Script
+```bash
+# Parse logs and generate statistics
+doas perl foostats.pl --parse-logs
+
+# Replicate data from partner node
+doas perl foostats.pl --replicate
+
+# Pretty print statistics
+doas perl foostats.pl --pretty-print
+
+# Full options with custom paths
+doas perl foostats.pl --parse-logs --stats-dir=/custom/path --odds-file=fooodds.txt --filter-log=/var/log/foostats-filter.log
+```
+
+## Architecture
+
+The codebase follows an object-oriented design with the following key components:
+
+1. **Main Script** (`foostats.pl`): Entry point that orchestrates the workflow
+2. **Core Packages**:
+   - `FileHelper`: Handles JSON+gzip file I/O operations
+   - `DateHelper`: Date parsing and manipulation utilities
+   - `Foostats::Logreader`: Parses httpd and Gemini logs from `/var/log/` and `/var/www/logs/`
+   - `Foostats::Filter`: Filters requests based on patterns in `fooodds.txt`
+   - `Foostats::Aggregator`: Aggregates filtered data into statistics
+   - `Foostats::FileOutputter`: Writes JSON output to stats directory
+   - `Foostats::Replicator`: Handles HTTPS replication with partner nodes
+   - `Foostats::Merger`: Merges local and replicated data
+   - `Foostats::Reporter`: Generates human-readable reports
+
+## Development Notes
+
+- The script uses modern Perl 5.38 features with experimental builtin functions
+- IP addresses are anonymized using SHA3-512 hashing
+- Data is stored in JSON format with gzip compression
+- The blocklist file (`fooodds.txt`) contains patterns for filtering suspicious requests
+- All file operations use the `FileHelper` package for consistency
+- Date operations should use the `DateHelper` package
+
+## Dependencies
+
+Install required Perl modules via OpenBSD's package manager:
+```bash
+doas pkg_add p5-Digest-SHA3 p5-PerlIO-gzip p5-JSON p5-String-Util p5-LWP-Protocol-https
+```
+
+## Important Considerations
+
+- This tool is OpenBSD-specific and reads from system log locations
+- Always test with `--filter-log` option to debug filtering behavior
+- The script requires elevated privileges (`doas`) to read system logs
+- Partner replication uses HTTPS with mutual authentication
+\ No newline at end of file
diff --git a/summary_report.pl b/summary_report.pl
new file mode 100644
index 0000000..0485d20
--- /dev/null
+++ b/summary_report.pl
@@ -0,0 +1,132 @@
+#!/usr/bin/perl
+
+use v5.38;
+use strict;
+use warnings;
+use JSON;
+use File::Slurp qw(read_file);
+
+# Directory holding the per-day Gemtext reports this script summarizes.
+my $stats_dir = '/var/www/htdocs/buetow.org/self/foostats';
+
+# Candidate report files; the code below only uses names that contain an
+# 8-digit date followed by ".gmi".
+my @report_files = glob("$stats_dir/*.gmi");
+
+# Accumulators filled while parsing: the first two are keyed by date and
+# then by label, the last two hold running totals per host and per URL.
+my (%summary_stats, %feed_stats, %host_stats, %url_stats);
+
+# Pull the data rows out of one fenced "| key | value |" table.
+# $section_re must capture the table text in its first group; rows whose
+# first column equals $header_label (the column heading) are dropped.
+# Returns a list of [key, value] pairs in file order, or an empty list when
+# the section is not present in $content.
+my $table_rows = sub {
+    my ($content, $section_re, $header_label) = @_;
+    my ($table_text) = $content =~ $section_re or return;
+
+    my @rows;
+    for my $row (split /\n/, $table_text) {
+        next unless $row =~ /\| (.*?) \| (.*?) \|/;
+        my ($key, $val) = (trim($1), trim($2));
+        next if $key eq $header_label;
+        push @rows, [$key, $val];
+    }
+    return @rows;
+};
+
+for my $path (sort @report_files) {
+    # Files without an 8-digit date in their name are not daily reports.
+    my ($date) = $path =~ /(\d{8})\.gmi/ or next;
+
+    my $content = read_file($path);
+
+    # Summary: "* label: count" bullets between the heading and the next
+    # "###" heading, stored per day.
+    if (my ($summary_text) = $content =~ /### Summary\n\n(.*?)\n\n###/s) {
+        for my $bullet (split /\n/, $summary_text) {
+            my ($label, $count) = $bullet =~ /\* (.*?): (\d+)/ or next;
+            $summary_stats{$date}{$label} = $count;
+        }
+    }
+
+    # Feed statistics are stored per day.
+    my @feed_rows = $table_rows->(
+        $content, qr/### Feed Statistics\n\n```\n(.*?)\n```/s, 'Feed Type'
+    );
+    $feed_stats{$date}{ $_->[0] } = $_->[1] for @feed_rows;
+
+    # Host statistics are summed over all days.
+    my @host_rows = $table_rows->(
+        $content, qr/### Page Statistics \(by Host\)\n\n```\n(.*?)\n```/s, 'Host'
+    );
+    $host_stats{ $_->[0] } += $_->[1] for @host_rows;
+
+    # URL statistics are summed over all days.
+    my @url_rows = $table_rows->(
+        $content, qr/### Page Statistics \(by URL\)\n\n```\n(.*?)\n```/s, 'URL'
+    );
+    $url_stats{ $_->[0] } += $_->[1] for @url_rows;
+}
+
+# Generate Summary Report
+
+# Render one "evolution" table: a row per date and a column per label.
+#
+# Takes a reference to a hash shaped { date => { label => value } } and
+# returns the table as a string.  Columns are the union of the labels seen
+# on any date, so a label that is absent on the first day is still shown; a
+# cell with no value for its date is rendered as "-".  An empty hash yields
+# a one-line placeholder instead of a header-only table, which also avoids
+# indexing into an empty date list.
+sub evolution_table {
+    my ($stats) = @_;
+
+    my @dates = sort keys %$stats;
+    return "No data available.\n" unless @dates;
+
+    my %seen;
+    my @headers = sort grep { !$seen{$_}++ } map { keys %{ $stats->{$_} } } @dates;
+
+    # Padded so the date column heading is as wide as its 12-dash rule.
+    my $date_heading = 'Date      ';
+
+    my $table = "| " . join(" | ", $date_heading, @headers) . " |\n";
+    $table .= "|" . join("|", map { '-' x (length($_) + 2) } $date_heading, @headers) . "|\n";
+    for my $date (@dates) {
+        my @cells = map { $stats->{$date}{$_} // '-' } @headers;
+        $table .= "| " . join(" | ", $date, @cells) . " |\n";
+    }
+    return $table;
+}
+
+print "# 30-Day Summary Report\n\n";
+
+print "## Daily Summary Evolution\n\n";
+print evolution_table(\%summary_stats);
+
+# The feed table uses the dates that actually have feed data rather than
+# the dates of the summary table.
+print "\n## Daily Feed Statistics Evolution\n\n";
+print evolution_table(\%feed_stats);
+
+print "\n## Top 50 Hosts\n\n";
+
+# Rank hosts by their summed count, highest first.  Ties are broken by host
+# name: hash key order differs from run to run, so without the tie-break
+# both the row order and which hosts make the top-50 cut could change
+# between otherwise identical runs.
+my @sorted_hosts =
+    sort { $host_stats{$b} <=> $host_stats{$a} or $a cmp $b } keys %host_stats;
+splice @sorted_hosts, 50 if @sorted_hosts > 50;
+
+print "| Host | Total Visitors |\n";
+print "|------|----------------|\n";
+print "| $_ | $host_stats{$_} |\n" for @sorted_hosts;
+
+print "\n## Top 50 URLs\n\n";
+
+# Rank URLs by their summed count, highest first.  Ties are broken by URL
+# text: hash key order differs from run to run, so without the tie-break
+# both the row order and which URLs make the top-50 cut could change
+# between otherwise identical runs.
+my @sorted_urls =
+    sort { $url_stats{$b} <=> $url_stats{$a} or $a cmp $b } keys %url_stats;
+splice @sorted_urls, 50 if @sorted_urls > 50;
+
+print "| URL | Total Visitors |\n";
+print "|-----|----------------|\n";
+print "| $_ | $url_stats{$_} |\n" for @sorted_urls;
+
+print "\n## Daily Reports\n\n";
+
+# One Gemtext link line per report file whose name carries an 8-digit date,
+# in sorted path order; other files are skipped.
+for my $path (sort @report_files) {
+    print "=> ./$1.gmi $1 Report\n" if $path =~ /(\d{8})\.gmi/;
+}
+
+# Return a copy of the argument with leading and trailing whitespace
+# removed; the caller's value is left untouched.
+sub trim {
+    my ($text) = @_;
+    s/^\s+|\s+$//g for $text;
+    return $text;
+}
author	Paul Buetow <paul@buetow.org>	2025-06-30 00:31:31 +0300
committer	Paul Buetow <paul@buetow.org>	2025-06-30 00:31:31 +0300
commit	aa80c07f9a9a208fdb74a5ed907d663d05f1c5e1 (patch)
tree	81c41f871889b203c825d7e4246a0d88342074c5
parent	70e7c7397657385b274f2d2421e9310ada89bd5b (diff)