diff options
| author | Paul Buetow <paul@buetow.org> | 2025-06-30 00:31:31 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2025-06-30 00:31:31 +0300 |
| commit | aa80c07f9a9a208fdb74a5ed907d663d05f1c5e1 (patch) | |
| tree | 81c41f871889b203c825d7e4246a0d88342074c5 | |
| parent | 70e7c7397657385b274f2d2421e9310ada89bd5b (diff) | |
feat: Add 30-day summary report and fix syntax errors
| -rw-r--r-- | .gitignore | 1 |
| -rw-r--r-- | CLAUDE.md | 69 |
| -rw-r--r-- | summary_report.pl | 132 |
3 files changed, 202 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1269488 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +data diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..537caf1 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,69 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +This is a Perl-based log analysis tool for OpenBSD that processes web (httpd) and Gemini server logs. The script anonymizes IP addresses, filters suspicious requests, and generates aggregated statistics. + +## Key Commands + +### Code Formatting +```bash +# Format code with perltidy (use default settings) +perltidy foostats.pl +``` + +### Running the Script +```bash +# Parse logs and generate statistics +doas perl foostats.pl --parse-logs + +# Replicate data from partner node +doas perl foostats.pl --replicate + +# Pretty print statistics +doas perl foostats.pl --pretty-print + +# Full options with custom paths +doas perl foostats.pl --parse-logs --stats-dir=/custom/path --odds-file=fooodds.txt --filter-log=/var/log/foostats-filter.log +``` + +## Architecture + +The codebase follows an object-oriented design with the following key components: + +1. **Main Script** (`foostats.pl`): Entry point that orchestrates the workflow +2. 
**Core Packages**: + - `FileHelper`: Handles JSON+gzip file I/O operations + - `DateHelper`: Date parsing and manipulation utilities + - `Foostats::Logreader`: Parses httpd and Gemini logs from `/var/log/` and `/var/www/logs/` + - `Foostats::Filter`: Filters requests based on patterns in `fooodds.txt` + - `Foostats::Aggregator`: Aggregates filtered data into statistics + - `Foostats::FileOutputter`: Writes JSON output to stats directory + - `Foostats::Replicator`: Handles HTTPS replication with partner nodes + - `Foostats::Merger`: Merges local and replicated data + - `Foostats::Reporter`: Generates human-readable reports + +## Development Notes + +- The script uses modern Perl 5.38 features with experimental builtin functions +- IP addresses are anonymized using SHA3-512 hashing +- Data is stored in JSON format with gzip compression +- The blocklist file (`fooodds.txt`) contains patterns for filtering suspicious requests +- All file operations use the `FileHelper` package for consistency +- Date operations should use the `DateHelper` package + +## Dependencies + +Install required Perl modules via OpenBSD's package manager: +```bash +doas pkg_add p5-Digest-SHA3 p5-PerlIO-gzip p5-JSON p5-String-Util p5-LWP-Protocol-https +``` + +## Important Considerations + +- This tool is OpenBSD-specific and reads from system log locations +- Always test with `--filter-log` option to debug filtering behavior +- The script requires elevated privileges (`doas`) to read system logs +- Partner replication uses HTTPS with mutual authentication
\ No newline at end of file diff --git a/summary_report.pl b/summary_report.pl new file mode 100644 index 0000000..0485d20 --- /dev/null +++ b/summary_report.pl @@ -0,0 +1,132 @@ +#!/usr/bin/perl + +use v5.38; +use strict; +use warnings; +use JSON; +use File::Slurp qw(read_file); + +my $stats_dir = '/var/www/htdocs/buetow.org/self/foostats'; + +my @report_files = glob "$stats_dir/*.gmi"; +my %summary_stats; +my %feed_stats; +my %host_stats; +my %url_stats; + +for my $file (sort @report_files) { + my ($date) = $file =~ /(\d{8})\.gmi/; + next unless $date; + + my $content = read_file($file); + + # Extract Summary + if ($content =~ /### Summary\n\n(.*?)\n\n###/s) { + my $summary_text = $1; + my @lines = split /\n/, $summary_text; + for my $line (@lines) { + if ($line =~ /\* (.*?): (\d+)/) { + $summary_stats{$date}{$1} = $2; + } + } + } + + # Extract Feed Statistics + if ($content =~ /### Feed Statistics\n\n```\n(.*?)\n```/s) { + my $feed_text = $1; + my @lines = split /\n/, $feed_text; + for my $line (@lines) { + if ($line =~ /\| (.*?) \| (.*?) \|/) { + my ($key, $val) = (trim($1), trim($2)); + next if $key eq 'Feed Type'; + $feed_stats{$date}{$key} = $val; + } + } + } + + # Extract Host Statistics + if ($content =~ /### Page Statistics \(by Host\)\n\n```\n(.*?)\n```/s) { + my $host_text = $1; + my @lines = split /\n/, $host_text; + for my $line (@lines) { + if ($line =~ /\| (.*?) \| (.*?) \|/) { + my ($key, $val) = (trim($1), trim($2)); + next if $key eq 'Host'; + $host_stats{$key} += $val; + } + } + } + + # Extract URL Statistics + if ($content =~ /### Page Statistics \(by URL\)\n\n```\n(.*?)\n```/s) { + my $url_text = $1; + my @lines = split /\n/, $url_text; + for my $line (@lines) { + if ($line =~ /\| (.*?) \| (.*?) 
\|/) { + my ($key, $val) = (trim($1), trim($2)); + next if $key eq 'URL'; + $url_stats{$key} += $val; + } + } + } +} + +# Generate Summary Report + +print "# 30-Day Summary Report\n\n"; + +print "## Daily Summary Evolution\n\n"; +my @dates = sort keys %summary_stats; +my @summary_headers = sort keys %{ $summary_stats{ $dates[0] } }; +print "| Date | " . join(" | ", @summary_headers) . "|\n"; +print "|------------|" . join("", map { '-' x (length($_) + 2) . '|' } @summary_headers) . "\n"; +for my $date (@dates) { + print "| $date | "; + for my $header (@summary_headers) { + print "$summary_stats{$date}{$header} | "; + } + print "\n"; +} + +print "\n## Daily Feed Statistics Evolution\n\n"; +my @feed_headers = sort keys %{ $feed_stats{ $dates[0] } }; +print "| Date | " . join(" | ", @feed_headers) . "|\n"; +print "|------------|" . join("", map { '-' x (length($_) + 2) . '|' } @feed_headers) . "\n"; +for my $date (@dates) { + print "| $date | "; + for my $header (@feed_headers) { + print "$feed_stats{$date}{$header} | "; + } + print "\n"; +} + +print "\n## Top 50 Hosts\n\n"; +my @sorted_hosts = sort { $host_stats{$b} <=> $host_stats{$a} } keys %host_stats; +@sorted_hosts = @sorted_hosts[0..49] if @sorted_hosts > 50; +print "| Host | Total Visitors |\n"; +print "|------|----------------|\n"; +for my $host (@sorted_hosts) { + print "| $host | $host_stats{$host} |\n"; +} + +print "\n## Top 50 URLs\n\n"; +my @sorted_urls = sort { $url_stats{$b} <=> $url_stats{$a} } keys %url_stats; +@sorted_urls = @sorted_urls[0..49] if @sorted_urls > 50; +print "| URL | Total Visitors |\n"; +print "|-----|----------------|\n"; +for my $url (@sorted_urls) { + print "| $url | $url_stats{$url} |\n"; +} + +print "\n## Daily Reports\n\n"; +for my $file (sort @report_files) { + my ($date) = $file =~ /(\d{8})\.gmi/; + next unless $date; + print "=> ./$date.gmi $date Report\n"; +} + +sub trim { + my $s = shift; + $s =~ s/^\s+|\s+$//g; + return $s; +} |
