diff options
| author | Paul Buetow <paul@buetow.org> | 2025-06-30 00:31:45 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2025-06-30 00:31:45 +0300 |
| commit | 84909c70a1a3eaf477c278e49b18a0acba3ccc3e (patch) | |
| tree | 9e0ed041a40c1c3673181bacb8d4e394481940e9 | |
| parent | aa80c07f9a9a208fdb74a5ed907d663d05f1c5e1 (diff) | |
Revert "feat: Add 30-day summary report and fix syntax errors"
This reverts commit aa80c07f9a9a208fdb74a5ed907d663d05f1c5e1.
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | CLAUDE.md | 69 | ||||
| -rw-r--r-- | summary_report.pl | 132 |
3 files changed, 0 insertions, 202 deletions
diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 1269488..0000000 --- a/.gitignore +++ /dev/null @@ -1 +0,0 @@ -data diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index 537caf1..0000000 --- a/CLAUDE.md +++ /dev/null @@ -1,69 +0,0 @@ -# CLAUDE.md - -This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. - -## Project Overview - -This is a Perl-based log analysis tool for OpenBSD that processes web (httpd) and Gemini server logs. The script anonymizes IP addresses, filters suspicious requests, and generates aggregated statistics. - -## Key Commands - -### Code Formatting -```bash -# Format code with perltidy (use default settings) -perltidy foostats.pl -``` - -### Running the Script -```bash -# Parse logs and generate statistics -doas perl foostats.pl --parse-logs - -# Replicate data from partner node -doas perl foostats.pl --replicate - -# Pretty print statistics -doas perl foostats.pl --pretty-print - -# Full options with custom paths -doas perl foostats.pl --parse-logs --stats-dir=/custom/path --odds-file=fooodds.txt --filter-log=/var/log/foostats-filter.log -``` - -## Architecture - -The codebase follows an object-oriented design with the following key components: - -1. **Main Script** (`foostats.pl`): Entry point that orchestrates the workflow -2. **Core Packages**: - - `FileHelper`: Handles JSON+gzip file I/O operations - - `DateHelper`: Date parsing and manipulation utilities - - `Foostats::Logreader`: Parses httpd and Gemini logs from `/var/log/` and `/var/www/logs/` - - `Foostats::Filter`: Filters requests based on patterns in `fooodds.txt` - - `Foostats::Aggregator`: Aggregates filtered data into statistics - - `Foostats::FileOutputter`: Writes JSON output to stats directory - - `Foostats::Replicator`: Handles HTTPS replication with partner nodes - - `Foostats::Merger`: Merges local and replicated data - - `Foostats::Reporter`: Generates human-readable reports - -## Development Notes - -- The script uses modern Perl 5.38 features with experimental builtin functions -- IP addresses are anonymized using SHA3-512 hashing -- Data is stored in JSON format with gzip compression -- The blocklist file (`fooodds.txt`) contains patterns for filtering suspicious requests -- All file operations use the `FileHelper` package for consistency -- Date operations should use the `DateHelper` package - -## Dependencies - -Install required Perl modules via OpenBSD's package manager: -```bash -doas pkg_add p5-Digest-SHA3 p5-PerlIO-gzip p5-JSON p5-String-Util p5-LWP-Protocol-https -``` - -## Important Considerations - -- This tool is OpenBSD-specific and reads from system log locations -- Always test with `--filter-log` option to debug filtering behavior -- The script requires elevated privileges (`doas`) to read system logs -- Partner replication uses HTTPS with mutual authentication
\ No newline at end of file diff --git a/summary_report.pl b/summary_report.pl deleted file mode 100644 index 0485d20..0000000 --- a/summary_report.pl +++ /dev/null @@ -1,132 +0,0 @@ -#!/usr/bin/perl - -use v5.38; -use strict; -use warnings; -use JSON; -use File::Slurp qw(read_file); - -my $stats_dir = '/var/www/htdocs/buetow.org/self/foostats'; - -my @report_files = glob "$stats_dir/*.gmi"; -my %summary_stats; -my %feed_stats; -my %host_stats; -my %url_stats; - -for my $file (sort @report_files) { - my ($date) = $file =~ /(\d{8})\.gmi/; - next unless $date; - - my $content = read_file($file); - - # Extract Summary - if ($content =~ /### Summary\n\n(.*?)\n\n###/s) { - my $summary_text = $1; - my @lines = split /\n/, $summary_text; - for my $line (@lines) { - if ($line =~ /\* (.*?): (\d+)/) { - $summary_stats{$date}{$1} = $2; - } - } - } - - # Extract Feed Statistics - if ($content =~ /### Feed Statistics\n\n```\n(.*?)\n```/s) { - my $feed_text = $1; - my @lines = split /\n/, $feed_text; - for my $line (@lines) { - if ($line =~ /\| (.*?) \| (.*?) \|/) { - my ($key, $val) = (trim($1), trim($2)); - next if $key eq 'Feed Type'; - $feed_stats{$date}{$key} = $val; - } - } - } - - # Extract Host Statistics - if ($content =~ /### Page Statistics \(by Host\)\n\n```\n(.*?)\n```/s) { - my $host_text = $1; - my @lines = split /\n/, $host_text; - for my $line (@lines) { - if ($line =~ /\| (.*?) \| (.*?) \|/) { - my ($key, $val) = (trim($1), trim($2)); - next if $key eq 'Host'; - $host_stats{$key} += $val; - } - } - } - - # Extract URL Statistics - if ($content =~ /### Page Statistics \(by URL\)\n\n```\n(.*?)\n```/s) { - my $url_text = $1; - my @lines = split /\n/, $url_text; - for my $line (@lines) { - if ($line =~ /\| (.*?) \| (.*?) \|/) { - my ($key, $val) = (trim($1), trim($2)); - next if $key eq 'URL'; - $url_stats{$key} += $val; - } - } - } -} - -# Generate Summary Report - -print "# 30-Day Summary Report\n\n"; - -print "## Daily Summary Evolution\n\n"; -my @dates = sort keys %summary_stats; -my @summary_headers = sort keys %{ $summary_stats{ $dates[0] } }; -print "| Date | " . join(" | ", @summary_headers) . "|\n"; -print "|------------|" . join("", map { '-' x (length($_) + 2) . '|' } @summary_headers) . "\n"; -for my $date (@dates) { - print "| $date | "; - for my $header (@summary_headers) { - print "$summary_stats{$date}{$header} | "; - } - print "\n"; -} - -print "\n## Daily Feed Statistics Evolution\n\n"; -my @feed_headers = sort keys %{ $feed_stats{ $dates[0] } }; -print "| Date | " . join(" | ", @feed_headers) . "|\n"; -print "|------------|" . join("", map { '-' x (length($_) + 2) . '|' } @feed_headers) . "\n"; -for my $date (@dates) { - print "| $date | "; - for my $header (@feed_headers) { - print "$feed_stats{$date}{$header} | "; - } - print "\n"; -} - -print "\n## Top 50 Hosts\n\n"; -my @sorted_hosts = sort { $host_stats{$b} <=> $host_stats{$a} } keys %host_stats; -@sorted_hosts = @sorted_hosts[0..49] if @sorted_hosts > 50; -print "| Host | Total Visitors |\n"; -print "|------|----------------|\n"; -for my $host (@sorted_hosts) { - print "| $host | $host_stats{$host} |\n"; -} - -print "\n## Top 50 URLs\n\n"; -my @sorted_urls = sort { $url_stats{$b} <=> $url_stats{$a} } keys %url_stats; -@sorted_urls = @sorted_urls[0..49] if @sorted_urls > 50; -print "| URL | Total Visitors |\n"; -print "|-----|----------------|\n"; -for my $url (@sorted_urls) { - print "| $url | $url_stats{$url} |\n"; -} - -print "\n## Daily Reports\n\n"; -for my $file (sort @report_files) { - my ($date) = $file =~ /(\d{8})\.gmi/; - next unless $date; - print "=> ./$date.gmi $date Report\n"; -} - -sub trim { - my $s = shift; - $s =~ s/^\s+|\s+$//g; - return $s; -} |
