summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Paul Buetow <paul@buetow.org> 2025-06-30 00:31:45 +0300
committer Paul Buetow <paul@buetow.org> 2025-06-30 00:31:45 +0300
commit 84909c70a1a3eaf477c278e49b18a0acba3ccc3e (patch)
tree 9e0ed041a40c1c3673181bacb8d4e394481940e9
parent aa80c07f9a9a208fdb74a5ed907d663d05f1c5e1 (diff)
Revert "feat: Add 30-day summary report and fix syntax errors"
This reverts commit aa80c07f9a9a208fdb74a5ed907d663d05f1c5e1.
-rw-r--r-- .gitignore 1
-rw-r--r-- CLAUDE.md 69
-rw-r--r-- summary_report.pl 132
3 files changed, 0 insertions, 202 deletions
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index 1269488..0000000
--- a/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-data
diff --git a/CLAUDE.md b/CLAUDE.md
deleted file mode 100644
index 537caf1..0000000
--- a/CLAUDE.md
+++ /dev/null
@@ -1,69 +0,0 @@
-# CLAUDE.md
-
-This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
-
-## Project Overview
-
-This is a Perl-based log analysis tool for OpenBSD that processes web (httpd) and Gemini server logs. The script anonymizes IP addresses, filters suspicious requests, and generates aggregated statistics.
-
-## Key Commands
-
-### Code Formatting
-```bash
-# Format code with perltidy (use default settings)
-perltidy foostats.pl
-```
-
-### Running the Script
-```bash
-# Parse logs and generate statistics
-doas perl foostats.pl --parse-logs
-
-# Replicate data from partner node
-doas perl foostats.pl --replicate
-
-# Pretty print statistics
-doas perl foostats.pl --pretty-print
-
-# Full options with custom paths
-doas perl foostats.pl --parse-logs --stats-dir=/custom/path --odds-file=fooodds.txt --filter-log=/var/log/foostats-filter.log
-```
-
-## Architecture
-
-The codebase follows an object-oriented design with the following key components:
-
-1. **Main Script** (`foostats.pl`): Entry point that orchestrates the workflow
-2. **Core Packages**:
- - `FileHelper`: Handles JSON+gzip file I/O operations
- - `DateHelper`: Date parsing and manipulation utilities
- - `Foostats::Logreader`: Parses httpd and Gemini logs from `/var/log/` and `/var/www/logs/`
- - `Foostats::Filter`: Filters requests based on patterns in `fooodds.txt`
- - `Foostats::Aggregator`: Aggregates filtered data into statistics
- - `Foostats::FileOutputter`: Writes JSON output to stats directory
- - `Foostats::Replicator`: Handles HTTPS replication with partner nodes
- - `Foostats::Merger`: Merges local and replicated data
- - `Foostats::Reporter`: Generates human-readable reports
-
-## Development Notes
-
-- The script uses modern Perl 5.38 features with experimental builtin functions
-- IP addresses are anonymized using SHA3-512 hashing
-- Data is stored in JSON format with gzip compression
-- The blocklist file (`fooodds.txt`) contains patterns for filtering suspicious requests
-- All file operations use the `FileHelper` package for consistency
-- Date operations should use the `DateHelper` package
-
-## Dependencies
-
-Install required Perl modules via OpenBSD's package manager:
-```bash
-doas pkg_add p5-Digest-SHA3 p5-PerlIO-gzip p5-JSON p5-String-Util p5-LWP-Protocol-https
-```
-
-## Important Considerations
-
-- This tool is OpenBSD-specific and reads from system log locations
-- Always test with `--filter-log` option to debug filtering behavior
-- The script requires elevated privileges (`doas`) to read system logs
-- Partner replication uses HTTPS with mutual authentication
\ No newline at end of file
diff --git a/summary_report.pl b/summary_report.pl
deleted file mode 100644
index 0485d20..0000000
--- a/summary_report.pl
+++ /dev/null
@@ -1,132 +0,0 @@
-#!/usr/bin/perl
-
-use v5.38;
-use strict;
-use warnings;
-use JSON;
-use File::Slurp qw(read_file);
-
-my $stats_dir = '/var/www/htdocs/buetow.org/self/foostats';
-
-my @report_files = glob "$stats_dir/*.gmi";
-my %summary_stats;
-my %feed_stats;
-my %host_stats;
-my %url_stats;
-
-for my $file (sort @report_files) {
- my ($date) = $file =~ /(\d{8})\.gmi/;
- next unless $date;
-
- my $content = read_file($file);
-
- # Extract Summary
- if ($content =~ /### Summary\n\n(.*?)\n\n###/s) {
- my $summary_text = $1;
- my @lines = split /\n/, $summary_text;
- for my $line (@lines) {
- if ($line =~ /\* (.*?): (\d+)/) {
- $summary_stats{$date}{$1} = $2;
- }
- }
- }
-
- # Extract Feed Statistics
- if ($content =~ /### Feed Statistics\n\n```\n(.*?)\n```/s) {
- my $feed_text = $1;
- my @lines = split /\n/, $feed_text;
- for my $line (@lines) {
- if ($line =~ /\| (.*?) \| (.*?) \|/) {
- my ($key, $val) = (trim($1), trim($2));
- next if $key eq 'Feed Type';
- $feed_stats{$date}{$key} = $val;
- }
- }
- }
-
- # Extract Host Statistics
- if ($content =~ /### Page Statistics \(by Host\)\n\n```\n(.*?)\n```/s) {
- my $host_text = $1;
- my @lines = split /\n/, $host_text;
- for my $line (@lines) {
- if ($line =~ /\| (.*?) \| (.*?) \|/) {
- my ($key, $val) = (trim($1), trim($2));
- next if $key eq 'Host';
- $host_stats{$key} += $val;
- }
- }
- }
-
- # Extract URL Statistics
- if ($content =~ /### Page Statistics \(by URL\)\n\n```\n(.*?)\n```/s) {
- my $url_text = $1;
- my @lines = split /\n/, $url_text;
- for my $line (@lines) {
- if ($line =~ /\| (.*?) \| (.*?) \|/) {
- my ($key, $val) = (trim($1), trim($2));
- next if $key eq 'URL';
- $url_stats{$key} += $val;
- }
- }
- }
-}
-
-# Generate Summary Report
-
-print "# 30-Day Summary Report\n\n";
-
-print "## Daily Summary Evolution\n\n";
-my @dates = sort keys %summary_stats;
-my @summary_headers = sort keys %{ $summary_stats{ $dates[0] } };
-print "| Date | " . join(" | ", @summary_headers) . "|\n";
-print "|------------|" . join("", map { '-' x (length($_) + 2) . '|' } @summary_headers) . "\n";
-for my $date (@dates) {
- print "| $date | ";
- for my $header (@summary_headers) {
- print "$summary_stats{$date}{$header} | ";
- }
- print "\n";
-}
-
-print "\n## Daily Feed Statistics Evolution\n\n";
-my @feed_headers = sort keys %{ $feed_stats{ $dates[0] } };
-print "| Date | " . join(" | ", @feed_headers) . "|\n";
-print "|------------|" . join("", map { '-' x (length($_) + 2) . '|' } @feed_headers) . "\n";
-for my $date (@dates) {
- print "| $date | ";
- for my $header (@feed_headers) {
- print "$feed_stats{$date}{$header} | ";
- }
- print "\n";
-}
-
-print "\n## Top 50 Hosts\n\n";
-my @sorted_hosts = sort { $host_stats{$b} <=> $host_stats{$a} } keys %host_stats;
-@sorted_hosts = @sorted_hosts[0..49] if @sorted_hosts > 50;
-print "| Host | Total Visitors |\n";
-print "|------|----------------|\n";
-for my $host (@sorted_hosts) {
- print "| $host | $host_stats{$host} |\n";
-}
-
-print "\n## Top 50 URLs\n\n";
-my @sorted_urls = sort { $url_stats{$b} <=> $url_stats{$a} } keys %url_stats;
-@sorted_urls = @sorted_urls[0..49] if @sorted_urls > 50;
-print "| URL | Total Visitors |\n";
-print "|-----|----------------|\n";
-for my $url (@sorted_urls) {
- print "| $url | $url_stats{$url} |\n";
-}
-
-print "\n## Daily Reports\n\n";
-for my $file (sort @report_files) {
- my ($date) = $file =~ /(\d{8})\.gmi/;
- next unless $date;
- print "=> ./$date.gmi $date Report\n";
-}
-
-sub trim {
- my $s = shift;
- $s =~ s/^\s+|\s+$//g;
- return $s;
-}