summaryrefslogtreecommitdiff
path: root/frontends/scripts
diff options
context:
space:
mode:
authorPaul Buetow <paul@buetow.org>2025-07-12 08:29:49 +0300
committerPaul Buetow <paul@buetow.org>2025-07-12 08:29:49 +0300
commit6901512992a6ecbbf651d254e588dc13beae154f (patch)
treeff78f9df92aeffdff7750f1966879b9bbabf20d0 /frontends/scripts
parented5d18bb0633b2b31684fbdf7a3f359bd907d6dd (diff)
update foostats
Diffstat (limited to 'frontends/scripts')
-rw-r--r--frontends/scripts/foostats.pl364
-rwxr-xr-xfrontends/scripts/sitestats.sh111
2 files changed, 321 insertions, 154 deletions
diff --git a/frontends/scripts/foostats.pl b/frontends/scripts/foostats.pl
index 1af3e4b..2783b2d 100644
--- a/frontends/scripts/foostats.pl
+++ b/frontends/scripts/foostats.pl
@@ -771,8 +771,219 @@ package Foostats::Reporter {
", @table_lines );
}
+ # Convert a gemtext document into an HTML fragment (no <html>/<body> wrapper).
+ #
+ # Parameters:
+ #   $content - gemtext source as a single string; lines are separated by "\n"
+ # Returns:
+ #   HTML string. "#"/"##"/"###" lines become <h1>/<h2>/<h3>, "=>" lines become
+ #   <p><a href=...>, consecutive "* " lines are grouped into one <ul>, fenced
+ #   ``` blocks become <pre> (or a <table> when is_ascii_table() accepts them),
+ #   blank lines are dropped and every other line becomes a <p>. All text is
+ #   escaped with encode_entities().
+ sub gemtext_to_html {
+     my ($content) = @_;
+     my $html = "";
+     my $in_code_block = 0;
+     my $in_list = 0;
+     my @code_block_lines = ();
+
+     # Emit the collected preformatted lines (as a table if they look like
+     # one) and reset the code-block state.
+     my $flush_code_block = sub {
+         if (is_ascii_table(\@code_block_lines)) {
+             $html .= convert_ascii_table_to_html(\@code_block_lines);
+         } else {
+             $html .= "<pre>\n";
+             $html .= encode_entities($_) . "\n" for @code_block_lines;
+             $html .= "</pre>\n";
+         }
+         @code_block_lines = ();
+         $in_code_block = 0;
+     };
+
+     # Close the current <ul>, if one is open.
+     my $close_list = sub {
+         return unless $in_list;
+         $html .= "</ul>\n";
+         $in_list = 0;
+     };
+
+     for my $line (split /\n/, $content // '') {
+         if ($line =~ /^```/) {
+             if ($in_code_block) {
+                 $flush_code_block->();
+             } else {
+                 # A fence terminates any open list; otherwise the <pre> or
+                 # <table> would end up nested inside the <ul>.
+                 $close_list->();
+                 $in_code_block = 1;
+             }
+             next;
+         }
+
+         if ($in_code_block) {
+             push @code_block_lines, $line;
+             next;
+         }
+
+         # Any line that is not a bullet ends the current list
+         $close_list->() if $line !~ /^\* /;
+
+         # Headers (longest prefix first)
+         if ($line =~ /^### (.*)/) {
+             $html .= "<h3>" . encode_entities($1) . "</h3>\n";
+         } elsif ($line =~ /^## (.*)/) {
+             $html .= "<h2>" . encode_entities($1) . "</h2>\n";
+         } elsif ($line =~ /^# (.*)/) {
+             $html .= "<h1>" . encode_entities($1) . "</h1>\n";
+         }
+         # Links: "=>" + optional whitespace + URL + optional label
+         elsif ($line =~ /^=>\s*(\S+)(?:\s+(.*))?$/) {
+             my ($url, $text) = ($1, $2);
+             # Convert .gmi links to .html
+             $url =~ s/\.gmi$/\.html/;
+             # A link without a label is shown using its URL
+             $text = $url unless defined $text && length $text;
+             $html .= "<p><a href=\"" . encode_entities($url) . "\">" . encode_entities($text) . "</a></p>\n";
+         }
+         # Bullet points
+         elsif ($line =~ /^\* (.*)/) {
+             if (!$in_list) {
+                 $html .= "<ul>\n";
+                 $in_list = 1;
+             }
+             $html .= "<li>" . encode_entities($1) . "</li>\n";
+         }
+         # Empty line - skipped for more compact output
+         elsif ($line =~ /^\s*$/) {
+         }
+         # Regular text
+         else {
+             $html .= "<p>" . encode_entities($line) . "</p>\n";
+         }
+     }
+
+     # A fence that was never closed: still emit what was collected instead
+     # of silently dropping it.
+     $flush_code_block->() if $in_code_block;
+
+     # Close list if still open
+     $close_list->();
+
+     return $html;
+ }
+
+ # Decide whether the lines of a preformatted block form an ASCII table.
+ #
+ # Parameters:
+ #   $lines - array reference of the block's lines
+ # Returns:
+ #   1 if there are at least three lines (header, separator, one data row)
+ #   and one of them is a separator line such as "|-----|-----|"; 0 otherwise.
+ sub is_ascii_table {
+     my ($lines) = @_;
+     return 0 unless $lines && @$lines >= 3;
+
+     for my $line (@$lines) {
+         # A separator is made only of dashes, pipes and whitespace and must
+         # contain a dash before its first inner pipe. Requiring the dash
+         # keeps ordinary preformatted text that merely starts with
+         # whitespace and a pipe (e.g. " | sort | uniq") from being
+         # treated as a table.
+         return 1 if $line =~ /^\s*\|?[\s\-]*-[\s\-]*\|[\s\-|]*$/;
+     }
+     return 0;
+ }
+
+ # Convert the lines of an ASCII table ("| a | b |" rows) into an HTML <table>.
+ #
+ # Parameters:
+ #   $lines - array reference of table lines
+ # Returns:
+ #   HTML string. The first non-separator row is rendered with <th> cells,
+ #   all following rows with <td> cells; cell text is trimmed with trim()
+ #   and escaped with encode_entities().
+ sub convert_ascii_table_to_html {
+     my ($lines) = @_;
+     my $html = "<table>\n";
+     my $row_count = 0;
+
+     for my $line (@{ $lines // [] }) {
+         # Separator lines consist solely of border characters and contain a
+         # dash. Testing the whole line (not just its start) keeps data rows
+         # such as "|   | foo-bar |" from being mistaken for a separator.
+         next if $line =~ /^[\s|+\-]*-[\s|+\-]*$/;
+
+         # Strip the outer border pipes, then split on the inner ones. The -1
+         # limit keeps trailing empty fields, so blank cells stay in their
+         # column instead of shifting the remaining cells to the left.
+         my $row = $line;
+         $row =~ s/^\s*\|//;
+         $row =~ s/\|\s*$//;
+         next unless $row =~ /[^\s|]/;    # row carries no cell text at all
+
+         my @cells = map { trim($_) } split /\|/, $row, -1;
+
+         $html .= "<tr>\n";
+         # First row is header
+         my $tag = ($row_count == 0) ? "th" : "td";
+         for my $cell (@cells) {
+             $html .= " <$tag>" . encode_entities($cell) . "</$tag>\n";
+         }
+         $html .= "</tr>\n";
+         $row_count++;
+     }
+
+     $html .= "</table>\n";
+     return $html;
+ }
+
+ # Return a copy of the given string with leading and trailing whitespace
+ # removed; the caller's value is left untouched.
+ sub trim {
+     my ($str) = @_;
+     for ($str) {
+         s/^\s+//;    # leading
+         s/\s+$//;    # trailing
+     }
+     return $str;
+ }
+
+ # Escape the five HTML-significant characters (& < > " ') so that arbitrary
+ # text can be placed in element content or attribute values without being
+ # interpreted as markup (XSS prevention). Returns the escaped copy.
+ sub encode_entities {
+     my ($text) = @_;
+     my %entity_for = (
+         '&' => '&amp;',
+         '<' => '&lt;',
+         '>' => '&gt;',
+         '"' => '&quot;',
+         "'" => '&#39;',
+     );
+     # Single pass over the input, so an "&" produced by a replacement is
+     # never escaped a second time.
+     $text =~ s/([&<>"'])/$entity_for{$1}/g;
+     return $text;
+ }
+
+ # Wrap an HTML fragment in a complete, self-contained HTML5 page.
+ #
+ # Parameters:
+ #   $title   - text placed inside <title>; it is interpolated as-is, so it
+ #              must not contain unescaped markup
+ #   $content - HTML fragment placed inside <body>; it is interpolated as-is
+ #              and is expected to be escaped already
+ # Returns:
+ #   The page as one string, including the doctype, UTF-8 charset and viewport
+ #   meta tags and an inline <style> block (monospace body limited to 80ch,
+ #   plus rules for headings, pre, table, links, hr and lists).
+ sub generate_html_page {
+ my ($title, $content) = @_;
+ return qq{<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="UTF-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+ <title>$title</title>
+ <style>
+ body {
+ font-family: monospace;
+ line-height: 1.6;
+ max-width: 80ch;
+ margin: 0 auto;
+ padding: 1em;
+ background: white;
+ color: black;
+ }
+ h1, h2, h3 {
+ font-weight: bold;
+ margin-top: 1em;
+ margin-bottom: 0.5em;
+ }
+ h1 { font-size: 1.2em; }
+ h2 { font-size: 1.1em; }
+ h3 { font-size: 1em; }
+ pre {
+ overflow-x: auto;
+ white-space: pre;
+ font-family: monospace;
+ }
+ table {
+ border-collapse: collapse;
+ margin: 1em 0;
+ }
+ th, td {
+ padding: 0.25em 0.5em;
+ text-align: left;
+ }
+ a {
+ color: blue;
+ text-decoration: underline;
+ }
+ a:visited {
+ color: purple;
+ }
+ hr {
+ border: none;
+ border-top: 1px solid #ccc;
+ margin: 1em 0;
+ }
+ ul {
+ margin: 0.5em 0;
+ padding-left: 2em;
+ }
+ li {
+ margin: 0.25em 0;
+ }
+ </style>
+</head>
+<body>
+$content
+</body>
+</html>
+};
+ }
+
sub report {
- my ( $stats_dir, $output_dir, %merged ) = @_;
+ my ( $stats_dir, $output_dir, $html_output_dir, %merged ) = @_;
for my $date ( sort { $b cmp $a } keys %merged ) {
my $stats = $merged{$date};
next unless $stats->{count};
@@ -781,15 +992,16 @@ package Foostats::Reporter {
# Check if .gmi file exists and its age based on date in filename
my $report_path = "$output_dir/$date.gmi";
+ my $html_report_path = "$output_dir/$date.html";
# Calculate age of the data based on date in filename
my $today = Time::Piece->new();
my $file_date = Time::Piece->strptime( $date, '%Y%m%d' );
my $age_days = ( $today - $file_date ) / ( 24 * 60 * 60 );
- if ( -e $report_path ) {
+ if ( -e $report_path && -e $html_report_path ) {
- # File exists
+ # Files exist
if ( $age_days <= 3 ) {
# Data is recent (within 3 days), regenerate it
@@ -799,9 +1011,9 @@ package Foostats::Reporter {
. " days)";
}
else {
- # Data is old (older than 3 days), skip if file exists
+ # Data is old (older than 3 days), skip if files exist
say
-"Skipping daily report for $year-$month-$day (file exists, data age: "
+"Skipping daily report for $year-$month-$day (files exist, data age: "
. sprintf( "%.1f", $age_days )
. " days)";
next;
@@ -933,12 +1145,14 @@ package Foostats::Reporter {
$report_content .= "
";
- # Add link to monthly report
+ # Add links to summary reports
$report_content .= "## Related Reports\n\n";
my $now = localtime;
- my $current_month = $now->strftime('%Y%m%d');
- $report_content .=
- "=> ./30day_summary_$current_month.gmi 30-Day Summary Report\n\n";
+ my $current_date = $now->strftime('%Y%m%d');
+ $report_content .= "=> ./7day_summary_$current_date.gmi 7-Day Summary Report\n";
+ $report_content .= "=> ./30day_summary_$current_date.gmi 30-Day Summary Report\n";
+ $report_content .= "=> ./365day_summary_$current_date.gmi 365-Day Summary Report\n";
+ $report_content .= "=> ./index.gmi Back to Index\n\n";
# Ensure output directory exists
mkdir $output_dir unless -d $output_dir;
@@ -946,27 +1160,38 @@ package Foostats::Reporter {
# $report_path already defined above
say "Writing report to $report_path";
FileHelper::write( $report_path, $report_content );
+
+ # Also write HTML version
+ mkdir $html_output_dir unless -d $html_output_dir;
+ my $html_path = "$html_output_dir/$date.html";
+ my $html_content = gemtext_to_html($report_content);
+ my $html_page = generate_html_page("Stats for $year-$month-$day", $html_content);
+ say "Writing HTML report to $html_path";
+ FileHelper::write( $html_path, $html_page );
}
- # Generate 30-day summary report
- generate_30day_report( $stats_dir, $output_dir, %merged );
+ # Generate summary reports
+ generate_summary_report( 7, $stats_dir, $output_dir, $html_output_dir, %merged );
+ generate_summary_report( 30, $stats_dir, $output_dir, $html_output_dir, %merged );
+ generate_summary_report( 365, $stats_dir, $output_dir, $html_output_dir, %merged );
- # Generate index.gmi
- generate_index( $output_dir );
+ # Generate index.gmi and index.html
+ generate_index( $output_dir, $html_output_dir );
}
- sub generate_30day_report {
- my ( $stats_dir, $output_dir, %merged ) = @_;
+ sub generate_summary_report {
+ my ( $days, $stats_dir, $output_dir, $html_output_dir, %merged ) = @_;
- # Get the last 30 days of dates
+ # Get the last N days of dates
my @dates = sort { $b cmp $a } keys %merged;
- @dates = @dates[ 0 .. 29 ] if @dates > 30;
+ my $max_index = $days - 1;
+ @dates = @dates[ 0 .. $max_index ] if @dates > $days;
my $today = localtime;
my $report_date = $today->strftime('%Y%m%d');
# Build report content
- my $report_content = build_report_header($today);
+ my $report_content = build_report_header($today, $days);
$report_content .= build_daily_summary_section( \@dates, \%merged );
$report_content .= build_feed_statistics_section( \@dates, \%merged );
@@ -976,21 +1201,30 @@ package Foostats::Reporter {
$report_content .= build_top_hosts_section($all_hosts);
$report_content .= build_top_urls_section($all_urls);
- # Add daily report links
- $report_content .= build_daily_reports_links( \@dates, \%merged );
+ # Add links to other summary reports
+ $report_content .= build_summary_links($days, $report_date);
- # Ensure output directory exists and write the 30-day report
+ # Ensure output directory exists and write the summary report
mkdir $output_dir unless -d $output_dir;
- my $report_path = "$output_dir/30day_summary_$report_date.gmi";
- say "Writing 30-day summary report to $report_path";
+ my $report_path = "$output_dir/${days}day_summary_$report_date.gmi";
+ say "Writing $days-day summary report to $report_path";
FileHelper::write( $report_path, $report_content );
+
+ # Also write HTML version
+ mkdir $html_output_dir unless -d $html_output_dir;
+ my $html_path = "$html_output_dir/${days}day_summary_$report_date.html";
+ my $html_content = gemtext_to_html($report_content);
+ my $html_page = generate_html_page("$days-Day Summary Report", $html_content);
+ say "Writing HTML $days-day summary report to $html_path";
+ FileHelper::write( $html_path, $html_page );
}
sub build_report_header {
- my ($today) = @_;
+ my ($today, $days) = @_;
+ $days //= 30; # Default to 30 days for backward compatibility
- my $content = "# 30-Day Summary Report\n\n";
+ my $content = "# $days-Day Summary Report\n\n";
$content .= "Generated on " . $today->strftime('%Y-%m-%d') . "\n\n";
return $content;
}
@@ -1146,42 +1380,60 @@ package Foostats::Reporter {
return $content;
}
- sub build_daily_reports_links {
- my ( $dates, $merged ) = @_;
+ sub build_summary_links {
+ my ( $current_days, $report_date ) = @_;
- my $content = "## Daily Reports\n\n";
-
- for my $date (@$dates) {
- next unless exists $merged->{$date} && $merged->{$date}->{count};
-
- my ( $year, $month, $day ) = $date =~ /(\d{4})(\d{2})(\d{2})/;
- my $formatted_date = "$year-$month-$day";
-
- $content .= "=> ./$date.gmi $formatted_date Daily Report\n";
+ my $content = "## Other Summary Reports\n\n";
+
+ # Add links to other summary periods
+ my @periods = (7, 30, 365);
+
+ for my $days (@periods) {
+ next if $days == $current_days; # Skip current report type
+ $content .= "=> ./${days}day_summary_$report_date.gmi ${days}-Day Summary Report\n";
}
+
+ # Add link to index
+ $content .= "\n=> ./index.gmi Back to Index\n";
return $content;
}
sub generate_index {
- my ($output_dir) = @_;
+ my ($output_dir, $html_output_dir) = @_;
# Get all .gmi files in the output directory
opendir(my $dh, $output_dir) or die "Cannot open directory $output_dir: $!";
my @gmi_files = grep { /\.gmi$/ && $_ ne 'index.gmi' } readdir($dh);
closedir($dh);
- # Sort files: 30-day summaries first, then daily reports by date (newest first)
- my @summaries = sort { $b cmp $a } grep { /^30day_summary_/ } @gmi_files;
+ # Sort files by type and date (newest first)
+ my @summaries_7day = sort { $b cmp $a } grep { /^7day_summary_/ } @gmi_files;
+ my @summaries_30day = sort { $b cmp $a } grep { /^30day_summary_/ } @gmi_files;
+ my @summaries_365day = sort { $b cmp $a } grep { /^365day_summary_/ } @gmi_files;
my @daily = sort { $b cmp $a } grep { /^\d{8}\.gmi$/ } @gmi_files;
# Build index content
my $content = "# Foostats Reports Index\n\n";
$content .= "Generated on " . localtime->strftime('%Y-%m-%d %H:%M:%S') . "\n\n";
- if (@summaries) {
+ # Add 7-day summaries
+ if (@summaries_7day) {
+ $content .= "## 7-Day Summary Reports\n\n";
+ for my $summary (@summaries_7day) {
+ my ($date) = $summary =~ /7day_summary_(\d{8})\.gmi/;
+ if ($date) {
+ my ($year, $month, $day) = $date =~ /(\d{4})(\d{2})(\d{2})/;
+ $content .= "=> ./$summary 7-Day Summary ($year-$month-$day)\n";
+ }
+ }
+ $content .= "\n";
+ }
+
+ # Add 30-day summaries
+ if (@summaries_30day) {
$content .= "## 30-Day Summary Reports\n\n";
- for my $summary (@summaries) {
+ for my $summary (@summaries_30day) {
my ($date) = $summary =~ /30day_summary_(\d{8})\.gmi/;
if ($date) {
my ($year, $month, $day) = $date =~ /(\d{4})(\d{2})(\d{2})/;
@@ -1191,6 +1443,19 @@ package Foostats::Reporter {
$content .= "\n";
}
+ # Add 365-day summaries
+ if (@summaries_365day) {
+ $content .= "## 365-Day Summary Reports\n\n";
+ for my $summary (@summaries_365day) {
+ my ($date) = $summary =~ /365day_summary_(\d{8})\.gmi/;
+ if ($date) {
+ my ($year, $month, $day) = $date =~ /(\d{4})(\d{2})(\d{2})/;
+ $content .= "=> ./$summary 365-Day Summary ($year-$month-$day)\n";
+ }
+ }
+ $content .= "\n";
+ }
+
if (@daily) {
$content .= "## Daily Reports\n\n";
my $count = 0;
@@ -1212,6 +1477,14 @@ package Foostats::Reporter {
my $index_path = "$output_dir/index.gmi";
say "Writing index to $index_path";
FileHelper::write($index_path, $content);
+
+ # Also write HTML version
+ mkdir $html_output_dir unless -d $html_output_dir;
+ my $html_path = "$html_output_dir/index.html";
+ my $html_content = gemtext_to_html($content);
+ my $html_page = generate_html_page("Foostats Reports Index", $html_content);
+ say "Writing HTML index to $html_path";
+ FileHelper::write($html_path, $html_page);
}
}
@@ -1232,6 +1505,8 @@ package main {
Default: /var/www/htdocs/buetow.org/self/foostats
--output-dir <path> Directory to write .gmi report files.
Default: /var/gemini/stats.foo.zone
+ --html-output-dir <path> Directory to write .html report files.
+ Default: /var/www/htdocs/gemtexter/stats.foo.zone
--odds-file <path> File with odd URI patterns to filter.
Default: <stats-dir>/fooodds.txt
--filter-log <path> Log file for filtered requests.
@@ -1262,6 +1537,7 @@ package main {
my $odds_file = $stats_dir . '/fooodds.txt';
my $odds_log = '/var/log/fooodds';
my $output_dir; # Will default to $stats_dir/gemtext if not specified
+ my $html_output_dir; # Will default to /var/www/htdocs/gemtexter/stats.foo.zone if not specified
my $partner_node =
hostname eq 'fishfinger.buetow.org'
? 'blowfish.buetow.org'
@@ -1276,6 +1552,7 @@ package main {
'all!' => \$all,
'stats-dir=s' => \$stats_dir,
'output-dir=s' => \$output_dir,
+ 'html-output-dir=s' => \$html_output_dir,
'partner-node=s' => \$partner_node,
'help|?' => \$help;
@@ -1289,10 +1566,11 @@ package main {
if $replicate
or $all;
- # Set default output directory if not specified
+ # Set default output directories if not specified
$output_dir //= '/var/gemini/stats.foo.zone';
+ $html_output_dir //= '/var/www/htdocs/gemtexter/stats.foo.zone';
- Foostats::Reporter::report( $stats_dir, $output_dir,
+ Foostats::Reporter::report( $stats_dir, $output_dir, $html_output_dir,
Foostats::Merger::merge($stats_dir) )
if $report
or $all;
diff --git a/frontends/scripts/sitestats.sh b/frontends/scripts/sitestats.sh
deleted file mode 100755
index 62702c7..0000000
--- a/frontends/scripts/sitestats.sh
+++ /dev/null
@@ -1,111 +0,0 @@
-#!/bin/sh
-
-# This is a quick and dirty script to get some stats for my site.
-# Yes, this could be programmed cleaner, but I wanted to do something quick
-# and dirty and this also with only tools available on the OpenBSD base install.
-
-STATSFILE=/tmp/sitestats.csv
-BOTSFILE=/tmp/sitebots.txt
-TOP=20
-
-header () {
- echo "proto,host,ip,day,month,time,path"
-}
-
-http_stats () {
- zgrep -h . /var/www/logs/access.log* |
- perl -l -n -e 's/\.html/.suffix/; @s=split / +/; next if @s!=11;
- $s[4]=~s|\[(\d\d)/(...)/\d{4}:(.*)|$1,$2,$3|; print "http,".join ",",@s[0,1,4,7];'
-}
-
-gemini_stats () {
- zgrep -h . /var/log/daemon* |
- perl -l -n -e '@s=split / +/; @v=@s and next if $s[4] eq "vger:";
- next if !/relayd.*gemini/; ($path) = $v[-1] =~ m|gemini://.*?(/.*)|;
- next if $path eq ""; $path =~ s/\.gmi/.suffix/;
- print "gemini,".(split("/", $v[6]))[2].",$s[12],$s[1],$s[0],$s[2],$path"'
-}
-
-parse_logs () {
- header > $STATSFILE.tmp
- http_stats >> $STATSFILE.tmp
- gemini_stats >> $STATSFILE.tmp
- mv $STATSFILE.tmp $STATSFILE
-}
-
-filter () {
- # Collect some 'you are a bot' scores.
- # 1. You visit 2 sites within one single second
- # 2. You try to call an odd file or path
- cut -d, -f2,3,6,7 $STATSFILE |
- perl -l -n -e '($k)=m/(.*?,.*?,.*?),/; $s{$k}++ if /\.suffix/;
- $s{$k}+=1000 if /(?:target\.suffix|\.php|wordpress|\/wp|\.asp|\.\.|robots\.txt|\.env|\?|\+|%|\*|HNAP1|\/admin\/|\.git\/|microsoft\.exchange|\.lua|\/owa\/)/;
- END { while (($k,$v) = each %s) { print $k =~ /.*?,(.*?),/ if $v > 1 } }' |
- sort -u > $BOTSFILE
-
- # Filte out all bot IPs, also only filter out all known file "types".
- grep -F -v -f $BOTSFILE $STATSFILE > $STATSFILE.clean1
- grep -v -E '(proto,host|\.suffix|atom\.xml|\.gif|\.png|\.jpg|,,)' $STATSFILE.clean1 > $STATSFILE.dirt
- #grep -E '(proto,host|\.suffix|atom\.xml|\.gif|\.png|\.jpg)' $STATSFILE.clean1 > $STATSFILE.clean2
- mv $STATSFILE.clean1 $STATSFILE
-}
-
-stats () {
- sed 1d $STATSFILE
-}
-
-top_n () {
- fields="$1"
- descr="$2"
-
- echo "Top $TOP `head -n 1 $STATSFILE | cut -d, -f"$fields"`$descr:"
- cut -d, -f"$fields" | sort | uniq -c | sort -nr | head -n $TOP | sed 's/^/ /'
- echo
-}
-
-ip_stats () {
- for proto in http gemini; do
- echo -n "Unique $proto IPv4 IPs:\t"
- stats | grep "^$proto," | cut -d, -f3 | grep -F -v : | sort -u | wc -l
- echo -n "Unique $proto IPv6 IPs:\t"
- stats | grep "^$proto," | cut -d, -f3 | grep -F : | sort -u | wc -l
- done
-}
-
-ip_daily_stats () {
- echo "Unique IPs by day"
- for back in $(jot 14); do
- now=$(date +%s)
- date=$(date -r $(echo "$now - 86400 * $back" | bc) +%d,%b)
- echo -n "\t $date:"
- stats | grep $date | cut -d, -f3 | sort -u | wc -l
- done
-}
-
-ip_daily_subscribers () {
- echo "Unique atom.xml subscribers by day"
- for back in $(jot 14); do
- now=$(date +%s)
- date=$(date -r $(echo "$now - 86400 * $back" | bc) +%d,%b)
- echo -n "\t $date:"
- stats | grep $date | grep atom.xml | cut -d, -f3 | sort -u | wc -l
- done
-}
-
-main () {
- date
- echo
- parse_logs
- filter
- stats | grep -F .suffix | top_n '1,2,4,5,7' ' (Only content)'
- stats | top_n 2
- stats | top_n '4,5'
- stats | top_n 7
- stats | grep -F .suffix | top_n 7 ' (Only content)'
- stats | top_n '1,2,7'
- ip_stats
- ip_daily_stats
- ip_daily_subscribers
-}
-
-main | sed 's/\.suffix//'