2 files changed, 321 insertions, 154 deletions
diff --git a/frontends/scripts/foostats.pl b/frontends/scripts/foostats.pl
index 1af3e4b..2783b2d 100644
--- a/frontends/scripts/foostats.pl
+++ b/frontends/scripts/foostats.pl
@@ -771,8 +771,219 @@ package Foostats::Reporter {
 ", @table_lines );
     }
 
+    # Convert gemtext to HTML
+    sub gemtext_to_html {
+        my ($content) = @_;
+        my $html = "";
+        my $in_code_block = 0;
+        my $in_list = 0;
+        my @lines = split /\n/, $content;
+        my @code_block_lines = ();
+        
+        for my $line (@lines) {
+            if ($line =~ /^```/) {
+                if ($in_code_block) {
+                    # End code block - check if it's a table
+                    if (is_ascii_table(\@code_block_lines)) {
+                        $html .= convert_ascii_table_to_html(\@code_block_lines);
+                    } else {
+                        $html .= "<pre>\n";
+                        for my $code_line (@code_block_lines) {
+                            $html .= encode_entities($code_line) . "\n";
+                        }
+                        $html .= "</pre>\n";
+                    }
+                    @code_block_lines = ();
+                    $in_code_block = 0;
+                } else {
+                    $in_code_block = 1;
+                }
+                next;
+            }
+            
+            if ($in_code_block) {
+                push @code_block_lines, $line;
+                next;
+            }
+            
+            # Check if we need to close a list
+            if ($in_list && $line !~ /^\* /) {
+                $html .= "</ul>\n";
+                $in_list = 0;
+            }
+            
+            # Headers
+            if ($line =~ /^### (.*)/) {
+                $html .= "<h3>" . encode_entities($1) . "</h3>\n";
+            } elsif ($line =~ /^## (.*)/) {
+                $html .= "<h2>" . encode_entities($1) . "</h2>\n";
+            } elsif ($line =~ /^# (.*)/) {
+                $html .= "<h1>" . encode_entities($1) . "</h1>\n";
+            }
+            # Links
+            elsif ($line =~ /^=> (\S+)\s+(.*)/) {
+                my ($url, $text) = ($1, $2);
+                # Convert .gmi links to .html
+                $url =~ s/\.gmi$/\.html/;
+                $html .= "<p><a href=\"" . encode_entities($url) . "\">" . encode_entities($text) . "</a></p>\n";
+            }
+            # Bullet points
+            elsif ($line =~ /^\* (.*)/) {
+                if (!$in_list) {
+                    $html .= "<ul>\n";
+                    $in_list = 1;
+                }
+                $html .= "<li>" . encode_entities($1) . "</li>\n";
+            }
+            # Empty line - skip to avoid excessive spacing
+            elsif ($line =~ /^\s*$/) {
+                # Skip empty lines for more compact output
+            }
+            # Regular text
+            else {
+                $html .= "<p>" . encode_entities($line) . "</p>\n";
+            }
+        }
+        
+        # Close list if still open
+        if ($in_list) {
+            $html .= "</ul>\n";
+        }
+        
+        return $html;
+    }
+    
+    # Check if the lines form an ASCII table
+    sub is_ascii_table {
+        my ($lines) = @_;
+        return 0 if @$lines < 3;  # Need at least header, separator, and one data row
+        
+        # Check for separator lines with dashes and pipes
+        for my $line (@$lines) {
+            return 1 if $line =~ /^\|?[\s\-]+\|/;
+        }
+        return 0;
+    }
+    
+    # Convert ASCII table to HTML table
+    sub convert_ascii_table_to_html {
+        my ($lines) = @_;
+        my $html = "<table>\n";
+        my $row_count = 0;
+        
+        for my $line (@$lines) {
+            # Skip separator lines
+            next if $line =~ /^\|?[\s\-]+\|/ && $line =~ /\-/;
+            
+            # Parse table row
+            my @cells = split /\s*\|\s*/, $line;
+            @cells = grep { length($_) > 0 } @cells;  # Remove empty cells
+            
+            if (@cells) {
+                $html .= "<tr>\n";
+                # First row is header
+                my $tag = ($row_count == 0) ? "th" : "td";
+                for my $cell (@cells) {
+                    $html .= "  <$tag>" . encode_entities(trim($cell)) . "</$tag>\n";
+                }
+                $html .= "</tr>\n";
+                $row_count++;
+            }
+        }
+        
+        $html .= "</table>\n";
+        return $html;
+    }
+    
+    # Trim whitespace from string
+    sub trim {
+        my ($str) = @_;
+        $str =~ s/^\s+//;
+        $str =~ s/\s+$//;
+        return $str;
+    }
+    
+    # Encode HTML entities to prevent XSS
+    sub encode_entities {
+        my ($text) = @_;
+        $text =~ s/&/&amp;/g;
+        $text =~ s/</&lt;/g;
+        $text =~ s/>/&gt;/g;
+        $text =~ s/"/&quot;/g;
+        $text =~ s/'/&#39;/g;
+        return $text;
+    }
+    
+    # Generate HTML wrapper
+    sub generate_html_page {
+        my ($title, $content) = @_;
+        return qq{<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>$title</title>
+    <style>
+        body {
+            font-family: monospace;
+            line-height: 1.6;
+            max-width: 80ch;
+            margin: 0 auto;
+            padding: 1em;
+            background: white;
+            color: black;
+        }
+        h1, h2, h3 {
+            font-weight: bold;
+            margin-top: 1em;
+            margin-bottom: 0.5em;
+        }
+        h1 { font-size: 1.2em; }
+        h2 { font-size: 1.1em; }
+        h3 { font-size: 1em; }
+        pre {
+            overflow-x: auto;
+            white-space: pre;
+            font-family: monospace;
+        }
+        table {
+            border-collapse: collapse;
+            margin: 1em 0;
+        }
+        th, td {
+            padding: 0.25em 0.5em;
+            text-align: left;
+        }
+        a {
+            color: blue;
+            text-decoration: underline;
+        }
+        a:visited {
+            color: purple;
+        }
+        hr {
+            border: none;
+            border-top: 1px solid #ccc;
+            margin: 1em 0;
+        }
+        ul {
+            margin: 0.5em 0;
+            padding-left: 2em;
+        }
+        li {
+            margin: 0.25em 0;
+        }
+    </style>
+</head>
+<body>
+$content
+</body>
+</html>
+};
+    }
+
     sub report {
-        my ( $stats_dir, $output_dir, %merged ) = @_;
+        my ( $stats_dir, $output_dir, $html_output_dir, %merged ) = @_;
         for my $date ( sort { $b cmp $a } keys %merged ) {
             my $stats = $merged{$date};
             next unless $stats->{count};
@@ -781,15 +992,16 @@ package Foostats::Reporter {
 
             # Check if .gmi file exists and its age based on date in filename
             my $report_path = "$output_dir/$date.gmi";
+            my $html_report_path = "$output_dir/$date.html";
 
             # Calculate age of the data based on date in filename
             my $today     = Time::Piece->new();
             my $file_date = Time::Piece->strptime( $date, '%Y%m%d' );
             my $age_days  = ( $today - $file_date ) / ( 24 * 60 * 60 );
 
-            if ( -e $report_path ) {
+            if ( -e $report_path && -e $html_report_path ) {
 
-                # File exists
+                # Files exist
                 if ( $age_days <= 3 ) {
 
                     # Data is recent (within 3 days), regenerate it
@@ -799,9 +1011,9 @@ package Foostats::Reporter {
                       . " days)";
                 }
                 else {
-                    # Data is old (older than 3 days), skip if file exists
+                    # Data is old (older than 3 days), skip if files exist
                     say
-"Skipping daily report for $year-$month-$day (file exists, data age: "
+"Skipping daily report for $year-$month-$day (files exist, data age: "
                       . sprintf( "%.1f", $age_days )
                       . " days)";
                     next;
@@ -933,12 +1145,14 @@ package Foostats::Reporter {
             $report_content .= "
 ";
 
-            # Add link to monthly report
+            # Add links to summary reports
             $report_content .= "## Related Reports\n\n";
             my $now           = localtime;
-            my $current_month = $now->strftime('%Y%m%d');
-            $report_content .=
-              "=> ./30day_summary_$current_month.gmi 30-Day Summary Report\n\n";
+            my $current_date = $now->strftime('%Y%m%d');
+            $report_content .= "=> ./7day_summary_$current_date.gmi 7-Day Summary Report\n";
+            $report_content .= "=> ./30day_summary_$current_date.gmi 30-Day Summary Report\n";
+            $report_content .= "=> ./365day_summary_$current_date.gmi 365-Day Summary Report\n";
+            $report_content .= "=> ./index.gmi Back to Index\n\n";
 
             # Ensure output directory exists
             mkdir $output_dir unless -d $output_dir;
@@ -946,27 +1160,38 @@ package Foostats::Reporter {
             # $report_path already defined above
             say "Writing report to $report_path";
             FileHelper::write( $report_path, $report_content );
+            
+            # Also write HTML version
+            mkdir $html_output_dir unless -d $html_output_dir;
+            my $html_path = "$html_output_dir/$date.html";
+            my $html_content = gemtext_to_html($report_content);
+            my $html_page = generate_html_page("Stats for $year-$month-$day", $html_content);
+            say "Writing HTML report to $html_path";
+            FileHelper::write( $html_path, $html_page );
         }
 
-        # Generate 30-day summary report
-        generate_30day_report( $stats_dir, $output_dir, %merged );
+        # Generate summary reports
+        generate_summary_report( 7, $stats_dir, $output_dir, $html_output_dir, %merged );
+        generate_summary_report( 30, $stats_dir, $output_dir, $html_output_dir, %merged );
+        generate_summary_report( 365, $stats_dir, $output_dir, $html_output_dir, %merged );
         
-        # Generate index.gmi
-        generate_index( $output_dir );
+        # Generate index.gmi and index.html
+        generate_index( $output_dir, $html_output_dir );
     }
 
-    sub generate_30day_report {
-        my ( $stats_dir, $output_dir, %merged ) = @_;
+    sub generate_summary_report {
+        my ( $days, $stats_dir, $output_dir, $html_output_dir, %merged ) = @_;
 
-        # Get the last 30 days of dates
+        # Get the last N days of dates
         my @dates = sort { $b cmp $a } keys %merged;
-        @dates = @dates[ 0 .. 29 ] if @dates > 30;
+        my $max_index = $days - 1;
+        @dates = @dates[ 0 .. $max_index ] if @dates > $days;
 
         my $today       = localtime;
         my $report_date = $today->strftime('%Y%m%d');
 
         # Build report content
-        my $report_content = build_report_header($today);
+        my $report_content = build_report_header($today, $days);
         $report_content .= build_daily_summary_section( \@dates, \%merged );
         $report_content .= build_feed_statistics_section( \@dates, \%merged );
 
@@ -976,21 +1201,30 @@ package Foostats::Reporter {
         $report_content .= build_top_hosts_section($all_hosts);
         $report_content .= build_top_urls_section($all_urls);
 
-        # Add daily report links
-        $report_content .= build_daily_reports_links( \@dates, \%merged );
+        # Add links to other summary reports
+        $report_content .= build_summary_links($days, $report_date);
 
-        # Ensure output directory exists and write the 30-day report
+        # Ensure output directory exists and write the summary report
         mkdir $output_dir unless -d $output_dir;
 
-        my $report_path = "$output_dir/30day_summary_$report_date.gmi";
-        say "Writing 30-day summary report to $report_path";
+        my $report_path = "$output_dir/${days}day_summary_$report_date.gmi";
+        say "Writing $days-day summary report to $report_path";
         FileHelper::write( $report_path, $report_content );
+        
+        # Also write HTML version
+        mkdir $html_output_dir unless -d $html_output_dir;
+        my $html_path = "$html_output_dir/${days}day_summary_$report_date.html";
+        my $html_content = gemtext_to_html($report_content);
+        my $html_page = generate_html_page("$days-Day Summary Report", $html_content);
+        say "Writing HTML $days-day summary report to $html_path";
+        FileHelper::write( $html_path, $html_page );
     }
 
     sub build_report_header {
-        my ($today) = @_;
+        my ($today, $days) = @_;
+        $days //= 30;  # Default to 30 days for backward compatibility
 
-        my $content = "# 30-Day Summary Report\n\n";
+        my $content = "# $days-Day Summary Report\n\n";
         $content .= "Generated on " . $today->strftime('%Y-%m-%d') . "\n\n";
         return $content;
     }
@@ -1146,42 +1380,60 @@ package Foostats::Reporter {
         return $content;
     }
 
-    sub build_daily_reports_links {
-        my ( $dates, $merged ) = @_;
+    sub build_summary_links {
+        my ( $current_days, $report_date ) = @_;
 
-        my $content = "## Daily Reports\n\n";
-
-        for my $date (@$dates) {
-            next unless exists $merged->{$date} && $merged->{$date}->{count};
-
-            my ( $year, $month, $day ) = $date =~ /(\d{4})(\d{2})(\d{2})/;
-            my $formatted_date = "$year-$month-$day";
-
-            $content .= "=> ./$date.gmi $formatted_date Daily Report\n";
+        my $content = "## Other Summary Reports\n\n";
+        
+        # Add links to other summary periods
+        my @periods = (7, 30, 365);
+        
+        for my $days (@periods) {
+            next if $days == $current_days;  # Skip current report type
+            $content .= "=> ./${days}day_summary_$report_date.gmi ${days}-Day Summary Report\n";
         }
+        
+        # Add link to index
+        $content .= "\n=> ./index.gmi Back to Index\n";
 
         return $content;
     }
     
     sub generate_index {
-        my ($output_dir) = @_;
+        my ($output_dir, $html_output_dir) = @_;
         
         # Get all .gmi files in the output directory
         opendir(my $dh, $output_dir) or die "Cannot open directory $output_dir: $!";
         my @gmi_files = grep { /\.gmi$/ && $_ ne 'index.gmi' } readdir($dh);
         closedir($dh);
         
-        # Sort files: 30-day summaries first, then daily reports by date (newest first)
-        my @summaries = sort { $b cmp $a } grep { /^30day_summary_/ } @gmi_files;
+        # Sort files by type and date (newest first)
+        my @summaries_7day = sort { $b cmp $a } grep { /^7day_summary_/ } @gmi_files;
+        my @summaries_30day = sort { $b cmp $a } grep { /^30day_summary_/ } @gmi_files;
+        my @summaries_365day = sort { $b cmp $a } grep { /^365day_summary_/ } @gmi_files;
         my @daily = sort { $b cmp $a } grep { /^\d{8}\.gmi$/ } @gmi_files;
         
         # Build index content
         my $content = "# Foostats Reports Index\n\n";
         $content .= "Generated on " . localtime->strftime('%Y-%m-%d %H:%M:%S') . "\n\n";
         
-        if (@summaries) {
+        # Add 7-day summaries
+        if (@summaries_7day) {
+            $content .= "## 7-Day Summary Reports\n\n";
+            for my $summary (@summaries_7day) {
+                my ($date) = $summary =~ /7day_summary_(\d{8})\.gmi/;
+                if ($date) {
+                    my ($year, $month, $day) = $date =~ /(\d{4})(\d{2})(\d{2})/;
+                    $content .= "=> ./$summary 7-Day Summary ($year-$month-$day)\n";
+                }
+            }
+            $content .= "\n";
+        }
+        
+        # Add 30-day summaries
+        if (@summaries_30day) {
             $content .= "## 30-Day Summary Reports\n\n";
-            for my $summary (@summaries) {
+            for my $summary (@summaries_30day) {
                 my ($date) = $summary =~ /30day_summary_(\d{8})\.gmi/;
                 if ($date) {
                     my ($year, $month, $day) = $date =~ /(\d{4})(\d{2})(\d{2})/;
@@ -1191,6 +1443,19 @@ package Foostats::Reporter {
             $content .= "\n";
         }
         
+        # Add 365-day summaries
+        if (@summaries_365day) {
+            $content .= "## 365-Day Summary Reports\n\n";
+            for my $summary (@summaries_365day) {
+                my ($date) = $summary =~ /365day_summary_(\d{8})\.gmi/;
+                if ($date) {
+                    my ($year, $month, $day) = $date =~ /(\d{4})(\d{2})(\d{2})/;
+                    $content .= "=> ./$summary 365-Day Summary ($year-$month-$day)\n";
+                }
+            }
+            $content .= "\n";
+        }
+        
         if (@daily) {
             $content .= "## Daily Reports\n\n";
             my $count = 0;
@@ -1212,6 +1477,14 @@ package Foostats::Reporter {
         my $index_path = "$output_dir/index.gmi";
         say "Writing index to $index_path";
         FileHelper::write($index_path, $content);
+        
+        # Also write HTML version
+        mkdir $html_output_dir unless -d $html_output_dir;
+        my $html_path = "$html_output_dir/index.html";
+        my $html_content = gemtext_to_html($content);
+        my $html_page = generate_html_page("Foostats Reports Index", $html_content);
+        say "Writing HTML index to $html_path";
+        FileHelper::write($html_path, $html_page);
     }
 }
 
@@ -1232,6 +1505,8 @@ package main {
                                     Default: /var/www/htdocs/buetow.org/self/foostats
           --output-dir <path>       Directory to write .gmi report files.
                                     Default: /var/gemini/stats.foo.zone
+          --html-output-dir <path>  Directory to write .html report files.
+                                    Default: /var/www/htdocs/gemtexter/stats.foo.zone
           --odds-file <path>        File with odd URI patterns to filter.
                                     Default: <stats-dir>/fooodds.txt
           --filter-log <path>       Log file for filtered requests.
@@ -1262,6 +1537,7 @@ package main {
     my $odds_file = $stats_dir . '/fooodds.txt';
     my $odds_log  = '/var/log/fooodds';
     my $output_dir;  # Will default to $stats_dir/gemtext if not specified
+    my $html_output_dir;  # Will default to /var/www/htdocs/gemtexter/stats.foo.zone if not specified
     my $partner_node =
       hostname eq 'fishfinger.buetow.org'
       ? 'blowfish.buetow.org'
@@ -1276,6 +1552,7 @@ package main {
       'all!'           => \$all,
       'stats-dir=s'    => \$stats_dir,
       'output-dir=s'   => \$output_dir,
+      'html-output-dir=s' => \$html_output_dir,
       'partner-node=s' => \$partner_node,
       'help|?'         => \$help;
 
@@ -1289,10 +1566,11 @@ package main {
       if $replicate
       or $all;
 
-    # Set default output directory if not specified
+    # Set default output directories if not specified
     $output_dir //= '/var/gemini/stats.foo.zone';
+    $html_output_dir //= '/var/www/htdocs/gemtexter/stats.foo.zone';
     
-    Foostats::Reporter::report( $stats_dir, $output_dir,
+    Foostats::Reporter::report( $stats_dir, $output_dir, $html_output_dir,
         Foostats::Merger::merge($stats_dir) )
       if $report
       or $all;
diff --git a/frontends/scripts/sitestats.sh b/frontends/scripts/sitestats.sh
deleted file mode 100755
index 62702c7..0000000
--- a/frontends/scripts/sitestats.sh
+++ /dev/null
@@ -1,111 +0,0 @@
-#!/bin/sh
-
-# This is a quick and dirty script to get some stats for my site.
-# Yes, this could be programmed cleaner, but I wanted to do something quick
-# and dirty and this also with only tools available on the OpenBSD base install.
-
-STATSFILE=/tmp/sitestats.csv
-BOTSFILE=/tmp/sitebots.txt
-TOP=20
-
-header () {
-	echo "proto,host,ip,day,month,time,path"
-}
-
-http_stats () {
-	zgrep -h . /var/www/logs/access.log* |
-	perl -l -n -e 's/\.html/.suffix/; @s=split / +/; next if @s!=11;
-	$s[4]=~s|\[(\d\d)/(...)/\d{4}:(.*)|$1,$2,$3|; print "http,".join ",",@s[0,1,4,7];' 
-}
-
-gemini_stats () {
-	zgrep -h . /var/log/daemon* |
-	perl -l -n -e '@s=split / +/; @v=@s and next if $s[4] eq "vger:";
-	next if !/relayd.*gemini/; ($path) = $v[-1] =~ m|gemini://.*?(/.*)|;
-	next if $path eq ""; $path =~ s/\.gmi/.suffix/;
-	print "gemini,".(split("/", $v[6]))[2].",$s[12],$s[1],$s[0],$s[2],$path"'
-}
-
-parse_logs () {
-	header > $STATSFILE.tmp
-	http_stats >> $STATSFILE.tmp
-	gemini_stats >> $STATSFILE.tmp
-	mv $STATSFILE.tmp $STATSFILE
-}
-
-filter () {
-	# Collect some 'you are a bot' scores.
-	# 1. You visit 2 sites within one single second
-	# 2. You try to call an odd file or path
-	cut -d, -f2,3,6,7 $STATSFILE |
-	perl -l -n -e '($k)=m/(.*?,.*?,.*?),/; $s{$k}++ if /\.suffix/;
-	$s{$k}+=1000 if /(?:target\.suffix|\.php|wordpress|\/wp|\.asp|\.\.|robots\.txt|\.env|\?|\+|%|\*|HNAP1|\/admin\/|\.git\/|microsoft\.exchange|\.lua|\/owa\/)/;
-	END { while (($k,$v) = each %s) { print $k =~ /.*?,(.*?),/ if $v > 1 } }' |
-	sort -u > $BOTSFILE
-
-	# Filte out all bot IPs, also only filter out all known file "types".
-	grep -F -v -f $BOTSFILE $STATSFILE > $STATSFILE.clean1
-	grep -v -E '(proto,host|\.suffix|atom\.xml|\.gif|\.png|\.jpg|,,)' $STATSFILE.clean1 > $STATSFILE.dirt
-	#grep -E '(proto,host|\.suffix|atom\.xml|\.gif|\.png|\.jpg)' $STATSFILE.clean1 > $STATSFILE.clean2
-	mv $STATSFILE.clean1 $STATSFILE
-}
-
-stats () {
-	sed 1d $STATSFILE
-}
-
-top_n () {
-	fields="$1"
-	descr="$2"
-
-	echo "Top $TOP `head -n 1 $STATSFILE | cut -d, -f"$fields"`$descr:"
- 	cut -d, -f"$fields" | sort | uniq -c | sort -nr | head -n $TOP | sed 's/^/    /'
-	echo
-}
-
-ip_stats () {
-	for proto in http gemini; do 
-		echo -n "Unique $proto IPv4 IPs:\t"
- 		stats | grep "^$proto," | cut -d, -f3 | grep -F -v : | sort -u | wc -l
-		echo -n "Unique $proto IPv6 IPs:\t" 
-		stats | grep "^$proto," | cut -d, -f3 | grep -F : | sort -u | wc -l
-	done
-}
-
-ip_daily_stats () {
-	echo "Unique IPs by day"
-	for back in $(jot 14); do
-		now=$(date +%s)
-		date=$(date -r $(echo "$now - 86400 * $back" | bc) +%d,%b)
-		echo -n "\t $date:"
-		stats | grep $date | cut -d, -f3 | sort -u | wc -l		
-	done
-}
-
-ip_daily_subscribers () {
-	echo "Unique atom.xml subscribers by day"
-	for back in $(jot 14); do
-		now=$(date +%s)
-		date=$(date -r $(echo "$now - 86400 * $back" | bc) +%d,%b)
-		echo -n "\t $date:"
-		stats | grep $date | grep atom.xml | cut -d, -f3 | sort -u | wc -l		
-	done
-}
-
-main () {
-	date
-	echo
-	parse_logs
-	filter 
-	stats | grep -F .suffix | top_n '1,2,4,5,7' ' (Only content)'
-	stats | top_n 2
-	stats | top_n '4,5'
-	stats | top_n 7
-	stats | grep -F .suffix | top_n 7 ' (Only content)'
-	stats | top_n '1,2,7'
-	ip_stats
-	ip_daily_stats
-	ip_daily_subscribers
-}
-
-main | sed 's/\.suffix//'