diff options
Diffstat (limited to 'frontends/scripts')
| -rw-r--r-- | frontends/scripts/foostats.pl | 364 | ||||
| -rwxr-xr-x | frontends/scripts/sitestats.sh | 111 |
2 files changed, 321 insertions, 154 deletions
diff --git a/frontends/scripts/foostats.pl b/frontends/scripts/foostats.pl index 1af3e4b..2783b2d 100644 --- a/frontends/scripts/foostats.pl +++ b/frontends/scripts/foostats.pl @@ -771,8 +771,219 @@ package Foostats::Reporter { ", @table_lines ); } + # Convert gemtext to HTML + sub gemtext_to_html { + my ($content) = @_; + my $html = ""; + my $in_code_block = 0; + my $in_list = 0; + my @lines = split /\n/, $content; + my @code_block_lines = (); + + for my $line (@lines) { + if ($line =~ /^```/) { + if ($in_code_block) { + # End code block - check if it's a table + if (is_ascii_table(\@code_block_lines)) { + $html .= convert_ascii_table_to_html(\@code_block_lines); + } else { + $html .= "<pre>\n"; + for my $code_line (@code_block_lines) { + $html .= encode_entities($code_line) . "\n"; + } + $html .= "</pre>\n"; + } + @code_block_lines = (); + $in_code_block = 0; + } else { + $in_code_block = 1; + } + next; + } + + if ($in_code_block) { + push @code_block_lines, $line; + next; + } + + # Check if we need to close a list + if ($in_list && $line !~ /^\* /) { + $html .= "</ul>\n"; + $in_list = 0; + } + + # Headers + if ($line =~ /^### (.*)/) { + $html .= "<h3>" . encode_entities($1) . "</h3>\n"; + } elsif ($line =~ /^## (.*)/) { + $html .= "<h2>" . encode_entities($1) . "</h2>\n"; + } elsif ($line =~ /^# (.*)/) { + $html .= "<h1>" . encode_entities($1) . "</h1>\n"; + } + # Links + elsif ($line =~ /^=> (\S+)\s+(.*)/) { + my ($url, $text) = ($1, $2); + # Convert .gmi links to .html + $url =~ s/\.gmi$/\.html/; + $html .= "<p><a href=\"" . encode_entities($url) . "\">" . encode_entities($text) . "</a></p>\n"; + } + # Bullet points + elsif ($line =~ /^\* (.*)/) { + if (!$in_list) { + $html .= "<ul>\n"; + $in_list = 1; + } + $html .= "<li>" . encode_entities($1) . "</li>\n"; + } + # Empty line - skip to avoid excessive spacing + elsif ($line =~ /^\s*$/) { + # Skip empty lines for more compact output + } + # Regular text + else { + $html .= "<p>" . encode_entities($line) . "</p>\n"; + } + } + + # Close list if still open + if ($in_list) { + $html .= "</ul>\n"; + } + + return $html; + } + + # Check if the lines form an ASCII table + sub is_ascii_table { + my ($lines) = @_; + return 0 if @$lines < 3; # Need at least header, separator, and one data row + + # Check for separator lines with dashes and pipes + for my $line (@$lines) { + return 1 if $line =~ /^\|?[\s\-]+\|/; + } + return 0; + } + + # Convert ASCII table to HTML table + sub convert_ascii_table_to_html { + my ($lines) = @_; + my $html = "<table>\n"; + my $row_count = 0; + + for my $line (@$lines) { + # Skip separator lines + next if $line =~ /^\|?[\s\-]+\|/ && $line =~ /\-/; + + # Parse table row + my @cells = split /\s*\|\s*/, $line; + @cells = grep { length($_) > 0 } @cells; # Remove empty cells + + if (@cells) { + $html .= "<tr>\n"; + # First row is header + my $tag = ($row_count == 0) ? "th" : "td"; + for my $cell (@cells) { + $html .= " <$tag>" . encode_entities(trim($cell)) . "</$tag>\n"; + } + $html .= "</tr>\n"; + $row_count++; + } + } + + $html .= "</table>\n"; + return $html; + } + + # Trim whitespace from string + sub trim { + my ($str) = @_; + $str =~ s/^\s+//; + $str =~ s/\s+$//; + return $str; + } + + # Encode HTML entities to prevent XSS + sub encode_entities { + my ($text) = @_; + $text =~ s/&/&/g; + $text =~ s/</</g; + $text =~ s/>/>/g; + $text =~ s/"/"/g; + $text =~ s/'/'/g; + return $text; + } + + # Generate HTML wrapper + sub generate_html_page { + my ($title, $content) = @_; + return qq{<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="UTF-8"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + <title>$title</title> + <style> + body { + font-family: monospace; + line-height: 1.6; + max-width: 80ch; + margin: 0 auto; + padding: 1em; + background: white; + color: black; + } + h1, h2, h3 { + font-weight: bold; + margin-top: 1em; + margin-bottom: 0.5em; + } + h1 { font-size: 1.2em; } + h2 { font-size: 1.1em; } + h3 { font-size: 1em; } + pre { + overflow-x: auto; + white-space: pre; + font-family: monospace; + } + table { + border-collapse: collapse; + margin: 1em 0; + } + th, td { + padding: 0.25em 0.5em; + text-align: left; + } + a { + color: blue; + text-decoration: underline; + } + a:visited { + color: purple; + } + hr { + border: none; + border-top: 1px solid #ccc; + margin: 1em 0; + } + ul { + margin: 0.5em 0; + padding-left: 2em; + } + li { + margin: 0.25em 0; + } + </style> +</head> +<body> +$content +</body> +</html> +}; + } + sub report { - my ( $stats_dir, $output_dir, %merged ) = @_; + my ( $stats_dir, $output_dir, $html_output_dir, %merged ) = @_; for my $date ( sort { $b cmp $a } keys %merged ) { my $stats = $merged{$date}; next unless $stats->{count}; @@ -781,15 +992,16 @@ package Foostats::Reporter { # Check if .gmi file exists and its age based on date in filename my $report_path = "$output_dir/$date.gmi"; + my $html_report_path = "$output_dir/$date.html"; # Calculate age of the data based on date in filename my $today = Time::Piece->new(); my $file_date = Time::Piece->strptime( $date, '%Y%m%d' ); my $age_days = ( $today - $file_date ) / ( 24 * 60 * 60 ); - if ( -e $report_path ) { + if ( -e $report_path && -e $html_report_path ) { - # File exists + # Files exist if ( $age_days <= 3 ) { # Data is recent (within 3 days), regenerate it @@ -799,9 +1011,9 @@ package Foostats::Reporter { . " days)"; } else { - # Data is old (older than 3 days), skip if file exists + # Data is old (older than 3 days), skip if files exist say -"Skipping daily report for $year-$month-$day (file exists, data age: " +"Skipping daily report for $year-$month-$day (files exist, data age: " . sprintf( "%.1f", $age_days ) . " days)"; next; @@ -933,12 +1145,14 @@ package Foostats::Reporter { $report_content .= " "; - # Add link to monthly report + # Add links to summary reports $report_content .= "## Related Reports\n\n"; my $now = localtime; - my $current_month = $now->strftime('%Y%m%d'); - $report_content .= - "=> ./30day_summary_$current_month.gmi 30-Day Summary Report\n\n"; + my $current_date = $now->strftime('%Y%m%d'); + $report_content .= "=> ./7day_summary_$current_date.gmi 7-Day Summary Report\n"; + $report_content .= "=> ./30day_summary_$current_date.gmi 30-Day Summary Report\n"; + $report_content .= "=> ./365day_summary_$current_date.gmi 365-Day Summary Report\n"; + $report_content .= "=> ./index.gmi Back to Index\n\n"; # Ensure output directory exists mkdir $output_dir unless -d $output_dir; @@ -946,27 +1160,38 @@ package Foostats::Reporter { # $report_path already defined above say "Writing report to $report_path"; FileHelper::write( $report_path, $report_content ); + + # Also write HTML version + mkdir $html_output_dir unless -d $html_output_dir; + my $html_path = "$html_output_dir/$date.html"; + my $html_content = gemtext_to_html($report_content); + my $html_page = generate_html_page("Stats for $year-$month-$day", $html_content); + say "Writing HTML report to $html_path"; + FileHelper::write( $html_path, $html_page ); } - # Generate 30-day summary report - generate_30day_report( $stats_dir, $output_dir, %merged ); + # Generate summary reports + generate_summary_report( 7, $stats_dir, $output_dir, $html_output_dir, %merged ); + generate_summary_report( 30, $stats_dir, $output_dir, $html_output_dir, %merged ); + generate_summary_report( 365, $stats_dir, $output_dir, $html_output_dir, %merged ); - # Generate index.gmi - generate_index( $output_dir ); + # Generate index.gmi and index.html + generate_index( $output_dir, $html_output_dir ); } - sub generate_30day_report { - my ( $stats_dir, $output_dir, %merged ) = @_; + sub generate_summary_report { + my ( $days, $stats_dir, $output_dir, $html_output_dir, %merged ) = @_; - # Get the last 30 days of dates + # Get the last N days of dates my @dates = sort { $b cmp $a } keys %merged; - @dates = @dates[ 0 .. 29 ] if @dates > 30; + my $max_index = $days - 1; + @dates = @dates[ 0 .. $max_index ] if @dates > $days; my $today = localtime; my $report_date = $today->strftime('%Y%m%d'); # Build report content - my $report_content = build_report_header($today); + my $report_content = build_report_header($today, $days); $report_content .= build_daily_summary_section( \@dates, \%merged ); $report_content .= build_feed_statistics_section( \@dates, \%merged ); @@ -976,21 +1201,30 @@ package Foostats::Reporter { $report_content .= build_top_hosts_section($all_hosts); $report_content .= build_top_urls_section($all_urls); - # Add daily report links - $report_content .= build_daily_reports_links( \@dates, \%merged ); + # Add links to other summary reports + $report_content .= build_summary_links($days, $report_date); - # Ensure output directory exists and write the 30-day report + # Ensure output directory exists and write the summary report mkdir $output_dir unless -d $output_dir; - my $report_path = "$output_dir/30day_summary_$report_date.gmi"; - say "Writing 30-day summary report to $report_path"; + my $report_path = "$output_dir/${days}day_summary_$report_date.gmi"; + say "Writing $days-day summary report to $report_path"; FileHelper::write( $report_path, $report_content ); + + # Also write HTML version + mkdir $html_output_dir unless -d $html_output_dir; + my $html_path = "$html_output_dir/${days}day_summary_$report_date.html"; + my $html_content = gemtext_to_html($report_content); + my $html_page = generate_html_page("$days-Day Summary Report", $html_content); + say "Writing HTML $days-day summary report to $html_path"; + FileHelper::write( $html_path, $html_page ); } sub build_report_header { - my ($today) = @_; + my ($today, $days) = @_; + $days //= 30; # Default to 30 days for backward compatibility - my $content = "# 30-Day Summary Report\n\n"; + my $content = "# $days-Day Summary Report\n\n"; $content .= "Generated on " . $today->strftime('%Y-%m-%d') . "\n\n"; return $content; } @@ -1146,42 +1380,60 @@ package Foostats::Reporter { return $content; } - sub build_daily_reports_links { - my ( $dates, $merged ) = @_; + sub build_summary_links { + my ( $current_days, $report_date ) = @_; - my $content = "## Daily Reports\n\n"; - - for my $date (@$dates) { - next unless exists $merged->{$date} && $merged->{$date}->{count}; - - my ( $year, $month, $day ) = $date =~ /(\d{4})(\d{2})(\d{2})/; - my $formatted_date = "$year-$month-$day"; - - $content .= "=> ./$date.gmi $formatted_date Daily Report\n"; + my $content = "## Other Summary Reports\n\n"; + + # Add links to other summary periods + my @periods = (7, 30, 365); + + for my $days (@periods) { + next if $days == $current_days; # Skip current report type + $content .= "=> ./${days}day_summary_$report_date.gmi ${days}-Day Summary Report\n"; } + + # Add link to index + $content .= "\n=> ./index.gmi Back to Index\n"; return $content; } sub generate_index { - my ($output_dir) = @_; + my ($output_dir, $html_output_dir) = @_; # Get all .gmi files in the output directory opendir(my $dh, $output_dir) or die "Cannot open directory $output_dir: $!"; my @gmi_files = grep { /\.gmi$/ && $_ ne 'index.gmi' } readdir($dh); closedir($dh); - # Sort files: 30-day summaries first, then daily reports by date (newest first) - my @summaries = sort { $b cmp $a } grep { /^30day_summary_/ } @gmi_files; + # Sort files by type and date (newest first) + my @summaries_7day = sort { $b cmp $a } grep { /^7day_summary_/ } @gmi_files; + my @summaries_30day = sort { $b cmp $a } grep { /^30day_summary_/ } @gmi_files; + my @summaries_365day = sort { $b cmp $a } grep { /^365day_summary_/ } @gmi_files; my @daily = sort { $b cmp $a } grep { /^\d{8}\.gmi$/ } @gmi_files; # Build index content my $content = "# Foostats Reports Index\n\n"; $content .= "Generated on " . localtime->strftime('%Y-%m-%d %H:%M:%S') . "\n\n"; - if (@summaries) { + # Add 7-day summaries + if (@summaries_7day) { + $content .= "## 7-Day Summary Reports\n\n"; + for my $summary (@summaries_7day) { + my ($date) = $summary =~ /7day_summary_(\d{8})\.gmi/; + if ($date) { + my ($year, $month, $day) = $date =~ /(\d{4})(\d{2})(\d{2})/; + $content .= "=> ./$summary 7-Day Summary ($year-$month-$day)\n"; + } + } + $content .= "\n"; + } + + # Add 30-day summaries + if (@summaries_30day) { $content .= "## 30-Day Summary Reports\n\n"; - for my $summary (@summaries) { + for my $summary (@summaries_30day) { my ($date) = $summary =~ /30day_summary_(\d{8})\.gmi/; if ($date) { my ($year, $month, $day) = $date =~ /(\d{4})(\d{2})(\d{2})/; @@ -1191,6 +1443,19 @@ package Foostats::Reporter { $content .= "\n"; } + # Add 365-day summaries + if (@summaries_365day) { + $content .= "## 365-Day Summary Reports\n\n"; + for my $summary (@summaries_365day) { + my ($date) = $summary =~ /365day_summary_(\d{8})\.gmi/; + if ($date) { + my ($year, $month, $day) = $date =~ /(\d{4})(\d{2})(\d{2})/; + $content .= "=> ./$summary 365-Day Summary ($year-$month-$day)\n"; + } + } + $content .= "\n"; + } + if (@daily) { $content .= "## Daily Reports\n\n"; my $count = 0; @@ -1212,6 +1477,14 @@ package Foostats::Reporter { my $index_path = "$output_dir/index.gmi"; say "Writing index to $index_path"; FileHelper::write($index_path, $content); + + # Also write HTML version + mkdir $html_output_dir unless -d $html_output_dir; + my $html_path = "$html_output_dir/index.html"; + my $html_content = gemtext_to_html($content); + my $html_page = generate_html_page("Foostats Reports Index", $html_content); + say "Writing HTML index to $html_path"; + FileHelper::write($html_path, $html_page); } } @@ -1232,6 +1505,8 @@ package main { Default: /var/www/htdocs/buetow.org/self/foostats --output-dir <path> Directory to write .gmi report files. Default: /var/gemini/stats.foo.zone + --html-output-dir <path> Directory to write .html report files. + Default: /var/www/htdocs/gemtexter/stats.foo.zone --odds-file <path> File with odd URI patterns to filter. Default: <stats-dir>/fooodds.txt --filter-log <path> Log file for filtered requests. @@ -1262,6 +1537,7 @@ package main { my $odds_file = $stats_dir . '/fooodds.txt'; my $odds_log = '/var/log/fooodds'; my $output_dir; # Will default to $stats_dir/gemtext if not specified + my $html_output_dir; # Will default to /var/www/htdocs/gemtexter/stats.foo.zone if not specified my $partner_node = hostname eq 'fishfinger.buetow.org' ? 'blowfish.buetow.org' @@ -1276,6 +1552,7 @@ package main { 'all!' => \$all, 'stats-dir=s' => \$stats_dir, 'output-dir=s' => \$output_dir, + 'html-output-dir=s' => \$html_output_dir, 'partner-node=s' => \$partner_node, 'help|?' => \$help; @@ -1289,10 +1566,11 @@ package main { if $replicate or $all; - # Set default output directory if not specified + # Set default output directories if not specified $output_dir //= '/var/gemini/stats.foo.zone'; + $html_output_dir //= '/var/www/htdocs/gemtexter/stats.foo.zone'; - Foostats::Reporter::report( $stats_dir, $output_dir, + Foostats::Reporter::report( $stats_dir, $output_dir, $html_output_dir, Foostats::Merger::merge($stats_dir) ) if $report or $all; diff --git a/frontends/scripts/sitestats.sh b/frontends/scripts/sitestats.sh deleted file mode 100755 index 62702c7..0000000 --- a/frontends/scripts/sitestats.sh +++ /dev/null @@ -1,111 +0,0 @@ -#!/bin/sh - -# This is a quick and dirty script to get some stats for my site. -# Yes, this could be programmed cleaner, but I wanted to do something quick -# and dirty and this also with only tools available on the OpenBSD base install. - -STATSFILE=/tmp/sitestats.csv -BOTSFILE=/tmp/sitebots.txt -TOP=20 - -header () { - echo "proto,host,ip,day,month,time,path" -} - -http_stats () { - zgrep -h . /var/www/logs/access.log* | - perl -l -n -e 's/\.html/.suffix/; @s=split / +/; next if @s!=11; - $s[4]=~s|\[(\d\d)/(...)/\d{4}:(.*)|$1,$2,$3|; print "http,".join ",",@s[0,1,4,7];' -} - -gemini_stats () { - zgrep -h . /var/log/daemon* | - perl -l -n -e '@s=split / +/; @v=@s and next if $s[4] eq "vger:"; - next if !/relayd.*gemini/; ($path) = $v[-1] =~ m|gemini://.*?(/.*)|; - next if $path eq ""; $path =~ s/\.gmi/.suffix/; - print "gemini,".(split("/", $v[6]))[2].",$s[12],$s[1],$s[0],$s[2],$path"' -} - -parse_logs () { - header > $STATSFILE.tmp - http_stats >> $STATSFILE.tmp - gemini_stats >> $STATSFILE.tmp - mv $STATSFILE.tmp $STATSFILE -} - -filter () { - # Collect some 'you are a bot' scores. - # 1. You visit 2 sites within one single second - # 2. You try to call an odd file or path - cut -d, -f2,3,6,7 $STATSFILE | - perl -l -n -e '($k)=m/(.*?,.*?,.*?),/; $s{$k}++ if /\.suffix/; - $s{$k}+=1000 if /(?:target\.suffix|\.php|wordpress|\/wp|\.asp|\.\.|robots\.txt|\.env|\?|\+|%|\*|HNAP1|\/admin\/|\.git\/|microsoft\.exchange|\.lua|\/owa\/)/; - END { while (($k,$v) = each %s) { print $k =~ /.*?,(.*?),/ if $v > 1 } }' | - sort -u > $BOTSFILE - - # Filte out all bot IPs, also only filter out all known file "types". - grep -F -v -f $BOTSFILE $STATSFILE > $STATSFILE.clean1 - grep -v -E '(proto,host|\.suffix|atom\.xml|\.gif|\.png|\.jpg|,,)' $STATSFILE.clean1 > $STATSFILE.dirt - #grep -E '(proto,host|\.suffix|atom\.xml|\.gif|\.png|\.jpg)' $STATSFILE.clean1 > $STATSFILE.clean2 - mv $STATSFILE.clean1 $STATSFILE -} - -stats () { - sed 1d $STATSFILE -} - -top_n () { - fields="$1" - descr="$2" - - echo "Top $TOP `head -n 1 $STATSFILE | cut -d, -f"$fields"`$descr:" - cut -d, -f"$fields" | sort | uniq -c | sort -nr | head -n $TOP | sed 's/^/ /' - echo -} - -ip_stats () { - for proto in http gemini; do - echo -n "Unique $proto IPv4 IPs:\t" - stats | grep "^$proto," | cut -d, -f3 | grep -F -v : | sort -u | wc -l - echo -n "Unique $proto IPv6 IPs:\t" - stats | grep "^$proto," | cut -d, -f3 | grep -F : | sort -u | wc -l - done -} - -ip_daily_stats () { - echo "Unique IPs by day" - for back in $(jot 14); do - now=$(date +%s) - date=$(date -r $(echo "$now - 86400 * $back" | bc) +%d,%b) - echo -n "\t $date:" - stats | grep $date | cut -d, -f3 | sort -u | wc -l - done -} - -ip_daily_subscribers () { - echo "Unique atom.xml subscribers by day" - for back in $(jot 14); do - now=$(date +%s) - date=$(date -r $(echo "$now - 86400 * $back" | bc) +%d,%b) - echo -n "\t $date:" - stats | grep $date | grep atom.xml | cut -d, -f3 | sort -u | wc -l - done -} - -main () { - date - echo - parse_logs - filter - stats | grep -F .suffix | top_n '1,2,4,5,7' ' (Only content)' - stats | top_n 2 - stats | top_n '4,5' - stats | top_n 7 - stats | grep -F .suffix | top_n 7 ' (Only content)' - stats | top_n '1,2,7' - ip_stats - ip_daily_stats - ip_daily_subscribers -} - -main | sed 's/\.suffix//' |
