summaryrefslogtreecommitdiff
path: root/openbsd
diff options
context:
space:
mode:
author Paul Buetow <openbsd@mx.buetow.org> 2022-01-29 19:29:24 +0100
committer Paul Buetow <openbsd@mx.buetow.org> 2022-01-29 19:29:24 +0100
commit 2ecb7f46a8eea6a9a7aca9a59db61628cac69679 (patch)
tree c390ac1ec15e315841a854f8fd91f0e5061c4ec5 /openbsd
parent c3aa5b7ae9974d407a538d492989eadcd52b16c2 (diff)
improved stats
Diffstat (limited to 'openbsd')
-rwxr-xr-x[-rw-r--r--]openbsd/frontends/scripts/sitestats.sh84
1 files changed, 83 insertions, 1 deletions
diff --git a/openbsd/frontends/scripts/sitestats.sh b/openbsd/frontends/scripts/sitestats.sh
index 892cf22..6903a07 100644..100755
--- a/openbsd/frontends/scripts/sitestats.sh
+++ b/openbsd/frontends/scripts/sitestats.sh
@@ -1,3 +1,85 @@
#!/bin/sh
-zgrep -h . /var/www/logs/access.log* | perl -l -n -e '@s=split / +/; next if @s!=11; $s[4]=~s|\[(\d\d)/(...)/(\d{4}):(.*)|$1 $2 $3 $4|; print join " ",@s[0,1,4,7];'
+# CSV file the parsed log rows are written to; its first line is the column
+# header printed by header.
+STATSFILE=/tmp/sitestats.csv
+# File the filter step writes the IPs it scored as bots to, one per line.
+BOTSFILE=/tmp/sitebots.txt
+# Number of entries shown in every "Top N" listing.
+TOP=20
+
+# Print the CSV column names, one name per field of a stats row.
+header () {
+    printf '%s\n' "proto,host,ip,day,month,time,path"
+}
+
+# Filter: copy stdin to stdout with the indentation prefix added in front of
+# every line.
+indent () {
+    sed -e 's/^/ /'
+}
+
+# Emit one CSV row per request found in the web server access logs.
+#
+# Reads every non-empty line of /var/www/logs/access.log* through zgrep.
+# For each line the perl filter:
+#   - rewrites the first ".html" on the line to the marker ".suffix",
+#   - splits the line on runs of spaces and skips it unless that yields
+#     exactly 11 fields,
+#   - turns field 4 from "[DD/Mon/YYYY:time" into "DD,Mon,time" (the year is
+#     dropped),
+#   - prints "http," followed by fields 0, 1, 4 and 7 joined with commas.
+# NOTE(review): fields 0, 1 and 7 are presumably host, client IP and request
+# path, matching the column order printed by header -- confirm against the
+# actual access log format.
+http_stats () {
+ zgrep -h . /var/www/logs/access.log* |
+ perl -l -n -e 's/\.html/.suffix/; @s=split / +/; next if @s!=11;
+ $s[4]=~s|\[(\d\d)/(...)/\d{4}:(.*)|$1,$2,$3|;
+ print "http,".join ",",@s[0,1,4,7];'
+}
+
+# Emit one CSV row per Gemini request found in the daemon logs.
+#
+# Reads every non-empty line of /var/log/daemon* through zgrep and splits it
+# on runs of spaces. The perl filter keeps state across lines:
+#   - a line whose field 4 is "vger:" is remembered in @v and skipped,
+#   - any other line is skipped unless it matches /relayd.*gemini/,
+#   - the path is taken from the last field of the remembered vger line
+#     (everything from the first "/" after the host in a gemini:// URL); the
+#     line is skipped when no path can be extracted,
+#   - the first ".gmi" in the path is rewritten to the marker ".suffix",
+#   - the row printed is "gemini", the third "/"-separated piece of the
+#     remembered field 6, fields 12, 1, 0 and 2 of the current line, and the
+#     path.
+# NOTE(review): this relies on the vger line for a request appearing before
+# the matching relayd line, and the printed fields are presumably host, ip,
+# day, month and time as in header -- confirm against the actual log format.
+gemini_stats () {
+ zgrep -h . /var/log/daemon* |
+ perl -l -n -e '@s=split / +/; @v=@s and next if $s[4] eq "vger:";
+ next if !/relayd.*gemini/;
+ ($path) = $v[-1] =~ m|gemini://.*?(/.*)|;
+ next if $path eq "";
+ $path =~ s/\.gmi/.suffix/;
+ print "gemini,".(split("/", $v[6]))[2].",$s[12],$s[1],$s[0],$s[2],$path"'
+}
+
+# Rebuild $STATSFILE from the logs: the CSV header followed by the HTTP rows
+# and then the Gemini rows.
+#
+# Everything is written to "$STATSFILE.tmp" first and renamed afterwards, so
+# $STATSFILE is only replaced once all three producers have run. The
+# expansions are quoted so the path is never word-split or globbed, and the
+# temporary file is opened once for the whole group.
+parse_logs () {
+    {
+        header
+        http_stats
+        gemini_stats
+    } > "$STATSFILE.tmp"
+    mv "$STATSFILE.tmp" "$STATSFILE"
+}
+
+# Drop every row of $STATSFILE that belongs to an IP scored as a bot, and
+# leave the list of those IPs (one per line, sorted, unique) in $BOTSFILE.
+#
+# Scoring works on the host, ip, time and path columns of each row:
+#   1. +1 for every occurrence of an identical host/ip/time/path tuple that
+#      contains the ".suffix" marker, so the same request seen twice within
+#      one timestamp scores 2.
+#   2. +1000 when the tuple contains an odd file or path (.php, .env,
+#      robots.txt, /wp, /wordpress/, /.git/, HNAP).
+# The IP of every tuple scoring more than 1 is treated as a bot.
+# NOTE(review): because the path is part of the key, rule 1 only fires for
+# repeated hits on the same page in the same second -- confirm whether hits
+# on two different pages were meant to count as well.
+#
+# Rows are removed by comparing the ip column (field 3) exactly against the
+# bot list. A substring match such as "grep -F -f" would also drop
+# 11.2.3.4 and 1.2.3.40 for the bot 1.2.3.4, and a single empty line in the
+# bot list would drop every row. The header row is kept because its third
+# field is the literal column name.
+filter () {
+    cut -d, -f2,3,6,7 "$STATSFILE" |
+    perl -l -n -e '$s{$_}++ if /\.suffix/;
+        $s{$_}+=1000 if /(?:\.php|\.env|robots\.txt|\/wp|\/wordpress\/|\/\.git\/|HNAP)/;
+        END { while (($k,$v) = each %s) { print $k =~ /.*?,(.*?),/ if $v > 1 } }' |
+    sort -u > "$BOTSFILE"
+
+    # Load the bot list in BEGIN via getline, so an empty list cannot be
+    # confused with the stats file itself; empty entries are ignored.
+    awk -F, -v botsfile="$BOTSFILE" '
+        BEGIN { while ((getline ip < botsfile) > 0) if (ip != "") bot[ip] = 1 }
+        !($3 in bot)
+    ' "$STATSFILE" > "$STATSFILE.clean"
+    mv "$STATSFILE.clean" "$STATSFILE"
+}
+
+# Print the data rows of the stats file, i.e. everything after its first
+# (header) line.
+stats () {
+    tail -n +2 $STATSFILE
+}
+
+# Print a "Top N" listing for one or more columns of the rows read on stdin.
+#
+# $1 is a cut(1) field list such as 2 or '1,7'. The heading uses the names of
+# those columns taken from the first line of the stats file; below it come
+# the $TOP most frequent values with their counts, indented, followed by an
+# empty line.
+top_n () {
+    fields="$1"
+    columns=$(head -n 1 $STATSFILE | cut -d, -f"$fields")
+    echo "Top $TOP $columns:"
+    cut -d, -f"$fields" |
+        sort |
+        uniq -c |
+        sort -nr |
+        head -n $TOP |
+        indent
+    echo
+}
+
+# Print, for each protocol, how many distinct IPv4 and IPv6 addresses appear
+# in the ip column (field 3) of the stats rows. An address counts as IPv6
+# when it contains a ":" and as IPv4 otherwise.
+#
+# The labels are written with printf rather than "echo -n ...\t": whether
+# echo honours -n and backslash escapes depends on the shell behind /bin/sh,
+# while printf behaves the same everywhere.
+ip_stats () {
+    for proto in http gemini; do
+        printf 'Unique %s IPv4 IPs:\t' "$proto"
+        stats | grep "^$proto," | cut -d, -f3 | grep -F -v : | sort -u | wc -l
+
+        printf 'Unique %s IPv6 IPs:\t' "$proto"
+        stats | grep "^$proto," | cut -d, -f3 | grep -F : | sort -u | wc -l
+    done
+}
+
+# Entry point: rebuild the stats file from the logs, drop the rows of bots,
+# then print one top list per column selection followed by the unique IP
+# counts.
+main () {
+    parse_logs
+    filter
+    # Column selections in the order the lists are printed.
+    for spec in 1 2 4,5 7 1,7 1,2,7; do
+        stats | top_n "$spec"
+    done
+    ip_stats
+}
+
+main