diff options
| author | Paul Buetow <openbsd@mx.buetow.org> | 2022-01-29 19:29:24 +0100 |
|---|---|---|
| committer | Paul Buetow <openbsd@mx.buetow.org> | 2022-01-29 19:29:24 +0100 |
| commit | 2ecb7f46a8eea6a9a7aca9a59db61628cac69679 (patch) | |
| tree | c390ac1ec15e315841a854f8fd91f0e5061c4ec5 /openbsd | |
| parent | c3aa5b7ae9974d407a538d492989eadcd52b16c2 (diff) | |
improved stats
Diffstat (limited to 'openbsd')
| -rwxr-xr-x[-rw-r--r--] | openbsd/frontends/scripts/sitestats.sh | 84 |
1 files changed, 83 insertions, 1 deletions
```diff
diff --git a/openbsd/frontends/scripts/sitestats.sh b/openbsd/frontends/scripts/sitestats.sh
index 892cf22..6903a07 100644..100755
--- a/openbsd/frontends/scripts/sitestats.sh
+++ b/openbsd/frontends/scripts/sitestats.sh
@@ -1,3 +1,85 @@
 #!/bin/sh
 
-zgrep -h . /var/www/logs/access.log* | perl -l -n -e '@s=split / +/; next if @s!=11; $s[4]=~s|\[(\d\d)/(...)/(\d{4}):(.*)|$1 $2 $3 $4|; print join " ",@s[0,1,4,7];'
+STATSFILE=/tmp/sitestats.csv
+BOTSFILE=/tmp/sitebots.txt
+TOP=20
+
+header () {
+    echo "proto,host,ip,day,month,time,path"
+}
+
+indent () {
+    sed 's/^/ /'
+}
+
+http_stats () {
+    zgrep -h . /var/www/logs/access.log* |
+    perl -l -n -e 's/\.html/.suffix/; @s=split / +/; next if @s!=11;
+        $s[4]=~s|\[(\d\d)/(...)/\d{4}:(.*)|$1,$2,$3|;
+        print "http,".join ",",@s[0,1,4,7];'
+}
+
+gemini_stats () {
+    zgrep -h . /var/log/daemon* |
+    perl -l -n -e '@s=split / +/; @v=@s and next if $s[4] eq "vger:";
+        next if !/relayd.*gemini/;
+        ($path) = $v[-1] =~ m|gemini://.*?(/.*)|;
+        next if $path eq "";
+        $path =~ s/\.gmi/.suffix/;
+        print "gemini,".(split("/", $v[6]))[2].",$s[12],$s[1],$s[0],$s[2],$path"'
+}
+
+parse_logs () {
+    header > $STATSFILE.tmp
+    http_stats >> $STATSFILE.tmp
+    gemini_stats >> $STATSFILE.tmp
+    mv $STATSFILE.tmp $STATSFILE
+}
+
+filter () {
+    # Collect some 'you are a bot' scores.
+    # 1. You visit 2 sites within one single second
+    # 2. You try to call an odd file or path
+    cut -d, -f2,3,6,7 $STATSFILE |
+    perl -l -n -e '$s{$_}++ if /\.suffix/;
+        $s{$_}+=1000 if /(?:\.php|\.env|robots\.txt|\/wp|\/wordpress\/|\/\.git\/|HNAP)/;
+        END { while (($k,$v) = each %s) { print $k =~ /.*?,(.*?),/ if $v > 1 } }' |
+    sort -u > $BOTSFILE
+    grep -F -v -f $BOTSFILE $STATSFILE > $STATSFILE.clean
+    mv $STATSFILE.clean $STATSFILE
+}
+
+stats () {
+    sed 1d $STATSFILE
+}
+
+top_n () {
+    fields="$1"
+    echo "Top $TOP `head -n 1 $STATSFILE | cut -d, -f"$fields"`:"
+    cut -d, -f"$fields" | sort | uniq -c | sort -nr | head -n $TOP | indent
+    echo
+}
+
+ip_stats () {
+    for proto in http gemini; do
+        echo -n "Unique $proto IPv4 IPs:\t"
+        stats | grep "^$proto," | cut -d, -f3 | grep -F -v : | sort -u | wc -l
+
+        echo -n "Unique $proto IPv6 IPs:\t"
+        stats | grep "^$proto," | cut -d, -f3 | grep -F : | sort -u | wc -l
+    done
+}
+
+main () {
+    parse_logs
+    filter
+    stats | top_n 1
+    stats | top_n 2
+    stats | top_n '4,5'
+    stats | top_n 7
+    stats | top_n '1,7'
+    stats | top_n '1,2,7'
+    ip_stats
+}
+
+main
```
