diff options
| author | Paul Buetow <openbsd@mx.buetow.org> | 2022-01-29 21:27:44 +0100 |
|---|---|---|
| committer | Paul Buetow <openbsd@mx.buetow.org> | 2022-01-29 21:27:44 +0100 |
| commit | b114ca094378a4e9709a14dc5f24aa596253c34f (patch) | |
| tree | 0eb957e9e10281caf59a26afecc7d367a82b3fc1 | |
| parent | 8f727e6eb47b93b2157ef9b3b0d7e1e8b72555a8 (diff) | |
slightly better
| -rwxr-xr-x | openbsd/frontends/scripts/sitestats.sh | 14 |
1 files changed, 9 insertions, 5 deletions
diff --git a/openbsd/frontends/scripts/sitestats.sh b/openbsd/frontends/scripts/sitestats.sh index 4859201..da87473 100755 --- a/openbsd/frontends/scripts/sitestats.sh +++ b/openbsd/frontends/scripts/sitestats.sh @@ -39,11 +39,15 @@ filter () { # 2. You try to call an odd file or path cut -d, -f2,3,6,7 $STATSFILE | perl -l -n -e '($k)=m/(.*?,.*?,.*?),/; $s{$k}++ if /\.suffix/; - $s{$k}+=1000 if /(?:\.php|\.env|robots\.txt|\/wp|\/wordpress\/|\/\.git\/|HNAP)/; + $s{$k}+=1000 if /(?:target\.suffix|\.php|wordpress|\/wp|\.asp|\.\.|robots\.txt|\.env|\?|\+|%|\*|HNAP1|\/admin\/|\.git\/|microsoft\.exchange|\.lua|\/owa\/)/; END { while (($k,$v) = each %s) { print $k =~ /.*?,(.*?),/ if $v > 1 } }' | sort -u > $BOTSFILE - grep -F -v -f $BOTSFILE $STATSFILE > $STATSFILE.clean - mv $STATSFILE.clean $STATSFILE + + # Filte out all bot IPs, also only filter out all known file "types". + grep -F -v -f $BOTSFILE $STATSFILE > $STATSFILE.clean1 + grep -v -E '(proto,host|\.suffix|atom\.xml|\.gif|\.png|\.jpg)' $STATSFILE.clean1 > $STATSFILE.dirt + #grep -E '(proto,host|\.suffix|atom\.xml|\.gif|\.png|\.jpg)' $STATSFILE.clean1 > $STATSFILE.clean2 + mv $STATSFILE.clean1 $STATSFILE } stats () { @@ -81,7 +85,7 @@ ip_daily_stats () { main () { parse_logs filter - stats | grep -F .suffix | top_n '1,2,4,5,7' ' (Only .suffix)' + stats | grep -F .suffix | top_n '1,2,4,5,7' ' (Only content)' stats | grep -F atom.xml | top_n '1,2,4,5,7' ' (Only atom.xml)' stats | top_n 1 stats | top_n 2 @@ -93,4 +97,4 @@ main () { ip_daily_stats } -main +main | sed 's/\.suffix//' |
