The most basic possible apache2 web log file analysis script

# what is the usage of the webserver?
# what is the usage of a certain domain?

vim ./analyze.sh

#!/usr/bin/env bash
#
# analyze.sh - report the number of unique client IPs per rotated
# (gzip-compressed) apache2 access log in the current directory.
#
# Usage:  ./analyze.sh [filter]
#   filter  optional literal substring of the log file name
#           (e.g. "access" or "domain1.com"); omit it to scan every *.gz file.
#
# Output: a two-column table (file, unique_ips) on stdout. Files that are
#         not gzip data are listed as "skipped"; decompression errors are
#         reported by gzip on stderr so they never corrupt the table.

# Without nullglob an unmatched pattern would stay a literal string and be
# processed as if it were a file name.
shopt -s nullglob

#######################################
# Check whether a file is gzip data by looking at its first two bytes.
# Arguments: $1 - path to inspect
# Returns:   0 if $1 is a readable regular file starting with 0x1f 0x8b,
#            non-zero otherwise
#######################################
is_gzip() {
  local magic
  [[ -f "$1" && -r "$1" ]] || return 1
  # Read via redirection so a file name starting with '-' is never taken
  # for an option.
  magic=$(od -An -tx1 -N2 <"$1" | tr -d ' \n')
  [[ "$magic" == "1f8b" ]]
}

#######################################
# Count the distinct first fields (client IPs) of a gzip-compressed log.
# Arguments: $1 - path to the gzip file
# Outputs:   the count on stdout; gzip diagnostics on stderr
#######################################
count_unique_ips() {
  # awk splits on any whitespace, so lines with leading spaces still yield
  # the IP; the NF guard drops blank lines, which would otherwise be counted
  # as one extra (empty) address. LC_ALL=C makes sort compare raw bytes.
  gzip -dc -- "$1" | awk 'NF { print $1 }' | LC_ALL=C sort -u | wc -l
}

#######################################
# Print the unique-IP table for every matching *.gz file in the cwd.
# Arguments: $1 - optional literal file name filter
# Outputs:   the table on stdout
#######################################
main() {
  local filter=${1-}
  # Quoting the filter keeps it a single literal word: no word splitting and
  # no accidental glob characters coming from the command line.
  local -a logs=( *"$filter"*.gz )
  local width=30 gz rule unique_count

  # Widen the name column to the longest file name so the table stays
  # aligned; 30 remains the minimum width.
  for gz in "${logs[@]}"; do
    if (( ${#gz} > width )); then
      width=${#gz}
    fi
  done

  # Build a dashed rule exactly as wide as the name column.
  printf -v rule '%*s' "$width" ''
  rule=${rule// /-}

  printf '%-*s %10s\n' "$width" "file" "unique_ips"
  printf '%-*s %10s\n' "$width" "$rule" "----------"

  for gz in "${logs[@]}"; do
    if ! is_gzip "$gz"; then
      printf '%-*s %10s\n' "$width" "$gz" "skipped"
      continue
    fi

    unique_count=$(count_unique_ips "$gz")
    printf '%-*s %10d\n' "$width" "$gz" "$unique_count"
  done
}

main "$@"

# usage:

cd /var/log/apache2
./analyze.sh access
file                           unique_ips
------------------------------ ----------
access.log.10.gz                      130
access.log.11.gz                      155
access.log.12.gz                      142
access.log.13.gz                      137
access.log.14.gz                      161
access.log.2.gz                       145
access.log.3.gz                       132
access.log.4.gz                       131
access.log.5.gz                       133
access.log.6.gz                       153
access.log.7.gz                       130
access.log.8.gz                       168
access.log.9.gz                       150
domain1.com-access.log.10.gz        174
domain1.com-access.log.11.gz        195
domain1.com-access.log.12.gz        209
domain1.com-access.log.13.gz        217
domain1.com-access.log.14.gz        285
domain1.com-access.log.2.gz        183
domain1.com-access.log.3.gz        181
domain1.com-access.log.4.gz        209
domain1.com-access.log.5.gz        176
domain1.com-access.log.6.gz        196
domain1.com-access.log.7.gz        156
domain1.com-access.log.8.gz        178
domain1.com-access.log.9.gz        143
another-domain.com-access.log.10.gz       1008
another-domain.com-access.log.11.gz        951
another-domain.com-access.log.12.gz        836
another-domain.com-access.log.13.gz        906
another-domain.com-access.log.14.gz        991
another-domain.com-access.log.2.gz       1183
another-domain.com-access.log.3.gz       1190
another-domain.com-access.log.4.gz       1410
another-domain.com-access.log.5.gz       1157
another-domain.com-access.log.6.gz       1136
another-domain.com-access.log.7.gz        999
another-domain.com-access.log.8.gz       1011
another-domain.com-access.log.9.gz        915
a-different-domain.com-access.log.10.gz        253
a-different-domain.com-access.log.11.gz        205
a-different-domain.com-access.log.12.gz        203
a-different-domain.com-access.log.13.gz        217
a-different-domain.com-access.log.14.gz        209
a-different-domain.com-access.log.2.gz        203
a-different-domain.com-access.log.3.gz        165
a-different-domain.com-access.log.4.gz        174
a-different-domain.com-access.log.5.gz        193
a-different-domain.com-access.log.6.gz        173
a-different-domain.com-access.log.7.gz        200
a-different-domain.com-access.log.8.gz        208
a-different-domain.com-access.log.9.gz        189

liked this article?

  • only together we can create a truly free world
  • plz support dwaves to keep it up & running!
  • (yes the info on the internet is (mostly) free but beer is still not free (still have to work on that))
  • really really hate advertisement
  • contribute: whenever a solution was found, blog about it for others to find!
  • talk about, recommend & link to this blog and articles
  • thanks to all who contribute!
admin