The most basic possible Apache2 web log file analysis script
# What is the usage of the webserver?
# What is the usage of a certain domain?
#
# Create the script, e.g. with: vim ./analyze.sh

#!/usr/bin/env bash
#
# analyze.sh - count the unique client IPs per rotated, gzip-compressed
# Apache2 access log.
#
# Usage:   ./analyze.sh [FILTER]
#   FILTER  optional literal substring; only files matching *FILTER*.gz in the
#           current directory are analyzed (default: every *.gz file).
# Output:  a table on stdout with one row per file: the file name and the
#          number of distinct first fields (client IPs), or "skipped" when the
#          file does not contain gzip data. Warnings go to stderr.

# Unmatched globs expand to nothing, so "no matching file" yields an empty
# table instead of a row for the literal pattern.
shopt -s nullglob

#######################################
# Tell whether a file starts with the gzip magic bytes (0x1f 0x8b).
# Only the content is inspected, so a file that merely has "gzip" in its
# name cannot pass by accident.
# Arguments: $1 - path of the file to inspect
# Returns:   0 if the file is gzip data, non-zero otherwise (also when the
#            file cannot be read)
#######################################
is_gzip() {
  local magic
  magic=$({ od -An -tx1 -N2 <"$1"; } 2>/dev/null | tr -d '[:space:]')
  [[ "$magic" == "1f8b" ]]
}

#######################################
# Print the table of unique IP counts.
# Arguments: $1 - optional literal substring the file names must contain
# Outputs:   the table on stdout, warnings on stderr
#######################################
main() {
  local filter=${1-}
  local -a files
  local gz count rule
  local width=30 # minimum width of the file name column

  # The filter is quoted: it is matched literally and never word-split.
  files=(*"$filter"*.gz)

  # Widen the first column to the longest name so the table stays aligned.
  for gz in "${files[@]}"; do
    if ((${#gz} > width)); then
      width=${#gz}
    fi
  done

  # Output header
  printf '%-*s %10s\n' "$width" "file" "unique_ips"
  printf -v rule '%*s' "$width" ''
  printf '%s %10s\n' "${rule// /-}" "----------"

  for gz in "${files[@]}"; do
    # detect if gzip file (skip if not)
    if ! is_gzip "$gz"; then
      printf '%-*s %10s\n' "$width" "$gz" "skipped"
      continue
    fi

    # Stream-uncompress and count the distinct first fields (IPs).
    # awk splits on whitespace, so lines that start with spaces are handled;
    # "NF" skips empty lines, which would otherwise count as one extra "IP".
    # pipefail is enabled only inside the command substitution subshell so a
    # failing gzip is noticed; the (possibly partial) count is still printed.
    if ! count=$(
      set -o pipefail
      gzip -dc -- "$gz" 2>/dev/null |
        awk 'NF && !seen[$1]++ { n++ } END { print n + 0 }'
    ); then
      printf 'warning: %s: decompression failed, count may be incomplete\n' \
        "$gz" >&2
    fi
    printf '%-*s %10d\n' "$width" "$gz" "${count:-0}"
  done
}

main "$@"

# usage:
#   cd /var/log/apache2
#   ./analyze.sh access
#
# example output:
#   file                                unique_ips
#   ------------------------------      ----------
#   access.log.10.gz                           130
#   access.log.11.gz                           155
#   access.log.12.gz                           142
#   access.log.13.gz                           137
#   access.log.14.gz                           161
#   access.log.2.gz                            145
#   access.log.3.gz                            132
#   access.log.4.gz                            131
#   access.log.5.gz                            133
#   access.log.6.gz                            153
#   access.log.7.gz                            130
#   access.log.8.gz                            168
#   access.log.9.gz                            150
#   domain1.com-access.log.10.gz               174
#   domain1.com-access.log.11.gz               195
#   domain1.com-access.log.12.gz               209
#   domain1.com-access.log.13.gz               217
#   domain1.com-access.log.14.gz               285
#   domain1.com-access.log.2.gz                183
#   domain1.com-access.log.3.gz                181
#   domain1.com-access.log.4.gz                209
#   domain1.com-access.log.5.gz                176
#   domain1.com-access.log.6.gz                196
#   domain1.com-access.log.7.gz                156
#   domain1.com-access.log.8.gz                178
#   domain1.com-access.log.9.gz                143
#   another-domain.com-access.log.10.gz       1008
#   another-domain.com-access.log.11.gz        951
#   another-domain.com-access.log.12.gz        836
#   another-domain.com-access.log.13.gz        906
#   another-domain.com-access.log.14.gz        991
#   another-domain.com-access.log.2.gz        1183
#   another-domain.com-access.log.3.gz        1190
#   another-domain.com-access.log.4.gz        1410
#   another-domain.com-access.log.5.gz        1157
#   another-domain.com-access.log.6.gz        1136
#   another-domain.com-access.log.7.gz         999
#   another-domain.com-access.log.8.gz        1011
#   another-domain.com-access.log.9.gz         915
a-different-domain.com-access.log.10.gz 253 a-different-domain.com-access.log.11.gz 205 a-different-domain.com-access.log.12.gz 203 a-different-domain.com-access.log.13.gz 217 a-different-domain.com-access.log.14.gz 209 a-different-domain.com-access.log.2.gz 203 a-different-domain.com-access.log.3.gz 165 a-different-domain.com-access.log.4.gz 174 a-different-domain.com-access.log.5.gz 193 a-different-domain.com-access.log.6.gz 173 a-different-domain.com-access.log.7.gz 200 a-different-domain.com-access.log.8.gz 208 a-different-domain.com-access.log.9.gz 189
liked this article?
- only together we can create a truly free world
- plz support dwaves to keep it up & running!
- (yes the info on the internet is (mostly) free but beer is still not free (still have to work on that))
- we really, really hate advertisements
- contribute: whenever a solution was found, blog about it for others to find!
- talk about, recommend & link to this blog and its articles
- thanks to all who contribute!