2 Commits

Author SHA1 Message Date
  Pekka Helenius 57b2ec3d98 Minor improvements 4 months ago
  Pekka Helenius d58e65f5e2 Update README (minor) 4 months ago
2 changed files with 12 additions and 14 deletions
Unified View
  1. +3
    -3
      README.md
  2. +9
    -11
      apache-logparser/logparser.py

+ 3
- 3
README.md View File

@ -159,7 +159,7 @@ NOTE: The last numerical part of all ip addresses are anonymized with `XXX` stri
**Q: How many valid requests from Finland and Sweden occurred between 15th - 24th April 2022?** **Q: How many valid requests from Finland and Sweden occurred between 15th - 24th April 2022?**
``` ```
httpd-logparser --files-regex /var/log/httpd/access_log --included-fields time,http_status,country --sort-by time --status-codes ^20* --day-lower "15-04-2022" --day-upper "24-04-2022" --countries Finland,Sweden --show-stats --show-progress
httpd-logparser --files-regex "/var/log/httpd/access*log" --included-fields time,http_status,country --sort-by time --status-codes ^20* --day-lower "15-04-2022" --day-upper "24-04-2022" --countries Finland,Sweden --show-stats --show-progress
File count: 5 File count: 5
Lines in total: 86876 Lines in total: 86876
@ -266,7 +266,7 @@ You should also check any invalid log lines detected by the tool.
**Q: How many `4XX` codes have connected clients from China and United States produced?** **Q: How many `4XX` codes have connected clients from China and United States produced?**
``` ```
httpd-logparser --files-regex /var/log/httpd/access_log --included-fields time,country,http_status,http_request --countries "United States",China --sort-by time --status-codes ^4 --show-progress --show-stats
httpd-logparser --files-regex "/var/log/httpd/access*log" --included-fields time,country,http_status,http_request --countries "United States",China --sort-by time --status-codes ^4 --show-progress --show-stats
File count: 2 File count: 2
Lines in total: 23614 Lines in total: 23614
@ -365,7 +365,7 @@ WinHTTP/1.1
**Q: What is the time difference between requests from a single client? Exclude Finland. Include all access_log files.** **Q: What is the time difference between requests from a single client? Exclude Finland. Include all access_log files.**
``` ```
httpd-logparser --included-fields http_status,time,time_diff,country --countries "\!Finland" --files-regex /var/log/httpd/old/access_log
httpd-logparser --included-fields http_status,time,time_diff,country --countries "\!Finland" --files-regex "/var/log/httpd/old/access*log"
200 Taiwan 2022-06-19 12:21:47 NEW_CONN 200 Taiwan 2022-06-19 12:21:47 NEW_CONN
200 Taiwan 2022-06-19 12:21:48 +1 200 Taiwan 2022-06-19 12:21:48 +1


+ 9
- 11
apache-logparser/logparser.py View File

@ -21,6 +21,11 @@
# TODO: prev_host: instead of comparing to previous entry, check if such IP has been seen in XXX seconds # TODO: prev_host: instead of comparing to previous entry, check if such IP has been seen in XXX seconds
# TODO: store IP values for temporary list for XXX seconds, and check list values # TODO: store IP values for temporary list for XXX seconds, and check list values
# TODO: implement warning check for geoiplookup tool database files, i.e. "warning, some geo database files are very old. Please consider updating geo database information." # TODO: implement warning check for geoiplookup tool database files, i.e. "warning, some geo database files are very old. Please consider updating geo database information."
# TODO: implement support for json output
# TODO: implement following output: most visited URIs (<count> <uri (http_request)>)
# Store each http_request
# If contains, add http_request dict count: (counter + 1), continue
import argparse import argparse
import os import os
@ -831,21 +836,14 @@ class program(object):
incl_fields = self.args.incl_fields.split(',') incl_fields = self.args.incl_fields.split(',')
use_geolocation = self.args.use_geolocation use_geolocation = self.args.use_geolocation
if 'country' in incl_fields or 'city' in incl_fields:
use_geolocation = True
geotool_ok = False geotool_ok = False
if use_geolocation: if use_geolocation:
if self.check_file(geotool_exec, "os.X_OK", "PATH") and self.check_file(geo_database_location, "os.R_OK"): if self.check_file(geotool_exec, "os.X_OK", "PATH") and self.check_file(geo_database_location, "os.R_OK"):
geotool_ok = True geotool_ok = True
if use_geolocation:
if 'country' not in incl_fields:
incl_fields.append('country')
if 'city' not in incl_fields:
incl_fields.append('city')
if 'country' in incl_fields or 'city' in incl_fields:
use_geolocation = True
fields = self.get_included_fields( fields = self.get_included_fields(
self.get_out_fields(), self.get_out_fields(),
incl_fields, incl_fields,
@ -1009,7 +1007,7 @@ class program(object):
log_entries.append(printargs) log_entries.append(printargs)
line_num += 1 line_num += 1
print()
file_num += 1 file_num += 1
return [log_entries, files_process_data['files'], lines_total, stri, field_names, invalid_lines] return [log_entries, files_process_data['files'], lines_total, stri, field_names, invalid_lines]
@ -1085,7 +1083,7 @@ class program(object):
"Processed log entries: {:d}\n" + "Processed log entries: {:d}\n" +
"Matched log entries: {:d}\n" "Matched log entries: {:d}\n"
).format( ).format(
', '.join([i['file'] for i in result_files['files']]),
', '.join([i['file'] for i in result_files]),
result_lines, result_lines,
len(result_entries) len(result_entries)
) )


Loading…
Cancel
Save