3 Commits

2 changed files with 127 additions and 79 deletions
Split View
  1. +22
    -8
      README.md
  2. +105
    -71
      apache-logparser/logparser.py

+ 22
- 8
README.md View File

@ -1,8 +1,6 @@
# Apache log parser
# Apache HTTPD log parser
Simple Apache/HTTPD command-line log parser for short analysis, targeted to web server administration tasks.
Unix-alike systems only.
Apache/HTTPD command-line log parser for Linux web server administrators.
## Motivation
@ -16,7 +14,7 @@ This tool is not for intrusion detection/prevention or does not alert administra
## Requirements
Following Arch Linux packages. If you use another distribution, refer to corresponding packages:
Following Python packages (Arch Linux):
```
python
@ -40,9 +38,25 @@ Arch Linux:
run `updpkgsums && makepkg -Cfi` in [apache-logparser](apache-logparser/) directory. The command installs `httpd-logparser` executable file in `/usr/bin/` folder.
## Supported output formats
- `table` and `csv`
## Features
- Multiple Linux distributions supported
- Supported output formats: `table` and `csv`
  - Set the ordering of output log entry fields
- Include and exclude log entry fields
- Date ranges
- Geo IP lookup for log entries
- Get origin countries and cities
- Unknown cities: give coordinates instead
- Check also: [MaxMind DB Apache Module](https://github.com/maxmind/mod_maxminddb)
- Output field filters
- Limit processed log entries with `--head` and `--tail` parameters
- Get only interesting HTTP response codes
- Get only interesting countries of origin
  - Process multiple log files at once, either by providing a list of files or a matching regex pattern
- Show processing status
- Show processing summary
- List invalid log entries that couldn't be processed
## Examples


+ 105
- 71
apache-logparser/logparser.py View File

@ -78,6 +78,34 @@ class program(object):
}
return out_fields
"""
Get default Apache HTTPD configuration file location
"""
def get_apache_conf_path(self):
path = None
os_data_file = '/etc/os-release'
conf_path = [
{ 'os_check_file': os_data_file, 'os_like': 'Arch Linux', 'path': '/etc/httpd/conf/httpd.conf'},
{ 'os_check_file': os_data_file, 'os_like': 'Debian', 'path': '/etc/apache2/apache2.conf'},
{ 'os_check_file': os_data_file, 'os_like': 'Ubuntu', 'path': '/etc/apache2/apache2.conf'},
{ 'os_check_file': os_data_file, 'os_like': 'Linux Mint', 'path': '/etc/apache2/apache2.conf'},
{ 'os_check_file': os_data_file, 'os_like': 'openSUSE', 'path': '/etc/apache2/httpd.conf'},
{ 'os_check_file': os_data_file, 'os_like': 'Gentoo', 'path': '/etc/apache2/httpd.conf'},
{ 'os_check_file': os_data_file, 'os_like': 'Red Hat', 'path': '/etc/httpd/conf/httpd.conf'},
{ 'os_check_file': os_data_file, 'os_like': 'Fedora', 'path': '/etc/httpd/conf/httpd.conf'}
]
if self.check_file(os_data_file, "os.R_OK"):
with open(os_data_file, 'r') as f:
for line in f:
if re.match('^[ ]?NAME=\"', line):
for a in conf_path:
if re.match('.*' + a['os_like'] + '.*', line):
path = a['path']
return path
return path
"""
Argument parser
"""
@ -111,21 +139,22 @@ class program(object):
'-c', '--status-codes',
help = 'Print only these numerical status codes.\nRegular expressions supported.',
nargs = '+',
dest = 'codes'
dest = 'codes',
required = False
)
argparser.add_argument(
'-cf', '--countries',
help = 'Include only these countries.\nNegative match (exclude): "\!Country"',
nargs = '?',
type = lambda x: [i for i in x.split(',')],
dest = 'countries'
dest = 'countries',
required = False
)
argparser.add_argument(
'-tf', '--time-format',
help = 'Output time format.',
nargs = '?',
dest = 'time_format',
default = out_time_format
)
argparser.add_argument(
'-if', '--included-fields',
@ -154,7 +183,7 @@ class program(object):
help = '"geoiplookup" tool executable found in PATH.',
nargs = '?',
dest = 'geotool_exec',
default = "geoiplookup"
default = 'geoiplookup'
)
argparser.add_argument(
'-gd', '--geo-database-dir',
@ -202,9 +231,10 @@ class program(object):
argparser.add_argument(
'--httpd-conf-file',
help = 'Apache HTTPD configuration file with LogFormat directive.',
action = 'store_true',
dest = 'httpd_conf_file',
default = '/etc/httpd/conf/httpd.conf'
default = self.get_apache_conf_path(),
nargs = '?',
type = str
)
argparser.add_argument(
'--httpd-log-nickname',
@ -370,7 +400,10 @@ class program(object):
break
if os.access(file_path, eval(flag)):
self.txt.print_verbose('File check', file_path, 'flags: ' + flag)
try:
self.txt.print_verbose('File check', file_path, 'flags: ' + flag)
except AttributeError:
pass
return True
return False
@ -379,21 +412,20 @@ class program(object):
"""
def get_httpd_logformat_directive(self, cfile, tag = None):
try:
log_format = None
self.txt.print_verbose('Apache configuration file', cfile)
with open(cfile, 'r') as f:
for line in f:
if re.search('^[ ]+LogFormat ".*' + tag, line):
r = re.search('^[ ]+LogFormat "(.*)(!?("))', line)
log_format = r.groups()[0].replace('\\', '')
break
f.close()
self.txt.print_verbose('Log format', log_format)
return log_format
if not self.check_file(cfile, "os.R_OK"):
raise Exception("Couldn't open Apache HTTPD configuration file '{:s}'.".format(cfile))
except:
raise Exception("Couldn't open Apache HTTPD configuration file.")
log_format = None
self.txt.print_verbose('Apache configuration file', cfile)
with open(cfile, 'r') as f:
for line in f:
if re.search('^[ ]+LogFormat ".*' + tag, line):
r = re.search('^[ ]+LogFormat "(.*)(!?("))', line)
log_format = r.groups()[0].replace('\\', '')
break
f.close()
self.txt.print_verbose('Log format', log_format)
return log_format
"""
Geotool processing
@ -513,20 +545,19 @@ class program(object):
for sfile in sfiles:
try:
with open(sfile, 'r') as f:
line_count = len(list(f))
f.close()
files_tmp.append({
'file': str(sfile),
'modified_date': os.path.getmtime(sfile),
'size': os.path.getsize(sfile),
'line_count': line_count
})
if not self.check_file(sfile, "os.R_OK"):
raise Exception("Couldn't read input file '{}'.".format(sfile))
except:
raise Exception("Couldn't read input file " + sfile)
with open(sfile, 'r') as f:
line_count = len(list(f))
f.close()
files_tmp.append({
'file': str(sfile),
'modified_date': os.path.getmtime(sfile),
'size': os.path.getsize(sfile),
'line_count': line_count
})
if files_order == 'date':
files_tmp.sort(key = lambda d: d['modified_date'])
@ -613,50 +644,50 @@ class program(object):
for sfile in sfiles:
append = False
try:
with open(sfile, 'r') as f:
line_count = len(list(f))
f.close()
line_end = line_start + line_count
if not self.check_file(sfile, "os.R_OK"):
raise Exception("Couldn't read input file '{}'.".format(sfile))
if line_range_min is not None:
if line_range_min >= line_start and line_range_min <= line_end:
append = True
line_start = line_range_min
if line_range_min is None and line_end < line_range_max:
with open(sfile, 'r') as f:
line_count = len(list(f))
f.close()
line_end = line_start + line_count
if line_range_min is not None:
if line_range_min >= line_start and line_range_min <= line_end:
append = True
line_start = line_range_min
if line_range_min is None and line_end < line_range_max:
append = True
if line_range_max is not None:
if line_range_max >= line_start and line_range_max <= line_end:
append = True
line_end = line_range_max
if line_range_min < line_end and line_range_max > line_end:
append = True
if line_range_max is None and line_start > line_range_min:
if line_range_max is not None:
if line_range_max >= line_start and line_range_max <= line_end:
append = True
line_end = line_range_max
if line_range_min < line_end and line_range_max > line_end:
append = True
if line_range_max is None and line_start > line_range_min:
append = True
if append:
files_and_lines['files'].append({
'file': str(sfile),
'line_start_global': line_start,
'line_end_global': line_end,
'modified_date': os.path.getmtime(sfile),
'size': os.path.getsize(sfile)
})
if append:
files_and_lines['files'].append({
'file': str(sfile),
'line_start_global': line_start,
'line_end_global': line_end,
'modified_date': os.path.getmtime(sfile),
'size': os.path.getsize(sfile)
})
# Use only the first matching line_start value
if not range_line_start_found:
range_line_start_found = True
range_line_start = line_start
# Use the last matching line_end value
range_line_end = line_end
lines_count += line_count
line_start = lines_count + 1
# Use only the first matching line_start value
if not range_line_start_found:
range_line_start_found = True
range_line_start = line_start
# Use the last matching line_end value
range_line_end = line_end
except:
raise Exception("Couldn't read input file " + sfile)
lines_count += line_count
line_start = lines_count + 1
files_and_lines['lines_total'] = range_line_end - range_line_start
files_and_lines['range_min'] = range_line_start
@ -863,6 +894,9 @@ class program(object):
lfile['line_start_global'], lfile['line_end_global']
))
if not self.check_file(lfile['file'], "os.R_OK"):
raise Exception("Couldn't read input file '{}'.".format(lfile['file']))
with open(lfile['file'], 'r') as f:
f = list(f)
range_start = files_process_data['files'][file_num]['line_start_local']


Loading…
Cancel
Save