@@ -18,8 +18,9 @@
 ################################################################
-# TODO prev_host: instead of comparing to previous entry, check if such IP has been seen in XXX seconds
+# TODO: prev_host: instead of comparing to previous entry, check if such IP has been seen in XXX seconds
 # TODO: store IP values for temporary list for XXX seconds, and check list values
+# TODO: implement warning check for geoiplookup tool database files, i.e. "warning, some geo database files are very old. Please consider updating geo database information."
 import argparse
 import os
@@ -29,31 +30,51 @@ import subprocess
 from datetime import datetime
 from apachelogs import LogParser, InvalidEntryError
+class text_processing(object):
+    """
+    Init
+    """
+    def __init__(self, verbose):
+        self.show_verbose = verbose
+    """
+    Verbose output format (we do not use logger library)
+    """
+    def print_verbose(self, prefix='output', *args):
+        if self.show_verbose:
+            print('VERBOSE [{:s}]: {:s}'.format(prefix, ', '.join([str(i) for i in args])))
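
Note: a standalone illustration of the helper above (not part of the patch) — print_verbose() joins its variadic arguments with ', ':

    text_processing(verbose=True).print_verbose('Input files', 'a.log', 'b.log')
    # -> VERBOSE [Input files]: a.log, b.log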
 class program(object):
     """
     Init
     """
     def __init__(self):
-        self.get_args()
+        self.args = self.get_args()
         # Exclude private IP address classes from geo lookup process
+        # Strip out %I and %O flags from Apache log format
         # 127.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16
-        self.private_class_ip_networks = ['^127\.', '^172\.[1[6-9]|2[0-9]|3[0-1]]\.', '^192\.168\.']
+        self.private_class_ip_networks = ['^127\.', '^172\.(1[6-9]{1}|2[0-9]{1}|3[0-1]{1})\.', '^192\.168\.']
+        self.txt = text_processing(verbose=self.args.verbose)
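
Note: the old 172.16.0.0/12 pattern nested brackets incorrectly ('[1[6-9]|2[0-9]|3[0-1]]' parses as a character class, not alternation); the replacement uses a real group (the '{1}' quantifiers are redundant but harmless). A standalone sanity check:

    import re
    private_172 = r'^172\.(1[6-9]|2[0-9]|3[0-1])\.'
    for ip in ['172.15.0.1', '172.16.0.1', '172.31.255.254', '172.32.0.1']:
        print(ip, bool(re.match(private_172, ip)))
    # -> False, True, True, False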
"""
"""
Define & get output fields
Define & get output fields
"""
"""
def get_out_fields ( self ) :
def get_out_fields ( self ) :
out_fields = {
out_fields = {
' log_file_name ' : { ' data ' : None , ' format ' : ' {:s} ' , ' included ' : False , ' human_name ' : ' Log file name ' } ,
' http_status ' : { ' data ' : None , ' format ' : ' {:3s} ' , ' included ' : True , ' human_name ' : ' Status ' } ,
' remote_host ' : { ' data ' : None , ' format ' : ' {:15s} ' , ' included ' : True , ' human_name ' : ' Remote IP ' } ,
' country ' : { ' data ' : None , ' format ' : ' {:20s} ' , ' included ' : False , ' human_name ' : ' Country ' } ,
' city ' : { ' data ' : None , ' format ' : ' {:15s} ' , ' included ' : False , ' human_name ' : ' City ' } ,
' time ' : { ' data ' : None , ' format ' : ' {:20s} ' , ' included ' : True , ' human_name ' : ' Date/Time ' } ,
' time_diff ' : { ' data ' : None , ' format ' : ' {:8s} ' , ' included ' : True , ' human_name ' : ' Time diff ' } ,
' user_agent ' : { ' data ' : None , ' format ' : ' {:s} ' , ' included ' : True , ' human_name ' : ' User agent ' } ,
' http_request ' : { ' data ' : None , ' format ' : ' {:s} ' , ' included ' : True , ' human_name ' : ' Request ' }
' log_file_name ' : { ' data ' : None , ' format ' : ' {:s} ' , ' included ' : False , ' human_name ' : ' Log file name ' , ' sort_index ' : 0 } ,
' http_status ' : { ' data ' : None , ' format ' : ' {:3s} ' , ' included ' : True , ' human_name ' : ' Status ' , ' sort_index ' : 1 } ,
' remote_host ' : { ' data ' : None , ' format ' : ' {:15s} ' , ' included ' : True , ' human_name ' : ' Remote IP ' , ' sort_index ' : 2 } ,
' country ' : { ' data ' : None , ' format ' : ' {:20s} ' , ' included ' : False , ' human_name ' : ' Country ' , ' sort_index ' : 3 } ,
' city ' : { ' data ' : None , ' format ' : ' {:15s} ' , ' included ' : False , ' human_name ' : ' City ' , ' sort_index ' : 4 } ,
' time ' : { ' data ' : None , ' format ' : ' {:20s} ' , ' included ' : True , ' human_name ' : ' Date/Time ' , ' sort_index ' : 5 } ,
' time_diff ' : { ' data ' : None , ' format ' : ' {:8s} ' , ' included ' : True , ' human_name ' : ' Time diff ' , ' sort_index ' : 6 } ,
' user_agent ' : { ' data ' : None , ' format ' : ' {:s} ' , ' included ' : True , ' human_name ' : ' User agent ' , ' sort_index ' : 7 } ,
' http_request ' : { ' data ' : None , ' format ' : ' {:s} ' , ' included ' : True , ' human_name ' : ' Request ' , ' sort_index ' : 8 }
}
}
return out_fields
return out_fields
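
Note: the new 'sort_index' keys give every column a stable position that later code can sort on. A minimal illustration of the intended use (the sorting snippet itself is hypothetical, not from the patch):

    fields = {
        'remote_host': {'included': True, 'human_name': 'Remote IP', 'sort_index': 2},
        'http_status': {'included': True, 'human_name': 'Status', 'sort_index': 1},
        'country': {'included': False, 'human_name': 'Country', 'sort_index': 3},
    }
    ordered = sorted((v for v in fields.values() if v['included']),
                     key=lambda v: v['sort_index'])
    print([v['human_name'] for v in ordered])
    # -> ['Status', 'Remote IP']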
@@ -112,7 +133,7 @@ class program(object):
             nargs='?',
             dest='incl_fields',
             type=lambda x: [i for i in x.split(',')],
             default=','.join(incl_fields)
         )
         argparser.add_argument(
             '-ef', '--excluded-fields',
@@ -161,7 +182,7 @@ class program(object):
             dest='sortby_field'
         )
         argparser.add_argument(
-            '-ro', '--reverse-order',
+            '-ro', '--reverse',
             help='Sort in reverse order.',
             dest='sortby_reverse',
             action='store_true'
@@ -199,7 +220,7 @@ class program(object):
             required=False
         )
         argparser.add_argument(
-            '-ph', '--print-headers',
+            '-ph', '--print-header',
             help='Print column headers.',
             dest='column_headers',
             required=False,
@@ -213,6 +234,37 @@ class program(object):
             default='table',
             choices=['table', 'csv']
         )
+        argparser.add_argument(
+            '--head',
+            help='Read first N lines from all log entries.',
+            dest='read_first_lines_num',
+            required=False,
+            nargs='?',
+            type=int
+        )
+        argparser.add_argument(
+            '--tail',
+            help='Read last N lines from all log entries.',
+            dest='read_last_lines_num',
+            required=False,
+            nargs='?',
+            type=int
+        )
+        argparser.add_argument(
+            '--sort-logs-by',
+            help='Sorting order for input log files.',
+            dest='sort_logs_by_info',
+            required=False,
+            default='name',
+            choices=['date', 'size', 'name']
+        )
+        argparser.add_argument(
+            '--verbose',
+            help='Verbose output.',
+            dest='verbose',
+            required=False,
+            action='store_true'
+        )
         args = argparser.parse_args()
         return args
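
Note: the new options combine with the existing file-selection flags roughly as follows (script name and selection arguments are placeholders):

    ./apache_logparser.py <log selection args> --tail 500 --sort-logs-by date --verbose

--head and --tail are mutually exclusive (get_file_lines_head_tail() raises if both are set), and --sort-logs-by fixes the file order before the head/tail window is applied.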
@@ -266,12 +318,14 @@ class program(object):
             if not code_appended:
                 codes.append((user_code, validated))
+        self.txt.print_verbose('Available status codes', codes)
         return codes
     """
     Get log file list
     """
     def get_files(self, files_regex=None, files_list=None):
         files = []
@@ -298,6 +352,8 @@
             raise Exception("No matching files found.")
         files.sort()
+        self.txt.print_verbose('Input files', files)
         return files
     """
@@ -314,16 +370,18 @@
                 break
         if os.access(file_path, eval(flag)):
+            self.txt.print_verbose('File check', file_path, 'flags: ' + flag)
             return True
         return False
     """
     Get Apache HTTPD LogFormat directive syntax
     """
     def get_httpd_logformat_directive(self, cfile, tag=None):
         try:
             log_format = None
+            self.txt.print_verbose('Apache configuration file', cfile)
             with open(cfile, 'r') as f:
                 for line in f:
                     if re.search('^[ ]+LogFormat ".*' + tag, line):
@@ -331,6 +389,7 @@ class program(object):
                         log_format = r.groups()[0].replace('\\', '')
                         break
             f.close()
+            self.txt.print_verbose('Log format', log_format)
             return log_format
         except:
@@ -339,7 +398,7 @@ class program(object):
     """
     Geotool processing
     """
-    def geotool_get_data(self, geotool_exec, database_file, remote_host):
+    def geotool_get_data(self, geotool_ok, geotool_exec, database_file, remote_host):
         host_country = None
         host_city = None
@@ -352,7 +411,7 @@ class program(object):
             'host_city': host_city
         }
-        if self.check_file(geotool_exec, "os.X_OK", "PATH") and self.check_file(database_file, "os.R_OK"):
+        if geotool_ok:
             host_country_main = subprocess.check_output([geotool_exec, '-d', database_file, remote_host]).rstrip().decode()
             host_country_main = host_country_main.split('\n')
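
Note: a sketch of the gated lookup, assuming legacy GeoIP tooling whose output lines look like "GeoIP Country Edition: FI, Finland" (output format and paths are assumptions, verify against your geoiplookup build):

    import subprocess
    # geoiplookup -d <dir> <ip>; the directory and IP below are illustrative
    out = subprocess.check_output(
        ['geoiplookup', '-d', '/usr/share/GeoIP', '8.8.8.8']
    ).rstrip().decode()
    country = next((l.split(': ', 1)[1] for l in out.split('\n')
                    if 'Country Edition' in l), None)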
@@ -424,25 +483,186 @@ class program(object):
         return skip_line
     """
-    Get total number of lines in files
+    Get lines to be processed from input files and min/max input
+    min and max work much like Unix tools 'head' and 'tail'
+    Only a single value (min or max) is allowed
     """
-    def get_file_line_count(self, sfiles):
-        lines_in_files = []
+    def get_file_lines_head_tail(self, sfiles, line_range_min=None, line_range_max=None, files_order=None):
+        files_and_lines = {'files': [], 'lines_total': 0, 'range_min': 0, 'range_max': 0}
+        files_tmp = []
+        lines_count = 0
+        line_start = 0
+        line_end = 0
+        if line_range_min and line_range_max:
+            raise Exception("Either first or last line limit can be used, not both.")
+        if files_order is None:
+            raise Exception("Sorting order for input files missing.")
+        if line_range_min is not None:
+            if line_range_min < 0:
+                line_range_min = None
+        if line_range_max is not None:
+            if line_range_max < 0:
+                line_range_max = None
         for sfile in sfiles:
             try:
                 with open(sfile, 'r') as f:
                     line_count = len(list(f))
                 f.close()
-                lines_in_files.append({
-                    'file': str(sfile),
-                    'lines': int(line_count)
+                files_tmp.append({
+                    'file': str(sfile),
+                    'modified_date': os.path.getmtime(sfile),
+                    'size': os.path.getsize(sfile),
+                    'line_count': line_count
                 })
             except:
                 raise Exception("Couldn't read input file " + sfile)
+        if files_order == 'date':
+            files_tmp.sort(key=lambda d: d['modified_date'])
+        elif files_order == 'size':
+            files_tmp.sort(key=lambda d: d['size'])
+        elif files_order == 'name':
+            files_tmp.sort(key=lambda d: d['file'])
+        i = 0
+        for sfile in files_tmp:
+            line_end = (line_start + sfile['line_count']) - 1
+            files_and_lines['files'].append({
+                'file': sfile['file'],
+                'line_start_global': line_start,
+                'line_end_global': line_end,
+                'line_start_local': 0,
+                'line_end_local': sfile['line_count'] - 1,
+            })
+            lines_count += sfile['line_count']
+            line_start = files_and_lines['files'][i]['line_end_global'] + 1
+            i += 1
+        range_line_start = files_and_lines['files'][0]['line_start_global']
+        full_range = files_and_lines['files'][-1]['line_end_global']
+        files_and_lines['range_min'] = range_line_start
+        files_and_lines['range_max'] = full_range
+        files_and_lines['lines_total'] = full_range - range_line_start
+        i = 0
+        # Read last N lines
+        if line_range_max is not None:
+            range_start = full_range - line_range_max
+            if range_start <= 0:
+                range_start = 0
+            for l in files_and_lines['files']:
+                if range_start >= l['line_start_global'] and range_start <= l['line_end_global']:
+                    l['line_start_global'] = range_start
+                    l['line_start_local'] = l['line_end_local'] - (l['line_end_global'] - range_start)
+                    del files_and_lines['files'][:i]
+                i += 1
+        # Read first N lines
+        if line_range_min is not None:
+            range_end = line_range_min
+            if range_end >= full_range:
+                range_end = full_range
+            for l in files_and_lines['files']:
+                if range_end >= l['line_start_global'] and range_end <= l['line_end_global']:
+                    l['line_end_local'] = l['line_end_local'] - l['line_start_local'] - (l['line_end_global'] - range_end)
+                    l['line_end_global'] = range_end
+                    del files_and_lines['files'][i + 1:]
+                i += 1
+        return files_and_lines
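
Note: a worked example of the bookkeeping above, assuming two hypothetical inputs access.log.1 (3 lines) and access.log (4 lines): after the second loop the files carry inclusive global ranges 0-2 and 3-6 and local ranges 0-2 and 0-3, so full_range is 6. With --tail 5 the code computes range_start = 6 - 5 = 1, trims access.log.1 to start at global line 1 (local line 1) and keeps access.log whole.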
"""
Get lines to be processed from input files and range input
Range : < min > - < max >
"""
def get_file_lines_range ( self , sfiles , line_range_min = None , line_range_max = None ) :
files_and_lines = { ' files ' : [ ] , ' lines_total ' : 0 , ' range_min ' : 0 , ' range_max ' : 0 }
lines_count = 0
line_start = 0
line_end = 0
range_line_start = 0
range_line_end = 0
range_line_start_found = False
if line_range_min is not None :
if line_range_min < 0 :
line_range_min = None
if line_range_max is not None :
if line_range_max < 0 :
line_range_max = None
for sfile in sfiles :
append = False
try :
with open ( sfile , ' r ' ) as f :
line_count = len ( list ( f ) )
f . close ( )
line_end = line_start + line_count
if line_range_min is not None :
if line_range_min > = line_start and line_range_min < = line_end :
append = True
line_start = line_range_min
if line_range_min is None and line_end < line_range_max :
append = True
if line_range_max is not None :
if line_range_max > = line_start and line_range_max < = line_end :
append = True
line_end = line_range_max
if line_range_min < line_end and line_range_max > line_end :
append = True
if line_range_max is None and line_start > line_range_min :
append = True
if append :
files_and_lines [ ' files ' ] . append ( {
' file ' : str ( sfile ) ,
' line_start_global ' : line_start ,
' line_end_global ' : line_end ,
' modified_date ' : os . path . getmtime ( sfile ) ,
' size ' : os . path . getsize ( sfile )
} )
# Use only the first matching line_start value
if not range_line_start_found :
range_line_start_found = True
range_line_start = line_start
# Use the last matching line_end value
range_line_end = line_end
lines_count + = line_count
line_start = lines_count + 1
except :
except :
raise Exception ( " Couldn ' t read input file " + sfile )
raise Exception ( " Couldn ' t read input file " + sfile )
return lines_in_files
files_and_lines [ ' lines_total ' ] = range_line_end - range_line_start
files_and_lines [ ' range_min ' ] = range_line_start
files_and_lines [ ' range_max ' ] = range_line_end
return files_and_lines
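
Note: the same two hypothetical files through get_file_lines_range() with min 2 and max 5: the first file matches the lower bound (line_start becomes 2, line_end stays 3), the second matches the upper bound (line_start 4, line_end trimmed from 8 to 5), giving lines_total = 5 - 2 = 3. Nothing in this patch calls the function yet; --head/--tail go through get_file_lines_head_tail().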
"""
"""
Date checker
Date checker
@@ -488,42 +708,57 @@ class program(object):
     """
     def get_included_fields(self, fields, included_fields, excluded_fields=None):
-        included_values = []
+        if included_fields:
+            # TODO: simplify logic
+            n = 0
+            included_fields = [[i.replace(' ', ''), 0] for i in included_fields]
+            for a in included_fields:
+                a[1] += n
+                n += 1
+        if excluded_fields:
+            excluded_fields = [i.replace(' ', '') for i in excluded_fields]
         all_defined_fields = []
+        fields_out = {}
-        if 'all' in included_fields or included_fields is None:
-            included_fields = [i for i in fields.keys()]
+        if included_fields is None or 'all' in [i[0] for i in included_fields]:
+            included_fields = [[i, int(fields[i]['sort_index'])] for i in fields.keys()]
         if excluded_fields is not None:
             if 'all' in excluded_fields:
                 raise Exception("No output fields defined.")
+            # for i in excluded_fields:
+            #     if i in included_fields:
+            #         raise Exception("Field can't be both included and excluded. Offending field: {}".format(i))
-            included_fields = [i for i in included_fields if i not in excluded_fields]
-            all_defined_fields = included_fields + excluded_fields
+            # TODO: simplify logic
+            n = 0
+            included_fields = [[i[0], 0] for i in included_fields if i[0] not in excluded_fields]
+            for a in included_fields:
+                a[1] += n
+                n += 1
+            all_defined_fields = included_fields + [[i, None] for i in excluded_fields]
         else:
             all_defined_fields = included_fields
         for i in all_defined_fields:
-            if i not in fields.keys():
-                raise Exception("Unknown field value: {}. Accepted values: {}".format(i, ', '.join(fields.keys())))
+            if i[0] not in fields.keys():
+                raise Exception("Unknown field value: {}. Accepted values: {}".format(i[0], ', '.join(fields.keys())))
-        for key, value in fields.items():
-            if key in included_fields:
-                value['included'] = True
-            else:
-                value['included'] = False
-            included_values.append(value['included'])
+        for a in included_fields:
+            for key, value in fields.items():
+                if key == a[0]:
+                    value['sort_index'] = a[1]
+                    value['included'] = True
+                    fields_out[key] = value
-        if True not in included_values:
+        if len(fields_out.keys()) == 0:
             raise Exception("No output fields defined.")
-        return fields
+        return fields_out
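
Note: with the rewritten logic fields_out carries only the fields that survive inclusion/exclusion. For example, included_fields=['time', 'remote_host'] with excluded_fields=['time'] yields a single-key result: 'remote_host' with 'included' forced True and its sort_index renumbered to 0.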
"""
"""
Process input files
Process input files
"""
"""
def process_files ( self , user_arguments ) :
def process_files ( self ) :
prev_host = " "
prev_host = " "
log_entries = [ ]
log_entries = [ ]
@@ -531,21 +766,25 @@
         countries = []
         # Log format as defined in Apache/HTTPD configuration file (LogFormat directive) or manually by user
-        if user_arguments.log_format:
-            log_format = user_arguments.log_format
+        if self.args.log_format:
+            log_format = self.args.log_format
         else:
-            log_format = self.get_httpd_logformat_directive(user_arguments.httpd_conf_file, user_arguments.httpd_log_nickname)
+            log_format = self.get_httpd_logformat_directive(self.args.httpd_conf_file, self.args.httpd_log_nickname)
+        # Remove bytes in & out fields from local traffic pattern
+        log_format_local = log_format.replace('%I', '').replace('%O', '').strip()
         parser = LogParser(log_format)
+        parser_local = LogParser(log_format_local)
-        if user_arguments.codes:
-            codes = self.get_input_status_codes(self.populate_status_codes(), user_arguments.codes)
+        if self.args.codes:
+            codes = self.get_input_status_codes(self.populate_status_codes(), self.args.codes)
-        if user_arguments.countries:
-            countries = user_arguments.countries
+        if self.args.countries:
+            countries = self.args.countries
-        date_lower = user_arguments.date_lower
-        date_upper = user_arguments.date_upper
+        date_lower = self.args.date_lower
+        date_upper = self.args.date_upper
         day_format = "%d-%m-%Y"
         if date_lower is not None:
@@ -553,74 +792,106 @@
         if date_upper is not None:
             date_upper = datetime.strptime(date_upper, day_format)
-        files = self.get_files(user_arguments.files_regex, user_arguments.files_list)
-        show_progress = user_arguments.show_progress
-        use_geolocation = user_arguments.use_geolocation
-        geotool_exec = user_arguments.geotool_exec
-        geo_database_location = user_arguments.geo_database_location
-        incl_fields = user_arguments.incl_fields
-        if isinstance(user_arguments.incl_fields, str):
-            incl_fields = user_arguments.incl_fields.replace(' ', '').split(',')
-        if 'country' in incl_fields or 'city' in incl_fields:
-            use_geolocation = True
+        geotool_exec = self.args.geotool_exec
+        geo_database_location = self.args.geo_database_location
+        incl_fields = self.args.incl_fields
+        if isinstance(self.args.incl_fields, str):
+            incl_fields = self.args.incl_fields.split(',')
+        use_geolocation = self.args.use_geolocation
+        geotool_ok = False
+        if use_geolocation:
+            if self.check_file(geotool_exec, "os.X_OK", "PATH") and self.check_file(geo_database_location, "os.R_OK"):
+                geotool_ok = True
+        if use_geolocation:
+            if 'country' not in incl_fields:
+                incl_fields.append('country')
+            if 'city' not in incl_fields:
+                incl_fields.append('city')
         fields = self.get_included_fields(
             self.get_out_fields(),
             incl_fields,
-            user_arguments.excl_fields
+            self.args.excl_fields
         )
-        if use_geolocation:
-            fields['country']['included'] = True
-            fields['city']['included'] = True
+        if 'country' in fields or 'city' in fields:
+            use_geolocation = True
         invalid_lines = []
         field_names = []
-        i = 0
         country_seen = False
         geo_data = None
         skip_line_by_status = False
         skip_line_by_country = False
+        file_num = 0
+        stri = ""
+        files_input = self.get_files(self.args.files_regex, self.args.files_list)
+        files_process_data = self.get_file_lines_head_tail(
+            files_input,
+            self.args.read_first_lines_num,
+            self.args.read_last_lines_num,
+            self.args.sort_logs_by_info
+        )
+        lines_total = files_process_data['lines_total']
+        files_total = len(files_process_data['files'])
-        lines_total = sum([i['lines'] for i in self.get_file_line_count(files)])
+        self.txt.print_verbose(
+            'Log entry range',
+            str(files_process_data['files'][0]['line_start_global'])
+            + ' - ' +
+            str(files_process_data['files'][-1]['line_end_global'])
+        )
-        if show_progress:
+        if self.args.show_progress or self.args.verbose:
             print(
                 "File count: {}\nLines in total: {}".format(
-                    str(len(files)),
+                    str(files_total),
                     str(lines_total)
                 ))
-        for lfile in files:
+        for lfile in files_process_data['files']:
-            if show_progress:
-                print("Processing file: {} (lines: {})".format(
-                    lfile,
-                    str(self.get_file_line_count([lfile])[0]['lines'])
+            if self.args.show_progress or self.args.verbose:
+                print("Processing file: {:s} (lines: {:d}-{:d})".format(
+                    lfile['file'],
+                    lfile['line_start_global'], lfile['line_end_global']
                 ))
-            with open(lfile, 'r') as f:
-                for line in f:
+            with open(lfile['file'], 'r') as f:
+                f = list(f)
+            range_start = files_process_data['files'][file_num]['line_start_local']
+            range_end = files_process_data['files'][file_num]['line_end_local']
+            lines = range(range_start, range_end + 1)
+            line_num = 1
+            for line in lines:
-                if show_progress:
-                    print("Processing log entry: {} ({}%)".format(
-                        str(i),
-                        round(100 * (i / lines_total), 2)
+                if self.args.show_progress or self.args.verbose:
+                    print("Processing log entry: {:d}/{:d} ({}%)".format(
+                        line_num,
+                        len(lines),
+                        round(100 * (line_num / len(lines)), 2)
                     ), end="\r")
-                if i != 0 and not (skip_line_by_status or skip_line_by_country) and entry_data:
+                if line_num != 1 and not (skip_line_by_status or skip_line_by_country) and entry_data:
                     prev_host = entry_data['remote_host']
                     prev_host_time = entry_data['time']
                 try:
-                    entry = parser.parse(line)
+                    if re.match('|'.join(self.private_class_ip_networks), f[line]):
+                        entry = parser_local.parse(f[line])
+                    else:
+                        entry = parser.parse(f[line])
                 except InvalidEntryError:
-                    invalid_lines.append((lfile, i + 1))
+                    invalid_lines.append((lfile['file'], line_num))
+                    line_num += 1
                     continue
                 entry_data = {
@@ -632,7 +903,7 @@
                 }
                 if not self.date_checker(date_lower, date_upper, entry_data['time']):
-                    i += 1
+                    line_num += 1
                     continue
                 if len(codes) > 0:
@@ -645,7 +916,7 @@
                     country_seen = False
                 if not country_seen:
-                    geo_data = self.geotool_get_data(geotool_exec, geo_database_location, entry_data['remote_host'])
+                    geo_data = self.geotool_get_data(geotool_ok, geotool_exec, geo_database_location, entry_data['remote_host'])
                 if len(countries) > 0 and geo_data is not None:
                     skip_line_by_country = self.filter_country(countries, geo_data['host_country'])
@@ -654,7 +925,7 @@
                     skip_line_by_country = False
                 if skip_line_by_status or skip_line_by_country:
-                    i += 1
+                    line_num += 1
                     continue
                 time_diff = str('NEW_CONN')
@@ -664,29 +935,29 @@
                     time_diff = int(time_diff)
                     if time_diff > 0:
                         time_diff = "+" + str(time_diff)
-                if i == 0:
+                if line_num == 1 and file_num == 0:
                     time_diff = int(0)
-                if fields['log_file_name']['included']:
-                    fields['log_file_name']['data'] = lfile
+                if 'log_file_name' in fields:
+                    fields['log_file_name']['data'] = lfile['file']
-                if fields['http_status']['included']:
+                if 'http_status' in fields:
                     fields['http_status']['data'] = entry_data['status']
-                if fields['remote_host']['included']:
+                if 'remote_host' in fields:
                     fields['remote_host']['data'] = entry_data['remote_host']
                 if geo_data is not None:
-                    if fields['country']['included']:
+                    if 'country' in fields:
                         fields['country']['data'] = geo_data['host_country']
-                    if fields['city']['included']:
+                    if 'city' in fields:
                         fields['city']['data'] = geo_data['host_city']
-                if fields['time']['included']:
+                if 'time' in fields:
                     fields['time']['data'] = entry_data['time']
-                if fields['time_diff']['included']:
+                if 'time_diff' in fields:
                     fields['time_diff']['data'] = time_diff
-                if fields['user_agent']['included']:
+                if 'user_agent' in fields:
                     fields['user_agent']['data'] = entry_data['user_agent']
-                if fields['http_request']['included']:
+                if 'http_request' in fields:
                     fields['http_request']['data'] = entry_data['http_request']
                 stri = ""
@@ -703,30 +974,31 @@
                     field_names.append((key, value['human_name']))
                 log_entries.append(printargs)
-                i += 1
+                line_num += 1
+            file_num += 1
-        return [log_entries, files, i, stri, field_names, invalid_lines]
+        return [log_entries, files_process_data['files'], lines_total, stri, field_names, invalid_lines]
     """
     Execute
     """
     def execute(self):
-        user_arguments = self.get_args()
-        print_headers = user_arguments.column_headers
-        show_progress = user_arguments.show_progress
-        show_stats = user_arguments.show_stats
-        output_format = user_arguments.output_format
+        print_headers = self.args.column_headers
+        show_progress = self.args.show_progress
+        show_stats = self.args.show_stats
+        output_format = self.args.output_format
-        sortby_field = user_arguments.sortby_field
-        reverse_order = bool(user_arguments.sortby_reverse)
+        sortby_field = self.args.sortby_field
+        reverse_order = self.args.sortby_reverse
-        if 'all' not in user_arguments.incl_fields:
-            if sortby_field and sortby_field not in user_arguments.incl_fields:
-                raise Exception("Sort-by field must be included in output fields.")
+        if self.args.incl_fields:
+            if 'all' not in self.args.incl_fields:
+                if sortby_field and sortby_field not in self.args.incl_fields:
+                    raise Exception("Sort-by field must be included in output fields.")
-        results = self.process_files(user_arguments)
+        results = self.process_files()
         result_entries = results[0]
         result_files = results[1]
         result_lines = results[2]
@@ -735,17 +1007,17 @@
         out_fields_human_names = [i[1] for i in results[4]]
         invalid_lines = results[5]
+        if sortby_field is None and reverse_order:
+            raise Exception("You must define a field for reverse sorting.")
         if sortby_field is not None:
             out_field_validation = self.get_out_field(out_fields, sortby_field)
             if out_field_validation[0]:
                 result_entries.sort(
-                    key=lambda result_entries: result_entries[out_field_validation[1]] or '',
+                    key=lambda r: r[out_field_validation[1]] or '',
                     reverse=reverse_order
                 )
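
Note: the "or ''" fallback in the sort key substitutes an empty string for a None column value, since Python 3 raises TypeError when sorted() compares None with str; empty values therefore sort first in ascending order:

    sorted([['b'], [None]], key=lambda r: r[0] or '')
    # -> [[None], ['b']]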
-        if not show_progress:
-            print("\n")
         if output_format == 'table':
             if print_headers:
@@ -763,7 +1035,6 @@
         if output_format == 'csv':
             if print_headers:
-                print("\n")
                 print(','.join(out_fields_human_names))
             for entry in result_entries:
@@ -780,7 +1051,7 @@
             "Processed log entries: {:d}\n" +
             "Matched log entries: {:d}\n"
         ).format(
-            ', '.join(result_files),
+            ', '.join([i['file'] for i in result_files]),
             result_lines,
             len(result_entries)
         )