Fincer
/
apache-logparser
mirror of https://github.com/Fincer/apache-logparser


#!/bin/env python


#    Simple Apache HTTPD log file parser

#    Copyright (C) 2022  Pekka Helenius <pekka [dot] helenius [at] fjordtek [dot] com>

#

#    This program is free software: you can redistribute it and/or modify

#    it under the terms of the GNU General Public License as published by

#    the Free Software Foundation, either version 3 of the License, or

#    (at your option) any later version.

#

#    This program is distributed in the hope that it will be useful,

#    but WITHOUT ANY WARRANTY; without even the implied warranty of

#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

#    GNU General Public License for more details.

#

#    You should have received a copy of the GNU General Public License

#    along with this program.  If not, see <https://www.gnu.org/licenses/>.


################################################################


# TODO: prev_host: instead of comparing to previous entry, check if such IP has been seen in XXX seconds

# TODO: store IP values for temporary list for XXX seconds, and check list values

# TODO: implement warning check for geoiplookup tool database files, i.e. "warning, some geo database files are very old. Please consider updating geo database information."


import argparse

import os

import re

import subprocess


from datetime import datetime

from apachelogs import LogParser, InvalidEntryError


class text_processing(object):


  """

  Init

  """

  def __init__(self, verbose):

    self.show_verbose = verbose


  """

  Verbose output format (we do not use logger library)

  """


  def print_verbose(self, prefix='output', *args):

    if self.show_verbose:

      print('VERBOSE [{:s}]: {:s}'.format(prefix, ', '.join([str(i) for i in args])))


class program(object):


  """

  Init

  """

  def __init__(self):

    self.args = self.get_args()


    # Exclude private IP address classes from geo lookup process

    # Strip out %I and %O flags from Apache log format

    # 127.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16

    self.private_class_ip_networks = ['^127\.', '^172\.(1[6-9]{1}|2[0-9]{1}|3[0-1]{1})\.', '^192\.168\.']


    self.txt = text_processing(verbose = self.args.verbose)


  """

  Define & get output fields

  """

  def get_out_fields(self):

    out_fields = {

      'log_file_name': {'data': None, 'format': '{:s}',   'included': False, 'human_name': 'Log file name', 'sort_index': 0},

      'http_status':   {'data': None, 'format': '{:3s}',  'included': True,  'human_name': 'Status',        'sort_index': 1},

      'remote_host':   {'data': None, 'format': '{:15s}', 'included': True,  'human_name': 'Remote IP',     'sort_index': 2},

      'country':       {'data': None, 'format': '{:20s}', 'included': False, 'human_name': 'Country',       'sort_index': 3},

      'city':          {'data': None, 'format': '{:15s}', 'included': False, 'human_name': 'City',          'sort_index': 4},

      'time':          {'data': None, 'format': '{:20s}', 'included': True,  'human_name': 'Date/Time',     'sort_index': 5},

      'time_diff':     {'data': None, 'format': '{:8s}',  'included': True,  'human_name': 'Time diff',     'sort_index': 6},

      'user_agent':    {'data': None, 'format': '{:s}',   'included': True,  'human_name': 'User agent',    'sort_index': 7},

      'http_request':  {'data': None, 'format': '{:s}',   'included': True,  'human_name': 'Request',       'sort_index': 8}

    }

    return out_fields


  """

  Get default Apache HTTPD configuration file location

  """

  def get_apache_conf_path(self):


    path = None

    os_data_file = '/etc/os-release'

    conf_path = [

      { 'os_check_file': os_data_file, 'os_like': 'Arch Linux', 'path': '/etc/httpd/conf/httpd.conf'},

      { 'os_check_file': os_data_file, 'os_like': 'Debian',     'path': '/etc/apache2/apache2.conf'},

      { 'os_check_file': os_data_file, 'os_like': 'Ubuntu',     'path': '/etc/apache2/apache2.conf'},

      { 'os_check_file': os_data_file, 'os_like': 'Linux Mint', 'path': '/etc/apache2/apache2.conf'},

      { 'os_check_file': os_data_file, 'os_like': 'openSUSE',   'path': '/etc/apache2/httpd.conf'},

      { 'os_check_file': os_data_file, 'os_like': 'Gentoo',     'path': '/etc/apache2/httpd.conf'},

      { 'os_check_file': os_data_file, 'os_like': 'Red Hat',    'path': '/etc/httpd/conf/httpd.conf'},

      { 'os_check_file': os_data_file, 'os_like': 'Fedora',     'path': '/etc/httpd/conf/httpd.conf'}

    ]


    if self.check_file(os_data_file, "os.R_OK"):

      with open(os_data_file, 'r') as f:

        for line in f:

          if re.match('^[ ]?NAME=\"', line):

            for a in conf_path:

              if re.match('.*' + a['os_like'] + '.*', line):

                path = a['path']

                return path

    return path


  """

  Argument parser

  """

  def get_args(self):


    all_fields      = self.get_out_fields()

    incl_fields     = [i for i in all_fields.keys() if all_fields[i]['included']]

    out_time_format = "%d-%m-%Y %H:%M:%S"


    argparser = argparse.ArgumentParser(

      description     = 'Apache HTTPD server log parser',

      formatter_class = argparse.ArgumentDefaultsHelpFormatter

    )


    argparser.add_argument(

      '-fr', '--files-regex',

      help     = 'Apache log files matching input regular expression.',

      nargs    = '?',

      dest     = 'files_regex',

      required = False

    )

    argparser.add_argument(

      '-f', '--files-list',

      help     = 'Apache log files.\nRegular expressions supported.',

      nargs    = '?',

      type     = lambda x: [i for i in x.split(',')],

      dest     = 'files_list',

      required = False

    )

    argparser.add_argument(

      '-c',  '--status-codes',

      help     = 'Print only these numerical status codes.\nRegular expressions supported.',

      nargs    = '+',

      dest     = 'codes',

      required = False

    )

    argparser.add_argument(

      '-cf', '--countries',

      help     = 'Include only these countries.\nNegative match (exclude): "\!Country"',

      nargs    = '?',

      type     = lambda x: [i for i in x.split(',')],

      dest     = 'countries',

      required = False

    )

    argparser.add_argument(

      '-tf', '--time-format',

      help     = 'Output time format.',

      nargs    = '?',

      dest     = 'time_format',

    )

    argparser.add_argument(

      '-if', '--included-fields',

      help     = 'Included fields.\nAll fields: all, ' + ', '.join(all_fields),

      nargs    = '?',

      dest     = 'incl_fields',

      type     = lambda x: [i for i in x.split(',')],

      default  = ','.join(incl_fields)

    )

    argparser.add_argument(

      '-ef', '--excluded-fields',

      help     = 'Excluded fields.',

      nargs    = '?',

      dest     = 'excl_fields',

      type     = lambda x: [i for i in x.split(',')],

      default  = None

    )

    argparser.add_argument(

      '-gl', '--geo-location',

      help     = 'Check origin countries with external "geoiplookup" tool.\nNOTE: Automatically includes "country" and "city" fields.',

      action   = 'store_true',

      dest     = 'use_geolocation'

    )

    argparser.add_argument(

      '-ge', '--geotool-exec',

      help     = '"geoiplookup" tool executable found in PATH.',

      nargs    = '?',

      dest     = 'geotool_exec',

      default  = 'geoiplookup'

    )

    argparser.add_argument(

      '-gd', '--geo-database-dir',

      help     = 'Database file directory for "geoiplookup" tool.',

      nargs    = '?',

      dest     = 'geo_database_location',

      default  = '/usr/share/GeoIP/'

    )

    argparser.add_argument(

      '-dl', '--day-lower',

      help     = 'Do not check log entries older than this day.\nDay syntax: 31-12-2020',

      nargs    = '?',

      dest     = 'date_lower'

    )

    argparser.add_argument(

      '-du', '--day-upper',

      help     = 'Do not check log entries newer than this day.\nDay syntax: 31-12-2020',

      nargs    = '?',

      dest     = 'date_upper'

    )

    argparser.add_argument(

      '-sb', '--sort-by',

      help     = 'Sort by an output field.',

      nargs    = '?',

      dest     = 'sortby_field'

    )

    argparser.add_argument(

      '-ro', '--reverse',

      help     = 'Sort in reverse order.',

      dest     = 'sortby_reverse',

      action   = 'store_true'

    )

    argparser.add_argument(

      '-st', '--show-stats',

      help     = 'Show short statistics at the end.',

      action   = 'store_true',

      dest     = 'show_stats'

    )

    argparser.add_argument(

      '-p', '--show-progress',

      help     = 'Show progress information.',

      dest     = 'show_progress',

      action   = 'store_true'

    )

    argparser.add_argument(

      '--httpd-conf-file',

      help     = 'Apache HTTPD configuration file with LogFormat directive.',

      dest     = 'httpd_conf_file',

      default  = self.get_apache_conf_path(),

      nargs    = '?',

      type     = str

    )

    argparser.add_argument(

      '--httpd-log-nickname',

      help     = 'LogFormat directive nickname',

      action   = 'store_true',

      dest     = 'httpd_log_nickname',

      default  = 'combinedio'

    )

    argparser.add_argument(

      '-lf', '--log-format',

      help     = 'Log format, manually defined.',

      dest     = 'log_format',

      required = False

    )

    argparser.add_argument(

      '-ph', '--print-header',

      help     = 'Print column headers.',

      dest     = 'column_headers',

      required = False,

      action   = 'store_true'

    )

    argparser.add_argument(

      '--output-format',

      help     = 'Output format for results.',

      dest     = 'output_format',

      required = False,

      default  = 'table',

      choices  = ['table', 'csv']

    )

    argparser.add_argument(

      '--head',

      help     = 'Read first N lines from all log entries.',

      dest     = 'read_first_lines_num',

      required = False,

      nargs    = '?',

      type     = int

    )

    argparser.add_argument(

      '--tail',

      help     = 'Read last N lines from all log entries.',

      dest     = 'read_last_lines_num',

      required = False,

      nargs    = '?',

      type     = int

    )

    argparser.add_argument(

      '--sort-logs-by',

      help     = 'Sorting order for input log files.',

      dest     = 'sort_logs_by_info',

      required = False,

      default  = 'name',

      choices  = ['date', 'size', 'name']

    )

    argparser.add_argument(

      '--verbose',

      help     = 'Verbose output.',

      dest     = 'verbose',

      required = False,

      action   = 'store_true'

    )

    args = argparser.parse_args()

    return args


  """

  Populate recognized HTTP status codes

  """

  def populate_status_codes(self):


    http_valid_codes = [

      '100-103',

      '200-208',

      '218'

      '226',

      '300-308',

      '400-431',

      '451',

      '500-511'

    ]

    codes = []

    for code in http_valid_codes:

      if len(code.split('-')) == 2:

        code_start = int(code.split('-')[0])

        code_end   = int(code.split('-')[1])

        for i in range(code_start,code_end):

          codes.append(str(i))

      else:

        codes.append(code)


    return codes


  """

  Get valid HTTP status codes from user input

  """

  def get_input_status_codes(self, valid_codes, user_codes):


    codes = []


    for user_code in user_codes:

      user_code     = str(user_code)

      validated     = False

      code_appended = False


      for valid_code in valid_codes:


        if re.search(user_code, valid_code):

          validated     = True

          code_appended = True

          codes.append((valid_code, validated))

        else:

          validated = False

      if not code_appended:

        codes.append((user_code, validated))


    self.txt.print_verbose('Available status codes', codes)


    return codes


  """

  Get log file list

  """

  def get_files(self, files_regex = None, files_list = None):


    files = []


    if files_regex is None and files_list is None:

      raise Exception("Either single file or regex file selection method is required.")


    if files_regex and files_list:

      raise Exception("Single file and regex file selection methods are mutually exclusive.")


    if files_regex:

      log_dir = '/'.join(files_regex.split('/')[:-1])

      file_part = files_regex.split('/')[-1]

      for lfile in os.listdir(log_dir):

        if os.path.isfile(log_dir + '/' + lfile):

          if re.match(file_part, lfile):

            files.append(log_dir + '/' + lfile)


    if files_list:

      for lfile in files_list:

        if os.path.isfile(lfile):

          files.append(lfile)


    if len(files) == 0:

      raise Exception("No matching files found.")


    files.sort()


    self.txt.print_verbose('Input files', files)

    return files


  """

  Common file checker

  """

  def check_file(self, sfile, flag, env = None):


    file_path = sfile


    if env is not None:

      for path in os.environ[env].split(os.pathsep):

        file_path = os.path.join(path, sfile)

        if os.path.isfile(file_path):

          break


    if os.access(file_path, eval(flag)):

      try:

        self.txt.print_verbose('File check', file_path, 'flags: ' + flag)

      except AttributeError:

        pass

      return True

    return False


  """

  Get Apache HTTPD LogFormat directive syntax

  """

  def get_httpd_logformat_directive(self, cfile, tag = None):


    if not self.check_file(cfile, "os.R_OK"):

      raise Exception("Couldn't open Apache HTTPD configuration file '{:s}'.".format(cfile))


    log_format = None

    self.txt.print_verbose('Apache configuration file', cfile)

    with open(cfile, 'r') as f:

      for line in f:

        if re.search('^[ ]+LogFormat ".*' + tag, line):

          r = re.search('^[ ]+LogFormat "(.*)(!?("))', line)

          log_format = r.groups()[0].replace('\\', '')

          break

      f.close()

      self.txt.print_verbose('Log format', log_format)

      return log_format


  """

  Geotool processing

  """

  def geotool_get_data(self, geotool_ok, geotool_exec, database_file, remote_host):


    host_country = None

    host_city    = None


    if re.match('|'.join(self.private_class_ip_networks), remote_host):

      host_country = "Local"

      host_city    = "Local"

      return {

        'host_country': host_country,

        'host_city':    host_city

      }


    if geotool_ok:


      host_country_main = subprocess.check_output([geotool_exec,'-d', database_file, remote_host]).rstrip().decode()

      host_country_main = host_country_main.split('\n')


      try:

        host_country = host_country_main[0].split(', ')[1]

      except:

        if re.search("Address not found", host_country_main[0]):

          host_country = "Unknown"


      if len(host_country_main) > 1:

        try:

          host_city = host_country_main[1].split(', ')[4]

          if re.search("N/A", host_city):

            host_city = "Unknown: " + host_country_main[1].split(', ')[6] + ', ' + host_country_main[1].split(', ')[7]

        except:

          pass


      return {

        'host_country': host_country,

        'host_city':    host_city

      }

    return None


  """

  Status code filter

  """

  def filter_status_code(self, status_codes, final_status):


    skip_line = True


    for status in status_codes:


      # Status consists of numerical status value (num) and validity boolean value (num_ok)

      if len(status) != 2:

        continue


      num, num_ok = status


      if num_ok:

        status = int(num)


        if status == final_status:

          skip_line = False

          break


    return skip_line


  """

  Country name filter

  """

  def filter_country(self, countries, host_country):


    skip_line = True


    for country in countries:

      if country[1] == "!":

        country = country[2:]

        if country.lower() == host_country.lower():

          skip_line = True

          break

        else:

          skip_line = False


      elif country.lower() == host_country.lower():

        skip_line = False

        break


    return skip_line


  """

  Get lines to be processed from input files and min/max input

  min and max work much like Unix tools 'head' and 'tail'

  Only a single value (min or max) is allowed

  """


  def get_file_lines_head_tail(self, sfiles, line_range_min = None, line_range_max = None, files_order = None):


    files_and_lines = {'files': [], 'lines_total': 0, 'range_min': 0, 'range_max': 0}

    files_tmp = []


    lines_count = 0

    line_start  = 0

    line_end    = 0


    if line_range_min and line_range_max:

      raise Exception("Either first or last line limit can be used, not both.")


    if files_order is None:

      raise Exception("Sorting order for input files missing.")


    if line_range_min is not None:

      if line_range_min < 0:

        line_range_min = None


    if line_range_max is not None:

      if line_range_max < 0:

        line_range_max = None


    for sfile in sfiles:


      if not self.check_file(sfile, "os.R_OK"):

        raise Exception("Couldn't read input file '{}'.".format(sfile))


      with open(sfile, 'r') as f:

        line_count = len(list(f))

        f.close()


        files_tmp.append({

          'file':          str(sfile),

          'modified_date': os.path.getmtime(sfile),

          'size':          os.path.getsize(sfile),

          'line_count':    line_count

        })


      if files_order == 'date':

        files_tmp.sort(key = lambda d: d['modified_date'])

      elif files_order == 'size':

        files_tmp.sort(key = lambda d: d['size'])

      elif files_order == 'name':

        files_tmp.sort(key = lambda d: d['file'])


    i = 0

    for sfile in files_tmp:


      line_end = (line_start + sfile['line_count']) - 1


      files_and_lines['files'].append({

        'file':              sfile['file'],

        'line_start_global': line_start,

        'line_end_global':   line_end,

        'line_start_local':  0,

        'line_end_local':    sfile['line_count'] - 1,

      })


      lines_count += line_count

      line_start = files_and_lines['files'][i]['line_end_global'] + 1

      i += 1


    range_line_start = files_and_lines['files'][0]['line_start_global']

    full_range                     = files_and_lines['files'][-1]['line_end_global']

    files_and_lines['range_min']   = range_line_start

    files_and_lines['range_max']   = full_range

    files_and_lines['lines_total'] = full_range - range_line_start

    i = 0


    # Read last N lines

    if line_range_max is not None:

      range_start = full_range - line_range_max

      if range_start <= 0:

        range_start = 0


      for l in files_and_lines['files']:

        if range_start >= l['line_start_global'] and range_start <= l['line_end_global']:

          l['line_start_global'] = range_start

          l['line_start_local']  = l['line_end_local'] - (l['line_end_global'] - range_start)

          del files_and_lines['files'][:i]

        i += 1


    # Read first N lines

    if line_range_min is not None:

      range_end = line_range_min

      if range_end >= full_range:

        range_end = full_range


      for l in files_and_lines['files']:

        if range_end >= l['line_start_global'] and range_end <= l['line_end_global']:

          l['line_end_local']  = l['line_end_local'] - l['line_start_local'] - (l['line_end_global'] - range_end)

          l['line_end_global'] = range_end

          del files_and_lines['files'][i + 1:]

        i += 1


    return files_and_lines


  """

  Get lines to be processed from input files and range input

  Range: <min> - <max>

  """


  def get_file_lines_range(self, sfiles, line_range_min=None, line_range_max=None):


    files_and_lines = {'files': [], 'lines_total': 0, 'range_min': 0, 'range_max': 0}


    lines_count            = 0

    line_start             = 0

    line_end               = 0

    range_line_start       = 0

    range_line_end         = 0

    range_line_start_found = False


    if line_range_min is not None:

      if line_range_min < 0:

        line_range_min = None


    if line_range_max is not None:

      if line_range_max < 0:

        line_range_max = None


    for sfile in sfiles:

      append = False


      if not self.check_file(sfile, "os.R_OK"):

        raise Exception("Couldn't read input file '{}'.".format(sfile))


      with open(sfile, 'r') as f:

        line_count = len(list(f))

        f.close()


        line_end = line_start + line_count


        if line_range_min is not None:

          if line_range_min >= line_start and line_range_min <= line_end:

            append = True

            line_start = line_range_min

        if line_range_min is None and line_end < line_range_max:

          append = True


        if line_range_max is not None:

          if line_range_max >= line_start and line_range_max <= line_end:

            append = True

            line_end = line_range_max

          if line_range_min < line_end and line_range_max > line_end:

            append = True

        if line_range_max is None and line_start > line_range_min:

          append = True


        if append:

          files_and_lines['files'].append({

            'file':              str(sfile),

            'line_start_global': line_start,

            'line_end_global':   line_end,

            'modified_date':     os.path.getmtime(sfile),

            'size':              os.path.getsize(sfile)

          })


          # Use only the first matching line_start value

          if not range_line_start_found:

            range_line_start_found = True

            range_line_start = line_start

          # Use the last matching line_end value

          range_line_end = line_end


        lines_count += line_count

        line_start  = lines_count + 1


    files_and_lines['lines_total'] = range_line_end - range_line_start

    files_and_lines['range_min']   = range_line_start

    files_and_lines['range_max']   = range_line_end


    return files_and_lines


  """

  Date checker

  """

  def date_checker(self, date_lower, date_upper, entry_time):


    # TODO Handle situations where date_upper & date_lower are equal


    if date_upper is not None and date_lower is not None:

      if date_lower > date_upper:

        raise Exception("Earlier day can't be later than later day")


    if date_upper is not None:

      if date_upper > datetime.now():

        raise Exception("Day can't be in the future")


    if date_lower is not None:

      if date_lower > datetime.now():

        raise Exception("Day can't be in the future")


    if date_lower is not None:

      if entry_time <= date_lower: return False


    if date_upper is not None:

      if entry_time >= date_upper: return False


    return True


  """

  Get output field definitions (sortby)

  """

  def get_out_field(self, fields, field_input):


    i = 0

    for field in fields:

      if field == field_input:

        return [True, i]

      i += 1

    return [False, i]


  """

  Get included fields

  """

  def get_included_fields(self, fields, included_fields, excluded_fields=None):


    if included_fields:


      # TODO: simplify logic

      n = 0

      included_fields = [[i.replace(' ',''), 0] for i in included_fields]

      for a in included_fields:

        a[1] += n

        n += 1

    if excluded_fields:

      excluded_fields = [i.replace(' ','') for i in excluded_fields]


    all_defined_fields = []

    fields_out         = {}


    if 'all' in included_fields or included_fields is None:

      included_fields = [[i, int(i['sort_index'])] for i in fields.keys()]


    if excluded_fields is not None:

      if 'all' in excluded_fields:

        raise Exception("No output fields defined.")


      # TODO: simplify logic

      n = 0

      included_fields = [[i, 0] for i in included_fields if i not in excluded_fields]

      for a in included_fields:

        a[1] += n

        n += 1

      all_defined_fields = [i[0] for i in included_fields] + excluded_fields

    else:

      all_defined_fields = included_fields


    for i in all_defined_fields:

      if i[0] not in fields.keys():

        raise Exception("Unknown field value: {}. Accepted values: {}".format(i, ','.join(fields.keys())))


    for a in included_fields:

      for key, value in fields.items():

        if key == a[0]:

          value['sort_index'] = a[1]

          value['included']   = True

          fields_out[key]     = value


    if len(fields_out.keys()) == 0:

      raise Exception("No output fields defined.")


    return fields_out


  """

  Process input files

  """

  def process_files(self):


    prev_host    = ""

    log_entries  = []

    codes        = []

    countries    = []


    # Log format as defined in Apache/HTTPD configuration file (LogFormat directive) or manually by user

    if self.args.log_format:

      log_format = self.args.log_format

    else:

      log_format = self.get_httpd_logformat_directive(self.args.httpd_conf_file, self.args.httpd_log_nickname)


    # Remove bytes in & out fields from local traffic pattern

    log_format_local = log_format.replace('%I','').replace('%O','').strip()


    parser = LogParser(log_format)

    parser_local = LogParser(log_format_local)


    if self.args.codes:

      codes = self.get_input_status_codes(self.populate_status_codes(), self.args.codes)


    if self.args.countries:

      countries = self.args.countries


    date_lower = self.args.date_lower

    date_upper = self.args.date_upper

    day_format = "%d-%m-%Y"


    if date_lower is not None:

      date_lower = datetime.strptime(date_lower, day_format)

    if date_upper is not None:

      date_upper = datetime.strptime(date_upper, day_format)


    geotool_exec          = self.args.geotool_exec

    geo_database_location = self.args.geo_database_location


    incl_fields = self.args.incl_fields

    if isinstance(self.args.incl_fields, str):

      incl_fields = self.args.incl_fields.split(',')


    use_geolocation = self.args.use_geolocation

    geotool_ok      = False


    if use_geolocation:

      if self.check_file(geotool_exec, "os.X_OK", "PATH") and self.check_file(geo_database_location, "os.R_OK"):

        geotool_ok = True


    if use_geolocation:

      if 'country' not in incl_fields:

        incl_fields.append('country')

      if 'city' not in incl_fields:

        incl_fields.append('city')


    if 'country' in incl_fields or 'city' in incl_fields:

      use_geolocation = True


    fields = self.get_included_fields(

      self.get_out_fields(),

      incl_fields,

      self.args.excl_fields

    )


    invalid_lines        = []

    field_names          = []

    country_seen         = False

    geo_data             = None

    skip_line_by_status  = False

    skip_line_by_country = False

    file_num             = 0

    stri                 = ""


    files_input        = self.get_files(self.args.files_regex, self.args.files_list)

    files_process_data = self.get_file_lines_head_tail(

      files_input,

      self.args.read_first_lines_num,

      self.args.read_last_lines_num,

      self.args.sort_logs_by_info

    )


    lines_total        = files_process_data['lines_total']

    files_total        = len(files_process_data['files'])


    self.txt.print_verbose(

      'Log entry range',

      str(files_process_data['files'][0]['line_start_global'])

      + ' - ' +

      str(files_process_data['files'][-1]['line_end_global'])

    )


    if self.args.show_progress or self.args.verbose:

      print(

        "File count: {}\nLines in total: {}".format(

          str(files_total),

          str(lines_total)

        ))


    for lfile in files_process_data['files']:


      if self.args.show_progress or self.args.verbose:

        print("Processing file: {:s} (lines: {:d}-{:d})".format(

          lfile['file'],

          lfile['line_start_global'], lfile['line_end_global']

        ))


      if not self.check_file(lfile['file'], "os.R_OK"):

        raise Exception("Couldn't read input file '{}'.".format(lfile['file']))


      with open(lfile['file'], 'r') as f:

        f = list(f)

        range_start = files_process_data['files'][file_num]['line_start_local']

        range_end   = files_process_data['files'][file_num]['line_end_local']


        lines = range(range_start, range_end)

        line_num = 1


        for line in lines:


          if self.args.show_progress or self.args.verbose:

            print("Processing log entry: {:d}/{:d} ({}%)".format(

              line_num,

              len(lines),

              round(100 * (line_num/len(lines)), 2)

            ), end = "\r")


          if line_num != 1 and not (skip_line_by_status or skip_line_by_country) and entry_data:

            prev_host      = entry_data['remote_host']

            prev_host_time = entry_data['time']


          try:

            if re.match('|'.join(self.private_class_ip_networks), f[line]):

              entry = parser_local.parse(f[line])

            else:

              entry = parser.parse(f[line])

          except InvalidEntryError:

            invalid_lines.append((lfile['file'], line_num))

            line_num += 1

            continue


          entry_data = {

            'time':         entry.request_time.replace(tzinfo = None),

            'user_agent':   entry.headers_in["User-Agent"],

            'http_request': str(entry.request_line).encode('unicode_escape').decode(),

            'remote_host':  entry.remote_host,

            'status':       entry.final_status

          }


          if not self.date_checker(date_lower, date_upper, entry_data['time']):

            line_num += 1

            continue


          if len(codes) > 0:

             skip_line_by_status = self.filter_status_code(codes, entry_data['status'])


          if use_geolocation:

            if prev_host == entry_data['remote_host']:

                country_seen = True

            else:

              country_seen = False


            if not country_seen:

              geo_data = self.geotool_get_data(geotool_ok, geotool_exec, geo_database_location, entry_data['remote_host'])


            if len(countries) > 0 and geo_data is not None:

              skip_line_by_country = self.filter_country(countries, geo_data['host_country'])


          else:

            skip_line_by_country = False


          if skip_line_by_status or skip_line_by_country:

            line_num += 1

            continue


          time_diff = str('NEW_CONN')

          if prev_host == entry_data['remote_host']:

            time_diff = (entry_data['time'] - prev_host_time).total_seconds()

            if isinstance(time_diff, float):

              time_diff = int(time_diff)

            if time_diff > 0:

              time_diff = "+" + str(time_diff)

          if line_num == 1 and file_num == 0:

            time_diff = int(0)


          if 'log_file_name' in fields:

            fields['log_file_name']['data'] = lfile

          if 'http_status' in fields:

            fields['http_status']['data'] = entry_data['status']

          if 'remote_host' in fields:

            fields['remote_host']['data'] = entry_data['remote_host']


          if geo_data is not None:

            if 'country' in fields:

              fields['country']['data'] = geo_data['host_country']

            if 'city' in fields:

              fields['city']['data'] = geo_data['host_city']


          if 'time' in fields:

            fields['time']['data'] = entry_data['time']

          if 'time_diff' in fields:

            fields['time_diff']['data'] = time_diff

          if 'user_agent' in fields:

            fields['user_agent']['data'] = entry_data['user_agent']

          if 'http_request' in fields:

            fields['http_request']['data'] = entry_data['http_request']


          stri = ""

          printargs = []


          for key, value in fields.items():

            if not use_geolocation and (key == 'country' or key == 'city'):

              continue

            if value['included']:

              stri += "\t" + value['format']

              printargs.append(value['data'])


              if not any(key in i for i in field_names):

                field_names.append((key, value['human_name']))


          log_entries.append(printargs)

          line_num += 1


      file_num += 1


    return [log_entries, files_process_data['files'], lines_total, stri, field_names, invalid_lines]


  """

  Execute

  """

  def execute(self):


    print_headers  = self.args.column_headers

    show_progress  = self.args.show_progress

    show_stats     = self.args.show_stats

    output_format  = self.args.output_format


    sortby_field   = self.args.sortby_field

    reverse_order  = self.args.sortby_reverse


    if self.args.incl_fields:

      if 'all' not in self.args.incl_fields:

        if sortby_field and sortby_field not in self.args.incl_fields:

          raise Exception("Sort-by field must be included in output fields.")


    results = self.process_files()

    result_entries = results[0]

    result_files   = results[1]

    result_lines   = results[2]

    stri           = results[3]

    out_fields     = [i[0] for i in results[4]]

    out_fields_human_names = [i[1] for i in results[4]]

    invalid_lines  = results[5]


    if sortby_field is None and reverse_order:

      raise Exception("You must define a field for reverse sorting.")


    if sortby_field is not None:

      out_field_validation = self.get_out_field(out_fields, sortby_field)

      if out_field_validation[0]:

        result_entries.sort(

          key = lambda r : r[out_field_validation[1]] or '',

          reverse = reverse_order

        )


    if output_format == 'table':


      if print_headers:

        print("\n")

        print(stri.format(*out_fields_human_names).lstrip())


      for entry in result_entries:

        c = 0

        entry_items = []

        while c < len(entry):

          entry_items.append(str(entry[c]))

          c += 1

        print(stri.format(*entry_items).lstrip())


    if output_format == 'csv':


      if print_headers:

        print(','.join(out_fields_human_names))


      for entry in result_entries:

        c = 0

        entry_items = []

        while c < len(entry):

          entry_items.append(str(entry[c]))

          c += 1

        print(','.join(entry_items))


    if show_stats:

      print(("\n" +

        "Processed files:       {:s}\n" +

        "Processed log entries: {:d}\n" +

        "Matched log entries:   {:d}\n"

             ).format(

          ', '.join([i['file'] for i in result_files['files']]),

          result_lines,

          len(result_entries)

        )

      )

      if len(invalid_lines) > 0:

        print("Invalid lines:")

        for i in invalid_lines:

          print("\tFile: {:s}, line: {:d}".format(i[0], i[1]))

        print("\n")


if __name__ == "__main__":

  app = program()

								  app.execute()