''' Module for viewing and analyzing Mevion logs from FLASH runs.

This Python module is designed to parse, display, and analyze data
  from log files generated by the Mevion S250i proton-therapy system
  when operating in FLASH mode.

For general usage instructions, see the accompanying README,
  which is available in both .html and .txt formats.

For more detailed information, either scroll through
  the full module listing below or call Python's
  built-in `help()` function on the desired object, function, etc.
We recommend starting with `help(LogAnalyzer)`,
  as this class handles all of the module's top-level functionality.

For quick reference, a basic session might look
  something like this:

from flashlogs import *
list_logfiles()
la = LogAnalyzer( get_logfile_name(0) )
la.show_next_record()
la.show_next_record()
la.show_next_record()
# etc...
la.profile()
la.save_profile()
# quit() # if desired

Authorship:
duvall@wustl.edu and haileizhang@wustl.edu,
Siteman Cancer Center,
Washington University School of Medicine
11/2024

'''

# NOTES
# =====
# TODO
#   - TIMESTAMP AT EACH DR
#   - Update docstrings
#   - Highlight low charges
#   - Plot distribution of pct. over threshold
# ---
# IDEAS
#   - "Line -- Leave gaps at NaNs" -- use this behavior of PANDAS' Line plots
#        to achieve color segmentation
#   - Use a list of newline file.tell() offsets to improve scanning efficiency (probably by quite a bit)
#   - Use a database (SQLite?) to create / access a master record
# ---
#FIXME (low-priority): Parse input data that contains numbers in engineering format (e.g., `2.3E4`, `1.2e-1`, etc.)
# ---
# DONE
#   - Add command to save all plots sequentially
#   - Show final value of Accum_Doseplane (convert to nC) instead of sum of Doseplane_pC
#   - Add a file selection dialog
#   - LIVE UPDATES WORKING!
#   - Easy method for saving profile plots
#   - Fix index offset in la.profile() (e.g., [0-500] and [501--1001] instead of [0-499] and [500--1000], etc.)
#   - Add examples to documentation
#   - Extract Delta_T and Q_Total for each delivery record
#   - Extract log timestamp from filename
# ---
# TEMP_IGNORE
#   - Fill in on spreadsheet
#   - Flash Doseplane (nC)


## SETTINGS
GUI = False
# GUI = True

## IMPORTS
# builtins
import re, io, sys, os, os.path, zipfile
import logging as log
from importlib import import_module
from datetime import datetime
from pprint import pp
from glob import glob
from tkinter.filedialog import askopenfilename
# externals
# External libraries are imported dynamically so that every missing package
#   can be reported in a single pass, rather than failing on the first one.
# Each entry is (importable module name, alias bound in this module's globals).
ext_libs = [ ('numpy', 'np'), ('matplotlib', 'mpl'), ('matplotlib.pyplot', 'plt'), ('pandas', 'pd') ]
# ext_libs = [ ('numpy', 'np'), ('matplotlib.pyplot', 'plt'), ('fake_module', 'fm'), ('fake_module2', 'fm2'), ('pandas', 'pd') ] #debug
flashlogs_loc = os.path.dirname(__file__)
# quoted so that the suggested shell command survives spaces in the path
install_path = '"' + os.path.join(flashlogs_loc, 'install.py') + '"'
import_success = True
print()
for (lib_name, lib_abbr) in ext_libs:
    try:
        lib = import_module(lib_name)
    except ModuleNotFoundError:
        print(f'Module `{lib_name}` not found; you will need to install it before using FlashLogs.')
        import_success = False
    except Exception:
        # Any other import-time failure (e.g., a broken installation).
        # `Exception` rather than a bare `except` so that Ctrl-C and
        #   SystemExit are not swallowed here.
        print('Import Error:', sys.exc_info())
        import_success = False
    else:
        # bind the module under its conventional alias (np, plt, pd, ...)
        globals()[lib_abbr] = lib
if not import_success:
    print('\nError: One or more required modules either not found or failed to load. Try the following:')
    print('  1) Exit Python.')
    print(f'  2) Run this command at your shell prompt: `python {install_path}`.')
    print('  3) Re-launch Python and try importing FlashLogs again.\n')
    cancel_err_msg = 'Canceling FlashLogs import; see instructions above.'
    raise RuntimeError(cancel_err_msg)
# external
from tabulate import tabulate as tab
# settings
# log.basicConfig(level = log.WARN)
log.basicConfig(level = log.INFO)
# log.basicConfig(level = log.DEBUG)


## CLASSES


class LogAnalyzer:
    '''
    The primary class for this module; it contains everything needed
        to parse and analyze an ASCII (i.e., ".txt") FLASH-mode log.
        To begin, call this constructor and supply the path
        to the desired log file as the argument.\n
    Attributes
    ----------
    logfile : str
        The filename or path of the logfile to analyze.
    all_delivery_records : list
        List of `.DeliveryRecord` objects parsed.
    all_pulse_records : list
        List of `pandas.DataFrame` objects parsed.
    records_without_pulses : list
        List of `.DeliveryRecord` objects whose
            pulse data was empty in the log file.
    record_start_lines : list
        List of lines in the logfile that indicate the beginning
            of a delivery record.
    total_delivery_records : int
        The total number of delivery records identified by parsing
            the logfile.
    current_record : `.DeliveryRecord`
        The current delivery record.
    pulse_index : `pandas.RangeIndex`
        Utility parameter for assigning unique index values
            to individual pulses.
    combined_pulse_records : `pandas.DataFrame`
        Single DataFrame containing all pulse data parsed from the log.\n
    Methods
    -------
    analyze()
        Runs the primary parsing and analysis routines.
    dfplot(df, **kwargs)
        A mildly-customized wrapper for `pandas.DataFrame.plot`.
    find_start_lines()
        Scans the logfile and identifies lines where delivery records start.
    get_delivery_record(record_no)
        Retrieves the requested `.DeliveryRecord` from `self.all_delivery_records`.
    update_pulse_index(dr)
        Utility method for tracking pulse-record indices.
    combine_pulse_records()
        Combines all pulse records into a single `pandas.DataFrame`.
    plot_record(record_no=0, **kwargs)
        Creates a `pandas.DataFrame` plot from the requested record's pulse data.
    show_record(record_no=0, x = 'Timestamp_uS', y = 'Doseplane_pC', **kwargs)
        Plots the pulses for a given record and prints some useful results.
    save_profile(name="")
        Saves the `.profile()` plot as an image file in the log's plot directory.
    print_log_raw()
        Prints the input logfile to stdout.
    debug_print()
        Utility method for general debugging.
    '''

    # LogAnalyzer
    def __init__(self, logfile = None):
        '''
        Initializes the analyzer's state, then immediately parses the log
            (via `.analyze()`) and prints a summary (via `.info()`).

        Parameters
        ----------
        logfile : str
            The filename or path of the logfile to analyze.
            Ignored when the module-level `GUI` flag is set; in that case
            the path comes from a file-selection dialog instead.

        Raises
        ------
        FileNotFoundError
            If the resulting filename is empty (no argument given,
            or the dialog returned an empty selection).
        '''
        self.low_charge_threshold = 25. #pC
        self.low_charge_list = []
        if GUI:
            self.logfile = askopenfilename(title = 'FlashLogs File Selection', filetypes = [('Log Files', '.log .txt'), ('All Files', '*')])
        else:
            self.logfile = logfile
        if not self.logfile:
            msg_nofilename = 'Empty filename specified; canceling FlashLogs.'
            raise FileNotFoundError(msg_nofilename)
        self.absdir = os.path.dirname( os.path.abspath(self.logfile) )
        self.log_timestamp = None
        self.analysis_timestamp = None
        self.all_delivery_records = []
        self.all_pulse_records = []
        self.records_without_pulses = []
        self.record_start_lines = []
        # self.record_end_chars = []
        self.linecount = 0
        self.eof_byte = 0
        self.total_delivery_records = 0
        self.current_record = None
        self.bFirst_record_shown = False
        self.record_figure = None
        self.record_axes = None
        self.pulse_index = None
        self.combined_pulse_records = None
        self.f_prof = None
        # logfile path minus its extension (may still contain directories)
        self.datarun = os.path.splitext(self.logfile)[0]
        # Only the *basename* of the run is appended to `absdir`: joining the
        #   full `datarun` would repeat the directory part for relative paths
        #   such as 'sub/log.txt' (giving '<cwd>/sub/sub/log_plots').
        self.plot_absdir = os.path.join(self.absdir, os.path.basename(self.datarun) + '_plots')
        # Kept inside the plot directory as an absolute path, so that writing
        #   it does not depend on the current working directory.
        self.empty_plots = os.path.join( self.plot_absdir, 'empty_plots.txt' )
        self.profile_name = 'profile.png'
        self.analyze()
        self.info()
        print('\n')

    # LogAnalyzer
    def analyze(self, start_line: int = 0):
        '''
        MAIN ANALYSIS
        Runs the primary parsing and analysis routines.
        This method is called automatically by the constructor, and again
            by `.update()` with `start_line` set to the previous line count.

        Parameters
        ----------
        start_line : int = 0
            Zero-based line of the logfile at which to start; delivery
            records that begin before this line are not parsed again.

        Returns
        -------
        None
        '''
        print(f'\nRunning FLASH-log analysis on \'{self.logfile}\'...\n')
        self.read_timestamp()
        self.find_start_lines(start_line)
        records_before = len(self.all_delivery_records)
        # loop over delivery records
        for line_no in self.record_start_lines:
            if line_no < start_line:
                continue    # already parsed by an earlier pass
            dr = DeliveryRecord(self.logfile, line_no)
            dr.record_no = len(self.all_delivery_records)
            if dr.record_no == 0:
                self.pulse_index = dr.pulses.index
            else:
                # shift this record's pulse index so it follows the previous one
                dr = self.update_pulse_index(dr)
            self.all_delivery_records.append(dr)
            if dr.pulses.empty:
                self.records_without_pulses.append(dr)
            else:
                self.all_pulse_records.append(dr.pulses)
                if np.any(dr.pulses.Doseplane_pC < self.low_charge_threshold):
                    dr.low_charge_flag = True
                    self.low_charge_list.append(dr)
            self.current_record = dr
            self.get_delivery_record(dr.record_no)
        # Bookkeeping is done once, after the loop. Doing it per record
        #   re-concatenated every pulse table on each iteration and crashed
        #   (`pd.concat([])`) whenever the leading records had no pulses.
        self.total_delivery_records = len(self.all_delivery_records)
        if len(self.all_delivery_records) > records_before:
            # new records were parsed in this pass: rewind to the first record
            self.current_record = self.all_delivery_records[0]
            if self.all_pulse_records:
                self.combine_pulse_records()
        self.analysis_timestamp = pd.Timestamp.now( tz = datetime.now().astimezone().tzinfo ).ceil('s')
        print('Done.\n')
        return None

    # LogAnalyzer
    def check_for_update(self) -> bool:
        '''
        Checks whether the logfile has grown since it was last scanned.

        Returns
        -------
        True if the file currently holds more lines than `self.linecount`,
            otherwise False.
        '''
        with open(self.logfile,'r') as stream:
            lines_now = sum(1 for _ in stream)
        has_grown = lines_now > self.linecount
        return has_grown

    # LogAnalyzer
    def update(self):
        '''
        Re-runs the analysis on any lines added to the logfile since the
            last scan; does nothing (beyond a message) if there are none.
        '''
        if not self.check_for_update():
            print('LogAnalyzer is already up-to-date.')
            return
        print('Updating...')
        # resume at the first line that has not been scanned yet
        self.analyze(self.linecount)

    # LogAnalyzer
    def read_timestamp(self):
        '''
        Attempts to parse a timestamp from the logfile's name;
            if successful, sets `self.timestamp`.\n
        '''
        ts = None
        # # attempt to determine local timezone
        # tzinfo_local = datetime.datetime.now().astimezone().tzinfo
        datetime_fmt = '%m%d%Y_%H%M%S'    # MMDDYYYY_HHMMSS; e.g., '05152024_152107' --> 2024/05/15 @ 15:21:07
        filename = self.logfile.split(os.path.sep)[-1]
        datetime_match = re.search( '\d{8}_\d{6}', filename )
        # time_parts = re.sub( ) #TODO: generalize date-time separator from '_' to '.+'
        if datetime_match:
            datetime_str = datetime_match.group(0)
            ts = pd.to_datetime( datetime_str, format = datetime_fmt )
            self.log_timestamp = ts
        return ts

    # LogAnalyzer
    def find_start_lines(self, start_line: int = 0):
        '''
        Finds lines where dose records begin.

        A record starts on any line that begins with 'Dose Record Parameters'.
        Matching (zero-based) line numbers are appended to
            `self.record_start_lines`; `self.linecount` and `self.eof_byte`
            are refreshed as a side effect.

        Parameters
        ----------
        start_line : int = 0
            Lines before this (zero-based) line are counted but not searched.

        Returns
        -------
        List of lines where dose records begin.
        '''
        record_pat = re.compile('Dose Record Parameters')
        # counted separately from the loop variable so that an empty file
        #   gives 0 instead of an unbound-variable error
        linecount = 0
        with open(self.logfile,'r') as f:
            for k, line in enumerate(f):
                linecount = k + 1
                if k < start_line:
                    continue
                if record_pat.match(line):
                    self.record_start_lines.append(k)
            self.eof_byte = f.tell()
        self.linecount = linecount
        return self.record_start_lines

    # LogAnalyzer
    def update_pulse_index(self, dr):
        '''
        Prepares delivery-pulse indices for concatenation.\n
        Parameters
        ----------
        dr : `.DeliveryRecord`
            The delivery record whose indices need updating.\n
        Returns
        -------
        A `.DeliveryRecord` object whose `.pulses` attribute
            has an updated `pandas.DataFrame.index` parameter.
        '''
        df = dr.pulses
        step = df.index.step
        ri_prev = self.pulse_index
        start = ri_prev.stop
        L = len(df)
        ri_next = pd.RangeIndex( start, start+L, step )
        dr.pulses = df.set_index(ri_next)
        self.pulse_index = ri_next
        return dr

    # LogAnalyzer
    def combine_pulse_records(self):
        '''
        Combines pulse records into a single DataFrame.\n
        Returns
        -------
        A single `pandas.DataFrame` containing all the pulse data
            found by the parser.
        '''
        DF = pd.concat(self.all_pulse_records).infer_objects()
        self.combined_pulse_records = DF
        return DF

    # LogAnalyzer
    def dfplot(self, df, **kwargs):
        '''
        Customized plotting for DataFrames.

        Wraps `pandas.DataFrame.plot`, choosing y-limits from the data and
            drawing the low-charge threshold as a red horizontal line.

        Parameters
        ----------
        df : `pandas.DataFrame`
            The DataFrame to plot.
        kwargs : dict
            Arguments to pass to `pandas.DataFrame.plot`.
            Most important are the following:
                `y = 'column_name'` : str
                    Name of the DataFrame column for the y-axis.
                    When it is a string, `ylim` is set from that column's
                    range (always including 0, with 20% headroom);
                    for 'Doseplane_pC' the upper limit is fixed at 40.
                `x = 'column_name'` : str, optional
                    Name of the DataFrame column for the x-axis; default = DataFrame index
                `kind = 'plot_type'` : str, optional
                    Name of the desired plot type (e.g., line, scatter, etc.); default = line

        Returns
        -------
        `pandas.DataFrame.plot` result.
        '''
        # `.get` so that omitting `y` no longer raises KeyError
        y_col = kwargs.get('y')
        if isinstance(y_col, str):
            ymin = np.min([0., 1.2*df[y_col].min()])
            ymax = np.max([0., 1.2*df[y_col].max()])
            kwargs['ylim'] = (ymin, ymax)
            if y_col == 'Doseplane_pC':
                kwargs['ylim'] = (ymin, 40.)
        if ('x' not in kwargs) and (not df.index.name): # if no x-var is specified, add an 'index' column to the dataframe to enable more plot types
            df = df.reset_index(names = 'Index')
            kwargs['x'] = 'Index'
        p = df.plot(**kwargs)
        # drawn on pyplot's current axes
        plt.axhline(y = self.low_charge_threshold, color = 'red', label = f'Low-charge Threshold ({self.low_charge_threshold} pC)')
        plt.legend(loc = 'lower left')
        return p

    # LogAnalyzer
    def get_delivery_record(self, record_no: int = 0):
        '''
        Looks up a delivery record by number.

        If this object has a `root` attribute (presumably set by a GUI
            front-end that is not part of this class -- TODO confirm),
            `recresults_var` and `recno_var` are refreshed first, using
            the *current* record's `gui_info()` and the requested number.

        Parameters
        ----------
        record_no : int = 0
            The numeric ID of the desired delivery record.

        Returns
        -------
        The requested `.DeliveryRecord` object.
        '''
        gui_attached = hasattr(self, 'root')
        if gui_attached:
            summary = self.current_record.gui_info()
            self.recresults_var.set(summary)
            self.recno_var.set(f'Current Delivery Record: {record_no:3d}')
        record = self.all_delivery_records[record_no]
        return record

    # LogAnalyzer
    def profile(self, column: str = 'Doseplane_pC', ylim = (0.,41.), **kwargs):
        '''
        Creates a profile of a given variable across all PulseCounts.

        The profile is a stack of 2D histograms (PulseCount on x, `column`
            on y) built from `self.combined_pulse_records`, one subplot per
            500-PulseCount segment, all sharing one color scale.

        Parameters
        ----------
        column : str = 'Doseplane_pC'
            Name of the pulse-data column to histogram on the y-axis.
        ylim : tuple = (0., 41.)
            Passed to `numpy.arange` to build the y-axis bin edges.
        kwargs : dict
            Extra arguments forwarded to `matplotlib.pyplot.hist2d`.

        Returns
        -------
        The figure holding the profile (also stored in `self.f_prof`).
        '''
        H = []
        HIMG = []
        VMAX = 0
        DF = self.combined_pulse_records
        L = 500     # PulseCounts per segment (i.e., per subplot)
        SEGMENTS = 3 # max PulseCount = 1500 ##HC##
        # One histogram over the whole range is computed first, only to find
        #   the largest bin count; it fixes the color scale of every segment.
        xbins = np.arange(SEGMENTS * L)
        ybins = np.arange(*ylim)
        h_vmax = np.histogram2d( DF.PulseCount, DF[column], (xbins,ybins) )[0]
        VMAX = np.max(h_vmax)
        f_prof, a_prof = plt.subplots(SEGMENTS,1)
        print('Generating profile plot...')
        for seg in range(SEGMENTS):
            # the pyplot calls below act on the "current" axes, so select it first
            plt.sca(a_prof[seg])
            LL = L * seg
            UL = L * (seg+1) + 1
            xbins = np.arange(LL, UL)
            # `between` is inclusive at both ends, so this keeps LL .. LL+L-1
            df = DF [ DF.PulseCount.between(LL,UL-2) ]
            turbmod = plt.colormaps['turbo']
            # first entry of the colormap's color table is replaced with white
            turbmod.colors[0] = [1., 1., 1.]
            # [::3] keeps elements 0 and 3 of hist2d's return value
            #   (per matplotlib's docs: the counts and the image)
            h, himg = plt.hist2d( df.PulseCount, df[column], (xbins,ybins), vmax = VMAX, cmap = turbmod, **kwargs )[::3];
            H.append(h)
            HIMG.append(himg)
            plt.xlabel('PulseCount')
            plt.ylabel(column)
        a_prof[0].set_title(f'FlashLogs Profile | Total Delivery Records: {self.total_delivery_records}')
        # no mappable is passed, so pyplot falls back on its current image
        cb = plt.colorbar( ax = a_prof.tolist(), label = 'Entries')
        # if bInteractive: plt.show()
        self.f_prof = f_prof
        print('Done.\n')
        return f_prof

    # LogAnalyzer
    def plot_record( self, record_no: int = np.nan, **kwargs ):
        '''
        Plots pulse data for a requested record.\n
        Parameters
        ----------
        record_no : int = 0
            The numeric ID of the desired delivery record.
        kwargs : dict #TODO: verify type
            Arguments to pass to `pandas.DataFrame.plot`.
            Most important are the following:
                `y = 'column_name'` : str, REQUIRED
                    Name of the DataFrame column for the y-axis
                `x = 'column_name'` : str, optional
                    Name of the DataFrame column for the x-axis; default = DataFrame index
                `kind = 'plot_type'` : str, optional
                    Name of the desired plot type (e.g., line, scatter, etc.); default = line\n
        Returns
        -------
        `pandas.DataFrame.plot` result.
        '''
        if np.isnan(record_no):
            record_no = self.current_record.record_no
        self.current_record = self.get_delivery_record(record_no)
        df = self.current_record.pulses
        if df.empty:
            print('No plot data found.')
            return None
        if 'title' not in kwargs: kwargs['title'] = f'Delivery Record #{record_no}  |  Total Records: {self.total_delivery_records}'
        if self.get_delivery_record(record_no).low_charge_flag:
            kwargs['color'] = 'orange'
        p = self.dfplot(df, **kwargs)
        self.record_figure = plt.gcf()
        self.record_axes = plt.gca()
        # if bInteractive: plt.show()
        return p

    # LogAnalyzer
    # def show_record( self, record_no: int = 0, x: str = 'Timestamp_uS', y: str = 'Doseplane_pC', **kwargs ):
    def show_record( self, record_no: int = 0, **kwargs ):
        '''
        Plots the pulses for a given record and prints some useful results.

        The plot is drawn on a figure/axes pair that is created on first use
            and then reused (and cleared) for every subsequent record.

        Parameters
        ----------
        Same as for `.plot_record`, just with extra defaults:
            `x` defaults to 'Timestamp_uS' and `y` to 'Doseplane_pC'
            (values passed by the caller take precedence);
            `ax` is always set to the record axes.

        Returns
        -------
        None
        '''
        # init
        if self.record_figure is None:
            self.record_figure, self.record_axes = plt.subplots()
        self.current_record = self.get_delivery_record(record_no)
        df = self.current_record.pulses
        print(f'Record: {self.current_record.record_no}')
        # plot: select the shared record axes and wipe the previous record
        plt.figure(self.record_figure)
        plt.sca(self.record_axes)
        plt.cla()
        if df.empty:
            print('No pulse data for this record.')
            return None
        # defaults only: an explicit x/y from the caller is no longer overwritten
        kwargs.setdefault('x', 'Timestamp_uS')
        kwargs.setdefault('y', 'Doseplane_pC')
        kwargs['ax'] = plt.gca()
        self.plot_record(record_no, **kwargs)
        # if bInteractive: plt.show()
        # print results
        if self.current_record.low_charge_flag:
            print('Warning: low charge detected.')
        for qty in self.current_record.results.values():
            print(f'\t{qty.name}:\t\t{qty.value} ({qty.unit})')
        print()
        return None

    # LogAnalyzer
    def show_next_record(self, **kwargs):
        '''
        Show the results for the next record in the list.
        '''
        index = self.current_record.record_no
        if (index==0) and not self.bFirst_record_shown:
            next_index = 0
            self.bFirst_record_shown = True
        else:
            next_index = index + 1
        final_index = self.total_delivery_records - 1
        if next_index > final_index:
            if self.check_for_update():
                self.update()
                self.current_record = self.get_delivery_record(next_index)
                self.show_record(self.current_record.record_no, **kwargs)
            else:
                print('This is the last record.')
        else:
            self.current_record = self.get_delivery_record(next_index)
            self.show_record(self.current_record.record_no, **kwargs)

    # LogAnalyzer
    def show_prev_record(self, **kwargs):
        '''
        Show the results for the previous record in the list.
        '''
        index = self.current_record.record_no
        prev_index = index - 1
        if prev_index < 0:
            print('This is the first record.')
        else:
            self.current_record = self.get_delivery_record(prev_index)
            self.show_record(self.current_record.record_no, **kwargs)

    # LogAnalyzer
    def go_next_record(self, **kwargs) -> bool:
        '''
        Go to the next record in the list.
        '''
        index = self.current_record.record_no
        next_index = index + 1
        final_index = self.total_delivery_records - 1
        if next_index > final_index:
            if self.check_for_update():
                self.update()
                self.current_record = self.get_delivery_record(next_index)
            else:
                print('This is the last record.')
                return False
        else:
            log.debug(f'  {index} -> {next_index}')
            self.current_record = self.get_delivery_record(next_index)
            self.show_record(self.current_record.record_no)
        return True

    # LogAnalyzer
    def go_prev_record(self, **kwargs) -> bool:
        '''
        Go to the previous record in the list.

        Parameters
        ----------
        kwargs : dict
            Accepted but not used.

        Returns
        -------
        True if the current record changed, False if already at the first record.
        '''
        index = self.current_record.record_no
        prev_index = index - 1
        at_first_record = prev_index < 0
        if at_first_record:
            print('This is the first record.')
            return False
        log.debug(f'  {index} -> {prev_index}')
        self.current_record = self.get_delivery_record(prev_index)
        self.show_record(self.current_record.record_no)
        return True

    # LogAnalyzer
    def go_record(self, new_record_no: int = 0, **kwargs):
        '''
        Go to the requested record in the list.
        '''
        _index = new_record_no
        final_index = self.total_delivery_records - 1
        if _index < 0:
            print('This is the first record.')
        elif _index > final_index:
            if self.check_for_update():
                self.update()
                self.current_record = self.get_delivery_record(_index)
            else:
                print('This is the last record.')
        else:
            log.debug(f'  {self.current_record.record_no} -> {_index}')
            self.current_record = self.get_delivery_record(_index)
            self.show_record(self.current_record.record_no)

    # LogAnalyzer
    def save_plot(self, savename = ''):
        os.makedirs(self.plot_absdir, exist_ok = True)
        if not os.path.isfile(self.empty_plots):
            # create list of empty plots
            with open(self.empty_plots, 'w') as f:
                hdr = 'Delivery Records Without Plot Data\n'
                hdr += '=' * ( len(hdr) - 1 ) + '\n'
                f.write(hdr)
        if self.current_record.pulses.empty:
            with open(self.empty_plots, 'a') as f:
                f.write(str(self.current_record.record_no)+'\n')
            return
        if not savename:
            recno = self.current_record.record_no
            savename = f'{recno:03d}'
            savepath = os.path.join(self.plot_absdir, savename)
        plt.savefig(savepath)
        print(f'Plot saved to `{os.path.join( os.path.basename(self.datarun)+"_plots", savename+".png" )}`.')

    # LogAnalyzer
    def save_all_plots(self):
        '''
        Saves a plot for every delivery record, makes sure the profile plot
            exists, and bundles everything into a .zip archive placed next to
            the plot directory.
        '''
        plotdir = os.path.basename(self.plot_absdir)
        print('Saving individual plots...')
        # save individual plots
        for dr in self.all_delivery_records:
            self.show_record(dr.record_no)
            self.save_plot()
        print('\nDone -- all plots saved!\n')
        # create and save profile plot if needed
        # (`isfile`, not `isdir`: the profile is a file, so the old test was never true)
        if not os.path.isfile( os.path.join(self.plot_absdir, self.profile_name) ):
            self.save_profile()
        # archive (and compress a bit)
        archive_name = plotdir + '.zip'
        print('Combining plots into single archive file...')
        # Work from inside the plot directory so archive members are stored
        #   under bare file names; the caller's working directory is restored
        #   even if archiving fails.
        start_dir = os.getcwd()
        os.chdir(self.plot_absdir)
        try:
            # `ls` is defined elsewhere in this module; presumably it returns
            #   the names in the current directory that match the pattern
            plot_names = ls('*.png')
            plot_names.sort()
            plot_names.append( os.path.basename(self.empty_plots) )
            with zipfile.ZipFile(os.path.join('..', archive_name), 'w') as archive:
                for plotfile in plot_names:
                    archive.write(plotfile)
        finally:
            os.chdir(start_dir)
        msg = f'Done! Individual plots saved under `{os.path.basename(self.plot_absdir)+os.sep}`;\n'
        msg += f'  plot archive saved in this directory as `{archive_name}`.\n'
        print(msg)

    # LogAnalyzer
    def save_profile(self, name: str = ""):
        '''
        Saves the profile plot into the plot directory,
            generating it first (via `.profile()`) if that has not been done.

        Parameters
        ----------
        name : str = ""
            File name for the saved plot; defaults to `self.profile_name`.
        '''
        os.makedirs(self.plot_absdir, exist_ok = True)
        if not self.f_prof:
            self.profile()
        # `name` used to be accepted and then ignored; honor it when given
        filename = name if name else self.profile_name
        print('Saving profile plot...')
        self.f_prof.savefig( os.path.join(self.plot_absdir, filename) )
        print(f'Done! Profile plot saved as `{filename}`.\n')

    # LogAnalyzer
    def print_log_raw(self):
        '''
        Prints the raw logfile to stdout.

        The lines are also kept in `self.lines`.

        Returns
        -------
        List of the logfile's lines (each still ending in its newline);
            probably very large.
        '''
        with open(self.logfile,'r') as f:
            self.lines = f.readlines()
        # the lines keep their own newlines, so none is added here
        print(''.join(self.lines), end = '')
        return self.lines

    # LogAnalyzer
    def info(self) -> None:
        '''
        Print a summary of the log analysis.

        The summary lists the logfile, its timestamp, the time of the
            analysis, the number of delivery records, and the current record.

        Returns
        -------
        None (the summary is printed, not returned).
        '''
        title = 'FLASH-Log Analyzer | Summary'
        summary_lines = [
            title,
            len(title)*'=',
            f'logfile:\t\t\t{self.logfile}',
            f'logfile_Timestamp:\t\t{self.log_timestamp}',
            f'Analysis_Timestamp:\t\t{self.analysis_timestamp}',
            f'Total_Delivery_Records:\t\t{self.total_delivery_records}',
            f'Current_Record:\t\t\t{self.current_record}',
        ]
        print('\n'.join(summary_lines))
        return None

    # LogAnalyzer
    def debug_print(self):
        '''
        Placeholder for ad-hoc debug output; currently prints only a heading.
        '''
        heading = 'debug_print:\n'
        print(heading)

    # LogAnalyzer
    def __repr__(self):
        '''
        Short description used by `repr()` / `print()`.

        Returns
        -------
        String naming the logfile this analyzer was created for.
        '''
        description = f'FLASH-Log Analyzer for `{self.logfile}`'
        return description


class DeliveryRecord:
    '''
    Class representing a single complete delivery record.
    It contains:
        - A list of environmental conditions (e.g., temperature, humidity, etc.) in `.params`; and
        - A PANDAS DataFrame of the pulse data.\n
    Attributes
    ----------
    logfile : 
        The filename or path of the logfile to analyze.
    start_line : int
        The line in the input logfile where this record starts.
    record_no : int
        The numeric ID of this delivery record.
    pat_pulse_start : `re.Pattern` object for finding the beginning
        of this record's pulse data.
    pat_pulse_end : `re.Pattern` object for finding the end
        of this record's pulse data.
    params : dict
        Dictionary of `.Quantity` objects representing environmental conditions.
    pulses : `pandas.DataFrame`
        PANDAS DataFrame containing this delivery record's pulse data.
    results : dict
        Dictionary of `.Quantity` objects representing certain results.\n
    Methods
    -------
    analyze()
        Performs primary parsing and analysis for this delivery record.
    fill_params()
        Parses and stores the environmental conditions for this delivery record.
    fill_pulses()
        Parses and stores the pulse data for this delivery record.
    info()
        Prints detailed information about this delivery record.
    print_params()
        Prints a nicely-formatted list of this delivery record's environmental conditions.
    print_results()
        Prints a nicely-formatted list of this delivery record's results.
    doserate_calculation()
        #TODO
    generate_xlxs_record()
        #TODO
    '''

    # DeliveryRecord
    def __init__(self, logfile, start_line: int = 0, record_no: int = 0):
        '''
        Stores the record's location, then immediately parses it (`.parse()`)
            and computes its results (`.analyze()`).

        Parameters
        ----------
        logfile : str
            The filename or path of the logfile to analyze.
        start_line : int = 0
            The line in the input logfile where this record starts.
        record_no: int = 0
            The numeric ID of this delivery record.
        '''
        self.logfile = logfile
        self.start_line = start_line
        self.record_no = record_no
        self.low_charge_flag = False
        # pulse data starts after a line holding only the 'FLASHDOS_PCM>' prompt
        # (raw string: '\s' in a plain literal is an invalid escape sequence)
        self.pat_pulse_start = re.compile(r'^FLASHDOS_PCM>\s*$')
        # ...and stops at the first empty line
        self.pat_pulse_end = re.compile('^$')
        self.params = dict()
        self.pulses = None
        self.results = dict()
        self.parse()
        self.analyze()

    # DeliveryRecord
    def parse(self):
        '''
        Reads this delivery record from the logfile, in two passes:
            1. `.fill_params()` -- the lines before the pulse data,
                stored as `.Quantity` objects in `self.params`;
            2. `.fill_pulses()` -- the pulse data,
                stored as a PANDAS DataFrame in `self.pulses`.
        '''
        for parse_step in (self.fill_params, self.fill_pulses):
            parse_step()
        return None

    # DeliveryRecord
    def analyze(self):
        '''
        Do the thing.
        '''
        if not self.pulses.empty:
            # bLow = np.any(self.pulses.Doseplane_pC < LOW_CHARGE_THRESHOLD)
            # bLow = np.sum(self.pulses.Doseplane_pC < LOW_CHARGE_THRESHOLD) > 1
            # if bLow:
            #     self.low_charge_flag = True
                # self.log_analyzer.low_charge_list[self.record_no] = 1
            t_i = Quantity( 't_initial', self.pulses.Timestamp_uS.iloc[0], 'uS' )
            t_f = Quantity( 't_final', self.pulses.Timestamp_uS.iloc[-1], 'uS' )
            dt = Quantity( 'delta_t', t_f.value - t_i.value, 'uS' )
            # q_tot = Quantity( 'q_total', self.pulses.Doseplane_pC.sum(), 'pC' )
            q_tot = Quantity( 'q_total', self.pulses.AccumDoseplane_nC.iloc[-1], 'nC' )
            for qty in [t_i, t_f, dt, q_tot]:
                self.results[qty.name] = qty
        return None

    # DeliveryRecord
    def fill_params(self):
        '''
        Parses the lines between the record's first line and the start of
            its pulse data into `.Quantity` objects, stored in `self.params`.

        Each quantity is keyed by the first letter of its name, plus the
            first letter of the name's second word if it has one.

        Returns
        -------
        The `self.params` dictionary.
        '''
        with open(self.logfile,'r') as log_stream:
            for line_no, text in enumerate(log_stream):
                if line_no > self.start_line:
                    # the pulse-data marker ends the parameter section
                    if self.pat_pulse_start.search(text):
                        break
                    quantity = Quantity.extract_from_string(text)
                    words = quantity.name.split(' ')
                    key = words[0][0]
                    if len(words) > 1 and len(words[1]) > 0:
                        key += words[1][0]
                    self.params[key] = quantity
        return self.params

    # DeliveryRecord
    def fill_pulses(self):
        '''
        Parses the pulse data for this delivery record.

        The pulse data is the CSV block between the line matching
            `self.pat_pulse_start` and the next line matching
            `self.pat_pulse_end`. Its first line is taken as the column
            header; spaces are dropped everywhere, and data lines are
            additionally stripped of anything but digits, '.', ',' and newlines.

        Returns
        -------
        A `pandas.DataFrame` for this delivery record, also stored in
            `self.pulses`. It is empty (with no columns) if the record
            contains no pulse block, e.g., when the log is still being written.
        '''
        #FIXME This filter is a temporary solution; it will not handle engineering format.
        #TODO: Correctly process engineering format -- remove any [Ee] that are not part of a number
        stray_chars = re.compile(r'[^\d\.,\n]')
        kept_lines = []
        in_pulse_block = False
        with open(self.logfile,'r') as f:
            for k, line in enumerate(f):
                if k <= self.start_line:
                    continue
                if self.pat_pulse_start.search(line):
                    in_pulse_block = True
                    continue
                if self.pat_pulse_end.search(line):
                    break
                if not in_pulse_block:
                    continue
                # the first kept line is the header and must keep its letters;
                #   every later line has stray non-numeric entities filtered out
                if kept_lines:
                    line = stray_chars.sub('', line)
                kept_lines.append(line.replace(' ',''))
        pulse_record = ''.join(kept_lines)
        if not pulse_record.strip():
            # `pd.read_csv` raises EmptyDataError on empty input; an empty
            #   frame lets callers use their usual `.pulses.empty` check.
            # The explicit RangeIndex keeps `.index.step` available.
            self.pulses = pd.DataFrame(index = pd.RangeIndex(0))
            return self.pulses
        # convert string to stream and read into PANDAS
        self.pulses = pd.read_csv(io.StringIO(pulse_record))
        return self.pulses

    # DeliveryRecord
    def info(self, do_table: bool = False):
        '''
        Summarizes this delivery record: a title line followed by one
            'name = value unit' line per entry in `self.params`.

        Parameters
        ----------
        do_table : bool = False
            If True, the summary is formatted as a one-column table
            and returned instead of being printed.

        Returns
        -------
        The table as a string if `do_table` is set; otherwise None
            (the summary is printed).
        '''
        title = f'Delivery Record #{self.record_no}'
        info = title + '\n' + len(title)*'-' + '\n'
        for value in self.params.values():
            info += f'{value.name} = {value.value}'
            if value.unit is not None:
                info += f' {value.unit}'
            info += '\n'
        info += '#\n'
        if do_table:
            lines = info.split('\n')
            log.debug(lines)
            # one single-cell row per line: given bare strings, tabulate
            #   would spread each line's characters over separate columns
            rows = [ [line] for line in lines ]
            return tab(rows, tablefmt = 'simple_outline')
        print(info)

    # DeliveryRecord
    def gui_info(self):
        '''
        Prints a GUI-friendly summary of this delivery record.
        '''
        df = self.pulses
        if df.empty:
            return 'This delivery record has no pulse data.' + '\n'*7
        N = df.shape[0]
        ind_begin = df.index[0]
        ind_end = ind_begin + N - 1
        info = [ ['Record_No:', self.record_no],
                ['Samples', N],
                ['t_initial', df.Timestamp_uS[ind_begin], 'μs'],
                ['t_final', df.Timestamp_uS[ind_end], 'μs'],
                ['Δt', df.Timestamp_uS[ind_end] - df.Timestamp_uS[ind_begin], 'μs'],
                ['Accum_Doseplane', df.AccumDoseplane_nC[ind_end], 'nC'] ]
        rec_info = tab(info, tablefmt = 'simple_outline', floatfmt = '20.4f') #//HC//
        return rec_info

    # DeliveryRecord
    def print_params(self):
        '''
        Prints every entry of `self.params`: its key on one line,
            the quantity itself on the next, then a blank line.
        '''
        print('\n', end='')
        for key, quantity in self.params.items():
            print(key)
            print(f'{quantity}\n')
        return None

    # DeliveryRecord
    def print_results(self):
        '''
        Prints every entry of `self.results`: its key on one line,
            the quantity itself on the next, then a blank line.
        '''
        print('\n', end='')
        for key, quantity in self.results.items():
            print(key)
            print(f'{quantity}\n')
        return None

    # DeliveryRecord
    def doserate_calculation(self):
        '''
        Placeholder: not implemented yet; does nothing and returns None.
        #TODO
        '''
        return None

    # DeliveryRecord
    def generate_xlxs_record(self):
        '''
        Placeholder: not implemented yet; does nothing and returns None.
        #TODO
        '''
        # planned: generate spreadsheet, leave space for conversion from charge to dose
        return None

    # DeliveryRecord
    def __repr__(self):
        '''
        Short tag used by `repr()` / `print()`.

        Returns
        -------
        'DR_' followed by this record's number.
        '''
        tag = f'DR_{self.record_no}'
        return tag


# Quantity
class Quantity:
    '''
    Class for recording a single physical quantity's name, value, and units.
    Primarily used (in this module) for storing the environmental conditions of a `.DeliveryRecord`.\n
    Attributes
    ----------
    name : str
        The name of this quantity.
    value : float
        The numerical value for this measurement.
    unit : str or None
        The units for this quantity (None when created by
        `extract_from_string` from a line that has no '(Unit)' part).
    '''

    # Quantity
    # Matches a parenthesized unit such as ' (pC) ', together with any
    #   surrounding whitespace. The match is greedy: it runs from the first
    #   '(' to the last ')'. Compiled once here rather than on every call.
    _UNIT_PATTERN = re.compile(r'\s*\(.*\)\s*')

    # Quantity
    def __init__(self, name: str = 'Qty', value: float = np.nan, unit: str = 'N/A'):
        '''
        Parameters
        ----------
        name : str = 'Qty'
            The name of this quantity (e.g., 'mass' or 'Doseplane').
        value : float = NaN
            The numerical value for this measurement.
        unit : str = 'N/A'
            The units for this quantity (e.g., 'kg' or 'pC').
        '''
        self.name = name
        self.value = value
        self.unit = unit

    # Quantity
    # extract quantity from string
    @classmethod
    def extract_from_string(cls, linestr: str) -> 'Quantity':
        '''
        Generates `.Quantity` object from string.
        Note: String is assumed to have Mevion FLASH-log format:
            'Quantity Name (Unit): Value', with '(Unit)'
            anywhere in the string (e.g., 'Quantity (Unit) Name: Value').
        Note: This is intended for use as an alternate constructor,
            hence its status as a `@classmethod`.
        Parameters
        ----------
        linestr : str
            Line containing quantity's name, unit, and value, as described above.
            The text before the first ':' is the name (and unit); the text
            between the first and second ':' (or the end of the line)
            is converted to the value with `float()`.\n
        Returns
        -------
        `.Quantity` object created from the input-line string.
            Its `name` has the '(Unit)' part removed and carries no leading
            or trailing whitespace. Its `unit` is the text inside the
            parentheses, or None if the name contains no '(Unit)' part.\n
        Raises
        ------
        IndexError
            If `linestr` contains no ':' separator.
        ValueError
            If the value part cannot be converted to a float.
        '''
        parts = [ part.strip() for part in linestr.split(':') ]
        name = parts[0]
        value = float(parts[1])
        unit = None
        unit_match = cls._UNIT_PATTERN.search(name)
        if unit_match:
            unit_text = unit_match.group(0)
            unit = unit_text.replace('(', '').replace(')', '').strip()
            # Put a single space where the unit was, so that
            #   'Quantity (Unit) Name' becomes 'Quantity Name', then strip so
            #   that 'Quantity Name (Unit)' does not keep a trailing space.
            before = name[:unit_match.start()]
            after = name[unit_match.end():]
            name = f'{before} {after}'.strip()
        return cls(name, value, unit)

    # Quantity
    # overrides
    def __repr__(self):
        '''
        String representation used when this object is printed or echoed.\n
        Returns
        -------
        Three tab-separated 'label / value' lines giving this quantity's
            name, value, and unit.
        '''
        return f'Name:\t{self.name}\nValue:\t{self.value}\nUnit:\t{self.unit}'


# # Results #TODO
# '''
# ds
# '''
# class Results:
#     def __init__(self):
#         self.q1 = Quantity()


# # Config
# class Config:
#     '''
#     Class for configuration options.\n
#     Attributes
#     ----------
#     '''


## FUNCTIONS

# listre -- regex-search a list
def listre( listname: list, restring: str ) -> list:
    """ Return the items of `listname` that match the regex `restring`.

    Matching uses `re.match` semantics: the pattern must match at the
      start of an item, but need not match the whole item.
    The order of the matching items is preserved.
    """
    matcher = re.compile(restring).match
    return [ entry for entry in listname if matcher(entry) ]
# glob-enabled ls()
def ls( globstr: str = '*' ) -> list:
    """ List paths matching a glob pattern, or the contents of a directory.

    Parameters
    ----------
    globstr : str = '*'
        A glob pattern or a directory path.
        '.' is treated as '*' (the current directory's contents).
        A path that is an existing directory, or that ends in '..',
          is expanded to list that directory's contents.
        One trailing path separator is ignored.

    Returns
    -------
    The list of matching paths, as returned by `glob`.
    """
    if globstr == '.':
        globstr = '*'
    if globstr == os.sep:
        # Root directory: stripping its only separator (below) would leave
        #   an empty pattern that matches nothing, so list it directly.
        return glob(os.sep + '*')
    if globstr.endswith(os.sep):
        globstr = globstr.removesuffix(os.sep)
    if globstr.endswith('..') or os.path.isdir(globstr):
        # a directory was given -- list what is inside it
        globstr = globstr + os.sep + '*'
    return glob(globstr)
# get a list of logfiles in the current directory
def get_logfile_list() -> list:
    """ Find candidate log files in the current directory.

    The result contains
      - every path matching the glob '*.log', followed by
      - every entry whose name starts with 'MevionFLASH_<8 digits>_<6 digits>'
          and that either has no '.' in its name or whose text after the
          last '.' is not 3 or 4 characters long (i.e., does not look like
          a file with some other ordinary extension).

    Returns
    -------
    A list of `(index, filename)` tuples, suitable for `get_logfile_name()`.
    """
    # NOTE: this is a regular expression, not a glob pattern -- glob has no
    #   `{n}` repetition syntax, so it must be applied with `re`, not `glob`.
    pat = re.compile(r'MevionFLASH_[0-9]{8}_[0-9]{6}')
    outlist = glob('*.log')
    for filename in glob('*'):
        if not pat.match(filename):
            continue
        split_filename = filename.split('.')
        filename_end = split_filename[-1]
        # ignore files with extensions other than '.log'
        #   ('.log' files themselves are already in `outlist`)
        if ( len(split_filename) == 1 ) or not ( 3 <= len(filename_end) <= 4 ):
            outlist.append(filename)
    return list( enumerate(outlist) )
# conveniently list log files
def list_logfiles():
    """ Print (via `pp`) the enumerated list returned by `get_logfile_list()`. """
    logfiles = get_logfile_list()
    pp(logfiles)
# conveniently get a log filename
def get_logfile_name( number: int = 0 ) -> str:
    """ Return the filename at position `number` in `get_logfile_list()`.

    Raises IndexError if `number` is out of range (e.g., when no
      log files are found).
    """
    _, filename = get_logfile_list()[number]
    return filename


## DATA

# # run as script
# if __name__ == '__main__':
#     FILE = askopenfilename()
#     la = LogAnalyzer(FILE)
#     # ...

# settings for matplotlib
# (to turn interactive plotting off, use `bInteractive = False` instead)
bInteractive = True
plt.interactive(bInteractive)
plt.rcParams['axes.grid'] = True  # show grid lines on axes by default

# other constants
IS_UNIX = os.name == 'posix'
LOGGER = log.getLogger()

# module information
NAME = 'FlashLogs'
VERSION_MAJOR, VERSION_MINOR, VERSION_BUILD = '1', '0', '5'
VERSION = f'{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_BUILD}'
# announce the module on import
print(f'{NAME} v{VERSION} loaded.\n')

## CRUCIAL NOTE FOR FINDING NON-NUMERIC 'PulseCount' VALUES: DF[pd.to_numeric(DF.PulseCount, errors='coerce').isnull()]['PulseCount']
## REGEX FOR FINDING VALID NUMBERS USING re.fullmatch: pat = re.compile( '\d+\.?\d*([Ee]\d+)?' )

## all pau!   )
