Source code for RAPIDpy.rapid

# -*- coding: utf-8 -*-
##
##  rapid.py
##  RAPIDpy
##
##  Created by Alan D Snow, 2015.
##  Copyright © 2015 Alan D Snow. All rights reserved.
##

from calendar import isleap
from csv import writer as csvwriter
import datetime
from dateutil.parser import parse
#from dateutil.tz import tzoffset
from multiprocessing import cpu_count
import numpy as np
import os
#USGS not returning tzinfo, so this is no longer needed
#from pytz import utc
from requests import get
from subprocess import Popen, PIPE
from time import gmtime
import xarray
#local
from .dataset import RAPIDDataset
from .helper_functions import csv_to_list, log, open_csv
from .postprocess import ConvertRAPIDOutputToCF

#------------------------------------------------------------------------------
#Main RAPID Manager Class
#------------------------------------------------------------------------------
[docs]class RAPID(object):
    """
    This class is designed to prepare the rapid_namelist file and run 
    the RAPID program. There are also other utilities added.

    Attributes:
        rapid_executable_location(Optional[str]): Path to the RAPID executable location.
        num_processors(Optional[int]): Number of procesors to use. Default is 1. Overridden if *use_all_processors* is True. 
        use_all_processors(Optional[bool]): If set to True, the RAPID program will use all available processors. Default is False.
        cygwin_bin_location(Optional[str]): If using Windows, this is the path to the Cygwin 'bin' directory.
        mpiexec_command(Optional[str]): This is the mpi execute commmand. Default is "mpiexec".
        ksp_type(Optional[str]): This is the solver type. Default is "richardson".
        **kwargs(Optional[str]): Keyword arguments matching the input parameters in the RAPID namelist.

    Linux Example:
    
    .. code:: python
        
        from RAPIDpy import RAPID
        
        rapid_manager = RAPID(rapid_executable_location='~/work/rapid/run/rapid'
                              use_all_processors=True,
                              ZS_TauR=24*3600, #duration of routing procedure (time step of runoff data)
                              ZS_dtR=15*60, #internal routing time step
                              ZS_TauM=365*24*3600, #total simulation time 
                              ZS_dtM=24*3600 #input time step 
                             )
                             
    Windows with Cygwin Example:
    
    .. code:: python

        from RAPIDpy import RAPID
        
        rapid_manager = RAPID(rapid_executable_location='C:/cygwin64/home/username/work/rapid/run/rapid',
                              cygwin_bin_location='C:/cygwin64/bin',
                              use_all_processors=True, 
                              ZS_TauR=24*3600, #duration of routing procedure (time step of runoff data)
                              ZS_dtR=15*60, #internal routing time step
                              ZS_TauM=365*24*3600, #total simulation time 
                              ZS_dtM=24*3600 #input time step 
                             )
    """
    def __init__(self, 
                 rapid_executable_location="", 
                 num_processors=1, 
                 use_all_processors=False, 
                 cygwin_bin_location="",
                 mpiexec_command="mpiexec", 
                 ksp_type="richardson",
                 **kwargs):
        """
        Initialize the class with variables given by the user
        """
        if os.name == "nt" and (not cygwin_bin_location or not os.path.exists(cygwin_bin_location))\
            and rapid_executable_location:
            raise Exception("Required to have cygwin_bin_location set if using windows!")
            
        self._rapid_executable_location = rapid_executable_location
        self._cygwin_bin_location = cygwin_bin_location
        self._cygwin_bash_exe_location = os.path.join(cygwin_bin_location, "bash.exe")
        self._mpiexec_command = mpiexec_command
        self._ksp_type = ksp_type
        
        #use all processors makes precedent over num_processors arg
        if use_all_processors == True:
            self._num_processors = cpu_count()
        elif num_processors > cpu_count():
            log("Num processors requested exceeded max. Set to max ...",
                "WARNING")
            self._num_processors = cpu_count()
        else:
            self._num_processors = num_processors
    
        #*******************************************************************************
        #Runtime options 
        #*******************************************************************************
        self.BS_opt_Qinit = False
        #!.false. --> no read initial flow    .true. --> read initial flow
        self.BS_opt_Qfinal = False
        #!.false. --> no write final flow     .true. --> write final flow 
        self.BS_opt_dam = False
        #!.false. --> no dam model used       .true. --> dam model used
        self.BS_opt_for = False
        #!.false. --> no forcing              .true. --> forcing
        self.BS_opt_influence = False
        #!.false. --> no output influence     .true. --> output influence
        self.IS_opt_routing = 1
        #!1       --> matrix-based Muskingum  2      --> traditional Muskingum
        #!3       --> Transbnd. matrix-based
        self.IS_opt_run = 1
        #!1       --> regular run             2      --> parameter optimization
        self.IS_opt_phi = 1
        #!1       --> phi1                    2      --> phi2
        
        #*******************************************************************************
        #Temporal information
        #*******************************************************************************
        #NOTE: ALL TIME IN SECONDS!
        #ALWAYS USED
        self.ZS_TauR = 0 #duration of routing procedure (time step of runoff data)
        self.ZS_dtR = 0 #internal routing time step
        #ONLY FOR REGULAR RUN
        self.ZS_TauM = 0 #total simulation time 
        self.ZS_dtM = 0 #input time step 
        #ONLY FOR OPTIMIZATION RUN
        self.ZS_TauO = 0 #total optimization time  
        self.ZS_dtO = 0 #observation time step
        #FORCING MODE (replace some values with observations) 
        self.ZS_dtF = 0 #time step of forcing data
        
        #*******************************************************************************
        #Domain in which input data is available
        #*******************************************************************************
        self.IS_riv_tot = 0 #number of river reaches in rapid connect file
        self.rapid_connect_file = '' #path to rapid_connect file
        self.IS_max_up = 0 #maximum number of ustream segments
        self.Vlat_file = '' #path to runoff file
        
        #*******************************************************************************
        #Domain in which model runs
        #*******************************************************************************
        self.IS_riv_bas = 0 #number of river reaches in subbasin
        self.riv_bas_id_file = '' #subbasin reach id file
        
        #*******************************************************************************
        #Initial instantaneous flow file
        #*******************************************************************************
        self.Qinit_file = '' #initial flow file (same order as rapid_connect)
        
        #*******************************************************************************
        #Final instantaneous flow file
        #*******************************************************************************
        self.Qfinal_file = '' #path to output final flow file
        
        #*******************************************************************************
        #Available dam data
        #*******************************************************************************
        self.IS_dam_tot = 0 #number of dams
        self.dam_tot_id_file = '' #ids of dam location
        
        #*******************************************************************************
        #Dam data used
        #*******************************************************************************
        self.IS_dam_use = 0 #number in subset of dam data to use
        self.dam_use_id_file = '' #ids of subset of dams
        
        #*******************************************************************************
        #Available forcing data
        #*******************************************************************************
        self.IS_for_tot = 0
        self.for_tot_id_file = ''
        self.Qfor_file = ''
        
        #*******************************************************************************
        #Forcing data used as model runs
        #*******************************************************************************
        self.IS_for_use = 0
        self.for_use_id_file = ''
        
        #*******************************************************************************
        #File where max (min) of absolute values of b (QoutR) are stored
        #*******************************************************************************
        self.babsmax_file = ''
        self.QoutRabsmin_file = ''
        self.QoutRabsmax_file = ''
        
        #*******************************************************************************
        #Regular model run
        #*******************************************************************************
        self.k_file = ''
        self.x_file = ''
        self.Qout_file = ''
        
        #*******************************************************************************
        #Optimization
        #*******************************************************************************
        self.ZS_phifac = 0
        #------------------------------------------------------------------------------
        #Routing parameters
        #------------------------------------------------------------------------------
        self.kfac_file = ''
        self.xfac_file = '' 
        self.ZS_knorm_init = 0
        self.ZS_xnorm_init = 0
        #------------------------------------------------------------------------------
        #Gage observations
        #------------------------------------------------------------------------------
        self.IS_obs_tot = 0
        self.obs_tot_id_file = ''
        self.Qobs_file = ''
        self.Qobsbarrec_file = ''
        self.IS_obs_use = 0
        self.obs_use_id_file = ''
        self.IS_strt_opt = 0
        
        
        
        self.update_parameters(**kwargs)
        

    def _get_cygwin_path(self, windows_path):
        """
        Convert windows path to cygpath
        """
        conv_cmd = [os.path.join(self._cygwin_bin_location, "cygpath.exe"),
                    "-u", windows_path]
        process = Popen(conv_cmd, 
                        stdout=PIPE, stderr=PIPE, shell=False)
        out, err = process.communicate()
        if err:
            print(err)
            raise Exception(err)
        
        return out.strip()

    def _create_symlink_cygwin(self, initial_path, final_path):
        """
        Use cygqin to generate symbolic link
        """
        symlink_cmd = [os.path.join(self._cygwin_bin_location, "ln.exe"),
                       "-s", self._get_cygwin_path(initial_path), 
                       self._get_cygwin_path(final_path)]
        process = Popen(symlink_cmd, 
                        stdout=PIPE, stderr=PIPE, shell=False)
        out, err = process.communicate()
        if err:
            print(err)
            raise Exception(err)
        
        return out.strip()

    def _dos2unix_cygwin(self, file_path):
        """
        Use cygwin to convert file to unix format
        """
        dos2unix_cmd = [os.path.join(self._cygwin_bin_location, "dos2unix.exe"),
                       self._get_cygwin_path(file_path)]
        process = Popen(dos2unix_cmd, 
                        stdout=PIPE, stderr=PIPE, shell=False)
        process.communicate()
    
[docs]    def update_parameters(self, **kwargs):
        """
        You can add or update rapid namelist parameters by using the name of 
        the variable in the rapid namelist file (this is case sensitive).

        Parameters:
            **kwargs(Optional[str]): Keyword arguments matching the input parameters in the RAPID namelist.

        Example:
        
        .. code:: python

            from RAPIDpy import RAPID
            
            rapid_manager = RAPID(
                                    #ADD PARAMETERS
                                 )
                                 
            rapid_manager.update_parameters(rapid_connect_file='../rapid-io/input/rapid_connect.csv',
                                            Vlat_file='../rapid-io/input/m3_riv.nc',
                                            riv_bas_id_file='../rapid-io/input/riv_bas_id.csv',
                                            k_file='../rapid-io/input/k.csv',
                                            x_file='../rapid-io/input/x.csv',
                                            Qout_file='../rapid-io/output/Qout.nc',
                                            )
        
            
        """
        #set arguments based off of user input
        for key, value in list(kwargs.items()):
            if key in dir(self) and not key.startswith('_'):
                setattr(self, key, value)
            else:
                log("Invalid RAPID parameter %s." % key,
                    "ERROR")
    
[docs]    def update_reach_number_data(self):
        """
        Update the reach number data for the namelist based on input files.

        .. warning:: You need to make sure you set *rapid_connect_file* and *riv_bas_id_file* before running this function.

        Example:
        
        .. code:: python
        
            from RAPIDpy import RAPID
            
            rapid_manager = RAPID(
                                  #ADD PARAMETERS
                                  rapid_connect_file='../rapid-io/input/rapid_connect.csv',
                                  riv_bas_id_file='../rapid-io/input/riv_bas_id.csv',
                                 )
                                 
            rapid_manager.update_reach_number_data()
                                 
        """
        
        if not self.rapid_connect_file or not self.rapid_connect_file:
            log("Missing rapid_connect_file. Please set before running this function ...",
                "ERROR")

        if not self.riv_bas_id_file or not self.riv_bas_id_file:
            log("Missing riv_bas_id_file. Please set before running this function ...",
                "ERROR")

        #get rapid connect info
        rapid_connect_table = np.loadtxt(self.rapid_connect_file, ndmin=2, delimiter=",", dtype=int)

        self.IS_riv_tot = int(rapid_connect_table.shape[0])
        self.IS_max_up = int(rapid_connect_table[:,2].max())
    
        #get riv_bas_id info
        riv_bas_id_table = np.loadtxt(self.riv_bas_id_file, ndmin=1, delimiter=",", usecols=(0,), dtype=int)
        self.IS_riv_bas = int(riv_bas_id_table.size)
    
[docs]    def update_simulation_runtime(self):
        """
        Updates the total simulation duration from
        the m3 file (Vlat_file) and the time step (ZS_TauR).
        
        .. warning:: You need to set the m3 file (Vlat_file) and the 
                     time step (ZS_TauR) before runnning this function.
        
        Example:
        
        .. code:: python
        
            from RAPIDpy import RAPID
            
            rapid_manager = RAPID(
                                  #ADD PARAMETERS
                                  Vlat_file='../rapid-io/input/m3_riv.csv',
                                  ZS_TauR=3*3600,
                                 )
    
            rapid_manager.update_simulation_runtime()
        """
        if not self.Vlat_file or not os.path.exists(self.Vlat_file):
            log("Need Vlat_file to proceed ...",
                "ERROR")

        if self.ZS_TauR <= 0:
            log("Missing routing time step ...",
                "ERROR")
    
        try:
            self.ZS_TauR = int(self.ZS_TauR)
        except Exception:
            log("Invalid routing time step: {0} ...".format(self.ZS_TauR),
                "ERROR")

        with RAPIDDataset(self.Vlat_file) as m3_nc:
            self.ZS_TauM = m3_nc.size_time*self.ZS_TauR
            self.ZS_TauO = m3_nc.size_time*self.ZS_TauR

[docs]    def generate_namelist_file(self, rapid_namelist_file):
        """
        Generate rapid_namelist file.

        Parameters:
            rapid_namelist_file(str): Path of namelist file to generate from 
                                      parameters added to the RAPID manager.
        """
        log("Generating RAPID namelist file ...",
            "INFO")
        try:
            os.remove(rapid_namelist_file)
        except OSError:
            pass
        
        with open(rapid_namelist_file,'w') as new_file:
            new_file.write('&NL_namelist\n')
            for attr, value in sorted(list(self.__dict__.items())):
                if not attr.startswith('_'):
                    if attr.startswith('BS'):
                        new_file.write("%s = .%s.\n" % (attr, str(value).lower()))
                    elif isinstance(value, int):
                        new_file.write("%s = %s\n" % (attr, value))
                    else:
                        if value:
                            #file path
                            if os.name == "nt":
                                #if windows generate file with cygpath
                                value = self._get_cygwin_path(value)
                            new_file.write("%s = \'%s\'\n" % (attr, value))
            new_file.write("/\n")
        
[docs]    def update_namelist_file(self, rapid_namelist_file, new_namelist_file=None):
        """
        Update existing namelist file with new parameters

        Parameters:
            rapid_namelist_file(str): Path of namelist file to use in the simulation. 
                                      It will be updated with any parameters added to
                                      the RAPID manager.
            new_namelist_file(Optional[str]): Path to output the updated namelist file.
        """
        if os.path.exists(rapid_namelist_file) and rapid_namelist_file:
            log("Adding missing inputs from RAPID input file ...",
                "INFO")
            with open(rapid_namelist_file, 'r') as old_file:
                for line in old_file:
                    line = line.strip()
                    if not line[:1].isalpha() or not line:
                        continue
                    line_split = line.split("=")
                    attr = line_split[0].strip()
                    value = None
                    if len(line_split)>1:
                        value = line_split[1].strip().replace("'", "").replace('"', "")
                        #convert integers to integers
                        try:
                            value = int(value)
                        except Exception:
                            pass
                        #remove dots from beginning & end of value
                        if attr.startswith('BS'):
                            value = value.replace(".", "") 
                    elif attr in self._no_value_attr_list:
                        value = True
                    #add attribute if exists
                    if attr in dir(self) \
                        and not attr.startswith('_'):
                        #set attribute if not set already
                        if not getattr(self, attr):
                            setattr(self, attr, value)
                    else:
                        log("Invalid argument {0}. Skipping ...".format(attr),
                            "INFO")

            if new_namelist_file is None:
                new_namelist_file = rapid_namelist_file
                
            self.generate_namelist_file(new_namelist_file)
        else:
            log("RAPID namelist file to update not found.",
                "ERROR")
            
[docs]    def make_output_CF_compliant(self, 
                                 simulation_start_datetime,
                                 comid_lat_lon_z_file="",
                                 project_name="Normal RAPID project"):
        """
        This function converts the RAPID output to be CF compliant. This will require
        a *comid_lat_lon_z.csv* file (See: :func:`~RAPIDpy.gis.centroid.FlowlineToPoint` 
        to generate the file). 
        
        .. note:: It prepends time an initial flow to your simulation from the *qinit_file*.
                  If no qinit file is given, an initial value of zero is added.
        
        .. warning:: This will delete your original Qout file.
        
        Parameters:
            simulation_start_datetime(datetime): Datetime object with the start date of the simulation.
            comid_lat_lon_z_file(Optional[str]): Path to the *comid_lat_lon_z.csv* file. If none given, spatial information will be skipped.
            project_name(Optional[str]): Name of project to add to the RAPID output file.

        Example:
        
        .. code:: python

        
            from RAPIDpy import RAPID

            rapid_manager = RAPID(rapid_executable_location='~/work/rapid/run/rapid'
                                  use_all_processors=True,
                                  ZS_TauR=24*3600, 
                                  ZS_dtR=15*60, 
                                  ZS_TauM=365*24*3600, 
                                  ZS_dtM=24*3600
                                  rapid_connect_file='../rapid-io/input/rapid_connect.csv',
                                  Vlat_file='../rapid-io/input/m3_riv.nc',
                                  riv_bas_id_file='../rapid-io/input/riv_bas_id.csv',
                                  k_file='../rapid-io/input/k.csv',
                                  x_file='../rapid-io/input/x.csv',
                                  Qout_file='../rapid-io/output/Qout.nc',
                                 )

            rapid_manager.run()
            
            rapid_manager.make_output_CF_compliant(simulation_start_datetime=datetime.datetime(1980, 1, 1),
                                                   comid_lat_lon_z_file='../rapid-io/input/comid_lat_lon_z.csv',
                                                   project_name="ERA Interim Historical flows by US Army ERDC")         
        """
        need_to_convert = True
        with RAPIDDataset(self.Qout_file) as qout_nc:
            need_to_convert = not qout_nc.is_time_variable_valid()
        if not need_to_convert:
            log("RAPID Qout file already CF compliant ...",
                "INFO")
        else:
            cv = ConvertRAPIDOutputToCF(rapid_output_file=self.Qout_file, #location of timeseries output file
                                        start_datetime=simulation_start_datetime, #time of the start of the simulation time
                                        time_step=self.ZS_TauR, #time step of simulation in seconds
                                        qinit_file=self.Qinit_file, #RAPID qinit file
                                        comid_lat_lon_z_file=comid_lat_lon_z_file, #path to comid_lat_lon_z file
                                        rapid_connect_file=self.rapid_connect_file, #path to RAPID connect file
                                        project_name=project_name, #name of your project
                                        output_id_dim_name='rivid', #name of ID dimension in output file, typically COMID or FEATUREID
                                        output_flow_var_name='Qout', #name of streamflow variable in output file, typically Qout or m3_riv
                                        print_debug=False)
            cv.convert()
    
[docs]    def run(self, rapid_namelist_file=""):
        """
        Run RAPID program and generate file based on inputs
        This will generate your rapid_namelist file and run RAPID from wherever
        you call this script (your working directory).
        
        Parameters:
            rapid_namelist_file(Optional(str)): Path of namelist file to use in the simulation. 
                                                It will be updated with any parameters added to
                                                the RAPID manager.
        
        Linux Example:
        
        .. code:: python
        
            from RAPIDpy import RAPID
            
            rapid_manager = RAPID(rapid_executable_location='~/work/rapid/src/rapid'
                                  use_all_processors=True,
                                 )

            rapid_manager.update_parameters(rapid_connect_file='../rapid-io/input/rapid_connect.csv',
                                            Vlat_file='../rapid-io/input/m3_riv.nc',
                                            riv_bas_id_file='../rapid-io/input/riv_bas_id.csv',
                                            k_file='../rapid-io/input/k.csv',
                                            x_file='../rapid-io/input/x.csv',
                                            Qout_file='../rapid-io/output/Qout.nc',
                                            )
                
            rapid_manager.update_reach_number_data()
            rapid_manager.update_simulation_runtime()
            rapid_manager.run(rapid_namelist_file='../rapid-io/input/rapid_namelist')
                       
        Linux Reservoir Forcing Flows Example:
        
        .. code:: python
        
            from RAPIDpy import RAPID

            rapid_manager = RAPID(rapid_executable_location='~/work/rapid/src/rapid',
                                  num_processors=4,
                                  IS_for_tot=4,
                                  IS_for_use=4,
                                  for_tot_id_file='../rapid-io/input/dam_id.csv',
                                  for_use_id_file='../rapid-io/input/dam_id.csv',
                                  Qfor_file='../rapid-io/input/qout_dams.csv',
                                  ZS_dtF=86400,
                                  BS_opt_for=True,
                                  )
            
            rapid_manager.run(rapid_namelist_file='../rapid-io/input/rapid_namelist_regular_run')
            
        Windows with Cygwin Example:
    
        .. code:: python
        
            from RAPIDpy import RAPID
            from os import path
            
            rapid_manager = RAPID(rapid_executable_location='C:/cygwin64/home/username/work/rapid/run/rapid',
                                  cygwin_bin_location='C:/cygwin64/bin',
                                  use_all_processors=True, 
                                  ZS_TauR=24*3600, 
                                  ZS_dtR=15*60, 
                                  ZS_TauM=365*24*3600, 
                                  ZS_dtM=24*3600
                                 )
                                 
            rapid_input_folder = 'C:/cygwin64/home/username/work/rapid-io/input'      
            rapid_output_folder = 'C:/cygwin64/home/username/work/rapid-io/output'      
            rapid_manager.update_parameters(rapid_connect_file=path.join(rapid_input_folder, 'rapid_connect.csv'),
                                            Vlat_file=path.join(rapid_input_folder, 'm3_riv.nc'),
                                            riv_bas_id_file=path.join(rapid_input_folder, 'riv_bas_id.csv'),
                                            k_file=path.join(rapid_input_folder, 'k.csv'),
                                            x_file=path.join(rapid_input_folder, 'x.csv'),
                                            Qout_file=path.join(rapid_output_folder, 'Qout.nc'),
                                            )
                
            rapid_manager.update_reach_number_data()
            rapid_manager.update_simulation_runtime()
            rapid_manager.run()
        """
        if not self._rapid_executable_location or not self._rapid_executable_location:
            log("Missing rapid_executable_location. Please set before running this function ...",
                "ERROR")

        time_start = datetime.datetime.utcnow()
        temp_rapid_namelist_file = os.path.join(os.getcwd(), "rapid_namelist")
    
        if not rapid_namelist_file or not os.path.exists(rapid_namelist_file):
            #generate input file if it does not exist
            self.generate_namelist_file(temp_rapid_namelist_file)
        else:
            #update existing file
            self.update_namelist_file(rapid_namelist_file, temp_rapid_namelist_file)

        local_rapid_executable_location = os.path.join(os.path.dirname(temp_rapid_namelist_file), "rapid_exe_symlink")

        def rapid_cleanup(*args):
            """
            Cleans up the rapid files generated by the process
            """
            for arg in args:
                #remove files
                try:
                    os.remove(arg)
                except OSError:
                    pass

        #create link to RAPID if needed
        temp_link_to_rapid = ""
        if not self._rapid_executable_location == local_rapid_executable_location:
            rapid_cleanup(local_rapid_executable_location)
            if os.name == "nt":
                self._create_symlink_cygwin(self._rapid_executable_location, 
                                            local_rapid_executable_location)
            else:
                os.symlink(self._rapid_executable_location, local_rapid_executable_location)
            temp_link_to_rapid = local_rapid_executable_location

        
        #run RAPID
        log("Running RAPID ...",
            "INFO")
        run_rapid_script = ""
        if os.name == "nt":
            run_rapid_script = os.path.join(os.getcwd(), "run_rapid.sh")
            with open(run_rapid_script, "w") as run_rapid:
                run_rapid.write("#!/bin/sh\n")
                run_rapid.write("cd {}\n".format(self._get_cygwin_path(os.getcwd())))
                if self._num_processors > 1:
                    run_rapid.write("{0} -np {1} {2} -ksp_type {3}\n".format(self._mpiexec_command,
                                                                             self._num_processors,
                                                                             self._get_cygwin_path(local_rapid_executable_location),
                                                                             self._ksp_type))
                else:
                    #htcondor will not allow mpiexec for single processor jobs
                    #this was added for that purpose
                    run_rapid.write("{0} -ksp_type {1}\n".format(self._get_cygwin_path(local_rapid_executable_location),
                                                                 self._ksp_type))
                
            
            self._dos2unix_cygwin(run_rapid_script)
            run_rapid_command = [self._cygwin_bash_exe_location, "-l", "-c", 
                                 self._get_cygwin_path(run_rapid_script)]

        else:
            #htcondor will not allow mpiexec for single processor jobs
            #this was added for that purpose
            run_rapid_command = [local_rapid_executable_location, 
                                 "-ksp_type", self._ksp_type]
                                 
            if self._num_processors > 1:
                run_rapid_command = [self._mpiexec_command, "-n", str(self._num_processors),
                                     local_rapid_executable_location, 
                                     "-ksp_type", self._ksp_type]

        process = Popen(run_rapid_command, 
                        stdout=PIPE, stderr=PIPE, shell=False)
        out, err = process.communicate()
        if err:
            rapid_cleanup(temp_link_to_rapid, temp_rapid_namelist_file, run_rapid_script)
            raise Exception(err)
        else:
            log('RAPID output:',
                "INFO")
            for line in out.split(b'\n'):
                print(line)
        rapid_cleanup(temp_link_to_rapid, temp_rapid_namelist_file, run_rapid_script)
        log("Time to run RAPID: %s" % (datetime.datetime.utcnow()-time_start),
            "INFO")

[docs]    def generate_qinit_from_past_qout(self, qinit_file, time_index=-1, out_datetime=None):
        """
        Generate qinit from a RAPID qout file

        Parameters:
            qinit_file(str): Path to output qinit_file.
            time_index(Optional[int]): Index of simulation to generate initial flow file. Default is the end.
            out_datetime(Optional[datetime]): Datetime object containing time of initialization.

        Example:
        
        .. code:: python
        
            from RAPIDpy import RAPID
            
            rapid_manager = RAPID(Qout_file='/output_mississippi-nfie/Qout_k2v1_2005to2009.nc', 
                                  rapid_connect_file='/input_mississippi_nfie/rapid_connect_ECMWF.csv'
                                 )
                                 
            rapid_manager.generate_qinit_from_past_qout(qinit_file='/input_mississippi_nfie/Qinit_2008_flood.csv',
                                                        time_index=10162)
        """

        if not self.Qout_file or not os.path.exists(self.Qout_file):
            log('Missing Qout_file. Please set before running this function ...',
                "ERROR")

        if not self.rapid_connect_file or not self.rapid_connect_file:
            log('Missing rapid_connect file. Please set before running this function ...',
                "ERROR")
 
        log("Generating qinit file from qout file ...",
            "INFO")
        #get information from dataset
        with xarray.open_dataset(self.Qout_file) as qds:
            rivid_array = qds.rivid.values
            if out_datetime is None:
                streamflow_values = qds.isel(time=time_index).Qout.values
            else:
                streamflow_values = qds.sel(time=str(out_datetime)).Qout.values
                
        log("Reordering data ...",
            "INFO")

        stream_id_array = np.loadtxt(self.rapid_connect_file, ndmin=1, delimiter=",", usecols=(0,), dtype=int)
        init_flows_array = np.zeros(stream_id_array.size)
        for riv_bas_index, riv_bas_id in enumerate(rivid_array):
            try:
                data_index = np.where(stream_id_array==riv_bas_id)[0][0]
                init_flows_array[data_index] = streamflow_values[riv_bas_index]
            except Exception:
                log('riv bas id {0} not found in connectivity list.'.format(riv_bas_id),
                    "WARNING")
        
        log("Writing to file ...",
            "INFO")
        with open_csv(qinit_file, 'w') as qinit_out:
            for init_flow in init_flows_array:
                qinit_out.write('{0}\n'.format(init_flow))

        self.Qinit_file = qinit_file
        self.BS_opt_Qinit = True
        log("Initialization Complete!",
            "INFO")

[docs]    def generate_seasonal_intitialization(self, 
                                          qinit_file,
                                          datetime_start_initialization=datetime.datetime.utcnow()):
        """
        This creates a seasonal qinit file from a RAPID qout file. This
        requires a simulation Qout file with a longer time period of record and
        to be CF compliant. It takes the average of the current date +- 3 days
        and goes back as far as possible. 
        
        Parameters:
            qinit_file(str): Path to output qinit_file.
            datetime_start_initialization(Optional[datetime]): Datetime object with date of simulation to 
                                                     go back through the years and get a running average
                                                     to generate streamflow initialization. Default is utcnow.


        This example shows how to use it:
        
        .. code:: python
        
            from RAPIDpy.rapid import RAPID
            
            rapid_manager = RAPID(Qout_file='/output_mississippi-nfie/Qout_2000to2015.nc', 
                                  rapid_connect_file='/input_mississippi_nfie/rapid_connect_ECMWF.csv'
                                  )
        
            rapid_manager.generate_seasonal_intitialization(qinit_file='/input_mississippi_nfie/Qinit_seasonal_avg_jan_1.csv')
        """
        #get information from datasets
        if not self.Qout_file or not os.path.exists(self.Qout_file):
            log("Missing Qout_file. Please set before running this function ...",
                "ERROR")

        if not self.rapid_connect_file or not self.rapid_connect_file:
            log("Missing rapid_connect file. Please set before running this function ...",
                "ERROR")
        
        day_of_year = datetime_start_initialization.timetuple().tm_yday
        min_day = day_of_year - 3
        max_day = day_of_year + 3

        with RAPIDDataset(self.Qout_file) as qout_hist_nc:
            if not qout_hist_nc.is_time_variable_valid():
                log("File must be CF 1.6 compliant with valid time variable ...",
                    "ERROR")

            log("Generating seasonal average qinit file from qout file ...",
                "INFO")
            
            log("Determining dates with streamflows of interest ...",
                "INFO")
            
            time_indices = []
            for idx, t in enumerate(qout_hist_nc.get_time_array()):
                var_time = gmtime(t)
                compare_yday = var_time.tm_yday
                #move day back one past because of leap year adds 
                #a day after feb 29 (day 60)
                if isleap(var_time.tm_year) and compare_yday > 60:
                    compare_yday -= 1
                #check if date within range of season
                if compare_yday >= min_day and compare_yday < max_day:
                    time_indices.append(idx)

            if not time_indices:
                log("No time steps found within range ...",
                    "ERROR")
            
            log("Extracting data ...",
                "INFO")
                
            streamflow_array = qout_hist_nc.get_qout(time_index_array=time_indices)

            log("Reordering data...",
                "INFO")
            stream_id_array = np.loadtxt(self.rapid_connect_file, ndmin=1, delimiter=",", usecols=(0,), dtype=int)
            init_flows_array = np.zeros(stream_id_array.size)
            for riv_bas_index, riv_bas_id in enumerate(qout_hist_nc.get_river_id_array()):
                try:
                    data_index = np.where(stream_id_array==riv_bas_id)[0][0]
                    init_flows_array[data_index] = np.mean(streamflow_array[riv_bas_index])
                except Exception:
                    log('riv_bas_id {0} not found in connectivity list.'.format(riv_bas_id),
                        "WARNING")

            log("Writing to file ...",
                "INFO")
            with open_csv(qinit_file, 'w') as qinit_out:
                for init_flow in init_flows_array:
                    qinit_out.write('{}\n'.format(init_flow))

            log("Initialization Complete!",
                "INFO")

[docs]    def generate_usgs_avg_daily_flows_opt(self, 
                                          reach_id_gage_id_file,
                                          start_datetime, 
                                          end_datetime,
                                          out_streamflow_file, 
                                          out_stream_id_file):
        """
        Generate daily streamflow file and stream id file required for calibration 
        or for substituting flows based on USGS gage ids associated with stream ids.
        
        Parameters:
            reach_id_gage_id_file(str): Path to reach_id_gage_id file.
            start_datetime(datetime): A datetime object with the start date to download data.
            end_datetime(datetime): A datetime object with the end date to download data.
            out_streamflow_file(str): The path to output the streamflow file for RAPID.
            out_stream_id_file(str): The path to output the stream ID file associated with the streamflow file for RAPID.

        Example *reach_id_gage_id_file*::
        
            COMID, USGS_GAGE_ID
            2000, 503944
            ...

        .. warning:: Overuse will get you blocked from downloading data from USGS.
        
        .. warning:: This code does not clean the data in any way. Thus, you
                     are likely to run into issues if you simply use the raw data.
                     
        .. warning:: The code skips gages that do not have data
                     for the entire time period.
        
        Simple Example:
        
        .. code:: python
        
            import datetime
            from os.path import join
            from RAPIDpy import RAPID
            
            main_path = "/home/username/data"

            rapid_manager = RAPID()
            rapid_manager.generate_usgs_avg_daily_flows_opt(reach_id_gage_id_file=join(main_path,"mississippi_usgsgage_id_comid.csv"),
                                                            start_datetime=datetime.datetime(2000,1,1),
                                                            end_datetime=datetime.datetime(2014,12,31),
                                                            out_streamflow_file=join(main_path,"streamflow_2000_2014.csv"), 
                                                            out_stream_id_file=join(main_path,"streamid_2000_2014.csv"))
                                                            
        
        Complex Example:
        
        .. code:: python
        
            import datetime
            from os.path import join
            from RAPIDpy import RAPID
            
            main_path = "/home/username/data"
                
            rapid_manager = RAPID(rapid_executable_location='~/work/rapid/run/rapid'
                                  use_all_processors=True,
                                  ZS_TauR=24*3600, 
                                  ZS_dtR=15*60, 
                                  ZS_TauM=365*24*3600, 
                                  ZS_dtM=24*3600
                                 )

            rapid_manager.update_parameters(rapid_connect_file='../rapid-io/input/rapid_connect.csv',
                                            Vlat_file='../rapid-io/input/m3_riv.nc',
                                            riv_bas_id_file='../rapid-io/input/riv_bas_id.csv',
                                            k_file='../rapid-io/input/k.csv',
                                            x_file='../rapid-io/input/x.csv',
                                            Qout_file='../rapid-io/output/Qout.nc',
                                            )
                
            rapid_manager.update_reach_number_data()
            rapid_manager.update_simulation_runtime()
            rapid_manager.generate_usgs_avg_daily_flows_opt(reach_id_gage_id_file=join(main_path,"mississippi_usgsgage_id_comid.csv"),
                                                            start_datetime=datetime.datetime(2000,1,1),
                                                            end_datetime=datetime.datetime(2014,12,31),
                                                            out_streamflow_file=join(main_path,"streamflow_2000_2014.csv"), 
                                                            out_stream_id_file=join(main_path,"streamid_2000_2014.csv"))
            rapid_manager.run()

        """
        log("Generating avg streamflow file and stream id file required for calibration ...",
            "INFO")
        log("Generating avg streamflow file and stream id file required for calibration ...",
            "INFO")
        reach_id_gage_id_list = csv_to_list(reach_id_gage_id_file)
# USGS not returning tzinfo anymore, so removed tzinfo operations
#       if start_datetime.tzinfo is None or start_datetime.tzinfo.utcoffset(start_datetime) is None:
#            start_datetime = start_datetime.replace(tzinfo=utc)
#        if end_datetime.tzinfo is None or end_datetime.tzinfo.utcoffset(end_datetime) is None:
#            end_datetime = end_datetime.replace(tzinfo=utc)
        gage_data_matrix = []
        valid_comid_list = []
        
        #add extra day as it includes the start date (e.g. 7-5 is 2 days, but have data for 5,6,7, so +1)
        num_days_needed = (end_datetime-start_datetime).days + 1

        gage_id_list = []
        for row in reach_id_gage_id_list[1:]:
            station_id = row[1]
            if len(row[1]) == 7:
                station_id = '0' + row[1]
            gage_id_list.append(station_id)
        
        num_gage_id_list = np.array(gage_id_list, dtype=np.int32)
        log("Querying Server for Data ..." ,
            "INFO")
    
        query_params = {
                        'format': 'json',
                        'sites': ",".join(gage_id_list),
# USGS not returning tzinfo anymore, so removed tzinfo operations 
#                        'startDT': start_datetime.astimezone(tzoffset(None, -18000)).strftime("%Y-%m-%d"),
#                        'endDT': end_datetime.astimezone(tzoffset(None, -18000)).strftime("%Y-%m-%d"),
                        'startDT': start_datetime.strftime("%Y-%m-%d"),
                        'endDT': end_datetime.strftime("%Y-%m-%d"),
                        'parameterCd': '00060', #streamflow
                        'statCd': '00003' #average
                       }
        response = get("http://waterservices.usgs.gov/nwis/dv", params=query_params)
        if response.ok:
            data_valid = True
            try:
                requested_data = response.json()['value']['timeSeries']
            except IndexError:
                data_valid = False
                pass
            
            if data_valid:
                for time_series in enumerate(requested_data):
                    usgs_station_full_name = time_series[1]['name']
                    usgs_station_id = usgs_station_full_name.split(":")[1]
                    gage_data = []
                    for time_step in time_series[1]['values'][0]['value']:
                        local_datetime = parse(time_step['dateTime'])
                        if local_datetime > end_datetime:
                            break
                        
                        if local_datetime >= start_datetime:
                            if not time_step['value']:
                                log("MISSING DATA for USGS Station {0} {1} {2}".format(station_id,
                                                                                       local_datetime,
                                                                                       time_step['value']),
                                    "WARNING")
                            gage_data.append(float(time_step['value'])/35.3146667)
    
                    try:
                        #get where streamids assocated with USGS sation id is
                        streamid_index = np.where(num_gage_id_list==int(float(usgs_station_id)))[0][0]+1
                    except Exception:
                        log("USGS Station {0} not found in list ...".format(usgs_station_id),
                            "WARNING")
                        raise
                        
                    if len(gage_data) == num_days_needed:
                        gage_data_matrix.append(gage_data)
                        valid_comid_list.append(reach_id_gage_id_list[streamid_index][0])
                    else:
                        log("StreamID {0} USGS Station {1} MISSING {2} DATA VALUES".format(reach_id_gage_id_list[streamid_index][0],
                                                                                           usgs_station_id,
                                                                                           num_days_needed-len(gage_data)),
                            "WARNING")

            if gage_data_matrix and valid_comid_list:
                log("Writing Output ...",
                    "INFO")
                np_array = np.array(gage_data_matrix).transpose()  
                with open_csv(out_streamflow_file, 'w') as gage_data:
                    wf = csvwriter(gage_data)
                    for row in np_array:
                        wf.writerow(row)
                        
                with open_csv(out_stream_id_file, 'w') as comid_data:
                    cf = csvwriter(comid_data)
                    for row in valid_comid_list:
                        cf.writerow([int(float(row))])
                        
                #set parameters for RAPID run
                self.IS_obs_tot = len(valid_comid_list)
                self.obs_tot_id_file = out_stream_id_file
                self.Qobs_file = out_streamflow_file
                self.IS_obs_use = len(valid_comid_list)
                self.obs_use_id_file = out_stream_id_file
            else:
                log("No valid data returned ...",
                    "WARNING")
        else:
                log("USGS query error ...",
                    "WARNING")