Source code for RAPIDpy.dataset

# -*- coding: utf-8 -*-
'''RAPIDDataset docstrings

'''
##
#  dataset.py
#  RAPIDpy
#
#  Created by Alan D Snow.
#  Copyright © 2016 Alan D Snow. All rights reserved.
#  BSD 3-Clause

from csv import writer as csv_writer
import datetime
from netCDF4 import Dataset, num2date
import numpy as np
from numpy.ma import masked
import pandas as pd
from past.builtins import xrange
from pytz import utc
# local import
from .helper_functions import log, open_csv


# ------------------------------------------------------------------------------
# Helper Function
# ------------------------------------------------------------------------------
def compare_qout_files(dataset1_path, dataset2_path, Qout_var="Qout"):
    """
    This function compares the output of RAPID Qout and tells you where
    they are different.
    """
    qout_same = False

    d1 = RAPIDDataset(dataset1_path)
    d2 = RAPIDDataset(dataset2_path)

    if len(d1.get_river_id_array()) != len(d2.get_river_id_array()):
        log("Length of COMID/rivid input not the same.",
            "ERROR")

    if not (d1.get_river_id_array() == d2.get_river_id_array()).all():
        log("COMID/rivid order is different in each dataset."
            " Reordering data for comparison.",
            "WARNING")

        d2_reordered_river_index_list = []
        for rivid in d1.get_river_id_array():
            reordered_index = np.where(d2.get_river_id_array() == rivid)[0][0]
            d2_reordered_river_index_list.append(reordered_index)
        d2_reordered_qout = d2.get_qout_index(d2_reordered_river_index_list)
    else:
        d2_reordered_qout = d2.get_qout()

    # get where the files are different
    d1_qout = d1.get_qout()
    where_diff = np.where(d1_qout != d2_reordered_qout)
    un_where_diff = np.unique(where_diff[0])

    # if different, check to see how different
    if un_where_diff.any():
        decimal_test = 7
        while decimal_test > 0:
            try:
                np.testing.assert_almost_equal(d1_qout,
                                               d2_reordered_qout,
                                               decimal=decimal_test)
                log("ALMOST EQUAL to {0} decimal places.".format(decimal_test),
                    "INFO")
                qout_same = True
                decimal_test = -1
            except AssertionError as ex:
                if decimal_test <= 1:
                    print(ex)
                decimal_test -= 1
                pass
        log("Number of different timeseries: {0}".format(len(un_where_diff)),
            "INFO")
        log("COMID idexes where different: {0}".format(un_where_diff),
            "INFO")
        log("COMID idexes where different: {0}".format(un_where_diff),
            "INFO")
        index = un_where_diff[0]
        log("Dataset 1 example. COMID index: "
            "{0}".format(d1.get_qout_index(index)),
            "INFO")
        log("Dataset 2 example. COMID index: "
            "{0}".format(d2_reordered_qout[index, :]),
            "INFO")

    else:
        qout_same = True
        log("Output Qout data is the same.",
            "INFO")

    d1.close()
    d2.close()
    return qout_same


# ------------------------------------------------------------------------------
# Main Dataset Manager Class
# ------------------------------------------------------------------------------
[docs]class RAPIDDataset(object): """ This class is designed to access data from the RAPID Qout NetCDF file. Attributes: filename(str): Path to the RAPID Qout NetCDF file. river_id_dimension(Optional[str]): Name of the river ID dimension. Default is to search through a standard list. river_id_variable(Optional[str]): Name of the river ID variable. Default is to search through a standard list. streamflow_variable(Optional[str]): Name of the streamflow varaible. Default is to search through a standard list. datetime_simulation_start(Optional[datetime]): This is a datetime object with the date of the simulation start time. simulation_time_step_seconds(Optional[integer]): This is the time step of the simulation output in seconds. out_tzinfo(Optional[tzinfo]): Time zone to output data as. The dates will be converted from UTC to the time zone input. Default is UTC. Example:: from RAPIDpy import RAPIDDataset path_to_rapid_qout = '/path/to/Qout.nc' with RAPIDDataset(path_to_rapid_qout) as qout_nc: #USE FUNCTIONS TO ACCESS DATA HERE """ def __init__(self, filename, river_id_dimension="", river_id_variable="", streamflow_variable="", datetime_simulation_start=None, simulation_time_step_seconds=None, out_tzinfo=None): """ Initialize the class with variables given by the user """ self.qout_nc = Dataset(filename, mode='r') # determine river ID dimension self.river_id_dimension = river_id_dimension if not river_id_dimension: if 'rivid' in self.qout_nc.dimensions: self.river_id_dimension = 'rivid' elif 'COMID' in self.qout_nc.dimensions: self.river_id_dimension = 'COMID' elif 'station' in self.qout_nc.dimensions: self.river_id_dimension = 'station' elif 'DrainLnID' in self.qout_nc.dimensions: self.river_id_dimension = 'DrainLnID' elif 'FEATUREID' in self.qout_nc.dimensions: self.river_id_dimension = 'FEATUREID' else: raise IndexError('ERROR: Could not find river ID dimension.') elif river_id_dimension not in self.qout_nc.dimensions: raise IndexError('ERROR: Could not find river ID dimension:' ' {0}.'.format(river_id_dimension)) self.size_river_id = len(self.qout_nc .dimensions[self.river_id_dimension]) variable_keys = self.qout_nc.variables.keys() # determine streamflow variable self.q_var_name = streamflow_variable if not streamflow_variable: if 'Qout' in variable_keys: self.q_var_name = 'Qout' elif 'streamflow' in variable_keys: self.q_var_name = 'streamflow' elif 'm3_riv' in variable_keys: self.q_var_name = 'm3_riv' else: raise IndexError('ERROR: Could not find flow variable.' ' Looked for Qout, streamflow, and m3_riv.') elif streamflow_variable not in variable_keys: raise IndexError('ERROR: Could not find flow variable.' ' Looked for {0}.'.format(streamflow_variable)) self.size_q_var = len(self.qout_nc.variables[self.q_var_name]) # determine time dimension if 'time' in self.qout_nc.dimensions: self.size_time = len(self.qout_nc.dimensions['time']) elif 'Time' in self.qout_nc.dimensions: self.size_time = len(self.qout_nc.dimensions['Time']) else: raise IndexError('ERROR: Could not find time dimension.') # determine river ID variable self.river_id_variable = river_id_variable if not river_id_variable: if 'rivid' in variable_keys: self.river_id_variable = 'rivid' elif 'COMID' in variable_keys: self.river_id_variable = 'COMID' elif 'station_id' in variable_keys: self.river_id_variable = 'station_id' elif 'DrainLnID' in variable_keys: self.river_id_variable = 'DrainLnID' elif 'FEATUREID' in variable_keys: self.river_id_variable = 'FEATUREID' else: print('WARNING: Could not find river ID variable' ' in {0}.'.format(variable_keys)) elif river_id_variable not in variable_keys: print('WARNING: Could not find river ID variable:' ' {0}.'.format(river_id_variable)) self.out_tzinfo = out_tzinfo self.datetime_simulation_start = datetime_simulation_start self.simulation_time_step_seconds = simulation_time_step_seconds def __enter__(self): return self def __exit__(self, exc_type, exc_value, traceback): self.close() def close(self): self.qout_nc.close() def _is_legacy_time_valid(self): """ This determines whether or not legacy time is set correctly """ return self.datetime_simulation_start is not None and \ self.simulation_time_step_seconds is not None
[docs] def is_time_variable_valid(self): """ This function returns whether or not the time variable is valid. Returns: boolean: True if the time variable is valid, otherwise false. Example:: from RAPIDpy import RAPIDDataset path_to_rapid_qout = '/path/to/Qout.nc' with RAPIDDataset(path_to_rapid_qout) as qout_nc: if qout_nc.is_time_variable_valid(): #DO WORK HERE """ time_var_valid = False if 'time' in self.qout_nc.variables.keys(): if len(self.qout_nc.dimensions['time']) > 0: if not (self.qout_nc.variables['time'][:] == masked).any(): try: timestep = (datetime.datetime .utcfromtimestamp( self.qout_nc.variables['time'][1] ) - datetime.datetime .utcfromtimestamp( self.qout_nc.variables['time'][0] )).total_seconds() if timestep > 0: time_var_valid = True except ValueError: pass return time_var_valid
[docs] def get_time_array(self, datetime_simulation_start=None, simulation_time_step_seconds=None, return_datetime=False, time_index_array=None): """ This method extracts or generates an array of time. The new version of RAPID output has the time array stored. However, the old version requires the user to know when the simulation began and the time step of the output. Parameters: return_datetime(Optional[boolean]): If true, it converts the data to a list of datetime objects. Default is False. time_index_array(Optional[list or np.array]): This is used to extract the datetime vales. This can be from the *get_time_index_range* function. Returns: list: An array of integers representing seconds since Jan 1, 1970 UTC or datetime objects if return_datetime is set to True. These examples demonstrates how to retrieve or generate a time array to go along with your RAPID streamflow series. CF-Compliant Qout File Example: .. code:: python from RAPIDpy import RAPIDDataset path_to_rapid_qout = '/path/to/Qout.nc' with RAPIDDataset(path_to_rapid_qout) as qout_nc: #retrieve integer timestamp array time_array = qout_nc.get_time_array() #or, to get datetime array time_datetime = qout_nc.get_time_array(return_datetime=True) Legacy Qout File Example: .. code:: python from RAPIDpy import RAPIDDataset path_to_rapid_qout = '/path/to/Qout.nc' with RAPIDDataset(path_to_rapid_qout, datetime_simulation_start=datetime_simulation_start, simulation_time_step_seconds=simulation_time_step_seconds) as qout_nc: #retrieve integer timestamp array time_array = qout_nc.get_time_array() #or, to get datetime array time_datetime = qout_nc.get_time_array(return_datetime=True) """ # Original Qout file if datetime_simulation_start is not None: self.datetime_simulation_start = datetime_simulation_start if simulation_time_step_seconds is not None: self.simulation_time_step_seconds = simulation_time_step_seconds time_array = [] epoch = datetime.datetime(1970, 1, 1, tzinfo=utc) time_units = "seconds since {0}".format(epoch) # CF-1.6 compliant file if self.is_time_variable_valid(): time_array = self.qout_nc.variables['time'][:] if self.qout_nc.variables['time'].units: time_units = self.qout_nc.variables['time'].units # Original Qout file elif self._is_legacy_time_valid(): initial_time_seconds = ((self.datetime_simulation_start .replace(tzinfo=utc) - epoch) .total_seconds() + self.simulation_time_step_seconds) final_time_seconds = (initial_time_seconds + self.size_time * self.simulation_time_step_seconds) time_array = np.arange(initial_time_seconds, final_time_seconds, self.simulation_time_step_seconds) else: raise Exception("ERROR: This file does not contain the time" " variable. To get time array, add" " datetime_simulation_start and" " simulation_time_step_seconds") if time_index_array is not None: time_array = time_array[time_index_array] if return_datetime: time_array = num2date(time_array, time_units) if self.out_tzinfo is not None: for i in xrange(len(time_array)): # convert time to output timezone time_array[i] = utc.localize(time_array[i]) \ .astimezone(self.out_tzinfo) \ .replace(tzinfo=None) return time_array
[docs] def get_time_index_range(self, date_search_start=None, date_search_end=None, time_index_start=None, time_index_end=None, time_index=None): """ Generates a time index range based on time bounds given. This is useful for subset data extraction. Parameters: date_search_start(Optional[datetime]): This is a datetime object with the date of the minimum date for starting. date_search_end(Optional[datetime]): This is a datetime object with the date of the maximum date for ending. time_index_start(Optional[int]): This is the index of the start of the time array subset. Useful for the old file version. time_index_end(Optional[int]): This is the index of the end of the time array subset. Useful for the old file version. time_index(Optional[int]): This is the index of time to return in the case that your code only wants one index. Used internally. Returns: index_array: This is an array used to extract a subset of data. CF-Compliant Qout File Example: .. code:: python from datetime import datetime from RAPIDpy import RAPIDDataset path_to_rapid_qout = '/path/to/Qout.nc' with RAPIDDataset(path_to_rapid_qout) as qout_nc: time_index_range = qout_nc.get_time_index_range(date_search_start=datetime(1980, 1, 1), date_search_end=datetime(1980, 12, 11)) Legacy Qout File Example: .. code:: python from datetime import datetime from RAPIDpy import RAPIDDataset path_to_rapid_qout = '/path/to/Qout.nc' with RAPIDDataset(path_to_rapid_qout, datetime_simulation_start=datetime(1980, 1, 1), simulation_time_step_seconds=3600) as qout_nc: time_index_range = qout_nc.get_time_index_range(date_search_start=datetime(1980, 1, 1), date_search_end=datetime(1980, 12, 11)) """ # get the range of time based on datetime range time_range = None if ((self.is_time_variable_valid() or self._is_legacy_time_valid()) and (date_search_start is not None or date_search_end is not None)): print("Determining time range ({0} to {1})" "...".format(date_search_start, date_search_end)) time_array = self.get_time_array() if date_search_start is not None: date_search_start_utc = date_search_start if self.out_tzinfo is not None: date_search_start_utc = self.out_tzinfo \ .localize(date_search_start) \ .astimezone(utc) \ .replace(tzinfo=None) seconds_start = (date_search_start_utc - datetime.datetime(1970, 1, 1)).total_seconds() time_range = np.where(time_array >= seconds_start)[0] if date_search_end is not None: date_search_end_utc = date_search_end if self.out_tzinfo is not None: date_search_end_utc = self.out_tzinfo \ .localize(date_search_end) \ .astimezone(utc) \ .replace(tzinfo=None) seconds_end = (date_search_end_utc - datetime.datetime(1970, 1, 1)).total_seconds() if time_range is not None: time_range = np.intersect1d(time_range, np.where(time_array <= seconds_end)[0]) else: time_range = np.where(time_array <= seconds_end)[0] # get the range of time based on time index range elif time_index_start is not None or time_index_end is not None: if time_index_start is None: time_index_start = 0 if time_index_end is None: time_index_end = self.size_time time_range = range(time_index_start, time_index_end) # get only one time step elif time_index is not None: time_range = [time_index] # return all else: time_range = range(self.size_time) return time_range
[docs] def get_river_id_array(self): """ This method returns the river ID array for this file. Returns: numpy.array: An array of the river ID's Example:: from RAPIDpy import RAPIDDataset path_to_rapid_qout = '/path/to/Qout.nc' with RAPIDDataset(path_to_rapid_qout) as qout_nc: river_ids = qout_nc.get_river_id_array() """ return self.qout_nc.variables[self.river_id_variable][:]
[docs] def get_river_index(self, river_id): """ This method retrieves the river index in the netCDF dataset corresponding to the river ID. Returns: int: The index of the river ID's in the file Example:: from RAPIDpy import RAPIDDataset path_to_rapid_qout = '/path/to/Qout.nc' river_id = 53458 with RAPIDDataset(path_to_rapid_qout) as qout_nc: river_index = qout_nc.get_river_index(river_id) """ try: return np.where(self.get_river_id_array() == river_id)[0][0] except IndexError: raise IndexError("ERROR: River ID {0} not found in dataset " "...".format(river_id))
def get_subset_riverid_index_list(self, river_id_list): """ Gets the subset riverid_list from the netcdf file Optional returns include the list of valid river ids in the dataset as well as a list of missing rive rids """ netcdf_river_indices_list = [] valid_river_ids = [] missing_river_ids = [] for river_id in river_id_list: # get where streamids are in netcdf file try: netcdf_river_indices_list \ .append(self.get_river_index(river_id)) valid_river_ids.append(river_id) except IndexError: print("WARNING: ReachID {0} not found in netCDF dataset." " Skipping ...".format(river_id)) missing_river_ids.append(river_id) pass np_valid_river_indices_list = np.array(netcdf_river_indices_list) np_valid_river_ids = np.array(valid_river_ids) sorted_indexes = np.argsort(np_valid_river_indices_list) return(np_valid_river_indices_list[sorted_indexes], np_valid_river_ids[sorted_indexes], np.array(missing_river_ids))
[docs] def get_qout(self, river_id_array=None, date_search_start=None, date_search_end=None, time_index_start=None, time_index_end=None, time_index=None, time_index_array=None, daily=False, pd_filter=None, daily_mode="mean"): """ This method extracts streamflow data by a single river ID or by a river ID array. It has options to extract by date or by date index. Parameters: river_id_array(Optional[list or int]): A single river ID or an array of river IDs. date_search_start(Optional[datetime]): This is a datetime object with the date of the minimum date for starting. date_search_end(Optional[datetime]): This is a datetime object with the date of the maximum date for ending. time_index_start(Optional[int]): This is the index of the start of the time array subset. Useful for the old file version. time_index_end(Optional[int]): This is the index of the end of the time array subset. Useful for the old file version. time_index(Optional[int]): This is the index of time to return in the case that your code only wants one index. Used internally. time_index_array(Optional[list or np.array]): This is used to extract the vales only for particular dates. This can be from the *get_time_index_range* function. daily(Optional[bool]): If true, this will convert qout to daily average. pd_filter(Optional[str]): This is a valid pandas resample frequency filter. filter_mode(Optional[str]): You can get the daily average "mean" or the maximum "max". Default is "mean". Returns: numpy.array: This is a 1D or 2D array or a single value depending on your input search. This example demonstrates how to retrieve the streamflow associated with the reach you are interested in:: from RAPIDpy import RAPIDDataset path_to_rapid_qout = '/path/to/Qout.nc' river_id = 500 with RAPIDDataset(path_to_rapid_qout) as qout_nc: streamflow_array = qout_nc.get_qout(river_id) This example demonstrates how to retrieve the streamflow within a date range associated with the reach you are interested in:: from RAPIDpy import RAPIDDataset path_to_rapid_qout = '/path/to/Qout.nc' river_id = 500 with RAPIDDataset(path_to_rapid_qout) as qout_nc: streamflow_array = qout_nc.get_qout(river_id, date_search_start=datetime(1985,1,1), date_search_end=datetime(1985,2,4)) """ # get indices of where the streamflow data is riverid_index_list_subset = None if river_id_array is not None: if not hasattr(river_id_array, "__len__"): river_id_array = [river_id_array] riverid_index_list_subset = \ self.get_subset_riverid_index_list(river_id_array)[0] return self.get_qout_index(riverid_index_list_subset, date_search_start, date_search_end, time_index_start, time_index_end, time_index, time_index_array, daily, pd_filter, daily_mode)
def get_qout_index(self, river_index_array=None, date_search_start=None, date_search_end=None, time_index_start=None, time_index_end=None, time_index=None, time_index_array=None, daily=False, pd_filter=None, filter_mode="mean"): """ This method extracts streamflow data by river index It allows for extracting single or multiple river streamflow arrays It has options to extract by date or by date index """ if river_index_array is not None: if hasattr(river_index_array, "__len__"): if len(river_index_array) == 1: river_index_array = river_index_array[0] if time_index_array is None: time_index_array = self.get_time_index_range(date_search_start, date_search_end, time_index_start, time_index_end, time_index) qout_variable = self.qout_nc.variables[self.q_var_name] qout_dimensions = qout_variable.dimensions streamflow_array = [] if qout_dimensions[0].lower() == 'time' and \ qout_dimensions[1].lower() == self.river_id_dimension.lower(): if time_index_array is not None and river_index_array is not None: streamflow_array = qout_variable[time_index_array, river_index_array].transpose() elif time_index_array is not None: streamflow_array = qout_variable[time_index_array, :] \ .transpose() elif river_index_array is not None: streamflow_array = qout_variable[:, river_index_array] \ .transpose() else: streamflow_array = qout_variable[:].transpose() elif qout_dimensions[1].lower() == 'time' and \ qout_dimensions[0].lower() == self.river_id_dimension.lower(): if time_index_array is not None and river_index_array is not None: streamflow_array = qout_variable[river_index_array, time_index_array] elif time_index_array is not None: streamflow_array = qout_variable[:, time_index_array] elif river_index_array is not None: streamflow_array = qout_variable[river_index_array, :] else: streamflow_array = qout_variable[:] else: raise Exception("Invalid RAPID Qout file dimensions ...") if daily: pd_filter = "D" if pd_filter is not None: time_array = self.get_time_array(return_datetime=True, time_index_array=time_index_array) df = pd.DataFrame(streamflow_array.T, index=time_array) \ .resample(pd_filter) if filter_mode == "mean": df = df.mean() elif filter_mode == "max": df = df.max() else: raise Exception("Invalid filter_mode ...") streamflow_array = df.as_matrix().T if streamflow_array.ndim > 0 and streamflow_array.shape[0] == 1: streamflow_array = streamflow_array[0] return streamflow_array
[docs] def write_flows_to_csv(self, path_to_output_file, river_index=None, river_id=None, date_search_start=None, date_search_end=None, daily=False, mode="mean"): """ Write out RAPID output to CSV file. .. note:: Need either *reach\_id* or *reach\_index* parameter, but either can be used. Parameters: path_to_output_file(str): Path to the output csv file. river_index(Optional[datetime]): This is the index of the river in the file you want the streamflow for. river_id(Optional[datetime]): This is the river ID that you want the streamflow for. date_search_start(Optional[datetime]): This is a datetime object with the date of the minimum date for starting. date_search_end(Optional[datetime]): This is a datetime object with the date of the maximum date for ending. daily(Optional[boolean]): If True and the file is CF-Compliant, write out daily flows. mode(Optional[str]): You can get the daily average "mean" or the maximum "max". Defauls is "mean". Example writing entire time series to file: .. code:: python from RAPIDpy import RAPIDDataset river_id = 3624735 path_to_rapid_qout = '/path/to/Qout.nc' with RAPIDDataset(path_to_rapid_qout) as qout_nc: #for writing entire time series to file qout_nc.write_flows_to_csv('/timeseries/Qout_3624735.csv', river_id=river_id, ) #if file is CF compliant, you can write out daily average #NOTE: Getting the river index is not necessary #this is just an example of how to use this river_index = qout_nc.get_river_index(river_id) qout_nc.write_flows_to_csv('/timeseries/Qout_daily.csv', river_index=river_index, daily=True, ) Example writing entire time series as daily average to file: .. code:: python from RAPIDpy import RAPIDDataset river_id = 3624735 path_to_rapid_qout = '/path/to/Qout.nc' with RAPIDDataset(path_to_rapid_qout) as qout_nc: #NOTE: Getting the river index is not necessary #this is just an example of how to use this river_index = qout_nc.get_river_index(river_id) #if file is CF compliant, you can write out daily average qout_nc.write_flows_to_csv('/timeseries/Qout_daily.csv', river_index=river_index, daily=True, ) Example writing entire time series as daily average to file: .. code:: python from datetime import datetime from RAPIDpy import RAPIDDataset river_id = 3624735 path_to_rapid_qout = '/path/to/Qout.nc' with RAPIDDataset(path_to_rapid_qout) as qout_nc: # if file is CF compliant, you can filter by date qout_nc.write_flows_to_csv('/timeseries/Qout_daily_date_filter.csv', river_id=river_id, daily=True, date_search_start=datetime(2002, 8, 31), date_search_end=datetime(2002, 9, 15), mode="max" ) """ if river_id is not None: river_index = self.get_river_index(river_id) elif river_id is None and river_index is None: raise Exception("ERROR: Need reach id or reach index ...") # analyze and write if self.is_time_variable_valid() or self._is_legacy_time_valid(): time_index_range = \ self.get_time_index_range(date_search_start=date_search_start, date_search_end=date_search_end) qout_arr = self.get_qout_index(river_index, time_index_array=time_index_range) time_array = self.get_time_array(time_index_array=time_index_range, return_datetime=True) df = pd.DataFrame(qout_arr.T, index=time_array) if daily: df = df.resample('D') if mode == "mean": df = df.mean() elif mode == "max": df = df.max() else: raise Exception("Invalid mode ...") df.to_csv(path_to_output_file, header=False) else: print("Valid time variable not found. Printing values only ...") qout_arr = self.get_qout_index(river_index) with open_csv(path_to_output_file, 'w') as outcsv: writer = csv_writer(outcsv) for index in xrange(len(qout_arr)): writer.writerow([index, "{0:.5f}".format(qout_arr[index])])
[docs] def write_flows_to_gssha_time_series_xys(self, path_to_output_file, series_name, series_id, river_index=None, river_id=None, date_search_start=None, date_search_end=None, daily=False, mode="mean"): """ Write out RAPID output to GSSHA WMS time series xys file. Parameters: path_to_output_file(str): Path to the output xys file. series_name(str): The name for the series. series_id(int): The ID to give the series. river_index(Optional[datetime]): This is the index of the river in the file you want the streamflow for. river_id(Optional[datetime]): This is the river ID that you want the streamflow for. date_search_start(Optional[datetime]): This is a datetime object with the date of the minimum date for starting. date_search_end(Optional[datetime]): This is a datetime object with the date of the maximum date for ending. daily(Optional[boolean]): If True and the file is CF-Compliant, write out daily flows. mode(Optional[str]): You can get the daily average "mean" or the maximum "max". Defauls is "mean". Example writing entire time series to file: .. code:: python from RAPIDpy import RAPIDDataset river_id = 3624735 path_to_rapid_qout = '/path/to/Qout.nc' with RAPIDDataset(path_to_rapid_qout) as qout_nc: qout_nc.write_flows_to_gssha_time_series_xys('/timeseries/Qout_3624735.xys', series_name="RAPID_TO_GSSHA_{0}".format(river_id), series_id=34, river_id=river_id, ) Example writing entire time series as daily average to file: .. code:: python from RAPIDpy import RAPIDDataset river_id = 3624735 path_to_rapid_qout = '/path/to/Qout.nc' with RAPIDDataset(path_to_rapid_qout) as qout_nc: # NOTE: Getting the river index is not necessary # this is just an example of how to use this river_index = qout_nc.get_river_index(river_id) # if file is CF compliant, you can write out daily average qout_nc.write_flows_to_gssha_time_series_xys('/timeseries/Qout_daily.xys', series_name="RAPID_TO_GSSHA_{0}".format(river_id), series_id=34, river_index=river_index, daily=True, ) Example writing subset of time series as daily maximum to file: .. code:: python from datetime import datetime from RAPIDpy import RAPIDDataset river_id = 3624735 path_to_rapid_qout = '/path/to/Qout.nc' with RAPIDDataset(path_to_rapid_qout) as qout_nc: # NOTE: Getting the river index is not necessary # this is just an example of how to use this river_index = qout_nc.get_river_index(river_id) # if file is CF compliant, you can filter by date and # get daily values qout_nc.write_flows_to_gssha_time_series_xys('/timeseries/Qout_daily_date_filter.xys', series_name="RAPID_TO_GSSHA_{0}".format(river_id), series_id=34, river_index=river_index, date_search_start=datetime(2002, 8, 31), date_search_end=datetime(2002, 9, 15), daily=True, mode="max" ) """ if river_id is not None: river_index = self.get_river_index(river_id) elif river_id is None and river_index is None: raise Exception("ERROR: Need reach id or reach index ...") # analyze and write if self.is_time_variable_valid() or self._is_legacy_time_valid(): time_index_range = \ self.get_time_index_range(date_search_start=date_search_start, date_search_end=date_search_end) qout_arr = self.get_qout_index(river_index, time_index_array=time_index_range) time_array = self.get_time_array(time_index_array=time_index_range, return_datetime=True) df = pd.DataFrame(qout_arr.T, index=time_array) if daily: df = df.resample('D').mean() with open_csv(path_to_output_file, 'w') as out_ts: out_ts.write("XYS {0} {1} \"{2}\"\r\n".format(series_id, len(df.index), series_name)) for index, pd_row in df.iterrows(): date_str = index.strftime("%m/%d/%Y %I:%M:%S %p") out_ts.write("\"{0}\" {1:.5f}\n".format(date_str, pd_row[0])) else: raise IndexError("Valid time variable not found. Valid time" " variable required in Qout file to proceed ...")
[docs] def write_flows_to_gssha_time_series_ihg(self, path_to_output_file, connection_list_file, date_search_start=None, date_search_end=None, daily=False, mode="mean"): """ Write out RAPID output to GSSHA time series ihg file .. note:: See: http://www.gsshawiki.com/Surface_Water_Routing:Introducing_Dischage/Constituent_Hydrographs .. note:: GSSHA project card is CHAN_POINT_INPUT Parameters: path_to_output_file(str): Path to the output xys file. connection_list_file(list): CSV file with link_id, node_id, baseflow, and rapid_rivid header and rows with data. date_search_start(Optional[datetime]): This is a datetime object with the date of the minimum date for starting. date_search_end(Optional[datetime]): This is a datetime object with the date of the maximum date for ending. out_tzinfo(Optional[tzinfo]): Timezone object with output time zone for GSSHA. Default is the native RAPID output timezone (UTC). daily(Optional[boolean]): If True and the file is CF-Compliant, write out daily flows. mode(Optional[str]): You can get the daily average "mean" or the maximum "max". Defauls is "mean". Example connection list file:: link_id, node_id, baseflow, rapid_rivid 599, 1, 0.0, 80968 603, 1, 0.0, 80967 Example writing entire time series to file: .. code:: python from RAPIDpy import RAPIDDataset path_to_rapid_qout = '/path/to/Qout.nc' connection_list_file = '/path/to/connection_list_file.csv' with RAPIDDataset(path_to_rapid_qout) as qout_nc: #for writing entire time series to file qout_nc.write_flows_to_gssha_time_series_ihg('/timeseries/Qout_3624735.ihg', connection_list_file, ) Example writing entire time series as daily average to file: .. code:: python from RAPIDpy import RAPIDDataset path_to_rapid_qout = '/path/to/Qout.nc' connection_list_file = '/path/to/connection_list_file.csv' with RAPIDDataset(path_to_rapid_qout) as qout_nc: # if file is CF compliant, you can write out daily average qout_nc.write_flows_to_gssha_time_series_ihg('/timeseries/Qout_3624735.ihg', connection_list_file, daily=True, ) Example writing subset of time series as daily maximum to file: .. code:: python from datetime import datetime from RAPIDpy import RAPIDDataset path_to_rapid_qout = '/path/to/Qout.nc' connection_list_file = '/path/to/connection_list_file.csv' with RAPIDDataset(path_to_rapid_qout) as qout_nc: # if file is CF compliant, you can filter by date and get daily values qout_nc.write_flows_to_gssha_time_series_ihg('/timeseries/Qout_daily_date_filter.ihg', connection_list_file, date_search_start=datetime(2002, 8, 31), date_search_end=datetime(2002, 9, 15), daily=True, mode="max" ) """ # analyze and write if self.is_time_variable_valid() or self._is_legacy_time_valid(): time_index_range = \ self.get_time_index_range(date_search_start=date_search_start, date_search_end=date_search_end) with open_csv(path_to_output_file, 'w') as out_ts: # HEADER SECTION EXAMPLE: # NUMPT 3 # POINT 1 599 0.0 # POINT 1 603 0.0 # POINT 1 605 0.0 connection_list = np.loadtxt(connection_list_file, skiprows=1, ndmin=1, delimiter=',', usecols=(0, 1, 2, 3), dtype={'names': ('link_id', 'node_id', 'baseflow', 'rapid_rivid'), 'formats': ('i8', 'i8', 'f4', 'i8') }, ) out_ts.write("NUMPT {0}\n".format(connection_list.size)) river_idx_list = [] for connection in connection_list: out_ts.write("POINT {0} {1} {2}\n" "".format(connection['node_id'], connection['link_id'], connection['baseflow'], ), ) river_idx_list.append( self.get_river_index(connection['rapid_rivid']) ) # INFLOW SECTION EXAMPLE: # NRPDS 54 # INPUT 2002 01 01 00 00 15.551210 12.765090 0.000000 # INPUT 2002 01 02 00 00 15.480830 12.765090 0.000000 # INPUT 2002 01 03 00 00 16.078910 12.765090 0.000000 # ... qout_2d_array = \ self.get_qout_index(river_idx_list, time_index_array=time_index_range) time_array = \ self.get_time_array(time_index_array=time_index_range, return_datetime=True) df = pd.DataFrame(qout_2d_array.T, index=time_array) if daily: df = df.resample('D').mean() out_ts.write("NRPDS {0}\n".format(len(df.index))) for index, pd_row in df.iterrows(): date_str = index.strftime("%Y %m %d %H %M") qout_str = " ".join(["{0:.5f}".format(pd_row[column]) for column in df]) out_ts.write("INPUT {0} {1}\n".format(date_str, qout_str)) else: raise IndexError("Valid time variable not found. Valid time " "variable required in Qout file to proceed ...")