|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +''' |
| 3 | +data.py -- holds ``Data`` class for standard PRMS climate input data. |
| 4 | +''' |
| 5 | + |
| 6 | +import pandas as pd |
| 7 | +from shutil import copyfile |
| 8 | + |
class Data(object):
    """
    Object to access or create a PRMS data file with ability to load/assign it to a
    date-time indexed pandas.DataFrame for data management, analysis and visualization.
    It can be used to build a new PRMS data file from user defined metadata and a
    ``pandas.DataFrame`` of PRMS datetime-indexed climatic forcing and observation
    variables.

    The class properties ``metadata`` and ``data_frame`` can be later assigned if no
    ``base_file`` is given on initialization, allowing for the creation of PRMS climatic
    forcing file in a Python environment.

    Keyword Arguments:
        base_file (str, optional): path to standard PRMS data file
        na_rep (int, optional): how to represent missing values default = -999

    Attributes:
        date_header (list): date and time header for PRMS data file
        valid_input_variables (tuple): valid hydro-climate variables for PRMS data file

    Note:
        If using the ``Data`` class to create a new data file, it is up to the user
        to ensure that the metadata and :class:`pandas.DataFrame` assigned are correct
        and compatible.

    """

    ## data file constant attributes
    date_header = ['year',
                   'month',
                   'day',
                   'hh',
                   'mm',
                   'sec']

    valid_input_variables = ('gate_ht',
                             'humidity',
                             'lake_elev',
                             'pan_evap',
                             'precip',
                             'rain_day',
                             'runoff',
                             'snowdepth',
                             'solrad',
                             'tmax',
                             'tmin',
                             'wind_speed')

    def __init__(self, base_file=None, na_rep=-999):
        self.base_file = base_file
        self.na_rep = na_rep
        # both are filled lazily by the properties below, or assigned by the user
        self._metadata = None
        self._data_frame = None

    @staticmethod
    def _expand_variable(line):
        """
        Turn one header declaration line ("<name> <count>") into column names.

        A count of 1 yields ``[name]``; a count above 1 yields
        ``['name 1', ..., 'name <count>']``; a count of 0 yields ``[]``.

        Raises:
            ValueError: if the line lacks an integer count after the name.
        """
        fields = line.split()
        try:
            name, count = fields[0], int(fields[1])
        except (IndexError, ValueError):
            raise ValueError(
                'Malformed variable declaration in data file header: '
                '{!r}'.format(line.rstrip())
            )
        if count == 1:
            return [name]
        return ['{var_name} {var_ind}'.format(var_name=name, var_ind=i)
                for i in range(1, count + 1)]

    @property
    def metadata(self):
        """
        :obj:`dict`:A property that gets and sets the header information from
        a standard PRMS climate input data file held in a Python dictionary. As
        a property it can be assigned directly to overwrite or create a new PRMS
        data file. As such the user is in control and must supply the correct
        syntax for PRMS standard data files, e.g. text lines before header should
        begin with "//". Here is an example of the information gathered and held in
        this attribute:

        Example:
            >>> data.metadata
            {
                'data_startline' : 7,
                'data_variables' : ['runoff 1', 'runoff 2', 'tmin', 'tmax', 'precip'],
                'text_before_header' : "Title of data file\\n// some comments\\n
                                        runoff 2\\ntmin 1\\ntmax 1\\nprecip 1\\n
                                        ########################################\\n"
            }

        Note:
            ``text_before_header`` holds every line of the file up to and including
            the line of number signs "#". ``Data.write`` emits it verbatim and then
            writes one row per date, each prefixed with the ``date_header`` columns
            (year, month, day, hh, mm, sec).

        Raises:
            ValueError: if data in metadata is accessed before data is assigned,
                e.g. if accessed to write a PRMS data file from a ``Data`` instance
                that was initialized without a valid PRMS data file, or if the
                data file has no "#" line separating the header from the data.
            TypeError: if an object that is not a Python dictionary is assigned.

        """
        # to avoid overwriting pre-assigned data, check if already exists
        if isinstance(self._metadata, dict):
            return self._metadata
        if not self.base_file:
            raise ValueError('No data file was given on initialization')

        header_lines = []     # raw text up to and including the "#" delimiter
        variable_names = []   # one entry per data column, in file order
        data_startline = None
        with open(self.base_file, 'r') as inf:
            for idx, line in enumerate(inf):
                header_lines.append(line)
                # first line is a free-form title and "/" lines are comments;
                # neither may be parsed as a variable declaration
                if idx == 0 or line.startswith('/'):
                    continue
                if line.startswith('#'):
                    # end of header; 0-indexed line of the first data entry
                    data_startline = idx + 1
                    break
                if line.startswith(Data.valid_input_variables):
                    variable_names.extend(Data._expand_variable(line))

        if data_startline is None:
            raise ValueError(
                'No "#" delimiter line found in {}; cannot locate the start '
                'of the time series data'.format(self.base_file)
            )

        self._metadata = {
            'data_startline': data_startline,
            'data_variables': variable_names,
            'text_before_header': ''.join(header_lines),
        }
        return self._metadata

    @metadata.setter
    def metadata(self, dic):
        if not isinstance(dic, dict):
            raise TypeError('Must assign a Python dictionary for new Data object/file metadata')
        self._metadata = dic

    @property
    def data_frame(self):
        """
        A property that gets and sets the climatic forcing data for a standard PRMS
        climate input data file as a :class:`pandas.DataFrame`.

        Example:
            d is a Data instance, calling

            >>> d.data_frame
            input variables  runoff 1  runoff 2  runoff 3  precip  tmax  tmin
            date
            1996-12-27           0.54       1.6       NaN     0.0    46  32.0
            1996-12-28           0.65       1.6       NaN     0.0    45  24.0
            1996-12-29           0.80       1.6       NaN     0.0    44  28.0
            1996-12-30           0.90       1.6       NaN     0.0    51  33.0
            1996-12-31           1.00       1.7       NaN     0.0    47  32.0

            shows the date-indexed ``pd.DataFrame`` of the input data that is read
            from ``base_file`` the first time the property is accessed, i.e. when
            the instance was given a file path to a PRMS climate data file.

        Raises:
            ValueError: if attribute is accessed before either assigning a PRMS data
                file on ``Data`` initialization or assigning a compatible
                date-indexed ``pandas.DataFrame`` of hydro-climate variables.
            TypeError: if a data type other than ``pandas.DataFrame`` is assigned.
        """
        # an already loaded or user-assigned frame always wins; this check must
        # come first so that lazily loading metadata never clobbers it
        if isinstance(self._data_frame, pd.DataFrame):
            return self._data_frame
        if not self.base_file:
            raise ValueError('No data base_file given on initialization, '
                             'therefore you must assign a DataFrame'
                             ' before accessing the .data_frame attribute!')

        meta = self.metadata  # parses the file header on first use
        df = pd.read_csv(self.base_file, header=None,
                         skiprows=meta['data_startline'],
                         sep=r'\s+', na_values=[self.na_rep])
        df.columns = Data.date_header + list(meta['data_variables'])
        # only year/month/day are used for the index; hh/mm/sec are discarded
        dates = pd.to_datetime(df[['year', 'month', 'day']])
        df.index = pd.DatetimeIndex(dates)
        df.drop(Data.date_header, axis=1, inplace=True)  # unneeded columns
        df.columns.name = 'input variables'
        df.index.name = 'date'
        self._data_frame = df
        return self._data_frame

    @data_frame.setter
    def data_frame(self, df):
        if not isinstance(df, pd.DataFrame):
            raise TypeError("Must assign a Pandas.DataFrame object for PRMS data input")
        self._data_frame = df

    def modify(self, func, vars_to_adjust):
        """
        Apply a user defined function to one or more variable(s) in the data file.

        The ``modify`` method allows for inplace modification of one or more
        time series inputs in the data file based on a user defined function.

        Arguments:
            func (function): function to apply to each variable in vars_to_adjust
            vars_to_adjust (list or tuple): collection of variable names to apply
                func to. A single name given as a string is also accepted.

        Returns:
            None

        Raises:
            ValueError: propagated from ``data_frame`` when no data is available.

        Example:
            Here is an example of loading a data file, modifying the temperature inputs
            (*tmin* and *tmax*) by adding two degrees to each element, and rewriting the
            modified data to disk,

            >>> d = Data('path_to_data_file')
            >>> def f(x):
                    return x + 2
            >>> d.modify(f,['tmax','tmin'])
            >>> d.write('data_temp_plus_2')
        """
        # a bare string would otherwise be iterated character by character
        if isinstance(vars_to_adjust, str):
            vars_to_adjust = [vars_to_adjust]
        frame = self.data_frame  # loads from file if needed, else raises ValueError
        for v in vars_to_adjust:
            frame[v] = frame[v].apply(func)

    def write(self, out_path):
        """
        Writes the current state of the ``Data`` to PRMS text format
        utilizing the ``Data.metadata`` and ``Data.data_frame`` instance
        properties. If ``Data.data_frame`` was never accessed or assigned
        new values then this method simply copies the original PRMS
        data file to ``out_path``.

        Arguments:
            out_path (str): full path to save or copy the current PRMS data
                in PRMS text format.

        Returns:
            None

        Raises:
            ValueError: if the ``write`` method is called without assigning either
                an initial data (``base_file``) path or assigning correct ``metadata``
                and ``data_frame`` properties.

        """
        # if file data was never accessed- unchanged
        if not isinstance(self._data_frame, pd.DataFrame):
            if not self.base_file:
                # data not from original file and dataframe never assigned
                raise ValueError('No data base_file was given and'
                                 ' no data was assigned!')
            copyfile(self.base_file, out_path)
            return

        ## reconstruct PRMS data file format on a copy so the date-indexed
        ## frame held by the instance is left untouched
        meta = self.metadata
        variables = list(meta['data_variables'])
        index = self._data_frame.index
        out = self._data_frame[variables].copy()
        out['year'] = index.year
        out['month'] = index.month
        out['day'] = index.day
        out['hh'] = 0
        out['mm'] = 0
        out['sec'] = 0
        out = out[Data.date_header + variables]
        with open(out_path, 'w') as outf:  # write comment header then data
            outf.write(meta['text_before_header'])
            out.to_csv(outf, sep=' ', header=False,
                       index=False, na_rep=str(self.na_rep))
| 263 | + |
0 commit comments