1   
  2  """ 
This file contains a collection of file reader functions which can be
called when reading in the raw data files.  The data files must be in an
array (1D or 2D) format.
  6   
  7  The functions must have the following calling signature: 
  8  reader_fun(input_file, other_parameters). 
  9   
The time (in matplotlib format) always has to be in the first column.
 11   
Each reader returns a tuple (data, raw_data, metadata).  The data will
have the format: time (in matplotlib format), followed by the other
(non-time related) data columns.
 15  """ 
 16   
 17  from __future__ import division 
 18   
 19  import numpy as np 
 20  import pylab as plt 
 21  import datetime 
 22  import copy 
 23  import matplotlib.mlab as mlab 
 24  from distutils.version import LooseVersion 
 25   
 26  import pdb 
 27   
 28   
 29  from fieldpy.core.extra_classes import Metadata 
 30   
 31   
 32   
 33   
 34   
 35 -def readfile_raw(input_file, separator=None, comment=None, 
 36                     start=0, stop=-1, ignore_empty=False): 
  37      """Reads a text file line by line and returns the raw data into 
 38      the nested list raw_data.  Ignores all trainling empty lines. 
 39   
 40      @type input_file: string 
 41      @param input_file: The file to read. 
 42   
 43      @type separator: string 
 44      @param separator: Column separator in file, if equals to 'no_split' colums will not be split. 
 45   
 46      @type comment: string 
 47      @param comment: a string which comments the rest of the line 
 48   
 49      @type start: int 
 50      @param start: which line to start on (default 0) 
 51   
 52      @type stop: int 
 53      @param stop: which line to stop on (default -1, i.e. to the end) 
 54   
 55      @type ignore_empty: boolena 
 56      @param ignore_empty: if C{True}, ignore empty lines 
 57   
 58      @rtype: list 
 59      @return: Returns a nested list containing the split raw file lines (as strings). 
 60   
 61      >>> readfile_raw('test_files/maw_file_test.maw', separator=',', comment='#') 
 62      [['2010-07-13 08:49:00', '0', '0.3030', '5', 'asdd asdlkj asl'], ['2010-07-13 08:56:00', '15', '0.2320', '8866', 'asdd asdlkj asl'], ['2010-07-13 08:58:00', '25', '0.2055', '5', '7'], ['2010-07-13 09:03:00', '50', '0.1620', '5', '']] 
 63      """ 
 64   
 65      raw_data = [] 
 66      rw_opt='rU' 
 67      line_no = 0 
 68      with open(input_file, rw_opt) as fil: 
 69           
 70          for i in range(start): 
 71              line = fil.readline() 
 72              line_no += 1 
 73          for line in fil: 
 74              if line_no==stop: 
 75                  break 
 76              if comment is not None: 
 77                   
 78                  line = line.split(comment)[0].strip() 
 79                  if line=='': 
 80                      continue 
 81              if separator != 'no_split': 
 82                  tmp = line.strip() 
 83                  if ignore_empty and tmp=='': 
 84                      continue 
 85                  tmp = [el.strip() for el in tmp.split(separator)]    
 86              else: 
 87                  tmp = line.strip() 
 88              raw_data.append(tmp) 
 89              line_no += 1 
 90       
 91      while True: 
 92          if raw_data[-1]=='': 
 93              raw_data.pop() 
 94          else: 
 95              break 
 96      return raw_data  
  97   
 99      """Convert our standart campbell notation into pylab format. 
100   
101      @type campbell_date: list of lists or numpy array 
102      @param campbell_date: A numpy array with Campbell dates 
103   
104      @returns: numpy array 
105   
106      >>> cd1 = [[2006, 139, 1245]] 
107      >>> cd2 = [[2006, 139, 1245, 34]] 
108      >>> campbell2num_date(cd1) 
109      array([ 732450.53125]) 
110      >>> campbell2num_date(cd2) 
111      array([ 732450.53164352]) 
112      >>> np.alltrue(cd1 == num_date2campbell(campbell2num_date(cd1))) 
113      True 
114      >>> np.alltrue(cd2 == num_date2campbell(campbell2num_date(cd2))) 
115      False 
116      >>> np.alltrue(cd2 == num_date2campbell(campbell2num_date(cd2), secs=True)) 
117      True 
118      """ 
119      if type(campbell_date)!=type(np.array([])): 
120          campbell_date = np.array(campbell_date) 
121   
122       
123      if len(campbell_date.shape)==1 or len(campbell_date.shape)>2: 
124          raise(TypeError('2-d numpy array or list of lists expected.')) 
125       
126      if campbell_date.shape[1]==3:        
127          campbell_date = np.hstack(( campbell_date, np.zeros((campbell_date.shape[0],1)) )) 
128   
129      out = np.zeros(campbell_date.shape[0]) 
130   
131       
132      for n,i in enumerate(campbell_date): 
133          out[n] =( plt.date2num(datetime.datetime(int(i[0]),1,1))   
134                    + i[1]-1.0                                     
135                    + np.floor(i[2]/100.0)/24.0                     
136                    + (i[2] - np.floor(i[2]/100.0)*100.0)/24.0/60.0  
137                    + i[3]/24./3600                                
138                  ) 
139      return out 
 140   
def num_date2campbell(num_date, secs=False):
    """Convert a numerical date as in pylab to our standard campbell
    notation (in an array or a single date).

    @type num_date: numpy array or list
    @param num_date: vector of pylab dates

    @type secs: boolean
    @param secs: if C{True} secs are appended

    @return: numpy array with rows [year, julian day, time (,seconds)]

    >>> dt = datetime.datetime(2006,6,6,12,37,25)
    >>> nd = plt.date2num(dt)
    >>> num_date2campbell([nd])
    array([[2006,  157, 1237]])
    >>> num_date2campbell([nd], secs=True)
    array([[2006,  157, 1237,   25]])
    """
    # NOTE(review): the `def` line was missing in the corrupted source;
    # the signature is restored from the doctest usage above.
    if not isinstance(num_date, np.ndarray):
        num_date = np.array(num_date)

    # only a vector of dates makes sense here
    if len(np.squeeze(num_date).shape) > 1:
        # the original passed the message and the shape as two separate
        # TypeError arguments, so the %s was never substituted
        raise TypeError('1-d numpy array or a list expected, shape is %s'
                        % repr(num_date.shape))

    # add 0.45 seconds to avoid rounding problems when converting back
    dateobj = plt.num2date(num_date + 0.45/24/60/60)
    out = []
    for i in range(len(num_date)):
        row = []
        row.append(dateobj[i].year)
        # julian day: days since Jan 1 of the same year, 1-based
        row.append(int(np.floor(num_date[i]
                                - plt.date2num(datetime.datetime(dateobj[i].year, 1, 1)) + 1)))
        if secs:
            row.append(int(dateobj[i].hour*100 + dateobj[i].minute))
        else:
            # without a separate seconds column, round seconds into minutes
            row.append(int(dateobj[i].hour*100 + dateobj[i].minute
                           + int(np.round(dateobj[i].second/60.))))
        if secs:
            row.append(int(dateobj[i].second))
        out.append(row)
    return np.array(out, dtype=int)
 184   
def iso_time_to_date(isostrings, method='fast'):
    """
    Converts an ISO 8601 date & time string (well, slightly perverted)
    into a matplotlib date number.  Note that the '' implementation is
    not particularly fast as it uses several try/except blocks.  If
    efficiency is a concern, use one of the other methods.

    @type isostrings: list of strings
    @param isostrings: ISO 8601 date & time strings, e.g. "2010-07-07 12:30:00(.5)"

    @type method: string
    @param method: Switch to use different algorithm.
                   In order of decreasing speed:
                    - magic 40x
                    - hash  3x
                    - fast  4x (needs numpy>1.5)
                    - ''    1x
                   Set to '' to get good error checking/reporting.

    @rtype: np.array of floats
    @return: matplotlib date numbers

    >>> iso_time_to_date(["2010-07-07 00:00:00"])
    array([ 733960.])
    >>> iso_time_to_date(["2010-07-07 00:00:00","2010-07-07 00:00:00.5","2010-09-07 03:01:00.5"])
    array([ 733960.        ,  733960.00000579,  734022.12570023])
    >>> iso_time_to_date(["2010-07-07 00:00:00","2010-07-07 00:01:00","2010-09-07 03:01:00"])
    array([ 733960.        ,  733960.00069444,  734022.12569444])
    """
    # NOTE(review): the `def` line was missing in the corrupted source;
    # the default method='fast' is an assumption -- confirm upstream.
    if not np.iterable(isostrings):
        raise TypeError('isostrings is not a string nor iterable!')

    if method == 'fast':
        # parse each string into [year, month, day, hour, min, sec, musec]
        tmpar = np.zeros((len(isostrings), 7), dtype=int)
        for ii, isostr in enumerate(isostrings):
            li = isostr.split('.')
            if len(li) == 2:  # fractional seconds -> microseconds
                tmpar[ii, -1] = int(float('0.' + li.pop()) * 1e6)
            li = li[0].split(':')
            tmpar[ii, -2] = int(li.pop())   # seconds
            tmpar[ii, -3] = int(li.pop())   # minutes
            li = li[0].split(' ')
            tmpar[ii, -4] = int(li.pop())   # hours
            li = li[0].split('-')
            tmpar[ii, -5] = int(li.pop())   # day
            tmpar[ii, -6] = int(li.pop())   # month
            tmpar[ii, -7] = int(li.pop())   # year

        def convert_fast(arr):
            # vectorised conversion: look up the ordinal of each distinct
            # (year, month) pair only once
            out = np.ndarray(arr.shape[0])
            [yearmonths, ymind] = np.unique(arr[:, 0] + arr[:, 1]/100.,
                                            return_inverse=True)
            for ii, ym in enumerate(yearmonths):
                year = int(np.floor(ym))
                month = int(round((ym - year)*100))
                yearmonths[ii] = datetime.date(year, month, 1).toordinal()
            out = yearmonths[ymind]
            # day of month (1-based)
            out += arr[:, 2] - 1
            HOURS_PER_DAY = 24.
            MINUTES_PER_DAY = 60.*HOURS_PER_DAY
            SECONDS_PER_DAY = 60.*MINUTES_PER_DAY
            MUSECONDS_PER_DAY = 1e6*SECONDS_PER_DAY
            out += (arr[:, 3]/HOURS_PER_DAY + arr[:, 4]/MINUTES_PER_DAY +
                    arr[:, 5]/SECONDS_PER_DAY + arr[:, 6]/MUSECONDS_PER_DAY)
            return out

        return convert_fast(tmpar)
    elif method == 'hash':
        out = np.empty(len(isostrings))
        year_month_hash = {}  # (year + month/100.) -> ordinal of month start
        for ii, isostr in enumerate(isostrings):
            li = isostr.split(':')
            secs = float(li.pop())
            mins = int(li.pop())
            li = li[0].split(' ')
            hours = int(li.pop())
            li = li[0].split('-')
            days = int(li.pop())
            months = int(li.pop())
            years = int(li.pop())
            yearmonth = years + months/100.
            # dict.has_key() is gone in Python 3; `in` works in both
            if yearmonth not in year_month_hash:
                year_month_hash[yearmonth] = datetime.date(years, months, 1).toordinal()
            out[ii] = year_month_hash[yearmonth]
            out[ii] += days - 1
            HOURS_PER_DAY = 24.
            MINUTES_PER_DAY = 60.*HOURS_PER_DAY
            SECONDS_PER_DAY = 60.*MINUTES_PER_DAY
            out[ii] += (hours/HOURS_PER_DAY + mins/MINUTES_PER_DAY +
                        secs/SECONDS_PER_DAY)
        return out
    elif method == 'magic':
        # reinterpret the fixed-width byte strings as digit arrays
        str_len = 30
        if type(isostrings) == np.ndarray and np.issubdtype(isostrings.dtype, np.dtype('O')):
            isostrings = isostrings.astype('S'+str(str_len))
        elif type(isostrings) == list:
            isostrings = np.array(isostrings, 'S'+str(str_len))
        isobytes = isostrings.view(np.byte)
        isobytes = isobytes.reshape((isostrings.shape[0], str_len))
        isoints = isobytes - 48   # ASCII digits -> integer values
        isoints[isoints == -48] = 0  # padding NULs -> 0

        years = np.sum(isoints[:, 0:4]*np.array([1000, 100, 10, 1]), 1)
        months = np.sum(isoints[:, 5:7]*np.array([10, 1]), 1)
        years_months = years + months/100.

        # precompute the ordinal of every month start in the spanned range
        # (assumes the strings are in chronological order)
        year_month_hash = {}
        for year in range(years[0], years[-1]+1):
            for month in range(1, 13):
                year_month = year + month/100.
                year_month_hash[year_month] = datetime.date(year, month, 1).toordinal()

        days = np.empty(len(isostrings))
        for k in year_month_hash:
            days[years_months == k] = year_month_hash[k] - 1

        HOURS_PER_DAY = 24.
        MINUTES_PER_DAY = 60.*HOURS_PER_DAY
        SECONDS_PER_DAY = 60.*MINUTES_PER_DAY
        days += np.sum(isoints[:, 8:10]*np.array([10, 1]), 1)
        days += 1/HOURS_PER_DAY * np.sum(isoints[:, 11:13]*np.array([10, 1]), 1)
        days += 1/MINUTES_PER_DAY * np.sum(isoints[:, 14:16]*np.array([10, 1]), 1)
        days += 1/SECONDS_PER_DAY * np.sum(isoints[:, 17:19]*np.array([10, 1]), 1)
        if str_len > 19:  # fractional seconds, if present
            days += 1/SECONDS_PER_DAY * np.sum(isoints[:, 20:]*np.logspace(-1, -(str_len-20), 10), 1)
        return days
    else:
        # slow but well-checked reference implementation
        out = []
        for isostr in isostrings:
            try:
                out.append(plt.date2num(datetime.datetime.strptime(isostr, '%Y-%m-%d %H:%M:%S')))
                continue
            except ValueError:
                pass
            try:
                out.append(plt.date2num(datetime.datetime.strptime(isostr, '%Y-%m-%d %H:%M:%S.%f')))
                continue
            except ValueError:
                raise
        return np.array(out)
352   
353   
354   
355   
356   
def read_campbell_cr10x(input_file, headers=None, secs=False, year=None):
    """
    Reads the file in standard Campbell CR10X dataformat:

    number, year, julian day, time, data, ...

    Or if year is not None:
    number, julian day, time, data, ...

    @type input_file: string
    @param input_file: input file name

    @type headers: [string]
    @param headers: a list of headers to be given to the variable columns
                    (default is [var1, var2, var3...])

    @type secs: boolean
    @param secs: If true the fifth row is interpreted as seconds else as data

    @type year: integer
    @param year: If not None, then it is interpreted that the datafile
                 contains no year column and value of 'year' parameter is
                 used.  (note, the columns are counted from zero)

    @rtype: tuple
    @return: tuple (data, raw_data, metadata)

    >>> data, raw_data, metadata = read_campbell_cr10x('test_files/cr10x.dat')
    >>> data, metadata, raw_data # doctest:+ELLIPSIS
    (array([(732102.9722222222, -0.30595, 3.2896, 335.44),
           (732102.9791666667, -0.30629, 3.2656, 332.99),
           (732102.9861111111, -0.27962, 3.2405, 330.43),
           (732102.9930555556, -0.30513, 3.205, 326.81),
           (732103.0, -0.30523, 3.1689, 323.13),
           (732103.0069444445, -0.30457, 3.141, 320.29)],
          dtype=[('time', '<f8'), ('var0', '<f8'), ('var1', '<f8'), ('var2', '<f8')]), {'headers': ['time', 'var0', 'var1', 'var2'],
     'input_file': 'test_files/cr10x.dat',
     'raw_headers': ['station number',
                     'year',
                     'julian day',
                     'time',
                     'var0',
                     'var1',
                     'var2'],
     'secs': False,
     'units': [],
     'year': None}, array([[  1.05000000e+02,   2.00500000e+03,   1.56000000e+02,
              2.32000000e+03,  -3.05950000e-01,   3.28960000e+00,
              3.35440000e+02],
    ...
    """
    # NOTE(review): the `def` line was missing in the corrupted source;
    # the signature is restored from the docstring parameters.
    metadata = Metadata()
    metadata.__dict__['input_file'] = input_file
    metadata.secs = secs
    metadata.year = year

    # use the first line to figure out how many columns the file has
    first_line = readfile_raw(input_file, stop=1, separator=',')[0]
    header_len = len(first_line)
    if headers is None:
        # build the default headers: standard columns + var0, var1, ...
        if secs:
            if year is None:
                head = ['station number', 'year', 'julian day', 'time', 'secs']
            else:
                head = ['station number', 'julian day', 'time', 'secs']
        else:
            if year is None:
                head = ['station number', 'year', 'julian day', 'time']
            else:
                head = ['station number', 'julian day', 'time']
        std_head_len = len(head)
        headers = head + ['var'+str(ii) for ii in range(header_len - std_head_len)]
    else:
        if len(headers) != len(first_line):
            raise TypeError('Given header does not have the same length as the first row of the file.')
        if secs:
            std_head_len = 5 if year is None else 4
        else:
            std_head_len = 4 if year is None else 3
    metadata.raw_headers = headers
    metadata.headers = ['time'] + headers[std_head_len:]

    raw_data = np.genfromtxt(input_file, delimiter=',')
    tmp_dat = copy.deepcopy(raw_data)

    # insert a constant year column if the file does not contain one.
    # BUGFIX: the original hstacked the 1-d slice tmp_dat[:,0] and a 1-d
    # ones vector with a 2-d array, which raises ValueError; use 2-d
    # column slices instead.
    if year is not None:
        tmp_dat = np.hstack((tmp_dat[:, 0:1],
                             year*np.ones((tmp_dat.shape[0], 1)),
                             tmp_dat[:, 1:]))
    last_ind_time = 5 if secs else 4
    # convert the [year, julian day, time(, secs)] columns to one time column
    tmp_t = campbell2num_date(tmp_dat[:, 1:last_ind_time])
    tmp_t = tmp_t[:, np.newaxis]
    tmp_dat = np.hstack((tmp_t, tmp_dat[:, last_ind_time:]))

    # view as a structured array with named float64 fields
    dtype_data = np.dtype([(head, np.float64) for head in metadata.headers])
    data = np.ascontiguousarray(tmp_dat).view(dtype_data).squeeze()

    return data, raw_data, metadata
 469   
def _toa5_field_type(field):
    # Classify a raw TOA5 field string as 'int', 'float' or 'str'.
    # Replaces eval() on file contents, which was a code-injection hazard.
    try:
        int(field)
        return 'int'
    except ValueError:
        pass
    try:
        float(field)
        return 'float'
    except ValueError:
        return 'str'


def read_campbell_TAO5(input_file, given_headers=None):
    """
    Reads the file in TAO5 Campbell dataformat as used by CR1000:

    Resources:
    http://www.campbellsci.com/documents/manuals/loggernet_3-1.pdf
    Section B.1.4

    Header format
    file format, station, logger type, serial number, OS version, logger-program file name, logger-program file checksum, table name
    "TIMESTAMP","RECORD",fieldname,fieldname,...
    "TS","RN", field-units, field-units,...
    "","",field recording method,field recording method,...

    If the fieldname is not specified then a header of format 'var1'
    etc will be given, except if specified in the list L{given_headers}.

    @type input_file: string
    @param input_file: input file name

    @type given_headers: list
    @param given_headers: list of header names to give in the record
                          array data. If an entry is None then the
                          default one is used. Note that the field
                          'RECORD' is ignored in the data and thus
                          does not feature in this list.

    @rtype: tuple
    @return: tuple (data, raw_data, metadata)

    @note: It is assumed that any string-like thing is a date+time string

    >>> d,rd,md = read_campbell_TAO5('test_files/TOA5_cr1000.dat')
    >>> d,rd,md # doctest:+ELLIPSIS
    (array([ (733960.0, 13.72, 12.6, 733959.9930787038, 13.43, 10.2, 733959.5997685185, 4.493, 7),
           (733961.0, 13.78, 12.48, 733960.2569675926, 13.15, 17.09, 733960.6921296297, 4.064, 8),
           (733962.0416666666, 13.74, 12.5, 733961.2257175926, 13.07, 17.36, 733961.6785185186, 5.637, 10)],
          dtype=[('TIMESTAMP', '<f8'), ('Batt_Volt_Max', '<f8'), ('Batt_Volt_Min', '<f8'), ('Batt_Volt_TMn', '<f8'), ('Batt_Volt_Avg', '<f8'), ('Panel_Temp_Max', '<f8'), ('Panel_Temp_TMx', '<f8'), ('var7', '<f8'), ('Panel_Temp_Avg', '<i8')]), ...
    """
    # NOTE(review): the `def` line was missing in the corrupted source;
    # a None sentinel replaces the (presumed) mutable [] default.
    if given_headers is None:
        given_headers = []
    metadata = Metadata()
    TOA5_info = {}
    TOA5_info_fields_line1 = ['file_format', 'station', 'logger_type', 'serial_number',
                           'OS_version', 'logger-program_file_name',
                           'logger-program_file_checksum', 'table_name']

    # first 4 lines are header, 5th is the first data line
    header_lines = readfile_raw(input_file, stop=5, separator=',')

    for ii, key in enumerate(TOA5_info_fields_line1):
        try:
            TOA5_info[key] = header_lines[0][ii].strip('"')
        except AttributeError:  # non-string field: keep as is
            TOA5_info[key] = header_lines[0][ii]
    TOA5_info['fields'] = [st.strip('"') for st in header_lines[1]]
    TOA5_info['units'] = [st.strip('"') for st in header_lines[2]]
    TOA5_info['recording_type'] = [st.strip('"') for st in header_lines[3]]
    metadata.__dict__['TOA5_info'] = TOA5_info
    metadata.__dict__['input_file'] = input_file
    metadata.raw_units = TOA5_info['units']
    # drop the units of the ignored RECORD column
    metadata.units = metadata.raw_units[0:1] + metadata.raw_units[2:]

    metadata.headers = []
    metadata.raw_headers = []
    ind = -1
    # RECORD does not feature in given_headers, hence the -1
    if len(given_headers) > 0 and len(given_headers) != (len(TOA5_info['fields'])-1):
        raise ValueError('Variable given_headers is not of right length. It is %i but should be %i' % (len(given_headers), len(TOA5_info['fields'])-1))
    for ii, hd in enumerate(TOA5_info['fields']):
        if hd == "RECORD":  # kept in raw_headers only
            metadata.raw_headers.append(hd)
            continue
        ind += 1
        if len(given_headers) > 0 and given_headers[ind] is not None:
            metadata.raw_headers.append(given_headers[ind])
            metadata.headers.append(given_headers[ind])
            continue
        if hd != '':
            metadata.raw_headers.append(hd)
            metadata.headers.append(hd)
        else:
            head = 'var' + str(ind)
            metadata.raw_headers.append(head)
            metadata.headers.append(head)

    # infer each column's dtype from the first data line
    first_line = header_lines[-1]
    raw_dtypes = [_toa5_field_type(field) for field in first_line]
    metadata.raw_dtypes = raw_dtypes

    # strings (quoted timestamps) are stored as python objects
    dt = [rdt if rdt != 'str' else 'O' for rdt in metadata.raw_dtypes]
    dtypes_raw = np.dtype(list(zip(metadata.raw_headers, dt)))

    # genfromtxt keeps the double quotes, so strip them in a converter
    remove_double_quotes = lambda str_: str_.replace('"', '')
    converters = {}
    for ii, _ in enumerate(metadata.raw_dtypes):
        converters[ii] = remove_double_quotes

    if LooseVersion(np.__version__) < LooseVersion('1.5'):
        # old numpy: fall back to matplotlib's csv2rec
        raw_data = mlab.csv2rec(input_file, names=metadata.raw_headers,
                            skiprows=4, delimiter=',').view(np.ndarray)

        # drop the RECORD column (index 1) from the output dtype
        dtype_data = [raw_data.dtype[ii] for ii in [0] + list(range(2, len(raw_data.dtype)))]
        # object columns (date strings) become float time columns
        dtype_data = [dt_ if dt_ != np.dtype('O') else np.dtype(float) for dt_ in dtype_data]

        data = np.zeros(len(raw_data), dtype=list(zip(metadata.headers, dtype_data)))

        for head in metadata.headers:
            if raw_data.dtype[head] == np.dtype('object'):
                # csv2rec already parsed the dates into datetime objects
                data[head] = plt.date2num(raw_data[head])
            else:
                data[head] = raw_data[head]
    else:
        raw_data = np.genfromtxt(input_file, delimiter=',', skip_header=4,
                                 dtype=dtypes_raw, converters=converters)

        # drop the RECORD column (index 1) from the output dtype
        dtype_data = [raw_data.dtype[ii] for ii in [0] + list(range(2, len(raw_data.dtype)))]
        # object columns (date strings) become float time columns
        dtype_data = [dt_ if dt_ != np.dtype('O') else np.dtype(float) for dt_ in dtype_data]

        data = np.zeros(len(raw_data), dtype=list(zip(metadata.headers, dtype_data)))

        for head in metadata.headers:
            if raw_data.dtype[head] == np.dtype('object'):
                # any string-like column is assumed to be a date+time string
                data[head] = iso_time_to_date(raw_data[head])
            else:
                data[head] = raw_data[head]

    return data, raw_data, metadata
 618   
def read_maw_file(input_file):
    """
    Reads a standard MAW file (as only used by me, Mauro A Werder)
    with format:

    #maw name of dataset
    # comment line
    #metadata is an metadata tag:
    #metadata.eg = 'asdf'
    # will create a attribute in metadata.eg with value 'asdf'
    #metadata.num = '1.234'
    #
    # the last comment line has the format and will be put into
    # metadata['headers'], metadata['units'] and use as datatype:
    # name0 (units) [dtype], name1 (units) [dtype], name2 (units) [dtype], ...
    val0, val1, val2 ...
    .
    .
    .

    dtypes is one of the following: int, float, str, time_str

    Time is represented as an ISO 8601 string: "yyyy/mm/dd HH:MM:SS(.FF)"
    excluding the 'T' without time zone information (which should be
    given in the units as eg (UTC-7)).

    The idea is to have an easy to parse text representation of (a
    subset of) what can be contained in a netcdf3 file.

    @type input_file: string
    @param input_file: input file name

    @rtype: tuple
    @return: tuple (data, raw_data, metadata)

    >>> d,rd,md = read_maw_file('test_files/maw_file_test.maw')
    >>> d,rd,md
    (array([(733966.3673611111, 0.0, 0.303, 5, 'asdd asdlkj asl'),
           (733966.3722222223, 15.0, 0.232, 8866, 'asdd asdlkj asl'),
           (733966.3736111111, 25.0, 0.2055, 5, '7'),
           (733966.3770833333, 50.0, 0.162, 5, '')],
          dtype=[('time', '<f8'), ('var1', '<f8'), ('var2', '<f8'), ('var3', '<i8'), ('var4', '|O8')]), array([('2010-07-13 08:49:00', 0.0, 0.303, 5, 'asdd asdlkj asl'),
           ('2010-07-13 08:56:00', 15.0, 0.232, 8866, 'asdd asdlkj asl'),
           ('2010-07-13 08:58:00', 25.0, 0.2055, 5, '7'),
           ('2010-07-13 09:03:00', 50.0, 0.162, 5, '')],
          dtype=[('time', '|O8'), ('var1', '<f8'), ('var2', '<f8'), ('var3', '<i8'), ('var4', '|O8')]), {'calibaration_solution_concentration': 10.0,
     'calibaration_solution_concentration_units': 'g/l',
     'dtypes': ['time_str', 'float', 'float', 'int', 'str'],
     'experimenter': 'MAW + UM',
     'headers': ['time', 'var1', 'var2', 'var3', 'var4'],
     'raw_headers': ['time', 'var1', 'var2', 'var3', 'var4'],
     'title': 'Test file',
     'units': ['UTC-7', 'ml', '', 'm^3', '']})
    """
    # NOTE(review): the `def` line was missing in the corrupted source;
    # the signature is restored from the docstring.
    raw_data = readfile_raw(input_file, separator='no_split')
    data = []
    comment = []
    metadata = Metadata()

    # separate comment lines from data lines
    for line in raw_data:
        if line.startswith('#'):
            comment.append(line.strip())
        else:
            tmp = line.split(',')
            tmp = [tt.strip(' "') for tt in tmp]
            data.append(tuple(tmp))

    # the first comment line carries the title
    if not comment[0].startswith('#maw'):
        raise TypeError("File does not start with '#maw'")
    else:
        # BUGFIX: str.strip('#maw ') removes any of the characters
        # '#', 'm', 'a', 'w', ' ' from *both* ends and can mangle the
        # title; cut the literal prefix instead.
        metadata.title = comment[0][len('#maw'):].strip()
    for line in comment:
        if line.startswith('#metadata.'):
            tmp = line.split('#metadata.')[1]
            tmp = tmp.split('=')
            tmp = [t.strip() for t in tmp]
            if tmp[1] != 'nan':
                # SECURITY: eval() on file contents executes arbitrary
                # code; only read trusted files (consider
                # ast.literal_eval as a safer replacement).
                metadata.__dict__[tmp[0]] = eval(tmp[1])
            else:
                metadata.__dict__[tmp[0]] = float('nan')

    # the last comment line describes the columns:
    # name0 (units) [dtype], name1 (units) [dtype], ...
    last_line = [st.strip() for st in comment[-1].strip('#').split(',')]
    headers = []
    units = []
    dtypes = []
    for head in last_line:
        tmp = head.split('(')
        headers.append(tmp[0].strip())
        tmp = tmp[1].split(')')
        units.append(tmp[0].strip())
        dtypes.append(tmp[1].strip(' []'))
        if dtypes[-1] == '':
            raise ValueError('No datatype given in file')
    metadata.__dict__['headers'] = headers
    metadata.__dict__['raw_headers'] = headers
    metadata.__dict__['units'] = units
    metadata.__dict__['dtypes'] = dtypes

    # raw dtypes: keep time and str columns as python objects
    dtypes_raw = [np.dtype(dt) if dt != 'time_str'
                  else np.dtype('O') for dt in dtypes]
    dtypes_raw = [np.dtype(dt) if dt != 'str'
                  else np.dtype('O') for dt in dtypes_raw]
    # processed dtypes: the time column becomes a float (mpl date number)
    dtypes_processed = [np.dtype(dt) if dt != 'time_str'
                        else np.dtype(float) for dt in dtypes]
    dtypes_processed = [np.dtype(dt) if dt != 'str'
                        else np.dtype('O') for dt in dtypes_processed]

    # list(zip(...)) so np.dtype also accepts it on Python 3
    raw_data = np.array(data, list(zip(headers, dtypes_raw)))
    data = np.zeros(len(raw_data), dtype=list(zip(headers, dtypes_processed)))
    # convert time-string columns, copy the rest verbatim
    for ii, head in enumerate(headers):
        if dtypes[ii] == 'time_str':
            data[head] = iso_time_to_date(raw_data[head])
        else:
            data[head] = raw_data[head]

    return data, raw_data, metadata
 748   
749   
750   
751   
752   
753   
754   
755   
756   
757   
758