"""
This file contains a collection of file reader functions which can be
called when reading in the raw data files. The data files must be in an
array (1D or 2D) format.

The functions must have the following calling signature:
reader_fun(input_file, other_parameters).

The time (in matplotlib format) always has to be in the first column.

Each reader returns a tuple (data, raw_data, metadata). The data has
the format: time (in matplotlib format) followed by the other
(non-time related) data columns.
"""

from __future__ import division

import numpy as np
import pylab as plt
import datetime
import copy
import matplotlib.mlab as mlab
from distutils.version import LooseVersion

from fieldpy.core.extra_classes import Metadata


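# A minimal sketch (not part of the original module) of a reader that
# follows the calling convention described in the module docstring: it
# takes the input file plus reader-specific parameters and returns the
# tuple (data, raw_data, metadata). The column name 'var0' and the
# all-float dtype are illustrative assumptions, not fieldpy API.
#
# def read_example(input_file):
#     metadata = Metadata()
#     metadata.__dict__['input_file'] = input_file
#     metadata.headers = ['time', 'var0']
#     raw_data = np.genfromtxt(input_file, delimiter=',')
#     dtype = np.dtype([(h, np.float64) for h in metadata.headers])
#     data = np.ascontiguousarray(raw_data).view(dtype).squeeze()
#     return data, raw_data, metadata
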
def readfile_raw(input_file, separator=None, comment=None,
                 start=0, stop=-1, ignore_empty=False):
    """Reads a text file line by line and returns the raw data in
    the nested list raw_data. Ignores all trailing empty lines.

    @type input_file: string
    @param input_file: The file to read.

    @type separator: string
    @param separator: Column separator in file; if equal to 'no_split',
                      columns will not be split.

    @type comment: string
    @param comment: a string which comments out the rest of the line

    @type start: int
    @param start: which line to start on (default 0)

    @type stop: int
    @param stop: which line to stop on (default -1, i.e. read to the end)

    @type ignore_empty: boolean
    @param ignore_empty: if C{True}, ignore empty lines

    @rtype: list
    @return: Returns a nested list containing the split raw file lines (as strings).

    >>> readfile_raw('test_files/maw_file_test.maw', separator=',', comment='#')
    [['2010-07-13 08:49:00', '0', '0.3030', '5', 'asdd asdlkj asl'], ['2010-07-13 08:56:00', '15', '0.2320', '8866', 'asdd asdlkj asl'], ['2010-07-13 08:58:00', '25', '0.2055', '5', '7'], ['2010-07-13 09:03:00', '50', '0.1620', '5', '']]
    """

    raw_data = []
    rw_opt = 'rU'
    line_no = 0
    with open(input_file, rw_opt) as fil:
        # skip the first `start` lines
        for i in range(start):
            line = fil.readline()
            line_no += 1
        for line in fil:
            if line_no == stop:
                break
            if comment is not None:
                # discard everything after the comment string
                line = line.split(comment)[0].strip()
                if line == '':
                    continue
            if separator != 'no_split':
                tmp = line.strip()
                if ignore_empty and tmp == '':
                    continue
                tmp = [el.strip() for el in tmp.split(separator)]
            else:
                tmp = line.strip()
            raw_data.append(tmp)
            line_no += 1

    # remove all trailing empty lines
    while raw_data and raw_data[-1] == '':
        raw_data.pop()
    return raw_data


def campbell2num_date(campbell_date):
    """Convert our standard Campbell notation into pylab format.

    @type campbell_date: list of lists or numpy array
    @param campbell_date: A numpy array with Campbell dates

    @rtype: numpy array
    @return: vector of matplotlib date numbers

    >>> cd1 = [[2006, 139, 1245]]
    >>> cd2 = [[2006, 139, 1245, 34]]
    >>> campbell2num_date(cd1)
    array([ 732450.53125])
    >>> campbell2num_date(cd2)
    array([ 732450.53164352])
    >>> np.alltrue(cd1 == num_date2campbell(campbell2num_date(cd1)))
    True
    >>> np.alltrue(cd2 == num_date2campbell(campbell2num_date(cd2)))
    False
    >>> np.alltrue(cd2 == num_date2campbell(campbell2num_date(cd2), secs=True))
    True
    """
    if not isinstance(campbell_date, np.ndarray):
        campbell_date = np.array(campbell_date)

    if len(campbell_date.shape) == 1 or len(campbell_date.shape) > 2:
        raise TypeError('2-d numpy array or list of lists expected.')

    # append a zero seconds column if none is given
    if campbell_date.shape[1] == 3:
        campbell_date = np.hstack((campbell_date,
                                   np.zeros((campbell_date.shape[0], 1))))

    out = np.zeros(campbell_date.shape[0])

    # rows are [year, julian day, time as hhmm integer, seconds]
    for n, i in enumerate(campbell_date):
        out[n] = (plt.date2num(datetime.datetime(int(i[0]), 1, 1))
                  + i[1] - 1.0
                  + np.floor(i[2]/100.0)/24.0                      # hours
                  + (i[2] - np.floor(i[2]/100.0)*100.0)/24.0/60.0  # minutes
                  + i[3]/24./3600.)                                # seconds
    return out
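# Worked example of the hhmm encoding handled above: for a time field of
# 1245, np.floor(1245/100.) = 12 gives the hours and 1245 - 12*100 = 45
# the minutes, so [2006, 139, 1245] is 12:45 on day 139 of 2006, which
# matches the first doctest value above (732450.53125).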


def num_date2campbell(num_date, secs=False):
    """Convert a numerical date as in pylab into our standard Campbell
    notation (as an array or a single date).

    @type num_date: numpy array or list
    @param num_date: vector of pylab dates

    @type secs: boolean
    @param secs: if C{True} seconds are appended

    @return: numpy array with rows [year, julian day, time (,seconds)]

    >>> dt = datetime.datetime(2006,6,6,12,37,25)
    >>> nd = plt.date2num(dt)
    >>> num_date2campbell([nd])
    array([[2006,  157, 1237]])
    >>> num_date2campbell([nd], secs=True)
    array([[2006,  157, 1237,   25]])
    """
    if not isinstance(num_date, np.ndarray):
        num_date = np.array(num_date)

    if len(np.squeeze(num_date).shape) > 1:
        raise TypeError('1-d numpy array or a list expected, shape is %s'
                        % num_date.shape.__repr__())

    # add 0.45 seconds to avoid rounding-down artifacts in num2date
    dateobj = plt.num2date(num_date + 0.45/24/60/60)
    out = []
    for i in xrange(len(num_date)):
        tmp_list = []
        tmp_list.append(dateobj[i].year)
        # julian day: days since 1 January of that year, plus one
        tmp_list.append(int(np.floor(num_date[i]
                        - plt.date2num(datetime.datetime(dateobj[i].year, 1, 1)) + 1)))
        if secs:
            tmp_list.append(int(dateobj[i].hour*100 + dateobj[i].minute))
            tmp_list.append(int(dateobj[i].second))
        else:
            # without a seconds column, round to the nearest minute
            tmp_list.append(int(dateobj[i].hour*100 + dateobj[i].minute
                                + int(np.round(dateobj[i].second/60.))))
        out.append(tmp_list)
    return np.array(out, dtype=int)


def iso_time_to_date(isostrings, method='magic'):
    """
    Converts ISO 8601 date & time strings (well, slightly perverted
    ones) into matplotlib date numbers. Note that the fallback
    implementation (method='') is not particularly fast as it uses
    several try/except blocks. If efficiency is a concern, hard-code it.

    @type isostrings: list of strings
    @param isostrings: ISO 8601 date & time strings; the following formats
                       are supported: 'YYYY-MM-DD HH:MM:SS' and
                       'YYYY-MM-DD HH:MM:SS.f' (with fractional seconds)

    @type method: string
    @param method: Switch to use different algorithms.
                   In order of decreasing speed:
                    - magic 40x
                    - fast 4x (needs numpy>1.5)
                    - hash 3x
                    - '' 1x
                   Set to '' to get good error checking/reporting.

    @rtype: np.array of floats
    @return: matplotlib date numbers

    >>> iso_time_to_date(["2010-07-07 00:00:00"])
    array([ 733960.])
    >>> iso_time_to_date(["2010-07-07 00:00:00","2010-07-07 00:00:00.5","2010-09-07 03:01:00.5"])
    array([ 733960.        ,  733960.00000579,  734022.12570023])
    >>> iso_time_to_date(["2010-07-07 00:00:00","2010-07-07 00:01:00","2010-09-07 03:01:00"])
    array([ 733960.        ,  733960.00069444,  734022.12569444])
    """
    if not np.iterable(isostrings):
        raise TypeError('isostrings is not iterable!')

    if method == 'fast':
        # columns: year, month, day, hour, minute, second, microsecond
        tmpar = np.zeros((len(isostrings), 7), dtype=int)

        for ii, isostr in enumerate(isostrings):
            li = isostr.split('.')
            if len(li) == 2:
                # fractional seconds -> microseconds
                tmpar[ii, -1] = int(float('0.' + li.pop()) * 1e6)
            li = li[0].split(':')
            tmpar[ii, -2] = int(li.pop())  # seconds
            tmpar[ii, -3] = int(li.pop())  # minutes
            li = li[0].split(' ')
            tmpar[ii, -4] = int(li.pop())  # hours
            li = li[0].split('-')
            tmpar[ii, -5] = int(li.pop())  # day
            tmpar[ii, -6] = int(li.pop())  # month
            tmpar[ii, -7] = int(li.pop())  # year

        def convert_fast(arr):
            # do the expensive date arithmetic only once per unique
            # (year, month) pair (return_inverse needs numpy>1.5)
            yearmonths, ymind = np.unique(arr[:, 0] + arr[:, 1]/100.,
                                          return_inverse=True)
            for ii, ym in enumerate(yearmonths):
                year = int(np.floor(ym))
                month = int(round((ym - year)*100))
                yearmonths[ii] = datetime.date(year, month, 1).toordinal()

            out = yearmonths[ymind]
            out += arr[:, 2] - 1

            HOURS_PER_DAY = 24.
            MINUTES_PER_DAY = 60.*HOURS_PER_DAY
            SECONDS_PER_DAY = 60.*MINUTES_PER_DAY
            MUSECONDS_PER_DAY = 1e6*SECONDS_PER_DAY
            out += (arr[:, 3]/HOURS_PER_DAY + arr[:, 4]/MINUTES_PER_DAY +
                    arr[:, 5]/SECONDS_PER_DAY + arr[:, 6]/MUSECONDS_PER_DAY)
            return out

        return convert_fast(tmpar)
    elif method == 'hash':
        out = np.empty(len(isostrings))
        # cache the ordinal of the first of each (year, month) encountered
        year_month_hash = {}

        for ii, isostr in enumerate(isostrings):
            li = isostr.split(':')
            secs = float(li.pop())
            mins = int(li.pop())
            li = li[0].split(' ')
            hours = int(li.pop())
            li = li[0].split('-')
            days = int(li.pop())
            months = int(li.pop())
            years = int(li.pop())

            yearmonth = years + months/100.
            if yearmonth not in year_month_hash:
                year_month_hash[yearmonth] = datetime.date(years, months, 1).toordinal()

            out[ii] = year_month_hash[yearmonth]
            out[ii] += days - 1

            HOURS_PER_DAY = 24.
            MINUTES_PER_DAY = 60.*HOURS_PER_DAY
            SECONDS_PER_DAY = 60.*MINUTES_PER_DAY
            out[ii] += (hours/HOURS_PER_DAY + mins/MINUTES_PER_DAY +
                        secs/SECONDS_PER_DAY)
        return out
    elif method == 'magic':
        str_len = 30
        # convert to fixed-width byte strings so they can be viewed as an
        # array of raw ASCII bytes
        if type(isostrings) == np.ndarray and np.issubdtype(isostrings.dtype, np.dtype('O')):
            isostrings = isostrings.astype('S' + str(str_len))
        elif type(isostrings) == list:
            isostrings = np.array(isostrings, 'S' + str(str_len))

        isobytes = isostrings.view(np.byte)
        isobytes = isobytes.reshape((isostrings.shape[0], str_len))

        # ASCII digits start at 48; padding NUL bytes map to 0
        isoints = isobytes - 48
        isoints[isoints == -48] = 0

        years = np.sum(isoints[:, 0:4]*np.array([1000, 100, 10, 1]), 1)
        months = np.sum(isoints[:, 5:7]*np.array([10, 1]), 1)
        years_months = years + months/100.

        # precompute the ordinal of the first of every month in the range
        year_month_hash = {}
        for year in range(years.min(), years.max()+1):
            for month in range(1, 13):
                year_month = year + month/100.
                year_month_hash[year_month] = datetime.date(year, month, 1).toordinal()

        days = np.empty(len(isostrings))
        for k in year_month_hash:
            days[years_months == k] = year_month_hash[k] - 1

        HOURS_PER_DAY = 24.
        MINUTES_PER_DAY = 60.*HOURS_PER_DAY
        SECONDS_PER_DAY = 60.*MINUTES_PER_DAY
        days += np.sum(isoints[:, 8:10]*np.array([10, 1]), 1)
        days += 1/HOURS_PER_DAY * np.sum(isoints[:, 11:13]*np.array([10, 1]), 1)
        days += 1/MINUTES_PER_DAY * np.sum(isoints[:, 14:16]*np.array([10, 1]), 1)
        days += 1/SECONDS_PER_DAY * np.sum(isoints[:, 17:19]*np.array([10, 1]), 1)
        if str_len > 19:
            # fractional second digits occupy the remaining byte positions
            days += 1/SECONDS_PER_DAY * np.sum(isoints[:, 20:]*np.logspace(-1, -(str_len-20), 10), 1)
        return days
    else:
        out = []
        for isostr in isostrings:
            try:
                out.append(plt.date2num(datetime.datetime.strptime(isostr, '%Y-%m-%d %H:%M:%S')))
                continue
            except ValueError:
                pass
            try:
                out.append(plt.date2num(datetime.datetime.strptime(isostr, '%Y-%m-%d %H:%M:%S.%f')))
                continue
            except ValueError:
                raise
        return np.array(out)
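# A minimal sketch (illustrative, not part of the module API) of the
# byte-view trick that method='magic' above relies on: the fixed-width
# byte strings are reinterpreted as an array of raw ASCII bytes, so all
# digits can be converted to integers in one vectorised step.
#
# s = np.array(["2010-07-07 00:00:00"], dtype='S30')
# b = s.view(np.byte).reshape(len(s), 30) - 48  # '0'..'9' -> 0..9
# year = np.sum(b[:, 0:4]*np.array([1000, 100, 10, 1]), 1)  # -> [2010]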


def read_campbell_cr10x(input_file, headers=None, secs=False, year=None):
    """
    Reads a file in the standard Campbell CR10X data format:

    number, year, julian day, time, data, ...

    or, if year is not None:

    number, julian day, time, data, ...

    @type input_file: string
    @param input_file: input file name

    @type headers: [string]
    @param headers: a list of headers to be given to the variable columns
                    (default is [var0, var1, var2...])

    @type secs: boolean
    @param secs: If C{True} the fifth column is interpreted as seconds,
                 otherwise as data

    @type year: integer
    @param year: If not None, it is assumed that the datafile contains
                 no year column and the value of the 'year' parameter
                 is used instead. (note, the columns are counted from zero)

    @rtype: tuple
    @return: tuple (data, raw_data, metadata)

    >>> data, raw_data, metadata = read_campbell_cr10x('test_files/cr10x.dat')
    >>> data, metadata, raw_data # doctest:+ELLIPSIS
    (array([(732102.9722222222, -0.30595, 3.2896, 335.44),
           (732102.9791666667, -0.30629, 3.2656, 332.99),
           (732102.9861111111, -0.27962, 3.2405, 330.43),
           (732102.9930555556, -0.30513, 3.205, 326.81),
           (732103.0, -0.30523, 3.1689, 323.13),
           (732103.0069444445, -0.30457, 3.141, 320.29)],
          dtype=[('time', '<f8'), ('var0', '<f8'), ('var1', '<f8'), ('var2', '<f8')]), {'headers': ['time', 'var0', 'var1', 'var2'],
     'input_file': 'test_files/cr10x.dat',
     'raw_headers': ['station number',
                     'year',
                     'julian day',
                     'time',
                     'var0',
                     'var1',
                     'var2'],
     'secs': False,
     'units': [],
     'year': None}, array([[  1.05000000e+02,   2.00500000e+03,   1.56000000e+02,
              2.32000000e+03,  -3.05950000e-01,   3.28960000e+00,
              3.35440000e+02],
           ...
    """

    metadata = Metadata()
    metadata.__dict__['input_file'] = input_file
    metadata.secs = secs
    metadata.year = year

    first_line = readfile_raw(input_file, stop=1, separator=',')[0]

    header_len = len(first_line)
    if headers is None:
        if secs:
            if year is None:
                head = ['station number', 'year', 'julian day', 'time', 'secs']
            else:
                head = ['station number', 'julian day', 'time', 'secs']
        else:
            if year is None:
                head = ['station number', 'year', 'julian day', 'time']
            else:
                head = ['station number', 'julian day', 'time']
        std_head_len = len(head)
        headers = head + ['var' + str(ii) for ii in range(header_len - std_head_len)]
    else:
        if len(headers) != len(first_line):
            raise TypeError('Given header does not have the same length as the first row of the file.')
        if secs:
            if year is None:
                std_head_len = 5
            else:
                std_head_len = 4
        else:
            if year is None:
                std_head_len = 4
            else:
                std_head_len = 3
    metadata.raw_headers = headers
    metadata.headers = ['time'] + headers[std_head_len:]

    raw_data = np.genfromtxt(input_file, delimiter=',')
    tmp_dat = copy.deepcopy(raw_data)

    # insert a constant year column if the file does not contain one
    if year is not None:
        tmp_dat = np.hstack((tmp_dat[:, 0:1],
                             year*np.ones((tmp_dat.shape[0], 1)),
                             tmp_dat[:, 1:]))
    if secs:
        last_ind_time = 5
    else:
        last_ind_time = 4
    # collapse the [year, julian day, time(, secs)] columns into one time column
    tmp_t = campbell2num_date(tmp_dat[:, 1:last_ind_time])
    tmp_t = tmp_t[:, np.newaxis]
    tmp_dat = np.hstack((tmp_t, tmp_dat[:, last_ind_time:]))

    # view the 2-d array as a record array with named columns
    dtype_data = np.dtype([(head, np.float64) for head in metadata.headers])
    data = np.ascontiguousarray(tmp_dat).view(dtype_data).squeeze()

    return data, raw_data, metadata
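# Example (hypothetical file name; a sketch, not a test): reading a CR10X
# file whose records carry no year column, supplying the year explicitly:
#
#   data, raw_data, md = read_campbell_cr10x('cr10x_noyear.dat', year=2005)
#   data['time']   # matplotlib date numbers built from julian day + time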


def read_campbell_TAO5(input_file, given_headers=[]):
    """
    Reads a file in the TOA5 Campbell data format as used by the CR1000:

    Resources:
    http://www.campbellsci.com/documents/manuals/loggernet_3-1.pdf
    Section B.1.4

    Header format:
    file format, station, logger type, serial number, OS version, logger-program file name, logger-program file checksum, table name
    "TIMESTAMP","RECORD",fieldname,fieldname,...
    "TS","RN", field-units, field-units,...
    "","",field recording method,field recording method,...

    If a fieldname is not specified then a header of the format 'var1'
    etc. will be given, except if specified in the list L{given_headers}.

    @type input_file: string
    @param input_file: input file name

    @type given_headers: list
    @param given_headers: list of header names to give in the record
                          array data. If an entry is None then the
                          default one is used. Note that the field
                          'RECORD' is ignored in the data and thus
                          does not feature in this list.

    @rtype: tuple
    @return: tuple (data, raw_data, metadata)

    @note: It is assumed that any string-like thing is a date+time string

    >>> d,rd,md = read_campbell_TAO5('test_files/TOA5_cr1000.dat')
    >>> d,rd,md # doctest:+ELLIPSIS
    (array([ (733960.0, 13.72, 12.6, 733959.9930787038, 13.43, 10.2, 733959.5997685185, 4.493, 7),
           (733961.0, 13.78, 12.48, 733960.2569675926, 13.15, 17.09, 733960.6921296297, 4.064, 8),
           (733962.0416666666, 13.74, 12.5, 733961.2257175926, 13.07, 17.36, 733961.6785185186, 5.637, 10)],
          dtype=[('TIMESTAMP', '<f8'), ('Batt_Volt_Max', '<f8'), ('Batt_Volt_Min', '<f8'), ('Batt_Volt_TMn', '<f8'), ('Batt_Volt_Avg', '<f8'), ('Panel_Temp_Max', '<f8'), ('Panel_Temp_TMx', '<f8'), ('var7', '<f8'), ('Panel_Temp_Avg', '<i8')]), ...
    """
    metadata = Metadata()
    TOA5_info = {}
    TOA5_info_fields_line1 = ['file_format', 'station', 'logger_type', 'serial_number',
                              'OS_version', 'logger-program_file_name',
                              'logger-program_file_checksum', 'table_name']

    # the first four lines are headers; the fifth is the first data line
    header_lines = readfile_raw(input_file, stop=5, separator=',')

    for ii, key in enumerate(TOA5_info_fields_line1):
        try:
            TOA5_info[key] = header_lines[0][ii].strip('"')
        except AttributeError:
            TOA5_info[key] = header_lines[0][ii]
    TOA5_info['fields'] = [st.strip('"') for st in header_lines[1]]
    TOA5_info['units'] = [st.strip('"') for st in header_lines[2]]
    TOA5_info['recording_type'] = [st.strip('"') for st in header_lines[3]]
    metadata.__dict__['TOA5_info'] = TOA5_info
    metadata.__dict__['input_file'] = input_file
    metadata.raw_units = TOA5_info['units']
    # drop the units of the ignored RECORD field
    metadata.units = metadata.raw_units[0:1] + metadata.raw_units[2:]

    metadata.headers = []
    metadata.raw_headers = []
    ind = -1

    if len(given_headers) > 0 and len(given_headers) != (len(TOA5_info['fields']) - 1):
        raise ValueError('Variable given_headers is not of right length. It is %i but should be %i'
                         % (len(given_headers), len(TOA5_info['fields']) - 1))
    for ii, hd in enumerate(TOA5_info['fields']):
        if hd == "RECORD":
            metadata.raw_headers.append(hd)
            continue
        ind += 1
        if len(given_headers) > 0 and given_headers[ind] is not None:
            metadata.raw_headers.append(given_headers[ind])
            metadata.headers.append(given_headers[ind])
            continue
        if hd != '':
            metadata.raw_headers.append(hd)
            metadata.headers.append(hd)
        else:
            head = 'var' + str(ind)
            metadata.raw_headers.append(head)
            metadata.headers.append(head)

    # infer the column datatypes by eval-ing the fields of the first data
    # line (quoted strings eval to str, numbers to int or float)
    first_line = header_lines[-1]
    raw_dtypes = []
    type_dict = {int: 'int',
                 str: 'str',
                 float: 'float'}
    for field in first_line:
        raw_dtypes.append(type_dict[type(eval(field))])
    metadata.raw_dtypes = raw_dtypes

    # store string columns as objects in the record array
    dt = [rdt if rdt != 'str' else 'O' for rdt in metadata.raw_dtypes]
    dtypes_raw = np.dtype(zip(metadata.raw_headers, dt))

    remove_double_quotes = lambda str_: str_.replace('"', '')
    converters = {}
    for ii, dt in enumerate(metadata.raw_dtypes):
        converters[ii] = remove_double_quotes

    if LooseVersion(np.__version__) < LooseVersion('1.5'):
        # older numpy: use matplotlib's csv2rec, which parses the dates itself
        raw_data = mlab.csv2rec(input_file, names=metadata.raw_headers,
                                skiprows=4, delimiter=',').view(np.ndarray)

        # drop the RECORD column (index 1) and turn date columns into floats
        dtype_data = [raw_data.dtype[ii] for ii in [0] + range(2, len(raw_data.dtype))]
        dtype_data = [dt if dt != np.object else np.dtype(np.float) for dt in dtype_data]

        data = np.zeros(len(raw_data), dtype=zip(metadata.headers, dtype_data))

        for head in metadata.headers:
            if raw_data.dtype[head] == np.dtype('object'):
                # csv2rec yields datetime objects
                data[head] = plt.date2num(raw_data[head])
            else:
                data[head] = raw_data[head]
    else:
        raw_data = np.genfromtxt(input_file, delimiter=',', skip_header=4,
                                 dtype=dtypes_raw, converters=converters)

        # drop the RECORD column (index 1) and turn date columns into floats
        dtype_data = [raw_data.dtype[ii] for ii in [0] + range(2, len(raw_data.dtype))]
        dtype_data = [dt if dt != np.object else np.dtype(np.float) for dt in dtype_data]

        data = np.zeros(len(raw_data), dtype=zip(metadata.headers, dtype_data))

        for head in metadata.headers:
            if raw_data.dtype[head] == np.dtype('object'):
                # date+time strings
                data[head] = iso_time_to_date(raw_data[head])
            else:
                data[head] = raw_data[head]

    return data, raw_data, metadata
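# Example (hypothetical names; a sketch, not a test): overriding the first
# and last field names of the file from the doctest above while keeping
# the defaults for the rest; the RECORD field does not appear in the list:
#
#   d, rd, md = read_campbell_TAO5('test_files/TOA5_cr1000.dat',
#                                  given_headers=['time'] + [None]*7 + ['temp'])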


def read_maw_file(input_file):
    """
    Reads a standard MAW file (as only used by me, Mauro A Werder)
    with format:

    #maw name of dataset
    # comment line
    # a line starting with '#metadata.' is a metadata tag, e.g.:
    #metadata.eg = 'asdf'
    # will create an attribute metadata.eg with value 'asdf'
    #metadata.num = '1.234'
    #
    # the last comment line has the format and will be put into
    # metadata['headers'], metadata['units'] and used as datatype:
    # name0 (units) [dtype], name1 (units) [dtype], name2 (units) [dtype], ...
    val0, val1, val2 ...
    .
    .
    .

    dtype is one of the following: int, float, str, time_str

    Time is represented as an ISO 8601 string "yyyy-mm-dd HH:MM:SS(.FF)",
    excluding the 'T' and without time zone information (which should be
    given in the units as e.g. (UTC-7)).

    The idea is to have an easy to parse text representation of (a
    subset of) what can be contained in a netcdf3 file.

    @type input_file: string
    @param input_file: input file name

    @rtype: tuple
    @return: tuple (data, raw_data, metadata)

    >>> d,rd,md = read_maw_file('test_files/maw_file_test.maw')
    >>> d,rd,md
    (array([(733966.3673611111, 0.0, 0.303, 5, 'asdd asdlkj asl'),
           (733966.3722222223, 15.0, 0.232, 8866, 'asdd asdlkj asl'),
           (733966.3736111111, 25.0, 0.2055, 5, '7'),
           (733966.3770833333, 50.0, 0.162, 5, '')],
          dtype=[('time', '<f8'), ('var1', '<f8'), ('var2', '<f8'), ('var3', '<i8'), ('var4', '|O8')]), array([('2010-07-13 08:49:00', 0.0, 0.303, 5, 'asdd asdlkj asl'),
           ('2010-07-13 08:56:00', 15.0, 0.232, 8866, 'asdd asdlkj asl'),
           ('2010-07-13 08:58:00', 25.0, 0.2055, 5, '7'),
           ('2010-07-13 09:03:00', 50.0, 0.162, 5, '')],
          dtype=[('time', '|O8'), ('var1', '<f8'), ('var2', '<f8'), ('var3', '<i8'), ('var4', '|O8')]), {'calibaration_solution_concentration': 10.0,
     'calibaration_solution_concentration_units': 'g/l',
     'dtypes': ['time_str', 'float', 'float', 'int', 'str'],
     'experimenter': 'MAW + UM',
     'headers': ['time', 'var1', 'var2', 'var3', 'var4'],
     'raw_headers': ['time', 'var1', 'var2', 'var3', 'var4'],
     'title': 'Test file',
     'units': ['UTC-7', 'ml', '', 'm^3', '']})
    """

    raw_data = readfile_raw(input_file, separator='no_split')
    data = []
    comment = []
    metadata = Metadata()

    # separate comment lines from data lines
    for line in raw_data:
        if line.startswith('#'):
            comment.append(line.strip())
        else:
            tmp = line.split(',')
            tmp = [tt.strip(' "') for tt in tmp]
            data.append(tuple(tmp))

    # parse the metadata tags
    if not comment[0].startswith('#maw'):
        raise TypeError("File does not start with '#maw'")
    else:
        metadata.title = comment[0][len('#maw'):].strip()
    for line in comment:
        if line.startswith('#metadata.'):
            tmp = line.split('#metadata.')[1]
            tmp = tmp.split('=')
            tmp = [t.strip() for t in tmp]
            # eval the value so that numbers become numbers
            if tmp[1] != 'nan':
                metadata.__dict__[tmp[0]] = eval(tmp[1])
            else:
                metadata.__dict__[tmp[0]] = float('nan')

    # the last comment line holds: name (units) [dtype], ...
    last_line = [st.strip() for st in comment[-1].strip('#').split(',')]
    headers = []
    units = []
    dtypes = []
    for head in last_line:
        tmp = head.split('(')
        headers.append(tmp[0].strip())
        tmp = tmp[1].split(')')
        units.append(tmp[0].strip())
        dtypes.append(tmp[1].strip(' []'))
        if dtypes[-1] == '':
            raise ValueError('No datatype given in file')
    metadata.__dict__['headers'] = headers
    metadata.__dict__['raw_headers'] = headers
    metadata.__dict__['units'] = units
    metadata.__dict__['dtypes'] = dtypes

    # the raw data keeps time and str columns as objects...
    dtypes_raw = [np.dtype(dt) if dt != 'time_str'
                  else np.dtype('O') for dt in dtypes]
    dtypes_raw = [np.dtype(dt) if dt != 'str'
                  else np.dtype('O') for dt in dtypes_raw]

    # ...whereas the processed data stores time as float date numbers
    dtypes_processed = [np.dtype(dt) if dt != 'time_str'
                        else np.dtype(float) for dt in dtypes]
    dtypes_processed = [np.dtype(dt) if dt != 'str'
                        else np.dtype('O') for dt in dtypes_processed]

    raw_data = np.array(data, zip(headers, dtypes_raw))
    data = np.zeros(len(raw_data), dtype=zip(headers, dtypes_processed))

    for ii, head in enumerate(headers):
        if dtypes[ii] == 'time_str':
            data[head] = iso_time_to_date(raw_data[head])
        else:
            data[head] = raw_data[head]

    return data, raw_data, metadata