1 """
2 Just a few helper classes.
3 """
4 from __future__ import division
5
6 import cPickle as pickle
7 import pprint
8
9 import pylab as plt
10 import numpy as np
11 import scipy.signal as signal
12 import scipy.integrate
13
14
15 import fieldpy.core.helper_fns as helper_fns
16
17
18 try:
19 import pyximport; pyximport.install(setup_args={'include_dirs':[np.get_include()]})
20 except:
21 pass
22
23 try:
24 from fieldpy.core.helper_fns_cython import filter_and_resample
25 from fieldpy.core.helper_fns_cython import get_ind_closest
26 cython_module = True
27 except:
28 from fieldpy.core.helper_fns import filter_and_resample_slow as filter_and_resample
29 cython_module = False
30
32 """Parent class for all kinds of data. The data itself should be held in
33 self.data as a masked record array (masked np.ma.core.MaskedArray)
34
35 @todo: at the moment only working for 1D data, maybe extend it to N-D
36 """
49
51 """
52 Do integrity checks; should be invoked at the end of __init__
53 [NOT IMPLEMENTED]
54 """
55 pass
56
58 """
59 Pickel self.
60
61 @type filename: string
62 @param filename: the name of the picklefile (reccomended to use .pkl extension)
63 """
64 with open(filename, 'wb') as fil:
65 pickle.dump(self, fil, protocol=pickle.HIGHEST_PROTOCOL)
66
68 """
69 Filters out the given value of from the given field by masking them.
70
71 @type field: string
72 @param field: the name of the self.data field
73
74 @type value: ?
75 @param value: the value which should be masked
76 """
77
78 self.data[field][self.data[field]==value] = np.ma.masked
79
81 """
82 Maskes datapoint for which the absolute difference between
83 them and the point before is more then jump_size.
84
85 @type field: string
86 @param field: the name of the self.data field
87
88 @type jump_size: ?
89 @param jump_size: The size of jump which is masked
90 """
91 absdiff = np.abs(np.diff(self.data[field]))
92 self.data[field][absdiff>jump_size] = np.ma.masked
93
95 """
96 Masks all the values of field where bool_fn is True.
97
98 @type field: string
99 @param field: the name of the self.data field
100
101 @type bool_fn: function
102 @param bool_fn: function which takes self.data['field'] as argument
103 and returns a boolean
104 """
105 self.data[field][bool_fn(self.data[field])] = np.ma.masked
106
108 """
109 Maskes datapoint for which the difference between them and the
110 point before is more/less then jump_size, for jump_size
111 positive/negative.
112
113 @type field: string
114 @param field: the name of the self.data field
115
116 @type jump_size: ?
117 @param jump_size: The size of jump which is filtered
118 """
119 diff = np.diff(self.data[field])
120 jump_size = -jump_size
121 if jump_size>0:
122 self.data[field][diff>jump_size] = np.ma.masked
123 else:
124 self.data[field][diff<jump_size] = np.ma.masked
125
def filter_by_freq(self, field, output_field, sample_rate, cutoff, bandwidth=None, mode='lowpass'):
    """Perform filtering with a windowed sinc frequency-domain
    filter. Can be used for either lowpass or highpass filtering,
    useful for removing rapid signal variations or long-wavelength
    trends, respectively.

    @type field: string
    @param field: the name of the self.data field to filter

    @type output_field: string
    @param output_field: the name of the output field in self.data

    @type sample_rate: float
    @param sample_rate: the time or distance per sample, i.e.
                        sampling interval

    @type cutoff: float
    @param cutoff: the e-folding cutoff frequency or wavenumber

    @type bandwidth: float
    @param bandwidth: transition bandwidth (narrow means sharper
                      response at the expense of a longer filtering kernel
                      and slower convolution); defaults to 0.1 * cutoff

    @type mode: str
    @param mode: either 'lowpass' or 'highpass'
    """
    if bandwidth is None:
        # default: transition band is 10% of the cutoff
        bandwidth = 0.1 * cutoff

    # normalized (per-sample) transition bandwidth and cutoff
    bw = bandwidth * sample_rate
    fc = cutoff * sample_rate

    # kernel length ~4/bw taps; must be an int for firwin and must be
    # odd so the kernel has a well-defined centre tap
    N = int(4 // bw)
    if N % 2 == 0:
        N += 1

    kernel = np.sinc(np.arange(-(N - 1) // 2, (N + 1) // 2) * fc) \
             * signal.firwin(N, fc, window='blackman')
    kernel = kernel / kernel.sum()

    if mode == 'highpass':
        # spectral inversion: highpass = delta - lowpass
        kernel = -kernel
        kernel[(N - 1) // 2] += 1.0

    if len(kernel) > len(self.data[field]):
        raise ValueError(
            "Kernel {0} is longer than input array {1}".format(
                len(kernel), len(self.data[field])))

    # NOTE: original stored into undefined name `filt` (NameError);
    # the convolution result is now consistently called `filt`
    filt = np.convolve(kernel, self.data[field], mode='same')

    # overwrite an existing field, or append a new one
    try:
        self.data[output_field] = filt
    except (KeyError, ValueError):
        self.data = helper_fns.append_a2mrec(self.data, filt, output_field)
    return
191
def filter_low_pass(self, field, output_field,
                    bandwidth, sample_rate, cutoff_freq):
    """
    This implements a sinc filter which is a low pass filter
    (i.e. a step filter in the frequency domain).

    @type field: string
    @param field: the name of the self.data field

    @type output_field: string
    @param output_field: the name of the output field of self.data

    @type bandwidth: float
    @param bandwidth: is the precision of the filter; narrower
                      bandwidth would seem better, but increases
                      numerical artefacts. Set to a few times
                      more than what you wanna filter

    @type sample_rate: float
    @param sample_rate: is the sampling interval

    @type cutoff_freq: float
    @param cutoff_freq: is the centre frequency to cut off at
                        example: you have day-long events and
                        hour-long noise so use a cut-off period of
                        a few hours
    """
    # normalized (per-sample) transition bandwidth and cutoff
    bw = bandwidth * sample_rate
    fc = cutoff_freq * sample_rate

    # kernel length ~4/bw taps; 4//bw on floats yields a float, but
    # firwin needs an integer tap count -> cast. Forced odd so the
    # kernel is symmetric about a centre tap.
    N = int(4 // bw)
    if N % 2 == 0:
        N += 1

    kernel = np.sinc(np.arange(-(N - 1) // 2, (N + 1) // 2) * fc) \
             * signal.firwin(N, fc, window='blackman')
    # normalize to unit gain at DC
    kernel /= kernel.sum()

    if len(kernel) > len(self.data[field]):
        raise ValueError('Kernel is longer than input array: len(kernel)=%i' % len(kernel))

    filt = np.convolve(self.data[field], kernel, mode='same')

    # overwrite an existing field, or append a new one
    try:
        self.data[output_field] = filt
    except (KeyError, ValueError):
        self.data = helper_fns.append_a2mrec(self.data, filt, output_field)
251
252
253
254
255
256
257
258
260 """
261 A class to hold time series data. In self.data['time'] has to be
262 the monotonically increasing times.
263 """
266
270
272 """
273 Checks whether the times in self.data['time'] are
274 monotonically increasing. Raises an error if they are not.
275 """
276 if np.any(np.diff(self.data['time'])<=0):
277 raise ValueError("self.data['time'] is not stricktly increasing!")
278
280 """
281 Checks whether sampling interval in self.data['time'] is
282 constant. Raises an error if they are not.
283 """
284 self.check_monotonically_inc_time()
285 samp_int = self.data['time'][1]-self.data['time'][0]
286 if np.any(1e-10*samp_int < np.abs(np.diff(self.data['time'])-samp_int)):
287 raise ValueError("The sampling interval in self.data['time'] is not constant.")
288
def plot_ts(self, var_name=None, ax=None, fmt='-', **plot_kwargs):
    """
    Plots the time series data for var_name.

    @type var_name: string
    @param var_name: string of variable to be plotted
                     (default: the first non-time field)

    @type ax: axes object
    @param ax: axes to be plotted into (default: make a new figure)

    @type fmt: string
    @param fmt: format string to be passed to the plotter (default '-')

    @param plot_kwargs: keywords arguments for plt.plot function

    @return: the axes plotted into
    """
    if var_name is None:
        var_name = self.data.dtype.names[1]
    if ax is None:
        ax = plt.figure().add_subplot(111)

    ax.plot(self.data['time'], self.data[var_name], fmt, **plot_kwargs)
    ax.set_xlabel('Time')
    ax.set_ylabel(var_name)
    return ax
315
def plot_date(self, var_name=None, ax=None, fmt='-', **plot_kwargs):
    """
    Plots the time series data for var_name with plt.plot_date.

    @type var_name: string
    @param var_name: string of variable to be plotted
                     (default: the first non-time field)

    @type ax: axes object
    @param ax: axes to be plotted into (default: make a new figure)

    @type fmt: string
    @param fmt: format string to be passed to the plotter (default '-')

    @param plot_kwargs: keywords arguments for plt.plot_date function

    @return: the axes plotted into
    """
    if ax is None:
        ax = plt.figure().add_subplot(111)
    if var_name is None:
        var_name = self.data.dtype.names[1]

    # rotate the date tick labels so they do not overlap
    plt.setp(ax.get_xticklabels(), rotation=45)
    ax.plot_date(self.data['time'], self.data[var_name], fmt, **plot_kwargs)
    ax.set_xlabel('Time')
    ax.set_ylabel(var_name)
    return ax
345
347 """
348 Filters the self.data[field] with a weighted moving average filter.
349
350 Appends or over-writes the field output_fild to self.data
351
352 @type field: string
353 @param field: the name of the self.data field
354
355 @type output_field: string
356 @param output_field: the name of the output field of self.data
357
358 @type time_window: float
359 @param time_window: the filter window in units of self.data['time']
360
361 @note: I'm not sure how exactly the missing data are handled...
362 """
363 filt = helper_fns.filter_gauss(self.data['time'], self.data[field], time_window)
364 try:
365 self.data[output_field] = filt
366 except:
367 self.data = helper_fns.append_ma2mrec(self.data, filt, output_field)
368
369
370
371
372
373
374
def filter_and_resample(self, field, filter_window,
                        new_sample_int, first_sample=None,
                        modify_instance=False):
    """Filters and resamples a time signal with a running average in a
    given time intervall +/-filter_window/2. The filter has a PHASE
    SHIFT for a UN-equally sampled signal.

    @param field: which field of self.data to use
    @param filter_window: filter time interval
    @param new_sample_int: the new sampling interval (in units of t)
    @param first_sample: the time of the first sample in the resampled
                         signal. If None (default) it is
                         (approximatly) set to time[0]+filter_window
    @param modify_instance: If true, self.data will only contain
                            the new times and data

    @rtype: list of np.array
    @return: Returns [new_time, new_data]
    """
    # this resolves to the module-level filter_and_resample helper,
    # not to this method (class attributes are not in function scope)
    new_time, new_data = filter_and_resample(
        self.data['time'], self.data[field],
        filter_window, new_sample_int, first_sample)

    if not modify_instance:
        return new_time, new_data

    # replace self.data with a fresh record array holding only the
    # resampled time and data columns
    self.data = np.empty(new_time.shape, dtype=[('time', float), (field, float)])
    self.data['time'] = new_time
    self.data[field] = new_data
    return
403
405 """This is the index function you want to use.
406
407 @param times: a list of times
408
409 @returns: a list of indices nearest to the times
410 """
411 ind = []
412 try:
413 for ti in times:
414 ind.append(self.get_index_nearest(ti)[0])
415 except:
416 ind.append(self.get_index_nearest(times)[0])
417 return ind
419 """This function takes two times and returns a slice object
420 such that the slice includes start and end time.
421 """
422 ind = self.get_ind(times)
423 return slice(ind[0],ind[1]+1)
424
426 """get_index_after(t) returns the index at t or immediatly
427 after t and the exact time corresponding to that index.
428
429 If there is no index afterwards, return (None, None).
430
431 @rtype: tuple
432 @return: tuple containing index and the time corresponing to that index
433 """
434 ind = (np.where(time <= self.data['time']))[0]
435 if ind.shape[0] == 0:
436 ind = None
437 exact_time = None
438 else:
439 ind = np.min(ind)
440 exact_time = self.data['time'][ind]
441 return ind, exact_time
443 """get_index_before(t) returns the index at t or immediately
444 before t and the exact time corresponding to that index.
445
446 If there is no index before, return (None, None).
447
448 @rtype: tuple
449 @return: tuple containing index and the time corresponing to that index
450 """
451 ind = (np.where(time >= self.data['time']))[0]
452 if ind.shape[0] == 0:
453 ind = None
454 exact_time = None
455 else:
456 ind = np.max(ind)
457 exact_time = self.data['time'][ind]
458 return ind, exact_time
459
461 """Get the index and time which is nearest to the time.
462
463 @rtype: tuple
464 @return: tuple containing index and the time corresponing to that index
465 """
466 if cython_module:
467 ind = get_ind_closest(self.data['time'], time)
468 return ind, self.data['time'][ind]
469 else:
470 ind_b, t_b = self.get_index_before(time)
471 ind_a, t_a = self.get_index_after(time)
472 if ind_b is None:
473 return ind_a, t_a
474 if ind_a is None:
475 return ind_b, t_b
476 if time-t_b > t_a-time:
477 return ind_a, t_a
478 else:
479 return ind_b, t_b
480
482 """This cut a piece of the timeseries out and modifies the
483 data B{in place}.
484
485 @type time_span:
486 """
487 sli = self.get_ind_as_slice(time_span)
488 self.data = self.data[sli]
489
def integrate(self, field, start=None, end=None):
    """
    Integrates the field from start time to end time by a
    trapezoidal method.

    @type field: string
    @param field: the name of the self.data field to integrate

    @param start: start time (default: beginning of the record); the
                  sample nearest to this time is used
    @param end: end time (default: end of the record); the sample
                nearest to this time is included in the integral

    @rtype: float
    @return: trapezoidal integral of self.data[field] over self.data['time']
    """
    if start is None:
        ind0 = 0
    else:
        # get_index_nearest returns (index, exact_time); the original
        # used the whole tuple as a slice bound, raising TypeError
        ind0 = self.get_index_nearest(start)[0]
    if end is None:
        ind1 = len(self.data)
    else:
        # +1 so the sample nearest to `end` is included in the slice
        ind1 = self.get_index_nearest(end)[0] + 1

    # scipy.integrate.trapz was renamed to trapezoid (and later
    # removed); support both old and new scipy
    trapz = getattr(scipy.integrate, 'trapezoid', None)
    if trapz is None:
        trapz = scipy.integrate.trapz
    return trapz(self.data[field][ind0:ind1], self.data['time'][ind0:ind1])
505
506
521