# -*- coding: utf-8 -*-
# Author: Florian Mayer <florian.mayer@bitsrc.org>
from __future__ import absolute_import
import os
import glob
import shutil
import datetime
import urllib2
import numpy as np
import pyfits
from itertools import izip, chain
from functools import partial
from collections import defaultdict
from bs4 import BeautifulSoup
from scipy.optimize import leastsq
from scipy.ndimage import gaussian_filter1d
from sunpy.time import parse_time
from sunpy.util.util import (
findpeaks, delta, polyfun_at, minimal_pairs, find_next
)
from sunpy.util.cond_dispatch import ConditionalDispatch, run_cls
from sunpy.spectra.spectrogram import LinearTimeSpectrogram, REFERENCE
from sunpy.net.util import download_file
TIME_STR = "%Y%m%d%H%M%S"
DEFAULT_URL = 'http://soleil.i4ds.ch/solarradio/data/2002-20yy_Callisto/'
_DAY = datetime.timedelta(days=1)
DATA_SIZE = datetime.timedelta(seconds=15*60)
[docs]def query(start, end, instruments=None, url=DEFAULT_URL):
""" Get URLs for callisto data from instruments between start and end.
Parameters
----------
start : parse_time compatible
end : parse_time compatible
instruments : sequence
Sequence of instruments whose data is requested.
url : str
Base URL for the request.
"""
day = datetime.datetime(start.year, start.month, start.day)
while day <= end:
directory = url + '%d/%02d/%02d/' % (day.year, day.month, day.day)
opn = urllib2.urlopen(directory)
try:
soup = BeautifulSoup(opn)
for link in soup.find_all("a"):
href = link.get("href")
name = href.split('.')[0]
try:
inst, date, time, no = name.split('_')
except ValueError:
# If the split fails, the file name does not match out
# format,so we skip it and continue to the next
# iteration of the loop.
continue
dstart = datetime.datetime.strptime(date + time, TIME_STR)
if (instruments is not None and
inst not in instruments and
(inst, int(no)) not in instruments):
continue
dend = dstart + DATA_SIZE
if dend > start and dstart < end:
yield directory + href
finally:
opn.close()
day += _DAY
[docs]def download(urls, directory):
""" Download files from urls into directory.
Parameters
----------
urls : list of str
urls of the files to retrieve
directory : str
directory to save them in
"""
return [download_file(url, directory) for url in urls]
def _parse_header_time(date, time):
""" Return datetime object from date and time fields of header. """
if time is not None:
date = date + 'T' + time
return parse_time(date)
[docs]class CallistoSpectrogram(LinearTimeSpectrogram):
""" Classed used for dynamic spectra coming from the Callisto network.
Additional (not inherited) parameters
-------------------------------------
header : pyfits.Header
main header of the FITS file
axes_header : pyfits.Header
header foe the axes table
swapped : boolean
flag that specifies whether originally in the file the x-axis was
frequency
"""
# XXX: Determine those from the data.
SIGMA_SUM = 75
SIGMA_DELTA_SUM = 20
_create = ConditionalDispatch.from_existing(LinearTimeSpectrogram._create)
create = classmethod(_create.wrapper())
# Contrary to what pylint may think, this is not an old-style class.
# pylint: disable=E1002,W0142,R0902
# This needs to list all attributes that need to be
# copied to maintain the object and how to handle them.
COPY_PROPERTIES = LinearTimeSpectrogram.COPY_PROPERTIES + [
('header', REFERENCE),
('swapped', REFERENCE),
('axes_header', REFERENCE)
]
# List of instruments retrieved in July 2012 from
# http://soleil.i4ds.ch/solarradio/data/2002-20yy_Callisto/
INSTRUMENTS = set([
'ALASKA', 'ALMATY', 'BIR', 'DARO', 'HB9SCT', 'HUMAIN',
'HURBANOVO', 'KASI', 'KENYA', 'KRIM', 'MALAYSIA', 'MRT1',
'MRT2', 'OOTY', 'OSRA', 'SWMC', 'TRIEST', 'UNAM'
])
[docs] def save(self, filepath):
""" Save modified spectrogram back to filepath.
Parameters
----------
filepath : str
path to save the spectrogram to
"""
main_header = self.get_header()
data = pyfits.PrimaryHDU(self, header=main_header)
## XXX: Update axes header.
freq_col = pyfits.Column(
name="frequency", format="D8.3", array=self.freq_axis
)
time_col = pyfits.Column(
name="time", format="D8.3", array=self.time_axis
)
cols = pyfits.ColDefs([freq_col, time_col])
table = pyfits.new_table(cols, header=self.axes_header)
hdulist = pyfits.HDUList([data, table])
hdulist.writeto(filepath)
@classmethod
[docs] def read(cls, filename, **kwargs):
""" Read in FITS file and return a new CallistoSpectrogram.
Any unknown (i.e. any except filename) keyword arguments get
passed to pyfits.open.
Parameters
----------
filename : str
path of the file to read
"""
fl = pyfits.open(filename, **kwargs)
data = fl[0].data
axes = fl[1]
header = fl[0].header
start = _parse_header_time(
header['DATE-OBS'], header.get('TIME-OBS', header.get('TIME$_OBS'))
)
end = _parse_header_time(
header['DATE-END'], header.get('TIME-END', header.get('TIME$_END'))
)
swapped = "time" not in header["CTYPE1"].lower()
# Swap dimensions so x-axis is always time.
if swapped:
t_delt = header["CDELT2"]
t_init = header["CRVAL2"] - t_delt * header["CRPIX2"]
t_label = header["CTYPE2"]
f_delt = header["CDELT1"]
f_init = header["CRVAL1"] - t_delt * header["CRPIX1"]
f_label = header["CTYPE1"]
data = data.transpose()
else:
t_delt = header["CDELT1"]
t_init = header["CRVAL1"] - t_delt * header["CRPIX1"]
t_label = header["CTYPE1"]
f_delt = header["CDELT2"]
f_init = header["CRVAL2"] - t_delt * header["CRPIX2"]
f_label = header["CTYPE2"]
# Table may contain the axes data. If it does, the other way of doing
# it might be very wrong.
if axes is not None:
try:
# It's not my fault. Neither supports __contains__ nor .get
tm = axes.data['time']
except KeyError:
tm = None
try:
fq = axes.data['frequency']
except KeyError:
fq = None
if tm is not None:
# Fix dimensions (whyever they are (1, x) in the first place)
time_axis = np.squeeze(tm)
else:
# Otherwise, assume it's linear.
time_axis = \
np.linspace(0, data.shape[1] - 1) * t_delt + t_init # pylint: disable=E1101
if fq is not None:
freq_axis = np.squeeze(fq)
else:
freq_axis = \
np.linspace(0, data.shape[0] - 1) * f_delt + f_init # pylint: disable=E1101
content = header["CONTENT"]
instruments = set([header["INSTRUME"]])
return cls(
data, time_axis, freq_axis, start, end, t_init, t_delt,
t_label, f_label, content, instruments,
header, axes.header, swapped
)
def __init__(self, data, time_axis, freq_axis, start, end,
t_init=None, t_delt=None, t_label="Time", f_label="Frequency",
content="", instruments=None, header=None, axes_header=None,
swapped=False):
# Because of how object creation works, there is no avoiding
# unused arguments in this case.
# pylint: disable=W0613
super(CallistoSpectrogram, self).__init__(
data, time_axis, freq_axis, start, end,
t_init, t_delt, t_label, f_label,
content, instruments
)
self.header = header
self.axes_header = axes_header
self.swapped = swapped
@classmethod
[docs] def is_datasource_for(cls, header):
""" Check if class supports data from the given FITS file.
Parameters
----------
header : pyfits.Header
main header of the FITS file
"""
return header.get('instrume', '').strip() in cls.INSTRUMENTS
[docs] def remove_border(self):
""" Remove duplicate entries on the borders. """
left = 0
while self.freq_axis[left] == self.freq_axis[0]:
left += 1
right = self.shape[0] - 1
while self.freq_axis[right] == self.freq_axis[-1]:
right -= 1
return self[left-1:right+2, :]
@classmethod
[docs] def read_many(cls, filenames, sort_by=None):
""" Return list of CallistoSpectrogram objects read from filenames.
Parameters
----------
filenames : list of str
list of paths to read from
sort_by : str
optional attribute of the resulting objects to sort from, e.g.
start to sort by starting time.
"""
objs = map(cls.read, filenames)
if sort_by is not None:
objs.sort(key=lambda x: getattr(x, sort_by))
return objs
@classmethod
[docs] def from_range(cls, instrument, start, end, **kwargs):
""" Automatically download data from instrument between start and
end and join it together.
Parameters
----------
instrument : str
instrument to retrieve the data from
start : parse_time compatible
start of the measurement
end : parse_time compatible
end of the measurement
"""
kw = {
'maxgap': 1,
'fill': cls.JOIN_REPEAT,
}
kw.update(kwargs)
start = parse_time(start)
end = parse_time(end)
urls = query(start, end, [instrument])
data = map(cls.from_url, urls)
freq_buckets = defaultdict(list)
for elem in data:
freq_buckets[tuple(elem.freq_axis)].append(elem)
try:
return cls.combine_frequencies(
[cls.join_many(elem, **kw) for elem in freq_buckets.itervalues()]
)
except ValueError:
raise ValueError("No data found.")
def _overlap(self, other):
""" Find frequency and time overlap of two spectrograms. """
one, two = self.intersect_time([self, other])
ovl = one.freq_overlap(two)
return one.clip_freq(*ovl), two.clip_freq(*ovl)
@staticmethod
def _to_minimize(a, b):
"""
Function to be minimized for matching to frequency channels.
"""
def _fun(p):
if p[0] <= 0.2 or abs(p[1]) >= a.max():
return float("inf")
return a - (p[0] * b + p[1])
return _fun
def _homogenize_params(self, other, maxdiff=1):
"""
Return triple with a tuple of indices (in self and other, respectively),
factors and constants at these frequencies.
Parameters
----------
other : CallistoSpectrogram
Spectrogram to be homogenized with the current one.
maxdiff : float
Threshold for which frequencies are considered equal.
"""
pairs_indices = [
(x, y) for x, y, d in minimal_pairs(self.freq_axis, other.freq_axis)
if d <= maxdiff
]
pairs_data = [
(self[n_one, :], other[n_two, :]) for n_one, n_two in pairs_indices
]
# XXX: Maybe unnecessary.
pairs_data_gaussian = [
(gaussian_filter1d(a, 15), gaussian_filter1d(b, 15))
for a, b in pairs_data
]
# If we used integer arithmetic, we would accept more invalid
# values.
pairs_data_gaussian64 = np.float64(pairs_data_gaussian)
least = [
leastsq(self._to_minimize(a,b), [1, 0])[0]
for a, b in pairs_data_gaussian64
]
factors = [x for x, y in least]
constants = [y for x, y in least]
return pairs_indices, factors, constants
[docs] def homogenize(self, other, maxdiff=1):
""" Return overlapping part of self and other as (self, other) tuple.
Homogenize intensities so that the images can be used with
combine_frequencies. Note that this works best when most of the
picture is signal, so use :py:meth:`in_interval` to select the subset
of your image before applying this method.
Parameters
----------
other : CallistoSpectrogram
Spectrogram to be homogenized with the current one.
maxdiff : float
Threshold for which frequencies are considered equal.
"""
one, two = self._overlap(other)
pairs_indices, factors, constants = one._homogenize_params(
two, maxdiff
)
# XXX: Maybe (xd.freq_axis[x] + yd.freq_axis[y]) / 2.
pairs_freqs = [one.freq_axis[x] for x, y in pairs_indices]
# XXX: Extrapolation does not work this way.
# XXX: Improve.
f1 = np.polyfit(pairs_freqs, factors, 3)
f2 = np.polyfit(pairs_freqs, constants, 3)
return (
one,
two * polyfun_at(f1, two.freq_axis)[:, np.newaxis] +
polyfun_at(f2, two.freq_axis)[:, np.newaxis]
)
[docs] def extend(self, minutes=15, **kwargs):
""" Request subsequent files from the server. If minutes is negative,
retrieve preceding files. """
if len(self.instruments) != 1:
raise ValueError
instrument = iter(self.instruments).next()
if minutes > 0:
data = CallistoSpectrogram.from_range(
instrument,
self.end, self.end + datetime.timedelta(minutes=minutes)
)
else:
data = CallistoSpectrogram.from_range(
instrument,
self.start - datetime.timedelta(minutes=-minutes), self.start
)
data = data.clip_freq(self.freq_axis[-1], self.freq_axis[0])
return CallistoSpectrogram.join_many([self, data], **kwargs)
@classmethod
[docs] def from_url(cls, url):
""" Return CallistoSpectrogram read from URL.
Parameters
----------
url : str
URL to retrieve the data from
"""
return cls.read(url)
CallistoSpectrogram._create.add(
run_cls('from_range'),
lambda cls, instrument, start, end: True,
check=False
)
CallistoSpectrogram.create.im_func.__doc__ = (
""" Create CallistoSpectrogram from given input dispatching to the
appropriate from_* function.
Possible signatures:
""" + CallistoSpectrogram._create.generate_docs()
)
if __name__ == "__main__":
opn = CallistoSpectrogram.read("callisto/BIR_20110922_103000_01.fit")
opn.subtract_bg().clip(0).plot(ratio=2).show()
print "Press return to exit"
raw_input()