web.py 14.3 KB
Newer Older
1
"""Tools to retrieve and send data on the web.
2

3
4
Here are a collection of tools to work with data on the internet. Thus,
this module mostly requires an internet connection.
5
6
7
8
9
"""
__author__ = 'Qusai Al Shidi'
__email__ = 'qusai@umich.edu'

import datetime as dt
10
import urllib
11
from .tools import _import_error_string, _nearest
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33


def get_omni_data(time_from, time_to, **kwargs):
    """Retrieve omni solar wind data over http.

    This will download omni data from https://spdf.gsfc.nasa.gov/pub/data/omni
    and put it into a dictionary. If your data is large, then make a csv and
    use swmfpy.io.read_omni_data().

    Args:
        time_from (datetime.datetime): The start time of the solar wind
                                       data that you want to receive.
        time_to (datetime.datetime): The end time of the solar wind data
                                     you want to receive.

    Returns:
        dict: This will be a list of *all* columns
              available in the omni data set.

    Examples:
        ```python
        import datetime
34
        import swmfpy.web
35

36
37
38
39
        storm_start = datetime.datetime(year=2000, month=1, day=1)
        storm_end = datetime.datetime(year=2000, month=2, day=15)
        data = swmfpy.web.get_omni_data(time_from=storm_start,
                                        time_to=storm_end)
40
41
        ```
    """
42
43
44
    # Author: Qusai Al Shidi
    # Email: qusai@umich.edu

Qusai Al Shidi's avatar
Qusai Al Shidi committed
45
46
    from dateutil import rrule

47
    # This is straight from the format guide on spdf
48
    col_names = ('ID for IMF spacecraft',
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
                 'ID for SW Plasma spacecraft',
                 '# of points in IMF averages',
                 '# of points in Plasma averages',
                 'Percent interp',
                 'Timeshift, sec',
                 'RMS, Timeshift',
                 'RMS, Phase front normal',
                 'Time btwn observations, sec',
                 'Field magnitude average, nT',
                 'Bx, nT (GSE, GSM)',
                 'By, nT (GSE)',
                 'Bz, nT (GSE)',
                 'By, nT (GSM)',
                 'Bz, nT (GSM)',
                 'RMS SD B scalar, nT',
                 'RMS SD field vector, nT',
                 'Flow speed, km/s',
                 'Vx Velocity, km/s, GSE',
                 'Vy Velocity, km/s, GSE',
                 'Vz Velocity, km/s, GSE',
                 'Proton Density, n/cc',
                 'Temperature, K',
                 'Flow pressure, nPa',
                 'Electric field, mV/m',
                 'Plasma beta',
                 'Alfven mach number',
                 'X(s/c), GSE, Re',
                 'Y(s/c), GSE, Re',
                 'Z(s/c), GSE, Re',
                 'BSN location, Xgse, Re',
                 'BSN location, Ygse, Re',
80
81
82
83
84
85
86
87
88
89
                 'BSN location, Zgse, Re',
                 'AE-index, nT',
                 'AL-index, nT',
                 'AU-index, nT',
                 'SYM/D index, nT',
                 'SYM/H index, nT',
                 'ASY/D index, nT',
                 'ASY/H index, nT',
                 'PC(N) index',
                 'Magnetosonic mach number')
90
91
92
93
94
95
96

    # Set the url
    omni_url = 'https://spdf.gsfc.nasa.gov/pub/data/omni/'
    if kwargs.get('high_res', True):
        omni_url += 'high_res_omni/monthly_1min/'

    # Initialize return dict
97
    return_data = {}
98
    return_data['times'] = []
99
    for name in col_names:
100
        return_data[name] = []
101

102
    # Iterate monthly to save RAM
103
104
105
106
107
108
109
    for date in rrule.rrule(rrule.MONTHLY,
                            dtstart=dt.datetime(time_from.year,
                                                time_from.month,
                                                1),
                            until=dt.datetime(time_to.year,
                                              time_to.month,
                                              25)):
110
111
112
        suffix = 'omni_min'
        suffix += str(date.year) + str(date.month).zfill(2)
        suffix += '.asc'
113
        omni_data = list(urllib.request.urlopen(omni_url+suffix))
114
115

        # Parse omni data
116
        for line in omni_data:
117
118
            cols = line.decode('ascii').split()
            # Time uses day of year which must be parsed
119
120
121
122
            time = dt.datetime.strptime(cols[0] + ' '  # year
                                        + cols[1] + ' '  # day of year
                                        + cols[2] + ' '  # hour
                                        + cols[3],  # minute
123
124
                                        '%Y %j %H %M')
            if time >= time_from and time <= time_to:
125
                return_data['times'].append(time)
126
127
                # Assign the data from after the time columns (0:3)
                for num, value in enumerate(cols[4:len(col_names)+4]):
128
129
130
131
                    if _check_bad_omni_num(value):
                        return_data[col_names[num]].append(None)
                    else:
                        return_data[col_names[num]].append(float(value))
132

133
    return return_data  # dictionary with omni values where index is the row
Qusai Al Shidi's avatar
Qusai Al Shidi committed
134
135


136
def _check_bad_omni_num(value_string):
137
138
139
    """Returns true if bad or false if not. Bad numbers usually just have 9s
       in omni.
    """
140
141
142
143
144
145
    for char in value_string:
        if char != '9' and char != '.':
            return False
    return True


146
147

def download_magnetogram_hmi(mag_time, hmi_map='hmi.B_720s', **kwargs):
148
149
150
    """Downloads HMI vector magnetogram fits files.

    This will download vector magnetogram FITS files from
151
    Joint Science Operations Center (JSOC) near a certain hour.
152

153
    This unfortunately depends on sunpy and drms, if you don't have it try,
154
155

    ```bash
156
    pip install -U --user sunpy drms
157
158
159
    ```

    Args:
160
161
        mag_time (datetime.datetime): Time after which to find
                                      vector magnetograms.
162
163
        hmi_map (str): JSOC prefix for hmi maps. Currently can only do
                       'hmi.B_720s' and 'hmi.b_synoptic.small'.
164
165
166
167
168
169

    **kwargs:
        download_dir (str): Relative directory to download to.
        verbose (bool): (default False) print out the files it's downloading.

    Returns:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
170
        str: list of filenames downloaded.
171
172
173

    Raises:
        ImportError: If module `drms` is not found.
174
175
        FileNotFoundError: If the JSOC doesn't have the magnetograms for that
                           time.
176
177
178
179
180
181
182
183
184
185

    Examples:
        ```python
        from swmfpy.web import download_magnetogram_hmi
        import datetime as dt

        # I am interested in the hmi vector magnetogram from 2014, 2, 18
        time_mag = dt.datetime(2014, 2, 18, 10)  # Around hour 10

        # Calling it will download
186
        filenames = download_magnetogram_hmi(mag_time=time_mag,
187
                                             hmi_map='B_720s',
188
189
190
191
192
193
                                             download_dir='mydir/')

        # To see my list
        print('The magnetograms I downloaded are:', filenames)

        # You may call and ignore the file list
194
195
196
        download_magnetogram_hmi(mag_time=time_mag,
                                 hmi_map='b_synoptic_small',
                                 download_dir='mydir')
197
198
199
200
201
202
203
        ```
    """

    # import drms dynamically
    try:
        import drms
    except ImportError:
204
        raise ImportError(_import_error_string('drms'))
205

206
207
208
209
    get_urls = {
        'hmi.B_720s': _get_urls_hmi_b720,
        'hmi.b_synoptic_small': _get_urls_hmi_b_synoptic_small,
        }
210
211
    client = drms.Client()

212
    urls = get_urls[hmi_map](client, mag_time)    
213

214
215
216
    # Download data
    if kwargs.get('verbose', False):
        print('Starting download of magnetograms:\n')
217
    return_name = ''
218
219
220
    download_dir = kwargs.get('download_dir', '')
    if not download_dir.endswith('/') and download_dir != '':
        download_dir += '/'
221
    for data_time, mag_url in urls:
222
223
        if mag_url == 'BadSegLink':  # JSOC will return this if not found
            raise FileNotFoundError('Could not find those HMI magnetograms.')
224
        filename = 'hmi_' + str(data_time).replace(' ', '_')  # Add timestamp
225
226
227
228
229
230
231
232
233
        filename += '_' + mag_url.split('/')[-1]  # Last is filename
        url = 'http://jsoc.stanford.edu' + mag_url
        if kwargs.get('verbose', False):
            print(f'Downloading from {url} to {download_dir+filename}.')
        with urllib.request.urlopen(url) as fits_file:
            with open(download_dir+filename, 'wb') as local_file:
                local_file.write(fits_file.read())
        if kwargs.get('verbose', False):
            print(f'Done writing {download_dir+filename}.\n')
234
        return_name = download_dir+filename
235
236
237
238

    if kwargs.get('verbose', False):
        print('Completed downloads.\n')

239
    return return_name
240
241


242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
def _get_urls_hmi_b_synoptic_small(client, mag_time):
    """Returns for #download_magnetogram_hmi needed urls

    Args:
        client (drms.Client): To query and return urls.
        mag_time (datetime.datetime): To find nearest magnetogram.

    Returns:
        generator that yields (datetime.datetime, str): Time of magnetogram,
            suffix url of magnetogram
    """
    import drms
    try:
        from sunpy.coordinates.sun import carrington_rotation_number
    except ImportError as error:
        print(_import_error_string('sunpy'))
        raise error

    cr_number = int(round(carrington_rotation_number(mag_time)))
    query_string = f'hmi.b_synoptic_small[{int(round(cr_number))}]'
    components = ['Bp', 'Bt', 'Br']
    data = client.query(query_string, seg=components)
    # Generator to find the nearest time
    prefix_str = 'CR' + str(cr_number) + '_' + str(mag_time)
    urls = ((prefix_str, data[component][0]) for component in components)
    return urls


def _get_urls_hmi_b720(client, mag_time):
    """Returns for #download_magnetogram_hmi needed urls for hmi.B_720s

    Args:
        client (drms.Client): To query and return urls.
        mag_time (datetime.datetime): To find nearest magnetogram.

    Returns:
        generator that yields (datetime.datetime, str): Time of magnetogram,
            suffix url of magnetogram
    """
    import drms
    query_string = 'hmi.B_720s'
    query_string += f'[{mag_time.year}.'
    query_string += f'{str(mag_time.month).zfill(2)}.'
    query_string += f'{str(mag_time.day).zfill(2)}_'
    query_string += f'{str(mag_time.hour).zfill(2)}'
    query_string += f'/1h]'
    data = client.query(query_string, key='T_REC', seg='field')
    times = drms.to_datetime(data[0].T_REC)
    nearest_time = _nearest(mag_time, times)
    # Generator to find the nearest time
    urls = ((data_time, mag_url) for (data_time, mag_url)
            in zip(times, data[1].field) if data_time == nearest_time)
    return urls

 
Qusai Al Shidi's avatar
Qusai Al Shidi committed
297
def download_magnetogram_adapt(time, map_type='fixed', **kwargs):
Qusai Al Shidi's avatar
Qusai Al Shidi committed
298
    """This routine downloads GONG ADAPT magnetograms.
Qusai Al Shidi's avatar
Qusai Al Shidi committed
299
300

    Downloads ADAPT magnetograms from ftp://gong2.nso.edu/adapt/maps/gong/
301
    to a local directory. It will download all maps with the regex file
302
    pattern: adapt4[0,1]3*yyyymmddhh
Qusai Al Shidi's avatar
Qusai Al Shidi committed
303
304
305
306
307
308

    Args:
        time (datetime.datetime): Time in which you want the magnetogram.
        map_type (str): (default: 'fixed')
                        Choose either 'fixed' or 'central' for
                        the map type you want.
309
310
311
312

    **kwargs:
        download_dir (str): (default is current dir) Relative directory
                            where you want the maps to be downloaded.
Qusai Al Shidi's avatar
Qusai Al Shidi committed
313

314
315
316
    Returns:
        str: First unzipped filename found.

Qusai Al Shidi's avatar
Qusai Al Shidi committed
317
    Raises:
318
319
        NotADirectoryError: If the adapt maps directory
                            is not found on the server.
Qusai Al Shidi's avatar
Qusai Al Shidi committed
320
321
        ValueError: If map_type is not recognized.
                    (i.e. not 'fixed' or 'central')
322
        FileNotFoundError: If maps were not found.
Qusai Al Shidi's avatar
Qusai Al Shidi committed
323
324
325
326
327
328

    Examples:
        ```python
        import datetime as dt

        # Use datetime objects for the time
329
        time_flare = dt.datetime(2018, 2, 12, hour=10)
Qusai Al Shidi's avatar
Qusai Al Shidi committed
330
331
332
333
        swmfpy.web.download_magnetogram_adapt(time=time_flare,
                                              map_type='central',
                                              download_dir='./mymaps/')
        ```
Qusai Al Shidi's avatar
Qusai Al Shidi committed
334
    """
Qusai Al Shidi's avatar
Qusai Al Shidi committed
335
336
337
338
339
340
341
342
343
344
345
346
347
    # Author: Zhenguang Huang
    # Email: zghuang@umich.edu

    import ftplib
    from ftplib import FTP
    import gzip
    import shutil

    if map_type == 'fixed':
        map_id = '0'
    elif map_type == 'central':
        map_id = '1'
    else:
348
        raise ValueError('Not recognized type of ADAPT map')
Qusai Al Shidi's avatar
Qusai Al Shidi committed
349
350
351
352
353
354
355
356
357
358
359
360
361

    # Go to the the ADAPT ftp server
    ftp = FTP('gong2.nso.edu')
    ftp.login()

    # Only ADAPT GONG is considered
    ftp.cwd('adapt/maps/gong')

    # Go to the specific year
    try:
        ftp.cwd(str(time.year))
    except ftplib.all_errors:
        ftp.quit()
362
        raise NotADirectoryError('Cannot go to the specific year directory')
Qusai Al Shidi's avatar
Qusai Al Shidi committed
363

364
365
366
367
    # ADAPT maps only contains the hours for even numbers
    if time.hour % 2 != 0:
        print('Warning: Hour must be an even number.',
              'The entered hour value is changed to',
Qusai Al Shidi's avatar
Qusai Al Shidi committed
368
              time.hour//2*2)
Qusai Al Shidi's avatar
Qusai Al Shidi committed
369
370
371
372
373
    # Only consider the public (4) Carrington Fixed (0) GONG (3) ADAPT maps
    file_pattern = 'adapt4' + map_id + '3*' \
        + str(time.year).zfill(4) \
        + str(time.month).zfill(2) \
        + str(time.day).zfill(2) \
Qusai Al Shidi's avatar
Qusai Al Shidi committed
374
        + str(time.hour//2*2).zfill(2) + '*'
375
    # adapt4[0,1]3*yyyymmddhh
Qusai Al Shidi's avatar
Qusai Al Shidi committed
376
377
378
379

    filenames = ftp.nlst(file_pattern)

    if len(filenames) < 1:
380
381
        raise FileNotFoundError('Could not find a file that matches'
                                + 'the pattern.')
Qusai Al Shidi's avatar
Qusai Al Shidi committed
382
383
384
385
386
387
388
389
390
391
392
393

    for filename in filenames:
        # open the file locally
        directory = kwargs.get('download_dir', './')
        if directory[-1] != '/':
            directory += '/'
        with open(directory + filename, 'wb') as fhandle:
            # try to download the magnetogram
            try:
                ftp.retrbinary('RETR ' + filename, fhandle.write)
            except ftplib.all_errors:
                ftp.quit()
394
                raise FileNotFoundError('Cannot download ', filename)
Qusai Al Shidi's avatar
Qusai Al Shidi committed
395
396
397
398
399
400
401
402
403
404

        # unzip the file
        if '.gz' in filename:
            filename_unzip = filename.replace('.gz', '')
            with gzip.open(directory + filename, 'rb') as s_file:
                with open(directory + filename_unzip, 'wb') as d_file:
                    shutil.copyfileobj(s_file, d_file, 65536)

    # close the connection
    ftp.quit()
405
406

    # return first file name if all goes well
407
408
409
410
411
    return_names = filenames
    for index, filename in enumerate(return_names):
        if '.gz' in filename:
            return_names[index] = filename.replace('.gz', '')
    return return_names