Note: The default ITS GitLab runner is a shared resource and is subject to slowdowns during heavy usage.
You can run your own GitLab runner that is dedicated just to your group if you need to avoid processing delays.

web.py 8.26 KB
Newer Older
1
"""Tools to retrieve and send data on the web.
2

Qusai Al Shidi's avatar
Qusai Al Shidi committed
3
4
SWMF Web Tools
==============
5

6
7
Here are a collection of tools to work with data on the internet. Thus,
this module mostly requires an internet connection.
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
"""
__author__ = 'Qusai Al Shidi'
__email__ = 'qusai@umich.edu'

import datetime as dt


def _omni_months(time_from, time_to):
    """Yield (year, month) for every calendar month touching the range.

    The walk starts at the month containing time_from and ends with the
    month containing time_to, both included. Nothing is yielded when
    time_from lies in a later month than time_to.
    """
    year, month = time_from.year, time_from.month
    while (year, month) <= (time_to.year, time_to.month):
        yield year, month
        month += 1
        if month > 12:
            year, month = year + 1, 1


def get_omni_data(time_from, time_to, **kwargs):
    """Retrieve omni solar wind data over http.

    This will download omni data from https://spdf.gsfc.nasa.gov/pub/data/omni
    and put it into a dictionary. If your data is large, then make a csv and
    use swmfpy.io.read_omni_data().

    One file is requested for every calendar month that overlaps the
    requested range, and only the rows whose time stamp lies within
    [time_from, time_to] (both ends included) are kept.

    Args:
        time_from (datetime.datetime): The start time of the solar wind
                                       data that you want to receive.
        time_to (datetime.datetime): The end time of the solar wind data
                                     you want to receive.
        **kwargs:
            high_res (bool): (default: True) If true the monthly files
                             are read from the
                             'high_res_omni/monthly_1min/' directory.
                             NOTE(review): when false the same file names
                             and column layout are requested from the base
                             omni url; confirm that this is intended.

    Returns:
        dict: This will be a list of *all* columns
              available in the omni data set. The key 'Time [UT]' holds
              datetime.datetime objects, every other key holds floats.
              All lists have one entry per kept row.

    Examples:
        ```python
        import datetime
        import swmfpy.web

        storm_start = datetime.datetime(year=2000, month=1, day=1)
        storm_end = datetime.datetime(year=2000, month=2, day=15)
        data = swmfpy.web.get_omni_data(time_from=storm_start,
                                        time_to=storm_end)
        ```
    """
    # Author: Qusai Al Shidi
    # Email: qusai@umich.edu

    import urllib.request

    # This is straight from the format guide on spdf
    col_names = ('ID for IMF spacecraft',
                 'ID for SW Plasma spacecraft',
                 '# of points in IMF averages',
                 '# of points in Plasma averages',
                 'Percent interp',
                 'Timeshift, sec',
                 'RMS, Timeshift',
                 'RMS, Phase front normal',
                 'Time btwn observations, sec',
                 'Field magnitude average, nT',
                 'Bx, nT (GSE, GSM)',
                 'By, nT (GSE)',
                 'Bz, nT (GSE)',
                 'By, nT (GSM)',
                 'Bz, nT (GSM)',
                 'RMS SD B scalar, nT',
                 'RMS SD field vector, nT',
                 'Flow speed, km/s',
                 'Vx Velocity, km/s, GSE',
                 'Vy Velocity, km/s, GSE',
                 'Vz Velocity, km/s, GSE',
                 'Proton Density, n/cc',
                 'Temperature, K',
                 'Flow pressure, nPa',
                 'Electric field, mV/m',
                 'Plasma beta',
                 'Alfven mach number',
                 'X(s/c), GSE, Re',
                 'Y(s/c), GSE, Re',
                 'Z(s/c), GSE, Re',
                 'BSN location, Xgse, Re',
                 'BSN location, Ygse, Re',
                 'BSN location, Zgse, Re',
                 'AE-index, nT',
                 'AL-index, nT',
                 'AU-index, nT',
                 'SYM/D index, nT',
                 'SYM/H index, nT',
                 'ASY/D index, nT',
                 'ASY/H index, nT',
                 'PC(N) index',
                 'Magnetosonic mach number')

    # Set the url
    omni_url = 'https://spdf.gsfc.nasa.gov/pub/data/omni/'
    if kwargs.get('high_res', True):
        omni_url += 'high_res_omni/monthly_1min/'

    # Initialize return dict
    return_data = {'Time [UT]': []}
    for name in col_names:
        return_data[name] = []

    # Iterate monthly to save RAM. Months are walked by calendar month
    # rather than by stepping from time_from, so a range that ends earlier
    # in the month than it starts still reaches its final month.
    for year, month in _omni_months(time_from, time_to):
        file_name = 'omni_min{}{:02d}.asc'.format(year, month)

        # The response is closed as soon as the month has been parsed and
        # is read line by line instead of being copied into a list first.
        with urllib.request.urlopen(omni_url + file_name) as response:
            for raw_line in response:
                cols = raw_line.decode('ascii').split()
                if not cols:
                    continue  # blank line, nothing to parse

                # Time uses day of year which must be parsed
                # (columns: year, day of year, hour, minute)
                timestamp = dt.datetime.strptime(' '.join(cols[:4]),
                                                 '%Y %j %H %M')
                if not time_from <= timestamp <= time_to:
                    continue

                return_data['Time [UT]'].append(timestamp)
                # Assign the data from after the time columns (0:3);
                # zip stops at the shorter of the two sequences.
                for name, value in zip(col_names, cols[4:]):
                    return_data[name].append(float(value))

    return return_data  # dictionary with omni values where index is the row
Qusai Al Shidi's avatar
Qusai Al Shidi committed
127
128
129
130
131
132


def download_magnetogram_adapt(time, map_type='fixed', **kwargs):
    """This routine downloads GONG ADAPT magnetograms.

    Downloads ADAPT magnetograms from ftp://gong2.nso.edu/adapt/maps/gong/
    to a local directory. It will download all maps with the regex file
    pattern: adapt4[0,1]3*yyyymmddhh

    Every matching file is stored in the download directory and files
    with '.gz' in their name are additionally decompressed next to the
    downloaded archive. The ftp connection is closed on every exit path.

    Args:
        time (datetime.datetime): Time in which you want the magnetogram.
                                  An odd hour is rounded down to the
                                  previous even hour.
        map_type (str): (default: 'fixed')
                        Choose either 'fixed' or 'central' for
                        the map type you want.
        **kwargs:
            download_dir (str): (default is current dir) Relative directory
                                where you want the maps to be downloaded.

    Returns:
        str: First unzipped filename found. This is the bare file name
             as listed by the server, without the download directory.

    Raises:
        NotADirectoryError: If the adapt maps directory
                            is not found on the server.
        ValueError: If map_type is not recognized.
                    (i.e. not 'fixed' or 'central')
        FileNotFoundError: If maps were not found.

    Examples:
        ```python
        import datetime as dt

        # Use datetime objects for the time
        time_flare = dt.datetime(2018, 2, 12, hour=10)
        swmfpy.web.download_magnetogram_adapt(time=time_flare,
                                              map_type='central',
                                              download_dir='./mymaps/')
        ```
    """
    # Author: Zhenguang Huang
    # Email: zghuang@umich.edu

    import ftplib
    from ftplib import FTP
    import gzip
    import os
    import shutil

    if map_type == 'fixed':
        map_id = '0'
    elif map_type == 'central':
        map_id = '1'
    else:
        raise ValueError('Not recognized type of ADAPT map')

    # os.path.join copes with a missing trailing slash and with ''
    directory = kwargs.get('download_dir', './')

    # ADAPT maps only contains the hours for even numbers
    even_hour = time.hour//2*2

    # Go to the the ADAPT ftp server; leaving the with block closes the
    # connection, also when one of the errors below is raised.
    with FTP('gong2.nso.edu') as ftp:
        ftp.login()

        # Only ADAPT GONG is considered
        ftp.cwd('adapt/maps/gong')

        # Go to the specific year
        try:
            ftp.cwd(str(time.year))
        except ftplib.all_errors as err:
            raise NotADirectoryError(
                'Cannot go to the specific year directory') from err

        if time.hour % 2 != 0:
            print('Warning: Hour must be an even number.',
                  'The entered hour value is changed to',
                  even_hour)

        # Only consider the public (4) Carrington Fixed (0) GONG (3)
        # ADAPT maps: adapt4[0,1]3*yyyymmddhh
        time_stamp = '{:04d}{:02d}{:02d}{:02d}'.format(time.year,
                                                       time.month,
                                                       time.day,
                                                       even_hour)
        file_pattern = 'adapt4' + map_id + '3*' + time_stamp + '*'

        filenames = ftp.nlst(file_pattern)

        if not filenames:
            raise FileNotFoundError('Could not find a file that matches '
                                    'the pattern.')

        for filename in filenames:
            local_path = os.path.join(directory, filename)

            # open the file locally
            with open(local_path, 'wb') as fhandle:
                # try to download the magnetogram
                try:
                    ftp.retrbinary('RETR ' + filename, fhandle.write)
                except ftplib.all_errors as err:
                    raise FileNotFoundError('Cannot download '
                                            + filename) from err

            # unzip the file
            if '.gz' in filename:
                unzipped_path = os.path.join(directory,
                                             filename.replace('.gz', ''))
                with gzip.open(local_path, 'rb') as s_file, \
                        open(unzipped_path, 'wb') as d_file:
                    shutil.copyfileobj(s_file, d_file, 65536)

    # return first file name if all goes well; the name is derived the
    # same way as the unzipped file written above so the two always agree
    return filenames[0].replace('.gz', '')