io.py 10.3 KB
Newer Older
1
"""### Input/Output tools
Qusai Al Shidi's avatar
Qusai Al Shidi committed
2
3

Here are tools to read and write files relating to SWMF.
Qusai Al Shidi's avatar
Qusai Al Shidi committed
4
5
6
"""

import datetime as dt
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from .tools import _make_line


def write_imf_input(imf_data, filename='IMF.dat', **kwargs):
    """Creates the IMF.dat input file for the SWMF BATS-R-US geospace model.

    `imf_data` must be a dictionary of array_like objects with same length
    in data. In swmfpy Pythonic versions are always preferred so the 'times'
    must be `datetime.datetime` array.
    imf_data = dict(times, bx, by, bz, vx, vy, vz, density, temperature)

    Args:
        imf_data (dict): This dictionary contains the solar wind data.
        filename (str): (default: 'IMF.dat') Filename to write to.
        **kwargs:
            commands ([str]): (default: None) List of commands to write to imf
                              input file (indexed by line then by tabs on same
                              line). *Note*: Must be a list if have one command
                              str.

    Raises:
        TypeError: If commands is not a list or tuple. It must be at least a
                   one element list of strings.

    Examples:
    """

    columns_dat = ['year', 'month', 'day', 'hour', 'min', 'sec', 'msec',
                   'bx', 'by', 'bz', 'vx', 'vy', 'vz',
                   'density', 'temperature']
    columns_dict = ['times',
                    'bx', 'by', 'bz', 'vx', 'vy', 'vz',
                    'density', 'temperature']

    def _time_repr(time_raw):
        'Represent time in a suitable format'
        raw_str = dt.datetime.strftime(time_raw, '%Y %m %d %H %M %S %f')[:-3]
        return raw_str.split()

    with open(filename, 'w') as file_imf:
        # header
48
49
        file_imf.write('Made with swmfpy ')
        file_imf.write('(https://gitlab.umich.edu/swmf_software/swmfpy)\n\n')
50
51
52
53
54
55
56
57
58
59
60
61

        # write commands
        commands = kwargs.get('commands', None)
        if commands:
            try:
                isinstance(commands, (list, tuple))
            except TypeError:
                raise TypeError(f'{__name__}: commands must be list or tuple')
            for command in commands:
                file_imf.write(_make_line(command)+'\n')

        # write dat file
62
        file_imf.write('\n'+'\t'.join(columns_dat))
63
64
65
66
67
68
69
70
71
        file_imf.write('#START\n')
        lines = []
        for index, _time in enumerate(imf_data[columns_dict[0]]):
            line = [_time_repr(_time)]
            for key in columns_dict[1:]:
                line += [str(imf_data[key][index])]
            line = '\t'.join(line)
            lines += line
        file_imf.writelines(lines)
Qusai Al Shidi's avatar
Qusai Al Shidi committed
72
73
74


def read_wdc_ae(wdc_filename):
75
76
    """Read an auroral electrojet (AE) indeces from Kyoto's World Data Center
       text file into a dictionary of lists.
Qusai Al Shidi's avatar
Qusai Al Shidi committed
77
78

    Args:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
79

Qusai Al Shidi's avatar
Qusai Al Shidi committed
80
81
82
        wdc_filename (str): Filename of wdc data from
                            http://wdc.kugi.kyoto-u.ac.jp/
    Returns:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
83
84
85
        dict: Auroral indeces 'AL', 'AE', 'AO', 'AU'
            datetime.datetime: 'times'
            int: 'values'
Qusai Al Shidi's avatar
Qusai Al Shidi committed
86
    """
87
88
89
90
91
92
93
94

    # Initialize return data
    return_data = {'AL': {'times': [], 'values': []},
                   'AE': {'times': [], 'values': []},
                   'AO': {'times': [], 'values': []},
                   'AU': {'times': [], 'values': []}}

    # Open and make sure it is correct file
95
    with open(wdc_filename, 'rt') as wdc_file:
96
        header = wdc_file.readline()
97
98
        assert header[:8] == 'AEALAOAU', \
            'Does not seem to be a WDC AE file. First 8 chars: ' + header[:8]
99
100

        # Parse
Qusai Al Shidi's avatar
Qusai Al Shidi committed
101
        for line in wdc_file:
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
            data = line.split()
            year_suffix = int(data[1][:2])
            if year_suffix < 50:
                year = 2000 + year_suffix
            else:
                year = 1990 + year_suffix
            month = int(data[1][2:4])
            day = int(data[1][4:6])
            hour = int(data[1][7:9])
            index = data[1][-2:]
            values_60 = [int(val) for val in data[3:60+3]]

            # Fill
            for minute, value in enumerate(values_60):
                return_data[index]['values'].append(value)
                return_data[index]['times'].append(
                    dt.datetime(year, month, day, hour, minute))

    return return_data
Qusai Al Shidi's avatar
Qusai Al Shidi committed
121
122


123
def read_wdc_asy_sym(wdc_filename):
124
125
126
127
128
129
130
    """Reads a WDC file for ASY/SYM data.

    Reads an ASY/SYM file downloaded from
    http://wdc.kugi.kyoto-u.ac.jp/aeasy/index.html
    and puts it into a dictionary.

    Args:
131
        wdc_filename (str): Relative filename (or file handle no.) to read.
132
133

    Returns:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
134
        dict: of values. {'[ASY-SYM]-[D-H]': 'times': [], 'values': []}
135
136

    Examples:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
137
        ```python
138
139
140
141
142
143
144
145
146
147
        indeces = swmfpy.io.read_wdc_asy_sym('wdc.dat')
        # Plot data
        plt.plot(indeces['SYM-H']['times'],
                 indeces['SYM-H']['values'],
                 label='SYM-H [nT]'
                 )
        plt.xlabel('Time [UT]')
        ```

    Important to note if there is bad data it will be filled as None.
148
149
    """

150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
    return_data = {
        'ASY-D': {
            'times': [],
            'values': [],
            },
        'ASY-H': {
            'times': [],
            'values': [],
            },
        'SYM-D': {
            'times': [],
            'values': [],
            },
        'SYM-H': {
            'times': [],
            'values': []
            }
        }
168
169

    with open(wdc_filename) as wdc_file:
170
171
172
173
174
175
176
177
        # Check for correct file
        header = wdc_file.readline()
        assert header[:12] == 'ASYSYM N6E01', ('File does not seem to be'
                                               + 'an ASY/SYM file from wdc.'
                                               + 'First 12 characters: '
                                               + header)
        return_data['edition'] = header[24:34]

178
        for line in wdc_file:
179
180
            # Parse
            year = int(line[12:14])
181
182
183
184
            if year < 70:  # Starts from 1970 but only gives 2 digits
                year += 2000
            else:
                year += 1900
185
186
187
188
189
190
191
            month = int(line[14:16])
            day = int(line[16:18])
            hour = int(line[19:21])
            comp = line[18]
            index = line[21:24]

            # Fill 60 min data
192
            data = line.split()
193
194
195
196
197
198
199
200
201
202
203
            values_60 = [int(val) for val in data[2:62]]
            for minute, value in enumerate(values_60):
                return_data[index+'-'+comp]['times'].append(
                    dt.datetime(year, month, day, hour, minute))
                # Check if data is bad
                if value != 99999:
                    return_data[index+'-'+comp]['values'].append(
                        value)
                else:
                    return_data[index+'-'+comp]['values'].append(
                        None)
204

205
    return return_data
206
207


208
def _coarse_filtering(data, coarseness=3):
Qusai Al Shidi's avatar
Qusai Al Shidi committed
209
210
211
212
213
214
215
    """Applies coarse filtering to a pandas.DataFrame"""
    for column in data.columns:
        mean = data[column].abs().mean()
        sigma = data[column].std()
        data[column] = data[data[column].abs() < mean+coarseness*sigma][column]


216
def read_gm_log(filename, colnames=None, dtypes=None, index_time=True):
217
    """Make a dictionary out of the indeces outputted
Qusai Al Shidi's avatar
Qusai Al Shidi committed
218
219
220
    from the GM model log.

    Args:
221
        filename (str): The relative filename as a string. (or file handle no.)
222
223
224
        colnames ([str]): (default: None) Supply the name of the columns.
                                          If None it will use second line
                                          of log file.
225
226
        dtypes ([types]): (default: None) Provide types for the columns, if
                                          None then all will be float.
227
228
229
        index_time (bool): (default: True) Make a column of dt.datetime objects
                                           in dictionary key 'Time [UT]'.

Qusai Al Shidi's avatar
Qusai Al Shidi committed
230
    Returns:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
231
        dict: Dictionary of the log file
Qusai Al Shidi's avatar
Qusai Al Shidi committed
232
233

    Examples:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
234
        To plot AL and Dst get the log files
Qusai Al Shidi's avatar
Qusai Al Shidi committed
235
        ```python
236
237
        geo = swmfpy.io.read_gm_log('run/GM/IO2/geoindex_e20140215-100500.log')
        dst = swmfpy.io.read_gm_log('run/GM/IO2/log_e20140215-100500.log')
Qusai Al Shidi's avatar
Qusai Al Shidi committed
238

239
        # Plot AL indeces
240
        plt.plot(geo['times', geo['AL'])
Qusai Al Shidi's avatar
Qusai Al Shidi committed
241
        ```
Qusai Al Shidi's avatar
Qusai Al Shidi committed
242

243
    """
Qusai Al Shidi's avatar
Qusai Al Shidi committed
244

Qusai Al Shidi's avatar
Qusai Al Shidi committed
245
    # If column names were not specified
246
247
248
249
250
251
252
253
    return_data = {}
    with open(filename, 'r') as logfile:

        # Find column names and initialize
        description = logfile.readline()
        return_data['description'] = description
        # Usually the names are in the second line
        if not colnames:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
254
            colnames = logfile.readline().split()
255
256
257
258
259
260
        try:
            colnames = _fix_str_duplicates(colnames)
        except RuntimeWarning:
            print(f'{__name__}: Warning: '
                  + 'Found duplicates in column names. '
                  + 'Changes made to column names.')
261
262
263
264
265
266
        for col in colnames:
            return_data[col] = []

        # Fill data dictionary
        for line_num, line in enumerate(logfile):
            if line_num > 2:  # First two lines are usually metadata
267
                for col, data in enumerate(line.strip().split()):
268
269
270
271
                    if dtypes:
                        data = dtypes[col](data)
                    else:
                        data = float(data)
272
273
274
275
                    return_data[colnames[col]].append(data)

        # datetime index
        if index_time:
276
            return_data['times'] = []
277
            for row, year in enumerate(return_data[colnames[1]]):
278
                return_data['times'].append(
279
280
281
282
283
284
285
286
287
                    dt.datetime(int(year),
                                int(return_data[colnames[2]][row]),  # month
                                int(return_data[colnames[3]][row]),  # day
                                int(return_data[colnames[4]][row]),  # hour
                                int(return_data[colnames[5]][row]),  # min
                                int(return_data[colnames[6]][row]),  # sec
                                int(return_data[colnames[7]][row])))  # ms

    return return_data
288
289
290
291
292
293
294
295
296
297
298
299
300
301


def _fix_str_duplicates(str_list):
    """Returns a list and bool if a fix was made.
       The fix is adding an _[index] to avoid duplicates.
    """
    duplicate_found = False
    for index, string in enumerate(str_list):
        if str_list.count(string) > 1:
            duplicate_found = True
            str_list[index] = string + f'_{index}'
    if duplicate_found:
        raise RuntimeWarning
    return str_list