Note: The default ITS GitLab runner is a shared resource and is subject to slowdowns during heavy usage.
You can run your own GitLab runner that is dedicated just to your group if you need to avoid processing delays.

io.py 7.82 KB
Newer Older
1
"""### Input/Output tools
Qusai Al Shidi's avatar
Qusai Al Shidi committed
2
3

Here are tools to read and write files relating to SWMF.
Qusai Al Shidi's avatar
Qusai Al Shidi committed
4
5
6
7
8
9
"""

import datetime as dt


def read_wdc_ae(wdc_filename):
10
11
    """Read an auroral electrojet (AE) indeces from Kyoto's World Data Center
       text file into a dictionary of lists.
Qusai Al Shidi's avatar
Qusai Al Shidi committed
12
13

    Args:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
14

Qusai Al Shidi's avatar
Qusai Al Shidi committed
15
16
17
        wdc_filename (str): Filename of wdc data from
                            http://wdc.kugi.kyoto-u.ac.jp/
    Returns:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
18
19
20
        dict: Auroral indeces 'AL', 'AE', 'AO', 'AU'
            datetime.datetime: 'times'
            int: 'values'
Qusai Al Shidi's avatar
Qusai Al Shidi committed
21
    """
22
23
24
25
26
27
28
29

    # Initialize return data
    return_data = {'AL': {'times': [], 'values': []},
                   'AE': {'times': [], 'values': []},
                   'AO': {'times': [], 'values': []},
                   'AU': {'times': [], 'values': []}}

    # Open and make sure it is correct file
30
    with open(wdc_filename, 'rt') as wdc_file:
31
        header = wdc_file.readline()
32
33
        assert header[:8] == 'AEALAOAU', \
            'Does not seem to be a WDC AE file. First 8 chars: ' + header[:8]
34
35

        # Parse
Qusai Al Shidi's avatar
Qusai Al Shidi committed
36
        for line in wdc_file:
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
            data = line.split()
            year_suffix = int(data[1][:2])
            if year_suffix < 50:
                year = 2000 + year_suffix
            else:
                year = 1990 + year_suffix
            month = int(data[1][2:4])
            day = int(data[1][4:6])
            hour = int(data[1][7:9])
            index = data[1][-2:]
            values_60 = [int(val) for val in data[3:60+3]]

            # Fill
            for minute, value in enumerate(values_60):
                return_data[index]['values'].append(value)
                return_data[index]['times'].append(
                    dt.datetime(year, month, day, hour, minute))

    return return_data
Qusai Al Shidi's avatar
Qusai Al Shidi committed
56
57


58
def read_wdc_asy_sym(wdc_filename):
59
60
61
62
63
64
65
    """Reads a WDC file for ASY/SYM data.

    Reads an ASY/SYM file downloaded from
    http://wdc.kugi.kyoto-u.ac.jp/aeasy/index.html
    and puts it into a dictionary.

    Args:
66
        wdc_filename (str): Relative filename (or file handle no.) to read.
67
68

    Returns:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
69
        dict: of values. {'[ASY-SYM]-[D-H]': 'times': [], 'values': []}
70
71

    Examples:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
72
        ```python
73
74
75
76
77
78
79
80
81
82
        indeces = swmfpy.io.read_wdc_asy_sym('wdc.dat')
        # Plot data
        plt.plot(indeces['SYM-H']['times'],
                 indeces['SYM-H']['values'],
                 label='SYM-H [nT]'
                 )
        plt.xlabel('Time [UT]')
        ```

    Important to note if there is bad data it will be filled as None.
83
84
    """

85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
    return_data = {
        'ASY-D': {
            'times': [],
            'values': [],
            },
        'ASY-H': {
            'times': [],
            'values': [],
            },
        'SYM-D': {
            'times': [],
            'values': [],
            },
        'SYM-H': {
            'times': [],
            'values': []
            }
        }
103
104

    with open(wdc_filename) as wdc_file:
105
106
107
108
109
110
111
112
        # Check for correct file
        header = wdc_file.readline()
        assert header[:12] == 'ASYSYM N6E01', ('File does not seem to be'
                                               + 'an ASY/SYM file from wdc.'
                                               + 'First 12 characters: '
                                               + header)
        return_data['edition'] = header[24:34]

113
        for line in wdc_file:
114
115
            # Parse
            year = int(line[12:14])
116
117
118
119
            if year < 70:  # Starts from 1970 but only gives 2 digits
                year += 2000
            else:
                year += 1900
120
121
122
123
124
125
126
            month = int(line[14:16])
            day = int(line[16:18])
            hour = int(line[19:21])
            comp = line[18]
            index = line[21:24]

            # Fill 60 min data
127
            data = line.split()
128
129
130
131
132
133
134
135
136
137
138
            values_60 = [int(val) for val in data[2:62]]
            for minute, value in enumerate(values_60):
                return_data[index+'-'+comp]['times'].append(
                    dt.datetime(year, month, day, hour, minute))
                # Check if data is bad
                if value != 99999:
                    return_data[index+'-'+comp]['values'].append(
                        value)
                else:
                    return_data[index+'-'+comp]['values'].append(
                        None)
139

140
    return return_data
141
142


143
def _coarse_filtering(data, coarseness=3):
Qusai Al Shidi's avatar
Qusai Al Shidi committed
144
145
146
147
148
149
150
    """Applies coarse filtering to a pandas.DataFrame"""
    for column in data.columns:
        mean = data[column].abs().mean()
        sigma = data[column].std()
        data[column] = data[data[column].abs() < mean+coarseness*sigma][column]


151
def read_gm_log(filename, colnames=None, dtypes=None, index_time=True):
152
    """Make a dictionary out of the indeces outputted
Qusai Al Shidi's avatar
Qusai Al Shidi committed
153
154
155
    from the GM model log.

    Args:
156
        filename (str): The relative filename as a string. (or file handle no.)
157
158
159
        colnames ([str]): (default: None) Supply the name of the columns.
                                          If None it will use second line
                                          of log file.
160
161
        dtypes ([types]): (default: None) Provide types for the columns, if
                                          None then all will be float.
162
163
164
        index_time (bool): (default: True) Make a column of dt.datetime objects
                                           in dictionary key 'Time [UT]'.

Qusai Al Shidi's avatar
Qusai Al Shidi committed
165
    Returns:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
166
        dict: Dictionary of the log file
Qusai Al Shidi's avatar
Qusai Al Shidi committed
167
168

    Examples:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
169
        To plot AL and Dst get the log files
Qusai Al Shidi's avatar
Qusai Al Shidi committed
170
        ```python
171
172
        geo = swmfpy.io.read_gm_log('run/GM/IO2/geoindex_e20140215-100500.log')
        dst = swmfpy.io.read_gm_log('run/GM/IO2/log_e20140215-100500.log')
Qusai Al Shidi's avatar
Qusai Al Shidi committed
173

174
        # Plot AL indeces
175
        plt.plot(geo['times', geo['AL'])
Qusai Al Shidi's avatar
Qusai Al Shidi committed
176
        ```
Qusai Al Shidi's avatar
Qusai Al Shidi committed
177

178
    """
Qusai Al Shidi's avatar
Qusai Al Shidi committed
179

Qusai Al Shidi's avatar
Qusai Al Shidi committed
180
    # If column names were not specified
181
182
183
184
185
186
187
188
    return_data = {}
    with open(filename, 'r') as logfile:

        # Find column names and initialize
        description = logfile.readline()
        return_data['description'] = description
        # Usually the names are in the second line
        if not colnames:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
189
            colnames = logfile.readline().split()
190
191
192
193
194
195
        try:
            colnames = _fix_str_duplicates(colnames)
        except RuntimeWarning:
            print(f'{__name__}: Warning: '
                  + 'Found duplicates in column names. '
                  + 'Changes made to column names.')
196
197
198
199
200
201
        for col in colnames:
            return_data[col] = []

        # Fill data dictionary
        for line_num, line in enumerate(logfile):
            if line_num > 2:  # First two lines are usually metadata
202
                for col, data in enumerate(line.strip().split()):
203
204
205
206
                    if dtypes:
                        data = dtypes[col](data)
                    else:
                        data = float(data)
207
208
209
210
                    return_data[colnames[col]].append(data)

        # datetime index
        if index_time:
211
            return_data['times'] = []
212
            for row, year in enumerate(return_data[colnames[1]]):
213
                return_data['times'].append(
214
215
216
217
218
219
220
221
222
                    dt.datetime(int(year),
                                int(return_data[colnames[2]][row]),  # month
                                int(return_data[colnames[3]][row]),  # day
                                int(return_data[colnames[4]][row]),  # hour
                                int(return_data[colnames[5]][row]),  # min
                                int(return_data[colnames[6]][row]),  # sec
                                int(return_data[colnames[7]][row])))  # ms

    return return_data
223
224
225
226
227
228
229
230
231
232
233
234
235
236


def _fix_str_duplicates(str_list):
    """Returns a list and bool if a fix was made.
       The fix is adding an _[index] to avoid duplicates.
    """
    duplicate_found = False
    for index, string in enumerate(str_list):
        if str_list.count(string) > 1:
            duplicate_found = True
            str_list[index] = string + f'_{index}'
    if duplicate_found:
        raise RuntimeWarning
    return str_list