io.py 7.86 KB
Newer Older
Qusai Al Shidi's avatar
Qusai Al Shidi committed
1
2
3
4
5
"""Input/Output tools

Here are tools to read and write files relating to SWMF.

TODO: Move pandas dependancy elsewhere.
Qusai Al Shidi's avatar
Qusai Al Shidi committed
6
7
8
9
10
11
"""

import datetime as dt


def read_wdc_ae(wdc_filename):
12
13
    """Read an auroral electrojet (AE) indeces from Kyoto's World Data Center
       text file into a dictionary of lists.
Qusai Al Shidi's avatar
Qusai Al Shidi committed
14
15

    Args:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
16

Qusai Al Shidi's avatar
Qusai Al Shidi committed
17
18
19
        wdc_filename (str): Filename of wdc data from
                            http://wdc.kugi.kyoto-u.ac.jp/
    Returns:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
20
21
22
        dict: Auroral indeces 'AL', 'AE', 'AO', 'AU'
            datetime.datetime: 'times'
            int: 'values'
Qusai Al Shidi's avatar
Qusai Al Shidi committed
23
    """
24
25
26
27
28
29
30
31

    # Initialize return data
    return_data = {'AL': {'times': [], 'values': []},
                   'AE': {'times': [], 'values': []},
                   'AO': {'times': [], 'values': []},
                   'AU': {'times': [], 'values': []}}

    # Open and make sure it is correct file
32
    with open(wdc_filename, 'rt') as wdc_file:
33
        header = wdc_file.readline()
34
35
        assert header[:8] == 'AEALAOAU', \
            'Does not seem to be a WDC AE file. First 8 chars: ' + header[:8]
36
37

        # Parse
Qusai Al Shidi's avatar
Qusai Al Shidi committed
38
        for line in wdc_file:
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
            data = line.split()
            year_suffix = int(data[1][:2])
            if year_suffix < 50:
                year = 2000 + year_suffix
            else:
                year = 1990 + year_suffix
            month = int(data[1][2:4])
            day = int(data[1][4:6])
            hour = int(data[1][7:9])
            index = data[1][-2:]
            values_60 = [int(val) for val in data[3:60+3]]

            # Fill
            for minute, value in enumerate(values_60):
                return_data[index]['values'].append(value)
                return_data[index]['times'].append(
                    dt.datetime(year, month, day, hour, minute))

    return return_data
Qusai Al Shidi's avatar
Qusai Al Shidi committed
58
59


60
def read_wdc_asy_sym(wdc_filename):
61
62
63
64
65
66
67
    """Reads a WDC file for ASY/SYM data.

    Reads an ASY/SYM file downloaded from
    http://wdc.kugi.kyoto-u.ac.jp/aeasy/index.html
    and puts it into a dictionary.

    Args:
68
        wdc_filename (str): Relative filename (or file handle no.) to read.
69
70

    Returns:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
71
        dict: of values. {'[ASY-SYM]-[D-H]': 'times': [], 'values': []}
72
73

    Examples:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
74
        ```python
75
76
77
78
79
80
81
82
83
84
        indeces = swmfpy.io.read_wdc_asy_sym('wdc.dat')
        # Plot data
        plt.plot(indeces['SYM-H']['times'],
                 indeces['SYM-H']['values'],
                 label='SYM-H [nT]'
                 )
        plt.xlabel('Time [UT]')
        ```

    Important to note if there is bad data it will be filled as None.
85
86
    """

87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
    return_data = {
        'ASY-D': {
            'times': [],
            'values': [],
            },
        'ASY-H': {
            'times': [],
            'values': [],
            },
        'SYM-D': {
            'times': [],
            'values': [],
            },
        'SYM-H': {
            'times': [],
            'values': []
            }
        }
105
106

    with open(wdc_filename) as wdc_file:
107
108
109
110
111
112
113
114
        # Check for correct file
        header = wdc_file.readline()
        assert header[:12] == 'ASYSYM N6E01', ('File does not seem to be'
                                               + 'an ASY/SYM file from wdc.'
                                               + 'First 12 characters: '
                                               + header)
        return_data['edition'] = header[24:34]

115
        for line in wdc_file:
116
117
            # Parse
            year = int(line[12:14])
118
119
120
121
            if year < 70:  # Starts from 1970 but only gives 2 digits
                year += 2000
            else:
                year += 1900
122
123
124
125
126
127
128
            month = int(line[14:16])
            day = int(line[16:18])
            hour = int(line[19:21])
            comp = line[18]
            index = line[21:24]

            # Fill 60 min data
129
            data = line.split()
130
131
132
133
134
135
136
137
138
139
140
            values_60 = [int(val) for val in data[2:62]]
            for minute, value in enumerate(values_60):
                return_data[index+'-'+comp]['times'].append(
                    dt.datetime(year, month, day, hour, minute))
                # Check if data is bad
                if value != 99999:
                    return_data[index+'-'+comp]['values'].append(
                        value)
                else:
                    return_data[index+'-'+comp]['values'].append(
                        None)
141

142
    return return_data
143
144


145
def _coarse_filtering(data, coarseness=3):
Qusai Al Shidi's avatar
Qusai Al Shidi committed
146
147
148
149
150
151
152
    """Applies coarse filtering to a pandas.DataFrame"""
    for column in data.columns:
        mean = data[column].abs().mean()
        sigma = data[column].std()
        data[column] = data[data[column].abs() < mean+coarseness*sigma][column]


153
def read_gm_log(filename, colnames=None, dtypes=None, index_time=True):
154
    """Make a dictionary out of the indeces outputted
Qusai Al Shidi's avatar
Qusai Al Shidi committed
155
156
157
    from the GM model log.

    Args:
158
        filename (str): The relative filename as a string. (or file handle no.)
159
160
161
        colnames ([str]): (default: None) Supply the name of the columns.
                                          If None it will use second line
                                          of log file.
162
163
        dtypes ([types]): (default: None) Provide types for the columns, if
                                          None then all will be float.
164
165
166
        index_time (bool): (default: True) Make a column of dt.datetime objects
                                           in dictionary key 'Time [UT]'.

Qusai Al Shidi's avatar
Qusai Al Shidi committed
167
    Returns:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
168
        dict: Dictionary of the log file
Qusai Al Shidi's avatar
Qusai Al Shidi committed
169
170

    Examples:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
171
        To plot AL and Dst get the log files
Qusai Al Shidi's avatar
Qusai Al Shidi committed
172
        ```python
173
174
        geo = swmfpy.io.read_gm_log('run/GM/IO2/geoindex_e20140215-100500.log')
        dst = swmfpy.io.read_gm_log('run/GM/IO2/log_e20140215-100500.log')
Qusai Al Shidi's avatar
Qusai Al Shidi committed
175

176
        # Plot AL indeces
177
        plt.plot(geo['times', geo['AL'])
Qusai Al Shidi's avatar
Qusai Al Shidi committed
178
        ```
Qusai Al Shidi's avatar
Qusai Al Shidi committed
179

180
    """
Qusai Al Shidi's avatar
Qusai Al Shidi committed
181

Qusai Al Shidi's avatar
Qusai Al Shidi committed
182
    # If column names were not specified
183
184
185
186
187
188
189
190
    return_data = {}
    with open(filename, 'r') as logfile:

        # Find column names and initialize
        description = logfile.readline()
        return_data['description'] = description
        # Usually the names are in the second line
        if not colnames:
Qusai Al Shidi's avatar
Qusai Al Shidi committed
191
            colnames = logfile.readline().split()
192
193
194
195
196
197
        try:
            colnames = _fix_str_duplicates(colnames)
        except RuntimeWarning:
            print(f'{__name__}: Warning: '
                  + 'Found duplicates in column names. '
                  + 'Changes made to column names.')
198
199
200
201
202
203
        for col in colnames:
            return_data[col] = []

        # Fill data dictionary
        for line_num, line in enumerate(logfile):
            if line_num > 2:  # First two lines are usually metadata
204
                for col, data in enumerate(line.strip().split()):
205
206
207
208
                    if dtypes:
                        data = dtypes[col](data)
                    else:
                        data = float(data)
209
210
211
212
                    return_data[colnames[col]].append(data)

        # datetime index
        if index_time:
213
            return_data['times'] = []
214
            for row, year in enumerate(return_data[colnames[1]]):
215
                return_data['times'].append(
216
217
218
219
220
221
222
223
224
                    dt.datetime(int(year),
                                int(return_data[colnames[2]][row]),  # month
                                int(return_data[colnames[3]][row]),  # day
                                int(return_data[colnames[4]][row]),  # hour
                                int(return_data[colnames[5]][row]),  # min
                                int(return_data[colnames[6]][row]),  # sec
                                int(return_data[colnames[7]][row])))  # ms

    return return_data
225
226
227
228
229
230
231
232
233
234
235
236
237
238


def _fix_str_duplicates(str_list):
    """Returns a list and bool if a fix was made.
       The fix is adding an _[index] to avoid duplicates.
    """
    duplicate_found = False
    for index, string in enumerate(str_list):
        if str_list.count(string) > 1:
            duplicate_found = True
            str_list[index] = string + f'_{index}'
    if duplicate_found:
        raise RuntimeWarning
    return str_list