Note: The default ITS GitLab runner is a shared resource and is subject to slowdowns during heavy usage.
You can run your own GitLab runner that is dedicated just to your group if you need to avoid processing delays.

transfer_mail.py 11.3 KB
Newer Older
#!/usr/bin/env python3
import argparse
import logging
import logging.handlers
import os
import re
import sys
from hashlib import md5
from shutil import rmtree
from subprocess import CalledProcessError
from subprocess import check_output as cmd

import yaml

from .core import arg_prompt

devnull = open(os.devnull, 'w')


class Mover:
    """Copy labelled mail from one account's mailbox into another's.

    The work is done by two external commands named in the configuration:
    ``config['google']['gam_command']`` (used to check that both accounts
    resolve) and ``config['google']['gyb_command']`` (used to count, back up
    and restore messages).

    Attributes set by ``__init__``:
        current_owner, new_owner: account names; the configured domain is
            appended when they contain no ``@``.
        labels: list of label names to transfer.
        transfer: dict mapping each label name to the dict built by
            ``_build_transfer``.
        email_directory: directory the backups are written below.
        retries: number of retries used by the most recent
            ``process_label`` call.
    """

    # Characters replaced with '-' when turning a label name into the term
    # used after ``label:`` in a search expression.
    _UNSEARCHABLE = re.compile(r"[\^\|\&/)(\s]")

    # A label is attempted once and then retried at most this many times.
    MAX_RETRIES = 3

    def __init__(self, args, config):
        """Validate the accounts and prepare the per-label transfer table.

        Args:
            args: mapping of option name to value (``main`` passes
                ``vars(argparse_namespace)``). Every entry becomes an
                instance attribute. Expected keys: ``current_owner``,
                ``new_owner``, ``labels``, ``label_file``, ``label_prefix``
                and ``email_directory``.
            config: parsed configuration; the ``google`` and ``general``
                sections are read.

        Raises:
            SystemExit: with status 2 when an account lookup fails, the
                label file cannot be read, or there are no labels.
        """
        self.__dict__.update(args)
        # Deliberately assigned after the update: ``args`` may itself carry
        # a ``config`` entry (the path given on the command line).
        self.config = config
        self.logger = logging.getLogger(__name__)
        self.retries = 0

        self._verify_accounts()

        self.labels = self._load_labels()
        if not self.labels:
            self.logger.error(
                'No labels found to transfer.',
                extra={'entity': self.current_owner}
            )
            sys.exit(2)

        self.transfer = self._build_transfer(
            self.labels,
            getattr(self, 'label_prefix', None)
        )

        # Create a probably-unique hash to use for collision avoidance.
        # It is computed from the names exactly as supplied, i.e. before
        # the domain is appended below.
        ownerhash = md5('{}{}'.format(
            self.current_owner,
            self.new_owner
        ).encode('UTF-8')).hexdigest()[:8]

        if not getattr(self, 'email_directory', None):
            self.email_directory = '{}/{}/{}/'.format(
                self.config['general']['data_dir'],
                'mail',
                ownerhash
            )
        os.makedirs(self.email_directory, exist_ok=True)

        domain = self.config['google']['domain']
        if '@' not in self.current_owner:
            self.current_owner += '@' + domain
        if '@' not in self.new_owner:
            self.new_owner += '@' + domain

    def _verify_accounts(self):
        """Run ``<gam_command> whatis`` for both accounts; exit 2 on failure."""
        gam = self.config['google']['gam_command']
        try:
            for account in (self.current_owner, self.new_owner):
                cmd([gam, 'whatis', account])
        except CalledProcessError as e:
            self.logger.error(e.output, extra={'entity': self.current_owner})
            sys.exit(2)

    def _load_labels(self):
        """Return the labels to transfer as a list.

        When ``label_file`` is set, its non-empty lines are used and any
        ``labels`` value is ignored; otherwise ``labels`` is used. An empty
        list is returned when neither yields anything.

        Raises:
            SystemExit: with status 2 when the label file cannot be read.
        """
        label_file = getattr(self, 'label_file', None)
        if not label_file:
            return list(getattr(self, 'labels', None) or [])

        try:
            with open(label_file) as handle:
                return [line for line in handle.read().splitlines() if line]
        except OSError as e:
            self.logger.error(
                e.strerror or str(e),
                extra={'entity': self.current_owner}
            )
            sys.exit(2)

    @classmethod
    def _build_transfer(cls, labels, label_prefix=None):
        """Build the per-label lookup table.

        Each label maps to a dict with these keys:
            original: the label name as given.
            searchable: the name with ``_UNSEARCHABLE`` characters replaced
                by ``-``; used to search the source mailbox.
            destination: ``'<label_prefix>-<label>'`` when a prefix is
                given, else the label itself; applied to restored mail.
            disk_hash: first 8 hex digits of the label's MD5; used as the
                on-disk folder name for the backup.
            dest_searchable: ``destination`` made searchable the same way.
        """
        transfer = {}
        for label in labels:
            if label_prefix:
                destination = '{}-{}'.format(label_prefix, label)
            else:
                destination = label
            transfer[label] = {
                'original': label,
                'searchable': cls._UNSEARCHABLE.sub('-', label),
                'destination': destination,
                'disk_hash': md5(label.encode('UTF-8')).hexdigest()[:8],
                'dest_searchable': cls._UNSEARCHABLE.sub('-', destination),
            }
        return transfer

    def _label_folder(self, label):
        """Return the backup folder for ``label`` below ``email_directory``."""
        # os.path.join keeps the folder inside email_directory whether or
        # not that directory was given with a trailing slash.
        return os.path.join(self.email_directory, label['disk_hash'])

    def transfer_mail(self):
        """Process every label in ``self.transfer``."""
        for label in self.transfer.values():
            self.process_label(label)

    def process_label(self, label):
        """Transfer one label, retrying while the destination has fewer
        messages than the origin.

        Every label gets its own budget of ``MAX_RETRIES`` retries; the
        counter is reset on entry so earlier labels cannot use it up.

        Args:
            label: one of the dicts stored in ``self.transfer``.

        Returns:
            True when the origin has no messages for the label or the
            destination count reaches the origin count; False when the
            counts still differ after the last retry.
        """
        self.retries = 0
        while True:
            origin_count = self.count_label(self.current_owner, label)
            if origin_count <= 0:
                self.logger.info(
                    'No messages found for {}'.format(label['original']),
                    extra={'entity': self.current_owner}
                )
                return True

            self.pull_label(label)
            self.push_label(label)

            destination_count = self.count_label(self.new_owner, label)
            if origin_count <= destination_count:
                self.logger.info(
                    'Message counts match between mailboxes',
                    extra={'entity': self.current_owner}
                )
                return True

            if self.retries >= self.MAX_RETRIES:
                self.logger.warning(
                    'Unable to transfer \'{}\''.format(label['original']),
                    extra={'entity': self.current_owner}
                )
                return False

            self.retries += 1
            self.logger.warning(
                'Message count mismatch. Retrying transfer.',
                extra={'entity': self.current_owner}
            )

    def pull_label(self, label):
        """Back up the origin's messages for ``label`` to the local folder.

        A failing command is logged as a warning and otherwise ignored; the
        count comparison in ``process_label`` detects an incomplete copy.
        """
        self.logger.info(
            'Pulling messages in label {}'.format(label['original']),
            extra={'entity': self.current_owner}
        )
        try:
            cmd([
                self.config['google']['gyb_command'],
                '--email',
                self.current_owner,
                '--action',
                'backup',
                '--search',
                'label:{}'.format(label['searchable']),
                '--local-folder',
                self._label_folder(label)
            ], stderr=devnull)
        except CalledProcessError as e:
            self.logger.warning(e.output, extra={'entity': self.current_owner})

    def push_label(self, label):
        """Restore the backed-up messages for ``label`` into the
        destination mailbox under ``label['destination']``.

        A failing command is logged as a warning and otherwise ignored.
        """
        self.logger.info(
            'Pushing messages in label {}'.format(label['original']),
            extra={'entity': self.current_owner}
        )
        try:
            cmd([
                self.config['google']['gyb_command'],
                '--email',
                self.new_owner,
                '--action',
                'restore',
                '--strip-labels',
                '--local-folder',
                self._label_folder(label),
                '--label-restored',
                '{}'.format(label['destination'])
            ])
        except CalledProcessError as e:
            self.logger.warning(e.output, extra={'entity': self.current_owner})

    def count_label(self, user, label):
        """Return the number of messages ``user`` has for ``label``.

        The origin is searched by ``label['searchable']``; any other user
        is treated as the destination and searched by
        ``label['dest_searchable']``.

        Raises:
            CalledProcessError: when the count command exits non-zero
                (not handled here).
        """
        if user == self.current_owner:
            search_label = label['searchable']
            location = 'origin'
        else:
            search_label = label['dest_searchable']
            location = 'destination'

        self.logger.info(
            'Counting messages in {} at {}'.format(
                label['original'],
                location
            ),
            extra={'entity': self.current_owner}
        )
        output = cmd([
            self.config['google']['gyb_command'],
            '--email',
            user,
            '--action',
            'count',
            '--search',
            'label:{}'.format(search_label)
        ])
        # The count is taken from the second comma-separated field of the
        # command's output.
        # NOTE(review): relies on that output layout - confirm against the
        # installed command's version.
        count = int(output.split(b',')[1])

        self.logger.info(
            'Found {} messages in {} at {}'.format(
                count,
                label['original'],
                location
            ),
            extra={'entity': self.current_owner}
        )
        return count

    def cleanup(self):
        """Delete ``email_directory`` and everything in it.

        Failure to delete is logged as a warning, not raised.
        """
        try:
            rmtree(self.email_directory)
        except OSError:
            # Report the same path rmtree was given, and pass the 'entity'
            # field like every other log call in this class.
            self.logger.warning(
                'Unable to delete email archive folder at {}'.format(
                    self.email_directory
                ),
                extra={'entity': self.current_owner}
            )


def _build_parser():
    """Return the command-line parser for the mail transfer tool."""
    helptext = '''examples:
    google-transfer-mail -o dgrohl -n cweathrs -f '/tmp/labels' -p 'from dave'
    google-transfer-mail --current_owner dgrohl --new_owner cweathrs
        --label_prefix 'from_dave' --labels foofighters qotsa nirvana
    '''

    parser = argparse.ArgumentParser(
        description='Transfers email between two gsuite accounts',
        epilog=helptext,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        '--current_owner',
        '-o',
        help='The original owner of the email message(s)',
    )
    parser.add_argument(
        '--new_owner',
        '-n',
        help='The new owner of the email message(s)',
    )
    # Labels come either from a file or from the command line, never both.
    labels = parser.add_mutually_exclusive_group()
    labels.add_argument(
        '--label_file',
        '-f',
        help='A newline-delimited list of labels to be transferred.',
    )
    labels.add_argument(
        '--labels',
        '-l',
        help='A list of labels to be transferred',
        nargs='+'
    )
    parser.add_argument(
        '--label_prefix',
        '-p',
        help='A prefix to append to existing label names',
    )
    parser.add_argument(
        '--email_directory',
        '-d',
        help='Subdirectory of GAK data directory to store mail in.'
    )
    parser.add_argument(
        '-c',
        '--config',
        help="The GAK config to use.",
        default='/etc/collab-admin-kit.yml'
    )
    return parser


def _missing_arguments(args):
    """Return descriptions of the required arguments that are still unset."""
    missing = [
        name for name in ('current_owner', 'new_owner')
        if not getattr(args, name)
    ]
    if not (args.labels or args.label_file):
        missing.append('labels or label_file')
    return missing


def _configure_logging(config):
    """Attach syslog and console handlers to this module's logger.

    Reads ``log_facility``, ``log_format``, ``console_format`` and
    ``date_format`` from ``config['general']``.
    """
    # This is the module logger (the same one Mover obtains), not the root
    # logger.
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    # Create a syslog handler, set format, and associate.
    sh = logging.handlers.SysLogHandler(
        address='/dev/log',
        facility=config['general']['log_facility']
    )
    sh.setFormatter(logging.Formatter(config['general']['log_format']))
    logger.addHandler(sh)

    # Create a console handler, set format, and associate.
    ch = logging.StreamHandler()
    ch.setFormatter(logging.Formatter(
        config['general']['console_format'],
        config['general']['date_format']
    ))
    logger.addHandler(ch)


def main():
    """Command-line entry point: parse options, then transfer and clean up.

    When ``--current_owner`` is not given the user is prompted for it and
    for any other required value that is still missing. If a required
    value is missing after that, the parser's usage error is printed and
    the process exits with status 2.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # Argument prompter fallback. Values already supplied on the command
    # line are kept rather than prompted for again.
    if not args.current_owner:
        args.current_owner = arg_prompt(
            'Email address associated with source mailbox'
        )
        if not args.new_owner:
            args.new_owner = arg_prompt(
                'Email address associated with destination mailbox'
            )
        if not (args.labels or args.label_file):
            answer = arg_prompt(
                '(option 1) Labels to transfer, separated by commas',
                default=''
            )
            # Drop surrounding whitespace and empty entries so that
            # "a, b" yields ['a', 'b'] and an empty answer yields [].
            args.labels = [
                part.strip() for part in answer.split(',') if part.strip()
            ]
            if not args.labels:
                args.label_file = arg_prompt(
                    '(option 2) Full path to a list of user labels to transfer'
                )

    # Argument sanity check: stop here instead of failing later on a
    # missing value.
    missing = _missing_arguments(args)
    if missing:
        parser.error(
            'Missing required argument(s): {}'.format(', '.join(missing))
        )

    # Open the CAK Config. BaseLoader constructs only plain strings, lists
    # and dicts from the file.
    with open(args.config) as stream:
        config = yaml.load(stream, Loader=yaml.BaseLoader)

    _configure_logging(config)

    mover = Mover(vars(args), config)
    mover.transfer_mail()
    mover.cleanup()


if __name__ == '__main__':
    main()