#!/usr/bin/env python3
"""Report a user's Google Drive files, split into transfer/private buckets.

The module shells out to the configured ``gam`` command to list Drive files,
classifies them, writes CSV reports to a local directory and optionally
uploads those reports to a Drive folder.
"""
import argparse
import csv
import logging
import logging.handlers
import os
from datetime import datetime
from shutil import rmtree
from subprocess import CalledProcessError
from subprocess import check_output as cmd

import yaml

from .core import arg_prompt

# Shared sink for the stderr of every gam invocation. Kept open for the
# lifetime of the process on purpose: it is handed to many subprocess calls.
devnull = open(os.devnull, 'w')


class Sifter:
    """Walk a user's Drive tree and sort files into report buckets.

    Buckets (all dicts keyed by file id):
      * ``transfer_files`` - files found under the root that are not private
      * ``private_files``  - files inside (or named like) a private folder
      * ``unknown_files``  - files not reached by the folder walk
      * ``shared_files``   - files shared with the user but owned by others
    """

    def __init__(self, args, config):
        """Validate the account and prepare the report directory.

        Args:
            args: dict of options (normally ``vars(argparse_namespace)``);
                every key becomes an attribute. ``account`` and
                ``root_folder_id`` are required by this class.
            config: parsed configuration; the ``google`` and ``general``
                sections are read.

        Raises:
            SystemExit: with status 2 when ``gam whatis <account>`` fails.
        """
        self.__dict__.update(args)
        self.config = config
        self.logger = logging.getLogger(__name__)

        try:
            cmd([config['google']['gam_command'], 'whatis', self.account])
        except CalledProcessError as e:
            self.logger.error(e.output, extra={'entity': self.account})
            raise SystemExit(2)

        if '@' not in self.account:
            self.account = '{}@{}'.format(
                self.account, config['google']['domain']
            )

        # Built-in private folder names plus any caller-supplied extras;
        # matching is case-insensitive so everything is stored lower-cased.
        private_folders = ['personal', 'private', 'personal and private']
        private_folders += list(getattr(self, 'omit_folders', None) or [])
        self.omit_folders = [name.lower() for name in private_folders]

        self.log_path = '{}/{}-{}'.format(
            self.config['general']['data_dir'],
            self.account,
            datetime.now().strftime('%Y-%m-%d')
        )
        os.makedirs(self.log_path, 0o770, exist_ok=True)

        self.transfer_files = {}
        self.private_files = {}
        self.unknown_files = {}
        self.shared_files = {}
        # Stack of folder names from the root down to the folder currently
        # being sifted; used to build each file's 'parent' path.
        self.parent = [self.root_folder_id]

    def _show_filelist(self, *gam_args):
        """Run ``gam user <account> show filelist <gam_args>``.

        Returns:
            The command's output decoded as UTF-8 and split on newlines,
            with the final element (after the trailing newline) dropped.

        Raises:
            CalledProcessError: when the gam command exits non-zero.
        """
        output = cmd(
            [
                self.config['google']['gam_command'], 'user', self.account,
                'show', 'filelist'
            ] + list(gam_args),
            stderr=devnull
        )
        return output.decode('utf-8').split('\n')[:-1]

    @staticmethod
    def _file_record(row):
        """Build the common report record from one filelist CSV row."""
        return {
            'owner': row['owners.0.emailAddress'],
            'name': row['title'],
            'id': row['id'],
            'link': row['alternateLink']
        }

    def sift(self, folder_id, in_private=False):
        """Recursively classify the children of ``folder_id``.

        Args:
            folder_id: Drive id of the folder to list.
            in_private: True when an ancestor folder was already classified
                as private, so everything below is private too.

        Returns:
            None. Results accumulate in ``transfer_files``/``private_files``.
        """
        self.logger.info(
            'message=Processing folder {}'.format(folder_id),
            extra={'entity': self.account}
        )
        try:
            try:
                rows = self._show_filelist(
                    'name', 'id', 'alternateLink', 'mimeType', 'anyowner',
                    'owners', 'query', "'{}' in parents".format(folder_id)
                )
            except CalledProcessError as e:
                self.logger.error(e.output, extra={'entity': self.account})
                return None
            if rows:
                self._sift_rows(rows, in_private)
        finally:
            # Every sift() call corresponds to exactly one pushed entry
            # (the root in __init__, child folders before recursing), so
            # the entry must be popped on every exit path - including the
            # error return above - or later siblings get a wrong path.
            if self.parent:
                self.parent.pop()
        return None

    def _sift_rows(self, rows, in_private):
        """Classify each CSV row of one folder listing, recursing on folders."""
        # The flag is always present when built from argparse, so test its
        # value rather than its existence.
        check_owner = not getattr(self, 'ignore_owner', False)
        try:
            for child in csv.DictReader(rows, delimiter=',', quotechar='"'):
                if check_owner and \
                        self.account != child['owners.0.emailAddress']:
                    continue

                data = self._file_record(child)
                data['parent'] = '\\'.join(self.parent)

                private = bool(
                    in_private or child['title'].lower() in self.omit_folders
                )
                bucket = self.private_files if private else self.transfer_files
                bucket[child['id']] = data

                if 'folder' in child['mimeType']:
                    self.parent.append(child['title'])
                    self.sift(child['id'], in_private=private)
        except csv.Error:
            self.logger.warning(
                'Failed reading files in {}. '
                'Check for bad file title(s)?'.format(self.parent),
                extra={'entity': self.account}
            )

    def find_unorganized(self):
        """Record files of the account that the folder walk did not reach."""
        try:
            rows = self._show_filelist(
                'name', 'id', 'owners', 'alternateLink'
            )
        except CalledProcessError as e:
            self.logger.error(e.output, extra={'entity': self.account})
            return

        for item in csv.DictReader(rows, delimiter=',', quotechar='"'):
            file_id = item['id']
            if file_id in self.transfer_files or file_id in self.private_files:
                continue
            self.unknown_files[file_id] = self._file_record(item)

    def find_shared(self):
        """Record files shared with the account but owned by someone else."""
        try:
            rows = self._show_filelist(
                'name', 'id', 'mimetype', 'alternateLink', 'anyowner',
                'owners', 'query', 'sharedWithMe=True'
            )
        except CalledProcessError as e:
            self.logger.error(e.output, extra={'entity': self.account})
            return

        for item in csv.DictReader(rows, delimiter=',', quotechar='"'):
            if item['owners.0.emailAddress'] != self.account:
                self.shared_files[item['id']] = self._file_record(item)

    def scrub_duplicates(self):
        """Drop from ``transfer_files`` anything also listed as private."""
        # Iterate over a snapshot of the keys: deleting from a dict while
        # iterating it raises RuntimeError.
        for file_id in list(self.transfer_files):
            if file_id in self.private_files:
                del self.transfer_files[file_id]

    def _write_report(self, filename, records, fieldnames):
        """Write ``records`` (dict of row dicts) as CSV; skip when empty."""
        if not records:
            return
        target = '{}/{}'.format(self.log_path, filename)
        # newline='' is what the csv module expects for files it writes.
        with open(target, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(records.values())

    def write_logfiles(self):
        """Write one CSV report per non-empty bucket into ``log_path``."""
        with_parent = ['owner', 'name', 'id', 'link', 'parent']
        self._write_report(
            'transfer_files.csv', self.transfer_files, with_parent
        )
        self._write_report(
            'private_files.csv', self.private_files, with_parent
        )
        # unknown-file records carry no 'parent'; the column is left blank.
        self._write_report(
            'unknown_files.csv', self.unknown_files, with_parent
        )
        self._write_report(
            'shared_files.csv', self.shared_files,
            ['owner', 'name', 'id', 'link']
        )

    def _pick_upload_user(self):
        """Return the account that should receive the uploaded reports.

        Uses ``destination_account`` when set, otherwise the login name of
        the current user; falls back to the configured admin account when
        the login name is unavailable or gam does not recognise the user.
        """
        admin = self.config['google']['admin_account']
        gam = self.config['google']['gam_command']
        try:
            upload_user = (
                getattr(self, 'destination_account', False)
                or str(os.getlogin())
            )
            cmd([gam, 'whatis', upload_user])
        except CalledProcessError as e:
            self.logger.warning(e.output, extra={'entity': self.account})
            return admin
        except OSError as e:
            # os.getlogin() fails without a controlling terminal.
            self.logger.warning(
                'Unable to determine login name: {}'.format(e),
                extra={'entity': self.account}
            )
            return admin
        return upload_user

    def upload_logfiles(self):
        """Upload the CSV reports to a Drive folder, then clean up locally.

        The local report directory is removed only when every upload
        succeeded, so a failed upload never destroys the only copy.
        """
        upload_user = self._pick_upload_user()
        gam = self.config['google']['gam_command']

        # Find available files, then upload them.
        available_files = os.listdir(self.log_path)
        if not available_files:
            self.logger.warning(
                'No files available for transfer',
                extra={'entity': self.account}
            )
            return None

        gFolderName = self.account + ' Drive Report'
        try:
            cmd(
                [
                    gam, 'user', upload_user, 'add', 'drivefile',
                    'drivefilename', gFolderName, 'mimetype', 'gdirectory'
                ],
                stderr=devnull
            )
        except CalledProcessError:
            self.logger.error(
                'Unable to create report directory',
                extra={'entity': self.account}
            )
            return None

        failed = False
        for log_file in available_files:
            try:
                cmd(
                    [
                        gam, 'user', upload_user, 'add', 'drivefile',
                        'localfile',
                        '{}/{}'.format(self.log_path, log_file),
                        'parentname', gFolderName, 'convert'
                    ],
                    stderr=devnull
                )
            except CalledProcessError:
                failed = True
                self.logger.error(
                    'Error uploading {}'.format(log_file),
                    extra={'entity': self.account}
                )

        if failed:
            self.logger.warning(
                'Reports kept in {}'.format(self.log_path),
                extra={'entity': self.account}
            )
        else:
            rmtree(self.log_path)
        return None


def main():
    """Command-line entry point: parse options, sift, report, upload."""
    helptext = '''examples:
    google-sift-drive -a alexham -o 'burr_beef' 'duel notes'
    google-sift-drive --account alexham --omit_folders 'burr_beef'
    '''
    parser = argparse.ArgumentParser(
        description='Reports owned Google files and folders for a user',
        epilog=helptext,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        '--account', '-a',
        help="The user to sift files for.",
    )
    parser.add_argument(
        '--root_folder_id', '-r',
        help='The ID of the folder to use as root.',
        default='root'
    )
    parser.add_argument(
        '--ignore_owner', '-i',
        help='Ignore ownership requirement for reporting.',
        dest='ignore_owner',
        action='store_true'
    )
    parser.add_argument(
        '--omit_folders', '-o',
        help='A list of additional private folder names.',
        nargs='+',
        default=[]
    )
    parser.add_argument(
        '--destination_account', '-d',
        help='An optional account to upload reports to.',
        default=False
    )
    parser.add_argument(
        '--skip_upload', '-u',
        help='Skip upload of logfiles to Drive',
        dest='skip_upload',
        action='store_true'
    )
    parser.add_argument(
        '--config', '-c',
        help='The CAK config to use.',
        default='/etc/collab-admin-kit.yml'
    )
    args = parser.parse_args()

    # Argument prompt fallback
    if not args.account:
        args.account = arg_prompt(
            'Sifter target user'
        )
        args.root_folder_id = arg_prompt(
            '(optional) Use this folder ID as root',
            default='root'
        )
        raw_folders = arg_prompt(
            '(optional) Omit these folders (comma-delimited)',
            default=''
        )
        # Drop blanks so an empty answer does not add '' as a folder name.
        args.omit_folders = [
            name.strip() for name in raw_folders.split(',') if name.strip()
        ]
        args.destination_account = arg_prompt(
            '(optional) Upload reports to this account. (defaults to you)',
            default=False
        )
        answer = arg_prompt(
            '(optional) Skip upload of reports [y/N]',
            default='n'
        )
        # Convert to a real boolean: the raw answer 'n' would be truthy.
        args.skip_upload = answer.strip().lower() in ('y', 'yes')

    # Argument sanity check. A tuple entry means "at least one of these".
    _required = ['account']
    for r in _required:
        names = (r,) if isinstance(r, str) else r
        if not any(getattr(args, name) for name in names):
            parser.error('Missing required argument {}'.format(r))

    # Open the CAK Config
    with open(args.config) as stream:
        config = yaml.load(stream, Loader=yaml.BaseLoader)

    # Get the root logger and set the debug level
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    # Create a syslog handler, set format, and associate.
    sh = logging.handlers.SysLogHandler(
        address='/dev/log',
        facility=config['general']['log_facility']
    )
    formatter = logging.Formatter(config['general']['log_format'])
    sh.setFormatter(formatter)
    logger.addHandler(sh)

    # Create a console handler, set format, and associate.
    ch = logging.StreamHandler()
    formatter = logging.Formatter(
        config['general']['console_format'],
        config['general']['date_format']
    )
    ch.setFormatter(formatter)
    logger.addHandler(ch)

    sifter = Sifter(vars(args), config)
    sifter.sift(args.root_folder_id)
    # Account-wide scans only make sense when the whole Drive was sifted.
    if args.root_folder_id == 'root':
        sifter.find_unorganized()
        sifter.find_shared()
    sifter.scrub_duplicates()
    sifter.write_logfiles()
    if not args.skip_upload:
        sifter.upload_logfiles()


if __name__ == '__main__':
    main()