I. Description:
The disk-cleanup logic is packaged as a Python module so its functions can be reused by other scripts that need to free disk space.
II. Goal:
Remove log files that are older than a configurable age and clean up log files that exceed a configurable size, emptying the log file currently being written only as a last resort.
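Because everything below is written as ordinary functions behind a __main__ guard, the module can also be imported and driven from other Python code. A minimal sketch of that kind of reuse, assuming the script is saved as clean_robo.py (the file name used in section IV) and that the directory path here is only an example:

# Minimal reuse sketch; the module name clean_robo and the path below are example assumptions.
import clean_robo

logs_dir = '/data0/hadoop2/logs'  # example directory
# Delete *log* files older than 7 days, skipping files that are probably still being written
clean_robo.remove_files_by_date(logs_dir, before_days_remove='7d', pattern='log')
# Then clean up *log* files larger than 10 GB
clean_robo.remove_files_by_size(logs_dir, file_size_limit='10g', pattern='log')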
III. Original code
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import commands
import os
import time
import re
import getopt
import sys


def execute_local_shell_cmd(cmd):
    # Returns a two-element tuple (status, result); status is an int, result is a list of output lines
    status, result = commands.getstatusoutput(cmd)
    result = result.split("\n")
    return status, result


def send_alert_mail():
    pass


def get_disk_used(disk_name):
    """Get the space usage of a disk."""
    status, result = execute_local_shell_cmd("df | grep %s | awk '{print $5}'" % disk_name)
    return status, result[0]

# print(get_disk_used('/data0'))


def file_modify_in(file_path, time_interval='1d'):
    """Determine whether a file has been modified within the specified time."""
    current_time = time.time()
    # os.path.getmtime() returns the last modification time, in seconds since the Unix epoch
    if current_time - os.path.getmtime(file_path) < translate_time_interval_to_second(time_interval):
        return True
    return False


def translate_file_size_to_kb(file_size):
    """Translate a size string such as 10g/10m/10k/10b to a number (despite the name, the result is in bytes)."""
    # Convert the string to lowercase
    file_size = str(file_size.lower())
    # Pattern: one or more digits, optionally followed by a decimal point and more digits
    pattern = re.compile(r'\d+\.?\d*')
    match = pattern.match(file_size)
    file_size_number = None
    if match:
        # match.group() returns the numeric part that was matched
        file_size_number = float(match.group())
    else:
        raise IOError("Input {0} can't translate to byte."
                      "Current support g(gb)/m(mb)/k(kb)/b(byte)".format(file_size))
    # endswith() returns True if the string ends with the given suffix, otherwise False
    if file_size.endswith("g") or file_size.endswith("gb"):
        return file_size_number * 1024 * 1024 * 1024
    elif file_size.endswith("m") or file_size.endswith("mb"):
        return file_size_number * 1024 * 1024
    elif file_size.endswith("k") or file_size.endswith("kb"):
        return file_size_number * 1024
    elif file_size.endswith("b") or file_size.endswith("byte"):
        return file_size_number
    else:
        raise IOError("Input {0} can't translate to byte."
                      "Current support g(gb)/m(mb)/k(kb)/b(byte)".format(file_size))

# print(translate_file_size_to_kb('10g'))


def translate_time_interval_to_second(time_interval):
    """Translate an interval string such as 7d/12h/30m/10s to seconds."""
    date_interval = str(time_interval.lower())
    pattern = re.compile(r'\d+')
    match = pattern.match(date_interval)
    date_interval_number = None
    if match:
        date_interval_number = int(match.group())
    else:
        raise IOError("Input {0} can't translate to second."
                      "Current support d(day)/h(hour)/m(min)/s(sec)".format(date_interval))
    if date_interval.endswith('d') or date_interval.endswith('day'):
        return date_interval_number * 24 * 3600
    elif date_interval.endswith('h') or date_interval.endswith('hour'):
        return date_interval_number * 3600
    elif date_interval.endswith('m') or date_interval.endswith('min'):
        return date_interval_number * 60
    elif date_interval.endswith('s') or date_interval.endswith('sec'):
        return date_interval_number
    else:
        raise IOError("Input {0} can't translate to second."
                      "Current support d(day)/h(hour)/m(min)/s(second)".format(date_interval))

# print(translate_time_interval_to_second('7d'))


def probable_current_log_file(file_path, pattern='log', modify_in='1d'):
    """Whether the file is probably the log file currently being written:
    1) it was modified within the last day, or 2) its name ends with pattern."""
    if file_modify_in(file_path, time_interval=modify_in):
        return True
    return str(file_path).endswith(pattern)


def get_clean_log_list_by_date(target_dir, before_days_remove='7d', pattern="log"):
    """Yield log files older than the configured age. Files that are probably still being
    written are not returned; see probable_current_log_file for how that is decided."""
    before_seconds_remove = translate_time_interval_to_second(before_days_remove)
    current_time = time.time()
    # os.listdir() returns the names of the files and folders in the given directory
    for candidate_file in os.listdir(target_dir):
        candidate_file_fullpath = "%s/%s" % (target_dir, candidate_file)
        # Only consider regular files
        if os.path.isfile(candidate_file_fullpath):
            candidate_file_mtime = os.path.getmtime(candidate_file_fullpath)
            # find() returns the index of the substring if it is present, otherwise -1
            if current_time - candidate_file_mtime > before_seconds_remove \
                    and candidate_file.find(pattern) != -1 \
                    and not probable_current_log_file(candidate_file_fullpath):
                # yield returns a value and remembers where it left off, so the next iteration resumes there
                yield candidate_file_fullpath


def get_clean_log_list_by_size(target_dir, file_size_limit='10g', pattern="log"):
    """Yield log files larger than the given size (the filter that would skip files
    modified within the last day is left commented out below)."""
    file_size_limit_byte = translate_file_size_to_kb(file_size_limit)
    for candidate_file in os.listdir(target_dir):
        candidate_file_fullpath = "%s/%s" % (target_dir, candidate_file)
        if os.path.isfile(candidate_file_fullpath):
            # os.stat() returns the file's status information
            file_stat = os.stat(candidate_file_fullpath)
            if candidate_file.find(pattern) != -1 and \
                    file_stat.st_size >= file_size_limit_byte:
                yield candidate_file_fullpath
                # If the file has been modified within modify_in, it is not returned.
                # if not (modify_in and file_modify_in(candidate_file_fullpath, time_interval=modify_in)) and \
                #         not probable_current_log_file(candidate_file_fullpath):
                #     yield candidate_file_fullpath


def remove_file_list(file_list, pattern='log', roll_back=False):
    """Remove (or truncate) the given list of files."""
    for file_item in file_list:
        if roll_back or probable_current_log_file(file_item, pattern=pattern, modify_in='1d'):
            print('roll back file %s' % file_item)
            execute_local_shell_cmd("cat /dev/null > {0}".format(file_item))
        else:
            print('remove file %s' % file_item)
            # Delete the file at the given path; raises OSError if the path is a directory
            os.remove(file_item)


def remove_files_by_date(target_dir, before_days_remove='7d', pattern='log'):
    """Clean out over-aged log files."""
    file_list = get_clean_log_list_by_date(target_dir, before_days_remove, pattern)
    remove_file_list(file_list)


def remove_files_by_size(target_dir, file_size_limit='10g', pattern='log'):
    """Clean up oversized log files."""
    file_list = get_clean_log_list_by_size(target_dir, file_size_limit, pattern)
    remove_file_list(file_list)


def clean_current_log_file(target_dir, file_size_limit='10g', pattern='log'):
    """Empty the log file currently being written, using cat /dev/null > {log_file}."""
    for candidate_file in os.listdir(target_dir):
        candidate_file_fullpath = '%s/%s' % (target_dir, candidate_file)
        if candidate_file.endswith(pattern) and os.path.isfile(candidate_file_fullpath):
            file_stat = os.stat(candidate_file_fullpath)
            if file_stat.st_size >= translate_file_size_to_kb(file_size_limit):
                remove_file_list([candidate_file_fullpath], roll_back=True)


def clean_data_release_disk(disk_name, target_dir, disk_used_limit='80%', before_days_remove='7d',
                            file_size_limit='10g', pattern='log'):
    disk_used_limit = disk_used_limit.replace('%', '')
    # Step 1: time-based log cleanup
    print('Step one remove files {0} ago.'.format(before_days_remove))
    remove_files_by_date(target_dir, before_days_remove=before_days_remove, pattern=pattern)
    # If enough disk space has still not been freed, perform a size-based cleanup
    current_disk_used = int(get_disk_used(disk_name)[1].replace('%', ''))
    if current_disk_used > int(disk_used_limit):
        print("Disk {0}'s current used {1}% greater than input used limit {2}%,"
              "so we will remove files bigger than {3}".
              format(disk_name, current_disk_used, disk_used_limit, file_size_limit))
        remove_files_by_size(target_dir, file_size_limit=file_size_limit, pattern=pattern)
    # If disk space is still not freed, empty the log file currently being written
    current_disk_used = int(get_disk_used(disk_name)[1].replace('%', ''))
    if current_disk_used > int(disk_used_limit):
        print("Disk {0}'s current used {1}% greater than input used limit {2}%,"
              "so we will roll back current log file".
              format(disk_name, current_disk_used, disk_used_limit))
        clean_current_log_file(target_dir, file_size_limit=file_size_limit, pattern=pattern)
    # If usage is still over the limit, send an alert mail
    if int(get_disk_used(disk_name)[1].replace('%', '')) > int(disk_used_limit):
        send_alert_mail()


def usage():
    print(' -d <target_disk> -r <target_directory> -u <diskUsedLimit(default 80%)> '
          '-f <fileSizeLimit(default 10gb,gb/mb/kb)> -p <filePattern(default log)> '
          '-t <beforeDaysRemove(default 7d,d)> ')


if __name__ == "__main__":
    target_disk_input = '/data0'
    target_dir_input = '/data0/hadoop2/logs'
    disk_used_limit_input = '80%'
    file_size_limit_input = '10g'
    pattern_input = 'log'
    before_days_remove_input = '7d'
    try:
        # getopt parses the command line into a list of (option, value) pairs plus the remaining arguments
        opts, args = getopt.getopt(sys.argv[1:], 'hd:r:u:f:p:t:',
                                   ['help', 'disk=', 'directory=', 'diskUsedLimit=',
                                    'fileSizeLimit=', 'filePattern=', 'beforeDaysRemove='])
    except getopt.GetoptError as err:
        # Catch option-parsing errors, print them and exit
        print(err)
        usage()
        sys.exit(2)
    if len(opts) < 6:
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            usage()
            sys.exit()
        elif opt in ("-d", "--disk"):
            # Drop the '/' so the value matches the device line in the df output
            target_disk_input = arg.replace('/', '')
        elif opt in ("-r", "--directory"):
            target_dir_input = arg
        elif opt in ("-u", "--diskUsedLimit"):
            disk_used_limit_input = arg
        elif opt in ("-f", "--fileSizeLimit"):
            file_size_limit_input = arg
            translate_file_size_to_kb(file_size_limit_input)   # validate early
        elif opt in ("-p", "--filePattern"):
            pattern_input = arg
        elif opt in ("-t", "--beforeDaysRemove"):
            before_days_remove_input = arg
            translate_time_interval_to_second(before_days_remove_input)   # validate early
    print("{0} Start clean job.target_disk:{1},target_directory:{2},disk_used_limit:{3},"
          "file_size_limit:{4},pattern:{5},before_days_remove:{6}".format(
              time.asctime(time.localtime()), target_disk_input, target_dir_input,
              disk_used_limit_input, file_size_limit_input, pattern_input,
              before_days_remove_input))
    clean_data_release_disk(target_disk_input, target_dir_input,
                            disk_used_limit=disk_used_limit_input,
                            file_size_limit=file_size_limit_input,
                            pattern=pattern_input,
                            before_days_remove=before_days_remove_input)
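For a one-off run from the command line, an invocation consistent with usage() would look roughly like this (the paths and limits are example values only; note that the script exits unless all six options are supplied):

python clean_robo.py -d /data0 -r /data0/hadoop2/logs -u 80% -f 10g -p log -t 7d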
IV. Scheduled cleanup: walking the directory tree and invoking the script above
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os


# Walk a directory tree and yield the full path of every subdirectory
def Lisdir(targetdir):
    for root, dirs, files in os.walk(targetdir):
        for d in dirs:
            yield os.path.join(root, d)


# For every first-level subdirectory of targetdir, yield a generator over its subdirectories
def log_dir(targetdir):
    for ph in os.listdir(targetdir):
        if os.path.isdir(os.path.join(targetdir, ph)):
            yield Lisdir(os.path.join(targetdir, ph))


for path in log_dir('/data0/backup_log-bin'):
    for ppp in path:
        # Skip directories whose names end with 'log-bin'; clean everything else
        if ppp.endswith('log-bin') is False:
            os.system("db_script/clean_robo.py -d /data0 -r {0} -u 75% -f 501M -p bin -t 5d".format(ppp))
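To make the deletion actually run on a schedule, the wrapper above can be registered in cron. A possible entry, assuming the wrapper is saved as /data0/db_script/clean_dirs.py and logs to /data0/clean_dirs.log (both names are assumptions, not part of the original), with cd /data0 so the relative path db_script/clean_robo.py inside the wrapper resolves:

# Example crontab entry (assumed file names and schedule): run the wrapper every day at 01:00
0 1 * * * cd /data0 && python db_script/clean_dirs.py >> /data0/clean_dirs.log 2>&1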
That concludes this worked example of disk log cleanup in Python. For more on cleaning up disk logs with Python, please see my other related articles!