一个用 Python 编写的增量读取并分析日志的脚本
#!/usr/bin/env python
# coding: utf-8
# guoiang.han
"""Incrementally analyse a Tomcat access log and e-mail an alert report.

Each run reads only the lines appended to today's access log since the
previous run (the byte offset is remembered in ``seek_file``), aggregates
status codes and response-time buckets per request URI, and mails the URIs
whose success ratios fall below the alert thresholds.

The code runs unchanged on Python 2.7 and Python 3.
"""
import time
import datetime
import sys
import os.path
import re
import smtplib
from email.mime.text import MIMEText

# Timestamp format used for both writing and reading the seek file.
TIME_FORMAT = '%Y-%m-%d %H:%M:%S'
# A saved offset older than this many seconds is ignored.
SEEK_MAX_AGE = 300
# Response-time bucket labels, in display order.
TIME_BUCKETS = ('3s', '5s', '10s', '11s+')
# Alert when fewer than this percentage of requests land in the '3s' bucket.
MIN_FAST_PERCENT = 90
# Alert when fewer than this percentage of requests return HTTP 200.
MIN_OK_PERCENT = 99

# Date part of the log file name.
date = time.strftime('%Y-%m-%d')
# Name of the log file to analyse.
log_file = 'localhost_access_log.%s.txt' % date
# File that records how far the log has already been read.
seek_file = '/tmp/log_check_seek.tmp'

# Mail settings.
mailto_list = ['[email protected]', '']
mail_host = "smtp.163.com"  # SMTP server
mail_user = "alert"  # user name
mail_pass = "aaatest"  # password
mail_postfix = "163.com"  # domain part of the sender address

# One access-log line; the dots of the client address are escaped so that
# they only match literal dots.
LINE_RE = re.compile(
    r'(?P<client_ip>\d+\.\d+\.\d+\.\d+) (?P<remote_user>.*) '
    r'(?P<remote_auth>.*) (?P<time>\[.*\]) \"(?P<request>.*)\" '
    r'(?P<status_code>.*) (?P<sent_byte>.*) (?P<request_time>.*)')
# The quoted request: method, URI, protocol version.
URI_RE = re.compile(
    r'(?P<request_method>\w+) (?P<request_uri>.*) (?P<http_version>.*)')


def send_mail(to_list, sub, content):
    """Send a plain-text mail and report whether it succeeded.

    Args:
        to_list: recipient addresses; empty entries are ignored.
        sub: subject line.
        content: message body.

    Returns:
        True when the message was handed to the SMTP server, False when any
        step failed (the error text is printed).
    """
    me = "tomcat_log" + "<" + mail_user + "@" + mail_postfix + ">"
    recipients = [addr for addr in to_list if addr]
    # utf-8 can encode any body text, so building the message cannot fail
    # on characters outside a narrower charset.
    msg = MIMEText(content, _subtype='plain', _charset='utf-8')
    msg['Subject'] = sub
    msg['From'] = me
    # Address lists in mail headers are comma separated.
    msg['To'] = ", ".join(recipients)
    server = smtplib.SMTP()
    try:
        server.connect(mail_host)
        server.login(mail_user, mail_pass)
        server.sendmail(me, recipients, msg.as_string())
        server.quit()
        return True
    except Exception as e:
        print(str(e))
        return False
    finally:
        # Always release the socket, including after a failed login/send.
        server.close()


def new_stats():
    """Return an empty counter set: status codes plus time buckets."""
    return {'code': {}, 'time_range': dict.fromkeys(TIME_BUCKETS, 0)}


def time_bucket(seconds):
    """Return the bucket label for a request that took *seconds*.

    Whole-second values map as 0-3 -> '3s', 4-5 -> '5s', 6-10 -> '10s' and
    11 or more -> '11s+'; fractional values fall into the bucket of the
    range they lie in.
    """
    if seconds < 4:
        return '3s'
    if seconds < 6:
        return '5s'
    if seconds < 11:
        return '10s'
    return '11s+'


def parse_line(line):
    """Parse one access-log line.

    Returns:
        ``(request_uri, status_code, seconds)``, or None when the line does
        not have the expected layout or its numeric fields are not numbers.
        The URI is cut at the first '?' and at the first '%'.
    """
    match = LINE_RE.match(line)
    if match is None:
        return None
    request = URI_RE.match(match.group('request'))
    if request is None:
        return None
    try:
        status_code = int(match.group('status_code'))
        seconds = float(match.group('request_time'))
    except ValueError:
        return None
    request_uri = request.group('request_uri').split('?')[0].split('%')[0]
    return request_uri, status_code, seconds


def analyze_lines(lines):
    """Aggregate status codes and time buckets over *lines*.

    Unparseable lines are skipped.

    Returns:
        A dict with overall counters under ``'code'`` and ``'time_range'``
        and the per-URI counters (same two keys each) under ``'uris'``.
    """
    data = new_stats()
    # Per-URI counters live in their own dict so that a URI can never clash
    # with the 'code' / 'time_range' keys of the overall counters.
    data['uris'] = {}
    for line in lines:
        parsed = parse_line(line)
        if parsed is None:
            continue
        request_uri, status_code, seconds = parsed
        if request_uri not in data['uris']:
            data['uris'][request_uri] = new_stats()
        bucket = time_bucket(seconds)
        for stats in (data['uris'][request_uri], data):
            stats['code'][status_code] = stats['code'].get(status_code, 0) + 1
            stats['time_range'][bucket] += 1
    return data


def build_report(data):
    """Format one summary line per URI and pick the ones to alert on.

    Args:
        data: the structure returned by ``analyze_lines``.

    Returns:
        ``(content, alert)``: every summary line, and the subset whose share
        of HTTP 200 responses is below ``MIN_OK_PERCENT`` or whose share of
        requests in the '3s' bucket is below ``MIN_FAST_PERCENT``.
    """
    content = []
    alert = []
    for request_uri in sorted(data['uris']):
        stats = data['uris'][request_uri]
        codes = stats['code']
        total = sum(codes.values())
        if not total:
            continue
        parts = [request_uri]
        parts.extend('%s:%s' % (code, codes[code]) for code in sorted(codes))
        parts.extend('%s:%s' % (label, stats['time_range'][label])
                     for label in TIME_BUCKETS)
        summary = ' '.join(parts)
        # Float arithmetic: integer division would collapse every ratio
        # below 100% to 0.
        success_code = 100.0 * codes.get(200, 0) / total
        success_time = 100.0 * stats['time_range']['3s'] / total
        # A single test so that a URI failing both thresholds is listed once.
        if success_time < MIN_FAST_PERCENT or success_code < MIN_OK_PERCENT:
            alert.append(summary)
        content.append(summary)
    return content, alert


def load_seek(path, now=None):
    """Return ``(offset, whence)`` describing where to resume reading.

    ``(saved_offset, 0)`` is returned when *path* holds an offset written
    less than ``SEEK_MAX_AGE`` seconds ago; otherwise ``(0, 2)``, meaning
    "start at the end of the log". A missing, empty or malformed seek file,
    or one stamped in the future, also yields ``(0, 2)``.

    Args:
        path: seek file written by ``save_seek``.
        now: reference ``datetime``; defaults to the current local time.
    """
    at_end = (0, 2)
    if not os.path.exists(path):
        return at_end
    with open(path) as f:
        saved = f.readlines()
    try:
        saved_at = datetime.datetime.strptime(saved[0].strip(), TIME_FORMAT)
        offset = int(saved[1].strip())
    except (IndexError, ValueError):
        return at_end
    if now is None:
        now = datetime.datetime.now()
    # total_seconds() includes whole days; ``.seconds`` alone would make a
    # record that is days old look recent.
    age = (now - saved_at).total_seconds()
    if 0 < age < SEEK_MAX_AGE and offset >= 0:
        return offset, 0
    return at_end


def read_new_lines(path, offset, whence):
    """Read the complete lines of *path* that follow the resume position.

    Args:
        path: log file to read.
        offset, whence: resume position as returned by ``load_seek``.

    Returns:
        ``(lines, next_offset)``. ``next_offset`` is the byte position just
        after the last complete line, so a line that is still being written
        is left for the next run.
    """
    lines = []
    # Binary mode keeps offsets as plain byte counts on every Python version.
    with open(path, 'rb') as f:
        f.seek(0, 2)
        size = f.tell()
        # A saved offset at or beyond the current size cannot belong to this
        # file content; start from the end instead.
        position = offset if (whence == 0 and offset < size) else size
        f.seek(position)
        for raw in f:
            if not raw.endswith(b'\n'):
                break
            position += len(raw)
            if not isinstance(raw, str):
                raw = raw.decode('utf-8', 'replace')
            lines.append(raw)
    return lines, position


def save_seek(path, offset):
    """Record the current time and *offset* for the next run."""
    with open(path, 'w') as f:
        f.write(time.strftime(TIME_FORMAT) + "\n")
        f.write(str(offset) + "\n")


def main():
    """Analyse the new part of today's log, mail alerts, save the offset."""
    offset, whence = load_seek(seek_file)
    lines, next_offset = read_new_lines(log_file, offset, whence)
    content, alert = build_report(analyze_lines(lines))
    msg = '\r\n'.join(alert)
    print(msg)
    if alert:
        if send_mail(mailto_list, "TV_tomcat_Alert-Report", msg):
            print('send ok')
        else:
            print('send err')
    save_seek(seek_file, next_offset)


if __name__ == '__main__':
    main()
本文出自 “晓风残月” 博客,请务必保留此出处:http://kinda22.blog.51cto.com/2969503/1624818
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。