从日志文件中提取ip并找到归属地完成输出,日志文件ip,从日志文件中提取ip并找


从日志文件中提取 IP 并找到归属地完成输出。也就是说:Apache 日志文件里有许多 IP 地址,程序先把这些 IP 提取出来,再查出每个 IP 的归属地,最后把结果写入输出文件。

# -*- coding: utf-8 -*-
"""Extract the IP addresses found in a fixed set of log files and write each
distinct IP, together with its looked-up attribution, to a per-log output file.

Written for Python 2: log lines are read as byte strings and the lookup
result is concatenated and written back as a byte string.
"""

import os
import re

from getIpFrom import Httpsocket

# Shared lookup client used for every IP.
httpIP = Httpsocket()

LogFilePath = "./logs/logs/"
OutPutPath = "./outs/"
LogFileList = [
    "site1-access_log",
    "site1-error_log",
    "site2-access_log",
    "site2-error_log",
    "site3-access_log",
    "site3-error_log",
]

# First dotted-quad on a line. The trailing ``.*`` consumes the remainder of
# the line, so ``findall`` yields at most one match per line.
_IP_PATTERN = re.compile(r'(\d+\.\d+\.\d+\.\d+).*')


def CheckLogFile():
    """Exit the program (SystemExit) unless every file in LogFileList exists."""
    for f in LogFileList:
        if not os.path.isfile(LogFilePath + f):
            print("logfile is not existed  %s" % f)
            raise SystemExit


def _read_unique_ips(logfile):
    """Return the distinct IPs of *logfile* in order of first appearance.

    Raises SystemExit when a line does not yield exactly one match.
    """
    ordered = []
    seen = set()  # O(1) membership test; ``ordered`` keeps first-seen order
    with open(LogFilePath + logfile, 'rb') as fd:
        for line in fd:
            r = _IP_PATTERN.findall(line)
            if len(r) != 1:
                # NOTE(review): a line without any dotted-quad aborts the whole
                # run (original behaviour, kept) -- confirm this is wanted for
                # error logs, whose lines may not all carry a client IP.
                print("GetIpList occuer error ip list r size:" + str(len(r)))
                raise SystemExit
            ip = r[0]
            if ip not in seen:
                seen.add(ip)
                ordered.append(ip)
    return ordered


def _format_record(ip):
    """Return the output line body for *ip*: the IP, a tab, then its attribution."""
    httpIP.setbody(ip)
    code, ipaddress = httpIP.getIP()
    if code == 0:
        # NOTE(review): ipaddress is whatever byte string getIP() returns;
        # confirm it uses the same encoding as the fallback literal below so
        # the output file does not end up mixed-encoding.
        return ip + "\t" + ipaddress
    # The original literal was mojibake (GBK bytes rendered as Latin-1);
    # restored to the intended text, meaning "cannot be determined".
    return ip + "\t无法确定"


def GetIpList(logfile):
    """Write ``<OutPutPath><logfile>_IP`` with one "ip<TAB>attribution" line per
    distinct IP found in ``<LogFilePath><logfile>``.

    The output directory is created if missing. Raises SystemExit when a log
    line cannot be parsed (see _read_unique_ips).
    """
    ips = _read_unique_ips(logfile)
    if not os.path.isdir(OutPutPath):
        os.mkdir(OutPutPath)
    # ``with`` guarantees the handle is closed even if a lookup raises.
    with open(OutPutPath + logfile + "_IP", 'wb') as op:
        for ip in ips:
            op.write(_format_record(ip) + os.linesep)


def process():
    """Verify that all log files exist, then process each one in turn."""
    CheckLogFile()
    for f in LogFileList:
        print("log file:" + f)
        # Re-checked per file: it may have disappeared since CheckLogFile().
        if not os.path.isfile(LogFilePath + f):
            print("logfile is not existed  %s" % f)
        else:
            GetIpList(f)


if __name__ == '__main__':
    process()

python方法从日志文件中提取ip并找到归属地完成输出(下面是上面脚本所导入的 getIpFrom 模块,其中的 Httpsocket 类负责查询 IP 归属地):

# -*- coding: utf-8 -*-
"""HTTP helper that submits an IP address to ip138.com and extracts the
attribution text from the reply.

Written for Python 2 (urllib2 / cookielib).

NOTE(review): the non-ASCII literals below are compared against the raw
response bytes, so this file's encoding must match the encoding the site
serves -- confirm before relying on the utf-8 declaration above.
"""

import cookielib  # imported by the original module; not referenced below
import re
import urllib
import urllib2

# Matches the "<li>本站主数据:...</li>" item(s) in the lookup response.
ipmat = re.compile('<li>本站主数据:.*</li>', re.M)


class Httpsocket:
    """ Build for Make a full HttpRequest via POST/GET """

    isok = 0  # default 0; not read anywhere in this module
    ip_url = 'http://www.ip138.com/ips.asp'    # landing page
    ip_send = 'http://www.ip138.com/ips8.asp'  # lookup endpoint
    ip_body = []  # form fields for the lookup; rebound per instance by setbody()

    def __init__(self):
        """Install a cookie-aware opener as urllib2's global default opener."""
        self.cookies = urllib2.HTTPCookieProcessor()
        self.opener = urllib2.build_opener(self.cookies)
        urllib2.install_opener(self.opener)

    def connect(self, ip_url, param=None, header=None):
        """Request *ip_url* once and discard the response (closing it)."""
        self.openurl(ip_url, param, header).close()

    def openurl(self, url, param=None, header=None):
        """Open *url* and return the response object; the caller closes it.

        *param* is a mapping or sequence of pairs to url-encode as the request
        body; *header* is a dict of extra request headers. Both default to empty.
        """
        # None instead of ``{}`` defaults avoids sharing one dict across calls.
        # The encoded body is always a string (never None), exactly as before.
        encodeparam = urllib.urlencode({} if param is None else param)
        req = urllib2.Request(url, encodeparam, {} if header is None else header)
        return urllib2.urlopen(req)

    def _read(self, url, param=None):
        """Return the full response body of *url*, always closing the response."""
        resp = self.openurl(url, param)
        try:
            return resp.read()
        finally:
            resp.close()

    def setbody(self, ip):
        """Store the form fields used by the next getIP() call for *ip*."""
        self.ip_body = [('ip', ip), ('action', '2'), ]

    def getIP(self):
        """Look up the IP stored by setbody().

        Returns ``(0, text)`` on success and ``(1, '')`` on any failure: a
        request error, an unexpected landing page, or a reply that does not
        contain exactly one result item.
        """
        body = self.ip_body[:]
        try:
            self.connect(self.ip_url)
            data = self._read(self.ip_url)
            if "您的IP地址是" not in data:
                return (1, '')
            matches = ipmat.findall(self._read(self.ip_send, param=body))
        except Exception:
            # Best-effort lookup: every failure is reported as code 1.
            return (1, '')
        if len(matches) != 1:
            # Previously the raw match list leaked out here, which broke the
            # caller's ``(code, text)`` unpacking.
            return (1, '')
        text = matches[0]
        # NOTE(review): the replacement literals "\t2" / "1" / "1" are kept
        # exactly as found; they look like extraction damage (perhaps "\t" and
        # "") -- confirm against the intended output format.
        text = text.replace("</li><li>", "\t2")
        text = text.replace("<li>", "1")
        text = text.replace("</li>", "1")
        return (0, text)

编橙之家文章,

评论关闭