Python爬虫自动获取whois信息的方法源码代码,Python代码库,德仔网

代码语言
代码分类
【Python】 Python爬虫自动获取whois信息的方法源码
作者: / 发布于2018/3/7/ 1013
本文提供通过 Python 代码自动获取域名 whois 查询信息的方法及源码示例。以前看过 Python 通过 socket 查询 whois 的方法，这次使用的是 subprocess 模块（调用系统的 whois 命令）和 Spider 等模块。
#!/usr/bin/python
# -*- coding: utf-8 -*-
# The encoding declaration must sit on line 1 or 2 of the file to take effect.
 
 
import subprocess
from webauth.common.spider.base import Spider
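# subprocess runs the system whois client; Spider applies the e-mail regex
# template to the command output.
 
# Implementation: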
def get_whois(domain):
    """Run the system ``whois`` client for *domain* and parse its output.

    Args:
        domain: Domain name to look up.

    Returns:
        Whatever ``get_info`` returns for the command output, or the pair
        ``(None, None)`` when the lookup or the parsing raises (for example
        when no ``whois`` executable is installed).
    """
    try:
        # A leading dash would be read by the whois client as an option
        # rather than as the name to query.
        if domain.startswith('-'):
            return None, None

        # Argument list without a shell: the domain reaches whois as a single
        # argv entry, so shell metacharacters in it are never interpreted.
        p = subprocess.Popen(['whois', domain], stdout=subprocess.PIPE)
        out = p.communicate()[0]

        # Python 3 hands back bytes; Python 2 already returns ``str`` and is
        # left untouched so existing callers see the same types as before.
        if not isinstance(out, str):
            out = out.decode('utf-8', 'replace')

        return get_info(out)
    except Exception:
        # Best-effort lookup: any failure is reported as "no result".
        return None, None
         
def _value_after(line, prefix):
    """Return the text following *prefix* in *line*, stripped of whitespace."""
    return line[len(prefix):].strip()


def _to_text(value):
    """Decode a UTF-8 byte string to text; return any other value unchanged.

    Undecodable bytes are replaced instead of raising, so a single bad byte
    in a registrant name does not discard the whole lookup.
    """
    if isinstance(value, bytes):
        return value.decode('utf8', 'replace')
    return value


def _parse_contact_block(lines, spider):
    """Parse the layout that has a 'Registrant Contact:' heading.

    Only the first seven lines after the heading are examined: the first is
    skipped (it holds the organisation, which is not part of the result), the
    second is the name, and non-blank lines after a 'Fax:' line are joined
    into the address. Every line after the heading is also searched for an
    e-mail address until one is found.
    """
    fields = {}
    email = None
    addr = None
    in_contact = False
    collecting_addr = False
    index = 0

    for line in lines:
        if in_contact and not email:
            match = spider.get_info(line, spider.templates[0]['email'])
            if match:
                email = match['email']

        if 'Registrant Contact:' in line:
            in_contact = True
            continue

        if in_contact and index <= 6:
            text = line.strip()
            if index == 0:
                pass  # organisation line
            elif index == 1:
                fields['name'] = text
            elif not text:
                pass
            elif text.startswith('Fax:'):
                collecting_addr = True
                addr = ''
            elif collecting_addr:
                addr += ' %s' % text
            index += 1

    fields['addr'] = addr
    fields['email'] = email
    return fields


def _parse_dotted_labels(lines):
    """Parse the layout whose labels are padded with dots ('Admin Email....')."""
    fields = {}
    for line in lines:
        line = line.strip()
        if line.startswith('Organisation Name....'):
            fields['name'] = _value_after(line, 'Organisation Name....')
        elif line.startswith('Organisation Address.'):
            # The address may span several lines; join them with spaces.
            piece = _value_after(line, 'Organisation Address.')
            fields['addr'] = (fields.get('addr') or '') + ' %s' % piece
        elif line.startswith('Admin Email..........'):
            fields['email'] = _value_after(line, 'Admin Email..........')
        elif line.startswith('Admin Phone..........'):
            fields['phone'] = _value_after(line, 'Admin Phone..........')
    return fields


def _parse_domain_name_layout(lines):
    """Parse output that starts with 'Domain Name' (name and admin e-mail)."""
    fields = {}
    for line in lines:
        line = line.strip()
        if line.startswith('Registrant Name:'):
            fields['name'] = _to_text(_value_after(line, 'Registrant Name:'))
        elif line.startswith('Administrative Email:'):
            fields['email'] = _value_after(line, 'Administrative Email:')
    return fields


def _parse_registrant_fields(lines):
    """Parse the 'Registrant <Field>:' layout and assemble a postal address."""
    labelled = (
        ('Registrant Name:', 'name'),
        ('Registrant Phone:', 'phone'),
        ('Registrant Email:', 'email'),
        ('Registrant Street1', 'street'),
        ('Registrant City:', 'city'),
        ('Registrant State/Province:', 'state'),
        ('Registrant Postal Code:', 'post'),
        ('Registrant Country:', 'country'),
    )
    found = {'street': '', 'city': '', 'state': '', 'post': '', 'country': ''}

    for line in lines:
        for prefix, key in labelled:
            if line.startswith(prefix):
                value = _value_after(line, prefix)
                if key == 'street':
                    # The street prefix carries no colon, so drop the one
                    # that would otherwise lead the value.
                    value = value.lstrip(':').strip()
                found[key] = value
                break

    fields = {}
    for key in ('name', 'phone', 'email'):
        if key in found:
            fields[key] = found[key]
    fields['addr'] = '%s %s %s %s %s' % (
        found['street'], found['city'], found['state'],
        found['post'], found['country'])
    return fields


def get_info(content):
    """Extract registrant details from raw whois output.

    The layout is chosen from markers in *content*; four layouts are
    recognised. If no e-mail address was found by the layout parser, the
    whole text is searched with the e-mail template as a fallback.

    Args:
        content: Text printed by the whois client.

    Returns:
        A pair ``(msg, info)``. ``msg`` is ``'success'`` when a known layout
        was matched, ``'error'`` when the text contains 'NOT FOUND' or
        'no matching record', and ``'exception'`` otherwise. ``info`` is a
        dict with the keys ``addr``, ``name``, ``email``, ``phone`` (each
        ``None`` when not found) and ``raw_data`` (the unmodified input).
    """
    url = ''
    templates = [
        {
            # u'' with an escaped backslash is the same pattern as the
            # Python-2-only ur'' literal and is valid on Python 3 as well.
            'email': u'(?P<email>[0-9a-zA-Z_-]*?@[0-9a-zA-Z\\.]*)',
        }
    ]
    # NOTE(review): Spider.get_info is assumed to return a mapping with an
    # 'email' key, or a falsy value when nothing matches - confirm.
    spider = Spider(url, templates)

    msg = 'success'
    fields = {'addr': None, 'name': None, 'email': None, 'phone': None}
    lines = content.split('\n')

    if 'Registrant Contact:' in content:
        fields.update(_parse_contact_block(lines, spider))
    elif 'Registrars.' in content:
        fields.update(_parse_dotted_labels(lines))
    elif content.startswith('Domain Name'):
        fields.update(_parse_domain_name_layout(lines))
    elif ('Registrant Name' in content
          and 'Registrant Organization' in content
          and 'Registrant Country' in content):
        fields.update(_parse_registrant_fields(lines))
    elif 'NOT FOUND' in content or 'no matching record' in content:
        msg = 'error'
    else:
        msg = 'exception'

    if not fields['email']:
        # Fallback: take the first e-mail-looking token anywhere in the text.
        match = spider.get_info(content, spider.templates[0]['email'])
        if match:
            fields['email'] = match['email']

    if fields['name']:
        fields['name'] = fields['name'].replace("()", "")

    return msg, {'addr': fields['addr'], 'name': fields['name'],
                 'email': fields['email'], 'phone': fields['phone'],
                 'raw_data': content}
 
if __name__ == '__main__':
    # Command-line entry point: look up the single domain given as argument.
    import sys

    # Single-argument print(...) is valid as a statement on Python 2 and as a
    # function call on Python 3, and prints the same text on both.
    if len(sys.argv) == 2:
        print(get_whois(sys.argv[1]))
    else:
        print("please pass a domain")
试试其它关键字
同语言下
可能有用的
贡献的其它代码Label