[Python] 纯文本查看 复制代码
# -*- coding:utf-8 -*-
"""Collect status code, title, description, keywords and IP for a URL list.

The script reads ``url.txt`` from the current working directory, normalises
every entry to a full URL (saved to ``url-run.txt``), fetches each URL in a
thread pool, appends one row per URL to a timestamped ``.xls`` report in the
same directory, and finally tries to send a notification e-mail.
"""
import logging
import os
import re
import socket
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from urllib.parse import urlparse

import requests
import urllib3
import xlrd
import xlwt
import yagmail
from requests.packages.urllib3.exceptions import InsecureRequestWarning
from xlutils.copy import copy

# Request headers sent with every fetch.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
}

# Working directory, report name and the lock serialising report updates.
path = os.getcwd()
savefilename = time.strftime("%Y-%m-%d %H.%M.%S")
report_file = os.path.join(path, savefilename + '.xls')
lock = threading.Lock()

# Header row of the report, in column order.
COLUMN_TITLES = ("源地址", "跳转地址", "状态码", "标题", "描述", "关键词", "IP")

# Patterns for the three page fields; compiled once because they are used
# for every fetched page.
TITLE_RE = re.compile(r"(?<=\<title\>)(?:.|\n)+?(?=\<)", re.IGNORECASE)
DESCRIPTION_RE = re.compile(
    r"(?<=\<meta name=\"description\" content=\")(?:.|\n)+?(?=\" \/\>|\"\/\>)",
    re.IGNORECASE,
)
KEYWORDS_RE = re.compile(
    r"(?<=\<meta name=\"keywords\" content=\")(?:.|\n)+?(?=\" \/\>|\"\/\>)",
    re.IGNORECASE,
)

URL_SCHEMES = ('http://', 'https://')


def get_ip(url):
    """Resolve the host of *url* and return its first address as a string.

    The host is taken from the parsed URL, so ``https://`` URLs, explicit
    ports and paths are accepted; an entry without a scheme is treated as
    ``http://``.

    Raises:
        ValueError: if no host name can be extracted from *url*.
        OSError: if name resolution fails (``socket.gaierror``).
    """
    target = url.strip()
    if '://' not in target:
        target = 'http://' + target
    host = urlparse(target).hostname
    if not host:
        raise ValueError('no host in %r' % (url,))
    return socket.getaddrinfo(host, 'http')[0][4][0]


def _first_match(pattern, text, default=" "):
    """Return the first stripped match of *pattern* in *text*, or *default*."""
    found = pattern.search(text)
    return found.group(0).strip() if found else default


def get_codetitle(url):
    """Fetch *url* and return ``(resurl, code, title, description, keywords, ip)``.

    ``code`` is the HTTP status code, or the string ``"无法访问"`` when the
    request fails. Every text field that cannot be obtained is ``" "`` and
    ``ip`` is ``'null'`` when the host cannot be resolved, so the function
    always returns a complete 6-tuple.
    """
    code = "无法访问"
    # All fields get a default up front so a failed request or a missing tag
    # can never leave a returned name unbound.
    resurl = title = description = keywords = " "
    try:
        urllib3.disable_warnings()
        # NOTE(review): presumably meant to enable connection retries; this
        # assignment may have no effect on adapters created by requests.get
        # -- confirm against the installed requests version.
        requests.adapters.DEFAULT_RETRIES = 5
        res = requests.get(url, headers=header, verify=False,
                           allow_redirects=True, timeout=(3, 12))
        res.encoding = res.apparent_encoding
        code = res.status_code
        resurl = res.url
        page = res.text
        # Each field is extracted independently: a page without a <title>
        # still yields its description and keywords.
        title = _first_match(TITLE_RE, page)
        description = _first_match(DESCRIPTION_RE, page)
        keywords = _first_match(KEYWORDS_RE, page)
    except Exception:
        # Best-effort boundary: one unreachable site must not stop the run.
        print('%s网址无效或者IP被封锁' % (url))
    try:
        ip = get_ip(url)
    except (OSError, ValueError):
        ip = 'null'
    return resurl, code, title, description, keywords, ip


def write(url):
    """Fetch *url*, print a summary line and append one row to the report.

    The report is re-read, copied and saved for every row while holding
    ``lock``, so rows from concurrent workers do not overwrite each other and
    already finished rows are on disk if the run is interrupted.
    """
    fields = [str(value) for value in get_codetitle(url)]
    resurl, code, title, description, keywords, ip = fields
    print(url + "|" + code + "|" + title + "|" + ip)
    with lock:
        existing = xlrd.open_workbook(report_file)
        next_row = existing.sheet_by_index(0).nrows
        writable = copy(existing)
        sheet = writable.get_sheet(0)
        for column, value in enumerate([url] + fields):
            sheet.write(next_row, column, value)
        writable.save(report_file)


def _create_report():
    """Create the report workbook containing only the header row."""
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet(u'title', cell_overwrite_ok=True)
    for column, caption in enumerate(COLUMN_TITLES):
        sheet.write(0, column, caption)
    workbook.save(report_file)


def _load_urls():
    """Normalise ``url.txt`` into ``url-run.txt`` and return the URL list.

    Entries without an ``http://`` or ``https://`` prefix get ``http://``
    prepended; blank lines are skipped.
    """
    source_file = os.path.join(path, 'url.txt')
    run_file = os.path.join(path, 'url-run.txt')
    with open(source_file, 'r') as f:
        entries = [line.strip() for line in f]
    # Written as UTF-8 because the file is read back as UTF-8 below.
    with open(run_file, 'w', encoding='utf-8') as f2:
        for entry in entries:
            if not entry:
                continue
            if not entry.lower().startswith(URL_SCHEMES):
                entry = 'http://' + entry
            f2.write(entry + '\n')
    with open(run_file, 'r', encoding='utf-8') as f:
        return [data.strip().strip('\\') for data in f]


def _send_notification(elapsed):
    """Send the completion e-mail; failures are reported, not raised."""
    try:
        # SMTP account and password (placeholders to be filled in).
        yag = yagmail.SMTP(user="xxxxxx@163.com", password="密码你的",
                           host='smtp.163.com')
        try:
            contents = ['TDK获取时间:%s' % (elapsed)]  # message body
            subject = 'TDK获取完成通知'  # message subject
            receiver = ["xxxx@qq.com"]  # one or more recipient addresses
            yag.send(receiver, subject, contents)
        finally:
            yag.close()
    except Exception:
        print('smtp 同个时间发送超过10条或者过期')


def main():
    """Run the whole pipeline: report setup, fetching, timing, e-mail."""
    logging.captureWarnings(True)
    requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
    start = time.time()

    _create_report()
    urls_data = _load_urls()

    # Fetch concurrently; leaving the with-block waits for every worker.
    with ThreadPoolExecutor(max_workers=100) as executor:
        futures = {executor.submit(write, url=urls): urls for urls in urls_data}
    # Surface worker errors that would otherwise vanish inside the futures.
    for future, urls in futures.items():
        failure = future.exception()
        if failure is not None:
            print('%s写入失败: %s' % (urls, failure))

    end = time.time()
    print("总耗时:", end - start, "秒")

    _send_notification(end - start)


if __name__ == "__main__":
    main()
纯干货,原创源码,使用方法也很简单:请将py文件和url.txt保存在同一个文件夹即可,运行后会生成xls格式的报告文件。
这里提供成品软件:https://www.123pan.com/s/ePprVv-dPoJ
效果图:
速度很快,大概一分钟能跑几千个网站,准确率绝对没问题,不存在缓存。尊重原创哈,感谢大家!喜欢的话请多多支持FiimeROM的奥奥,我也会多给大家贡献工具的。