
原创力文库 Downloader Tool, Now Open Source

Last edited by bean0283 on 2022-10-30 19:07.

A few days ago I posted the finished tool. It is far from perfect, but it still drew a lot of interest. My own skills are limited and I can't take it much further by myself, so I've decided to open-source it for everyone to study and improve together.
If you have good suggestions or ideas, please share them so we can all learn from each other. Thank you.

[Python]
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import re
import json
import os
import shutil
import sys
import time

import requests
import img2pdf
from PIL import Image
from alive_progress import alive_bar
from requests.exceptions import SSLError

# headers used when fetching the page images; the Referer is required by book118's CDN
png_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
    'Referer': 'https://max.book118.com/',
}


def down_pngs(pageIndex):
    # helper that downloads a single page image (not called by the main loop below)
    print(pngs[pageIndex])
    down_url = 'https://view-cache.book118.com' + pngs[pageIndex]
    print(down_url)
    res = requests.get(url=down_url)
    try:
        png = res.content
        with open(os.path.join(temp_dir, str(pageIndex) + '.jpeg'), 'wb') as f:
            f.write(png)
    except Exception:
        return


# start-up banner
logo = """
---------------------------------------------------------------------------------------------
by:bean0283
"""
print(logo)

while True:
    url = input('请输入原创力文库url:')
    url = url.split('?')[0]
    print('地址:', url)
    temp_dir = url.split('/')[-1]

    # remove any old temp folder and create a fresh one
    if os.path.exists(temp_dir):
        shutil.rmtree(temp_dir)
    os.mkdir(temp_dir)

    print('开始下载 HTML...', end='')
    try:
        response = requests.get(url=url)
    except SSLError:
        print("\n\033[31m不要使用代理软件-------\033[0m")
        print("\033[31m请关闭代理软件后重新运行程序\033[0m")
        print("\033[31m请关闭vpn软件后重新运行程序\033[0m")
        sys.exit(0)
    page = response.text
    print('成功. \n开始解析 HTML...', end='')

    # the document metadata is embedded as a JavaScript object in the page source
    title = re.search('title: (.*),', page).group(1).replace("'", "")
    view_token = re.search("view_token: (.*)'", page).group(1).replace("'", "")
    filetype = re.search("format: (.*)'", page).group(1).replace("'", "")
    senddate = re.search('senddate: (.*),', page).group(1).replace("'", "")
    aid = re.search(' aid: (.*), //解密后的id', page).group(1)
    actual_page = int(re.search('actual_page: (.*),', page).group(1))    # real page count
    preview_page = int(re.search('preview_page: (.*),', page).group(1))  # previewable page count
    if actual_page > preview_page:
        print("\n\033[31m该文档为限制文档,无法下载全部内容,请用vip+rmb,该问题无需反馈给开发者,他也不会解决!!\033[0m\n")
    output = title  # output file name
    print('解析成功. ')
    print('文档标题: ', title)

    # the preview API returns 6 page URLs per request
    list_pn = list(range(1, preview_page + 1, 6))

    if filetype == 'pdf':
        pngs = {}
        print('解析到pdf文档, 准备开始解析..', end='')
        print('解析成功.\n正在获取pngs地址...')
        print('受接口限制,2s访问一次,请耐心等待所有接口信息返回')
        with alive_bar(len(list_pn), title='ing...') as bar:
            for pn in list_pn:
                bar()
                down_page_url = 'https://openapi.book118.com/getPreview.html?project_id=1&aid={}&view_token={}&page={}&_={}'.format(
                    aid, view_token, pn, str(int(time.time())))
                jsonpReturn = requests.get(url=down_page_url)
                page = re.search(r'jsonpReturn\((.*)\)', jsonpReturn.text).group(1)
                data_temp = json.loads(page)['data']
                # known limitation: the API sometimes returns an empty URL for a page;
                # such pages are skipped during download and merging below
                pngs.update({x: data_temp[x] for x in data_temp})
                if pn != list_pn[-1]:
                    time.sleep(2)

        print('\n开始下载 jpg(s)...')
        pagenums = list(range(1, len(pngs) + 1))
        with alive_bar(len(pagenums), title='ing...') as bar:
            for i in range(len(pagenums)):
                bar()
                if not pngs[str(i + 1)]:  # skip pages whose URL came back empty
                    continue
                down_url = "https:" + pngs[str(i + 1)]
                request = requests.get(url=down_url, headers=png_headers)
                try:
                    page = request.content
                    with open(os.path.join(temp_dir, str(pagenums[i]) + '.jpeg'), 'wb') as f:
                        f.write(page)
                except Exception:
                    continue

        print('\n开始合并图片成PDF...', end='')
        # only merge the images that were actually downloaded
        file_imgs = [os.path.join(temp_dir, str(i) + '.jpeg') for i in pagenums
                     if os.path.exists(os.path.join(temp_dir, str(i) + '.jpeg'))]
        # img2pdf rejects images with an alpha channel, so convert RGBA to RGB first
        for img_path in file_imgs:
            with open(img_path, 'rb') as data:
                img = Image.open(data)
                img.convert('RGB').save(img_path)
        with open(output + '.pdf', 'wb') as f:
            f.write(img2pdf.convert(file_imgs))
        shutil.rmtree(temp_dir)
        print('成功.')
        print('保存到 ' + output + '.pdf')

    elif filetype in ['docx', 'doc']:
        pngs = {}
        print('解析到{}文档, 准备开始解析..'.format(filetype), end='')
        print('解析成功.\n正在获取pngs地址...')
        print('受接口限制,2s访问一次,请耐心等待所有接口信息返回')
        with alive_bar(len(list_pn), title='ing...') as bar:
            for pn in list_pn:
                # doc/docx documents additionally pass t=<senddate> to the preview API
                down_page_url = 'https://openapi.book118.com/getPreview.html?&project_id=1&aid={}&t={}&view_token={}&page={}&_={}'.format(
                    aid, senddate, view_token, pn, str(int(time.time())))
                jsonpReturn = requests.get(url=down_page_url)
                page = re.search(r'jsonpReturn\((.*)\)', jsonpReturn.text).group(1)
                data_temp = json.loads(page)['data']
                bar()
                pngs.update({x: data_temp[x] for x in data_temp})
                if pn != list_pn[-1]:
                    time.sleep(2)

        print('\n开始下载 jpg(s)...')
        pagenums = list(range(1, len(pngs) + 1))
        with alive_bar(len(pagenums), title='ing...') as bar:
            for i in range(len(pagenums)):
                bar()
                if not pngs[str(i + 1)]:  # skip pages whose URL came back empty
                    continue
                down_url = "https:" + pngs[str(i + 1)]
                request = requests.get(url=down_url, headers=png_headers)
                try:
                    page = request.content
                    with open(os.path.join(temp_dir, str(pagenums[i]) + '.jpeg'), 'wb') as f:
                        f.write(page)
                except Exception:
                    continue

        print('\n开始合并图片成PDF...', end='')
        file_imgs = [os.path.join(temp_dir, str(i) + '.jpeg') for i in pagenums
                     if os.path.exists(os.path.join(temp_dir, str(i) + '.jpeg'))]
        # img2pdf rejects images with an alpha channel, so convert RGBA to RGB first
        for img_path in file_imgs:
            with open(img_path, 'rb') as data:
                img = Image.open(data)
                img.convert('RGB').save(img_path)
        with open(output + '.pdf', 'wb') as f:
            f.write(img2pdf.convert(file_imgs))
        shutil.rmtree(temp_dir)
        print('成功.')
        print('保存到 ' + output + '.pdf')

    else:
        print('不支持的参数.文件类型:', filetype)

    # open the folder next to the script/exe so the result is easy to find (Windows only)
    temp_ = os.path.realpath(sys.argv[0])
    os.startfile(os.path.dirname(temp_))
    print("执行完成,继续请黏贴url,结束请关闭窗口\n")
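
One possible cleanup direction, since the post asks for improvement ideas: the pdf and doc/docx branches repeat the same getPreview.html call, and the script notes that some pages come back with an empty URL. Below is a minimal sketch of pulling that call into one helper. It assumes only the response format already visible in the script (the jsonpReturn(...) wrapper and the page-number-to-URL mapping under 'data'); the function name fetch_preview_urls and the retry-on-empty behaviour are illustrative and not tested against the real API.

[Python]
# Sketch: one helper for both branches of the main script.
import json
import re
import time

import requests


def fetch_preview_urls(aid, view_token, page, senddate=None, retries=2):
    """Return the {page_number: image_url} dict for one getPreview.html call,
    retrying a couple of times when some page URLs come back empty."""
    base = 'https://openapi.book118.com/getPreview.html'
    params = {
        'project_id': 1,
        'aid': aid,
        'view_token': view_token,
        'page': page,
        '_': int(time.time()),
    }
    if senddate is not None:   # doc/docx requests also pass t=<senddate>
        params['t'] = senddate
    for _ in range(retries + 1):
        text = requests.get(base, params=params).text
        payload = re.search(r'jsonpReturn\((.*)\)', text).group(1)
        data = json.loads(payload)['data']
        if all(data.values()):     # every URL filled in: done
            return data
        time.sleep(2)              # respect the 2 s rate limit before retrying
    return data                    # give up; caller must still skip empty URLs

If the server never fills in the missing URLs on retry, the helper simply returns the partial mapping, so the caller still has to skip empty entries the way the main script does.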