MrDoc/app_doc/report_html2pdf.py
2021-04-08 10:19:38 +08:00

105 lines
3.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# coding:utf-8
# @File: report_html2pdf.py
# @Author: 州的先生
# @Date: 2020/12/27
# Blog: zmister.com
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.expected_conditions import staleness_of
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.utils import ChromeType
from django.conf import settings
from django.utils.translation import gettext_lazy as _
import sys
import json
import base64
def convert(source: str, target: str, timeout: int = 2, compress: bool = False, power: int = 0, install_driver: bool = True):
    """
    Render an HTML file or web page to PDF and write the result to disk.

    :param str source: source html file or website link
    :param str target: path of the PDF file to write (opened in binary write mode)
    :param int timeout: timeout in seconds forwarded to the renderer. Default is 2
    :param bool compress: accepted but not acted on here; the compression step is disabled in this module
    :param int power: compression level (0: default, 1: prepress, 2: printer, 3: ebook, 4: screen); not used while compression is disabled
    :param bool install_driver: forwarded to the renderer, which uses it to decide how the chromedriver is obtained
    """
    pdf_bytes = __get_pdf_from_html(source, timeout, install_driver)
    # The PDF is always written uncompressed, whatever `compress` / `power` say.
    with open(target, 'wb') as pdf_file:
        pdf_file.write(pdf_bytes)
def __send_devtools(driver, cmd, params=None):
    """
    Send a DevTools command through the driver's
    ``/session/<id>/chromium/send_command_and_get_result`` endpoint.

    :param driver: webdriver instance; its ``session_id`` and the private
        ``_url`` / ``_request`` members of its ``command_executor`` are used
    :param str cmd: DevTools command name, e.g. ``"Page.printToPDF"``
    :param dict params: command parameters; ``None`` is sent as an empty dict
    :return: the ``value`` entry of the response
    :raises Exception: when the endpoint returns an empty/falsy response
    """
    # A fresh dict per call instead of a shared mutable default argument.
    payload = {'cmd': cmd, 'params': {} if params is None else params}
    resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
    url = driver.command_executor._url + resource
    response = driver.command_executor._request('POST', url, json.dumps(payload))
    if not response:
        # Do not touch `response` here: it may be None, and calling .get() on it
        # would hide the real problem behind an AttributeError.
        raise Exception(
            "DevTools command %r returned an empty response: %r" % (cmd, response)
        )
    return response.get('value')
def __get_pdf_from_html(path: str, timeout: int, install_driver: bool, print_options=None):
    """
    Load ``path`` in a headless Chrome/Chromium session and return it as PDF bytes.

    :param str path: address handed to ``driver.get`` (html file link or website)
    :param int timeout: maximum number of seconds to wait for the initially
        loaded ``<html>`` element to go stale before printing
    :param bool install_driver: only consulted when
        ``settings.CHROMIUM_DRIVER_PATH`` is ``None``: ``True`` installs a driver
        through ``ChromeDriverManager``, ``False`` lets selenium locate one itself
    :param dict print_options: extra ``Page.printToPDF`` parameters that
        override the defaults below; ``None`` means no overrides
    :return: the base64-decoded ``data`` field of the ``Page.printToPDF`` result
    :rtype: bytes
    """
    webdriver_options = Options()
    for argument in (
        '--no-sandbox',
        '--headless',
        '--disable-gpu',
        # NOTE(review): fixed debugging port - presumably two conversions
        # running at the same time would compete for it; confirm.
        "--remote-debugging-port=9222",
        '--disable-dev-shm-usage',
    ):
        webdriver_options.add_argument(argument)
    webdriver_options.experimental_options['prefs'] = {
        'profile.default_content_settings': {'images': 2},
    }

    if settings.CHROMIUM_DRIVER_PATH is not None:
        # Use the explicitly configured chromedriver.
        driver = webdriver.Chrome(executable_path=settings.CHROMIUM_DRIVER_PATH, options=webdriver_options)
    elif install_driver:
        # Use the default chromedriver, installed through webdriver_manager.
        driver = webdriver.Chrome(
            ChromeDriverManager(
                url='https://npm.taobao.org/mirrors/chromedriver/',
                latest_release_url='https://npm.taobao.org/mirrors/chromedriver/LATEST_RELEASE',
                chrome_type=ChromeType.GOOGLE if settings.CHROMIUM_DRIVER == 'Chrome' else ChromeType.CHROMIUM
            ).install(),
            options=webdriver_options
        )
    else:
        driver = webdriver.Chrome(options=webdriver_options)

    # From here on the browser is running: always shut it down, even when
    # loading or printing fails, so no chrome process is left behind.
    try:
        driver.get(path)
        try:
            WebDriverWait(driver, timeout).until(staleness_of(driver.find_element_by_tag_name('html')))
        except TimeoutException:
            # The document was not replaced within `timeout` seconds; print it as is.
            pass

        # Printing happens whether or not the wait timed out, so the function
        # never falls through and returns None.
        calculated_print_options = {
            'landscape': False,
            'displayHeaderFooter': False,
            'printBackground': True,
            'preferCSSPageSize': True,
        }
        if print_options:
            calculated_print_options.update(print_options)
        result = __send_devtools(driver, "Page.printToPDF", calculated_print_options)
    finally:
        driver.quit()
    return base64.b64decode(result['data'])
if __name__ == '__main__':
    # Command-line use: argv[1] is the HTML source, argv[2] the PDF destination.
    convert(sys.argv[1], sys.argv[2])