MrDoc/app_doc/report_html2pdf.py

95 lines
3.4 KiB
Python
Raw Normal View History

# coding:utf-8
# @File: report_html2pdf.py
# @Author: 州的先生
# Date: 2020/12/27
# Blog: zmister.com
2021-02-28 08:22:34 +08:00
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.expected_conditions import staleness_of
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.utils import ChromeType
from django.conf import settings
from django.utils.translation import gettext_lazy as _
import sys
2021-02-28 08:22:34 +08:00
import json
import base64
2021-02-28 08:22:34 +08:00
def convert(source: str, target: str, timeout: int = 2, compress: bool = False, power: int = 0, install_driver: bool = True):
    '''
    Render an HTML file or website to PDF and save the result to disk.

    :param str source: source html file or website link to render
    :param str target: location the PDF bytes are written to
    :param int timeout: timeout in seconds handed to the renderer. Defaults to 2
    :param bool compress: whether the PDF should be compressed. Defaults to False.
        Currently has no effect: the compression step is disabled in this function.
    :param int power: power of the compression (0: default, 1: prepress, 2: printer,
        3: ebook, 4: screen). Defaults to 0. Currently has no effect, like ``compress``.
    :param bool install_driver: forwarded unchanged to the PDF renderer. Defaults to True
    '''
    pdf_bytes = __get_pdf_from_html(source, timeout, install_driver)

    # Compression is not applied; the rendered bytes are stored exactly as produced.
    with open(target, 'wb') as pdf_file:
        pdf_file.write(pdf_bytes)
def __send_devtools(driver, cmd, params=None):
    '''
    Send a Chrome DevTools command through chromedriver and return its result.

    :param driver: WebDriver instance; its ``session_id`` and ``command_executor``
        are used to build and issue the request
    :param str cmd: DevTools command name, e.g. "Page.printToPDF"
    :param dict params: parameters for the command; None (the default) sends an
        empty parameter object
    :return: the 'value' entry of the response returned by the command executor
    :raises Exception: if the command executor returns an empty response
    '''
    # Build a fresh dict per call; a mutable default would be shared across calls.
    if params is None:
        params = {}

    resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
    # NOTE: _url and _request are private attributes of the command executor.
    url = driver.command_executor._url + resource
    body = json.dumps({'cmd': cmd, 'params': params})
    response = driver.command_executor._request('POST', url, body)
    if not response:
        # The response is None or empty here, so it must not be queried with
        # .get(); report which command failed instead.
        raise Exception('Empty response from chromedriver for DevTools command %r' % (cmd,))
    return response.get('value')
def __get_pdf_from_html(path: str, timeout: int, install_driver: bool, print_options=None):
    '''
    Load a page in headless Chrome and return it rendered as PDF bytes.

    :param str path: address handed to ``driver.get``
    :param int timeout: seconds to wait on the loaded page before printing it
    :param bool install_driver: not used by this function; kept so existing
        callers keep working
    :param dict print_options: options merged over the default
        "Page.printToPDF" parameters; None (the default) means no overrides
    :return: the base64-decoded 'data' field of the "Page.printToPDF" result
    '''
    webdriver_options = Options()
    for argument in (
        '--no-sandbox',
        '--headless',
        '--disable-gpu',
        # NOTE(review): a fixed debugging port may collide if two conversions
        # run at the same time - confirm whether that can happen.
        '--remote-debugging-port=9222',
        '--disable-dev-shm-usage',
    ):
        webdriver_options.add_argument(argument)
    # Presumably value 2 blocks image loading in Chrome - TODO confirm.
    webdriver_options.experimental_options['prefs'] = {
        'profile.default_content_settings': {'images': 2},
    }

    # A missing setting is treated the same as an explicit None.
    driver_path = getattr(settings, 'CHROMIUM_DRIVER_PATH', None)
    if driver_path is not None:
        # Use the chromedriver at the configured path.
        driver = webdriver.Chrome(executable_path=driver_path, options=webdriver_options)
    else:
        # Use the default chromedriver.
        driver = webdriver.Chrome(options=webdriver_options)

    # From here on the browser exists, so always shut it down, even on errors.
    try:
        driver.get(path)
        try:
            # 'tag name' is the locator-strategy string; find_element(by, value)
            # is used instead of the find_element_by_* helper so the lookup does
            # not depend on the helper being available.
            WebDriverWait(driver, timeout).until(
                staleness_of(driver.find_element('tag name', 'html'))
            )
        except TimeoutException:
            # Expected when the page stays put: the wait only gives the page
            # `timeout` seconds before it is printed.
            pass

        calculated_print_options = {
            'landscape': False,
            'displayHeaderFooter': False,
            'printBackground': True,
            'preferCSSPageSize': True,
        }
        if print_options:
            calculated_print_options.update(print_options)
        result = __send_devtools(driver, "Page.printToPDF", calculated_print_options)
    finally:
        driver.quit()

    return base64.b64decode(result['data'])
if __name__ == '__main__':
    # Command-line use: first argument is the HTML source, second is the PDF target.
    html_path = sys.argv[1]
    pdf_path = sys.argv[2]
    convert(html_path, pdf_path)