2021-02-21 22:01:26 +08:00
|
|
|
|
# coding:utf-8
|
|
|
|
|
# @文件: report_html2pdf.py
|
|
|
|
|
# @创建者:州的先生
|
|
|
|
|
# @日期:2020/12/27
|
|
|
|
|
# 博客地址:zmister.com
|
|
|
|
|
|
2021-02-28 08:22:34 +08:00
|
|
|
|
from selenium import webdriver
|
|
|
|
|
from selenium.webdriver.chrome.options import Options
|
|
|
|
|
from selenium.common.exceptions import TimeoutException
|
|
|
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
|
|
|
from selenium.webdriver.support.expected_conditions import staleness_of
|
|
|
|
|
from webdriver_manager.chrome import ChromeDriverManager
|
|
|
|
|
from webdriver_manager.utils import ChromeType
|
|
|
|
|
from django.conf import settings
|
2021-03-13 16:45:51 +08:00
|
|
|
|
from django.utils.translation import gettext_lazy as _
|
2021-02-21 22:01:26 +08:00
|
|
|
|
import sys
|
2021-02-28 08:22:34 +08:00
|
|
|
|
import json
|
|
|
|
|
import base64
|
2021-02-21 22:01:26 +08:00
|
|
|
|
|
|
|
|
|
|
2021-02-28 08:22:34 +08:00
|
|
|
|
def convert(source: str, target: str, timeout: int = 2, compress: bool = False, power: int = 0, install_driver: bool = True):
    """
    Render an HTML file or web page to PDF and save the result to disk.

    :param str source: HTML file or website link to render
    :param str target: path the PDF bytes are written to
    :param int timeout: timeout in seconds handed to the rendering helper. Defaults to 2
    :param bool compress: accepted for compatibility; compression is currently disabled, so the value is ignored
    :param int power: compression level (0: default, 1: prepress, 2: printer, 3: ebook, 4: screen); ignored while compression is disabled
    :param bool install_driver: forwarded unchanged to the rendering helper
    """
    pdf_bytes = __get_pdf_from_html(source, timeout, install_driver)

    # Compression is not wired up, so the rendered bytes are saved as-is.
    with open(target, 'wb') as pdf_file:
        pdf_file.write(pdf_bytes)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __send_devtools(driver, cmd, params=None):
    """
    Send a DevTools command through chromedriver and return its result.

    :param driver: Selenium Chrome driver; its ``session_id`` and ``command_executor`` are used
    :param str cmd: DevTools command name, e.g. ``"Page.printToPDF"``
    :param dict params: command parameters; ``None`` is sent as an empty mapping
    :return: the ``value`` entry of the driver's response
    :raises RuntimeError: if the driver returns an empty response
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if params is None:
        params = {}

    resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
    url = driver.command_executor._url + resource
    body = json.dumps({'cmd': cmd, 'params': params})
    response = driver.command_executor._request('POST', url, body)

    # A falsy response (None or empty) carries no 'value' to report, and
    # calling .get() on None would itself fail, so name the command instead.
    if not response:
        raise RuntimeError("DevTools command %r returned no response" % cmd)

    return response.get('value')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __get_pdf_from_html(path: str, timeout: int, install_driver: bool, print_options=None):
    """
    Load ``path`` in headless Chrome and return the page printed as PDF bytes.

    :param str path: URL or file link passed to ``driver.get``
    :param int timeout: seconds to wait for the loaded ``<html>`` element to go stale before printing
    :param bool install_driver: currently unused by this function
    :param dict print_options: extra ``Page.printToPDF`` options that override the defaults below
    :return: decoded PDF content
    :rtype: bytes
    """
    webdriver_options = Options()
    # NOTE(review): presumably disables image loading in the profile - confirm.
    webdriver_prefs = {'profile.default_content_settings': {'images': 2}}

    webdriver_options.add_argument('--no-sandbox')
    webdriver_options.add_argument('--headless')
    webdriver_options.add_argument('--disable-gpu')
    # NOTE(review): fixed debugging port; concurrent conversions may clash - confirm.
    webdriver_options.add_argument("--remote-debugging-port=9222")
    webdriver_options.add_argument('--disable-dev-shm-usage')
    webdriver_options.experimental_options['prefs'] = webdriver_prefs

    if settings.CHROMIUM_DRIVER_PATH is not None:
        # Use the chromedriver configured in settings.
        driver = webdriver.Chrome(executable_path=settings.CHROMIUM_DRIVER_PATH, options=webdriver_options)
    else:
        # Use the default chromedriver.
        driver = webdriver.Chrome(options=webdriver_options)

    # From here on the browser process exists, so always shut it down,
    # even when loading or printing fails.
    try:
        driver.get(path)

        try:
            WebDriverWait(driver, timeout).until(staleness_of(driver.find_element_by_tag_name('html')))
        except TimeoutException:
            # Expected case: the page stayed put for the whole timeout.
            pass

        # Print regardless of how the wait ended, so a page that did go
        # stale within the timeout still yields a PDF instead of no result.
        calculated_print_options = {
            'landscape': False,
            'displayHeaderFooter': False,
            'printBackground': True,
            'preferCSSPageSize': True,
        }
        if print_options:
            calculated_print_options.update(print_options)

        result = __send_devtools(driver, "Page.printToPDF", calculated_print_options)
    finally:
        driver.quit()

    return base64.b64decode(result['data'])
|
|
|
|
|
|
2021-02-21 22:01:26 +08:00
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: <script> <html_path> <pdf_path>.
    # Exit with a usage message rather than an IndexError traceback when
    # either argument is missing.
    if len(sys.argv) < 3:
        sys.exit("usage: %s <html_path> <pdf_path>" % sys.argv[0])
    convert(sys.argv[1], sys.argv[2])
|