# MrDoc/app_doc/report_html2pdf.py

# coding:utf-8
# @File: report_html2pdf.py
# @Author: 州的先生
# @Date: 2020/12/27
# Blog: zmister.com

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.expected_conditions import staleness_of
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.utils import ChromeType
from django.conf import settings
from django.utils.translation import gettext_lazy as _
import sys
import json
import base64


def convert(source: str, target: str, timeout: int = 2, compress: bool = False, power: int = 0, install_driver: bool = True):
    """
    Render an HTML file or web page to PDF and write the result to disk.

    :param str source: HTML file or website link to load in the browser
    :param str target: location the PDF bytes are written to (opened in binary write mode)
    :param int timeout: timeout in seconds forwarded to the renderer. Default value is 2 seconds
    :param bool compress: accepted for compatibility; currently unused because the compression step is disabled
    :param int power: accepted for compatibility; currently unused (intended compression level: 0: default, 1: prepress, 2: printer, 3: ebook, 4: screen)
    :param bool install_driver: forwarded to the renderer, which uses it to decide whether to install chromedriver via webdriver_manager. Default value is True
    """
    pdf_bytes = __get_pdf_from_html(source, timeout, install_driver)

    # Compression is disabled, so the rendered bytes are always written unmodified.
    with open(target, 'wb') as pdf_file:
        pdf_file.write(pdf_bytes)


def __send_devtools(driver, cmd, params=None):
    """
    Send a DevTools command to the browser through the chromedriver HTTP endpoint.

    :param driver: webdriver instance; its ``session_id`` and ``command_executor`` are used
    :param str cmd: DevTools command name (for example ``Page.printToPDF``)
    :param dict params: parameters for the command. Defaults to an empty dict
    :return: the ``value`` entry of the driver's response
    :raises Exception: if the driver returns an empty (falsy) response
    """
    # A literal {} default would be one dict shared between all calls.
    if params is None:
        params = {}

    resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
    url = driver.command_executor._url + resource
    body = json.dumps({'cmd': cmd, 'params': params})
    response = driver.command_executor._request('POST', url, body)

    if not response:
        # A falsy response (None or empty) carries no 'value' to report, and
        # calling .get() on None would raise AttributeError, so raise a
        # descriptive error instead.
        raise Exception("DevTools command %r returned an empty response" % cmd)

    return response.get('value')


def __get_pdf_from_html(path: str, timeout: int, install_driver: bool, print_options=None):
    """
    Load a page in headless Chrome and return it rendered as PDF bytes.

    :param str path: HTML file or website link passed to ``driver.get``
    :param int timeout: seconds to wait on the loaded page before printing
    :param bool install_driver: when ``settings.CHROMIUM_DRIVER_PATH`` is None, whether to
        install chromedriver through ``ChromeDriverManager`` (True) or let
        ``webdriver.Chrome`` locate a driver itself (False)
    :param dict print_options: overrides merged on top of the default ``Page.printToPDF``
        options. Defaults to no overrides
    :return: the decoded PDF document as bytes
    """
    webdriver_options = Options()
    webdriver_options.add_argument('--no-sandbox')
    webdriver_options.add_argument('--headless')
    webdriver_options.add_argument('--disable-gpu')
    webdriver_options.add_argument("--remote-debugging-port=9222")
    webdriver_options.add_argument('--disable-dev-shm-usage')
    # NOTE(review): content-setting value 2 presumably blocks image loading - confirm
    # that PDFs without images are the intended output.
    webdriver_options.experimental_options['prefs'] = {
        'profile.default_content_settings': {'images': 2},
    }

    if settings.CHROMIUM_DRIVER_PATH is not None:
        # Use the chromedriver at the configured path.
        driver = webdriver.Chrome(executable_path=settings.CHROMIUM_DRIVER_PATH, options=webdriver_options)
    elif install_driver:
        # Install a matching chromedriver from the mirror and use it.
        driver = webdriver.Chrome(
            ChromeDriverManager(
                url='https://npm.taobao.org/mirrors/chromedriver/',
                latest_release_url='https://npm.taobao.org/mirrors/chromedriver/LATEST_RELEASE',
                chrome_type=ChromeType.GOOGLE if settings.CHROMIUM_DRIVER == 'Chrome' else ChromeType.CHROMIUM
            ).install(),
            options=webdriver_options
        )
    else:
        # Use the default chromedriver.
        driver = webdriver.Chrome(options=webdriver_options)

    # Always shut the browser down, including when loading or printing fails,
    # so no Chrome process is left running.
    try:
        driver.get(path)

        try:
            # Wait up to `timeout` seconds for the current <html> element to go stale.
            WebDriverWait(driver, timeout).until(staleness_of(driver.find_element_by_tag_name('html')))
        except TimeoutException:
            # The wait ran its full length without the element going stale;
            # this is the path on which the page was printed originally.
            pass

        # Print whether or not the wait timed out; previously a wait that
        # completed early made the function return None without a PDF.
        calculated_print_options = {
            'landscape': False,
            'displayHeaderFooter': False,
            'printBackground': True,
            'preferCSSPageSize': True,
        }
        calculated_print_options.update(print_options or {})
        result = __send_devtools(driver, "Page.printToPDF", calculated_print_options)
    finally:
        driver.quit()

    return base64.b64decode(result['data'])

if __name__ == '__main__':
    # Command-line usage: <script> <html file or link> <output pdf path>
    html_path = sys.argv[1]
    pdf_path = sys.argv[2]
    convert(html_path, pdf_path)