MrDoc/app_doc/report_utils.py

1000 lines
42 KiB
Python
Raw Normal View History

# coding:utf-8
# @文件: report_utils.py
# @创建者:州的先生
# #日期2019/12/7
# 博客地址zmister.com
# MrDoc文集文档导出相关功能代码
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from django.utils.translation import gettext_lazy as _
from bs4 import BeautifulSoup
import subprocess
import datetime,time
import re
import os,sys
import shutil
from django.core.wsgi import get_wsgi_application
sys.path.extend([settings.BASE_DIR])
os.environ.setdefault("DJANGO_SETTINGS_MODULE","MrDoc.settings")
application = get_wsgi_application()
import django
django.setup()
from app_doc.models import *
from subprocess import Popen
from loguru import logger
2021-02-28 08:22:34 +08:00
from app_doc.report_html2pdf import convert
import traceback
import time
import markdown
import yaml
# import PyPDF2
# from pdfminer import high_level
# 替换前端传来的非法字符
def validate_title(title):
rstr = r"[\/\\\:\*\?\"\<\>\|\[\]]" # '/ \ : * ? " < > |'
new_title = re.sub(rstr, "_", title) # 替换为下划线
return new_title
# 导出MD文件压缩包
@logger.catch()
class ReportMD():
def __init__(self,project_id):
# 查询文集信息
self.pro_id = project_id
self.project_data = Project.objects.get(pk=project_id)
# 文集名称
self.project_name = "{0}_{1}_{2}".format(
self.project_data.create_user,
validate_title(self.project_data.name),
str(datetime.date.today())
)
# 判断MD导出临时文件夹是否存在
if os.path.exists(settings.MEDIA_ROOT + "/reportmd_temp") is False:
os.mkdir(settings.MEDIA_ROOT + "/reportmd_temp")
# 判断文集名称文件夹是否存在
self.project_path = settings.MEDIA_ROOT + "/reportmd_temp/{}".format(self.project_name)
is_fold = os.path.exists(self.project_path)
if is_fold is False:
os.mkdir(self.project_path)
# 判断是否存在静态文件文件夹
self.media_path = settings.MEDIA_ROOT + "/reportmd_temp/{}/media".format(self.project_name)
is_media = os.path.exists(self.media_path)
if is_media is False:
os.mkdir(self.media_path)
def work(self):
# 初始化文集YAML数据
project_toc_list = {}
project_toc_list['project_name'] = validate_title(self.project_data.name)
project_toc_list['project_desc'] = self.project_data.intro
project_toc_list['project_role'] = self.project_data.role
project_toc_list['toc'] = []
# 读取指定文集的文档数据
data = Doc.objects.filter(top_doc=self.pro_id, parent_doc=0).order_by("sort")
# 遍历一级文档
for d in data:
top_item = {
'name': validate_title(d.name),
'file': validate_title(d.name)+'.md',
}
md_name = validate_title(d.name) # 文档名称
# 文档内容如果使用Markdown编辑器编写则导出Markdown文本如果使用富文本编辑器编写则导出HTML文本
md_content = self.operat_md_media(d.pre_content) \
if d.editor_mode in [1,2] else self.operat_md_media(d.content)
# 新建MD文件
with open('{}/{}.md'.format(self.project_path,md_name),'w',encoding='utf-8') as files:
files.write(md_content)
# 查询二级文档
data_2 = Doc.objects.filter(parent_doc=d.id).order_by("sort")
if data_2.count() > 0:
top_item['children'] = []
for d2 in data_2:
sec_item = {
'name': validate_title(d2.name),
'file': validate_title(d2.name)+'.md',
}
2021-06-15 08:35:13 +08:00
md_name_2 = validate_title(d2.name)
md_content_2 = self.operat_md_media(d2.pre_content) \
if d2.editor_mode in [1,2] else self.operat_md_media(d2.content)
# 新建MD文件
with open('{}/{}.md'.format(self.project_path, md_name_2), 'w', encoding='utf-8') as files:
files.write(md_content_2)
# 获取第三级文档
data_3 = Doc.objects.filter(parent_doc=d2.id).order_by("sort")
if data_3.count() > 0:
sec_item['children'] = []
for d3 in data_3:
item = {
'name': validate_title(d3.name),
'file': validate_title(d3.name)+'.md',
}
sec_item['children'].append(item)
md_name_3 = validate_title(d3.name)
md_content_3 = self.operat_md_media(d3.pre_content) \
if d3.editor_mode in [1,2] else self.operat_md_media(d3.content)
# 新建MD文件
with open('{}/{}.md'.format(self.project_path, md_name_3), 'w', encoding='utf-8') as files:
files.write(md_content_3)
top_item['children'].append(sec_item)
project_toc_list['toc'].append(top_item)
# 写入层级YAML
with open('{}/mrdoc.yaml'.format(self.project_path), 'a+', encoding='utf-8') as toc_yaml:
yaml.dump(project_toc_list,toc_yaml,allow_unicode=True)
# 压缩文件
md_file = shutil.make_archive(
base_name=self.project_path,
format='zip',
root_dir=self.project_path
)
# print(md_file)
# 删除文件夹
shutil.rmtree(self.project_path)
return "{}.zip".format(self.project_path)
# 处理MD内容中的静态文件
def operat_md_media(self,md_content):
# 查找MD内容中的静态文件
pattern = r"\!\[.*?\]\(.*?\)"
media_list = re.findall(pattern, md_content)
# print(media_list)
# 存在静态文件,进行遍历
if len(media_list) > 0:
for media in media_list:
try:
media_filename = media.replace('//','/').split("(")[-1].split(")")[0] # 媒体文件的文件名
except:
continue # 对本地静态文件进行复制
if media_filename.startswith("/media"):
# print(media_filename)
sub_folder = "/" + media_filename.split("/")[2] # 获取子文件夹的名称
# print(sub_folder)
is_sub_folder = os.path.exists(self.media_path+sub_folder)
# 创建子文件夹
if is_sub_folder is False:
os.mkdir(self.media_path+sub_folder)
# 替换MD内容的静态文件链接
md_content = md_content.replace(media_filename, "." + media_filename)
# 复制静态文件到指定文件夹
try:
shutil.copy(settings.BASE_DIR + media_filename, self.media_path+sub_folder)
except FileNotFoundError:
pass
return md_content
# 不存在静态文件直接返回MD内容
else:
return md_content
# 批量导出文集Markdown压缩包
class ReportMdBatch():
def __init__(self,username,project_id_list):
self.project_list = project_id_list
self.username = username
# 判断MD导出临时文件夹是否存在
if os.path.exists(settings.MEDIA_ROOT + "/reportmd_temp") is False:
os.mkdir(settings.MEDIA_ROOT + "/reportmd_temp")
# 判断用户名+日期文件夹是否存在
self.report_file_path = settings.MEDIA_ROOT + "/reportmd_temp/{}_{}".format(
self.username,datetime.datetime.strftime(datetime.datetime.now(),"%y%m%d%H%M%S")
)
is_fold = os.path.exists(self.report_file_path)
if is_fold is False:
os.mkdir(self.report_file_path)
def work(self):
# 遍历文集列表,打包每一个文集
project_file_list = []
for project_id in self.project_list:
report_func = ReportMD(project_id=project_id)
report_project_zip = report_func.work()
project_file_list.append(report_project_zip)
# 遍历打包好的文集列表,将其移入统一文件夹
for file in project_file_list:
shutil.move(file,self.report_file_path)
# 压缩打包文集合集文件夹
md_file = shutil.make_archive(
base_name=self.report_file_path,
format='zip',
root_dir=self.report_file_path
)
# print(md_file)
# 删除文件夹
shutil.rmtree(self.report_file_path)
return "{}.zip".format(self.report_file_path)
# 导出EPUB
@logger.catch()
class ReportEPUB():
def __init__(self,project_id):
self.project = Project.objects.get(id=project_id)
self.base_path = settings.MEDIA_ROOT + '/report_epub/{}/'.format(project_id)
# 创建相关目录
if os.path.exists(self.base_path + '/OEBPS') is False:
os.makedirs(self.base_path + '/OEBPS')
if os.path.exists(self.base_path + '/OEBPS/Images') is False:
os.makedirs(self.base_path + '/OEBPS/Images')
if os.path.exists(self.base_path + '/OEBPS/Text') is False:
os.makedirs(self.base_path + '/OEBPS/Text')
if os.path.exists(self.base_path + '/OEBPS/Styles') is False:
os.makedirs(self.base_path + '/OEBPS/Styles')
if os.path.exists(self.base_path + '/META-INF') is False:
os.makedirs(self.base_path + '/META-INF')
# 复制样式文件到相关目录
shutil.copyfile(settings.BASE_DIR+'/static/report_epub/style.css',self.base_path + '/OEBPS/Styles/style.css')
# shutil.copyfile(settings.BASE_DIR+'/static/katex/katex.min.css',self.base_path + '/OEBPS/Styles/katex.css')
shutil.copyfile(settings.BASE_DIR+'/static/editor.md/css/editormd.min.css',self.base_path + '/OEBPS/Styles/editormd.css')
# 复制封面图片到相关目录
shutil.copyfile(settings.BASE_DIR+'/static/report_epub/epub_cover1.jpg',self.base_path + '/OEBPS/Images/epub_cover1.jpg')
# 将文档内容写入HTML文件
def write_html(self, d, html_str):
# 使用BeautifulSoup解析拼接好的HTML文本
html_soup = BeautifulSoup(html_str, 'lxml')
src_tag = html_soup.find_all(lambda tag: tag.has_attr("src")) # 查找所有包含src的标签
mindmap_tag = html_soup.select('svg.mindmap') # 查找所有脑图的SVG标签
tex_tag = html_soup.select('.editormd-tex') # 查找所有公式标签
flowchart_tag = html_soup.select('.flowchart') # 查找所有流程图标签
seque_tag = html_soup.select('.sequence-diagram') # 查找所有时序图标签
echart_tag = html_soup.select('.echart') # 查找所有echart图表标签
code_tag = html_soup.find_all(name="code") # 查找code代码标签
2020-07-31 20:23:23 +08:00
iframe_tag = html_soup.find_all(name='iframe') # 查找iframe
# 添加css样式标签
style_link = html_soup.new_tag(name='link',href="../Styles/style.css",rel="stylesheet",type="text/css")
html_soup.body.insert_before(style_link)
editormd_link = html_soup.new_tag(name='link',href='../Styles/editormd.css',rel="stylesheet",type="text/css")
html_soup.body.insert_before(editormd_link)
# 添加html标签的xmlns属性
html_soup.html['xmlns'] = "http://www.w3.org/1999/xhtml"
2020-07-31 20:23:23 +08:00
# 替换iframe视频为视频URL链接文本
for iframe in iframe_tag:
iframe_src = iframe.get('src')
iframe.name = 'p'
iframe.string = _("本格式不支持iframe视频显示视频地址为{}".format(iframe_src))
2020-07-31 20:23:23 +08:00
# 替换HTML文本中静态文件的相对链接为绝对链接
for src in src_tag:
if src['src'].startswith("/"):
src_path = src['src'] # 媒体文件原始路径
src_filename = src['src'].split("/")[-1] # 媒体文件名
src['src'] = '../Images/' + src_filename # 媒体文件在EPUB中的路径
# 复制文件到epub的Images文件夹
try:
shutil.copyfile(
src= settings.BASE_DIR + src_path,
dst= self.base_path + '/OEBPS/Images/' + src_filename
)
except FileNotFoundError as e:
pass
# 创建写入临时HTML文件
temp_file_path = self.base_path + '/OEBPS/Text/{0}.xhtml'.format(d.id)
with open(temp_file_path, 'a+', encoding='utf-8') as htmlfile:
htmlfile.write('<?xml version="1.0" encoding="UTF-8"?>' + str(html_soup))
# 生成文档HTML
def generate_html(self):
# 查询文档
data = Doc.objects.filter(top_doc=self.project.id, parent_doc=0, status=1).order_by("sort")
self.toc_list = [
{
'id': 0,
'link': 'Text/toc_summary.xhtml',
'pid': 0,
'title': _('目录')
}
]
nav_str = '''<navMap>'''
toc_summary_str = '''<ul>'''
nav_num = 1
# content.opf相关
manifest = '''<item id="book_cover" href="Text/book_cover.xhtml" media-type="application/xhtml+xml"/>
<item id="book_title" href="Text/book_title.xhtml" media-type="application/xhtml+xml"/>
<item id="book_desc" href="Text/book_desc.xhtml" media-type="application/xhtml+xml"/>
<item id="toc_summary" href="Text/toc_summary.xhtml" media-type="application/xhtml+xml"/>
'''
spine = '<itemref idref="book_cover" linear="no"/><itemref idref="book_title"/><itemref idref="book_desc"/><itemref idref="toc_summary"/>'
for d in data:
# 拼接HTML字符串
html_str = "<h1 style='page-break-before: always;'>{}</h1>".format(d.name)
if d.content is None:
d.content = markdown.markdown(
d.pre_content,
extensions=['markdown.extensions.fenced_code','markdown.extensions.tables']
)
html_str += d.content
self.write_html(d=d,html_str=html_str) # 生成HTML
# 生成HTML的目录位置
toc = {
'id':d.id,
'link':'{}.xhtml'.format(d.id),
'pid':d.parent_doc,
'title':d.name
}
self.toc_list.append(toc)
# nav
toc_nav = '''<navPoint id="np_{nav_num}" playOrder="{nav_num}">
<navLabel><text>{title}</text></navLabel>
<content src="Text/{file}"/>
'''.format(nav_num=nav_num,title=d.name,file=toc['link'])
nav_str += toc_nav
# toc_summary
toc_summary_str += '''<li><a href="./{}">{}</a>'''.format(toc['link'],toc['title'])
# content.opf
manifest += '<item id="{}" href="Text/{}.xhtml" media-type="application/xhtml+xml"/>'.format(d.id, d.id)
spine += '<itemref idref="{}"/>'.format(d.id)
nav_num += 1
# 获取第二级文档
data_2 = Doc.objects.filter(parent_doc=d.id,status=1).order_by("sort")
if data_2.count() > 0:
toc_summary_str += '<ul>'
for d2 in data_2:
html_str = "<h1>{}</h1>".format(d2.name)
if d2.content is None:
d2.content = markdown.markdown(
d2.pre_content,
extensions=['markdown.extensions.fenced_code', 'markdown.extensions.tables']
)
html_str += d2.content
self.write_html(d=d2,html_str=html_str)
# 生成HTML的目录位置
toc = {
'id': d2.id,
'link': '{}.xhtml'.format(d2.id),
'pid': d2.parent_doc,
'title': d2.name
}
self.toc_list.append(toc)
toc_nav = '''<navPoint id="np_{nav_num}" playOrder="{nav_num}">
<navLabel><text>{title}</text></navLabel>
<content src="Text/{file}"/>
'''.format(nav_num=nav_num, title=d2.name, file=toc['link'])
nav_str += toc_nav
# toc_summary
toc_summary_str += '''<li><a href="./{}">{}</a>'''.format(toc['link'], toc['title'])
# content.opf
manifest += '<item id="{}" href="Text/{}.xhtml" media-type="application/xhtml+xml"/>'.format(d2.id, d2.id)
spine += '<itemref idref="{}"/>'.format(d2.id)
nav_num += 1
# 获取第三级文档
data_3 = Doc.objects.filter(parent_doc=d2.id,status=1).order_by("sort")
if data_3.count() > 0:
toc_summary_str += '<ul>'
for d3 in data_3:
html_str = "<h1>{}</h1>".format(d3.name)
# 如果文档没有HTML内容将Markdown转换为HTML
if d3.content is None:
d3.content = markdown.markdown(
d3.pre_content,
extensions=['markdown.extensions.fenced_code', 'markdown.extensions.tables']
)
html_str += d3.content
self.write_html(d=d3,html_str=html_str)
# 生成HTML的目录位置
toc = {
'id': d3.id,
'link': '{}.xhtml'.format(d3.id),
'pid': d3.parent_doc,
'title': d3.name
}
self.toc_list.append(toc)
toc_nav = '''<navPoint id="np_{nav_num}" playOrder="{nav_num}">
<navLabel><text>{title}</text></navLabel>
<content src="Text/{file}"/>
</navPoint>
'''.format(nav_num=nav_num, title=d3.name, file=toc['link'])
nav_str += toc_nav
# toc_summary
toc_summary_str += '''<li><a href="./{}">{}</a></li>'''.format(toc['link'], toc['title'])
# content.opf
manifest += '<item id="{}" href="Text/{}.xhtml" media-type="application/xhtml+xml"/>'.format(d3.id,
d3.id)
spine += '<itemref idref="{}"/>'.format(d3.id)
nav_num += 1
nav_str += "</navPoint>"
if data_3.count() > 0:
toc_summary_str += "</ul></li>"
else:
toc_summary_str += "</li>"
nav_str += "</navPoint>"
if data_2.count() > 0:
toc_summary_str += "</ul></li>"
else:
toc_summary_str += "</li>"
nav_str += '</navMap>'
toc_summary_str += '</ul>'
# print(nav_str)
# print(toc_summary_str)
self.nav_str = nav_str
self.toc_summary_str = toc_summary_str
# self.config_json['toc'] = self.toc_list
self.manifest = manifest
self.spine = spine
# 生成书籍标题的描述HTML文件
def generate_title_html(self):
title_str = '''<?xml version="1.0" encoding="UTF-8"?>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>书籍标题</title>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<link href="../Styles/style.css" rel="stylesheet" type="text/css"/>
</head>
<body class="bookname">
<div class="main">
<h1 class="title"">{title}</h1>
<p class="author"><b>{author} </b></p><br>
<p class="author">{create_time}</p>
<p class="book-src">本书籍由<a href='http://mrdoc.zmister.com'>MrDoc(mrdoc.zmister.com)</a>生成</p>
</div>
</body>
</html>
'''.format(
title=self.project.name,
author=self.project.create_user,
create_time = time.strftime('%Y{y}%m{m}%d{d}').format(y='',m='',d='')
)
with open(self.base_path+'/OEBPS/Text/book_title.xhtml','a+',encoding='utf-8') as file:
file.write(title_str)
desc_str = '''<?xml version="1.0" encoding="UTF-8"?>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>简介</title>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<link href="../Styles/style.css" rel="stylesheet" type="text/css"/>
</head>
<body class="bookdesc">
<div class="main">
<p class="title">书籍简介</p>
<p class="subtitle">{desc}</p>
</div>
</body>
</html>
'''.format(desc=self.project.intro)
with open(self.base_path+'/OEBPS/Text/book_desc.xhtml','a+',encoding='utf-8') as file:
file.write(desc_str)
# 生成元信息container.xml文件
def generate_metainfo(self):
xml = '''<?xml version="1.0" encoding="UTF-8"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container" >
<rootfiles>
<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml" />
</rootfiles>
</container>
'''
folder = self.base_path + '/META-INF'
with open(folder+'/container.xml','a+',encoding='utf-8') as metafile:
metafile.write(xml)
# 生成元类型mimetype文件
def generate_metatype(self):
with open(self.base_path+'/mimetype','a+',encoding='utf-8') as metatype:
metatype.write('application/epub+zip')
# 生成封面
def generate_cover(self):
xml_str = '''<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="zh">
<head>
<title>封面</title>
<style type="text/css">
svg {padding: 0pt; margin:0pt}
body { text-align: center; padding:0pt; margin: 0pt; }
</style>
</head>
<body>
<div>
<svg xmlns="http://www.w3.org/2000/svg" height="100%" preserveAspectRatio="xMidYMid meet" version="1.1" viewBox="0 0 628 892" width="100%" xmlns:xlink="http://www.w3.org/1999/xlink">
<image height="892" width="628" xlink:href="../Images/epub_cover1.jpg"/>
</svg>
</div>
</body>
</html>
'''
with open(self.base_path + '/OEBPS/Text/book_cover.xhtml','a+', encoding='utf-8') as cover:
cover.write(xml_str)
# 生成文档目录.ncx文件
def generate_toc_ncx(self):
ncx = '''
<?xml version='1.0' encoding='utf-8'?>
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="zh-CN">
<head>
<meta name="dtb:uid" content="urn:uuid:12345"/>
<meta name="dtb:depth" content="1"/>
<meta name="dtb:totalPageCount" content="0"/>
<meta name="dtb:maxPageNumber" content="0"/>
</head>
<docTitle>
<text>{title}</text>
</docTitle>
{nav_map}
</ncx>
'''.format(title=self.project.name,nav_map=self.nav_str)
with open(self.base_path+'/OEBPS/toc.ncx','a+',encoding='utf-8') as file:
file.write(ncx)
# 生成文档目录toc_summary.html文件
def generate_toc_html(self):
summary = '''<?xml version="1.0" encoding="UTF-8"?>
<html lang="zh-CN">
<head>
<meta charset="utf-8">
<title>目录</title>
<style>
body{margin: 0px;padding: 0px;}h1{text-align: center;padding: 0px;margin: 0px;}ul,li{list-style: none;}ul{padding-left:0px;}li>ul{padding-left: 2em;}
a{text-decoration: none;color: #4183c4;text-decoration: none;font-size: 16px;line-height: 28px;}
</style>
</head>
<body>
<h1>&nbsp;&nbsp;&nbsp;&nbsp;</h1>
%s
</body>
</html>
''' % (self.toc_summary_str)
with open(self.base_path+'/OEBPS/Text/toc_summary.xhtml','a+',encoding='utf-8') as file:
file.write(summary)
# 生成content.opf文件
def generate_opf(self):
content_info = '''<?xml version="1.0" encoding="utf-8" ?>
<package version="2.0" xmlns="http://www.idpf.org/2007/opf" unique-identifier="uid" >
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
<dc:title>{title}</dc:title>
<dc:language>zh</dc:language>
<dc:creator>{creator}</dc:creator>
<dc:identifier id="bookid">urn:uuid:12345</dc:identifier>
<dc:publisher>MrDoc制作</dc:publisher>
<dc:date opf:event="publication">{create_time}</dc:date>
<dc:description>{desc}</dc:description>
<meta name="cover" content="cover_img" />
<meta name="output encoding" content="utf-8" />
<meta name="primary-writing-mode" content="horizontal-lr" />
</metadata>
<manifest>
{manifest}
<item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>
<item id="css" href="stylesheet.css" media-type="text/css"/>
<item id="cover_img" media-type="image/jpeg" href="Images/epub_cover1.jpg" />
</manifest>
<spine toc="ncx">
{spine}
</spine>
<guide>
<reference type="toc" title="目录" href="Text/toc_summary.xhtml" />
<reference href="Text/book_cover.xhtml" type="cover" title="封面"/>
</guide>
</package>
'''
with open(self.base_path+'/OEBPS/content.opf','a+',encoding='utf-8') as file:
file.write(
content_info.format(
title = self.project.name,
creator = self.project.create_user,
create_time = str(datetime.date.today()),
desc=self.project.intro,
manifest=self.manifest,
spine = self.spine,
)
)
# 生成epub文件
def generate_epub(self):
try:
# 生成ZIP压缩文件
zipfile_name = settings.MEDIA_ROOT + '/report_epub/{}'.format(self.project.name)+'_'+str(int(time.time()))
zip_name = shutil.make_archive(
base_name = zipfile_name,
format='zip',
root_dir= settings.MEDIA_ROOT + '/report_epub/{}'.format(self.project.id)
)
# print(zip_name)
# 修改zip压缩文件后缀为EPUB
os.rename(zip_name,zipfile_name+'.epub')
# 删除生成的临时文件夹
shutil.rmtree(self.base_path)
return zipfile_name
except Exception as e:
if settings.DEBUG:
print(traceback.print_exc())
return None
def work(self):
self.generate_html() # 生成HTML
self.generate_metainfo() # 生成元信息
self.generate_metatype() # 生成元类型
self.generate_toc_ncx() # 生成目录ncx
self.generate_toc_html() # 生成目录html
self.generate_cover() # 生成封面html
self.generate_title_html() # 生产书籍的标题页和简介页
self.generate_opf() # 生成content.opf
epub_file = self.generate_epub()
return epub_file
# 导出PDF
@logger.catch()
class ReportPDF():
def __init__(self,project_id,user_id):
# 查询文集信息
self.pro_id = project_id
self.user_id = user_id
self.editormd_html_str = '''
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>{title}</title>
<link rel="stylesheet" href="../../static/layui/css/layui.css" />
<link rel="stylesheet" href="../../static/editor.md/css/editormd.css" />
<link rel="stylesheet" href="../../static/mrdoc/mrdoc-docs.css" />
<script src="../../static/jquery/3.1.1/jquery.min.js"></script>
<script>var iframe_whitelist = []</script>
<script src="../../static/editor.md/lib/marked.min.js"></script>
<script src="../../static/editor.md/lib/purify.min.js"></script>
<script src="../../static/editor.md/lib/prettify.min.js"></script>
<script src="../../static/editor.md/lib/raphael.min.js"></script>
<script src="../../static/editor.md/lib/underscore.min.js"></script>
<script src="../../static/editor.md/editormd.js"></script>
<style>
pre.linenums {{
max-height: 100%;
}}
ol.linenums li {{
width: 100%;
}}
/*一级无序li显示实心圆点*/
ul li{{
list-style:disc;
}}
/*二级无序li显示空心圆点*/
ul > li > ul > li{{
list-style-type: circle;
}}
/*有序li显示数字*/
ol li{{
list-style-type: decimal;
}}
ol ol ul,ol ul ul,ul ol ul,ul ul ul {{
list-style-type: square;
}}
/* 三级及以下无序li显示小方块 */
ul ul ul li{{
list-style-type: square;
}}
</style>
</head>
<body>
<div style="position: fixed;font-size:8px; bottom: 5px;padding: 5px; right: 10px; color: white;background: black; z-index: 10000">
2022-01-07 13:38:07 +08:00
本文件由MrDoc觅思文档生成
</div>
<div style="text-align:center;margin-top:400px;">
<h1>{project_name}</h1>
<p>作者{author}</p>
<p>日期{create_time}</p>
</div>\n
<div class="markdown-body" id="content" style="padding:0px;font-family:宋体;">
<textarea style="display: none;">{pre_content}</textarea>
</div>
<script>
editormd.markdownToHTML("content", {{
htmlDecode : "style,script,iframe",
emoji : true, //emoji表情
taskList : true, // 任务列表
tex : true, // 科学公式
flowChart : true, // 流程图
sequenceDiagram : true, // 时序图
tocm : true, //目录
toc :true,
tocContainer : "#toc-container",
tocDropdown : false,
atLink : false,//禁用@链接
plugin_path : '../../static/editor.md/lib/',
}});
$('img.emoji').each(function(){{
var img = $(this);
if(img[0].src.indexOf("/static/editor.md/")){{
var src = img[0].src.split('static');
img[0].src = '../../static' + src[1];
}}
}})
</script>
</body>
</html>
'''
self.vditor_html_str = ''''''
self.iceesitor_html_str = ''''''
self.content_str = ""
def work(self):
try:
user = User.objects.get(id=self.user_id)
project = Project.objects.get(pk=self.pro_id,create_user=user)
except ObjectDoesNotExist:
logger.error("查询文集或用户失败")
return False
except:
logger.exception("未知异常")
2021-02-28 08:22:34 +08:00
return False
# 拼接文档的HTML字符串
2021-06-16 17:00:15 +08:00
data = Doc.objects.filter(top_doc=self.pro_id,parent_doc=0,status=1).order_by("sort")
toc_list = {'1':[],'2':[],'3':[]}
for d in data:
self.content_str += "<h1 style='page-break-before: always;'>{}</h1>\n\n".format(d.name)
if d.editor_mode in [1,2]:
self.content_str += d.pre_content + '\n'
elif d.editor_mode == 3:
self.content_str += d.content + '\n'
toc_list['1'].append({'id':d.id,'name':d.name})
# 获取第二级文档
2021-06-16 17:00:15 +08:00
data_2 = Doc.objects.filter(parent_doc=d.id,status=1).order_by("sort")
for d2 in data_2:
self.content_str += "\n\n<h1 style='page-break-before: always;'>{}</h1>\n\n".format(d2.name)
if d2.editor_mode in [1, 2]:
self.content_str += d2.pre_content + '\n'
elif d2.editor_mode == 3:
self.content_str += d2.content + '\n'
toc_list['2'].append({'id':d2.id,'name':d2.name,'parent':d.id})
# 获取第三级文档
2021-06-16 17:00:15 +08:00
data_3 = Doc.objects.filter(parent_doc=d2.id,status=1).order_by("sort")
for d3 in data_3:
# print(d3.name,d3.content)
self.content_str += "\n\n<h1 style='page-break-before: always;'>{}</h1>\n\n".format(d3.name)
if d3.editor_mode in [1, 2]:
self.content_str += d3.pre_content + '\n'
elif d3.editor_mode == 3:
self.content_str += d3.content + '\n'
toc_list['3'].append({'id':d3.id,'name':d3.name,'parent':d2.id})
# 替换所有媒体文件链接
self.content_str = self.content_str.replace('![](/media/','![](../../media/')
# print(self.html_str.format(pre_content=self.content_str))
# 创建写入临时HTML文件
report_pdf_folder = settings.MEDIA_ROOT+'/report_pdf'
is_folder = os.path.exists(report_pdf_folder)
# 创建文件夹
if is_folder is False:
os.mkdir(report_pdf_folder)
# 临时HTML和PDF文件名
temp_file_name = '{}_{}'.format(
project.name,
str(datetime.datetime.today()).replace(' ', '-').replace(':', '-')
)
# 临时HTML文件路径
temp_file_path = report_pdf_folder + '/{0}.html'.format(temp_file_name)
# PDF文件路径
report_file_path = report_pdf_folder + '/{0}.pdf'.format(temp_file_name)
# 写入HTML文件
with open(temp_file_path, 'w', encoding='utf-8') as htmlfile:
htmlfile.write(
self.editormd_html_str.format(
title=project.name,
pre_content=self.content_str,
project_name=project.name,
author=project.create_user.first_name if project.create_user.first_name != '' else project.create_user.username,
create_time=str(datetime.date.today())
)
)
# 执行HTML转PDF
2021-02-28 08:22:34 +08:00
try:
2021-02-28 08:51:33 +08:00
convert('file://'+temp_file_path,report_file_path)
2021-02-28 08:22:34 +08:00
except:
logger.exception(_("生成PDF出错"))
2021-02-28 08:22:34 +08:00
return False
# 处理PDF文件
if os.path.exists(report_file_path):
2021-02-28 08:22:34 +08:00
os.remove(temp_file_path)
return report_file_path
else:
return False
# 导出Docx
class ReportDocx():
def __init__(self,project_id):
self.project = Project.objects.get(id=project_id)
self.base_path = settings.MEDIA_ROOT + '/report/{}/'.format(project_id)
self.content_str = ""
self.doc_str = """<html xmlns:v="urn:schemas-microsoft-com:vml"
xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:w="urn:schemas-microsoft-com:office:word"
xmlns="http://www.w3.org/TR/REC-html40">
<head><meta http-equiv=Content-Type content="text/html; charset=utf-8">
<style type="text/css">
table
{
border-collapse: collapse;
border: none;
width: 100%;
}
td,tr
{
border: solid #CCC 1px;
padding:3px;
font-size:9pt;
}
.codestyle{
word-break: break-all;
mso-highlight:rgb(252, 252, 252);
padding-left: 5px; background-color: rgb(252, 252, 252); border: 1px solid rgb(225, 225, 232);
}
img {
width:100;
}
/*预格式*/
pre {
padding: 10px;
background: #f6f6f6;
border: 1px solid #ddd;
white-space: pre-wrap;
word-wrap: break-word;
white-space: -moz-pre-wrap;
white-space: -o-pre-wrap;
}
/*块代码*/
pre code {
border: none;
background: none;
}
pre ol {
padding-left: 2.5em;
margin: 0;
}
/*行内代码*/
code {
border: 1px solid #ddd;
background: #f6f6f6;
padding: 3px;
border-radius: 3px;
font-size: 14px;
}
/* 引用块 */
blockquote {
color: #666;
border-left: 4px solid #ddd;
padding-left: 20px;
margin-left: 0;
font-size: 14px;
font-style: italic;
}
/* 表格 */
table {
display: block;
width: 100%;
overflow: auto;
word-break: normal;
word-break: keep-all;
margin-bottom: 16px;
}
thead {
display: table-header-group;
vertical-align: middle;
border-color: inherit;
}
table thead tr {
background-color: #F8F8F8;
}
table th, table td {
padding: 6px 13px;
border: 1px solid #ddd;
}
/*公式*/
p.editormd-tex {
text-align: center;
}
</style>
<meta name=ProgId content=Word.Document>
<meta name=Generator content="Microsoft Word 11">
<meta name=Originator content="Microsoft Word 11">
<xml><w:WordDocument><w:View>Print</w:View></xml></head>
<body>
"""
def work(self):
# 拼接HTML字符串
2021-06-16 17:00:15 +08:00
data = Doc.objects.filter(top_doc=self.project.id,parent_doc=0,status=1).order_by("sort")
for d in data:
# print(d.name,d.content)
self.content_str += "<h1 style='page-break-before: always;'>{}</h1>".format(d.name)
self.content_str += d.content
# 获取第二级文档
data_2 = Doc.objects.filter(parent_doc=d.id).order_by("sort")
for d2 in data_2:
self.content_str += "<h1>{}</h1>".format(d2.name)
self.content_str += d2.content
# 获取第三级文档
data_3 = Doc.objects.filter(parent_doc=d2.id).order_by("sort")
for d3 in data_3:
# print(d3.name,d3.content)
self.content_str += "<h1>{}</h1>".format(d3.name)
self.content_str += d3.content
# 使用BeautifulSoup解析拼接好的HTML文本
soup = BeautifulSoup(self.content_str,'lxml')
src_tag = soup.find_all(lambda tag:tag.has_attr("src")) # 查找所有包含src的标签
print(src_tag)
# 替换HTML文本中静态文件的相对链接为绝对链接
for src in src_tag:
if src['src'].startswith("/"):
src['src'] = settings.BASE_DIR + src['src']
is_folder = os.path.exists(self.base_path)
# 创建文件夹
if is_folder is False:
os.mkdir(self.base_path)
temp_file_name = str(datetime.datetime.today()).replace(':', '-').replace(' ', '-').replace('.', '')
temp_file_path = self.base_path + '/{0}.docx'.format(temp_file_name)
with open(temp_file_path, 'a+', encoding='utf-8') as htmlfile:
htmlfile.write(self.doc_str + self.content_str + "</body></html>")
if __name__ == '__main__':
# app = ReportMD(
# project_id=7
# )
# app.work()
# app = ReportEPUB(project_id=20)
# app.work()
app = ReportPDF(project_id=20)
app.work()
# app = ReportDocx(project_id=20)
# app.work()