cube-studio/myapp/views/view_job_template.py

462 lines
20 KiB
Python
Raw Normal View History

2021-08-17 17:00:34 +08:00
from flask import render_template,redirect
from flask_appbuilder.models.sqla.interface import SQLAInterface
from flask_appbuilder import ModelView, ModelRestApi
from flask_appbuilder import ModelView,AppBuilder,expose,BaseView,has_access
from importlib import reload
from flask import jsonify
from flask_babel import gettext as __
from flask_babel import lazy_gettext as _
import uuid
import re
from wtforms.validators import DataRequired, Length, NumberRange, Optional,Regexp
from kfp import compiler
from sqlalchemy.exc import InvalidRequestError
# 将model添加成视图并控制在前端的显示
from myapp.models.model_job import Repository,Images,Job_Template,Task,Pipeline,Workflow,Tfjob,Xgbjob,RunHistory,Pytorchjob
from myapp.models.model_team import Project,Project_User
from flask_appbuilder.actions import action
from jinja2 import Environment, BaseLoader, DebugUndefined, StrictUndefined
from myapp.utils import core
from myapp import app, appbuilder,db,event_logger
from wtforms import BooleanField, IntegerField,StringField, SelectField,FloatField,DateField,DateTimeField,SelectMultipleField,FormField,FieldList
from flask_appbuilder.fieldwidgets import BS3TextFieldWidget,BS3PasswordFieldWidget,DatePickerWidget,DateTimePickerWidget,Select2ManyWidget,Select2Widget
from myapp.forms import MyBS3TextAreaFieldWidget,MySelect2Widget,MyCodeArea,MyLineSeparatedListField,MyJSONField,MyBS3TextFieldWidget,MySelectMultipleField
import re,copy
from .baseApi import (
MyappModelRestApi
)
from flask import (
current_app,
abort,
flash,
g,
Markup,
make_response,
redirect,
render_template,
request,
send_from_directory,
Response,
url_for,
)
from .base import (
api,
BaseMyappView,
check_ownership,
data_payload_response,
DeleteMixin,
generate_download_headers,
get_error_msg,
get_user_roles,
handle_api_exception,
json_error_response,
json_success,
MyappFilter,
MyappModelView,
)
from flask_appbuilder import CompactCRUDMixin, expose
from myapp.views.view_images import Images_Filter
from myapp.views.view_team import Project_Filter
import pysnooper,datetime,time,json
conf = app.config
logging = app.logger
# 开发者能看到所有模板用户只能看到release的模板
class Job_Tempalte_Filter(MyappFilter):
# @pysnooper.snoop()
def apply(self, query, func):
user_roles = [role.name.lower() for role in list(self.get_user_roles())]
if "admin" in user_roles:
return query
# join_projects_id = security_manager.get_join_projects_id(db.session)
# logging.info(join_projects_id)
return query.filter(self.model.version=='Release')
# 定义数据库视图
class Job_Template_ModelView_Base():
datamodel = SQLAInterface(Job_Template)
label_title='任务模板'
check_redirect_list_url = '/job_template_modelview/list/?_flt_2_name='
help_url = conf.get('HELP_URL', {}).get(datamodel.obj.__tablename__, '') if datamodel else ''
list_columns = ['project','name_title','version','creator','modified']
show_columns = ['project','name','version','describe','images_url','workdir','entrypoint','args_html','demo_html','env','hostAliases','privileged','expand_html']
add_columns = ['project','images','name','version','describe','workdir','entrypoint','volume_mount','job_args_definition','args','env','hostAliases','privileged','accounts','demo','expand']
edit_columns = add_columns
base_filters = [["id", Job_Tempalte_Filter, lambda: []]] # 设置权限过滤器
base_order = ('id', 'desc')
order_columns = ['id']
add_form_query_rel_fields = {
"images": [["name", Images_Filter, None]],
"project": [["name", Project_Filter, 'job-template']],
}
version_list = [[version,version] for version in ['Alpha','Release']]
edit_form_query_rel_fields = add_form_query_rel_fields
add_form_extra_fields = {
"name": StringField(
_(datamodel.obj.lab('name')),
description='英文名(字母、数字、- 组成)最长50个字符',
widget=BS3TextFieldWidget(), # 传给widget函数的是外层的field对象以及widget函数的参数
validators=[Regexp("^[a-z][a-z0-9\-]*[a-z0-9]$"), Length(1, 54)]
),
"version": SelectField(
_(datamodel.obj.lab('version')),
description="job模板的版本release版本的模板才能被所有用户看到",
widget=Select2Widget(),
choices=version_list
),
"volume_mount": StringField(
_(datamodel.obj.lab('volume_mount')),
description='使用该模板的task会在保存时自动添加该挂载',
widget=BS3TextFieldWidget(), # 传给widget函数的是外层的field对象以及widget函数的参数
),
"workdir": StringField(
_(datamodel.obj.lab('workdir')),
description='工作目录,不填写将直接使用镜像默认的工作目录',
widget=BS3TextFieldWidget(), # 传给widget函数的是外层的field对象以及widget函数的参数
),
"entrypoint": StringField(
_(datamodel.obj.lab('entrypoint')),
description='镜像的入口命令直接写成单行字符串例如python xx.py无需添加[]',
widget=BS3TextFieldWidget(), # 传给widget函数的是外层的field对象以及widget函数的参数
),
"args": StringField(
_(datamodel.obj.lab('args')),
description='使用job模板task需要填写的参数需要按Job Args Definition的标准写入',
widget=MyBS3TextAreaFieldWidget(rows=10), # 传给widget函数的是外层的field对象以及widget函数的参数
),
"env": StringField(
_(datamodel.obj.lab('env')),
description='使用模板的task自动添加的环境变量支持模板变量。书写格式:每行一个环境变量env_key=env_value',
widget=MyBS3TextAreaFieldWidget(rows=3), # 传给widget函数的是外层的field对象以及widget函数的参数
),
"hostAliases": StringField(
_(datamodel.obj.lab('hostAliases')),
description='添加到容器内的host映射。书写格式:每行一个dns解析记录ip host1 host2示例1.1.1.1 example1.oa.com example2.oa.com',
widget=MyBS3TextAreaFieldWidget(rows=3), # 传给widget函数的是外层的field对象以及widget函数的参数
),
"demo": StringField(
_(datamodel.obj.lab('demo')),
description='填写demo',
widget=MyBS3TextAreaFieldWidget(rows=10), # 传给widget函数的是外层的field对象以及widget函数的参数
),
"job_args_definition": StringField(
_(datamodel.obj.lab('job_args_definition')),
description='使用job模板参数的标准填写方式',
widget=MyCodeArea(code=core.job_template_args_definition()), # 传给widget函数的是外层的field对象以及widget函数的参数
),
"privileged":BooleanField(
_(datamodel.obj.lab('privileged')),
description='是否启动超级权限'
),
"expand": StringField(
_(datamodel.obj.lab('expand')),
description='json格式的扩展字段支持 index:$模板展示顺序号help_url:$帮助文档地址',
widget=MyBS3TextAreaFieldWidget(rows=3), # 传给widget函数的是外层的field对象以及widget函数的参数
),
}
edit_form_extra_fields = add_form_extra_fields
# 校验是否是json
# @pysnooper.snoop(watch_explode=('job_args'))
def pre_add(self, item):
if not item.env:
item.env=''
envs = item.env.strip().split('\n')
envs = [env.strip() for env in envs if env.strip() and '=' in env]
item.env = '\n'.join(envs)
if not item.args:
item.args = '{}'
item.args = core.validate_job_args(item)
if not item.expand or not item.expand.strip():
item.expand='{}'
core.validate_json(item.expand)
item.expand = json.dumps(json.loads(item.expand), indent=4, ensure_ascii=False)
if not item.demo or not item.demo.strip():
item.demo='{}'
core.validate_json(item.demo)
if item.hostAliases:
# if not item.images.entrypoint:
# raise MyappException('images entrypoint not exist')
all_host = {}
all_rows = re.split('\r|\n',item.hostAliases)
2021-09-07 18:09:47 +08:00
all_rows = [all_row.strip() for all_row in all_rows if all_row.strip()]
2021-08-17 17:00:34 +08:00
for row in all_rows:
hosts = row.split(' ')
hosts = [host for host in hosts if host]
if len(hosts) > 1:
if hosts[0] in all_host:
all_host[hosts[0]]=all_host[hosts[0]]+hosts[1:]
else:
all_host[hosts[0]] = hosts[1:]
hostAliases=''
for ip in all_host:
hostAliases+=ip+" "+" ".join(all_host[ip])
hostAliases+='\n'
item.hostAliases = hostAliases.strip()
task_args = json.loads(item.demo)
job_args = json.loads(item.args)
item.demo = json.dumps(core.validate_task_args(task_args, job_args),indent=4, ensure_ascii=False)
def pre_update(self, item):
self.pre_add(item)
# @pysnooper.snoop()
def post_list(self,items):
def sort_expand_index(items, dbsession):
all = {
0: []
}
for item in items:
try:
if item.expand:
index = float(json.loads(item.expand).get('index', 0))+float(json.loads(item.project.expand).get('index', 0))*1000
if index:
if index in all:
all[index].append(item)
else:
all[index] = [item]
else:
all[0].append(item)
else:
all[0].append(item)
except Exception as e:
print(e)
back = []
for index in sorted(all):
back.extend(all[index])
# 当有小数的时候自动转正
# if float(index)!=int(index):
# pass
return back
return sort_expand_index(items,db.session)
@expose("/run", methods=["POST"])
def run(self):
request_data = request.json
job_template_id=request_data.get('job_template_id','')
job_template_name = request_data.get('job_template_name', '')
run_id = request_data.get('run_id', '').replace('_','-')
resource_memory = request_data.get('resource_memory', '')
resource_cpu = request_data.get('resource_cpu', '')
task_args = request_data.get('args', '')
if (not job_template_id and not job_template_name) or not run_id or task_args=='':
response = make_response("输入参数不齐全")
response.status_code = 400
return response
job_template = None
if job_template_id:
job_template = db.session.query(Job_Template).filter_by(id=int(job_template_id)).first()
elif job_template_name:
job_template = db.session.query(Job_Template).filter_by(name=job_template_name).first()
if not job_template:
response = make_response("no job template exist")
response.status_code = 400
return response
from myapp.utils.py.py_k8s import K8s
k8s = K8s()
namespace = conf.get('PIPELINE_NAMESPACE')
pod_name = "venus-" + run_id.replace('_', '-')
pod_name = pod_name[:60]
pod = k8s.get_pods(namespace=namespace, pod_name=pod_name)
# print(pod)
if pod:
pod = pod[0]
# 有历史,直接删除
if pod:
k8s.delete_pods(namespace=namespace, pod_name=pod_name)
time.sleep(2)
pod=None
# 没有历史或者没有运行态,直接创建
if not pod:
args=[]
job_template_args = json.loads(job_template.args) if job_template.args else {}
for arg_name in task_args:
arg_type=''
for group in job_template_args:
for template_arg in job_template_args[group]:
if template_arg==arg_name:
arg_type=job_template_args[group][template_arg].get('type','')
arg_value = task_args[arg_name]
if arg_value:
args.append(arg_name)
if arg_type=='json':
args.append(json.dumps(arg_value))
else:
args.append(arg_value)
# command = ['sh', '-c','sleep 7200']
volume_mount = 'kubeflow-cfs-workspace(pvc):/mnt,kubeflow-cfs-archives(pvc):/archives'
env = job_template.env + "\n"
env += 'KFJ_TASK_ID=0\n'
env += 'KFJ_TASK_NAME=' + str('venus-'+run_id) + "\n"
env += 'KFJ_TASK_NODE_SELECTOR=cpu=true,train=true\n'
env += 'KFJ_TASK_VOLUME_MOUNT=' + str(volume_mount) + "\n"
env += 'KFJ_TASK_IMAGES=' + str(job_template.images) + "\n"
env += 'KFJ_TASK_RESOURCE_CPU=' + str(resource_cpu) + "\n"
env += 'KFJ_TASK_RESOURCE_MEMORY=' + str(resource_memory) + "\n"
env += 'KFJ_TASK_RESOURCE_GPU=0\n'
env += 'KFJ_PIPELINE_ID=0\n'
env += 'KFJ_RUN_ID=' + run_id + "\n"
env += 'KFJ_CREATOR=' + str(g.user.username) + "\n"
env += 'KFJ_RUNNER=' + str(g.user.username) + "\n"
env += 'KFJ_PIPELINE_NAME=venus\n'
env += 'KFJ_NAMESPACE=pipeline' + "\n"
def template_str(src_str):
rtemplate = Environment(loader=BaseLoader, undefined=DebugUndefined).from_string(src_str)
des_str = rtemplate.render(creator=g.user.username,
datetime=datetime,
runner=g.user.username,
uuid=uuid,
pipeline_id='0',
2021-09-07 18:09:47 +08:00
pipeline_name='venus-task',
cluster_name=conf.get('ENVIRONMENT')
2021-08-17 17:00:34 +08:00
)
return des_str
global_envs = json.loads(template_str(json.dumps(conf.get('GLOBAL_ENV', {}), indent=4, ensure_ascii=False)))
for global_env_key in global_envs:
env += global_env_key + '=' + global_envs[global_env_key] + "\n"
2021-09-07 18:09:47 +08:00
hostAliases=job_template.hostAliases+"\n"+conf.get('HOSTALIASES','')
2021-08-17 17:00:34 +08:00
k8s.create_debug_pod(namespace,
name=pod_name,
labels={'run-rtx': g.user.username},
command=None,
args=args,
volume_mount=volume_mount,
working_dir=None,
node_selector='cpu=true,train=true',
resource_cpu=resource_cpu,
resource_memory=resource_memory,
resource_gpu=0,
image_pull_policy='Always',
image_pull_secrets=[job_template.images.repository.hubsecret],
image=job_template.images.name,
2021-09-07 18:09:47 +08:00
hostAliases=hostAliases,
2021-08-17 17:00:34 +08:00
env=env,
privileged=job_template.privileged,
accounts=job_template.accounts,
username=g.user.username
)
try_num = 5
while (try_num > 0):
pod = k8s.get_pods(namespace=namespace, pod_name=pod_name)
# print(pod)
if pod:
break
try_num = try_num - 1
time.sleep(2)
if try_num == 0:
response = make_response("启动时间过长,一分钟后重试")
response.status_code = 400
return response
user_roles = [role.name.lower() for role in list(g.user.roles)]
if "admin" in user_roles:
pod_url = conf.get('K8S_DASHBOARD_CLUSTER') + "#/log/%s/%s/pod?namespace=%s&container=%s" % (namespace, pod_name, namespace, pod_name)
else:
pod_url = conf.get('K8S_DASHBOARD_PIPELINE') + "#/log/%s/%s/pod?namespace=%s&container=%s" % (namespace, pod_name, namespace, pod_name)
print(pod_url)
response = make_response("启动成功,日志地址: %s"%pod_url)
response.status_code = 200
return response
@expose("/listen", methods=["POST"])
def listen(self):
request_data = request.json
run_id = request_data.get('run_id', '').replace('_','-')
if not run_id:
response = make_response("输入参数不齐全")
response.status_code = 400
return response
from myapp.utils.py.py_k8s import K8s
k8s = K8s()
namespace = conf.get('PIPELINE_NAMESPACE')
pod_name = "venus-" + run_id.replace('_', '-')
pod_name = pod_name[:60]
pod = k8s.get_pods(namespace=namespace, pod_name=pod_name)
# print(pod)
if pod:
pod = pod[0]
if type(pod['start_time'])==datetime.datetime:
pod['start_time'] = pod['start_time'].strftime("%Y-%d-%m %H:%M:%S")
print(pod)
response = make_response(json.dumps(pod))
response.status_code = 200
return response
else:
response = make_response('no pod')
response.status_code = 400
return response
@action(
"copy", __("Copy Job Template"), confirmation=__('Copy Job Template'), icon="fa-copy",multiple=True, single=False
)
def copy(self, job_templates):
if not isinstance(job_templates, list):
job_templates = [job_templates]
try:
for job_template in job_templates:
new_job_template = job_template.clone()
new_job_template.name = new_job_template.name+"_copy_"+uuid.uuid4().hex[:4]
new_job_template.created_on = datetime.datetime.now()
new_job_template.changed_on = datetime.datetime.now()
db.session.add(new_job_template)
db.session.commit()
except InvalidRequestError:
db.session.rollback()
except Exception as e:
raise e
return redirect(request.referrer)
class Job_Template_ModelView(Job_Template_ModelView_Base,MyappModelView,DeleteMixin):
datamodel = SQLAInterface(Job_Template)
appbuilder.add_view(Job_Template_ModelView,"任务模板",href="/job_template_modelview/list/?_flt_2_name=",icon = 'fa-flag-o',category = '训练',category_icon = 'fa-envelope')
# 添加api
class Job_Template_ModelView_Api(Job_Template_ModelView_Base,MyappModelRestApi):
datamodel = SQLAInterface(Job_Template)
route_base = '/job_template_modelview/api'
add_columns = ['project', 'images', 'name', 'version', 'describe', 'args', 'env','hostAliases', 'privileged','accounts', 'demo','expand']
edit_columns = add_columns
list_columns = ['project','name','version','describe','images','workdir','entrypoint','args','demo','env','hostAliases','privileged','accounts','created_by','changed_by','created_on','changed_on','expand']
show_columns = ['project','name','version','describe','images_url','workdir','entrypoint','args','demo','env','hostAliases','privileged','expand']
appbuilder.add_api(Job_Template_ModelView_Api)