This commit is contained in:
FerdinandWard 2022-08-20 13:45:54 +08:00
parent 706a6c7fb3
commit c6130e6d54
8 changed files with 65 additions and 25 deletions

View File

@ -45,8 +45,8 @@ CSV_EXPORT = {"encoding": "utf_8_sig"}
# CSRF (cross-site request forgery) protection settings -- not CORS; CSRF checking is switched off here
WTF_CSRF_ENABLED = False
# 跨域访问允许通过的站点
WTF_CSRF_EXEMPT_LIST = ['example.local.com']
# Add endpoints that need to be exempt from CSRF protection
WTF_CSRF_EXEMPT_LIST = ["myapp.views.core.log"]
# 是否debug模式运行
DEBUG = os.environ.get("FLASK_ENV") == "development"

View File

@ -200,6 +200,10 @@ class InferenceService(Model,AuditMixinNullable,MyappModelBase,service_common):
def input_html(self):
return Markup('<pre><code>' + self.model_input + '</code></pre>')
@property
def resource(self):
    """Return a one-line summary of this service's resource settings.

    The values are read from ``resource_cpu``, ``resource_memory`` and
    ``resource_gpu`` on the instance and rendered as
    ``cpu:<cpu>,memory:<memory>,gpu:<gpu>``.
    """
    quota = (self.resource_cpu, self.resource_memory, self.resource_gpu)
    return 'cpu:%s,memory:%s,gpu:%s' % quota
@property
def operate_html(self):
url=self.project.cluster.get('GRAFANA_HOST','').strip('/')+conf.get('GRAFANA_SERVICE_PATH')+self.name

View File

@ -686,6 +686,21 @@ class K8s():
"mountPath": '/etc/localtime'
}
)
if '/dev/shm' not in volume_mount:
k8s_volume_mounts.append(
{
"name": 'dshm',
"mountPath": "/dev/shm"
}
)
k8s_volumes.append(
{
"name": "dshm",
"emptyDir": {
"medium": "Memory"
}
}
)
return k8s_volumes,k8s_volume_mounts

View File

@ -102,10 +102,10 @@ class InferenceService_ModelView_base():
edit_form_query_rel_fields = add_form_query_rel_fields
list_columns = ['project','service_type','label','model_name_url','model_version','inference_host_url','ip','model_status','creator','modified','operate_html']
list_columns = ['project','service_type','label','model_name_url','model_version','inference_host_url','ip','model_status','resource','creator','modified','operate_html']
cols_width={
"project":{"type": "ellip2", "width": 150},
"label": {"type": "ellip2", "width": 350},
"label": {"type": "ellip2", "width": 300},
"service_type": {"type": "ellip2", "width": 100},
"model_name_url":{"type": "ellip2", "width": 300},
"model_version": {"type": "ellip2", "width": 200},
@ -114,6 +114,7 @@ class InferenceService_ModelView_base():
"model_status": {"type": "ellip2", "width": 100},
"modified": {"type": "ellip2", "width": 150},
"operate_html": {"type": "ellip2", "width": 300},
"resource": {"type": "ellip2", "width": 300},
}
search_columns = ['name','created_by','project','service_type','label','model_name','model_version','model_path','host','model_status','resource_gpu']
@ -123,12 +124,21 @@ class InferenceService_ModelView_base():
base_filters = [["id",InferenceService_Filter, lambda: []]]
custom_service = 'serving'
service_type_choices= [custom_service,'tfserving','torch-server','onnxruntime','triton-server']
# label_columns = {
# "host": _("域名测试环境test.xx调试环境 debug.xx"),
# }
# service_type_choices= ['',custom_service,'tfserving','torch-server','onnxruntime','triton-server','kfserving-tf','kfserving-torch','kfserving-onnx','kfserving-sklearn','kfserving-xgboost','kfserving-lightgbm','kfserving-paddle']
service_type_choices= ['',custom_service,'tfserving','torch-server','onnxruntime','triton-server']
sepc_label_columns = {
# "host": _("域名测试环境test.xx调试环境 debug.xx"),
"resource":"资源"
}
service_type_choices = [x.replace('_','-') for x in service_type_choices]
add_form_extra_fields={
"project": QuerySelectField(
_(datamodel.obj.lab('project')),
query_factory=filter_join_org_project,
allow_blank=True,
widget=Select2Widget(),
validators=[DataRequired()]
),
"resource_memory":StringField(_(datamodel.obj.lab('resource_memory')),default='5G',description='内存的资源使用限制示例1G10G 最大100G如需更多联系管路员',widget=BS3TextFieldWidget(),validators=[DataRequired()]),
"resource_cpu":StringField(_(datamodel.obj.lab('resource_cpu')), default='5',description='cpu的资源使用限制(单位核),示例 0.410最大50核如需更多联系管路员',widget=BS3TextFieldWidget(), validators=[DataRequired()]),
"min_replicas": StringField(_(datamodel.obj.lab('min_replicas')), default=InferenceService.min_replicas.default.arg,description='最小副本数,用来配置高可用,流量变动自动伸缩',widget=BS3TextFieldWidget(), validators=[DataRequired()]),
@ -764,13 +774,13 @@ output %s
def clear(self, service_id):
service = db.session.query(InferenceService).filter_by(id=service_id).first()
if service:
self.delete_old_service(service.name,service.project.cluster)
service.model_status='offline'
if not service.deploy_history:
service.deploy_history=''
service.deploy_history = service.deploy_history + "\n" + "clear%s" % datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
db.session.commit()
flash('服务清理完成', category='success')
self.delete_old_service(service.name, service.project.cluster)
service.model_status='offline'
if not service.deploy_history:
service.deploy_history=''
service.deploy_history = service.deploy_history + "\n" + "clear: %s %s" % (g.user.username,datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
db.session.commit()
flash('服务清理完成', category='success')
return redirect(conf.get('MODEL_URLS',{}).get('inferenceservice',''))

View File

@ -315,8 +315,6 @@ class Notebook_ModelView_Base():
command=None
workingDir=None
volume_mount = notebook.volume_mount
if '/dev/shm' not in volume_mount:
volume_mount += ',10G(memory):/dev/shm'
rewrite_url = '/'
pre_command = '(nohup sh /init.sh > /notebook_init.log 2>&1 &) ; (nohup sh /mnt/%s/init.sh > /init.log 2>&1 &) ; '%notebook.created_by.username
if notebook.ide_type=='jupyter':
@ -442,10 +440,10 @@ class Notebook_ModelView_Base():
# print(crd_json)
crd = k8s_client.create_crd(group=crd_info['group'], version=crd_info['version'], plural=crd_info['plural'],namespace=namespace, body=crd_json)
# 创建EXTERNAL_IP的服务
SERVICE_EXTERNAL_IP = conf.get('SERVICE_EXTERNAL_IP', None)
if not SERVICE_EXTERNAL_IP and notebook.project.expand:
SERVICE_EXTERNAL_IP = json.loads(notebook.project.expand).get('SERVICE_EXTERNAL_IP', SERVICE_EXTERNAL_IP)
# 边缘模式时需要根据项目组中的配置设置代理ip
SERVICE_EXTERNAL_IP = ''
if notebook.project.expand:
SERVICE_EXTERNAL_IP = json.loads(notebook.project.expand).get('SERVICE_EXTERNAL_IP', '')
if type(SERVICE_EXTERNAL_IP)==str:
SERVICE_EXTERNAL_IP = [SERVICE_EXTERNAL_IP]

View File

@ -1279,7 +1279,7 @@ class Pipeline_ModelView_Api(Pipeline_ModelView_Base,MyappModelRestApi):
# show_columns = ['project','name','describe','namespace','schedule_type','cron_time','node_selector','depends_on_past','max_active_runs','parallelism','global_env','dag_json','pipeline_file_html','pipeline_argo_id','version_id','run_id','created_by','changed_by','created_on','changed_on','expand']
list_columns = ['id','project','pipeline_url','creator','modified']
add_columns = ['project','name','describe','schedule_type','cron_time','depends_on_past','max_active_runs','parallelism','global_env','alert_status','expand']
edit_columns = ['project','name','describe','schedule_type','cron_time','depends_on_past','max_active_runs','parallelism','dag_json','global_env','alert_status','expand','created_by']
edit_columns = ['project','name','describe','schedule_type','cron_time','depends_on_past','max_active_runs','parallelism','dag_json','global_env','alert_status','expand','created_by','parameter']
related_views = [Task_ModelView_Api,]

View File

@ -112,6 +112,12 @@ class Service_ModelView_base():
edit_form_query_rel_fields = add_form_query_rel_fields
add_form_extra_fields={
"project": QuerySelectField(
_(datamodel.obj.lab('project')),
query_factory=filter_join_org_project,
allow_blank=True,
widget=Select2Widget()
),
"name":StringField(_(datamodel.obj.lab('name')), description='英文名(小写字母、数字、- 组成)最长50个字符',widget=BS3TextFieldWidget(), validators=[DataRequired(),Regexp("^[a-z][a-z0-9\-]*[a-z0-9]$"),Length(1,54)]),
"label":StringField(_(datamodel.obj.lab('label')), description='中文名', widget=BS3TextFieldWidget(),validators=[DataRequired()]),
"images": StringField(_(datamodel.obj.lab('images')), description='镜像全称', widget=BS3TextFieldWidget(), validators=[DataRequired()]),

View File

@ -560,18 +560,24 @@ class Task_ModelView_Base():
)
try_num=30
message = '启动时间过长,一分钟后刷新此页面'
while(try_num>0):
pod = k8s_client.get_pods(namespace=namespace, pod_name=pod_name)
# print(pod)
if pod:
pod = pod[0]
# 有历史非运行态,直接删除
if pod and pod['status'] == 'Running':
break
if pod:
if pod['status'] == 'Running':
break
else:
try:
message = '启动时间过长,一分钟后刷新此页面'+", status:"+pod['status']+", message:"+json.dumps(pod['status_more']['conditions'],indent=4,ensure_ascii=False)
except Exception as e:
print(e)
try_num=try_num-1
time.sleep(2)
if try_num==0:
message='启动时间过长,一分钟后重试'
flash(message,'warning')
return self.response(400, **{"status": 1, "result": {}, "message": message})
# return redirect('/pipeline_modelview/web/%s'%str(task.pipeline.id))
@ -755,6 +761,7 @@ class Task_ModelView_Api(Task_ModelView_Base,MyappModelRestApi):
datamodel = SQLAInterface(Task)
route_base = '/task_modelview/api'
# list_columns = ['name','label','job_template_url','volume_mount','debug']
list_columns =['name', 'label','pipeline', 'job_template','volume_mount','node_selector','command','overwrite_entrypoint','working_dir', 'args','resource_memory','resource_cpu','resource_gpu','timeout','retry','created_by','changed_by','created_on','changed_on','monitoring','expand']
add_columns = ['name','label','job_template','pipeline','working_dir','command','args','volume_mount','node_selector','resource_memory','resource_cpu','resource_gpu','timeout','retry','expand']
edit_columns = add_columns
show_columns = ['name', 'label','pipeline', 'job_template','volume_mount','node_selector','command','overwrite_entrypoint','working_dir', 'args','resource_memory','resource_cpu','resource_gpu','timeout','retry','created_by','changed_by','created_on','changed_on','monitoring','expand']