From c6130e6d542524b70485c168a883c895d15b5ed0 Mon Sep 17 00:00:00 2001
From: FerdinandWard
Date: Sat, 20 Aug 2022 13:45:54 +0800
Subject: [PATCH] fix tip

---
 install/docker/config.py             |  4 ++--
 myapp/models/model_serving.py        |  4 ++++
 myapp/utils/py/py_k8s.py             | 15 ++++++++++++
 myapp/views/view_inferenceserving.py | 36 ++++++++++++++++++----------
 myapp/views/view_notebook.py         | 10 ++++----
 myapp/views/view_pipeline.py         |  2 +-
 myapp/views/view_serving.py          |  6 +++++
 myapp/views/view_task.py             | 13 +++++++---
 8 files changed, 65 insertions(+), 25 deletions(-)

diff --git a/install/docker/config.py b/install/docker/config.py
index c376be60..fba37b7a 100644
--- a/install/docker/config.py
+++ b/install/docker/config.py
@@ -45,8 +45,8 @@ CSV_EXPORT = {"encoding": "utf_8_sig"}
 
 # 跨域配置
 WTF_CSRF_ENABLED = False
-# 跨域访问允许通过的站点
-WTF_CSRF_EXEMPT_LIST = ['example.local.com']
+# Add endpoints that need to be exempt from CSRF protection
+WTF_CSRF_EXEMPT_LIST = ["myapp.views.core.log"]
 
 # 是否debug模式运行
 DEBUG = os.environ.get("FLASK_ENV") == "development"
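
WTF_CSRF_EXEMPT_LIST holds dotted import paths of view functions that CSRF protection should skip, which is why the placeholder hostname is replaced with a view path here. A minimal sketch of how such a list is typically consumed in a Superset-style Flask app (the app factory below is illustrative only, not this project's actual wiring):

    from flask import Flask
    from flask_wtf.csrf import CSRFProtect

    app = Flask(__name__)
    app.config["WTF_CSRF_EXEMPT_LIST"] = ["myapp.views.core.log"]

    csrf = CSRFProtect()
    csrf.init_app(app)
    for view in app.config["WTF_CSRF_EXEMPT_LIST"]:
        # CSRFProtect.exempt also accepts a dotted view path given as a string
        csrf.exempt(view)
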
diff --git a/myapp/models/model_serving.py b/myapp/models/model_serving.py
index de7c2e7c..6a7c08ac 100644
--- a/myapp/models/model_serving.py
+++ b/myapp/models/model_serving.py
@@ -200,6 +200,10 @@ class InferenceService(Model,AuditMixinNullable,MyappModelBase,service_common):
     def input_html(self):
         return Markup('' + self.model_input + '')
 
+    @property
+    def resource(self):
+        return 'cpu:%s,memory:%s,gpu:%s'%(self.resource_cpu,self.resource_memory,self.resource_gpu)
+
     @property
     def operate_html(self):
         url=self.project.cluster.get('GRAFANA_HOST','').strip('/')+conf.get('GRAFANA_SERVICE_PATH')+self.name
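
The new `resource` property simply concatenates the three resource fields into one display string; it backs the `resource` column added to the inference service list view further down. A standalone sketch with made-up values (not project code):

    # mirrors the property added above; the values are hypothetical
    class ResourceSketch:
        resource_cpu = '5'
        resource_memory = '5G'
        resource_gpu = '0'

        @property
        def resource(self):
            return 'cpu:%s,memory:%s,gpu:%s' % (self.resource_cpu, self.resource_memory, self.resource_gpu)

    print(ResourceSketch().resource)  # -> cpu:5,memory:5G,gpu:0
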
diff --git a/myapp/utils/py/py_k8s.py b/myapp/utils/py/py_k8s.py
index cb395598..d1ca7695 100755
--- a/myapp/utils/py/py_k8s.py
+++ b/myapp/utils/py/py_k8s.py
@@ -686,6 +686,21 @@ class K8s():
                 "mountPath": '/etc/localtime'
             }
         )
+        if '/dev/shm' not in volume_mount:
+            k8s_volume_mounts.append(
+                {
+                    "name": 'dshm',
+                    "mountPath": "/dev/shm"
+                }
+            )
+            k8s_volumes.append(
+                {
+                    "name": "dshm",
+                    "emptyDir": {
+                        "medium": "Memory"
+                    }
+                }
+            )
 
         return k8s_volumes,k8s_volume_mounts
 
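
Containers get a small /dev/shm by default (typically 64MB), which is too little for workloads such as PyTorch DataLoader workers, so pods built through this helper now get a RAM-backed emptyDir mounted at /dev/shm unless the user already mounted one. The two dicts appended above correspond to this pod-spec fragment (plain-dict sketch; the container name is hypothetical):

    # volumes/volumeMounts pair produced by the new branch, shown as a pod-spec fragment
    pod_spec_fragment = {
        "containers": [{
            "name": "main",  # hypothetical container name
            "volumeMounts": [{"name": "dshm", "mountPath": "/dev/shm"}],
        }],
        "volumes": [{"name": "dshm", "emptyDir": {"medium": "Memory"}}],
    }
    print(pod_spec_fragment)
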
diff --git a/myapp/views/view_inferenceserving.py b/myapp/views/view_inferenceserving.py
index bbbbe250..abfdd3ef 100644
--- a/myapp/views/view_inferenceserving.py
+++ b/myapp/views/view_inferenceserving.py
@@ -102,10 +102,10 @@ class InferenceService_ModelView_base():
 
     edit_form_query_rel_fields = add_form_query_rel_fields
 
-    list_columns = ['project','service_type','label','model_name_url','model_version','inference_host_url','ip','model_status','creator','modified','operate_html']
+    list_columns = ['project','service_type','label','model_name_url','model_version','inference_host_url','ip','model_status','resource','creator','modified','operate_html']
     cols_width={
         "project":{"type": "ellip2", "width": 150},
-        "label": {"type": "ellip2", "width": 350},
+        "label": {"type": "ellip2", "width": 300},
         "service_type": {"type": "ellip2", "width": 100},
         "model_name_url":{"type": "ellip2", "width": 300},
         "model_version": {"type": "ellip2", "width": 200},
@@ -114,6 +114,7 @@ class InferenceService_ModelView_base():
         "model_status": {"type": "ellip2", "width": 100},
         "modified": {"type": "ellip2", "width": 150},
         "operate_html": {"type": "ellip2", "width": 300},
+        "resource": {"type": "ellip2", "width": 300},
     }
 
     search_columns = ['name','created_by','project','service_type','label','model_name','model_version','model_path','host','model_status','resource_gpu']
@@ -123,12 +124,21 @@ class InferenceService_ModelView_base():
     base_filters = [["id",InferenceService_Filter, lambda: []]]
 
     custom_service = 'serving'
-    service_type_choices= [custom_service,'tfserving','torch-server','onnxruntime','triton-server']
-    # label_columns = {
-    #     "host": _("域名:测试环境test.xx,调试环境 debug.xx"),
-    # }
+    # service_type_choices= ['',custom_service,'tfserving','torch-server','onnxruntime','triton-server','kfserving-tf','kfserving-torch','kfserving-onnx','kfserving-sklearn','kfserving-xgboost','kfserving-lightgbm','kfserving-paddle']
+    service_type_choices= ['',custom_service,'tfserving','torch-server','onnxruntime','triton-server']
+    sepc_label_columns = {
+        # "host": _("域名:测试环境test.xx,调试环境 debug.xx"),
+        "resource":"资源"
+    }
     service_type_choices = [x.replace('_','-') for x in service_type_choices]
     add_form_extra_fields={
+        "project": QuerySelectField(
+            _(datamodel.obj.lab('project')),
+            query_factory=filter_join_org_project,
+            allow_blank=True,
+            widget=Select2Widget(),
+            validators=[DataRequired()]
+        ),
         "resource_memory":StringField(_(datamodel.obj.lab('resource_memory')),default='5G',description='内存的资源使用限制,示例1G,10G, 最大100G,如需更多联系管路员',widget=BS3TextFieldWidget(),validators=[DataRequired()]),
         "resource_cpu":StringField(_(datamodel.obj.lab('resource_cpu')), default='5',description='cpu的资源使用限制(单位核),示例 0.4,10,最大50核,如需更多联系管路员',widget=BS3TextFieldWidget(), validators=[DataRequired()]),
         "min_replicas": StringField(_(datamodel.obj.lab('min_replicas')), default=InferenceService.min_replicas.default.arg,description='最小副本数,用来配置高可用,流量变动自动伸缩',widget=BS3TextFieldWidget(), validators=[DataRequired()]),
@@ -764,13 +774,13 @@ output %s
     def clear(self, service_id):
         service = db.session.query(InferenceService).filter_by(id=service_id).first()
         if service:
-            self.delete_old_service(service.name,service.project.cluster)
-        service.model_status='offline'
-        if not service.deploy_history:
-            service.deploy_history=''
-        service.deploy_history = service.deploy_history + "\n" + "clear:%s" % datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-        db.session.commit()
-        flash('服务清理完成', category='success')
+            self.delete_old_service(service.name, service.project.cluster)
+            service.model_status='offline'
+            if not service.deploy_history:
+                service.deploy_history=''
+            service.deploy_history = service.deploy_history + "\n" + "clear: %s %s" % (g.user.username,datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
+            db.session.commit()
+            flash('服务清理完成', category='success')
 
         return redirect(conf.get('MODEL_URLS',{}).get('inferenceservice',''))
 
diff --git a/myapp/views/view_notebook.py b/myapp/views/view_notebook.py
index e2b7a6d8..909f8c90 100644
--- a/myapp/views/view_notebook.py
+++ b/myapp/views/view_notebook.py
@@ -315,8 +315,6 @@ class Notebook_ModelView_Base():
         command=None
         workingDir=None
         volume_mount = notebook.volume_mount
-        if '/dev/shm' not in volume_mount:
-            volume_mount += ',10G(memory):/dev/shm'
         rewrite_url = '/'
         pre_command = '(nohup sh /init.sh > /notebook_init.log 2>&1 &) ; (nohup sh /mnt/%s/init.sh > /init.log 2>&1 &) ; '%notebook.created_by.username
         if notebook.ide_type=='jupyter':
@@ -442,10 +440,10 @@ class Notebook_ModelView_Base():
         # print(crd_json)
         crd = k8s_client.create_crd(group=crd_info['group'], version=crd_info['version'], plural=crd_info['plural'],namespace=namespace, body=crd_json)
 
-        # 创建EXTERNAL_IP的服务
-        SERVICE_EXTERNAL_IP = conf.get('SERVICE_EXTERNAL_IP', None)
-        if not SERVICE_EXTERNAL_IP and notebook.project.expand:
-            SERVICE_EXTERNAL_IP = json.loads(notebook.project.expand).get('SERVICE_EXTERNAL_IP', SERVICE_EXTERNAL_IP)
+        # 边缘模式时,需要根据项目组中的配置设置代理ip
+        SERVICE_EXTERNAL_IP = ''
+        if notebook.project.expand:
+            SERVICE_EXTERNAL_IP = json.loads(notebook.project.expand).get('SERVICE_EXTERNAL_IP', '')
         if type(SERVICE_EXTERNAL_IP)==str:
             SERVICE_EXTERNAL_IP = [SERVICE_EXTERNAL_IP]
 
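
With this change the notebook view no longer falls back to the global SERVICE_EXTERNAL_IP setting; in edge mode the proxy IP is taken only from the project group's expand JSON. A standalone sketch of the lookup (the sample expand value is made up):

    import json

    expand = '{"SERVICE_EXTERNAL_IP": "192.168.1.10"}'  # hypothetical project.expand content

    SERVICE_EXTERNAL_IP = ''
    if expand:
        SERVICE_EXTERNAL_IP = json.loads(expand).get('SERVICE_EXTERNAL_IP', '')
    if type(SERVICE_EXTERNAL_IP) == str:
        # a single address is normalized to a list, as in the diff above
        SERVICE_EXTERNAL_IP = [SERVICE_EXTERNAL_IP]

    print(SERVICE_EXTERNAL_IP)  # -> ['192.168.1.10']
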
diff --git a/myapp/views/view_pipeline.py b/myapp/views/view_pipeline.py
index d6e3afdd..820bba37 100644
--- a/myapp/views/view_pipeline.py
+++ b/myapp/views/view_pipeline.py
@@ -1279,7 +1279,7 @@ class Pipeline_ModelView_Api(Pipeline_ModelView_Base,MyappModelRestApi):
     # show_columns = ['project','name','describe','namespace','schedule_type','cron_time','node_selector','depends_on_past','max_active_runs','parallelism','global_env','dag_json','pipeline_file_html','pipeline_argo_id','version_id','run_id','created_by','changed_by','created_on','changed_on','expand']
     list_columns = ['id','project','pipeline_url','creator','modified']
     add_columns = ['project','name','describe','schedule_type','cron_time','depends_on_past','max_active_runs','parallelism','global_env','alert_status','expand']
-    edit_columns = ['project','name','describe','schedule_type','cron_time','depends_on_past','max_active_runs','parallelism','dag_json','global_env','alert_status','expand','created_by']
+    edit_columns = ['project','name','describe','schedule_type','cron_time','depends_on_past','max_active_runs','parallelism','dag_json','global_env','alert_status','expand','created_by','parameter']
 
     related_views = [Task_ModelView_Api,]
 
diff --git a/myapp/views/view_serving.py b/myapp/views/view_serving.py
index dd738568..daf0011f 100644
--- a/myapp/views/view_serving.py
+++ b/myapp/views/view_serving.py
@@ -112,6 +112,12 @@ class Service_ModelView_base():
     edit_form_query_rel_fields = add_form_query_rel_fields
 
     add_form_extra_fields={
+        "project": QuerySelectField(
+            _(datamodel.obj.lab('project')),
+            query_factory=filter_join_org_project,
+            allow_blank=True,
+            widget=Select2Widget()
+        ),
         "name":StringField(_(datamodel.obj.lab('name')), description='英文名(小写字母、数字、- 组成),最长50个字符',widget=BS3TextFieldWidget(), validators=[DataRequired(),Regexp("^[a-z][a-z0-9\-]*[a-z0-9]$"),Length(1,54)]),
         "label":StringField(_(datamodel.obj.lab('label')), description='中文名', widget=BS3TextFieldWidget(),validators=[DataRequired()]),
         "images": StringField(_(datamodel.obj.lab('images')), description='镜像全称', widget=BS3TextFieldWidget(), validators=[DataRequired()]),
diff --git a/myapp/views/view_task.py b/myapp/views/view_task.py
index f19c0601..836c68dc 100644
--- a/myapp/views/view_task.py
+++ b/myapp/views/view_task.py
@@ -560,18 +560,24 @@ class Task_ModelView_Base():
         )
 
         try_num=30
+        message = '启动时间过长,一分钟后刷新此页面'
         while(try_num>0):
             pod = k8s_client.get_pods(namespace=namespace, pod_name=pod_name)
             # print(pod)
             if pod:
                 pod = pod[0]
             # 有历史非运行态,直接删除
-            if pod and pod['status'] == 'Running':
-                break
+            if pod:
+                if pod['status'] == 'Running':
+                    break
+                else:
+                    try:
+                        message = '启动时间过长,一分钟后刷新此页面'+", status:"+pod['status']+", message:"+json.dumps(pod['status_more']['conditions'],indent=4,ensure_ascii=False)
+                    except Exception as e:
+                        print(e)
             try_num=try_num-1
             time.sleep(2)
         if try_num==0:
-            message='启动时间过长,一分钟后重试'
             flash(message,'warning')
             return self.response(400, **{"status": 1, "result": {}, "message": message})
         # return redirect('/pipeline_modelview/web/%s'%str(task.pipeline.id))
@@ -755,6 +761,7 @@ class Task_ModelView_Api(Task_ModelView_Base,MyappModelRestApi):
     datamodel = SQLAInterface(Task)
     route_base = '/task_modelview/api'
     # list_columns = ['name','label','job_template_url','volume_mount','debug']
+    list_columns =['name', 'label','pipeline', 'job_template','volume_mount','node_selector','command','overwrite_entrypoint','working_dir', 'args','resource_memory','resource_cpu','resource_gpu','timeout','retry','created_by','changed_by','created_on','changed_on','monitoring','expand']
     add_columns = ['name','label','job_template','pipeline','working_dir','command','args','volume_mount','node_selector','resource_memory','resource_cpu','resource_gpu','timeout','retry','expand']
     edit_columns = add_columns
     show_columns = ['name', 'label','pipeline', 'job_template','volume_mount','node_selector','command','overwrite_entrypoint','working_dir', 'args','resource_memory','resource_cpu','resource_gpu','timeout','retry','created_by','changed_by','created_on','changed_on','monitoring','expand']
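
The task view previously flashed a fixed "retry in a minute" tip whenever the pod did not reach Running within the retry window; it now rebuilds the tip on every poll with the pod's current status and conditions, so the user can see why the pod is stuck. A standalone sketch of that polling pattern (get_pods here is a stand-in for py_k8s.K8s().get_pods, and the English message is a stand-in for the Chinese tip):

    import json
    import time

    def wait_for_running(get_pods, namespace, pod_name, tries=30, interval=2):
        """Poll a pod until it is Running; keep the latest status in the tip message."""
        message = 'startup is taking long, refresh this page in a minute'
        while tries > 0:
            pods = get_pods(namespace=namespace, pod_name=pod_name)
            if pods:
                pod = pods[0]
                if pod['status'] == 'Running':
                    return True, message
                try:
                    # rebuild the tip with the current status and conditions, as in the diff
                    message = ('startup is taking long, refresh this page in a minute'
                               + ", status:" + pod['status']
                               + ", message:" + json.dumps(pod['status_more']['conditions'],
                                                           indent=4, ensure_ascii=False))
                except Exception as e:
                    print(e)
            tries -= 1
            time.sleep(interval)
        return False, message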