diff --git a/install/docker/config.py b/install/docker/config.py
index c376be60..fba37b7a 100644
--- a/install/docker/config.py
+++ b/install/docker/config.py
@@ -45,8 +45,8 @@ CSV_EXPORT = {"encoding": "utf_8_sig"}

# 跨域配置
WTF_CSRF_ENABLED = False
-# 跨域访问允许通过的站点
-WTF_CSRF_EXEMPT_LIST = ['example.local.com']
+# Add endpoints that need to be exempt from CSRF protection
+WTF_CSRF_EXEMPT_LIST = ["myapp.views.core.log"]

# 是否debug模式运行
DEBUG = os.environ.get("FLASK_ENV") == "development"
diff --git a/myapp/models/model_serving.py b/myapp/models/model_serving.py
index de7c2e7c..6a7c08ac 100644
--- a/myapp/models/model_serving.py
+++ b/myapp/models/model_serving.py
@@ -200,6 +200,10 @@ class InferenceService(Model,AuditMixinNullable,MyappModelBase,service_common):
    def input_html(self):
        return Markup('<pre><code>' + self.model_input + '</code></pre>')
+ @property
+ def resource(self):
+ return 'cpu:%s,memory:%s,gpu:%s'%(self.resource_cpu,self.resource_memory,self.resource_gpu)
+
@property
def operate_html(self):
url=self.project.cluster.get('GRAFANA_HOST','').strip('/')+conf.get('GRAFANA_SERVICE_PATH')+self.name
diff --git a/myapp/utils/py/py_k8s.py b/myapp/utils/py/py_k8s.py
index cb395598..d1ca7695 100755
--- a/myapp/utils/py/py_k8s.py
+++ b/myapp/utils/py/py_k8s.py
@@ -686,6 +686,21 @@ class K8s():
"mountPath": '/etc/localtime'
}
)
+ if '/dev/shm' not in volume_mount:
+ k8s_volume_mounts.append(
+ {
+ "name": 'dshm',
+ "mountPath": "/dev/shm"
+ }
+ )
+ k8s_volumes.append(
+ {
+ "name": "dshm",
+ "emptyDir": {
+ "medium": "Memory"
+ }
+ }
+ )
return k8s_volumes,k8s_volume_mounts
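
For context on the `py_k8s.py` hunk above: an `emptyDir` with `medium: Memory` mounted at `/dev/shm` gives the container a tmpfs-backed shared-memory area instead of the default 64Mi, which frameworks such as PyTorch DataLoader workers commonly need. A minimal standalone sketch of the same check, not part of the diff; the helper name `add_dshm` and the example volume string are illustrative only:

```python
# Hypothetical, self-contained sketch of the /dev/shm handling added above.
def add_dshm(volume_mount, k8s_volumes, k8s_volume_mounts):
    """Mount a memory-backed emptyDir at /dev/shm unless the user already mounted one."""
    if '/dev/shm' not in volume_mount:
        k8s_volume_mounts.append({"name": "dshm", "mountPath": "/dev/shm"})
        # medium=Memory backs the emptyDir with tmpfs, lifting the 64Mi default shm size
        k8s_volumes.append({"name": "dshm", "emptyDir": {"medium": "Memory"}})
    return k8s_volumes, k8s_volume_mounts

volumes, mounts = add_dshm("workspace(pvc):/mnt", [], [])
print(volumes)  # [{'name': 'dshm', 'emptyDir': {'medium': 'Memory'}}]
print(mounts)   # [{'name': 'dshm', 'mountPath': '/dev/shm'}]
```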
diff --git a/myapp/views/view_inferenceserving.py b/myapp/views/view_inferenceserving.py
index bbbbe250..abfdd3ef 100644
--- a/myapp/views/view_inferenceserving.py
+++ b/myapp/views/view_inferenceserving.py
@@ -102,10 +102,10 @@ class InferenceService_ModelView_base():
edit_form_query_rel_fields = add_form_query_rel_fields
- list_columns = ['project','service_type','label','model_name_url','model_version','inference_host_url','ip','model_status','creator','modified','operate_html']
+ list_columns = ['project','service_type','label','model_name_url','model_version','inference_host_url','ip','model_status','resource','creator','modified','operate_html']
cols_width={
"project":{"type": "ellip2", "width": 150},
- "label": {"type": "ellip2", "width": 350},
+ "label": {"type": "ellip2", "width": 300},
"service_type": {"type": "ellip2", "width": 100},
"model_name_url":{"type": "ellip2", "width": 300},
"model_version": {"type": "ellip2", "width": 200},
@@ -114,6 +114,7 @@ class InferenceService_ModelView_base():
"model_status": {"type": "ellip2", "width": 100},
"modified": {"type": "ellip2", "width": 150},
"operate_html": {"type": "ellip2", "width": 300},
+ "resource": {"type": "ellip2", "width": 300},
}
search_columns = ['name','created_by','project','service_type','label','model_name','model_version','model_path','host','model_status','resource_gpu']
@@ -123,12 +124,21 @@ class InferenceService_ModelView_base():
base_filters = [["id",InferenceService_Filter, lambda: []]]
custom_service = 'serving'
- service_type_choices= [custom_service,'tfserving','torch-server','onnxruntime','triton-server']
- # label_columns = {
- # "host": _("域名:测试环境test.xx,调试环境 debug.xx"),
- # }
+ # service_type_choices= ['',custom_service,'tfserving','torch-server','onnxruntime','triton-server','kfserving-tf','kfserving-torch','kfserving-onnx','kfserving-sklearn','kfserving-xgboost','kfserving-lightgbm','kfserving-paddle']
+ service_type_choices= ['',custom_service,'tfserving','torch-server','onnxruntime','triton-server']
+ sepc_label_columns = {
+ # "host": _("域名:测试环境test.xx,调试环境 debug.xx"),
+ "resource":"资源"
+ }
service_type_choices = [x.replace('_','-') for x in service_type_choices]
add_form_extra_fields={
+ "project": QuerySelectField(
+ _(datamodel.obj.lab('project')),
+ query_factory=filter_join_org_project,
+ allow_blank=True,
+ widget=Select2Widget(),
+ validators=[DataRequired()]
+ ),
"resource_memory":StringField(_(datamodel.obj.lab('resource_memory')),default='5G',description='内存的资源使用限制,示例1G,10G, 最大100G,如需更多联系管路员',widget=BS3TextFieldWidget(),validators=[DataRequired()]),
"resource_cpu":StringField(_(datamodel.obj.lab('resource_cpu')), default='5',description='cpu的资源使用限制(单位核),示例 0.4,10,最大50核,如需更多联系管路员',widget=BS3TextFieldWidget(), validators=[DataRequired()]),
"min_replicas": StringField(_(datamodel.obj.lab('min_replicas')), default=InferenceService.min_replicas.default.arg,description='最小副本数,用来配置高可用,流量变动自动伸缩',widget=BS3TextFieldWidget(), validators=[DataRequired()]),
@@ -764,13 +774,13 @@ output %s
def clear(self, service_id):
service = db.session.query(InferenceService).filter_by(id=service_id).first()
if service:
- self.delete_old_service(service.name,service.project.cluster)
- service.model_status='offline'
- if not service.deploy_history:
- service.deploy_history=''
- service.deploy_history = service.deploy_history + "\n" + "clear:%s" % datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
- db.session.commit()
- flash('服务清理完成', category='success')
+ self.delete_old_service(service.name, service.project.cluster)
+ service.model_status='offline'
+ if not service.deploy_history:
+ service.deploy_history=''
+ service.deploy_history = service.deploy_history + "\n" + "clear: %s %s" % (g.user.username,datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
+ db.session.commit()
+ flash('服务清理完成', category='success')
return redirect(conf.get('MODEL_URLS',{}).get('inferenceservice',''))
diff --git a/myapp/views/view_notebook.py b/myapp/views/view_notebook.py
index e2b7a6d8..909f8c90 100644
--- a/myapp/views/view_notebook.py
+++ b/myapp/views/view_notebook.py
@@ -315,8 +315,6 @@ class Notebook_ModelView_Base():
command=None
workingDir=None
volume_mount = notebook.volume_mount
- if '/dev/shm' not in volume_mount:
- volume_mount += ',10G(memory):/dev/shm'
rewrite_url = '/'
pre_command = '(nohup sh /init.sh > /notebook_init.log 2>&1 &) ; (nohup sh /mnt/%s/init.sh > /init.log 2>&1 &) ; '%notebook.created_by.username
if notebook.ide_type=='jupyter':
@@ -442,10 +440,10 @@ class Notebook_ModelView_Base():
# print(crd_json)
crd = k8s_client.create_crd(group=crd_info['group'], version=crd_info['version'], plural=crd_info['plural'],namespace=namespace, body=crd_json)
- # 创建EXTERNAL_IP的服务
- SERVICE_EXTERNAL_IP = conf.get('SERVICE_EXTERNAL_IP', None)
- if not SERVICE_EXTERNAL_IP and notebook.project.expand:
- SERVICE_EXTERNAL_IP = json.loads(notebook.project.expand).get('SERVICE_EXTERNAL_IP', SERVICE_EXTERNAL_IP)
+ # 边缘模式时,需要根据项目组中的配置设置代理ip
+ SERVICE_EXTERNAL_IP = ''
+ if notebook.project.expand:
+ SERVICE_EXTERNAL_IP = json.loads(notebook.project.expand).get('SERVICE_EXTERNAL_IP', '')
if type(SERVICE_EXTERNAL_IP)==str:
SERVICE_EXTERNAL_IP = [SERVICE_EXTERNAL_IP]
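
For the notebook hunk above: the proxy IP is no longer taken from the global `SERVICE_EXTERNAL_IP` config; in edge mode it comes only from the project group's `expand` JSON, and a bare string is normalized to a list. A hedged sketch of that lookup, with a made-up function name and sample `expand` value:

```python
import json

def project_external_ip(project_expand):
    """Illustrative only: read SERVICE_EXTERNAL_IP from a project's expand JSON (edge mode)."""
    service_external_ip = ''
    if project_expand:
        service_external_ip = json.loads(project_expand).get('SERVICE_EXTERNAL_IP', '')
    if type(service_external_ip) == str:
        service_external_ip = [service_external_ip]
    return service_external_ip

print(project_external_ip('{"SERVICE_EXTERNAL_IP": "192.168.3.100"}'))  # ['192.168.3.100']
print(project_external_ip(''))  # [''] -- no proxy ip configured for the project
```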
diff --git a/myapp/views/view_pipeline.py b/myapp/views/view_pipeline.py
index d6e3afdd..820bba37 100644
--- a/myapp/views/view_pipeline.py
+++ b/myapp/views/view_pipeline.py
@@ -1279,7 +1279,7 @@ class Pipeline_ModelView_Api(Pipeline_ModelView_Base,MyappModelRestApi):
# show_columns = ['project','name','describe','namespace','schedule_type','cron_time','node_selector','depends_on_past','max_active_runs','parallelism','global_env','dag_json','pipeline_file_html','pipeline_argo_id','version_id','run_id','created_by','changed_by','created_on','changed_on','expand']
list_columns = ['id','project','pipeline_url','creator','modified']
add_columns = ['project','name','describe','schedule_type','cron_time','depends_on_past','max_active_runs','parallelism','global_env','alert_status','expand']
- edit_columns = ['project','name','describe','schedule_type','cron_time','depends_on_past','max_active_runs','parallelism','dag_json','global_env','alert_status','expand','created_by']
+ edit_columns = ['project','name','describe','schedule_type','cron_time','depends_on_past','max_active_runs','parallelism','dag_json','global_env','alert_status','expand','created_by','parameter']
related_views = [Task_ModelView_Api,]
diff --git a/myapp/views/view_serving.py b/myapp/views/view_serving.py
index dd738568..daf0011f 100644
--- a/myapp/views/view_serving.py
+++ b/myapp/views/view_serving.py
@@ -112,6 +112,12 @@ class Service_ModelView_base():
edit_form_query_rel_fields = add_form_query_rel_fields
add_form_extra_fields={
+ "project": QuerySelectField(
+ _(datamodel.obj.lab('project')),
+ query_factory=filter_join_org_project,
+ allow_blank=True,
+ widget=Select2Widget()
+ ),
"name":StringField(_(datamodel.obj.lab('name')), description='英文名(小写字母、数字、- 组成),最长50个字符',widget=BS3TextFieldWidget(), validators=[DataRequired(),Regexp("^[a-z][a-z0-9\-]*[a-z0-9]$"),Length(1,54)]),
"label":StringField(_(datamodel.obj.lab('label')), description='中文名', widget=BS3TextFieldWidget(),validators=[DataRequired()]),
"images": StringField(_(datamodel.obj.lab('images')), description='镜像全称', widget=BS3TextFieldWidget(), validators=[DataRequired()]),
diff --git a/myapp/views/view_task.py b/myapp/views/view_task.py
index f19c0601..836c68dc 100644
--- a/myapp/views/view_task.py
+++ b/myapp/views/view_task.py
@@ -560,18 +560,24 @@ class Task_ModelView_Base():
)
try_num=30
+ message = '启动时间过长,一分钟后刷新此页面'
while(try_num>0):
pod = k8s_client.get_pods(namespace=namespace, pod_name=pod_name)
# print(pod)
if pod:
pod = pod[0]
# 有历史非运行态,直接删除
- if pod and pod['status'] == 'Running':
- break
+ if pod:
+ if pod['status'] == 'Running':
+ break
+ else:
+ try:
+ message = '启动时间过长,一分钟后刷新此页面'+", status:"+pod['status']+", message:"+json.dumps(pod['status_more']['conditions'],indent=4,ensure_ascii=False)
+ except Exception as e:
+ print(e)
try_num=try_num-1
time.sleep(2)
if try_num==0:
- message='启动时间过长,一分钟后重试'
flash(message,'warning')
return self.response(400, **{"status": 1, "result": {}, "message": message})
# return redirect('/pipeline_modelview/web/%s'%str(task.pipeline.id))
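
The polling loop above now keeps the last observed pod status and conditions, so the warning shown after 30 attempts is actionable rather than a generic timeout. A hedged standalone sketch of that pattern; the `fetch_pod` callable stands in for `k8s_client.get_pods` and is hypothetical:

```python
import json
import time

def wait_until_running(fetch_pod, try_num=30, interval=2):
    """Illustrative only: poll a pod until Running, keeping the last status for the user message."""
    # Default message (translated): "start-up is taking long, refresh this page in a minute"
    message = '启动时间过长,一分钟后刷新此页面'
    while try_num > 0:
        pod = fetch_pod()
        if pod:
            if pod['status'] == 'Running':
                return True, ''
            try:
                message = message + ", status:" + pod['status'] + ", message:" + json.dumps(
                    pod['status_more']['conditions'], indent=4, ensure_ascii=False)
            except Exception as e:
                print(e)
        try_num -= 1
        time.sleep(interval)
    return False, message

# Example with a stubbed pod that never reaches Running:
ok, msg = wait_until_running(
    lambda: {'status': 'Pending', 'status_more': {'conditions': [{'reason': 'Unschedulable'}]}},
    try_num=2, interval=0)
print(ok, msg)
```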
@@ -755,6 +761,7 @@ class Task_ModelView_Api(Task_ModelView_Base,MyappModelRestApi):
datamodel = SQLAInterface(Task)
route_base = '/task_modelview/api'
# list_columns = ['name','label','job_template_url','volume_mount','debug']
+ list_columns =['name', 'label','pipeline', 'job_template','volume_mount','node_selector','command','overwrite_entrypoint','working_dir', 'args','resource_memory','resource_cpu','resource_gpu','timeout','retry','created_by','changed_by','created_on','changed_on','monitoring','expand']
add_columns = ['name','label','job_template','pipeline','working_dir','command','args','volume_mount','node_selector','resource_memory','resource_cpu','resource_gpu','timeout','retry','expand']
edit_columns = add_columns
show_columns = ['name', 'label','pipeline', 'job_template','volume_mount','node_selector','command','overwrite_entrypoint','working_dir', 'args','resource_memory','resource_cpu','resource_gpu','timeout','retry','created_by','changed_by','created_on','changed_on','monitoring','expand']