From b8cde3d78ad97c7e5d8f366348c22caff17938e5 Mon Sep 17 00:00:00 2001
From: "xiaoman.gao@extremevision.com.cn" <825485697@qq.com>
Date: Wed, 3 Apr 2024 22:01:43 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E6=97=A5=E5=BF=97=E6=89=93?=
=?UTF-8?q?=E5=8D=B0=E5=92=8C=E9=83=A8=E5=88=86=E6=98=BE=E7=A4=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
myapp/views/view_docker.py | 9 ++++----
myapp/views/view_inferenceserving.py | 11 +++++++--
myapp/views/view_notebook.py | 17 ++++++++++----
myapp/views/view_pipeline.py | 8 +++----
myapp/views/view_total_resource.py | 4 ++--
myapp/views/view_workflow.py | 34 +++++++++++++---------------
6 files changed, 47 insertions(+), 36 deletions(-)
diff --git a/myapp/views/view_docker.py b/myapp/views/view_docker.py
index 39c19c43..611b6c3e 100644
--- a/myapp/views/view_docker.py
+++ b/myapp/views/view_docker.py
@@ -136,8 +136,7 @@ class Docker_ModelView_Base():
self.add_form_extra_fields['base_image'] = StringField(
_('基础镜像'),
default=conf.get('USER_IMAGE',''),
- description=f'{__("基础镜像和构建方法可参考:")}{__("点击打开")}' % (
- conf.get('HELP_URL').get('docker', '')),
+ description=f'{__("基础镜像和构建方法可参考:")}{__("点击打开")}' % (conf.get('HELP_URL').get('docker', '')),
widget=BS3TextFieldWidget(),
validators=[DataRequired(), ]
)
@@ -180,7 +179,7 @@ class Docker_ModelView_Base():
def debug(self, docker_id):
docker = db.session.query(Docker).filter_by(id=docker_id).first()
from myapp.utils.py.py_k8s import K8s
- k8s_client = K8s(conf.get('CLUSTERS').get(conf.get('ENVIRONMENT')).get('KUBECONFIG', ''))
+ k8s_client = K8s(docker.project.cluster.get('KUBECONFIG', ''))
namespace = json.loads(docker.expand).get("namespace", conf.get('NOTEBOOK_NAMESPACE'))
pod_name = "docker-%s-%s" % (docker.created_by.username, str(docker.id))
pod = k8s_client.get_pods(namespace=namespace, pod_name=pod_name)
@@ -273,7 +272,7 @@ class Docker_ModelView_Base():
def save(self, docker_id):
docker = db.session.query(Docker).filter_by(id=docker_id).first()
from myapp.utils.py.py_k8s import K8s
- k8s_client = K8s(conf.get('CLUSTERS').get(conf.get('ENVIRONMENT')).get('KUBECONFIG', ''))
+ k8s_client = K8s(docker.project.cluster.get('KUBECONFIG', ''))
namespace = json.loads(docker.expand).get("namespace", conf.get('NOTEBOOK_NAMESPACE'))
pod_name = "docker-%s-%s" % (docker.created_by.username, str(docker.id))
pod = None
@@ -359,7 +358,7 @@ class Docker_ModelView_Base():
}
check_docker_commit.apply_async(kwargs=kwargs)
- return redirect("/k8s/web/log/%s/%s/%s" % (conf.get('ENVIRONMENT'), namespace, pod_name))
+ return redirect("/k8s/web/log/%s/%s/%s" % (docker.project.cluster.get('NAME', ''), namespace, pod_name))
# 添加api
diff --git a/myapp/views/view_inferenceserving.py b/myapp/views/view_inferenceserving.py
index 9a85eeef..a36df1f5 100644
--- a/myapp/views/view_inferenceserving.py
+++ b/myapp/views/view_inferenceserving.py
@@ -632,6 +632,12 @@ output %s
if not item.volume_mount:
item.volume_mount = item.project.volume_mount
self.use_expand(item)
+ if not item.resource_memory:
+ item.resource_memory = '2G'
+ if not item.resource_cpu:
+ item.resource_cpu='2'
+ if not item.resource_gpu:
+ item.resource_gpu='0'
# 初始化时没有回话但是也要调用flash,所以会报错
try:
if ('http:' in item.model_path or 'https:' in item.model_path) and ('.zip' in item.model_path or '.tar.gz' in item.model_path):
@@ -845,17 +851,18 @@ output %s
k8s_client.create_configmap(namespace=namespace, name=name, data=config_data, labels={'app': name})
volume_mount += ",%s(configmap):/config/" % name
ports = [int(port) for port in service.ports.split(',')]
+ gpu_num, gpu_type, resource_name = core.get_gpu(service.resource_gpu)
pod_env = service.env
pod_env += "\nKUBEFLOW_ENV=" + env
- pod_env += '\nKUBEFLOW_MODEL_PATH=' + service.model_path if service.model_path else ''
+ pod_env += '\nKUBEFLOW_MODEL_PATH=' + (service.model_path if service.model_path else '')
pod_env += '\nKUBEFLOW_MODEL_VERSION=' + service.model_version
pod_env += '\nKUBEFLOW_MODEL_IMAGES=' + service.images
pod_env += '\nKUBEFLOW_MODEL_NAME=' + service.model_name
pod_env += '\nKUBEFLOW_AREA=' + json.loads(service.project.expand).get('area', 'guangzhou')
pod_env += "\nRESOURCE_CPU=" + service.resource_cpu
pod_env += "\nRESOURCE_MEMORY=" + service.resource_memory
- pod_env += "\nRESOURCE_GPU=" + service.resource_gpu
+ pod_env += "\nRESOURCE_GPU=" + str(int(gpu_num))
pod_env += "\nMODEL_PATH=" + service.model_path
pod_env = pod_env.strip(',')
diff --git a/myapp/views/view_notebook.py b/myapp/views/view_notebook.py
index 421f9ce5..770e5f08 100644
--- a/myapp/views/view_notebook.py
+++ b/myapp/views/view_notebook.py
@@ -220,16 +220,19 @@ class Notebook_ModelView_Base():
self.pre_add(item)
def post_add(self, item):
- flash(__('自动reset 一分钟后生效'), 'warning')
+
try:
self.reset_notebook(item)
except Exception as e:
print(e)
- flash(__('reset后查看运行运行状态'), 'warning')
+ flash(__('start fail, please manually reset: ')+str(e), 'warning')
+ return
+
+ flash(__('自动reset 一分钟后生效'), 'info')
# @pysnooper.snoop(watch_explode=('item'))
def post_update(self, item):
- flash(__('reset以后配置方可生效'), 'warning')
+ flash(__('reset以后配置方可生效'), 'info')
# item.changed_on = datetime.datetime.now()
# db.session.commit()
@@ -237,9 +240,13 @@ class Notebook_ModelView_Base():
# flash('自动reset 一分钟后生效', 'warning')
if self.src_item_json:
- item.changed_by_fk = int(self.src_item_json.get('changed_by_fk'))
+ changed_by_fk = self.src_item_json.get('changed_by_fk','')
+ if changed_by_fk:
+ item.changed_by_fk = int(self.src_item_json.get('changed_by_fk'))
if self.src_item_json:
- item.created_by_fk = int(self.src_item_json.get('created_by_fk'))
+ created_by_fk = self.src_item_json.get('created_by_fk','')
+ if created_by_fk:
+ item.created_by_fk = int(self.src_item_json.get('created_by_fk'))
db.session.commit()
diff --git a/myapp/views/view_pipeline.py b/myapp/views/view_pipeline.py
index 49943349..f645201a 100644
--- a/myapp/views/view_pipeline.py
+++ b/myapp/views/view_pipeline.py
@@ -78,7 +78,7 @@ class Pipeline_Filter(MyappFilter):
-def make_workflow_yaml(pipeline,workflow_label,hubsecret_list,dag_templates,containers_templates):
+def make_workflow_yaml(pipeline,workflow_label,hubsecret_list,dag_templates,containers_templates,dbsession=db.session):
name = pipeline.name+"-"+uuid.uuid4().hex[:4]
workflow_label['workflow-name']=name
workflow_crd_json={
@@ -139,7 +139,7 @@ def dag_to_pipeline(pipeline, dbsession, workflow_label=None, **kwargs):
for task_name in dag:
# 使用临时连接,避免连接中断的问题
# try:
- # db.session().ping()
+
task = dbsession.query(Task).filter_by(name=task_name, pipeline_id=pipeline.id).first()
if not task:
raise MyappException('task %s not exist ' % task_name)
@@ -491,7 +491,7 @@ def dag_to_pipeline(pipeline, dbsession, workflow_label=None, **kwargs):
# 添加个人创建的所有仓库秘钥
image_pull_secrets = conf.get('HUBSECRET', [])
- user_repositorys = db.session.query(Repository).filter(Repository.created_by_fk == pipeline.created_by.id).all()
+ user_repositorys = dbsession.query(Repository).filter(Repository.created_by_fk == pipeline.created_by.id).all()
hubsecret_list = list(set(image_pull_secrets + [rep.hubsecret for rep in user_repositorys]))
# 配置拉取秘钥
@@ -521,7 +521,7 @@ def dag_to_pipeline(pipeline, dbsession, workflow_label=None, **kwargs):
for task_name in dag:
containers_template.append(make_container_template(task_name=task_name,hubsecret_list=hubsecret_list))
- workflow_json = make_workflow_yaml(pipeline=pipeline, workflow_label=workflow_label, hubsecret_list=hubsecret_list, dag_templates=make_dag_template(), containers_templates=containers_template)
+ workflow_json = make_workflow_yaml(pipeline=pipeline, workflow_label=workflow_label, hubsecret_list=hubsecret_list, dag_templates=make_dag_template(), containers_templates=containers_template,dbsession=dbsession)
# 先这是某个模板变量不进行渲染,一直向后传递到argo
pipeline_file = json.dumps(workflow_json,ensure_ascii=False,indent=4)
# print(pipeline_file)
diff --git a/myapp/views/view_total_resource.py b/myapp/views/view_total_resource.py
index cca85eec..ca7aaab8 100644
--- a/myapp/views/view_total_resource.py
+++ b/myapp/views/view_total_resource.py
@@ -198,7 +198,7 @@ def pod_resource():
org = pod['node_selector'].get("org", 'public')
if org not in all_tasks_json[cluster_name][namespace]:
all_tasks_json[cluster_name][namespace][org] = {}
- if pod['status'] == 'Running':
+ if pod['status'] == 'Running' or pod['status_more'].get('phase','')=='Running':
user = pod['labels'].get('user', pod['labels'].get('username', pod['labels'].get('run-rtx',pod['labels'].get('rtx-user','admin'))))
if user:
request_gpu = 0
@@ -456,9 +456,9 @@ class Total_Resource_ModelView_Api(MyappFormRestApi):
if namespace == 'automl':
vcjob_name = pod['label'].get("app", '')
+ k8s_client.delete_pods(namespace=namespace, pod_name=pod['name'])
if vcjob_name:
k8s_client.delete_volcano(namespace=namespace, name=vcjob_name)
- k8s_client.delete_pods(namespace=namespace,pod_name=pod['name'])
k8s_client.delete_service(namespace=namespace,labels={'app':vcjob_name})
k8s_client.delete_istio_ingress(namespace=namespace,name=pod['name'])
diff --git a/myapp/views/view_workflow.py b/myapp/views/view_workflow.py
index 9af7f3f5..b89b4731 100644
--- a/myapp/views/view_workflow.py
+++ b/myapp/views/view_workflow.py
@@ -294,20 +294,19 @@ class Workflow_ModelView_Base(Crd_ModelView_Base):
layout_config["start_time"] = k8s_client.to_local_time(status_more.get('startedAt',''))
layout_config['finish_time'] = k8s_client.to_local_time(status_more.get('finishedAt',''))
- layout_config['crd_json'] = core.decode_unicode_escape(
- {
- "apiVersion": "argoproj.io/v1alpha1",
- "kind": "Workflow",
- "metadata": {
- "annotations": annotations,
- "name": workflow_name,
- "labels": labels,
- "namespace": namespace
- },
- "spec": spec,
- "status": status_more
- }
- )
+ layout_config['crd_json'] = {
+ "apiVersion": "argoproj.io/v1alpha1",
+ "kind": "Workflow",
+ "metadata": {
+ "annotations": core.decode_unicode_escape(annotations),
+ "name": workflow_name,
+ "labels": labels,
+ "namespace": namespace
+ },
+ "spec": spec,
+ "status": status_more
+ }
+
if int(layout_config.get("pipeline-id", '0')):
pipeline = db.session.query(Pipeline).filter_by(id=int(layout_config.get("pipeline-id", '0'))).first()
@@ -666,12 +665,11 @@ class Workflow_ModelView_Base(Crd_ModelView_Base):
containers.append(container_temp)
pod['spec']['containers'] = containers
- pod = core.decode_unicode_escape(pod)
pod_yaml = json.dumps(pod, indent=4, ensure_ascii=False, default=str)
- import yaml
- pod_yaml = yaml.safe_dump(yaml.load(pod_yaml), default_flow_style=False, indent=4)
- # print(pod)
+ # import yaml
+ # pod_yaml = yaml.safe_dump(yaml.load(pod_yaml, Loader=yaml.SafeLoader), default_flow_style=False, indent=4)
+ # # print(pod)
except Exception as e:
print(e)