From b8cde3d78ad97c7e5d8f366348c22caff17938e5 Mon Sep 17 00:00:00 2001 From: "xiaoman.gao@extremevision.com.cn" <825485697@qq.com> Date: Wed, 3 Apr 2024 22:01:43 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E6=97=A5=E5=BF=97=E6=89=93?= =?UTF-8?q?=E5=8D=B0=E5=92=8C=E9=83=A8=E5=88=86=E6=98=BE=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- myapp/views/view_docker.py | 9 ++++---- myapp/views/view_inferenceserving.py | 11 +++++++-- myapp/views/view_notebook.py | 17 ++++++++++---- myapp/views/view_pipeline.py | 8 +++---- myapp/views/view_total_resource.py | 4 ++-- myapp/views/view_workflow.py | 34 +++++++++++++--------------- 6 files changed, 47 insertions(+), 36 deletions(-) diff --git a/myapp/views/view_docker.py b/myapp/views/view_docker.py index 39c19c43..611b6c3e 100644 --- a/myapp/views/view_docker.py +++ b/myapp/views/view_docker.py @@ -136,8 +136,7 @@ class Docker_ModelView_Base(): self.add_form_extra_fields['base_image'] = StringField( _('基础镜像'), default=conf.get('USER_IMAGE',''), - description=f'{__("基础镜像和构建方法可参考:")}{__("点击打开")}' % ( - conf.get('HELP_URL').get('docker', '')), + description=f'{__("基础镜像和构建方法可参考:")}{__("点击打开")}' % (conf.get('HELP_URL').get('docker', '')), widget=BS3TextFieldWidget(), validators=[DataRequired(), ] ) @@ -180,7 +179,7 @@ class Docker_ModelView_Base(): def debug(self, docker_id): docker = db.session.query(Docker).filter_by(id=docker_id).first() from myapp.utils.py.py_k8s import K8s - k8s_client = K8s(conf.get('CLUSTERS').get(conf.get('ENVIRONMENT')).get('KUBECONFIG', '')) + k8s_client = K8s(docker.project.cluster.get('KUBECONFIG', '')) namespace = json.loads(docker.expand).get("namespace", conf.get('NOTEBOOK_NAMESPACE')) pod_name = "docker-%s-%s" % (docker.created_by.username, str(docker.id)) pod = k8s_client.get_pods(namespace=namespace, pod_name=pod_name) @@ -273,7 +272,7 @@ class Docker_ModelView_Base(): def save(self, docker_id): docker = db.session.query(Docker).filter_by(id=docker_id).first() from myapp.utils.py.py_k8s import K8s - k8s_client = K8s(conf.get('CLUSTERS').get(conf.get('ENVIRONMENT')).get('KUBECONFIG', '')) + k8s_client = K8s(docker.project.cluster.get('KUBECONFIG', '')) namespace = json.loads(docker.expand).get("namespace", conf.get('NOTEBOOK_NAMESPACE')) pod_name = "docker-%s-%s" % (docker.created_by.username, str(docker.id)) pod = None @@ -359,7 +358,7 @@ class Docker_ModelView_Base(): } check_docker_commit.apply_async(kwargs=kwargs) - return redirect("/k8s/web/log/%s/%s/%s" % (conf.get('ENVIRONMENT'), namespace, pod_name)) + return redirect("/k8s/web/log/%s/%s/%s" % (docker.project.cluster.get('NAME', ''), namespace, pod_name)) # 添加api diff --git a/myapp/views/view_inferenceserving.py b/myapp/views/view_inferenceserving.py index 9a85eeef..a36df1f5 100644 --- a/myapp/views/view_inferenceserving.py +++ b/myapp/views/view_inferenceserving.py @@ -632,6 +632,12 @@ output %s if not item.volume_mount: item.volume_mount = item.project.volume_mount self.use_expand(item) + if not item.resource_memory: + item.resource_memory = '2G' + if not item.resource_cpu: + item.resource_cpu='2' + if not item.resource_gpu: + item.resource_gpu='0' # 初始化时没有回话但是也要调用flash,所以会报错 try: if ('http:' in item.model_path or 'https:' in item.model_path) and ('.zip' in item.model_path or '.tar.gz' in item.model_path): @@ -845,17 +851,18 @@ output %s k8s_client.create_configmap(namespace=namespace, name=name, data=config_data, labels={'app': name}) volume_mount += ",%s(configmap):/config/" % name ports = [int(port) for port in service.ports.split(',')] + gpu_num, gpu_type, resource_name = core.get_gpu(service.resource_gpu) pod_env = service.env pod_env += "\nKUBEFLOW_ENV=" + env - pod_env += '\nKUBEFLOW_MODEL_PATH=' + service.model_path if service.model_path else '' + pod_env += '\nKUBEFLOW_MODEL_PATH=' + (service.model_path if service.model_path else '') pod_env += '\nKUBEFLOW_MODEL_VERSION=' + service.model_version pod_env += '\nKUBEFLOW_MODEL_IMAGES=' + service.images pod_env += '\nKUBEFLOW_MODEL_NAME=' + service.model_name pod_env += '\nKUBEFLOW_AREA=' + json.loads(service.project.expand).get('area', 'guangzhou') pod_env += "\nRESOURCE_CPU=" + service.resource_cpu pod_env += "\nRESOURCE_MEMORY=" + service.resource_memory - pod_env += "\nRESOURCE_GPU=" + service.resource_gpu + pod_env += "\nRESOURCE_GPU=" + str(int(gpu_num)) pod_env += "\nMODEL_PATH=" + service.model_path pod_env = pod_env.strip(',') diff --git a/myapp/views/view_notebook.py b/myapp/views/view_notebook.py index 421f9ce5..770e5f08 100644 --- a/myapp/views/view_notebook.py +++ b/myapp/views/view_notebook.py @@ -220,16 +220,19 @@ class Notebook_ModelView_Base(): self.pre_add(item) def post_add(self, item): - flash(__('自动reset 一分钟后生效'), 'warning') + try: self.reset_notebook(item) except Exception as e: print(e) - flash(__('reset后查看运行运行状态'), 'warning') + flash(__('start fail, please manually reset: ')+str(e), 'warning') + return + + flash(__('自动reset 一分钟后生效'), 'info') # @pysnooper.snoop(watch_explode=('item')) def post_update(self, item): - flash(__('reset以后配置方可生效'), 'warning') + flash(__('reset以后配置方可生效'), 'info') # item.changed_on = datetime.datetime.now() # db.session.commit() @@ -237,9 +240,13 @@ class Notebook_ModelView_Base(): # flash('自动reset 一分钟后生效', 'warning') if self.src_item_json: - item.changed_by_fk = int(self.src_item_json.get('changed_by_fk')) + changed_by_fk = self.src_item_json.get('changed_by_fk','') + if changed_by_fk: + item.changed_by_fk = int(self.src_item_json.get('changed_by_fk')) if self.src_item_json: - item.created_by_fk = int(self.src_item_json.get('created_by_fk')) + created_by_fk = self.src_item_json.get('created_by_fk','') + if created_by_fk: + item.created_by_fk = int(self.src_item_json.get('created_by_fk')) db.session.commit() diff --git a/myapp/views/view_pipeline.py b/myapp/views/view_pipeline.py index 49943349..f645201a 100644 --- a/myapp/views/view_pipeline.py +++ b/myapp/views/view_pipeline.py @@ -78,7 +78,7 @@ class Pipeline_Filter(MyappFilter): -def make_workflow_yaml(pipeline,workflow_label,hubsecret_list,dag_templates,containers_templates): +def make_workflow_yaml(pipeline,workflow_label,hubsecret_list,dag_templates,containers_templates,dbsession=db.session): name = pipeline.name+"-"+uuid.uuid4().hex[:4] workflow_label['workflow-name']=name workflow_crd_json={ @@ -139,7 +139,7 @@ def dag_to_pipeline(pipeline, dbsession, workflow_label=None, **kwargs): for task_name in dag: # 使用临时连接,避免连接中断的问题 # try: - # db.session().ping() + task = dbsession.query(Task).filter_by(name=task_name, pipeline_id=pipeline.id).first() if not task: raise MyappException('task %s not exist ' % task_name) @@ -491,7 +491,7 @@ def dag_to_pipeline(pipeline, dbsession, workflow_label=None, **kwargs): # 添加个人创建的所有仓库秘钥 image_pull_secrets = conf.get('HUBSECRET', []) - user_repositorys = db.session.query(Repository).filter(Repository.created_by_fk == pipeline.created_by.id).all() + user_repositorys = dbsession.query(Repository).filter(Repository.created_by_fk == pipeline.created_by.id).all() hubsecret_list = list(set(image_pull_secrets + [rep.hubsecret for rep in user_repositorys])) # 配置拉取秘钥 @@ -521,7 +521,7 @@ def dag_to_pipeline(pipeline, dbsession, workflow_label=None, **kwargs): for task_name in dag: containers_template.append(make_container_template(task_name=task_name,hubsecret_list=hubsecret_list)) - workflow_json = make_workflow_yaml(pipeline=pipeline, workflow_label=workflow_label, hubsecret_list=hubsecret_list, dag_templates=make_dag_template(), containers_templates=containers_template) + workflow_json = make_workflow_yaml(pipeline=pipeline, workflow_label=workflow_label, hubsecret_list=hubsecret_list, dag_templates=make_dag_template(), containers_templates=containers_template,dbsession=dbsession) # 先这是某个模板变量不进行渲染,一直向后传递到argo pipeline_file = json.dumps(workflow_json,ensure_ascii=False,indent=4) # print(pipeline_file) diff --git a/myapp/views/view_total_resource.py b/myapp/views/view_total_resource.py index cca85eec..ca7aaab8 100644 --- a/myapp/views/view_total_resource.py +++ b/myapp/views/view_total_resource.py @@ -198,7 +198,7 @@ def pod_resource(): org = pod['node_selector'].get("org", 'public') if org not in all_tasks_json[cluster_name][namespace]: all_tasks_json[cluster_name][namespace][org] = {} - if pod['status'] == 'Running': + if pod['status'] == 'Running' or pod['status_more'].get('phase','')=='Running': user = pod['labels'].get('user', pod['labels'].get('username', pod['labels'].get('run-rtx',pod['labels'].get('rtx-user','admin')))) if user: request_gpu = 0 @@ -456,9 +456,9 @@ class Total_Resource_ModelView_Api(MyappFormRestApi): if namespace == 'automl': vcjob_name = pod['label'].get("app", '') + k8s_client.delete_pods(namespace=namespace, pod_name=pod['name']) if vcjob_name: k8s_client.delete_volcano(namespace=namespace, name=vcjob_name) - k8s_client.delete_pods(namespace=namespace,pod_name=pod['name']) k8s_client.delete_service(namespace=namespace,labels={'app':vcjob_name}) k8s_client.delete_istio_ingress(namespace=namespace,name=pod['name']) diff --git a/myapp/views/view_workflow.py b/myapp/views/view_workflow.py index 9af7f3f5..b89b4731 100644 --- a/myapp/views/view_workflow.py +++ b/myapp/views/view_workflow.py @@ -294,20 +294,19 @@ class Workflow_ModelView_Base(Crd_ModelView_Base): layout_config["start_time"] = k8s_client.to_local_time(status_more.get('startedAt','')) layout_config['finish_time'] = k8s_client.to_local_time(status_more.get('finishedAt','')) - layout_config['crd_json'] = core.decode_unicode_escape( - { - "apiVersion": "argoproj.io/v1alpha1", - "kind": "Workflow", - "metadata": { - "annotations": annotations, - "name": workflow_name, - "labels": labels, - "namespace": namespace - }, - "spec": spec, - "status": status_more - } - ) + layout_config['crd_json'] = { + "apiVersion": "argoproj.io/v1alpha1", + "kind": "Workflow", + "metadata": { + "annotations": core.decode_unicode_escape(annotations), + "name": workflow_name, + "labels": labels, + "namespace": namespace + }, + "spec": spec, + "status": status_more + } + if int(layout_config.get("pipeline-id", '0')): pipeline = db.session.query(Pipeline).filter_by(id=int(layout_config.get("pipeline-id", '0'))).first() @@ -666,12 +665,11 @@ class Workflow_ModelView_Base(Crd_ModelView_Base): containers.append(container_temp) pod['spec']['containers'] = containers - pod = core.decode_unicode_escape(pod) pod_yaml = json.dumps(pod, indent=4, ensure_ascii=False, default=str) - import yaml - pod_yaml = yaml.safe_dump(yaml.load(pod_yaml), default_flow_style=False, indent=4) - # print(pod) + # import yaml + # pod_yaml = yaml.safe_dump(yaml.load(pod_yaml, Loader=yaml.SafeLoader), default_flow_style=False, indent=4) + # # print(pod) except Exception as e: print(e)