修正日志打印和部分显示

This commit is contained in:
xiaoman.gao@extremevision.com.cn 2024-04-03 22:01:43 +08:00
parent db137aec83
commit b8cde3d78a
6 changed files with 47 additions and 36 deletions

View File

@ -136,8 +136,7 @@ class Docker_ModelView_Base():
self.add_form_extra_fields['base_image'] = StringField(
_('基础镜像'),
default=conf.get('USER_IMAGE',''),
description=f'{__("基础镜像和构建方法可参考:")}<a target="_blank" href="%s">{__("点击打开")}</a>' % (
conf.get('HELP_URL').get('docker', '')),
description=f'{__("基础镜像和构建方法可参考:")}<a target="_blank" href="%s">{__("点击打开")}</a>' % (conf.get('HELP_URL').get('docker', '')),
widget=BS3TextFieldWidget(),
validators=[DataRequired(), ]
)
@ -180,7 +179,7 @@ class Docker_ModelView_Base():
def debug(self, docker_id):
docker = db.session.query(Docker).filter_by(id=docker_id).first()
from myapp.utils.py.py_k8s import K8s
k8s_client = K8s(conf.get('CLUSTERS').get(conf.get('ENVIRONMENT')).get('KUBECONFIG', ''))
k8s_client = K8s(docker.project.cluster.get('KUBECONFIG', ''))
namespace = json.loads(docker.expand).get("namespace", conf.get('NOTEBOOK_NAMESPACE'))
pod_name = "docker-%s-%s" % (docker.created_by.username, str(docker.id))
pod = k8s_client.get_pods(namespace=namespace, pod_name=pod_name)
@ -273,7 +272,7 @@ class Docker_ModelView_Base():
def save(self, docker_id):
docker = db.session.query(Docker).filter_by(id=docker_id).first()
from myapp.utils.py.py_k8s import K8s
k8s_client = K8s(conf.get('CLUSTERS').get(conf.get('ENVIRONMENT')).get('KUBECONFIG', ''))
k8s_client = K8s(docker.project.cluster.get('KUBECONFIG', ''))
namespace = json.loads(docker.expand).get("namespace", conf.get('NOTEBOOK_NAMESPACE'))
pod_name = "docker-%s-%s" % (docker.created_by.username, str(docker.id))
pod = None
@ -359,7 +358,7 @@ class Docker_ModelView_Base():
}
check_docker_commit.apply_async(kwargs=kwargs)
return redirect("/k8s/web/log/%s/%s/%s" % (conf.get('ENVIRONMENT'), namespace, pod_name))
return redirect("/k8s/web/log/%s/%s/%s" % (docker.project.cluster.get('NAME', ''), namespace, pod_name))
# 添加api

View File

@ -632,6 +632,12 @@ output %s
if not item.volume_mount:
item.volume_mount = item.project.volume_mount
self.use_expand(item)
if not item.resource_memory:
item.resource_memory = '2G'
if not item.resource_cpu:
item.resource_cpu='2'
if not item.resource_gpu:
item.resource_gpu='0'
# 初始化时没有会话,但是也要调用flash,所以会报错
try:
if ('http:' in item.model_path or 'https:' in item.model_path) and ('.zip' in item.model_path or '.tar.gz' in item.model_path):
@ -845,17 +851,18 @@ output %s
k8s_client.create_configmap(namespace=namespace, name=name, data=config_data, labels={'app': name})
volume_mount += ",%s(configmap):/config/" % name
ports = [int(port) for port in service.ports.split(',')]
gpu_num, gpu_type, resource_name = core.get_gpu(service.resource_gpu)
pod_env = service.env
pod_env += "\nKUBEFLOW_ENV=" + env
pod_env += '\nKUBEFLOW_MODEL_PATH=' + service.model_path if service.model_path else ''
pod_env += '\nKUBEFLOW_MODEL_PATH=' + (service.model_path if service.model_path else '')
pod_env += '\nKUBEFLOW_MODEL_VERSION=' + service.model_version
pod_env += '\nKUBEFLOW_MODEL_IMAGES=' + service.images
pod_env += '\nKUBEFLOW_MODEL_NAME=' + service.model_name
pod_env += '\nKUBEFLOW_AREA=' + json.loads(service.project.expand).get('area', 'guangzhou')
pod_env += "\nRESOURCE_CPU=" + service.resource_cpu
pod_env += "\nRESOURCE_MEMORY=" + service.resource_memory
pod_env += "\nRESOURCE_GPU=" + service.resource_gpu
pod_env += "\nRESOURCE_GPU=" + str(int(gpu_num))
pod_env += "\nMODEL_PATH=" + service.model_path
pod_env = pod_env.strip(',')

View File

@ -220,16 +220,19 @@ class Notebook_ModelView_Base():
self.pre_add(item)
def post_add(self, item):
flash(__('自动reset 一分钟后生效'), 'warning')
try:
self.reset_notebook(item)
except Exception as e:
print(e)
flash(__('reset后查看运行运行状态'), 'warning')
flash(__('start fail, please manually reset: ')+str(e), 'warning')
return
flash(__('自动reset 一分钟后生效'), 'info')
# @pysnooper.snoop(watch_explode=('item'))
def post_update(self, item):
flash(__('reset以后配置方可生效'), 'warning')
flash(__('reset以后配置方可生效'), 'info')
# item.changed_on = datetime.datetime.now()
# db.session.commit()
@ -237,9 +240,13 @@ class Notebook_ModelView_Base():
# flash('自动reset 一分钟后生效', 'warning')
if self.src_item_json:
item.changed_by_fk = int(self.src_item_json.get('changed_by_fk'))
changed_by_fk = self.src_item_json.get('changed_by_fk','')
if changed_by_fk:
item.changed_by_fk = int(self.src_item_json.get('changed_by_fk'))
if self.src_item_json:
item.created_by_fk = int(self.src_item_json.get('created_by_fk'))
created_by_fk = self.src_item_json.get('created_by_fk','')
if created_by_fk:
item.created_by_fk = int(self.src_item_json.get('created_by_fk'))
db.session.commit()

View File

@ -78,7 +78,7 @@ class Pipeline_Filter(MyappFilter):
def make_workflow_yaml(pipeline,workflow_label,hubsecret_list,dag_templates,containers_templates):
def make_workflow_yaml(pipeline,workflow_label,hubsecret_list,dag_templates,containers_templates,dbsession=db.session):
name = pipeline.name+"-"+uuid.uuid4().hex[:4]
workflow_label['workflow-name']=name
workflow_crd_json={
@ -139,7 +139,7 @@ def dag_to_pipeline(pipeline, dbsession, workflow_label=None, **kwargs):
for task_name in dag:
# 使用临时连接,避免连接中断的问题
# try:
# db.session().ping()
task = dbsession.query(Task).filter_by(name=task_name, pipeline_id=pipeline.id).first()
if not task:
raise MyappException('task %s not exist ' % task_name)
@ -491,7 +491,7 @@ def dag_to_pipeline(pipeline, dbsession, workflow_label=None, **kwargs):
# 添加个人创建的所有仓库秘钥
image_pull_secrets = conf.get('HUBSECRET', [])
user_repositorys = db.session.query(Repository).filter(Repository.created_by_fk == pipeline.created_by.id).all()
user_repositorys = dbsession.query(Repository).filter(Repository.created_by_fk == pipeline.created_by.id).all()
hubsecret_list = list(set(image_pull_secrets + [rep.hubsecret for rep in user_repositorys]))
# 配置拉取秘钥
@ -521,7 +521,7 @@ def dag_to_pipeline(pipeline, dbsession, workflow_label=None, **kwargs):
for task_name in dag:
containers_template.append(make_container_template(task_name=task_name,hubsecret_list=hubsecret_list))
workflow_json = make_workflow_yaml(pipeline=pipeline, workflow_label=workflow_label, hubsecret_list=hubsecret_list, dag_templates=make_dag_template(), containers_templates=containers_template)
workflow_json = make_workflow_yaml(pipeline=pipeline, workflow_label=workflow_label, hubsecret_list=hubsecret_list, dag_templates=make_dag_template(), containers_templates=containers_template,dbsession=dbsession)
# 先设置某些模板变量不进行渲染,一直向后传递到argo
pipeline_file = json.dumps(workflow_json,ensure_ascii=False,indent=4)
# print(pipeline_file)

View File

@ -198,7 +198,7 @@ def pod_resource():
org = pod['node_selector'].get("org", 'public')
if org not in all_tasks_json[cluster_name][namespace]:
all_tasks_json[cluster_name][namespace][org] = {}
if pod['status'] == 'Running':
if pod['status'] == 'Running' or pod['status_more'].get('phase','')=='Running':
user = pod['labels'].get('user', pod['labels'].get('username', pod['labels'].get('run-rtx',pod['labels'].get('rtx-user','admin'))))
if user:
request_gpu = 0
@ -456,9 +456,9 @@ class Total_Resource_ModelView_Api(MyappFormRestApi):
if namespace == 'automl':
vcjob_name = pod['label'].get("app", '')
k8s_client.delete_pods(namespace=namespace, pod_name=pod['name'])
if vcjob_name:
k8s_client.delete_volcano(namespace=namespace, name=vcjob_name)
k8s_client.delete_pods(namespace=namespace,pod_name=pod['name'])
k8s_client.delete_service(namespace=namespace,labels={'app':vcjob_name})
k8s_client.delete_istio_ingress(namespace=namespace,name=pod['name'])

View File

@ -294,20 +294,19 @@ class Workflow_ModelView_Base(Crd_ModelView_Base):
layout_config["start_time"] = k8s_client.to_local_time(status_more.get('startedAt',''))
layout_config['finish_time'] = k8s_client.to_local_time(status_more.get('finishedAt',''))
layout_config['crd_json'] = core.decode_unicode_escape(
{
"apiVersion": "argoproj.io/v1alpha1",
"kind": "Workflow",
"metadata": {
"annotations": annotations,
"name": workflow_name,
"labels": labels,
"namespace": namespace
},
"spec": spec,
"status": status_more
}
)
layout_config['crd_json'] = {
"apiVersion": "argoproj.io/v1alpha1",
"kind": "Workflow",
"metadata": {
"annotations": core.decode_unicode_escape(annotations),
"name": workflow_name,
"labels": labels,
"namespace": namespace
},
"spec": spec,
"status": status_more
}
if int(layout_config.get("pipeline-id", '0')):
pipeline = db.session.query(Pipeline).filter_by(id=int(layout_config.get("pipeline-id", '0'))).first()
@ -666,12 +665,11 @@ class Workflow_ModelView_Base(Crd_ModelView_Base):
containers.append(container_temp)
pod['spec']['containers'] = containers
pod = core.decode_unicode_escape(pod)
pod_yaml = json.dumps(pod, indent=4, ensure_ascii=False, default=str)
import yaml
pod_yaml = yaml.safe_dump(yaml.load(pod_yaml), default_flow_style=False, indent=4)
# print(pod)
# import yaml
# pod_yaml = yaml.safe_dump(yaml.load(pod_yaml, Loader=yaml.SafeLoader), default_flow_style=False, indent=4)
# # print(pod)
except Exception as e:
print(e)