cube-studio/myapp/cli.py

851 lines
43 KiB
Python
Raw Normal View History

2021-08-17 17:00:34 +08:00
#!/usr/bin/env python
2023-09-03 21:17:55 +08:00
import shutil
2021-08-17 17:00:34 +08:00
from datetime import datetime
2023-12-11 13:25:07 +08:00
from flask_babel import gettext as __
from flask_babel import lazy_gettext as _
2021-08-17 17:00:34 +08:00
import json
from myapp import app, appbuilder, db, security_manager
2023-09-03 21:17:55 +08:00
from myapp.models.model_team import Project, Project_User
from myapp.models.model_job import Repository, Images, Job_Template, Pipeline, Task
from myapp.models.model_dataset import Dataset
2023-09-03 21:17:55 +08:00
from myapp.models.model_serving import Service, InferenceService
from myapp.models.model_train_model import Training_Model
2023-09-03 21:17:55 +08:00
from myapp.models.model_notebook import Notebook
import uuid
2023-04-06 23:03:18 +08:00
import os
2023-09-03 21:17:55 +08:00
import importlib
import traceback
2021-08-17 17:00:34 +08:00
conf = app.config
2023-09-03 21:17:55 +08:00
import pysnooper
2021-08-17 17:00:34 +08:00
def create_app(script_info=None):
return app
2023-09-03 21:17:55 +08:00
2021-08-17 17:00:34 +08:00
@app.shell_context_processor
def make_shell_context():
return dict(app=app, db=db)
2023-12-11 13:25:07 +08:00
# @pysnooper.snoop()
def replace_git(dir_path):
files = os.listdir(dir_path)
for file_name in files:
file_path = os.path.join(dir_path,file_name)
if os.path.isfile(file_path) and '.json' in file_name:
content = open(file_path).read()
content = content.replace('https://github.com/tencentmusic/cube-studio/tree/master',conf.get('GIT_URL',''))
file = open(file_path,mode='w')
file.write(content)
file.close()
2023-09-03 21:17:55 +08:00
2021-08-17 17:00:34 +08:00
# https://dormousehole.readthedocs.io/en/latest/cli.html
@app.cli.command('init')
# @pysnooper.snoop()
2021-08-17 17:00:34 +08:00
def init():
try:
"""Inits the Myapp application"""
2023-09-03 21:17:55 +08:00
appbuilder.add_permissions(update_perms=True) # update_perms为true才会检测新权限
2021-08-17 17:00:34 +08:00
security_manager.sync_role_definitions()
2023-04-06 23:03:18 +08:00
except Exception as e:
print(e)
2023-12-11 13:25:07 +08:00
init_dir='myapp/init' if conf.get('BABEL_DEFAULT_LOCALE','zh')=='zh' else "myapp/init-en"
replace_git(init_dir)
2023-04-06 23:03:18 +08:00
# 初始化创建项目组
try:
2023-09-03 21:17:55 +08:00
def add_project(project_type, name, describe, expand={}):
2023-12-11 13:25:07 +08:00
if not expand:
expand={
"org": "public"
}
print('add project',project_type,name,describe)
2021-11-25 18:04:34 +08:00
project = db.session.query(Project).filter_by(name=name).filter_by(type=project_type).first()
2021-08-17 17:00:34 +08:00
if project is None:
try:
project = Project()
2023-09-03 21:17:55 +08:00
project.type = project_type
2021-08-17 17:00:34 +08:00
project.name = name
2023-09-03 21:17:55 +08:00
project.describe = describe
project.expand = json.dumps(expand, ensure_ascii=False, indent=4)
2021-08-17 17:00:34 +08:00
db.session.add(project)
db.session.commit()
project_user = Project_User()
2023-09-03 21:17:55 +08:00
project_user.project = project
2021-08-17 17:00:34 +08:00
project_user.role = 'creator'
2023-09-03 21:17:55 +08:00
project_user.user_id = 1
2021-08-17 17:00:34 +08:00
db.session.add(project_user)
db.session.commit()
2023-09-03 21:17:55 +08:00
print('add project %s' % name)
2021-08-17 17:00:34 +08:00
except Exception as e:
print(e)
2021-08-17 17:00:34 +08:00
db.session.rollback()
# 添加一些默认的记录
2024-01-03 22:40:42 +08:00
add_project('org', 'public', __('公共项目组'),expand={'cluster':'dev','org':'public'})
add_project('org', __('推荐中心'), __('推荐项目组'),expand={'cluster':'dev','org':'public'})
add_project('org', __('搜索中心'), __('搜索项目组'),expand={'cluster':'dev','org':'public'})
add_project('org', __('广告中心'), __('广告项目组'),expand={'cluster':'dev','org':'public'})
add_project('org', __('安全中心'), __('安全项目组'),expand={'cluster':'dev','org':'public'})
add_project('org', __('多媒体中心'), __('多媒体项目组'),expand={'cluster':'dev','org':'public'})
2023-12-11 13:25:07 +08:00
add_project('job-template', __('基础命令'), __('python/bash等直接在服务器命令行中执行命令的模板'), {"index": 1})
add_project('job-template', __('数据导入导出'), __('集群与用户机器或其他集群之间的数据迁移'), {"index": 2})
add_project('job-template', __('数据预处理'), __('结构化话数据特征处理'), {"index": 3})
add_project('job-template', __('数据处理工具'), __('数据的单机或分布式处理任务,ray/spark/hadoop/volcanojob'), {"index": 4})
add_project('job-template', __('特征处理'), __('特征处理相关功能'), {"index": 5})
add_project('job-template', __('机器学习框架'), __('传统机器学习框架sklearn'), {"index": 6})
add_project('job-template', __('机器学习算法'), __('传统机器学习lr/决策树/gbdt/xgb/fm等'), {"index": 7})
add_project('job-template', __('深度学习'), __('深度框架训练tf/pytorch/mxnet/mpi/horovod/kaldi等'), {"index": 8})
add_project('job-template', __('分布式加速'), __('分布式训练加速框架'), {"index": 9})
add_project('job-template', __('tf分布式'), __('tf相关的训练模型校验离线预测等功能'), {"index": 10})
add_project('job-template', __('pytorch分布式'), __('pytorch相关的训练模型校验离线预测等功能'), {"index": 11})
add_project('job-template', __('模型处理'), __('模型压缩转换处理相关的组件模板'), {"index": 13})
add_project('job-template', __('模型服务化'), __('模型服务化部署相关的组件模板'), {"index": 14})
add_project('job-template', __('推荐类模板'), __('推荐领域常用的任务模板'), {"index": 15})
add_project('job-template', __('搜索类模板'), __('搜索领域常用的任务模板'), {"index": 16})
add_project('job-template', __('广告类模板'), __('广告领域常用的任务模板'), {"index": 17})
add_project('job-template', __('多媒体类模板'), __('音视频图片文本常用的任务模板'), {"index": 18})
add_project('job-template', __('机器视觉'), __('视觉类相关模板'), {"index": 19})
add_project('job-template', __('听觉'), __('听觉类相关模板'), {"index": 20})
add_project('job-template', __('自然语言'), __('自然语言类相关模板'), {"index": 21})
add_project('job-template', __('大模型'), __('大模型相关模板'), {"index": 22})
2022-08-16 11:10:24 +08:00
2021-08-17 17:00:34 +08:00
except Exception as e:
print(e)
2021-10-14 17:36:14 +08:00
2023-09-03 21:17:55 +08:00
# @pysnooper.snoop()
def create_template(repository_id, project_name, image_name, image_describe, job_template_name,
job_template_old_names=[], job_template_describe='',job_template_workdir='', job_template_command='',
job_template_args=None, job_template_volume='', job_template_account='',
job_template_expand=None, job_template_env='', gitpath='',**kwargs):
2022-02-26 22:36:57 +08:00
if not repository_id:
return
images = db.session.query(Images).filter_by(name=image_name).first()
project = db.session.query(Project).filter_by(name=project_name).filter_by(type='job-template').first()
2021-10-14 17:36:14 +08:00
if images is None and project:
try:
images = Images()
2022-02-26 22:36:57 +08:00
images.name = image_name
2023-09-03 21:17:55 +08:00
images.describe = image_describe
images.created_by_fk = 1
images.changed_by_fk = 1
images.project_id = project.id
images.repository_id = repository_id
images.gitpath = gitpath
2021-10-14 17:36:14 +08:00
db.session.add(images)
db.session.commit()
print('add images %s' % image_name)
2021-10-14 17:36:14 +08:00
except Exception as e:
print(e)
2021-10-14 17:36:14 +08:00
db.session.rollback()
2022-02-26 22:36:57 +08:00
job_template = db.session.query(Job_Template).filter_by(name=job_template_name).first()
2022-08-16 11:10:24 +08:00
if not job_template:
for old_name in job_template_old_names:
job_template = db.session.query(Job_Template).filter_by(name=old_name).first()
if job_template:
break
2022-02-26 22:36:57 +08:00
project = db.session.query(Project).filter_by(name=project_name).filter_by(type='job-template').first()
if project and images.id:
if job_template is None:
try:
job_template = Job_Template()
2023-09-03 21:17:55 +08:00
job_template.name = job_template_name.replace('_', '-')
job_template.describe = job_template_describe
2023-09-03 21:17:55 +08:00
job_template.version = kwargs.get('job_template_version','Release')
job_template.workdir = job_template_workdir
job_template.entrypoint = job_template_command
job_template.volume_mount = job_template_volume
job_template.accounts = job_template_account
2022-08-16 11:10:24 +08:00
job_template_expand['source'] = "github"
2023-09-03 21:17:55 +08:00
job_template.expand = json.dumps(job_template_expand, indent=4, ensure_ascii=False) if job_template_expand else '{}'
job_template.created_by_fk = 1
job_template.changed_by_fk = 1
job_template.project_id = project.id
job_template.images_id = images.id
job_template.env = job_template_env
2023-09-03 21:17:55 +08:00
job_template.args = json.dumps(job_template_args, indent=4, ensure_ascii=False) if job_template_args else '{}'
db.session.add(job_template)
db.session.commit()
2023-09-03 21:17:55 +08:00
print('add job_template %s' % job_template_name.replace('_', '-'))
except Exception as e:
print(e)
db.session.rollback()
2023-09-03 21:17:55 +08:00
else:
pass
# try:
# job_template.name = job_template_name.replace('_', '-')
# job_template.describe = job_template_describe
# job_template.entrypoint = job_template_command
# job_template.volume_mount = job_template_volume
# job_template.accounts = job_template_account
# job_template_expand['source'] = "github"
# job_template.expand = json.dumps(job_template_expand, indent=4, ensure_ascii=False) if job_template_expand else '{}'
# job_template.created_by_fk = 1
# job_template.changed_by_fk = 1
# job_template.project_id = project.id
# job_template.images_id = images.id
# job_template.version = 'Release'
# job_template.env = job_template_env
# job_template.args = json.dumps(job_template_args, indent=4, ensure_ascii=False) if job_template_args else '{}'
# db.session.commit()
# print('update job_template %s' % job_template_name.replace('_', '-'))
# except Exception as e:
# print(e)
# db.session.rollback()
2021-10-14 17:36:14 +08:00
2022-02-26 22:36:57 +08:00
# 初始化创建仓库镜像模板任务流
try:
print('begin init repository')
2022-02-26 22:36:57 +08:00
repository = db.session.query(Repository).filter_by(name='hubsecret').first()
if repository is None:
try:
repository = Repository()
repository.name = 'hubsecret'
repository.server='registry.docker-cn.com'
repository.user = 'yourname'
repository.password = 'yourpassword'
repository.hubsecret = 'hubsecret'
2023-09-03 21:17:55 +08:00
repository.created_by_fk = 1
repository.changed_by_fk = 1
2022-02-26 22:36:57 +08:00
db.session.add(repository)
db.session.commit()
print('add repository hubsecret')
2022-02-26 22:36:57 +08:00
except Exception as e:
print(e)
2022-02-26 22:36:57 +08:00
db.session.rollback()
2023-12-11 13:25:07 +08:00
print('begin init job_templates')
2023-12-11 13:25:07 +08:00
init_file = os.path.join(init_dir,'init-job-template.json')
if os.path.exists(init_file):
job_templates = json.load(open(init_file, mode='r'))
for job_template_name in job_templates:
try:
job_template = job_templates[job_template_name]
job_template['repository_id'] = repository.id
create_template(**job_template)
except Exception as e1:
print(e1)
2022-05-17 14:57:21 +08:00
2022-05-22 22:05:10 +08:00
except Exception as e:
print(e)
2022-05-17 14:57:21 +08:00
2022-05-25 17:22:11 +08:00
# 创建demo pipeline
2023-09-03 21:17:55 +08:00
import pysnooper
2022-08-22 13:29:21 +08:00
# @pysnooper.snoop()
2023-09-03 21:17:55 +08:00
def create_pipeline(tasks, pipeline):
# 如果项目组或者task的模板不存在就丢失
org_project = db.session.query(Project).filter_by(name=pipeline['project']).filter_by(type='org').first()
if not org_project:
return
for task in tasks:
job_template = db.session.query(Job_Template).filter_by(name=task['job_templete']).first()
if not job_template:
return
2022-05-25 17:22:11 +08:00
# 创建pipeline
pipeline_model = db.session.query(Pipeline).filter_by(name=pipeline['name']).first()
if pipeline_model is None:
try:
pipeline_model = Pipeline()
pipeline_model.name = pipeline['name']
pipeline_model.describe = pipeline['describe']
2023-09-03 21:17:55 +08:00
pipeline_model.dag_json = json.dumps(pipeline['dag_json'], indent=4, ensure_ascii=False).replace('_', '-')
pipeline_model.created_by_fk = 1
pipeline_model.changed_by_fk = 1
pipeline_model.project_id = org_project.id
2023-09-03 21:17:55 +08:00
pipeline_model.global_env = pipeline.get('global_env','')
pipeline_model.parameter = json.dumps(pipeline.get('parameter', {}), indent=4, ensure_ascii=False)
pipeline_model.expand = json.dumps(pipeline.get('expand', {}), indent=4, ensure_ascii=False)
db.session.add(pipeline_model)
db.session.commit()
print('add pipeline %s' % pipeline['name'])
except Exception as e:
print(e)
db.session.rollback()
else:
2023-09-03 21:17:55 +08:00
return
# pipeline_model.describe = pipeline['describe']
# pipeline_model.dag_json = json.dumps(pipeline['dag_json'], indent=4, ensure_ascii=False).replace('_', '-')
# pipeline_model.created_by_fk = 1
# pipeline_model.changed_by_fk = 1
# pipeline_model.global_env = pipeline['global_env']
# pipeline_model.project_id = org_project.id
# pipeline_model.parameter = json.dumps(pipeline.get('parameter', {}))
# pipeline_model.expand = json.dumps(pipeline.get('expand', {}), indent=4, ensure_ascii=False)
# print('update pipeline %s' % pipeline['name'])
# db.session.commit()
2022-05-25 17:22:11 +08:00
# 创建task
for task in tasks:
task_model = db.session.query(Task).filter_by(name=task['name']).filter_by(pipeline_id=pipeline_model.id).first()
job_template = db.session.query(Job_Template).filter_by(name=task['job_templete']).first()
if task_model is None and job_template:
try:
task_model = Task()
2023-09-03 21:17:55 +08:00
task_model.name = task['name'].replace('_', '-')
2022-05-25 17:22:11 +08:00
task_model.label = task['label']
2023-09-03 21:17:55 +08:00
task_model.args = json.dumps(task['args'], indent=4, ensure_ascii=False)
task_model.volume_mount = task.get('volume_mount', '')
task_model.resource_memory = task.get('resource_memory', '2G')
task_model.resource_cpu = task.get('resource_cpu', '2')
task_model.resource_gpu = task.get('resource_gpu', '0')
2023-12-11 13:25:07 +08:00
task_model.resource_rdma = task.get('resource_rdma', '0')
2022-05-25 17:22:11 +08:00
task_model.created_by_fk = 1
task_model.changed_by_fk = 1
task_model.pipeline_id = pipeline_model.id
task_model.job_template_id = job_template.id
db.session.add(task_model)
db.session.commit()
print('add task %s' % task['name'])
2022-05-25 17:22:11 +08:00
except Exception as e:
print(e)
2023-09-03 21:17:55 +08:00
# # traceback.print_exc()
2022-05-25 17:22:11 +08:00
db.session.rollback()
else:
2023-09-03 21:17:55 +08:00
pass
# task_model.label = task['label']
# task_model.args = json.dumps(task['args'], indent=4, ensure_ascii=False)
# task_model.volume_mount = task.get('volume_mount', '')
# task_model.node_selector = task.get('node_selector', 'cpu=true,train=true,org=public')
# task_model.retry = int(task.get('retry', 0))
# task_model.timeout = int(task.get('timeout', 0))
# task_model.resource_memory = task.get('resource_memory', '2G')
# task_model.resource_cpu = task.get('resource_cpu', '2')
# task_model.resource_gpu = task.get('resource_gpu', '0')
# task_model.created_by_fk = 1
# task_model.changed_by_fk = 1
# task_model.pipeline_id = pipeline_model.id
# task_model.job_template_id = job_template.id
# print('update task %s' % task['name'])
# db.session.commit()
2022-06-05 17:16:36 +08:00
pipeline_model.dag_json = pipeline_model.fix_dag_json() # 修正 dag_json
2023-09-03 21:17:55 +08:00
# 没有设置位置的时候修正pipeline
if not pipeline_model.expand or not json.loads(pipeline_model.expand):
pipeline_model.expand = json.dumps(pipeline_model.fix_expand(), indent=4, ensure_ascii=False) # 修正 前端expand字段缺失
pipeline_model.expand = json.dumps(pipeline_model.fix_position(), indent=4, ensure_ascii=False) # 修正 节点中心位置到视图中间
db.session.commit()
# 自动排版
db_tasks = pipeline_model.get_tasks(db.session)
if db_tasks:
try:
tasks = {}
for task in db_tasks:
tasks[task.name] = task.to_json()
2022-05-25 17:22:11 +08:00
2023-09-03 21:17:55 +08:00
from myapp.utils import core
expand = core.fix_task_position(pipeline_model.to_json(), tasks, json.loads(pipeline_model.expand))
pipeline_model.expand = json.dumps(expand, indent=4, ensure_ascii=False)
db.session.commit()
except Exception as e:
print(e)
# traceback.print_exc()
else:
# 把expand中的任务名换成任务id
pipeline_expands = json.loads(pipeline_model.expand)
tasks = pipeline_model.get_tasks()
tasks_ids = {}
for task in tasks:
tasks_ids[str(task.name)] = task
expands = []
for exp in pipeline_expands:
# 节点信息
if 'source' in exp:
exp = {
"source": str(tasks_ids[exp['source']].id),
"arrowHeadType": "arrow",
"target": str(tasks_ids[exp['target']].id),
"id": "logic__edge-%snull-%snull" % (tasks_ids[exp['source']].id, tasks_ids[exp['target']].id)
}
# 连接线信息
else:
exp = {
"id": str(tasks_ids[exp['id']].id),
"type": "dataSet",
"position": {
"x": int(exp['position']['x']),
"y": int(exp['position']['y'])
},
"data": {
"info": {
"describe": tasks_ids[exp['id']].job_template.describe
},
"name": tasks_ids[exp['id']].name,
"label": tasks_ids[exp['id']].label
}
}
expands.append(exp)
pipeline_model.expand = json.dumps(expands)
db.session.commit()
pass
2022-05-25 17:22:11 +08:00
try:
print('begin init pipeline')
2023-12-11 13:25:07 +08:00
init_file = os.path.join(init_dir,'init-pipeline.json')
if os.path.exists(init_file):
pipelines = json.load(open(init_file, mode='r'))
for pipeline_name in pipelines:
try:
pipeline = pipelines[pipeline_name]['pipeline']
tasks = pipelines[pipeline_name]['tasks']
create_pipeline(pipeline=pipeline, tasks=tasks)
print('add pipeline %s' % pipeline_name)
except Exception as e1:
print(e1)
except Exception as e:
print(e)
# traceback.print_exc()
# 从目录中添加示例 pipeline
try:
print('begin init pipeline example')
pipelines = os.listdir('myapp/example/pipeline/')
for pipeline_name in pipelines:
2023-12-11 13:25:07 +08:00
if os.path.isdir(os.path.join('myapp/example/pipeline/',pipeline_name)):
try:
pipeline_path = os.path.join('myapp/example/pipeline/',pipeline_name,'pipeline.json')
init_path = os.path.join('myapp/example/pipeline/', pipeline_name, 'init.py')
if os.path.exists(pipeline_path):
pipeline = json.load(open(pipeline_path))
tasks = pipeline['tasks']
pipeline = pipeline['pipeline']
create_pipeline(pipeline=pipeline, tasks=tasks)
# 环境要求比较复杂可以直接在notebook里面初始化
os.makedirs('/data/k8s/kubeflow/pipeline/workspace/admin/pipeline/example/',exist_ok=True)
# shutil.copy2(f'myapp/example/pipeline/{pipeline_name}','/data/k8s/kubeflow/pipeline/workspace/admin/pipeline/example/')
if not os.path.exists(f'/data/k8s/kubeflow/pipeline/workspace/admin/pipeline/example/{pipeline_name}'):
shutil.copytree(f'myapp/example/pipeline/{pipeline_name}', f'/data/k8s/kubeflow/pipeline/workspace/admin/pipeline/example/{pipeline_name}')
# if os.path.exists(init_path):
# try:
# params = importlib.import_module(f'myapp.example.pipeline.{pipeline_name}.init')
# init_func = getattr(params, 'init')
# init_func()
# except Exception as e:
# print(e)
# # traceback.print_exc()
print('add job template using example %s' % pipeline_name)
except Exception as e1:
print(e1)
# traceback.print_exc()
2022-05-25 17:22:11 +08:00
except Exception as e:
print(e)
2023-12-11 13:25:07 +08:00
# traceback.print_exc()
2022-05-25 17:22:11 +08:00
# 添加 demo 推理 服务
2022-08-27 22:01:28 +08:00
def create_dataset(**kwargs):
dataset = db.session.query(Dataset).filter_by(name=kwargs['name']).first()
if not dataset:
try:
dataset = Dataset()
2022-08-27 22:01:28 +08:00
dataset.name = kwargs['name']
2023-09-03 21:17:55 +08:00
dataset.field = kwargs.get('field', '')
2023-04-06 23:03:18 +08:00
dataset.version = 'latest'
2023-09-03 21:17:55 +08:00
dataset.label = kwargs.get('label', '')
dataset.status = kwargs.get('status', '')
dataset.describe = kwargs.get('describe', '')
dataset.url = kwargs.get('url', '')
dataset.source = kwargs.get('source', '')
dataset.industry = kwargs.get('industry', '')
dataset.source_type = kwargs.get('source_type', '')
dataset.file_type = kwargs.get('file_type', '')
dataset.research = kwargs.get('research', '')
dataset.usage = kwargs.get('usage', '')
2022-08-27 22:01:28 +08:00
dataset.years = kwargs.get('years', '')
dataset.path = kwargs.get('path', '')
dataset.duration = kwargs.get('duration', '')
dataset.entries_num = kwargs.get('entries_num', '')
dataset.price = kwargs.get('price', '')
dataset.icon = kwargs.get('icon', '')
2023-09-03 21:17:55 +08:00
dataset.storage_class = kwargs.get('storage_class', '')
dataset.storage_size = kwargs.get('storage_size', '')
dataset.download_url = kwargs.get('download_url', '')
2022-08-27 22:01:28 +08:00
dataset.owner = 'admin'
2023-09-03 21:17:55 +08:00
dataset.created_by_fk = 1
dataset.changed_by_fk = 1
db.session.add(dataset)
db.session.commit()
2023-09-03 21:17:55 +08:00
print('add dataset %s' % kwargs.get('name', ''))
except Exception as e:
print(e)
2023-09-03 21:17:55 +08:00
# traceback.print_exc()
db.session.rollback()
2023-09-03 21:17:55 +08:00
try:
print('begin init dataset')
datasets = db.session.query(Dataset).all() # 空白数据集才初始化
if not datasets:
import csv
2023-12-11 13:25:07 +08:00
init_file = os.path.join(init_dir, 'init-dataset.csv')
if os.path.exists(init_file):
csv_reader = csv.reader(open(init_file, mode='r', encoding='utf-8-sig'))
header = None
for line in csv_reader:
if not header:
header = line
continue
data = dict(zip(header, line))
create_dataset(**data)
except Exception as e:
print(e)
2023-09-03 21:17:55 +08:00
# traceback.print_exc()
# 添加 示例 模型
# @pysnooper.snoop()
2023-09-03 21:17:55 +08:00
def create_train_model(name, describe, path, project_name, version, framework, api_type):
train_model = db.session.query(Training_Model).filter_by(name=name).filter_by(version=version).filter_by(framework=framework).first()
project = db.session.query(Project).filter_by(name=project_name).filter_by(type='org').first()
if not train_model and project:
try:
train_model = Training_Model()
train_model.name = name
2023-09-03 21:17:55 +08:00
train_model.describe = describe
train_model.path = path
train_model.project_id = project.id
train_model.describe = describe
train_model.version = version
2023-09-03 21:17:55 +08:00
train_model.framework = framework
train_model.api_type = api_type
train_model.created_by_fk = 1
train_model.changed_by_fk = 1
train_model.run_id = 'random_run_id_' + uuid.uuid4().hex[:32]
db.session.add(train_model)
db.session.commit()
print('add train model %s' % name)
except Exception as e:
print(e)
2023-09-03 21:17:55 +08:00
# traceback.print_exc()
db.session.rollback()
try:
print('begin init train_models')
2023-12-11 13:25:07 +08:00
init_file = os.path.join(init_dir, 'init-train-model.json')
if os.path.exists(init_file):
train_models = json.load(open(init_file, mode='r'))
for train_model_name in train_models:
try:
train_model = train_models[train_model_name]
create_train_model(**train_model)
except Exception as e1:
print(e1)
except Exception as e:
print(e)
2023-09-03 21:17:55 +08:00
# traceback.print_exc()
# 添加demo 服务
2022-08-22 13:29:21 +08:00
# @pysnooper.snoop()
2023-09-03 21:17:55 +08:00
def create_service(project_name, service_name, service_describe, image_name, command, env, resource_memory='2G',
resource_cpu='2', resource_gpu='0', ports='80', volume_mount='kubeflow-user-workspace(pvc):/mnt',
expand={}):
service = db.session.query(Service).filter_by(name=service_name).first()
project = db.session.query(Project).filter_by(name=project_name).filter_by(type='org').first()
if service is None and project:
try:
service = Service()
2023-09-03 21:17:55 +08:00
service.name = service_name.replace('_', '-')
service.label = service_describe
service.created_by_fk = 1
service.changed_by_fk = 1
service.project_id = project.id
service.images = image_name
service.command = command
2023-09-03 21:17:55 +08:00
service.resource_memory = resource_memory
service.resource_cpu = resource_cpu
service.resource_gpu = resource_gpu
service.env = '\n'.join([x.strip() for x in env.split('\n') if x.split()])
service.ports = ports
2023-09-03 21:17:55 +08:00
service.volume_mount = volume_mount
2022-08-22 13:29:21 +08:00
service.expand = json.dumps(expand, indent=4, ensure_ascii=False)
db.session.add(service)
db.session.commit()
2023-09-03 21:17:55 +08:00
print('add service %s' % service_name)
except Exception as e:
print(e)
2023-09-03 21:17:55 +08:00
# traceback.print_exc()
db.session.rollback()
try:
print('begin init services')
2023-12-11 13:25:07 +08:00
init_file = os.path.join(init_dir, 'init-service.json')
if os.path.exists(init_file):
services = json.load(open(init_file, mode='r'))
for service_name in services:
try:
service = services[service_name]
create_service(**service)
except Exception as e1:
print(e1)
except Exception as e:
print(e)
2023-09-03 21:17:55 +08:00
# traceback.print_exc()
# 添加 demo 推理 服务
2022-08-22 13:29:21 +08:00
# @pysnooper.snoop()
2023-09-03 21:17:55 +08:00
def create_inference(project_name, service_name, service_describe, image_name, command, env, model_name, workdir='',
model_version='', model_path='', service_type='serving', resource_memory='2G',
2023-12-11 13:25:07 +08:00
resource_cpu='2', resource_gpu='0', host='', ports='80',
2023-09-03 21:17:55 +08:00
volume_mount='kubeflow-user-workspace(pvc):/mnt', metrics='', health='', inference_config='',
expand={}):
service = db.session.query(InferenceService).filter_by(name=service_name).first()
project = db.session.query(Project).filter_by(name=project_name).filter_by(type='org').first()
if service is None and project:
try:
service = InferenceService()
2023-09-03 21:17:55 +08:00
service.name = service_name.replace('_', '-')
service.label = service_describe
service.service_type = service_type
service.model_name = model_name
service.model_version = model_version if model_version else datetime.now().strftime('v%Y.%m.%d.1')
service.model_path = model_path
2023-09-03 21:17:55 +08:00
service.created_by_fk = 1
service.changed_by_fk = 1
service.project_id = project.id
service.images = image_name
service.resource_memory = resource_memory
service.resource_cpu = resource_cpu
service.resource_gpu = resource_gpu
2023-12-11 13:25:07 +08:00
service.host = host
2023-09-03 21:17:55 +08:00
service.working_dir = workdir
service.command = command
service.inference_config = inference_config
2023-09-03 21:17:55 +08:00
service.env = '\n'.join([x.strip() for x in env.split('\n') if x.split()])
service.ports = ports
2023-09-03 21:17:55 +08:00
service.volume_mount = volume_mount
service.metrics = metrics
service.health = health
service.expand = json.dumps(expand, indent=4, ensure_ascii=False)
from myapp.views.view_inferenceserving import InferenceService_ModelView_base
inference_class = InferenceService_ModelView_base()
inference_class.src_item_json = {}
inference_class.pre_add(service)
db.session.add(service)
db.session.commit()
print('add inference %s' % service_name)
except Exception as e:
print(e)
2023-09-03 21:17:55 +08:00
# traceback.print_exc()
db.session.rollback()
try:
print('begin init inferences')
2023-12-11 13:25:07 +08:00
init_file = os.path.join(init_dir, 'init-inference.json')
if os.path.exists(init_file):
inferences = json.load(open(init_file, mode='r'))
for inference_name in inferences:
try:
inference = inferences[inference_name]
create_inference(**inference)
print('add inference %s' % inference_name)
except Exception as e1:
print(e1)
except Exception as e:
print(e)
2023-09-03 21:17:55 +08:00
# traceback.print_exc()
2023-04-06 23:03:18 +08:00
def add_aihub(info_path):
from myapp.models.model_aihub import Aihub
if not os.path.exists(info_path):
return
aihubs = json.load(open(info_path, mode='r'))
try:
if len(aihubs) > 0:
# dbsession.query(Aihub).delete()
# dbsession.commit()
2023-09-03 21:17:55 +08:00
print('add aihub ', end=' ')
2023-04-06 23:03:18 +08:00
for data in aihubs:
name = data.get('name', '')
2023-09-03 21:17:55 +08:00
print(name, end=' ')
2023-04-06 23:03:18 +08:00
label = data.get('label', '')
describe = data.get('describe', '')
uuid = data.get('uuid', '')
if name and label and describe and uuid:
aihub = db.session.query(Aihub).filter_by(uuid=uuid).first()
if not aihub:
aihub = Aihub()
aihub.doc = data.get('doc', '')
aihub.name = name
aihub.label = label
aihub.describe = describe
aihub.field = data.get('field', '')
aihub.scenes = data.get('scenes', '')
aihub.type = data.get('type', '')
aihub.pic = data.get('pic', '')
aihub.status = data.get('status', '')
aihub.uuid = uuid
aihub.images = data.get('images', '')
aihub.version = data.get('version', '')
aihub.dataset = json.dumps(data.get('dataset', {}), indent=4, ensure_ascii=False)
aihub.notebook = json.dumps(data.get('notebook', {}), indent=4, ensure_ascii=False)
aihub.job_template = json.dumps(data.get('train', {}), indent=4, ensure_ascii=False)
2023-09-03 21:17:55 +08:00
aihub.pre_train_model = json.dumps(data.get('pre_train_model', {}), indent=4, ensure_ascii=False)
2023-04-06 23:03:18 +08:00
aihub.inference = json.dumps(data.get('inference', {}), indent=4, ensure_ascii=False)
aihub.service = json.dumps(data.get('service', {}), indent=4, ensure_ascii=False)
aihub.hot = int(data.get('hot', '0'))
aihub.price = int(data.get('price', '0'))
aihub.source = data.get('source', '')
if not aihub.id:
db.session.add(aihub)
db.session.commit()
except Exception as e:
print(e)
2023-09-03 21:17:55 +08:00
# traceback.print_exc()
2023-04-06 23:03:18 +08:00
2022-11-12 17:40:23 +08:00
# 添加aihub
try:
print('begin add aihub')
2023-12-11 13:25:07 +08:00
init_file = os.path.join(init_dir, 'init-aihub.json')
if os.path.exists(init_file):
add_aihub(init_file)
2022-11-12 17:40:23 +08:00
except Exception as e:
print(e)
2023-09-03 21:17:55 +08:00
# traceback.print_exc()
2022-11-12 17:40:23 +08:00
2023-04-06 23:03:18 +08:00
# 复制cube-studio代码aihub和sdk
from myapp.tasks.schedules import cp_cubestudio
cp_cubestudio()
2023-09-03 21:17:55 +08:00
try:
SQLALCHEMY_DATABASE_URI = os.getenv('MYSQL_SERVICE', '')
if SQLALCHEMY_DATABASE_URI:
import sqlalchemy.engine.url as url
uri = url.make_url(SQLALCHEMY_DATABASE_URI)
database = uri.database
from myapp.models.model_metadata import Metadata_table
tables = db.session.query(Metadata_table).all()
if len(tables)==0:
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='project', owner='admin',describe='项目分组,模板分组,模型分组'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='project_user', owner='admin',describe='项目组用户'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='idex_query', owner='admin',describe='sqllab的查询记录'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='metadata_table', owner='admin',describe='离线库表管理'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='metadata_metric', owner='admin',describe='指标管理'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='dimension', owner='admin',describe='维表管理'))
db.session.add(Metadata_table(app='cube-studio',db='kubeflow',table='dataset',owner='admin',describe='数据集市场'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='repository', owner='admin',describe='docker仓库管理'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='docker', owner='admin', describe='在线docker镜像构建'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='images', owner='admin',describe='镜像管理'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='notebook', owner='admin', describe='notebook在线开发'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='etl_pipeline', owner='admin',describe='数据ETL的任务流管理'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='etl_task', owner='admin',describe='数据ETL的任务管理'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='job_template', owner='admin',describe='任务模板'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='pipeline', owner='admin',describe='ml任务流'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='task', owner='admin', describe='ml任务管理'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='run', owner='admin',describe='定时调度记录'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='workflow', owner='admin',describe='任务流实例'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='nni', owner='admin', describe='nni超参搜索'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='service', owner='admin',describe='内部服务管理'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='model', owner='admin', describe='模型管理'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='inferenceservice', owner='admin', describe='推理服务'))
db.session.add(Metadata_table(app='cube-studio',db='kubeflow',table='aihub',owner='admin',describe='模型应用市场,打通自动化标注,一键开发,一键微调,一建部署'))
db.session.add(Metadata_table(app='cube-studio',db='kubeflow',table='chat',owner='admin',describe='私有知识库配置领域知识文档或qa文档智能机器人问答'))
db.session.add(Metadata_table(app='cube-studio',db='kubeflow',table='chat_log',owner='admin',describe='所有的聊天日志记录'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='favorite', owner='admin',describe='收藏的数据记录'))
db.session.add(Metadata_table(app='cube-studio', db='kubeflow', table='logs', owner='admin',describe='用户行为记录'))
db.session.commit()
print('添加离线表成功')
except Exception as e:
print(e)
# traceback.print_exc()
# 添加ETL pipeline
try:
from myapp.models.model_etl_pipeline import ETL_Pipeline
tables = db.session.query(ETL_Pipeline).all()
if len(tables) == 0:
2023-12-11 13:25:07 +08:00
init_file = os.path.join(init_dir, 'init-etl-pipeline.json')
if os.path.exists(init_file):
pipelines = json.load(open(init_file, mode='r'))
for pipeline in pipelines:
db.session.add(ETL_Pipeline(
project_id=1, created_by_fk=1,changed_by_fk=1,
name=pipeline.get('name',''), config=json.dumps(pipeline.get('config',{}),indent=4,ensure_ascii=False),
describe=pipeline.get('describe','pipeline example'),workflow=pipeline.get('workflow','airflow'),
dag_json=json.dumps(pipeline.get('dag_json',{}),indent=4,ensure_ascii=False)))
db.session.commit()
print('添加etl pipeline成功')
2023-09-03 21:17:55 +08:00
except Exception as e:
print(e)
# traceback.print_exc()
2023-12-11 13:25:07 +08:00
# 添加nni超参搜索
2023-09-03 21:17:55 +08:00
try:
2023-12-11 13:25:07 +08:00
from myapp.models.model_nni import NNI
nni = db.session.query(NNI).all()
if len(nni) == 0:
init_file = os.path.join(init_dir, 'init-automl.json')
if os.path.exists(init_file):
nnis = json.load(open(init_file, mode='r'))
for nni in nnis:
db.session.add(NNI(
project_id=1, created_by_fk=1,changed_by_fk=1,
job_type=nni.get('job_type','Job'),name=nni.get('name','test'+uuid.uuid4().hex[:4]),namespace=nni.get('namespace','automl'),
describe=nni.get('describe', ''),parallel_trial_count=nni.get('parallel_trial_count', 3),max_trial_count=nni.get('max_trial_count', 12),
objective_type=nni.get('objective_type', 'maximize'),objective_goal=nni.get('objective_goal', 0.99),objective_metric_name=nni.get('objective_metric_name', 'accuracy'),
algorithm_name=nni.get('algorithm_name','Random'), parameters=json.dumps(nni.get('parameters',{}),indent=4,ensure_ascii=False),
job_json=json.dumps(nni.get('job_json',{}),indent=4,ensure_ascii=False),
job_worker_image = nni.get('job_worker_image', conf.get('NNI_IMAGES','')),
working_dir=nni.get('working_dir', '/mnt/admin/nni/demo/'),
job_worker_command=nni.get('job_worker_command', 'python xx.py'),
resource_memory=nni.get('resource_memory', '1G'),
resource_cpu=nni.get('resource_cpu', '1'),
resource_gpu=nni.get('resource_gpu', '0'),
))
db.session.commit()
print('添加etl pipeline成功')
except Exception as e:
print(e)
# traceback.print_exc()
2023-09-03 21:17:55 +08:00
2023-12-11 13:25:07 +08:00
# 添加镜像在线构建
try:
2023-09-03 21:17:55 +08:00
from myapp.models.model_docker import Docker
docker = db.session.query(Docker).all()
if len(docker) == 0:
db.session.add(Docker(
project_id=1,
created_by_fk=1,
changed_by_fk=1,
2023-12-11 13:25:07 +08:00
describe='build python environment',
base_image=conf.get('USER_IMAGE',''),
target_image=conf.get("REPOSITORY_ORG",'')+'python:2023.06.19.1',
2023-09-03 21:17:55 +08:00
need_gpu=False,
consecutive_build=True,
expand=json.dumps(
{
"volume_mount": "kubeflow-user-workspace(pvc):/mnt",
"resource_memory": "8G",
"resource_cpu": "4",
2023-12-11 13:25:07 +08:00
"resource_gpu": "0",
2023-09-03 21:17:55 +08:00
"namespace": "jupyter"
},indent=4,ensure_ascii=False)
))
db.session.commit()
print('添加在线构建镜像成功')
except Exception as e:
print(e)
# traceback.print_exc()