cube-studio/myapp/views/view_inferenceserving.py

1348 lines
61 KiB
Python
Raw Normal View History

2023-04-06 23:23:11 +08:00
import random
import requests
2023-12-11 13:23:35 +08:00
from myapp.views.baseSQLA import MyappSQLAInterface as SQLAInterface
from flask import jsonify
2023-04-06 23:23:11 +08:00
from jinja2 import Environment, BaseLoader, DebugUndefined
2022-02-26 22:36:57 +08:00
from myapp.models.model_serving import InferenceService
from myapp.utils import core
from flask_babel import gettext as __
from flask_babel import lazy_gettext as _
from flask_appbuilder.actions import action
2023-09-03 21:15:58 +08:00
from myapp import app, appbuilder, db
2022-02-26 22:36:57 +08:00
import re
2023-04-06 23:23:11 +08:00
import pytz
import pysnooper
2022-02-26 22:36:57 +08:00
import copy
from sqlalchemy.exc import InvalidRequestError
2022-02-26 22:36:57 +08:00
from myapp.models.model_job import Repository
from wtforms.ext.sqlalchemy.fields import QuerySelectField
from myapp import security_manager
from wtforms.validators import DataRequired, Length, Regexp
from wtforms import SelectField, StringField
2023-09-03 21:15:58 +08:00
from flask_appbuilder.fieldwidgets import BS3TextFieldWidget, Select2ManyWidget, Select2Widget
from myapp.forms import MyBS3TextAreaFieldWidget, MySelect2Widget, MyBS3TextFieldWidget, MySelectMultipleField
from myapp.views.view_team import Project_Join_Filter, filter_join_org_project
2022-02-26 22:36:57 +08:00
from flask import (
flash,
g,
Markup,
redirect,
2022-10-10 11:44:53 +08:00
request
2022-02-26 22:36:57 +08:00
)
from .base import (
MyappFilter,
MyappModelView,
)
from .baseApi import (
MyappModelRestApi
)
from flask_appbuilder import expose
2023-09-03 21:15:58 +08:00
import datetime, time, json
2022-02-26 22:36:57 +08:00
2023-09-03 21:15:58 +08:00
# Flask application configuration used by the views in this module.
conf = app.config

# Module-level holder with a "data" slot and a "check_time" slot, both unset
# at import time (presumably a cache of service load filled in elsewhere --
# the writer is not visible in this part of the file).
global_all_service_load = dict(data=None, check_time=None)
2023-09-03 21:15:58 +08:00
2022-02-26 22:36:57 +08:00
class InferenceService_Filter(MyappFilter):
    """Query filter limiting inference services to the projects a user joined."""

    # @pysnooper.snoop()
    def apply(self, query, func):
        """Return ``query`` as-is for admins, otherwise restricted by project.

        Non-admin users only see rows whose ``project_id`` is among the ids
        returned by ``security_manager.get_join_projects_id``.  ``func`` is
        accepted but not used here.
        """
        if not g.user.is_admin():
            joined_project_ids = security_manager.get_join_projects_id(db.session)
            query = query.filter(self.model.project_id.in_(joined_project_ids))
        return query
class InferenceService_ModelView_base():
2022-02-26 22:36:57 +08:00
# --- Data model and list/show/search column configuration -------------------
datamodel = SQLAInterface(InferenceService)
check_redirect_list_url = conf.get('MODEL_URLS', {}).get('inferenceservice', '')

# NOTE: add_columns / edit_columns are assigned again further down in the class
# body (built from model_columns + service_columns + admin_columns).
add_columns = ['service_type', 'project', 'label', 'model_name', 'model_version', 'images', 'model_path',
               'resource_memory', 'resource_cpu', 'resource_gpu', 'min_replicas', 'max_replicas', 'hpa', 'priority',
               'canary', 'shadow', 'host', 'inference_config', 'working_dir', 'command', 'volume_mount', 'env',
               'ports', 'metrics', 'health', 'expand', 'sidecar']
# 'images' used to be listed twice here; the duplicate entry has been removed.
show_columns = ['service_type', 'project', 'name', 'label', 'model_name', 'model_version', 'images', 'model_path',
                'volume_mount', 'sidecar', 'working_dir', 'command', 'env', 'resource_memory',
                'resource_cpu', 'resource_gpu', 'min_replicas', 'max_replicas', 'ports', 'inference_host_url',
                'hpa', 'priority', 'canary', 'shadow', 'health', 'model_status', 'expand', 'metrics',
                'deploy_history', 'host', 'inference_config']
enable_echart = False

edit_columns = add_columns
add_form_query_rel_fields = {
    "project": [["name", Project_Join_Filter, 'org']]
}
edit_form_query_rel_fields = add_form_query_rel_fields

list_columns = ['project', 'service_type', 'label', 'model_name_url', 'model_version', 'inference_host_url', 'ip',
                'model_status', 'resource', 'replicas_html', 'creator', 'modified', 'operate_html']
# Per-column display type and width for the list page.
cols_width = {
    "project": {"type": "ellip2", "width": 150},
    "label": {"type": "ellip2", "width": 300},
    "service_type": {"type": "ellip2", "width": 100},
    "model_name_url": {"type": "ellip2", "width": 300},
    "model_version": {"type": "ellip2", "width": 200},
    "inference_host_url": {"type": "ellip1", "width": 500},
    "ip": {"type": "ellip2", "width": 250},
    "model_status": {"type": "ellip2", "width": 100},
    "modified": {"type": "ellip2", "width": 150},
    "operate_html": {"type": "ellip2", "width": 350},
    "resource": {"type": "ellip2", "width": 300},
}
search_columns = ['name', 'created_by', 'project', 'service_type', 'label', 'model_name', 'model_version',
                  'model_path', 'host', 'model_status', 'resource_gpu']

# Extra link offered on the list page (Grafana dashboard for all services).
ops_link = [
    {
        "text": _("服务资源监控"),
        "url": conf.get('GRAFANA_SERVICE_PATH','/grafana/d/istio-service/istio-service?var-namespace=service&var-service=') + "All"
    }
]

label_title = _('推理服务')
base_order = ('id', 'desc')
order_columns = ['id']

base_filters = [["id", InferenceService_Filter, lambda: []]]

# Flatten the per-framework image lists from config into one list.  A
# comprehension is used so that no loop variable is left behind as a class
# attribute.
INFERNENCE_IMAGES = list(conf.get('INFERNENCE_IMAGES', {}).values())
images = [image for image_group in INFERNENCE_IMAGES for image in image_group]

service_type_choices = ['serving', 'tfserving', 'torch-server', 'onnxruntime', 'triton-server', 'ml-server企业版','llm-server企业版',]
# No label overrides are active (a former "host" override is disabled).
spec_label_columns = {}
service_type_choices = [x.replace('_','-') for x in service_type_choices]

# Human-readable list of "<cluster>cluster:*.<domain>" rules shown in the host
# field description.  Clusters without a resolvable SERVICE_DOMAIN are skipped;
# a missing CLUSTERS setting yields an empty string instead of a TypeError.
host_rule = ",<br>".join(
    cluster_name + "cluster:*." + service_domain
    for cluster_name, service_domain in (
        (name, cluster.get("SERVICE_DOMAIN", conf.get('SERVICE_DOMAIN', '')))
        for name, cluster in (conf.get('CLUSTERS') or {}).items()
    )
    if service_domain
)
2022-02-26 22:36:57 +08:00
# Form field definitions (label, default, help text, widget, validators) for
# the add form; the same dict is later reused for the edit form.
add_form_extra_fields = {
    "project": QuerySelectField(
        _('项目组'),
        query_factory=filter_join_org_project,
        allow_blank=True,
        widget=Select2Widget(),
        validators=[DataRequired()]
    ),
    "resource_memory": StringField(
        'memory',
        default='5G',
        description=_('内存的资源使用限制示例1G10G 最大100G如需更多联系管路员'),
        widget=BS3TextFieldWidget(),
        validators=[DataRequired()]
    ),
    "resource_cpu": StringField(
        'cpu',
        default='5',
        description=_('cpu的资源使用限制(单位核),示例 0.410最大50核如需更多联系管路员'),
        widget=BS3TextFieldWidget(),
        validators=[DataRequired()]
    ),
    "min_replicas": StringField(
        _('最小副本数'),
        default=InferenceService.min_replicas.default.arg,
        description=_('最小副本数,用来配置高可用,流量变动自动伸缩'),
        widget=BS3TextFieldWidget(),
        validators=[DataRequired()]
    ),
    "max_replicas": StringField(
        _('最大副本数'),
        default=InferenceService.max_replicas.default.arg,
        description=_('最大副本数,用来配置高可用,流量变动自动伸缩'),
        widget=BS3TextFieldWidget(),
        validators=[DataRequired()]
    ),
    "host": StringField(
        _('域名'),
        default=InferenceService.host.default.arg,
        description=_('访问域名,')+host_rule,
        widget=BS3TextFieldWidget()
    ),
    "transformer": StringField(
        _('前后置处理'),
        default=InferenceService.transformer.default.arg,
        description=_('前后置处理逻辑用于原生开源框架的请求预处理和响应预处理目前仅支持kfserving下框架'),
        widget=BS3TextFieldWidget()
    ),
    'resource_gpu': StringField(
        _('gpu'),
        default='0',
        description=_('gpu的资源使用限制(单位卡),示例:12训练任务每个容器独占整卡。申请具体的卡型号可以类似 1(V100)<span style="color:red;">虚拟化占用和共享模式占用仅企业版支持</span>'),
        widget=BS3TextFieldWidget(),
        validators=[DataRequired()]
    ),
    'sidecar': MySelectMultipleField(
        _('sidecar'),
        default='',
        description=_('容器的agent代理,istio用于服务网格'),
        widget=Select2ManyWidget(),
        validators=[],
        choices=[['istio', 'istio']]
    ),
    "priority": SelectField(
        _('服务优先级'),
        widget=MySelect2Widget(),
        default=1,
        description=_('优先满足高优先级的资源需求同时保证每个服务的最低pod副本数'),
        choices=[[1, _('高优先级')], [0, _('低优先级')]],
        validators=[DataRequired()]
    ),
    'model_name': StringField(
        _('模型名称'),
        default='',
        description=_('英文名(小写字母、数字、- 组成)最长50个字符'),
        widget=MyBS3TextFieldWidget(),
        # Raw string: "\-" is not a valid escape sequence in a normal literal.
        # NOTE(review): the help text says 50 characters, Length allows 54.
        validators=[DataRequired(), Regexp(r"^[a-z][a-z0-9\-]*[a-z0-9]$"), Length(1, 54)]
    ),
    'model_version': StringField(
        _('模型版本号'),
        # NOTE(review): evaluated once when the class body runs, so the
        # suggested version reflects the process start date, not "today".
        default=datetime.datetime.now().strftime('v%Y.%m.%d.1'),
        description=_('版本号,时间格式'),
        widget=MyBS3TextFieldWidget(),
        validators=[DataRequired(), Length(1, 54)]
    ),
    'service_type': SelectField(
        _('推理框架类型'),
        default='serving',
        description=_("推理框架类型"),
        widget=MySelect2Widget(retry_info=True),
        choices=[[x, x] for x in service_type_choices],
        validators=[DataRequired()]
    ),
    'label': StringField(
        _('标签'),
        default=_("xx模型%s框架xx版"),
        description=_('中文描述'),
        widget=BS3TextFieldWidget(),
        validators=[DataRequired()]
    ),
    "hpa": StringField(
        _('弹性伸缩'),
        default='cpu:50%,gpu:50%',
        description=_('弹性伸缩容的触发条件可以使用cpu/mem/gpu/qps等信息可以使用其中一个指标或者多个指标示例cpu:50%,mem:50%,gpu:50%'),
        widget=BS3TextFieldWidget()
    ),
    'expand': StringField(
        _('扩展'),
        default=json.dumps({
            "help_url": conf.get('GIT_URL','')+ "/images/serving"
        }, indent=4, ensure_ascii=False),
        description=_('扩展字段json格式目前支持help_url帮助文档的地址disable_load_balancer是否禁用服务的负载均衡'),
        widget=MyBS3TextAreaFieldWidget(rows=3)
    ),
    'canary': StringField(
        _('流量分流'),
        default='',
        description=_('流量分流,将该服务的所有请求,按比例分流到目标服务上。格式 service1:20%,service2:30%表示分流20%流量到service130%到service2'),
        widget=BS3TextFieldWidget()
    ),
    'shadow': StringField(
        _('流量复制'),
        default='',
        description=_('流量复制,将该服务的所有请求,按比例复制到目标服务上,格式 service1:20%,service2:30%表示复制20%流量到service130%到service2'),
        widget=BS3TextFieldWidget()
    ),
    'volume_mount': StringField(
        _('挂载'),
        default='',
        description=_('外部挂载,格式:$pvc_name1(pvc):/$container_path1,$hostpath1(hostpath):/$container_path2,4G(memory):/dev/shm,注意pvc会自动挂载对应目录下的个人rtx子目录'),
        widget=BS3TextFieldWidget()
    ),
    'model_path': StringField(
        _('模型地址'),
        default='',
        description=_('''
serving自定义镜像的推理服务模型地址随意<br>
ml-server支持sklearn和xgb导出的模型需按文档设置ml推理服务的配置文件<br>
tfserving仅支持添加了服务签名的saved_model目录地址例如/mnt/xx/../saved_model/<br>
torch-servertorch-model-archiver编译后的mar模型文件需保存模型结构和模型参数例如/mnt/xx/../xx.mar或torch script保存的模型<br>
onnxruntimeonnx模型文件的地址例如/mnt/xx/../xx.onnx<br>
triton-server框架:地址onnx:模型文件地址model.onnxpytorch:torchscript模型文件地址model.pttf:模型目录地址saved_modeltensorrt:模型文件地址model.plan
'''.strip()),
        widget=BS3TextFieldWidget(),
        validators=[]
    ),
    'images': SelectField(
        _('镜像'),
        default='',
        description=_("推理服务镜像"),
        widget=MySelect2Widget(can_input=True),
        choices=[[x, x] for x in images]
    ),
    'command': StringField(
        _('启动命令'),
        default='',
        description=_('启动命令,<font color="#FF0000">留空时将被自动重置</font>'),
        widget=MyBS3TextAreaFieldWidget(rows=3)
    ),
    'env': StringField(
        _('环境变量'),
        default='',
        description=_('使用模板的task自动添加的环境变量支持模板变量。书写格式:每行一个环境变量env_key=env_value'),
        widget=MyBS3TextAreaFieldWidget()
    ),
    'ports': StringField(
        _('端口'),
        default='',
        description=_('监听端口号,逗号分隔'),
        widget=BS3TextFieldWidget(),
        validators=[DataRequired()]
    ),
    'metrics': StringField(
        _('指标地址'),
        default='',
        description=_('请求指标采集,配置端口+url示例8080:/metrics'),
        widget=BS3TextFieldWidget()
    ),
    'health': StringField(
        _('健康检查'),
        default='',
        description=_('健康检查接口使用http接口或者shell命令示例8080:/health或者 shell:python health.py'),
        widget=BS3TextFieldWidget()
    ),
    'inference_config': StringField(
        _('推理配置文件'),
        default='',
        description=_('会配置文件的形式挂载到容器/config/目录下。<font color="#FF0000">留空时将被自动重置</font>,格式:<br>---文件名<br>多行文件内容<br>---文件名<br>多行文件内容'),
        widget=MyBS3TextAreaFieldWidget(rows=5),
        validators=[]
    )
}
# Placeholder "input" section inserted into the generated triton config.pbtxt.
input_demo = '''
[
{
name: "input_name"
data_type: TYPE_FP32
format: FORMAT_NCHW
dims: [ 3, 224, 224 ]
reshape: {
shape: [ 1, 3, 224, 224 ]
}
}
]
'''

# Placeholder "output" section inserted into the generated triton config.pbtxt.
output_demo = '''
[
{
name: "output_name"
data_type: TYPE_FP32
dims: [ 1000 ]
reshape: {
shape: [ 1, 1000 ]
}
}
]
'''

# The edit form shares the very same field dict as the add form.  (A read-only
# "name" field that used to be added for the edit form is disabled.)
edit_form_extra_fields = add_form_extra_fields

# Form columns grouped into the three sections rendered on the add/edit pages.
model_columns = ['service_type', 'project', 'label', 'model_name', 'model_version', 'images', 'model_path']
service_columns = ['resource_memory', 'resource_cpu', 'resource_gpu', 'min_replicas', 'max_replicas', 'hpa',
                   'priority', 'canary', 'shadow', 'host', 'volume_mount', 'sidecar']
admin_columns = ['inference_config', 'working_dir', 'command', 'env', 'ports', 'metrics', 'health', 'expand']

add_fieldsets = [
    (section_title, {"fields": section_columns, "expanded": True})
    for section_title, section_columns in (
        (_('模型配置'), model_columns),
        (_('推理配置'), service_columns),
        (_('管理员配置'), admin_columns),
    )
]

# Final add/edit column order: model section, service section, admin section.
add_columns = [*model_columns, *service_columns, *admin_columns]
edit_columns = add_columns
edit_fieldsets = add_fieldsets
2022-02-26 22:36:57 +08:00
2022-11-23 17:01:10 +08:00
def pre_add_web(self):
    """Set ``self.default_filter`` to ``{"created_by": <current user id>}``."""
    current_user_id = g.user.id
    self.default_filter = dict(created_by=current_user_id)
2022-02-26 22:36:57 +08:00
# @pysnooper.snoop()
2023-09-03 21:15:58 +08:00
def tfserving_model_config(self, model_name, model_version, model_path):
    """Render the ``model_config_list`` text used as ``models.config``.

    :param model_name: value placed in the ``name`` field.
    :param model_version: value placed in ``versions`` (inserted unquoted).
    :param model_path: base path; leading/trailing slashes are stripped and
        the result is wrapped as ``/<path>/``.
    :return: the rendered configuration text.
    """
    base_dir = model_path.strip('/')
    template = '''
model_config_list {
config {
name: "%s"
base_path: "/%s/"
model_platform: "tensorflow"
model_version_policy {
specific {
versions: %s
}
}
}
}
'''
    return template % (model_name, base_dir, model_version)
def tfserving_monitoring_config(self):
    """Return the fixed ``monitoring.config`` text enabling prometheus at /metrics."""
    return '''
prometheus_config {
enable: true
path: "/metrics"
}
'''
def tfserving_platform_config(self):
    """Return the fixed ``platform.config`` text (tensorflow, GPU ``allow_growth`` on)."""
    return '''
platform_configs {
key: "tensorflow"
value {
source_adapter_config {
[type.googleapis.com/tensorflow.serving.SavedModelBundleSourceAdapterConfig] {
legacy_config {
session_config {
gpu_options {
allow_growth: true
}
}
}
}
}
}
}
'''
2023-09-03 21:15:58 +08:00
# 这些配置可在环境变量中 TS_<PROPERTY_NAME>中实现
2022-02-26 22:36:57 +08:00
def torch_config(self):
    """Return the fixed ``config.properties`` text used for torch-server.

    According to the note accompanying this method, the same settings can
    also be provided through ``TS_<PROPERTY_NAME>`` environment variables.
    """
    return '''
inference_address=http://0.0.0.0:8080
management_address=http://0.0.0.0:8081
metrics_address=http://0.0.0.0:8082
cors_allowed_origin=*
cors_allowed_methods=GET, POST, PUT, OPTIONS
cors_allowed_headers=X-Custom-Header
number_of_netty_threads=32
enable_metrics_api=true
job_queue_size=1000
enable_envvars_config=true
async_logging=true
default_response_timeout=120
max_request_size=6553500
vmargs=-Dlog4j.configurationFile=file:///config/log4j2.xml
'''
def torch_log(self):
    """Return the fixed ``log4j2.xml`` fragment (rolling ``access_log`` appender)."""
    return '''
<RollingFile name="access_log" fileName="${env:LOG_LOCATION:-logs}/access_log.log" filePattern="${env:LOG_LOCATION:-logs}/access_log.%d{dd-MMM}.log.gz">
<PatternLayout pattern="%d{ISO8601} - %m%n"/>
<Policies>
<SizeBasedTriggeringPolicy size="100 MB"/>
<TimeBasedTriggeringPolicy/>
</Policies>
<DefaultRolloverStrategy max="5"/>
</RollingFile>
'''
2023-09-03 21:15:58 +08:00
def triton_config(self, item, model_type):
    """Render the ``config.pbtxt`` text for a triton-server model.

    :param item: object providing ``model_name``.
    :param model_type: one of ``onnx``, ``tensorrt``, ``torch``, ``pytorch``
        or ``tf``; any other value raises ``KeyError``.
    :return: config text containing the model name, the platform mapped from
        ``model_type``, ``self.input_demo`` / ``self.output_demo`` and a
        type-specific tuning section (empty for ``tensorrt``).
    """
    platform_by_type = {
        "onnx": "onnxruntime_onnx",
        "tensorrt": "tensorrt_plan",
        "torch": "pytorch_libtorch",
        "pytorch": "pytorch_libtorch",
        "tf": "tensorflow_savedmodel"
    }

    tf_section = '''
optimization { execution_accelerators {
gpu_execution_accelerator : [ {
name : "tensorrt"
parameters { key: "precision_mode" value: "FP16" }}]
}}
'''
    onnx_section = '''
parameters { key: "intra_op_thread_count" value: { string_value: "0" } }
parameters { key: "execution_mode" value: { string_value: "1" } }
parameters { key: "inter_op_thread_count" value: { string_value: "0" } }
'''
    torch_section = '''
parameters: { key: "DISABLE_OPTIMIZED_EXECUTION" value: { string_value:"true" } }
parameters: { key: "INFERENCE_MODE" value: { string_value: "false" } }
'''
    # Types without an entry here get no extra section.
    section_by_type = {
        'tf': tf_section,
        'onnx': onnx_section,
        'pytorch': torch_section,
        'torch': torch_section,
    }
    extra_section = section_by_type.get(model_type, '')

    template = '''
name: "%s"
platform: "%s"
max_batch_size: 0
input %s
output %s
%s
'''
    return template % (
        item.model_name,
        platform_by_type[model_type],
        self.input_demo,
        self.output_demo,
        extra_section,
    )
# @pysnooper.snoop(watch_explode=('item'))
def use_expand(self, item):
    """Fill in derived and default deployment settings on ``item`` in place.

    Steps, in order:

    * empty ``ports`` / ``env`` / ``metrics`` / ``health`` are taken from the
      ``INFERNENCE_*`` config tables for ``item.service_type``; ``$model_name``
      and ``$model_version`` in the health string are substituted;
    * for an http(s) ``model_path`` a ``wget`` (plus ``unzip`` / ``tar``)
      command prefix is prepared and the local model path is derived from the
      downloaded file name;
    * depending on ``item.service_type`` (tfserving, ml-server, torch-server,
      triton-server, onnxruntime) the start command, health check, working
      directory and ``inference_config`` are defaulted when empty;
    * an empty ``item.name`` becomes ``<model_name>-<version digits>``, and the
      name is cut to 60 characters.

    The local ``expand`` dict is filled with generated config files but is not
    written back to ``item.expand`` (that assignment is disabled).

    :param item: the inference service record being added or updated.
    """
    # Parse the stored expand JSON first; framework configs are collected here.
    expand = json.loads(item.expand) if item.expand else {}
    model_version = item.model_version.replace('v', '').replace('.', '').replace(':', '')
    # Treat a missing model path as '' so the substring checks below cannot
    # fail with "argument of type 'NoneType' is not iterable".
    source_path = item.model_path or ''
    model_path = "/" + source_path.strip('/') if source_path else ''

    if not item.ports:
        item.ports = conf.get('INFERNENCE_PORTS', {}).get(item.service_type, item.ports)
    if not item.env:
        item.env = '\n'.join(conf.get('INFERNENCE_ENV', {}).get(item.service_type, []))
    if not item.metrics:
        item.metrics = conf.get('INFERNENCE_METRICS', {}).get(item.service_type, item.metrics)
    if not item.health:
        health_template = conf.get('INFERNENCE_HEALTH', {}).get(item.service_type, '')
    else:
        health_template = item.health
    item.health = health_template.replace('$model_name', item.model_name).replace('$model_version', item.model_version)

    # Network addresses are downloaded as the first step of the start command.
    download_command = ''
    if 'http:' in source_path or 'https:' in source_path:
        model_file = source_path[source_path.rindex('/') + 1:]
        model_path = model_file
        download_command = 'wget %s && ' % source_path
        # The archive must be named like the directory it contains, and that
        # directory must sit directly inside it; other file kinds must not be
        # compressed.
        extracted_dir = model_file.replace('.zip', '').replace('.tar.gz', '')
        if '.zip' in source_path:
            # "-o" overwrites without prompting; the former "-O" expects a
            # charset argument and would have swallowed the file name.
            download_command += 'unzip -o %s && ' % model_file
            model_path = extracted_dir
        if '.tar.gz' in source_path:
            download_command += 'tar -zxvf %s && ' % model_file
            model_path = extracted_dir

    if item.service_type == 'tfserving':
        des_model_path = "/models/%s/" % (item.model_name,)
        des_version_path = "/models/%s/%s/" % (item.model_name, model_version)
        if not item.id or not item.command:
            item.command = download_command + '''mkdir -p %s && cp -r %s/* %s && /usr/bin/tf_serving_entrypoint.sh --model_config_file=/config/models.config --monitoring_config_file=/config/monitoring.config --platform_config_file=/config/platform.config''' % (des_version_path, model_path, des_version_path)
        # NOTE(review): assumed to apply to every tfserving item, not only when
        # the command is regenerated -- confirm against the original nesting.
        item.health = '8501:/v1/models/%s/versions/%s/metadata' % (item.model_name, model_version)

        expand['models.config'] = expand.get('models.config') or self.tfserving_model_config(item.model_name, model_version, des_model_path)
        expand['monitoring.config'] = expand.get('monitoring.config') or self.tfserving_monitoring_config()
        expand['platform.config'] = expand.get('platform.config') or self.tfserving_platform_config()
        if not item.inference_config:
            item.inference_config = '''
---models.config
%s
---monitoring.config
%s
---platform.config
%s
''' % (
                self.tfserving_model_config(item.model_name, model_version, des_model_path),
                self.tfserving_monitoring_config(),
                self.tfserving_platform_config()
            )

    if item.service_type == 'ml-server':
        if not item.inference_config:
            item.inference_config = '''
---config.json
[
{
"name": "%s",
"model_path": "%s",
"framework": "sklearn",
"version": "%s",
"enable": true
}
]
''' % (item.model_name, model_path, item.model_version)
        if not item.command:
            item.command = 'python server.py --config_path /config/config.json'
        if not item.host:
            item.host = f'/v1/models/{item.model_name}/metadata'
        # NOTE(review): assumed to apply to every ml-server item -- confirm
        # against the original nesting.
        item.health = '80:/v1/models/%s/metadata' % (item.model_name,)

    if item.service_type == 'torch-server':
        if not item.working_dir:
            item.working_dir = '/models'
        model_file = model_path[model_path.rindex('/') + 1:] if '/' in model_path else model_path
        tar_command = 'ls'
        if '.mar' not in model_path:
            tar_command = 'torch-model-archiver --model-name %s --version %s --handler %s --serialized-file %s --export-path /models -f' % (item.model_name, model_version, item.transformer or item.model_type, model_path)
        elif ('http:' in source_path or 'https://' in source_path) and item.working_dir == '/models':
            print('has download to des_version_path')
        else:
            tar_command = 'cp -rf %s /models/' % (model_path)
        if not item.id or not item.command:
            item.command = download_command + 'cp /config/* /models/ && ' + tar_command + ' && torchserve --start --model-store /models --models %s=%s.mar --foreground --ts-config=/config/config.properties' % (item.model_name, item.model_name)

        expand['config.properties'] = expand.get('config.properties') or self.torch_config()
        expand['log4j2.xml'] = expand.get('log4j2.xml') or self.torch_log()
        if not item.inference_config:
            item.inference_config = '''
---config.properties
%s
---log4j2.xml
%s
''' % (
                self.torch_config(),
                self.torch_log()
            )

    if item.service_type == 'triton-server':
        # Detect the model type from the file name; later checks win.
        model_type = 'tf'
        if '.onnx' in model_path:
            model_type = 'onnx'
        if '.plan' in model_path:
            model_type = 'tensorrt'
        if '.pt' in model_path or '.pth' in model_path:
            model_type = 'pytorch'
        if not item.id or not item.command:
            if model_type == 'tf':
                item.command = download_command + 'mkdir -p /models/{model_name}/{model_version}/model.savedmodel && cp /config/* /models/{model_name}/ && cp -r /{model_path}/* /models/{model_name}/{model_version}/model.savedmodel && tritonserver --model-repository=/models --strict-model-config=true --log-verbose=1'.format(model_path=model_path.strip('/'), model_name=item.model_name, model_version=model_version)
            else:
                model_file_ext = model_path.split(".")[-1]
                item.command = download_command + 'mkdir -p /models/{model_name}/{model_version}/ && cp /config/* /models/{model_name}/ && cp -r {model_path} /models/{model_name}/{model_version}/model.{model_file_ext} && tritonserver --model-repository=/models --strict-model-config=true --log-verbose=1'.format(model_path=model_path, model_name=item.model_name, model_version=model_version, model_file_ext=model_file_ext)

        config_str = self.triton_config(item, model_type)
        # Previously saved config.pbtxt; an empty, missing or malformed
        # previous expand is treated as "no previous config" instead of
        # raising.
        old_config_str = ''
        if item.id:
            previous_expand = (getattr(self, 'src_item_json', None) or {}).get('expand')
            try:
                old_config_str = (json.loads(previous_expand) if previous_expand else {}).get('config.pbtxt', '')
            except (TypeError, ValueError, AttributeError):
                old_config_str = ''
        new_config_str = expand.get('config.pbtxt', '')
        if not item.id:
            expand['config.pbtxt'] = config_str
        elif new_config_str == old_config_str and new_config_str != config_str:
            expand['config.pbtxt'] = config_str
        elif not new_config_str:
            expand['config.pbtxt'] = config_str
        if not item.inference_config:
            item.inference_config = '''
---config.pbtxt
%s
''' % (
                config_str,
            )

    if item.service_type == 'onnxruntime':
        if not item.id or not item.command:
            item.command = download_command + './onnxruntime_server --log_level info --model_path %s' % model_path

    if not item.name:
        item.name = item.model_name + "-" + model_version
    if len(item.name) > 60:
        item.name = item.name[:60]
    # item.expand is intentionally left untouched (write-back is disabled).
2024-03-05 10:47:18 +08:00
# @pysnooper.snoop()
2022-02-26 22:36:57 +08:00
def pre_add(self, item):
    """Normalise ``item`` before it is stored.

    Replaces ``_`` with ``-`` in the name, defaults an empty model path to
    ``''`` and an empty volume mount to the project's volume mount, runs
    ``use_expand`` and fills empty resource requests (2G memory, 2 cpu,
    0 gpu).  A warning is flashed when the model path is a remote
    ``.zip`` / ``.tar.gz`` archive.
    """
    if item.name:
        item.name = item.name.replace("_", "-")
    if not item.model_path:
        item.model_path = ''
    if not item.volume_mount:
        item.volume_mount = item.project.volume_mount

    self.use_expand(item)

    resource_fallbacks = (
        ('resource_memory', '2G'),
        ('resource_cpu', '2'),
        ('resource_gpu', '0'),
    )
    for field_name, fallback in resource_fallbacks:
        if not getattr(item, field_name):
            setattr(item, field_name, fallback)

    # During initialisation there is no session, yet flash would still be
    # called and raise, so any failure here is deliberately ignored.
    try:
        path = item.model_path
        is_remote = 'http:' in path or 'https:' in path
        is_archive = '.zip' in path or '.tar.gz' in path
        if is_remote and is_archive:
            flash(__('检测到模型地址为网络压缩文件,需压缩文件名和解压后文件夹名相同'), 'warning')
    except Exception as e:
        pass
2022-02-26 22:36:57 +08:00
2023-09-03 21:15:58 +08:00
def delete_old_service(self, service_name, cluster):
    """Delete the k8s objects of a service and its debug-/test- variants.

    For ``service_name``, ``debug-<name>`` and ``test-<name>`` in the
    configured ``SERVICE_NAMESPACE`` this removes the deployment, statefulset,
    service, ``<name>-external`` service, istio ingress, hpa and configmap.
    Any exception is printed and otherwise ignored.

    :param service_name: base name of the service to remove.
    :param cluster: cluster settings; ``KUBECONFIG`` is read from it.
    """
    try:
        from myapp.utils.py.py_k8s import K8s

        k8s_client = K8s(cluster.get('KUBECONFIG', ''))
        namespace = conf.get('SERVICE_NAMESPACE')
        for prefix in ('', 'debug-', 'test-'):
            name = prefix + service_name
            # External service name: lower-cased, at most 60 chars, no
            # leading/trailing dash.
            external_name = (name + "-external").lower()[:60].strip('-')
            k8s_client.delete_deployment(namespace=namespace, name=name)
            k8s_client.delete_statefulset(namespace=namespace, name=name)
            k8s_client.delete_service(namespace=namespace, name=name)
            k8s_client.delete_service(namespace=namespace, name=external_name)
            k8s_client.delete_istio_ingress(namespace=namespace, name=name)
            k8s_client.delete_hpa(namespace=namespace, name=name)
            k8s_client.delete_configmap(namespace=namespace, name=name)
    except Exception as e:
        print(e)
2022-02-26 22:36:57 +08:00
# @pysnooper.snoop(watch_explode=('item',))
def pre_update(self, item):
    """Prepare ``item`` for an update and clean up what the change obsoletes.

    Runs ``pre_add`` first.  Then, compared with the previous state in
    ``self.src_item_json``:

    * if the service name changed, the old service is deleted from the item's
      current cluster and a message is flashed;
    * if the project changed and the old project lives on another cluster,
      the old service is deleted from that cluster as well;
    * if the clusters' ``SERVICE_DOMAIN`` differ, the old domain inside
      ``item.host`` is replaced by the new one.

    Errors in the project-change handling are printed and ignored.
    """
    self.pre_add(item)

    previous = self.src_item_json
    old_name = previous.get('name', '')

    # A changed model name/version changes the service name, so the service
    # deployed under the old name has to be removed.
    if old_name and item.name != old_name:
        self.delete_old_service(old_name, item.project.cluster)
        flash(__("发现模型服务变更,启动清理服务")+'%s:%s'%(previous.get('model_name',''),previous.get('model_version','')),'success')

    src_project_id = previous.get('project_id', 0)
    if src_project_id and src_project_id != item.project.id:
        try:
            from myapp.models.model_team import Project
            src_project = db.session.query(Project).filter_by(id=int(src_project_id)).first()
            if src_project:
                old_cluster = src_project.cluster
                new_cluster = item.project.cluster
                # Cluster changed: clear what is still deployed on the old
                # one, via the shared helper instead of a copy of its body.
                if old_cluster['NAME'] != new_cluster['NAME'] and old_name:
                    self.delete_old_service(old_name, old_cluster)
                # Domain suffix changed: rewrite the host.  Skipped for an
                # empty host or empty old domain; str.replace('', x) would
                # insert x between every character.
                old_domain = old_cluster['SERVICE_DOMAIN']
                new_domain = new_cluster['SERVICE_DOMAIN']
                if item.host and old_domain and old_domain != new_domain:
                    item.host = item.host.replace(old_domain, new_domain)
        except Exception as e:
            print(e)
# 事后无法读取到project属性
2022-02-26 22:36:57 +08:00
def pre_delete(self, item):
    """Remove the service's deployed k8s objects, then flash a success message."""
    target_cluster = item.project.cluster
    self.delete_old_service(item.name, target_cluster)
    flash(__('服务清理完成'), category='success')
2022-02-26 22:36:57 +08:00
@expose('/clear/<service_id>', methods=['POST', "GET"])
def clear(self, service_id):
    """Take a service offline and redirect back to the service list.

    When a service with ``service_id`` exists, its k8s objects are deleted,
    ``model_status`` is set to ``offline``, a ``clear: <user> <time>`` line is
    appended to ``deploy_history`` and the change is committed.
    """
    service = db.session.query(InferenceService).filter_by(id=service_id).first()
    if service:
        self.delete_old_service(service.name, service.project.cluster)
        service.model_status = 'offline'
        history = service.deploy_history or ''
        cleared_at = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        service.deploy_history = history + "\n" + "clear: %s %s" % (g.user.username, cleared_at)
        db.session.commit()
        # NOTE(review): assumed to be flashed only when a service was found.
        flash(__('服务清理完成'), category='success')
    list_url = conf.get('MODEL_URLS', {}).get('inferenceservice', '')
    return redirect(list_url)
2023-09-03 21:15:58 +08:00
@expose('/deploy/debug/<service_id>', methods=['POST', "GET"])
# @pysnooper.snoop()
def deploy_debug(self, service_id):
    """Deploy ``service_id`` with ``env='debug'`` and return ``self.deploy``'s result."""
    target_env = 'debug'
    return self.deploy(service_id, env=target_env)
2022-02-26 22:36:57 +08:00
2023-09-03 21:15:58 +08:00
@expose('/deploy/test/<service_id>', methods=['POST', "GET"])
# @pysnooper.snoop()
def deploy_test(self, service_id):
    """Deploy ``service_id`` with ``env='test'`` and return ``self.deploy``'s result."""
    target_env = 'test'
    return self.deploy(service_id, env=target_env)
2022-02-26 22:36:57 +08:00
@expose('/deploy/prod/<service_id>', methods=['POST', "GET"])
# @pysnooper.snoop()
def deploy_prod(self, service_id):
    """Deploy ``service_id`` with ``env='prod'`` and return ``self.deploy``'s result."""
    target_env = 'prod'
    return self.deploy(service_id, env=target_env)
2022-02-26 22:36:57 +08:00
2023-09-03 21:15:58 +08:00
@expose('/deploy/update/', methods=['POST', 'GET'])
def update_service(self):
    """Look up a service and, on POST, optionally rescale and redeploy it.

    Arguments are taken from the JSON body (if any) overlaid with the query
    string. The service is resolved by the first of ``service_id``,
    ``service_name`` or ``model_name`` (optionally narrowed by
    ``model_version``; model lookups only match ``online`` services and pick
    the highest id).

    On POST with a non-zero ``min_replicas`` the replica bounds are updated,
    committed, and the service is redeployed.

    :return: JSON ``{"result": {"service", "deploy"}, "status", "message"}``;
        ``status`` is ``0`` on success and ``-1`` on any failure, including
        "service not found".
    """
    args = request.get_json(silent=True) or {}
    namespace = conf.get('SERVICE_NAMESPACE', 'service')
    args.update(request.args)

    # `or 0` keeps an empty-string parameter from crashing int().
    service_id = int(args.get('service_id') or 0)
    service_name = args.get('service_name', '')
    model_name = args.get('model_name', '')
    model_version = args.get('model_version', '')

    service = None
    if service_id:
        service = db.session.query(InferenceService).filter_by(id=service_id).first()
    elif service_name:
        service = db.session.query(InferenceService).filter_by(name=service_name).first()
    elif model_name:
        query = db.session.query(InferenceService).filter(InferenceService.model_name == model_name)
        if model_version:
            query = query.filter(InferenceService.model_version == model_version)
        service = query \
            .filter(InferenceService.model_status == 'online') \
            .order_by(InferenceService.id.desc()).first()

    if not service:
        return jsonify({
            "result": "",
            "status": -1,
            "message": "service not exist or service not online"
        })

    status = 0
    message = 'success'
    if request.method == 'POST':
        min_replicas = int(args.get('min_replicas') or 0)
        if min_replicas:
            service.min_replicas = min_replicas
            # Keep max >= min so the replica range stays valid.
            if service.max_replicas < min_replicas:
                service.max_replicas = min_replicas
            db.session.commit()
            try:
                self.deploy(service.id)
            except Exception as e:
                print(e)
                status = -1
                message = str(e)
            # Give the cluster a moment before the deployment is read back.
            time.sleep(3)

    from myapp.utils.py.py_k8s import K8s
    k8s_client = K8s(service.project.cluster.get('KUBECONFIG', ''))
    deploy = None
    try:
        deploy = k8s_client.AppsV1Api.read_namespaced_deployment(name=service.name, namespace=namespace)
    except Exception as e:
        print(e)
        # Must be a plain int: a trailing comma here would turn it into a tuple.
        status = -1
        message = str(e)

    back = {
        "result": {
            "service": service.to_json(),
            "deploy": deploy.to_dict() if deploy else {}
        },
        "status": status,
        "message": message
    }
    return jsonify(back)
2022-08-03 10:14:31 +08:00
# @pysnooper.snoop()
2023-09-03 21:15:58 +08:00
def deploy(self, service_id, env='prod'):
    """Deploy an inference service to its project's cluster for one environment.

    Steps, in order: optional configmap built from ``inference_config``,
    deployment, in-cluster service, istio ingress, an optional ``-external``
    service (external IPs, or an existing TKE load balancer when no external
    IP is available) and, for ``prod`` only, the HPA. Afterwards the model
    status and the deploy history (last 10 lines) are updated and committed.

    :param service_id: primary key of the ``InferenceService`` to deploy.
    :param env: ``'debug'``, ``'test'`` or ``'prod'``. ``debug``/``test``
        resources are prefixed with the env name; ``debug`` replaces the
        command with a long sleep and runs one replica.
    :return: a redirect to the pod debug page (``debug`` with a pod found),
        otherwise a redirect to the inference-service list URL.
    """
    service = db.session.query(InferenceService).filter_by(id=service_id).first()
    namespace = conf.get('SERVICE_NAMESPACE', 'service')
    name = service.name
    command = service.command
    deployment_replicas = service.min_replicas
    if env == 'debug':
        name = env + '-' + service.name
        command = 'sleep 43200'
        deployment_replicas = 1
    if env == 'test':
        name = env + '-' + service.name

    image_pull_secrets = conf.get('HUBSECRET', [])
    user_repositorys = db.session.query(Repository).filter(Repository.created_by_fk == g.user.id).all()
    image_pull_secrets = list(set(image_pull_secrets + [rep.hubsecret for rep in user_repositorys]))

    from myapp.utils.py.py_k8s import K8s
    k8s_client = K8s(service.project.cluster.get('KUBECONFIG', ''))

    # inference_config holds several files separated by "\n---"; the first line
    # of each chunk (leading dashes stripped) is the file name, the rest its content.
    config_datas = service.inference_config.strip().split("\n---") if service.inference_config else []
    config_datas = [x.strip() for x in config_datas if x.strip()]
    volume_mount = service.volume_mount
    print('文件个数:', len(config_datas))
    config_data = {}
    for data in config_datas:
        file_name = re.sub('^-*', '', data.split('\n')[0]).strip()
        file_content = '\n'.join(data.split('\n')[1:])
        if file_name and file_content:
            config_data[file_name] = file_content
    if config_data:
        print('create configmap')
        k8s_client.create_configmap(namespace=namespace, name=name, data=config_data, labels={'app': name})
        volume_mount += ",%s(configmap):/config/" % name

    ports = [int(port) for port in service.ports.split(',')]
    gpu_num, gpu_type, resource_name = core.get_gpu(service.resource_gpu)

    # Optional text columns may be NULL; fall back to '' before concatenating.
    model_path = service.model_path if service.model_path else ''
    pod_env = service.env or ''
    pod_env += "\nKUBEFLOW_ENV=" + env
    pod_env += '\nKUBEFLOW_MODEL_PATH=' + model_path
    pod_env += '\nKUBEFLOW_MODEL_VERSION=' + service.model_version
    pod_env += '\nKUBEFLOW_MODEL_IMAGES=' + service.images
    pod_env += '\nKUBEFLOW_MODEL_NAME=' + service.model_name
    pod_env += '\nKUBEFLOW_AREA=' + json.loads(service.project.expand).get('area', 'guangzhou')
    pod_env += "\nRESOURCE_CPU=" + service.resource_cpu
    pod_env += "\nRESOURCE_MEMORY=" + service.resource_memory
    pod_env += "\nRESOURCE_GPU=" + str(int(gpu_num))
    pod_env += "\nMODEL_PATH=" + model_path
    pod_env = pod_env.strip(',')

    if env == 'test' or env == 'debug':
        # Best effort: remove any previous test/debug workload first.
        try:
            print('delete deployment')
            k8s_client.delete_deployment(namespace=namespace, name=name)
            k8s_client.delete_statefulset(namespace=namespace, name=name)
        except Exception as e:
            print(e)

    # All service traffic goes through the ingress, so there is no istio envoy
    # proxy unless the sidecar is explicitly requested below.
    labels = {"app": name, "user": service.created_by.username, 'pod-type': "inference"}
    try:
        # The pod also exposes the metrics and health ports ("<port>:<path>").
        pod_ports = copy.deepcopy(ports)
        try:
            if service.metrics.strip():
                metrics_port = int(service.metrics[:service.metrics.index(":")])
                pod_ports.append(metrics_port)
        except Exception as e:
            print(e)
        try:
            if service.health.strip():
                health_port = int(service.health[:service.health.index(":")])
                pod_ports.append(health_port)
        except Exception:
            # A missing or malformed health setting just means no extra port.
            pass
        pod_ports = list(set(pod_ports))

        print('create deployment')
        # https://istio.io/latest/docs/reference/config/annotations/
        if service.sidecar and 'istio' in service.sidecar:
            labels['sidecar.istio.io/inject'] = 'true'

        pod_annotations = {
            'project': service.project.name
        }
        # Health checks are only configured for a "<port>:<path>" value and
        # never for debug pods.
        health = service.health if service.health and ':' in service.health and env != 'debug' else None
        k8s_client.create_deployment(
            namespace=namespace,
            name=name,
            replicas=deployment_replicas,
            labels=labels,
            annotations=pod_annotations,
            command=['sh', '-c', command] if command else None,
            args=None,
            volume_mount=volume_mount,
            working_dir=service.working_dir,
            node_selector=service.get_node_selector(),
            resource_memory=service.resource_memory,
            resource_cpu=service.resource_cpu,
            resource_gpu=service.resource_gpu if service.resource_gpu else '',
            image_pull_policy=conf.get('IMAGE_PULL_POLICY', 'Always'),
            image_pull_secrets=image_pull_secrets,
            image=service.images,
            hostAliases=conf.get('HOSTALIASES', ''),
            env=pod_env,
            privileged=False,
            accounts=None,
            username=service.created_by.username,
            ports=pod_ports,
            health=health
        )
    except Exception as e:
        flash('deploymnet:' + str(e), 'warning')

    # Monitoring: prometheus scrape annotations need both port and path.
    if service.metrics and ':' in service.metrics:
        annotations = {
            "prometheus.io/scrape": "true",
            "prometheus.io/port": service.metrics.split(":")[0],
            "prometheus.io/path": service.metrics.split(":")[1]
        }
    else:
        annotations = {}

    print('deploy service')
    raw_disable_lb = json.loads(service.expand).get('disable_load_balancer', 'false') if service.expand else 'false'
    disable_load_balancer = str(raw_disable_lb).lower() == 'true'

    k8s_client.create_service(
        namespace=namespace,
        name=name,
        username=service.created_by.username,
        ports=ports,
        annotations=annotations,
        selector=labels,
        disable_load_balancer=disable_load_balancer
    )

    # Default host: <service name>.<service domain of the cluster or global config>.
    host = service.name + "." + service.project.cluster.get('SERVICE_DOMAIN', conf.get('SERVICE_DOMAIN', ''))
    # A host configured on the service wins, as long as it is not an IP.
    if service.host and not core.checkip(service.host):
        config_host = service.host.replace('http://', '').replace('https://', '').strip()
        if "/" in config_host:
            config_host = config_host[:config_host.index("/")]
        if config_host:
            host = config_host
    # A prefix distinguishes the debug/test environments.
    if host and (env == 'debug' or env == 'test'):
        host = env + '.' + host
    try:
        print('deploy istio ingressgateway')
        k8s_client.create_istio_ingress(
            namespace=namespace,
            name=name,
            host=host,
            ports=service.ports.split(','),
            canary=service.canary,
            shadow=service.shadow
        )
    except Exception as e:
        print(e)

    # Access by IP goes through a proxy IP, so node scale-out/in does not
    # change the address clients use.
    SERVICE_EXTERNAL_IP = []
    # 1) project-level IP(s)
    if service.project.expand:
        ip = json.loads(service.project.expand).get('SERVICE_EXTERNAL_IP', '')
        if ip and isinstance(ip, str):
            SERVICE_EXTERNAL_IP = [ip]
        if ip and isinstance(ip, list):
            SERVICE_EXTERNAL_IP = ip
    # 2) global IP(s)
    if not SERVICE_EXTERNAL_IP:
        SERVICE_EXTERNAL_IP = conf.get('SERVICE_EXTERNAL_IP', None)
    # 3) the IP this request came in on (port stripped)
    if not SERVICE_EXTERNAL_IP:
        ip = request.host[:request.host.rindex(':')] if ':' in request.host else request.host
        if ip == '127.0.0.1':
            cluster_host = service.project.cluster.get('HOST', '')
            if cluster_host:
                cluster_host = cluster_host[:cluster_host.rindex(':')] if ':' in cluster_host else cluster_host
                SERVICE_EXTERNAL_IP = [cluster_host]
        elif core.checkip(ip):
            SERVICE_EXTERNAL_IP = [ip]

    if SERVICE_EXTERNAL_IP:
        # Entries may be "<internal>|<public>"; the proxy binds the internal part.
        SERVICE_EXTERNAL_IP = [ip.split('|')[0].strip() for ip in SERVICE_EXTERNAL_IP]
        meet_ports = core.get_not_black_port(20000 + 10 * service.id)
        service_ports = [[meet_ports[index], port] for index, port in enumerate(ports)]
        service_external_name = (service.name + "-external").lower()[:60].strip('-')
        print('deploy proxy ip')
        external_annotations = {
            "service.kubernetes.io/local-svc-only-bind-node-with-pod": "true",
            "service.cloud.tencent.com/local-svc-weighted-balance": "true"
        }
        ipvs_mode = conf.get('K8S_NETWORK_MODE', 'iptables') == 'ipvs'
        k8s_client.create_service(
            namespace=namespace,
            name=service_external_name,
            username=service.created_by.username,
            annotations=external_annotations,
            ports=service_ports,
            selector=labels,
            service_type='NodePort' if ipvs_mode else 'ClusterIP',
            external_ip=None if ipvs_mode else SERVICE_EXTERNAL_IP
        )

    # Fallback when no external IP exists: reuse an existing TKE load balancer
    # (project setting, then cluster setting, then global config).
    TKE_EXISTED_LBID = ''
    if service.project.expand:
        TKE_EXISTED_LBID = json.loads(service.project.expand).get('TKE_EXISTED_LBID', "")
    if not TKE_EXISTED_LBID:
        TKE_EXISTED_LBID = service.project.cluster.get("TKE_EXISTED_LBID", '')
    if not TKE_EXISTED_LBID:
        TKE_EXISTED_LBID = conf.get('TKE_EXISTED_LBID', '')
    if not SERVICE_EXTERNAL_IP and TKE_EXISTED_LBID:
        TKE_EXISTED_LBID = TKE_EXISTED_LBID.split('|')[0]
        # The port range is derived from the service id, same as the
        # external-IP branch above (the view object itself has no id).
        meet_ports = core.get_not_black_port(20000 + 10 * service.id)
        service_ports = [[meet_ports[index], port] for index, port in enumerate(ports)]
        service_external_name = (service.name + "-external").lower()[:60].strip('-')
        k8s_client.create_service(
            namespace=namespace,
            name=service_external_name,
            username=service.created_by.username,
            ports=service_ports,
            selector=labels,
            service_type='LoadBalancer',
            annotations={
                "service.kubernetes.io/tke-existed-lbid": TKE_EXISTED_LBID,
            }
        )

    if env == 'prod':
        hpas = re.split(',|;', service.hpa or '')
        regex = re.compile(r"\(.*\)")
        # With less than one whole GPU, GPU-based scaling rules are dropped.
        if float(regex.sub('', service.resource_gpu or '0')) < 1:
            hpas = [hpa for hpa in hpas if 'gpu' not in hpa]
        # Autoscaling only makes sense with a real replica range and rules.
        if int(service.max_replicas) > int(service.min_replicas) and service.hpa:
            try:
                print('create hpa')
                k8s_client.create_hpa(
                    namespace=namespace,
                    name=name,
                    min_replicas=int(service.min_replicas),
                    max_replicas=int(service.max_replicas),
                    hpa=','.join(hpas)
                )
            except Exception as e:
                flash('hpa:' + str(e), 'warning')
        else:
            k8s_client.delete_hpa(namespace=namespace, name=name)

    # debug/test only change the status of a service that is still offline.
    if env == 'debug' and service.model_status == 'offline':
        service.model_status = 'debug'
    if env == 'test' and service.model_status == 'offline':
        service.model_status = 'test'
    if env == 'prod':
        service.model_status = 'online'

    # The history may be NULL on a fresh record; keep only the last 10 lines.
    history = service.deploy_history or ''
    history = history + "\n" + "deploy %s: %s %s" % (env, g.user.username, datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    service.deploy_history = '\n'.join(history.split("\n")[-10:])
    db.session.commit()

    if env == "debug":
        time.sleep(2)
        pods = k8s_client.get_pods(namespace=namespace, labels={"app": name})
        if pods:
            pod = pods[0]
            print('deploy debug success')
            return redirect("/k8s/web/debug/%s/%s/%s/%s" % (service.project.cluster['NAME'], namespace, pod['name'], name))

    # Only production has the same-domain canary/version switch.
    if env == 'prod':
        from myapp.tasks.async_task import upgrade_service
        kwargs = {
            "service_id": service.id,
            "name": service.name,
            "namespace": namespace
        }
        upgrade_service.apply_async(kwargs=kwargs)

    flash(__('服务部署完成,正在进行同域名服务版本切换'), category='success')
    print('deploy prod success')
    return redirect(conf.get('MODEL_URLS', {}).get('inferenceservice', ''))
2022-02-26 22:36:57 +08:00
2023-12-11 13:23:35 +08:00
@action("copy", "复制", confirmation= '复制所选记录?', icon="fa-copy", multiple=True, single=False)
def copy(self, services):
    """Clone the selected services under a fresh ``vYYYY.MM.DD.<n>`` version.

    For each selected service the clone gets the first version suffix ``n``
    (starting at 1) not yet used by a service with the same model name, a name
    of ``<model_name>-<version without 'v' and dots>`` and new created/changed
    timestamps. Each clone is committed individually. An
    ``InvalidRequestError`` rolls the session back; other errors propagate.

    :param services: one service or a list of services.
    :return: redirect back to the referring page.
    """
    selected = services if isinstance(services, list) else [services]
    try:
        for original in selected:
            duplicate = original.clone()

            # Probe v<date>.1, v<date>.2, ... until an unused version is found.
            suffix = 1
            while True:
                candidate_version = datetime.datetime.now().strftime('v%Y.%m.%d.') + str(suffix)
                taken = (
                    db.session.query(InferenceService)
                    .filter_by(model_version=candidate_version)
                    .filter_by(model_name=duplicate.model_name)
                    .first()
                )
                if not taken:
                    break
                suffix += 1

            duplicate.model_version = candidate_version
            compact_version = duplicate.model_version.replace('v', '').replace('.', '')
            duplicate.name = duplicate.model_name + "-" + compact_version
            duplicate.created_on = datetime.datetime.now()
            duplicate.changed_on = datetime.datetime.now()
            db.session.add(duplicate)
            db.session.commit()
    except InvalidRequestError:
        db.session.rollback()
    return redirect(request.referrer)
2022-02-26 22:36:57 +08:00
2023-04-06 23:23:11 +08:00
# @pysnooper.snoop()
2023-09-03 21:15:58 +08:00
def echart_option(self, filters=None):
    """Build the chart option text for the GPU usage of online services.

    The option is a JavaScript-style object literal rendered from a Jinja
    template; it is stored in ``global_all_service_load['data']`` and returned.

    :param filters: only printed here; not used for the query.
    :return: the rendered option string (or the previously stored one when the
        refresh branch is skipped).
    """
    print(filters)
    global global_all_service_load
    # Only triggers if the module-level dict has been emptied.
    if not global_all_service_load:
        global_all_service_load['check_time'] = None
    option=global_all_service_load['data']
    # Refresh when there is no check time or it is older than one hour.
    # NOTE(review): the check_time assignment near the end of this function is
    # commented out, so unless it is set elsewhere this branch always runs.
    if not global_all_service_load['check_time'] or (datetime.datetime.now() - global_all_service_load['check_time']).total_seconds()>3600:
        all_services = db.session.query(InferenceService).filter_by(model_status='online').all()
        from myapp.utils.py.py_prometheus import Prometheus
        prometheus = Prometheus(conf.get('PROMETHEUS', ''))
        # prometheus = Prometheus('10.101.142.16:8081')
        all_services_load = prometheus.get_istio_service_metric(namespace='service')
        services_metrics = []
        legend=['qps','cpu','memory','gpu']
        # Timestamp of today's midnight (local time).
        today_time = int(datetime.datetime.strptime(datetime.datetime.now().strftime("%Y-%m-%d"),"%Y-%m-%d").timestamp())
        time_during = 5 * 60
        # Index of the current 5-minute bin, capped at the number of bins per day.
        end_point = min(int(datetime.datetime.now().timestamp() - today_time)//time_during, 60*60*24//time_during)
        start_point = max(end_point - 60, 0)

        def add_metric_data(metric, metric_name, service_name):
            # Convert values per metric kind and keep only today's samples up to now.
            if metric_name == 'qps':
                metric = [[date_value[0], int(float(date_value[1]))] for date_value in metric if datetime.datetime.now().timestamp() > date_value[0] > today_time]
            if metric_name == 'memory':
                # Value divided by 1024^3 (presumably bytes -> GiB; confirm the source unit).
                metric = [[date_value[0], round(float(date_value[1]) / 1024 / 1024 / 1024, 2)] for date_value in metric if datetime.datetime.now().timestamp() > date_value[0] > today_time]
            if metric_name == 'cpu' or metric_name == 'gpu':
                metric = [[date_value[0], round(float(date_value[1]), 2)] for date_value in metric if datetime.datetime.now().timestamp() > date_value[0] > today_time]
            if metric:
                # Bin the samples by time of day, one bin per 5 minutes.
                # NOTE(review): every bin is seeded with a 0, so that 0 is part
                # of the average computed below.
                metric_binning = [[0] for x in range(60 * 60 * 24 // time_during)]
                for date_value in metric:
                    timestamp, value = date_value[0], date_value[1]
                    # NOTE(review): assumes integer timestamps; a float would
                    # not be usable as a list index.
                    metric_binning[(timestamp - today_time) // time_during].append(value)
                metric_binning = [int(sum(x) / len(x)) for x in metric_binning]
                # metric_binning = [[datetime.datetime.fromtimestamp(today_time+time_during*i+time_during).strftime('%Y-%m-%dT%H:%M:%S.000Z'),metric_binning[i]] for i in range(len(metric_binning)) if i<=end_point]
                # [end-of-bin time in milliseconds, value], up to the current bin.
                metric_binning = [[(today_time+time_during*i+time_during)*1000,metric_binning[i]] for i in range(len(metric_binning)) if i<=end_point]
                services_metrics.append(
                    {
                        "name": service_name,
                        "type": 'line',
                        "smooth": True,
                        "showSymbol": False,
                        "data": metric_binning
                    }
                )

        for service in all_services:
            # The qps / memory / cpu series are disabled; only GPU is charted.
            servie_pod_metrics = []
            for pod_name in all_services_load['gpu']:
                # A pod is attributed to the service by substring match on its name.
                if service.name in pod_name:
                    pod_metric = all_services_load['gpu'][pod_name]
                    servie_pod_metrics = servie_pod_metrics + pod_metric
            add_metric_data(servie_pod_metrics, 'gpu', service.created_by.username + ":" + service.label)

        # A dataZoom section based on start_point / end_point is not part of
        # the template; both values are still passed to render() below.
        option = '''
{
"title": {
"text": 'GPU monitor'
},
"tooltip": {
"trigger": 'axis',
"position": [10, 10]
},
"legend": {
"data": {{ legend }}
},
"grid": {
"left": '3%',
"right": '4%',
"bottom": '3%',
"containLabel": true
},
"xAxis": {
"type": "time",
"min": new Date('{{today}}'),
"max": new Date('{{tomorrow}}'),
"boundaryGap": false,
"timezone" : 'Asia/Shanghai',
},
"yAxis": {
"type": "value",
"boundaryGap": false,
"axisLine":{ //y轴
"show":false
},
"axisTick":{ //y轴刻度线
"show":true
},
"splitLine": { //网格线
"show": true,
"color": '#f1f2f6'
}
},
"series": {{services_metric}}
}
'''
        # print(services_metrics)
        rtemplate = Environment(loader=BaseLoader, undefined=DebugUndefined).from_string(option)
        option = rtemplate.render(
            legend=legend,
            services_metric=json.dumps(services_metrics, ensure_ascii=False, indent=4),
            start_point=start_point,
            end_point=end_point,
            today=datetime.datetime.now().strftime('%Y/%m/%d'),
            tomorrow=(datetime.datetime.now() + datetime.timedelta(days=1)).strftime('%Y/%m/%d'),
        )
        # global_all_service_load['check_time']=datetime.datetime.now()
        global_all_service_load['data'] = option
    # print(option)
    # file = open('myapp/test.txt',mode='w')
    # file.write(option)
    # file.close()
    return option
2023-09-03 21:15:58 +08:00
# Model view for InferenceService records; all behaviour comes from the two
# base classes, only the data model is bound here.
class InferenceService_ModelView(InferenceService_ModelView_base, MyappModelView):
    datamodel = SQLAInterface(InferenceService)


# Register the view without adding a menu entry for it.
appbuilder.add_view_no_menu(InferenceService_ModelView)
2022-02-26 22:36:57 +08:00
2023-09-03 21:15:58 +08:00
# 添加api
2023-09-03 21:15:58 +08:00
class InferenceService_ModelView_Api(InferenceService_ModelView_base, MyappModelRestApi):
    # REST API counterpart of the inference-service view.
    datamodel = SQLAInterface(InferenceService)
    route_base = '/inferenceservice_modelview/api'

    def set_columns_related(self, exist_add_args, response_add_columns):
        """Fill the add-form columns with defaults for the chosen service type.

        :param exist_add_args: values already entered in the form; only
            ``service_type`` is read.
        :param response_add_columns: column descriptions, modified in place:
            ``images`` gets its selectable ``values`` and ``model_path``,
            ``command``, ``env``, ``ports``, ``metrics`` and ``health`` get
            their ``default``.
        """
        service_type = exist_add_args.get('service_type', '')

        # Example model paths shown as the default for each service type.
        model_path_hints = {
            "ml-server": "/mnt/.../$model_name.pkl",
            "tfserving": "/mnt/.../saved_model",
            "torch-server": "/mnt/.../$model_name.mar",
            "onnxruntime": "/mnt/.../$model_name.onnx",
            "triton-server": "onnx:/mnt/.../model.onnx(model.plan,model.bin,model.savedmodel/,model.pt,model.dali)"
        }

        def from_conf(conf_key, fallback):
            # Per-service-type entry of a config mapping, or the fallback.
            return conf.get(conf_key, {}).get(service_type, fallback)

        response_add_columns['images']['values'] = [
            {"id": image, "value": image} for image in from_conf('INFERNENCE_IMAGES', [])
        ]

        column_defaults = (
            ('model_path', model_path_hints.get(service_type, '')),
            ('command', from_conf('INFERNENCE_COMMAND', '')),
            ('env', '\n'.join(from_conf('INFERNENCE_ENV', []))),
            ('ports', from_conf('INFERNENCE_PORTS', '80')),
            ('metrics', from_conf('INFERNENCE_METRICS', '')),
            ('health', from_conf('INFERNENCE_HEALTH', '')),
        )
        for column, default in column_defaults:
            response_add_columns[column]['default'] = default


appbuilder.add_api(InferenceService_ModelView_Api)