mirror of
https://github.com/tencentmusic/cube-studio.git
synced 2025-02-17 14:40:28 +08:00
更新cube studio初始化部署
This commit is contained in:
parent
08485a7434
commit
d232c2415e
@ -1,58 +1,52 @@
|
||||
# 所需要的所有镜像
|
||||
kubeflow = [
|
||||
'mysql:5.7', # 数据库
|
||||
'bitnami/redis:4.0.14', # 缓存
|
||||
'alpine:3.10',
|
||||
"busybox",
|
||||
"ccr.ccs.tencentyun.com/cube-studio/kubeflow:training-operator", # 分布式训练
|
||||
'ccr.ccs.tencentyun.com/cube-studio/spark-operator:v1beta2-1.3.7-3.1.1', # spark serverless
|
||||
'mysql:8.0.32', # 数据库
|
||||
'bitnami/redis:6.2.12', # 缓存
|
||||
"busybox:1.36.0",
|
||||
"kubeflow/training-operator:v1-8a066f9", # 分布式训练
|
||||
]
|
||||
|
||||
kubernetes_dashboard = [
|
||||
# 'kubernetesui/dashboard:v2.6.1', # k8s dashboard
|
||||
# 'kubernetesui/metrics-scraper:v1.0.8', # k8s dashboard 上的指标监控
|
||||
'kubernetesui/dashboard:v2.2.0', # k8s dashboard
|
||||
'kubernetesui/metrics-scraper:v1.0.6', # k8s dashboard 上的指标监控
|
||||
'kubernetesui/dashboard:v2.6.1', # k8s dashboard
|
||||
'kubernetesui/metrics-scraper:v1.0.8', # k8s dashboard 上的指标监控
|
||||
]
|
||||
|
||||
new_gpu = [
|
||||
'nvidia/k8s-device-plugin:v0.7.1', # gpu k8s插件
|
||||
'nvidia/dcgm-exporter:2.3.1-2.6.1-ubuntu20.04', # gpu监控
|
||||
'tkestack/gpu-manager:1.0.3'
|
||||
'nvidia/k8s-device-plugin:v0.11.0-ubuntu20.04', # gpu k8s插件
|
||||
'nvidia/dcgm-exporter:3.1.7-3.1.4-ubuntu20.04', # gpu监控
|
||||
]
|
||||
|
||||
new_prometheus = [
|
||||
'quay.io/prometheus/alertmanager:v0.15.0', # 报警
|
||||
"prom/prometheus:v2.27.1", # prometheus数据库
|
||||
'prom/node-exporter:v1.5.0', # 机器指标
|
||||
|
||||
'quay.io/prometheus-operator/prometheus-config-reloader:v0.46.0', # prometheus配置翻译
|
||||
"quay.io/prometheus/prometheus:v2.27.1", # prometheus数据库
|
||||
'quay.io/coreos/kube-state-metrics:v1.3.1', # 状态 指标
|
||||
'quay.io/prometheus/node-exporter:v0.15.2', # 机器指标
|
||||
'quay.io/coreos/kube-rbac-proxy:v0.3.1', # 指标
|
||||
'quay.io/coreos/addon-resizer:1.0', # 指标
|
||||
"quay.io/prometheus-operator/prometheus-operator:v0.46.0", # prometheus 部署工具
|
||||
"k8s.gcr.io/prometheus-adapter/prometheus-adapter:v0.9.1", # prometheus指标翻译为自定义指标
|
||||
'bitnami/kube-rbac-proxy:0.14.1', # 指标
|
||||
'carlosedp/addon-resizer:v1.8.4', # 指标
|
||||
|
||||
'grafana/grafana:9.1.5', # 监控看板
|
||||
"ccr.ccs.tencentyun.com/cube-studio/prometheus-adapter:v0.9.1", # prometheus指标翻译为自定义指标
|
||||
]
|
||||
|
||||
istio = [
|
||||
"istio/proxyv2:1.14.1", # ingressgateway
|
||||
"istio/pilot:1.14.1" # 数据面
|
||||
"istio/proxyv2:1.15.0", # ingressgateway
|
||||
"istio/pilot:1.15.0" # 数据面
|
||||
]
|
||||
volcano = [
|
||||
'volcanosh/vc-controller-manager:v1.4.0', # 控制器
|
||||
'volcanosh/vc-scheduler:v1.4.0', # 调度器
|
||||
'volcanosh/vc-webhook-manager:v1.4.0' # 拦截器
|
||||
'volcanosh/vc-controller-manager:v1.7.0', # 控制器
|
||||
'volcanosh/vc-scheduler:v1.7.0', # 调度器
|
||||
'volcanosh/vc-webhook-manager:v1.7.0' # 拦截器
|
||||
]
|
||||
|
||||
nni = [
|
||||
'frameworkcontroller/frameworkcontroller' # 超参搜索
|
||||
]
|
||||
pipeline = [
|
||||
'minio/minio',
|
||||
'quay.io/argoproj/argoexec:v3.4.3',
|
||||
'quay.io/argoproj/workflow-controller:latest',
|
||||
'quay.io/argoproj/workflow-controller:v3.4.3',
|
||||
'quay.io/argoproj/argocli:latest'
|
||||
'minio/minio:RELEASE.2023-04-20T17-56-55Z',
|
||||
'argoproj/argoexec:v3.4.3',
|
||||
'argoproj/workflow-controller:v3.4.3',
|
||||
'argoproj/argocli:v3.4.3'
|
||||
]
|
||||
cube_studio = [
|
||||
# notebook基础镜像
|
||||
@ -96,19 +90,14 @@ cube_studio = [
|
||||
'ccr.ccs.tencentyun.com/cube-studio/onnxruntime:latest-cuda',
|
||||
|
||||
# 任务模板的镜像
|
||||
"ubuntu:18.04",
|
||||
"ubuntu:20.04",
|
||||
'python:3.9',
|
||||
"ccr.ccs.tencentyun.com/cube-studio/datax:latest",
|
||||
"ccr.ccs.tencentyun.com/cube-studio/volcano:20211001",
|
||||
"ccr.ccs.tencentyun.com/cube-studio/ray:gpu-20210601",
|
||||
"ccr.ccs.tencentyun.com/cube-studio/sklearn_estimator:v1",
|
||||
"ccr.ccs.tencentyun.com/cube-studio/xgb_train_and_predict:v1",
|
||||
"ccr.ccs.tencentyun.com/cube-studio/tf2.3_keras_train:latest",
|
||||
"ccr.ccs.tencentyun.com/cube-studio/tf2.3_plain_train:latest",
|
||||
"ccr.ccs.tencentyun.com/cube-studio/tf_distributed_train:latest",
|
||||
"ccr.ccs.tencentyun.com/cube-studio/tf2.3_model_evaluation:latest",
|
||||
"ccr.ccs.tencentyun.com/cube-studio/tf_distributed_eval:latest",
|
||||
"ccr.ccs.tencentyun.com/cube-studio/tf_model_offline_predict:latest",
|
||||
"ccr.ccs.tencentyun.com/cube-studio/pytorch_distributed_train_k8s:20201010",
|
||||
"ccr.ccs.tencentyun.com/cube-studio/xgb:20230801",
|
||||
"ccr.ccs.tencentyun.com/cube-studio/pytorch:20201010",
|
||||
"ccr.ccs.tencentyun.com/cube-studio/horovod:20210401",
|
||||
"ccr.ccs.tencentyun.com/cube-studio/video-audio:20210601",
|
||||
"ccr.ccs.tencentyun.com/cube-studio/video-audio:20210601",
|
||||
@ -116,27 +105,11 @@ cube_studio = [
|
||||
"ccr.ccs.tencentyun.com/cube-studio/kaldi_distributed_on_volcano:v2",
|
||||
"ccr.ccs.tencentyun.com/cube-studio/volcano:offline-predict-20220101",
|
||||
"ccr.ccs.tencentyun.com/cube-studio/object_detection_on_darknet:v1",
|
||||
"ccr.ccs.tencentyun.com/cube-studio/deploy-service:20211001"
|
||||
"ccr.ccs.tencentyun.com/cube-studio/deploy-service:20250501",
|
||||
|
||||
# 用户可能使用的基础镜像
|
||||
'ccr.ccs.tencentyun.com/cube-studio/ubuntu-gpu:cuda11.0.3-cudnn8',
|
||||
'ccr.ccs.tencentyun.com/cube-studio/ubuntu-gpu:cuda11.0.3-cudnn8-python3.7',
|
||||
'ccr.ccs.tencentyun.com/cube-studio/ubuntu-gpu:cuda11.0.3-cudnn8-python3.8',
|
||||
'ccr.ccs.tencentyun.com/cube-studio/ubuntu-gpu:cuda10.2-cudnn7',
|
||||
'ccr.ccs.tencentyun.com/cube-studio/ubuntu-gpu:cuda10.2-cudnn7-python3.7',
|
||||
'ccr.ccs.tencentyun.com/cube-studio/ubuntu-gpu:cuda10.2-cudnn7-python3.8',
|
||||
'ccr.ccs.tencentyun.com/cube-studio/ubuntu-gpu:cuda10.1-cudnn7',
|
||||
'ccr.ccs.tencentyun.com/cube-studio/ubuntu-gpu:cuda10.1-cudnn7-python3.6',
|
||||
'ccr.ccs.tencentyun.com/cube-studio/ubuntu-gpu:cuda10.1-cudnn7-python3.7',
|
||||
'ccr.ccs.tencentyun.com/cube-studio/ubuntu-gpu:cuda10.1-cudnn7-python3.8',
|
||||
'ccr.ccs.tencentyun.com/cube-studio/ubuntu-gpu:cuda10.0-cudnn7',
|
||||
'ccr.ccs.tencentyun.com/cube-studio/ubuntu-gpu:cuda10.0-cudnn7-python3.6',
|
||||
'ccr.ccs.tencentyun.com/cube-studio/ubuntu-gpu:cuda10.0-cudnn7-python3.7',
|
||||
'ccr.ccs.tencentyun.com/cube-studio/ubuntu-gpu:cuda10.0-cudnn7-python3.8',
|
||||
'ccr.ccs.tencentyun.com/cube-studio/ubuntu-gpu:cuda9.1-cudnn7',
|
||||
'ccr.ccs.tencentyun.com/cube-studio/ubuntu-gpu:cuda9.1-cudnn7-python3.6',
|
||||
'ccr.ccs.tencentyun.com/cube-studio/ubuntu-gpu:cuda9.1-cudnn7-python3.7',
|
||||
'ccr.ccs.tencentyun.com/cube-studio/ubuntu-gpu:cuda9.1-cudnn7-python3.8',
|
||||
'ccr.ccs.tencentyun.com/cube-studio/ubuntu-gpu:cuda11.8.0-cudnn8-python3.9',
|
||||
|
||||
]
|
||||
|
||||
# images = kubeflow + kubernetes_dashboard + new_gpu + new_prometheus + istio+ volcano + nni+ pipeline+cube_studio
|
||||
@ -160,8 +133,10 @@ for image in images:
|
||||
|
||||
# # 拉取公有镜像
|
||||
image = image.replace('@sha256', '')
|
||||
# print("docker pull %s && docker tag %s %s &" % (image_name,image_name,image))
|
||||
print("docker pull %s &" % (image,))
|
||||
|
||||
|
||||
print('')
|
||||
print('wait')
|
||||
|
||||
|
@ -1,10 +1,9 @@
|
||||
|
||||
for namespace in 'infra' 'kubeflow' 'istio-system' 'pipeline' 'automl' 'jupyter' 'service' 'monitoring' 'logging' 'kube-system'
|
||||
for namespace in 'infra' 'kubeflow' 'istio-system' 'pipeline' 'automl' 'jupyter' 'service' 'monitoring' 'logging' 'kube-system' 'aihub'
|
||||
do
|
||||
kubectl create ns $namespace
|
||||
kubectl delete secret docker-registry hubsecret -n $namespace
|
||||
kubectl create secret docker-registry hubsecret --docker-server=https://index.docker.io/v1/ --docker-username=xxx --docker-password=xxxx -n $namespace
|
||||
# kubectl create secret docker-registry oa-hubsecret --docker-server=docker.oa.com:8080 --docker-username=xxx --docker-password=xxxx -n $namespace
|
||||
kubectl label ns $namespace istio-injection=disabled --overwrite
|
||||
# kubectl label namespace $namespace istio-injection=enabled --overwrite
|
||||
done
|
||||
|
@ -1,38 +0,0 @@
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: infra
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: pipeline
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: automl
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: service
|
||||
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: jupyter
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: kubeflow
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: istio-system
|
@ -1,33 +1,31 @@
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/bitnami-redis:4.0.14 && docker tag ccr.ccs.tencentyun.com/cube-studio/bitnami-redis:4.0.14 bitnami/redis:4.0.14 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/kubeflow:training-operator && docker tag ccr.ccs.tencentyun.com/cube-studio/kubeflow:training-operator ccr.ccs.tencentyun.com/cube-studio/kubeflow:training-operator &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/istio-pilot:1.14.1 && docker tag ccr.ccs.tencentyun.com/cube-studio/istio-pilot:1.14.1 istio/pilot:1.14.1 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/volcanosh-vc-webhook-manager:v1.4.0 && docker tag ccr.ccs.tencentyun.com/cube-studio/volcanosh-vc-webhook-manager:v1.4.0 volcanosh/vc-webhook-manager:v1.4.0 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/nvidia-k8s-device-plugin:v0.7.1 && docker tag ccr.ccs.tencentyun.com/cube-studio/nvidia-k8s-device-plugin:v0.7.1 nvidia/k8s-device-plugin:v0.7.1 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/nvidia-dcgm-exporter:2.3.1-2.6.1-ubuntu20.04 && docker tag ccr.ccs.tencentyun.com/cube-studio/nvidia-dcgm-exporter:2.3.1-2.6.1-ubuntu20.04 nvidia/dcgm-exporter:2.3.1-2.6.1-ubuntu20.04 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/quay.io-coreos-kube-state-metrics:v1.3.1 && docker tag ccr.ccs.tencentyun.com/cube-studio/quay.io-coreos-kube-state-metrics:v1.3.1 quay.io/coreos/kube-state-metrics:v1.3.1 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/quay.io-prometheus-node-exporter:v0.15.2 && docker tag ccr.ccs.tencentyun.com/cube-studio/quay.io-prometheus-node-exporter:v0.15.2 quay.io/prometheus/node-exporter:v0.15.2 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/quay.io-argoproj-argoexec:v3.4.3 && docker tag ccr.ccs.tencentyun.com/cube-studio/quay.io-argoproj-argoexec:v3.4.3 quay.io/argoproj/argoexec:v3.4.3 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/quay.io-prometheus-alertmanager:v0.15.0 && docker tag ccr.ccs.tencentyun.com/cube-studio/quay.io-prometheus-alertmanager:v0.15.0 quay.io/prometheus/alertmanager:v0.15.0 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/quay.io-argoproj-workflow-controller:v3.4.3 && docker tag ccr.ccs.tencentyun.com/cube-studio/quay.io-argoproj-workflow-controller:v3.4.3 quay.io/argoproj/workflow-controller:v3.4.3 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/quay.io-coreos-addon-resizer:1.0 && docker tag ccr.ccs.tencentyun.com/cube-studio/quay.io-coreos-addon-resizer:1.0 quay.io/coreos/addon-resizer:1.0 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/spark-operator:v1beta2-1.3.7-3.1.1 && docker tag ccr.ccs.tencentyun.com/cube-studio/spark-operator:v1beta2-1.3.7-3.1.1 ccr.ccs.tencentyun.com/cube-studio/spark-operator:v1beta2-1.3.7-3.1.1 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/kubernetesui-dashboard:v2.2.0 && docker tag ccr.ccs.tencentyun.com/cube-studio/kubernetesui-dashboard:v2.2.0 kubernetesui/dashboard:v2.2.0 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/quay.io-coreos-kube-rbac-proxy:v0.3.1 && docker tag ccr.ccs.tencentyun.com/cube-studio/quay.io-coreos-kube-rbac-proxy:v0.3.1 quay.io/coreos/kube-rbac-proxy:v0.3.1 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/quay.io-prometheus-operator-prometheus-config-reloader:v0.46.0 && docker tag ccr.ccs.tencentyun.com/cube-studio/quay.io-prometheus-operator-prometheus-config-reloader:v0.46.0 quay.io/prometheus-operator/prometheus-config-reloader:v0.46.0 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/grafana-grafana:9.1.5 && docker tag ccr.ccs.tencentyun.com/cube-studio/grafana-grafana:9.1.5 grafana/grafana:9.1.5 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/alpine:3.10 && docker tag ccr.ccs.tencentyun.com/cube-studio/alpine:3.10 alpine:3.10 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/istio-proxyv2:1.14.1 && docker tag ccr.ccs.tencentyun.com/cube-studio/istio-proxyv2:1.14.1 istio/proxyv2:1.14.1 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/k8s.gcr.io-prometheus-adapter-prometheus-adapter:v0.9.1 && docker tag ccr.ccs.tencentyun.com/cube-studio/k8s.gcr.io-prometheus-adapter-prometheus-adapter:v0.9.1 k8s.gcr.io/prometheus-adapter/prometheus-adapter:v0.9.1 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/quay.io-prometheus-operator-prometheus-operator:v0.46.0 && docker tag ccr.ccs.tencentyun.com/cube-studio/quay.io-prometheus-operator-prometheus-operator:v0.46.0 quay.io/prometheus-operator/prometheus-operator:v0.46.0 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/quay.io-argoproj-workflow-controller:latest && docker tag ccr.ccs.tencentyun.com/cube-studio/quay.io-argoproj-workflow-controller:latest quay.io/argoproj/workflow-controller:latest &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/quay.io-argoproj-argocli:latest && docker tag ccr.ccs.tencentyun.com/cube-studio/quay.io-argoproj-argocli:latest quay.io/argoproj/argocli:latest &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/volcanosh-vc-scheduler:v1.4.0 && docker tag ccr.ccs.tencentyun.com/cube-studio/volcanosh-vc-scheduler:v1.4.0 volcanosh/vc-scheduler:v1.4.0 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/volcanosh-vc-controller-manager:v1.4.0 && docker tag ccr.ccs.tencentyun.com/cube-studio/volcanosh-vc-controller-manager:v1.4.0 volcanosh/vc-controller-manager:v1.4.0 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/kubernetesui-metrics-scraper:v1.0.6 && docker tag ccr.ccs.tencentyun.com/cube-studio/kubernetesui-metrics-scraper:v1.0.6 kubernetesui/metrics-scraper:v1.0.6 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/minio-minio && docker tag ccr.ccs.tencentyun.com/cube-studio/minio-minio minio/minio &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/mysql:5.7 && docker tag ccr.ccs.tencentyun.com/cube-studio/mysql:5.7 mysql:5.7 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/busybox && docker tag ccr.ccs.tencentyun.com/cube-studio/busybox busybox &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/frameworkcontroller-frameworkcontroller && docker tag ccr.ccs.tencentyun.com/cube-studio/frameworkcontroller-frameworkcontroller frameworkcontroller/frameworkcontroller &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/quay.io-prometheus-prometheus:v2.27.1 && docker tag ccr.ccs.tencentyun.com/cube-studio/quay.io-prometheus-prometheus:v2.27.1 quay.io/prometheus/prometheus:v2.27.1 &
|
||||
docker pull istio/proxyv2:1.15.0 &
|
||||
docker pull ccr.ccs.tencentyun.com/tkeimages/gpu-manager:latest &
|
||||
docker pull volcanosh/vc-webhook-manager:v1.7.0 &
|
||||
docker pull busybox:1.36.0 &
|
||||
docker pull postgres:11.5 &
|
||||
docker pull prom/node-exporter:v1.5.0 &
|
||||
docker pull volcanosh/vc-scheduler:v1.7.0 &
|
||||
docker pull argoproj/argoexec:v3.4.3 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/spark-operator:1.3.7-3.1.1 &
|
||||
docker pull prom/prometheus:v2.27.1 &
|
||||
docker pull quay.io/prometheus-operator/prometheus-config-reloader:v0.46.0 &
|
||||
docker pull frameworkcontroller/frameworkcontroller &
|
||||
docker pull kubeflow/training-operator:v1-8a066f9 &
|
||||
docker pull nvidia/k8s-device-plugin:v0.11.0-ubuntu20.04 &
|
||||
docker pull argoproj/argocli:v3.4.3 &
|
||||
docker pull quay.io/prometheus-operator/prometheus-operator:v0.46.0 &
|
||||
docker pull volcanosh/vc-controller-manager:v1.7.0 &
|
||||
docker pull nvidia/dcgm-exporter:3.1.7-3.1.4-ubuntu20.04 &
|
||||
docker pull mysql:8.0.32 &
|
||||
docker pull kubernetesui/metrics-scraper:v1.0.8 &
|
||||
docker pull minio/minio:RELEASE.2023-04-20T17-56-55Z &
|
||||
docker pull carlosedp/addon-resizer:v1.8.4 &
|
||||
docker pull kubernetesui/dashboard:v2.6.1 &
|
||||
docker pull istio/pilot:1.15.0 &
|
||||
docker pull bitnami/redis:6.2.12 &
|
||||
docker pull bitnami/kube-rbac-proxy:0.14.1 &
|
||||
docker pull ccr.ccs.tencentyun.com/cube-studio/label-studio:1.7.3 &
|
||||
docker pull argoproj/workflow-controller:v3.4.3 &
|
||||
docker pull grafana/grafana:9.1.5 &
docker pull ccr.ccs.tencentyun.com/cube-studio/prometheus-adapter:v0.9.1 &
|
||||
|
||||
wait
|
||||
|
@ -28,6 +28,45 @@ spec:
|
||||
resources:
|
||||
requests:
|
||||
storage: 500Gi
|
||||
selector:
|
||||
matchLabels:
|
||||
automl-pvname: automl-kubeflow-user-workspace
|
||||
storageClassName: ""
|
||||
volumeName: automl-kubeflow-user-workspace
|
||||
# selector:
|
||||
# matchLabels:
|
||||
# automl-pvname: automl-kubeflow-user-workspace
|
||||
|
||||
|
||||
|
||||
# 模型归档
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: PersistentVolume
|
||||
metadata:
|
||||
name: automl-kubeflow-archives
|
||||
labels:
|
||||
automl-pvname: automl-kubeflow-archives
|
||||
spec:
|
||||
capacity:
|
||||
storage: 500Gi
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
hostPath:
|
||||
path: /data/k8s/kubeflow/pipeline/archives
|
||||
persistentVolumeReclaimPolicy: Retain
|
||||
---
|
||||
kind: PersistentVolumeClaim
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: kubeflow-archives
|
||||
namespace: automl
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
resources:
|
||||
requests:
|
||||
storage: 500Gi
|
||||
storageClassName: ""
|
||||
volumeName: automl-kubeflow-archives
|
||||
# selector:
|
||||
# matchLabels:
|
||||
# automl-pvname: automl-kubeflow-archives
|
||||
|
||||
|
@ -26,10 +26,11 @@ spec:
|
||||
resources:
|
||||
requests:
|
||||
storage: 100Gi
|
||||
selector:
|
||||
matchLabels:
|
||||
infra-pvname: infra-kubeflow-global-pv
|
||||
|
||||
# selector:
|
||||
# matchLabels:
|
||||
# infra-pvname: infra-kubeflow-global-pv
|
||||
storageClassName: ""
|
||||
volumeName: infra-kubeflow-global-pv
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
@ -58,7 +59,8 @@ spec:
|
||||
resources:
|
||||
requests:
|
||||
storage: 1Gi
|
||||
selector:
|
||||
matchLabels:
|
||||
infra-pvname: infra-kubeflow
|
||||
|
||||
# selector:
|
||||
# matchLabels:
|
||||
# infra-pvname: infra-kubeflow
|
||||
storageClassName: ""
|
||||
volumeName: infra-kubeflow
|
||||
|
@ -27,10 +27,11 @@ spec:
|
||||
resources:
|
||||
requests:
|
||||
storage: 500Gi
|
||||
selector:
|
||||
matchLabels:
|
||||
jupyter-pvname: jupyter-kubeflow-user-workspace
|
||||
|
||||
# selector:
|
||||
# matchLabels:
|
||||
# jupyter-pvname: jupyter-kubeflow-user-workspace
|
||||
storageClassName: ""
|
||||
volumeName: jupyter-kubeflow-user-workspace
|
||||
|
||||
# 模型归档
|
||||
---
|
||||
@ -60,7 +61,8 @@ spec:
|
||||
resources:
|
||||
requests:
|
||||
storage: 500Gi
|
||||
selector:
|
||||
matchLabels:
|
||||
jupyter-pvname: jupyter-kubeflow-archives
|
||||
|
||||
# selector:
|
||||
# matchLabels:
|
||||
# jupyter-pvname: jupyter-kubeflow-archives
|
||||
storageClassName: ""
|
||||
volumeName: jupyter-kubeflow-archives
|
||||
|
@ -29,6 +29,8 @@ spec:
|
||||
resources:
|
||||
requests:
|
||||
storage: 500Gi
|
||||
selector:
|
||||
matchLabels:
|
||||
kubeflow-pvname: kubeflow-kubeflow-user-workspace
|
||||
# selector:
|
||||
# matchLabels:
|
||||
# kubeflow-pvname: kubeflow-kubeflow-user-workspace
|
||||
storageClassName: ""
|
||||
volumeName: kubeflow-kubeflow-user-workspace
|
@ -26,10 +26,11 @@ spec:
|
||||
resources:
|
||||
requests:
|
||||
storage: 100Gi
|
||||
selector:
|
||||
matchLabels:
|
||||
pipeline-pvname: pipeline-kubeflow-global-pv
|
||||
|
||||
# selector:
|
||||
# matchLabels:
|
||||
# pipeline-pvname: pipeline-kubeflow-global-pv
|
||||
storageClassName: ""
|
||||
volumeName: pipeline-kubeflow-global-pv
|
||||
|
||||
# 模型训练
|
||||
---
|
||||
@ -40,7 +41,7 @@ metadata:
|
||||
labels:
|
||||
pipeline-pvname: pipeline-kubeflow-user-workspace
|
||||
spec:
|
||||
# storageClassName: pipeline
|
||||
|
||||
capacity:
|
||||
storage: 500Gi
|
||||
accessModes:
|
||||
@ -48,6 +49,7 @@ spec:
|
||||
hostPath:
|
||||
path: /data/k8s/kubeflow/pipeline/workspace
|
||||
persistentVolumeReclaimPolicy: Retain
|
||||
|
||||
---
|
||||
kind: PersistentVolumeClaim
|
||||
apiVersion: v1
|
||||
@ -60,9 +62,11 @@ spec:
|
||||
resources:
|
||||
requests:
|
||||
storage: 500Gi
|
||||
selector:
|
||||
matchLabels:
|
||||
pipeline-pvname: pipeline-kubeflow-user-workspace
|
||||
# selector:
|
||||
# matchLabels:
|
||||
# pipeline-pvname: pipeline-kubeflow-user-workspace
|
||||
storageClassName: ""
|
||||
volumeName: pipeline-kubeflow-user-workspace
|
||||
# 模型部署
|
||||
---
|
||||
apiVersion: v1
|
||||
@ -108,9 +112,10 @@ spec:
|
||||
resources:
|
||||
requests:
|
||||
storage: 500Gi
|
||||
selector:
|
||||
matchLabels:
|
||||
pipeline-pvname: pipeline-kubeflow-archives
|
||||
|
||||
# selector:
|
||||
# matchLabels:
|
||||
# pipeline-pvname: pipeline-kubeflow-archives
|
||||
storageClassName: ""
|
||||
volumeName: pipeline-kubeflow-archives
|
||||
|
||||
|
||||
|
@ -27,10 +27,11 @@ spec:
|
||||
resources:
|
||||
requests:
|
||||
storage: 500Gi
|
||||
selector:
|
||||
matchLabels:
|
||||
service-pvname: service-kubeflow-user-workspace
|
||||
|
||||
# selector:
|
||||
# matchLabels:
|
||||
# service-pvname: service-kubeflow-user-workspace
|
||||
storageClassName: ""
|
||||
volumeName: service-kubeflow-user-workspace
|
||||
|
||||
# 模型归档
|
||||
---
|
||||
@ -60,8 +61,9 @@ spec:
|
||||
resources:
|
||||
requests:
|
||||
storage: 500Gi
|
||||
selector:
|
||||
matchLabels:
|
||||
service-pvname: service-kubeflow-archives
|
||||
|
||||
# selector:
|
||||
# matchLabels:
|
||||
# service-pvname: service-kubeflow-archives
|
||||
storageClassName: ""
|
||||
volumeName: service-kubeflow-archives
|
||||
|
||||
|
@ -5,10 +5,7 @@ metadata:
|
||||
name: kubeflow-clusterrole
|
||||
rules:
|
||||
- apiGroups: ["*"]
|
||||
resources: ["pods","pods/log","services","endpoints","configmaps","nodes","deployments","mpijobs","tfjobs","pytorchjobs","frameworks"]
|
||||
verbs: ["create", "delete", "deletecollection", "patch", "update", "get", "list", "watch"]
|
||||
- apiGroups: ["*"]
|
||||
resources: ["*"]
|
||||
resources: ["pods","pods/exec","pods/log","services","endpoints","events","configmaps","nodes","deployments","mpijobs","tfjobs","pytorchjobs","frameworks","jobs","sparkapplications","mxjobs","paddlejobs","xgboostjobs"]
|
||||
verbs: ["create", "delete", "deletecollection", "patch", "update", "get", "list", "watch"]
|
||||
---
|
||||
apiVersion: v1
|
||||
@ -18,10 +15,11 @@ metadata:
|
||||
namespace: pipeline
|
||||
|
||||
---
|
||||
kind: ClusterRoleBinding
|
||||
kind: RoleBinding
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: kubeflow-pipeline
|
||||
namespace: pipeline
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: kubeflow-pipeline
|
||||
@ -39,10 +37,11 @@ metadata:
|
||||
name: nni
|
||||
namespace: automl
|
||||
---
|
||||
kind: ClusterRoleBinding
|
||||
kind: RoleBinding
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: automl-nni-clusterrolebinding
|
||||
namespace: automl
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: nni
|
||||
|
@ -9,7 +9,7 @@ kubectl label node $node train=true cpu=true notebook=true service=true org=publ
|
||||
sh create_ns_secret.sh
|
||||
kubectl apply -f sa-rbac.yaml
|
||||
# 部署dashboard
|
||||
kubectl apply -f dashboard/v2.2.0-cluster.yaml
|
||||
kubectl apply -f dashboard/v2.6.1-cluster.yaml
|
||||
# 部署mysql
|
||||
kubectl create -f mysql/pv-pvc-hostpath.yaml
|
||||
kubectl create -f mysql/service.yaml
|
||||
@ -25,10 +25,7 @@ kubectl create -f redis/master.yaml
|
||||
# 部署istio
|
||||
kubectl apply -f istio/install-crd.yaml
|
||||
kubectl wait crd/envoyfilters.networking.istio.io --for condition=established --timeout=60s
|
||||
kubectl apply -f istio/install.yaml
|
||||
# k8s 1.21+
|
||||
# kubectl delete -f istio/install.yaml
|
||||
# kubectl apply -f istio/install-1.15.0.yaml
|
||||
kubectl apply -f istio/install-1.15.0.yaml
|
||||
|
||||
kubectl wait crd/virtualservices.networking.istio.io --for condition=established --timeout=60s
|
||||
kubectl wait crd/gateways.networking.istio.io --for condition=established --timeout=60s
|
||||
|
@ -3,7 +3,7 @@ bash init_node.sh
|
||||
iptables -P FORWARD ACCEPT
|
||||
iptables -P INPUT ACCEPT
|
||||
iptables -P OUTPUT ACCEPT
|
||||
mkdir -p ~/.kube && cp config ~/.kube/config && cp ~/.kube/config /etc/kubernetes/admin.conf
|
||||
mkdir -p ~/.kube /etc/kubernetes/ && rm -rf ~/.kube/config /etc/kubernetes/admin.conf && cp config ~/.kube/config && cp ~/.kube/config /etc/kubernetes/admin.conf
|
||||
mkdir -p kubeconfig && echo "" > kubeconfig/dev-kubeconfig
|
||||
curl -LO https://dl.k8s.io/release/v1.24.0/bin/linux/amd64/kubectl && chmod +x kubectl && cp kubectl /usr/bin/ && mv kubectl /usr/local/bin/
|
||||
node=`kubectl get node -o wide |grep $1 |awk '{print $1}'| head -n 1`
|
||||
@ -13,20 +13,15 @@ kubectl label node $node train=true cpu=true notebook=true service=true org=publ
|
||||
sh create_ns_secret.sh
|
||||
kubectl apply -f sa-rbac.yaml
|
||||
# 部署dashboard
|
||||
kubectl apply -f dashboard/v2.2.0-cluster.yaml
|
||||
# 高版本k8s部署2.6.1版本
|
||||
#kubectl apply -f dashboard/v2.6.1-cluster.yaml
|
||||
kubectl apply -f dashboard/v2.6.1-cluster.yaml
|
||||
# 部署mysql
|
||||
kubectl create -f mysql/pv-pvc-hostpath.yaml
|
||||
kubectl create -f mysql/service.yaml
|
||||
kubectl create -f mysql/configmap-mysql.yaml
|
||||
kubectl create -f mysql/deploy.yaml
|
||||
# 部署redis
|
||||
kubectl create -f redis/pv-hostpath.yaml
|
||||
kubectl create -f redis/configmap.yaml
|
||||
kubectl create -f redis/service.yaml
|
||||
# 如果自己需要使用pv来保存redis队列数据,可以修改master.yaml
|
||||
kubectl create -f redis/master.yaml
|
||||
kubectl delete -f redis/redis.yaml
|
||||
kubectl create -f redis/redis.yaml
|
||||
|
||||
# 部署prometheus
|
||||
cd prometheus
|
||||
@ -36,18 +31,11 @@ kubectl apply -f ./operator/operator-crd.yml
|
||||
kubectl apply -f ./operator/operator-rbac.yml
|
||||
kubectl wait crd/podmonitors.monitoring.coreos.com --for condition=established --timeout=60s
|
||||
kubectl apply -f ./operator/operator-dp.yml
|
||||
kubectl apply -f ./alertmanater/alertmanager-main-sa.yml
|
||||
kubectl apply -f ./alertmanater/alertmanager-main-secret.yml
|
||||
kubectl apply -f ./alertmanater/alertmanager-main-svc.yml
|
||||
kubectl apply -f ./alertmanater/alertmanager-main.yml
|
||||
kubectl apply -f ./node-exporter/node-exporter-sa.yml
|
||||
kubectl apply -f ./node-exporter/node-exporter-rbac.yml
|
||||
kubectl apply -f ./node-exporter/node-exporter-svc.yml
|
||||
kubectl apply -f ./node-exporter/node-exporter-ds.yml
|
||||
kubectl apply -f ./kube-state-metrics/kube-state-metrics-sa.yml
|
||||
kubectl apply -f ./kube-state-metrics/kube-state-metrics-rbac.yml
|
||||
kubectl apply -f ./kube-state-metrics/kube-state-metrics-svc.yml
|
||||
kubectl apply -f ./kube-state-metrics/kube-state-metrics-dp.yml
|
||||
|
||||
kubectl apply -f ./grafana/pv-pvc-hostpath.yml
|
||||
kubectl apply -f ./grafana/grafana-sa.yml
|
||||
kubectl apply -f ./grafana/grafana-source.yml
|
||||
@ -71,6 +59,8 @@ kubectl delete -f ./prometheus/prometheus-main.yml
|
||||
sleep 5
|
||||
kubectl apply -f ./prometheus/pv-pvc-hostpath.yaml
|
||||
kubectl apply -f ./prometheus/prometheus-main.yml
|
||||
sleep 5
|
||||
# 部署sm
|
||||
kubectl apply -f ./servicemonitor/alertmanager-sm.yml
|
||||
kubectl apply -f ./servicemonitor/coredns-sm.yml
|
||||
kubectl apply -f ./servicemonitor/kube-apiserver-sm.yml
|
||||
@ -82,6 +72,8 @@ kubectl apply -f ./servicemonitor/node-exporter-sm.yml
|
||||
kubectl apply -f ./servicemonitor/prometheus-operator-sm.yml
|
||||
kubectl apply -f ./servicemonitor/prometheus-sm.yml
|
||||
kubectl apply -f ./servicemonitor/pushgateway-sm.yml
|
||||
|
||||
# 部署prometheus_adapter
|
||||
kubectl apply -f ./prometheus_adapter/metric_rule.yaml
|
||||
kubectl apply -f ./prometheus_adapter/prometheus_adapter.yaml
|
||||
cd ../
|
||||
@ -92,35 +84,16 @@ kubectl apply -f gpu/nvidia-device-plugin.yml
|
||||
kubectl apply -f gpu/dcgm-exporter.yaml
|
||||
kubectl apply -f gpu/dcgm-exporter-sm.yaml
|
||||
|
||||
# 部署frameworkcontroller nni超参搜索使用
|
||||
kubectl create serviceaccount frameworkcontroller --namespace kubeflow
|
||||
kubectl create clusterrolebinding frameworkcontroller-kubeflow --clusterrole=cluster-admin --user=system:serviceaccount:kubeflow:frameworkcontroller
|
||||
kubectl create -f frameworkcontroller/frameworkcontroller-with-default-config.yaml
|
||||
sleep 5
|
||||
kubectl wait crd/frameworks.frameworkcontroller.microsoft.com --for condition=established --timeout=60s
|
||||
|
||||
kubectl create serviceaccount frameworkbarrier --namespace pipeline
|
||||
kubectl create serviceaccount frameworkbarrier --namespace automl
|
||||
kubectl create serviceaccount frameworkbarrier --namespace kubeflow
|
||||
kubectl create clusterrole frameworkbarrier --verb=get,list,watch --resource=frameworks
|
||||
kubectl create clusterrolebinding frameworkbarrier-pipeline --clusterrole=frameworkbarrier --user=system:serviceaccount:pipeline:frameworkbarrier
|
||||
kubectl create clusterrolebinding frameworkbarrier-automl --clusterrole=frameworkbarrier --user=system:serviceaccount:automl:frameworkbarrier
|
||||
kubectl create clusterrolebinding frameworkbarrier-kubeflow --clusterrole=frameworkbarrier --user=system:serviceaccount:kubeflow:frameworkbarrier
|
||||
|
||||
# 部署volcano
|
||||
kubectl delete -f volcano/volcano-development.yaml
|
||||
kubectl delete secret volcano-admission-secret -n kubeflow
|
||||
kubectl apply -f volcano/volcano-development.yaml
|
||||
kubectl wait crd/jobs.batch.volcano.sh --for condition=established --timeout=60s
|
||||
|
||||
# 部署istio
|
||||
kubectl apply -f istio/install-crd.yaml
|
||||
kubectl wait crd/envoyfilters.networking.istio.io --for condition=established --timeout=60s
|
||||
# 在k8s 1.21-部署
|
||||
kubectl apply -f istio/install.yaml
|
||||
# 在k8s 1.21+部署
|
||||
# kubectl delete -f istio/install.yaml
|
||||
# kubectl apply -f istio/install-1.15.0.yaml
|
||||
|
||||
kubectl apply -f istio/install-1.15.0.yaml
|
||||
|
||||
kubectl wait crd/virtualservices.networking.istio.io --for condition=established --timeout=60s
|
||||
kubectl wait crd/gateways.networking.istio.io --for condition=established --timeout=60s
|
||||
@ -133,16 +106,11 @@ kubectl apply -f argo/minio-pv-pvc-hostpath.yaml
|
||||
kubectl apply -f argo/pipeline-runner-rolebinding.yaml
|
||||
kubectl apply -f argo/install-3.4.3-all.yaml
|
||||
|
||||
# 部署trainjob:tfjob/pytorchjob/mpijob/mxnetjob/xgboostjobs
|
||||
# 部署trainjob:tfjob/pytorchjob/mpijob/mxnetjob/xgboostjobs/paddlepaddle
|
||||
kubectl apply -f kubeflow/sa-rbac.yaml
|
||||
|
||||
kubectl apply -k kubeflow/train-operator/manifests/overlays/standalone
|
||||
|
||||
# 部署sparkjob
|
||||
kubectl apply -f spark/install.yaml
|
||||
|
||||
# 部署paddlejob
|
||||
kubectl apply -f paddle/crd.yaml
|
||||
kubectl apply -f paddle/operator.yaml
|
||||
|
||||
# 部署管理平台
|
||||
kubectl delete configmap kubernetes-config -n infra
|
||||
|
@ -1,20 +1,3 @@
|
||||
## 通过vs代理访问其他的服务,包括pipeline,argo,minio,grafana等
|
||||
#apiVersion: networking.istio.io/v1alpha3
|
||||
#kind: VirtualService
|
||||
#metadata:
|
||||
# name: infra-kubeflow-dashboard
|
||||
# namespace: infra
|
||||
#spec:
|
||||
# gateways:
|
||||
# - kubeflow/kubeflow-gateway
|
||||
# hosts:
|
||||
# - "*" # 管理平台的域名 kubeflow.local.com
|
||||
# http:
|
||||
# - route:
|
||||
# - destination:
|
||||
# host: kubeflow-dashboard.infra.svc.cluster.local
|
||||
# port:
|
||||
# number: 80
|
||||
|
||||
---
|
||||
apiVersion: networking.istio.io/v1alpha3
|
||||
@ -42,6 +25,50 @@ spec:
|
||||
---
|
||||
apiVersion: networking.istio.io/v1alpha3
|
||||
kind: VirtualService
|
||||
metadata:
|
||||
name: kube-system-k8s-dashboard-user1
|
||||
namespace: kube-system
|
||||
spec:
|
||||
gateways:
|
||||
- kubeflow/kubeflow-gateway
|
||||
hosts:
|
||||
- "*" # 配置自己管理的域名kubeflow.local.com
|
||||
http:
|
||||
- match:
|
||||
- uri:
|
||||
prefix: /k8s/dashboard/user1/
|
||||
rewrite:
|
||||
uri: /
|
||||
route:
|
||||
- destination:
|
||||
host: kubernetes-dashboard-user1.kube-system.svc.cluster.local
|
||||
port:
|
||||
number: 9090
|
||||
---
|
||||
apiVersion: networking.istio.io/v1alpha3
|
||||
kind: VirtualService
|
||||
metadata:
|
||||
name: kubeflow-labelstudio
|
||||
namespace: kubeflow
|
||||
spec:
|
||||
gateways:
|
||||
- kubeflow/kubeflow-gateway
|
||||
hosts:
|
||||
- "*" # 配置自己管理的域名 kubeflow.local.com
|
||||
http:
|
||||
- match:
|
||||
- uri:
|
||||
prefix: /labelstudio/
|
||||
rewrite:
|
||||
uri: /labelstudio/
|
||||
route:
|
||||
- destination:
|
||||
host: labelstudio.kubeflow.svc.cluster.local
|
||||
port:
|
||||
number: 8080
|
||||
---
|
||||
apiVersion: networking.istio.io/v1alpha3
|
||||
kind: VirtualService
|
||||
metadata:
|
||||
name: monitoring-grafana
|
||||
namespace: monitoring
|
||||
|
Loading…
Reference in New Issue
Block a user