kubeflow部署指导(2)——k8s部署

环境信息
openEuler 22.03 LTS SP4 x86
物理机:2288H V5

一、安装准备(所有主机)

# 关闭防火墙
systemctl stop firewalld && systemctl disable firewalld

# 关闭selinux
setenforce 0 && sed -i 's/enforcing/disabled/' /etc/selinux/config

# 关闭swap
swapoff -a && sed -ri 's/.*swap.*/#&/' /etc/fstab

# 配置主机名(在各主机上分别执行,每台主机只执行与自己对应的一条)
hostnamectl set-hostname master   # 在 master 上执行
hostnamectl set-hostname node1    # 在 node1 上执行
hostnamectl set-hostname node2    # 在 node2 上执行

# 重启
reboot

# 配置主机名解析
cat >> /etc/hosts << EOF
192.168.231.144 master
192.168.231.145 node1
192.168.231.146 node2
EOF
  
# 配置网桥模块
modprobe br_netfilter
cat > /etc/sysctl.d/k8s.conf <<EOF
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF

# 生效
sysctl --system

# 查看是否加载
lsmod | grep br_netfilter

# 时间同步
yum install ntpdate -y
ntpdate time.windows.com

# 安装docker 18.09.0,设置开机自启并立即启动(后续拉取镜像和 kubeadm init 都需要 docker 处于运行状态)
dnf install -y docker && systemctl enable --now docker

二、安装并初始化k8s集群

# master节点
dnf install -y kubernetes-kubeadm kubernetes-kubelet kubernetes-master conntrack

# node节点
dnf install -y kubernetes-kubeadm kubernetes-kubelet kubernetes-node conntrack

# 所有节点启动kubelet
systemctl enable kubelet && systemctl start kubelet

# 主节点初始化(--apiserver-advertise-address 须填写 master 节点的实际 IP,并与 /etc/hosts 中 master 的地址保持一致)
kubeadm init --apiserver-advertise-address=192.168.122.144 --image-repository registry.aliyuncs.com/google_containers --kubernetes-version v1.20.2 --service-cidr=10.1.0.0/16 --pod-network-cidr=10.244.0.0/16

# init 成功后执行

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config


# 从节点加入集群(在各 node 节点上执行 kubeadm init 成功后输出的 join 命令;token 和 hash 以实际输出为准,以下仅为示例)
kubeadm join 192.168.122.144:6443 --token az4gxu.y8xhvf8i2i343hhf --discovery-token-ca-cert-hash sha256:d9eaf3ed340b0f9f98f46de1d576d3a06653b52b84eb81b3b2ac4d437066d184

# 配置kubectl环境变量:编辑 /etc/profile,在文件末尾追加下面的 export 行,保存后执行 source 使其生效
vi /etc/profile
export KUBECONFIG=/etc/kubernetes/admin.conf
source /etc/profile

# 安装网络插件
dnf install -y containernetworking-plugins
mkdir -p /opt/cni/bin
cp /usr/libexec/cni/* /opt/cni/bin/

# 拉取flannel插件国内镜像
	docker pull swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/flannel/flannel:v0.25.5
	docker tag  swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/flannel/flannel:v0.25.5  docker.io/flannel/flannel:v0.25.5
	docker pull swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/flannel/flannel-cni-plugin:v1.5.1-flannel1
	docker tag  swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/flannel/flannel-cni-plugin:v1.5.1-flannel1  docker.io/flannel/flannel-cni-plugin:v1.5.1-flannel1

# 安装flannel插件(注意:yml 中引用的镜像版本需与上面拉取并打 tag 的版本一致,否则节点仍会尝试从 docker.io 拉取镜像)
kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml  # 可以下载yml至本地执行

三、集群状态验证

# 查看所有节点状态
kubectl get nodes

# 查看组件状态
kubectl get cs

# 查看所有pods运行是否正常
kubectl get pods -A

四、其它问题:

1.更改docker的cgroup驱动

编辑 /etc/docker/daemon.json(文件不存在则新建),加入以下配置,然后重新加载并重启docker:
{  
	"exec-opts": ["native.cgroupdriver=systemd"]  
}
systemctl daemon-reload
systemctl restart docker

2.controller-manager和scheduler组件状态Unhealthy

[root@master]# kubectl get cs
	NAME                 STATUS      MESSAGE                                                                                       ERROR
	controller-manager   Unhealthy   Get "http://127.0.0.1:10252/healthz": dial tcp 127.0.0.1:10252: connect: connection refused   
	scheduler            Unhealthy   Get "http://127.0.0.1:10251/healthz": dial tcp 127.0.0.1:10251: connect: connection refused   
	etcd-0               Healthy     {"health":"true"}

注释掉 /etc/kubernetes/manifests/kube-scheduler.yaml 中的 "- --port=0" 一行
注释掉 /etc/kubernetes/manifests/kube-controller-manager.yaml 中的 "- --port=0" 一行