# 官方提供@yunTaoScripts PROBE 🔥🔥

loading

# 探针的目的

  • deployment的作用是维持pod的健壮性。当pod挂掉之后,deployment会生成新的pod。如果pod是正常运行的,但pod里面的应用出了问题,此时deployment是监测不到的,因此需要探针(probe)。

  • 用户定义“出现什么样的状况”才叫出问题。当probe监测到此问题时,会认为pod出现了问题,通过“重启”或者“服务不调度”来解决问题,分别对应“liveness probe”和“readiness probe”。

# 存活探针 liveness probe

# 如果探测连续失败达到 failureThreshold 次,POD内的容器会被重启(重新生成),容器id改变,POD名称不变。

# exec

apiVersion: apps/v1
kind: Deployment # exec liveness probe demo: the probe fails once /tmp/healthy is removed
metadata:
  creationTimestamp: null
  labels:
    app: liveness-dp
  name: liveness-dp
spec:
  replicas: 2
  selector:
    matchLabels:
      app: liveness-dp
  strategy: {}
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: liveness-dp
    spec:
      terminationGracePeriodSeconds: 0
      containers:
      - image: busybox
        imagePullPolicy: IfNotPresent
        name: liveness-dp
        # Create the health file, delete it after 30s, then keep the container alive.
        # The final sleep needs a duration: a bare `sleep` fails in BusyBox, which
        # would make the container exit by itself instead of being restarted by the probe.
        command: ['sh','-c','touch /tmp/healthy; sleep 30; rm -f /tmp/healthy; sleep 600']
        livenessProbe:
          exec:
            # The probe succeeds while `cat /tmp/healthy` exits with status 0.
            command:
            - cat
            - /tmp/healthy
          initialDelaySeconds: 5 # do not probe during the first 5s after the container starts
          periodSeconds: 5 # probe every 5s
        resources: {}
status: {}

# httpGet

apiVersion: apps/v1
kind: Deployment # httpGet liveness probe demo against nginx
metadata:
  name: liveness-dp
  labels:
    app: liveness-dp
  creationTimestamp: null
spec:
  replicas: 2
  strategy: {}
  selector:
    matchLabels:
      app: liveness-dp
  template:
    metadata:
      labels:
        app: liveness-dp
      creationTimestamp: null
    spec:
      terminationGracePeriodSeconds: 0
      containers:
      - name: liveness-dp
        image: nginx
        imagePullPolicy: IfNotPresent
        livenessProbe:
          # Probe by requesting a URL from the container.
          httpGet:
            scheme: HTTP
            port: 80
            path: /index.html
          initialDelaySeconds: 10 # wait 10s after container start before the first probe
          periodSeconds: 5 # probe every 5s
          timeoutSeconds: 5 # each probe times out after 5s
          successThreshold: 1 # consecutive successes needed to count as healthy again
          failureThreshold: 3 # consecutive failures before the container is restarted
status: {}

# tcpSocket

apiVersion: apps/v1
kind: Deployment # tcpSocket liveness probe demo against nginx
metadata:
  name: liveness-dp
  labels:
    app: liveness-dp
  creationTimestamp: null
spec:
  replicas: 2
  strategy: {}
  selector:
    matchLabels:
      app: liveness-dp
  template:
    metadata:
      labels:
        app: liveness-dp
      creationTimestamp: null
    spec:
      terminationGracePeriodSeconds: 0
      containers:
      - name: liveness-dp
        image: nginx
        imagePullPolicy: IfNotPresent
        livenessProbe:
          # Probe by opening a TCP connection to a port.
          tcpSocket:
            port: 80
          initialDelaySeconds: 30 # wait 30s after container start before the first probe
          periodSeconds: 5 # probe every 5s
          timeoutSeconds: 5 # each probe times out after 5s
          successThreshold: 1 # consecutive successes needed to count as healthy again
          failureThreshold: 3 # consecutive failures before the container is restarted
status: {}

# 就绪探针 readiness probe

# exec

apiVersion: apps/v1
kind: Deployment # exec readiness probe demo: the probe fails once /tmp/healthy is removed
metadata:
  creationTimestamp: null
  labels:
    app: readiness-dp
  name: readiness-dp
spec:
  replicas: 2
  selector:
    matchLabels:
      app: readiness-dp
  strategy: {}
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: readiness-dp
    spec:
      terminationGracePeriodSeconds: 0
      containers:
      - image: busybox
        imagePullPolicy: IfNotPresent
        name: readiness-dp
        # Create the health file, delete it after 30s, then keep the container alive.
        # The final sleep needs a duration: a bare `sleep` fails in BusyBox, so the
        # container would exit and be restarted, recreating /tmp/healthy and hiding
        # the not-ready state this example is meant to show.
        command: ['sh','-c','touch /tmp/healthy; sleep 30; rm -f /tmp/healthy; sleep 600']
        readinessProbe:
          exec:
            # The probe succeeds while `cat /tmp/healthy` exits with status 0.
            command:
            - cat
            - /tmp/healthy
          initialDelaySeconds: 5 # do not probe during the first 5s after the container starts
          periodSeconds: 5 # probe every 5s
        resources: {}
status: {}

# httpGet

apiVersion: apps/v1
kind: Deployment # httpGet readiness probe demo against nginx
metadata:
  creationTimestamp: null
  labels:
    app: readiness-dp
  name: readiness-dp
spec:
  replicas: 2
  selector:
    matchLabels:
      app: readiness-dp
  strategy: {}
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: readiness-dp
    spec:
      terminationGracePeriodSeconds: 0
      containers:
      - image: nginx
        imagePullPolicy: IfNotPresent
        name: readiness-dp
        readinessProbe:
          # Probe by requesting a URL; nginx serves /index.html on port 80.
          # Deleting index.html inside the container makes this probe fail.
          httpGet:
            path: /index.html
            port: 80
            scheme: HTTP
          initialDelaySeconds: 30 # wait 30s after container start before the first probe
          periodSeconds: 5 # probe every 5s
          successThreshold: 1 # consecutive successes needed to be considered ready again
          failureThreshold: 3 # consecutive failures before the Pod is marked not ready
          timeoutSeconds: 5 # each probe times out after 5s
status: {}

# tcpSocket

apiVersion: apps/v1
kind: Deployment # tcpSocket readiness probe demo against nginx
metadata:
  creationTimestamp: null
  labels:
    app: readiness-dp
  name: readiness-dp
spec:
  replicas: 2
  selector:
    matchLabels:
      app: readiness-dp
  strategy: {}
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: readiness-dp
    spec:
      terminationGracePeriodSeconds: 0
      containers:
      - image: nginx
        imagePullPolicy: IfNotPresent
        name: readiness-dp
        readinessProbe:
          # Probe by opening a TCP connection. The port must be one the container
          # listens on: nginx serves on 80, so probing 800 would never succeed
          # and the Pods would never become ready.
          tcpSocket:
            port: 80
          initialDelaySeconds: 30 # wait 30s after container start before the first probe
          periodSeconds: 5 # probe every 5s
          successThreshold: 1 # consecutive successes needed to be considered ready again
          failureThreshold: 3 # consecutive failures before the Pod is marked not ready
          timeoutSeconds: 5 # each probe times out after 5s

# 两种探针比较

  • 存活探针探测失败(连续失败达到 failureThreshold 次)后会 重启容器(pod本身不会重建),并且endpoints保持不变。
root@liveness-dp-67946cd47c-5kqr2:/usr/share/nginx/html# rm index.html 

[root@ctsm1 08_probe]# kubectl get endpoints liveness-dp -w
NAME          ENDPOINTS                           AGE
liveness-dp   10.244.59.146:80,10.244.59.155:80   49s
  • 就绪探针会将不健康的pod 剔除endpoints ,并且 不会重启
root@readiness-dp-6567df8cf5-924jw:/usr/share/nginx/html# rm -f index.html

[root@ctsm1 08_probe]# kubectl get endpoints -w
NAME           ENDPOINTS                           AGE
readiness-dp   10.244.59.150:80,10.244.59.154:80   5m6s
readiness-dp   10.244.59.150:80                    5m32s

# 参数意义

  • initialDelaySeconds:
    • 容器启动后,第一次执行探测前需要等待多少秒,默认是0秒,最小值是0。
  • periodSeconds:
    • 执行探测的频率,默认是10秒,最小1秒。
  • timeoutSeconds:
    • 探测超时时间,默认1秒,最小1秒。
  • successThreshold:
    • 探测失败后,最少连续探测成功多少次才被认定为成功,默认是1,对于liveness必须是1,最小值是1。
  • failureThreshold:
    • 当 Pod 启动了并且探测到失败,Kubernetes 的重试次数。
    • 存活探测情况下的放弃就意味着重新启动容器。
    • 就绪探测情况下的放弃Pod 会被打上未就绪的标签。默认值是 3,最小值是 1。