执行 kubectl describe pods -n NAMESPACE POD_NAME 命令，
或 kubectl logs -n NAMESPACE POD_NAME 命令，
查看报错日志：
[root@localhost ~]# kubectl describe pods -n euler-copilot framework-deploy-777f94d468-x6t8h
Name: framework-deploy-777f94d468-x6t8h
Namespace: euler-copilot
Priority: 0
Service Account: default
Node: localhost/192.168.74.150
Start Time: Tue, 22 Apr 2025 17:06:22 +0800
Labels: app=framework
pod-template-hash=777f94d468
Annotations: checksum/secret: 7047af794b488bedcfa928f9031a6fcec3974ac63e7c8f87ce39a853d4e8b3dc
Status: Running
IP: 10.42.0.43
IPs:
IP: 10.42.0.43
Controlled By: ReplicaSet/framework-deploy-777f94d468
Init Containers:
framework-copy:
Container ID: containerd://e44f268f51ae14cae0a5f2fa567c3b083208e333ddac90973e2637517c73fd7f
Image: Harbor
Image ID: Harbor
Port:
Host Port:
Command:
python3
./main.py
--config
config.yaml
--copy
State: Terminated
Reason: Completed
Exit Code: 0
Started: Tue, 22 Apr 2025 17:06:25 +0800
Finished: Tue, 22 Apr 2025 17:06:25 +0800
Ready: True
Restart Count: 0
Environment:
Mounts:
/app/config.yaml from framework-config (rw,path="copy-config.yaml")
/config-rw from framework-shared (rw)
/config/config.toml from framework-config (rw,path="config.toml")
/db-secrets from database-secrets (rw)
/system-secrets from system-secrets (rw)
Containers:
framework:
Container ID: containerd://cdcdc5ce7acc5073c802b7419175fcd8bbbc7b97d15e3c412fb150c1994e87a3
Image: Harbor
Image ID: Harbor
Port: 8002/TCP
Host Port: 0/TCP
State: Waiting
Reason: CrashLoopBackOff
Last State: Terminated
Reason: Error
Exit Code: 1
Started: Tue, 22 Apr 2025 22:26:38 +0800
Finished: Tue, 22 Apr 2025 22:26:39 +0800
Ready: False
Restart Count: 67
Requests:
cpu: 200m
memory: 512Mi
Liveness: http-get http://:8002/health_check delay=60s timeout=1s period=90s #success=1 #failure=5
Environment:
TZ: Asia/Shanghai
CONFIG: /app/config/config.toml
Mounts:
/app/config from framework-shared (rw)
/app/data from framework-semantics-vl (rw)
/tmp from framework-tmp-volume (rw)
Conditions:
Type Status
PodReadyToStartContainers True
Initialized True
Ready False
ContainersReady False
PodScheduled True
Volumes:
framework-config:
Type: ConfigMap (a volume populated by a ConfigMap)
Name: framework-config
Optional: false
framework-semantics-vl:
Type: PersistentVolumeClaim (a reference to a PersistentVolumeClaim in the same namespace)
ClaimName: framework-semantics-claim
ReadOnly: false
database-secrets:
Type: Secret (a volume populated by a Secret)
SecretName: euler-copilot-database
Optional: false
system-secrets:
Type: Secret (a volume populated by a Secret)
SecretName: euler-copilot-system
Optional: false
framework-tmp-volume:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium: Memory
SizeLimit:
framework-shared:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium: Memory
SizeLimit:
QoS Class: Burstable
Node-Selectors:
Tolerations: node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
Type Reason Age From Message
Warning BackOff 9m45s (x1472 over 5h24m) kubelet Back-off restarting failed container framework in pod framework-deploy-777f94d468-x6t8h_euler-copilot(d211b8a0-00c4-4ed5-9cf8-f371bd5ed4e0)
Normal Pulled 4m46s (x68 over 5h24m) kubelet Container image "Harbor" already present on machine
[root@localhost ~]# kubectl describe pods -n euler-copilot rag-deploy-7457ffbcb-rp69w
Name: rag-deploy-7457ffbcb-rp69w
Namespace: euler-copilot
Priority: 0
Service Account: default
Node: localhost/192.168.74.150
Start Time: Tue, 22 Apr 2025 17:06:22 +0800
Labels: app=rag
pod-template-hash=7457ffbcb
Annotations: checksum/config: de085db6a7460492b67807274c7c90a7126fd9e3b113cba671f2e90a5a0fd8a5
Status: Pending
IP: 10.42.0.40
IPs:
IP: 10.42.0.40
Controlled By: ReplicaSet/rag-deploy-7457ffbcb
Init Containers:
rag-copy-secret:
Container ID: containerd://209e107698fd735c083f93d66e677b812d39d76d93216c8c7f4580b04474319d
Image: Harbor
Image ID: Harbor
Port:
Host Port:
Command:
python3
./main.py
--config
config.yaml
--copy
State: Waiting
Reason: CrashLoopBackOff
Last State: Terminated
Reason: Error
Exit Code: 1
Started: Tue, 22 Apr 2025 22:38:46 +0800
Finished: Tue, 22 Apr 2025 22:38:46 +0800
Ready: False
Restart Count: 70
Environment:
Mounts:
/app/config.yaml from rag-config-vl (rw,path="copy-config.yaml")
/config-rw from rag-shared (rw)
/config/.env from rag-config-vl (rw,path=".env")
/config/.env-sql from rag-config-vl (rw,path=".env-sql")
/db-secrets from database-secret (rw)
/system-secrets from system-secret (rw)
Containers:
rag:
Container ID:
Image: Harbor
Image ID:
Port: 9988/TCP
Host Port: 0/TCP
State: Waiting
Reason: PodInitializing
Ready: False
Restart Count: 0
Requests:
cpu: 250m
memory: 512Mi
Liveness: http-get http://:9988/health_check delay=60s timeout=1s period=90s #success=1 #failure=5
Environment:
TZ: Asia/Shanghai
Mounts:
/rag-service/chat2db/common/.env from rag-shared (rw,path=".env-sql")
/rag-service/data_chain/common/.env from rag-shared (rw,path=".env")
Conditions:
Type Status
PodReadyToStartContainers True
Initialized False
Ready False
ContainersReady False
PodScheduled True
Volumes:
rag-config-vl:
Type: ConfigMap (a volume populated by a ConfigMap)
Name: rag-config
Optional: false
database-secret:
Type: Secret (a volume populated by a Secret)
SecretName: euler-copilot-database
Optional: false
system-secret:
Type: Secret (a volume populated by a Secret)
SecretName: euler-copilot-system
Optional: false
rag-shared:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium: Memory
SizeLimit:
QoS Class: Burstable
Node-Selectors:
Tolerations: node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
Type Reason Age From Message
Warning BackOff 3m59s (x1518 over 5h34m) kubelet Back-off restarting failed container rag-copy-secret in pod rag-deploy-7457ffbcb-rp69w_euler-copilot(b01cdc39-8d08-4bd1-a857-a8475f3347b8)
[root@localhost scripts]# kubectl logs -n euler-copilot rag-deploy-7457ffbcb-rp69w -c rag-copy-secret --previous
Traceback (most recent call last):
File "/app/./main.py", line 18, in <module>
with config.open("r") as f:
File "/usr/lib64/python3.9/pathlib.py", line 1252, in open
return io.open(self, mode, buffering, encoding, errors, newline,
IsADirectoryError: [Errno 21] Is a directory: 'config.yaml'
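明显部署的环境有问题：从报错看，rag-copy-secret 这个 init 容器里的 /app/config.yaml 是一个目录而不是文件，导致 main.py 无法读取配置（该路径通过 path="copy-config.yaml" 从 ConfigMap rag-config 挂载，可能是 ConfigMap 中缺少 copy-config.yaml 这个键，待确认）。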