Alerts

Evicted Pod (0 active)
# Rule "Evicted Pod": fires when a series of
# kube_pod_container_status_terminated_reason with reason="Evicted" has held
# the value 1 for 5 minutes.
# NOTE(review): confirm this series is actually populated for evicted pods in
# this cluster; the annotations also read an `instance` label from it.
alert: Evicted Pod
expr: >-
  kube_pod_container_status_terminated_reason{reason="Evicted"}
  == 1
for: 5m
labels:
  severity: warning
  type: source
annotations:
  # Chinese text, roughly: "an eviction occurred", followed by the
  # instance / namespace / pod labels of the firing series.
  description: '发生节点被驱逐状况,instance:{{$labels.instance}},namespace:{{$labels.namespace}},pod:{{$labels.pod}}'
  summary: '发生节点被驱逐状况'
HighRateOfCpuUsed (0 active)
# Rule "HighRateOfCpuUsed": per-instance CPU usage percentage over the last
# hour, computed as 100 * (1 - idle-time increase / total-time increase).
# Fires when that percentage stays above 95 for 5 minutes.
alert: HighRateOfCpuUsed
expr: >-
  100 * (1 -
  sum by(instance) (increase(node_cpu_seconds_total{mode="idle"}[1h]))
  /
  sum by(instance) (increase(node_cpu_seconds_total[1h])))
  > 95
for: 5m
labels:
  severity: warning
  type: source
annotations:
  # Chinese text, roughly: "CPU usage exceeded 95% over 1 hour", plus the
  # instance label of the firing series.
  description: '1小时中cpu使用超过95%,instance:{{$labels.instance}}'
  summary: '1小时中cpu使用超过95%'
HighRateOfDiskUsed (0 active)
# Rule "HighRateOfDiskUsed": used-space percentage of the filesystem mounted
# at "/rootfs", computed as (size - avail) / size * 100.
# Fires when that percentage stays above 85 for 5 minutes.
alert: HighRateOfDiskUsed
expr: >-
  (node_filesystem_size_bytes{mountpoint="/rootfs"}
  - node_filesystem_avail_bytes{mountpoint="/rootfs"})
  / node_filesystem_size_bytes{mountpoint="/rootfs"}
  * 100 > 85
for: 5m
labels:
  severity: warning
  type: source
annotations:
  # Chinese text, roughly: "disk usage exceeded the 85% limit", plus the
  # instance / device / fstype labels of the firing series.
  description: '硬盘占用率超过限制85%,instance:{{$labels.instance}},device:{{$labels.device}},fstype:{{$labels.fstype}}'
  summary: '硬盘占用率超过限制'
HighRateOfMemoryUsed (0 active)
# Rule "HighRateOfMemoryUsed": memory usage percentage based on 1-hour
# averages, computed as 100 * (1 - (MemFree + Cached + Buffers) / MemTotal).
# Fires when that percentage stays above 90 for 5 minutes.
alert: HighRateOfMemoryUsed
expr: >-
  100 * (1 - ((avg_over_time(node_memory_MemFree_bytes[1h])
  + avg_over_time(node_memory_Cached_bytes[1h])
  + avg_over_time(node_memory_Buffers_bytes[1h]))
  / avg_over_time(node_memory_MemTotal_bytes[1h])))
  > 90
for: 5m
labels:
  severity: warning
  type: source
annotations:
  # Chinese text, roughly: "memory usage exceeded 90% over 1 hour".
  # Fix: the "namespace:" field previously rendered the pod-name label
  # (kubernetes_pod_name) and "pod:" rendered an unrelated `name` label.
  # NOTE(review): assumes the scrape config relabels to kubernetes_namespace
  # and kubernetes_pod_name — confirm against the Prometheus scrape config.
  description: '1小时中内存使用超过90%,instance:{{$labels.instance}},namespace:{{$labels.kubernetes_namespace}},pod:{{$labels.kubernetes_pod_name}}'
  summary: '1小时中内存使用超过90%'
Server Busy (0 active)
# Rule "Server Busy": compares the per-job sum of game_busy_count with the
# same sum one minute earlier (offset 1m). Fires when the difference stays
# above 10 for 30 seconds.
alert: Server Busy
expr: >-
  sum by(job) (game_busy_count)
  - sum by(job) (game_busy_count offset 1m)
  > 10
for: 30s
labels:
  app: android
  severity: warning
  type: application
annotations:
  # Chinese text, roughly: "server busy occurred more than 10 times within
  # 1 minute" / "server busy count".
  description: '1分钟内server busy的次数已经超过了10次'
  summary: 'server busy的次数'