容器状态监控
根据实际需要写了两版监控容器状态的golang程序。
//容器状态监控
//报警发送企业微信webhook
//每一分钟获取一次容器运行状态
//检测容器在一分钟内是否发生过重启,告警内容包括容器名称、主机IP、外网IP、启动时间,以及获取重启前三分钟日志的命令(只给出命令,并不实际执行)
//检测一分钟内已经关闭的容器,告警内容包括容器名称、外网IP、关闭时间
//当容器日志超过指定大小时发送提醒
方式一
package main
import (
"context"
"encoding/json"
"fmt"
"github.com/docker/docker/api/types"
"github.com/docker/docker/client"
"github.com/robfig/cron/v3"
"io/ioutil"
"log"
"net"
"net/http"
"os"
"strings"
"time"
)
// DockerState is the monitoring record kept for one container.
//
// It carries what the alerts need: whether the container restarted within the
// last minute, whether it is still up, and whether its log file has grown
// past the expected size.
type DockerState struct {
	Name        string // container name
	ID          string // container ID
	PrivateIP   string // IP of the host machine, not of the container
	PublicIP    string // public IP of the host, not of the container
	Logpath     string // path of the container's log file
	Size        string // size of the log
	RestartTime string // time the container was started
	// FinishedTime string // stop time; removed because the record must describe the current run, not the previous one.
	LogTime       string // container start time minus three minutes (UTC), where log collection should begin
	LogSize       string // size of the docker log file
	Oversize      bool   // the log is larger than expected
	OneMinute     bool   // the container has been running for less than one minute
	IsUP          bool   // true while the container is up; false means it is gone
	GetLogcomment string // command that fetches the logs from three minutes before the restart
}
// Message is the JSON body posted to the Enterprise WeChat (WeCom) webhook.
type Message struct {
	// MsgType selects the message kind.
	MsgType string `json:"msgtype"`
	// Text is the payload of a text message.
	Text struct {
		// Content is the message text.
		Content string `json:"content"`
		// Mentioned_list names the members to mention.
		Mentioned_list []string `json:"mentioned_list"`
	} `json:"text"`
}
var (
	// Args lists the container names that are excluded from monitoring.
	Args []string

	// d keeps the recorded state per container name, so that a container
	// that was removed can be told apart from one that restarted or was
	// redeployed under the same name.
	d = map[string]*DockerState{}

	// PrivateIP and PublicIP hold the host's addresses.
	PrivateIP string
	PublicIP  string

	// ONEGB is the size label used as a reference for every log below 1 GB.
	ONEGB = "1GB"
)
func main() {
c := cron.New()
//定时任务一分钟重启一次
c.AddFunc("@every 1m", ProcessStart)
c.Start()
select {}
}
// ProcessStart runs one monitoring cycle and sends the resulting alerts to
// the Enterprise WeChat webhook.
//
// A cycle refreshes the host addresses, marks every known container as down,
// lets GetDockerState refresh the records, drops the containers named in Args
// and then reports:
//   - containers that are still marked down (stopped or removed),
//   - containers flagged as restarted within the last minute,
//   - containers flagged as having an oversized log.
func ProcessStart() {
	// Enterprise WeChat webhook that receives the alerts.
	url := "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxxxxxx"

	// Keep the pairing straight: the alert text labels PrivateIP as the
	// internal address and PublicIP as the external one.
	publicIP, err := GetPublicIP()
	if err != nil {
		fmt.Println(err)
	}
	PublicIP = publicIP

	privateIP, err := GetPrivateIP()
	if err != nil {
		fmt.Println(err)
	}
	PrivateIP = privateIP

	hostname, err := os.Hostname()
	if err != nil {
		fmt.Println(err)
	}

	// Mark everything as down; containers that are still up get flagged
	// again by GetDockerState, so whatever stays down has disappeared.
	for _, state := range d {
		state.IsUP = false
	}
	if err = GetDockerState(40); err != nil {
		// The records were not refreshed, so every container still looks
		// down. Reporting now would produce false "stopped" alerts.
		fmt.Println(err)
		return
	}

	// Drop the containers that must not be monitored.
	for _, name := range Args {
		delete(d, name)
	}

	for name, state := range d {
		now := time.Now().Format("2006-01-02 15:04:05")
		// Part of the message shared by all three alert kinds.
		common := "\n现在时间: " + now + "\n公司机器: " + hostname + "\n内网IP地址:" + PrivateIP + "\n外网IP地址:" + PublicIP + "\n容器名称:" + name

		if !state.IsUP {
			fmt.Println(name, "已经关机")
			stopTime, err := GetStopTime(state.ID)
			if err != nil {
				// Still report the container, just without a stop time.
				fmt.Println(err)
			}
			SendMessage(url, "容器状态: 容器已经关闭"+common+"\ndocker关闭时间:"+stopTime)
			// Deleting during range is safe in Go; the entry is reported once.
			delete(d, name)
			continue
		}
		if state.OneMinute {
			SendMessage(url, "容器状态: 容器发生重启"+common+"\n重启时间:"+state.RestartTime+"\n重启日志获取:"+state.GetLogcomment)
		}
		if state.Oversize {
			SendMessage(url, "容器状态: 容器日志过大"+common+"\n容器日志大小: "+state.Size+"\n日志文件位置:"+state.Logpath)
		}
	}
}
// GetDockerState inspects every running container and stores a fresh
// DockerState for it in d, keyed by container name.
//
// size is the log-size threshold in GB: a container whose log file is at
// least that large is flagged as Oversize. The parameter exists because the
// docker logs on the monitored hosts are not size-limited.
//
// An error is returned when the docker client cannot be created or the
// container list cannot be fetched. A failure for a single container is
// printed and that container is skipped.
func GetDockerState(size float64) error {
	ctx := context.Background()
	cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
	if err != nil {
		return err
	}
	// A client is created on every call, so release its connections.
	defer cli.Close()

	containers, err := cli.ContainerList(ctx, types.ContainerListOptions{})
	if err != nil {
		return err
	}

	const gb = 1024 * 1024 * 1024
	for _, container := range containers {
		if len(container.Names) == 0 {
			continue
		}
		ds := DockerState{
			PrivateIP: PrivateIP,
			PublicIP:  PublicIP,
			ID:        container.ID[:12],
			Name:      strings.Trim(container.Names[0], "/"),
			IsUP:      true,
		}

		inspect, err := cli.ContainerInspect(ctx, container.ID)
		if err != nil {
			// Skip only this container; aborting here would leave the
			// remaining running containers unrecorded.
			fmt.Println(err)
			continue
		}

		// Derive the restart-related values from the start time.
		restartTime, beforeTime, withinMinute, err := GetRestartTime(inspect.State.StartedAt, inspect.State.FinishedAt)
		if err != nil {
			fmt.Println(err)
			continue
		}
		ds.RestartTime = restartTime
		ds.LogTime = beforeTime
		ds.OneMinute = withinMinute
		ds.Logpath = inspect.LogPath

		// To keep things simple every log below 1 GB is reported as 1GB.
		ds.Size = ONEGB
		if stat, err := os.Stat(ds.Logpath); err != nil {
			// The log file may be missing or unreadable; keep the default
			// size instead of failing the whole cycle.
			fmt.Println(err)
		} else if logBytes := float64(stat.Size()); logBytes >= gb {
			ds.Size = fmt.Sprintf("%.2fGB", logBytes/gb)
			ds.Oversize = logBytes >= gb*size
		}

		ds.GetLogcomment = "docker logs -t --since " + ds.LogTime + " " + ds.Name + " > " + ds.Name + ".log 2>&1"

		// Record every running container, not only restarted or oversized
		// ones: an entry that is not refreshed here keeps IsUP == false and
		// is therefore treated as stopped.
		d[ds.Name] = &ds
	}
	return nil
}
// GetRestartTime turns a container's StartedAt timestamp (RFC 3339, as
// reported by docker inspect) into the values used by the restart alert.
//
// Returns:
//   - restart: the start time rendered in Beijing time (UTC+8);
//   - befortime: the instant three minutes before the start, in UTC with an
//     explicit trailing "Z", meant for `docker logs --since`;
//   - OneMinute: true when the container started at most 59 seconds ago
//     (the scheduler fires once a minute, 59 seconds avoids double reports);
//   - err: non-nil when StartedAt cannot be parsed.
//
// FinishedAt is accepted but not used.
func GetRestartTime(StartedAt, FinishedAt string) (restart, befortime string, OneMinute bool, err error) {
	startedAt, err := time.Parse(time.RFC3339Nano, StartedAt)
	if err != nil {
		return "", "", false, err
	}

	cst, locErr := time.LoadLocation("Asia/Shanghai")
	if locErr != nil {
		// Hosts without a time zone database cannot load the location;
		// China Standard Time is a fixed UTC+8 offset, so fall back to it.
		cst = time.FixedZone("CST", 8*60*60)
	}

	// Without a zone designator `docker logs --since` reads the value as
	// client-local time, so mark the UTC value explicitly.
	befortime = startedAt.UTC().Add(-3*time.Minute).Format("2006-01-02T15:04:05") + "Z"
	restart = startedAt.In(cst).Format("2006-01-02 15:04:05")

	OneMinute = time.Since(startedAt) <= 59*time.Second
	return restart, befortime, OneMinute, nil
}
// GetStopTime inspects the container with the given id and returns the time
// it finished, rendered in Beijing time (UTC+8) as "2006-01-02 15:04:05".
//
// An error is returned when the docker client cannot be created, the
// container cannot be inspected, or the reported timestamp cannot be parsed.
func GetStopTime(id string) (stoptime string, err error) {
	ctx := context.Background()
	cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
	if err != nil {
		return "", err
	}
	// A client is created on every call, so release its connections.
	defer cli.Close()

	inspect, err := cli.ContainerInspect(ctx, id)
	if err != nil {
		return "", err
	}

	finishedAt, err := time.Parse(time.RFC3339Nano, inspect.State.FinishedAt)
	if err != nil {
		return "", err
	}

	cst, locErr := time.LoadLocation("Asia/Shanghai")
	if locErr != nil {
		// Hosts without a time zone database cannot load the location;
		// China Standard Time is a fixed UTC+8 offset, so fall back to it.
		cst = time.FixedZone("CST", 8*60*60)
	}
	return finishedAt.In(cst).Format("2006-01-02 15:04:05"), nil
}
// GetPrivateIP returns the local address the host uses for outbound traffic.
//
// It opens a UDP socket towards 8.8.8.8:53 and reads the local address the
// operating system selected for it.
func GetPrivateIP() (ip string, err error) {
	conn, err := net.Dial("udp", "8.8.8.8:53")
	if err != nil {
		return "", err
	}
	// This runs on every monitoring cycle; an unclosed socket would leak
	// one file descriptor per call.
	defer conn.Close()

	localAddr, ok := conn.LocalAddr().(*net.UDPAddr)
	if !ok {
		return "", fmt.Errorf("unexpected local address type %T", conn.LocalAddr())
	}
	return localAddr.IP.String(), nil
}
// GetPublicIP asks http://myexternalip.com/raw for the host's public address
// and returns the response body with surrounding whitespace removed.
//
// An error is returned when the request fails, does not finish within ten
// seconds, or the service answers with a status other than 200.
func GetPublicIP() (string, error) {
	// The default client has no timeout and could block a cycle forever.
	httpClient := &http.Client{Timeout: 10 * time.Second}
	resp, err := httpClient.Get("http://myexternalip.com/raw")
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("public IP lookup: unexpected status %s", resp.Status)
	}
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	return strings.TrimSpace(string(body)), nil
}
// SendMessage posts msg as a text message to the Enterprise WeChat webhook
// at url.
//
// Sending is best effort: failures are logged and not returned. The request
// is abandoned after ten seconds, and a response with a status other than
// 200 is logged together with its body.
func SendMessage(url, msg string) {
	var m Message
	m.MsgType = "text"
	m.Text.Content = msg
	// Uncomment to mention every member of the group:
	// m.Text.Mentioned_list = []string{"@all"}

	payload, err := json.Marshal(m)
	if err != nil {
		log.Println(err)
		return
	}

	// The default client has no timeout and could block the caller forever.
	httpClient := &http.Client{Timeout: 10 * time.Second}
	resp, err := httpClient.Post(url, "application/json", strings.NewReader(string(payload)))
	if err != nil {
		log.Println(err)
		return
	}
	defer resp.Body.Close()

	// Read the body in full so the connection can be reused.
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Println(err)
		return
	}
	if resp.StatusCode != http.StatusOK {
		log.Printf("webhook answered %s: %s", resp.Status, body)
	}
}
// GetArgs fills Args with the container names given on the command line;
// these containers are excluded from monitoring.
//
// Names may be passed as separate arguments, as comma-separated lists, or as
// a mix of both, and may be wrapped in single or double quotes, e.g.
//
//	./dockerstatus "nginx1,nginx2" nginx3 'nginx4,nginx5'
//
// Args is rebuilt from scratch on every call, so calling GetArgs repeatedly
// does not accumulate duplicates. Empty names are dropped.
func GetArgs() {
	var names []string
	for i := 1; i < len(os.Args); i++ {
		for _, name := range strings.Split(os.Args[i], ",") {
			// Strip both quote styles in one pass, then stray whitespace.
			name = strings.TrimSpace(strings.Trim(name, `"'`))
			if name != "" {
				names = append(names, name)
			}
		}
	}
	Args = names
}
方式二
package main
import (
"bufio"
"encoding/json"
"fmt"
"github.com/robfig/cron/v3"
"io/ioutil"
"log"
"net"
"net/http"
"os"
"os/exec"
"strconv"
"strings"
"time"
)
//使用说明
//程序设置为每两分钟执行一次,且不会退出,所以建议使用nohup方式使用
// nohup ./dockerstatus &
//或者配合crontab实现开机启动
//@reboot nohup /xxx/dockerstatus &
//获取运行日志
//分析日志
//报警返回
//通过cron定时库(robfig/cron)让程序作为常驻进程定时执行
// Message is the JSON body posted to the Enterprise WeChat (WeCom) webhook.
type Message struct {
	// MsgType selects the message kind.
	MsgType string `json:"msgtype"`
	// Text is the payload of a text message.
	Text struct {
		// Content is the message text.
		Content string `json:"content"`
		// Mentioned_list names the members to mention.
		Mentioned_list []string `json:"mentioned_list"`
	} `json:"text"`
}
// Args holds the container names that are excluded from monitoring.
var Args []string
// main schedules StartProcess to run every two minutes and then blocks
// forever.
func main() {
	c := cron.New()
	if _, err := c.AddFunc("@every 2m", StartProcess); err != nil {
		log.Fatalf("scheduling container check: %v", err)
	}
	c.Start()

	// The scheduler runs in the background; keep the process alive.
	select {}
}
func StartProcess() {
//企业微信webhook
url := "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxxxxx"
//获取输入参数转换为切片字符串
GetArgs()
//获取内网地址
ip, err := GetOutBoundIP()
if err != nil {
fmt.Println(err)
}
//获取公网地址
cip, err := GetPublicIP()
if err != nil {
fmt.Println(err)
}
//获取主机名
hostname, err := os.Hostname()
if err != nil {
fmt.Println(err)
}
//获取docker日志
dockerLog, err := GetDockerLog()
if err != nil {
fmt.Println(err)
}
//进行日志分析
b, err := AnalysStatus(dockerLog)
if err != nil {
fmt.Println(err)
}
//输出结果
for i, b := range b {
now := time.Now().Format("2006-01-02 15:04:05")
dockerTime, befortime,err := GetDockerTime(i)
//docker logs -t --since "2021-11-26T13:58:00" boss > boss.log 2>&1
//从启动前3分钟日志
dockerlog := "docker logs -t --since "+befortime+" "+i+" > "+i+".log 2>&1"
//导出日志
if err != nil {
fmt.Println(err)
context := "现在时间: " + fmt.Sprint(now) + "\n公司机器: " + hostname + "\n内网IP地址:" + ip + "\n外网IP地址:" + cip + "\n容器名称:" + i + "\n重启时间:" + b+""+"\n重启日志获取:" + dockerlog
//文件
SendMessage(url, context)
} else {
context := "现在时间: " + fmt.Sprint(now) + "\n公司机器: " + hostname + "\n内网IP地址:" + ip + "\n外网IP地址:" + cip + "\n容器名称:" + i + "\n重启时间:" + dockerTime+""+"\n重启日志获取:" + dockerlog
SendMessage(url, context)
}
}
//判断日志大小并提醒
docker, logfile, size,max, err := GetMaxlogDocker(0.98)
if err != nil {
fmt.Println(err)
}
if max == true{
now := time.Now().Format("2006-01-02 15:04:05")
context := "现在时间: " + fmt.Sprint(now) + "\n公司机器: " + hostname + "\n内网IP地址:" + ip + "\n外网IP地址:" + cip + "\n容器名称:" + docker + "\n日志大小:" + size+"\n日志位置:" + logfile //文件
SendMessage(url, context)
}
}
// GetOutBoundIP returns the local address the host uses for outbound traffic.
//
// It opens a UDP socket towards 8.8.8.8:53 and reads the local address the
// operating system selected for it.
func GetOutBoundIP() (ip string, err error) {
	conn, err := net.Dial("udp", "8.8.8.8:53")
	if err != nil {
		return "", err
	}
	// This runs on every monitoring cycle; an unclosed socket would leak
	// one file descriptor per call.
	defer conn.Close()

	localAddr, ok := conn.LocalAddr().(*net.UDPAddr)
	if !ok {
		return "", fmt.Errorf("unexpected local address type %T", conn.LocalAddr())
	}
	return localAddr.IP.String(), nil
}
// GetDockerLog runs `docker ps` with a "name,status" format and returns its
// raw output, one container per line. On failure it returns an empty string
// and the error.
func GetDockerLog() (string, error) {
	listing, err := ExecCommand("docker", []string{"ps", "--format", `{{.Names}},{{.Status}}`})
	if err == nil {
		return listing, nil
	}
	return "", err
}
// AnalysStatus picks the recently (re)started containers out of `docker ps`
// output.
//
// s holds one "name,status" pair per line. Containers named in Args are
// skipped, and so are lines without a comma. A container is reported when
// its status shows an uptime in seconds or of about a minute. Status texts
// for reference:
//
//	Up Less than a second
//	Up 38 seconds
//	Up About a minute
//	Up 47 minutes
//	Up 13 hours
//	Up 3 days
//
// The result maps container name to status text. An error is returned only
// when scanning the input fails.
func AnalysStatus(s string) (map[string]string, error) {
	excluded := make(map[string]struct{}, len(Args))
	for _, name := range Args {
		excluded[name] = struct{}{}
	}

	restarted := make(map[string]string)
	scanner := bufio.NewScanner(strings.NewReader(s))
	for scanner.Scan() {
		// Split once so the status text is kept whole.
		parts := strings.SplitN(scanner.Text(), ",", 2)
		if len(parts) != 2 {
			continue
		}
		name, status := strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1])
		if _, skip := excluded[name]; skip {
			continue
		}
		// "second" covers "N seconds", "1 second" and "Less than a second".
		if strings.Contains(status, "second") || strings.Contains(status, "a minute") {
			restarted[name] = status
		}
	}
	if err := scanner.Err(); err != nil {
		log.Printf("Cannot scanner text file: %s, err: [%v]", "", err)
		return nil, err
	}
	return restarted, nil
}
// ExecCommand runs commandName with params and returns everything the
// command wrote to stdout and stderr. When the command fails the error is
// printed and returned, and the output is discarded.
func ExecCommand(commandName string, params []string) (con string, err error) {
	output, runErr := exec.Command(commandName, params...).CombinedOutput()
	if runErr == nil {
		return string(output), nil
	}
	fmt.Println("Error:", runErr)
	return "", runErr
}
// SendMessage posts msg as a text message to the Enterprise WeChat webhook
// at url.
//
// Sending is best effort: failures are logged and not returned. The request
// is abandoned after ten seconds, and a response with a status other than
// 200 is logged together with its body.
func SendMessage(url, msg string) {
	var m Message
	m.MsgType = "text"
	m.Text.Content = msg
	// Uncomment to mention every member of the group:
	// m.Text.Mentioned_list = []string{"@all"}

	payload, err := json.Marshal(m)
	if err != nil {
		log.Println(err)
		return
	}

	// The default client has no timeout and could block the caller forever.
	httpClient := &http.Client{Timeout: 10 * time.Second}
	resp, err := httpClient.Post(url, "application/json", strings.NewReader(string(payload)))
	if err != nil {
		log.Println(err)
		return
	}
	defer resp.Body.Close()

	// Read the body in full so the connection can be reused.
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Println(err)
		return
	}
	if resp.StatusCode != http.StatusOK {
		log.Printf("webhook answered %s: %s", resp.Status, body)
	}
}
// GetPublicIP asks http://myexternalip.com/raw for the host's public address
// by running curl with a one-second connect timeout.
//
// The output is returned with surrounding whitespace removed. An error is
// returned when curl fails or when its output is not an IP address.
func GetPublicIP() (string, error) {
	out, err := ExecCommand("curl", []string{"-s", "--connect-timeout", "1", "http://myexternalip.com/raw"})
	if err != nil {
		return "", err
	}
	// A trailing newline would make a valid address fail to parse.
	addr := strings.TrimSpace(out)
	if net.ParseIP(addr) == nil {
		return "", fmt.Errorf("public IP lookup returned %q, which is not an IP address", addr)
	}
	return addr, nil
}
// GetArgs fills Args with the container names given on the command line;
// these containers are excluded from monitoring.
//
// Names may be passed as separate arguments, as comma-separated lists, or as
// a mix of both, and may be wrapped in single or double quotes, e.g.
//
//	./dockerstatus "nginx1,nginx2" nginx3 'nginx4,nginx5'
//
// Args is rebuilt from scratch on every call, so calling GetArgs repeatedly
// does not accumulate duplicates. Empty names are dropped.
func GetArgs() {
	var names []string
	for i := 1; i < len(os.Args); i++ {
		for _, name := range strings.Split(os.Args[i], ",") {
			// Strip both quote styles in one pass, then stray whitespace.
			name = strings.TrimSpace(strings.Trim(name, `"'`))
			if name != "" {
				names = append(names, name)
			}
		}
	}
	Args = names
}
// GetDockerTime looks up when the container dockername was started and
// converts that timestamp with GetRestartTime.
//
// It runs the equivalent of
//
//	docker inspect nginx-test11 -f "{{.State.StartedAt}}"
//
// and hands the trimmed output to GetRestartTime, whose two results are
// returned as restart and befortime. On any failure both strings are empty
// and the error is returned.
func GetDockerTime(dockername string) (restart, befortime string, err error) {
	out, err := ExecCommand("docker", []string{"inspect", dockername, "-f", "{{.State.StartedAt}}"})
	if err != nil {
		return "", "", err
	}

	// Surrounding whitespace must go, otherwise parsing fails on Windows.
	startedAt := strings.TrimSpace(out)
	if restart, befortime, err = GetRestartTime(startedAt); err != nil {
		return "", "", err
	}
	return restart, befortime, nil
}
// GetRestartTime turns a container start timestamp s (RFC 3339, as printed
// by docker inspect) into the values used by the restart alert.
//
// Returns:
//   - restart: the start time rendered in Beijing time (UTC+8);
//   - befortime: the instant three minutes before the start, in UTC with an
//     explicit trailing "Z", meant for `docker logs --since`;
//   - err: non-nil when s cannot be parsed.
//
// Both values are also printed to stdout.
func GetRestartTime(s string) (restart, befortime string, err error) {
	startedAt, err := time.Parse(time.RFC3339Nano, s)
	if err != nil {
		return "", "", err
	}

	cst, locErr := time.LoadLocation("Asia/Shanghai")
	if locErr != nil {
		// Hosts without a time zone database cannot load the location;
		// China Standard Time is a fixed UTC+8 offset, so fall back to it.
		cst = time.FixedZone("CST", 8*60*60)
	}

	// Without a zone designator `docker logs --since` reads the value as
	// client-local time, so mark the UTC value explicitly.
	befortime = startedAt.UTC().Add(-3*time.Minute).Format("2006-01-02T15:04:05") + "Z"
	restart = startedAt.In(cst).Format("2006-01-02 15:04:05")
	fmt.Println(restart, befortime)
	return restart, befortime, nil
}
// ExportDockerLog runs s through ExecCommand with no arguments and returns
// its output; it is meant for exporting a container's restart logs to a
// local file. On failure it returns an empty string and the error.
//
// NOTE(review): s is used as the executable name itself, so a full command
// line with arguments or shell redirection would not run this way. Confirm
// the intended input before using this function.
func ExportDockerLog(s string) (string, error) {
	out, err := ExecCommand(s, nil)
	if err == nil {
		return out, nil
	}
	return "", err
}
// GetMaxlogDocker finds the container whose directory under
// /var/lib/docker/containers/ is the largest and tells whether it has
// reached the limit.
//
// logsize is the limit in GB and may be fractional.
//
// Returns:
//   - name: the container name without the leading "/";
//   - logfile: the path of the container's log file;
//   - size: the directory size as text, e.g. "2.35G";
//   - max: true when the size is at least logsize;
//   - err: non-nil when a command fails or its output cannot be understood.
//
// When there are no container directories all results are zero values and
// err is nil.
//
// Note that this function changes the working directory of the whole
// process.
func GetMaxlogDocker(logsize float32) (name, logfile, size string, max bool, err error) {
	// The du glob below is relative to the container store.
	if err = os.Chdir("/var/lib/docker/containers/"); err != nil {
		return "", "", "", false, err
	}

	// Measure once, in MiB: directory and size then always belong together,
	// and sizes below 1 GB are not rounded up to a whole gigabyte.
	out, err := ExecCommand("sh", []string{"-c", `du -s -BM * 2>/dev/null | sort -n | tail -n1`})
	if err != nil {
		return "", "", "", false, err
	}
	fields := strings.Fields(out)
	if len(fields) == 0 {
		// No container directories, so nothing can be over the limit.
		return "", "", "", false, nil
	}
	if len(fields) < 2 {
		return "", "", "", false, fmt.Errorf("unexpected du output %q", out)
	}
	mib, err := strconv.Atoi(strings.TrimSuffix(fields[0], "M"))
	if err != nil {
		return "", "", "", false, err
	}
	gib := float64(mib) / 1024

	// The directory name is the container ID.
	inspected, err := ExecCommand("docker", []string{"inspect", fields[1], "--format", "{{.Name}},{{.LogPath}}"})
	if err != nil {
		return "", "", "", false, err
	}
	parts := strings.SplitN(strings.TrimSpace(inspected), ",", 2)
	if len(parts) != 2 {
		return "", "", "", false, fmt.Errorf("unexpected docker inspect output %q", inspected)
	}

	name = strings.Trim(parts[0], "/")
	logfile = parts[1]
	size = fmt.Sprintf("%.2fG", gib)
	return name, logfile, size, float32(gib) >= logsize, nil
}