容器状态监控


容器状态监控

根据实际需要写了两版监控容器状态的golang程序。

//容器状态监控
//报警发送企业微信webhook
//每一分钟获取一次容器运行状态
//包括一分钟内容器是否重启,包括启动容器名称,主机ip,外网IP,启动时间,获取启动前三分钟日志的方法(并没有实际执行)
//获取一分钟内已经关机的容器,获取包括容器的名称,外网IP,关机时间
//当容器日志超过指定大小时,发送提醒

方式一

package main

import (
    "context"
    "encoding/json"
    "fmt"
    "github.com/docker/docker/api/types"
    "github.com/docker/docker/client"
    "github.com/robfig/cron/v3"
    "io/ioutil"
    "log"
    "net"
    "net/http"
    "os"
    "strings"
    "time"
)

//容器状态监控
//每一分钟获取一次容器运行状态
//包括一分钟内容器是否重启,包括启动容器名称,主机ip,外网IP,启动时间,获取启动前三分钟日志的方法(并没有实际执行)
//获取一分钟内已经关机的容器,获取包括容器的名称,外网IP,关机时间
//当容器日志超过指定大小时,发送提醒

// DockerState is the snapshot of one container that GetDockerState builds and
// that ProcessStart turns into alerts.
type DockerState struct {
    Name        string // container name, without the leading "/"
    ID          string // container ID
    PrivateIP   string // IP of the host machine, not of the container itself
    PublicIP    string // public IP of the host, not of the container itself
    Logpath     string // path of the container's log file
    Size        string // human-readable log size, e.g. "1GB" or "12.34GB"
    RestartTime string // time the container was started
    // FinishedTime string // container stop time; removed because what has to be recorded is the current state, not the previous one.
    LogTime       string // container start time (UTC) minus 3 minutes; used as the --since value for docker logs
    LogSize       string // size of the docker log file (not populated by the code shown here)
    Oversize      bool   // true when the log file is larger than the configured limit
    OneMinute     bool   // true when the container has been running for less than a minute
    IsUP          bool   // true while the container is running; false means it is stopped or has been removed
    GetLogcomment string // command line that dumps the logs starting 3 minutes before the restart
}

// Message is the JSON body that SendMessage posts to the WeCom (企业微信)
// webhook.
type Message struct {
    // MsgType is the message kind; SendMessage always sets it to "text".
    MsgType string `json:"msgtype"`
    Text    struct {
        // Content is the text of the alert.
        Content        string   `json:"content"`
        // Mentioned_list is serialized as "mentioned_list"; SendMessage leaves it unset.
        Mentioned_list []string `json:"mentioned_list"`
    } `json:"text"`
}

// Package-level state shared by the polling functions.
var (
    // Args lists the container names that are excluded from monitoring.
    Args []string

    // d keeps the recorded container states between polling rounds, keyed by
    // container name, so that a container that disappeared can be told apart
    // from one that restarted or was redeployed under the same name.
    d = make(map[string]*DockerState)

    // PrivateIP and PublicIP are the host addresses shown in every alert;
    // ProcessStart refreshes them on each run.
    PrivateIP string
    PublicIP  string

    // ONEGB is the size reported for every log file smaller than 1 GiB.
    ONEGB = "1GB"
)

func main() {
    c := cron.New()
    //定时任务一分钟重启一次
    c.AddFunc("@every 1m", ProcessStart)
    c.Start()
    select {}
}

// ProcessStart runs one polling round: it refreshes the host addresses, asks
// GetDockerState for the current container states and sends a WeCom alert
// for every tracked container that stopped, restarted within the last minute
// or whose log file is over the limit.
func ProcessStart() {
    // WeCom webhook that receives the alerts.
    url := "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxxxxxx"

    // Public address of the host. The result of GetPublicIP used to be stored
    // in PrivateIP (and the other way round), which swapped the two address
    // lines of every alert.
    publicip, err := GetPublicIP()
    if err != nil {
        fmt.Println(err)
    }
    PublicIP = publicip

    // LAN address of the host.
    privateip, err := GetPrivateIP()
    if err != nil {
        fmt.Println(err)
    }
    PrivateIP = privateip

    hostname, err := os.Hostname()
    if err != nil {
        fmt.Println(err)
    }

    // Mark every tracked container as down; GetDockerState flips the flag back
    // for the ones it records, so whatever stays false has stopped.
    for _, dockerstate := range d {
        dockerstate.IsUP = false
    }
    if err = GetDockerState(40); err != nil {
        // Without a complete listing nothing can be said about which
        // containers are down; skip this round instead of reporting every
        // tracked container as closed.
        fmt.Println(err)
        return
    }

    // Drop the containers that were excluded on the command line.
    for _, name := range Args {
        delete(d, name)
    }

    for name, dockerstate := range d {
        now := time.Now().Format("2006-01-02 15:04:05")
        // Lines shared by all three alert kinds.
        header := "\n现在时间: " + now + "\n公司机器: " + hostname + "\n内网IP地址:" + PrivateIP + "\n外网IP地址:" + PublicIP + "\n容器名称:" + name

        if !dockerstate.IsUP {
            fmt.Println(name, "已经关机")
            stopTime, err := GetStopTime(dockerstate.ID)
            if err != nil {
                fmt.Println(err)
            }
            SendMessage(url, "容器状态:  容器已经关闭"+header+"\ndocker关闭时间:"+stopTime)
            // Reported once; stop tracking it.
            delete(d, name)
            continue
        }

        if dockerstate.OneMinute {
            SendMessage(url, "容器状态:  容器发生重启"+header+"\n重启时间:"+dockerstate.RestartTime+"\n重启日志获取:"+dockerstate.GetLogcomment)
        }
        if dockerstate.Oversize {
            SendMessage(url, "容器状态:  容器日志过大"+header+"\n容器日志大小: "+dockerstate.Size+"\n日志文件位置:"+dockerstate.Logpath)
        }
    }
}

// GetDockerState lists the running containers through the Docker API and
// records in d every container that restarted within the last minute or whose
// log file has reached the limit.
//
// size is the log-size limit in GiB. It exists because the docker logs on the
// author's hosts were not size-limited.
//
// An error is returned when the Docker client cannot be created, the listing
// fails, or a container cannot be inspected or its start time parsed.
func GetDockerState(size float64) error {
    ctx := context.Background()
    cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
    if err != nil {
        return err
    }
    // This function runs every minute; release the client's connections.
    defer cli.Close()

    containers, err := cli.ContainerList(ctx, types.ContainerListOptions{})
    if err != nil {
        return err
    }

    const gib = 1024 * 1024 * 1024

    for _, container := range containers {
        if len(container.Names) == 0 {
            continue
        }
        ds := &DockerState{
            PrivateIP: PrivateIP,
            PublicIP:  PublicIP,
            ID:        container.ID[:12],
            Name:      strings.Trim(container.Names[0], "/"),
        }

        inspect, err := cli.ContainerInspect(ctx, container.ID)
        if err != nil {
            return err
        }

        // Start time, --since timestamp and the "restarted recently" flag.
        restartTime, befortime, minute, err := GetRestartTime(inspect.State.StartedAt, inspect.State.FinishedAt)
        if err != nil {
            return err
        }
        ds.RestartTime = restartTime
        ds.LogTime = befortime
        ds.OneMinute = minute

        // Measure the container's real log file. The previous code measured a
        // hard-coded debug path and dereferenced a nil FileInfo when the file
        // did not exist.
        ds.Logpath = inspect.LogPath
        stat, err := os.Stat(ds.Logpath)
        switch {
        case err != nil:
            // No size information for this container; still report restarts.
            fmt.Println(err)
        case stat.Size() < gib:
            // To keep things simple, everything below 1 GiB is shown as 1GB.
            ds.Size = ONEGB
        default:
            ds.Size = fmt.Sprintf("%.2fGB", float64(stat.Size())/gib)
            ds.Oversize = float64(stat.Size()) >= gib*size
        }

        ds.GetLogcomment = "docker logs -t --since " + ds.LogTime + " " + ds.Name + " > " + ds.Name + ".log 2>&1"

        // Start tracking a container only when it restarted within the last
        // minute or its log is oversized. A container that is already tracked
        // is refreshed as well: otherwise its IsUP flag, cleared by
        // ProcessStart before every round, would stay false and the container
        // would be reported as closed while it is still running.
        _, tracked := d[ds.Name]
        if ds.Oversize || ds.OneMinute || tracked {
            ds.IsUP = true
            d[ds.Name] = ds
        }
    }
    return nil
}

// GetRestartTime parses a container's StartedAt timestamp (RFC 3339, as
// reported by docker inspect) and derives three values from it:
//
//   - restart: the start time in Asia/Shanghai, "2006-01-02 15:04:05"
//   - befortime: the start time minus three minutes as an RFC 3339 UTC
//     timestamp, meant for "docker logs --since"
//   - OneMinute: true when the container started no more than 59 seconds ago
//     (the poll runs every minute, so 59 seconds avoids double reports)
//
// FinishedAt is accepted for interface compatibility and is not used.
// An error is returned only when StartedAt cannot be parsed.
func GetRestartTime(StartedAt, FinishedAt string) (restart, befortime string, OneMinute bool, err error) {
    t1, err := time.Parse(time.RFC3339Nano, StartedAt)
    if err != nil {
        return "", "", false, err
    }

    cstSh, err := time.LoadLocation("Asia/Shanghai")
    if err != nil {
        // Hosts without a time-zone database cannot load named zones;
        // fall back to a fixed UTC+8 offset.
        cstSh = time.FixedZone("CST", 8*60*60)
    }

    // Keep the explicit "Z": docker interprets a --since timestamp without a
    // zone in the client's local time zone, which would shift this UTC value
    // by the local offset.
    befortime = t1.Add(-3 * time.Minute).UTC().Format(time.RFC3339)
    restart = t1.In(cstSh).Format("2006-01-02 15:04:05")

    OneMinute = time.Since(t1) <= 59*time.Second
    return restart, befortime, OneMinute, nil
}

// GetStopTime inspects the container with the given ID and returns its
// FinishedAt time converted to Asia/Shanghai, formatted as
// "2006-01-02 15:04:05".
//
// An error is returned when the Docker client cannot be created, the
// container cannot be inspected or the timestamp cannot be parsed.
func GetStopTime(id string) (stoptime string, err error) {
    ctx := context.Background()
    cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
    if err != nil {
        return "", err
    }
    // A new client is created on every call; release its connections.
    defer cli.Close()

    inspect, err := cli.ContainerInspect(ctx, id)
    if err != nil {
        return "", err
    }

    finished, err := time.Parse(time.RFC3339Nano, inspect.State.FinishedAt)
    if err != nil {
        return "", err
    }

    cstSh, err := time.LoadLocation("Asia/Shanghai")
    if err != nil {
        // Hosts without a time-zone database cannot load named zones;
        // fall back to a fixed UTC+8 offset.
        cstSh = time.FixedZone("CST", 8*60*60)
    }
    return finished.In(cstSh).Format("2006-01-02 15:04:05"), nil
}

// GetPrivateIP returns the local address the host uses for outbound traffic.
// It opens a UDP socket towards 8.8.8.8:53 and reads the local address
// chosen for it.
func GetPrivateIP() (ip string, err error) {
    conn, err := net.Dial("udp", "8.8.8.8:53")
    if err != nil {
        return "", err
    }
    // This runs on every polling round; an unclosed socket leaks one file
    // descriptor per call.
    defer conn.Close()

    localAddr, ok := conn.LocalAddr().(*net.UDPAddr)
    if !ok {
        return "", fmt.Errorf("unexpected local address type %T", conn.LocalAddr())
    }
    // Take the IP field directly instead of splitting "host:port" on ':'.
    return localAddr.IP.String(), nil
}

// GetPublicIP asks http://myexternalip.com/raw for the public address of the
// host and returns the response body with surrounding whitespace removed.
//
// An error is returned when the request fails or times out, when the service
// answers with a status other than 200, or when the body cannot be read.
func GetPublicIP() (string, error) {
    // The default client has no timeout; an unresponsive service would block
    // the polling round indefinitely.
    httpClient := &http.Client{Timeout: 10 * time.Second}

    resp, err := httpClient.Get("http://myexternalip.com/raw")
    if err != nil {
        return "", err
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        return "", fmt.Errorf("public IP lookup: unexpected status %s", resp.Status)
    }

    body, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        return "", err
    }
    // A trailing newline in the body would end up inside the alert text.
    return strings.TrimSpace(string(body)), nil
}

// SendMessage posts msg as a WeCom text message to the webhook at url.
// Failures are logged and not returned: alerting is best-effort.
func SendMessage(url, msg string) {
    var m Message
    m.MsgType = "text"
    m.Text.Content = msg
    //m.Text.Mentioned_list = []string{"@all"}

    payload, err := json.Marshal(m)
    if err != nil {
        log.Println(err)
        return
    }

    req, err := http.NewRequest("POST", url, strings.NewReader(string(payload)))
    if err != nil {
        log.Println(err)
        return
    }
    req.Header.Set("Content-Type", "application/json")

    // Bound the request so an unresponsive webhook cannot stall the job.
    httpClient := &http.Client{Timeout: 10 * time.Second}
    r, err := httpClient.Do(req)
    if err != nil {
        log.Println(err)
        return
    }
    defer r.Body.Close()

    // Read the body to the end; it is also logged when delivery failed.
    body, err := ioutil.ReadAll(r.Body)
    if err != nil {
        log.Println(err)
        return
    }
    if r.StatusCode != http.StatusOK {
        log.Printf("webhook returned %s: %s", r.Status, body)
    }
}


// GetArgs rebuilds Args, the list of container names excluded from
// monitoring, from the command-line arguments. Names may be given as separate
// arguments, as comma-separated lists, or both:
//
//    ./dockerstatus "nginx1,nginx2" nginx3 'nginx4,nginx5'
//
// Quotes left around an argument are stripped and empty names are skipped.
// The list is rebuilt from scratch, so calling GetArgs more than once does not
// accumulate duplicates.
func GetArgs() {
    var names []string
    for i := 1; i < len(os.Args); i++ {
        // Strip both quote characters in one pass. The previous second Trim
        // started again from os.Args[i] and threw away the first result.
        trimmed := strings.Trim(os.Args[i], `"'`)
        for _, name := range strings.Split(trimmed, ",") {
            if name = strings.TrimSpace(name); name != "" {
                names = append(names, name)
            }
        }
    }
    Args = names
}

方式二

package main

import (
    "bufio"
    "encoding/json"
    "fmt"
    "github.com/robfig/cron/v3"
    "io/ioutil"
    "log"
    "net"
    "net/http"
    "os"
    "os/exec"
    "strconv"
    "strings"
    "time"
)

//使用说明
//程序设置为每两分钟执行一次,且不会退出,所以建议使用nohup方式使用
// nohup ./dockerstatus &
//或者配合crontab实现开机启动
//@reboot nohup /xxx/dockerstatus &

//获取运行日志
//分析日志
//报警返回
//通过crontab变成持续进程

// Message is the JSON body that SendMessage posts to the WeCom (企业微信)
// webhook.
type Message struct {
    // MsgType is the message kind; SendMessage always sets it to "text".
    MsgType string `json:"msgtype"`
    Text    struct {
        // Content is the text of the alert.
        Content        string   `json:"content"`
        // Mentioned_list is serialized as "mentioned_list"; SendMessage leaves it unset.
        Mentioned_list []string `json:"mentioned_list"`
    } `json:"text"`
}

// Args lists the container names that are excluded from monitoring; GetArgs
// fills it from the command line and AnalysStatus filters against it.
var Args []string

// main schedules StartProcess to run every two minutes and then blocks
// forever.
func main() {
    c := cron.New()

    if _, err := c.AddFunc("@every 2m", StartProcess); err != nil {
        log.Fatalf("scheduling StartProcess: %v", err)
    }

    c.Start()

    // Block forever so the scheduled job keeps running.
    select {}
}
// StartProcess runs one monitoring round: it gathers the host identity, finds
// the recently restarted containers in the "docker ps" output and sends one
// WeCom alert per container, then sends one more alert when GetMaxlogDocker
// reports that the largest container log is over the limit. Errors from the
// individual steps are printed and the round carries on.
func StartProcess() {
    // WeCom webhook that receives the alerts.
    const webhook = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxxxxx"

    // Load the exclusion list from the command line.
    GetArgs()

    lanIP, err := GetOutBoundIP()
    if err != nil {
        fmt.Println(err)
    }
    wanIP, err := GetPublicIP()
    if err != nil {
        fmt.Println(err)
    }
    host, err := os.Hostname()
    if err != nil {
        fmt.Println(err)
    }

    psOutput, err := GetDockerLog()
    if err != nil {
        fmt.Println(err)
    }
    restarted, err := AnalysStatus(psOutput)
    if err != nil {
        fmt.Println(err)
    }

    // header builds the lines every alert starts with.
    header := func(stamp, container string) string {
        return "现在时间: " + stamp + "\n公司机器: " + host + "\n内网IP地址:" + lanIP + "\n外网IP地址:" + wanIP + "\n容器名称:" + container
    }

    for container, status := range restarted {
        stamp := time.Now().Format("2006-01-02 15:04:05")
        startedAt, since, err := GetDockerTime(container)
        // Command that exports the logs from 3 minutes before the restart, e.g.
        // docker logs -t --since "2021-11-26T13:58:00" boss > boss.log 2>&1
        logCmd := "docker logs -t --since " + since + " " + container + " > " + container + ".log 2>&1"

        // When the start time is unavailable, show the status text from
        // "docker ps" in its place.
        when := startedAt
        if err != nil {
            fmt.Println(err)
            when = status
        }
        SendMessage(webhook, header(stamp, container)+"\n重启时间:"+when+"\n重启日志获取:"+logCmd)
    }

    // Log-size check and reminder.
    biggest, logPath, logSize, tooBig, err := GetMaxlogDocker(0.98)
    if err != nil {
        fmt.Println(err)
    }
    if tooBig {
        stamp := time.Now().Format("2006-01-02 15:04:05")
        SendMessage(webhook, header(stamp, biggest)+"\n日志大小:"+logSize+"\n日志位置:"+logPath)
    }
}

// GetOutBoundIP returns the local address the host uses for outbound
// traffic. It opens a UDP socket towards 8.8.8.8:53 and reads the local
// address chosen for it.
func GetOutBoundIP() (ip string, err error) {
    conn, err := net.Dial("udp", "8.8.8.8:53")
    if err != nil {
        return "", err
    }
    // This runs on every monitoring round; an unclosed socket leaks one file
    // descriptor per call.
    defer conn.Close()

    localAddr, ok := conn.LocalAddr().(*net.UDPAddr)
    if !ok {
        return "", fmt.Errorf("unexpected local address type %T", conn.LocalAddr())
    }
    // Take the IP field directly instead of splitting "host:port" on ':'.
    return localAddr.IP.String(), nil
}

// GetDockerLog runs "docker ps" with a "name,status" format and returns its
// output, one container per line.
func GetDockerLog() (string, error) {
    output, err := ExecCommand("docker", []string{"ps", "--format", `{{.Names}},{{.Status}}`})
    if err == nil {
        return output, nil
    }
    return "", err
}

//分析日志
//时间参考
//重启分析
//Up Less than a second
//Up 38 seconds
//Up About a minute
//Up 47 minutes
//Up 3 days
//Up 13 hours
func AnalysStatus(s string) (map[string]string, error) {
    //临时容器
    b := make(map[string]string)
    // 最终容器
    c := make(map[string]string)
    reader := strings.NewReader(s)
    scanner := bufio.NewScanner(reader)
    for scanner.Scan() {
        line := scanner.Text() // or
        split := strings.Split(line, ",")
        name, status := split[0], split[1]
        b[name] = status
    }
    if err := scanner.Err(); err != nil {
        log.Printf("Cannot scanner text file: %s, err: [%v]", "", err)
        return nil, err
    }

    //获取差集,就是去掉args包含的容器
    for name := range b {
        for _, name1 := range Args {
            if name == name1 {
                delete(b, name)
            }
        }
    }

    //剩余容器信息捕获
    for name, status := range b {
        if strings.Contains(status, "seconds") || strings.Contains(status, "a minute") {
            c[name] = status
        }
    }

    return c, nil
}

// ExecCommand runs commandName with params and returns everything the command
// wrote to stdout and stderr. When the command cannot be started or exits
// with an error, the error is printed and returned with an empty string.
func ExecCommand(commandName string, params []string) (con string, err error) {
    output, runErr := exec.Command(commandName, params...).CombinedOutput()
    if runErr == nil {
        return string(output), nil
    }
    fmt.Println("Error:", runErr)
    return "", runErr
}

// SendMessage posts msg as a WeCom text message to the webhook at url.
// Failures are logged and not returned: alerting is best-effort.
func SendMessage(url, msg string) {
    var m Message
    m.MsgType = "text"
    m.Text.Content = msg
    //m.Text.Mentioned_list = []string{"@all"}

    payload, err := json.Marshal(m)
    if err != nil {
        log.Println(err)
        return
    }

    req, err := http.NewRequest("POST", url, strings.NewReader(string(payload)))
    if err != nil {
        log.Println(err)
        return
    }
    req.Header.Set("Content-Type", "application/json")

    // Bound the request so an unresponsive webhook cannot stall the job.
    httpClient := &http.Client{Timeout: 10 * time.Second}
    r, err := httpClient.Do(req)
    if err != nil {
        log.Println(err)
        return
    }
    defer r.Body.Close()

    // Read the body to the end; it is also logged when delivery failed.
    body, err := ioutil.ReadAll(r.Body)
    if err != nil {
        log.Println(err)
        return
    }
    if r.StatusCode != http.StatusOK {
        log.Printf("webhook returned %s: %s", r.Status, body)
    }
}

// GetPublicIP asks http://myexternalip.com/raw (through curl, with a
// one-second connect timeout) for the public address of the host.
//
// An error is returned when curl fails or when its output is not an IP
// address.
func GetPublicIP() (string, error) {
    arg := []string{"-s", "--connect-timeout", "1", "http://myexternalip.com/raw"}
    output, err := ExecCommand("curl", arg)
    if err != nil {
        return "", err
    }

    // Trim before validating: net.ParseIP rejects surrounding whitespace such
    // as a trailing newline.
    addr := strings.TrimSpace(output)
    if net.ParseIP(addr) == nil {
        // The previous code returned the nil err here, so callers saw an
        // empty address and no error.
        return "", fmt.Errorf("public IP lookup returned %q, which is not an IP address", addr)
    }
    return addr, nil
}

// GetArgs rebuilds Args, the list of container names excluded from
// monitoring, from the command-line arguments. Names may be given as separate
// arguments, as comma-separated lists, or both:
//
//    ./dockerstatus "nginx1,nginx2" nginx3 'nginx4,nginx5'
//
// Quotes left around an argument are stripped and empty names are skipped.
// StartProcess calls GetArgs on every run, so the list is rebuilt from scratch
// instead of being appended to; appending made Args grow on every run.
func GetArgs() {
    var names []string
    for i := 1; i < len(os.Args); i++ {
        // Strip both quote characters in one pass. The previous second Trim
        // started again from os.Args[i] and threw away the first result.
        trimmed := strings.Trim(os.Args[i], `"'`)
        for _, name := range strings.Split(trimmed, ",") {
            if name = strings.TrimSpace(name); name != "" {
                names = append(names, name)
            }
        }
    }
    Args = names
}

// GetDockerTime looks up the start time of the named container with
//
//    docker inspect <name> -f "{{.State.StartedAt}}"
//
// and passes it to GetRestartTime, whose two results it returns.
func GetDockerTime(dockername string) (restart, befortime string, err error) {
    raw, err := ExecCommand("docker", []string{"inspect", dockername, "-f", "{{.State.StartedAt}}"})
    if err != nil {
        return "", "", err
    }

    // Surrounding whitespace must be removed first; leaving it in caused an
    // error on Windows.
    restart, befortime, err = GetRestartTime(strings.TrimSpace(raw))
    if err != nil {
        return "", "", err
    }
    return restart, befortime, nil
}

// GetRestartTime parses a container start time s (RFC 3339, as printed by
// docker inspect) and returns two renderings of it:
//
//   - restart: the start time in Asia/Shanghai, "2006-01-02 15:04:05"
//   - befortime: the start time minus three minutes as an RFC 3339 UTC
//     timestamp, meant for "docker logs --since"
//
// An error is returned only when s cannot be parsed.
func GetRestartTime(s string) (restart, befortime string, err error) {
    t1, err := time.Parse(time.RFC3339Nano, s)
    if err != nil {
        return "", "", err
    }

    cstSh, err := time.LoadLocation("Asia/Shanghai")
    if err != nil {
        // Hosts without a time-zone database cannot load named zones;
        // fall back to a fixed UTC+8 offset.
        cstSh = time.FixedZone("CST", 8*60*60)
    }

    // Keep the explicit "Z": docker interprets a --since timestamp without a
    // zone in the client's local time zone, which would shift this UTC value
    // by the local offset.
    befortime = t1.Add(-3 * time.Minute).UTC().Format(time.RFC3339)
    restart = t1.In(cstSh).Format("2006-01-02 15:04:05")
    return restart, befortime, nil
}
// ExportDockerLog runs s as a command without arguments through ExecCommand
// and returns its output. It is intended for exporting the restart logs to a
// local file.
func ExportDockerLog(s string) (string, error) {
    output, err := ExecCommand(s, nil)
    if err == nil {
        return output, nil
    }
    return "", err
}

// GetMaxlogDocker finds the container whose directory under
// /var/lib/docker/containers/ is the largest and reports whether its size, in
// whole GiB as printed by "du -BG", has reached logsize.
//
// It returns the container name (without the leading "/"), the path of its
// log file, the size as text (e.g. "5G") and max, which is true when the size
// is at least logsize. An error is returned when the directory cannot be
// entered, a command fails or its output has an unexpected shape.
//
// Note that the working directory of the whole process is changed; the shell
// commands below rely on it.
func GetMaxlogDocker(logsize float32) (name, logfile, size string, max bool, err error) {
    if err = os.Chdir("/var/lib/docker/containers/"); err != nil {
        return "", "", "", false, err
    }

    // Name and log path of the container with the largest directory; the
    // first 8 characters of the directory name serve as the container ID.
    inspectArg := []string{"-c", `docker inspect  $(du -sh -BM *|sort -n|tail -n1 |awk '{print $2}'|cut -b 1-8) --format "{{.Name}}","{{.LogPath}}"`}
    output, err := ExecCommand("sh", inspectArg)
    if err != nil {
        return "", "", "", false, err
    }
    fields := strings.SplitN(strings.TrimSpace(output), ",", 2)
    if len(fields) != 2 {
        // Guard against indexing past the end when there is no container.
        return "", "", "", false, fmt.Errorf("unexpected docker inspect output %q", output)
    }
    name = strings.Trim(fields[0], "/")
    logfile = strings.TrimSpace(fields[1])

    // Size of that directory in GiB, e.g. "5G".
    sizeArg := []string{"-c", `du -sh -BG *|sort -n|tail -n1|awk '{print $1}'`}
    output, err = ExecCommand("sh", sizeArg)
    if err != nil {
        return "", "", "", false, err
    }
    // Drop the trailing newline so it does not end up in the alert, then the
    // unit suffix so the number can be parsed.
    size = strings.TrimSpace(output)
    gib, err := strconv.Atoi(strings.TrimSuffix(size, "G"))
    if err != nil {
        return "", "", "", false, err
    }

    return name, logfile, size, float32(gib) >= logsize, nil
}