#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#############################################################################
# Copyright (c): 2012-2017, Huawei Tech. Co., Ltd.
# Date         : 2018-04-04
# Description  : CM.py is a class of CM part of Gauss200.
#############################################################################
try:
    import sys
    import os
    import subprocess
    import time
    import socket
    from datetime import datetime, timedelta

    sys.path.append(sys.path[0] + "/../../../../")
    from gspylib.common.ErrorCode import ErrorCode
    from gspylib.common.Common import DefaultValue
    from gspylib.common.DbClusterStatus import DbClusterStatus
    from gspylib.common.DbClusterInfo import dbClusterInfo
    from gspylib.os.gsfile import g_file
    from gspylib.os.gsOSlib import g_OSlib
    from gspylib.component.CM.CM import CM
except ImportError as ie:
    sys.exit("[GAUSS-52200] : Unable to import module: %s." % str(ie))


class CM_OLAP(CM):
    '''
    The class is used to define cluster manager component for olap database.
    '''
    DEFAULT_TIMEOUT = 300
    DEFAULT_RESTART_NODEGROUP_TIMEOUT = 1800

    def get_cluster_info(self):
        """
        function: build a dbClusterInfo object from the static configuration
        input  : NA
        output : dbClusterInfo, initialized through initFromStaticConfig with
                 the user name reported by g_OSlib.getUserInfo()
        """
        current_user = g_OSlib.getUserInfo()["name"]
        info = dbClusterInfo()
        info.initFromStaticConfig(current_user)
        return info

    def get_local_node(self, cluster_info):
        """
        """
        local_name = socket.gethostname()
        node_info = cluster_info.getDbNodeByName(local_name)
        if node_info is None:
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51619"] % local_name)
        return node_info

    def initInstance(self):
        """
        function : install a single cm component
        input  : NA
        output : NA
        """
        if self.instInfo.datadir == "":
            raise Exception("Data directory of instance is invalid.")

        if not os.path.exists(self.instInfo.datadir):
            raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
                            ("data directory [%s]" % self.instInfo.datadir))

        toolspath = DefaultValue.getEnv(DefaultValue.TOOL_PATH_ENV)
        DefaultValue.checkPathVaild(toolspath)
        if self.dwsMode:
            image_path = DefaultValue.DWS_IMAGE_PATH
            cmaPackagePath = "%s/cm_agent.tar.gz" % image_path
            cmsPackagePath = "%s/cm_server.tar.gz" % image_path
            # old branch that install by image package
            if os.path.exists(cmaPackagePath) or os.path.exists(cmsPackagePath):
                if self.instInfo.instanceRole == DefaultValue.INSTANCE_ROLE_CMSERVER:
                    g_file.decompressFiles(cmsPackagePath, self.instInfo.datadir)
                elif self.instInfo.instanceRole == DefaultValue.INSTANCE_ROLE_CMAGENT:
                    g_file.decompressFiles(cmaPackagePath, self.instInfo.datadir)
                    self.configCmaInstance()
            # decouple branch that install by init instance
            else:
                self.initCmInstance()
                if self.instInfo.instanceRole == DefaultValue.INSTANCE_ROLE_CMAGENT:
                    self.configCmaInstance()
        else:
            self.initCmInstance()
            if self.instInfo.instanceRole == DefaultValue.INSTANCE_ROLE_CMAGENT:
                self.configCmaInstance()

    def initCmInstance(self):
        """
        function: initialize the instance data directory with gs_initcm
        input  : NA
        output : NA
        raises : Exception (GAUSS_51615) when the command returns non-zero
        note   : "-Z cm_server" is used for cm_server instances, every other
                 role is initialized with "-Z cm_agent"
        """
        is_server = self.instInfo.instanceRole == DefaultValue.INSTANCE_ROLE_CMSERVER
        component = "cm_server" if is_server else "cm_agent"
        init_cmd = "%s/gs_initcm -Z %s -D %s" % (self.binPath, component, self.instInfo.datadir)
        self.logger.debug("Command for initializing instance: %s" % init_cmd)
        (status, output) = DefaultValue.retryGetstatusoutput(init_cmd)
        if status == 0:
            return
        raise Exception(ErrorCode.GAUSS_516["GAUSS_51615"] + " Command:%s. Error:\n%s" % (init_cmd, output))

    def configCmaInstance(self):
        """
        function: collect the cm_agent parameters and apply them via setGucConfig
        input  : NA
        output : NA
        note   : - in dws mode a fixed set of parameters is always applied
                 - when DefaultValue.get_dws_bigdata_directory returns a
                   non-empty path, that directory is removed if it exists and
                   then created again before the parameter is set
                 - enable_dws_bigdata is switched on only when the heap value
                   returned by get_dws_bigdata_vmoptions exceeds 5 * 1024
                   (unit not visible here - presumably MB, TODO confirm)
        """
        para_dict = {}
        if self.dwsMode:
            # set parameter of cm.conf
            para_dict = {'enable_log_compress': 'on',
                         'security_mode': 'on',
                         'enable_transaction_read_only': 'on',
                         'coordinator_heartbeat_timeout': 600
                         }

        cluster_info = self.get_cluster_info()
        node_info = self.get_local_node(cluster_info)
        dws_bigdata_directory = DefaultValue.get_dws_bigdata_directory(self.logger, node_info)
        if dws_bigdata_directory != "":
            # The key carries no trailing blank, so it is the same name that
            # get_dws_bigdata_directory_by_conf looks up in the config file.
            para_dict['dws_bigdata_directory'] = "'%s'" % dws_bigdata_directory
            if os.path.exists(dws_bigdata_directory):
                g_file.removeDirectory(dws_bigdata_directory)
            g_file.createDirectory(dws_bigdata_directory, overwrite=True,
                                   mode=DefaultValue.KEY_DIRECTORY_MODE)

        dws_bigdata_vmoptions, dws_bigdata_jvm_heap = DefaultValue.get_dws_bigdata_vmoptions()
        para_dict['dws_bigdata_vmoptions'] = "'%s'" % dws_bigdata_vmoptions
        if dws_bigdata_jvm_heap > 5 * 1024:
            para_dict['enable_dws_bigdata'] = "on"

        if para_dict:
            self.setGucConfig(para_dict)

    def get_dws_bigdata_directory_by_conf(self):
        """
        function: read the dws_bigdata_directory value from the cm config file
        input  : NA
        output : string, the configured directory with quotes and surrounding
                 blanks removed; "" when the config file is missing, when no
                 line matches, or when more than one line is returned
        raises : Exception (GAUSS_51615) when the shell command fails
        """
        cm_conf = DefaultValue.getCmConfigFile(self.instInfo.datadir)
        if not os.path.isfile(cm_conf):
            return ""

        grep_cmd = "grep -w '^dws_bigdata_directory' %s | awk -F '=' '{print $2}'" % cm_conf
        self.logger.debug("Command for getting dws_bigdata_directory value: %s" % grep_cmd)
        (status, output) = DefaultValue.retryGetstatusoutput(grep_cmd)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51615"] +
                            " Command:%s. Error:\n%s" % (grep_cmd, output))

        # An empty or multi-line answer is treated as "not configured".
        if output == "" or "\n" in output:
            return ""

        unquoted = output.strip().replace('"', '').replace("'", "")
        directory = unquoted.strip()
        self.logger.debug("The dws_bigdata_directory is: %s" % directory)
        return directory

    def uninstall(self):
        """
        function: uninstall the cm component
        input : NA
        output: NA
        """
        dws_bigdata_directory = self.get_dws_bigdata_directory_by_conf()
        if dws_bigdata_directory != "" and os.path.exists(dws_bigdata_directory):
            g_file.removeDirectory(dws_bigdata_directory)
        datadir = self.instInfo.datadir
        if os.path.exists(datadir):
            g_file.removeDirectory(datadir)

    def startCluster(self, user, nodeId=0, timeout=DEFAULT_TIMEOUT, isSwitchOver=True, isSingle=False,
                     cluster_normal_status=None, isSinglePrimaryMultiStandbyCluster=False, azName="", datadir=""):
        """
        function:Start cluster or node
        input:String,int,int
        output:NA
        """
        start_type = "cluster"
        if nodeId > 0:
            start_type = "node"
        if datadir != "":
            start_type = "instance"
        if azName != "":
            start_type = azName
        endTime = datetime.now() + timedelta(seconds=timeout)
        # The output starts the screen-printing information of the group
        self.logger.log("Starting %s." % start_type)
        self.logger.log("======================================================================")
        # Call cm_ctl to start the cluster
        cmd = self.getStartCmd(user, nodeId, timeout, datadir, azName)
        (status, output) = subprocess.getstatusoutput(cmd)
        # The output prompts when the failure to start
        if status != 0:
            self.logger.error(ErrorCode.GAUSS_516["GAUSS_51607"] % start_type + " Error: \n%s" % output)
            self.logger.log("The cluster may continue to start in the background.")
            self.logger.log("If you want to see the cluster status, please try command gs_om -t status.")
            self.logger.log("If you want to stop the cluster, please try command gs_om -t stop.")
            sys.exit(1)

        if isSingle:
            self.logger.log("Successfully started cluster. Waiting for cluster status to become Normal.")
            self.logger.log("======================================================================")
        elif isSinglePrimaryMultiStandbyCluster:
            if azName:
                self.logger.log("Successfully started %s." % start_type)
                self.logger.log("======================================================================")
                self.logger.log("End start %s." % start_type)
                return True
            else:
                self.logger.log("Successfully started single primary multi standby. Wait for standby instance.")
                self.logger.log("======================================================================")
        # Output the startup instance success information
        elif nodeId == 0:
            self.logger.log("Successfully started primary instance. Wait for standby instance.")
            self.logger.log("======================================================================")
        else:
            self.logger.log("Successfully started %s." % start_type)
            self.logger.log("======================================================================")
            self.logger.log("End start %s." % start_type)
            return True

        is_succeed = self.retry_check_cluster_status(user, nodeId, cluster_normal_status, start_type,
                                                     endTime, timeout, isSwitchOver, isSingle)
        return is_succeed

    def retry_check_cluster_status(self, user, node_id, cluster_normal_status, start_type,
                                   endTime, timeout, isSwitchOver, is_single):
        """
        function: check cluster status every 5 seconds
        input: user, node_id, cluster_normal_status, start_type, endTime, timeout, isSwitchOver, is_single
        output: is_succeed
        """
        dotCount = 0
        # 1 -> failed
        # 0 -> success
        is_succeed = False
        # Wait for the cluster to start completely
        while True:
            # A point is output every 5 seconds
            time.sleep(5)
            sys.stdout.write(".")
            dotCount += 1
            # A line break is output per minute
            if dotCount >= 12:
                dotCount = 0
                sys.stdout.write("\n")

            # The cluster status is checked every 5 seconds
            (startStatus, startResult) = self.doCheckStaus(node_id, cluster_normal_status)
            if startStatus == 0:
                # Output successful start information
                if dotCount != 0:
                    sys.stdout.write("\n")
                self.logger.log("Successfully started %s." % start_type)
                is_succeed = True
                break
            # The output prompts when the timeout does not start successfully
            if endTime is not None and datetime.now() >= endTime:
                if dotCount != 0:
                    sys.stdout.write("\n")
                self.logger.log("Failed to start %s " % start_type + " in (%s)s." % timeout)
                self.logger.log("It will continue to start in the background.")
                self.logger.log("If you want to see the cluster status, please try command gs_om -t status.")
                self.logger.log("If you want to stop the cluster, please try command gs_om -t stop.")
                break
        self.logger.log("======================================================================")
        self.logger.log(startResult)
        if is_succeed and isSwitchOver and not is_single:
            # Perform the switch reset operation
            cmd = self.getResetSwitchOverCmd(user, timeout)
            (status, output) = subprocess.getstatusoutput(cmd)
            if status != 0:
                self.logger.log("Failed to reset switchover the cluster. "
                                "Command: \"%s\".\nOutput: \"%s\"." % (cmd, output))
        return is_succeed

    def stopCluster(self, user, nodeId=0, stopMode="", timeout=0, datadir="", azName=""):
        stopType = "cluster"
        # Specifies the stop node
        # Gets the specified node id
        if nodeId > 0:
            stopType = "node"
        if datadir != "":
            stopType = "instance"
        if azName != "":
            stopType = azName
        # Perform a stop operation
        self.logger.log("Stopping %s." % stopType)
        self.logger.log("=========================================")
        timeout = timeout if timeout != 0 else CM_OLAP.DEFAULT_TIMEOUT
        cmd = self.getStopCmd(user, nodeId, stopMode, timeout, datadir, azName)
        (status, output) = subprocess.getstatusoutput(cmd)
        if status != 0 and stopType == "cluster":
            self.logger.log("Failed to stop %s." % stopType + " Try to stop it forcibly." + " Error:\n%s" % output)
            cmd = self.getStopCmd(user, nodeId, "i", timeout, datadir, azName)
            (status, output) = subprocess.getstatusoutput(cmd)
            if status != 0:
                self.logger.debug(output)
                self.logger.log("Failed to stop %s forcibly." % stopType)
            else:
                self.logger.log("Successfully stopped %s forcibly." % stopType)
        elif status != 0:
            self.logger.log(output)
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51610"] % stopType)
        else:
            self.logger.log("Successfully stopped %s." % stopType)

        self.logger.log("=========================================")
        self.logger.log("End stop %s." % stopType)

    def restartNodeGroup(self, nodegroupName, timeout=DEFAULT_RESTART_NODEGROUP_TIMEOUT):
        """
        function: restart node group with logical cluster
        input:
          nodegroupName: string, The name if the nodegroup to restart
        output:
          result: bool, The sign of execution result
        """
        cmd = self.getRestartNodegroup(nodegroupName, timeout)
        (status, output) = subprocess.getstatusoutput(cmd)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51645"] % "logical cluster [%s]" % nodegroupName +
                            "Error:\n%s" % output)
        else:
            return True

    def queryClusterStatus(self, nodeId=0, outFile="", showDetail=True, isFormat=False):
        """
        function: query cluster status
        input : int,string,boolean
        output: cluster status
        """
        statusFile = None
        if isFormat:
            cmd = self.getQueryStatusCmd(0, outFile, True, True)
        else:
            # query and save status into a file
            statusFile = "%s/gauss_check_status_%d.dat" % (DefaultValue.getTmpDirFromEnv(), os.getpid())
            DefaultValue.cleanTmpFile(statusFile)
            cmd = self.getQueryStatusCmd(0, statusFile)
        (status, output) = subprocess.getstatusoutput(cmd)
        if status != 0 and not isFormat:
            with open(statusFile, "r") as fp:
                output = fp.read()
            DefaultValue.cleanTmpFile(statusFile)
            if output.find("cm_ctl: can't connect to cm_server.") >= 0:
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51640"])
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51601"] % "cluster" + " Error: \n%s" % output)
        if isFormat:
            return output
        # check cluster status
        clusterStatus = DbClusterStatus()
        clusterStatus.initFromFile(statusFile)
        DefaultValue.cleanTmpFile(statusFile)
        return clusterStatus

    def resetSwitchOver(self, user, timeout=DEFAULT_TIMEOUT):
        """
        function:Reset switchover
        input: string user, int timeout
        output: string output
        """
        cmd = self.getResetSwitchOverCmd(user, timeout)
        (status, output) = subprocess.getstatusoutput(cmd)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + " Error: \n%s" % output)
        return output

    def setMonitor(self, user):
        """
        function: Set om monitor cron and start om_monitor
        input : user
        output: NA
        note  : the current crontab content (without existing om_monitor
                lines) plus a fresh om_monitor entry is written to a
                temporary file, which is loaded with g_OSlib.execCrontab and
                removed afterwards
        """
        self.logger.log("Set CRON.")
        install_dir = DefaultValue.getInstallDir(user)
        monitor_log = DefaultValue.getOMLogPath(DefaultValue.OM_MONITOR_DIR_FILE, user, install_dir, "")
        cron_file = "%s/gauss_cron_%d" % (DefaultValue.getTmpDirFromEnv(), os.getpid())

        # get all content by crontab command
        (status, current_cron) = g_OSlib.getAllCrontab()
        if status == 0:
            # overwrite cron_file, make it empty.
            g_file.createFile(cron_file, True)
            if current_cron != "":
                g_file.writeFile(cron_file, [current_cron])
                # drop any om_monitor entry that is already registered
                g_file.deleteLine(cron_file, "\\/bin\\/om_monitor")

        # The entry sources the separated environment file only when one is set.
        mpprc_file = DefaultValue.getEnv(DefaultValue.MPPRC_FILE_ENV)
        cron_entry = "*/1 * * * * source /etc/profile;(if [ -f ~/.profile ];then " \
                     "source ~/.profile;fi);source ~/.bashrc;"
        if mpprc_file != "" and mpprc_file is not None:
            cron_entry += "source %s;" % mpprc_file
        cron_entry += "nohup %s/bin/om_monitor -L %s >>/dev/null 2>&1 &" % (install_dir, monitor_log)
        g_file.writeFile(cron_file, [cron_entry])

        g_OSlib.execCrontab(cron_file)
        g_file.removeFile(cron_file)

        process_info = DefaultValue.get_instance_message(install_dir, "om_monitor")
        self.logger.debug("Before restart, the om_monitor process:\n%s." % process_info)

        self.logger.debug("Start to start om_monitor.")
        DefaultValue.start_om_monitor(self.logger, user, install_dir)
        self.logger.debug("Successfully started om_monitor.")

        process_info = DefaultValue.get_instance_message(install_dir, "om_monitor")
        self.logger.debug("After restart, the om_monitor process:\n%s." % process_info)

    def setGucConfig(self, paraDict=None, setMode='set'):
        """
        function: set cm parameters with "gs_guc set -Z cm"
        input : paraDict - mapping of parameter name to value (None is
                treated as empty); setMode is accepted but not used here
        output: NA
        raises: Exception (GAUSS_50007) when gs_guc returns non-zero
        """
        settings = {} if paraDict is None else paraDict
        profile = DefaultValue.getMpprcFile()
        g_OSlib.source(profile)

        # one ' -c "name=value" ' option per parameter
        guc_options = "".join(" -c \"%s=%s\" " % (name, value)
                              for name, value in settings.items())
        guc_cmd = "gs_guc set -Z cm -D cm_instance_data_path %s" % guc_options
        self.logger.debug("Command for setting cm parameter: %s" % guc_cmd)
        (status, output) = DefaultValue.retryGetstatusoutput(guc_cmd)
        if status == 0:
            return
        raise Exception(ErrorCode.GAUSS_500["GAUSS_50007"] % "GUC" + " Error: \n%s" % str(output))

    def getCMDict(self, user, configItemType=None, alarm_component=None):
        """
        function: Get CM configuration
        input : user, configItemType, alarm_component
        output: dict of parameter name to quoted value; always holds the
                cm_server/cm_agent log directories, and additionally
                alarm_component and unix_socket_directory when
                configItemType is "ConfigInstance"
        """
        log_dir_of = DefaultValue.getUserLogDirWithUser
        cm_dict = dict()
        cm_dict["cm_server_log_dir"] = "'%s/cm/cm_server'" % log_dir_of(user)
        cm_dict["cm_agent_log_dir"] = "'%s/cm/cm_agent'" % log_dir_of(user)
        if configItemType != "ConfigInstance":
            return cm_dict

        cm_dict["alarm_component"] = "'%s'" % alarm_component
        cm_dict["unix_socket_directory"] = "'%s'" % DefaultValue.getTmpDirFromEnv()
        return cm_dict

    def configInstance(self, user, configItemType=None, alarm_component=None):
        """
        function: Get CMAgent configuration
        input : user, configItemType, alarm_component
        output: NA
        """
        tmpDict = self.getCMDict(user, configItemType, alarm_component)
        self.setGucConfig(tmpDict)

    def doCheckStaus(self, nodeId, cluster_normal_status=None, expected_redistributing=""):
        """
        function: Check cluster (nodeId == 0) or node (nodeId > 0) status
        input : nodeId, cluster_normal_status, expected_redistributing
        output: (status, output)
                - when the query command fails: its exit status and output
                - otherwise: 0 for healthy / 1 for unhealthy, plus a text
                  summary of the status report
        raises: Exception (GAUSS_51619) when nodeId > 0 and no status exists
                for that node
        """
        status_file = "%s/gauss_check_status_%d.dat" % (DefaultValue.getTmpDirFromEnv(), os.getpid())
        DefaultValue.cleanTmpFile(status_file)
        query_cmd = self.getQueryStatusCmd(0, status_file, showDetail=True, isFormat=False)
        (status, output) = subprocess.getstatusoutput(query_cmd)
        if status != 0:
            DefaultValue.cleanTmpFile(status_file)
            return (status, output)

        cluster_status = DbClusterStatus()
        cluster_status.initFromFile(status_file)
        DefaultValue.cleanTmpFile(status_file)

        summary = []
        if nodeId > 0:
            node_status = cluster_status.getDbNodeStatusById(nodeId)
            if node_status is None:
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51619"] % nodeId)
            healthy = node_status.isNodeHealthy()
            report = node_status.getNodeStatusReport()
        else:
            # An empty expected_redistributing disables the redistributing check.
            healthy = cluster_status.isAllHealthy(cluster_normal_status) and (
                cluster_status.redistributing == expected_redistributing or
                expected_redistributing == "")
            report = cluster_status.getClusterStatusReport()
            summary.append("cluster_state      : %s\n" % cluster_status.clusterStatus)
            summary.append("redistributing     : %s\n" % cluster_status.redistributing)
            summary.append("node_count         : %d\n" % report.nodeCount)
        status = 0 if healthy else 1

        summary.extend([
            "Coordinator State\n",
            "    normal         : %d\n" % report.cooNormal,
            "    abnormal       : %d\n" % report.cooAbnormal,
            "GTM State\n",
            "    primary        : %d\n" % report.gtmPrimary,
            "    standby        : %d\n" % report.gtmStandby,
            "    abnormal       : %d\n" % report.gtmAbnormal,
            "    down           : %d\n" % report.gtmDown,
            "Datanode State\n",
            "    primary        : %d\n" % report.dnPrimary,
            "    standby        : %d\n" % report.dnStandby,
            "    secondary      : %d\n" % report.dnDummy,
            "    building       : %d\n" % report.dnBuild,
            "    abnormal       : %d\n" % report.dnAbnormal,
            "    down           : %d\n" % report.dnDown,
        ])

        return (status, "".join(summary))

    def getStartCmd(self, user, nodeId=0, timeout=DEFAULT_TIMEOUT, datadir="", azName=""):
        """
        function : build the cm_ctl command that starts the cluster, a node,
                   an instance or an AZ
        input : user (not used in the command), nodeId, timeout, datadir, azName
        output : String, the shell command
        """
        pieces = ["source %s ; cm_ctl start" % DefaultValue.getMpprcFile()]
        # node id
        if nodeId > 0:
            pieces.append(" -n %d" % nodeId)
        # data directory
        if datadir != "":
            pieces.append(" -D %s" % datadir)
        # timeout
        if timeout > 0:
            pieces.append(" -t %d" % timeout)
        # azName
        if azName != "":
            pieces.append(" -z%s" % azName)
        return "".join(pieces)

    def getStopCmd(self, user, nodeId=0, stopMode="", timeout=DEFAULT_TIMEOUT, datadir="", azName=""):
        """
        function : build the cm_ctl command that stops the cluster, a node,
                   an instance or an AZ
        input : user (not used in the command), nodeId, stopMode, timeout,
                datadir, azName
        output : String, the shell command
        """
        pieces = ["source %s ; cm_ctl stop" % DefaultValue.getMpprcFile()]
        # node id
        if nodeId > 0:
            pieces.append(" -n %d" % nodeId)
        # data directory
        if datadir != "":
            pieces.append(" -D %s" % datadir)
        # stop mode
        if stopMode != "":
            pieces.append(" -m %s" % stopMode)
        # timeout
        if timeout > 0:
            pieces.append(" -t %d" % timeout)
        # azName
        if azName != "":
            pieces.append(" -z%s" % azName)
        return "".join(pieces)

    def getRestartNodegroup(self, nodegroupName, timeout=DEFAULT_RESTART_NODEGROUP_TIMEOUT):
        """
        function: get the command that restarts a node group by its name
        input:
          nodegroupName: string, the name of the node group to restart
          timeout: int, value of the -t option
        output:
          cmd: string, the command to restart
        """
        profile = DefaultValue.getMpprcFile()
        return "source %s ; cm_ctl restart -L %s -t %d" % (profile, nodegroupName, timeout)

    @staticmethod
    def getResetSwitchOverCmd(user, timeout):
        """
        function : build the command that resets switchover
        input : String user, int timeout
        output : String, the shell command; it is wrapped in "su - <user>"
                 when a user is given and the current uid is 0
        """
        profile = DefaultValue.getMpprcFile()
        switchover_cmd = "source %s ; cm_ctl switchover -a -t %d" % (profile, timeout)
        needs_su = (user != "") and (os.getuid() == 0)
        if not needs_su:
            return switchover_cmd
        # build shell command executed as the given user
        return "su - %s -c 'source %s;%s'" % (user, profile, switchover_cmd)

    @staticmethod
    def getQueryStatusCmd(nodeId=0, outFile="", showDetail=True, isFormat=True):
        """
        function : Get the command of querying status of cluster or node
        input : nodeId (adds -n when > 0), outFile (stdout is redirected to
                it when not empty), showDetail (adds -v), isFormat (adds
                -C -i -d)
        output : String, the shell command
        """
        pieces = ["source %s ; cm_ctl query" % DefaultValue.getMpprcFile()]
        # node id
        if nodeId > 0:
            pieces.append(" -n %d" % nodeId)
        # detail switch
        if showDetail:
            pieces.append(" -v")
        # status format
        if isFormat:
            pieces.append(" -C -i -d")
        # output file
        if outFile != "":
            pieces.append(" > %s" % outFile)
        return "".join(pieces)
