#!/usr/bin/env python3
# -*- coding:utf-8 -*-
try:
    import os
    import socket
    import sys
    import time
    import threading
    import signal
    import copy
    import subprocess
    import re
    import uuid
    import stat
    from datetime import datetime, timedelta
    from multiprocessing.dummy import Pool as ThreadPool

    sys.path.append(sys.path[0] + "/../../")
    from gspylib.common.GaussLog import GaussLog
    from gspylib.common.DbClusterInfo import dbClusterInfo
    from gspylib.common.Common import DefaultValue, ClusterCommand, TempfileManagement
    from gspylib.common.DbClusterStatus import DbClusterStatus
    from gspylib.common.OMCommand import OMCommand
    from gspylib.os.gsfile import g_file
    from gspylib.threads.SshTool import SshTool
    from gspylib.common.ErrorCode import ErrorCode
    from gspylib.component.CM.CM_OLAP.CM_OLAP import CM_OLAP
    from gspylib.component.ETCD.ETCD_OLAP.ETCD_OLAP import ETCD_OLAP
    from gspylib.component.GTM.GTM_OLAP.GTM_OLAP import GTM_OLAP
    from gspylib.component.Kernel.CN_OLAP.CN_OLAP import CN_OLAP
    from gspylib.component.Kernel.DN_OLAP.DN_OLAP import DN_OLAP
    from gspylib.os.gsplatform import g_Platform
    from gspylib.threads.parallelTool import parallelTool

except ImportError as ie:
    sys.exit("[GAUSS-52200] : Unable to import module: %s." % str(ie))

# Numeric threshold related to database-user space usage.
# NOTE(review): the unit (presumably percent) and the consumers are not
# visible in this part of the file -- confirm before changing.
SPACE_USAGE_DB_USER = 80
# String identifier "set_online_parameter" (its consumers are not visible here).
SET_ONLINE_PARAMETER = "set_online_parameter"

# Mapping of GUC parameter name -> value. Despite the "_LIST" suffix this is
# a dict, and the values mix str ("-2") and int types.
# NOTE(review): presumably the values used when backing up/restoring GUC
# settings -- confirm against the callers.
BACKUP_GUC_LIST = {
    "colvacuum_threshold_scale_factor": "-2",
    "resource_track_duration": 60,
    "resource_track_cost": 60,
    "file_extend_level": 1
}


class ParallelBaseOM(object):
    """
    Base class of parallel command
    """
    # String identifiers of actions.
    # NOTE(review): presumably compared against self.action by subclasses --
    # the consumers are not visible in this part of the file.
    ACTION_INSTALL = "install"
    ACTION_CONFIG = "config"
    ACTION_START = "start"
    ACTION_REDISTRIBUTE = "redistribute"
    ACTION_HEALTH_CHECK = "healthcheck"

    # String identifiers of health-check phases.
    # NOTE(review): presumably "before"/"after" the main operation -- confirm.
    HEALTH_CHECK_BEFORE = "before"
    HEALTH_CHECK_AFTER = "after"
    """
    Base class for parallel command
    """

    def __init__(self):
        """
        function: Constructor. Only sets default values for the attributes;
                  logger, cluster info and ssh tool are created later by
                  initLogger(), initClusterInfo()/initClusterInfoFromStaticFile()
                  and initSshTool().
        input : NA
        output: NA
        """
        # Set by initLogger().
        self.logger = None
        # Set by initClusterInfo() / initClusterInfoFromStaticFile().
        self.clusterInfo = None
        # Set by initSshTool().
        self.sshTool = None
        self.action = ""
        # Log uuid; initLogger() generates one when it is still empty.
        self.logUuid = ""
        # logAction and logStep are passed to GaussLog by initLogger().
        self.logAction = ""
        self.logStep = 0

        # Cluster config file.
        self.xmlFile = ""
        self.oldXmlFile = ""

        self.logType = DefaultValue.LOCAL_LOG_FILE
        # Log file path; checkBaseFile() fills in a default when it is empty.
        self.logFile = ""
        # Set by initLogger() to a file in the same directory as logFile.
        self.localLog = ""
        self.user = ""
        self.group = ""
        self.mpprcFile = ""
        # Temporary directory for the step file; the name is derived from the
        # lower-cased name of the concrete class.
        self.operateStepDir = TempfileManagement.getTempDir("%s_step" % self.__class__.__name__.lower())
        # Step file located inside operateStepDir.
        self.operateStepFile = "%s/%s_step.dat" % (self.operateStepDir, self.__class__.__name__.lower())
        self.initStep = ""
        # Copied to component.dwsMode by initComponentAttributes().
        self.dws_mode = False
        self.rollbackCommands = []
        # Component objects, filled by the init*Component() methods.
        self.etcdCons = []
        self.cmCons = []
        self.gtmCons = []
        self.cnCons = []
        self.dnCons = []
        # localMode is the same as isSingle in all OM scripts, except for gs_preinstall.
        # In gs_preinstall, localMode means local mode for a master-standby cluster.
        # In gs_preinstall, localMode also means local mode for a single cluster (will not create os user).
        # In gs_preinstall, isSingle means single cluster, it will create os user.
        # not isSingle and not localMode : master-standby cluster global mode (will create os user).
        # not isSingle and localMode : master-standby cluster local mode (will not create os user).
        # isSingle and not localMode : single cluster (will create os user).
        # isSingle and localMode : single cluster (will not create os user).
        self.localMode = False
        self.isSingle = False
        # Indicates whether there is a logical cluster.
        # If elastic_group exists, the current cluster is a logical cluster.
        # Otherwise, it is a large physical cluster.
        self.isElasticGroup = False
        self.isAddElasticGroup = False
        self.lcGroup_name = ""
        # Cluster lock mode. There are two modes: exclusive lock and wait lock;
        # the default is the exclusive lock.
        self.lockMode = "exclusiveLock"

        # Adapt to 200 and 300
        self.productVersion = None
        # Adapt to appliance
        self.applianceFlagFile = None

        # Shrink CooInst
        self.shrinkRedisCooInst = None

    def changeNoneToEmpytList(self, inList):
        """
        """
        if inList is None:
            inList = []
        return inList

    def initNodeComponent(self, nodeInfo):
        """
        function: start Init component
        input : NA
        output: NA
        """
        self.initCmComponent(nodeInfo)
        self.initGtmComponent(nodeInfo)
        self.initEtcdComponent(nodeInfo)
        self.initKernelComponent(nodeInfo)

    def initComponent(self):
        """
        function: Init the components of every node of self.clusterInfo,
                  one initNodeComponent() call per node, run on a thread pool
        input : NA
        output: NA
        """
        # Snapshot of the node list handed to the workers.
        node_infos = list(self.clusterInfo.dbNodes)
        # Pool size comes from DefaultValue.getCpuSet().
        pool = ThreadPool(DefaultValue.getCpuSet())
        try:
            # The workers append to the shared self.*Cons lists, so the order
            # of components across nodes follows thread scheduling.
            pool.map(self.initNodeComponent, node_infos)
        finally:
            # Always release the worker threads, even when a worker raised
            # and pool.map() re-raised that exception here.
            pool.close()
            pool.join()

    def initComponentAttributes(self, component):
        """
        function: Init  component attributes on current node
        input : Object component
        output: NA
        """
        component.logger = self.logger
        component.binPath = "%s/bin" % self.clusterInfo.appPath
        component.dwsMode = self.dws_mode

    def initCmComponent(self, nodeInfo):
        """
        function: Create one CM_OLAP component per cm server instance and
                  then per cm agent instance of the node, and append them
                  to self.cmCons
        input : Object nodeInfo
        output: NA
        """
        # cm servers are handled first, cm agents second.
        for group_name in ("cmservers", "cmagents"):
            for cm_inst in getattr(nodeInfo, group_name):
                cm_component = CM_OLAP()
                # init component cluster type
                cm_component.clusterType = self.clusterInfo.clusterType
                cm_component.instInfo = cm_inst
                self.initComponentAttributes(cm_component)
                self.cmCons.append(cm_component)

    def initEtcdComponent(self, nodeInfo):
        """
        function: Create one ETCD_OLAP component per etcd instance of the
                  node and append it to self.etcdCons
        input : Object nodeInfo
        output: NA
        """
        cluster_info = self.clusterInfo
        for etcd_inst in nodeInfo.etcds:
            etcd_component = ETCD_OLAP()
            # init component cluster type
            etcd_component.clusterType = cluster_info.clusterType
            etcd_component.instInfo = etcd_inst
            self.initComponentAttributes(etcd_component)
            self.etcdCons.append(etcd_component)

    def initGtmComponent(self, nodeInfo):
        """
        function: Create one GTM_OLAP component per gtm instance of the
                  node and append it to self.gtmCons
        input : Object nodeInfo
        output: NA
        """
        cluster_info = self.clusterInfo
        for gtm_inst in nodeInfo.gtms:
            gtm_component = GTM_OLAP()
            # init component cluster type
            gtm_component.clusterType = cluster_info.clusterType
            gtm_component.instInfo = gtm_inst
            self.initComponentAttributes(gtm_component)
            self.gtmCons.append(gtm_component)

    def initKernelComponent(self, nodeInfo):
        """
        function: Create the kernel components of the node: one CN_OLAP per
                  coordinator (appended to self.cnCons), then one DN_OLAP
                  per datanode (appended to self.dnCons)
        input : Object nodeInfo
        output: NA
        """
        # (instance attribute on nodeInfo, component class, target list)
        kernel_groups = (
            ("coordinators", CN_OLAP, self.cnCons),
            ("datanodes", DN_OLAP, self.dnCons),
        )
        for group_name, component_cls, target in kernel_groups:
            for kernel_inst in getattr(nodeInfo, group_name):
                kernel_component = component_cls()
                # init component cluster type
                kernel_component.clusterType = self.clusterInfo.clusterType
                kernel_component.instInfo = kernel_inst
                self.initComponentAttributes(kernel_component)
                target.append(kernel_component)

    def initLogger(self, module=""):
        """
        function: Init logger. Generates a log uuid when none is set,
                  validates it, creates self.logger and derives
                  self.localLog from the directory of self.logFile
        input : module
        output: NA
        raises: Exception when self.logUuid is not exactly 36 characters
                made of letters, digits and hyphens
        """
        if self.logUuid == "":
            self.logUuid = str(uuid.uuid4())
        # fullmatch is used instead of match() with a trailing "$", because
        # "$" would also accept a value that ends with a newline.
        if re.fullmatch(r"[a-zA-Z0-9-]{36}", self.logUuid) is None:
            # The pattern allows hyphens, not underscores; the message now
            # says so.
            raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] % "--uuid" +
                            " The value of the uuid must be exactly 36 characters long and "
                            "can only contain letters, numbers, and hyphens.")
        # log level
        LOG_DEBUG = 1
        self.logger = GaussLog(self.logFile, module, LOG_DEBUG, self.logAction, self.logUuid, self.logStep)
        # The local log lives next to the main log file.
        dirName = os.path.dirname(self.logFile)
        self.localLog = os.path.join(dirName, DefaultValue.LOCAL_LOG_FILE)

    def initClusterInfo(self, refresh_id=True, static_conf="", refresh_mode="inherit"):
        """
        function: Init self.clusterInfo from the xml file (self.xmlFile)
        input : refresh_id   - when true, a static config file and
                               refresh_mode are also passed to initFromXml
                static_conf  - static config file; when empty, the file
                               bin/cluster_static_config under the install
                               directory of self.user is used
                refresh_mode - forwarded to initFromXml
        output: NA
        """
        try:
            self.clusterInfo = dbClusterInfo()
            if not refresh_id:
                self.clusterInfo.initFromXml(self.xmlFile)
            else:
                static_config_file = static_conf or \
                    "%s/bin/cluster_static_config" % DefaultValue.getInstallDir(self.user)
                self.clusterInfo.initFromXml(self.xmlFile, static_config_file, refresh_mode)
        except Exception as err:
            # Any failure is handed to the logger's logExit().
            self.logger.logExit(str(err))
        self.logger.debug("Instance information of cluster:\n%s." % str(self.clusterInfo))

    def initClusterInfoFromStaticFile(self, user, log_flag=True, static_conf="", is_lc_cluster=False):
        """
        function: Init self.clusterInfo from the static config file
        input : user, static_conf, is_lc_cluster - forwarded to
                                    initFromStaticConfig
                log_flag          - when true, the cluster info is written
                                    to the debug log afterwards
        output: NA
        """
        try:
            self.clusterInfo = dbClusterInfo()
            self.clusterInfo.initFromStaticConfig(user, static_conf, is_lc_cluster)
        except Exception as err:
            # Any failure is handed to the logger's logExit().
            self.logger.logExit(str(err))
        if not log_flag:
            return
        self.logger.debug("Instance information of cluster:\n%s." % str(self.clusterInfo))

    def initSshTool(self, nodeNames, key, timeout=0):
        """
        function: Create self.sshTool; it is given the log file of the
                  current logger
        input : nodeNames, key, timeout
        output: NA
        """
        ssh_log_file = self.logger.logFile
        self.sshTool = SshTool(nodeNames, key, ssh_log_file, timeout)

    def checkBaseFile(self, checkXml=True):
        """
        function: Check the xml file and the log file.
                  With checkXml the xml file must be set, must exist and
                  must be an absolute path; without it self.xmlFile is
                  cleared. An empty self.logFile is replaced by the default
                  OM log path, and the log file must be an absolute path.
        input : checkXml
        output: NA
        """
        if not checkXml:
            self.xmlFile = ""
        else:
            xml_path = self.xmlFile
            xml_desc = "configuration file [%s]" % xml_path
            if xml_path == "":
                GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"] % 'X' + ".")

            if not os.path.exists(xml_path):
                GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50201"] % xml_desc)

            if not os.path.isabs(xml_path):
                GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"] % xml_desc)

        if self.logFile == "":
            self.logFile = DefaultValue.getOMLogPath(self.logType, self.user, "", self.xmlFile)

        if not os.path.isabs(self.logFile):
            GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50213"] % "log")

    def initSignalHandler(self):
        """
        function: Function to init signal handler
        input : NA
        output: NA
        """
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        signal.signal(signal.SIGQUIT, signal.SIG_IGN)
        signal.signal(signal.SIGTERM, signal.SIG_IGN)
        signal.signal(signal.SIGALRM, signal.SIG_IGN)
        signal.signal(signal.SIGHUP, signal.SIG_IGN)
        signal.signal(signal.SIGUSR1, signal.SIG_IGN)
        signal.signal(signal.SIGUSR2, signal.SIG_IGN)

    def print_signal_stack(self, frame):
        """
        function: Function to print signal stack
        input : frame
        output: NA
        """
        if self.logger is None:
            return
        try:
            import inspect
            stacks = inspect.getouterframes(frame)
            for curr in range(len(stacks)):
                stack = stacks[curr]
                self.logger.debug("Stack level: %d. File: %s. Function: %s. LineNo: %d." % (curr,
                                                                                            stack[1],
                                                                                            stack[3],
                                                                                            stack[2]))
                self.logger.debug("Code: %s." % (stack[4][0].strip().strip("\n")))
        except Exception as e:
            self.logger.debug(
                "Failed to print signal stack. Error: \n%s" % str(e))

    def setTimer(self, timeout):
        """
        function: Function to set timer
        input : timeout
        output: NA
        """
        self.logger.debug("Set timer. The timeout: %d." % timeout)
        signal.signal(signal.SIGALRM, self.timeout_handler)
        signal.alarm(timeout)

    def resetTimer(self):
        """
        function: Reset timer
        input : NA
        output: NA
        """
        signal.signal(signal.SIGALRM, signal.SIG_IGN)
        self.logger.debug("Reset timer. Left time: %d." % signal.alarm(0))

    def timeout_handler(self, signal_num, frame):
        """
        function: Signal handler for the timeout signal. Logs the signal
                  number and the stack (when a logger is set), then raises
                  Timeout
        input : signal_num, frame
        output: NA
        """
        if self.logger is not None:
            message = "Received the timeout signal: [%d]." % signal_num
            self.logger.debug(message)
            self.print_signal_stack(frame)
        raise Timeout("Time out.")

    def set_disk_cache_for_redis(self, action, coor_inst):
        """
        During redistribution the disk cache still occupies disk space and
        can set the cluster to read-only, which makes redistribution fail.
        Therefore disk cache growth is frozen before redistribution and
        unfrozen after it.
        :param action: DefaultValue.FREEZE_DISK_CACHE or
                       DefaultValue.UNFREEZE_DISK_CACHE
        :param coor_inst: instance whose hostname and port are used to run
                          the sql
        :return: NA
        """
        valid_actions = (DefaultValue.FREEZE_DISK_CACHE, DefaultValue.UNFREEZE_DISK_CACHE)
        if action not in valid_actions:
            raise Exception(ErrorCode.GAUSS_500["GAUSS_50009"])
        if action == DefaultValue.FREEZE_DISK_CACHE:
            sql = "SELECT pgxc_freeze_disk_cache();"
        else:
            sql = "SELECT pgxc_unfreeze_disk_cache();"
        self.logger.debug("Starting to %s disk cache, sql: %s" % (action, sql))
        (sql_status, sql_output) = ClusterCommand.remoteSQLCommand(sql,
                                                                   self.user,
                                                                   coor_inst.hostname,
                                                                   coor_inst.port,
                                                                   False,
                                                                   DefaultValue.DEFAULT_DB_NAME)
        if sql_status != 0:
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % sql + " Error:\n%s" % str(sql_output))
        self.logger.debug("Successfully %s disk cache." % action)

    def __getNodeIdByNodeName(self, user, nodeName):
        """
        """
        node_id = 0
        if nodeName != "":
            cluster_info = dbClusterInfo()
            cluster_info.initFromStaticConfig(user)
            db_node = cluster_info.getDbNodeByName(nodeName)
            if db_node is None:
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51619"] % nodeName)
            node_id = db_node.id
        return node_id

    def __checkAppPath(self):
        """
        function: Write to the debug log the processes (from "ps ux") whose
                  command line contains the bin directory of the install
                  path; nothing is done when the install path is empty
        input : NA
        output: NA
        """
        app_path = DefaultValue.getInstallDir(self.user)
        if app_path == "":
            return
        ps_cmd = "ps ux |grep %s/bin |grep -v grep" % app_path
        ps_status, ps_output = self.sshTool.getSshStatusOutput(ps_cmd)
        self.logger.debug("Status: %s.\nOutput:\n%s" % (str(ps_status), ps_output))

    def stopNode(self, user="", nodeName="", stopForce=False, timeout=DefaultValue.TIMEOUT_CLUSTER_STOP):
        """
        function: Stop all instances on the node. When the first stop
                  command fails, a second one with mode "i" is tried;
                  an Exception is raised when that fails as well.
        input : user      - defaults to self.user when empty
                nodeName  - empty name is mapped to node id 0
                stopForce - selects stop mode "i" instead of "f" for the
                            first attempt
                timeout   - passed to the first stop command only
        output: NA
        """
        self.logger.log("Stopping node.")
        if user == "":
            user = self.user
        node_id = self.__getNodeIdByNodeName(user, nodeName)
        if stopForce:
            stop_mode = "i"
        else:
            stop_mode = "f"
        first_cmd = ClusterCommand.getStopCmd(user, node_id, stop_mode, timeout)
        (first_status, first_output) = subprocess.getstatusoutput(first_cmd)
        if first_status != 0:
            self.logger.debug("Stopping failed for the first time. Error: \n%s" % first_output)
            self.__checkAppPath()

            # Second attempt, always with mode "i".
            self.logger.log("Try forcibly stop.")
            forced_cmd = ClusterCommand.getStopCmd(user, node_id, "i")
            (forced_status, forced_output) = subprocess.getstatusoutput(forced_cmd)
            if forced_status != 0:
                self.logger.debug(forced_output)
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51610"] % "the cluster")

        self.logger.log("Successfully stopped node.")

    def startNode(self, user="", nodeName="", doCheck=True, timeout=DefaultValue.TIMEOUT_CLUSTER_START, retry=False):
        """
        function: Start all instances on the node (or the cluster when
                  nodeName is empty)
        input : user     - defaults to self.user when empty
                nodeName - empty name is mapped to node id 0
                doCheck  - when true, wait for the normal status afterwards
                timeout  - passed to the start command; when > 0 it also
                           sets the end time for the status wait
                retry    - when true and the start output reports that the
                           cluster is still being stopped, stop the node
                           and run the start command once more
        output: NA
        """
        if user == "":
            user = self.user
        node_id = self.__getNodeIdByNodeName(user, nodeName)
        start_type = "cluster" if nodeName == "" else "node"
        end_time = (datetime.now() + timedelta(seconds=timeout)) if timeout > 0 else None

        self.logger.log("Starting %s." % start_type)
        start_cmd = ClusterCommand.getStartCmd(user, node_id, timeout)
        (status, output) = subprocess.getstatusoutput(start_cmd)
        still_stopping = "Maybe the cluster is continuely being stopped in the background"
        if status != 0 and retry and still_stopping in output:
            self.stopNode(user, nodeName)
            time.sleep(1)
            self.logger.log("Starting %s." % start_type)
            (status, output) = subprocess.getstatusoutput(start_cmd)

        if status != 0:
            self.logger.debug(output)
            self.__checkAppPath()
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] % start_type + " Error: \n%s" % output)

        if not doCheck:
            return
        # The fourth parameter ignore check the status of redistribution
        self.waitForNormal(user, node_id, end_time, "")

    def normalInfoPrint(self, dotCount, expected_redistributing, startType):
        """
        """
        if dotCount != 0:
            sys.stdout.write("\n")
        if expected_redistributing == "Yes":
            self.logger.log("The cluster redistributing status is yes.")
        else:
            self.logger.log("The %s status is normal." % startType)

    def timeOutMessagePrint(self, dotCount, checkResult, expected_redistributing, startType, endTime):
        """
        """
        if endTime is not None and datetime.now() >= endTime:
            if dotCount != 0:
                sys.stdout.write("\n")
            self.logger.debug(checkResult)

            if expected_redistributing == "Yes":
                raise Exception("Timeout." + "\n" + ErrorCode.GAUSS_516["GAUSS_51637"] %
                                ("redistributing status", "Yes"))
            else:
                raise Exception("Timeout." + "\n" + ErrorCode.GAUSS_516["GAUSS_51602"].replace("cluster", startType))

    def is_keep_waiting(self, expected_redistributing, check_result, wait_again):
        """
        """
        match_keyword = "^cluster_state[ ]+: Degraded\n"
        if expected_redistributing in ("No", "Yes"):
            match_keyword += "redistributing[ ]+: %s\n" % expected_redistributing
        pattern = re.compile(match_keyword)
        if pattern.match(check_result) is None:
            return 0, False
        cmd = "cm_ctl query -Cv | grep Building | grep -v grep"
        (status, _) = DefaultValue.retryGetstatusoutput(cmd)
        if status == 0:
            return 0, True
        else:
            if wait_again == 0:
                time.sleep(120)
                return 1, True
            else:
                return 0, False

    def waitForNormal(self, user="", nodeId=0, endTime=None, expected_redistributing="No"):
        """
        function: Wait until the status check (OMCommand.doCheckStaus)
                  succeeds for the cluster (nodeId == 0) or the node
        input : user, nodeId,
                endTime - optional datetime; once reached,
                          timeOutMessagePrint() raises a timeout Exception
                expected_redistributing - forwarded to the status check;
                          "Yes" only changes the log messages here
        output: NA
        raises: Exception on timeout, or when waiting stops while the last
                status check still failed
        """
        start_type = "cluster" if nodeId == 0 else "node"
        if expected_redistributing == "Yes":
            self.logger.log("Waiting for the redistributing status to become yes.")
        else:
            self.logger.log("Waiting for the %s status to become normal." % start_type)
        # Number of progress dots on the current stdout line.
        dot_count = 0
        start_wait_time = datetime.now()
        # Passed to and updated by is_keep_waiting().
        wait_again = 0
        while True:
            (check_status, check_result) = OMCommand.doCheckStaus(user, nodeId, None, expected_redistributing)
            if check_status == 0:
                self.normalInfoPrint(dot_count, expected_redistributing, start_type)
                break

            # After DefaultValue.TIMEOUT_CLUSTER_START seconds of waiting,
            # is_keep_waiting() decides whether to go on.
            if datetime.now() > start_wait_time + timedelta(seconds=DefaultValue.TIMEOUT_CLUSTER_START):
                (wait_again, isWait) = self.is_keep_waiting(expected_redistributing, check_result, wait_again)
                if isWait:
                    # NOTE(review): "continue" re-polls immediately; it skips
                    # the 2 second sleep and the endTime check below.
                    continue
                else:
                    # check_status is still non-zero here, so the raise after
                    # the loop reports the failure.
                    break

            time.sleep(2)
            sys.stdout.write(".")
            dot_count += 1
            # Start a new line after 30 dots.
            if dot_count >= 30:
                dot_count = 0
                sys.stdout.write("\n")

            # Raises when endTime is set and has been reached.
            self.timeOutMessagePrint(dot_count, check_result, expected_redistributing, start_type, endTime)

        if check_status != 0:
            self.logger.debug(check_result)
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] % start_type +
                            " The %s status is: \n%s" % (start_type, check_result))

    def getClusterInfo(self, clusterInfo):
        """
        function: Get all the instances info
        input : clusterInfo
        output: cmservers, cmagents, coordinators, datanodes, gtms, instIdToInfo, nodeIdToName
        """
        cmservers = []
        cmagents = []
        coordinators = []
        datanodes = []
        gtms = []
        inst_id_to_info = {}
        node_id_to_name = {}
        cluster_node_names = clusterInfo.getClusterNodeNames()
        # get all type instance
        for node in cluster_node_names:
            dbNode = clusterInfo.getDbNodeByName(node)
            node_id_to_name[dbNode.id] = node
            for cmserver in dbNode.cmservers:
                cmservers.append(cmserver)
                inst_id_to_info[cmserver.instanceId] = cmserver
            for cmagent in dbNode.cmagents:
                cmagents.append(cmagent)
                inst_id_to_info[cmagent.instanceId] = cmagent
            for coordinator in dbNode.coordinators:
                coordinators.append(coordinator)
                inst_id_to_info[coordinator.instanceId] = coordinator
            for datanode in dbNode.datanodes:
                datanodes.append(datanode)
                inst_id_to_info[datanode.instanceId] = datanode
            for gtm in dbNode.gtms:
                gtms.append(gtm)
                inst_id_to_info[gtm.instanceId] = gtm
        return cmservers, cmagents, coordinators, datanodes, gtms, inst_id_to_info, node_id_to_name

    def checkStatusForGtms(self, gtm_status):
        """
        function: Check the status of one gtm instance.
                  A primary is always fine; a standby is fine only when
                  its connection status is normal; anything else is not.
        input : gtm_status
        output: True/False
        """
        if gtm_status.status == DbClusterStatus.INSTANCE_STATUS_PRIMARY:
            return True
        if gtm_status.status != DbClusterStatus.INSTANCE_STATUS_STANDBY:
            return False
        # the STANDBY instance need to check CONN status
        return gtm_status.connStatus == DbClusterStatus.CONN_STATUS_NORMAL

    def checkStatusForCns(self, cn_status):
        """
        function: Check the status of one coordinator instance
        input : cn_status
        output: True when the instance status is normal, otherwise False
        """
        return cn_status.status == DbClusterStatus.INSTANCE_STATUS_NORMAL

    def checkStatusForDns(self, dn_status):
        """
        function: Check the status of one datanode instance.
                  Primary and dummy instances are always fine; a standby
                  is fine only when its HA status is normal; anything else
                  is not.
        input : dn_status
        output: True/False
        """
        always_ok = (DbClusterStatus.INSTANCE_STATUS_PRIMARY, DbClusterStatus.INSTANCE_STATUS_DUMMY)
        if dn_status.status in always_ok:
            return True
        if dn_status.status == DbClusterStatus.INSTANCE_STATUS_STANDBY:
            return dn_status.haStatus == DbClusterStatus.HA_STATUS_NORMAL
        return False

    def getCmInstProcStatus(self, app_path, ssh_tool, node_names, inst_type="cm_server"):
        """
        function: Check on every given node whether at least one process of
                  <app_path>/bin/<inst_type> is running
        input : app_path, ssh_tool, node_names, inst_type
        output: dict node name -> "Normal" (running instance count >= 1)
                or "Abnormal"; a node whose command failed is reported
                through logger.logExit
        """
        proc_path = "%s/bin/%s" % (app_path, inst_type)
        count_cmd = DefaultValue.getRuningInstNum(proc_path)
        (node_status_map, _) = ssh_tool.getSshStatusOutput(count_cmd, node_names)
        output_map = ssh_tool.parseSshOutput(node_names)
        is_normal = {}
        for node, node_status in node_status_map.items():
            if node_status != DefaultValue.SUCCESS:
                self.logger.logExit(ErrorCode.GAUSS_516["GAUSS_51607"] % inst_type +
                                    " Hostname: %s. Error: \n%s" % (node, output_map[node]))
                continue
            # The first output line carries the number of running instances.
            first_line = output_map[node].strip().split('\n')[0].strip()
            running = first_line.isdigit() and int(first_line) >= 1
            is_normal[node] = "Normal" if running else "Abnormal"
        return is_normal

    def getCmConfFileStatus(self, user, ssh_tool, cm_insts, is_normal, inst_type="cm_server"):
        """
        function: For every instance whose host is currently "Normal",
                  check that its cm config file exists on the host. When
                  the check fails, the inst_type processes of the user are
                  killed on that host and the host is marked "Abnormal".
        input : user, ssh_tool, cm_insts, is_normal (updated in place),
                inst_type
        output: is_normal
        """
        for inst in cm_insts:
            host = inst.hostname
            if is_normal[host] != "Normal":
                is_normal[host] = "Abnormal"
                continue
            conf_file = DefaultValue.getCmConfigFile(inst.datadir)
            check_cmd = "ls '%s'" % conf_file
            (check_status, check_output) = ssh_tool.getSshStatusOutput(check_cmd, [host])
            self.logger.debug("Check %s status: %s\nOutput:%s" % (conf_file, check_status[host], str(check_output)))
            if check_status[host] == DefaultValue.SUCCESS:
                is_normal[host] = "Normal"
                continue
            # Config file missing: kill the processes and report the host.
            kill_cmd = "killall -s 9 -u '%s' %s" % (user, inst_type)
            (kill_status, kill_output) = ssh_tool.getSshStatusOutput(kill_cmd, [host])
            self.logger.debug("Kill %s process status: %s\nOutput:%s" %
                              (inst_type, kill_status[host], str(kill_output)))
            is_normal[host] = "Abnormal"
        return is_normal

    def getCmInstStatus(self, user, appPath, sshTool, cmInsts, instType="cm_server"):
        """
        """
        node_names = []
        for inst in cmInsts:
            node_names.append(inst.hostname)
        is_normal = self.getCmInstProcStatus(appPath, sshTool, node_names, instType)
        is_normal = self.getCmConfFileStatus(user, sshTool, cmInsts, is_normal, instType)
        return is_normal

    def waitGtmsForAvailable(self, clusterStatus, gtms):
        """
        function: check all the gtm status
        input : clusterStatus, gtms
        output: True, all gtms status are Normal; else there have some available instances.
        """
        for gtm in gtms:
            gtm_status = clusterStatus.getInstanceStatusById(gtm.instanceId)
            if self.checkStatusForGtms(gtm_status):
                continue
            else:
                return False
        return True

    def waitCnsForAvailable(self, clusterStatus, coordinators):
        """
        function: check all the coordinators status
        input : clusterStatus, coordinators
        output: True, all coordinators status are Normal;else there have some available instances.
        """
        for coordinator in coordinators:
            cn_status = clusterStatus.getInstanceStatusById(coordinator.instanceId)
            if self.checkStatusForCns(cn_status):
                continue
            else:
                return False
        return True

    def waitDnsForAvailable(self, clusterStatus, datanodes, isOnlyMasterInst=False):
        """
        function: check all the coordinators status
        input : clusterStatus, datanodes
        output: True, all checked datanodes status are Normal; else there have some available instances.
        """
        check_inst = []
        if isOnlyMasterInst:
            for datanode in datanodes:
                if datanode.instanceType == DefaultValue.MASTER_INSTANCE:
                    check_inst.append(datanode)
        else:
            check_inst = datanodes
        for datanode in check_inst:
            dn_status = clusterStatus.getInstanceStatusById(datanode.instanceId)
            if self.checkStatusForDns(dn_status):
                continue
            else:
                return False
        return True

    def waitCmaForAvailable(self, user, appPath, sshTool, cmagents):
        """
        """
        is_normal = self.getCmInstStatus(user, appPath, sshTool, cmagents, "cm_agent")
        for inst in cmagents:
            if is_normal[inst.hostname] != "Normal":
                return False
        return True

    def waitCmsForAvailable(self, user, appPath, sshTool, cmservers):
        """
        """
        is_normal = self.getCmInstStatus(user, appPath, sshTool, cmservers, "cm_server")
        for inst in cmservers:
            if is_normal[inst.hostname] != "Normal":
                return False
        return True

    def checkAllInstsStatus(self, user, appPath, clusterStatus, sshTool, cmservers,
                            cmagents, coordinators, datanodes, gtms):
        """
        """
        if not self.waitCmsForAvailable(user, appPath, sshTool, cmservers):
            return False
        if not self.waitCmaForAvailable(user, appPath, sshTool, cmagents):
            return False
        if not self.waitCnsForAvailable(clusterStatus, coordinators):
            return False
        if not self.waitDnsForAvailable(clusterStatus, datanodes, isOnlyMasterInst=True):
            return False
        if not self.waitGtmsForAvailable(clusterStatus, gtms):
            return False
        return True

    def waitInstsForAvailable(self, user, clusterInfo, sshTool, timeout=DefaultValue.TIMEOUT_CLUSTER_START):
        """
        function: Wait until all checked cluster instances are available.
                  The status is polled every 5 seconds with a progress dot
                  on stdout.
        input : user, clusterInfo, sshTool, timeout (seconds)
        output: NA
        raises: Exception when the instances are still not available after
                the timeout
        """
        self.logger.log("Waiting for the cluster instances to become available.")
        deadline = datetime.now() + timedelta(seconds=timeout)
        dots = 0
        cmservers, cmagents, coordinators, datanodes, gtms, _, _ = self.getClusterInfo(clusterInfo)
        while True:
            time.sleep(5)
            sys.stdout.write(".")
            dots += 1
            # Start a new line after 12 dots.
            if dots >= 12:
                dots = 0
                sys.stdout.write("\n")
            if datetime.now() >= deadline:
                if dots != 0:
                    sys.stdout.write("\n")
                self.logger.log("Timeout in %d seconds. Cluster instances is still not available!" % timeout)
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51602"])

            cluster_status = OMCommand.getClusterStatus(user)
            if cluster_status is None:
                self.logger.debug("Failed to obtain the cluster status.")
                continue
            if self.checkAllInstsStatus(user, clusterInfo.appPath, cluster_status, sshTool,
                                        cmservers, cmagents, coordinators, datanodes, gtms):
                return

    def waitForAvailable(self, user, timeout=DefaultValue.TIMEOUT_CLUSTER_START, isExpandScene=False):
        """
        function: Poll the cluster status until it is "Normal" or "Degraded"
        input : user          - passed to OMCommand.getClusterStatus
                timeout       - maximum number of seconds to wait
                isExpandScene - passed to OMCommand.getClusterStatus
        output: NA
        raise : Exception(GAUSS_51602) if the timeout expires first
        """
        self.logger.log("Waiting for the cluster status to become available.")

        deadline = datetime.now() + timedelta(seconds=timeout)
        dots_in_row = 0
        became_available = False

        while not became_available:
            # One progress dot per poll, 30 dots per line.
            sys.stdout.write(".")
            dots_in_row += 1
            if dots_in_row >= 30:
                dots_in_row = 0
                sys.stdout.write("\n")

            if datetime.now() >= deadline:
                if dots_in_row != 0:
                    sys.stdout.write("\n")
                self.logger.log("Timeout in %d seconds. Cluster is still not available!" % timeout)
                break

            status_info = OMCommand.getClusterStatus(user, isExpandScene)
            if status_info is not None and status_info.clusterStatus in ["Normal", "Degraded"]:
                became_available = True
                if dots_in_row != 0:
                    sys.stdout.write("\n")
                self.logger.log("The cluster status is %s." % status_info.clusterStatus)
                # Leave the loop right away, without the pause between two polls.
                continue

            if status_info is None:
                self.logger.debug("Failed to obtain the cluster status.")
            else:
                self.logger.debug("Cluster status is %s(%s)." %
                                  (status_info.clusterStatus, status_info.clusterStatusDetail))

            time.sleep(2)

        if not became_available:
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51602"])

    def waitProcessStop(self, processKeywords, hostname):
        """
        function: Wait the process stop
        input : processKeywords - text the command line of the process starts with; it is
                                  inserted into an extended regular expression
                hostname        - node on which the process is looked up
        output: NA
        note  : the loop has no timeout; it only ends once the lookup command fails on the node
        """
        # awk blanks the first ten "ps ux" columns, which leaves the command line behind ten
        # blanks for grep to match. The raw string keeps the backslash in front of "$"
        # without relying on "\$", which is an invalid escape sequence in a normal literal.
        ps_cmd = r"ps ux|grep -v grep |awk '{for(i=1; i<11; i++){\$i=null}; print \$0;}'|" \
                 "grep -E '^ {10}%s'" % processKeywords
        count = 0
        while True:
            (status, _) = self.sshTool.getSshStatusOutput(ps_cmd, [hostname])
            # Determine whether the process can be found.
            if status[hostname] != DefaultValue.SUCCESS:
                self.logger.debug("The %s process stopped." % processKeywords)
                break

            count += 1
            # Only report every 20th unsuccessful poll to keep the debug log short.
            if count % 20 == 0:
                self.logger.debug("The %s process exists." % processKeywords)
            time.sleep(3)

    def checkCluseterStatus(self, user, expected_status=None, expected_redistributing=None):
        """
        function: Check cluster status
        input : user                    - used to build the status query and the status file path
                expected_status         - passed to DbClusterStatus.isAllHealthy
                expected_redistributing - expected redistributing state; not checked when None
        output: NA
        raise : Exception if the query fails or the status does not match the expectations
        """
        self.logger.log("Checking the cluster status.")
        try:
            # query and save status into a file
            status_file = "/home/%s/gauss_check_status_%d.dat" % (user, os.getpid())
            TempfileManagement.removeTempFile(status_file)
            cmd = ClusterCommand.getQueryStatusCmd(user, 0, status_file)
            (status, output) = subprocess.getstatusoutput(cmd)
            if status != 0:
                # Prefer the content of the status file as error detail, but keep the command
                # output when the query failed before the file was written. Opening a missing
                # file would otherwise replace the real error with a "No such file" error.
                if os.path.isfile(status_file):
                    with open(status_file, 'r') as fp:
                        output = fp.read()
                    TempfileManagement.removeTempFile(status_file)
                if output.find("cm_ctl: can't connect to cm_server.") >= 0:
                    raise Exception(ErrorCode.GAUSS_516["GAUSS_51640"])
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51601"] % "cluster" + " Error: \n%s" % output)

            # check cluster status
            cluster_status = DbClusterStatus()
            try:
                cluster_status.initFromFile(status_file)
            finally:
                # Remove the temporary file even if it could not be parsed.
                TempfileManagement.removeTempFile(status_file)

            self.logger.debug("The cluster status:%s\nredistributing:%s\nbalanced:%s" %
                              (cluster_status.clusterStatus,
                               cluster_status.redistributing,
                               cluster_status.balanced))
            # check if current status match with expected status
            if not cluster_status.isAllHealthy(expected_status):
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51602"] + " Please check it.")
            if expected_redistributing is not None and \
                    cluster_status.redistributing != expected_redistributing:
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51641"] +
                                " The expected redistributing status should be %s, "
                                "Please check it." % expected_redistributing)
        except Exception as e:
            # Every failure is reported to the caller as a plain Exception carrying the message.
            raise Exception(str(e))

        self.logger.log("Successfully checked the cluster status.")

    def managerOperateStepDir(self, action='create', nodes=None):
        """
        function: Create or remove the operate step directory
        input : action - "create" makes the directory if it is missing; any other value
                         removes the directory if it exists
                nodes  - nodes to run on, normalized with changeNoneToEmpytList
        output: NA
        """
        try:
            target_nodes = self.changeNoneToEmpytList(nodes)
            if action != "create":
                # Remove the directory
                cmd = "(if [ -d '%s' ];then rm -rf '%s';fi)" % (self.operateStepDir, self.operateStepDir)
            else:
                # Create the directory with the key directory mode
                cmd = "(if [ ! -d '%s' ];then mkdir -p '%s' -m %s;fi)" % (self.operateStepDir,
                                                                          self.operateStepDir,
                                                                          DefaultValue.KEY_DIRECTORY_MODE)
            self.logger.debug("Command:%s." % cmd)
            description = "%s temporary directory" % action
            run_locally = self.localMode or self.isSingle
            DefaultValue.execCommandWithMode(cmd, description, self.sshTool, run_locally, "", target_nodes)
        except Exception as err:
            raise Exception(str(err))

    def readOperateStep(self):
        """
        function: Read the recorded step from the step file
        input : NA
        output: the stripped first line of the step file, or self.initStep when the file is
                missing, is not a regular file or starts with a blank line
        """
        default_step = self.initStep
        step_file = self.operateStepFile

        # Nothing has been recorded yet
        if not os.path.exists(step_file):
            self.logger.debug("The %s does not exits." % step_file)
            return default_step

        # The path exists but is not a regular file
        if not os.path.isfile(step_file):
            self.logger.debug("The %s must be a file." % step_file)
            return default_step

        try:
            # Only the first line of the file holds the step
            with open(step_file, "r") as step_fp:
                recorded = step_fp.readline().strip()
        except Exception as err:
            # Failed to read the step file
            raise Exception(str(err))

        return recorded if recorded else default_step

    def writeOperateStep(self, stepName, nodes=None):
        """
        function: Record the step in the step file and copy the file to the nodes
        input : stepName - step name written as the only line of the step file
                nodes    - target nodes, normalized with changeNoneToEmpytList
        output: NA
        """
        try:
            step_file = self.operateStepFile
            open_flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
            owner_rw = stat.S_IWUSR | stat.S_IRUSR
            self.logger.debug("Record [%s] to [%s]." % (stepName, step_file))
            # Create or truncate the file with read/write permission for the owner only
            step_fd = os.open(step_file, open_flags, owner_rw)
            with os.fdopen(step_fd, "w") as step_fp:
                step_fp.write(stepName)
                step_fp.write(os.linesep)
            # Apply the key file mode to the step file
            g_file.changeMode(DefaultValue.KEY_FILE_MODE, step_file)

            # Make sure the step directory exists on the target nodes
            target_nodes = self.changeNoneToEmpytList(nodes)
            run_locally = self.localMode or self.isSingle
            mkdir_cmd = "mkdir -p -m %s '%s'" % (DefaultValue.KEY_DIRECTORY_MODE, self.operateStepDir)
            DefaultValue.execCommandWithMode(mkdir_cmd,
                                             "create backup directory on all nodes",
                                             self.sshTool,
                                             run_locally,
                                             "",
                                             target_nodes)

            # Copy the step file only when other nodes are involved
            if not run_locally:
                self.sshTool.scpFiles(step_file, self.operateStepDir, target_nodes)
        except Exception as err:
            # Failed to record the step
            raise Exception(str(err))

    def distributeFiles(self):
        """
        function: Send the XML configuration file to every cluster node except the local host
        input : NA
        output: NA (any error is passed to self.logger.logExit)
        """
        self.logger.debug("Distributing files.")
        try:
            # Start from all node names and drop the local host from the list
            remote_hosts = self.clusterInfo.getClusterNodeNames()
            local_host = socket.gethostname()
            remote_hosts.remove(local_host)
            DefaultValue.distributeXmlConfFile(self.sshTool, self.xmlFile, remote_hosts, self.mpprcFile)
            self.logger.debug("Successfully distributed files.")
        except Exception as err:
            # Failed to distribute the file
            self.logger.logExit(str(err))

    def checkPreInstall(self, user, flag, nodes=None):
        """
        function: Run the Local_Check_PreInstall script on the given nodes
        input : user  - passed to the script with -U
                flag  - passed to the script with -t
                nodes - nodes to check, normalized with changeNoneToEmpytList
        output: NA (any error is passed to self.logger.logExit)
        """
        try:
            target_nodes = self.changeNoneToEmpytList(nodes)
            script = OMCommand.getLocalScript("Local_Check_PreInstall")
            cmd = "%s -U %s -t %s" % (script, user, flag)
            run_locally = self.localMode or self.isSingle
            DefaultValue.execCommandWithMode(cmd, "check preinstall", self.sshTool, run_locally, "", target_nodes)
        except Exception as err:
            self.logger.logExit(str(err))

    def checkNodeInstall(self, nodes=None, checkParams=None, strictUserCheck=True):
        """
        function: Run the Local_Check_Install script on the given nodes
        input : nodes           - nodes to check, normalized with changeNoneToEmpytList
                checkParams     - "name=value" strings; only shared_buffers and max_connections
                                  are handed to the script (as -C options)
                strictUserCheck - when False, -O is appended to the command
        output: NA
        """
        accepted_names = ("shared_buffers", "max_connections")
        target_nodes = self.changeNoneToEmpytList(nodes)
        guc_options = []
        for param in self.changeNoneToEmpytList(checkParams):
            pieces = param.split("=")
            # Every parameter must consist of exactly one name and one value
            if len(pieces) != 2:
                self.logger.logExit(ErrorCode.GAUSS_500["GAUSS_50009"])
            if pieces[0].strip() in accepted_names:
                guc_options.append(" -C \\\"%s\\\"" % param)
        coo_guc_param = "".join(guc_options)

        self.logger.log("Checking installation environment on all nodes.")
        cmd_args = (OMCommand.getLocalScript("Local_Check_Install"),
                    self.user,
                    self.group,
                    self.clusterInfo.appPath,
                    coo_guc_param,
                    self.localLog,
                    self.xmlFile,
                    self.logAction,
                    self.logUuid,
                    self.logger.step)
        cmd = "%s -U %s:%s -R %s %s -l %s -X '%s' --log-action=%s --log-uuid=%s --log-step=%s" % cmd_args
        if not strictUserCheck:
            cmd += " -O"
        self.logger.debug("Checking the install command: %s." % cmd)
        run_locally = self.localMode or self.isSingle
        DefaultValue.execCommandWithMode(cmd, "check installation environment", self.sshTool,
                                         run_locally, "", target_nodes)

    def installApp(self, nodes=None, static_config_file="", dws_mode=False, action="install_cluster"):
        """
        function: Run the Local_Install script on the given nodes
        input : nodes              - nodes to install on, normalized with changeNoneToEmpytList
                static_config_file - when set, passed to the script with -f
                dws_mode           - when true, --dws-mode is appended to the command
                action             - passed to the script with -t
        output: NA
        """
        target_nodes = self.changeNoneToEmpytList(nodes)
        if self.productVersion is None:
            # Try to read the product version from the feature file below the application path
            feature_file = "%s/bin/%s" % (self.clusterInfo.appPath,
                                          DefaultValue.DEFAULT_DISABLED_FEATURE_FILE_NAME)
            if os.path.isfile(feature_file):
                self.productVersion, _ = DefaultValue.unencryptedFeatureInfo(feature_file)

        self.logger.log("Installing applications on all nodes.")
        cmd_args = (OMCommand.getLocalScript("Local_Install"),
                    self.user,
                    self.group,
                    self.xmlFile,
                    self.clusterInfo.appPath,
                    self.clusterInfo.name,
                    self.localLog,
                    action,
                    self.logAction,
                    self.logUuid,
                    self.logger.step)
        cmd = "%s -U %s:%s -X '%s' -R %s -c %s -l %s -t %s --log-action=%s --log-uuid=%s --log-step=%s" % \
              cmd_args

        # Optional switches, appended in a fixed order
        extra_options = []
        if self.productVersion:
            extra_options.append(" --product=%s" % self.productVersion)
        if static_config_file:
            extra_options.append(" -f %s" % static_config_file)
        if dws_mode:
            extra_options.append(" --dws-mode")
        cmd += "".join(extra_options)

        self.logger.debug("Install applications command: %s." % cmd)
        run_locally = self.localMode or self.isSingle
        DefaultValue.execCommandWithMode(cmd, "install application on all nodes", self.sshTool,
                                         run_locally, "", target_nodes)

    def uninstallApp(self, nodes=None):
        """
        function: Run the Local_Uninstall script on the given nodes
        input : nodes - nodes to uninstall from, normalized with changeNoneToEmpytList
        output: NA
        """
        self.logger.log("Uninstalling applications on all nodes.")
        target_nodes = self.changeNoneToEmpytList(nodes)
        cmd_args = (OMCommand.getLocalScript("Local_Uninstall"),
                    self.user,
                    self.clusterInfo.appPath,
                    self.localLog,
                    self.logAction,
                    self.logUuid,
                    self.logger.step)
        cmd = "%s -U %s -R %s -l %s -T --log-action=%s --log-uuid=%s --log-step=%s" % cmd_args
        self.logger.debug("Uninstall applications command: %s." % cmd)
        run_locally = self.localMode or self.isSingle
        DefaultValue.execCommandWithMode(cmd, "uninstall application on all nodes", self.sshTool,
                                         run_locally, "", target_nodes)
        self.logger.log("Successfully uninstalled applications on all nodes.")

    def cleanInstallProcess(self, nodes=None):
        """
        function: Run Local_Query with --clean_install_process on the given nodes
        input : nodes - nodes to clean, normalized with changeNoneToEmpytList
        output: NA
        note  : nothing is executed unless the cluster is a single-instance, master-standby or
                master-standby multi-AZ cluster
        """
        target_nodes = self.changeNoneToEmpytList(nodes)
        cluster = self.clusterInfo
        needs_cleanup = (cluster.isSingleInstCluster() or
                         cluster.isMasterStandbyCluster() or
                         cluster.isMasterStandbyMultiAZCluster())
        if not needs_cleanup:
            return

        # clean building standby dn process
        cmd_args = (OMCommand.getLocalScript("Local_Query"),
                    self.user, self.localLog,
                    self.logAction, self.logUuid,
                    self.logger.step)
        cmd = "%s --clean_install_process -U %s -l %s " \
              "--log-action=%s --log-uuid=%s --log-step=%s --include-cm " % cmd_args
        self.logger.debug(cmd)
        run_locally = self.localMode or self.isSingle
        DefaultValue.execCommandWithMode(cmd, "clean process", self.sshTool, run_locally, "", target_nodes)

    def deletedInstances(self, nodes=None, datadirs=None):
        """
        function: Run the Local_Clean_Instance script on the given nodes
        input : nodes    - nodes to clean, normalized with changeNoneToEmpytList
                datadirs - data directories, each passed to the script as a -D option
        output: NA
        """
        target_nodes = self.changeNoneToEmpytList(nodes)
        target_dirs = self.changeNoneToEmpytList(datadirs)
        dir_options = "".join(" -D %s " % one_dir for one_dir in target_dirs)
        cmd_args = (OMCommand.getLocalScript("Local_Clean_Instance"),
                    self.user,
                    dir_options,
                    self.localLog,
                    self.logAction,
                    self.logUuid,
                    self.logger.step)
        cmd = "%s -U %s %s -l %s --log-action=%s --log-uuid=%s --log-step=%s" % cmd_args
        self.logger.debug(cmd)
        run_locally = self.localMode or self.isSingle
        DefaultValue.execCommandWithMode(cmd, "clean instance", self.sshTool, run_locally, "", target_nodes)

    def cleanNodeConfig(self, nodes=None, datadirs=None):
        """
        function: Clean the install processes and then delete the instances on the nodes
        input : nodes    - handed to cleanInstallProcess and deletedInstances
                datadirs - handed to deletedInstances
        output: NA
        """
        self.logger.log("Deleting instances from all nodes.")
        cleanup_steps = (
            # Clean install process under nodes
            (self.cleanInstallProcess, (nodes,)),
            # Deleted instances under nodes
            (self.deletedInstances, (nodes, datadirs)),
        )
        for step, step_args in cleanup_steps:
            step(*step_args)
        self.logger.log("Successfully deleted instances from all nodes.")

    def checkNodeConfig(self, nodes=None, cooGucParam=None, dataGucParam=None, checkEmpty=None):
        """
        function: Run the Local_Check_Config script on the given nodes
        input : nodes        - nodes to check, normalized with changeNoneToEmpytList
                cooGucParam  - parameters passed to the script as -C options
                dataGucParam - parameters passed to the script as -D options
                checkEmpty   - when exactly False, --dont-check-empty is appended
        output: NA
        """
        self.logger.log("Checking node configuration on all nodes.")
        target_nodes = self.changeNoneToEmpytList(nodes)
        coo_params = self.changeNoneToEmpytList(cooGucParam)
        data_params = self.changeNoneToEmpytList(dataGucParam)
        # -C options come first, followed by the -D options
        guc_options = [" -C \\\"%s\\\"" % item for item in coo_params]
        guc_options += [" -D \\\"%s\\\"" % item for item in data_params]
        cmd_args = (OMCommand.getLocalScript("Local_Check_Config"),
                    self.user,
                    self.localLog,
                    "".join(guc_options),
                    self.logAction,
                    self.logUuid,
                    self.logger.step)
        cmd = "%s -U %s -l %s %s --log-action=%s --log-uuid=%s --log-step=%s" % cmd_args
        if checkEmpty is False:
            cmd += " --dont-check-empty"
        self.logger.debug("Command for checking node config:%s." % cmd)
        run_locally = self.localMode or self.isSingle
        DefaultValue.execCommandWithMode(cmd, "check node configuration", self.sshTool,
                                         run_locally, "", target_nodes)
        self.logger.log("Successfully checked node configuration on all nodes.")

    def initNodeInstance(self, nodes=None, dbInitParam=None, gtmInitParam=None, dws_mode=False, vc_mode=False):
        """
        function: Run the Local_Init_Instance script on the given nodes
        input : nodes        - nodes to initialize, normalized with changeNoneToEmpytList
                dbInitParam  - parameters passed to the script as -P options
                gtmInitParam - parameters passed to the script as -G options
                dws_mode     - when true, --dws_mode is appended to the command
                vc_mode      - when true, --vc_mode is appended to the command
        output: NA
        """
        self.logger.log("Initializing instances on all nodes.")
        target_nodes = self.changeNoneToEmpytList(nodes)
        db_params = self.changeNoneToEmpytList(dbInitParam)
        gtm_params = self.changeNoneToEmpytList(gtmInitParam)
        # -P options come first, followed by the -G options
        init_options = [" -P \\\"%s\\\"" % item for item in db_params]
        init_options += [" -G \\\"%s\\\"" % item for item in gtm_params]
        cmd_args = (OMCommand.getLocalScript("Local_Init_Instance"),
                    self.user,
                    "".join(init_options),
                    self.localLog,
                    self.logAction,
                    self.logUuid,
                    self.logger.step)
        cmd = "%s -U %s %s -l %s --log-action=%s --log-uuid=%s --log-step=%s" % cmd_args
        for enabled, switch in ((dws_mode, " --dws_mode"), (vc_mode, " --vc_mode")):
            if enabled:
                cmd += switch
        self.logger.debug("Command for initialize instances:%s." % cmd)
        run_locally = self.localMode or self.isSingle
        DefaultValue.execCommandWithMode(cmd, "initialize instances", self.sshTool,
                                         run_locally, "", target_nodes)
        self.logger.log("Successfully initialized instances on all nodes.")

    def configHba(self, nodes=None, add_ips=None):
        """
        function: Run the Local_Config_Hba script on the given nodes
        input : nodes   - nodes to configure, normalized with changeNoneToEmpytList
                add_ips - IPs joined with commas and passed as --add-ip when not empty
        output: NA
        """
        target_nodes = self.changeNoneToEmpytList(nodes)
        extra_ips = self.changeNoneToEmpytList(add_ips)
        self.logger.log("Configuring pg_hba on all nodes.")
        cmd_args = (OMCommand.getLocalScript("Local_Config_Hba"),
                    self.user,
                    self.localLog,
                    self.logAction,
                    self.logUuid,
                    self.logger.step)
        cmd = "%s -U %s -l %s --log-action=%s --log-uuid=%s --log-step=%s" % cmd_args
        if extra_ips:
            cmd += " --add-ip=%s" % ','.join(extra_ips)

        self.logger.debug("Command for configuring pg_hba: %s." % cmd)
        run_locally = self.localMode or self.isSingle
        DefaultValue.execCommandWithMode(cmd, "update instances configuration", self.sshTool,
                                         run_locally, "", target_nodes)
        self.logger.log("Successfully configured pg_hba on all nodes.")

    def installKerberos(self, nodes=None):
        """
        function: Run the Local_Kerberos script in "install" mode on the given nodes
        input : nodes - nodes to install on, normalized with changeNoneToEmpytList
        output: NA
        note  : nothing is executed unless DefaultValue.checkKerberos accepts the mpprc file
        """
        self.logger.debug("Installing kerberos on all nodes.")
        # ELK and MPPDB enable install kerberos
        mpprc_file = DefaultValue.getMpprcFile()
        if not DefaultValue.checkKerberos(mpprc_file):
            return

        target_nodes = self.changeNoneToEmpytList(nodes)
        self.logger.debug("Installing kerberos on all nodes.")
        cmd_args = (OMCommand.getLocalScript("Local_Kerberos"),
                    "install",
                    self.user,
                    self.logAction,
                    self.logUuid,
                    self.logger.step)
        cmd = "%s -m %s -U %s --log-action=%s --log-uuid=%s --log-step=%s" % cmd_args
        run_locally = self.localMode or self.isSingle
        DefaultValue.execCommandWithMode(cmd, "install kerberos", self.sshTool, run_locally, "", target_nodes)
        self.logger.debug("Successfully installed kerberos on all nodes.")

    def excuteRollbackCommands(self):
        """
        support two kinds of rollback command:
        1. rollback commands + execute nodes
        2. rollback function + function args
        """
        self.logger.log("Rolling back.")
        if not self.rollbackCommands:
            self.logger.log("There is nothing to roll back.")
            return
        for rollback in self.rollbackCommands:
            if callable(rollback[0]):
                func, args = rollback[0], rollback[1]
                try:
                    if hasattr(func, "__name__"):
                        func_name = func.__name__
                    elif hasattr(func, "__func__") and hasattr(func.__func__, "__name__"):
                        func_name = func.__func__.__name__
                    else:
                        func_name = str(func)
                except Exception as e:
                    self.logger.debug("Failed to parse func_name: %s." % str(e))
                    func_name = str(func)
                try:
                    self.logger.log("Rollback function: %s." % func_name)
                    result = func(*args)
                    self.logger.debug("Rollback function: %s. Result: %s." % (
                        func_name, str(result) if result is not None else "finished."))
                except Exception as e:
                    self.logger.error(ErrorCode.GAUSS_516["GAUSS_51626"] +
                                      " Failed to execute function[%s]. Error: \n%s" %
                                      (func_name, str(e)))
                    sys.exit(3)
            else:
                cmd, nodes = rollback[0], rollback[1]
                self.logger.log("Rollback command: %s" % cmd)
                if nodes is None:
                    (status, output) = self.sshTool.getSshStatusOutput(cmd)
                else:
                    (status, output) = self.sshTool.getSshStatusOutput(cmd, nodes)
                for ret in status.values():
                    if ret != DefaultValue.SUCCESS:
                        self.logger.error(ErrorCode.GAUSS_516["GAUSS_51626"] +
                                          " Failed to execute command[%s]. Result: \n%s" %
                                          (cmd, status))
                        sys.exit(3)
                self.logger.debug(output)
        self.logger.log("Rollback succeeded.")

    def get_incremental_build_lock_timeout(self):
        """
        function: Read the "incremental_build_lock_timeout" configuration parameter
        input : NA
        output: the configured value as int, or 7200 when the value is missing or is not
                made of digits only
        """
        try:
            configured = DefaultValue.get_config_parameter_value("expend", "incremental_build_lock_timeout", True)
        except Exception as err:
            raise Exception(str(err))
        if configured is not None and str(configured).isdigit():
            return int(configured)
        # Fall back to the default timeout
        return 7200

    def lockClusterInternal(self, dbNodes, lockTime, logger, sshTool, action, g_opts=None):
        '''
        function: Lock the cluster through the first suitable node that has a coordinator
        input :
            dbNodes:  cluster node info
            lockTime: lock time put into the lock command
            logger:   logger, also stored in self.logger
            sshTool:  SSH tool used to run the commands
            action:   the action name which call it. eg "managecn", "replace"
            g_opts:   command option; its user and localLog (and, for "managecn", mode,
                      hostname and abnormalCN) attributes are read
        output:
            name of the node on which the cluster was locked
            None: Failed to lock cluster
        '''
        self.logger = logger
        # When executing a lock cluster, multithreading is implemented using daemon threads.
        daemon_option = "--set-daemon"
        cmd = "%s --lock-cluster %s --lock-time=%d -U %s " \
              "-l %s --lock-mode %s --log-action=%s --log-uuid=%s --log-step=%s" % \
              (OMCommand.getLocalScript("Local_Query"),
               daemon_option,
               lockTime,
               g_opts.user,
               g_opts.localLog,
               self.lockMode,
               self.logAction,
               self.logUuid,
               self.logger.step)
        self.logger.debug("Command for locking cluster: %s." % cmd)
        # The command above keeps the caller's lock time; in "waitingLock" mode only the value
        # handed to lockClusterImpl is replaced.
        if self.lockMode == "waitingLock":
            lockTime = self.get_incremental_build_lock_timeout()
        locked = False
        detail = ""
        lock_node = ""
        self.logger.debug("All node names: %s." % ",".join([node.name for node in dbNodes]))

        for node in dbNodes:
            # when delete CN at local host or the CN is abnormal, then skip lock cluster
            if action == "managecn" and (
                    (g_opts.mode == "delete" and node.name == g_opts.hostname) or
                    (len(g_opts.abnormalCN) and node.name == g_opts.abnormalCN[0].hostname)):
                continue

            # Only a node with a coordinator can be used
            if len(node.coordinators) == 0:
                continue

            lock_node = node.name
            # when do gs_replace,we should execute gs_clean before lock Cluster
            if action == "replace":
                gs_clean_cmd = "gs_clean -a -p %d -v -r " % node.coordinators[0].port
                self.logger.debug("Command for gs_clean:%s" % gs_clean_cmd)
                sshTool.executeCommand(gs_clean_cmd, "execute gs_clean", DefaultValue.SUCCESS, [lock_node])

            # Only the first suitable node is tried; lockClusterImpl does the retries
            locked, detail = self.lockClusterImpl(cmd, lockTime, sshTool, lock_node,
                                                  logger, action, g_opts)
            break

        if locked:
            self.logger.debug("Successfully locked cluster on node[%s]." % lock_node)
            return lock_node
        self.logger.log("Failed to lock cluster on all nodes.\n%s" % detail)
        return None

    def lockClusterImpl(self, cmd, lockTime, sshTool, curNode, logger, action, g_opts):
        '''
        function: Run the lock command on one node, trying up to three times
        input :
            cmd:      lock command to execute
            lockTime: value handed to setTimer and to unlockClusterInternal
            sshTool:  SSH tool used to run the command
            curNode:  cluster node on which the command is executed
            logger:   handed to unlockClusterInternal
            action:   the action name which call it. eg "managecn", "replace"
            g_opts:   command option
        output:
            (lock_success, message): lock_success is True/False, message describes the
            result of the last attempt
        '''
        message = ""
        # retry 3 times for waiting lock.
        for attempt in range(1, 4):
            try:
                self.setTimer(lockTime)
                sshTool.executeCommand(cmd, "lock cluster", DefaultValue.SUCCESS, [curNode])
                message = "Successfully locked cluster."
                self.logger.debug(message)
                return True, message
            except Exception as lock_err:
                # Release whatever the failed attempt may have left behind before retrying
                try:
                    self.unlockClusterInternal([curNode], lockTime, logger, sshTool, action, g_opts)
                except Timeout as timeout_err:
                    self.logger.debug("Caught timeout error [%s] when unlocking the cluster."
                                      " Ignore the error." % str(timeout_err))
                except Exception as unlock_err:
                    self.logger.debug("Failed to unlock cluster on node [%s]. Error: \n%s" %
                                      (curNode, str(unlock_err)))
                finally:
                    message = "Failed to lock cluster on node [%s] at the %d times. Error: \n%s" % (
                        curNode, attempt, str(lock_err))
                    self.logger.debug(message)
        return False, message

    def unlockClusterInternal(self, dbNodeNames, lockTime, logger, sshTool, action, g_opts=None):
        '''
        function: Run Local_Query with --release-cluster on the given nodes
        input :
            dbNodeNames:  cluster node name
            lockTime: lock time put into the release command
            logger:   logger, also stored in self.logger
            sshTool:  SSH tool used to run the command
            action:   the action name which call it. eg "managecn", "replace"
            g_opts:   command option; its user and localLog attributes are read
        output: NA (a failure is only logged, it is not raised)
        '''
        self.logger = logger
        try:
            self.resetTimer()
            cmd_args = (OMCommand.getLocalScript("Local_Query"),
                        lockTime,
                        g_opts.user,
                        self.lockMode,
                        g_opts.localLog,
                        self.logAction,
                        self.logUuid,
                        self.logger.step)
            cmd = "%s --release-cluster --lock-time=%d -U %s --lock-mode %s " \
                  "-l %s --log-action=%s --log-uuid=%s --log-step=%s" % cmd_args
            # now when do gs_replace, we unlock the cluster by root user
            self.logger.debug("Command for unlocking cluster: %s." % cmd)
            sshTool.executeCommand(cmd, "unlock cluster", DefaultValue.SUCCESS, dbNodeNames)
        except Exception as err:
            self.logger.log("Warning: Failed to unlock cluster.")
            self.logger.debug(str(err))

    def getClusterRings(self, clusterInfo):
        """
        function: get clusterRings from cluster info
        input: DbclusterInfo() instance
        output: list
        """
        host_per_node_list = self.getDNHostnamesPerNode(clusterInfo)
        # Loop the hostname list on each node where the master and slave of the DN instance.
        for i in range(len(host_per_node_list)):
            # Loop the list after the i-th list
            for per_nodelist in host_per_node_list[i + 1:len(host_per_node_list)]:
                # Define a tag
                flag = 0
                # Loop the elements of each perNodelist
                for host_name_element in per_nodelist:
                    # If elements on the i-th node, each element of the list are joined in hostPerNodeList[i
                    if host_name_element in host_per_node_list[i]:
                        flag = 1
                        for element in per_nodelist:
                            if element not in host_per_node_list[i]:
                                host_per_node_list[i].append(element)
                if flag == 1:
                    host_per_node_list.remove(per_nodelist)

        return host_per_node_list

    def getDNHostnamesPerNode(self, clusterInfo):
        """
        function: collect, for every node, the hostnames related to its primary DN instances
        input: DbclusterInfo() instance
        output: list of hostname lists, one list for each node that has at least
                one primary DN. A list holds the hostname of every primary DN of
                the node and the hostnames of its peer instances, without duplicates.
        """
        hosts_of_all_nodes = []
        for node in clusterInfo.dbNodes:
            hosts_of_node = []
            for dn in node.datanodes:
                # only primary DN instances are the starting point
                if dn.instanceType != DefaultValue.MASTER_INSTANCE:
                    continue
                # the primary host first, then other standby and dummy hostname
                candidates = [dn.hostname]
                candidates.extend(peer.hostname for peer in clusterInfo.getPeerInstance(dn))
                for hostname in candidates:
                    if hostname not in hosts_of_node:
                        hosts_of_node.append(hostname)
            # nodes without primary DN do not contribute an entry
            if hosts_of_node:
                hosts_of_all_nodes.append(hosts_of_node)
        return hosts_of_all_nodes

    # for olap function
    def checkIsElasticGroupExist(self, dbNodes):
        """
        function: Check if elastic_group exists and store the answer in self.isElasticGroup.
        input : dbNodes: nodes that are searched for the first coordinator,
                the check SQL is executed on that coordinator
        output: NA
        """
        self.logger.debug("Checking if elastic group exists.")

        self.isElasticGroup = False
        # pick the first coordinator found while traversing the nodes
        found_cn = []
        for node in dbNodes:
            if len(node.coordinators) >= 1:
                found_cn.append(node.coordinators[0])
                break

        # count the node groups named 'elastic_group' of kind 'e'
        check_group_sql = "SELECT count(*) FROM pg_catalog.pgxc_group " \
                          "WHERE group_name='elastic_group' and group_kind='e'; "
        (check_status, check_output) = ClusterCommand.remoteSQLCommand(check_group_sql, self.user,
                                                                       found_cn[0].hostname, found_cn[0].port,
                                                                       is_inplace_upgrade=True)
        if check_status != 0 or not check_output.isdigit():
            raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] %
                            "node group" + " Error:\n%s" % str(check_output))

        # only zero or one matching group is accepted
        group_count = check_output.strip()
        if group_count not in ('0', '1'):
            raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % "the number of node group")
        self.isElasticGroup = (group_count == '1')

        self.logger.debug("Successfully checked if elastic group exists.")

    def checkHostnameIsLoop(self, node_name_list):
        """
        function: check if hostname is looped
        input : NA
        output: NA
        """
        is_ring = True
        # 1.get ring information in the cluster
        cluster_rings = self.getClusterRings(self.clusterInfo)
        node_ring = ""
        node_name_rings = []
        # 2.Check if the node is in the ring
        for num in range(len(cluster_rings)):
            ring_node_list = []
            for node_name in node_name_list:
                if node_name in cluster_rings[num]:
                    ring_node_list.append(node_name)
            if len(ring_node_list) != 0 and len(ring_node_list) == len(cluster_rings[num]):
                node_name_rings.append(ring_node_list)
            if len(ring_node_list) != 0 and len(ring_node_list) != len(cluster_rings[num]):
                is_ring = False
                break
            else:
                continue
        if not is_ring:
            raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] % "h" +
                            " The hostname (%s) specified by the -h parameter must be looped." % node_ring)
        return cluster_rings, node_name_rings

    def getSQLResultList(self, sql, user, hostname, port, database="postgres"):
        """
        function: execute a SQL statement with ClusterCommand.remoteSQLCommand
                  and return the output line by line
        input : sql, user, hostname, port, database: passed to remoteSQLCommand
        output: list, the command output split with '\n'
        raises: Exception if the status is not 0 or findErrorInSql reports an error
        """
        (status, output) = ClusterCommand.remoteSQLCommand(sql, user, hostname, port, False, database,
                                                           is_inplace_upgrade=True)
        failed = status != 0 or ClusterCommand.findErrorInSql(output)
        if failed:
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % sql + " Error:\n%s" % str(output))
        return output.split("\n")

    def cleanAndReloadSingleGPUCache(self, tmpExecPara):
        """
        function: clean and reload the GPU cache for one schema of one database
        input : tmpExecPara: sequence [database, schema, hostname, port]
        output: NA
        raises: Exception if the SQL command fails
        """
        database, schema = tmpExecPara[0], tmpExecPara[1]
        hostname, port = tmpExecPara[2], tmpExecPara[3]
        sql_template = "set application_name='OM'; SET search_path = '%s'; " \
                       "SELECT * FROM pg_catalog.clean_vector_gpu(); " \
                       "SELECT * FROM pg_catalog.add_vector_gpu();"
        execute_sql = sql_template % schema
        self.logger.debug("SQL for clean and reload GPU on database %s: %s." % (database, execute_sql))
        (status, output) = ClusterCommand.remoteSQLCommand(execute_sql, self.user, hostname, port, False, database,
                                                           is_inplace_upgrade=True)
        succeeded = status == 0 and not ClusterCommand.findErrorInSql(output)
        if not succeeded:
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % execute_sql + " Error:\n%s" % str(output))

    def isGPUAlreadyInUse(self, coorNode):
        """
        function: check whether cleaning and reloading the GPU cache has to be skipped
        input : coorNode: coordinator instance, its hostname and port are used for the SQL checks
        output: True if the product version is Gauss300, if pg_proc does not hold exactly one
                function named 'pgxc_get_simsearch_lib_load_status', or if that function
                reports at least one row with lib_load_status = false; otherwise False
        raises: Exception if one of the SQL checks fails or returns a non-numeric output
        """
        # GPU is no supported in Gauss300, so read the product version first
        if self.productVersion is None:
            feature_file = "%s/bin/%s" % (self.clusterInfo.appPath, DefaultValue.DEFAULT_DISABLED_FEATURE_FILE_NAME)
            if os.path.isfile(feature_file):
                self.productVersion, _ = DefaultValue.unencryptedFeatureInfo(feature_file)
        version = self.productVersion
        if version is not None and version.lower() == DefaultValue.PRODUCT_VERSION_GAUSS300.lower():
            return True

        # Check if the status function exists
        proc_sql = "SELECT COUNT(proname) FROM pg_catalog.pg_proc WHERE proname = 'pgxc_get_simsearch_lib_load_status';"
        (status, output) = ClusterCommand.remoteSQLCommand(proc_sql, self.user, coorNode.hostname, coorNode.port,
                                                           False, is_inplace_upgrade=True)
        if status != 0 or not output.isdigit():
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % proc_sql + " Error:\n%s" % str(output))
        if int(output) != 1:
            self.logger.debug("The SQL used to check the GPU cache is %s: The output value is %s." %
                              (proc_sql, output))
            return True

        # check if GPU is be used
        load_sql = "set application_name='OM'; SET search_path = 'public'; SELECT COUNT(lib_load_status) FROM" \
                   " pg_catalog.pgxc_get_simsearch_lib_load_status() WHERE lib_load_status = false;"
        (status, output) = ClusterCommand.remoteSQLCommand(load_sql, self.user, coorNode.hostname, coorNode.port,
                                                           False, is_inplace_upgrade=True)
        # remove the echo of the SET statements so that only the count is left
        output = output.replace("SET\n", '')
        if status != 0 or not output.isdigit():
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % load_sql + " Error:\n%s" % str(output))
        if int(output) <= 0:
            return False
        self.logger.debug("The SQL used to check the GPU cache is %s: The output value is %s." % (load_sql, output))
        return True

    def cleanAndReloadGPUCache(self, coorNode):
        """
        function: excute sql to clean and reload GPU cache
        input : NA
        output: NA
        """
        if self.isGPUAlreadyInUse(coorNode):
            return

        # if the table 'gpu_vector_info' is in user schema, then clean and reload GPU cache
        tmp_exec_para_list = []
        # get all database
        database_sql = "SELECT datname FROM pg_catalog.pg_database WHERE datallowconn ORDER BY 1;"
        database_list = self.getSQLResultList(database_sql, self.user, coorNode.hostname, coorNode.port)
        schema_sql = "SELECT n.nspname FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n ON " \
                     "n.nspname NOT IN ('pg_toast', 'pg_catalog', 'information_schema', 'cstore') AND " \
                     "c.relnamespace = n.oid AND c.oid >= 16384 " \
                     "WHERE c.relname = 'gpu_vector_info';"
        # we can make sure that the length of databaseList must greater than 1
        for database in database_list:
            schema_list = self.getSQLResultList(schema_sql, self.user, coorNode.hostname, coorNode.port, database)
            for schema in schema_list:
                if "" == schema:
                    continue
                tmp_exec_para = [database, schema, coorNode.hostname, coorNode.port]
                tmp_exec_para_list.append(tmp_exec_para)

        if len(tmp_exec_para_list) == 0:
            return
        try:
            # Parallel processing cannot be used.
            # The background will update a system table at the same time and generate an error.
            for exec_para in tmp_exec_para_list:
                self.cleanAndReloadSingleGPUCache(exec_para)
        except Exception as e:
            raise Exception(str(e))

    def getLcGroupMessage(self, lc_list):
        """
        function: Obtain the node names that belong to each logic cluster
        input : lc_list---the logic cluster name list
        output: group_message---mapping of logic cluster name and list of node names
        raises: Exception if a SQL command fails (every error is re-raised as Exception)
        """
        self.logger.debug("Obtaining OID through the logic cluster name.")
        try:
            # the map of group---hostname
            group_message = {}
            # get CN instance info from cluster
            coo_inst = self.getCooInst()
            for lc_name in lc_list:
                # 1.get node host ip by lc name
                host_sql = "SELECT distinct(pgxc_node.node_host) FROM pg_catalog.pgxc_node, pg_catalog.pgxc_group \
                            WHERE pgxc_group.group_name='%s' AND pgxc_node.oid = ANY(pgxc_group.group_members) \
                            order by pgxc_node.node_host; " % lc_name
                (status, output) = ClusterCommand.remoteSQLCommand(host_sql,
                                                                   self.user,
                                                                   coo_inst[0].hostname,
                                                                   coo_inst[0].port,
                                                                   ignoreError=False,
                                                                   is_inplace_upgrade=True)
                if status != 0:
                    raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % host_sql + " Error:\n%s" % str(output))
                # the node ip contained in the logical cluster, one per output line
                member_ips = output.split('\n') if output != "" else []

                # 2.get hostname by hostip, the first backIp of a node is compared
                member_names = []
                for node in self.clusterInfo.dbNodes:
                    if (node.backIps[0] in member_ips) and (node.name not in member_names):
                        member_names.append(node.name)

                # 3.map of lcname-hostname
                group_message[lc_name] = member_names

            self.logger.debug("Successfully obtained OID through the logic cluster name.")
            self.logger.debug(group_message)
        except Exception as query_error:
            raise Exception(str(query_error))

        return group_message

    def killCmserver(self, killSignal=DefaultValue.SIGNAL_RELOAD_PARA, cmserverNodes=None):
        """
        function: Remove the file bin/cluster_dynamic_config and send a signal to the cm_server process
        input : killSignal: signal passed to DefaultValue.killInstProcessCmd
                cmserverNodes: names of the nodes to handle, by default all
                               nodes of the cluster that hold a CMServer
        output: NA
        """
        self.logger.debug("Killing CMServer.")
        if cmserverNodes is None:
            # use every node of the cluster that has a CMServer instance
            cmserverNodes = [node.name for node in self.clusterInfo.dbNodes if len(node.cmservers) > 0]
        self.logger.debug("CMServer nodes: %s." % cmserverNodes)
        remove_config_cmd = "rm -rf '%s/bin/cluster_dynamic_config' && " % self.clusterInfo.appPath
        signal_cmd = "%s" % DefaultValue.killInstProcessCmd("cm_server", True, killSignal)
        DefaultValue.execRemoteCmd(self.sshTool, remove_config_cmd + signal_cmd, cmserverNodes)
        self.logger.debug("Successfully killed CMServer.")

    def killAgent(self, killSignal=DefaultValue.SIGNAL_RELOAD_PARA, cmagentNodeNames=None):
        """
        function: Send a signal to the cm_agent process on the given nodes
        input : killSignal: signal passed to DefaultValue.killInstProcessCmd
                cmagentNodeNames: names of the nodes to handle, by default all nodes of the cluster
        output: NA
        """
        self.logger.debug("Killing CMAgent.")
        # work on a copy of the caller's list, or on the cluster's node names
        target_nodes = (self.clusterInfo.getClusterNodeNames() if cmagentNodeNames is None
                        else copy.copy(cmagentNodeNames))
        self.logger.debug("CMAgent nodes: %s." % target_nodes)
        signal_cmd = DefaultValue.killInstProcessCmd("cm_agent", True, killSignal)
        DefaultValue.execRemoteCmd(self.sshTool, signal_cmd, target_nodes)
        self.logger.debug("Successfully killed CMAgent.")

    def getListenIpList(self, dbNodeNames):
        """
        function: Obtain the listening IPs of the primary DN on the nodes
        input : dbNodeNames: names of the nodes to inspect
        output: list of the first listen IP of every primary DN, without duplicates
        raises: Exception if no IP was found
        """
        listen_ips = []

        # obtain the node IP list
        for node_name in dbNodeNames:
            node = self.clusterInfo.getDbNodeByName(node_name)
            for dn in node.datanodes:
                if dn.instanceType != DefaultValue.MASTER_INSTANCE:
                    continue
                first_ip = dn.listenIps[0]
                if first_ip not in listen_ips:
                    listen_ips.append(first_ip)

        # check the ipList
        if not listen_ips:
            raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % "the node ip list")
        return listen_ips

    def getDnNameByListenIp(self, ip_list):
        """
        function: Obtain the DN node names recorded in pgxc_node for the given IPs
        input : ip_list: list of IPs, each one is compared with pgxc_node.node_host
        output: list of the node names (node_name like 'dn_%') found for the IPs.
                An IP without any DN only produces a warning in the log.
        raises: Exception if the SQL command fails
        """
        self.logger.debug("Obtaining DN instance info through the IP list.")

        pgxc_node_name = []
        # get CN instance info from cluster
        coo_inst = self.getCooInst()
        for ip in ip_list:
            # obtain DN information
            # only the second part is formatted, the first one holds a literal '%'
            obtain_cmd = "SELECT node_name FROM pg_catalog.pgxc_node WHERE node_name like 'dn_%'" + \
                         (" and node_host = '%s' ORDER BY node_name;" % ip)
            # execute the sql command
            (status, output) = ClusterCommand.remoteSQLCommand(obtain_cmd,
                                                               self.user,
                                                               coo_inst[0].hostname,
                                                               coo_inst[0].port,
                                                               ignoreError=False,
                                                               is_inplace_upgrade=True)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % obtain_cmd + " Error:\n%s" % str(output))

            # "".split("\n") returns [""], so the emptiness of the result has to
            # be judged on the non-blank lines and not on the length of the split
            dn_names = [line for line in output.split("\n") if line.strip()]
            if not dn_names:
                self.logger.log("Warning: node [%s] does not contain DN instance information." % ip)
            else:
                pgxc_node_name.extend(dn_names)

        self.logger.debug("Successfully obtained DN instance info through the IP list.")

        return pgxc_node_name

    def query_vgroup_bucket_count(self, lcGroupName, coo_inst):
        """
        function: query vgroup_bucket_count of a node group from pgxc_group
        input : lcGroupName: name of the node group
                coo_inst: list of coordinator instances, the first one executes the SQL
        output: int, the bucket count
        raises: Exception if the SQL command fails or the output is not a number
        """
        query_sql = "SELECT vgroup_bucket_count FROM pg_catalog.pgxc_group " \
                    "WHERE group_name = '%s' ;" % lcGroupName
        first_cn = coo_inst[0]
        (status, output) = ClusterCommand.remoteSQLCommand(query_sql,
                                                           self.user,
                                                           first_cn.hostname,
                                                           coo_inst[0].port,
                                                           ignoreError=False,
                                                           is_inplace_upgrade=True)
        if status == 0:
            bucket_text = output.strip()
            if bucket_text.isdigit():
                return int(bucket_text)
        raise Exception(ErrorCode.GAUSS_516["GAUSS_51601"] %
                        "vgroup bucket count" + " Error:\n%s" % str(output))

    def createLogicalNodeGroup(self, lcGroupName, lcNodeNames):
        """
        function: Create node group for logic cluster.
                  A temporary node group "group_version_<time>_<pid>" is created
                  with "DISTRIBUTE FROM" the logic cluster. Nothing is done if
                  pgxc_group already shows the logic cluster in redistribution.
        input : lcGroupName: name of the logic cluster
                lcNodeNames: names of the nodes whose primary DNs build the new node group
        output: NA
        raises: Exception on any failure, the temporary cgroup file is cleaned on all nodes before
        """
        self.logger.debug("Creating node group for logic cluster [%s]." % lcGroupName)
        temp_group_name = "group_version_%s_%s" % (time.strftime("%Y%m%d_%H%M%S"), os.getpid())
        tmp_cgroup_file_path = "%s/etc/%s.gscgroup_perfadm.cfg" % (self.clusterInfo.appPath, temp_group_name)
        try:
            # get CN instance info from cluster
            coo_inst = self.getCooInst()

            def run_on_first_cn(statement):
                # every SQL command of this method is executed on the first coordinator
                return ClusterCommand.remoteSQLCommand(statement,
                                                       self.user,
                                                       coo_inst[0].hostname,
                                                       coo_inst[0].port,
                                                       ignoreError=False,
                                                       is_inplace_upgrade=True)

            # if output=1, explain that the cluster has created a new nodegroup
            check_group_sql = "SELECT count(group_name) FROM pg_catalog.pgxc_group " \
                              "WHERE group_name = '%s' AND in_redistribution = 'y';" % lcGroupName
            (status, output) = run_on_first_cn(check_group_sql)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51601"] %
                                "node group information" + " Error:\n%s" % str(output))
            if output.strip() == str(1):
                self.logger.debug("The cluster has created a new nodegroup for logic cluster [%s]." % lcGroupName)
                return

            # Get IP address by node name, then get DN info by ip
            ip_list = self.getListenIpList(lcNodeNames)
            node_name_list = self.getDnNameByListenIp(ip_list)
            # check if contain DN instance
            if not node_name_list:
                raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % "DN instance" +
                                " Maybe the node you are pointing to does not contain a DN instance.")
            # convert list to str
            dn_str = ",".join(node_name_list)

            # create tmp cgroup file for temp_group_name
            cgroup_cmd = "%s/bin/gs_cgroup -c -N %s" % (self.clusterInfo.appPath, temp_group_name)
            self.sshTool.executeCommand(cgroup_cmd, "create tmp cgroup file", DefaultValue.SUCCESS)

            # the sql command to create node group
            # it will automatically change the old LC's in_redistribution to 'y',
            # and the new LC's in_redistribution to 't'
            # Bind users and LCs after redistribution
            need_change_bucket, lc_group_dn_nums = self.check_change_buckets()
            if need_change_bucket:
                buckets = self.calculate_table_buckets(lc_group_dn_nums)
            else:
                # keep the bucket count the logic cluster already has
                buckets = self.query_vgroup_bucket_count(lcGroupName, coo_inst)
            buckets_option = " NBUCKETS(%d) " % buckets
            sql_parts = [
                "SET xc_maintenance_mode = on; START TRANSACTION; ",
                "create node group \"%s\" with (%s) vcgroup DISTRIBUTE FROM \"%s\" %s;" %
                (temp_group_name, dn_str, lcGroupName, buckets_option),
                "SELECT * FROM pg_catalog.pgxc_cgroup_map_ng_conf('%s'); " % lcGroupName,
                "COMMIT; RESET xc_maintenance_mode; ",
            ]
            sql = "".join(sql_parts)
            self.logger.debug("Sql command for creating nodegroup and user: %s\n" % sql)
            (status, output) = run_on_first_cn(sql)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % sql + " Error:\n%s" % str(output))
        except Exception as create_error:
            # remove the temporary cgroup file before reporting the error
            self.cleanFilesOnAllNodes(tmp_cgroup_file_path)
            raise Exception(str(create_error))

        self.logger.debug("Successfully created node group for logic cluster [%s]." % lcGroupName)

    def cleanFilesOnAllNodes(self, filePath, nodeNames=None):
        """
        function: clean files on all nodes (best effort).
        input : filePath: path to delete
                nodeNames: names of the nodes to clean, if None the command is
                           executed without an explicit node list
        output: NA, a failure is only written to the debug log
        """
        try:
            delete_cmd = g_file.SHELL_CMD_DICT["deleteFile"] % (filePath, filePath)
            exec_args = [delete_cmd, "clean files on all nodes"]
            if nodeNames is not None:
                # restrict the execution to the given nodes
                exec_args.extend([DefaultValue.SUCCESS, nodeNames])
            self.sshTool.executeCommand(*exec_args)
        except Exception as clean_error:
            self.logger.debug("Failed to clean files on all nodes. Output:\n%s" % str(clean_error))

    def loadCgroupFile(self, lcName):
        """
        function: Load the logical cluster cgroup configuration file by calling
                  pg_catalog.pgxc_cgroup_map_ng_conf on the first coordinator
        input : lcName: name of the logical cluster
        output: NA
        raises: Exception if the SQL command fails (every error is re-raised as Exception)
        """
        self.logger.log("Loading the cgroup config file.")
        try:
            # get CN instance info from cluster
            first_cn = self.getCooInst()[0]
            # map the DN and gs_cgroup
            load_sql = "SELECT * FROM pg_catalog.pgxc_cgroup_map_ng_conf('%s'); " % lcName
            self.logger.debug("Sql command for loading the cgroup config file: %s" % load_sql)
            # load cgroup config file
            (status, output) = ClusterCommand.remoteSQLCommand(load_sql,
                                                               self.user,
                                                               first_cn.hostname,
                                                               first_cn.port,
                                                               ignoreError=False,
                                                               is_inplace_upgrade=True)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % load_sql + " Error:\n%s" % str(output))
        except Exception as load_error:
            raise Exception(str(load_error))

        self.logger.log("Successfully loaded the cgroup config file.")

    def buildNewNodeInst(self, oldCnList, newNodeInstList, buildMode='full', dilatation_mode="read-only"):
        """
        function: Getting DN building mapping.
        input : NA
        output: NA
        """
        # starting build
        self.logger.debug("Building the instances on new nodes by %s mode." % buildMode)
        try:
            new_node_inst_list_bak = copy.deepcopy(newNodeInstList)
            while len(new_node_inst_list_bak) > 0:
                inst_mapping = self.getBuildInstMapping(oldCnList, new_node_inst_list_bak)
                cmd = self.getBuildCommand(buildMode, inst_mapping)
                if dilatation_mode == "insert":
                    cmd += " --online-build"
                self.logger.debug("Command for building the instances on new nodes: %s" % cmd)
                self.sshTool.executeCommand(cmd, "build the new instances", DefaultValue.SUCCESS, self.newNodeNames)
                for instId in inst_mapping.keys():
                    new_node_inst_list_bak.remove(instId)
        except Exception as e:
            kill_cmd_list = []
            # Clean up the gaussdb process that may remain
            for new_node_name in self.newNodeNames:
                kill_cmd_list.append("export LD_LIBRARY_PATH=/lib64:$LD_LIBRARY_PATH; "
                                     "ssh %s -n %s 'killall -9 gaussdb;killall -9 gsql' 2>/dev/null;" %
                                     (new_node_name, DefaultValue.SSH_OPTION))
            kill_cmd = "".join(kill_cmd_list)
            os.system(kill_cmd[:-1])
            raise Exception(str(e))

    def getAllNewNodeInst(self, ignoreCN=False):
        """
        function: Getting new node instance building mapping.
        input : NA
        output: NA
        """
        new_node_dn_tmp_list = []
        max_dn_num_per_node = 0
        new_node_inst_list = []
        # Get a two-dimensional array of the primary DN of each node:newNodeDnList
        for i in range(len(self.newNodes)):
            db_node = self.newNodes[i]
            if not ignoreCN and len(db_node.coordinators) >= 1:
                new_node_inst_list.append(db_node.coordinators[0].instanceId)
            master_dn_list = []
            for datanode in db_node.datanodes:
                if datanode.instanceType == DefaultValue.MASTER_INSTANCE:
                    master_dn_list.append(datanode.instanceId)
            if len(master_dn_list) > max_dn_num_per_node:
                max_dn_num_per_node = len(master_dn_list)
            new_node_dn_tmp_list.append(master_dn_list)

        # Rearrange two-dimensional arrays by column
        for i in range(max_dn_num_per_node):
            for j in new_node_dn_tmp_list:
                if len(j) > i:
                    new_node_inst_list.append(j[i])

        return new_node_inst_list

    def getBuildInstMapping(self, cnList, newNodeInstList):
        """
        function: Getting new node instance building mapping.
        input : cnList, newNodeInstList
        output: newNodeInstMapping
        """
        new_node_inst_mapping = {}
        cn_nums = len(cnList)
        for j in range(cn_nums):
            for i in range(len(newNodeInstList)):
                if i // cn_nums == self.jobs:
                    break
                if i % cn_nums == j:
                    new_node_inst_mapping[newNodeInstList[i]] = cnList[j].instanceId
        return new_node_inst_mapping

    def getGUCStrForBuild(self, nodeNames, mode, NormalcooInsts, tmpFile):
        """
        """
        build_para_map = {'wal_keep_segments': 1000000,
                          'enable_prevent_job_task_startup': 'on'}
        guc_str_list = []
        if mode == "set":
            if os.path.isfile(tmpFile):
                self.logger.debug("Record the guc settings file already exists.")
            else:
                sql = "SELECT name,setting FROM pg_catalog.pg_settings " \
                      "WHERE name IN ('wal_keep_segments', 'enable_prevent_job_task_startup');"
                if NormalcooInsts is None:
                    (status, output) = ClusterCommand.remoteSQLCommand(sql, self.user, self.cooInst.hostname,
                                                                       self.cooInst.port, False,
                                                                       DefaultValue.DEFAULT_DB_NAME,
                                                                       is_inplace_upgrade=True)
                else:
                    (status, output) = ClusterCommand.remoteSQLCommand(sql, self.user, NormalcooInsts[0].hostname,
                                                                       NormalcooInsts[0].port, False,
                                                                       DefaultValue.DEFAULT_DB_NAME,
                                                                       is_inplace_upgrade=True)
                if status != 0:
                    raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % sql + " Error:\n%s" % str(output))
                self.logger.debug("Record the guc settings before building:\n %s" % output)

                g_file.createFile(tmpFile, True, DefaultValue.KEY_FILE_MODE)
                g_file.writeFile(tmpFile, [output], mode="w")
                self.sshTool.scpFiles(tmpFile, tmpFile, nodeNames)
                for key in build_para_map.keys():
                    guc_str_list.append(" -c \"%s=%s\" " % (key, build_para_map[key]))
        else:
            if os.path.isfile(tmpFile):
                lines = g_file.readFile(tmpFile)
                for line in lines:
                    para_info = line.split('|')
                    para_name = para_info[0].strip()
                    para_value = para_info[1].strip()
                    if para_name == "enable_prevent_job_task_startup":
                        guc_str_list.append("-c \"%s\" " % para_name)
                    else:
                        guc_str_list.append("-c \"%s=%s\" " % (para_name, para_value))
            else:
                self.logger.debug("Record the guc settings file does not exists.")
        guc_str = "".join(guc_str_list)
        return guc_str

    def manageGucParameterForBuild(self, nodeNames, mode="set", NormalcooInsts=None):
        """
        function: Set "wal_keep_segments=1000000" and 'enable_prevent_job_task_startup=on' by guc tool.
                  Before starting CN build DN, you need to set the parameters of
                  Cn and set the parameter value to 1000000 to ensure that
                  the xlog log of the remote CN is not deleted.
        input : nodeNames: nodes that hold a copy of the record file
                mode : set,restore
                     set: Save the values of the parameters to a temporary file
                     restore: Read the recorded values and restore them, then
                              delete the temporary file
                NormalcooInsts: coordinators to reload, by default all coordinators
        output: NA
        raises: Exception if the gs_guc command fails
        """
        self.logger.debug("Setting guc parameter for building new nodes.")
        tmp_file = "%s/wal_keep_segments.dat" % DefaultValue.getTmpDirFromEnv(self.user)
        guc_str = self.getGUCStrForBuild(nodeNames, mode, NormalcooInsts, tmp_file)
        if guc_str == "":
            # nothing has to be changed
            return

        if NormalcooInsts is None:
            cmd = "gs_guc reload -Z coordinator -N all -I all %s" % guc_str
        else:
            # one reload command per coordinator, chained with &&
            cmd = " && ".join("gs_guc reload -Z coordinator -N %s -I all %s" % (cooInst.hostname, guc_str)
                              for cooInst in NormalcooInsts)

        self.logger.debug("Command to set GUC parameters: %s" % cmd)
        (status, output) = DefaultValue.retryGetstatusoutput(cmd)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_500["GAUSS_50007"] % "GUC" + " Command:%s. Error:\n%s" % (cmd, output))

        if mode == "restore":
            # After restoring the wal_keep_segments settings, delete the temporary file
            clean_cmd = g_file.SHELL_CMD_DICT["deleteFile"] % (tmp_file, tmp_file)
            self.sshTool.executeCommand(clean_cmd, "clean the record file of guc settings",
                                        DefaultValue.SUCCESS, nodeNames)
            if os.path.exists(tmp_file):
                os.remove(tmp_file)
        self.logger.debug("Successfully set guc parameter for building new nodes.")

    def removeBuildFile(self, nodeNames):
        """
        function: Run the Local_Dilatation_Config script with target "remove_build_file"
                  on the given nodes (per the original note this removes gs_build.pid of
                  the master DN instances so that the new nodes can start).
        input : nodeNames : nodes on which the script is executed
        output: NA
        """
        self.logger.debug("Removing build file for new nodes.")
        cmd_args = (OMCommand.getLocalScript("Local_Dilatation_Config"),
                    self.xmlFile,
                    self.localLog,
                    self.logAction,
                    self.logUuid,
                    self.logger.step)
        cmd = "%s --target=remove_build_file -X '%s' -l '%s' --log-action=%s --log-uuid=%s --log-step=%s" % cmd_args
        self.logger.debug("Command for removing build file: %s" % cmd)
        self.sshTool.executeCommand(cmd, "remove build file", DefaultValue.SUCCESS, nodeNames)
        self.logger.debug("Successfully removed build file.")

    def checkLcGroupNameExist(self, lcName):
        """
        function: Count the node groups of kind 'v' in pgxc_group that are named lcName.
                  The query is sent to the first CN returned by getCooInst().
        input : lcName : logical cluster (node group) name
        output: number of matching rows as int
        """
        self.logger.debug("Checking the logical cluster name.")
        first_cn = self.getCooInst()[0]
        sql = "SELECT count(*) FROM pg_catalog.pgxc_group \
               WHERE group_name='%s' and group_kind = 'v';" % lcName
        (status, output) = ClusterCommand.remoteSQLCommand(sql,
                                                           self.user,
                                                           first_cn.hostname,
                                                           first_cn.port,
                                                           ignoreError=False,
                                                           is_inplace_upgrade=True)
        # the answer must be a plain non-negative integer
        query_ok = status == 0 and output.isdigit()
        if not query_ok:
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % sql + " Error:\n%s" % str(output))

        self.logger.debug("Successfully checked the logical cluster name.Output:%s." % output)
        return int(output)

    def getCooInst(self):
        """
        function: get CN instance
        input : NA
        output: CN instance
        """
        coor_inst = []
        # get CN on nodes
        for dbNode in self.clusterInfo.dbNodes:
            if len(dbNode.coordinators) >= 1:
                coor_inst.append(dbNode.coordinators[0])
        # check if contain CN on nodes
        if len(coor_inst) == 0:
            raise Exception(ErrorCode.GAUSS_526["GAUSS_52602"])
        else:
            return coor_inst

    def getGroupName(self, fieldName, fieldVaule):
        """
        function: Query pgxc_group for the group_name of the rows where
                  fieldName = fieldVaule, using the first CN of getCooInst().
        input : fieldName  : column name used in the WHERE clause
                fieldVaule : value placed verbatim into the SQL (the caller quotes it)
        output: stripped query output
        """
        first_cn = self.getCooInst()[0]

        condition = (fieldName, fieldVaule)
        obtain_sql = "SELECT group_name FROM pg_catalog.pgxc_group WHERE %s = %s; " % condition
        (status, output) = ClusterCommand.remoteSQLCommand(
            obtain_sql, self.user, first_cn.hostname, first_cn.port,
            ignoreError=False, is_inplace_upgrade=True)
        if status == 0:
            return output.strip()

        raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % obtain_sql + " Error:\n%s" % str(output))

    def getGroupKind(self, fieldName, fieldVaule):
        """
        function: Query pgxc_group for the group_kind of the rows where
                  fieldName = fieldVaule, using the first CN of getCooInst().
        input : fieldName  : column name used in the WHERE clause
                fieldVaule : value placed verbatim into the SQL (the caller quotes it)
        output: stripped query output
        """
        first_cn = self.getCooInst()[0]

        condition = (fieldName, fieldVaule)
        obtain_sql = "SELECT group_kind FROM pg_catalog.pgxc_group WHERE %s = %s; " % condition
        (status, output) = ClusterCommand.remoteSQLCommand(
            obtain_sql, self.user, first_cn.hostname, first_cn.port,
            ignoreError=False, is_inplace_upgrade=True)
        if status == 0:
            return output.strip()

        raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % obtain_sql + " Error:\n%s" % str(output))

    def getAllDatabase(self):
        """
        function: List the names of all databases that allow connections,
                  ordered by name, as reported by the first CN of getCooInst().
        input : NA
        output: list of database names (the query output split on newlines)
        """
        self.logger.debug("Getting all the databases.")
        try:
            first_cn = self.getCooInst()[0]

            select_sql = "SELECT datname FROM pg_catalog.pg_database WHERE datallowconn ORDER BY 1"
            self.logger.debug("Get databases from cluster command: %s." % select_sql)
            (status, output) = ClusterCommand.remoteSQLCommand(
                select_sql, self.user, first_cn.hostname, first_cn.port,
                ignoreError=False, is_inplace_upgrade=True)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % select_sql + " Error: \n%s" % str(output))
            self.logger.debug("Get databases from cluster result: %s." % output)

            # one database name per output line
            names = output.split("\n")
            self.logger.debug("Successfully get all the databases.")
            return names
        except Exception as e:
            raise Exception(str(e))

    def updateTableDepend(self, sourceGroup, destGroup):
        """
        function: Update table dependencies
        input : sourceGroup---source node group
                destGroup---destination node group
        output: NA
        """
        self.logger.log("Updating table dependencies.")
        try:
            # 1.get CN instance info from cluster
            coo_inst = self.getCooInst()

            # 2..get all database
            databases = self.getAllDatabase()
            # 3.remove 'template1'
            databases.remove("template1")

            # 4.execute sql in every database.
            find_table_count_sql = "select count(pgroup) from pgxc_class where pgroup = '%s' " % sourceGroup

            start_transaction_sql = "SET xc_maintenance_mode = on; START TRANSACTION; "
            set_lockwait_sql = "SET update_lockwait_timeout =  1200000;"
            alter_sql = "alter node group \"%s\" set table group \"%s\";" % (sourceGroup, destGroup)

            end_transaction_sql = "COMMIT; RESET xc_maintenance_mode; "
            # total sql command
            rename_sql = "%s%s%s%s" % (start_transaction_sql, set_lockwait_sql, alter_sql, end_transaction_sql)
            # execute sql statement in each database
            while (len(databases) > 0):
                for database in databases:
                    (status, output) = ClusterCommand.remoteSQLCommand(find_table_count_sql,
                                                                       self.user,
                                                                       coo_inst[0].hostname,
                                                                       coo_inst[0].port,
                                                                       False,
                                                                       database,
                                                                       is_inplace_upgrade=True)
                    if status != 0:
                        raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % find_table_count_sql +
                                        " Error:\n%s" % output)
                    if int(output.strip()) != 0:
                        self.logger.debug("execute rename operation in database [%s]: %s." %
                                          (database, rename_sql))
                        # execute the sql command in a different database
                        (status, output) = ClusterCommand.remoteSQLCommand(rename_sql,
                                                                           self.user,
                                                                           coo_inst[0].hostname,
                                                                           coo_inst[0].port,
                                                                           False,
                                                                           database,
                                                                           is_inplace_upgrade=True)
                        if status != 0:
                            raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % rename_sql +
                                            " Error:\n%s" % output)
                    else:
                        databases.remove(database)
        except Exception as e:
            raise Exception(str(e))

        self.logger.log("Successfully updated table dependencies.")

    def updateGroupInfo(self, sourceGroup, destGroup):
        """
        function: Copy the buckets of sourceGroup into destGroup in one maintenance-mode
                  transaction; when self.isAddElasticGroup is set and
                  getNodeOfAddToElasticGroup() returns a non-empty string, those nodes
                  are added to elastic_group in the same transaction.
        input : sourceGroup---source node group
                destGroup---destination node group
        output: NA
        """
        self.logger.log("Updating group buckets and node list.")
        try:
            first_cn = self.getCooInst()[0]

            # optional statement that moves the shrunk nodes into the elastic group
            alter_elastic_sql = ''
            if self.isAddElasticGroup:
                elastic_node = self.getNodeOfAddToElasticGroup()
                if len(elastic_node) > 0:
                    alter_elastic_sql = "alter node group elastic_group Add node (%s);" % elastic_node

            update_sql = "".join([
                "SET xc_maintenance_mode = on; START TRANSACTION;",
                "ALTER NODE GROUP \"%s\" COPY BUCKETS FROM \"%s\";" % (destGroup, sourceGroup),
                alter_elastic_sql,
                "COMMIT; RESET xc_maintenance_mode;",
            ])
            self.logger.debug("execute update group info : %s." % update_sql)

            (status, output) = ClusterCommand.remoteSQLCommand(update_sql,
                                                               self.user,
                                                               first_cn.hostname,
                                                               first_cn.port,
                                                               False,
                                                               is_inplace_upgrade=True)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % update_sql + " Error:\n%s" % output)
        except Exception as e:
            raise Exception(str(e))
        self.logger.log("Successfully update group buckets and node list.")

    def checkOnlineGucParameter(self, mode, Nodenames):
        """
        """
        if mode:
            expectValue = "on"
        else:
            expectValue = "off"

        # 1.get all CN instance info from cluster
        coo_inst = self.getCooInst()
        # 2.get exec cn
        exec_inst = []
        if Nodenames is None:
            exec_inst = coo_inst
        else:
            for cn_inst in coo_inst:
                if cn_inst.hostname in Nodenames:
                    exec_inst.append(cn_inst)

        sql = "SHOW enable_online_ddl_waitlock;"
        for cn_inst in exec_inst:
            (status, output) = ClusterCommand.remoteSQLCommand(sql,
                                                               self.user,
                                                               cn_inst.hostname,
                                                               cn_inst.port,
                                                               ignoreError=False,
                                                               is_inplace_upgrade=True)
            res_list = output.split('\n')
            if status != 0:
                continue
            if len(res_list) < 1:
                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % sql + " Error:\n%s" % str(output))
            # parser sql result
            if res_list[0].strip() != expectValue:
                output = "The value of enable_online_ddl_waitlock on cn_%s is not %s." % \
                         (cn_inst.instanceId, expectValue)
                raise Exception(ErrorCode.GAUSS_500["GAUSS_50008"] + " Error: \n%s" % str(output))

    def getMasterDnInst(self):
        """
        function: Return a datanode instance whose cluster status is 'Primary'.
                  The cluster status is written to a temporary file, loaded with
                  DbClusterStatus and the file is removed again; the instance id
                  found there is then looked up in self.clusterInfo.
        input : NA
        output: datanode instance taken from self.clusterInfo
        """
        status_file = DefaultValue.getTmpFileFromEnv("expand_ha_status.dat", self.user)
        DefaultValue.cleanTmpFile(status_file)
        query_cmd = ClusterCommand.getQueryStatusCmd(self.user, 0, status_file)
        (status, output) = subprocess.getstatusoutput(query_cmd)
        if status != 0:
            DefaultValue.cleanTmpFile(status_file)
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51600"] +
                            "\nCommand:\n  %s\nOutput:\n  %s" % (query_cmd, str(output)))
        cluster_status = DbClusterStatus()
        cluster_status.initFromFile(status_file)
        DefaultValue.cleanTmpFile(status_file)

        # take the first primary DN of each node; all nodes are scanned, so the
        # id found on the last node that has a primary is the one kept
        instance_id = ""
        for status_node in cluster_status.dbNodes:
            primary = next((dn for dn in status_node.datanodes if dn.status == 'Primary'), None)
            if primary is not None:
                instance_id = primary.instanceId
        if instance_id == "":
            raise Exception(ErrorCode.GAUSS_526["GAUSS_52633"])

        # map the id back to the static cluster description
        dn_inst = None
        for db_node in self.clusterInfo.dbNodes:
            matched = next((dn for dn in db_node.datanodes if dn.instanceId == instance_id), None)
            if matched is not None:
                dn_inst = matched
        if not dn_inst:
            raise Exception(ErrorCode.GAUSS_526["GAUSS_52633"])
        return dn_inst

    def set_guc_conf_value(self):
        """
        function: Write the currently effective communication/memory GUC values
                  (read through the get_* helpers) to all coordinators and datanodes
                  with "gs_guc set", then switch autovacuum off with "gs_guc reload".
        input : NA
        output: NA (raises Exception when a gs_guc command fails)
        """
        comm_max_datanode = self.get_comm_max_datanode()
        comm_max_stream = self.get_comm_max_stream()
        cn_max_process_memory = self.get_max_process_memory(is_cn_inst=True)
        dn_max_process_memory = self.get_max_process_memory(is_cn_inst=False)
        max_coordinators = self.get_max_coordinators()

        coordinator_cmd = "gs_guc set -Z coordinator -I all -N all -c 'comm_max_datanode=%s'" \
                          " -c 'max_process_memory=%s' -c 'max_coordinators=%s'" % \
                          (comm_max_datanode, cn_max_process_memory, max_coordinators)
        datanode_cmd = "gs_guc set -Z datanode -I all -N all -c 'comm_max_stream=%s' " \
                       "-c 'comm_max_datanode=%s' -c 'max_process_memory=%s' -c 'max_coordinators=%s'" % \
                       (comm_max_stream, comm_max_datanode, dn_max_process_memory, max_coordinators)
        set_cmd = "%s; %s" % (coordinator_cmd, datanode_cmd)

        # turn off autovacuum
        autovacuum_cmd = "gs_guc reload -Z coordinator -Z datanode -N all -I all " \
                         "-c 'autovacuum=off' -c 'autovacuum_max_workers=0'"

        for cmd in (set_cmd, autovacuum_cmd):
            self.logger.debug("Command for setting GUC parameter: %s" % cmd)
            (status, output) = DefaultValue.retryGetstatusoutput(cmd)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_500["GAUSS_50008"] + "Error:\n%s" % output)

    def get_guc_value(self, guc_name, is_cn_inst=True):
        """
        function: Read GUC setting(s) from a running instance.
                  A guc_name containing "','" (a quoted, comma separated name list)
                  is looked up in pg_settings, anything else is read with SHOW.
        input : guc_name   : parameter name, or quoted list of names
                is_cn_inst : True queries the first CN of getCooInst(),
                             False queries the instance from getMasterDnInst()
        output: stripped query output
        """
        inst = self.getCooInst()[0] if is_cn_inst else self.getMasterDnInst()

        is_name_list = guc_name.find("','") > 0
        if is_name_list:
            sql = "SELECT name, setting FROM pg_settings WHERE name IN (%s)" % guc_name
        else:
            sql = "SHOW %s" % guc_name

        (status, output) = ClusterCommand.remoteSQLCommand(
            sql, self.user, inst.hostname, inst.port,
            ignoreError=False, is_inplace_upgrade=True)
        if status != 0 or ClusterCommand.findErrorInSql(output):
            raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % sql + " Error: \n%s" % str(output))
        return output.strip()

    def get_comm_max_datanode(self):
        """
        get real comm_max_datanode
        """
        output = self.get_guc_value("comm_max_datanode", True)
        return output

    def get_comm_max_stream(self):
        """
        get real comm_max_stream
        """
        output = self.get_guc_value("comm_max_stream", False)
        return output

    def get_max_process_memory(self, is_cn_inst=True):
        """
        get real max_process_memory
        """
        output = self.get_guc_value("max_process_memory", is_cn_inst)
        return output

    def get_max_coordinators(self):
        """
        get real max_coordinators
        """
        output = self.get_guc_value("max_coordinators", True)
        return int(output)

    def begin_online_build(self, node_names=None):
        """
        :param node_names:
        :return:
        """
        self.manageOnlineStateControlFile("add", node_names)
        self.setOnlineGucParameter(True, node_names)

    def end_online_build(self, node_names=None):
        """
        :param node_names:
        :return:
        """
        self.manageOnlineStateControlFile("delete", node_names)
        self.setOnlineGucParameter(False, node_names)

    def setOnlineGucParameter(self, mode=False, Nodenames=None):
        """
        function: Set enable_online_ddl_waitlock for online dilatation and verify it.
                  Without Nodenames the value is reloaded on the CNs of all nodes with
                  gs_guc; otherwise the Local_ManageCN script is run on the given
                  nodes. After a 5 second pause the result is checked with
                  checkOnlineGucParameter().
        input : mode      : value to set (formatted into the command as given)
                Nodenames : nodes to operate on, None for all
        output: NA
        """
        self.logger.debug("Setting GUC parameter for online.")
        if Nodenames is not None:
            script_args = (OMCommand.getLocalScript("Local_ManageCN"),
                           SET_ONLINE_PARAMETER,
                           self.xmlFile,
                           self.localLog,
                           mode,
                           self.logAction,
                           self.logUuid,
                           self.logger.step)
            cmd = "%s -t %s -X '%s' -l %s --set-mode=%s --log-action=%s --log-uuid=%s --log-step=%s" % script_args
            self.logger.debug("Command for setting GUC parameter for online: %s" % cmd)
            self.sshTool.executeCommand(cmd, "set online parameter", DefaultValue.SUCCESS, Nodenames)
        else:
            cmd = 'gs_guc reload -Z coordinator -N all -I all -c "enable_online_ddl_waitlock=%s"' % mode
            self.logger.debug("Command for setting GUC parameter for online: %s" % cmd)
            (status, output) = DefaultValue.retryGetstatusoutput(cmd)
            self.logger.debug("Output for setting GUC parameter for online: %s %s" % (status, output))
            if status != 0:
                raise Exception(ErrorCode.GAUSS_500["GAUSS_50008"] + "Error:\n%s" % output)
        # pause before reading the value back
        time.sleep(5)
        self.checkOnlineGucParameter(mode, Nodenames)
        self.logger.debug("Successfully set GUC parameter for online.")

    def cleanPreTransaction(self):
        """
        function: Run the Local_Dilatation_Config script with target "cleanup2PCs"
                  on every node that hosts a coordinator.
        input : NA
        output: NA
        """
        self.logger.debug("Cleaning up outstanding 2PCs before delete nodegroup.")
        cn_nodenames = [node.name for node in self.clusterInfo.dbNodes if len(node.coordinators) > 0]
        script_args = (OMCommand.getLocalScript("Local_Dilatation_Config"),
                       self.localLog,
                       self.logAction,
                       self.logUuid,
                       self.logger.step)
        cmd = "%s --target=cleanup2PCs -l '%s' --log-action=%s --log-uuid=%s --log-step=%s" % script_args
        self.sshTool.executeCommand(cmd, "execute gs_clean", DefaultValue.SUCCESS, cn_nodenames)
        self.logger.debug("Successfully cleaned up outstanding 2PCs before delete nodegroup.")

    def getNodeOfAddToElasticGroup(self):
        """
        function: get nodes string that will be added to the elastic group.
        input : NA
        output: nodes string
        """
        return ''

    def dropNodeGroup(self, sourceGroup, destGroup):
        """
        function: Drop node group sourceGroup ("distribute from" destGroup) in a
                  maintenance-mode transaction after cleaning outstanding 2PCs, then
                  remove the cgroup configuration of sourceGroup where its
                  configuration file exists.
        input : sourceGroup---source node group
                destGroup---destination node group
        output: NA
        """
        self.cleanPreTransaction()
        self.logger.debug("Deleting node group.")

        first_cn = self.getCooInst()[0]

        # the drop statement wrapped in a maintenance-mode transaction
        drop_sql = "".join([
            "SET xc_maintenance_mode = on; START TRANSACTION; ",
            "drop node group \"%s\" distribute from \"%s\"; " % (sourceGroup, destGroup),
            "COMMIT; RESET xc_maintenance_mode; ",
        ])
        self.logger.debug("Sql command for drop nodegroup: %s\n" % drop_sql)
        (status, output) = ClusterCommand.remoteSQLCommand(drop_sql,
                                                           self.user,
                                                           first_cn.hostname,
                                                           first_cn.port,
                                                           ignoreError=False,
                                                           is_inplace_upgrade=True,
                                                           enable_retry=True)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % drop_sql + " Error:\n%s" % str(output))

        # gs_cgroup is only called when the group's cgroup config file is present
        app_path = self.clusterInfo.appPath
        tmp_cgroup_file_path = "%s/etc/%s.gscgroup_%s.cfg" % (app_path, sourceGroup, self.user)
        cmd = "(if [ -f '%s' ];then %s/bin/gs_cgroup -d -N %s ;fi)" % \
              (tmp_cgroup_file_path, self.clusterInfo.appPath, sourceGroup)
        self.logger.debug("Command for cleaning class control groups "
                          "for redistributing temporary nodegroup : " + cmd)
        self.sshTool.executeCommand(cmd, "clean cgroup config")
        self.logger.debug("Successfully deleted node group.")

    def dropAndUpdateGroupMessage(self, kvExpand, changeNodes=None):
        """
        function: drop and update group step
                  1. update table depend.
                  2. exchange group_members of new group with old group.
                  3. update the in_redistribution field information of the old node group.
        input : kvExpand --the flag of capacity expansion, changeNodes.
        output: NA
        """
        self.logger.debug("Deleting and updating node group.")

        # 1.get new group name and old group name
        # get new group name
        new_group = self.getGroupName("in_redistribution", "'t'")
        # get old group name
        old_group = self.getGroupName("in_redistribution", "'y'")
        if new_group == "" or old_group == "":
            self.logger.debug("Can not find new and old node groups, group information ('%s', '%s')." %
                              (new_group, old_group))
            return
        if kvExpand:
            changeNodes = self.getNewDnNodes(old_group, new_group)
        self.lockMode = "waitingLock"
        self.timeout = DefaultValue.CLUSTER_LOCK_TIME
        coor_nodes = []
        for db_node in self.clusterInfo.dbNodes:
            if len(db_node.coordinators) >= 1:
                coor_nodes.append(db_node.name)
        self.unlockClusterInternal(coor_nodes, DefaultValue.CLUSTER_LOCK_TIME, self.logger, self.sshTool, 'redis', self)
        try:
            # ensure no temporary tables remain
            self.dropTempSchemaInOldGroup()
            # 2.Update group_buckets and group_members
            self.updateGroupInfo(new_group, old_group)
            self.setCanInsertIntoNewNode(False)
            self.wait_current_active_ddl_complete(0, 'true', DefaultValue.CLUSTER_LOCK_TIME)

            # 3.Update table dependencies
            self.updateTableDepend(new_group, old_group)

            # 4.drop new group
            self.dropNodeGroup(new_group, old_group)
            # 5.update replication node group if exists
            rep_nodegroup = self.getRepNodeGroupPair()
            if len(rep_nodegroup) != 0:
                self.dropAndUpdateRGroupMessage(rep_nodegroup)
            # 6.dispatch bucket map
            if kvExpand:
                self.expandKV(old_group, changeNodes)
            else:
                self.shrinkKV(old_group, changeNodes)

            # unlock cluster
            self.setCanInsertIntoNewNode(True)
            self.wait_current_active_ddl_complete(0, 'false', DefaultValue.CLUSTER_LOCK_TIME)
        except Exception as ex:
            # unlock cluster
            raise Exception(str(ex))

        self.logger.debug("Successfully deleted and updated node group.")

    def getNewDnNodes(self, old_group, new_group):
        """
        function: Determine the node names that are members of new_group but not
                  of old_group, by querying pgxc_node/pgxc_group on the first CN.
        input : old_group, new_group : node group names
        output: list of node names (order not defined, it comes from a set)
        """
        first_cn = self.getCooInst()[0]
        members_sql = "select node_name from pgxc_node where oid in (select(regexp_split_to_table((oidvectorout_extend(group_members))::text, ' '))::bigint as oid_list from pgxc_group where group_name = '%s');"
        sql_old_group = members_sql % old_group
        sql_new_group = members_sql % new_group

        def _query_members(sql):
            # run one membership query and fail on a non-zero status
            (status, output) = ClusterCommand.remoteSQLCommand(sql,
                                                               self.user,
                                                               first_cn.hostname,
                                                               first_cn.port,
                                                               ignoreError=False)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51601"] %
                                "getNewDnNodes failed" + " Error:\n%s" % str(output))
            return output

        output_old_group = _query_members(sql_old_group)
        output_new_group = _query_members(sql_new_group)

        old_members = set(output_old_group.split())
        new_members = set(output_new_group.split())
        return list(new_members.difference(old_members))

    def shrinkKV(self, node_group, deleteNodes):
        """
        function: Run the kv shrink statements for node_group on the first CN as
                  one SQL batch.
        input : node_group  : the node group after shrink and complete cleanup
                deleteNodes : removed nodes; resolved to DN names through
                              getListenIpList() and getDnNameByListenIp()
        output: NA
        """
        first_cn = self.getCooInst()[0]

        # node names -> listen IPs -> DN names
        ip_list = self.getListenIpList(deleteNodes)
        delete_list = self.getDnNameByListenIp(ip_list)

        statements = [
            "select pgxc_recyclebin_redis('%s');" % node_group,
            # 1. dispatch vgroup info
            "select pgxc_dispatch_kv_vgroup('%s');" % node_group,
            # 2. dispatch bucket map
            "select pgxc_dispatch_bucket_map_with_csn('%s', txid_current_csn()::text::xid, true);" % node_group,
            # 3. redistribute bucket on kv
            "select pgxc_resize_bucket_map('%s');" % node_group,
            # 4. offline old bucket map
            "select pgxc_offline_old_bucket_map('%s');" % node_group,
        ]
        # 5. reset kv on every deleted DN
        statements.extend("execute direct on(%s) 'select pg_cudesc_kv_reset();';" % dn_name
                          for dn_name in delete_list)
        sql = "".join(statements)

        (status, output) = ClusterCommand.remoteSQLCommand(sql,
                                                           self.user,
                                                           first_cn.hostname,
                                                           first_cn.port,
                                                           ignoreError=False)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51601"] %
                            "shrinkKV failed" + " Error:\n%s" % str(output))

    def expandKV(self, node_group, newNodeNames):
        """
        function: Run the kv expand statements for node_group on the first CN as
                  one SQL batch.
        input : node_group   : the node group after expand and complete cleanup
                newNodeNames : names of the new nodes; each gets an
                               "execute direct" clear and reset statement
        output: NA
        """
        first_cn = self.getCooInst()[0]

        statements = ["select pgxc_recyclebin_redis('%s');" % node_group]
        # 1. clear old vgroup info of newNodeNames
        statements.extend("execute direct on(%s) 'select pg_clear_kv_vgroup();';" % node_name
                          for node_name in newNodeNames)
        # 2. dispatch vgroup info immediately
        statements.append("select pgxc_dispatch_kv_vgroup('%s');" % node_group)
        # 3. dispatch bucket map
        statements.append(
            "select pgxc_dispatch_bucket_map_with_csn('%s', txid_current_csn()::text::xid, true);" % node_group)
        # 4. reset kv on newNodeNames
        statements.extend("execute direct on(%s) 'select pg_cudesc_kv_reset();';" % node_name
                          for node_name in newNodeNames)
        # 5. redistribute bucket on kv
        statements.append("select pgxc_resize_bucket_map('%s');" % node_group)
        # 6. offline old bucket map immediately
        statements.append("select pgxc_offline_old_bucket_map('%s');" % node_group)
        sql = "".join(statements)

        (status, output) = ClusterCommand.remoteSQLCommand(sql,
                                                           self.user,
                                                           first_cn.hostname,
                                                           first_cn.port,
                                                           ignoreError=False)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51601"] %
                            "expandKV failed" + " Error:\n%s" % str(output))

    def dispatchBucketMap(self, node_group):
        """
        function: call pgxc_dispatch_bucket_map_with_csn for a node group on the first coordinator
        input : node_group --node group name passed to pgxc_dispatch_bucket_map_with_csn
        output: NA
        """
        coordinator = self.getCooInst()[0]
        dispatch_sql = "select pgxc_dispatch_bucket_map_with_csn('%s', txid_current_csn()::text::xid, true);" % \
                       node_group

        exit_code, sql_output = ClusterCommand.remoteSQLCommand(dispatch_sql,
                                                                self.user,
                                                                coordinator.hostname,
                                                                coordinator.port,
                                                                ignoreError=False)
        if exit_code == 0:
            return
        raise Exception(ErrorCode.GAUSS_516["GAUSS_51601"] %
                        "dispatchBucketMap failed" + " Error:\n%s" % str(sql_output))

    def deleteRedisStatusAndDetailTable(self):
        """
        function: drop the redis_progress_detail and redis_status tables left by a previous run
        input : NA
        output: NA
        """
        self.logger.debug("Delete the last remaining status and details table of redis.")
        coordinator = self.getCooInst()[0]
        # The detail table is dropped first, then the status table; the first failure aborts.
        drop_statements = ("DROP TABLE IF EXISTS redis_progress_detail CASCADE;",
                           "DROP TABLE IF EXISTS redis_status CASCADE;")
        for drop_sql in drop_statements:
            exit_code, sql_output = ClusterCommand.remoteSQLCommand(drop_sql,
                                                                    self.user,
                                                                    coordinator.hostname,
                                                                    coordinator.port,
                                                                    ignoreError=False,
                                                                    is_inplace_upgrade=True)
            if exit_code != 0:
                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % drop_sql + " Error:\n%s" % str(sql_output))
        self.logger.debug(
            "Successfully deleted the last remaining status and details table of redis.")

    def manageCMControlFile(self, mode="delete"):
        """
        function: switch the cluster O&M status through DefaultValue.modifyClusterOMStatus
        input : mode --'add' turns the status "on" for the ids of self.newNodeNames,
                       any other value turns it "off"
        output: NA
        """
        self.logger.debug("Managing CMServer control file for '%s' mode." % mode)

        def resolve_node_id(name):
            # Every new node name must exist in the cluster info, whatever the mode is.
            matched_node = self.clusterInfo.getDbNodeByName(name)
            if matched_node is None:
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51619"] % name)
            return matched_node.id

        new_node_ids = [resolve_node_id(name) for name in self.newNodeNames]
        if mode != "add":
            DefaultValue.modifyClusterOMStatus("off")
        else:
            DefaultValue.modifyClusterOMStatus("on", new_node_ids)
        self.logger.debug("Successfully managed CMServer control file.")

    def getEtcdUrl(self, clusterInfo=None):
        """
        function: build the etcdctl command prefix (certificates and endpoints) and collect
                  the names of the nodes that host an etcd instance
        input : clusterInfo --cluster description to scan; defaults to self.clusterInfo
        output: (command prefix, etcd host names), or ("", []) when no node has an etcd instance
        """
        install_dir = DefaultValue.getInstallDir(self.user)
        ca_cert = "%s/share/sslcert/etcd/etcdca.crt" % install_dir
        client_key = "%s/share/sslcert/etcd/client.key" % install_dir
        client_cert = "%s/share/sslcert/etcd/client.crt" % install_dir
        base_cmd = "export ETCDCTL_API=3 && etcdctl --cacert=%s --cert=%s --key=%s --endpoints=" % \
                   (ca_cert, client_cert, client_key)
        if clusterInfo is None:
            clusterInfo = self.clusterInfo

        etcd_hosts = []
        endpoints = []
        for db_node in clusterInfo.dbNodes:
            if len(db_node.etcds) == 0:
                continue
            etcd_hosts.append(db_node.name)
            # Only the first etcd instance of a node and its first listen IP are used.
            first_etcd = db_node.etcds[0]
            endpoints.append("https://%s:%s" % (first_etcd.listenIps[0], first_etcd.port))

        if not endpoints:
            return "", []

        etcd_cmd = base_cmd + ",".join(endpoints)
        self.logger.debug("The etcdctl cmdpre:%s" % etcd_cmd)
        return etcd_cmd, etcd_hosts

    def deleteTmpFile(self, tmpFile):
        """
        function: delete a file by running the "deleteFile" shell command template locally
        input : tmpFile --path of the file to delete
        output: NA
        """
        delete_cmd = g_file.SHELL_CMD_DICT["deleteFile"] % (tmpFile, tmpFile)
        exit_code, cmd_output = subprocess.getstatusoutput(delete_cmd)
        if exit_code == 0:
            return
        raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % delete_cmd + " Error: \n%s" % cmd_output)

    def getGtmInstanceList(self, clusterInfo):
        """
        function: collect the GTM instances of all nodes in node order
        input : clusterInfo --cluster description whose dbNodes are scanned
        output: list of GTM instances (empty when no node has one)
        """
        return [gtm_inst
                for db_node in clusterInfo.dbNodes
                if len(db_node.gtms) != 0
                for gtm_inst in db_node.gtms]

    def getEtcdInfoFromJsonFile(self, gtmControlFile, gtmSequenceFile):
        """
        function: extract the gxid and the sequence entries from local copies of the gtm files
        input : gtmControlFile --path of the control file; its first non-blank line becomes 'gxid'
                gtmSequenceFile --path of the sequence file; its N-th non-blank line (N >= 2)
                                  becomes 'sequenceN'
        output: dict with the collected entries; a file that does not exist contributes nothing
        """
        collected = {}

        if os.path.isfile(gtmControlFile):
            try:
                with open(gtmControlFile, "r") as control_fp:
                    control_lines = [raw.strip() for raw in control_fp.readlines() if raw.strip() != ""]
                if control_lines:
                    collected['gxid'] = control_lines[0]
            except IOError as e:
                raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % gtmControlFile + " Error:%s." % str(e))

        if os.path.isfile(gtmSequenceFile):
            try:
                with open(gtmSequenceFile, "r") as sequence_fp:
                    sequence_lines = [raw.strip() for raw in sequence_fp.readlines() if raw.strip() != ""]
                # The first non-blank line is skipped; numbering follows the non-blank line position.
                for position, text in enumerate(sequence_lines[1:], start=2):
                    collected["sequence%d" % position] = text
            except IOError as e:
                raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % gtmSequenceFile + " Error:%s." % str(e))
        return collected

    def copyGtmDataToJsonFile(self, clusterInfo, tmpDir):
        """
        function: copy gtm.control from every GTM instance, choose the instance whose first
                  control-file line holds the largest integer, copy its gtm.sequence and write
                  the parsed data to <tmpDir>/etcd_data.json
        input : clusterInfo --cluster description used to enumerate the GTM instances
                tmpDir --local directory for the copied files and the json file
        output: NA
        """
        self.logger.debug("Obtaining master GTM information.")

        def run_scp(scp_cmd, copied_item):
            # Run one scp command locally; log the details and raise if it fails.
            exit_code, cmd_output = subprocess.getstatusoutput(scp_cmd)
            if exit_code != 0:
                self.logger.debug("Command: %s.Error: \n%s" % (scp_cmd, cmd_output))
                raise Exception(ErrorCode.GAUSS_502["GAUSS_50216"] % copied_item)

        all_gtms = self.getGtmInstanceList(clusterInfo)
        chosen_gtm = None
        highest_xid = -1

        for gtm_inst in all_gtms:
            # scp gtm.control file from gtm instance path to tmpPath
            control_copy = "%s/gtm_control_%s" % (tmpDir, gtm_inst.hostname)
            run_scp("export LD_LIBRARY_PATH=/lib64:$LD_LIBRARY_PATH; "
                    "scp %s:%s/gtm.control %s" % (gtm_inst.hostname,
                                                  gtm_inst.datadir,
                                                  control_copy),
                    "gtm.control file")
            try:
                # The first line of the control file is compared as an integer xid.
                with open(control_copy, "r") as control_fp:
                    control_lines = control_fp.readlines()
                    candidate_xid = int((control_lines[0]).strip())
                    if candidate_xid > highest_xid:
                        highest_xid = candidate_xid
                        chosen_gtm = gtm_inst
            except Exception as e:
                self.logger.debug(str(e))
                raise Exception(ErrorCode.GAUSS_502["GAUSS_50204"] % "GTM.control file")

        # control file copy of the chosen instance
        chosen_control = "%s/gtm_control_%s" % (tmpDir, chosen_gtm.hostname)
        # scp gtm.sequence file from gtm instance path to tmpPath
        sequence_copy = "%s/gtm_sequence_%s" % (tmpDir, chosen_gtm.hostname)
        run_scp("export LD_LIBRARY_PATH=/lib64:$LD_LIBRARY_PATH; "
                "scp %s:%s/gtm.sequence %s" % (chosen_gtm.hostname, chosen_gtm.datadir, sequence_copy),
                "gtm.sequence file")

        parsed_data = self.getEtcdInfoFromJsonFile(chosen_control, sequence_copy)
        if len(parsed_data) > 0:
            self.logger.debug("The max gtm xid and sequence information: node[%s]\ninfo:\n%s" %
                              (chosen_gtm.hostname, parsed_data))
            # remove any previous json file before writing the new one
            json_path = "%s/etcd_data.json" % tmpDir
            self.deleteTmpFile(json_path)
            g_file.generateJsonFile(json_path, parsed_data)

        # clean the local copies
        for gtm_inst in all_gtms:
            self.deleteTmpFile("%s/gtm_control_%s" % (tmpDir, gtm_inst.hostname))
        self.deleteTmpFile(sequence_copy)
        self.logger.debug("Successfully obtained the gtm information.")

    def setEtcdDataByJsonFile(self, clusterInfo, jsonFile, isDataFromGtm=True):
        """
        function: put every entry of a json file into etcd with etcdctl, then delete the file
        input : clusterInfo --cluster description used to build the etcdctl command
                jsonFile --path of the json file holding the entries
                isDataFromGtm --True: 'gxid' maps to /<user>/gxid and every other value is split
                                into a sequence name (key suffix) and tab-joined fields (value);
                                False: keys and values are written unchanged
        output: NA
        """
        self.logger.debug("Setting etcd data.")
        if not os.path.isfile(jsonFile):
            raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % jsonFile)
        etcd_entries = g_file.parseJsonFile(jsonFile)

        etcdctl_prefix, _ = self.getEtcdUrl(clusterInfo)
        if etcdctl_prefix == "":
            self.logger.debug("The cluster is not installed etcd.")
            return

        etcd_action = "put"
        for entry_key, entry_value in etcd_entries.items():
            if not isDataFromGtm:
                etcd_key, etcd_value = entry_key, entry_value
            elif entry_key == "gxid":
                etcd_key, etcd_value = "/%s/gxid" % self.user, entry_value
            else:
                # First whitespace-separated field names the sequence, the rest is its data.
                etcd_key = "/%s/sequence/%s" % (self.user,
                                                entry_value.split()[0])
                etcd_value = "\t".join(entry_value.split()[1:])

            put_args = " %s '%s' '%s'" % (etcd_action, etcd_key, etcd_value)
            full_cmd = "%s %s" % (etcdctl_prefix, put_args)
            self.logger.debug("Command for set etcd information:\n%s" % full_cmd)
            exit_code, cmd_output = subprocess.getstatusoutput(full_cmd)
            if exit_code != 0:
                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % full_cmd + " Error: \n%s" % cmd_output)
        self.deleteTmpFile(jsonFile)
        self.logger.debug("Successfully set the etcd data information.")

    def hasETCD(self, clusterInfo=None):
        """
        function: tell whether any node of the cluster has an etcd instance
        input : clusterInfo --cluster description to scan; defaults to self.clusterInfo
        output: True when at least one node has an etcd instance, otherwise False
        """
        cluster = self.clusterInfo if clusterInfo is None else clusterInfo
        return any(len(db_node.etcds) > 0 for db_node in cluster.dbNodes)

    def startClusterEtcd(self, clusterInfo=None):
        """
        function: remove the etcd_manual_start flag file on the etcd hosts and poll
                  "endpoint health" until etcd reports healthy
        input : clusterInfo --cluster description; defaults to self.clusterInfo
        output: NA
        """
        self.logger.debug("Starting cluster Etcd.")
        if clusterInfo is None:
            clusterInfo = self.clusterInfo
        etcdctl_prefix, etcd_hosts = self.getEtcdUrl(clusterInfo)
        if etcdctl_prefix == "":
            self.logger.debug("The cluster is not installed etcd.")
            return

        flag_file = "%s/bin/etcd_manual_start" % DefaultValue.getInstallDir(self.user)
        remove_flag_cmd = g_file.SHELL_CMD_DICT["deleteFile"] % (flag_file, flag_file)
        self.logger.debug("Command for deleting ETCD manual start file: %s" % remove_flag_cmd)
        DefaultValue.execCommandWithMode(remove_flag_cmd,
                                         "start etcd",
                                         self.sshTool,
                                         self.isSingle,
                                         self.mpprcFile,
                                         etcd_hosts)
        time.sleep(5)

        health_cmd = "%s endpoint health" % etcdctl_prefix
        failed_checks = 0
        while True:
            exit_code, cmd_output = subprocess.getstatusoutput(health_cmd)
            self.logger.debug("The ETCD healthy cmd:%s\nstatus:%s\noutput:%s" %
                              (health_cmd, exit_code, cmd_output))
            if exit_code == 0 and "unhealthy" not in cmd_output:
                break
            # Give up after the 13th failed check; wait 5 seconds between checks.
            if failed_checks >= 12:
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] % "etcd")
            time.sleep(5)
            failed_checks += 1

        self.logger.debug("Successfully start cluster Etcd.")

    def setGxid(self, clusterInfo=None):
        """
        function: set gxid and check it's value equal 20000
        input : NA
        output: NA
        """
        self.logger.debug("Set gxid on etcd.")
        if None is clusterInfo:
            clusterInfo = self.clusterInfo
        (cmd_pre, _) = self.getEtcdUrl(clusterInfo)
        if "" == cmd_pre:
            self.logger.debug("The cluster is not installed etcd.")
            return

        flag = "put"
        set_times = 0
        set_gxid_cmd = "%s %s /%s/gxid 20000" % (cmd_pre, flag, self.user)
        while True:
            (status, output) = subprocess.getstatusoutput(set_gxid_cmd)
            self.logger.debug("Set ETCD gxid cmd:%s\nstatus:%s\noutput:%s" % (set_gxid_cmd, status, output))
            if status != 0:
                if set_times >= 12:
                    raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % set_gxid_cmd + " Error:\n%s" % str(output))
                time.sleep(5)
                set_times += 1
            else:
                check_gxid_cmd = "%s get /%s/gxid" % (cmd_pre, self.user)
                self.logger.debug("Check ETCD gxid cmd:%s\nstatus:%s\noutput:%s" % (check_gxid_cmd, status, output))
                (status, output) = subprocess.getstatusoutput(check_gxid_cmd)
                if status != 0 or output.find("20000") < 0:
                    if set_times >= 12:
                        raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] %
                                        check_gxid_cmd + " Error:\n%s" % str(output))
                    time.sleep(5)
                    set_times += 1
                else:
                    break

        self.logger.debug("Successfully set the gxid.")

    def stopClusterEtcd(self, clusterInfo=None):
        """
        function: create the etcd_manual_start flag file on the etcd hosts and wait 10 seconds
        input : clusterInfo --cluster description; defaults to self.clusterInfo
        output: NA
        """
        self.logger.debug("Stopping cluster ETCD.")
        if clusterInfo is None:
            clusterInfo = self.clusterInfo
        etcdctl_prefix, etcd_hosts = self.getEtcdUrl(clusterInfo)
        if etcdctl_prefix == "":
            self.logger.debug("The cluster is not installed etcd.")
            return

        flag_file = "%s/bin/etcd_manual_start" % DefaultValue.getInstallDir(self.user)
        create_flag_cmd = g_file.SHELL_CMD_DICT["createFile"] % (flag_file, DefaultValue.KEY_FILE_MODE, flag_file)
        DefaultValue.execCommandWithMode(create_flag_cmd,
                                         "stop etcd",
                                         self.sshTool,
                                         self.isSingle,
                                         self.mpprcFile,
                                         etcd_hosts)
        time.sleep(10)

        self.logger.debug("Successfully stopped cluster ETCD.")

    def doBuildStandbyDN(self, nodeNames=None, isStartInst=True, isCmCtlBuild=False):
        """
        function: optionally start the master DN instances, build the standby DN instances,
                  then optionally stop all DN instances again
        input : nodeNames --list of target nodes (None is allowed)
                isStartInst --True: start the masters before and stop all DNs after the build
                isCmCtlBuild --True: add --cmctl-build to the build command
        output: NA
        """
        # Nothing is done when a single-instance cluster has a DN whose peer count is not 1.
        for db_node in self.clusterInfo.dbNodes:
            for dn_inst in db_node.datanodes:
                if not self.clusterInfo.isSingleInstCluster():
                    continue
                if len(self.clusterInfo.getPeerInstance(dn_inst)) != 1:
                    return

        def log_options():
            # Trailing log options shared by all commands; built on demand so that the
            # logger step current at that moment is used.
            return "-l %s --log-action=%s --log-uuid=%s --log-step=%s" % \
                   (self.localLog, self.logAction, self.logUuid, self.logger.step)

        self.logger.log("Building the standby DN.")
        try:
            target_nodes = self.changeNoneToEmpytList(nodeNames)
            if isStartInst:
                # start master dn
                start_cmd = "%s --manageDnInstance --actionType start --dnType master -U %s %s" % \
                            (OMCommand.getLocalScript("Local_Query"), self.user, log_options())
                self.logger.debug("Command for starting master instances: %s" % start_cmd)
                self.sshTool.executeCommand(start_cmd, "start master DN", DefaultValue.SUCCESS, target_nodes)

            # Building standby dn
            build_cmd = "%s --manageDnInstance --actionType build --dnType standby -U %s %s %s" % \
                        (OMCommand.getLocalScript("Local_Query"),
                         self.user,
                         "--cmctl-build" if isCmCtlBuild else "",
                         log_options())
            self.logger.debug("Command for building standby instances: %s" % build_cmd)
            self.sshTool.executeCommand(build_cmd, "build standby DN", DefaultValue.SUCCESS, target_nodes)

            if isStartInst:
                # stop all dn
                stop_cmd = "%s --manageDnInstance --actionType stop --dnType all -U %s %s" % \
                           (OMCommand.getLocalScript("Local_Query"), self.user, log_options())
                self.logger.debug("Command for stopping master and standby instances: %s" %
                                  stop_cmd)
                self.sshTool.executeCommand(stop_cmd,
                                            "stop master and standby DN",
                                            DefaultValue.SUCCESS, nodeNames)
        except Exception as e:
            # Best-effort cleanup; a cleanup failure is only logged and the original error wins.
            try:
                if isStartInst:
                    clean_cmd = "%s --clean_install_process -U %s %s" % \
                                (OMCommand.getLocalScript("Local_Query"), self.user, log_options())
                    self.sshTool.executeCommand(clean_cmd, "clean process", DefaultValue.SUCCESS, nodeNames)
            except Exception as e1:
                self.logger.debug("Failed to clean building process. ERROR:%s" % str(e1))
            raise Exception(str(e))
        self.logger.log("Successfully built the standby DN.")

    def stop_standby_datanode(self, nodeNames):
        """
        function: stop the standby DN instances on the given nodes
                  When building standby completed, the standby is started without security mode
                  in dws mode, so it is stopped here and cm_agent will start it with security mode.
                  Non-dws mode needs this too, because the standby is started by standby mode
                  (-M standby) while pending mode (-M pending) is correct.
        input : nodeNames --nodes on which the command is executed
        output: NA
        """
        option_values = (OMCommand.getLocalScript("Local_Query"),
                         self.user, self.localLog, self.logAction, self.logUuid, self.logger.step)
        stop_cmd = ("%s --manageDnInstance --actionType stop"
                    " --dnType standby -U %s -l %s --log-action=%s --log-uuid=%s --log-step=%s") % option_values
        self.logger.debug("Command for stopping master and standby instances: %s" % stop_cmd)
        self.sshTool.executeCommand(stop_cmd, "stop standby DN", DefaultValue.SUCCESS, nodeNames)

    def createGrpcCa(self, host_list=None, app_path=""):
        """
        function: create the grpc ca files for both ca types ("kernal", then "scheduler")
        input : host_list --target hosts, passed through to create_grpc_ca_file
                app_path --install directory, passed through to create_grpc_ca_file
        output: NA
        """
        for ca_type in ("kernal", "scheduler"):
            self.create_grpc_ca_file(host_list, app_path, ca_type)

    def create_grpc_ca_file(self, host_list=None, app_path="", ca_type=""):
        """
        function: generate the grpc CA files under the install directory and copy them to the hosts
        input : host_list --target host names; defaults to all cluster node names
                app_path --install directory; defaults to the install directory of self.user
                ca_type --"scheduler" selects share/sslcert/grpcio, any other value
                          selects share/sslcert/grpc
        output: NA
        """
        self.logger.debug("Generating grpc CA files.")
        if app_path == "":
            app_path = DefaultValue.getInstallDir(self.user)
        cert_subdir = "share/sslcert/grpcio" if ca_type == "scheduler" else "share/sslcert/grpc"
        ca_path = os.path.join(app_path, cert_subdir)
        self.logger.debug("The ca file dir is: %s." % ca_path)
        if host_list is None:
            host_list = self.clusterInfo.getClusterNodeNames()

        # Create CA dir and prepare files for using.
        self.logger.debug("Create CA file directory.")
        DefaultValue.create_tmp_ca_file_dir(self.sshTool, ca_path, host_list, ca_type)

        demo_path = os.path.join(ca_path, "demoCA")

        def push_cert(cert_name):
            # Set the key-file mode on one generated file and copy it into ca_path on the hosts.
            local_cert = os.path.join(demo_path, cert_name)
            self.logger.debug("Scp %s to %s." % (local_cert, ca_path))
            g_file.changeMode(DefaultValue.KEY_FILE_MODE, local_cert)
            self.sshTool.scpFiles(local_cert, ca_path + "/", host_list)

        # Add hostname to openssl.cnf file.
        self.logger.debug("Add hostname to config file.")
        openssl_conf = os.path.join(demo_path, "openssl.cnf")
        DefaultValue.change_openssl_file(openssl_conf, host_list)

        self.logger.debug("Generate CA files.")
        DefaultValue.create_ca_file(ca_path, self.logger)

        # One worker thread per certificate file.
        self.logger.debug("Scp CA files to all nodes.")
        scp_pool = ThreadPool(len(DefaultValue.GRPC_CERT_LIST))
        scp_pool.map(push_cert, DefaultValue.GRPC_CERT_LIST)
        scp_pool.close()
        scp_pool.join()

        # Remove the demoCA working directory once the files are distributed.
        g_file.removeDirectory(demo_path)

        self.logger.debug("Successfully generated grpc CA files.")

    def checkCanInsertIntoNewNode(self):
        """
        function: verify on every coordinator that "show disable_insert_into_newnodes" returns 'off'
        input : NA
        output: NA
        """
        coordinators = self.getCooInst()
        show_sql = "show disable_insert_into_newnodes;"
        for coordinator in coordinators:
            self.logger.debug("exectue show operation in database %s." % show_sql)
            exit_code, sql_output = ClusterCommand.remoteSQLCommand(show_sql,
                                                                    self.user,
                                                                    coordinator.hostname,
                                                                    coordinator.port,
                                                                    False,
                                                                    is_inplace_upgrade=True)
            if exit_code == 0 and sql_output == 'off':
                continue
            # Either the query failed or the parameter is not exactly 'off'.
            raise Exception(
                ErrorCode.GAUSS_513["GAUSS_51300"] % show_sql + " Error:\n%s" % sql_output)

    def createNewNode(self, readonly=False):
        """
        function: run the create_new_node target of the local dilatation script on every
                  node that has a coordinator
        input : readonly --True appends --readonly to the command
        output: NA
        """
        self.logger.log("Creating new node.")
        coordinator_hosts = [db_node.name for db_node in self.clusterInfo.dbNodes
                             if len(db_node.coordinators) > 0]
        option_values = (OMCommand.getLocalScript("Local_Dilatation_Config"),
                         self.xmlFile,
                         self.localLog,
                         self.logAction,
                         self.logUuid,
                         self.logger.step)
        create_cmd = "%s --target=create_new_node -X '%s' -l '%s' " \
                     "--log-action=%s --log-uuid=%s --log-step=%s" % option_values
        if readonly:
            create_cmd = create_cmd + " --readonly"
        self.logger.debug("Command for creating new node:%s" % create_cmd)
        self.sshTool.executeCommand(create_cmd, "Create new node", DefaultValue.SUCCESS, coordinator_hosts)
        self.logger.log("Successfully created new node.")

    def dropShrinkNode(self, remainNodes, deleteNodeNames):
        """
        function: run the delete_shrink_node target of the local contraction script on every
                  remaining node that has a coordinator
        input : remainNodes --node objects that stay in the cluster
                deleteNodeNames --names passed to the script, one -N option each
        output: NA
        """
        self.logger.log("Deleting shrink node.")
        coordinator_hosts = [db_node.name for db_node in remainNodes
                             if len(db_node.coordinators) > 0]
        option_values = (OMCommand.getLocalScript("Local_Contraction_Config"),
                         self.localLog,
                         self.logAction,
                         self.logUuid,
                         self.logger.step)
        drop_cmd = "%s --target=delete_shrink_node -l '%s' " \
                   "--log-action=%s --log-uuid=%s --log-step=%s" % option_values
        # Append one " -N <name> " option per node to delete.
        drop_cmd += "".join(" -N %s " % node_name for node_name in deleteNodeNames)
        self.logger.debug("Command for deleting shrink node:%s" % drop_cmd)
        self.sshTool.executeCommand(drop_cmd, "Delete shrink node", DefaultValue.SUCCESS, coordinator_hosts)
        self.logger.log("Successfully deleted shrink node.")

    def refreshLCPgxcInfo(self, remainNodes, deleteNodeNames):
        """
        function: run the refresh_lc_pgxc_info target of the local contraction script on the
                  first remaining node that has a coordinator
        input : remainNodes --node objects that stay in the cluster
                deleteNodeNames --names passed to the script, one -N option each
        output: NA
        """
        self.logger.log("Refreshing logical cluster pgxc info.")
        # Falls back to an empty host name when no remaining node has a coordinator.
        first_cn_host = next((db_node.name for db_node in remainNodes
                              if len(db_node.coordinators) > 0), "")
        option_values = (OMCommand.getLocalScript("Local_Contraction_Config"),
                         self.localLog,
                         self.logAction,
                         self.logUuid,
                         self.logger.step)
        refresh_cmd = "%s --target=refresh_lc_pgxc_info -l '%s' " \
                      "--log-action=%s --log-uuid=%s --log-step=%s" % option_values
        refresh_cmd += "".join(" -N %s " % node_name for node_name in deleteNodeNames)
        self.logger.debug("Command for refreshing logical cluster pgxc info:%s" % refresh_cmd)
        self.sshTool.executeCommand(refresh_cmd, "Refresh logical cluster pgxc info", DefaultValue.SUCCESS,
                                    [first_cn_host])
        self.logger.log("Successfully refreshed logical cluster pgxc info.")

    def dropPmkSchema(self, cnInst):
        """
        function: drop the pmk schema through the given coordinator, trying up to three times
        input : cnInst --coordinator instance (hostname, port and instanceId are read)
        output: NA
        """
        max_attempts = 3
        self.logger.log("Deleting the pmk schema.")
        drop_sql = "DROP SCHEMA IF EXISTS pmk CASCADE;"
        self.logger.debug("Command for delete the pmk schema: %s" % drop_sql)
        for attempt in range(max_attempts):
            exit_code, _ = ClusterCommand.remoteSQLCommand(drop_sql, self.user, cnInst.hostname, cnInst.port,
                                                           False, is_inplace_upgrade=True)
            if exit_code == 0:
                break
            if attempt >= 2:
                # Last attempt failed: tell the user to run the statement manually, do not raise.
                self.logger.log("NOTICE: Failed to execute SQL command on CN instance, " +
                                "please re-execute SQL command 'DROP SCHEMA IF EXISTS pmk CASCADE' manually.")
                return
            self.logger.debug("Failed to drop pmk schema in %d times [%s:%d]." %
                              ((attempt + 1), cnInst.hostname, cnInst.instanceId))
            time.sleep(3)
        self.logger.log("Successfully deleted the pmk schema.")

    def checkCgroupInfo(self):
        """
        function: run the check_cgroup_status target of the local contraction script
                  (no host list is passed to the ssh tool)
        input : NA
        output: NA
        """
        self.logger.debug("Checking the cgroup status for all nodes.")
        option_values = (OMCommand.getLocalScript("Local_Contraction_Config"),
                         self.localLog,
                         self.logAction,
                         self.logUuid,
                         self.logger.step)
        check_cmd = "%s --target=check_cgroup_status -l '%s' " \
                    "--log-action=%s --log-uuid=%s --log-step=%s" % option_values
        self.logger.debug("Command for checking the cgroup status for all nodes:%s" % check_cmd)
        self.sshTool.executeCommand(check_cmd, "Check the cgroup status for all nodes", DefaultValue.SUCCESS)
        self.logger.debug("Successfully checked the cgroup status for all nodes.")

    def restoreCmsParaForNewCmaInst(self, nodenames, backup_file_path=None):
        """
        function: re-apply selected cm parameters saved in a json file to the given nodes
                  with "gs_guc set -Z cm"
        input : nodenames --nodes on which the parameters are set
                backup_file_path --json file with the saved parameters; defaults to
                                   cm_server.json in the tmp directory of self.user
        output: NA
        """
        env_file = DefaultValue.getMpprcFile()
        if backup_file_path:
            saved_file = backup_file_path
        else:
            saved_file = os.path.join(DefaultValue.getTmpDirFromEnv(self.user), "cm_server.json")
        # Only these parameters are restored; other keys in the file are ignored.
        restorable = ["coordinator_heartbeat_timeout",
                      "enable_transaction_read_only",
                      "instance_heartbeat_timeout",
                      "datastorage_threshold_value_check"]
        # A missing file means there is nothing to restore.
        saved_paras = g_file.parseJsonFile(saved_file) if os.path.isfile(saved_file) else {}

        guc_options = "".join(" -c '%s=%s' " % (para, para_value)
                              for (para, para_value) in saved_paras.items()
                              if para in restorable)
        if guc_options == "":
            return
        for node_name in nodenames:
            guc_cmd = "source %s; gs_guc set -Z cm -N %s -D cm_instance_data_path %s " % \
                      (env_file, node_name, guc_options)
            exit_code, cmd_output = DefaultValue.retryGetstatusoutput(guc_cmd)
            if exit_code != 0:
                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % guc_cmd + " Error:\n%s" % cmd_output)

    def manageOnlineStateControlFile(self, mode, nodeNames):
        """
        function: create or delete the tag file <appPath>/bin/om_action_online.state on the nodes
                Temporary tables need to be supported when repairing the CN online.
                Online expansion does not support temporary tables, so a tag file is generated
                during the expansion to let the kernel distinguish the two scenarios.
        input : mode --'add' creates the file, 'delete' removes it, any other value does nothing
                nodeNames --nodes on which the command is executed
        output: NA
        """
        if mode != "add" and mode != "delete":
            return
        self.logger.debug("Managing online state control file for '%s' mode." % mode)
        state_file = "%s/bin/om_action_online.state" % self.clusterInfo.appPath
        if mode == "delete":
            state_cmd = g_file.SHELL_CMD_DICT["deleteFile"] % (state_file, state_file)
        else:
            state_cmd = g_file.SHELL_CMD_DICT["createFile"] % (state_file,
                                                               DefaultValue.KEY_FILE_MODE,
                                                               state_file)
        self.logger.debug("Command for managing online state control file on all old nodes: %s" % state_cmd)
        self.sshTool.executeCommand(state_cmd, "manage online state control file", DefaultValue.SUCCESS,
                                    nodeNames)
        self.logger.debug("Successfully online state control file.")

    def WaitPreparedXactsCommit(self, nodes):
        """
        function: Waiting for 2 phase transaction commit.
                  The 'checkPreparedXacts' local script is executed on every node of 'nodes'
                  that owns a coordinator. A failure whose message contains [GAUSS-53011] is
                  retried after 3 seconds, at most 600 times; any other failure is raised
                  immediately. If every attempt fails with [GAUSS-53011] the timeout is logged
                  (it used to pass silently) and the method returns.
        input : nodes---node objects; only the names of nodes with coordinators are used
        output: NA
        """
        self.logger.debug("Waiting for 2 phase transaction commit.")
        cn_node_names = [node.name for node in nodes if len(node.coordinators) > 0]
        max_attempts = 600
        retry_interval = 3
        cmd = "%s --target=checkPreparedXacts -l '%s' " \
              "--log-action=%s --log-uuid=%s --log-step=%s" % \
              (OMCommand.getLocalScript("Local_Dilatation_Config"),
               self.localLog,
               self.logAction,
               self.logUuid,
               self.logger.step)
        self.logger.debug("Command for checking 2 phase transaction: %s" % cmd)
        for _ in range(max_attempts):
            try:
                self.sshTool.executeCommand(cmd, "check 2-pc transaction", DefaultValue.SUCCESS, cn_node_names)
                self.logger.debug("2 phase transaction has been completed.")
                break
            except Exception as e:
                # [GAUSS-53011] is the only error treated as "try again later".
                if str(e).find("[GAUSS-53011]") < 0:
                    raise Exception(str(e))
                time.sleep(retry_interval)
        else:
            # Every attempt failed with [GAUSS-53011]: make the timeout visible in the log
            # instead of returning as if the check had succeeded.
            self.logger.log("Warning: the 2 phase transaction check still failed after %d attempts." %
                            max_attempts)

    def checkLocks(self, coorHostNames):
        """
        function: check if the current cluster is locked.
                  Runs the 'checkLocks' local script on the given hosts and scans the output of
                  every host for a line starting with 'lockFlagNum' other than "lockFlagNum:0".
        input : coorHostNames---host names on which the check script is executed
        output: True if such a line is found on any host, otherwise False
        raises: Exception carrying the host output when the script status of a host is not SUCCESS
        """
        cmd = "%s --target=checkLocks -X '%s' -l '%s' " \
              "--log-action=%s --log-uuid=%s --log-step=%s" % \
              (OMCommand.getLocalScript("Local_Dilatation_Config"),
               self.xmlFile,
               self.localLog,
               self.logAction,
               self.logUuid,
               self.logger.step)
        self.logger.debug("Command for checking if the current cluster is locked: %s" % cmd)
        (status, output) = self.sshTool.getSshStatusOutput(cmd, coorHostNames)
        self.logger.debug("Status: %s.\nOutput:\n%s" % (str(status), output))
        output_map = self.sshTool.parseSshOutput(coorHostNames)

        locked = False
        # Every host is inspected, even after a lock was found, so that a failed host still raises.
        for (host, host_status) in status.items():
            if host_status != DefaultValue.SUCCESS:
                raise Exception(output_map[host])
            if any(text.startswith('lockFlagNum') and text.strip() != "lockFlagNum:0"
                   for text in output_map[host].split('\n')):
                locked = True

        return locked

    def checkLockStatus(self, nodes, expectExistLock=True):
        """
        function: Check that the cluster lock state matches the expectation.
                  The host of the first coordinator of every node that has coordinators is
                  handed to checkLocks().
        input : nodes---node objects to take the coordinator hosts from
                expectExistLock---True if a lock must exist, False if no lock may exist
        output: NA
        raises: Exception(GAUSS_52632) if a lock is expected but none is found,
                Exception(GAUSS_52627) if no lock is expected but one is found
        """
        coor_host_names = [node.coordinators[0].hostname
                           for node in nodes
                           if len(node.coordinators) != 0]

        locked = self.checkLocks(coor_host_names)
        if expectExistLock:
            if not locked:
                raise Exception(ErrorCode.GAUSS_526["GAUSS_52632"])
        elif locked:
            raise Exception(ErrorCode.GAUSS_526["GAUSS_52627"])

    def buildNodeInst(self, cnList, newNodeInstList, buildMode, dilatation_mode, nodeIdList, islcctl):
        """
        function: call buildNewNodeInst(); when 'islcctl' is set the call is wrapped by
                  DefaultValue.modifyClusterOMStatus("on", nodeIdList) before the build and
                  DefaultValue.modifyClusterOMStatus("off") afterwards (also on failure).
        input : cnList, newNodeInstList, buildMode, dilatation_mode---passed to buildNewNodeInst()
                nodeIdList---node ids handed to modifyClusterOMStatus("on", ...)
                islcctl---whether the OM status switch has to surround the build
        output: NA
        """
        if not islcctl:
            self.buildNewNodeInst(cnList, newNodeInstList, buildMode, dilatation_mode)
            return

        try:
            DefaultValue.modifyClusterOMStatus("on", nodeIdList)
            self.buildNewNodeInst(cnList, newNodeInstList, buildMode, dilatation_mode)
        except Exception as err:
            raise Exception(str(err))
        finally:
            # the status is always switched back, whether the build succeeded or not
            DefaultValue.modifyClusterOMStatus("off")

    def rebuildNodes(self, oldNodeNames, newNodeNames, oldNodes, newNodes, dilatation_mode, islcctl):
        """
        function: Build coordinators and data nodes on new nodes
        input : oldNodeNames/newNodeNames---names of the existing and of the new nodes
                oldNodes/newNodes---node objects of the existing and of the new nodes
                dilatation_mode---"read-only" locks the cluster before the full build,
                                  "insert" adds a timed full build followed by an incremental build
                islcctl---when set, GUC handling covers old and new nodes, the CNs are taken from
                          self.clusterInfo.dbNodes and the new node ids are passed to buildNodeInst()
        output: NA
        """
        self.logger.log("Rebuilding new nodes.")

        if islcctl:
            manage_node_names = oldNodeNames + newNodeNames
            lock_nodes = self.clusterInfo.dbNodes
        else:
            manage_node_names = oldNodeNames
            lock_nodes = oldNodes
        # Before starting CN build DN, you need to set the parameters of
        # Cn and set the parameter value to 1000000 to ensure that
        # the xlog log of the remote CN is not deleted.
        self.manageGucParameterForBuild(manage_node_names)

        online_mode = (dilatation_mode == "insert")
        if dilatation_mode == "read-only":
            self.lockCluster()
        self.logger.log("Restoring new nodes.")

        # Set timeout for full rebuilding new nodes if online expansion.
        if online_mode:
            self.setTimer(self.timeout)

        # the first CN of every node that owns one is used for building
        cn_list = [db_node.coordinators[0] for db_node in lock_nodes if len(db_node.coordinators) > 0]
        new_node_inst_list = self.getAllNewNodeInst(islcctl)

        # the ids of the new nodes are only needed in the islcctl case
        node_id_list = [db_node.id for db_node in newNodes] if islcctl else []
        self.buildNodeInst(cn_list, new_node_inst_list, "full", dilatation_mode, node_id_list, islcctl)

        if online_mode:
            self.begin_online_build(manage_node_names)
            # Reset timeout for incremental rebuilding new nodes.
            self.resetTimer()
            self.lockCluster(is_lock_priority=True)
            self.WaitPreparedXactsCommit(lock_nodes)
            self.checkLockStatus(lock_nodes, True)
            self.buildNodeInst(cn_list, new_node_inst_list, "incremental", dilatation_mode, node_id_list, islcctl)
        self.logger.log("Successfully restored new nodes.")
        self.manageGucParameterForBuild(manage_node_names, "restore")
        self.removeBuildFile(newNodeNames)
        # Reset timeout after rebuild new nodes.
        self.resetTimer()
        # re-arm the timer with what is left of the timeout since the script started
        now_seconds = int(datetime.now().strftime('%s'))
        start_seconds = int(self.scriptStartTime.strftime('%s'))
        self.setTimer(self.timeout - (now_seconds - start_seconds))
        self.logger.log("Successfully rebuild new nodes.")

    def getRepNodeGroup(self, lcGroupName, CooInst=None):
        """
        function: get node group info of replication table
        input: lcGroupName---logical cluster nodegroup name; only groups with group_kind 'r' whose
                             members contain the members of this group are queried
        input: CooInst---Cn instance to execute sql; the first instance of getCooInst() if None
        output: map of group name -> (oid, list of group member strings); empty if nothing is found
        """
        if CooInst is None:
            CooInst = self.getCooInst()[0]
        self.logger.log("Getting node group info for replication table")
        sql_string = "SELECT group_name, oid, group_members FROM pg_catalog.pgxc_group WHERE group_kind = 'r' " \
                     "AND group_members @> (SELECT group_members FROM pgxc_group WHERE group_name = '%s');" \
                     % lcGroupName
        (status, output) = ClusterCommand.remoteSQLCommand(sql_string, self.user,
                                                           CooInst.hostname,
                                                           CooInst.port,
                                                           ignoreError=False,
                                                           is_inplace_upgrade=True)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51601"] %
                            "node group information" + " Error:\n%s" % str(output))

        group_map = {}
        if output == "":
            return group_map

        # every output line has the form "group_name | oid | space separated members"
        for row in output.split('\n'):
            columns = row.split('|')
            rep_group_name = columns[0].strip()
            rep_group_oid = columns[1].strip()
            rep_group_members = columns[2].strip().split(' ')
            group_map[rep_group_name] = (rep_group_oid, rep_group_members)

        return group_map

    def getNodeInfo(self, coo_inst=None):
        """
        function: get node info from pgxc_node
        input: coo_inst---Cn instance to execute sql; the first instance of getCooInst() if None
        output: map of node oid -> (node_name, node_host); empty if the query returns nothing
        """
        if coo_inst is None:
            coo_inst = self.getCooInst()[0]
        self.logger.log("Getting node from pgxc_node")
        sql_string = "SELECT oid, node_name, node_host FROM pg_catalog.pgxc_node;"
        (status, output) = ClusterCommand.remoteSQLCommand(sql_string, self.user,
                                                           coo_inst.hostname,
                                                           coo_inst.port,
                                                           ignoreError=False,
                                                           is_inplace_upgrade=True)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51601"] %
                            "node information" + " Error:\n%s" % str(output))

        nodes_map = {}
        if output == "":
            return nodes_map

        # every output line has the form "oid | node_name | node_host"
        for row in output.split('\n'):
            columns = row.split('|')
            node_oid = columns[0].strip()
            node_name = columns[1].strip()
            node_host = columns[2].strip()
            nodes_map[node_oid] = (node_name, node_host)

        return nodes_map

    def createNewRepNodeGroup(self, lcGroupName, handleNodes, isExpand, CooInst):
        """
        function: create new group for replication table.
                  For every replication node group returned by getRepNodeGroup() a temporary group
                  named "group_version_<timestamp>_<oid of the source group>" is created with
                  'create node group ... distribute from'.
        input: lcGroupName---replication table node group need contain lcgroup members
        input: handleNodes---node names (isExpand) or node objects (not isExpand) to handle
        input: isExpand---True: the DNs of handleNodes are added to the member list,
                          False: the DNs of handleNodes are removed from the member list
        input: CooInst---Cn instance to execute sql
        output: map of source group name -> name of the created temporary group
        """
        self.logger.log("Creating new node group for replication table")
        group_map = self.getRepNodeGroup(lcGroupName, CooInst)
        rep_nodegroup = {}
        if len(group_map) == 0:
            self.logger.log("There is no nodegroup for replication table, skip this step")
            return rep_nodegroup

        start_transaction_sql = "SET xc_maintenance_mode = on; START TRANSACTION; "
        end_transaction_sql = "COMMIT; RESET xc_maintenance_mode; "

        nodes_map = self.getNodeInfo(CooInst)
        for group_name in group_map:
            group_info = group_map[group_name]
            # translate the member oids of the group into node names
            member_names = [nodes_map[member_oid][0] for member_oid in group_info[1]]
            if isExpand:
                # Get IP address by node name '-h' + self.lcName
                ip_list = self.getListenIpList(handleNodes)
                # get DN info by ip
                member_names.extend(self.getDnNameByListenIp(ip_list))
                dn_str = ",".join(member_names)
            else:
                delete_nodes = [node.name for node in handleNodes]
                # Get IP address by node name '-h' + self.lcName
                ip_list = self.getListenIpList(delete_nodes)
                # get DN info by ip
                delete_list = self.getDnNameByListenIp(ip_list)
                dn_str = ",".join(list(set(member_names) - set(delete_list)))

            temp_group_name = "group_version_%s_%s" % (time.strftime("%Y%m%d_%H%M%S"), group_info[0])

            need_change_bucket, lc_group_dn_nums = self.check_change_buckets()
            if need_change_bucket:
                buckets = self.calculate_table_buckets(lc_group_dn_nums)
            else:
                buckets = self.query_vgroup_bucket_count(lcGroupName, [CooInst])
            buckets_option = " NBUCKETS(%d) " % buckets

            create_group_sql = "create node group \"%s\" with (%s) distribute from \"%s\" %s;" % \
                               (temp_group_name, dn_str, group_name, buckets_option)
            sql = "%s%s%s" % (start_transaction_sql, create_group_sql, end_transaction_sql)
            self.logger.debug("Sql command for creating nodegroup and user: %s\n" % sql)
            (status, output) = ClusterCommand.remoteSQLCommand(sql,
                                                               self.user,
                                                               CooInst.hostname,
                                                               CooInst.port,
                                                               ignoreError=False,
                                                               is_inplace_upgrade=True)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % sql + " Error:\n%s" % str(output))

            rep_nodegroup[group_name] = temp_group_name

        return rep_nodegroup

    def dropRNodeGroup(self, sourceGroup):
        """
        function: drop a replication table node group inside one transaction with
                  xc_maintenance_mode switched on.
        input : sourceGroup---name of the node group to drop
        output: NA
        raises: Exception(GAUSS_51400) if the sql command fails
        """
        self.cleanPreTransaction()
        self.logger.debug("Deleting R node group.")

        # 1.get CN instance info from cluster
        coo_inst = self.getCooInst()

        # 2.the sql command to drop node group, wrapped into a maintenance mode transaction
        drop_group_sql = "drop node group \"%s\" ; " % sourceGroup
        drop_sql = "%s%s%s" % ("SET xc_maintenance_mode = on; START TRANSACTION; ",
                               drop_group_sql,
                               "COMMIT; RESET xc_maintenance_mode; ")
        self.logger.debug("Sql command for drop nodegroup: %s\n" % drop_sql)

        # 3.execute the sql command on the first CN
        (status, output) = ClusterCommand.remoteSQLCommand(drop_sql,
                                                           self.user,
                                                           coo_inst[0].hostname,
                                                           coo_inst[0].port,
                                                           ignoreError=False,
                                                           is_inplace_upgrade=True)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % drop_sql + " Error:\n%s" % str(output))

        self.logger.debug("Successfully deleted R node group.")

    def getRepNodeGroupPair(self):
        """
        function: pair every replication node group with the temporary group created for it.
                  createNewRepNodeGroup() names a temporary group
                  "group_version_<timestamp>_<oid of the source group>", so a group is paired with
                  another group whose name ends with "_<oid>".
        input: NA
        output: map of source group name -> temporary group name; groups without a match are
                left out
        raises: Exception(GAUSS_51601) if the node group query fails
        """
        coo_inst = self.shrinkRedisCooInst
        if coo_inst is None:
            coo_inst = self.getCooInst()[0]

        self.logger.log("Getting node group info for replication table")
        sql_string = "SELECT group_name, oid FROM pg_catalog.pgxc_group WHERE group_kind = 'r';"
        (status, output) = ClusterCommand.remoteSQLCommand(sql_string, self.user,
                                                           coo_inst.hostname,
                                                           coo_inst.port,
                                                           ignoreError=False,
                                                           is_inplace_upgrade=True)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51601"] %
                            "node group information" + " Error:\n%s" % str(output))

        # group name -> oid for every replication node group
        all_group_map = {}
        if output != "":
            for group in output.split('\n'):
                groupInfo = group.split('|')
                groupName = groupInfo[0].strip()
                oid = groupInfo[1].strip()
                all_group_map[groupName] = oid

        group_map = {}
        for (old_group_name, oid) in all_group_map.items():
            # A plain substring test would also hit the timestamp digits of an unrelated
            # temporary group, a longer oid that merely contains this one, or the group's own
            # name. Only the "_<oid>" suffix of a different group identifies the pair.
            oid_suffix = "_%s" % oid
            for new_group_name in all_group_map:
                if new_group_name != old_group_name and new_group_name.endswith(oid_suffix):
                    group_map[old_group_name] = new_group_name
                    break
        return group_map

    def dropAndUpdateRGroupMessage(self, repNodegroup):
        """
        function: for every (old group, new group) pair update the group info and the table
                  dependencies through updateGroupInfo()/updateTableDepend(), then drop the new group
        input : repNodegroup---map of old group name -> new group name
        output: NA
        """
        self.logger.log("drop new replication node group and update table dependencies")
        for (old_group, new_group) in repNodegroup.items():
            # 1. Update group_buckets and group_members
            self.updateGroupInfo(new_group, old_group)
            # 2.Update table dependencies
            self.updateTableDepend(new_group, old_group)
            # 3.drop new group
            self.dropRNodeGroup(new_group)

        self.logger.log("Successfully dropped new replication node group and update table dependencies")

    def doDropNodeForeignTable(self, inst_list, all_database_list, is_cn=False):
        """
        function: Drop foreign table.
                  An anonymous block is executed in every given database; it drops each relation
                  with relkind 'f' using 'DROP FOREIGN TABLE IF EXISTS ... CASCADE'.
        input: inst_list---instance to connect to; its 'hostname' and 'port' are used
        input: all_database_list---names of the databases in which the block is executed
        input: is_cn---when set, only relations that also have an entry in pgxc_class are dropped
        output: NA
        raises: Exception(GAUSS_51400) if the sql command fails in a database
        """
        if is_cn:
            # variant with the additional join on pgxc_class
            sql_command = """
                    SET xc_maintenance_mode = on;
                    DECLARE
                        sql_stmt text;
                        my_cursor REFCURSOR;
                        schemaname  text;
                        tablename text;
                    BEGIN
                        sql_stmt := 'SELECT pg_catalog.quote_ident(n.nspname) AS schemaname,
                                            pg_catalog.quote_ident(c.relname) AS tablename
                                    FROM pg_catalog.pg_class c
                                        JOIN pg_catalog.pgxc_class x ON x.pcrelid = c.oid
                                        LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
                                        WHERE c.relkind = ''f'' ';

                        OPEN my_cursor FOR EXECUTE sql_stmt;
                        FETCH FROM my_cursor INTO schemaname, tablename;
                        WHILE my_cursor % FOUND LOOP
                            EXECUTE IMMEDIATE 'DROP FOREIGN TABLE IF EXISTS '||schemaname||'.'||tablename||' CASCADE';
                            FETCH FROM my_cursor INTO schemaname, tablename;
                        END LOOP;
                        CLOSE my_cursor;
                    END;
                    """
        else:
            sql_command = """
                    SET xc_maintenance_mode = on;
                    DECLARE
                        sql_stmt text;
                        my_cursor REFCURSOR;
                        schemaname  text;
                        tablename text;
                    BEGIN
                        sql_stmt := 'SELECT pg_catalog.quote_ident(n.nspname) AS schemaname,
                                            pg_catalog.quote_ident(c.relname) AS tablename
                                    FROM pg_catalog.pg_class c
                                    LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
                                    WHERE c.relkind = ''f'' ';

                        OPEN my_cursor FOR EXECUTE sql_stmt;
                        FETCH FROM my_cursor INTO schemaname, tablename;
                        WHILE my_cursor % FOUND LOOP
                            EXECUTE IMMEDIATE 'DROP FOREIGN TABLE IF EXISTS '||schemaname||'.'||tablename||' CASCADE';
                            FETCH FROM my_cursor INTO schemaname, tablename;
                        END LOOP;
                        CLOSE my_cursor;
                    END;
                    """

        # the same block is executed once per database, the first failure aborts the loop
        for db_name in all_database_list:
            (status, output) = ClusterCommand.remoteSQLCommand(sql_command,
                                                               self.user,
                                                               inst_list.hostname,
                                                               inst_list.port,
                                                               ignoreError=False,
                                                               database=db_name,
                                                               is_inplace_upgrade=True)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % sql_command + " Error:\n%s" % str(output))

    def doDropForeignTable(self, inst_list, all_database_list, groupName):
        """
        function: Drop foreign table with group.
                  An anonymous block is executed in every given database; it drops each relation
                  with relkind 'f' whose pgxc_class.pgroup equals the given group name.
        input: inst_list---instance to connect to; its 'hostname' and 'port' are used
        input: all_database_list---names of the databases in which the block is executed
        input: groupName---node group name that replaces the 'logic_cluster' placeholder of the sql
        output: NA
        raises: Exception(GAUSS_51400) if the sql command fails in a database
        """
        sql_template = """
                SET xc_maintenance_mode = on;
                DECLARE
                    sql_stmt text;
                    my_cursor REFCURSOR;
                    schemaname  text;
                    tablename text;
                BEGIN
                    sql_stmt := 'SELECT pg_catalog.quote_ident(n.nspname) AS schemaname,
                                        pg_catalog.quote_ident(c.relname) AS tablename
                                FROM pg_catalog.pg_class c
                                    JOIN pg_catalog.pgxc_class x ON x.pcrelid = c.oid
                                    LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
                                    WHERE c.relkind = ''f'' and x.pgroup=''logic_cluster''';

                    OPEN my_cursor FOR EXECUTE sql_stmt;
                    FETCH FROM my_cursor INTO schemaname, tablename;
                    WHILE my_cursor % FOUND LOOP
                        EXECUTE IMMEDIATE 'DROP FOREIGN TABLE IF EXISTS '||schemaname||'.'||tablename||' CASCADE';
                        FETCH FROM my_cursor INTO schemaname, tablename;
                    END LOOP;
                    CLOSE my_cursor;
                END;
                """
        # put the real group name in place of the placeholder
        sql_command = sql_template.replace("logic_cluster", groupName)

        # the same block is executed once per database, the first failure aborts the loop
        for db_name in all_database_list:
            (status, output) = ClusterCommand.remoteSQLCommand(sql_command,
                                                               self.user,
                                                               inst_list.hostname,
                                                               inst_list.port,
                                                               ignoreError=False,
                                                               database=db_name,
                                                               is_inplace_upgrade=True)

            if status != 0:
                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % sql_command + " Error:\n%s" % str(output))

    def setCanInsertIntoNewNode(self, can):
        """
        function: reload the coordinator parameter disable_insert_into_newnodes with gs_guc
        input : can---truthy: inserting into new nodes is allowed (parameter set to 'off'),
                      otherwise the parameter is set to 'on'
        output: NA
        raises: Exception(GAUSS_50008) if the gs_guc command fails
        """
        # the parameter is a "disable" switch, so its value is the opposite of 'can'
        switch_value = 'off' if can else 'on'
        cmd = "gs_guc reload -Z coordinator -N all -I all -c 'disable_insert_into_newnodes=%s'" % switch_value
        (status, output) = DefaultValue.retryGetstatusoutput(cmd)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_500["GAUSS_50008"] + "Error:\n%s" % output)

    def stop_ddl_task_for_locking(self, waiting_for_killing: int, event: threading.Event):
        """
        Terminate the backends that hold the advisory lock until 'event' is set.

        pg_locks contains all locks of all databases, so one statement per CN is enough: it
        calls pg_terminate_backend() for every backend holding a granted advisory ShareLock with
        classid and objid 65535 and reports the matching pg_stat_activity rows.

        waiting_for_killing: seconds to wait for 'event' before the first round; if the event is
                             set within that time nothing is terminated
        event: set by the caller to stop the loop

        The statement is repeated on every CN every 15 seconds until the event is set or the
        main thread is no longer alive. Failures are only written to the debug log.
        """
        if waiting_for_killing and waiting_for_killing > 0 and event.wait(waiting_for_killing):
            return

        sql_kill = "SET xc_maintenance_mode = on;select t1.pid, t1.query_id, t1.application_name, t1.query_start, " \
                   "t1.state, t1.query from pg_catalog.pg_stat_activity t1, " \
                   "(select pg_terminate_backend(pid),pid from pg_catalog.pg_locks " \
                   "where locktype = 'advisory' and mode = 'ShareLock' and granted = 't' " \
                   "and classid = '65535' and objid = '65535') t2 where t1.pid=t2.pid;"
        self.logger.debug("SQL of stopping ddl tasks: %s" % sql_kill)

        coordinators = []
        while not event.is_set():
            try:
                # the CN list is fetched lazily and fetched again as long as it is empty
                if not coordinators:
                    coordinators = self.getCooInst()
                for cn_node in coordinators:
                    if event.is_set() or not threading.main_thread().is_alive():
                        return
                    (status, output) = ClusterCommand.remoteSQLCommand(
                        sql_kill, self.user,
                        cn_node.hostname,
                        cn_node.port,
                        ignoreError=False)
                    if status == 0:
                        self.logger.debug("Stopped ddl tasks infos of %s:\n%s" % (cn_node.hostname, output))
                    else:
                        self.logger.debug("Failed to stop  ddl tasks of %s Error:\n%s" % (cn_node.hostname, output))
            except Exception as e:
                self.logger.debug("Failed to stop ddl tasks. ERROR:%s" % str(e))
            if not threading.main_thread().is_alive():
                return
            event.wait(15)

    def wait_current_active_ddl_complete(self, guc_index, guc_var, timeout=3600):
        """
        function: call pg_catalog.pgxc_wait_current_active_ddl_complete() on the first CN
        input : guc_index, guc_var, timeout---passed to the sql function in this order
        output: NA
        raises: Exception if the sql command fails or its output is 'f'
        """
        wait_sql = "select pg_catalog.pgxc_wait_current_active_ddl_complete(%s, %s, %s);" % \
                   (guc_index, guc_var, timeout)
        self.logger.debug("wait sql: %s" % wait_sql)
        coo_inst = self.getCooInst()
        (status, output) = ClusterCommand.remoteSQLCommand(wait_sql,
                                                           self.user,
                                                           coo_inst[0].hostname,
                                                           coo_inst[0].port,
                                                           ignoreError=False)
        if status != 0:
            raise Exception("Failed to wait active ddl complete" + " Error:\n%s." % str(output))
        # the command itself succeeded, but the function reported 'f'
        if output == 'f':
            raise Exception("Failed to wait active ddl complete.")
        self.logger.log("Successfully wait all thread guc changed.")

    def getInstanceNameList(self, instancetype, cn_inst):
        """
        function: Get the instance names of coordinators or datanodes by the parameter 'instancetype'
        input : instancetype---the instance type, 'C' or 'D'
                cn_inst---Cn instance to execute sql
        output: the node names from pgxc_node as one comma separated string
        raises: Exception if the sql command fails or its output is not exactly one line
        """
        try:
            # get node name string by instance type
            sql = "SELECT string_agg(node_name, ',') FROM pg_catalog.pgxc_node " \
                  "WHERE node_type='%s';" % instancetype
            self.logger.debug("Sql command for get instance names: %s" % sql)
            (status, output) = ClusterCommand.remoteSQLCommand(sql,
                                                               self.user,
                                                               cn_inst.hostname,
                                                               cn_inst.port,
                                                               ignoreError=False)
            # a failed command and an output with more than one line are reported the same way
            if status != 0 or len(output.split('\n')) != 1:
                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % sql +
                                " Error:\n%s" % str(output))
            return output.strip()
        except Exception as err:
            raise Exception(str(err))

    def createNodegroupForNewLC(self, new_nodegroup_name, all_new_nodes, node_name,
                                cn_inst, silent_mode, readonly=False):
        """
        function: Create a logic cluster node group and, unless silent_mode is set, bind the role
                  of the same name to it, all in one transaction. Nothing is done if the group
                  already exists.
        input : new_nodegroup_name---name of the node group (and of the role to bind)
                all_new_nodes---when set, all DN names of pgxc_node become group members
                node_name---otherwise the DNs are looked up through the listen IPs of these nodes
                cn_inst---Cn instance to execute sql
                silent_mode---when set, the 'ALTER ROLE' binding statement is left out
                readonly---when set, ' READ ONLY' is added to the create statement
        output: NA
        raises: Exception(GAUSS_51400) if a sql command fails,
                Exception(GAUSS_50219) if no DN is found for 'node_name'
        """
        self.logger.log("Creating a node group.")

        sql = "SELECT group_name,group_members,is_installation,group_kind " \
              "FROM PGXC_GROUP WHERE group_name='%s';" % new_nodegroup_name
        (status, output) = ClusterCommand.remoteSQLCommand(sql,
                                                           self.user,
                                                           cn_inst.hostname,
                                                           cn_inst.port,
                                                           ignoreError=False)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % sql +
                            " Error:\n%s" % str(output))

        self.logger.debug(output)
        if output.find(new_nodegroup_name) >= 0:
            self.logger.log("Skip to create group for %s exists." % new_nodegroup_name)
            return

        # 1.If -h uses 'all' or 'ALL', call getInstanceNameList() to get all DN names in the entire cluster.
        # 2.Else get DN instance information through pgxc_node.
        if all_new_nodes:
            dn_str = self.getInstanceNameList('D', cn_inst)
        else:
            ip_list = self.getListenIpList(node_name)
            dn_list = self.getDnNameByListenIp(ip_list)
            # check if contain DN instance
            if len(dn_list) == 0:
                raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % "DN instance" +
                                " Maybe the node you are pointing to does not contain a DN instance.")
            dn_str = ",".join(dn_list)

        create_group_sql = "CREATE NODE GROUP \"%s\" WITH(%s) vcgroup;" % \
                           (new_nodegroup_name, dn_str)
        if readonly:
            create_group_sql = create_group_sql.replace(';', ' READ ONLY;')

        # 3.the sql command to create node group and user
        start_transaction_sql = "SET xc_maintenance_mode = on; START TRANSACTION; "
        bind_sql = "ALTER ROLE \"%s\" WITH NODE GROUP \"%s\" VCADMIN LOGIN; " % \
                   (new_nodegroup_name, new_nodegroup_name)
        end_transaction_sql = "COMMIT; RESET xc_maintenance_mode; "

        # the role binding is only part of the transaction when silent_mode is not set
        if silent_mode:
            create_sql = "%s%s%s" % (start_transaction_sql, create_group_sql, end_transaction_sql)
        else:
            create_sql = "%s%s%s%s" % (start_transaction_sql, create_group_sql, bind_sql, end_transaction_sql)
        self.logger.debug("Sql command for creating nodegroup and user: %s\n" % create_sql)
        # execute the sql command and don't care about the result information
        (status, output) = ClusterCommand.remoteSQLCommand(create_sql,
                                                           self.user,
                                                           cn_inst.hostname,
                                                           cn_inst.port,
                                                           ignoreError=False)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % create_sql +
                            " Error:\n%s" % str(output))

        self.logger.log("Successfully created a node group.")

    def doCgroupConfig(self, opr_type, lc_name, is_upgrade=False, is_rollback=False):
        """
        function: Create or delete a logic cluster resource management configuration file in the physical cluster.
        note  : 1.The gs_cgroup command must be invoked in the physical cluster.
                2.doCgroupConfig() is used in gs_lcctl -t create and gs_lcctl -t add
                3.self.sshTool differs between create and add.
                4.When no gs_cgroup command applies (opr_type is neither "create" nor "delete",
                  or "delete" is requested but the local configuration file does not exist),
                  nothing is sent to the hosts.
        input : opr_type--------"create" or "delete" a cgroup
                lc_name-------the logic cluster name
                is_upgrade---whether it is a large cluster conversion (adds --rename)
                is_rollback--whether a delete is part of a rollback (adds --rename)
        output: NA
        """
        self.logger.log("Operating a logic cluster resource management configuration file.")
        try:
            cmd = ""
            # get gs_cgroup path
            gaussHome = DefaultValue.getInstallDir(self.user)
            cgroup_exe_dir = os.path.join(gaussHome, "bin/gs_cgroup")
            if opr_type == "create":
                # gs_cgroup command, create a new cgroup file
                cmd = "%s -c -N %s" % (cgroup_exe_dir, lc_name)
                # convert the entire large cluster to a logical cluster
                if is_upgrade:
                    cmd += " --rename"
            elif opr_type == "delete":
                lc_cgroup_file = os.path.join(gaussHome, "etc",
                                              "%s.gscgroup_%s.cfg" % (lc_name, self.user))
                # Only delete when the configuration file is present on this node.
                # NOTE(review): presumably it can be missing when an earlier run was interrupted - confirm.
                if os.path.exists(lc_cgroup_file):
                    cmd = "%s -d -N %s" % (cgroup_exe_dir, lc_name)
                    # 1.Roll back the logical cluster that failed to specify the --upgrade parameter last time.
                    # 2.Roll back the logical cluster with the -t rollback parameter
                    if is_rollback or is_upgrade:
                        cmd += " --rename"
            if cmd:
                self.logger.debug("Command for executing gs_cgroup in the physical cluster: %s\n" % cmd)
                # Invoking the gs_cgroup command in a physical cluster
                self.sshTool.executeCommand(cmd, "operate cgroup")
            else:
                # Do not dispatch an empty command to every host.
                self.logger.debug("No gs_cgroup command needs to be executed for operation [%s] "
                                  "on logic cluster [%s]." % (opr_type, lc_name))
        except Exception as ex:
            # Re-raised as a plain Exception carrying only the message text.
            raise Exception(str(ex))

        self.logger.log("Successfully operated a logic cluster resource management configuration file.")

    def getDatabases(self):
        """
        function: query the names of the databases that allow connections,
                  using the instance stored in self.cooInst
        input : NA
        output: list obtained by splitting the query output on '\\n'
        """
        query = "SELECT datname FROM pg_catalog.pg_database WHERE datallowconn ORDER BY 1"
        self.logger.debug("Get databases from cluster command: %s." % query)
        coordinator = self.cooInst
        status, output = ClusterCommand.remoteSQLCommand(
            query, self.user, coordinator.hostname, coordinator.port, ignoreError=False)
        # A failed command and an empty result are both treated as errors.
        if status != 0 or not output.strip():
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % query + " Error:\n%s" % str(output))
        # one entry per output line
        return output.split("\n")

    def get_node_group_dn_nums(self, group_name):
        """
        function: count the pgxc_node entries that are members of the given node group
                  (restricted by the query to group_kind 'v' with bit 1 of group_flags unset)
        input : group_name-----name of the node group
        output: the count as int; raises Exception when the SQL fails or
                the output is not a plain number
        """
        sql_template = (
            "SELECT COUNT(pgxc_node.oid) FROM pg_catalog.pgxc_node, pg_catalog.pgxc_group "
            " WHERE pgxc_group.group_name='%s' AND group_kind = 'v' and (group_flags IS NULL OR group_flags & 1 = 0)"
            "   AND pgxc_node.oid = ANY(pgxc_group.group_members);"
        )
        count_sql = sql_template % group_name
        coordinator = self.cooInst
        status, output = ClusterCommand.remoteSQLCommand(
            count_sql, self.user, coordinator.hostname, coordinator.port, ignoreError=False)
        self.logger.debug("Obtains the number of DNs in the current node group: %s." % output)
        if status == 0 and output.strip().isdigit():
            return int(output.strip())
        raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % count_sql + " Error:\n%s" % str(output))

    def decide_need_change_bucket(self, lc_group_dn_nums):
        """
        function: call nodegroup_need_change_bucket() for self.lcGroup_name and
                  the given DN count
        input : lc_group_dn_nums-----DN count passed as the second SQL argument
        output: True when the stripped query output is "t", otherwise False;
                raises Exception when the SQL fails
        """
        check_sql = "SELECT nodegroup_need_change_bucket('%s', %s);" % (
            self.lcGroup_name, lc_group_dn_nums)
        coordinator = self.cooInst
        status, output = ClusterCommand.remoteSQLCommand(
            check_sql, self.user, coordinator.hostname, coordinator.port, ignoreError=False)
        self.logger.debug("Check whether need change buckets: %s." % output)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % check_sql + " Error:\n%s" % str(output))
        return output.strip() == "t"

    def check_change_buckets(self):
        """
        function: bucket-change check; this implementation always answers
                  "no change" with a value of 0
        input : NA
        output: tuple (False, 0)
        """
        # NOTE(review): presumably a default meant to be overridden elsewhere - confirm.
        need_change = False
        value = 0
        return need_change, value

    def calculate_table_buckets(self, dn_nums):
        """
        function: derive the table bucket count from the DN count
        input : dn_nums--------number of DNs
        output: 24 when dn_nums <= 3;
                dn_nums * 10 when 3 < dn_nums < 30;
                dn_nums * 8 otherwise;
                the result is never larger than 960
        """
        upper_limit = 960
        # Small groups get a fixed bucket count (already below the limit).
        if dn_nums <= 3:
            return 24
        per_dn = 10 if dn_nums < 30 else 8
        return min(dn_nums * per_dn, upper_limit)

    def setGuc(self, parameter, value):
        """
        function: run "gs_guc reload" for coordinators and datanodes
                  (-N all -I all) to set one GUC parameter
        input : parameter------GUC name
                value----------value to assign
        output: NA; raises Exception when the command returns non-zero
        """
        reload_cmd = 'source %s;gs_guc reload -Z coordinator -Z datanode -N all -I all -c "%s = %s"' % (
            DefaultValue.getMpprcFile(), parameter, value)
        status, output = DefaultValue.retryGetstatusoutput(reload_cmd)
        if status == 0:
            return
        raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % reload_cmd + " Error:\n%s" % str(output))

    def getGuc(self, parameter, user, cnInst):
        """
        function: read the current value of a GUC parameter with "show"
        input : parameter------GUC name
                user-----------user passed to the remote SQL command
                cnInst---------instance whose hostname and port are used
        output: the raw output of the SQL command; raises Exception when
                the command returns non-zero
        """
        show_sql = 'show %s;' % parameter
        result = ClusterCommand.remoteSQLCommand(show_sql, user, cnInst.hostname, cnInst.port, False)
        status, output = result
        if status == 0:
            return output
        raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % show_sql + " Error:\n%s" % str(output))

    def backupAndSetNewGuc(self, is_online_mode=False):
        """
        function: save the current values of the GUCs named in BACKUP_GUC_LIST
                  to expand_backup_guc.json in the tmp directory, then apply the
                  values from BACKUP_GUC_LIST
        input : is_online_mode---when True, "use_workload_manager" is also
                                 backed up and then set to "on"
        output: NA
        """
        # Work on a private copy: adding the online-only entry must not alter the
        # module-level BACKUP_GUC_LIST for later calls in the same process.
        newGucList = dict(BACKUP_GUC_LIST)
        if is_online_mode:
            newGucList["use_workload_manager"] = "on"

        # oldGucList key is guc, value is the old cluster guc value
        self.logger.debug("Starting read guc.")
        cnInst = self.getCooInst()[0]
        oldGucList = {key: self.getGuc(key, self.user, cnInst) for key in newGucList}
        self.logger.debug("Successfully read guc. %s" % str(oldGucList))

        self.logger.debug("Starting backup guc.")
        tmpDir = DefaultValue.getTmpDirFromEnv(self.user)
        tmpFile = os.path.join(tmpDir, "expand_backup_guc.json")
        # support Reentrant, if this file exists, that means we have set before,
        # do not overwrite it.
        if not os.path.isfile(tmpFile):
            g_file.generateJsonFile(tmpFile, oldGucList)
            self.sshTool.scpFiles(tmpFile, tmpDir)
        self.logger.debug("Successfully backup guc to %s" % tmpFile)

        self.logger.debug("Starting set new guc.")
        for key, value in newGucList.items():
            self.setGuc(key, value)
        self.logger.debug("Successfully set new guc.")

    def restoreGuc(self):
        """
        function: re-apply the GUC values stored in expand_backup_guc.json
                  (in the tmp directory) and then remove that file
        input : NA
        output: NA
        """
        self.logger.debug("Starting restore guc.")
        backup_file = os.path.join(DefaultValue.getTmpDirFromEnv(self.user),
                                   "expand_backup_guc.json")
        saved_gucs = g_file.parseJsonFile(backup_file)
        self.logger.debug("The restored guc file content is: %s" % str(saved_gucs))
        for guc_name in saved_gucs:
            self.setGuc(guc_name, saved_gucs[guc_name])

        # The backup is removed once every saved value has been applied.
        remove_cmd = "rm -rf '%s'" % backup_file
        run_locally = self.localMode or self.isSingle
        DefaultValue.execCommandWithMode(remove_cmd, "delete expand_backup_guc", self.sshTool, run_locally)
        self.logger.debug("Successfully restore guc")

    def dropTempSchemaInOldGroup(self):
        """
        function: for every instance returned by getCooInst() and every database,
                  look up the schemas of relations with relpersistence 't' that
                  belong to the node group flagged in_redistribution='y', and
                  drop each of those schemas (cascade)
        input : NA
        output: NA; a failed lookup raises Exception, a failed drop is only logged
        """
        self.logger.debug("Drop temp schema in old node group.")
        # Both SQL texts are the same for every instance and database.
        find_schema_sql = (
            "select pn.nspname from pgxc_class as pgxc JOIN pg_class as pc ON "
            "pgxc.pcrelid = pc.oid  JOIN pg_namespace as pn on pc.relnamespace = "
            "pn.oid WHERE pc.relpersistence = 't' and pgxc.pgroup=(select group_name "
            "from pgxc_group where in_redistribution='y');"
        )
        drop_sql_template = (
            "SET xc_maintenance_mode = on;"
            "START TRANSACTION;"
            "DROP schema if exists %s cascade;"
            "COMMIT; RESET xc_maintenance_mode;"
        )
        try:
            db_names = self.getAllDatabase()
            for coordinator in self.getCooInst():
                for db_name in db_names:
                    rc, schema_rows = ClusterCommand.remoteSQLCommand(
                        find_schema_sql, self.user, coordinator.hostname, coordinator.port,
                        ignoreError=False, database=db_name)
                    if rc != 0 or ClusterCommand.findErrorInSql(schema_rows):
                        raise Exception(ErrorCode.GAUSS_513["GAUSS_51300"] % find_schema_sql +
                                        " Error: \n%s" % str(schema_rows))

                    # one schema name per output line
                    for schema_name in schema_rows.strip().splitlines():
                        drop_sql = drop_sql_template % schema_name
                        rc, drop_output = ClusterCommand.remoteSQLCommand(
                            drop_sql, self.user, coordinator.hostname, coordinator.port,
                            ignoreError=False, database=db_name)
                        if rc == 0:
                            self.logger.debug("Successful drop temp schema %s in old node group" % schema_name)
                            continue
                        # A failed drop is logged and the loop moves on.
                        self.logger.debug("Failed to execute SQL \"%s\"" % drop_sql
                                          + " Error: \n%s" % str(drop_output))
        except Exception as ex:
            raise Exception(str(ex))


    def checkIfExistUnredistributedTables(self, redisNode, buildTable=False):
        """
        function: Check if there exists unredistributed table
        input : redisNode-----instance whose hostname and port are used to run the check SQL
                buildTable----when True, a non-zero table count is not treated as an error
        output: NA; raises Exception when the SQL fails or when any database
                still has non-temporary tables in a node group flagged in_redistribution='y'
        """

        try:
            self.logger.debug("Checking if there are still unredistributed tables.")
            # get databases from cluster
            databases = self.getAllDatabase()
            sql = "SELECT count(*) FROM pg_catalog.pgxc_class as t1 " \
                  "join pg_catalog.pgxc_group as t2 on t1.pgroup = t2.group_name " \
                  "join pg_class c ON t1.pcrelid = c.oid " \
                  "WHERE t2.in_redistribution ='y' AND c.relpersistence <> 't';"
            self.logger.debug("Sql command to do after-redis check is: %s" % sql)
            UnredisDbList = []
            for database in databases:
                # execute command to check the existence of non-redistributed tables
                (status, output) = ClusterCommand.remoteSQLCommand(sql,
                                                                   self.user,
                                                                   redisNode.hostname,
                                                                   redisNode.port,
                                                                   False,
                                                                   database)
                if status != 0:
                    raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % sql + " Error:\n%s" % str(output))
                # It means there exists tables need but do not be redistributed.
                # So the program can not proceed to the next phase.
                # Besides, we definitely need to exclude the condition where --build-redistb parameter is used.
                # The count is stripped first so that surrounding whitespace in the
                # command output is not mistaken for a non-zero result.
                if output.strip() != '0' and not buildTable:
                    UnredisDbList.append(database)
            if UnredisDbList:
                self.logger.debug("There are still non-redistributed tables on the database [%s]." %
                                  ",".join(UnredisDbList))
                raise Exception("Warning:\n"
                                "--------------------------------------------------\n"
                                "gs_redis or database came into an abnormal state.\n"
                                "There still has tables which need to be but do not be redistributed.\n"
                                "Please make further check manually.\n"
                                "--------------------------------------------------")
        except Exception as e:
            # Re-raised as a plain Exception carrying only the message text.
            raise Exception(str(e))


class Timeout(Exception):
    """Exception subclass named Timeout; it adds nothing to Exception."""
