# Python imports
import abc
import six
import os
import sys
import weakref
import time
from timeit import default_timer as timer
import shutil
import pickle
import glob

# Project imports
import task_defines as td
import cvmanager_task_status
import cvmanager_task_process
import cvmanager_yaml
import cvmanager_queue_processor as process
import wrappers
import cvmanager_remote_command
import cvmanager_defines
import cvmanager_nfs
import cvmanager_utils
import common
import task_defines
import cvmanager_task_step
import cvmanager_reboot
import HsObject.hs_node

# Module-wide registry of task objects, keyed by task uid (TaskObject.__init__ stores each new task here).
# Values are held weakly, so an entry disappears on its own once the task object is garbage collected.
_ALL_TASKS = weakref.WeakValueDictionary()


@six.add_metaclass(abc.ABCMeta)
class TaskObject:
    """ The base class from which EVERY task must derive.

    """
    @classmethod
    def locate_task_file(cls, task_name):
        # I keep fiddling with the structure of the project, so leave this for now.  We can remove later.
        try:
            tmp = getattr(__import__('task_manager.tasks', fromlist=[task_name]), task_name)
        except ImportError:
            tmp = getattr(__import__('tasks', fromlist=[task_name]), task_name)
        return tmp

    @classmethod
    def hasher(cls, task_name, task_kwargs):
        """ Build the identifying hash for a task from its name and its pickled arguments.

        The arguments are pickled before hashing; if pickling fails, define __getstate__ on the offending object
        (loggers and open files are the usual culprits).

        Two keys never take part in the hash:
          * cvmanager_defines.WORKFLOW_DATA_INPUT_KEY - workflow jobs get a fresh token on every launch, which does
            not make them different runs.
          * cvmanager_defines.NO_HASH_INPUT

        The caller's dictionary is left untouched; a shallow copy is what gets trimmed.

        NOTE(review): the result relies on the built-in hash() of a (str, pickle) tuple.  On interpreters with
        string hash randomization enabled that value differs between processes - confirm the runtime keeps it
        stable, since the uid is used in file names.

        :param task_name: Name of the task.
        :param task_kwargs: Dictionary of arguments supplied to the task.
        :return: Non-negative integer smaller than 2 * (sys.maxsize + 1).
        """
        hashable_kwargs = task_kwargs.copy()
        for ignored_key in (cvmanager_defines.WORKFLOW_DATA_INPUT_KEY, cvmanager_defines.NO_HASH_INPUT):
            hashable_kwargs.pop(ignored_key, None)

        unsigned_range = (sys.maxsize + 1) * 2
        digest = hash((task_name, pickle.dumps(hashable_kwargs)))
        return digest % unsigned_range

    def __str__(self):
        return self.task_type.name

    def __del__(self):
        # Cleanup the input yaml file ONLY if its generated.
        try:
            self.log.debug("Tearing down Task({0}).".format(self))
        except Exception:
            pass

        try:
            self.manager.args.remove_input_file()
        except Exception:
            pass

    def __hash__(self):
        """ Hash of the task: TaskObject.hasher() applied to the task type name and the task's kwargs. """
        task_name = self.task_type.name
        return TaskObject.hasher(task_name, self.kwargs)

    def __new__(cls, task, *args, **kwargs):
        """ Create an instance of the concrete ``Task`` class that implements ``task``.

        The task name is taken from ``task.task_type.name`` for a !Task tagged yaml object, or from ``task`` itself
        when it is a string.  The module of that name is loaded through locate_task_file() and its ``Task`` class
        (which must derive from TaskObject) is instantiated.

        :param task: A cvmanager_yaml.YAMLTask / yaml.YAMLObject, or the task name as a string.
        :return: A new, not yet initialized, instance of the task module's ``Task`` class.
        :raises AssertionError: Re-raised untouched if raised while locating/creating the task.
        :raises NotImplementedError: For any other failure (unknown task name, broken task file, ...).
        """
        task_type = 'Unknown'
        try:
            if type(task) is cvmanager_yaml.YAMLTask or isinstance(task, cvmanager_yaml.yaml.YAMLObject):
                # This is a !Task tagged yaml object input.  It must have an appropriate subclass Task which will be
                # created here; from the task_manager\tasks directory.
                task_type = task.task_type.name
            elif isinstance(task, six.string_types):
                # Accept every string flavour (str, unicode on Python 2, str subclasses), not just exact ``str``.
                task_type = task

            task_module = cls.locate_task_file(task_type)

            return super(TaskObject, task_module.Task).__new__(task_module.Task)
        except AssertionError:
            # Bare raise keeps the original traceback intact.
            raise
        except Exception as err:
            # No logger is available yet at this point, so report on stdout.
            print("Error loading your task file [{0}] with error: {1}".format(task_type, str(err)))
            raise NotImplementedError("The specified task type {0} is not implemented properly!".
                                      format(task_type))

    def __init__(self, task, mgr, task_tree='', display_tree='Main', *args, **kwargs):
        """ Initialize the task: validate its arguments, register it, prepare NFS and set up its process.

        :param task: The yaml task object or task name.  ``str(task)`` must be the name of a td.TaskType member.
            When ``task`` carries a non-empty ``kwargs`` attribute, that is used as the task arguments.
        :param mgr: The manager running this task; its ``log`` attribute becomes this task's logger.
        :param task_tree: Tree (task level) this task is spawned from.
        :param display_tree: Human readable tree; 'Main' marks the root task.
        :param args: Unused here.
        :param kwargs: Task arguments, used only when ``task`` has no (or empty) ``kwargs`` of its own.
        :raises ValueError: From validate_args() when an argument has the wrong type or a required one is missing.

        Side effects visible in this method: the task is stored in the module level _ALL_TASKS registry,
        cvmanager_defines.TaskDir.hostname is overwritten, NFS is mounted or the local NFS object is created, and
        the avahi-daemon service is started when the 'use_avahi' argument is set.
        """
        self.tree = task_tree
        self.display_tree = display_tree
        self.kwargs = getattr(task, 'kwargs', None) or kwargs
        self.task_type = getattr(td.TaskType, str(task))  # The task Enum
        self.manager = mgr
        self.log = self.manager.log
        self.temp_files = []
        self.resumed = False

        # task_meta is dict of valid meta data this task can use as inputs.
        # validate_args() also fills self.kwargs with default values for arguments that were not supplied, so it has
        # to run before the uid is computed from self.kwargs below.
        self.validate_args()

        # Set the unique task ID.
        # self.__hash__() is called directly on purpose: hasher() returns a non-negative value that can be larger
        # than sys.maxsize, and the built-in hash() reduces whatever __hash__ returns to the interpreter's native
        # hash width, so hash(self) may differ from this value.
        self.uid = self.__hash__()  # For some reason calling hash(self) is giving different results???

        # Register this task in the global reference as an active task.
        _ALL_TASKS[self.uid] = self

        # self.process are the steps of the task, the overall process.
        self.process = None

        # Check if this task is utilizing an NFS share to report back to a MAIN controller node.
        if self.kwargs.get(td.NFS_MOUNT, False):
            self.init_nfs_to_controller()
        else:
            # Init the local NFS server
            self.log.debug("Loading NFS and current exports.")

            # If this is a child Task(), DO NOT CLEAN the export, because parents could be using them still.
            if display_tree == 'Main':
                self.nfs_server = cvmanager_nfs.NFS(cvmanager_defines.NFS_SHARE, clean_export=True)

                # For the root task, initialize the NFS server.
                self.nfs_server.initialize_nfs_server()

            else:
                # Creates NFS object loading the current exports
                self.nfs_server = cvmanager_nfs.NFS(cvmanager_defines.NFS_SHARE, clean_export=False)

        # We need to check if this task has specified a hostname argument.  If it does, that means use this hostname
        # INSTEAD of the machines hostname.
        # NOTE: this writes a class level attribute on cvmanager_defines.TaskDir, i.e. it is shared state.
        if self.kwargs.get('hostname', False):
            cvmanager_defines.TaskDir.hostname = self.kwargs.get('hostname')
        else:
            cvmanager_defines.TaskDir.hostname = wrappers.get_hostname()

        # Builds self.process (and its status tracking) or flags the task as a duplicate.
        self.initialize_task_process()

        # Check if the task needs avahi and start if so.
        if self.kwargs.get('use_avahi', False):
            self.log.debug("Starting avahi service because task has [use_avahi] option=True.")
            wrappers.start_service('avahi-daemon')

    @abc.abstractmethod
    def set_process(self, process_object):
        raise NotImplementedError("You must specify set_process to return a dictionary of steps required to complete.")

    @classmethod
    @abc.abstractmethod
    def task_args(cls):
        raise NotImplementedError("task_args must be Task() class variable with dictionary of TaskArg().")

    def validate_args(self):
        meta = self.task_args
        for prop_name, prop_val in self.kwargs.items():
            if prop_name not in meta:
                self.log.debug("Unknown input argument [{0}].  Please add to the task_args defined in your"
                               " task. See documentation for proper definition.".format(prop_name))

            else:
                # This argument was specified and we have meta data for it.rm -
                if type(prop_val) is not meta[prop_name].arg_type:
                    raise ValueError("Input argument [{0}] is type [{1}] but it should be of type [{2}].".format(
                        prop_name, type(prop_val), meta[prop_name].arg_type
                    ))

        # Make sure all required arguments are specified.
        for m_name, m_val in meta.items():
            if m_name not in self.kwargs.keys() and m_val.required and m_val.default_value is None:
                raise ValueError("Required input argument [{0}] not specified.  Cannot run task!".format(m_name))
            elif m_name not in self.kwargs.keys() and m_val.default_value is not None:
                # Argument was not specified in input, but we have a default, use it.
                self.log.debug("Argument [{0}] not specified for task [{1}], using the default value [{2}].".format(
                    m_name, self.task_type.name, m_val.default_value
                ))
                self.kwargs[m_name] = m_val.default_value

        self.log.debug("Verified all input arguments are valid for [{0}].".format(self.display_tree))

    def init_nfs_to_controller(self):
        """ Mount the NFS share of the controller NODE which has called this task.

        The controller passes its export in the task arguments of the input yaml, for example:
              - !Task
              type:  Query_RPM_Node
              kwargs:
                nfs_mount : m4hcadevb0102.commvault.com:/ws/ddb/cvmanager

        Example:
            NODE1.com controls the run and sends NODE2 a yaml containing
                nfs_mount : NODE1.com:/ws/ddb/cvmanager
            This method, running on NODE2, makes sure the local cvmanager_defines.NFS_SHARE directory exists and,
            unless something is already mounted there, mounts the controller export onto it.

        Once mounted this node can see the controller's full share.  It should ONLY & ALWAYS write to its own
        directory, found by getting the hostname.
        NOTE(review): earlier documentation described a per-host mount directory (<share>/<controller hostname>);
        the code below mounts directly on NFS_SHARE - confirm which one is intended.

        :raises Exception: When the mount command returns a non-zero code.
        """
        controller_export = self.kwargs[td.NFS_MOUNT]
        mount_point = cvmanager_defines.NFS_SHARE

        # The local folder has to exist before anything can be mounted on it.
        wrappers.mkdirs(mount_point)
        if os.path.ismount(mount_point):
            return

        status = wrappers.mount(controller_export, mount_point)
        if not status:
            return

        self.log.error("Failed mounting {0} with return code {1}." .format(controller_export, status))
        raise Exception("Failed mounting NFS as required by input yaml.")

    def initialize_task_process(self):
        """ Initialize the TaskProcess() which will create new TaskProcess() instance and get the steps of this task
        from the user defined set_process() function.  The task developer is required to write the implementation for
        set_process() which sets the steps and order to run them.

        If the task fails to initialize, it will not be registered as running.  In particular, when a duplicate of
        this task is already running, the process status code is set to DUPLICATE_TASK and nothing else (status
        tracking, yaml backup, PID file) is set up.

        On success this also updates self.tree and self.display_tree, and sets self.resumed when the process
        reports that it is resuming.

        Returns: None

        """
        # Setup the process (steps) of this task.
        self.process = cvmanager_task_process.TaskProcess(self)

        # Check to make sure we have no conflicting tasks already running.
        # NOTE: conflicting_task_found() returns False when a duplicate IS running and True when there is none, so
        # "not ..." below reads as "a duplicate was found".
        if not self.conflicting_task_found():
            self.process.status_code = task_defines.ProcessStatusCode.DUPLICATE_TASK
            return

        # Every process has a status, so initialize the status, which will be a new tracking file OR existing file.
        self.process.status = cvmanager_task_status.ProcessStatus(self)

        # Set the current tree (task level) for this task, so that any child task(s) are spawned from it.
        self.tree = self.process.status.current_tree
        self.display_tree += '-> ' + self.task_type.name
        self.log.debug("[{0}]: Status Tracking file [{1}].".format(self.display_tree, self.process.status.path))

        # Backup the yaml file in the status directory in case of reboots; this way we preserve the input.
        # Only the root ('Main') task does this.
        if self.process.status.display_tree == 'Main':
            self.backup_input_yaml()

        # Create the PID file for this task.
        self.setup_task_pid()

        # If the process initialization failed for any reason, do not run the task!
        if self.process.status_code in td.StatusStates.NOT_READY.value:
            self.task_failed_init()
        elif self.process.status_code == td.ProcessStatusCode.RESUMING:
            self.log.info("Resuming task: {0}".format(str(self)))
            self.resumed = True

    def conflicting_task_found(self):
        """ Check all task PID files to ensure this exact task is not already running in any cvmanager session.

        Stale PID files are cleaned first, then every remaining file is compared against this task's name and uid.

        NOTE: despite the method name, the return value tells whether it is SAFE to proceed.

        Returns:
            bool: False when a running cvmanager process already owns a PID file with this task's name AND uid
            (a duplicate was detected and logged); True when no such conflict exists.

        """
        # Clean up any potential stale files from crashes\exits\etc.  No point checking non-running processes.
        self.clean_all_task_pid_files()

        delim = cvmanager_defines.TASK_PID_DELIM
        my_name = str(self.task_type.name)
        my_uid = str(self.uid)
        for pid_file in cvmanager_utils.get_all_file_by_pattern(cvmanager_defines.TASK_PID_DIR,
                                                                '*' + delim + '*' + delim + '*.pid'):
            # pid_file = Example2____518657271463066153____22663.pid
            # owner_name = task name (Example2)
            # owner_uid = task uid hash (518657271463066153)
            # owner_pid = cvmanager process ID (22663)  Child tasks will share parent PID.
            #
            # The extension is removed with splitext(): str.strip('.pid') strips a SET of characters from both ends
            # and would mangle any task name starting with '.', 'p', 'i' or 'd'.  Splitting from the right keeps a
            # task name intact even if it happens to contain the delimiter.
            file_stem = os.path.splitext(os.path.basename(pid_file))[0]
            pid_info = file_stem.rsplit(delim, 2)
            if len(pid_info) != 3:
                # Not a name/uid/pid triple; nothing to compare against.
                continue
            owner_name, owner_uid, owner_pid = pid_info

            # Only PID files owned by an active cvmanager process matter.
            if not cvmanager_utils.cvmanager_pid_running(owner_pid):
                continue

            if owner_name == my_name and owner_uid == my_uid:
                # This exact task (SAME name & arguments) is running, BAIL
                self.log.error('Duplicate task [{0}] detected! Fatal error'.format(self.task_type.name))
                self.log.error('Another instance of [{0}] task is already running as PID {1}. Please check.'
                               .format(owner_name, owner_pid))
                return False
        return True

    def __get_task_pid_file(self):
        """ Build the path of the file that identifies this task as running.

        The file name follows cvmanager_defines.TASK_PID_FILE, filled with the task name, the task uid and the PID
        of the current process (TaskName____Task.uid____PID.pid).  The PID directory is created when it does not
        exist yet; the file itself is NOT created here.

        Returns: str - Path to the PID file for this task.

        """
        pid_dir = cvmanager_defines.TASK_PID_DIR
        if not os.path.exists(pid_dir):
            os.makedirs(pid_dir)

        file_name = cvmanager_defines.TASK_PID_FILE.format(self.task_type.name, self.uid, os.getpid())
        return os.path.join(pid_dir, file_name)

    def clean_all_task_pid_files(self):
        """ Remove the PID tracking file of every task whose cvmanager process is NOT running any more.

        Files are named <task name><delim><task uid><delim><pid>.pid; only the trailing pid part is needed here.
        Files that belong to a running cvmanager process are left alone.

        Returns: None
        """
        delim = cvmanager_defines.TASK_PID_DELIM
        for pid_file in cvmanager_utils.get_all_file_by_pattern(cvmanager_defines.TASK_PID_DIR,
                                                                '*' + delim + '*' + delim + '*.pid'):
            # splitext() removes the extension; str.strip('.pid') strips a SET of characters from both ends and is
            # not a suffix removal.  Splitting from the right guarantees the last field is the PID even when the
            # task name itself contains the delimiter, so a live task's file is never mistaken for a stale one.
            file_stem = os.path.splitext(os.path.basename(pid_file))[0]
            pid_info = file_stem.rsplit(delim, 2)
            if len(pid_info) != 3:
                # Not a name/uid/pid triple; leave files we do not understand alone.
                continue

            # Check if this PID is an active cvmanager process.
            if cvmanager_utils.cvmanager_pid_running(pid_info[2]):
                continue

            # The process ID identified by this file is NOT running, so just remove it.
            self.log.debug("Task is no longer running; removing PID tracking file [{0}].".format(pid_file))
            os.unlink(pid_file)

    def clean_task_pid_file(self):
        task_pid_file = self.__get_task_pid_file()
        if os.path.exists(task_pid_file):
            self.log.debug("Task is no longer running; removing PID tracking file [{0}].".format(task_pid_file))
            os.unlink(task_pid_file)

    def clean_task_temporary_files(self):
        for file_path in self.temp_files:
            if os.path.exists(str(file_path)):
                self.log.debug("Removing temporary task file [{0}] linked to task [{1}].".format(
                    str(file_path), self.display_tree))
                os.unlink(str(file_path))

    def setup_task_pid(self):
        """ This is a file to identify active running tasks.

        Returns: None

        """
        # Clean up any potential stale files from crashes\exits\etc.
        self.clean_all_task_pid_files()

        task_pid_file = self.__get_task_pid_file()
        open(task_pid_file, 'w').close()
        self.log.debug("Created active task tracking file [{0}].".format(task_pid_file))
        return

    def backup_input_yaml(self):
        # Create a backup of the input yaml file in case of reboots.  Always overwrite with latest.
        # If any task exits with reboot code, this yaml backup will be added into registry for resume operations.
        file_path = self.manager.args.get_input_path()
        if file_path:
            dst_input = os.path.join(self.process.status.status_dir, os.path.basename(file_path))
            self.log.debug("Backing up input yaml [{0}] to [{1}].".format(file_path, dst_input))
            try:
                shutil.copyfile(file_path, dst_input)
                self.log.debug("Successfully backed up input file.")
            except shutil.Error as err:
                if 'are the same file' in err.message:
                    self.log.debug("Input file was the same, this is most likely post reboot scenario.")
                else:
                    raise

            # Successfully backed up the input file; set the backup location.
            self.manager.args.set_input_path(dst_input)
        return

    def run_task(self):
        """ This is the main task & process runner. Up to this point has been all object initialization. Here is
        where we decide what to run and how to run it.

        Every phase yielded by self.process is walked in order.  A phase is skipped when it has steps, all of them
        are PASSED and none of them is flagged ``always_run``.  Otherwise each step that is not PASSED (or that is
        ``always_run``) is executed through self.process.run_step().  Processing stops at the first step that does
        not return PASSED, and also when a step left in REBOOT_AND_RESUME finds the node has not been rebooted yet
        (unless the user asked to skip the reboot).  self.task_complete() is always called on the way out.

        Returns:  Nothing.
        Raises: Any exception raised while running is logged and re-raised.

        """
        try:
            self.task_started()

            for phase, process_steps in self.process:
                # self.process is the Task.process which will encapsulate the pre_process, main_process, and
                # post_proc.  The <phase> is a string name of the phase. <process_steps> is a list of TaskStep
                # partial objects.  Because they're partial we don't have an instance of them, so use the keywords
                # argument.
                #
                # The always_run flag is checked on EVERY step (not only the first one), so a phase containing any
                # always_run step is never skipped as a whole.
                if len(process_steps) > 0 and all(
                        td.StepStatus[step.keywords['status']] == td.StepStatus.PASSED and
                        not step.keywords.get('always_run', False) for step in process_steps):
                    # All steps for this process are passed and none has to be re-run.
                    self.log.info(
                        "[{0}]-[{1}]: is previously completed successfully.  Not re-running.".format(self.display_tree,
                                                                                                    phase)
                    )
                    continue

                # We have 1 or many steps in this process that are not PASSED (or must always run).
                current_phase = getattr(self.process, phase)
                for i, step in enumerate(current_phase, 1):
                    step_name = step.keywords['name']
                    step_status = td.StepStatus[step.keywords['status']]

                    if step_status == td.StepStatus.PASSED and not step.keywords.get('always_run', False):
                        self.log.info("[{0}]-[{1}]: Step [{2}] - Skipped. Previous run was successful.\n".format(
                            self.display_tree, phase, step_name))
                        continue

                    # Run any step that is not already PASSED.

                    # If the current step status is REBOOT_AND_RESUME, check system uptime and compare to the step
                    # end time and determine if the node was rebooted.
                    if step_status == td.StepStatus.REBOOT_AND_RESUME:
                        prev_end_time = step.keywords['end_time']
                        boot_time = cvmanager_utils.boot_time()
                        if prev_end_time > boot_time and not self.manager.args.skip_reboot:
                            # This system has NOT rebooted....exit!
                            self.log.error("[{0}]-[{1}]: Step [{2}] - The system requires reboot before proceeding"
                                           ", please reboot.  The task will resume automatically.  If it does not"
                                           " you can re-run cvmanager.py with appropriate input file.".format(
                                            self.display_tree, phase, step_name))
                            return
                        elif prev_end_time > boot_time and self.manager.args.skip_reboot:
                            self.log.warning("[{0}]-[{1}]: Step [{2}] - The system required reboot before "
                                             "proceeding, but user has specified -sr (--skip_reboot).  The task "
                                             "will launch normally, but may have unexpected results.".format(
                                              self.display_tree, phase, step_name))
                        else:
                            # mark this step passed, and continue to next step.
                            self.log.info("[{0}]-[{1}]: Step [{2}] - Node reboot complete, proceeding.".format(
                                self.display_tree, phase, step_name))
                            self.remove_task_reboot_required()

                            if not self.reboot_step_has_child_tasks():
                                # NOTE(review): the status is read everywhere via td.StepStatus[<name>], which
                                # suggests it is stored by name; here the enum member itself is stored - confirm
                                # nothing looks this value up again.
                                step.keywords['status'] = td.StepStatus.PASSED
                                continue
                            else:
                                self.log.info("Detected child tasks for [{0}], will continue running them.".
                                              format(self.display_tree))

                    self.log.info("[{0}]-[{1}]: Step [{2}] - Running.".format(
                        self.display_tree, phase, step_name))

                    # Run the actual step and check it's return code.  run_step() takes the 0-based step index.
                    ret_code = self.process.run_step(step, current_phase, i - 1)

                    if ret_code == td.StepStatus.PASSED:
                        self.log.info("[{0}]-[{1}]: Step [{2}] - Complete.\n".format(
                            self.display_tree, phase, step_name))
                    elif ret_code == td.StepStatus.REBOOT_AND_RESUME:
                        if self.kwargs.get('remote_task', False) == 1:
                            # Create file on share to notify remote machine that this is rebooting!
                            self.set_reboot_and_resume_file()
                        else:
                            # Set registry key for local reboot and resume; only parent should
                            if self.process.status.display_tree == 'Main':
                                self.set_task_reboot_required()
                            else:
                                self.log.info("Child task [{0}] requires reboot, will set parent input as restart "
                                              "level in registry.".format(self.process.status.display_tree))

                        self.log.info("[{0}]-[{1}]: Step [{2}] - Reboot Required. Rebooting.".format(
                            self.display_tree, phase, step_name))
                        return
                    elif ret_code == td.StepStatus.EXCEPTION:
                        self.log.error("[{0}]-[{1}]: Step [{2}] - Exception.  Exiting!".format(
                            self.display_tree, phase, step_name))
                        return
                    else:
                        self.log.error("[{0}]-[{1}]: Step [{2}] - Failed.  Exiting!\n".format(
                            self.display_tree, phase, step_name))
                        return

        except Exception as err:
            self.log.error(str(err))
            raise
        finally:
            # Runs on every exit path above: normal completion, early return and exception.
            self.task_complete()

    def reboot_step_has_child_tasks(self):
        """ This step, has requested and reboot, and the reboot has been verified to be complete.  Now check if there's
        any child tasks.  If there are, then we should re-run this step so that child tasks are executed.

        Don't worry about checking remote tasks for reboot, that's handled elsewhere.

        :return:
        """
        child_tasks = glob.glob(os.path.join(self.process.status.status_dir, 'Task_*'))
        if len(child_tasks) > 0:
            return True
        else:
            return False

    def __add_task_to_processing_queue(self, task):
        """ This is what pushes a new task into the processing queue.  DO NOT ADD more than once, why we have this as
        __ private method.

        :param task:
        :return:
        TODO: Add check to make sure task kas not already been push or run & exited.
        """

        self.log.info("[{0}]: Adding new child task [{1}] to processing queue.".format(self.display_tree,
                                                                                       task.display_tree))
        self.manager.queue.put(task)

        # Wait for this item to be taken from the queue before returning control; this ensures its running for next call
        while task in self.manager.queue.queue:
            # Not yet picked up; maybe add some delay here.
            # self.log.debug("Child task [{0}] waiting to be processed.".format(task))  <- This logs too much!
            continue

        return True

    def create_child_task(self, task_name, launch=True, *args, **kwargs):
        """ Create a local child task of this task and, optionally, queue it for processing.

        The active step of this task is tagged with the child's name, then the child is built with this task's
        manager, tree and display tree.  Only a child whose process status is one of the READY states gets a
        queue processor started for it (so there is a worker available to run it) and is queued when requested.

        Args:
            task_name: (str) - The name of the task.py file in the tasks repository.  Example 'Upgrade' would load
            the 'tasks\Upgrade.py' file.
            launch: (bool) - When True the ready child is pushed onto the processing queue.
            *args: Unused.
            **kwargs: Arguments handed to the child task.

        Returns:
            The child task object, whether or not it was ready / launched.

        """
        # Set the current status to the child task which was launched
        self.process.active_step.keywords['local_child_task'] = task_name
        child = TaskObject(task_name, self.manager, task_tree=self.tree, display_tree=self.display_tree, **kwargs)

        if child.process.status_code not in td.StatusStates.READY.value:
            return child

        # When we launch child tasks, we need to ensure we have adequate threads to run them
        worker = process.Process(name=child.uid, kwargs={'manager': self.manager, 'log_it': False})
        worker.start()

        if launch:
            self.__add_task_to_processing_queue(child)

        return child

    def __setup_remote_task_child_status(self, task_name, remote_task_hash):
        # This is a private method, for remote task, args['hostname'] should always be set because we're setting it.
        # Create the directory for this remote child so we know it's status.
        status_dir = self.process.status.setup_remote_child_task_status(remote_task_hash, task_name)

        # We'll now have a directory\file for the remote child task as a SUB-TASK of self (the main task)
        # /ws/ddb/cvmanager/catalog/status/Task_7181527948230575517/Remote_Task_7562788917565088710
        # <default dir>    /catalog/status/Task_UID OF THIS TASK   /Remote_Task_UID OF THE REMOTE CHILD TASK
        return status_dir

    def __remote_task_complete(self, task_name, directory):
        task_complete_file = os.path.join(directory, task_name + '.PASSED')
        if os.path.exists(task_complete_file):
            return True
        else:
            return False

    def create_remote_child_task(self, task_name, remote_host, wait=True, launch=True, *args, **kwargs):
        """
        Launches a task on a remote host, using the arch framework.
        kwargs are critical here as these are the task.kwargs that will be passed via yaml to the remote node.

        Sample YAML.
        - !Task
              type:  Task_Name
              kwargs:
                arg1 : val1
                arg2 : [list1, list2]

        Build the yaml, copy remotely using arch, then launch the task via cvmanager.py.  That's it!!  No more
        conditions or code that need to be passed.

        If with_nfs=True, task status is tracked on the NFS share accessible to all nodes.

        Args:
            task_name (str): Name of the task which we'll be remotely running.
            remote_host (str): Fully qualified hostname for the remote node.
            wait (Bool): Wait for the arch thread to exit before returning. If True, we wait to join
                to finish OR timeout.  If it's False, return the ArchCommand object to caller.
            with_nfs (Bool): Create a local directory on this machine for the remote host to write to & share it!
            launch (Bool): If True, this task thread will be started.  If False, we will NOT launch the process.  The
                reason for this is that sometimes, you launch many remote child processes and when using the NFS mode,
                we have to restart NFS server for changes to take affect.  So if we restart the NFS server while a task
                is running, it could cause the remote task to fail.  So we do not start the process when False and the
                caller must start the process.
            *args: UNUSED
            **kwargs: UNUSED in this case, as we'll be building the yaml for the task!

        Returns:
            ArchCommand - Let caller decided how to handle this.
        """
        if not isinstance(remote_host, HsObject.hs_node.HyperScaleNode):
            remote_host = HsObject.hs_node.HyperScaleNode(remote_host)

        # We need to sanitize the remote_task_kwargs and ONLY include defined meta.  Everything else is tossed out.
        # What this does is removes everything from remote_task_kwargs if its not defined in the Task.task_args
        remote_task_meta = self.locate_task_file(task_name).Task.task_args
        remote_task_kwargs = {}
        for k, v in self.kwargs.items():
            if k in remote_task_meta.keys():
                remote_task_kwargs[k] = v
            else:
                self.log.debug(
                    'Removing undefined task argument [{0}] for remote task [{1}]. If you wish to specify '
                    'this argument for the task, please add it to the task_args dictionary defined in your '
                    'task.'.format(k, task_name))

        # Check and make sure the remote_host IS NOT this same local host.  Maybe the user accidentally added
        # local host and told it to run a remote task.  This is bad and will affect the NFS share\execution\etc.
        # Tell the caller they screwed up and don't proceed.
        if cvmanager_utils.get_local_ip_for_remote_ip(remote_host.hostname) == \
                wrappers.name2ip(remote_host.hostname):
            self.log.error("You've attempted to run a remote task on the local host {0}.  You cannot use the "
                           "create_remote_child_task() on the local host, you should use the create_child_task() "
                           "method instead.".format(remote_host.hostname))
            return cvmanager_remote_command.ProcessCode.FAILED

        # For ALL remote tasks, the remote hostname should be added in the arguments.  This is used for status files
        remote_task_kwargs['hostname'] = remote_host.hostname

        # Setup the space on this controller for mounting NFS. This may or may not be used, but have it ready.
        # We to remember that THIS NODE originated the NFS share, so other nodes can connect back to it.
        for hostname in [remote_host.hostname]:
            if not self.nfs_server.add_host_to_export(hostname):
                self.log.error("Failed creating NFS share to remote host {0}.".format(hostname))
                return cvmanager_remote_command.ProcessCode.FAILED

        # Here we need to handle if the hostname are not resolvable; /etc/hosts can & will affect this.
        if wrappers.name2ip(wrappers.get_hostname()) == '':
            # This hostname is NOT resolvable, the remote node won't be able to reach it.
            local_host = cvmanager_utils.get_local_ip_for_remote_ip(remote_host.hostname)
        else:
            local_host = wrappers.get_hostname()

        remote_task_kwargs[td.NFS_MOUNT] = '{0}:{1}'.format(local_host, cvmanager_defines.NFS_SHARE)

        # This is set here to identify that this task is a remote task, so things like reboot should be skipped.
        remote_task_kwargs['remote_task'] = 1  # True

        # Get the YAML for the specified remote child task; this WILL be a cvmanager_catalog.CatalogFile() object.
        yaml_template = cvmanager_yaml.get_template(task_name, **remote_task_kwargs)
        self.temp_files.append(yaml_template)

        # Create a local status of the remote task.  This is needed so we know that this remote task passed\failed
        # For this specific local task.  In restart cases, if 1 out of 3 remote tasks failed, 2 succeeded, we should
        # know that we don't need to re-start the 2 successful ones.
        remote_task_hash = self.hasher(task_name, remote_task_kwargs)
        status_dir = self.__setup_remote_task_child_status(task_name, remote_task_hash)

        if self.__remote_task_complete(task_name, status_dir):
            self.log.info("Remote task on [{1}] has previously completed successfully; SKIPPING RE-RUN. If you wish"
                          " to re-run this remote task, remove the status file [{0}] and re-launch cvmanager.py"
                          " for this main {2} task.".format(status_dir, remote_host.hostname,
                                                            self.task_type.name))
            return cvmanager_remote_command.ProcessCode.SUCCESS

        # Lets always attempt passwordless ssh connection first.
        remote_command = cvmanager_remote_command.SSHCommand(remote_host.hostname, status_dir,
                                                             task_name, wait, remote_task_hash, yaml_template)

        if not remote_command.test_connection():
            self.log.warning("Attempting to use arch for connection to [{0}].".format(remote_host.hostname))
            run_arch_command = True

            # Destroy this.
            del remote_command
        else:
            # Passwordless ssh connection was successfully validated, do not use archd for connection.
            run_arch_command = False

        if run_arch_command:
            remote_command = cvmanager_remote_command.ArchCommand(remote_host.hostname, status_dir,
                                                                  task_name, wait, remote_task_hash, yaml_template)

        if launch:
            remote_command.run()

        return remote_command

    def task_failed_init(self):
        """Unregister this task after its initialisation failed.

        Nothing is done when the task has no (truthy) ``process`` attribute yet, or when the process
        was flagged as a duplicate task.  Otherwise the task's uid is removed from the module-level
        ``_ALL_TASKS`` registry, if it is registered there.
        """
        proc = getattr(self, 'process', False)

        # No process yet (init failed very early, perhaps a duplicate PID), or the task was only a
        # duplicate of a real one: there is nothing to clean up in either case.
        if not proc or proc.status_code == td.ProcessStatusCode.DUPLICATE_TASK:
            return

        # Drop the registry entry for this instance.
        if self.uid in _ALL_TASKS:
            del _ALL_TASKS[self.uid]

    def task_complete(self):
        """Run every end-of-task clean-up step; called when the task thread is exiting.

        Each step lives in its own ``try`` block so that a failure is logged and the remaining steps
        still run.  In order:

        1. refresh the overall task status while holding ``self.manager.mutex``;
        2. stop the avahi daemon when this is the 'Main' task and the ``use_avahi`` argument is set;
        3. save the process; for a successful 'Main' task also archive the status tree (unless
           ``do_not_save`` is set) and remove the input file that spawned the task;
        4. unmount the NFS share named by the ``td.NFS_MOUNT`` argument when it is mounted;
        5. remove the task PID file;
        6. remove the task's temporary files;
        7. for the 'Main' task, remove all NFS exports and stop the NFS server if one was created;
        8. delete the tracking directory when ``do_not_save`` is set;
        9. drop this task from the ``_ALL_TASKS`` registry.

        :raises Exception: only when the final registry clean-up fails; that failure is fatal.
        """
        try:
            with self.manager.mutex:
                self.update_task_status()
        except Exception as err:
            self.log.error("task_complete::update_task_status: {0}".format(err))

        try:
            # Check if the task needs avahi; only the root task should STOP avahi.
            if self.process.status.display_tree == 'Main':
                if self.kwargs.get('use_avahi', False):
                    self.log.debug("Stopping avahi service because task has [use_avahi] option=True.")
                    wrappers.stop_service_nostdout('avahi-daemon')
        except Exception as err:
            self.log.error("task_complete::stop avahi: {0}".format(err))

        # ONLY THE ROOT PROCESSES SHOULD DO THIS!!!!  Meaning we move the whole tree or nothing.
        try:
            self.process.save()
            if self.process.status_code == td.ProcessStatusCode.SUCCESS and self.process.status.display_tree == 'Main':
                # This is the root process that started it all, and everything is COMPLETE!  Archive the tree.
                if not self.kwargs.get('do_not_save', False):
                    self.process.status.archive_tree()

                # Delete the input file that spawned this task, so reboots don't try to re-launch it in case of
                # stale reboot registry key.
                self.manager.args.remove_input_file()
        except Exception as err:
            self.log.error("task_complete::process.save: {0}".format(err))

        # If this task mounted a remote NFS share, unmount it.
        try:
            if self.kwargs.get(td.NFS_MOUNT, False):
                # The argument has the form '<host>:<path>'; the local mount point is the path part.
                mount = self.kwargs.get(td.NFS_MOUNT).split(":")[1]
                self.log.info("Unmounting [{0}].".format(mount))
                if os.path.ismount(mount):
                    cmd = "umount -f -l {0}".format(mount)
                    ret = wrappers.waitsystem_nostdout(cmd)
                    if ret != 0:
                        self.log.error("Failed to execute cmd [{0}]...ignoring".format(cmd))
                    else:
                        # Only claim success when the command actually succeeded.
                        self.log.info("Unmounted [{0}]".format(mount))
        except Exception as err:
            self.log.error("task_complete::unmount share: {0}".format(err))

        # Remove the PID file for this task
        try:
            self.clean_task_pid_file()
        except Exception as err:
            self.log.error("task_complete::clean_task_pid_file: {0}".format(err))

        # Cleanup any temporary files created by this task.
        try:
            self.clean_task_temporary_files()
        except Exception as err:
            self.log.error("task_complete::clean_task_temporary_files: {0}".format(err))

        # The ROOT task should stop the NFS server if it created one.
        try:
            if self.process.status.display_tree == 'Main':
                if getattr(self, 'nfs_server', False):
                    self.log.debug("NFS Server cleanup procedure.")
                    self.nfs_server.remove_all_hosts_from_export()
                    self.nfs_server.stop()
        except Exception as err:
            self.log.error("task_complete::stop_NFS_server: {0}".format(err))

        # Guarded like every other step: a failure to delete the tracking directory must not prevent
        # the registry clean-up below from running.
        try:
            if self.kwargs.get('do_not_save', False):
                self.log.debug("Task has Do Not Save argument specified, removing tracking dir [{0}].".format(
                    self.process.status.status_dir))
                if os.path.exists(self.process.status.status_dir):
                    shutil.rmtree(self.process.status.status_dir)
        except Exception as err:
            self.log.error("task_complete::remove tracking dir: {0}".format(err))

        # Clean up the instance of this class.
        try:
            if self.uid in _ALL_TASKS.keys():
                del _ALL_TASKS[self.uid]
        except Exception as err:
            self.log.error("task_complete::del _ALL_TASKS: {0}".format(err))
            raise    # This is fatal!

    def update_task_status(self):
        """Derive the overall process status code from the status of every phase's steps.

        For each ``(phase, steps)`` pair yielded by ``self.process`` the serialized steps returned by
        ``self.process(phase)`` are inspected:

        * no steps at all     -> the phase is only mentioned in the debug log and does not count;
        * every step PASSED   -> the phase counts as SUCCESS;
        * any step REBOOT_AND_RESUME -> the phase counts as RESUME_AFTER_REBOOT;
        * anything else       -> the phase counts as FAILED and each step is listed in the log.

        ``self.process.status_code`` is then set to SUCCESS when every counted phase succeeded (also
        when no phase counted), to RESUME_AFTER_REBOOT when any phase needs a reboot, otherwise to
        FAILED.  Nothing is written to disk here; only the in-memory status code is updated.

        TODO: Add the reboot handling
        """
        tree = self.display_tree
        self.log.info("################### [{0}]: has completed, printing phase status(es). ###################".format(
            tree))

        phase_results = []
        for phase, process_steps in self.process:
            serialized_steps = self.process(phase)

            if len(serialized_steps) == 0:
                self.log.debug("[{0}]-[{1}]: No steps implemented for this phase.".format(tree, phase))
                continue

            if all(td.StepStatus[entry['status']] == td.StepStatus.PASSED
                   for _name, entry in serialized_steps.items()):
                # The phase has at least one step and all of them passed.
                self.log.info("[{0}]-[{1}]: Completed successfully.".format(tree, phase))
                phase_results.append(td.ProcessStatusCode.SUCCESS)
                continue

            if any(td.StepStatus[entry['status']] == td.StepStatus.REBOOT_AND_RESUME
                   for _name, entry in serialized_steps.items()):
                self.log.info("[{0}]-[{1}]: Reboot required.".format(tree, phase))
                phase_results.append(td.ProcessStatusCode.RESUME_AFTER_REBOOT)
                continue

            # Neither fully passed nor waiting for a reboot: report every step of the phase.
            self.log.info("[{0}]-[{1}]: Incomplete.".format(tree, phase))
            for step in process_steps:
                if isinstance(step, cvmanager_task_step.TaskStep):
                    shown_status, shown_name = step.status.name, step.name
                else:
                    # Not a TaskStep object: read the values from its ``keywords`` mapping.
                    shown_status = step.keywords.get('status', 'N\\A')
                    shown_name = step.keywords.get('name', 'N\\A')
                self.log.info("\t\t[{0}]: {1}".format(shown_name, shown_status))
            phase_results.append(td.ProcessStatusCode.FAILED)

        # The OVERALL status for the entire task is decided here and ONLY here; per the original
        # note these are the exit codes cvmanager.py exits with.
        if all(code == td.ProcessStatusCode.SUCCESS for code in phase_results):
            self.log.info("[{0}]: Has successfully completed all phases!".format(tree))
            overall = td.ProcessStatusCode.SUCCESS
        elif any(code == td.ProcessStatusCode.RESUME_AFTER_REBOOT for code in phase_results):
            self.log.info("[{0}]: Task will resume after rebooting!".format(tree))
            overall = td.ProcessStatusCode.RESUME_AFTER_REBOOT
        else:
            overall = td.ProcessStatusCode.FAILED
        self.process.status_code = overall

        self.log.info("################### End ###################")

    def task_started(self):
        """Flag the task's process as RUNNING and save it.

        Per the original note, this is the hook called when the task thread is started.
        """
        running_process = self.process
        running_process.status_code = td.ProcessStatusCode.RUNNING
        running_process.save()

    def check_and_return_all_child_task_statuses(self, tasks):
        """Summarise the outcome of a list of child tasks as a STEP return value.

        Entries that are not ``TaskObject`` instances never count as "reboot required" and always
        count as "not successful".

        :param tasks: list - child Task() objects of self.
        :return: ``task_defines.StepStatus.REBOOT_AND_RESUME`` when any child requires a reboot,
                 otherwise True when every child reports success, otherwise False.
        """
        reboot = task_defines.StepStatus.REBOOT_AND_RESUME

        # First pass: does any child ask for a reboot?  This takes precedence over failures.
        wants_reboot = [isinstance(child, TaskObject) and child.check_and_return_child_task_status() == reboot
                        for child in tasks]
        if any(wants_reboot):
            self.log.info("Some child task requires reboot, will reboot and resume.")
            return task_defines.StepStatus.REBOOT_AND_RESUME

        # Second pass: every entry must be a child task that reports a truthy status.
        outcomes = [isinstance(child, TaskObject) and child.check_and_return_child_task_status()
                    for child in tasks]
        if all(outcomes):
            return True

        self.log.error("Not all child tasks of Task [{0}] completed successfully.  Please check logs.".format(self))
        return False

    def check_and_return_child_task_status(self):
        """Translate this task's process status code into a STEP return value.

        :return: ``task_defines.StepStatus.REBOOT_AND_RESUME`` when the process status code is named
                 'RESUME_AFTER_REBOOT', True when it is named 'SUCCESS', otherwise False.
        :raises Exception: when ``self`` is not a TaskObject.
        """
        if not isinstance(self, TaskObject):
            raise Exception("Unknown child task type.")

        # The overall process (exit) status of the child decides the step result.
        if self.process.status_code.name == 'RESUME_AFTER_REBOOT':
            return task_defines.StepStatus.REBOOT_AND_RESUME
        return self.process.status_code.name == 'SUCCESS'

    def check_and_return_remote_process_status(self, proc):
        """Check a single remote process by delegating to the list-based checker.

        :param proc: the one remote process (or status code) to check.
        :return: whatever check_and_return_remote_processes_status() returns for ``[proc]``.
        """
        single_entry = [proc]
        return self.check_and_return_remote_processes_status(single_entry)

    def check_and_return_remote_processes_status(self, processes, skip_reboot_check=False):
        """Check that every remote process succeeded, waiting for rebooting nodes when that can help.

        An entry succeeded when its status (its ``status`` attribute, or the entry itself when it has
        no such attribute) is ``ProcessCode.SUCCESS`` or equals 0.  Process return codes are used
        here: 0 is success and anything non-zero is a failure, so boolean True/False must not be
        mixed in.

        When failures exist, each failed RemoteCommand is checked for a reboot-and-resume file in its
        local working directory:

        * no failed command is rebooting, or ``skip_reboot_check`` is set -> False;
        * at least one failure is not a reboot (including failed entries that are not RemoteCommand
          objects, such as a bare failed status code) -> False, there is no point in waiting;
        * every failure is a reboot -> wait for the reboots, relaunch those commands, wait for them
          and return the result of re-checking them once (with the reboot check skipped).

        :param processes: list - cvmanager_remote_command.RemoteCommand objects and/or status codes.
        :param skip_reboot_check: bool - when True, never wait for rebooting nodes.
        :return: bool - True if ALL remote processes succeeded, else False.
        """
        success = cvmanager_remote_command.ProcessCode.SUCCESS

        def _succeeded(proc):
            # One success test for the whole method, so a status stored as the enum member and a
            # status stored as the integer 0 are treated the same way everywhere.
            status = getattr(proc, 'status', proc)
            return status is success or status == 0

        failed = [proc for proc in processes if not _succeeded(proc)]
        if not failed:
            # No failed remote processes!
            return True

        # Only RemoteCommand objects can be inspected for a pending reboot.
        failed_cmds = [proc for proc in failed if isinstance(proc, cvmanager_remote_command.RemoteCommand)]
        # Any other failed entry is a plain failure that waiting for a reboot can never repair.
        has_unrecoverable_entries = len(failed_cmds) != len(failed)

        rebooting = []
        hard_failed = []
        for cmd in failed_cmds:
            reboot_file = os.path.join(cmd.local_remote_task_working_dir,
                                       cvmanager_defines.RTC_REBOOT_AND_RESUME.format(cmd.uid))
            if os.path.exists(reboot_file):
                # The remote command left a reboot and resume file on the share of the controlling
                # node, so it is rebooting.
                rebooting.append(cmd)
            else:
                hard_failed.append(cmd)

        if not rebooting or skip_reboot_check:
            # Remote command failed and its not a reboot scenario for any remote process.
            self.log.error("Remote task execution failed on remote node(s): {0}".format(", ".join(
                [t.remote_hostname for t in failed_cmds])))
            return False

        if hard_failed or has_unrecoverable_entries:
            # Some process failed outright.  Even if another one only needs a reboot there is no
            # point waiting for it, the overall result is already a failure.
            self.log.error("Remote node(s) are rebooting, and will automatically resume: {0}".format(", ".join(
                [t.remote_hostname for t in rebooting])))
            self.log.error("Process failed on remote node(s): {0}".format(", ".join(
                [t.remote_hostname for t in hard_failed])))
            return False

        # Every failure is a node that went down for reboot.
        self.log.info("Waiting for remote node(s) [{0}] to reboot and automatically resume.".format(", ".join(
            [t.remote_hostname for t in rebooting])))

        if not cvmanager_reboot.RemoteCommandReboot(rebooting).wait_for_reboot():
            # All reboots not successful
            return False

        self.log.info("All nodes successfully rebooted, resuming the remote task.")

        # Launch and wait for the remote process again, then check once more without reboot handling.
        self.wait_for_remote_processes(rebooting, launch=True)
        return self.check_and_return_remote_processes_status(rebooting, True)

    def wait_for_remote_process(self, proc, launch=False, timeout=cvmanager_defines.CHILD_TASK_TIMEOUT_SECONDS):
        """
        Wait for ONE remote process; a thin wrapper around wait_for_remote_processes().
        :param proc: a single remote process, handed on as a one-element list.
        :param launch: bool - Launch the process if not launched
        :param timeout: int - timeout in seconds to wait for the remote process.
        :return: the return value of wait_for_remote_processes().
        """
        single_entry = [proc]
        return self.wait_for_remote_processes(single_entry, launch, timeout)

    def wait_for_remote_processes(self, processes, launch=False, timeout=cvmanager_defines.CHILD_TASK_TIMEOUT_SECONDS):
        """
        Wait until every remote process in the list has a status, optionally launching them first.

        Only entries that are cvmanager_remote_command.RemoteCommand objects are launched, polled and
        torn down; any other entry (for instance a bare status code) is ignored.  A process is still
        pending while its ``status`` is None or ``ProcessCode.NOT_STARTED``; the list is polled
        every 5 seconds.

        :param processes: list - remote processes (ssh OR arch) and/or status codes.
        :param launch: bool - Launch the processes if they are not launched.
        :param timeout: int - timeout in seconds to wait for the remote processes to complete.
        :return: None
        :raises OSError: when at least one process is still pending once the timeout has elapsed.
        """
        remote_cmds = [p for p in processes if isinstance(p, cvmanager_remote_command.RemoteCommand)]

        if launch:
            for p in processes:
                if p and isinstance(p, cvmanager_remote_command.RemoteCommand):
                    p.run(timeout)

        def _any_pending():
            return any(cmd.status is None or cmd.status == cvmanager_remote_command.ProcessCode.NOT_STARTED
                       for cmd in remote_cmds)

        # This is for handling ArchCommand remote processes; per the original note p.run() is
        # blocking for cvmanager_remote_command.SSHCommand.
        start = timer()
        while _any_pending() and (timer() - start) < timeout:
            time.sleep(5)

        # Decide on the actual process state, not on the clock: processes that finished during the
        # last sleep are not a timeout, and processes still pending at the limit always are.
        if _any_pending():
            # This won't actually kill it, but the remote processes are daemon threads, so they are just aborted.
            for cmd in remote_cmds:
                cmd.time_out_tear_down()
            raise OSError('Timed out waiting for remote process to complete!')

        self.log.info("All remote processes have exited.  Checking the exit statuses...")

    def wait_for_child_tasks(self, tasks, launch=False, timeout=cvmanager_defines.CHILD_TASK_TIMEOUT_SECONDS):
        """  Wait for all specified child tasks to complete.  Any defined Task() that launches multiple child tasks,
        can use this for wait for ALL tasks to complete.

        :param tasks: list - List of child Task() objects.
        :param launch: bool - when True, add every task to the processing queue before waiting.
        :param timeout: int - how long to wait for child Task() to complete before raising timeout exception.
        :return: list - one entry per item in ``tasks``: the result of its wait_for_child_task() call, or
                 None for entries that are not TaskObject instances.
        """
        if launch:
            # Explicit loop: map() is lazy on Python 3 and would never queue anything.
            for task in tasks:
                self.__add_task_to_processing_queue(task)

        self.log.info("Waiting for [{0}] child task thread(s) to exit.".format(len(tasks)))
        # A real list (not a lazy map object) guarantees that every wait has happened on return.
        return [task.wait_for_child_task(timeout) if isinstance(task, TaskObject) else None for task in tasks]

    def wait_for_child_task(self, timeout=cvmanager_defines.CHILD_TASK_TIMEOUT_SECONDS):
        """ Any defined Task() that launches a child task locally, should use this to wait for completion of the child
        task.  Nothing is returned or set; this only waits for the task to stop running.

        The task counts as finished as soon as its uid is no longer in the ``_ALL_TASKS`` registry or
        its process status code is anything other than RUNNING.  The status is polled every 10
        seconds and is always checked before the timeout is, so a task that finished during the last
        sleep is not reported as a timeout.

        NOTE: self should be the CHILD task, not this task.  If parent task calls this, it will endless loop until
        timeout.

        Example:
            child_task = self.create_child_task('child', **kwargs)
            child_task.wait_for_child_task()

        :param timeout: int - Default time limit for how long to wait for the task to complete.
        :return: none.
        :raises OSError: when the task is still running after ``timeout`` seconds.
        """
        start = timer()
        while self.uid in _ALL_TASKS:
            # Task is still alive
            self.log.debug('[{0}]: Active'.format(self.display_tree))

            # check the OVERALL status of this task.  This is not if it was successful or not.
            if self.process.status_code == td.ProcessStatusCode.RUNNING:
                if (timer() - start) >= timeout:
                    self.log.debug('[{0} - {1}]: Child Task Timeout!'.format(self, self.uid))
                    raise OSError('Timed out waiting for child tasks to complete!')
                # Check the task status every 10 seconds.
                time.sleep(10)
            else:
                self.log.debug('[{0}]: Completed.'.format(self.display_tree))
                return

    def kill_all(self):
        """Call ``done()`` on every task currently held in the ``_ALL_TASKS`` registry.

        The previous loop used ``self.uid`` as its loop variable, which overwrote this task's own
        uid with each registry key and only ever called ``done()`` on ``self``.

        NOTE(review): assumes the registry values are the task objects themselves - confirm.
        """
        # Iterate over a snapshot: done() may well remove entries from the registry.
        for task in list(_ALL_TASKS.values()):
            task.done()

    def set_task_reboot_required(self):
        """Set (or update) the registry entry that names the input file to resume after a reboot.

        The path comes from ``self.manager.args.get_input_path()`` and is stored under
        ``cvmanager_defines.REG_MA_KEY`` as ``sHyperScaleManagerResumeAfterReboot``.

        :raises Exception: when no input file path is available.
        """
        input_path = self.manager.args.get_input_path()
        if not input_path:
            raise Exception("The input file for this task is gone, can't reboot!  You must manually resume this task"
                            " after rebooting.")

        # 4/17: EF - the input file is registered as-is; only parent tasks save input and can be
        # resumed, child task(s) are never resumed on their own.
        common.setregistryentry(cvmanager_defines.REG_MA_KEY, "sHyperScaleManagerResumeAfterReboot", input_path)

    def __reboot_and_resume_file(self):
        """ Build the path of this task's reboot marker, a file named after the task uid via
        ``cvmanager_defines.RTC_REBOOT_AND_RESUME`` (e.g. 4385395038709166627.reboot_and_resume).

        Per the original notes: the presence of this file on the share tells the controlling node
        that the remote node has gone down for reboot; the controlling node waits for the file to be
        removed and then for the remote task to complete.  Local reboots do not use this file, they
        rely on the registry key instead.

        :return: str - path to the task reboot file on the share, or False when the task has no
                 ``td.NFS_MOUNT`` argument (a locally running task).
        """
        if not self.kwargs.get(td.NFS_MOUNT, False):
            # Local task: no reboot file is created, it will reboot itself and resume.
            return False

        # The task run is utilizing an NFS share, so the marker lives in the share directory.
        share_dir = cvmanager_defines.TaskDir.share
        marker_name = cvmanager_defines.RTC_REBOOT_AND_RESUME.format(self.uid)
        return os.path.join(share_dir, marker_name)

    def remove_task_reboot_required(self):
        """Remove the reboot and resume file, indicating that the reboot is complete.

        Only tasks that have a reboot file path (those using the NFS share) remove anything.  A
        failure to delete the file is logged and otherwise ignored.

        :return: bool - always True.
        """
        reboot_file = self.__reboot_and_resume_file()
        if reboot_file:
            self.log.info("Removing reboot and resume file: [{0}]".format(reboot_file))
            try:
                os.unlink(reboot_file)
            except (IOError, OSError) as err:
                # os.unlink() reports failures as OSError, which is not an IOError on Python 2.
                # This is non-fatal error, so just keep going!
                self.log.error('{0}'.format(err))

        return True

    def set_reboot_and_resume_file(self):
        """Create the (empty) reboot and resume file for this task, when it has one.

        Per the original note this is only relevant when this task (self) was launched remotely: the
        file tells the calling task that the remote node is down for reboot.

        :return: bool - always True.
        """
        marker_path = self.__reboot_and_resume_file()
        if marker_path:
            self.log.info("Saving reboot and resume file: [{0}]".format(marker_path))
            # Opening for writing creates (or truncates) the file; nothing is written to it.
            with open(marker_path, 'w'):
                pass

        return True
