Active Learning (AL)
====================

Active Learning is a use case of iDDS. The purpose of iDDS AL is to run an 'active learning' process whose result tells the production system whether to continue a processing chain.

iDDS AL workflow
^^^^^^^^^^^^^^^^

.. image:: ../images/v2/activelearning.png
   :alt: iDDS ActiveLearning

Active Learning employs iDDS DAG workflow management to define tasks:

1. It uses a processing template and a learning template to define the processing workflow.
2. It uses a Condition branch to control the workflow.
3. When executing, the processing template generates a PanDA task.
4. When the PanDA task finishes, the learning template generates a learning task, which runs in the iDDS internal condor cluster and analyses the outputs of the PanDA task. The result of the learning task decides whether to generate a new PanDA task or to terminate the workflow.

processing task (ATLASPandaWork)
--------------------------------

1. Upload inputs to the PanDA cache server and define the task parameter map.

.. code-block:: python

    import json
    import re
    import time

    try:
        from urllib import quote            # Python 2
    except ImportError:
        from urllib.parse import quote      # Python 3

    from pandatools import Client

    from idds.client.clientmanager import ClientManager
    from idds.common.utils import get_rest_host, run_command
    from idds.workflow.workflow import Condition, Workflow
    from idds.atlas.workflow.atlaspandawork import ATLASPandaWork
    from idds.atlas.workflow.atlasactuatorwork import ATLASActuatorWork


    def get_task_id(output, error):
        # Parse the JEDI task id from the prun output.
        m = re.search(r'jediTaskID=(\d+)', output + error)
        task_id = int(m.group(1))
        return task_id


    def submit_processing_task():
        outDS = "user.wguan.altest%s" % str(int(time.time()))
        cmd = "cd /afs/cern.ch/user/w/wguan/workdisk/iDDS/main/lib/idds/tests/activelearning_test_codes; prun --exec 'python simplescript.py 0.5 0.5 200 output.json' --outDS %s --outputs output.json --nJobs=10" % outDS
        status, output, error = run_command(cmd)
        """
        status: 0
        output:
        error: INFO : gathering files under /afs/cern.ch/user/w/wguan/workdisk/iDDS/main/lib/idds/tests/activelearning_test_codes
               INFO : upload source files
               INFO : submit user.wguan.altest1234/
               INFO : succeeded. new jediTaskID=23752996
        """
        if status == 0:
            task_id = get_task_id(output, error)
            return task_id
        else:
            raise Exception(output + error)


    def get_panda_task_paramsmap(panda_task_id):
        status, task_param_map = Client.getTaskParamsMap(panda_task_id)
        if status == 0:
            task_param_map = json.loads(task_param_map)
            return task_param_map
        return None


    def define_panda_task_paramsmap():
        # Here a fake method is used: a seed PanDA task is submitted and its
        # parameter map is fetched back. Users should define the task params
        # map by themselves. The returned params map looks like:
        # (0, '{"buildSpec": {"jobParameters": "-i ${IN} -o ${OUT} --sourceURL ${SURL} -r . ", "archiveName": "sources.0ca6a2fb-4ad0-42d0-979d-aa7c284f1ff7.tar.gz", "prodSourceLabel": "panda"}, "sourceURL": "https://aipanda048.cern.ch:25443", "cliParams": "prun --exec \\"python simplescript.py 0.5 0.5 200 output.json\\" --outDS user.wguan.altest1234 --outputs output.json --nJobs=10", "site": null, "vo": "atlas", "respectSplitRule": true, "osInfo": "Linux-3.10.0-1127.19.1.el7.x86_64-x86_64-with-centos-7.9.2009-Core", "log": {"type": "template", "param_type": "log", "container": "user.wguan.altest1234.log/", "value": "user.wguan.altest1234.log.$JEDITASKID.${SN}.log.tgz", "dataset": "user.wguan.altest1234.log/"}, "transUses": "", "excludedSite": [], "nMaxFilesPerJob": 200, "uniqueTaskName": true, "noInput": true, "taskName": "user.wguan.altest1234/", "transHome": null, "includedSite": null, "nEvents": 10, "nEventsPerJob": 1, "jobParameters": [{"type": "constant", "value": "-j \\"\\" --sourceURL ${SURL}"}, {"type": "constant", "value": "-r ."}, {"padding": false, "type": "constant", "value": "-p \\""}, {"padding": false, "type": "constant", "value": "python%20simplescript.py%200.5%200.5%20200%20output.json"}, {"type": "constant", "value": "\\""}, {"type": "constant", "value": "-l ${LIB}"}, {"container": "user.wguan.altest1234_output.json/", "value": "user.wguan.$JEDITASKID._${SN/P}.output.json", "dataset": "user.wguan.altest1234_output.json/", "param_type": "output", "hidden": true, "type": "template"}, {"type": "constant", "value": "-o \\"{\'output.json\': \'user.wguan.$JEDITASKID._${SN/P}.output.json\'}\\""}], "prodSourceLabel": "user", "processingType": "panda-client-1.4.47-jedi-run", "architecture": "@centos7", "userName": "Wen Guan", "taskType": "anal", "taskPriority": 1000, "countryGroup": "us"}')  # noqa E501
        task_id = submit_processing_task()
        task_param_map = get_panda_task_paramsmap(task_id)

        cmd_to_arguments = {'arguments': 'python simplescript.py 0.5 0.5 200',
                            'parameters': 'python simplescript.py {m1} {m2} {nevents}'}
        # Update cliParams to contain undefined parameters: the fixed arguments
        # 'python simplescript.py 0.5 0.5 200' become the template
        # 'python simplescript.py {m1} {m2} {nevents}'. The placeholders {m1},
        # {m2} and {nevents} will later be filled with the outputs of the
        # learning script.
        task_param_map['cliParams'] = task_param_map['cliParams'].replace(cmd_to_arguments['arguments'],
                                                                          cmd_to_arguments['parameters'])
        jobParameters = task_param_map['jobParameters']
        for p in jobParameters:
            if 'value' in p:
                # jobParameters values are url-encoded, so replace the quoted form.
                p['value'] = p['value'].replace(quote(cmd_to_arguments['arguments']),
                                                quote(cmd_to_arguments['parameters']))
        return task_param_map

2. Define the PanDA work.

.. code-block:: python

    task_param_map = define_panda_task_paramsmap()
    panda_work = ATLASPandaWork(panda_task_paramsmap=task_param_map)

    # initialize_work() parses the PanDA task parameter information, for
    # example the output dataset name, which is needed to define the next task.
    # If this information is not needed here, you don't have to call it
    # manually; iDDS will call it internally to parse the information.
    panda_work.initialize_work()

learning task (ATLASActuatorWork)
---------------------------------

1. Define the learning task.

   (a) The input collection of the learning task is the output collection of the PanDA task. iDDS downloads all files of this dataset to local storage and processes them.

   (b) The sandbox below reuses the PanDA task's sandbox. You can also use the iDDS cache server for it.

.. code-block:: python

    work_output_coll = panda_work.get_output_collections()[0]

    # By default the PanDA output collection is not closed. If it's not
    # closed, iDDS will keep polling it without stopping, so force it closed.
    input_coll = {'scope': work_output_coll['scope'],
                  'name': work_output_coll['name'],
                  'coll_metadata': {'force_close': True}}
    output_coll = {'scope': work_output_coll['scope'],
                   'name': work_output_coll['name'] + "." + str(int(time.time()))}

    # actuator = ATLASActuatorWork(executable='python', arguments='merge.py {output_json} {events} {dataset}/{filename}',
    actuator = ATLASActuatorWork(executable='python', arguments='merge.py {output_json} {events} {dataset}',
                                 parameters={'output_json': 'merge.json',
                                             'events': 200,
                                             'dataset': '{scope}:{name}'.format(**input_coll),
                                             'filename': 'output*.json'},
                                 sandbox=panda_work.sandbox, primary_input_collection=input_coll,
                                 output_collections=output_coll, output_json='merge.json')
Define workflow
---------------

.. code-block:: python

    wf = Workflow()
    wf.add_work(panda_work)
    wf.add_work(actuator)

    # When the PanDA task finishes, run the learning task.
    cond = Condition(panda_work.is_finished, current_work=panda_work, true_work=actuator, false_work=None)
    wf.add_condition(cond)
    # When the learning task decides to generate a new task, run the PanDA task again.
    cond1 = Condition(actuator.generate_new_task, current_work=actuator, true_work=panda_work, false_work=None)
    wf.add_condition(cond1)

    # Because the two works are in a loop, they are not independent. This call
    # is needed to tell iDDS which one to start with; otherwise iDDS will
    # start with the first work added.
    wf.add_initial_works(panda_work)

The AL example
--------------

See examples in https://github.com/wguanicedew/iDDS/blob/dev/main/lib/idds/tests/test_activelearning.py
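The test finishes by submitting the workflow to the iDDS server, roughly as follows. This is a minimal sketch using the ``ClientManager`` imported earlier, assuming the iDDS REST host is configured locally; the exact client API may vary between iDDS versions.

.. code-block:: python

    # Sketch: submit the Active Learning workflow to the iDDS server.
    host = get_rest_host()                    # iDDS REST host from the local configuration
    client_manager = ClientManager(host=host)
    request_id = client_manager.submit(wf)
    print('submitted workflow as request %s' % request_id)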