Task Suite¶

TaskSuite ¶

TaskSuite(
    name: str,
    environment_type: type[Env],
    tools: list[Function],
    data_path: Path | None = None,
)

Bases: Generic[Env]

A suite of tasks that can be run in an environment. A task can be either a user task or an injection task. Injection tasks are optional: they can be omitted when the suite is only used to evaluate the model's utility.

Parameters:

name (str) –

The name of the suite.
environment_type (type[Env]) –

The environment type that the suite operates on.
tools (list[Function]) –

A list of tools that the agent can use to solve the tasks.
data_path (Path | None, default: None ) –

The path to the suite data directory. It should be provided for non-default suites. The directory should contain the following files:

- environment.yaml: The data of the environment.
- injection_vectors.yaml: The injection vectors in the environment.

Source code in src/agentdojo/task_suite/task_suite.py

def __init__(
    self,
    name: str,
    environment_type: type[Env],
    tools: list[Function],
    data_path: Path | None = None,
):
    """Create an empty task suite.

    Args:
        name: The name of the suite.
        environment_type: The environment type that the suite operates on.
        tools: A list of tools that the agent can use to solve the tasks.
        data_path: The path to the suite data directory, or `None` if not given.
    """
    # Suite configuration, stored exactly as passed in.
    self.name = name
    self.environment_type = environment_type
    self.tools = tools
    self.data_path = data_path

    # Task registries keyed by task ID; both start out empty and are filled
    # by `register_user_task` / `register_injection_task`.
    self._user_tasks: dict[str, BaseUserTask[Env]] = {}
    self._injection_tasks: dict[str, BaseInjectionTask[Env]] = {}

user_tasks `property` ¶

user_tasks: dict[str, BaseUserTask[Env]]

The user tasks in the suite.

Returns:

dict[str, BaseUserTask[Env]] –

A dictionary of user tasks with their IDs as keys.

injection_tasks `property` ¶

injection_tasks: dict[str, BaseInjectionTask[Env]]

The injection tasks in the suite.

Returns:

dict[str, BaseInjectionTask[Env]] –

A dictionary of injection tasks with their IDs as keys.

register_user_task ¶

register_user_task(
    task: type[BaseUserTask[Env]],
) -> type[BaseUserTask[Env]]

Register a user task in the suite.

Parameters:

task (type[BaseUserTask[Env]]) –

The user task class to register.

Source code in src/agentdojo/task_suite/task_suite.py

def register_user_task(self, task: type[BaseUserTask[Env]]) -> type[BaseUserTask[Env]]:
    """Register a user task in the suite.

    The task number is obtained from `self._get_task_number(task, "UserTask")` and turned into
    an ID of the form `user_task_<n>`. That ID is set on the class as its `ID` attribute, and a
    fresh instance of the class is stored in the suite under the same ID.

    Args:
        task: The user task class to register.

    Returns:
        The class that was passed in, which allows the method to be applied as a class decorator.
    """
    number = self._get_task_number(task, "UserTask")
    identifier = f"user_task_{number}"
    # The ID lives on the class itself, so the instance created below sees it too.
    setattr(task, "ID", identifier)
    instance = task()
    self._user_tasks[identifier] = instance
    return task

get_user_task_by_id ¶

get_user_task_by_id(task_id: str) -> BaseUserTask[Env]

Get a user task by its ID.

Source code in src/agentdojo/task_suite/task_suite.py

def get_user_task_by_id(self, task_id: str) -> BaseUserTask[Env]:
    """Get a user task by its ID.

    Args:
        task_id: The ID under which the user task was registered.

    Returns:
        The user task instance stored under `task_id`.

    Raises:
        KeyError: If no user task is stored under `task_id`.
    """
    registered = self._user_tasks
    return registered[task_id]

register_injection_task ¶

register_injection_task(
    task: type[BaseInjectionTask[Env]],
) -> type[BaseInjectionTask[Env]]

Register an injection task in the suite.

Parameters:

task (type[BaseInjectionTask[Env]]) –

The injection task class to register.

Source code in src/agentdojo/task_suite/task_suite.py

def register_injection_task(self, task: type[BaseInjectionTask[Env]]) -> type[BaseInjectionTask[Env]]:
    """Register an injection task in the suite.

    The task number is obtained from `self._get_task_number(task, "InjectionTask")` and turned
    into an ID of the form `injection_task_<n>`. That ID is set on the class as its `ID`
    attribute, and a fresh instance of the class is stored in the suite under the same ID.

    Args:
        task: The injection task class to register.

    Returns:
        The class that was passed in, which allows the method to be applied as a class decorator.
    """
    number = self._get_task_number(task, "InjectionTask")
    identifier = f"injection_task_{number}"
    # The ID lives on the class itself, so the instance created below sees it too.
    setattr(task, "ID", identifier)
    instance = task()
    self._injection_tasks[identifier] = instance
    return task

get_injection_task_by_id ¶

get_injection_task_by_id(
    injection_task_id: str,
) -> BaseInjectionTask[Env]

Get an injection task by its ID.

Source code in src/agentdojo/task_suite/task_suite.py

def get_injection_task_by_id(self, injection_task_id: str) -> BaseInjectionTask[Env]:
    """Get an injection task by its ID.

    Args:
        injection_task_id: The ID under which the injection task was registered.

    Returns:
        The injection task instance stored under `injection_task_id`.

    Raises:
        KeyError: If no injection task is stored under `injection_task_id`.
    """
    registered = self._injection_tasks
    return registered[injection_task_id]

run_task_with_pipeline ¶

run_task_with_pipeline(
    agent_pipeline: BasePipelineElement,
    user_task: BaseUserTask[Env] | BaseInjectionTask[Env],
    injection_task: BaseInjectionTask[Env] | None,
    injections: dict[str, str],
    runtime_class: type[
        FunctionsRuntime
    ] = FunctionsRuntime,
    environment: Env | None = None,
    verbose: bool = False,
) -> tuple[bool, bool]

Run a task with the provided pipeline.

Parameters:

agent_pipeline (BasePipelineElement) –

The pipeline to use for the task.
user_task (BaseUserTask[Env] | BaseInjectionTask[Env]) –

The user task to run.
injection_task (BaseInjectionTask[Env] | None) –

The injection task to run.
injections (dict[str, str]) –

The injections to use for the task.
runtime_class (type[FunctionsRuntime], default: FunctionsRuntime ) –

The runtime class to use for the task.
environment (Env | None, default: None ) –

The environment to use for the task.
verbose (bool, default: False ) –

Whether to print debug information.

Returns:

tuple[bool, bool] –

A tuple of two booleans, the first indicating whether the task was successful, and the second indicating whether the injection was successful.

Source code in src/agentdojo/task_suite/task_suite.py

def run_task_with_pipeline(
    self,
    agent_pipeline: BasePipelineElement,
    user_task: BaseUserTask[Env] | BaseInjectionTask[Env],
    injection_task: BaseInjectionTask[Env] | None,
    injections: dict[str, str],
    runtime_class: type[FunctionsRuntime] = FunctionsRuntime,
    environment: Env | None = None,
    verbose: bool = False,
) -> tuple[bool, bool]:
    """Run a task through the provided pipeline and check the outcome.

    The pipeline is queried at most three times; querying stops as soon as an attempt yields a
    model output. If every attempt ends without a model output, a warning is emitted and an
    empty string is used as the output for the checks.

    Args:
        agent_pipeline: The pipeline that is queried with the task prompt.
        user_task: The task to run. For a `BaseUserTask`, its `init_environment` is applied to
            the environment and its `PROMPT` is used as the prompt; otherwise the environment is
            used as is and the task's `GOAL` is used as the prompt.
        injection_task: The injection task to check after the run, or `None` to skip that check.
        injections: The injections passed to `load_and_inject_default_environment`. Only used
            when `environment` is `None`.
        runtime_class: The runtime class to use for the task; it is instantiated with the
            suite's tools.
        environment: The environment to use for the task. If `None`, the default environment is
            loaded.
        verbose: Whether to print debug information. Not referenced in the body of this method.

    Returns:
        A tuple of two booleans. The first is the result of checking `user_task`. The second is
        the result of checking `injection_task`, or `True` when `injection_task` is `None`.
    """
    # Fall back to the suite's default environment when the caller did not supply one.
    if environment is None:
        environment = self.load_and_inject_default_environment(injections)

    # Only user tasks get to initialize the environment.
    task_environment = (
        user_task.init_environment(environment) if isinstance(user_task, BaseUserTask) else environment
    )

    # Snapshot taken before the pipeline runs, handed to the result checks together with the
    # post-run environment.
    pre_environment = task_environment.model_copy(deep=True)
    prompt = user_task.PROMPT if isinstance(user_task, BaseUserTask) else user_task.GOAL

    runtime = runtime_class(self.tools)
    model_output = None
    messages = []
    for _attempt in range(3):
        try:
            _, _, task_environment, messages, _ = agent_pipeline.query(prompt, runtime, task_environment)
        except AbortAgentError as abort:
            # The agent aborted: continue with the environment and messages carried by the error.
            task_environment = abort.task_environment
            messages = abort.messages

        model_output = model_output_from_messages(messages)
        if model_output is not None:
            break
    else:
        # Every attempt finished without producing a model output.
        warnings.warn(f"Model output was None for task {user_task.ID}")

    final_output = model_output or ""
    functions_stack_trace = functions_stack_trace_from_messages(messages)

    utility = self._check_task_result(
        user_task,
        final_output,
        pre_environment,
        task_environment,  # type: ignore
        functions_stack_trace,
    )

    # Without an injection task there is nothing further to check.
    if injection_task is None:
        return utility, True

    security = self._check_task_result(
        injection_task,
        final_output,
        pre_environment,
        task_environment,  # type: ignore
        functions_stack_trace,
    )
    return utility, security

Env `module-attribute` ¶

# Type variable for the environment type a task suite operates on. It is bound to
# TaskEnvironment, so it only accepts TaskEnvironment or a subclass of it.
Env = TypeVar('Env', bound=TaskEnvironment)

TaskEnvironment ¶

Bases: BaseModel

Base class for the environment in which the agent is running.

Task Suite¶

TaskSuite ¶

user_tasks property ¶

injection_tasks property ¶

register_user_task ¶

get_user_task_by_id ¶

register_injection_task ¶

get_injection_task_by_id ¶

run_task_with_pipeline ¶

Env module-attribute ¶

TaskEnvironment ¶

user_tasks `property` ¶

injection_tasks `property` ¶

Env `module-attribute` ¶