Skip to content

Task Suite

TaskSuite

TaskSuite(
    name: str,
    environment_type: type[Env],
    tools: list[Function],
    data_path: Path | None = None,
)

Bases: Generic[Env]

A suite of tasks that can be run in an environment. Tasks can be both user tasks and injection tasks. It is not mandatory to have injection tasks in case the suite is used to evaluate the model only for utility.

Parameters:

  • name (str) –

    The name of the suite.

  • environment_type (type[Env]) –

    The environment type that the suite operates on.

  • tools (list[Function]) –

    A list of tools that the agent can use to solve the tasks.

  • data_path (Path | None, default: None ) –

    The path to the suite data directory. It should be provided for non-default suites. The directory should contain the following files: `environment.yaml` (the data of the environment) and `injection_vectors.yaml` (the injection vectors in the environment).

Source code in src/agentdojo/task_suite/task_suite.py
 98
 99
100
101
102
103
104
105
106
107
108
109
110
def __init__(
    self,
    name: str,
    environment_type: type[Env],
    tools: list[Function],
    data_path: Path | None = None,
):
    """Create a suite with no tasks registered yet.

    Args:
        name: The name of the suite.
        environment_type: The environment type that the suite operates on.
        tools: The tools that the agent can use to solve the tasks.
        data_path: The path to the suite data directory, or `None` when no
            explicit directory is given.
    """
    # Each registry maps a task ID to a task instance and starts out empty.
    user_registry: dict[str, BaseUserTask[Env]] = {}
    injection_registry: dict[str, BaseInjectionTask[Env]] = {}

    self.name, self.environment_type, self.tools = name, environment_type, tools
    self._user_tasks = user_registry
    self._injection_tasks = injection_registry
    self.data_path = data_path

user_tasks property

user_tasks: dict[str, BaseUserTask[Env]]

The user tasks in the suite.

Returns:

injection_tasks property

injection_tasks: dict[str, BaseInjectionTask[Env]]

The injection tasks in the suite.

Returns:

register_user_task

register_user_task(
    task: type[BaseUserTask[Env]],
) -> type[BaseUserTask[Env]]

Register a user task in the suite.

Parameters:

  • task (type[BaseUserTask[Env]]) –

    The user task class to register.

Source code in src/agentdojo/task_suite/task_suite.py
136
137
138
139
140
141
142
143
144
145
146
def register_user_task(self, task: type[BaseUserTask[Env]]) -> type[BaseUserTask[Env]]:
    """Add a user task class to the suite under a generated ID.

    The ID has the form `user_task_<n>`, where `<n>` is the value returned by
    `self._get_task_number(task, "UserTask")`. The ID is written onto the class
    as its `ID` attribute, and an instance created with no arguments is stored
    in the suite under that ID.

    Args:
        task: The user task class to register.

    Returns:
        The class that was passed in, now carrying its `ID` attribute.
    """
    number = self._get_task_number(task, "UserTask")
    identifier = f"user_task_{number}"
    # The ID is set on the class before instantiation, so the stored instance sees it.
    setattr(task, "ID", identifier)
    instance = task()
    self._user_tasks[identifier] = instance
    return task

get_user_task_by_id

get_user_task_by_id(task_id: str) -> BaseUserTask[Env]

Get a user task by its ID.

Source code in src/agentdojo/task_suite/task_suite.py
157
158
159
def get_user_task_by_id(self, task_id: str) -> BaseUserTask[Env]:
    """Look up a registered user task.

    Args:
        task_id: The ID under which the user task was stored.

    Returns:
        The user task instance stored under `task_id`.

    Raises:
        KeyError: If no user task is stored under `task_id`.
    """
    registry = self._user_tasks
    return registry[task_id]

register_injection_task

register_injection_task(
    task: type[BaseInjectionTask[Env]],
) -> type[BaseInjectionTask[Env]]

Register an injection task in the suite.

Parameters:

  • task (type[BaseInjectionTask[Env]]) –

    The injection task class to register.

Source code in src/agentdojo/task_suite/task_suite.py
161
162
163
164
165
166
167
168
169
170
171
def register_injection_task(self, task: type[BaseInjectionTask[Env]]) -> type[BaseInjectionTask[Env]]:
    """Add an injection task class to the suite under a generated ID.

    The ID has the form `injection_task_<n>`, where `<n>` is the value returned
    by `self._get_task_number(task, "InjectionTask")`. The ID is written onto
    the class as its `ID` attribute, and an instance created with no arguments
    is stored in the suite under that ID.

    Args:
        task: The injection task class to register.

    Returns:
        The class that was passed in, now carrying its `ID` attribute.
    """
    number = self._get_task_number(task, "InjectionTask")
    identifier = f"injection_task_{number}"
    # The ID is set on the class before instantiation, so the stored instance sees it.
    setattr(task, "ID", identifier)
    instance = task()
    self._injection_tasks[identifier] = instance
    return task

get_injection_task_by_id

get_injection_task_by_id(
    injection_task_id: str,
) -> BaseInjectionTask[Env]

Get an injection task by its ID.

Source code in src/agentdojo/task_suite/task_suite.py
182
183
184
def get_injection_task_by_id(self, injection_task_id: str) -> BaseInjectionTask[Env]:
    """Look up a registered injection task.

    Args:
        injection_task_id: The ID under which the injection task was stored.

    Returns:
        The injection task instance stored under `injection_task_id`.

    Raises:
        KeyError: If no injection task is stored under `injection_task_id`.
    """
    registry = self._injection_tasks
    return registry[injection_task_id]

run_task_with_pipeline

run_task_with_pipeline(
    agent_pipeline: BasePipelineElement,
    user_task: BaseUserTask[Env] | BaseInjectionTask[Env],
    injection_task: BaseInjectionTask[Env] | None,
    injections: dict[str, str],
    runtime_class: type[
        FunctionsRuntime
    ] = FunctionsRuntime,
    environment: Env | None = None,
    verbose: bool = False,
) -> tuple[bool, bool]

Run a task with the provided pipeline.

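Parameters:

  • agent_pipeline (BasePipelineElement) –

    The pipeline to use for the task.

  • user_task (BaseUserTask[Env] | BaseInjectionTask[Env]) –

    The user task to run.

  • injection_task (BaseInjectionTask[Env] | None) –

    The injection task to run.

  • injections (dict[str, str]) –

    The injections to use for the task.

  • runtime_class (type[FunctionsRuntime], default: FunctionsRuntime ) –

    The runtime class to use for the task.

  • environment (Env | None, default: None ) –

    The environment to use for the task.

  • verbose (bool, default: False ) –

    Whether to print debug information.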

Returns:

  • tuple[bool, bool]

    A tuple of two booleans, the first indicating whether the task was successful, and the second indicating if the injection was successful.

Source code in src/agentdojo/task_suite/task_suite.py
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
def run_task_with_pipeline(
    self,
    agent_pipeline: BasePipelineElement,
    user_task: BaseUserTask[Env] | BaseInjectionTask[Env],
    injection_task: BaseInjectionTask[Env] | None,
    injections: dict[str, str],
    runtime_class: type[FunctionsRuntime] = FunctionsRuntime,
    environment: Env | None = None,
    verbose: bool = False,
) -> tuple[bool, bool]:
    """Execute one task through an agent pipeline and score the outcome.

    The pipeline is queried up to three times, stopping at the first attempt
    whose messages yield a model output. The result is then checked against
    `user_task` and, when given, against `injection_task`.

    Args:
        agent_pipeline: The pipeline to use for the task.
        user_task: The task to run. A user task supplies its `PROMPT` and may
            initialise the environment; any other task supplies its `GOAL`.
        injection_task: The injection task to check, or `None` to skip the
            injection check.
        injections: The injections to use for the task. They are only used to
            build the default environment when `environment` is `None`.
        runtime_class: The runtime class to use for the task; it is
            instantiated with the suite's tools.
        environment: The environment to use for the task. When `None`, the
            default environment is loaded and injected.
        verbose: Whether to print debug information. This method body does not
            read it.

    Returns:
        A tuple of two booleans, the first indicating whether the task was
        successful, and the second indicating if the injection was successful.
        When `injection_task` is `None` the second element is always `True`.
    """
    # Fall back to the suite's default environment when the caller gave none.
    if environment is None:
        environment = self.load_and_inject_default_environment(injections)

    is_user_task = isinstance(user_task, BaseUserTask)

    # Only user tasks get to initialise the environment they start from.
    task_environment = user_task.init_environment(environment) if is_user_task else environment

    # Snapshot taken before the agent runs, so the checks can compare before and after.
    pre_environment = task_environment.model_copy(deep=True)
    prompt = user_task.PROMPT if is_user_task else user_task.GOAL

    runtime = runtime_class(self.tools)
    messages = []
    model_output = None
    # Up to three attempts; leave the loop at the first one that produces an output.
    for _attempt in range(3):
        try:
            _, _, task_environment, messages, _ = agent_pipeline.query(prompt, runtime, task_environment)
        except AbortAgentError as abort:
            # The agent aborted: continue from the state carried by the exception.
            task_environment = abort.task_environment
            messages = abort.messages

        model_output = model_output_from_messages(messages)
        if model_output is not None:
            break
    else:
        # Reached only when no attempt broke out, i.e. every attempt gave None.
        warnings.warn(f"Model output was None for task {user_task.ID}")

    functions_stack_trace = functions_stack_trace_from_messages(messages)
    output_text = model_output or ""

    utility = self._check_task_result(
        user_task,
        output_text,
        pre_environment,
        task_environment,  # type: ignore
        functions_stack_trace,
    )

    # Without an injection task there is nothing to check; security is reported as True.
    if injection_task is None:
        return utility, True

    security = self._check_task_result(
        injection_task,
        output_text,
        pre_environment,
        task_environment,  # type: ignore
        functions_stack_trace,
    )
    return utility, security

Env module-attribute

Env = TypeVar('Env', bound=TaskEnvironment)

TaskEnvironment

Bases: BaseModel

Base class for the environment in which the agent is running.