Skip to content

Task Suite

TaskSuite

TaskSuite(
    name: str,
    environment_type: type[Env],
    tools: list[Function],
    data_path: Path | None = None,
    benchmark_version: BenchmarkVersion = (1, 0, 0),
)

Bases: Generic[Env]

A suite of tasks that can be run in an environment. Tasks can be both user tasks and injection tasks. It is not mandatory to have injection tasks in case the suite is used to evaluate the model only for utility.

Parameters:

  • name (str) –

    The name of the suite.

  • environment_type (type[Env]) –

    The environment type that the suite operates on.

  • tools (list[Function]) –

    A list of tools that the agent can use to solve the tasks.

  • data_path (Path | None, default: None ) –

    The path to the suite data directory. It should be provided for non-default suites. The directory should contain the following files: `environment.yaml` (the data of the environment) and `injection_vectors.yaml` (the injection vectors in the environment).

Source code in src/agentdojo/task_suite/task_suite.py
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
def __init__(
    self,
    name: str,
    environment_type: type[Env],
    tools: list[Function],
    data_path: Path | None = None,
    benchmark_version: BenchmarkVersion = (1, 0, 0),
):
    """Create a suite with empty user-task and injection-task registries.

    Args:
        name: The name of the suite.
        environment_type: The environment type that the suite operates on.
        tools: A list of tools that the agent can use to solve the tasks.
        data_path: The path to the suite data directory, or `None`.
        benchmark_version: The benchmark version stored on the suite.
    """
    # Task registries: task ID -> {benchmark version -> task instance}.
    # `defaultdict(dict)` creates the per-ID version mapping on first access.
    self._injection_tasks: dict[str, dict[BenchmarkVersion, BaseInjectionTask[Env]]] = defaultdict(dict)
    self._user_tasks: dict[str, dict[BenchmarkVersion, BaseUserTask[Env]]] = defaultdict(dict)

    # Suite configuration, stored exactly as given.
    self.benchmark_version = benchmark_version
    self.data_path = data_path
    self.tools = tools
    self.environment_type = environment_type
    self.name = name

user_tasks property

user_tasks: dict[str, BaseUserTask[Env]]

The user tasks in the suite.

Returns:

  • dict[str, BaseUserTask[Env]] –

    The user tasks in the suite, keyed by task ID.

injection_tasks property

injection_tasks: dict[str, BaseInjectionTask[Env]]

The injection tasks in the suite.

Returns:

  • dict[str, BaseInjectionTask[Env]] –

    The injection tasks in the suite, keyed by task ID.

register_user_task

register_user_task(
    task: type[BaseUserTask[Env]],
) -> type[BaseUserTask[Env]]

Register a user task in the suite.

Parameters:

  • task (type[BaseUserTask[Env]]) –

    The user task class to register.

Source code in src/agentdojo/task_suite/task_suite.py
163
164
165
166
167
168
169
170
171
172
173
def register_user_task(self, task: type[BaseUserTask[Env]]) -> type[BaseUserTask[Env]]:
    """Register a user task class as version (1, 0, 0) of a task in the suite.

    The task number comes from `self._get_task_number(task, "UserTask")`;
    the resulting ID `user_task_<n>` is written to the class as `ID`, and a
    fresh instance of the class is stored in the registry.

    Args:
        task: The user task class to register.

    Returns:
        The class that was passed in, so the method can be used as a decorator.
    """
    number = self._get_task_number(task, "UserTask")
    identifier = f"user_task_{number}"
    setattr(task, "ID", identifier)
    # Instantiate before touching the registry so a failing constructor
    # leaves no empty entry behind.
    instance = task()
    self._user_tasks[identifier][(1, 0, 0)] = instance
    return task

update_user_task

update_user_task(benchmark_version: BenchmarkVersion)

Updates a user task in the suite and makes it part of the benchmark with the given version

Parameters:

  • benchmark_version (BenchmarkVersion) –

    The benchmark version this task belongs to.

Source code in src/agentdojo/task_suite/task_suite.py
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
def update_user_task(self, benchmark_version: BenchmarkVersion):
    """Build a decorator that registers a new version of an existing user task.

    The decorated class must map (via `self._get_task_number`) to a task ID
    that is already present in the suite; an instance of it is then stored
    under `benchmark_version` for that ID.

    Args:
        benchmark_version: The benchmark version this task belongs to.

    Returns:
        A decorator that takes the user task class, registers it, and
        returns it. The decorator raises `ValueError` if the task ID is not
        already registered.
    """

    def decorator(task: type[BaseUserTask[Env]]) -> type[BaseUserTask[Env]]:
        number = self._get_task_number(task, "UserTask")
        task_id = f"user_task_{number}"
        already_registered = task_id in self._user_tasks
        if not already_registered:
            raise ValueError(f"User task {task_id} not found in suite")
        setattr(task, "ID", task_id)
        updated = task()
        self._user_tasks[task_id][benchmark_version] = updated
        return task

    return decorator

get_user_task_by_id cached

get_user_task_by_id(task_id: str) -> BaseUserTask[Env]

Get a user task by its ID.

Source code in src/agentdojo/task_suite/task_suite.py
203
204
205
206
@lru_cache
def get_user_task_by_id(self, task_id: str) -> BaseUserTask[Env]:
    """Return the user task stored under `task_id` in `self.user_tasks`.

    Results are memoized by `lru_cache`, keyed on `(self, task_id)`.
    """
    available = self.user_tasks
    return available[task_id]

get_latest_user_task_by_id cached

get_latest_user_task_by_id(
    task_id: str, version_upperbound: BenchmarkVersion
) -> BaseUserTask[Env]

Get a user task by its ID in its latest version.

Source code in src/agentdojo/task_suite/task_suite.py
208
209
210
211
212
213
@lru_cache
def get_latest_user_task_by_id(self, task_id: str, version_upperbound: BenchmarkVersion) -> BaseUserTask[Env]:
    """Get a user task by its ID in its latest version below a version bound.

    Args:
        task_id: The ID of the user task to look up.
        version_upperbound: Exclusive upper bound; only versions strictly
            lower than this are considered.

    Returns:
        The task registered under the highest version that is strictly
        lower than `version_upperbound`.

    Raises:
        ValueError: If no task is registered under `task_id`, or none of its
            versions is strictly lower than `version_upperbound`.
    """
    # `_user_tasks` is a defaultdict: indexing it with an unknown ID would
    # silently insert an empty entry and make the ID look registered to
    # later membership checks. `.get()` looks up without inserting.
    tasks = self._user_tasks.get(task_id)
    if not tasks:
        raise ValueError(f"User task {task_id} not found in suite")
    eligible = [version for version in tasks if version < version_upperbound]
    if not eligible:
        raise ValueError(f"User task {task_id} has no version lower than {version_upperbound}")
    return tasks[max(eligible)]

get_latest_injection_task_by_id cached

get_latest_injection_task_by_id(
    task_id: str, version_upperbound: BenchmarkVersion
) -> BaseInjectionTask[Env]

Get an injection task by its ID in its latest version.

Source code in src/agentdojo/task_suite/task_suite.py
215
216
217
218
219
220
221
222
@lru_cache
def get_latest_injection_task_by_id(
    self, task_id: str, version_upperbound: BenchmarkVersion
) -> BaseInjectionTask[Env]:
    """Get an injection task by its ID in its latest version below a version bound.

    Args:
        task_id: The ID of the injection task to look up.
        version_upperbound: Exclusive upper bound; only versions strictly
            lower than this are considered.

    Returns:
        The task registered under the highest version that is strictly
        lower than `version_upperbound`.

    Raises:
        ValueError: If no task is registered under `task_id`, or none of its
            versions is strictly lower than `version_upperbound`.
    """
    # `_injection_tasks` is a defaultdict: indexing it with an unknown ID
    # would silently insert an empty entry and make the ID look registered
    # to later membership checks. `.get()` looks up without inserting.
    tasks = self._injection_tasks.get(task_id)
    if not tasks:
        raise ValueError(f"Injection task {task_id} not found in suite")
    eligible = [version for version in tasks if version < version_upperbound]
    if not eligible:
        raise ValueError(f"Injection task {task_id} has no version lower than {version_upperbound}")
    return tasks[max(eligible)]

register_new_injection_task

register_new_injection_task(
    min_version, task: type[BaseInjectionTask[Env]]
) -> type[BaseInjectionTask[Env]]

Register an injection task in the suite.

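Parameters:

  • min_version –

    The benchmark version under which the task is registered.

  • task (type[BaseInjectionTask[Env]]) –

    The injection task class to register.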

Source code in src/agentdojo/task_suite/task_suite.py
225
226
227
228
229
230
231
232
233
234
235
def register_new_injection_task(self,  min_version, task: type[BaseInjectionTask[Env]]) -> type[BaseInjectionTask[Env]]:
    """Register an injection task class in the suite under a given version.

    The task number comes from `self._get_task_number(task, "InjectionTask")`;
    the resulting ID `injection_task_<n>` is written to the class as `ID`,
    and a fresh instance is stored in the registry under `min_version`.

    Args:
        min_version: The benchmark version key under which the instance is stored.
        task: The injection task class to register.

    Returns:
        The class that was passed in.
    """
    number = self._get_task_number(task, "InjectionTask")
    identifier = f"injection_task_{number}"
    setattr(task, "ID", identifier)
    # Instantiate before touching the registry so a failing constructor
    # leaves no empty entry behind.
    instance = task()
    self._injection_tasks[identifier][min_version] = instance
    return task

register_injection_task

register_injection_task(
    task: type[BaseInjectionTask[Env]],
) -> type[BaseInjectionTask[Env]]

Register an injection task in the suite.

Parameters:

  • task (type[BaseInjectionTask[Env]]) –

    The injection task class to register.

Source code in src/agentdojo/task_suite/task_suite.py
237
238
239
240
241
242
243
def register_injection_task(self,  task: type[BaseInjectionTask[Env]]) -> type[BaseInjectionTask[Env]]:
    """Register an injection task class as version (1, 0, 0) in the suite.

    Thin wrapper that delegates to `register_new_injection_task`.

    Args:
        task: The injection task class to register.

    Returns:
        Whatever `register_new_injection_task` returns for this class.
    """
    initial_version = (1, 0, 0)
    return self.register_new_injection_task(initial_version, task)

update_injection_task

update_injection_task(
    benchmark_version: BenchmarkVersion, new=False
)

Updates an injection task in the suite and makes it part of the benchmark with the given version

Parameters:

  • benchmark_version (BenchmarkVersion) –

    The benchmark version this task belongs to.

Source code in src/agentdojo/task_suite/task_suite.py
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
def update_injection_task(self, benchmark_version: BenchmarkVersion, new: bool = False):
    """Updates an injection task in the suite and makes it part of the benchmark
    with the given version

    Args:
        benchmark_version: The benchmark version this task belongs to.
        new: If `True`, a task whose ID is not yet in the suite is registered
            as a new task under `benchmark_version` instead of raising.

    Returns:
        A decorator that takes the injection task class, registers an
        instance of it under `benchmark_version`, and returns the class.
        The decorator raises `ValueError` if the task ID is unknown and
        `new` is false.
    """

    def decorator(task: type[BaseInjectionTask[Env]]) -> type[BaseInjectionTask[Env]]:
        task_n = self._get_task_number(task, "InjectionTask")
        task_id = f"injection_task_{task_n}"
        if task_id not in self._injection_tasks and not new:
            raise ValueError(f"Injection task {task_id} not found in suite")
        # Registering a brand-new task and updating an existing one store the
        # same thing (ID on the class, one instance under `benchmark_version`),
        # so both cases share this single path and the class is instantiated
        # exactly once.
        setattr(task, "ID", task_id)
        self._injection_tasks[task_id][benchmark_version] = task()
        return task

    return decorator

get_injection_task_by_id

get_injection_task_by_id(
    injection_task_id: str,
) -> BaseInjectionTask[Env]

Get an injection task by its ID.

Source code in src/agentdojo/task_suite/task_suite.py
276
277
278
def get_injection_task_by_id(self, injection_task_id: str) -> BaseInjectionTask[Env]:
    """Return the injection task stored under `injection_task_id` in `self.injection_tasks`."""
    available = self.injection_tasks
    return available[injection_task_id]

run_task_with_pipeline

run_task_with_pipeline(
    agent_pipeline: BasePipelineElement,
    user_task: BaseUserTask[Env] | BaseInjectionTask[Env],
    injection_task: BaseInjectionTask[Env] | None,
    injections: dict[str, str],
    runtime_class: type[
        FunctionsRuntime
    ] = FunctionsRuntime,
    environment: Env | None = None,
    verbose: bool = False,
) -> tuple[bool, bool]

Run a task with the provided pipeline.

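Parameters:

  • agent_pipeline (BasePipelineElement) –

    The pipeline to use for the task.

  • user_task (BaseUserTask[Env] | BaseInjectionTask[Env]) –

    The user task to run.

  • injection_task (BaseInjectionTask[Env] | None) –

    The injection task to run.

  • injections (dict[str, str]) –

    The injections to use for the task.

  • runtime_class (type[FunctionsRuntime], default: FunctionsRuntime ) –

    The runtime class to use for the task.

  • environment (Env | None, default: None ) –

    The environment to use for the task.

  • verbose (bool, default: False ) –

    Whether to print debug information.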

Returns:

  • tuple[bool, bool] –

    A tuple of two booleans, the first indicating whether the task was successful, and the second indicating if the injection was successful.

Source code in src/agentdojo/task_suite/task_suite.py
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
def run_task_with_pipeline(
    self,
    agent_pipeline: BasePipelineElement,
    user_task: BaseUserTask[Env] | BaseInjectionTask[Env],
    injection_task: BaseInjectionTask[Env] | None,
    injections: dict[str, str],
    runtime_class: type[FunctionsRuntime] = FunctionsRuntime,
    environment: Env | None = None,
    verbose: bool = False,
) -> tuple[bool, bool]:
    """Run a task through the given pipeline and score the outcome.

    The pipeline is queried up to three times, stopping as soon as a model
    output can be extracted from the resulting messages. Each retry starts
    from the environment left behind by the previous attempt. The final
    state is then checked against the user task and, if one is given,
    against the injection task.

    Args:
        agent_pipeline: The pipeline to use for the task.
        user_task: The user task to run. An injection task may be passed
            here instead, in which case its `GOAL` is used as the prompt.
        injection_task: The injection task to run, or `None`.
        injections: The injections to use for the task.
        runtime_class: The runtime class to use for the task.
        environment: The environment to use for the task. If `None`, the
            default environment is loaded with `injections` applied.
        verbose: Whether to print debug information (not read in this body).

    Returns:
        A tuple of two booleans, the first indicating whether the task was
        successful, and the second indicating if the injection was
        successful. The second is always `True` when `injection_task` is
        `None`.
    """
    # Fall back to the suite's default environment when the caller gave none.
    if environment is None:
        environment = self.load_and_inject_default_environment(injections)

    # A user task may customise the environment and is driven by its PROMPT;
    # anything else runs on the environment as-is and is driven by its GOAL.
    if isinstance(user_task, BaseUserTask):
        task_environment = user_task.init_environment(environment)
        prompt = user_task.PROMPT
    else:
        task_environment = environment
        prompt = user_task.GOAL

    # Snapshot the starting state so the checks can diff against it later.
    pre_environment = task_environment.model_copy(deep=True)

    runtime = runtime_class(self.tools)
    messages = []
    model_output = None
    for _attempt in range(3):
        try:
            _, _, task_environment, messages, _ = agent_pipeline.query(prompt, runtime, task_environment)
        except AbortAgentError as abort:
            # An aborted run still carries the last environment and messages.
            task_environment, messages = abort.task_environment, abort.messages

        model_output = model_output_from_messages(messages)
        if model_output is not None:
            break

    if model_output is None:
        warnings.warn(f"Model output was None for task {user_task.ID}")

    functions_stack_trace = functions_stack_trace_from_messages(messages)
    # Both checks are evaluated against exactly the same run artefacts.
    run_artefacts = (model_output or "", pre_environment, task_environment, functions_stack_trace)

    utility = self._check_task_result(user_task, *run_artefacts)  # type: ignore

    # Without an injection task there is nothing further to check.
    if injection_task is None:
        return utility, True

    security = self._check_task_result(injection_task, *run_artefacts)  # type: ignore
    return utility, security

Env module-attribute

# Type variable for the environment type a suite operates on; bound to TaskEnvironment.
Env = TypeVar('Env', bound=TaskEnvironment)

TaskEnvironment

Bases: BaseModel

Base class for the environment in which the agent is running.