Skip to content

Task Suite

TaskSuite

TaskSuite(
    name: str,
    environment_type: type[Env],
    tools: list[Function],
    data_path: Path | None = None,
    benchmark_version: BenchmarkVersion = (1, 0, 0),
)

Bases: Generic[Env]

A suite of tasks that can be run in an environment. Tasks can be both user tasks and injection tasks. It is not mandatory to have injection tasks in case the suite is used to evaluate the model only for utility.

Parameters:

  • name (str) –

    The name of the suite.

  • environment_type (type[Env]) –

    The environment type that the suite operates on.

  • tools (list[Function]) –

    A list of tools that the agent can use to solve the tasks.

  • data_path (Path | None, default: None ) –

    The path to the suite data directory. It should be provided for non-default suites. The directory should contain the following files:

      - environment.yaml: The data of the environment.
      - injection_vectors.yaml: The injection vectors in the environment.

Source code in src/agentdojo/task_suite/task_suite.py
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
def __init__(
    self,
    name: str,
    environment_type: type[Env],
    tools: list[Function],
    data_path: Path | None = None,
    benchmark_version: BenchmarkVersion = (1, 0, 0),
):
    """Create a suite whose user-task and injection-task registries start empty.

    Args:
        name: The name of the suite.
        environment_type: The environment type that the suite operates on.
        tools: The tools that the agent can use to solve the tasks.
        data_path: The path to the suite data directory, or None.
        benchmark_version: The benchmark version stored on the suite.
    """
    # Both registries map task ID -> {benchmark version -> task instance}.
    # They are defaultdicts, so indexing with a new task ID creates its
    # (empty) per-version mapping on the spot.
    user_task_registry: dict[str, dict[BenchmarkVersion, BaseUserTask[Env]]] = defaultdict(dict)
    injection_task_registry: dict[str, dict[BenchmarkVersion, BaseInjectionTask[Env]]] = defaultdict(dict)
    self._user_tasks = user_task_registry
    self._injection_tasks = injection_task_registry

    # Plain configuration, stored as given.
    self.name = name
    self.environment_type = environment_type
    self.tools = tools
    self.data_path = data_path
    self.benchmark_version = benchmark_version

user_tasks property

user_tasks: dict[str, BaseUserTask[Env]]

The user tasks in the suite.

Returns:

injection_tasks property

injection_tasks: dict[str, BaseInjectionTask[Env]]

The injection tasks in the suite.

Returns:

register_user_task

register_user_task(
    task: type[BaseUserTask[Env]],
) -> type[BaseUserTask[Env]]

Register a user task in the suite.

Parameters:

Source code in src/agentdojo/task_suite/task_suite.py
161
162
163
164
165
166
167
168
169
170
171
def register_user_task(self, task: type[BaseUserTask[Env]]) -> type[BaseUserTask[Env]]:
    """Add a user task class to the suite under benchmark version (1, 0, 0).

    The task number comes from `self._get_task_number`; the resulting ID is
    written to the class as `ID` and an instance of the class is stored in
    the user-task registry.

    Args:
        task: The user task class to register.

    Returns:
        The class that was passed in (now carrying its `ID`), which allows
        this method to be applied as a class decorator.
    """
    number = self._get_task_number(task, "UserTask")
    identifier = f"user_task_{number}"
    task.ID = identifier
    # Instantiate before touching the registry so a failing constructor
    # leaves the registry untouched.
    instance = task()
    self._user_tasks[identifier][(1, 0, 0)] = instance
    return task

update_user_task

update_user_task(benchmark_version: BenchmarkVersion)

Updates a user task in the suite and makes it part of the benchmark with the given version

Parameters:

  • benchmark_version (BenchmarkVersion) –

    The benchmark version this task belongs to.

Source code in src/agentdojo/task_suite/task_suite.py
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
def update_user_task(self, benchmark_version: BenchmarkVersion):
    """Build a class decorator that adds a new version of an existing user task.

    Args:
        benchmark_version: The benchmark version this task belongs to.

    Returns:
        A decorator that takes a user task class, sets its `ID`, stores an
        instance of it under `benchmark_version`, and returns the class. The
        decorator raises `ValueError` when no user task with the same ID is
        present in the suite.
    """

    def register_version(task: type[BaseUserTask[Env]]) -> type[BaseUserTask[Env]]:
        number = self._get_task_number(task, "UserTask")
        identifier = f"user_task_{number}"
        # Only a task that is already in the suite can receive a new version.
        if identifier in self._user_tasks:
            task.ID = identifier
            self._user_tasks[identifier][benchmark_version] = task()
            return task
        raise ValueError(f"User task {identifier} not found in suite")

    return register_version

get_user_task_by_id cached

get_user_task_by_id(task_id: str) -> BaseUserTask[Env]

Get a user task by its ID.

Source code in src/agentdojo/task_suite/task_suite.py
201
202
203
204
@lru_cache
def get_user_task_by_id(self, task_id: str) -> BaseUserTask[Env]:
    """Look up a user task of this suite by its ID.

    Results are memoized by `lru_cache`, keyed on `(self, task_id)`.
    """
    # NOTE(review): lru_cache on a method also keys on `self`, so the cache
    # keeps a reference to every suite it has been called on (ruff B019).
    available_tasks = self.user_tasks
    return available_tasks[task_id]

get_latest_user_task_by_id cached

get_latest_user_task_by_id(
    task_id: str, version_upperbound: BenchmarkVersion
) -> BaseUserTask[Env]

Get a user task by its ID in its latest version.

Source code in src/agentdojo/task_suite/task_suite.py
206
207
208
209
210
211
@lru_cache
def get_latest_user_task_by_id(self, task_id: str, version_upperbound: BenchmarkVersion) -> BaseUserTask[Env]:
    """Get the newest version of a user task that is below a version bound.

    Args:
        task_id: The ID of the user task to look up.
        version_upperbound: Exclusive upper bound; only versions that compare
            strictly lower than this value are considered.

    Returns:
        The task instance registered under the highest qualifying version.

    Raises:
        ValueError: If the task ID is unknown, or if none of its registered
            versions is lower than `version_upperbound`.
    """
    # Use .get() instead of indexing: the registry is a defaultdict, so
    # indexing with an unknown ID would silently insert an empty entry and
    # make the ID look registered to later membership checks.
    versions = self._user_tasks.get(task_id, {})
    latest = max((v for v in versions if v < version_upperbound), default=None)
    if latest is None:
        raise ValueError(f"No version of user task {task_id} found below benchmark version {version_upperbound}")
    return versions[latest]

get_latest_injection_task_by_id cached

get_latest_injection_task_by_id(
    task_id: str, version_upperbound: BenchmarkVersion
) -> BaseInjectionTask[Env]

Get an injection task by its ID in its latest version.

Source code in src/agentdojo/task_suite/task_suite.py
213
214
215
216
217
218
219
220
@lru_cache
def get_latest_injection_task_by_id(
    self, task_id: str, version_upperbound: BenchmarkVersion
) -> BaseInjectionTask[Env]:
    """Get the newest version of an injection task that is below a version bound.

    Args:
        task_id: The ID of the injection task to look up.
        version_upperbound: Exclusive upper bound; only versions that compare
            strictly lower than this value are considered.

    Returns:
        The task instance registered under the highest qualifying version.

    Raises:
        ValueError: If the task ID is unknown, or if none of its registered
            versions is lower than `version_upperbound`.
    """
    # Use .get() instead of indexing: the registry is a defaultdict, so
    # indexing with an unknown ID would silently insert an empty entry and
    # make the ID look registered to later membership checks.
    versions = self._injection_tasks.get(task_id, {})
    latest = max((v for v in versions if v < version_upperbound), default=None)
    if latest is None:
        raise ValueError(f"No version of injection task {task_id} found below benchmark version {version_upperbound}")
    return versions[latest]

register_injection_task

register_injection_task(
    task: type[BaseInjectionTask[Env]],
) -> type[BaseInjectionTask[Env]]

Register an injection task in the suite.

Parameters:

Source code in src/agentdojo/task_suite/task_suite.py
222
223
224
225
226
227
228
229
230
231
232
def register_injection_task(self, task: type[BaseInjectionTask[Env]]) -> type[BaseInjectionTask[Env]]:
    """Add an injection task class to the suite under benchmark version (1, 0, 0).

    The task number comes from `self._get_task_number`; the resulting ID is
    written to the class as `ID` and an instance of the class is stored in
    the injection-task registry.

    Args:
        task: The injection task class to register.

    Returns:
        The class that was passed in (now carrying its `ID`), which allows
        this method to be applied as a class decorator.
    """
    number = self._get_task_number(task, "InjectionTask")
    identifier = f"injection_task_{number}"
    task.ID = identifier
    # Instantiate before touching the registry so a failing constructor
    # leaves the registry untouched.
    instance = task()
    self._injection_tasks[identifier][(1, 0, 0)] = instance
    return task

update_injection_task

update_injection_task(benchmark_version: BenchmarkVersion)

Updates an injection task in the suite and makes it part of the benchmark with the given version

Parameters:

  • benchmark_version (BenchmarkVersion) –

    The benchmark version this task belongs to.

Source code in src/agentdojo/task_suite/task_suite.py
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
def update_injection_task(self, benchmark_version: BenchmarkVersion):
    """Build a class decorator that adds a new version of an existing injection task.

    Args:
        benchmark_version: The benchmark version this task belongs to.

    Returns:
        A decorator that takes an injection task class, sets its `ID`, stores
        an instance of it under `benchmark_version`, and returns the class.
        The decorator raises `ValueError` when no injection task with the
        same ID is present in the suite.
    """

    def register_version(task: type[BaseInjectionTask[Env]]) -> type[BaseInjectionTask[Env]]:
        number = self._get_task_number(task, "InjectionTask")
        identifier = f"injection_task_{number}"
        # Only a task that is already in the suite can receive a new version.
        if identifier in self._injection_tasks:
            task.ID = identifier
            self._injection_tasks[identifier][benchmark_version] = task()
            return task
        raise ValueError(f"Injection task {identifier} not found in suite")

    return register_version

get_injection_task_by_id

get_injection_task_by_id(
    injection_task_id: str,
) -> BaseInjectionTask[Env]

Get an injection task by its ID.

Source code in src/agentdojo/task_suite/task_suite.py
262
263
264
def get_injection_task_by_id(self, injection_task_id: str) -> BaseInjectionTask[Env]:
    """Look up an injection task of this suite by its ID.

    Raises whatever `self.injection_tasks` raises for a missing key.
    """
    available_tasks = self.injection_tasks
    return available_tasks[injection_task_id]

run_task_with_pipeline

run_task_with_pipeline(
    agent_pipeline: BasePipelineElement,
    user_task: BaseUserTask[Env] | BaseInjectionTask[Env],
    injection_task: BaseInjectionTask[Env] | None,
    injections: dict[str, str],
    runtime_class: type[
        FunctionsRuntime
    ] = FunctionsRuntime,
    environment: Env | None = None,
    verbose: bool = False,
) -> tuple[bool, bool]

Run a task with the provided pipeline.

Parameters:

Returns:

  • tuple[bool, bool]

    A tuple of two booleans, the first indicating whether the task was successful, and the second indicating if the injection was successful.

Source code in src/agentdojo/task_suite/task_suite.py
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
def run_task_with_pipeline(
    self,
    agent_pipeline: BasePipelineElement,
    user_task: BaseUserTask[Env] | BaseInjectionTask[Env],
    injection_task: BaseInjectionTask[Env] | None,
    injections: dict[str, str],
    runtime_class: type[FunctionsRuntime] = FunctionsRuntime,
    environment: Env | None = None,
    verbose: bool = False,
) -> tuple[bool, bool]:
    """Execute one task through the agent pipeline and score the outcome.

    The pipeline is queried up to three times, stopping as soon as a model
    output can be extracted from the returned messages. Each retry starts
    from the environment left behind by the previous attempt.

    Args:
        agent_pipeline: The pipeline to use for the task.
        user_task: The task to run. A user task contributes its `PROMPT` and
            initializes the environment; an injection task contributes its
            `GOAL` and uses the environment as is.
        injection_task: The injection task to check afterwards, or None when
            no injection is intended.
        injections: The injections to use when the default environment has to
            be loaded.
        runtime_class: The runtime class, instantiated with the suite's tools.
        environment: The environment to use; when None, the default
            environment is loaded with `injections` applied.
        verbose: Whether to print debug information. Not read anywhere in
            this method body.

    Returns:
        A `(utility, security)` tuple: the result of checking `user_task`,
        and the result of checking `injection_task`. When `injection_task`
        is None the second element is always True.
    """
    # Fall back to the suite's default environment when none was supplied.
    if environment is None:
        environment = self.load_and_inject_default_environment(injections)

    # Only user tasks get to initialize the environment and supply a PROMPT;
    # anything else is run on the environment as is, with its GOAL as prompt.
    running_user_task = isinstance(user_task, BaseUserTask)
    task_environment = user_task.init_environment(environment) if running_user_task else environment

    # Snapshot taken before the agent acts, so the result check can compare
    # the environment before and after the run.
    pre_environment = task_environment.model_copy(deep=True)
    prompt = user_task.PROMPT if running_user_task else user_task.GOAL

    runtime = runtime_class(self.tools)
    model_output = None
    messages = []
    for _attempt in range(3):
        try:
            _, _, task_environment, messages, _ = agent_pipeline.query(prompt, runtime, task_environment)
        except AbortAgentError as abort:
            # An aborted run still carries the last environment and messages.
            task_environment, messages = abort.task_environment, abort.messages

        model_output = model_output_from_messages(messages)
        if model_output is not None:
            break
    else:
        # Every attempt finished without an extractable model output.
        warnings.warn(f"Model output was None for task {user_task.ID}")

    final_output = model_output or ""
    functions_stack_trace = functions_stack_trace_from_messages(messages)
    utility = self._check_task_result(
        user_task,
        final_output,
        pre_environment,
        task_environment,  # type: ignore
        functions_stack_trace,
    )

    # Without an injection task there is nothing more to check.
    if injection_task is None:
        return utility, True

    security = self._check_task_result(
        injection_task,
        final_output,
        pre_environment,
        task_environment,  # type: ignore
        functions_stack_trace,
    )
    return utility, security

Env module-attribute

Env = TypeVar('Env', bound=TaskEnvironment)

TaskEnvironment

Bases: BaseModel

Base class for the environment in which the agent is running.