Skip to content

core / internal_action_interface

core.internal_action_interface

core.internal_action_interface

This interface exposes the agent actions that call into the agent framework's internal functions. Most functions are not implemented yet.

InternalActionInterface

Provides static/class methods so it can be used without instantiation. Allows the agent to access internal functions of the WhiteCollarAgent framework via actions.

Source code in core\internal_action_interface.py
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
class InternalActionInterface:
    """
    Expose WhiteCollarAgent framework internals to agent actions.

    Every entry point is a static or class method, so actions can call it
    without creating an instance. The collaborating interfaces are held as
    class attributes and are registered through :meth:`initialize`.
    """

    # Shared collaborators; each stays ``None`` until ``initialize`` sets it.
    llm_interface: Optional[LLMInterface] = None
    task_manager: Optional[TaskManager] = None
    state_manager: Optional[StateManager] = None
    vlm_interface: Optional[VLMInterface] = None

    @classmethod
    def initialize(cls, llm_interface: LLMInterface,
                   task_manager: TaskManager, state_manager: StateManager,
                   vlm_interface: VLMInterface | None = None):
        """
        Register the shared interfaces that actions depend on.

        The references are stored on the class itself, so every later
        static/class call sees them without creating new instances.

        Args:
            llm_interface: Language model interface used by :meth:`use_llm`.
            task_manager: Task manager used by the task-management actions.
            state_manager: State manager used by :meth:`do_chat` and
                :meth:`do_create_and_run_task`.
            vlm_interface: Optional vision-language model interface used by
                :meth:`describe_image` and :meth:`describe_screen`.
        """
        cls.llm_interface = llm_interface
        cls.task_manager = task_manager
        cls.state_manager = state_manager
        cls.vlm_interface = vlm_interface

    @classmethod
    def _require_task_manager(cls) -> TaskManager:
        """Return the registered task manager or raise ``RuntimeError``."""
        if cls.task_manager is None:
            raise RuntimeError("InternalActionInterface not initialized with TaskManager.")
        return cls.task_manager

    # ─────────────────────── LLM Access for Actions ───────────────────────
    @classmethod
    def use_llm(cls, prompt: str, system_message: Optional[str] = None) -> Dict[str, Any]:
        """
        Generate a response from the configured LLM.

        Args:
            prompt: Prompt forwarded to the language model.
            system_message: Optional system instructions forwarded alongside
                the prompt.

        Returns:
            A mapping with the model output under the key ``"llm_response"``.

        Raises:
            RuntimeError: If no :class:`LLMInterface` has been registered.
        """
        if cls.llm_interface is None:
            raise RuntimeError("InternalActionInterface not initialized with LLMInterface.")
        response = cls.llm_interface.generate_response(prompt, system_message)
        return {"llm_response": response}

    @classmethod
    def describe_image(cls, image_path: str, prompt: str | None = None) -> str:
        """
        Produce a textual description for an image using the VLM.

        Args:
            image_path: Path of the image handed to the vision model.
            prompt: Optional prompt passed to the VLM as ``user_prompt``.

        Returns:
            Whatever the VLM's ``describe_image`` call returns.

        Raises:
            RuntimeError: If no :class:`VLMInterface` has been registered.
        """
        if cls.vlm_interface is None:
            raise RuntimeError("InternalActionInterface not initialized with VLMInterface.")
        return cls.vlm_interface.describe_image(image_path, user_prompt=prompt)

    # ─────────────────────── GUI Actions ───────────────────────

    @classmethod
    def describe_screen(cls) -> dict[str, str]:
        """
        Capture the screen and describe it with the VLM.

        The capture is written to a UTC-timestamped PNG under
        ``AGENT_WORKSPACE_ROOT`` and then passed to :meth:`describe_image`
        with the default prompt.

        Returns:
            A mapping with the VLM description under ``"description"`` and the
            saved screenshot path under ``"file_path"``.

        Raises:
            RuntimeError: If no :class:`VLMInterface` is available.
        """
        if cls.vlm_interface is None:
            raise RuntimeError("InternalActionInterface not initialised with VLMInterface.")

        # Local import: only this method needs ``timezone``.
        from datetime import timezone

        workspace = Path(AGENT_WORKSPACE_ROOT)
        # Make sure the target folder exists so writing the PNG cannot fail
        # merely because the workspace has not been created yet.
        workspace.mkdir(parents=True, exist_ok=True)

        # Timezone-aware replacement for the deprecated ``datetime.utcnow()``;
        # the formatted timestamp is identical.
        ts = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S_%f")
        img_path = os.path.join(workspace, f"viewscreen_{ts}.png")

        with mss.mss() as sct:
            shot = sct.grab(sct.monitors[0])          # full virtual desktop
            mss.tools.to_png(shot.rgb, shot.size, output=img_path)

        description = cls.describe_image(img_path)    # default VLM prompt
        return {"description": description, "file_path": img_path}

    @staticmethod
    async def do_chat(
        message: str,
    ) -> None:
        """
        Record an agent-authored chat message and publish it to the event stream.

        The message is recorded on the state manager, logged to its event
        stream manager under the ``"agent"`` source, and the event stream is
        then bumped.

        Args:
            message: Text content the agent wants to send.

        Raises:
            RuntimeError: If the state manager has not been configured.
        """
        if InternalActionInterface.state_manager is None:
            raise RuntimeError("InternalActionInterface not initialized with StateManager.")

        InternalActionInterface.state_manager.record_agent_message(message)

        event_stream_manager = InternalActionInterface.state_manager.event_stream_manager
        event_stream_manager.log(
            "agent",
            message,
            display_message=message
        )
        InternalActionInterface.state_manager.bump_event_stream()

    @staticmethod
    def do_ignore():
        """
        Note that the agent chose to ignore the latest user input.

        Only emits a debug log line; no state is changed here.
        """
        logger.debug("[Agent Action] Ignoring user message.")

    # ───────────────── CLI and GUI mode ─────────────────
    @staticmethod
    def switch_to_CLI_mode():
        """Turn GUI mode off by calling ``STATE.update_gui_mode(False)``."""
        STATE.update_gui_mode(False)

    @staticmethod
    def switch_to_GUI_mode():
        """Turn GUI mode on by calling ``STATE.update_gui_mode(True)``."""
        STATE.update_gui_mode(True)

    # ───────────────── Task Management ─────────────────
    @classmethod
    async def do_create_and_run_task(cls, task_name: str, task_description: str) -> str:
        """
        Create a new task and immediately start it.

        After the task is started, the task manager's current task is handed
        to the state manager as the active task.

        Args:
            task_name: Short name for the task.
            task_description: Detailed description of the work to perform.

        Returns:
            The identifier returned by the task manager's ``create_task``.

        Raises:
            RuntimeError: If task or state managers have not been initialised.
        """
        if cls.task_manager is None or cls.state_manager is None:
            raise RuntimeError("InternalActionInterface not initialized with Task/State managers.")

        task_id = await cls.task_manager.create_task(task_name, task_description)

        await cls.task_manager.start_task()
        wf: Optional[Task] = cls.task_manager.get_task()
        cls.state_manager.add_to_active_task(wf)
        return task_id

    @classmethod
    async def mark_task_completed(cls, message: Optional[str] = None) -> Dict[str, Any]:
        """
        Mark the current task as completed.

        Any failure, including a missing task manager, is logged and reported
        as an error payload instead of being raised.

        Args:
            message: Optional completion note forwarded to the task manager.

        Returns:
            ``{"status": "ok"}`` when the task manager reports success,
            ``{"status": "error"}`` when it reports failure, and
            ``{"status": "error", "error": <text>}`` when an exception occurred.
        """
        try:
            ok = await cls._require_task_manager().mark_task_completed(message=message)
            return {"status": "ok" if ok else "error"}
        except Exception as e:
            # Action boundary: convert every failure into a payload the agent can read.
            logger.error(f"[InternalActions] mark_task_completed failed: {e}", exc_info=True)
            return {"status": "error", "error": str(e)}

    @classmethod
    async def mark_task_cancel(cls, reason: Optional[str] = None) -> Dict[str, Any]:
        """
        Cancel the current task.

        Any failure, including a missing task manager, is logged and reported
        as an error payload instead of being raised.

        Args:
            reason: Optional explanation forwarded to the task manager.

        Returns:
            ``{"status": "ok"}`` when the task manager reports success,
            ``{"status": "error"}`` when it reports failure, and
            ``{"status": "error", "error": <text>}`` when an exception occurred.
        """
        try:
            ok = await cls._require_task_manager().mark_task_cancel(reason=reason)
            return {"status": "ok" if ok else "error"}
        except Exception as e:
            # Action boundary: convert every failure into a payload the agent can read.
            logger.error(f"[InternalActions] mark_task_cancel failed: {e}", exc_info=True)
            return {"status": "error", "error": str(e)}

    @classmethod
    async def mark_task_error(cls, message: Optional[str] = None) -> Dict[str, Any]:
        """
        Mark the current task as failed.

        Any failure, including a missing task manager, is logged and reported
        as an error payload instead of being raised.

        Args:
            message: Optional error detail forwarded to the task manager.

        Returns:
            ``{"status": "ok"}`` when the task manager reports success,
            ``{"status": "error"}`` when it reports failure, and
            ``{"status": "error", "error": <text>}`` when an exception occurred.
        """
        try:
            ok = await cls._require_task_manager().mark_task_error(message=message)
            return {"status": "ok" if ok else "error"}
        except Exception as e:
            # Action boundary: convert every failure into a payload the agent can read.
            logger.error(f"[InternalActions] mark_task_error failed: {e}", exc_info=True)
            return {"status": "error", "error": str(e)}

    @classmethod
    async def start_next_step(
        cls,
        *,
        update_plan: bool = False,
    ) -> Dict[str, Any]:
        """
        Advance the active task to its next step.

        ``update_plan`` is forwarded to the task manager as ``replan``. Any
        failure, including a missing task manager, is logged and reported as
        an error payload instead of being raised.

        Args:
            update_plan: Whether to ask the task manager to replan before
                starting the next step.

        Returns:
            ``{"status": "ok", "result": <task manager result>}`` on success,
            or ``{"status": "error", "error": <text>}`` when an exception
            occurred.
        """
        try:
            result = await cls._require_task_manager().start_next_step(
                replan=update_plan,
            )
            return {"status": "ok", "result": result}
        except Exception as e:
            # Action boundary: convert every failure into a payload the agent can read.
            logger.error(f"[InternalActions] start_next_step failed: {e}", exc_info=True)
            return {"status": "error", "error": str(e)}

initialize(llm_interface, task_manager, state_manager, vlm_interface=None) classmethod

Register the shared interfaces that actions depend on.

This must be called once at application startup so later static calls can access the language model, task manager, state manager, and optional vision model without creating new instances.

Parameters:

Name Type Description Default
llm_interface LLMInterface

Core large language model interface for text generation and reasoning.

required
task_manager TaskManager

Orchestrates task creation, execution, and state updates.

required
state_manager StateManager

Persists session state and provides access to event streams and agent properties.

required
vlm_interface VLMInterface | None

Optional vision-language model interface used for image understanding and screen descriptions.

None
Source code in core\internal_action_interface.py
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
@classmethod
def initialize(cls, llm_interface: LLMInterface,
               task_manager: TaskManager, state_manager: StateManager,
               vlm_interface: VLMInterface | None = None):
    """
    Store the shared collaborators on the class for later static calls.

    Because the references live on the class itself, actions invoked
    afterwards can reach them without constructing anything.

    Args:
        llm_interface: Language model interface for text generation.
        task_manager: Manager responsible for task lifecycle operations.
        state_manager: Manager holding session state and the event stream.
        vlm_interface: Optional vision-language model interface; stays
            ``None`` when image description is not needed.
    """
    cls.llm_interface, cls.task_manager = llm_interface, task_manager
    cls.state_manager, cls.vlm_interface = state_manager, vlm_interface

use_llm(prompt, system_message=None) classmethod

Generate a response from the configured LLM.

Parameters:

Name Type Description Default
prompt str

User or agent prompt sent to the language model.

required
system_message Optional[str]

Optional system instructions to steer the response style or constraints.

None

Returns:

Type Description
Dict[str, Any]

A mapping containing the key "llm_response" with the model output.

Raises:

Type Description
RuntimeError

If the interface has not been initialised with an LLMInterface.

Source code in core\internal_action_interface.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
@classmethod
def use_llm(cls, prompt: str, system_message: Optional[str] = None) -> Dict[str, Any]:
    """
    Ask the registered LLM for a response and wrap it in a dictionary.

    Args:
        prompt: Prompt text forwarded to the language model.
        system_message: Optional system instructions forwarded with the
            prompt.

    Returns:
        A dictionary holding the model output under ``"llm_response"``.

    Raises:
        RuntimeError: If no :class:`LLMInterface` has been registered.
    """
    llm = cls.llm_interface
    if llm is None:
        raise RuntimeError("InternalActionInterface not initialized with LLMInterface.")
    return {"llm_response": llm.generate_response(prompt, system_message)}

describe_image(image_path, prompt=None) classmethod

Produce a textual description for an image using the VLM.

Parameters:

Name Type Description Default
image_path str

Absolute path to the image to describe.

required
prompt str | None

Optional user prompt to guide the vision-language model.

None

Returns:

Type Description
str

A natural-language description returned by the VLM.

Raises:

Type Description
RuntimeError

If no VLMInterface was configured during initialization.

Source code in core\internal_action_interface.py
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
@classmethod
def describe_image(cls, image_path: str, prompt: str | None = None) -> str:
    """
    Produce a textual description for an image using the VLM.

    Args:
        image_path: Absolute path to the image to describe.
        prompt: Optional user prompt to guide the vision-language model.

    Returns:
        A natural-language description returned by the VLM.

    Raises:
        RuntimeError: If no :class:`VLMInterface` was configured during
            initialization.
    """
    if cls.vlm_interface is None:
        raise RuntimeError("InternalActionInterface not initialized with VLMInterface.")
    return cls.vlm_interface.describe_image(image_path, user_prompt=prompt)

describe_screen() classmethod

Capture the current virtual desktop and describe it with the VLM.

The screen is saved to a timestamped PNG inside the agent workspace and then passed to the vision model for summarisation.

Returns:

Type Description
dict[str, str]

A mapping with the VLM description under "description" and the saved screenshot path under "file_path".

Raises:

Type Description
RuntimeError

If no VLMInterface is available.

Source code in core\internal_action_interface.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
@classmethod
def describe_screen(cls) -> dict[str, str]:
    """
    Capture the screen and describe it with the VLM.

    The capture is written to a UTC-timestamped PNG under
    ``AGENT_WORKSPACE_ROOT`` and then passed to ``describe_image`` with the
    default prompt.

    Returns:
        A mapping with the VLM description under ``"description"`` and the
        saved screenshot path under ``"file_path"``.

    Raises:
        RuntimeError: If no :class:`VLMInterface` is available.
    """
    if cls.vlm_interface is None:
        raise RuntimeError("InternalActionInterface not initialised with VLMInterface.")

    # Local import: only this method needs ``timezone``.
    from datetime import timezone

    workspace = Path(AGENT_WORKSPACE_ROOT)
    # Make sure the target folder exists so writing the PNG cannot fail
    # merely because the workspace has not been created yet.
    workspace.mkdir(parents=True, exist_ok=True)

    # Timezone-aware replacement for the deprecated ``datetime.utcnow()``;
    # the formatted timestamp is identical.
    ts = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S_%f")
    img_path = os.path.join(workspace, f"viewscreen_{ts}.png")

    with mss.mss() as sct:
        shot = sct.grab(sct.monitors[0])          # full virtual desktop
        mss.tools.to_png(shot.rgb, shot.size, output=img_path)

    description = cls.describe_image(img_path)    # default VLM prompt
    return {"description": description, "file_path": img_path}

do_chat(message) async staticmethod

Record an agent-authored chat message and publish it to the event stream.

Parameters:

Name Type Description Default
message str

Text content the agent wants to send to the user or log.

required

Raises:

Type Description
RuntimeError

If the state manager has not been configured.

Source code in core\internal_action_interface.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
@staticmethod
async def do_chat(
    message: str,
) -> None:
    """
    Send an agent-authored chat message through the state manager.

    The message is recorded, logged to the event stream under the
    ``"agent"`` source, and the event stream is then bumped.

    Args:
        message: Text the agent wants to send.

    Raises:
        RuntimeError: If the state manager has not been configured.
    """
    manager = InternalActionInterface.state_manager
    if manager is None:
        raise RuntimeError("InternalActionInterface not initialized with StateManager.")

    manager.record_agent_message(message)
    manager.event_stream_manager.log("agent", message, display_message=message)
    manager.bump_event_stream()

do_ignore() staticmethod

Note that the agent chose to ignore the latest user input.

Source code in core\internal_action_interface.py
165
166
167
168
169
170
@staticmethod
def do_ignore():
    """Emit a debug log line noting that the agent ignores the user message."""
    logger.debug("[Agent Action] Ignoring user message.")

do_create_and_run_task(task_name, task_description) async classmethod

Create a new task and immediately start it.

The task metadata is persisted and registered as the active task for the current session.

Parameters:

Name Type Description Default
task_name str

Short name for the task.

required
task_description str

Detailed description of the work to perform.

required

Returns:

Type Description
str

The created task identifier.

Raises:

Type Description
RuntimeError

If task or state managers have not been initialised.

Source code in core\internal_action_interface.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
@classmethod
async def do_create_and_run_task(cls, task_name: str, task_description: str) -> str:
    """
    Create a task, start it, and register it as the active task.

    Args:
        task_name: Short name for the task.
        task_description: Detailed description of the work to perform.

    Returns:
        The identifier returned by the task manager's ``create_task``.

    Raises:
        RuntimeError: If task or state managers have not been initialised.
    """
    managers_ready = cls.task_manager is not None and cls.state_manager is not None
    if not managers_ready:
        raise RuntimeError("InternalActionInterface not initialized with Task/State managers.")

    tasks = cls.task_manager
    created_id = await tasks.create_task(task_name, task_description)
    await tasks.start_task()

    # Hand the task manager's current task to the state manager.
    cls.state_manager.add_to_active_task(tasks.get_task())
    return created_id

mark_task_completed(message=None) async classmethod

Mark the current session task as completed.

If no session is active, returns an error payload instead of raising.

Parameters:

Name Type Description Default
message Optional[str]

Optional completion note to store alongside the task.

None

Returns:

Type Description
Dict[str, Any]

A status dictionary indicating success or failure; when an exception occurred it also carries the error text under "error".

Source code in core\internal_action_interface.py
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
@classmethod
async def mark_task_completed(cls, message: Optional[str] = None) -> Dict[str, Any]:
    """
    Mark the current task as completed.

    Any failure, including a missing task manager, is logged and reported as
    an error payload instead of being raised.

    Args:
        message: Optional completion note forwarded to the task manager.

    Returns:
        ``{"status": "ok"}`` when the task manager reports success,
        ``{"status": "error"}`` when it reports failure, and
        ``{"status": "error", "error": <text>}`` when an exception occurred.
    """
    try:
        # Explicit guard so the payload names the real problem rather than a
        # "'NoneType' object has no attribute ..." message.
        if cls.task_manager is None:
            raise RuntimeError("InternalActionInterface not initialized with TaskManager.")
        ok = await cls.task_manager.mark_task_completed(message=message)
        return {"status": "ok" if ok else "error"}
    except Exception as e:
        # Action boundary: convert every failure into a payload the agent can read.
        logger.error(f"[InternalActions] mark_task_completed failed: {e}", exc_info=True)
        return {"status": "error", "error": str(e)}

mark_task_cancel(reason=None) async classmethod

Cancel the current session task.

If no session is active, returns an error payload instead of raising.

Parameters:

Name Type Description Default
reason Optional[str]

Optional explanation of why the task was cancelled.

None

Returns:

Type Description
Dict[str, Any]

A status dictionary indicating success or failure; when an exception occurred it also carries the error text under "error".

Source code in core\internal_action_interface.py
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
@classmethod
async def mark_task_cancel(cls, reason: Optional[str] = None) -> Dict[str, Any]:
    """
    Cancel the current task.

    Any failure, including a missing task manager, is logged and reported as
    an error payload instead of being raised.

    Args:
        reason: Optional explanation forwarded to the task manager.

    Returns:
        ``{"status": "ok"}`` when the task manager reports success,
        ``{"status": "error"}`` when it reports failure, and
        ``{"status": "error", "error": <text>}`` when an exception occurred.
    """
    try:
        # Explicit guard so the payload names the real problem rather than a
        # "'NoneType' object has no attribute ..." message.
        if cls.task_manager is None:
            raise RuntimeError("InternalActionInterface not initialized with TaskManager.")
        ok = await cls.task_manager.mark_task_cancel(reason=reason)
        return {"status": "ok" if ok else "error"}
    except Exception as e:
        # Action boundary: convert every failure into a payload the agent can read.
        logger.error(f"[InternalActions] mark_task_cancel failed: {e}", exc_info=True)
        return {"status": "error", "error": str(e)}

mark_task_error(message=None) async classmethod

Mark the current session task as failed.

If no session is active, returns an error payload instead of raising.

Parameters:

Name Type Description Default
message Optional[str]

Optional error detail to store alongside the task.

None

Returns:

Type Description
Dict[str, Any]

A status dictionary indicating success or failure; when an exception occurred it also carries the error text under "error".

Source code in core\internal_action_interface.py
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
@classmethod
async def mark_task_error(cls, message: Optional[str] = None) -> Dict[str, Any]:
    """
    Mark the current task as failed.

    Any failure, including a missing task manager, is logged and reported as
    an error payload instead of being raised.

    Args:
        message: Optional error detail forwarded to the task manager.

    Returns:
        ``{"status": "ok"}`` when the task manager reports success,
        ``{"status": "error"}`` when it reports failure, and
        ``{"status": "error", "error": <text>}`` when an exception occurred.
    """
    try:
        # Explicit guard so the payload names the real problem rather than a
        # "'NoneType' object has no attribute ..." message.
        if cls.task_manager is None:
            raise RuntimeError("InternalActionInterface not initialized with TaskManager.")
        ok = await cls.task_manager.mark_task_error(message=message)
        return {"status": "ok" if ok else "error"}
    except Exception as e:
        # Action boundary: convert every failure into a payload the agent can read.
        logger.error(f"[InternalActions] mark_task_error failed: {e}", exc_info=True)
        return {"status": "error", "error": str(e)}

start_next_step(*, update_plan=False) async classmethod

Advance the active task to its next step.

When update_plan is True, the planner is asked to refresh the plan before moving forward.

Parameters:

Name Type Description Default
update_plan bool

Whether to replan the task before starting the next step.

False

Returns:

Type Description
Dict[str, Any]

A status dictionary indicating success and the planner result, or an error payload when no task is active.

Source code in core\internal_action_interface.py
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
@classmethod
async def start_next_step(
    cls,
    *,
    update_plan: bool = False,
) -> Dict[str, Any]:
    """
    Advance the active task to its next step.

    ``update_plan`` is forwarded to the task manager as ``replan``. Any
    failure, including a missing task manager, is logged and reported as an
    error payload instead of being raised.

    Args:
        update_plan: Whether to ask the task manager to replan before
            starting the next step.

    Returns:
        ``{"status": "ok", "result": <task manager result>}`` on success, or
        ``{"status": "error", "error": <text>}`` when an exception occurred.
    """
    try:
        # Explicit guard so the payload names the real problem rather than a
        # "'NoneType' object has no attribute ..." message.
        if cls.task_manager is None:
            raise RuntimeError("InternalActionInterface not initialized with TaskManager.")
        result = await cls.task_manager.start_next_step(
            replan=update_plan,
        )
        return {"status": "ok", "result": result}
    except Exception as e:
        # Action boundary: convert every failure into a payload the agent can read.
        logger.error(f"[InternalActions] start_next_step failed: {e}", exc_info=True)
        return {"status": "error", "error": str(e)}