Skip to content

Runtime Client API

RuntimeClient is the high-level public Python API. Use it when you want the same resolver, planner, loader, and executor behavior that powers the CLI.

High-level runtime API shared by the CLI and the Python library.

Attributes:

Name Type Description
runtime_loader RuntimeLoader

Resolver, planner, materialization, and backend-loading boundary.

runtime_executor RuntimeExecutor

Prompt execution boundary used once a runtime has been loaded.

Source code in src/ollm/client.py
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
@dataclass(slots=True)
class RuntimeClient:
    """High-level runtime API shared by the CLI and the Python library.

    Attributes:
        runtime_loader (RuntimeLoader): Resolver, planner, materialization, and
            backend-loading boundary.
        runtime_executor (RuntimeExecutor): Prompt execution boundary used once
            a runtime has been loaded.
    """

    runtime_loader: RuntimeLoader = field(default_factory=RuntimeLoader)
    runtime_executor: RuntimeExecutor = field(default_factory=RuntimeExecutor)

    def resolve(
        self, model_reference: str, models_dir: Path = Path("models")
    ) -> ResolvedModel:
        """Resolve a model reference without loading a runtime.

        Args:
            model_reference (str): User-facing model reference such as a built-in
                alias, Hugging Face repository, or local model path.
            models_dir (Path): Local models root used for implicit path
                resolution.

        Returns:
            ResolvedModel: Normalized model metadata for planning or inspection.
        """
        # Normalize the models root to an absolute path before delegating.
        return self.runtime_loader.resolve(
            model_reference, models_dir.expanduser().resolve()
        )

    def discover_local_models(
        self, models_dir: Path = Path("models")
    ) -> tuple[ResolvedModel, ...]:
        """Discover local materialized models under a models directory.

        Args:
            models_dir (Path): Local models root to inspect.

        Returns:
            tuple[ResolvedModel, ...]: Materialized model directories discovered
                under the given root.
        """
        return self.runtime_loader.discover_local_models(
            models_dir.expanduser().resolve()
        )

    def plan(self, runtime_config: RuntimeConfig) -> RuntimePlan:
        """Build a runtime plan without loading a backend.

        Args:
            runtime_config (RuntimeConfig): Execution configuration to inspect.

        Returns:
            RuntimePlan: Planned backend, specialization, and capability result.

        Raises:
            ValueError: Raised when the runtime configuration is invalid or no
                executable plan can be produced.
        """
        return self.runtime_loader.plan(runtime_config)

    def describe_plan(self, runtime_config: RuntimeConfig) -> PlanJsonPayload:
        """Return a JSON-serializable inspection payload for a runtime plan.

        Args:
            runtime_config (RuntimeConfig): Execution configuration to inspect.

        Returns:
            PlanJsonPayload: Serialized inspection payload for CLI or HTTP use.

        Raises:
            ValueError: Raised when the runtime configuration is invalid or no
                executable plan can be produced.
        """
        # Build the plan first, then serialize it for CLI/HTTP consumers.
        return plan_json_payload(runtime_config, self.plan(runtime_config))

    def load(self, runtime_config: RuntimeConfig) -> LoadedRuntime:
        """Resolve and load a runtime backend for the given configuration.

        Args:
            runtime_config (RuntimeConfig): Execution configuration to load.

        Returns:
            LoadedRuntime: Loaded backend runtime bundle ready for execution.

        Raises:
            ValueError: Raised when the model cannot be resolved, materialized,
                planned, or loaded.
        """
        return self.runtime_loader.load(runtime_config)

    def prompt(
        self,
        prompt: str,
        *,
        runtime_config: RuntimeConfig,
        generation_config: GenerationConfig | None = None,
        system_prompt: str = DEFAULT_SYSTEM_PROMPT,
        images: tuple[str, ...] = (),
        audio: tuple[str, ...] = (),
        sink: StreamSink | None = None,
    ) -> PromptResponse:
        """Execute one prompt using text plus optional image or audio inputs.

        Args:
            prompt (str): Primary text prompt.
            runtime_config (RuntimeConfig): Runtime configuration to execute.
            generation_config (GenerationConfig | None): Optional generation
                overrides. Defaults to ``GenerationConfig()`` when omitted.
            system_prompt (str): System instruction prepended to the request
                when non-empty.
            images (tuple[str, ...]): Optional image input paths or URIs.
            audio (tuple[str, ...]): Optional audio input paths or URIs.
            sink (StreamSink | None): Optional streaming sink for incremental
                text callbacks.

        Returns:
            PromptResponse: Final prompt response and assistant message payload.

        Raises:
            ValueError: Raised when the runtime or generation configuration is
                invalid or when no executable backend exists.
        """
        # The text part always comes first, followed by image then audio parts.
        parts = [ContentPart.text(prompt)]
        parts.extend(ContentPart.image(item) for item in images)
        parts.extend(ContentPart.audio(item) for item in audio)
        return self.prompt_parts(
            parts,
            runtime_config=runtime_config,
            generation_config=generation_config,
            system_prompt=system_prompt,
            sink=sink,
        )

    def prompt_parts(
        self,
        parts: list[ContentPart],
        *,
        runtime_config: RuntimeConfig,
        generation_config: GenerationConfig | None = None,
        system_prompt: str = DEFAULT_SYSTEM_PROMPT,
        history: list[Message] | None = None,
        sink: StreamSink | None = None,
    ) -> PromptResponse:
        """Execute a prompt composed from explicit content parts.

        Args:
            parts (list[ContentPart]): Prompt payload parts in final user-message
                order.
            runtime_config (RuntimeConfig): Runtime configuration to execute.
            generation_config (GenerationConfig | None): Optional generation
                overrides. Defaults to ``GenerationConfig()`` when omitted.
            system_prompt (str): System instruction prepended to the request
                when non-empty.
            history (list[Message] | None): Optional prior conversation messages
                to prepend before the new user message.
            sink (StreamSink | None): Optional streaming sink for incremental
                callbacks.

        Returns:
            PromptResponse: Final prompt response and assistant message payload.

        Raises:
            ValueError: Raised when ``parts`` is empty or when runtime/generation
                validation or backend loading fails.
        """
        if not parts:
            raise ValueError("A prompt requires at least one content part")
        effective_runtime_config = self._runtime_config_for_parts(runtime_config, parts)
        effective_generation_config = (
            GenerationConfig() if generation_config is None else generation_config
        )
        # Validate both configs up front, before the backend load.
        effective_runtime_config.validate()
        effective_generation_config.validate()
        runtime = self.runtime_loader.load(effective_runtime_config)
        # Message order: optional system prompt, optional history, new user turn.
        request_messages = []
        if system_prompt:
            request_messages.append(Message.system_text(system_prompt))
        if history:
            request_messages.extend(history)
        request_messages.append(Message(role=MessageRole.USER, content=list(parts)))
        # Use the loaded runtime's own config so the request reflects what was
        # actually loaded rather than the pre-load configuration.
        request = PromptRequest(
            runtime_config=runtime.config,
            generation_config=effective_generation_config,
            messages=request_messages,
        )
        return self.runtime_executor.execute(runtime, request, sink=sink)

    def session(
        self,
        *,
        runtime_config: RuntimeConfig,
        generation_config: GenerationConfig | None = None,
        session_name: str = "default",
        system_prompt: str = DEFAULT_SYSTEM_PROMPT,
        messages: list[Message] | None = None,
        autosave_path: Path | None = None,
    ) -> ChatSession:
        """Create a reusable chat session over the shared runtime stack.

        Args:
            runtime_config (RuntimeConfig): Runtime configuration for the
                session.
            generation_config (GenerationConfig | None): Optional generation
                overrides. Defaults to ``GenerationConfig()`` when omitted.
            session_name (str): Human-readable session label.
            system_prompt (str): Session-wide system instruction.
            messages (list[Message] | None): Optional initial transcript
                messages.
            autosave_path (Path | None): Optional transcript autosave path.

        Returns:
            ChatSession: Reusable session object bound to the shared runtime
                stack.

        Raises:
            ValueError: Raised when the runtime or generation configuration is
                invalid.
        """
        session = ChatSession(
            runtime_loader=self.runtime_loader,
            runtime_executor=self.runtime_executor,
            runtime_config=runtime_config,
            generation_config=GenerationConfig()
            if generation_config is None
            else generation_config,
            session_name=session_name,
            system_prompt=system_prompt,
            autosave_path=autosave_path,
        )
        session.runtime_config.validate()
        session.generation_config.validate()
        if messages:
            session.messages.extend(messages)
        return session

    def _runtime_config_for_parts(
        self,
        runtime_config: RuntimeConfig,
        parts: list[ContentPart],
    ) -> RuntimeConfig:
        """Enable multimodal planning automatically when non-text parts are present.

        Args:
            runtime_config (RuntimeConfig): Base runtime configuration.
            parts (list[ContentPart]): Prompt payload parts to inspect.

        Returns:
            RuntimeConfig: The original configuration when no change is needed,
                otherwise a copy with ``multimodal`` enabled.
        """
        requires_multimodal = any(part.kind is not ContentKind.TEXT for part in parts)
        if not requires_multimodal or runtime_config.multimodal:
            return runtime_config
        # ``replace`` returns a copy; the caller's config is never mutated.
        return replace(runtime_config, multimodal=True)

resolve

resolve(
    model_reference: str, models_dir: Path = Path("models")
) -> ResolvedModel

Resolve a model reference without loading a runtime.

Parameters:

Name Type Description Default
model_reference str

User-facing model reference such as a built-in alias, Hugging Face repository, or local model path.

required
models_dir Path

Local models root used for implicit path resolution.

Path('models')

Returns:

Name Type Description
ResolvedModel ResolvedModel

Normalized model metadata for planning or inspection.

Source code in src/ollm/client.py
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
def resolve(
    self, model_reference: str, models_dir: Path = Path("models")
) -> ResolvedModel:
    """Resolve a model reference without loading a runtime.

    Args:
        model_reference (str): User-facing model reference such as a built-in
            alias, Hugging Face repository, or local model path.
        models_dir (Path): Local models root used for implicit path
            resolution.

    Returns:
        ResolvedModel: Normalized model metadata for planning or inspection.
    """
    # Normalize the models root to an absolute path before delegating.
    return self.runtime_loader.resolve(
        model_reference, models_dir.expanduser().resolve()
    )

discover_local_models

discover_local_models(
    models_dir: Path = Path("models"),
) -> tuple[ResolvedModel, ...]

Discover local materialized models under a models directory.

Parameters:

Name Type Description Default
models_dir Path

Local models root to inspect.

Path('models')

Returns:

Type Description
tuple[ResolvedModel, ...]

Materialized model directories discovered under the given root.

Source code in src/ollm/client.py
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def discover_local_models(
    self, models_dir: Path = Path("models")
) -> tuple[ResolvedModel, ...]:
    """Discover local materialized models under a models directory.

    Args:
        models_dir (Path): Local models root to inspect.

    Returns:
        tuple[ResolvedModel, ...]: Materialized model directories discovered
            under the given root.
    """
    # Normalize the models root to an absolute path before delegating.
    return self.runtime_loader.discover_local_models(
        models_dir.expanduser().resolve()
    )

plan

plan(runtime_config: RuntimeConfig) -> RuntimePlan

Build a runtime plan without loading a backend.

Parameters:

Name Type Description Default
runtime_config RuntimeConfig

Execution configuration to inspect.

required

Returns:

Name Type Description
RuntimePlan RuntimePlan

Planned backend, specialization, and capability result.

Raises:

Type Description
ValueError

Raised when the runtime configuration is invalid or no executable plan can be produced.

Source code in src/ollm/client.py
72
73
74
75
76
77
78
79
80
81
82
83
84
85
def plan(self, runtime_config: RuntimeConfig) -> RuntimePlan:
    """Build a runtime plan without loading a backend.

    Args:
        runtime_config (RuntimeConfig): Execution configuration to inspect.

    Returns:
        RuntimePlan: Planned backend, specialization, and capability result.

    Raises:
        ValueError: Raised when the runtime configuration is invalid or no
            executable plan can be produced.
    """
    # Planning is delegated entirely to the loader boundary.
    return self.runtime_loader.plan(runtime_config)

describe_plan

describe_plan(
    runtime_config: RuntimeConfig,
) -> PlanJsonPayload

Return a JSON-serializable inspection payload for a runtime plan.

Parameters:

Name Type Description Default
runtime_config RuntimeConfig

Execution configuration to inspect.

required

Returns:

Name Type Description
PlanJsonPayload PlanJsonPayload

Serialized inspection payload for CLI or HTTP use.

Raises:

Type Description
ValueError

Raised when the runtime configuration is invalid or no executable plan can be produced.

Source code in src/ollm/client.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def describe_plan(self, runtime_config: RuntimeConfig) -> PlanJsonPayload:
    """Return a JSON-serializable inspection payload for a runtime plan.

    Args:
        runtime_config (RuntimeConfig): Execution configuration to inspect.

    Returns:
        PlanJsonPayload: Serialized inspection payload for CLI or HTTP use.

    Raises:
        ValueError: Raised when the runtime configuration is invalid or no
            executable plan can be produced.
    """
    # Build the plan first, then serialize it for CLI/HTTP consumers.
    return plan_json_payload(runtime_config, self.plan(runtime_config))

load

load(runtime_config: RuntimeConfig) -> LoadedRuntime

Resolve and load a runtime backend for the given configuration.

Parameters:

Name Type Description Default
runtime_config RuntimeConfig

Execution configuration to load.

required

Returns:

Name Type Description
LoadedRuntime LoadedRuntime

Loaded backend runtime bundle ready for execution.

Raises:

Type Description
ValueError

Raised when the model cannot be resolved, materialized, planned, or loaded.

Source code in src/ollm/client.py
102
103
104
105
106
107
108
109
110
111
112
113
114
115
def load(self, runtime_config: RuntimeConfig) -> LoadedRuntime:
    """Resolve and load a runtime backend for the given configuration.

    Args:
        runtime_config (RuntimeConfig): Execution configuration to load.

    Returns:
        LoadedRuntime: Loaded backend runtime bundle ready for execution.

    Raises:
        ValueError: Raised when the model cannot be resolved, materialized,
            planned, or loaded.
    """
    # Resolution, materialization, planning, and loading all live in the loader.
    return self.runtime_loader.load(runtime_config)

prompt

prompt(
    prompt: str,
    *,
    runtime_config: RuntimeConfig,
    generation_config: GenerationConfig | None = None,
    system_prompt: str = DEFAULT_SYSTEM_PROMPT,
    images: tuple[str, ...] = (),
    audio: tuple[str, ...] = (),
    sink: StreamSink | None = None,
) -> PromptResponse

Execute one prompt using text plus optional image or audio inputs.

Parameters:

Name Type Description Default
prompt str

Primary text prompt.

required
runtime_config RuntimeConfig

Runtime configuration to execute.

required
generation_config GenerationConfig | None

Optional generation overrides. Defaults to GenerationConfig() when omitted.

None
system_prompt str

System instruction prepended to the request when non-empty.

DEFAULT_SYSTEM_PROMPT
images tuple[str, ...]

Optional image input paths or URIs.

()
audio tuple[str, ...]

Optional audio input paths or URIs.

()
sink StreamSink | None

Optional streaming sink for incremental text callbacks.

None

Returns:

Name Type Description
PromptResponse PromptResponse

Final prompt response and assistant message payload.

Raises:

Type Description
ValueError

Raised when the runtime or generation configuration is invalid or when no executable backend exists.

Source code in src/ollm/client.py
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
def prompt(
    self,
    prompt: str,
    *,
    runtime_config: RuntimeConfig,
    generation_config: GenerationConfig | None = None,
    system_prompt: str = DEFAULT_SYSTEM_PROMPT,
    images: tuple[str, ...] = (),
    audio: tuple[str, ...] = (),
    sink: StreamSink | None = None,
) -> PromptResponse:
    """Execute one prompt using text plus optional image or audio inputs.

    Args:
        prompt (str): Primary text prompt.
        runtime_config (RuntimeConfig): Runtime configuration to execute.
        generation_config (GenerationConfig | None): Optional generation
            overrides. Defaults to ``GenerationConfig()`` when omitted.
        system_prompt (str): System instruction prepended to the request
            when non-empty.
        images (tuple[str, ...]): Optional image input paths or URIs.
        audio (tuple[str, ...]): Optional audio input paths or URIs.
        sink (StreamSink | None): Optional streaming sink for incremental
            text callbacks.

    Returns:
        PromptResponse: Final prompt response and assistant message payload.

    Raises:
        ValueError: Raised when the runtime or generation configuration is
            invalid or when no executable backend exists.
    """
    # The text part always comes first, followed by image then audio parts.
    parts = [ContentPart.text(prompt)]
    parts.extend(ContentPart.image(item) for item in images)
    parts.extend(ContentPart.audio(item) for item in audio)
    return self.prompt_parts(
        parts,
        runtime_config=runtime_config,
        generation_config=generation_config,
        system_prompt=system_prompt,
        sink=sink,
    )

prompt_parts

prompt_parts(
    parts: list[ContentPart],
    *,
    runtime_config: RuntimeConfig,
    generation_config: GenerationConfig | None = None,
    system_prompt: str = DEFAULT_SYSTEM_PROMPT,
    history: list[Message] | None = None,
    sink: StreamSink | None = None,
) -> PromptResponse

Execute a prompt composed from explicit content parts.

Parameters:

Name Type Description Default
parts list[ContentPart]

Prompt payload parts in final user-message order.

required
runtime_config RuntimeConfig

Runtime configuration to execute.

required
generation_config GenerationConfig | None

Optional generation overrides. Defaults to GenerationConfig() when omitted.

None
system_prompt str

System instruction prepended to the request when non-empty.

DEFAULT_SYSTEM_PROMPT
history list[Message] | None

Optional prior conversation messages to prepend before the new user message.

None
sink StreamSink | None

Optional streaming sink for incremental callbacks.

None

Returns:

Name Type Description
PromptResponse PromptResponse

Final prompt response and assistant message payload.

Raises:

Type Description
ValueError

Raised when parts is empty or when runtime/generation validation or backend loading fails.

Source code in src/ollm/client.py
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
def prompt_parts(
    self,
    parts: list[ContentPart],
    *,
    runtime_config: RuntimeConfig,
    generation_config: GenerationConfig | None = None,
    system_prompt: str = DEFAULT_SYSTEM_PROMPT,
    history: list[Message] | None = None,
    sink: StreamSink | None = None,
) -> PromptResponse:
    """Execute a prompt composed from explicit content parts.

    Args:
        parts (list[ContentPart]): Prompt payload parts in final user-message
            order.
        runtime_config (RuntimeConfig): Runtime configuration to execute.
        generation_config (GenerationConfig | None): Optional generation
            overrides. Defaults to ``GenerationConfig()`` when omitted.
        system_prompt (str): System instruction prepended to the request
            when non-empty.
        history (list[Message] | None): Optional prior conversation messages
            to prepend before the new user message.
        sink (StreamSink | None): Optional streaming sink for incremental
            callbacks.

    Returns:
        PromptResponse: Final prompt response and assistant message payload.

    Raises:
        ValueError: Raised when ``parts`` is empty or when runtime/generation
            validation or backend loading fails.
    """
    if not parts:
        raise ValueError("A prompt requires at least one content part")
    effective_runtime_config = self._runtime_config_for_parts(runtime_config, parts)
    effective_generation_config = (
        GenerationConfig() if generation_config is None else generation_config
    )
    # Validate both configs up front, before the backend load.
    effective_runtime_config.validate()
    effective_generation_config.validate()
    runtime = self.runtime_loader.load(effective_runtime_config)
    # Message order: optional system prompt, optional history, new user turn.
    request_messages = []
    if system_prompt:
        request_messages.append(Message.system_text(system_prompt))
    if history:
        request_messages.extend(history)
    request_messages.append(Message(role=MessageRole.USER, content=list(parts)))
    # Use the loaded runtime's own config so the request reflects what was
    # actually loaded rather than the pre-load configuration.
    request = PromptRequest(
        runtime_config=runtime.config,
        generation_config=effective_generation_config,
        messages=request_messages,
    )
    return self.runtime_executor.execute(runtime, request, sink=sink)

session

session(
    *,
    runtime_config: RuntimeConfig,
    generation_config: GenerationConfig | None = None,
    session_name: str = "default",
    system_prompt: str = DEFAULT_SYSTEM_PROMPT,
    messages: list[Message] | None = None,
    autosave_path: Path | None = None,
) -> ChatSession

Create a reusable chat session over the shared runtime stack.

Parameters:

Name Type Description Default
runtime_config RuntimeConfig

Runtime configuration for the session.

required
generation_config GenerationConfig | None

Optional generation overrides. Defaults to GenerationConfig() when omitted.

None
session_name str

Human-readable session label.

'default'
system_prompt str

Session-wide system instruction.

DEFAULT_SYSTEM_PROMPT
messages list[Message] | None

Optional initial transcript messages.

None
autosave_path Path | None

Optional transcript autosave path.

None

Returns:

Name Type Description
ChatSession ChatSession

Reusable session object bound to the shared runtime stack.

Raises:

Type Description
ValueError

Raised when the runtime or generation configuration is invalid.

Source code in src/ollm/client.py
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
def session(
    self,
    *,
    runtime_config: RuntimeConfig,
    generation_config: GenerationConfig | None = None,
    session_name: str = "default",
    system_prompt: str = DEFAULT_SYSTEM_PROMPT,
    messages: list[Message] | None = None,
    autosave_path: Path | None = None,
) -> ChatSession:
    """Create a reusable chat session over the shared runtime stack.

    Args:
        runtime_config (RuntimeConfig): Runtime configuration for the
            session.
        generation_config (GenerationConfig | None): Optional generation
            overrides. Defaults to ``GenerationConfig()`` when omitted.
        session_name (str): Human-readable session label.
        system_prompt (str): Session-wide system instruction.
        messages (list[Message] | None): Optional initial transcript
            messages.
        autosave_path (Path | None): Optional transcript autosave path.

    Returns:
        ChatSession: Reusable session object bound to the shared runtime
            stack.

    Raises:
        ValueError: Raised when the runtime or generation configuration is
            invalid.
    """
    session = ChatSession(
        runtime_loader=self.runtime_loader,
        runtime_executor=self.runtime_executor,
        runtime_config=runtime_config,
        generation_config=GenerationConfig()
        if generation_config is None
        else generation_config,
        session_name=session_name,
        system_prompt=system_prompt,
        autosave_path=autosave_path,
    )
    # Validation happens after construction; an invalid config raises here.
    session.runtime_config.validate()
    session.generation_config.validate()
    if messages:
        session.messages.extend(messages)
    return session