Skip to content

Runtime Configuration API

These configuration types are shared by both the CLI and the Python library.

Describe how a model reference should be resolved and executed.

This is the shared execution contract used by the CLI, the library, and the local server. Field annotations remain the source of truth for supported options, while the helper methods normalize and validate those fields for planning and execution.

Source code in src/ollm/runtime/config.py
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
@dataclass(slots=True)
class RuntimeConfig:
    """Describe how a model reference should be resolved and executed.

    This is the shared execution contract used by the CLI, the library, and the
    local server. Field annotations remain the source of truth for supported
    options, while the helper methods normalize and validate those fields for
    planning and execution.
    """

    model_reference: str = DEFAULT_MODEL_REFERENCE
    models_dir: Path = field(default_factory=lambda: Path("models"))
    device: str = DEFAULT_DEVICE
    backend: str | None = None
    adapter_dir: Path | None = None
    multimodal: bool = False
    use_specialization: bool = True
    cache_dir: Path = field(default_factory=lambda: Path("kv_cache"))
    use_cache: bool = True
    kv_cache_strategy: str | None = None
    strategy_selector_profile: str = DEFAULT_STRATEGY_SELECTOR_PROFILE
    kv_cache_lifecycle: str = DEFAULT_KV_CACHE_LIFECYCLE
    kv_cache_adaptation_mode: str = DEFAULT_KV_CACHE_ADAPTATION_MODE
    kv_cache_window_tokens: int | None = None
    dense_projection_chunk_rows: int | None = None
    offload_cpu_layers: int = 0
    offload_cpu_policy: str = DEFAULT_CPU_OFFLOAD_POLICY
    offload_gpu_layers: int = 0
    force_download: bool = False
    stats: bool = False
    verbose: bool = False
    quiet: bool = False

    def resolved_models_dir(self) -> Path:
        """Return the absolute local models directory.

        ``~`` in ``models_dir`` is expanded before the path is resolved.
        """
        expanded = self.models_dir.expanduser()
        return expanded.resolve()

    def resolved_backend(self) -> str | None:
        """Return the normalized backend override when provided.

        The raw ``backend`` field is passed to ``normalize_backend`` unchanged.
        """
        backend_override = self.backend
        return normalize_backend(backend_override)

    def resolved_cache_dir(self) -> Path:
        """Return the absolute cache directory.

        ``~`` in ``cache_dir`` is expanded before the path is resolved.
        """
        expanded = self.cache_dir.expanduser()
        return expanded.resolve()

    def requested_kv_cache_strategy(self) -> str | None:
        """Return the normalized explicit KV strategy override when one exists.

        No default is substituted here; see ``resolved_kv_cache_strategy`` for
        the variant that falls back to ``DEFAULT_KV_CACHE_STRATEGY``.
        """
        raw_strategy = self.kv_cache_strategy
        return normalize_kv_cache_strategy(raw_strategy)

    def resolved_strategy_selector_profile(self) -> str:
        """Return the normalized selector profile.

        Delegates to ``resolve_strategy_selector_profile``.
        """
        raw_profile = self.strategy_selector_profile
        return resolve_strategy_selector_profile(raw_profile)

    def resolved_kv_cache_strategy(self) -> str:
        """Return the normalized KV cache strategy.

        ``DEFAULT_KV_CACHE_STRATEGY`` is used when no explicit strategy is
        requested.
        """
        explicit = self.requested_kv_cache_strategy()
        return DEFAULT_KV_CACHE_STRATEGY if explicit is None else explicit

    def resolved_kv_cache_lifecycle(self) -> str:
        """Return the normalized cache lifecycle.

        The raw (un-normalized) ``kv_cache_strategy`` is passed alongside the
        lifecycle to ``resolve_kv_cache_lifecycle``.
        """
        strategy, lifecycle = self.kv_cache_strategy, self.kv_cache_lifecycle
        return resolve_kv_cache_lifecycle(strategy, lifecycle)

    def resolved_kv_cache_adaptation_mode(self) -> str:
        """Return the normalized cache adaptation mode.

        ``DEFAULT_KV_CACHE_ADAPTATION_MODE`` is used when normalization yields
        ``None``.
        """
        mode = normalize_kv_cache_adaptation_mode(self.kv_cache_adaptation_mode)
        return DEFAULT_KV_CACHE_ADAPTATION_MODE if mode is None else mode

    def resolved_kv_cache_window_tokens(self) -> int | None:
        """Return the normalized sliding-window token budget.

        The strategy handed to ``resolve_kv_cache_window_tokens`` is first
        derived by ``_window_strategy_for_validation`` from the raw strategy,
        the selector profile, and the window budget itself.
        """
        window_tokens = self.kv_cache_window_tokens
        validation_strategy = _window_strategy_for_validation(
            self.kv_cache_strategy,
            self.strategy_selector_profile,
            window_tokens,
        )
        return resolve_kv_cache_window_tokens(validation_strategy, window_tokens)

    def resolved_dense_projection_chunk_rows(self) -> int | None:
        """Return the normalized explicit dense-projection chunk row budget.

        Delegates to ``normalize_dense_projection_chunk_rows``.
        """
        chunk_rows = self.dense_projection_chunk_rows
        return normalize_dense_projection_chunk_rows(chunk_rows)

    def resolved_offload_cpu_policy(self) -> str:
        """Return the normalized CPU offload policy.

        Delegates to ``resolve_cpu_offload_policy``.
        """
        raw_policy = self.offload_cpu_policy
        return resolve_cpu_offload_policy(raw_policy)

    def resolved_adapter_dir(self) -> Path | None:
        """Return the absolute adapter directory when one is configured.

        Returns ``None`` when ``adapter_dir`` is unset; otherwise ``~`` is
        expanded before the path is resolved.
        """
        adapter_dir = self.adapter_dir
        if adapter_dir is not None:
            return adapter_dir.expanduser().resolve()
        return None

    def validate(self) -> None:
        """Validate the configuration before planning or execution.

        Checks run in a fixed order, so the first failing check determines
        which error the caller sees.

        Raises:
            ValueError: Raised when any runtime option is structurally invalid,
                contradictory, or unsupported for the current execution model.
        """
        if not self.model_reference.strip():
            raise ValueError("--model cannot be empty")

        # The normalize/resolve helpers are called with their results
        # discarded; presumably they raise on unsupported values.
        backend = self.backend
        if backend is not None:
            normalize_backend(backend)
        normalize_kv_cache_strategy(self.kv_cache_strategy)
        normalize_strategy_selector_profile(self.strategy_selector_profile)
        resolve_kv_cache_lifecycle(self.kv_cache_strategy, self.kv_cache_lifecycle)
        normalize_kv_cache_adaptation_mode(self.kv_cache_adaptation_mode)
        window_strategy = _window_strategy_for_validation(
            self.kv_cache_strategy,
            self.strategy_selector_profile,
            self.kv_cache_window_tokens,
        )
        resolve_kv_cache_window_tokens(window_strategy, self.kv_cache_window_tokens)
        normalize_dense_projection_chunk_rows(self.dense_projection_chunk_rows)
        normalize_cpu_offload_policy(self.offload_cpu_policy)

        # Cross-field contradictions.
        if self.verbose and self.quiet:
            raise ValueError("--verbose and --quiet cannot be used together")
        specialization_disabled = not self.use_specialization
        if specialization_disabled and self.resolved_backend() == "optimized-native":
            raise ValueError(
                "--backend optimized-native cannot be combined with --no-specialization"
            )

        # Layer offload limits.
        cpu_layers = self.offload_cpu_layers
        if cpu_layers < 0:
            raise ValueError("--offload-cpu-layers must be zero or greater")
        if cpu_layers > 0:
            if self.device == "cpu":
                raise ValueError(
                    "--offload-cpu-layers requires an accelerator runtime device"
                )
            if self.offload_gpu_layers > 0:
                raise ValueError(
                    "--offload-cpu-layers cannot be combined with "
                    "--offload-gpu-layers in this runtime"
                )
        if self.offload_gpu_layers < 0:
            raise ValueError("--offload-gpu-layers must be zero or greater")

resolved_models_dir

resolved_models_dir() -> Path

Return the absolute local models directory.

Source code in src/ollm/runtime/config.py
111
112
113
def resolved_models_dir(self) -> Path:
    """Return the absolute local models directory.

    ``~`` in ``models_dir`` is expanded before the path is resolved.
    """
    expanded = self.models_dir.expanduser()
    return expanded.resolve()

resolved_backend

resolved_backend() -> str | None

Return the normalized backend override when provided.

Source code in src/ollm/runtime/config.py
115
116
117
def resolved_backend(self) -> str | None:
    """Return the normalized backend override when provided.

    The raw ``backend`` field is passed to ``normalize_backend`` unchanged.
    """
    backend_override = self.backend
    return normalize_backend(backend_override)

resolved_cache_dir

resolved_cache_dir() -> Path

Return the absolute cache directory.

Source code in src/ollm/runtime/config.py
119
120
121
def resolved_cache_dir(self) -> Path:
    """Return the absolute cache directory.

    ``~`` in ``cache_dir`` is expanded before the path is resolved.
    """
    expanded = self.cache_dir.expanduser()
    return expanded.resolve()

requested_kv_cache_strategy

requested_kv_cache_strategy() -> str | None

Return the normalized explicit KV strategy override when one exists.

Source code in src/ollm/runtime/config.py
123
124
125
126
def requested_kv_cache_strategy(self) -> str | None:
    """Return the normalized explicit KV strategy override when one exists.

    No default is substituted here; the raw ``kv_cache_strategy`` field is
    passed to ``normalize_kv_cache_strategy`` as-is.
    """
    raw_strategy = self.kv_cache_strategy
    return normalize_kv_cache_strategy(raw_strategy)

resolved_strategy_selector_profile

resolved_strategy_selector_profile() -> str

Return the normalized selector profile.

Source code in src/ollm/runtime/config.py
128
129
130
131
def resolved_strategy_selector_profile(self) -> str:
    """Return the normalized selector profile.

    Delegates to ``resolve_strategy_selector_profile``.
    """
    raw_profile = self.strategy_selector_profile
    return resolve_strategy_selector_profile(raw_profile)

resolved_kv_cache_strategy

resolved_kv_cache_strategy() -> str

Return the normalized KV cache strategy.

Source code in src/ollm/runtime/config.py
133
134
135
136
137
138
def resolved_kv_cache_strategy(self) -> str:
    """Return the normalized KV cache strategy.

    ``DEFAULT_KV_CACHE_STRATEGY`` is used when
    ``requested_kv_cache_strategy()`` reports no explicit strategy.
    """
    explicit = self.requested_kv_cache_strategy()
    return DEFAULT_KV_CACHE_STRATEGY if explicit is None else explicit

resolved_kv_cache_lifecycle

resolved_kv_cache_lifecycle() -> str

Return the normalized cache lifecycle.

Source code in src/ollm/runtime/config.py
140
141
142
143
144
145
def resolved_kv_cache_lifecycle(self) -> str:
    """Return the normalized cache lifecycle.

    The raw (un-normalized) ``kv_cache_strategy`` is passed alongside the
    lifecycle to ``resolve_kv_cache_lifecycle``.
    """
    strategy, lifecycle = self.kv_cache_strategy, self.kv_cache_lifecycle
    return resolve_kv_cache_lifecycle(strategy, lifecycle)

resolved_kv_cache_adaptation_mode

resolved_kv_cache_adaptation_mode() -> str

Return the normalized cache adaptation mode.

Source code in src/ollm/runtime/config.py
147
148
149
150
151
152
153
154
155
def resolved_kv_cache_adaptation_mode(self) -> str:
    """Return the normalized cache adaptation mode.

    ``DEFAULT_KV_CACHE_ADAPTATION_MODE`` is used when normalization yields
    ``None``.
    """
    mode = normalize_kv_cache_adaptation_mode(self.kv_cache_adaptation_mode)
    return DEFAULT_KV_CACHE_ADAPTATION_MODE if mode is None else mode

resolved_kv_cache_window_tokens

resolved_kv_cache_window_tokens() -> int | None

Return the normalized sliding-window token budget.

Source code in src/ollm/runtime/config.py
157
158
159
160
161
162
163
164
165
166
167
def resolved_kv_cache_window_tokens(self) -> int | None:
    """Return the normalized sliding-window token budget.

    The strategy handed to ``resolve_kv_cache_window_tokens`` is first derived
    by ``_window_strategy_for_validation`` from the raw strategy, the selector
    profile, and the window budget itself.
    """
    window_tokens = self.kv_cache_window_tokens
    validation_strategy = _window_strategy_for_validation(
        self.kv_cache_strategy,
        self.strategy_selector_profile,
        window_tokens,
    )
    return resolve_kv_cache_window_tokens(validation_strategy, window_tokens)

resolved_dense_projection_chunk_rows

resolved_dense_projection_chunk_rows() -> int | None

Return the normalized explicit dense-projection chunk row budget.

Source code in src/ollm/runtime/config.py
169
170
171
172
def resolved_dense_projection_chunk_rows(self) -> int | None:
    """Return the normalized explicit dense-projection chunk row budget.

    Delegates to ``normalize_dense_projection_chunk_rows``.
    """
    chunk_rows = self.dense_projection_chunk_rows
    return normalize_dense_projection_chunk_rows(chunk_rows)

resolved_offload_cpu_policy

resolved_offload_cpu_policy() -> str

Return the normalized CPU offload policy.

Source code in src/ollm/runtime/config.py
174
175
176
177
def resolved_offload_cpu_policy(self) -> str:
    """Return the normalized CPU offload policy.

    Delegates to ``resolve_cpu_offload_policy``.
    """
    raw_policy = self.offload_cpu_policy
    return resolve_cpu_offload_policy(raw_policy)

resolved_adapter_dir

resolved_adapter_dir() -> Path | None

Return the absolute adapter directory when one is configured.

Source code in src/ollm/runtime/config.py
179
180
181
182
183
def resolved_adapter_dir(self) -> Path | None:
    """Return the absolute adapter directory when one is configured."""
    if self.adapter_dir is None:
        return None
    return self.adapter_dir.expanduser().resolve()

validate

validate() -> None

Validate the configuration before planning or execution.

Raises:

| Type | Description |
| --- | --- |
| `ValueError` | Raised when any runtime option is structurally invalid, contradictory, or unsupported for the current execution model. |

Source code in src/ollm/runtime/config.py
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
def validate(self) -> None:
    """Validate the configuration before planning or execution.

    Checks run in a fixed order, so the first failing check determines which
    error the caller sees.

    Raises:
        ValueError: Raised when any runtime option is structurally invalid,
            contradictory, or unsupported for the current execution model.
    """
    if not self.model_reference.strip():
        raise ValueError("--model cannot be empty")

    # The normalize/resolve helpers are called with their results discarded;
    # presumably they raise on unsupported values.
    backend = self.backend
    if backend is not None:
        normalize_backend(backend)
    normalize_kv_cache_strategy(self.kv_cache_strategy)
    normalize_strategy_selector_profile(self.strategy_selector_profile)
    resolve_kv_cache_lifecycle(self.kv_cache_strategy, self.kv_cache_lifecycle)
    normalize_kv_cache_adaptation_mode(self.kv_cache_adaptation_mode)
    window_strategy = _window_strategy_for_validation(
        self.kv_cache_strategy,
        self.strategy_selector_profile,
        self.kv_cache_window_tokens,
    )
    resolve_kv_cache_window_tokens(window_strategy, self.kv_cache_window_tokens)
    normalize_dense_projection_chunk_rows(self.dense_projection_chunk_rows)
    normalize_cpu_offload_policy(self.offload_cpu_policy)

    # Cross-field contradictions.
    if self.verbose and self.quiet:
        raise ValueError("--verbose and --quiet cannot be used together")
    specialization_disabled = not self.use_specialization
    if specialization_disabled and self.resolved_backend() == "optimized-native":
        raise ValueError(
            "--backend optimized-native cannot be combined with --no-specialization"
        )

    # Layer offload limits.
    cpu_layers = self.offload_cpu_layers
    if cpu_layers < 0:
        raise ValueError("--offload-cpu-layers must be zero or greater")
    if cpu_layers > 0:
        if self.device == "cpu":
            raise ValueError(
                "--offload-cpu-layers requires an accelerator runtime device"
            )
        if self.offload_gpu_layers > 0:
            raise ValueError(
                "--offload-cpu-layers cannot be combined with "
                "--offload-gpu-layers in this runtime"
            )
    if self.offload_gpu_layers < 0:
        raise ValueError("--offload-gpu-layers must be zero or greater")

Describe generation-time sampling and streaming behavior.

Field annotations remain the source of truth for supported sampling controls. Use `validate()` before execution when constructing this type directly.

Source code in src/ollm/runtime/config.py
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
@dataclass(slots=True)
class GenerationConfig:
    """Describe generation-time sampling and streaming behavior.

    Field annotations remain the source of truth for supported sampling controls.
    Use :meth:`validate` before execution when constructing this type directly.
    """

    max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS
    temperature: float = 0.0
    top_p: float | None = None
    top_k: int | None = None
    seed: int | None = None
    stream: bool = True

    def validate(self) -> None:
        """Validate sampling and generation limits.

        Checks run in field order (``max_new_tokens``, ``temperature``,
        ``top_p``, ``top_k``); the first failing check raises. ``top_p`` and
        ``top_k`` are only checked when they are not ``None``. A NaN
        ``temperature`` is rejected together with negative values.

        Raises:
            ValueError: Raised when token or sampling limits fall outside the
                supported runtime range.
        """
        if self.max_new_tokens <= 0:
            raise ValueError("--max-new-tokens must be greater than zero")
        # "not >= 0" rather than "< 0": NaN compares false to everything, so a
        # plain "< 0" test would let a NaN temperature pass validation.
        if not self.temperature >= 0:
            raise ValueError("--temperature must be zero or greater")
        if self.top_p is not None and not 0 < self.top_p <= 1:
            raise ValueError("--top-p must be within (0, 1]")
        if self.top_k is not None and self.top_k <= 0:
            raise ValueError("--top-k must be greater than zero")

    def sampling_enabled(self) -> bool:
        """Return whether stochastic sampling is enabled.

        Returns:
            bool: ``True`` when ``temperature`` enables stochastic sampling,
            i.e. when it is strictly greater than zero.
        """
        return self.temperature > 0

validate

validate() -> None

Validate sampling and generation limits.

Raises:

| Type | Description |
| --- | --- |
| `ValueError` | Raised when token or sampling limits fall outside the supported runtime range. |

Source code in src/ollm/runtime/config.py
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
def validate(self) -> None:
    """Validate sampling and generation limits.

    Checks run in field order (``max_new_tokens``, ``temperature``, ``top_p``,
    ``top_k``); the first failing check raises. ``top_p`` and ``top_k`` are
    only checked when they are not ``None``. A NaN ``temperature`` is rejected
    together with negative values.

    Raises:
        ValueError: Raised when token or sampling limits fall outside the
            supported runtime range.
    """
    if self.max_new_tokens <= 0:
        raise ValueError("--max-new-tokens must be greater than zero")
    # "not >= 0" rather than "< 0": NaN compares false to everything, so a
    # plain "< 0" test would let a NaN temperature pass validation.
    if not self.temperature >= 0:
        raise ValueError("--temperature must be zero or greater")
    if self.top_p is not None and not 0 < self.top_p <= 1:
        raise ValueError("--top-p must be within (0, 1]")
    if self.top_k is not None and self.top_k <= 0:
        raise ValueError("--top-k must be greater than zero")

sampling_enabled

sampling_enabled() -> bool

Return whether stochastic sampling is enabled.

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `bool` | `bool` | `True` when `temperature` enables stochastic sampling. |

Source code in src/ollm/runtime/config.py
268
269
270
271
272
273
274
def sampling_enabled(self) -> bool:
    """Return whether stochastic sampling is enabled.

    Returns:
        bool: ``True`` when ``temperature`` is strictly greater than zero,
        which enables stochastic sampling.
    """
    temperature = self.temperature
    return temperature > 0