Describe how a model reference should be resolved and executed.
This is the shared execution contract used by the CLI, the library, and the
local server. Field annotations remain the source of truth for supported
options, while the helper methods normalize and validate those fields for
planning and execution.
Source code in src/ollm/runtime/config.py
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
@dataclass(slots=True)
class RuntimeConfig:
    """Shared execution contract for resolving and running a model reference.

    The CLI, the library, and the local server all consume this configuration.
    The annotated fields are the authoritative list of supported options; the
    helper methods normalize and validate those fields for planning and
    execution.
    """

    model_reference: str = DEFAULT_MODEL_REFERENCE
    models_dir: Path = field(default_factory=lambda: Path("models"))
    device: str = DEFAULT_DEVICE
    backend: str | None = None
    adapter_dir: Path | None = None
    multimodal: bool = False
    use_specialization: bool = True
    cache_dir: Path = field(default_factory=lambda: Path("kv_cache"))
    use_cache: bool = True
    kv_cache_strategy: str | None = None
    strategy_selector_profile: str = DEFAULT_STRATEGY_SELECTOR_PROFILE
    kv_cache_lifecycle: str = DEFAULT_KV_CACHE_LIFECYCLE
    kv_cache_adaptation_mode: str = DEFAULT_KV_CACHE_ADAPTATION_MODE
    kv_cache_window_tokens: int | None = None
    dense_projection_chunk_rows: int | None = None
    offload_cpu_layers: int = 0
    offload_cpu_policy: str = DEFAULT_CPU_OFFLOAD_POLICY
    offload_gpu_layers: int = 0
    force_download: bool = False
    stats: bool = False
    verbose: bool = False
    quiet: bool = False

    def resolved_models_dir(self) -> Path:
        """Return ``models_dir`` with ``~`` expanded and made absolute."""
        expanded = self.models_dir.expanduser()
        return expanded.resolve()

    def resolved_backend(self) -> str | None:
        """Return ``backend`` after passing it through ``normalize_backend``."""
        backend_override = self.backend
        return normalize_backend(backend_override)

    def resolved_cache_dir(self) -> Path:
        """Return ``cache_dir`` with ``~`` expanded and made absolute."""
        expanded = self.cache_dir.expanduser()
        return expanded.resolve()

    def requested_kv_cache_strategy(self) -> str | None:
        """Return the explicit KV strategy override in normalized form.

        The result is whatever ``normalize_kv_cache_strategy`` yields for the
        raw ``kv_cache_strategy`` field; no default is substituted here.
        """
        raw_strategy = self.kv_cache_strategy
        return normalize_kv_cache_strategy(raw_strategy)

    def resolved_strategy_selector_profile(self) -> str:
        """Return the selector profile resolved from the configured value."""
        profile = self.strategy_selector_profile
        return resolve_strategy_selector_profile(profile)

    def resolved_kv_cache_strategy(self) -> str:
        """Return the KV cache strategy, using the default when none is set."""
        requested = self.requested_kv_cache_strategy()
        return DEFAULT_KV_CACHE_STRATEGY if requested is None else requested

    def resolved_kv_cache_lifecycle(self) -> str:
        """Return the cache lifecycle resolved for the configured strategy."""
        strategy = self.kv_cache_strategy
        lifecycle = self.kv_cache_lifecycle
        return resolve_kv_cache_lifecycle(strategy, lifecycle)

    def resolved_kv_cache_adaptation_mode(self) -> str:
        """Return the cache adaptation mode, using the default when unset."""
        mode = normalize_kv_cache_adaptation_mode(self.kv_cache_adaptation_mode)
        return DEFAULT_KV_CACHE_ADAPTATION_MODE if mode is None else mode

    def resolved_kv_cache_window_tokens(self) -> int | None:
        """Return the sliding-window token budget in normalized form."""
        window_tokens = self.kv_cache_window_tokens
        # The strategy used for the window check is derived from the explicit
        # strategy, the selector profile, and the requested window size.
        strategy_for_window = _window_strategy_for_validation(
            self.kv_cache_strategy,
            self.strategy_selector_profile,
            window_tokens,
        )
        return resolve_kv_cache_window_tokens(strategy_for_window, window_tokens)

    def resolved_dense_projection_chunk_rows(self) -> int | None:
        """Return the explicit dense-projection chunk row budget, normalized."""
        chunk_rows = self.dense_projection_chunk_rows
        return normalize_dense_projection_chunk_rows(chunk_rows)

    def resolved_offload_cpu_policy(self) -> str:
        """Return the CPU offload policy resolved from the configured value."""
        policy = self.offload_cpu_policy
        return resolve_cpu_offload_policy(policy)

    def resolved_adapter_dir(self) -> Path | None:
        """Return the absolute adapter directory, or ``None`` when unset."""
        adapter_dir = self.adapter_dir
        if adapter_dir is not None:
            return adapter_dir.expanduser().resolve()
        return None

    def validate(self) -> None:
        """Check the configuration before planning or execution.

        The checks run in a fixed order, so the first failing option decides
        which error is reported.

        Raises:
            ValueError: Raised when any runtime option is structurally invalid,
                contradictory, or unsupported for the current execution model.
        """
        if not self.model_reference.strip():
            raise ValueError("--model cannot be empty")

        # The normalizer/resolver calls below are made for validation only;
        # their return values are intentionally discarded.
        if self.backend is not None:
            normalize_backend(self.backend)
        normalize_kv_cache_strategy(self.kv_cache_strategy)
        normalize_strategy_selector_profile(self.strategy_selector_profile)
        resolve_kv_cache_lifecycle(self.kv_cache_strategy, self.kv_cache_lifecycle)
        normalize_kv_cache_adaptation_mode(self.kv_cache_adaptation_mode)
        strategy_for_window = _window_strategy_for_validation(
            self.kv_cache_strategy,
            self.strategy_selector_profile,
            self.kv_cache_window_tokens,
        )
        resolve_kv_cache_window_tokens(
            strategy_for_window,
            self.kv_cache_window_tokens,
        )
        normalize_dense_projection_chunk_rows(self.dense_projection_chunk_rows)
        normalize_cpu_offload_policy(self.offload_cpu_policy)

        if self.verbose and self.quiet:
            raise ValueError("--verbose and --quiet cannot be used together")

        specialization_disabled = not self.use_specialization
        if specialization_disabled and self.resolved_backend() == "optimized-native":
            raise ValueError(
                "--backend optimized-native cannot be combined with --no-specialization"
            )

        cpu_layers = self.offload_cpu_layers
        if cpu_layers < 0:
            raise ValueError("--offload-cpu-layers must be zero or greater")
        if cpu_layers > 0:
            if self.device == "cpu":
                raise ValueError(
                    "--offload-cpu-layers requires an accelerator runtime device"
                )
            if self.offload_gpu_layers > 0:
                raise ValueError(
                    "--offload-cpu-layers cannot be combined with "
                    "--offload-gpu-layers in this runtime"
                )
        if self.offload_gpu_layers < 0:
            raise ValueError("--offload-gpu-layers must be zero or greater")
|
resolved_models_dir
resolved_models_dir() -> Path
Return the absolute local models directory.
Source code in src/ollm/runtime/config.py
def resolved_models_dir(self) -> Path:
    """Return ``models_dir`` with ``~`` expanded and made absolute."""
    expanded = self.models_dir.expanduser()
    return expanded.resolve()
|
resolved_backend
resolved_backend() -> str | None
Return the normalized backend override when provided.
Source code in src/ollm/runtime/config.py
def resolved_backend(self) -> str | None:
    """Return ``backend`` after passing it through ``normalize_backend``."""
    backend_override = self.backend
    return normalize_backend(backend_override)
|
resolved_cache_dir
resolved_cache_dir() -> Path
Return the absolute cache directory.
Source code in src/ollm/runtime/config.py
def resolved_cache_dir(self) -> Path:
    """Return ``cache_dir`` with ``~`` expanded and made absolute."""
    expanded = self.cache_dir.expanduser()
    return expanded.resolve()
|
requested_kv_cache_strategy
requested_kv_cache_strategy() -> str | None
Return the normalized explicit KV strategy override when one exists.
Source code in src/ollm/runtime/config.py
def requested_kv_cache_strategy(self) -> str | None:
    """Return the explicit KV strategy override in normalized form.

    The result is whatever ``normalize_kv_cache_strategy`` yields for the raw
    ``kv_cache_strategy`` field; no default is substituted here.
    """
    raw_strategy = self.kv_cache_strategy
    return normalize_kv_cache_strategy(raw_strategy)
|
resolved_strategy_selector_profile
resolved_strategy_selector_profile() -> str
Return the normalized selector profile.
Source code in src/ollm/runtime/config.py
def resolved_strategy_selector_profile(self) -> str:
    """Return the selector profile resolved from the configured value."""
    profile = self.strategy_selector_profile
    return resolve_strategy_selector_profile(profile)
|
resolved_kv_cache_strategy
resolved_kv_cache_strategy() -> str
Return the normalized KV cache strategy.
Source code in src/ollm/runtime/config.py
def resolved_kv_cache_strategy(self) -> str:
    """Return the KV cache strategy, using the default when none is set."""
    requested = self.requested_kv_cache_strategy()
    return DEFAULT_KV_CACHE_STRATEGY if requested is None else requested
|
resolved_kv_cache_lifecycle
resolved_kv_cache_lifecycle() -> str
Return the normalized cache lifecycle.
Source code in src/ollm/runtime/config.py
def resolved_kv_cache_lifecycle(self) -> str:
    """Return the cache lifecycle resolved for the configured strategy."""
    strategy = self.kv_cache_strategy
    lifecycle = self.kv_cache_lifecycle
    return resolve_kv_cache_lifecycle(strategy, lifecycle)
|
resolved_kv_cache_adaptation_mode
resolved_kv_cache_adaptation_mode() -> str
Return the normalized cache adaptation mode.
Source code in src/ollm/runtime/config.py
147
148
149
150
151
152
153
154
def resolved_kv_cache_adaptation_mode(self) -> str:
    """Return the cache adaptation mode, using the default when unset."""
    mode = normalize_kv_cache_adaptation_mode(self.kv_cache_adaptation_mode)
    return DEFAULT_KV_CACHE_ADAPTATION_MODE if mode is None else mode
|
resolved_kv_cache_window_tokens
resolved_kv_cache_window_tokens() -> int | None
Return the normalized sliding-window token budget.
Source code in src/ollm/runtime/config.py
157
158
159
160
161
162
163
164
165
166
def resolved_kv_cache_window_tokens(self) -> int | None:
    """Return the sliding-window token budget in normalized form."""
    window_tokens = self.kv_cache_window_tokens
    # The strategy used for the window check is derived from the explicit
    # strategy, the selector profile, and the requested window size.
    strategy_for_window = _window_strategy_for_validation(
        self.kv_cache_strategy,
        self.strategy_selector_profile,
        window_tokens,
    )
    return resolve_kv_cache_window_tokens(strategy_for_window, window_tokens)
|
resolved_dense_projection_chunk_rows
resolved_dense_projection_chunk_rows() -> int | None
Return the normalized explicit dense-projection chunk row budget.
Source code in src/ollm/runtime/config.py
def resolved_dense_projection_chunk_rows(self) -> int | None:
    """Return the explicit dense-projection chunk row budget, normalized."""
    chunk_rows = self.dense_projection_chunk_rows
    return normalize_dense_projection_chunk_rows(chunk_rows)
|
resolved_offload_cpu_policy
resolved_offload_cpu_policy() -> str
Return the normalized CPU offload policy.
Source code in src/ollm/runtime/config.py
def resolved_offload_cpu_policy(self) -> str:
    """Return the CPU offload policy resolved from the configured value."""
    policy = self.offload_cpu_policy
    return resolve_cpu_offload_policy(policy)
|
resolved_adapter_dir
resolved_adapter_dir() -> Path | None
Return the absolute adapter directory when one is configured.
Source code in src/ollm/runtime/config.py
| def resolved_adapter_dir(self) -> Path | None:
"""Return the absolute adapter directory when one is configured."""
if self.adapter_dir is None:
return None
return self.adapter_dir.expanduser().resolve()
|
validate
validate() -> None
Validate the configuration before planning or execution.
Raises:
| Type | Description |
| --- | --- |
| ValueError | Raised when any runtime option is structurally invalid, contradictory, or unsupported for the current execution model. |
Source code in src/ollm/runtime/config.py
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
def validate(self) -> None:
    """Check the configuration before planning or execution.

    The checks run in a fixed order, so the first failing option decides
    which error is reported.

    Raises:
        ValueError: Raised when any runtime option is structurally invalid,
            contradictory, or unsupported for the current execution model.
    """
    if not self.model_reference.strip():
        raise ValueError("--model cannot be empty")

    # The normalizer/resolver calls below are made for validation only;
    # their return values are intentionally discarded.
    if self.backend is not None:
        normalize_backend(self.backend)
    normalize_kv_cache_strategy(self.kv_cache_strategy)
    normalize_strategy_selector_profile(self.strategy_selector_profile)
    resolve_kv_cache_lifecycle(self.kv_cache_strategy, self.kv_cache_lifecycle)
    normalize_kv_cache_adaptation_mode(self.kv_cache_adaptation_mode)
    strategy_for_window = _window_strategy_for_validation(
        self.kv_cache_strategy,
        self.strategy_selector_profile,
        self.kv_cache_window_tokens,
    )
    resolve_kv_cache_window_tokens(
        strategy_for_window,
        self.kv_cache_window_tokens,
    )
    normalize_dense_projection_chunk_rows(self.dense_projection_chunk_rows)
    normalize_cpu_offload_policy(self.offload_cpu_policy)

    if self.verbose and self.quiet:
        raise ValueError("--verbose and --quiet cannot be used together")

    specialization_disabled = not self.use_specialization
    if specialization_disabled and self.resolved_backend() == "optimized-native":
        raise ValueError(
            "--backend optimized-native cannot be combined with --no-specialization"
        )

    cpu_layers = self.offload_cpu_layers
    if cpu_layers < 0:
        raise ValueError("--offload-cpu-layers must be zero or greater")
    if cpu_layers > 0:
        if self.device == "cpu":
            raise ValueError(
                "--offload-cpu-layers requires an accelerator runtime device"
            )
        if self.offload_gpu_layers > 0:
            raise ValueError(
                "--offload-cpu-layers cannot be combined with "
                "--offload-gpu-layers in this runtime"
            )
    if self.offload_gpu_layers < 0:
        raise ValueError("--offload-gpu-layers must be zero or greater")
|