Skip to content

Commit ee2a1b6

Browse files
BSXXSB and yyk authored
revert: adjust model default configuration (#11)
Co-authored-by: yyk <yanyukun@xiaomi.com>
1 parent dc88ddc commit ee2a1b6

3 files changed

Lines changed: 8 additions & 5 deletions

File tree

config/ai_engine_config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ models:
5050
model_path: "/models/Qwen3-8B/Qwen3-8B-Q4_K_M.gguf"
5151

5252
parallel_seq_num: 2
53-
total_context_num: 16384
54-
context_per_seq: 8192
53+
total_context_num: 12288
54+
context_per_seq: 6144
5555
chunk_size: 1024
5656

5757
device: "cuda"

miloco_ai_engine/config/config_info.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ class ModelConfigUpdate(BaseModel):
2525
cache_seq_num: int = Field(description="Cache sequence count")
2626
parallel_seq_num: int = Field(description="Parallel sequence count")
2727
total_context_num: int = Field(description="Context window size")
28+
context_per_seq: int = Field(default=-1, description="Maximum available context")
2829
chunk_size: int = Field(description="Batch size")
2930

3031
class ModelConfig(BaseModel):
@@ -78,13 +79,14 @@ def update(self, config_update: ModelConfigUpdate) -> None:
7879
self.n_seq_max = self.cache_seq_num + config_update.parallel_seq_num
7980

8081
self.total_context_num = config_update.total_context_num
82+
self.context_per_seq = config_update.context_per_seq \
83+
if config_update.context_per_seq > 0 else self.context_per_seq
8184
self.chunk_size = config_update.chunk_size
8285

8386
def to_dict(self) -> dict:
8487
"""
8588
Convert to dictionary for C++ library initialization input
8689
"""
87-
8890
r = self.model_dump()
8991
r.pop("task_classification")
9092
# Remove keys with None values from config dictionary

miloco_ai_engine/config/config_optimizer.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,14 @@
5252
device=ModelDevice.CPU,
5353
cache_seq_num=0,
5454
parallel_seq_num=2,
55-
total_context_num=16384,
55+
total_context_num=12288,
5656
chunk_size=1024
5757
)
5858
LOW_MODE_QWEN3_CONFIG_UPDATE = ModelConfigUpdate(
5959
device=ModelDevice.CUDA,
6060
cache_seq_num=0,
6161
parallel_seq_num=2,
62-
total_context_num=16384,
62+
total_context_num=12288,
6363
chunk_size=1024
6464
)
6565
SMALL_MODE_QWEN3_CONFIG_UPDATE = LOW_MODE_QWEN3_CONFIG_UPDATE
@@ -69,6 +69,7 @@
6969
cache_seq_num=0,
7070
parallel_seq_num=3,
7171
total_context_num=24576,
72+
context_per_seq=8192,
7273
chunk_size=1024
7374
)
7475

0 commit comments

Comments (0)