Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/seq-db/seq-db.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ func startStore(
IDsZstdLevel: cfg.Compression.SealedZstdCompressionLevel,
LIDsZstdLevel: cfg.Compression.SealedZstdCompressionLevel,
LIDBlockSize: cfg.Sealing.Lids.BlockSize,
TokenBlockSize: int(cfg.Sealing.Tokens.BlockSize),
TokenListZstdLevel: cfg.Compression.SealedZstdCompressionLevel,
DocsPositionsZstdLevel: cfg.Compression.SealedZstdCompressionLevel,
TokenTableZstdLevel: cfg.Compression.SealedZstdCompressionLevel,
Expand Down
5 changes: 5 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ type Config struct {
} `config:"storage"`

Sealing struct {
Tokens struct {
// BlockSize sets max token block size in bytes.
BlockSize Bytes `config:"block_size" default:"16KiB"`
} `config:"tokens"`

Lids struct {
// BlockSize sets max lids (postings) saved per LIDs block.
BlockSize int `config:"block_size" default:"65536"`
Expand Down
1 change: 1 addition & 0 deletions config/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ func (c *Config) storeValidations() []validateFn {
inRange("compression.doc_block_zstd_compression_level", -7, 22, c.Compression.DocBlockZstdCompressionLevel),
greaterThan("sealing.lids.block_size", 0, c.Sealing.Lids.BlockSize),
lessOrEqThan("sealing.lids.block_size", 65536, c.Sealing.Lids.BlockSize),
greaterThan("sealing.tokens.block_size", 0, c.Sealing.Tokens.BlockSize),
inRange("offloading.queue_size_percent", 0, 100, c.Offloading.QueueSizePercent),

greaterThan("experimental.max_regex_tokens_check", -1, c.Experimental.MaxRegexTokensCheck),
Expand Down
15 changes: 15 additions & 0 deletions config/validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,21 @@ limits:
env: map[string]string{"SEQDB_SEALING_LIDS_BLOCK_SIZE": "8192"},
expectErr: false,
},
{
name: "Invalid sealing.tokens.block_size",
cfg: createCfgFile(t, base+`
sealing:
tokens:
block_size: -1B
`),
expectErr: true,
},
{
name: "Valid sealing.tokens.block_size",
cfg: baseCfg,
env: map[string]string{"SEQDB_SEALING_TOKENS_BLOCK_SIZE": "32KiB"},
expectErr: false,
},
}

for _, tt := range tests {
Expand Down
6 changes: 6 additions & 0 deletions docs/en/02-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,12 @@ Compression level settings for various data types.

Settings for fraction sealing.

### Tokens

| Field | Type | Default | Description |
|-------|------|---------|-------------|
| `sealing.tokens.block_size` | Bytes | `16KiB` | Max token block size in bytes |

### Lids

| Field | Type | Default | Description |
Expand Down
6 changes: 6 additions & 0 deletions docs/ru/02-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,12 @@ id: configuration

Настройки запечатывания фракций.

### Tokens

| Параметр | Тип | Значение по умолчанию | Описание |
|----------|-----|----------------------|-----------|
| `sealing.tokens.block_size` | Bytes | `16KiB` | Максимальный размер блока токенов в байтах |

### Lids

| Параметр | Тип | Значение по умолчанию | Описание |
Expand Down
1 change: 1 addition & 0 deletions frac/common/seal_params.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@ type SealParams struct {

DocBlocksZstdLevel int // DocBlocksZstdLevel is the zstd compress level of each document block.
LIDBlockSize int
TokenBlockSize int
DocBlockSize int // DocBlockSize is decompressed payload size of document block.
}
5 changes: 3 additions & 2 deletions frac/fraction_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ func (s *FractionTestSuite) SetupTestCommon() {
TokenTableZstdLevel: 1,
DocBlocksZstdLevel: 1,
LIDBlockSize: 512,
TokenBlockSize: 128,
DocBlockSize: 128 * int(units.KiB),
}

Expand Down Expand Up @@ -1901,11 +1902,11 @@ func (s *FractionTestSuite) TestFractionInfo() {
s.Require().Equal(uint64(0), info.IndexOnDisk, "index on disk doesn't match")
case *Sealed:
s.Require().Equal(uint64(0), info.MetaOnDisk, "meta on disk doesn't match. actual value")
s.Require().True(info.IndexOnDisk > uint64(1300) && info.IndexOnDisk < uint64(1450),
s.Require().True(info.IndexOnDisk > uint64(1300) && info.IndexOnDisk < uint64(1500),
"index on disk doesn't match. actual value: %d", info.IndexOnDisk)
case *Remote:
s.Require().Equal(uint64(0), info.MetaOnDisk, "meta on disk doesn't match. actual value")
s.Require().True(info.IndexOnDisk > uint64(1300) && info.IndexOnDisk < uint64(1450),
s.Require().True(info.IndexOnDisk > uint64(1300) && info.IndexOnDisk < uint64(1500),
"index on disk doesn't match. actual value: %d", info.IndexOnDisk)
default:
s.Require().Fail("unsupported fraction type")
Expand Down
4 changes: 4 additions & 0 deletions frac/sealed/token/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ func (tp *Provider) findInBlocks(firstTID, lastTID uint32, search func(*Block, i
var tids []uint32

for _, entry := range tp.entries {
if !entry.checkTIDsInBlock(firstTID, lastTID) {
continue
}

block := tp.findBlock(entry.BlockIndex)
firstIndex, lastIndex := entry.narrowIndexes(firstTID, lastTID)
indexes, err := search(block, firstIndex, lastIndex)
Expand Down
12 changes: 12 additions & 0 deletions frac/sealed/token/table_entry.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,18 @@ func (t *TableEntry) narrowIndexes(firstTID, lastTID uint32) (int, int) {
return firstIndex, lastIndex
}

// checkTIDsInBlock reports whether the inclusive TID range
// [firstTID, lastTID] overlaps the TIDs covered by this entry: the range must
// not end before t.StartTID and must not begin after t.getLastTID().
func (t *TableEntry) checkTIDsInBlock(firstTID, lastTID uint32) bool {
	// Short-circuit keeps the original evaluation order: getLastTID is only
	// consulted once the range is known to reach StartTID.
	return lastTID >= t.StartTID && firstTID <= t.getLastTID()
}

func (t *TableEntry) checkTIDInBlock(tid uint32) bool {
if tid < t.StartTID {
return false
Expand Down
3 changes: 3 additions & 0 deletions fracmanager/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ func FillConfigWithDefault(config *Config) *Config {
if config.SealParams.LIDBlockSize == 0 {
config.SealParams.LIDBlockSize = consts.DefaultLIDBlockCap
}
if config.SealParams.TokenBlockSize == 0 {
config.SealParams.TokenBlockSize = consts.RegularBlockSize
}
if config.SealParams.TokenListZstdLevel == 0 {
config.SealParams.TokenListZstdLevel = zstdDefaultLevel
}
Expand Down
12 changes: 8 additions & 4 deletions indexwriter/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,16 @@ type IndexWriter struct {
}

// New builds an IndexWriter for the given seal parameters.
//
// params is received by value, so the defaulting below does not modify the
// caller's copy. A zero params.TokenBlockSize is replaced with
// consts.RegularBlockSize before the scratch buffers are allocated.
//
// NOTE(review): this listing looks like a rendered diff — buf1, buf2 and buf64
// each appear twice, first sized by consts.RegularBlockSize and then by
// params.TokenBlockSize; presumably only the latter lines are in the final
// file — confirm against the repository.
func New(params common.SealParams) *IndexWriter {
// Fall back to the regular block size when no token block size was supplied.
if params.TokenBlockSize == 0 {
params.TokenBlockSize = consts.RegularBlockSize
}

return &IndexWriter{
params: params,
buf1: make([]byte, 0, consts.RegularBlockSize),
buf2: make([]byte, 0, consts.RegularBlockSize),
buf1: make([]byte, 0, params.TokenBlockSize),
buf2: make([]byte, 0, params.TokenBlockSize),
// buf32 capacity is tied to the default LID block capacity, not the token block size.
buf32: make([]uint32, 0, consts.DefaultLIDBlockCap),
buf64: make([]uint64, 0, consts.RegularBlockSize),
buf64: make([]uint64, 0, params.TokenBlockSize),
}
}

Expand Down Expand Up @@ -153,7 +157,7 @@ func (s *IndexWriter) WriteTokenTriplet(tws, lws io.WriteSeeker, src Source) err
)

var allFieldsTables []token.FieldTable
for pair, err := range tokenBlock(src.TokenTriplet(), lidAccumulator.add, consts.RegularBlockSize) {
for pair, err := range tokenBlock(src.TokenTriplet(), lidAccumulator.add, s.params.TokenBlockSize) {
if err != nil {
return err
}
Expand Down
4 changes: 4 additions & 0 deletions sealing/sealer.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ type Source = indexwriter.Source
// and returns PreloadedData for fast initialization of the sealed fraction.
func Seal(src Source, params common.SealParams) (*sealed.PreloadedData, error) {
info := src.Info()
info.ConstRegularBlockSize = params.TokenBlockSize
if info.ConstRegularBlockSize == 0 {
info.ConstRegularBlockSize = consts.RegularBlockSize
}

if info.To == 0 {
return nil, errors.New("sealing of an empty active fraction is not supported")
Expand Down