mirror of
https://github.com/chrislusf/seaweedfs
synced 2024-06-03 01:00:04 +02:00
tests can compile
This commit is contained in:
parent
dc4ed2cd9b
commit
59f44b70c3
6
weed/data/Makefile
Normal file
6
weed/data/Makefile
Normal file
|
@ -0,0 +1,6 @@
|
|||
# Default goal: regenerate the protobuf bindings.
all: gen

# Neither target produces a file of its own name, so both are declared phony;
# otherwise a stray file called "all" or "gen" would make the goal a no-op.
.PHONY: all gen

# Regenerate the Go protobuf and gRPC bindings for columnar.proto into ./columnar_pb.
gen:
	protoc columnar.proto --go_out=./columnar_pb --go-grpc_out=./columnar_pb --go_opt=paths=source_relative --go-grpc_opt=paths=source_relative
|
32
weed/data/column_uint16.go
Normal file
32
weed/data/column_uint16.go
Normal file
|
@ -0,0 +1,32 @@
|
|||
package data
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
// ColumnUint16 reads fixed-width, big-endian uint16 values through an
// io.ReaderAt. It carries no state of its own.
type ColumnUint16 struct {
}

// SIZE_Uint16 is the number of bytes one encoded uint16 occupies.
const SIZE_Uint16 = 2

// Read decodes the i-th uint16 of a column that starts at byte offset
// `offset` within readerAt.
//
// buf is caller-provided scratch space and must be at least SIZE_Uint16 bytes
// long; only its first SIZE_Uint16 bytes are used, so a larger reusable
// buffer is fine. Read returns 0 when buf is too short or when the value
// cannot be read in full, so a stored 0 is indistinguishable from a failed
// read.
func (c *ColumnUint16) Read(buf []byte, readerAt io.ReaderAt, offset int64, i int64) uint16 {
	if len(buf) < SIZE_Uint16 {
		return 0
	}
	// Restrict the read to exactly one element so the byte count below is
	// meaningful regardless of how large the scratch buffer is.
	buf = buf[:SIZE_Uint16]

	n, err := readerAt.ReadAt(buf, offset+i*SIZE_Uint16)
	if n != SIZE_Uint16 {
		return 0
	}
	// io.ReaderAt may report io.EOF together with a complete read when the
	// element is the last one in the source; that is still a valid value.
	if err != nil && err != io.EOF {
		return 0
	}
	return binary.BigEndian.Uint16(buf)
}
|
||||
|
||||
func WriteUint16s(buf []byte, data []uint16) (err error) {
|
||||
off := 0
|
||||
size := len(data)
|
||||
if len(buf) < size<<1 {
|
||||
return fmt.Errorf("buf too small")
|
||||
}
|
||||
for _, dat := range data {
|
||||
binary.BigEndian.PutUint16(buf[off:], dat)
|
||||
off += SIZE_Uint16
|
||||
}
|
||||
return nil
|
||||
}
|
32
weed/data/column_uint32.go
Normal file
32
weed/data/column_uint32.go
Normal file
|
@ -0,0 +1,32 @@
|
|||
package data
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
// ColumnUint32 reads fixed-width, big-endian uint32 values through an
// io.ReaderAt. It carries no state of its own.
type ColumnUint32 struct {
}

// SIZE_Uint32 is the number of bytes one encoded uint32 occupies.
const SIZE_Uint32 = 4

// Read decodes the i-th uint32 of a column that starts at byte offset
// `offset` within readerAt.
//
// buf is caller-provided scratch space and must be at least SIZE_Uint32 bytes
// long; only its first SIZE_Uint32 bytes are used, so a larger reusable
// buffer is fine. Read returns 0 when buf is too short or when the value
// cannot be read in full, so a stored 0 is indistinguishable from a failed
// read.
func (c *ColumnUint32) Read(buf []byte, readerAt io.ReaderAt, offset int64, i int64) uint32 {
	if len(buf) < SIZE_Uint32 {
		return 0
	}
	// Restrict the read to exactly one element so the byte count below is
	// meaningful regardless of how large the scratch buffer is.
	buf = buf[:SIZE_Uint32]

	n, err := readerAt.ReadAt(buf, offset+i*SIZE_Uint32)
	if n != SIZE_Uint32 {
		return 0
	}
	// io.ReaderAt may report io.EOF together with a complete read when the
	// element is the last one in the source; that is still a valid value.
	if err != nil && err != io.EOF {
		return 0
	}
	return binary.BigEndian.Uint32(buf)
}
|
||||
|
||||
func WriteUint32s(buf []byte, data []uint32) (err error) {
|
||||
off := 0
|
||||
size := len(data)
|
||||
if len(buf) < size<<2 {
|
||||
return fmt.Errorf("buf too small")
|
||||
}
|
||||
for _, dat := range data {
|
||||
binary.BigEndian.PutUint32(buf[off:], dat)
|
||||
off += SIZE_Uint32
|
||||
}
|
||||
return nil
|
||||
}
|
103
weed/data/columnar.proto
Normal file
103
weed/data/columnar.proto
Normal file
|
@ -0,0 +1,103 @@
|
|||
syntax = "proto3";
|
||||
|
||||
package columnar_pb;
|
||||
|
||||
option go_package = "github.com/seaweedfs/seaweedfs/weed/data/columnar_pb";
|
||||
|
||||
message FileId {
|
||||
uint32 volume_id = 1;
|
||||
uint64 file_key = 2;
|
||||
fixed32 cookie = 3;
|
||||
}
|
||||
|
||||
enum LogicalType {
|
||||
Uint8 = 0;
|
||||
Uint16 = 1;
|
||||
Float32 = 4;
|
||||
}
|
||||
|
||||
message ColumnUint16 {
|
||||
uint32 base = 1;
|
||||
uint32 min = 3;
|
||||
uint32 max = 4;
|
||||
}
|
||||
|
||||
message ColumnUint32 {
|
||||
uint32 base = 1;
|
||||
uint32 min = 3;
|
||||
uint32 max = 4;
|
||||
}
|
||||
|
||||
message ColumnFloat32 {
|
||||
uint32 min = 3;
|
||||
uint32 max = 4;
|
||||
}
|
||||
|
||||
message ColumnSplit {
|
||||
// The id of the field/column stored in this split.
|
||||
int32 field_id = 1;
|
||||
FileId file_id = 2;
|
||||
int64 row_offset = 3;
|
||||
int32 row_count = 4;
|
||||
|
||||
oneof storage_type {
|
||||
ColumnUint16 meta_uint16 = 8;
|
||||
ColumnUint32 meta_uint32 = 9;
|
||||
ColumnFloat32 meta_float32 = 10;
|
||||
}
|
||||
}
|
||||
|
||||
message Snapshot {
|
||||
// All fields of the dataset, including the nested fields.
|
||||
repeated Field fields = 1;
|
||||
|
||||
repeated string data_files = 2;
|
||||
|
||||
// Snapshot version number.
|
||||
uint64 version = 3;
|
||||
|
||||
}
|
||||
|
||||
message DataFile {
|
||||
repeated int32 field_ids = 1;
|
||||
repeated RowGroup row_groups = 2;
|
||||
}
|
||||
|
||||
message RowGroup {
|
||||
int64 row_offset = 1;
|
||||
int32 row_count = 2;
|
||||
repeated ColumnSplit column_splits = 3;
|
||||
}
|
||||
|
||||
// Field metadata for a column.
|
||||
message Field {
|
||||
enum Type {
|
||||
PARENT = 0;
|
||||
REPEATED = 1;
|
||||
LEAF = 2;
|
||||
}
|
||||
Type type = 1;
|
||||
|
||||
// Fully qualified name.
|
||||
string name = 2;
|
||||
// Field ID.
|
||||
int32 id = 3;
|
||||
// Parent field ID. If not set, this is a top-level column.
|
||||
int32 parent_id = 4;
|
||||
|
||||
// Logical type of this field; supports parameterized Arrow types.
|
||||
LogicalType logical_type = 5;
|
||||
// Whether this field is nullable.
|
||||
bool nullable = 6;
|
||||
}
|
||||
|
||||
|
||||
message AnyValue {
|
||||
oneof value {
|
||||
bytes bytes_value = 1;
|
||||
bool bool_value = 2;
|
||||
uint64 int64_value = 3;
|
||||
uint32 int32_value = 4;
|
||||
double double_value = 5;
|
||||
}
|
||||
}
|
1199
weed/data/columnar_pb/columnar.pb.go
Normal file
1199
weed/data/columnar_pb/columnar.pb.go
Normal file
File diff suppressed because it is too large
Load diff
69
weed/data/datum.go
Normal file
69
weed/data/datum.go
Normal file
|
@ -0,0 +1,69 @@
|
|||
package data
|
||||
|
||||
import "fmt"
|
||||
|
||||
// Datum is a single column value that can be ordered against another Datum.
type Datum interface {
	// Compare reports how the receiver orders relative to other: -1 when it
	// is less, 0 when equal, 1 when greater. It returns an error when the two
	// values are of kinds that cannot be compared.
	Compare(other Datum) (int, error)
}

// Datums is a list of Datum values.
type Datums []Datum

// DUint16 is a uint16 value. Its Compare method has a pointer receiver, so it
// is *DUint16 (see NewDUint16) that satisfies Datum.
type DUint16 uint16

// DUint32 is a uint32 value. Its Compare method has a pointer receiver, so it
// is *DUint32 (see NewDUint32) that satisfies Datum.
type DUint32 uint32

// dNull is the type behind the DNull singleton.
type dNull struct{}

var (
	// DNull is the null Datum. It compares equal to itself and orders before
	// every other Datum.
	DNull Datum = dNull{}
)

// Compare returns 0 when other is DNull and -1 otherwise; it never fails.
func (d dNull) Compare(other Datum) (int, error) {
	if other == DNull {
		return 0, nil
	}
	return -1, nil
}

// NewDUint16 returns a pointer to a copy of d, usable as a Datum.
func NewDUint16(d DUint16) *DUint16 {
	return &d
}

// NewDUint32 returns a pointer to a copy of d, usable as a Datum.
func NewDUint32(d DUint32) *DUint32 {
	return &d
}

// Compare orders d against other. DNull sorts before any value, another
// *DUint16 is compared numerically, and every other operand (including a nil
// Datum or a nil *DUint16) is rejected with an error.
func (d *DUint16) Compare(other Datum) (int, error) {
	if other == DNull {
		return 1, nil
	}
	rhs, ok := other.(*DUint16)
	if !ok || rhs == nil {
		return 0, fmt.Errorf("unsupported")
	}
	switch lhs := *d; {
	case lhs < *rhs:
		return -1, nil
	case lhs > *rhs:
		return 1, nil
	default:
		return 0, nil
	}
}

// Compare orders d against other. DNull sorts before any value, another
// *DUint32 is compared numerically, and every other operand (including a nil
// Datum or a nil *DUint32) is rejected with an error, matching
// (*DUint16).Compare, instead of being silently treated as 0.
func (d *DUint32) Compare(other Datum) (int, error) {
	if other == DNull {
		return 1, nil
	}
	rhs, ok := other.(*DUint32)
	if !ok || rhs == nil {
		return 0, fmt.Errorf("unsupported")
	}
	switch lhs := *d; {
	case lhs < *rhs:
		return -1, nil
	case lhs > *rhs:
		return 1, nil
	default:
		return 0, nil
	}
}
|
194
weed/data/read_test.go
Normal file
194
weed/data/read_test.go
Normal file
|
@ -0,0 +1,194 @@
|
|||
package data
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"github.com/seaweedfs/seaweedfs/weed/util"
|
||||
"io"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestRead(t *testing.T) {
|
||||
x := make([]uint16, 128)
|
||||
y := make([]uint32, 128)
|
||||
|
||||
for i := range x {
|
||||
x[i] = uint16(i)
|
||||
}
|
||||
for i := range y {
|
||||
y[i] = uint32(i * 32)
|
||||
}
|
||||
|
||||
xbuf := make([]byte, len(x)*SIZE_Uint16)
|
||||
ybuf := make([]byte, len(x)*SIZE_Uint32)
|
||||
|
||||
WriteUint16s(xbuf, x)
|
||||
WriteUint32s(ybuf, y)
|
||||
|
||||
df := &DataFile{
|
||||
xbuf: xbuf,
|
||||
ybuf: ybuf,
|
||||
xLen: len(xbuf),
|
||||
yLen: len(ybuf),
|
||||
xReaderAt: util.NewBytesReader(xbuf),
|
||||
yReaderAt: util.NewBytesReader(ybuf),
|
||||
}
|
||||
|
||||
dataLayout := make(map[FieldName]DataLayout)
|
||||
dataLayout["x"] = DataLayout{
|
||||
LayoutType: Uint16,
|
||||
SortType: Unsorted,
|
||||
}
|
||||
dataLayout["y"] = DataLayout{
|
||||
LayoutType: Uint32,
|
||||
SortType: Unsorted,
|
||||
}
|
||||
|
||||
rows, err := df.ReadRows("x", dataLayout, Equal, NewDUint16(65))
|
||||
if err != nil {
|
||||
fmt.Printf("err: %v", err)
|
||||
return
|
||||
}
|
||||
for _, row := range rows {
|
||||
fmt.Printf("row %d width %d ", row.index, len(row.Datums))
|
||||
for i, d := range row.Datums {
|
||||
fmt.Printf("%d: %v ", i, d)
|
||||
}
|
||||
fmt.Println()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Operator is a comparison applied between a column value and an operand.
type Operator int32

// LayoutType identifies the fixed-width encoding of a column.
type LayoutType int32

// SortType describes the ordering of the values stored in a column.
type SortType int32

// Comparison operators. Each group below uses iota so that every constant
// gets a distinct value; a bare identifier after "= 0" would repeat the 0.
const (
	Equal Operator = iota
	GreaterThan
	GreaterOrEqual
	LessThan
	LessOrEqual
)

// Column encodings.
const (
	Uint16 LayoutType = iota
	Uint32
)

// Column orderings.
const (
	Unsorted SortType = iota
	Ascending
	Descending
)
|
||||
|
||||
type DataFile struct {
|
||||
xbuf []byte
|
||||
ybuf []byte
|
||||
xReaderAt io.ReaderAt
|
||||
xLen int
|
||||
yReaderAt io.ReaderAt
|
||||
yLen int
|
||||
}
|
||||
|
||||
type DataLayout struct {
|
||||
LayoutType
|
||||
SortType
|
||||
}
|
||||
|
||||
type FieldName string
|
||||
|
||||
func (d *DataFile) ReadRows(field FieldName, layout map[FieldName]DataLayout, op Operator, operand Datum) (rows []*Row, err error) {
|
||||
if field == "x" {
|
||||
rows, err = pushDownReadRows(d.xReaderAt, d.xLen, layout[field], op, operand)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
err = hydrateRows(d.yReaderAt, d.yLen, layout["y"], rows)
|
||||
}
|
||||
if field == "y" {
|
||||
rows, err = pushDownReadRows(d.yReaderAt, d.yLen, layout[field], op, operand)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
err = hydrateRows(d.xReaderAt, d.xLen, layout["x"], rows)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
type Row struct {
|
||||
index int
|
||||
Datums
|
||||
}
|
||||
|
||||
func pushDownReadRows(readerAt io.ReaderAt, dataLen int, layout DataLayout, op Operator, operand Datum) (rows []*Row, err error) {
|
||||
if layout.LayoutType == Uint16 {
|
||||
if layout.SortType == Unsorted {
|
||||
buf := make([]byte, SIZE_Uint16)
|
||||
for i := 0; i < dataLen; i += SIZE_Uint16 {
|
||||
if n, err := readerAt.ReadAt(buf, int64(i)); n == SIZE_Uint16 && err == nil {
|
||||
d := NewDUint16(DUint16(binary.BigEndian.Uint16(buf)))
|
||||
cmp, err := d.Compare(operand)
|
||||
if err != nil {
|
||||
return rows, err
|
||||
}
|
||||
if cmp == 0 && op == Equal {
|
||||
println(1)
|
||||
rows = append(rows, &Row{
|
||||
index: i / SIZE_Uint16,
|
||||
Datums: []Datum{d},
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if layout.LayoutType == Uint32 {
|
||||
if layout.SortType == Unsorted {
|
||||
buf := make([]byte, SIZE_Uint32)
|
||||
for i := 0; i < dataLen; i += SIZE_Uint32 {
|
||||
if n, err := readerAt.ReadAt(buf, int64(i)); n == SIZE_Uint32 && err == nil {
|
||||
d := NewDUint32(DUint32(binary.BigEndian.Uint32(buf)))
|
||||
cmp, err := d.Compare(operand)
|
||||
if err != nil {
|
||||
return rows, err
|
||||
}
|
||||
if cmp == 0 && op == Equal {
|
||||
println(2)
|
||||
rows = append(rows, &Row{
|
||||
index: i / SIZE_Uint32,
|
||||
Datums: []Datum{d},
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func hydrateRows(readerAt io.ReaderAt, dataLen int, layout DataLayout, rows []*Row) (err error) {
|
||||
if layout.LayoutType == Uint16 {
|
||||
if layout.SortType == Unsorted {
|
||||
buf := make([]byte, SIZE_Uint16)
|
||||
for _, row := range rows {
|
||||
if n, err := readerAt.ReadAt(buf, int64(row.index)*SIZE_Uint16); n == SIZE_Uint16 && err == nil {
|
||||
t := binary.BigEndian.Uint16(buf)
|
||||
d := NewDUint16(DUint16(t))
|
||||
println(3, "add", t)
|
||||
row.Datums = append(row.Datums, d)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if layout.LayoutType == Uint32 {
|
||||
if layout.SortType == Unsorted {
|
||||
buf := make([]byte, SIZE_Uint32)
|
||||
for _, row := range rows {
|
||||
if n, err := readerAt.ReadAt(buf, int64(row.index)*SIZE_Uint32); n == SIZE_Uint32 && err == nil {
|
||||
t := binary.BigEndian.Uint32(buf)
|
||||
d := NewDUint32(DUint32(t))
|
||||
println(4, "add", t)
|
||||
row.Datums = append(row.Datums, d)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
Loading…
Reference in a new issue